The following 50 code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.abs_().
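As a quick orientation before the project examples: T.abs_() computes the element-wise absolute value of a tensor (the functional form of abs(x) on Theano variables). A minimal sketch, assuming the usual import aliases:

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
f = theano.function([x], T.abs_(x))  # element-wise absolute value
print(f(np.array([[-1., 2.], [-3., 0.]], dtype=theano.config.floatX)))
# [[1. 2.]
#  [3. 0.]]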
def iou_loss(p, t):
    # print "pass"
    tp, tt = p.reshape((p.shape[0], 2, 2)), t.reshape((t.shape[0], 2, 2))
    overlaps_t0 = T.maximum(tp[:, 0, :], tt[:, 0, :])
    overlaps_t1 = T.minimum(tp[:, 1, :], tt[:, 1, :])
    intersection = overlaps_t1 - overlaps_t0
    bool_overlap = T.min(intersection, axis=1) > 0
    intersection = intersection[:, 0] * intersection[:, 1]
    intersection = T.maximum(intersection, np.float32(0.))
    dims_p = tp[:, 1, :] - tp[:, 0, :]
    areas_p = dims_p[:, 0] * dims_p[:, 1]
    dims_t = tt[:, 1, :] - tt[:, 0, :]
    areas_t = dims_t[:, 0] * dims_t[:, 1]
    union = areas_p + areas_t - intersection
    loss = 1. - T.minimum(
        T.exp(T.log(T.abs_(intersection)) - T.log(T.abs_(union) + np.float32(1e-5))),
        np.float32(1.)
    )
    # return loss
    return T.mean(loss)
def discrim(X):
    current_input = dropout(X, 0.3)
    ### encoder ###
    cv1 = relu(dnn_conv(current_input, aew1, subsample=(1,1), border_mode=(1,1)))
    cv2 = relu(batchnorm(dnn_conv(cv1, aew2, subsample=(4,4), border_mode=(2,2)), g=aeg2, b=aeb2))
    cv3 = relu(batchnorm(dnn_conv(cv2, aew3, subsample=(1,1), border_mode=(1,1)), g=aeg3, b=aeb3))
    cv4 = relu(batchnorm(dnn_conv(cv3, aew4, subsample=(4,4), border_mode=(2,2)), g=aeg4, b=aeb4))
    cv5 = relu(batchnorm(dnn_conv(cv4, aew5, subsample=(1,1), border_mode=(1,1)), g=aeg5, b=aeb5))
    cv6 = relu(batchnorm(dnn_conv(cv5, aew6, subsample=(4,4), border_mode=(0,0)), g=aeg6, b=aeb6))

    ### decoder ###
    dv6 = relu(batchnorm(deconv(cv6, aew6, subsample=(4,4), border_mode=(0,0)), g=aeg6t, b=aeb6t))
    dv5 = relu(batchnorm(deconv(dv6, aew5, subsample=(1,1), border_mode=(1,1)), g=aeg5t, b=aeb5t))
    dv4 = relu(batchnorm(deconv(dv5, aew4, subsample=(4,4), border_mode=(2,2)), g=aeg4t, b=aeb4t))
    dv3 = relu(batchnorm(deconv(dv4, aew3, subsample=(1,1), border_mode=(1,1)), g=aeg3t, b=aeb3t))
    dv2 = relu(batchnorm(deconv(dv3, aew2, subsample=(4,4), border_mode=(2,2)), g=aeg2t, b=aeb2t))
    dv1 = tanh(deconv(dv2, aew1, subsample=(1,1), border_mode=(1,1)))
    rX = dv1

    # L1 and L2 loss
    mse = T.sqrt(T.sum(T.abs_(T.flatten(X - rX, 2)), axis=1)) \
        + T.sqrt(T.sum(T.flatten((X - rX) ** 2, 2), axis=1))
    return T.flatten(cv6, 2), rX, mse
def get_output_for(self, input, **kwargs):
    distances = conv_pairwise_distance(input, self.V)
    similarities = T.exp(-distances / T.abs_(self.gamma))
    norm = T.sum(similarities, 1).reshape((similarities.shape[0], 1, similarities.shape[2], similarities.shape[3]))
    membership = similarities / (norm + self.eps)

    histogram = T.mean(membership, axis=(2, 3))
    if self.spatial_level == 1:
        # integer division, so the pivots can be used as slice indices
        pivot1, pivot2 = membership.shape[2] // 2, membership.shape[3] // 2
        h1 = T.mean(membership[:, :, :pivot1, :pivot2], axis=(2, 3))
        h2 = T.mean(membership[:, :, :pivot1, pivot2:], axis=(2, 3))
        h3 = T.mean(membership[:, :, pivot1:, :pivot2], axis=(2, 3))
        h4 = T.mean(membership[:, :, pivot1:, pivot2:], axis=(2, 3))
        # Pyramid is not used in the paper
        # histogram = T.horizontal_stack(h1, h2, h3, h4)
        histogram = T.horizontal_stack(histogram, h1, h2, h3, h4)
    return histogram
def sym_logdensity(self, x):
    """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """
    def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma,
                                       activations_factor, p_prev, a_prev, x_prev):
        a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
        h = self.nonlinearity(a * activations_factor)  # BxH
        Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
        Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
        Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC
        p = p_prev + log_sum_exp(T.log(Alpha) - T.log(2 * Sigma)
                                 - T.abs_(Mu - T.shape_padright(x, 1)) / Sigma)
        return (p, a, x)

    # First element is different (it is predicted from the bias only)
    a0 = T.zeros_like(T.dot(x.T, self.W))  # BxH
    p0 = T.zeros_like(x[0])
    x0 = T.ones_like(x[0])
    ([ps, _as, _xs], updates) = theano.scan(
        density_given_previous_a_and_x,
        sequences=[x, self.W, self.V_alpha, self.b_alpha, self.V_mu, self.b_mu,
                   self.V_sigma, self.b_sigma, self.activation_rescaling],
        outputs_info=[p0, a0, x0])
    return (ps[-1], updates)
def binarize_conv_input(conv_input, k):
    # This is from BinaryNet: it acts like the sign function during the forward
    # pass and like hard tanh during backpropagation.
    bin_conv_out = binary_tanh_unit(conv_input)

    # scaling factor for the activation
    A = T.abs_(conv_input)

    # K holds a scaling matrix for each input in the batch.
    # K's shape = (batch_size, 1, map_height, map_width)
    k_shape = k.eval().shape
    pad = (k_shape[-2] // 2, k_shape[-1] // 2)
    # Support the kernel stride. This is necessary for AlexNet.
    K = theano.tensor.nnet.conv2d(A, k, border_mode=pad)
    return bin_conv_out, K
def huber(delta):
    """
    Huber loss, robust at 0

    :param delta: delta parameter
    :return: loss value
    """
    import theano.tensor as T

    def inner(target, output):
        d = target - output
        a = .5 * d ** 2
        b = delta * (T.abs_(d) - delta / 2.)
        l = T.switch(T.abs_(d) <= delta, a, b)
        return l
    return inner
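A quick sanity check of the Huber loss above: residuals at or below delta fall on the quadratic branch, larger ones on the linear branch. A usage sketch (input values are illustrative):

import numpy as np
import theano
import theano.tensor as T

target, output = T.vector('target'), T.vector('output')
f = theano.function([target, output], huber(1.0)(target, output))
print(f(np.array([0., 0.], dtype=theano.config.floatX),
        np.array([0.5, 3.], dtype=theano.config.floatX)))
# [0.125  2.5]  -- 0.5*d**2 for |d| <= delta, delta*(|d| - delta/2) otherwise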
def mean_absolute_error(y_true, y_pred):
    return T.abs_(y_pred - y_true).mean(axis=-1)
def mean_absolute_percentage_error(y_true, y_pred):
    return T.abs_((y_true - y_pred) / T.clip(T.abs_(y_true), epsilon, np.inf)).mean(axis=-1) * 100.
def smooth_l1_loss(predictions, targets, sigma=1.5):
    cond = np.float32(1. / sigma / sigma)
    point_five = np.float32(0.5)
    sigma_t = np.float32(sigma)
    sub_const = np.float32(0.5 / sigma / sigma)
    diff = T.abs_(predictions - targets)
    out = T.switch(T.lt(diff, cond),
                   point_five * sigma_t * diff * sigma_t * diff,
                   diff - sub_const)
    return T.mean(T.sum(out, axis=1))
def abs(x):
    return T.abs_(x)
def MeanAbsoluteError(y_true, y_pred):
    return T.abs_(y_pred - y_true).mean()
def __call__(self, x):
    return (x + T.abs_(x)) / 2.0
def __call__(self, x):
    return T.clip((x + T.abs_(x)) / 2.0, 0., self.clip)
def __call__(self, x):
    f1 = 0.5 * (1 + self.leak)
    f2 = 0.5 * (1 - self.leak)
    return f1 * x + f2 * T.abs_(x)
def __call__(self, x, leak):
    if x.ndim == 4:
        leak = leak.dimshuffle('x', 0, 'x', 'x')
    f1 = 0.5 * (1 + leak)
    f2 = 0.5 * (1 - leak)
    return f1 * x + f2 * T.abs_(x)
def L1Loss(y_pred, y_true):
    return T.abs_(y_pred - y_true).mean()
def TruncatedL1(y_pred, y_true, tr):
    return T.maximum(T.abs_(y_pred - y_true), tr).mean()
def fn(x):
    res = []
    for y in [(x + T.abs_(x)) / 2.0,
              (1.03 * x + 0.97 * T.abs_(x)) / 2.0,
              T.nnet.sigmoid(x),
              T.clip(x + 0.5, 0., 1.),
              T.clip(x, -1., 1.),
              T.tanh(x)]:
        res.append(y)
        res.append(T.grad(y, x))
    return res
def abs(self, x):
    return T.abs_(x)
def define_loss(self):
    self.pred_func = - TT.sum(TT.abs_(self.e[self.rows, :] + self.r[self.cols, :]
                                      - self.e[self.tubes, :]), 1)

    self.loss = TT.maximum(
        0,
        self.margin
        + TT.sum(TT.abs_(self.e[self.rows[:self.batch_size], :]
                         + self.r[self.cols[:self.batch_size], :]
                         - self.e[self.tubes[:self.batch_size], :]), 1)
        - (1.0 / self.neg_ratio) * TT.sum(
            TT.sum(TT.abs_(self.e[self.rows[self.batch_size:], :]
                           + self.r[self.cols[self.batch_size:], :]
                           - self.e[self.tubes[self.batch_size:], :]), 1)
            .reshape((int(self.batch_size), int(self.neg_ratio))), 1)
    ).mean()

    self.regul_func = 0
def abs(self, t):
    return T.abs_(t)
def __init__(self, name):
    self.name = name
    self.options = {
        "tanh": [T.tanh, np.tanh],
        "sigmoid": [T.nnet.sigmoid, lambda x: 1.0 / (1.0 + np.exp(-x))],
        "RLU": [lambda x: x * (x > 0), lambda x: x * (x > 0)],
        "softsign": [lambda x: x / (1 + T.abs_(x)), lambda x: x / (1 + np.abs(x))],
        "exponential": [T.exp, np.exp],
    }
# Note: `c` below is a module-level constant in the original source,
# presumably the Gaussian normalization constant -0.5 * log(2*pi).
def log_normal(x, mean, std, eps=1e-5):
    r"""
    Compute log pdf of a Gaussian distribution with diagonal covariance, at values x.
    Variance is parameterized as standard deviation.

    .. math:: \log p(x) = \log \mathcal{N}(x; \mu, \sigma^2 I)

    Parameters
    ----------
    x : Theano tensor
        Values at which to evaluate pdf.
    mean : Theano tensor
        Mean of the Gaussian distribution.
    std : Theano tensor
        Standard deviation of the diagonal covariance Gaussian.
    eps : float
        Small number added to standard deviation to avoid NaNs.

    Returns
    -------
    Theano tensor
        Element-wise log probability; this has to be summed for multi-variate distributions.

    See also
    --------
    log_normal1 : using variance parameterization
    log_normal2 : using log variance parameterization
    """
    abs_std = T.abs_(std) + eps
    return c - T.log(abs_std) - (x - mean) ** 2 / (2 * abs_std ** 2)
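Once c is defined as noted above, a quick numeric check is possible. A sketch (the value of c here is an assumption based on the standard Gaussian normalizer):

import math
import numpy as np
import theano
import theano.tensor as T

c = -0.5 * math.log(2 * math.pi)  # assumed normalization constant
x = T.vector('x')
f = theano.function([x], log_normal(x, 0., 1.))
print(f(np.array([0.], dtype=theano.config.floatX)))
# ~[-0.9189], i.e. log(1/sqrt(2*pi)), the standard normal log density at 0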
def grad_clipping(g, t):
    return T.switch(T.abs_(g) >= t, t / T.abs_(g) * g, g)
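A small check of the clipping rule: entries whose magnitude reaches the threshold t are rescaled to magnitude exactly t, the rest pass through unchanged. A sketch with an arbitrary threshold:

import numpy as np
import theano
import theano.tensor as T

g = T.vector('g')
f = theano.function([g], grad_clipping(g, 1.0))
print(f(np.array([-3., 0.5, 2.], dtype=theano.config.floatX)))
# [-1.   0.5  1. ]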
def __init__(self, x_in, n_in, n_out, activation=[], seed=0):
    """
    Initialize the neural network

    Inputs:
        - x_in: symbolic variable representing the input to the network
        - n_in: number of dimensions the input will have
        - n_out: a list with the number of units the hidden/output layers should have
        - activation: if any, the activation functions applied to the hidden/output layers
        - seed: initial random seed used for the initialization of the layers
    """
    if activation:
        assert len(n_out) == len(activation), "need as many activation functions as layers"
    rng = np.random.RandomState(seed)
    # create all the layers
    self.layers = []
    self.params = []
    for i, n in enumerate(n_out):
        # layers get as input x_in or the output of the previous layer
        self.layers.append(
            NNLayer(x_in if not self.layers else self.layers[-1].output,
                    n_out[i - 1] if i else n_in, n,
                    activation[i] if activation else None, rng)
        )
        self.params.extend(self.layers[-1].params)
    self.output = self.layers[-1].output

    # Define regularization
    # L1 norm
    self.L1 = sum([abs(l.W).sum() for l in self.layers])
    # square of L2 norm
    self.L2_sqr = sum([(l.W ** 2).sum() for l in self.layers])
    # orthogonalization of weights in the NN (probably not - only in the linear case)
    self.orthNN = sum([T.abs_(T.dot(l.W.T, l.W)
                              - T.nlinalg.diag(T.nlinalg.diag(T.dot(l.W.T, l.W)))).sum()
                       / float(l.W.get_value().shape[1])
                       for l in self.layers[:1]]) / float(len(self.layers[:1]))
    # orthogonalization of weights from embedding to output, as YY^T is the
    # eigendecomposition, i.e. W_1 should be orthogonal;
    # normalize by 1/(d**2-d) to be independent of the dimensionality of the embedding
    d = self.layers[-1].W.get_value().shape[0]
    self.orthOT = T.abs_(T.dot(self.layers[-1].W, self.layers[-1].W.T)
                         - T.nlinalg.diag(T.nlinalg.diag(
                             T.dot(self.layers[-1].W, self.layers[-1].W.T)))).sum() / float(d * d - d)
    # unit length weights in the last layer
    self.normOT = T.abs_(1. - T.sqrt((self.layers[-1].W ** 2).sum(axis=0))).sum() \
        / float(self.layers[-1].W.get_value().shape[1])
def abs(inp):
    return T.abs_(inp)
def adamax(l_rate, beta1=0.9, beta2=0.999, epsilon=1e-6, parameters=None, grads=None):
    one = T.constant(1.)
    t = theano.shared(name='iteration', value=np.float32(1.))

    def update_rule(param, moment, u, df):
        m_t = beta1 * moment + (one - beta1) * df
        u_t = T.maximum(beta2 * u, T.abs_(df))
        # epsilon (otherwise unused) guards against division by zero
        x = (l_rate / (one - beta1 ** t)) * (m_t / (u_t + epsilon))
        updates = (param, param - x), (moment, m_t), (u, u_t)
        return updates

    moments = [theano.shared(name='m_{}'.format(param), value=param.get_value() * 0.,
                             broadcastable=param.broadcastable) for param in parameters]
    upd = [theano.shared(name='u_{}'.format(param), value=param.get_value() * 0.,
                         broadcastable=param.broadcastable) for param in parameters]
    updates = []
    for p, m, u, g in zip(parameters, moments, upd, grads):
        p_update, m_update, u_update = update_rule(p, m, u, g)
        updates.append(p_update)
        updates.append(m_update)
        updates.append(u_update)
    # increment the iteration counter once per call, not once per parameter
    updates.append((t, t + 1))
    return updates
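The returned list of update pairs plugs directly into theano.function. A usage sketch minimizing a toy quadratic with a single shared parameter (names and values are illustrative):

import numpy as np
import theano
import theano.tensor as T

w = theano.shared(5.0, name='w')
cost = (w - 2.) ** 2
updates = adamax(l_rate=0.5, parameters=[w], grads=T.grad(cost, [w]))
step = theano.function([], cost, updates=updates)
for _ in range(200):
    step()
print(w.get_value())  # should end up close to 2.0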
def binarize_conv_filters(W):
    """Binarize convolution weights and find the weight scaling factor
    W : theano tensor : convolution layer weight of dimension no_filters x no_feat_maps x h x w
    """
    # symbolic binary weight
    Wb = T.cast(T.switch(T.ge(W, 0), 1, -1), theano.config.floatX)
    # BinaryNet method
    # Wb = T.cast(T.switch(T.round(hard_sigmoid(W)), 1, -1), theano.config.floatX)

    # weight scaling factor
    # FIXME: directly compute the mean along axis 1,2,3 instead of reshaping
    alpha = T.mean(
        T.reshape(T.abs_(W), (W.shape[0], W.shape[1] * W.shape[2] * W.shape[3])),
        axis=1)
    return Wb, alpha
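For intuition: after binarization each filter is approximated as alpha[i] * Wb[i] with Wb in {-1, +1}, where alpha[i] is the mean absolute value of the original filter (XNOR-Net-style scaling). A sketch with a random weight tensor (shapes are illustrative):

import numpy as np
import theano

W = theano.shared(np.random.randn(8, 3, 5, 5).astype(theano.config.floatX))
Wb, alpha = binarize_conv_filters(W)
f = theano.function([], [Wb, alpha])
Wb_val, alpha_val = f()
print(Wb_val.shape, alpha_val.shape)  # (8, 3, 5, 5) (8,)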
def binarize_fc_weights(W):
    # symbolic binary weight
    Wb = T.cast(T.switch(T.ge(W, 0), 1, -1), theano.config.floatX)
    # BinaryNet method
    # Wb = T.cast(T.switch(T.round(hard_sigmoid(W)), 1, -1), theano.config.floatX)
    alpha = T.mean(T.abs_(W), axis=0)
    return Wb, alpha
def binarize_fc_input(fc_input):
    bin_out = binary_tanh_unit(fc_input)

    if fc_input.ndim == 4:
        # previous layer is conv or pooling, hence compute the L1 norm using all maps
        beta = T.mean(T.abs_(fc_input), axis=[1, 2, 3])
    else:
        # feeding layer is an FC layer
        beta = T.mean(T.abs_(fc_input), axis=1)
    return bin_out, beta
def binarize_conv_input(conv_input, k):
    bin_conv_out = SignTheano(conv_input)

    # scaling factor for the activation
    A = T.abs_(conv_input)

    # K holds a scaling matrix for each input in the batch.
    # K's shape = (batch_size, 1, map_height, map_width)
    k_shape = k.eval().shape
    pad = (k_shape[-2] // 2, k_shape[-1] // 2)
    K = theano.tensor.nnet.conv2d(A, k, border_mode=pad)
    return bin_conv_out, K
def custom_objective1(y_true, y_pred):
    """
    Custom objective function

    :param y_true: real value
    :param y_pred: predicted value
    :return: cost
    """
    # weight_matrix = ((y1 * y) < 0)
    weight_matrix = 1 * ((y_true * y_pred) < 0)
    # T.abs_(y1-y)  # (y1-y)**2  # (weight_matrix)
    return T.mean(0.5 * (1 + weight_matrix) * (y_true - y_pred) ** 2)
def custom_objective2(y_true, y_pred):
    """
    Custom objective function

    :param y_true: real value
    :param y_pred: predicted value
    :return: cost
    """
    # weight_matrix = ((y1 * y) < 0)
    weight_matrix = T.exp(T.abs_(y_true - y_pred))
    # T.abs_(y1-y)  # (y1-y)**2  # (weight_matrix)
    return T.mean(0.5 * weight_matrix * (y_true - y_pred) ** 2)
def policy_loss(values, a_probs, norm=True, entropy_coeff=.0):
    bias = T.sum(a_probs * values, axis=1, keepdims=True)
    adv = (values - bias)
    if norm:
        adv /= (T.abs_(bias) + 1e-8)
    adv = theano.gradient.disconnected_grad(adv)
    objective = a_probs * adv
    entropy = -1. * T.sum(T.log(a_probs + 1e-8) * a_probs, axis=1, keepdims=True)
    actor_loss = -1. * T.mean(objective + entropy_coeff * entropy, axis=-1)
    return actor_loss
def new_attention_step(self, ct, prev_g, mem, q_q):
    cWq = T.stack([T.dot(T.dot(ct, self.W_b), q_q)])
    cWm = T.stack([T.dot(T.dot(ct, self.W_b), mem)])
    z = T.concatenate([ct, mem, q_q, ct * q_q, ct * mem,
                       T.abs_(ct - q_q), T.abs_(ct - mem), cWq, cWm])
    l_1 = T.dot(self.W_1, z) + self.b_1
    l_1 = T.tanh(l_1)
    l_2 = T.dot(self.W_2, l_1) + self.b_2
    G = T.nnet.sigmoid(l_2)[0]
    return G