We extracted the following 44 code examples from Python open-source projects to illustrate how to use chainer.functions.expand_dims().
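Before the project examples, here is a minimal, self-contained sketch (not taken from any of the listed projects) of what F.expand_dims does: it inserts a new axis of length 1 at the given position, mirroring numpy.expand_dims, and returns a chainer.Variable.

import numpy as np
import chainer.functions as F

# Minimal illustrative usage (assumed example, not from the projects below)
x = np.arange(6, dtype=np.float32).reshape(2, 3)  # shape (2, 3)
y = F.expand_dims(x, axis=0)                      # shape (1, 2, 3)
z = F.expand_dims(x, axis=-1)                     # shape (2, 3, 1)
print(y.shape, z.shape)                           # (1, 2, 3) (2, 3, 1)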
def __call__(self, state):
    h = state
    for layer in self.hidden_layers:
        h = F.relu(layer(h))
    v = self.v(h)
    mu = self.mu(h)

    if self.scale_mu:
        mu = scale_by_tanh(mu, high=self.action_space.high,
                           low=self.action_space.low)

    mat_diag = F.exp(self.mat_diag(h))
    if hasattr(self, 'mat_non_diag'):
        mat_non_diag = self.mat_non_diag(h)
        tril = lower_triangular_matrix(mat_diag, mat_non_diag)
        mat = matmul_v3(tril, tril, transb=True)
    else:
        mat = F.expand_dims(mat_diag ** 2, axis=2)
    return QuadraticActionValue(
        mu, mat, v, min_action=self.action_space.low,
        max_action=self.action_space.high)
def __call__(self, state):
    h = self.hidden_layers(state)
    v = self.v(h)
    mu = self.mu(h)

    if self.scale_mu:
        mu = scale_by_tanh(mu, high=self.action_space.high,
                           low=self.action_space.low)

    mat_diag = F.exp(self.mat_diag(h))
    if hasattr(self, 'mat_non_diag'):
        mat_non_diag = self.mat_non_diag(h)
        tril = lower_triangular_matrix(mat_diag, mat_non_diag)
        mat = matmul_v3(tril, tril, transb=True)
    else:
        mat = F.expand_dims(mat_diag ** 2, axis=2)
    return QuadraticActionValue(
        mu, mat, v, min_action=self.action_space.low,
        max_action=self.action_space.high)
def predict(self, xs):
    """
    batch: list of split sentences
    """
    xs = [self.extractor.process(x) for x in xs]
    batchsize = len(xs)
    ws, cs, ls = zip(*xs)
    ws = map(self.emb_word, ws)
    cs = [F.squeeze(
        F.max_pooling_2d(
            self.conv_char(
                F.expand_dims(
                    self.emb_char(c), 1)), (l, 1)))
          for c, l in zip(cs, ls)]
    xs_f = [F.dropout(F.concat([w, c]),
                      self.dropout_ratio, train=self.train)
            for w, c in zip(ws, cs)]
    xs_b = [x[::-1] for x in xs_f]
    cx_f, hx_f, cx_b, hx_b = self._init_state(batchsize)
    _, _, hs_f = self.lstm_f(hx_f, cx_f, xs_f, train=self.train)
    _, _, hs_b = self.lstm_b(hx_b, cx_b, xs_b, train=self.train)
    hs_b = [x[::-1] for x in hs_b]
    ys = [self.linear2(F.relu(self.linear1(F.concat([h_f, h_b]))))
          for h_f, h_b in zip(hs_f, hs_b)]
    return [y.data[1:-1] for y in ys]
def __call__(self, X, ht_enc):
    pad = self._kernel_size - 1
    WX = self.W(X)
    if pad > 0:
        WX = WX[..., :-pad]
    Vh = self.V(ht_enc)

    # copy Vh
    # e.g.
    # WX = [[[ 0 1 2]
    #        [ 3 4 5]
    #        [ 6 7 8]
    # Vh = [[11, 12, 13]]
    #
    # Vh, WX = F.broadcast(F.expand_dims(Vh, axis=2), WX)
    #
    # WX = [[[ 0 1 2]
    #        [ 3 4 5]
    #        [ 6 7 8]
    # Vh = [[[ 11 11 11]
    #        [ 12 12 12]
    #        [ 13 13 13]
    Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

    return self.pool(functions.split_axis(WX + Vh, self.num_split, axis=1))
def _context(self, p, fb_mat, fbe_mat):
    batch_size, source_length, _ = fb_mat.data.shape
    # {pe,e}_mat: shape = [batch * srclen, atten]
    pe_mat = F.reshape(
        F.broadcast_to(
            F.expand_dims(self.p_e(p), 1),
            [batch_size, source_length, self.atten_size]),
        [batch_size * source_length, self.atten_size])
    e_mat = F.tanh(fbe_mat + pe_mat)
    # a_mat: shape = [batch, srclen]
    a_mat = F.softmax(F.reshape(self.e_a(e_mat), [batch_size, source_length]))
    # q: shape = [batch, 2 * hidden]
    q = F.reshape(
        F.batch_matmul(a_mat, fb_mat, transa=True),
        [batch_size, 2 * self.hidden_size])
    return q
def __call__(self, x):
    minibatch_size = x.shape[0]
    activation = F.reshape(self.t(x), (-1, self.n_kernels, self.kernel_dim))
    activation_ex = F.expand_dims(activation, 3)
    activation_ex_t = F.expand_dims(F.transpose(activation, (1, 2, 0)), 0)
    activation_ex, activation_ex_t = F.broadcast(activation_ex, activation_ex_t)
    diff = activation_ex - activation_ex_t
    xp = chainer.cuda.get_array_module(x.data)
    eps = F.expand_dims(xp.eye(minibatch_size, dtype=xp.float32), 1)
    eps = F.broadcast_to(eps, (minibatch_size, self.n_kernels, minibatch_size))
    sum_diff = F.sum(abs(diff), axis=2)
    sum_diff = F.broadcast_to(sum_diff, eps.shape)
    abs_diff = sum_diff + eps
    minibatch_features = F.sum(F.exp(-abs_diff), 2)
    return F.concat((x, minibatch_features), axis=1)
def __call__(self, x):
    xp = chainer.cuda.get_array_module(x.data)
    batchsize = x.shape[0]

    if self.train_weights == False and self.initial_T is not None:
        self.T.W.data = self.initial_T

    M = F.reshape(self.T(x), (-1, self.num_kernels, self.ndim_kernel))
    M = F.expand_dims(M, 3)
    M_T = F.transpose(M, (3, 1, 2, 0))
    M, M_T = F.broadcast(M, M_T)

    norm = F.sum(abs(M - M_T), axis=2)
    eraser = F.broadcast_to(
        xp.eye(batchsize, dtype=x.dtype).reshape((batchsize, 1, batchsize)),
        norm.shape)
    c_b = F.exp(-(norm + 1e6 * eraser))
    o_b = F.sum(c_b, axis=2)

    if self.train_weights == False:
        self.initial_T = self.T.W.data

    return F.concat((x, o_b), axis=1)
def __call__(self, v, h, label):
    v_t = self.vertical_conv_t(v)
    v_s = self.vertical_conv_s(v)
    to_vertical_t = self.v_to_h_conv_t(v_t)
    to_vertical_s = self.v_to_h_conv_s(v_s)

    # v_gate = self.vertical_gate_conv(v)
    # label bias is added to both vertical and horizontal conv
    # here we take only shape as it should be the same
    label = F.broadcast_to(
        F.expand_dims(F.expand_dims(self.label(label), -1), -1), v_t.shape)
    v_t, v_s = v_t + label, v_s + label
    v = F.tanh(v_t) * F.sigmoid(v_s)

    h_t = self.horizontal_conv_t(h)
    h_s = self.horizontal_conv_s(h)
    h_t, h_s = h_t + to_vertical_t + label, h_s + to_vertical_s + label
    h = self.horizontal_output(F.tanh(h_t) * F.sigmoid(h_s))

    return v, h
def __call__(self, x, action):
    h1 = F.relu(self.conv1(x))
    index = F.expand_dims(np.array(action, dtype=np.int32), axis=0)
    h2 = F.relu(self.embed_conv2d(index, x))
    h = F.concat((h1, h2), axis=1)  # Glue together the action convolutions
    h = F.relu(self.conv2(h))
    h = F.relu(self.conv3(h))
    h = F.relu(self.conv_gru1(h))
    h_img = F.relu(self.deconv1(h))
    h_img = self.deconv2(h_img)
    h_action = F.relu(self.linear1(h))
    h_action = self.linear2(h_action)
    return h_img, h_action
def __call__(self, x_image, t_image, x_action, t_action):
    self.y_image, self.y_action = self.predictor(x_image, x_action)
    predicted_action = self.action_meaning(
        F.argmax(self.y_action, axis=1).data[0])
    real_action = self.action_meaning(t_action)
    if predicted_action != real_action:
        print("Predicted action:", predicted_action,
              "it was actually", real_action)
    image_loss = F.mean_squared_error(self.y_image, t_image)
    self.error_mask = normalize_2d(F.squared_error(self.y_image, t_image))
    action_loss = F.softmax_cross_entropy(
        self.y_action,
        F.expand_dims(np.array(t_action, dtype=np.int32), axis=0),
    )
    print('Image loss', image_loss.data, ', Action loss:', action_loss.data)
    return self.weight * image_loss + (1.0 - self.weight) * action_loss
def scale_by_tanh(x, low, high):
    xp = cuda.get_array_module(x.data)
    scale = (high - low) / 2
    scale = xp.expand_dims(xp.asarray(scale, dtype=np.float32), axis=0)
    mean = (high + low) / 2
    mean = xp.expand_dims(xp.asarray(mean, dtype=np.float32), axis=0)
    return F.tanh(x) * scale + mean
def update_on_policy(self, statevar):
    assert self.t_start < self.t

    if not self.disable_online_update:
        next_values = {}
        for t in range(self.t_start + 1, self.t):
            next_values[t - 1] = self.past_values[t]
        if statevar is None:
            next_values[self.t - 1] = chainer.Variable(
                self.xp.zeros_like(self.past_values[self.t - 1].data))
        else:
            with state_kept(self.model):
                _, v = self.model(statevar)
            next_values[self.t - 1] = v
        log_probs = {t: self.past_action_distrib[t].log_prob(
            self.xp.asarray(self.xp.expand_dims(a, 0)))
            for t, a in self.past_actions.items()}
        self.online_batch_losses.append(self.compute_loss(
            t_start=self.t_start, t_stop=self.t,
            rewards=self.past_rewards,
            values=self.past_values,
            next_values=next_values,
            log_probs=log_probs))
        if len(self.online_batch_losses) == self.batchsize:
            loss = chainerrl.functions.sum_arrays(
                self.online_batch_losses) / self.batchsize
            self.update(loss)
            self.online_batch_losses = []

    self.init_history_data_for_online_update()
def forward(self, ws, cs, ls, dep_ts=None):
    batchsize = len(ws)
    xp = chainer.cuda.get_array_module(ws[0])
    ws = map(self.emb_word, ws)
    cs = [F.squeeze(
        F.max_pooling_2d(
            self.conv_char(
                F.expand_dims(
                    self.emb_char(c), 1)), (int(l[0]), 1)))
          for c, l in zip(cs, ls)]
    xs_f = [F.dropout(F.concat([w, c]),
                      self.dropout_ratio, train=self.train)
            for w, c in zip(ws, cs)]
    xs_b = [x[::-1] for x in xs_f]
    cx_f, hx_f, cx_b, hx_b = self._init_state(xp, batchsize)
    _, _, hs_f = self.lstm_f(hx_f, cx_f, xs_f, train=self.train)
    _, _, hs_b = self.lstm_b(hx_b, cx_b, xs_b, train=self.train)
    hs_b = [x[::-1] for x in hs_b]

    hs = [F.concat([h_f, h_b]) for h_f, h_b in zip(hs_f, hs_b)]

    dep_ys = [self.biaffine_arc(
        F.elu(F.dropout(self.arc_dep(h), 0.32, train=self.train)),
        F.elu(F.dropout(self.arc_head(h), 0.32, train=self.train)))
        for h in hs]

    if dep_ts is not None:
        heads = dep_ts
    else:
        heads = [F.argmax(y, axis=1) for y in dep_ys]

    cat_ys = [self.biaffine_tag(
        F.elu(F.dropout(self.rel_dep(h), 0.32, train=self.train)),
        F.elu(F.dropout(self.rel_head(
            F.embed_id(t, h, ignore_label=IGNORE)), 0.32, train=self.train)))
        for h, t in zip(hs, heads)]

    return cat_ys, dep_ys
def forward(self, ws, cs, ls):
    """
    xs [(w,s,p,y), ..., ]
    w: word, c: char, l: length, y: label
    """
    batchsize = len(ws)
    # cs: [(sentence length, max word length)]
    ws = map(self.emb_word, ws)
    # ls: [(sentence length, char dim)]
    # before conv: (sent len, 1, max word len, char_size)
    # after conv: (sent len, char_size, max word len, 1)
    # after max_pool: (sent len, char_size, 1, 1)
    cs = [F.squeeze(
        F.max_pooling_2d(
            self.conv_char(
                F.expand_dims(
                    self.emb_char(c), 1)), (l, 1)))
          for c, l in zip(cs, ls)]
    # [(sentence length, (word_dim + char_dim))]
    xs_f = [F.dropout(F.concat([w, c]),
                      self.dropout_ratio, train=self.train)
            for w, c in zip(ws, cs)]
    xs_b = [x[::-1] for x in xs_f]
    cx_f, hx_f, cx_b, hx_b = self._init_state(batchsize)
    _, _, hs_f = self.lstm_f(hx_f, cx_f, xs_f, train=self.train)
    _, _, hs_b = self.lstm_b(hx_b, cx_b, xs_b, train=self.train)
    hs_b = [x[::-1] for x in hs_b]
    # ys: [(sentence length, number of category)]
    hs = [F.concat([h_f, h_b]) for h_f, h_b in zip(hs_f, hs_b)]
    cat_ys = [self.linear_cat2(F.relu(self.linear_cat1(h))) for h in hs]
    dep_ys = [self.biaffine(
        F.relu(F.dropout(self.linear_dep(h), 0.32, train=self.train)),
        F.relu(F.dropout(self.linear_head(h), 0.32, train=self.train)))
        for h in hs]
    return cat_ys, dep_ys
def get_greedy_action(Q, obs, show_f=False):
    xp = Q.xp
    obs = xp.expand_dims(xp.asarray(obs, dtype=np.float32), 0)
    with chainer.no_backprop_mode():
        f = Q.feature(obs)
        q = Q.predict(f)[0]
        # q = Q(obs).data[0]
    if show_f:
        show_feature(f)
    return int(xp.argmax(q))
def mean_clipped_loss(y, t):
    # Add an axis because F.huber_loss only accepts arrays with ndim >= 2
    y = F.expand_dims(y, axis=-1)
    t = F.expand_dims(t, axis=-1)
    return F.sum(F.huber_loss(y, t, 1.0)) / y.shape[0]
def get_greedy_action(Q, obs):
    xp = Q.xp
    obs = xp.expand_dims(xp.asarray(obs, dtype=np.float32), 0)
    with chainer.no_backprop_mode():
        q = Q(obs).data[0]
    return int(xp.argmax(q))
def _attend(self, p):
    p = self.xh(p)
    p = F.expand_dims(p, 1)
    p = F.broadcast_to(p, self.shape2)
    h = F.tanh(self.h + p)
    shape3 = (self.batchsize * self.src_len, self.dim_hid)
    h_reshaped = F.reshape(h, shape3)
    weight_reshaped = self.hw(h_reshaped)
    weight = F.reshape(weight_reshaped, (self.batchsize, self.src_len, 1))
    weight = F.where(self.mask, weight, self.minf)
    attention = F.softmax(weight)
    return attention
def __call__(self, x):
    return functions.expand_dims(x, self.axis)
def forward_one_step(self, X, ht_enc):
    pad = self._kernel_size - 1
    WX = self.W(X)[..., -pad-1, None]
    Vh = self.V(ht_enc)
    Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)
    return self.pool(functions.split_axis(WX + Vh, self.num_split, axis=1))
def check_forward(self, x_data):
    x = chainer.Variable(x_data)
    y = functions.expand_dims(x, self.axis)
    self.assertEqual(y.data.shape, self.out_shape)
    y_expect = numpy.expand_dims(cuda.to_cpu(x_data), self.axis)
    self.assertEqual(y.data.dtype, self.dtype)
    numpy.testing.assert_array_equal(cuda.to_cpu(y.data), y_expect)
def check_backward(self, x_data):
    x = chainer.Variable(x_data)
    y = functions.expand_dims(x, self.axis)
    y.grad = y.data
    y.backward()
    gradient_check.assert_allclose(x.data, x.grad, atol=0, rtol=0)
def test_invalid_dim(self):
    x = chainer.Variable(self.x)
    with self.assertRaises(chainer.utils.type_check.InvalidType):
        functions.expand_dims(x, self.x.ndim + 1)
    with self.assertRaises(chainer.utils.type_check.InvalidType):
        functions.expand_dims(x, -self.x.ndim - 2)
def _encode(self, x_list):
    batch_size = len(x_list[0])
    source_length = len(x_list)
    # Encoding
    fc = bc = f = b = _zeros((batch_size, self.hidden_size))
    i_list = [self.x_i(_mkivar(x)) for x in x_list]
    f_list = []
    b_list = []
    for i in i_list:
        fc, f = F.lstm(fc, self.i_f(i) + self.f_f(f))
        f_list.append(f)
    for i in reversed(i_list):
        bc, b = F.lstm(bc, self.i_b(i) + self.b_b(b))
        b_list.append(b)
    b_list.reverse()
    # Making concatenated matrix
    # {f,b}_mat: shape = [batch, srclen, hidden]
    f_mat = F.concat([F.expand_dims(f, 1) for f in f_list], 1)
    b_mat = F.concat([F.expand_dims(b, 1) for b in b_list], 1)
    # fb_mat: shape = [batch, srclen, 2 * hidden]
    fb_mat = F.concat([f_mat, b_mat], 2)
    # fbe_mat: shape = [batch * srclen, atten]
    fbe_mat = self.fb_e(
        F.reshape(fb_mat, [batch_size * source_length, 2 * self.hidden_size]))
    return fb_mat, fbe_mat, fc, bc, f_list[-1], b_list[0]
def __call__(self, chars):
    xp = self.xp
    if not isinstance(chars, (tuple, list)):
        chars = [chars]
    lengths = [len(_chars) for _chars in chars]
    n_words = len(lengths)
    pad_width = self._pad_width

    char_ids = F.PadSequence(length=max(lengths) + pad_width,
                             padding=self._pad_id).forward(chars)[0]
    left_pads = xp.full((n_words, pad_width), self._pad_id, xp.int32)
    char_ids = xp.concatenate((left_pads, xp.array(char_ids)), axis=1)
    """note: cupy does not have `inf`."""
    mask = xp.full(char_ids.shape, np.inf)
    for i, length in enumerate(lengths):
        mask[i, pad_width:pad_width + length] = 0
    mask = xp.expand_dims(mask, axis=2)

    xs = self.embed(char_ids)
    xs = F.dropout(xs, self._dropout)
    C = self.conv(F.expand_dims(xs, axis=1))
    C = F.transpose(F.squeeze(C, axis=3), (0, 2, 1))
    """
    assert C.shape == (n_words,
                       pad_width + max(lengths) + pad_width,
                       self.out_size)
    """
    ys = F.max(C - mask, axis=1)
    return ys
def kl_div(mu1, lv1, lv2):
    # KL Divergence between given normal and prior at N(0, sigma_2)
    # Prior assumes mean at zero
    # lns2 - lns1 + (s1^2 + (u1 - u2)**2) / 2s2**2 - 0.5
    if len(lv1.shape) == 2:
        lv1 = F.expand_dims(lv1, 0)
        mu1 = F.expand_dims(mu1, 0)
    lv2 = F.broadcast_to(lv2, lv1.shape)
    v12 = F.exp(lv1) ** 2.0
    v22 = F.exp(lv2) ** 2.0
    return lv2 - lv1 + .5 * v12 / v22 + .5 * mu1 ** 2. / v22 - .5
def calc_loss_recurrent(self, frames, actions, rewards, done_list, size_list):
    # TODO self.max_step -> max_step
    s = Variable(frames.astype(np.float32))

    self.model_target.reset_state()   # Refresh model_target's state
    self.model_target.q_function(s[0])  # Update target model initial state

    target_q = self.xp.zeros((self.max_step, self.replay_batch_size), dtype=np.float32)
    selected_q_tuple = [None for _ in range(self.max_step)]

    for frame in range(0, self.max_step):
        q = self.model.q_function(s[frame])
        q_dash = self.model_target.q_function(s[frame+1])  # Q(s',*): shape is (batch_size, action_num)
        max_q_dash = q_dash.data.max(axis=1)  # max_a Q(s',a): shape is (batch_size,)
        if self.clipping:
            rs = self.xp.sign(rewards[frame])
        else:
            rs = rewards[frame]
        target_q[frame] = rs + self.xp.logical_not(done_list[frame]).astype(np.int) * (self.gamma * max_q_dash)
        selected_q_tuple[frame] = F.select_item(q, actions[frame].astype(np.int))

    enable = self.xp.broadcast_to(self.xp.arange(self.max_step),
                                  (self.replay_batch_size, self.max_step))
    size_list = self.xp.expand_dims(cuda.to_gpu(size_list), -1)
    enable = (enable < size_list).T

    selected_q = F.concat(selected_q_tuple, axis=0)

    # element-wise huber loss
    huber_loss = F.huber_loss(
        F.expand_dims(F.flatten(target_q), axis=1),
        F.expand_dims(selected_q, axis=1), delta=1.0)
    huber_loss = F.reshape(huber_loss, enable.shape)

    zeros = self.xp.zeros(enable.shape, dtype=np.float32)
    loss = F.sum(F.where(enable, huber_loss, zeros))  # / self.replay_batch_size
    # print("loss", loss.data)
    return loss
def __init__(self, *args, mask='B', **kwargs):
    super(MaskedConvolution2D, self).__init__(
        *args, **kwargs
    )

    Cout, Cin, kh, kw = self.W.shape
    pre_mask = self.xp.ones_like(self.W.data).astype('f')
    yc, xc = kh // 2, kw // 2

    # context masking - subsequent pixels won't have access to next pixels (spatial dim)
    pre_mask[:, :, yc+1:, :] = 0.0
    pre_mask[:, :, yc:, xc+1:] = 0.0

    # same pixel masking - pixel won't access next color (conv filter dim)
    def bmask(i_out, i_in):
        cout_idx = np.expand_dims(np.arange(Cout) % 3 == i_out, 1)
        cin_idx = np.expand_dims(np.arange(Cin) % 3 == i_in, 0)
        a1, a2 = np.broadcast_arrays(cout_idx, cin_idx)
        return a1 * a2

    for j in range(3):
        pre_mask[bmask(j, j), yc, xc] = 0.0 if mask == 'A' else 1.0

    pre_mask[bmask(0, 1), yc, xc] = 0.0
    pre_mask[bmask(0, 2), yc, xc] = 0.0
    pre_mask[bmask(1, 2), yc, xc] = 0.0

    self.mask = pre_mask
def __call__(self, x, train=True):
    h_x = self.embed(x)
    n_words = h_x.shape[1]
    h_x = F.expand_dims(h_x, 1)
    h_x = F.relu(self.bnorm1(self.cnn1(h_x)))
    h_x = F.max_pooling_2d(h_x, (n_words, self.__vec_size))
    h_x = F.relu(self.l1(h_x))
    return self.l2(h_x)
def __call__(self, x):
    h_x = self.embed(x)
    n_words = h_x.shape[1]
    h_x = F.expand_dims(h_x, 1)
    h_x = F.relu(self.bnorm1(self.cnn1(h_x)))
    h_x = F.max_pooling_2d(h_x, (n_words, self.vec_size))
    h_x = F.relu(self.l1(h_x))
    return self.l2(h_x)
def squared_distance(self, anc, pos, neg):
    """
    Compute anchor-positive distance and anchor-negative distance on batches
    of anchors, positive, and negative samples.
    """
    dist_pos = F.expand_dims(F.batch_l2_norm_squared(anc - pos), 1)
    dist_neg = F.expand_dims(F.batch_l2_norm_squared(anc - neg), 1)
    return dist_pos, dist_neg
def normalize_2d(x):
    exp = F.exp(x[0])
    sums = F.sum(F.sum(exp, axis=-1), axis=-1)
    expanded = F.expand_dims(F.expand_dims(sums, axis=-1), axis=-1)
    denominator = F.tile(expanded, (1, 160, 210))
    return exp / denominator
def process_image(raw_image):
    floated = raw_image.astype('float32') / 255.0
    transposed = F.transpose(floated)
    expanded = F.expand_dims(transposed, 0)  # Make a "batch size" of 1
    return expanded
def compute_loss(self, t_start, t_stop, rewards, values,
                 next_values, log_probs):
    seq_len = t_stop - t_start
    assert len(rewards) == seq_len
    assert len(values) == seq_len
    assert len(next_values) == seq_len
    assert len(log_probs) == seq_len

    pi_losses = []
    v_losses = []
    for t in range(t_start, t_stop):
        d = min(t_stop - t, self.rollout_len)
        # Discounted sum of immediate rewards
        R_seq = sum(self.gamma ** i * rewards[t + i] for i in range(d))
        # Discounted sum of log likelihoods
        G = chainerrl.functions.weighted_sum_arrays(
            xs=[log_probs[t + i] for i in range(d)],
            weights=[self.gamma ** i for i in range(d)])
        G = F.expand_dims(G, -1)
        last_v = next_values[t + d - 1]
        if not self.backprop_future_values:
            last_v = chainer.Variable(last_v.data)
        # C_pi only backprop through pi
        C_pi = (- values[t].data +
                self.gamma ** d * last_v.data +
                R_seq -
                self.tau * G)
        # C_v only backprop through v
        C_v = (- values[t] +
               self.gamma ** d * last_v +
               R_seq -
               self.tau * G.data)
        pi_losses.append(C_pi ** 2)
        v_losses.append(C_v ** 2)

    pi_loss = chainerrl.functions.sum_arrays(pi_losses) / 2
    v_loss = chainerrl.functions.sum_arrays(v_losses) / 2

    # Re-scale pi loss so that it is independent from tau
    pi_loss /= self.tau

    pi_loss *= self.pi_loss_coef
    v_loss *= self.v_loss_coef

    if self.normalize_loss_by_steps:
        pi_loss /= seq_len
        v_loss /= seq_len

    if self.process_idx == 0:
        self.logger.debug('pi_loss:%s v_loss:%s', pi_loss.data, v_loss.data)

    return pi_loss + F.reshape(v_loss, pi_loss.data.shape)
def __fit(self, content_image, style_image, epoch_num, callback=None):
    xp = self.xp
    input_image = None
    height, width = content_image.shape[-2:]
    base_epoch = 0
    for stride in [4, 2, 1][-self.resolution_num:]:
        if width // stride < 64:
            continue
        content_x = xp.asarray(content_image[:,:,::stride,::stride])
        if self.keep_color:
            style_x = util.luminance_only(
                xp.asarray(style_image[:,:,::stride,::stride]), content_x)
        else:
            style_x = xp.asarray(style_image[:,:,::stride,::stride])
        content_layer_names = self.content_layer_names
        with chainer.using_config('enable_backprop', False):
            content_layers = self.model(content_x)
        content_layers = [(name, content_layers[name]) for name in content_layer_names]
        style_layer_names = self.style_layer_names
        with chainer.using_config('enable_backprop', False):
            style_layers = self.model(style_x)
        style_patches = []
        for name in style_layer_names:
            patch = util.patch(style_layers[name])
            patch_norm = F.expand_dims(F.sum(patch ** 2, axis=1) ** 0.5, 1)
            style_patches.append((name, patch, patch_norm))
        if input_image is None:
            if self.initial_image == 'content':
                input_image = xp.asarray(content_image[:,:,::stride,::stride])
            else:
                input_image = xp.random.uniform(-20, 20, size=content_x.shape).astype(np.float32)
        else:
            input_image = input_image.repeat(2, 2).repeat(2, 3)
            h, w = content_x.shape[-2:]
            input_image = input_image[:,:,:h,:w]
        link = chainer.Link(x=input_image.shape)
        if self.device_id >= 0:
            link.to_gpu()
        link.x.data[:] = xp.asarray(input_image)
        self.optimizer.setup(link)
        for epoch in six.moves.range(epoch_num):
            loss_info = self.__fit_one(link, content_layers, style_patches)
            if callback:
                callback(base_epoch + epoch, link.x, loss_info)
        base_epoch += epoch_num
        input_image = link.x.data
    return link.x
def __call__(self, xs):
    """
    xs [(w,s,p,y), ..., ]
    w: word, c: char, l: length, y: label
    """
    batchsize = len(xs)
    ws, cs, ls, ts = zip(*xs)
    # cs: [(sentence length, max word length)]
    ws = map(self.emb_word, ws)
    # ls: [(sentence length, char dim)]
    # cs = map(lambda (c, l): F.sum(self.emb_char(c), 1) / l, zip(cs, ls))
    # cs = [F.reshape(F.average_pooling_2d(
    #         F.expand_dims(self.emb_char(c), 0), (l, 1)), (-1, self.char_dim))
    #       for c, l in zip(cs, ls)]
    # before conv: (sent len, 1, max word len, char_size)
    # after conv: (sent len, char_size, max word len, 1)
    # after max_pool: (sent len, char_size, 1, 1)
    cs = [F.squeeze(
        F.max_pooling_2d(
            self.conv_char(
                F.expand_dims(
                    self.emb_char(c), 1)), (l, 1)))
          for c, l in zip(cs, ls)]
    # [(sentence length, (word_dim + char_dim))]
    xs_f = [F.dropout(F.concat([w, c]),
                      self.dropout_ratio, train=self.train)
            for w, c in zip(ws, cs)]
    xs_b = [x[::-1] for x in xs_f]
    cx_f, hx_f, cx_b, hx_b = self._init_state(batchsize)
    _, _, hs_f = self.lstm_f(hx_f, cx_f, xs_f, train=self.train)
    _, _, hs_b = self.lstm_b(hx_b, cx_b, xs_b, train=self.train)
    hs_b = [x[::-1] for x in hs_b]
    # ys: [(sentence length, number of category)]
    ys = [self.linear2(F.relu(self.linear1(F.concat([h_f, h_b]))))
          for h_f, h_b in zip(hs_f, hs_b)]
    # ys = [self.linear2(F.relu(
    #         self.linear1(
    #             F.squeeze(
    #                 F.transpose(
    #                     F.relu(self.conv1(
    #                         F.reshape(
    #                             F.concat([h_f, h_b]),
    #                             (1, 1, -1, 2 * self.hidden_dim))), (0, 3, 2, 1))
    #                 )))))
    #       for h_f, h_b in zip(hs_f, hs_b)]
    loss = reduce(lambda x, y: x + y,
                  [F.softmax_cross_entropy(y, t) for y, t in zip(ys, ts)])
    acc = reduce(lambda x, y: x + y,
                 [F.accuracy(y, t, ignore_label=IGNORE) for y, t in zip(ys, ts)])
    acc /= batchsize
    chainer.report({
        "loss": loss,
        "accuracy": acc
    }, self)
    return loss
def pool(self, WX, skip_mask=None):
    Z, F, O, I = None, None, None, None

    # f-pooling
    if len(self._pooling) == 1:
        assert len(WX) == 2
        Z, F = WX
        Z = functions.tanh(Z)
        F = self.zoneout(F)

    # fo-pooling
    if len(self._pooling) == 2:
        assert len(WX) == 3
        Z, F, O = WX
        Z = functions.tanh(Z)
        F = self.zoneout(F)
        O = functions.sigmoid(O)

    # ifo-pooling
    if len(self._pooling) == 3:
        assert len(WX) == 4
        Z, F, O, I = WX
        Z = functions.tanh(Z)
        F = self.zoneout(F)
        O = functions.sigmoid(O)
        I = functions.sigmoid(I)

    assert Z is not None
    assert F is not None

    T = Z.shape[2]
    for t in xrange(T):
        zt = Z[..., t]
        ft = F[..., t]
        ot = 1 if O is None else O[..., t]
        it = 1 - ft if I is None else I[..., t]
        xt = 1 if skip_mask is None else skip_mask[:, t, None]  # will be used for seq2seq to skip PAD

        if self.ct is None:
            self.ct = (1 - ft) * zt * xt
        else:
            self.ct = ft * self.ct + it * zt * xt
        self.ht = self.ct if O is None else ot * self.ct

        if self.H is None:
            self.H = functions.expand_dims(self.ht, 2)
        else:
            self.H = functions.concat((self.H, functions.expand_dims(self.ht, 2)), axis=2)

    return self.H
def __call__(self, X, ht_enc, H_enc, skip_mask=None):
    pad = self._kernel_size - 1
    WX = self.W(X)
    if pad > 0:
        WX = WX[:, :, :-pad]
    Vh = self.V(ht_enc)
    Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

    # f-pooling
    Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
    Z = functions.tanh(Z)
    F = self.zoneout(F)
    O = functions.sigmoid(O)
    T = Z.shape[2]

    # compute ungated hidden states
    self.contexts = []
    for t in xrange(T):
        z = Z[..., t]
        f = F[..., t]
        if t == 0:
            ct = (1 - f) * z
            self.contexts.append(ct)
        else:
            ct = f * self.contexts[-1] + (1 - f) * z
            self.contexts.append(ct)

    if skip_mask is not None:
        assert skip_mask.shape[1] == H_enc.shape[2]
        softmax_bias = (skip_mask == 0) * -1e6

    # compute attention weights (eq.8)
    H_enc = functions.swapaxes(H_enc, 1, 2)
    for t in xrange(T):
        ct = self.contexts[t]
        bias = 0 if skip_mask is None else softmax_bias[..., None]  # to skip PAD
        mask = 1 if skip_mask is None else skip_mask[..., None]  # to skip PAD
        alpha = functions.batch_matmul(H_enc, ct) + bias
        alpha = functions.softmax(alpha) * mask
        alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
        kt = functions.sum(alpha * H_enc, axis=1)
        ot = O[..., t]
        self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

        if t == 0:
            self.H = functions.expand_dims(self.ht, 2)
        else:
            self.H = functions.concat((self.H, functions.expand_dims(self.ht, 2)), axis=2)

    return self.H
def forward_one_step(self, X, ht_enc, H_enc, skip_mask):
    pad = self._kernel_size - 1
    WX = self.W(X)[:, :, -pad-1, None]
    Vh = self.V(ht_enc)
    Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

    # f-pooling
    Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
    Z = functions.tanh(Z)
    F = self.zoneout(F)
    O = functions.sigmoid(O)
    T = Z.shape[2]

    # compute ungated hidden states
    for t in xrange(T):
        z = Z[..., t]
        f = F[..., t]
        if self.contexts is None:
            ct = (1 - f) * z
            self.contexts = [ct]
        else:
            ct = f * self.contexts[-1] + (1 - f) * z
            self.contexts.append(ct)

    if skip_mask is not None:
        assert skip_mask.shape[1] == H_enc.shape[2]
        softmax_bias = (skip_mask == 0) * -1e6

    # compute attention weights (eq.8)
    H_enc = functions.swapaxes(H_enc, 1, 2)
    for t in xrange(T):
        ct = self.contexts[t - T]
        bias = 0 if skip_mask is None else softmax_bias[..., None]  # to skip PAD
        mask = 1 if skip_mask is None else skip_mask[..., None]  # to skip PAD
        alpha = functions.batch_matmul(H_enc, ct) + bias
        alpha = functions.softmax(alpha) * mask
        alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
        kt = functions.sum(alpha * H_enc, axis=1)
        ot = O[..., t]
        self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

        if self.H is None:
            self.H = functions.expand_dims(self.ht, 2)
        else:
            self.H = functions.concat((self.H, functions.expand_dims(self.ht, 2)), axis=2)

    return self.H
def calcAttention(self, h1, hList, aList, encLen, cMBSize, args):
    # If attention is disabled, simply return h1 unchanged
    if self.attn_mode == 0:
        return h1
    # 1. Prepare the attention computation
    target1 = self.model.attnIn_L1(h1)  # apply one linear transformation first
    # (cMBSize, self.hDim) => (cMBSize, 1, self.hDim)
    target2 = chaFunc.expand_dims(target1, axis=1)
    # (cMBSize, 1, self.hDim) => (cMBSize, encLen, self.hDim)
    target3 = chaFunc.broadcast_to(target2, (cMBSize, encLen, self.hDim))
    # target3 = chaFunc.broadcast_to(chaFunc.reshape(
    #     target1, (cMBSize, 1, self.hDim)), (cMBSize, encLen, self.hDim))
    # 2. Compute attention scores according to the attention type
    if self.attn_mode == 1:  # bilinear
        # for bilinear attention, hList1 == hList2
        # shape: (cMBSize, encLen)
        aval = chaFunc.sum(target3 * aList, axis=2)
    elif self.attn_mode == 2:  # MLP
        # reshape so it can be fed to attnSum
        t1 = chaFunc.reshape(target3, (cMBSize * encLen, self.hDim))
        # (cMBSize*encLen, self.hDim) => (cMBSize*encLen, 1)
        t2 = self.model.attnSum(chaFunc.tanh(t1 + aList))
        # shape: (cMBSize, encLen)
        aval = chaFunc.reshape(t2, (cMBSize, encLen))
        # aval = chaFunc.reshape(self.model.attnSum(
        #     chaFunc.tanh(t1 + aList)), (cMBSize, encLen))
    else:
        assert 0, "ERROR"
    # 3. Take the softmax
    cAttn1 = chaFunc.softmax(aval)  # (cMBSize, encLen)
    # 4. Build the context vector from the attention weights
    # (cMBSize, encLen) => (cMBSize, 1, encLen)
    cAttn2 = chaFunc.expand_dims(cAttn1, axis=1)
    # (1, encLen) x (encLen, hDim) matmul repeated cMBSize times
    # => (cMBSize, 1, hDim)
    cAttn3 = chaFunc.batch_matmul(cAttn2, hList)
    # cAttn3 = chaFunc.batch_matmul(chaFunc.reshape(
    #     cAttn1, (cMBSize, 1, encLen)), hList)
    # drop the singleton dimension at axis=1
    context = chaFunc.reshape(cAttn3, (cMBSize, self.hDim))
    # 4. (alternative way of building the context vector from the weights)
    # (cMBSize, srcLen) => (cMBSize, srcLen, 1)
    # cAttn2 = chaFunc.reshape(cAttn1, (cMBSize, encLen, 1))
    # (cMBSize, srcLen, 1) => (cMBSize, srcLen, hDim)
    # cAttn3 = chaFunc.broadcast_to(cAttn2, (cMBSize, encLen, self.hDim))
    # weighted sum over axis=1: (cMBSize, encLen, hDim) => (cMBSize, hDim)
    # context = chaFunc.sum(aList * cAttn3, axis=1)
    # 6. Compute the final hidden state with attention
    c1 = chaFunc.concat((h1, context))
    c2 = self.model.attnOut_L2(c1)
    finalH = chaFunc.tanh(c2)
    # finalH = chaFunc.tanh(self.model.attnOut_L2(
    #     chaFunc.concat((h1, context))))
    return finalH  # context