Python theano.tensor 模块,roll() 实例源码


项目:lstmprovisor-python    作者:Impro-Visor    | 项目源码 | 文件源码
def generate(self, relative_position, cur_chord_root, cur_chord_type, **kwargs):
    """
    Generate a chord input for a given timestep.

    Parameters:
        relative_position: A theano tensor (int32) of shape (n_parallel), giving the
            current relative position for this timestep
        cur_chord_root: A theano tensor (int32) of shape (n_parallel) giving the unshifted chord root
        cur_chord_type: A theano tensor (int32) of shape (n_parallel, CHORD_WIDTH), giving the unshifted chord
            type representation, parsed from the leadsheet
        **kwargs: Accepted but not read by this method.

    Returns:
        piece: A theano tensor (float32) of shape (n_parallel, PART_WIDTH)
    """
    def _map_fn(pos, chord):
        # Now pos is scalar and chord is of shape (CHORD_WIDTH), so we can roll.
        # Rolling by (-pos) % 12 is a left rotation by pos, kept in [0, 12).
        return T.roll(chord, (-pos) % 12, 0)

    # NOTE(review): the head of this call was lost in extraction; `theano.map`
    # is inferred from the surviving `sequences=[...]` keyword and the
    # (outputs, updates) unpacking -- confirm against the upstream project.
    shifted_chords, _ = theano.map(
        _map_fn,
        sequences=[relative_position - cur_chord_root, cur_chord_type])

    # shifted_chords = theano.printing.Print("ChordShiftInputPart")(shifted_chords)
    # shifted_chords = T.opt.Assert()(shifted_chords, T.eq(shifted_chords.shape[1], self.PART_WIDTH))
    return shifted_chords
项目:transfer    作者:kimiyoung    | 项目源码 | 文件源码
def get_output_for(self, input, **kwargs):
    """Return the negative log-likelihood of the label sequences, averaged over instances.

    The score of the labelled path (unary term ``f_pot`` plus pairwise term
    ``g_pot``) is compared against the log partition function ``norm``, which
    is accumulated over time with ``theano.scan``.

    Parameters:
        input: symbolic input; projected with ``self.W`` to per-class scores
            laid out as inst * time * class.
        **kwargs: accepted but not read by this method.

    Returns:
        A symbolic scalar: ``-(f_pot + g_pot - norm) / f.shape[0]``.

    Reads ``self.W``, ``self.W_sim``, ``self.W_init``, ``self.mask_input``,
    ``self.label_input`` and ``self.num_classes``, plus the module-level
    switches ``COST``, ``COST_CONST`` and ``CRF_INIT``.
    """
    def norm_fn(f, mask, label, previous, W_sim):
        # f: inst * class, mask: inst, previous: inst * class, W_sim: class * class
        scores = previous.dimshuffle(0, 1, 'x') + f.dimshuffle(0, 'x', 1) + W_sim.dimshuffle('x', 0, 1)
        if COST:
            # Add a constant to every class except the labelled one.
            scores = scores + COST_CONST * (1.0 - T.extra_ops.to_one_hot(label, self.num_classes).dimshuffle(0, 'x', 1))
        # scores: inst * prev * cur
        scores = theano_logsumexp(scores, axis=1)
        # scores: inst * class
        mask = mask.dimshuffle(0, 'x')
        # Masked-out steps carry the previous accumulator forward unchanged.
        return previous * (1.0 - mask) + scores * mask

    # NOTE(review): the head of this call was lost in extraction; `T.dot(input`
    # is inferred from the surviving `, self.W)` fragment and the shape comment
    # below -- confirm against the upstream project.
    f = T.dot(input, self.W)
    # f: inst * time * class

    initial = f[:, 0, :]
    if CRF_INIT:
        initial = initial + self.W_init[0].dimshuffle('x', 0)
    if COST:
        initial = initial + COST_CONST * (1.0 - T.extra_ops.to_one_hot(self.label_input[:, 0], self.num_classes))

    # Scan over time steps 1..T-1; step 0 is consumed by `initial`.
    outputs, _ = theano.scan(
        fn=norm_fn,
        sequences=[f.dimshuffle(1, 0, 2)[1:],
                   self.mask_input.dimshuffle(1, 0)[1:],
                   self.label_input.dimshuffle(1, 0)[1:]],
        outputs_info=initial,
        non_sequences=[self.W_sim],
        strict=True)
    norm = T.sum(theano_logsumexp(outputs[-1], axis=1))

    # Unary score of the labelled class at every unmasked (inst, time) cell.
    flat_f = f.reshape((-1, f.shape[-1]))
    f_pot = (flat_f[T.arange(f.shape[0] * f.shape[1]), self.label_input.flatten()] * self.mask_input.flatten()).sum()
    if CRF_INIT:
        f_pot += self.W_init[0][self.label_input[:, 0]].sum()

    labels = self.label_input
    # labels: inst * time
    mask = self.mask_input
    # mask : inst * time

    # Pairwise score over adjacent steps (t, t + 1) only. The earlier
    # T.roll(..., -1, axis=1) formulation wrapped around and also scored the
    # transition last label -> first label whenever the final step was
    # unmasked, a transition the partition function above never includes.
    prev_labels = labels[:, :-1]
    next_labels = labels[:, 1:]
    pair_mask = mask[:, :-1] * mask[:, 1:]
    g_pot = (self.W_sim[prev_labels.flatten(), next_labels.flatten()] * pair_mask.flatten()).sum()

    return - (f_pot + g_pot - norm) / f.shape[0]
项目:fg-gating    作者:kimiyoung    | 项目源码 | 文件源码
def get_output_for(self, input, **kwargs):
    """Return the negative log-likelihood of the label sequences under the chain model.

    The score of the labelled path (unary term ``f_pot`` plus pairwise term
    ``g_pot``) is compared against the log partition function ``norm``, which
    is accumulated over time with ``theano.scan``.

    Parameters:
        input: symbolic per-class scores laid out as batch * time * class.
        **kwargs: accepted but not read by this method.

    Returns:
        A symbolic scalar: ``-(f_pot + g_pot - norm)``, divided by the batch
        size ``f.shape[0]`` when ``self.normalize`` is truthy.

    Reads ``self.W_sim``, ``self.W_end_points``, ``self.end_points``,
    ``self.mask_input``, ``self.label_input``, ``self.num_classes`` and
    ``self.normalize``.
    """
    def norm_fn(f, mask, label, previous, W_sim):
        # f: batch * class, mask: batch, label: batch, previous: batch * class, W_sim: class * class
        # `label` is unused here but must be accepted because it is one of the scanned sequences.
        scores = previous.dimshuffle(0, 1, 'x') + f.dimshuffle(0, 'x', 1) + W_sim.dimshuffle('x', 0, 1)  # batch * class * class
        scores = theano_logsumexp(scores, axis=1)  # batch * class
        mask = mask.dimshuffle(0, 'x')
        # Masked-out steps carry the previous accumulator forward unchanged.
        return previous * (1.0 - mask) + scores * mask

    f = input  # batch * time * class
    if self.end_points:
        # Add per-class offsets to the first and last time steps.
        for i in range(self.num_classes):
            f = T.inc_subtensor(f[:, 0, i], self.W_end_points[0, i])
            f = T.inc_subtensor(f[:, -1, i], self.W_end_points[1, i])

    initial = f[:, 0, :]
    # Scan over time steps 1..T-1; step 0 is consumed by `initial`.
    outputs, _ = theano.scan(
        fn=norm_fn,
        sequences=[f.dimshuffle(1, 0, 2)[1:],
                   self.mask_input.dimshuffle(1, 0)[1:],
                   self.label_input.dimshuffle(1, 0)[1:]],
        outputs_info=initial,
        non_sequences=[self.W_sim],
        strict=True)
    norm = T.sum(theano_logsumexp(outputs[-1], axis=1))

    # Unary score of the labelled class at every unmasked (batch, time) cell.
    flat_f = f.reshape((-1, f.shape[-1]))
    f_pot = (flat_f[T.arange(f.shape[0] * f.shape[1]), self.label_input.flatten()] * self.mask_input.flatten()).sum()

    labels = self.label_input  # batch * time
    mask = self.mask_input  # batch * time

    # Pairwise score over adjacent steps (t, t + 1) only. The earlier
    # T.roll(..., -1, axis=1) formulation wrapped around and also scored the
    # transition last label -> first label whenever the final step was
    # unmasked, a transition the partition function above never includes.
    prev_labels = labels[:, :-1]
    next_labels = labels[:, 1:]
    pair_mask = mask[:, :-1] * mask[:, 1:]
    g_pot = (self.W_sim[prev_labels.flatten(), next_labels.flatten()] * pair_mask.flatten()).sum()

    neg_log_likelihood = - (f_pot + g_pot - norm)
    if self.normalize:
        return neg_log_likelihood / f.shape[0]
    return neg_log_likelihood
项目:lstmprovisor-python    作者:Impro-Visor    | 项目源码 | 文件源码
def decode_to_probs(self, activations, relative_position, low_bound, high_bound):
    """Turn raw activations into probabilities laid out over an absolute range.

    The activations are flattened to rows of width ``self.RAW_ENCODING_WIDTH``
    and passed through a softmax. Each row is then rotated according to its
    relative position, tiled to cover ``high_bound - low_bound`` entries, and
    prefixed with two leading entries.

    Parameters:
        activations: symbolic tensor; reshaped to (-1, RAW_ENCODING_WIDTH), so
            its last axis is presumably of that width -- TODO confirm.
        relative_position: symbolic tensor, flattened to give one position per
            row of the flattened activations.
        low_bound: lower bound of the output range (used in Python arithmetic).
        high_bound: upper bound of the output range (used in Python arithmetic).

    Returns:
        A symbolic tensor of shape
        ``activations.shape[:-1] + (2 + high_bound - low_bound,)``.
    """
    squashed = T.reshape(activations, (-1, self.RAW_ENCODING_WIDTH))
    probs = T.nnet.softmax(squashed)

    span = high_bound - low_bound
    # Number of window copies needed to cover the span. Computed once, outside
    # the mapped function, with explicit float division so that ceil() is not
    # defeated by integer division on Python 2.
    num_tile = int(math.ceil(span / float(self.WINDOW_SIZE)))

    def _scan_fn(cprobs, cpos):
        # NOTE(review): the `else:` line was lost in extraction; without it the
        # with_artic split would always be overwritten. Confirm against the
        # upstream project.
        if self.with_artic:
            abs_probs = cprobs[:2]
            rel_probs = cprobs[2:]
        else:
            rel_probs = cprobs
            abs_probs = T.ones((2,))

        # Rotate so that the entries line up with positions counted from low_bound.
        aligned = T.roll(rel_probs, (cpos - low_bound) % 12)
        tiled = T.tile(aligned, (num_tile,))[:span]
        return T.concatenate([abs_probs, tiled], 0)

    # probs = theano.printing.Print("probs",['shape'])(probs)
    # relative_position = theano.printing.Print("relative_position",['shape'])(relative_position)
    # NOTE(review): the head of this call was lost in extraction; `theano.map`
    # is inferred from the surviving `sequences=[...]` keyword and the
    # (outputs, updates) unpacking -- confirm against the upstream project.
    from_scan, _ = theano.map(_scan_fn, sequences=[probs, T.flatten(relative_position)])
    # from_scan = theano.printing.Print("from_scan",['shape'])(from_scan)

    # Restore the leading axes of `activations`, with the new last-axis width.
    newshape = T.concatenate([activations.shape[:-1], [2 + span]], 0)
    fixed = T.reshape(from_scan, newshape, ndim=activations.ndim)
    return fixed
项目:lstmprovisor-python    作者:Impro-Visor    | 项目源码 | 文件源码
def __init__(self, input_parts, layer_sizes, output_size, window_size=0, dropout=0, mode="drop", unroll_batch_num=None):
    """
    Build the stacked LSTM cells and the linear output layer.

    Parameters:
        input_parts: A list of InputParts
        layer_sizes: A list of the form [ (indep, per_note), ... ] where
                indep is the number of non-shifted cells to have, and
                per_note is the number of cells to have per window note, which shift as the
                    network moves
                Alternately can just be [ indep, ... ]
        output_size: An integer, the width of the desired output
        window_size: Multiplier applied to per_note when computing each layer's total
            size (indep + per_note * window_size).
        dropout: How much dropout to apply.
        mode: Either "drop" or "roll". If drop, discard memory that goes out of range. If roll, roll it instead
        unroll_batch_num: Stored on the instance unchanged; not otherwise used here.

    Raises:
        AssertionError: if mode is neither "drop" nor "roll".
    """
    self.input_parts = input_parts
    self.window_size = window_size

    # Normalise bare integers to (indep, 0) so every entry is an (indep, per_note) pair.
    layer_sizes = [x if isinstance(x, tuple) else (x, 0) for x in layer_sizes]
    self.layer_sizes = layer_sizes
    self.tot_layer_sizes = [(indep + per_note * self.window_size) for indep, per_note in layer_sizes]

    self.output_size = output_size
    self.dropout = dropout

    # The network input is the concatenation of every part's output.
    self.input_size = sum(part.PART_WIDTH for part in input_parts)

    self.cells = StackedCells(self.input_size, celltype=LSTM, activation=T.tanh, layers=self.tot_layer_sizes)
    # Final layer maps the last hidden layer to the output width with an identity activation.
    self.cells.layers.append(Layer(self.tot_layer_sizes[-1], self.output_size, activation=lambda x: x))

    assert mode in ("drop", "roll"), "Must specify either drop or roll mode"
    self.mode = mode

    self.unroll_batch_num = unroll_batch_num
项目:sgnmt    作者:ucam-smt    | 项目源码 | 文件源码
def cost_matrix(self, application_call, outputs, mask=None, **kwargs):
    """Adapted from ``BaseSequenceGenerator.cost_matrix``.

    Returns generation costs for output sequences. In addition, on the first
    call (while ``self.pruning_variables_initialized`` is false) the raw
    transition results are stored in ``self.results``.

    Parameters
    ----------
    application_call
        Object on which auxiliary variables are registered.
    outputs
        Symbolic output sequences with axes (time, batch, features, ...).
    mask
        Optional mask; passed to the transition and multiplied into the costs.
    **kwargs
        May contain states and contexts, picked out by name via
        ``self._state_names`` and ``self._context_names``.

    Returns
    -------
    The symbolic cost matrix produced by ``self.readout.cost``, masked when
    `mask` is given.

    """
    # We assume the data has axes (time, batch, features, ...)
    batch_size = outputs.shape[1]

    # Prepare input for the iterative part
    states = dict_subset(kwargs, self._state_names, must_have=False)
    # masks in context are optional (e.g. `attended_mask`)
    contexts = dict_subset(kwargs, self._context_names, must_have=False)
    # NOTE(review): the right-hand side of this assignment was lost in
    # extraction; reconstructed following the Blocks
    # BaseSequenceGenerator.cost_matrix pattern -- confirm upstream.
    feedback = self.readout.feedback(outputs)
    inputs = self.fork.apply(feedback, as_dict=True)

    # Run the recurrent network
    results = self.transition.apply(
        mask=mask, return_initial_states=True, as_dict=True,
        **dict_union(inputs, states, contexts))

    # Separate the deliverables. The last states are discarded: they
    # are not used to predict any output symbol. The initial glimpses
    # are discarded because they are not used for prediction.
    # Remember, glimpses are computed _before_ output stage, states are
    # computed after.
    states = {name: results[name][:-1] for name in self._state_names}
    glimpses = {name: results[name][1:] for name in self._glimpse_names}

    # Compute the cost
    # Shift the feedback one step later in time so each step sees the previous
    # output; the wrapped-around first row is then overwritten.
    feedback = tensor.roll(feedback, 1, 0)
    # NOTE(review): the arguments of this call were lost in extraction;
    # reconstructed following the Blocks pattern (feedback of the initial
    # outputs goes into step 0) -- confirm upstream.
    feedback = tensor.set_subtensor(
        feedback[0],
        self.readout.feedback(self.readout.initial_outputs(batch_size)))
    readouts = self.readout.readout(
        feedback=feedback, **dict_union(states, glimpses, contexts))
    costs = self.readout.cost(readouts, outputs)
    if mask is not None:
        costs *= mask

    # NOTE(review): the `application_call.add_auxiliary_variable(` heads of the
    # two calls below were lost in extraction; reconstructed from the surviving
    # `variable.copy(), name=...)` fragments -- confirm upstream.
    for name, variable in list(glimpses.items()) + list(states.items()):
        application_call.add_auxiliary_variable(
            variable.copy(), name=name)

    # This variables can be used to initialize the initial states of the
    # next batch using the last states of the current batch.
    for name in self._state_names:
        application_call.add_auxiliary_variable(
            results[name][-1].copy(), name=name+"_final_value")

    if not self.pruning_variables_initialized:
        self.results = results
        self.pruning_variables_initialized = True
    return costs
项目:DCNMT    作者:SwordYork    | 项目源码 | 文件源码
def cost_matrix_nmt(self, application_call, target_char_seq, target_sample_matrix, target_resample_matrix,
                    target_word_mask, target_char_aux, target_prev_char_seq, target_prev_char_aux, **kwargs):
    """Returns generation costs for output sequences.

    Parameters
    ----------
    application_call
        Object on which auxiliary variables are registered.
    target_char_seq
        Target sequence with axes (time, batch, ...); used to build the
        feedback and as the target of the cost.
    target_sample_matrix, target_char_aux
        Passed with `target_char_seq` to ``self.readout.feedback_apply``.
    target_resample_matrix
        Batched-dotted with the decoder readouts to resample them.
    target_word_mask
        Passed as the mask of the recurrent transition.
    target_prev_char_seq, target_prev_char_aux
        Passed with the resampled readouts to ``self.readout.readout_gru``.
    **kwargs
        May contain states and contexts, picked out by name via
        ``self._state_names`` and ``self._context_names``.

    Returns
    -------
    The symbolic costs produced by ``self.readout.cost``.

    See Also
    --------
    :meth:`cost` : Scalar cost.

    """
    # We assume the data has axes (time, batch, features, ...)
    batch_size = target_char_seq.shape[1]

    # Prepare input for the iterative part
    states = dict_subset(kwargs, self._state_names, must_have=False)
    # masks in context are optional (e.g. `attended_mask`)
    contexts = dict_subset(kwargs, self._context_names, must_have=False)
    feedback = self.readout.feedback_apply(target_char_seq, target_sample_matrix, target_char_aux)
    inputs = self.fork.apply(feedback, as_dict=True)

    # Run the recurrent network
    results = self.transition.apply(
        mask=target_word_mask, return_initial_states=True, as_dict=True,
        **dict_union(inputs, states, contexts))

    # Separate the deliverables. The last states are discarded: they
    # are not used to predict any output symbol. The initial glimpses
    # are discarded because they are not used for prediction.
    # Remember, glimpses are computed _before_ output stage, states are
    # computed after.
    states = {name: results[name][:-1] for name in self._state_names}
    glimpses = {name: results[name][1:] for name in self._glimpse_names}

    # Shift the feedback one step later in time so each step sees the previous
    # output; the wrapped-around first row is then overwritten.
    feedback = tensor.roll(feedback, 1, 0)
    init_feedback = self.readout.single_feedback(self.readout.initial_outputs(batch_size), batch_size)
    # NOTE(review): the `else:` line was lost in extraction; without it the
    # depth-1 assignment would be immediately overwritten. Confirm upstream.
    if self.trg_dgru_depth == 1:
        feedback = tensor.set_subtensor(feedback[0], init_feedback)
    else:
        feedback = tensor.set_subtensor(feedback[0], init_feedback[-1])

    decoder_readout_outputs = self.readout.readout(
        feedback=feedback, **dict_union(states, glimpses, contexts))
    # batched_dot works batch-major, so swap to (batch, time, ...) and back.
    resampled_representation = tensor.batched_dot(target_resample_matrix,
                                                  decoder_readout_outputs.dimshuffle([1, 0, 2]))
    resampled_readouts = resampled_representation.dimshuffle([1, 0, 2])
    readouts_chars = self.readout.readout_gru(target_prev_char_seq, target_prev_char_aux, resampled_readouts)

    # Compute the cost
    costs = self.readout.cost(readouts_chars, target_char_seq)

    # NOTE(review): the `application_call.add_auxiliary_variable(` heads of the
    # two calls below were lost in extraction; reconstructed from the surviving
    # `variable.copy(), name=...)` fragments -- confirm upstream.
    for name, variable in list(glimpses.items()) + list(states.items()):
        application_call.add_auxiliary_variable(
            variable.copy(), name=name)

    # This variables can be used to initialize the initial states of the
    # next batch using the last states of the current batch.
    for name in self._state_names + self._glimpse_names:
        application_call.add_auxiliary_variable(
            results[name][-1].copy(), name=name + "_final_value")

    return costs