Python six.moves module: xrange() code examples

We extracted the following 50 code examples from open-source Python projects to illustrate how to use six.moves.xrange().
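
six.moves.xrange is a compatibility alias: it resolves to the built-in xrange on Python 2 and to range on Python 3, so the same loop code runs unchanged on either interpreter. Before the project snippets, here is a minimal standalone sketch (not taken from any project below; it assumes the six package is installed):

# Illustrative example only; `squares` is not from any of the projects below.
from six.moves import xrange

def squares(n):
    """Return the squares of 0..n-1 using the version-portable xrange."""
    return [i * i for i in xrange(n)]

if __name__ == '__main__':
    print(squares(5))  # -> [0, 1, 4, 9, 16]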

Project: paragraph2vec    Author: thunlp    | project source | file source
def update_expectations(self):
        """
        Since we're doing lazy updates on lambda, at any given moment
        the current state of lambda may not be accurate. This function
        updates all of the elements of lambda and Elogbeta
        so that if (for example) we want to print out the
        topics we've learned we'll get the correct behavior.
        """
        for w in xrange(self.m_W):
            self.m_lambda[:, w] *= np.exp(self.m_r[-1] -
                                          self.m_r[self.m_timestamp[w]])
        self.m_Elogbeta = sp.psi(self.m_eta + self.m_lambda) - \
            sp.psi(self.m_W * self.m_eta + self.m_lambda_sum[:, np.newaxis])

        self.m_timestamp[:] = self.m_updatect
        self.m_status_up_to_date = True
Project: workflows.kyoyue    Author: wizyoung    | project source | file source
def _lost_point_level2(modules, modules_count):
    lost_point = 0

    modules_range = xrange(modules_count - 1)

    for row in modules_range:
        this_row = modules[row]
        next_row = modules[row+1]
        for col in modules_range:
            count = 0
            if this_row[col]:
                count += 1
            if next_row[col]:
                count += 1
            if this_row[col + 1]:
                count += 1
            if next_row[col + 1]:
                count += 1
            if count == 0 or count == 4:
                lost_point += 3

    return lost_point
Project: workflows.kyoyue    Author: wizyoung    | project source | file source
def write(self, buffer):
        if self.mode == MODE_NUMBER:
            for i in xrange(0, len(self.data), 3):
                chars = self.data[i:i + 3]
                bit_length = NUMBER_LENGTH[len(chars)]
                buffer.put(int(chars), bit_length)
        elif self.mode == MODE_ALPHA_NUM:
            for i in xrange(0, len(self.data), 2):
                chars = self.data[i:i + 2]
                if len(chars) > 1:
                    buffer.put(
                        ALPHA_NUM.find(chars[0]) * 45 +
                        ALPHA_NUM.find(chars[1]), 11)
                else:
                    buffer.put(ALPHA_NUM.find(chars), 6)
        else:
            if six.PY3:
                # Iterating a bytestring in Python 3 returns an integer,
                # no need to ord().
                data = self.data
            else:
                data = [ord(c) for c in self.data]
            for c in data:
                buffer.put(c, 8)
Project: paragraph2vec    Author: thunlp    | project source | file source
def lda_e_step(doc_word_ids, doc_word_counts, alpha, beta, max_iter=100):
    gamma = np.ones(len(alpha))
    expElogtheta = np.exp(dirichlet_expectation(gamma))
    betad = beta[:, doc_word_ids]
    phinorm = np.dot(expElogtheta, betad) + 1e-100
    counts = np.array(doc_word_counts)
    for _ in xrange(max_iter):
        lastgamma = gamma

        gamma = alpha + expElogtheta * np.dot(counts / phinorm, betad.T)
        Elogtheta = dirichlet_expectation(gamma)
        expElogtheta = np.exp(Elogtheta)
        phinorm = np.dot(expElogtheta, betad) + 1e-100
        meanchange = np.mean(abs(gamma - lastgamma))
        if (meanchange < meanchangethresh):
            break

    likelihood = np.sum(counts * np.log(phinorm))
    likelihood += np.sum((alpha - gamma) * Elogtheta)
    likelihood += np.sum(sp.gammaln(gamma) - sp.gammaln(alpha))
    likelihood += sp.gammaln(np.sum(alpha)) - sp.gammaln(np.sum(gamma))

    return (likelihood, gamma)
Project: paragraph2vec    Author: thunlp    | project source | file source
def hdp_to_lda(self):
        """
        Compute the (almost) equivalent LDA model for this HDP.
        """
        # alpha
        sticks = self.m_var_sticks[0] / (self.m_var_sticks[0] + self.m_var_sticks[1])
        alpha = np.zeros(self.m_T)
        left = 1.0
        for i in xrange(0, self.m_T - 1):
            alpha[i] = sticks[i] * left
            left = left - alpha[i]
        alpha[self.m_T - 1] = left
        alpha = alpha * self.m_alpha

        # beta
        beta = (self.m_lambda + self.m_eta) / (self.m_W * self.m_eta + \
                self.m_lambda_sum[:, np.newaxis])

        return (alpha, beta)
Project: paragraph2vec    Author: thunlp    | project source | file source
def init_sims(self, replace=False):
        """
        Precompute L2-normalized vectors.

        If `replace` is set, forget the original vectors and only keep the normalized
        ones (this saves lots of memory!).

        Note that you **cannot continue training** after doing a replace. The model becomes
        effectively read-only: you can call `most_similar`, `similarity` etc., but not `train`.

        """
        if getattr(self, 'syn0norm', None) is None or replace:
            logger.info("precomputing L2-norms of word weight vectors")
            if replace:
                for i in xrange(self.syn0.shape[0]):
                    self.syn0[i, :] /= sqrt((self.syn0[i, :] ** 2).sum(-1))
                self.syn0norm = self.syn0
                if hasattr(self, 'syn1'):
                    del self.syn1
            else:
                self.syn0norm = (self.syn0 / sqrt((self.syn0 ** 2).sum(-1))[..., newaxis]).astype(REAL)
Project: paragraph2vec    Author: thunlp    | project source | file source
def iter_chunks(self, chunksize=None):
        """
        Iteratively yield the index as chunks of documents, each of size <= chunksize.

        The chunk is returned in its raw form (matrix or sparse matrix slice).
        The size of the chunk may be smaller than requested; it is up to the caller
        to check the result for real length, using `chunk.shape[0]`.
        """
        self.close_shard()

        if chunksize is None:
            # if not explicitly specified, use the chunksize from the constructor
            chunksize = self.chunksize

        for shard in self.shards:
            query = shard.get_index().index
            for chunk_start in xrange(0, query.shape[0], chunksize):
                # scipy.sparse doesn't allow slicing beyond real size of the matrix
                # (unlike numpy). so, clip the end of the chunk explicitly to make
                # scipy.sparse happy
                chunk_end = min(query.shape[0], chunk_start + chunksize)
                chunk = query[chunk_start: chunk_end] # create a view
                yield chunk
Project: ecpy    Author: elliptic-shiho    | project source | file source
def _mul(s, A, B):
    if len(A) == 1 and len(A[0]) == 1:
      A = A[0][0]
      return s.element_class(s, map(lambda y: map(lambda x: A * x, y), B))
    elif len(B) == 1 and len(B[0]) == 1:
      B = B[0][0]
      return s.element_class(s, map(lambda y: map(lambda x: x * B, y), A))
    deg_total_1 = max([len(X) + len(Y) - 1 for X, Y in itertools.product(A, B)])
    deg_total_2 = len(list(itertools.product(A, B)))
    ret = [[0] * deg_total_1 for _ in xrange(deg_total_2)]
    deg1 = 0
    for X in A:
      deg2 = 0
      for Y in B:
        for x, y in enumerate(X):
          for u, v in enumerate(Y):
            ret[deg1 + deg2][x + u] += y * v
        deg2 += 1
      deg1 += 1
    return s.element_class(s, ret)
Project: source_separation_ml_jeju    Author: hjkwon0609    | project source | file source
def bss_eval_global(mixed_wav, src1_wav, src2_wav, pred_src1_wav, pred_src2_wav):
    len_cropped = pred_src1_wav.shape[-1]
    src1_wav = src1_wav[:len_cropped]
    src2_wav = src2_wav[:len_cropped]
    mixed_wav = mixed_wav[:len_cropped]
    gnsdr, gsir, gsar = np.zeros(2), np.zeros(2), np.zeros(2)
    total_len = 0
    # for i in range(2):
    sdr, sir, sar, _ = bss_eval_sources(np.array([src1_wav, src2_wav]),
                                        np.array([pred_src1_wav, pred_src2_wav]), True)
    sdr_mixed, _, _, _ = bss_eval_sources(np.array([src1_wav, src2_wav]),
                                          np.array([mixed_wav, mixed_wav]), True)
    nsdr = sdr - sdr_mixed
    gnsdr += len_cropped * nsdr
    gsir += len_cropped * sir
    gsar += len_cropped * sar
    total_len += len_cropped
    gnsdr = gnsdr / total_len
    gsir = gsir / total_len
    gsar = gsar / total_len
    return gnsdr, gsir, gsar
Project: tefla    Author: litan    | project source | file source
def _enqueue_thread_fn(self, sess, coord, start_epoch, data_set):
        training_X, training_y, validation_X, validation_y = \
            data_set.training_X, data_set.training_y, data_set.validation_X, data_set.validation_y

        for epoch in moves.xrange(start_epoch, self.num_epochs + 1):
            np.random.seed(epoch + self.seed_delta)
            for batch_data in self.training_iterator(training_X, training_y):
                if coord.should_stop():
                    return
                sess.run(self.enqueue_op,
                         feed_dict={self.batch_X: batch_data[0],
                                    self.batch_y: self._adjust_ground_truth(batch_data[1])})

            for batch_data in self.validation_iterator(validation_X, validation_y):
                if coord.should_stop():
                    return
                sess.run(self.enqueue_op,
                         feed_dict={self.batch_X: batch_data[0],
                                    self.batch_y: self._adjust_ground_truth(batch_data[1])})
Project: wtte-rnn    Author: ragulpr    | project source | file source
def df_to_padded_padded_to_df_runner(t_col):
        n_seqs = 5
        max_seq_length = 10
        ids = xrange(n_seqs)
        cols_to_expand = ['event', 'int_column', 'double_column']
        np.random.seed(1)

        df = generate_random_df(n_seqs, max_seq_length)
        df = df.reset_index(drop=True)

        # Column names to transform to tensor
        dtypes = df[cols_to_expand].dtypes.values
        padded = df_to_padded(df, cols_to_expand, 'id', t_col)

        df_new = padded_to_df(padded, cols_to_expand,
                              dtypes, ids, 'id', t_col)
        # Pandas is awful. Index changes when slicing
        df = df[['id', t_col] + cols_to_expand].reset_index(drop=True)
        pd.util.testing.assert_frame_equal(df, df_new)
Project: wtte-rnn    Author: ragulpr    | project source | file source
def padded_events_to_not_censored(events, discrete_time):
    seq_lengths = get_padded_seq_lengths(events)
    n_seqs = events.shape[0]
    is_not_censored = np.copy(events)

    for i in xrange(n_seqs):
        if seq_lengths[i] > 0:
            is_not_censored[i][:seq_lengths[i]] = get_is_not_censored(
                events[i][:seq_lengths[i]], discrete_time)
    return is_not_censored

# MISC / Data munging

# def df_to_padded_memcost(df, id_col='id', t_col='t'):
#     """
#         Calculates memory cost of padded using the alternative routes.
#         # number of arrays = features+tte+u = n_features+2
#         # To list? Pad betweeen?
#         # To array ->(pad after)
#     """

#     print('Not yet implemented')
#     return None
Project: wtte-rnn    Author: ragulpr    | project source | file source
def get_is_not_censored(is_event, discrete_time=True):
    """ Calculates non-censoring indicator `u` for one vector.

        :param array is_event: logical or numeric array indicating event.
        :param Boolean discrete_time: if `True`, last observation is conditionally censored.
    """
    n = len(is_event)
    is_not_censored = np.copy(is_event)

    if discrete_time:
        # Last obs is conditionally censored
        event_seen = is_event[-1]
        for i in reversed(xrange(n)):
            if is_event[i] and not event_seen:
                event_seen = is_event[i]
            is_not_censored[i] = event_seen
    else:
        # Last obs is always censored
        event_seen = False
        for i in reversed(xrange(n)):
            is_not_censored[i] = event_seen
            if is_event[i] and not event_seen:
                event_seen = is_event[i]

    return is_not_censored
Project: topical_word_embeddings    Author: thunlp    | project source | file source
def lda_e_step(doc_word_ids, doc_word_counts, alpha, beta, max_iter=100):
    gamma = np.ones(len(alpha))
    expElogtheta = np.exp(dirichlet_expectation(gamma))
    betad = beta[:, doc_word_ids]
    phinorm = np.dot(expElogtheta, betad) + 1e-100
    counts = np.array(doc_word_counts)
    for _ in xrange(max_iter):
        lastgamma = gamma

        gamma = alpha + expElogtheta * np.dot(counts / phinorm, betad.T)
        Elogtheta = dirichlet_expectation(gamma)
        expElogtheta = np.exp(Elogtheta)
        phinorm = np.dot(expElogtheta, betad) + 1e-100
        meanchange = np.mean(abs(gamma - lastgamma))
        if (meanchange < meanchangethresh):
            break

    likelihood = np.sum(counts * np.log(phinorm))
    likelihood += np.sum((alpha - gamma) * Elogtheta)
    likelihood += np.sum(sp.gammaln(gamma) - sp.gammaln(alpha))
    likelihood += sp.gammaln(np.sum(alpha)) - sp.gammaln(np.sum(gamma))

    return (likelihood, gamma)
Project: topical_word_embeddings    Author: thunlp    | project source | file source
def update_expectations(self):
        """
        Since we're doing lazy updates on lambda, at any given moment
        the current state of lambda may not be accurate. This function
        updates all of the elements of lambda and Elogbeta
        so that if (for example) we want to print out the
        topics we've learned we'll get the correct behavior.
        """
        for w in xrange(self.m_W):
            self.m_lambda[:, w] *= np.exp(self.m_r[-1] -
                                          self.m_r[self.m_timestamp[w]])
        self.m_Elogbeta = sp.psi(self.m_eta + self.m_lambda) - \
            sp.psi(self.m_W * self.m_eta + self.m_lambda_sum[:, np.newaxis])

        self.m_timestamp[:] = self.m_updatect
        self.m_status_up_to_date = True
Project: topical_word_embeddings    Author: thunlp    | project source | file source
def hdp_to_lda(self):
        """
        Compute the (almost) equivalent LDA model for this HDP.
        """
        # alpha
        sticks = self.m_var_sticks[0] / (self.m_var_sticks[0] + self.m_var_sticks[1])
        alpha = np.zeros(self.m_T)
        left = 1.0
        for i in xrange(0, self.m_T - 1):
            alpha[i] = sticks[i] * left
            left = left - alpha[i]
        alpha[self.m_T - 1] = left
        alpha = alpha * self.m_alpha

        # beta
        beta = (self.m_lambda + self.m_eta) / (self.m_W * self.m_eta + \
                self.m_lambda_sum[:, np.newaxis])

        return (alpha, beta)
Project: topical_word_embeddings    Author: thunlp    | project source | file source
def init_sims(self, replace=False):
        """
        Precompute L2-normalized vectors.

        If `replace` is set, forget the original vectors and only keep the normalized
        ones (this saves lots of memory!).

        Note that you **cannot continue training** after doing a replace. The model becomes
        effectively read-only: you can call `most_similar`, `similarity` etc., but not `train`.

        """
        if getattr(self, 'syn0norm', None) is None or replace:
            logger.info("precomputing L2-norms of word weight vectors")
            if replace:
                for i in xrange(self.syn0.shape[0]):
                    self.syn0[i, :] /= sqrt((self.syn0[i, :] ** 2).sum(-1))
                self.syn0norm = self.syn0
                if hasattr(self, 'syn1'):
                    del self.syn1
            else:
                self.syn0norm = (self.syn0 / sqrt((self.syn0 ** 2).sum(-1))[..., newaxis]).astype(REAL)
Project: topical_word_embeddings    Author: thunlp    | project source | file source
def compactify(self):
        """
        Assign new word ids to all words.

        This is done to make the ids more compact, e.g. after some tokens have
        been removed via :func:`filter_tokens` and there are gaps in the id series.
        Calling this method will remove the gaps.
        """
        logger.debug("rebuilding dictionary, shrinking gaps")

        # build mapping from old id -> new id
        idmap = dict(izip(itervalues(self.token2id), xrange(len(self.token2id))))

        # reassign mappings to new ids
        self.token2id = dict((token, idmap[tokenid]) for token, tokenid in iteritems(self.token2id))
        self.id2token = {}
        self.dfs = dict((idmap[tokenid], freq) for tokenid, freq in iteritems(self.dfs))
Project: topical_word_embeddings    Author: thunlp    | project source | file source
def iter_chunks(self, chunksize=None):
        """
        Iteratively yield the index as chunks of documents, each of size <= chunksize.

        The chunk is returned in its raw form (matrix or sparse matrix slice).
        The size of the chunk may be smaller than requested; it is up to the caller
        to check the result for real length, using `chunk.shape[0]`.
        """
        self.close_shard()

        if chunksize is None:
            # if not explicitly specified, use the chunksize from the constructor
            chunksize = self.chunksize

        for shard in self.shards:
            query = shard.get_index().index
            for chunk_start in xrange(0, query.shape[0], chunksize):
                # scipy.sparse doesn't allow slicing beyond real size of the matrix
                # (unlike numpy). so, clip the end of the chunk explicitly to make
                # scipy.sparse happy
                chunk_end = min(query.shape[0], chunk_start + chunksize)
                chunk = query[chunk_start: chunk_end] # create a view
                yield chunk
Project: topical_word_embeddings    Author: thunlp    | project source | file source
def lda_e_step(doc_word_ids, doc_word_counts, alpha, beta, max_iter=100):
    gamma = np.ones(len(alpha))
    expElogtheta = np.exp(dirichlet_expectation(gamma))
    betad = beta[:, doc_word_ids]
    phinorm = np.dot(expElogtheta, betad) + 1e-100
    counts = np.array(doc_word_counts)
    for _ in xrange(max_iter):
        lastgamma = gamma

        gamma = alpha + expElogtheta * np.dot(counts / phinorm, betad.T)
        Elogtheta = dirichlet_expectation(gamma)
        expElogtheta = np.exp(Elogtheta)
        phinorm = np.dot(expElogtheta, betad) + 1e-100
        meanchange = np.mean(abs(gamma - lastgamma))
        if (meanchange < meanchangethresh):
            break

    likelihood = np.sum(counts * np.log(phinorm))
    likelihood += np.sum((alpha - gamma) * Elogtheta)
    likelihood += np.sum(sp.gammaln(gamma) - sp.gammaln(alpha))
    likelihood += sp.gammaln(np.sum(alpha)) - sp.gammaln(np.sum(gamma))

    return (likelihood, gamma)
Project: topical_word_embeddings    Author: thunlp    | project source | file source
def update_expectations(self):
        """
        Since we're doing lazy updates on lambda, at any given moment
        the current state of lambda may not be accurate. This function
        updates all of the elements of lambda and Elogbeta
        so that if (for example) we want to print out the
        topics we've learned we'll get the correct behavior.
        """
        for w in xrange(self.m_W):
            self.m_lambda[:, w] *= np.exp(self.m_r[-1] -
                                          self.m_r[self.m_timestamp[w]])
        self.m_Elogbeta = sp.psi(self.m_eta + self.m_lambda) - \
            sp.psi(self.m_W * self.m_eta + self.m_lambda_sum[:, np.newaxis])

        self.m_timestamp[:] = self.m_updatect
        self.m_status_up_to_date = True
Project: topical_word_embeddings    Author: thunlp    | project source | file source
def hdp_to_lda(self):
        """
        Compute the (almost) equivalent LDA model for this HDP.
        """
        # alpha
        sticks = self.m_var_sticks[0] / (self.m_var_sticks[0] + self.m_var_sticks[1])
        alpha = np.zeros(self.m_T)
        left = 1.0
        for i in xrange(0, self.m_T - 1):
            alpha[i] = sticks[i] * left
            left = left - alpha[i]
        alpha[self.m_T - 1] = left
        alpha = alpha * self.m_alpha

        # beta
        beta = (self.m_lambda + self.m_eta) / (self.m_W * self.m_eta + \
                self.m_lambda_sum[:, np.newaxis])

        return (alpha, beta)
Project: topical_word_embeddings    Author: thunlp    | project source | file source
def init_sims(self, replace=False):
        """
        Precompute L2-normalized vectors.

        If `replace` is set, forget the original vectors and only keep the normalized
        ones (this saves lots of memory!).

        Note that you **cannot continue training** after doing a replace. The model becomes
        effectively read-only: you can call `most_similar`, `similarity` etc., but not `train`.

        """
        if getattr(self, 'syn0norm', None) is None or replace:
            logger.info("precomputing L2-norms of word weight vectors")
            if replace:
                for i in xrange(self.syn0.shape[0]):
                    self.syn0[i, :] /= sqrt((self.syn0[i, :] ** 2).sum(-1))
                self.syn0norm = self.syn0
                if hasattr(self, 'syn1'):
                    del self.syn1
            else:
                self.syn0norm = (self.syn0 / sqrt((self.syn0 ** 2).sum(-1))[..., newaxis]).astype(REAL)
Project: topical_word_embeddings    Author: thunlp    | project source | file source
def compactify(self):
        """
        Assign new word ids to all words.

        This is done to make the ids more compact, e.g. after some tokens have
        been removed via :func:`filter_tokens` and there are gaps in the id series.
        Calling this method will remove the gaps.
        """
        logger.debug("rebuilding dictionary, shrinking gaps")

        # build mapping from old id -> new id
        idmap = dict(izip(itervalues(self.token2id), xrange(len(self.token2id))))

        # reassign mappings to new ids
        self.token2id = dict((token, idmap[tokenid]) for token, tokenid in iteritems(self.token2id))
        self.id2token = {}
        self.dfs = dict((idmap[tokenid], freq) for tokenid, freq in iteritems(self.dfs))
Project: topical_word_embeddings    Author: thunlp    | project source | file source
def iter_chunks(self, chunksize=None):
        """
        Iteratively yield the index as chunks of documents, each of size <= chunksize.

        The chunk is returned in its raw form (matrix or sparse matrix slice).
        The size of the chunk may be smaller than requested; it is up to the caller
        to check the result for real length, using `chunk.shape[0]`.
        """
        self.close_shard()

        if chunksize is None:
            # if not explicitly specified, use the chunksize from the constructor
            chunksize = self.chunksize

        for shard in self.shards:
            query = shard.get_index().index
            for chunk_start in xrange(0, query.shape[0], chunksize):
                # scipy.sparse doesn't allow slicing beyond real size of the matrix
                # (unlike numpy). so, clip the end of the chunk explicitly to make
                # scipy.sparse happy
                chunk_end = min(query.shape[0], chunk_start + chunksize)
                chunk = query[chunk_start: chunk_end] # create a view
                yield chunk
Project: topical_word_embeddings    Author: thunlp    | project source | file source
def lda_e_step(doc_word_ids, doc_word_counts, alpha, beta, max_iter=100):
    gamma = np.ones(len(alpha))
    expElogtheta = np.exp(dirichlet_expectation(gamma))
    betad = beta[:, doc_word_ids]
    phinorm = np.dot(expElogtheta, betad) + 1e-100
    counts = np.array(doc_word_counts)
    for _ in xrange(max_iter):
        lastgamma = gamma

        gamma = alpha + expElogtheta * np.dot(counts / phinorm, betad.T)
        Elogtheta = dirichlet_expectation(gamma)
        expElogtheta = np.exp(Elogtheta)
        phinorm = np.dot(expElogtheta, betad) + 1e-100
        meanchange = np.mean(abs(gamma - lastgamma))
        if (meanchange < meanchangethresh):
            break

    likelihood = np.sum(counts * np.log(phinorm))
    likelihood += np.sum((alpha - gamma) * Elogtheta)
    likelihood += np.sum(sp.gammaln(gamma) - sp.gammaln(alpha))
    likelihood += sp.gammaln(np.sum(alpha)) - sp.gammaln(np.sum(gamma))

    return (likelihood, gamma)
Project: topical_word_embeddings    Author: thunlp    | project source | file source
def update_expectations(self):
        """
        Since we're doing lazy updates on lambda, at any given moment
        the current state of lambda may not be accurate. This function
        updates all of the elements of lambda and Elogbeta
        so that if (for example) we want to print out the
        topics we've learned we'll get the correct behavior.
        """
        for w in xrange(self.m_W):
            self.m_lambda[:, w] *= np.exp(self.m_r[-1] -
                                          self.m_r[self.m_timestamp[w]])
        self.m_Elogbeta = sp.psi(self.m_eta + self.m_lambda) - \
            sp.psi(self.m_W * self.m_eta + self.m_lambda_sum[:, np.newaxis])

        self.m_timestamp[:] = self.m_updatect
        self.m_status_up_to_date = True
Project: topical_word_embeddings    Author: thunlp    | project source | file source
def init_sims(self, replace=False):
        """
        Precompute L2-normalized vectors.

        If `replace` is set, forget the original vectors and only keep the normalized
        ones (this saves lots of memory!).

        Note that you **cannot continue training** after doing a replace. The model becomes
        effectively read-only: you can call `most_similar`, `similarity` etc., but not `train`.

        """
        if getattr(self, 'syn0norm', None) is None or replace:
            logger.info("precomputing L2-norms of word weight vectors")
            if replace:
                for i in xrange(self.syn0.shape[0]):
                    self.syn0[i, :] /= sqrt((self.syn0[i, :] ** 2).sum(-1))
                self.syn0norm = self.syn0
                if hasattr(self, 'syn1'):
                    del self.syn1
            else:
                self.syn0norm = (self.syn0 / sqrt((self.syn0 ** 2).sum(-1))[..., newaxis]).astype(REAL)
Project: topical_word_embeddings    Author: thunlp    | project source | file source
def compactify(self):
        """
        Assign new word ids to all words.

        This is done to make the ids more compact, e.g. after some tokens have
        been removed via :func:`filter_tokens` and there are gaps in the id series.
        Calling this method will remove the gaps.
        """
        logger.debug("rebuilding dictionary, shrinking gaps")

        # build mapping from old id -> new id
        idmap = dict(izip(itervalues(self.token2id), xrange(len(self.token2id))))

        # reassign mappings to new ids
        self.token2id = dict((token, idmap[tokenid]) for token, tokenid in iteritems(self.token2id))
        self.id2token = {}
        self.dfs = dict((idmap[tokenid], freq) for tokenid, freq in iteritems(self.dfs))
Project: keras-gp    Author: alshedivat    | project source | file source
def test_fit(epochs=10, seed=42):
    rng = np.random.RandomState(seed)

    for nb_outputs in [1, 2]:
        # Generate dummy data
        X_tr = rng.normal(size=(N, input_shape[0], input_shape[1]))
        Y_tr = [rng.normal(size=(N, 1)) for _ in xrange(nb_outputs)]

        # Build & compile the model
        model = build_model(nb_outputs)
        loss = [gen_gp_loss(gp) for gp in model.output_gp_layers]
        model.compile(optimizer=optimizer, loss=loss)

        # Train the model
        model.fit(X_tr, Y_tr,
                  epochs=epochs,
                  batch_size=batch_size,
                  verbose=2)
Project: keras-gp    Author: alshedivat    | project source | file source
def test_finetune(gp_n_iter=10, seed=42):
    rng = np.random.RandomState(seed)

    for nb_outputs in [1, 2]:
        # Generate dummy data
        X_tr = rng.normal(size=(N, input_shape[0], input_shape[1]))
        Y_tr = [rng.normal(size=(N, 1)) for _ in xrange(nb_outputs)]

        # Build & compile the model
        model = build_model(nb_outputs)
        loss = [gen_gp_loss(gp) for gp in model.output_gp_layers]
        model.compile(optimizer=optimizer, loss=loss)

        # Finetune the model
        model.finetune(X_tr, Y_tr,
                       batch_size=batch_size,
                       gp_n_iter=gp_n_iter,
                       verbose=0)
Project: keras-gp    Author: alshedivat    | project source | file source
def test_predict(seed=42):
    rng = np.random.RandomState(seed)

    for nb_outputs in [1, 2]:
        # Generate dummy data
        X_tr = rng.normal(size=(N, input_shape[0], input_shape[1]))
        Y_tr = [rng.normal(size=(N, 1)) for _ in xrange(nb_outputs)]
        X_ts = rng.normal(size=(N, input_shape[0], input_shape[1]))
        Y_ts = [rng.normal(size=(N, 1)) for _ in xrange(nb_outputs)]

        # Build & compile the model
        model = build_model(nb_outputs)
        loss = [gen_gp_loss(gp) for gp in model.output_gp_layers]
        model.compile(optimizer=optimizer, loss=loss)

        # Predict
        Y_pr = model.predict(X_ts, X_tr, Y_tr,
                             batch_size=batch_size, verbose=0)
        assert type(Y_pr) is list
        assert len(Y_pr) == len(Y_ts)
        assert np.all([(yp.shape == yt.shape) for yp, yt in zip(Y_pr, Y_ts)])
Project: Tacotron    Author: MU94W    | project source | file source
def __call__(self, inputs, is_training=True, scope=None):
        """
        Args:
            inputs: with shape -> (batch_size, time_step/width, units/channels)
        """
        with tf.variable_scope(scope or type(self).__name__):
            in_channels = inputs.shape[-1].value
            conv_lst = []
            for idk in xrange(1, self.K + 1):
                with tf.variable_scope('inner_conv_%d' % idk):
                    conv_k = self.activation(__conv1d_alone_time__(inputs, idk, in_channels, in_channels))
                conv_lst.append(conv_k)

            stacked_conv = tf.stack(conv_lst, axis=-1)   # shape -> (batch_size, time_step/width, units/channels, K/height)
            #re_shape = tf.shape(stacked_conv)[:2] + [1, in_channels * self.K]
            re_shape = [tf.shape(stacked_conv)[0], tf.shape(stacked_conv)[1], 1, in_channels * self.K]
            stacked_conv = tf.reshape(stacked_conv, shape=re_shape)     # shape -> (batch_size, time_step/width, 1, units*K/channels)

            ### max pool along time
            ksize = [1, 2, 1, 1]
            strid = [1, 1, 1, 1]
            pooled_conv = tf.squeeze(tf.nn.max_pool(stacked_conv, ksize, strid, 'SAME'), axis=2)    # shape -> (batch_size, time_step/width, units*K/channels)

            return pooled_conv
Project: Tacotron    Author: MU94W    | project source | file source
def __call__(self, inputs, is_training=True, scope=None):
        """
        Args:
            inputs: with shape -> (batch_size, time_step/width, units/channels)
        """
        with tf.variable_scope(scope or type(self).__name__):
            in_channels = inputs.shape[-1].value
            conv_lst = []
            for idk in xrange(1, self.K + 1):
                with tf.variable_scope('inner_conv_%d' % idk):
                    conv_k = self.activation(__conv1d_alone_time__(inputs, idk, in_channels, in_channels))
                    norm_k = tf.contrib.layers.batch_norm(conv_k, is_training=is_training, updates_collections=None)
                conv_lst.append(norm_k)

            stacked_conv = tf.stack(conv_lst, axis=-1)   # shape -> (batch_size, time_step/width, units/channels, K/height)
            re_shape = [tf.shape(stacked_conv)[0], tf.shape(stacked_conv)[1], 1, in_channels * self.K]
            stacked_conv = tf.reshape(stacked_conv, shape=re_shape)     # shape -> (batch_size, time_step/width, 1, units*K/channels)

            ### max pool along time
            ksize = [1, 2, 1, 1]
            strid = [1, 1, 1, 1]
            pooled_conv = tf.squeeze(tf.nn.max_pool(stacked_conv, ksize, strid, 'SAME'), axis=2)    # shape -> (batch_size, time_step/width, units*K/channels)

            return pooled_conv
Project: TensorFlow101    Author: nihit    | project source | file source
def eval_in_batches(self, data, session):
    """
    Get all predictions for a dataset by running it in small batches.
    """
    size = data.shape[0]
    eval_batch_size = self.config.EVAL_BATCH_SIZE
    if size < eval_batch_size:
      raise ValueError("batch size for evals larger than dataset: %d" % size)
    predictions = numpy.ndarray(shape=(size, self.config.NUM_LABELS), dtype=numpy.float32)
    for begin in xrange(0, size, eval_batch_size):
      end = begin + eval_batch_size
      if end <= size:
        predictions[begin:end, :] = session.run(
            self.eval_prediction,
            feed_dict=self.create_feed_dict(data[begin:end, ...]))
      else:
        batch_predictions = session.run(
            self.eval_prediction,
            feed_dict=self.create_feed_dict(data[-eval_batch_size:, ...]))
        predictions[begin:, :] = batch_predictions[begin - size:, :]
    return predictions
Project: TensorFlow101    Author: nihit    | project source | file source
def featurize_in_batches(self, session, sample_size):
    """
    Get fully connected layer1 features by running it in small batches.
    """
    batch_size = self.config.EVAL_BATCH_SIZE
    if sample_size < batch_size:
      raise ValueError("batch size for evals larger than dataset: %d" % sample_size)
    features = numpy.ndarray(shape=(sample_size, 512), dtype=numpy.float32)
    labels = numpy.ndarray(shape=(sample_size,),dtype=numpy.int64)
    for begin in xrange(0, sample_size, batch_size):
      end = begin + batch_size
      if end <= sample_size:
        features[begin:end, :] = session.run(
            self.featurize,
            feed_dict=self.create_feed_dict(self.train_data[begin:end, ...]))
        labels[begin:end] = self.train_labels[begin:end]
      else:
        features[end-batch_size:end,:] = session.run(
            self.featurize,
            feed_dict=self.create_feed_dict(self.train_data[end-batch_size:end, ...]))
        labels[end-batch_size:end] = self.train_labels[end-batch_size:end]
    return features, labels
Project: wiki-sem-500    Author: belph    | project source | file source
def _from_word2vec_binary(fname):
    with _open(fname, 'rb') as fin:
      words = []
      header = _decode(fin.readline())
      vocab_size, layer1_size = list(map(int, header.split())) # throws for invalid file format
      vectors = np.zeros((vocab_size, layer1_size), dtype=float32)
      binary_len = np.dtype(float32).itemsize * layer1_size
      for line_no in xrange(vocab_size):
        # mixed text and binary: read text first, then binary
        word = []
        while True:
          ch = fin.read(1)
          if ch == b' ':
            break
          if ch != b'\n': # ignore newlines in front of words (some binary files have newline, some don't)
            word.append(ch)
        word = _decode(b''.join(word))
        index = line_no
        words.append(word)
        vectors[index, :] = np.fromstring(fin.read(binary_len), dtype=float32)
      return words, vectors
Project: chainer-qrnn    Author: musyoku    | project source | file source
def test_attentive_decoder():
    np.random.seed(0)
    enc_shape = (2, 3, 5)
    dec_shape = (2, 4, 7)
    prod = enc_shape[0] * enc_shape[1] * enc_shape[2]
    enc_data = np.arange(0, prod, dtype=np.float32).reshape(enc_shape) / prod
    prod = dec_shape[0] * dec_shape[1] * dec_shape[2]
    dec_data = np.arange(0, prod, dtype=np.float32).reshape(dec_shape) / prod
    skip_mask = np.ones((enc_data.shape[0], enc_data.shape[2]), dtype=np.float32)
    skip_mask[:, :1] = 0
    skip_mask[0, :2] = 0

    encoder = QRNNEncoder(enc_shape[1], 4, kernel_size=4, pooling="fo", zoneout=False, zoneout_ratio=0.5)
    decoder = QRNNGlobalAttentiveDecoder(dec_shape[1], 4, kernel_size=4, zoneout=False, zoneout_ratio=0.5)

    H = encoder(enc_data, skip_mask)
    ht = encoder.get_last_hidden_state()
    Y = decoder(dec_data, ht, H, skip_mask)

    decoder.reset_state()
    for t in xrange(dec_shape[2]):
        y = decoder.forward_one_step(dec_data[:, :, :t+1], ht, H, skip_mask)
        assert np.sum((y.data - Y.data[:, :, :t+1]) ** 2) == 0
        print("t = {} OK".format(t))
Project: iscnlp    Author: iscnlp    | project source | file source
def create_beam_items(self, beam):
        beam_items = []
        for b in xrange(len(beam)):
            config = beam[b]
            prevScore = config.score
            dense_feats = self.template.feat_template(config.nodes,
                                                      config.stack,
                                                      config.b0)
            pr_scores = self.clf.predict_proba(dense_feats)[0]
            pr_scores = np.log(pr_scores)
            predictions = zip(pr_scores, self.clf.classes_)
            valid_trans = self.get_valid_transitions(config)
            for score, (action, label) in predictions:
                if self.transitions[action] in valid_trans:
                    next_transition = valid_trans[self.transitions[action]]
                    heapq.heappush(
                        beam_items,
                        (prevScore + score, b, next_transition, label))
                    if len(beam_items) > self.beamwidth:
                        heapq.heappop(beam_items)
        return beam_items
Project: pyssp    Author: shunsukeaihara    | project source | file source
def _compute(self, signal):
        ltsds = np.zeros(self._windownum)
        prev = 0
        pair = None
        result = []
        for l in xrange(self._windownum):
            ltsd = self._ltsd(signal, l)
            ltsds[l] = ltsd
            x = self._is_signal(signal, ltsd, l)
            if x:  # signal
                if prev == 0:  # start signal segment
                    pair = [l]
                prev = 1
            else:  # noise
                if prev == 1:  # end signal segment
                    pair.append(l-1)
                    result.append(pair)
                    pair = None
                prev = 0
        return result, ltsds
Project: deb-python-cassandra-driver    Author: openstack    | project source | file source
def test_generator_param(self):
        result = bind_params("%s", ((i for i in xrange(3)),), Encoder())
        self.assertEqual(result, "[0, 1, 2]")
Project: deb-python-cassandra-driver    Author: openstack    | project source | file source
def test_multiple_query_plans(self):
        hosts = [0, 1, 2, 3]
        policy = RoundRobinPolicy()
        policy.populate(None, hosts)
        for i in xrange(20):
            qplan = list(policy.make_query_plan())
            self.assertEqual(sorted(qplan), hosts)
Project: deb-python-cassandra-driver    Author: openstack    | project source | file source
def push(self, data):
        chunk_size = self.out_buffer_size
        for i in xrange(0, len(data), chunk_size):
            self._write_queue.put(data[i:i + chunk_size])
Project: deb-python-cassandra-driver    Author: openstack    | project source | file source
def execute(self, concurrency, fail_fast):
        self._fail_fast = fail_fast
        self._results_queue = []
        self._current = 0
        self._exec_count = 0
        with self._condition:
            for n in xrange(concurrency):
                if not self._execute_next():
                    break
        return self._results()
Project: TFCommon    Author: MU94W    | project source | file source
def prepare_batch(self):
        if not self.split_bool:
            self.feed_single_batch(self.fetch_one_batch())
        else:
            many_records = [self.fetch_one_record() for _ in xrange(self.batch_size * self.split_nums)]
            for batch in self.split_strategy(many_records):
                self.feed_single_batch(self.pre_process_batch(batch))
Project: TFCommon    Author: MU94W    | project source | file source
def prepare_validation(self):
        if not self.split_bool:
            while self._record_index <= (self._total_samples - self.batch_size):
                self.feed_single_batch(self.fetch_one_batch())
            remain_batch = []
            while self._record_index != 0:
                remain_batch.append(self.fetch_one_record())
            self.feed_single_batch(self.pre_process_batch(remain_batch))
        else:
            many_records = [self.fetch_one_record() for _ in xrange(self._total_samples)]
            for batch in self.split_strategy(many_records):
                self.feed_single_batch(self.pre_process_batch(batch))
Project: TFCommon    Author: MU94W    | project source | file source
def fetch_one_batch(self):
        records = [self.fetch_one_record() for _ in xrange(self.batch_size)]
        try:
            pre_processed = self.pre_process_batch(records)
        except Exception as e:
            print('[E] pre_processed failed')
            pre_processed = []
        return pre_processed
Project: workflows.kyoyue    Author: wizyoung    | project source | file source
def _lost_point_level3(modules, modules_count):
    modules_range_short = xrange(modules_count-6)

    lost_point = 0
    for row in xrange(modules_count):
        this_row = modules[row]
        for col in modules_range_short:
            if (this_row[col]
                    and not this_row[col + 1]
                    and this_row[col + 2]
                    and this_row[col + 3]
                    and this_row[col + 4]
                    and not this_row[col + 5]
                    and this_row[col + 6]):
                lost_point += 40

    for col in xrange(modules_count):
        for row in modules_range_short:
            if (modules[row][col]
                    and not modules[row + 1][col]
                    and modules[row + 2][col]
                    and modules[row + 3][col]
                    and modules[row + 4][col]
                    and not modules[row + 5][col]
                    and modules[row + 6][col]):
                lost_point += 40

    return lost_point
Project: workflows.kyoyue    Author: wizyoung    | project source | file source
def _lost_point_level4(modules, modules_count):
    modules_range = xrange(modules_count)
    dark_count = 0

    for row in modules_range:
        this_row = modules[row]
        for col in modules_range:
            if this_row[col]:
                dark_count += 1

    ratio = abs(100 * dark_count / modules_count / modules_count - 50) / 5
    return ratio * 10
Project: face_dataset_summarizing    Author: binLearning    | project source | file source
def main():
  line_type = {'names': ('id', 'url',
                         'lt_x', 'lt_y', # left top
                         'rb_x', 'rb_y', # right bottom
                         'pose', 'score', 'curation'),
               'formats': ('S16', 'S256',
                           np.float, np.float,
                           np.float, np.float,
                           np.float, np.float, np.int)}

  rt_dir_info = sys.argv[1]
  dir_urls = sys.argv[2]
  if not os.path.exists(dir_urls):
    os.mkdir(dir_urls)

  list_info_files = os.listdir(rt_dir_info)

  for info_file_name in list_info_files[:]:
    individual_name,_ = os.path.splitext(info_file_name)

    info_file_path = os.path.join(rt_dir_info, info_file_name)

    list_info = np.loadtxt(info_file_path, dtype=line_type, comments=None)
    total_num = list_info.size

    print(individual_name, total_num)

    save_path = os.path.join(dir_urls, info_file_name)
    with open(save_path, 'w') as fp:
      for proc_num in xrange(total_num):
        str_url = list_info[proc_num]['url']
        fp.write('{}\n'.format(str_url))