Python itertools module: tee() example source code

We extracted the following 50 code examples from open-source Python projects to illustrate how to use itertools.tee().

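Before the project examples, here is a minimal sketch of the basic tee() pattern (our own illustration, not from any of the projects below): tee() splits one iterator into independent copies, and the original iterator must not be advanced afterwards.

from itertools import tee

numbers = iter([1, 2, 3])
a, b = tee(numbers)   # two independent iterators over the same stream
print(list(a))        # [1, 2, 3]
print(list(b))        # [1, 2, 3] -- tee buffers, so exhausting a does not affect b
# Caveat: after tee(), advance only a and b; advancing `numbers` itself
# would silently desynchronize the copies.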
Project: dontwi    Author: vocalodon
def dump_status_strings(conf):
    dontwi = Dontwi(conf)
    in_cn = dontwi.get_connector("inbound")
    in_cn.connect()
    operation_cf = dontwi.config.items["operation"]
    trigger_str = dontwi.get_trigger()
    [since, until, limit] = [
        dontwi.config.inbound.get(option, "")
        for option in ["since", "until", "limit"]]
    statuses, statuses2 = tee(in_cn.get_timeline_statuses_by_hashtag(
        hashtag=trigger_str, since=since, until=until, limit=limit))
    status_pr = StatusText(dontwi.config.outbound)
    result_log = ResultLog(dontwi.config.items)
    summaries = dontwi.summaries_to_be_listed_in_waiting_list(result_log=result_log,
                                                              status_pr=status_pr,
                                                              statuses=statuses,
                                                              trigger_str=trigger_str)
    status_dc = {a_status.status["id"]: a_status.status["content"]
                 for a_status in statuses2}
    dump_strs = ["{0}\n{1}\n{2}\n[{3}]".format(a_summary["inbound_status_id"], a_summary["status_string"],
                                               a_summary["inbound_status_url"], status_dc[a_summary["inbound_status_id"]])
                 for a_summary in summaries]
    for lint_str in dump_strs:
        print(lint_str)
Project: ProtScan    Author: gianlucacorrado
def get_supervised_data(self, preprocessed, bin_sites,
                            active_learning=False, random_state=1234,
                            n_jobs=-1):
        """Compute the feature matrix and the regression values."""
        preprocessed, preprocessed_ = tee(preprocessed)
        if self.mode == 'sequence':
            dists = [attr['dist'] for attr, _ in preprocessed_]
        else:
            dists = [g.graph['id']['dist'] for g in preprocessed_]
        vals = np.array([common.dist_to_val(d, self.max_dist) for d in dists])
        if self.mode == 'sequence':
            self.vectorizer = SeqVectorizer(auto_weights=True,
                                            **self.vectorizer_args)
        else:
            self.vectorizer = GraphVectorizer(auto_weights=True,
                                              **self.vectorizer_args)
        matrix = vectorize(preprocessed, vectorizer=self.vectorizer,
                           block_size=400, n_jobs=n_jobs)
        return matrix, vals
Project: ProtScan    Author: gianlucacorrado
def get_predict_data(self, preprocessed, n_jobs=-1):
        """Compute the feature matrix and extract the subseq info."""
        def _subdict(dic):
            subdict = dict((k, dic[k]) for k in [
                           'tr_name', 'center', 'tr_len'] if k in dic)
            return subdict

        preprocessed, preprocessed_ = tee(preprocessed)
        if self.mode == 'sequence':
            info = [_subdict(attr) for attr, _ in preprocessed_]
        else:
            info = [_subdict(g.graph['id']) for g in preprocessed_]

        if self.mode == 'sequence':
            self.vectorizer = SeqVectorizer(auto_weights=True,
                                            **self.vectorizer_args)
        else:
            self.vectorizer = GraphVectorizer(auto_weights=True,
                                              **self.vectorizer_args)
        matrix = vectorize(preprocessed, vectorizer=self.vectorizer,
                           block_size=400, n_jobs=n_jobs)
        return matrix, info
Project: ProtScan    Author: gianlucacorrado
def graph_preprocessor(graphs, which_set, bin_sites=None, max_dist=None,
                       random_state=1234, **params):
    """Preprocess graphs."""
    assert which_set == 'train' or which_set == 'test', \
        "which_set must be either 'train' or 'test'."

    if which_set == 'train':
        graphs = add_distance(graphs, bin_sites)
        graphs = split_iterator(graphs, **params)
        graphs = add_type(graphs, max_dist)
        return graphs
    elif which_set == 'test':
        graphs, graphs_ = tee(graphs)
        full_graphs = transform_dictionary(graphs_)
        graphs = split_iterator(graphs, **params)
        return full_graphs, graphs
    else:
        raise Exception("ERROR: unrecognized which_set type: %s" %
                        which_set)
Project: code    Author: ActiveState
def hamming_numbers():
    # Generate "5-smooth" numbers, also called "Hamming numbers"
    # or "Regular numbers".  See: http://en.wikipedia.org/wiki/Regular_number
    # Generates numbers of the form 2**i * 3**j * 5**k for nonnegative integers i, j, and k.

    def deferred_output():
        'Works like a forward reference to the "output" variable defined below'
        for i in output:
            yield i

    result, p2, p3, p5 = tee(deferred_output(), 4)  # split the output streams
    m2 = (2*x for x in p2)                          # multiples of 2
    m3 = (3*x for x in p3)                          # multiples of 3
    m5 = (5*x for x in p5)                          # multiples of 5
    merged = merge(m2, m3, m5)
    combined = chain([1], merged)                   # prepend starting point
    output = (k for k, v in groupby(combined))      # eliminate duplicates

    return result
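For context, a small usage sketch for the recipe above (our addition, assuming the surrounding module does `from itertools import tee, chain, groupby, islice` and `from heapq import merge`):

print(list(islice(hamming_numbers(), 10)))
# -> [1, 2, 3, 4, 5, 6, 8, 9, 10, 12]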
Project: PYKE    Author: muddyfish
def apply_inf_list(self, a:Node.infinite, b:Node.infinite):
        def apply_iterator(a, b):
            a, a_copy = tee(a, 2)
            b, b_copy = tee(b, 2)
            yield self.run(next(a_copy), [next(b_copy)])
            size = 1
            while 1:
                next_a = next(a_copy)
                next_b = next(b_copy)
                a, new_a = tee(a, 2)
                b, new_b = tee(b, 2)
                yield from (self.run(next(new_a), [next_b]) for i in range(size))
                yield from (self.run(next_a, [next(new_b)]) for i in range(size))
                yield self.run(next_a, [next_b])
                size += 1
        return DummyList(apply_iterator(a, b))
Project: nstock    Author: ybenitezf
def __call__(self, tokens):
        from itertools import tee

        count = len(self.filters)
        # Tee the token iterator and wrap each teed iterator with the
        # corresponding filter
        gens = [filter(t.copy() for t in gen) for filter, gen
                in zip(self.filters, tee(tokens, count))]
        # Keep a count of the number of running iterators
        running = count
        while running:
            for i, gen in enumerate(gens):
                if gen is not None:
                    try:
                        yield next(gen)
                    except StopIteration:
                        gens[i] = None
                        running -= 1
Project: eea.corpus    Author: eea
def build_phrase_models(content, base_path, settings):
    """ Build and save the phrase models
    """

    ngram_level = int(settings['level'])

    # According to tee() docs, this may be inefficient in terms of memory.
    # We need to do this because we need multiple passes through the
    # content stream.
    content = chain.from_iterable(doc.tokenized_text for doc in content)
    cs1, cs2 = tee(content, 2)

    for i in range(ngram_level-1):
        phrases = Phrases(cs1)
        path = "%s.%s" % (base_path, i + 2)     # save path as n-gram level
        logger.info("Phrase processor: Saving %s", path)
        phrases.save(path)
        # TODO: gensim complains about not using Phraser(phrases)
        content = phrases[cs2]  # tokenize phrases in content stream
        cs1, cs2 = tee(content, 2)
Project: python-tutorial    Author: Akuli
def find_links(file):
    """Find all markdown links in a file object.

    Yield (regexmatch, lineno) tuples.
    """
    # don't yield same link twice
    seen = set()

    # we need to loop over the file two lines at a time to support
    # multi-line (actually two-line) links, so this is kind of a mess
    firsts, seconds = itertools.tee(file)
    next(seconds)  # first line is never second line

    # we want 1-based indexing instead of 0-based and one-line links get
    # caught from linepair[1], so we need to start at two
    for lineno, linepair in enumerate(zip(firsts, seconds), start=2):
        lines = linepair[0] + linepair[1]
        for match in re.finditer(_LINK_REGEX, lines, flags=re.DOTALL):
            if match.group(0) not in seen:
                seen.add(match.group(0))
                yield match, lineno
Project: rl_trading    Author: ucaiado
def __init__(self, l_hours, i_milis=2, i_sec=None):
        '''
        Initialize a NextStopTime object. Save all parameters as attributes.

        :param l_hours: list. Hours to be used in the stoptime calculation
        :param i_milis*: integer. Number of milliseconds between each stoptime
        :param i_sec*: integer. Number of seconds between each stoptime. If
            defined, i_milis is not used
        '''
        i_noise = None
        if i_milis > 4:
            i_noise = min(1, i_milis/5)
        self.gen_stoptime = get_next_stoptime(l_hours, i_milis, i_sec, i_noise)
        self.gen_stoptime, self.gen_backup = itertools.tee(self.gen_stoptime)
        self.s_last_stoptime = ''
        self.s_stoptime_was_set = ''
        self.s_time = "{:0>2}:{:0>2}:{:0>2}.{:0>3}"
        self.b_use_last = False
Project: sbds    Author: steemit
def trailing_windows(window_size=24, window_units='hours', window_count=3):
    """

    Args:
        window_size (int):
        window_units (str):
        window_count (int):

    Yields:
        Dict[str,str]

    """
    tos, froms = tee(trailing_periods(window_size, window_units, window_count))
    next(froms, None)
    for to, _from in zip(tos, froms):
        yield {'_from': _from, 'to': to}
Project: clopure    Author: vbkaisetsu
def iter_split_evaluate_wrapper(self, fn, local_vars, in_size, q_in, q_out):
        l = Lock()
        idx_q = Queue()
        def split_iter():
            try:
                while True:
                    l.acquire()
                    i, data_in = q_in.get()
                    idx_q.put(i)
                    if data_in is EOFMessage:
                        return
                    yield data_in
            except BaseException:
                traceback.print_exc(file=sys.stdout)
        gs = itertools.tee(split_iter(), in_size)
        for data_out in self.evaluate((fn,) + tuple((lambda i: (x[i] for x in gs[i]))(i) for i in range(in_size)), local_vars=local_vars):
            q_out.put((idx_q.get(), data_out))
            l.release()
        q_out.put((0, EOFMessage))
Project: concepts    Author: sminez
def iwindowed(iterable, n):
    '''
    Take successive n-tuples from an iterable using a sliding window
    '''
    # Take n copies of the iterable
    iterables = tee(iterable, n)

    # Advance each to the correct starting position
    for step, it in enumerate(iterables):
        for s in range(step):
            next(it)

    # Zip the modified iterables and yield the elements as a generator
    # NOTE: not using zip_longest as we want to stop when we reach the end
    for t in zip(*iterables):
        yield t
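A quick behavioral check for iwindowed (our sketch, not part of the project):

print(list(iwindowed(range(5), 3)))
# -> [(0, 1, 2), (1, 2, 3), (2, 3, 4)]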
Project: trainer    Author: nutszebra
def dummy_type_tee():
        """Give itertools.tee(yielder)[0]

        Edited date:
            160704

        Test:
            160704

        Returns:
            itertools.tee: this is used self.type_generator_or_tee
        """
        def dummy():
            yield None
        copy1, copy2 = itertools.tee(dummy())
        return copy2
Project: zippy    Author: securesystemslab
def __call__(self, tokens):
        from itertools import tee

        count = len(self.filters)
        # Tee the token iterator and wrap each teed iterator with the
        # corresponding filter
        gens = [filter(t.copy() for t in gen) for filter, gen
                in zip(self.filters, tee(tokens, count))]
        # Keep a count of the number of running iterators
        running = count
        while running:
            for i, gen in enumerate(gens):
                if gen is not None:
                    try:
                        yield next(gen)
                    except StopIteration:
                        gens[i] = None
                        running -= 1
Project: WhooshSearch    Author: rokartnaz
def __call__(self, tokens):
        from itertools import tee

        count = len(self.filters)
        # Tee the token iterator and wrap each teed iterator with the
        # corresponding filter
        gens = [filter(t.copy() for t in gen) for filter, gen
                in zip(self.filters, tee(tokens, count))]
        # Keep a count of the number of running iterators
        running = count
        while running:
            for i, gen in enumerate(gens):
                if gen is not None:
                    try:
                        yield next(gen)
                    except StopIteration:
                        gens[i] = None
                        running -= 1
Project: pypuf    Author: nils-wisiol
def approx_stabilities(instance, num, reps, random_instance=RandomState()):
    """
    This function approximates the stability of the given `instance` for
    `num` challenges evaluating it `reps` times per challenge. The stability
    is the probability that the instance gives the correct response when
    evaluated.
    :param instance: pypuf.simulation.base.Simulation
                     The instance for the stability approximation
    :param num: int
                Amount of challenges to be evaluated
    :param reps: int
                 Amount of repetitions per challenge
    :return: array of float
             Array of the stabilities for each challenge
    """

    challenges = sample_inputs(instance.n, num, random_instance)
    responses = zeros((reps, num))
    for i in range(reps):
        challenges, unpacked_challenges = itertools.tee(challenges)
        responses[i, :] = instance.eval(array(list(unpacked_challenges)))
    return 0.5 + 0.5 * np_abs(np_sum(responses, axis=0)) / reps
Project: tichu-tournament    Author: aragos
def prev_this_next(items):
    """
    Loop over a collection with look-ahead and look-back.

    From Thomas Guest, 
    http://wordaligned.org/articles/zippy-triples-served-with-python

    Seriously useful looping tool (Google "zippy triples")
    lets you loop a collection and see the previous and next items,
    which get set to None at the ends.

    To be used in layout algorithms where one wants a peek at the
    next item coming down the pipe.

    """

    extend = itertools.chain([None], items, [None])
    # use "nxt" rather than "next" to avoid shadowing the builtin next()
    prev, this, nxt = itertools.tee(extend, 3)
    try:
        next(this)
        next(nxt)
        next(nxt)
    except StopIteration:
        pass
    return zip(prev, this, nxt)
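A short check of the expected output (our sketch, assuming the function above):

print(list(prev_this_next('abc')))
# -> [(None, 'a', 'b'), ('a', 'b', 'c'), ('b', 'c', None)]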
Project: tabkit    Author: yandex-tabkit
def parse_file_keeplines(lines, require_order=None):
    r"""
    >>> def gen_lines(x):
    ...     yield "# field:int\n"
    ...     for i in range(x):
    ...         yield "%s\n" % (test_field,)
    >>> parsed = parse_file_keeplines(gen_lines(2))
    >>> next(parsed)
    '# field:int\n'
    >>> test_field = 1; next(parsed)
    ('1\n', Rec(field=1))
    >>> test_field = 2; next(parsed)
    ('2\n', Rec(field=2))
    """
    lines_iter, lines_iter_parse = tee(iter(lines), 2)
    try:
        yield next(lines_iter)
    except StopIteration:
        raise Exception("No header")
    for line, rec in izip(lines_iter, parse_file(lines_iter_parse)):
        yield line, rec
Project: ww    Author: Tygs
def __iter__(self):
        """ Return the inner iterator

            Example:

                >>> from ww import g
                >>> gen = g(range(10))
                >>> iter(gen) == gen.iterator
                True

            Returns:
                Inner iterator.

            Raises:
                RuntimeError: if trying call __iter__ after calling .tee()
        """
        if self._tee_called:
            raise RuntimeError("You can't iterate on a g object after g.tee "
                               "has been called on it.")
        return self.iterator

    # TODO: type self, and stuff that returns things depending on self
Project: ww    Author: Tygs
def __mul__(self, num):
        # type: (int) -> IterableWrapper
        """ Duplicate itself and concatenate the results.

            It's basically a shortcut for `g().chain(*g().tee())`.

            Args:
                num: The number of times to duplicate.

            Example:

                >>> from ww import g
                >>> (g(range(3)) * 3).list()
                [0, 1, 2, 0, 1, 2, 0, 1, 2]
                >>> (2 * g(range(3))).list()
                [0, 1, 2, 0, 1, 2]
        """
        clones = itertools.tee(self.iterator, num)
        return self.__class__(itertools.chain(*clones))
Project: ww    Author: Tygs
def tee(self, num=2):
        # type: (int) -> IterableWrapper
        """ Return copies of this generator.

            Proxy to itertools.tee().

            If you want to concatenate the results afterwards, use
            g() * x instead of g().tee(x) which does that for you.

            Args:
                num: The number of returned generators.

            Example:

                >>> from ww import g
                >>> a, b, c = g(range(3)).tee(3)
                >>> [tuple(a), tuple(b), tuple(c)]
                [(0, 1, 2), (0, 1, 2), (0, 1, 2)]
        """
        cls = self.__class__
        gen = cls(cls(x) for x in itertools.tee(self.iterator, num))
        self._tee_called = True
        return gen

    # TODO: allow negative end boundary
Project: ww    Author: Tygs
def copy(self):
        # type: () -> IterableWrapper
        """ Return an exact copy of the iterable.

            The reference of the new iterable will be the same as the source
            when `copy()` was called.

            Example:

                >>> from ww import g
                >>> my_g_1 = g(range(3))
                >>> my_g_2 = my_g_1.copy()
                >>> next(my_g_1)
                0
                >>> next(my_g_1)
                1
                >>> next(my_g_2)
                0
        """

        self.iterator, new = itertools.tee(self.iterator)
        return self.__class__(new)
Project: gougo    Author: amaozhao
def previous_current_next(items):
    """
    From http://www.wordaligned.org/articles/zippy-triples-served-with-python

    Creates an iterator which returns (previous, current, next) triples,
    with ``None`` filling in when there is no previous or next
    available.
    """
    extend = itertools.chain([None], items, [None])
    prev, cur, nex = itertools.tee(extend, 3)
    # Advancing an iterator twice when we know there are two items (the
    # two Nones at the start and at the end) will never fail except if
    # `items` is some funny StopIteration-raising generator. There's no point
    # in swallowing this exception.
    next(cur)
    next(nex)
    next(nex)
    return zip(prev, cur, nex)
Project: CVProject    Author: hieuxinhe94
def __call__(self, seq):
    min_order = self.min_order
    max_order = self.max_order
    t = tee(seq, max_order)
    for i in xrange(max_order):
      for j in xrange(i):
        # advance iterators, ignoring result
        t[i].next()
    while True:
      token = ''.join(tn.next() for tn in t)
      if len(token) < max_order: break
      for n in xrange(min_order-1, max_order):
        yield token[:n+1]
    for a in xrange(max_order-1):
      for b in xrange(min_order, max_order-a):
        yield token[a:a+b]
Project: CVProject    Author: hieuxinhe94
def __call__(self, seq):
    _seq = str.split(seq)
    min_order = self.min_order
    max_order = self.max_order
    t = tee(_seq, max_order)
    for i in xrange(max_order):
      for j in xrange(i):
        # advance iterators, ignoring result
        t[i].next()
    while True:
      token = [tn.next() for tn in t]
      if len(token) < max_order: break
      for n in xrange(min_order-1, max_order):
        yield ' '.join(token[:n+1])
    for a in xrange(max_order-1):
      for b in xrange(min_order, max_order-a):
        yield ' '.join(token[a:a+b])
Project: QualquerMerdaAPI    Author: tiagovizoto
def __call__(self, tokens):
        from itertools import tee

        count = len(self.filters)
        # Tee the token iterator and wrap each teed iterator with the
        # corresponding filter
        gens = [filter(t.copy() for t in gen) for filter, gen
                in zip(self.filters, tee(tokens, count))]
        # Keep a count of the number of running iterators
        running = count
        while running:
            for i, gen in enumerate(gens):
                if gen is not None:
                    try:
                        yield next(gen)
                    except StopIteration:
                        gens[i] = None
                        running -= 1
Project: npstreams    Author: LaurentRDC
def itercopy(iterable, copies = 2):
    """
    Split an iterable into `copies` independent iterables. Once this is done,
    the original iterable *should not* be used again.

    Parameters
    ----------
    iterable : iterable
        Iterable to be split. Once it is split, the original iterable
        should not be used again.
    copies : int, optional
        Number of copies. Also determines the number of returned iterables.

    Returns
    -------
    iter1, iter2, ... : iterable
        Copies of ``iterable``.

    Examples
    --------
    By rebinding the name of the original iterable, we make sure that it
    will never be used again.

    >>> from npstreams import itercopy
    >>> evens = (2*n for n in range(1000))
    >>> evens, evens_copy = itercopy(evens, copies = 2)

    See Also
    --------
    itertools.tee : equivalent function
    """
    # itercopy is included because documentation of itertools.tee isn't obvious
    # to everyone
    return tee(iterable, copies)
Project: kinect-2-libras    Author: inessadl
def nsmallest(n, iterable, key=None):
    """Find the n smallest elements in a dataset.

    Equivalent to:  sorted(iterable, key=key)[:n]
    """
    # Short-cut for n==1 is to use min() when len(iterable)>0
    if n == 1:
        it = iter(iterable)
        head = list(islice(it, 1))
        if not head:
            return []
        if key is None:
            return [min(chain(head, it))]
        return [min(chain(head, it), key=key)]

    # When n>=size, it's faster to use sorted()
    try:
        size = len(iterable)
    except (TypeError, AttributeError):
        pass
    else:
        if n >= size:
            return sorted(iterable, key=key)[:n]

    # When key is none, use simpler decoration
    if key is None:
        it = izip(iterable, count())                        # decorate
        result = _nsmallest(n, it)
        return map(itemgetter(0), result)                   # undecorate

    # General case, slowest method
    in1, in2 = tee(iterable)
    it = izip(imap(key, in1), count(), in2)                 # decorate
    result = _nsmallest(n, it)
    return map(itemgetter(2), result)                       # undecorate
Project: kinect-2-libras    Author: inessadl
def nlargest(n, iterable, key=None):
    """Find the n largest elements in a dataset.

    Equivalent to:  sorted(iterable, key=key, reverse=True)[:n]
    """

    # Short-cut for n==1 is to use max() when len(iterable)>0
    if n == 1:
        it = iter(iterable)
        head = list(islice(it, 1))
        if not head:
            return []
        if key is None:
            return [max(chain(head, it))]
        return [max(chain(head, it), key=key)]

    # When n>=size, it's faster to use sorted()
    try:
        size = len(iterable)
    except (TypeError, AttributeError):
        pass
    else:
        if n >= size:
            return sorted(iterable, key=key, reverse=True)[:n]

    # When key is none, use simpler decoration
    if key is None:
        it = izip(iterable, count(0,-1))                    # decorate
        result = _nlargest(n, it)
        return map(itemgetter(0), result)                   # undecorate

    # General case, slowest method
    in1, in2 = tee(iterable)
    it = izip(imap(key, in1), count(0,-1), in2)             # decorate
    result = _nlargest(n, it)
    return map(itemgetter(2), result)                       # undecorate
Project: ProtScan    Author: gianlucacorrado
def random_partition_iter(iterable, n_splits, random_state=1234):
    """Partition a generator in a random way (should mantain the unbalance)."""
    iterable, iterable_ = tee(iterable)
    size = iterator_size(iterable_)
    part_ids = random_partition(size, n_splits=n_splits,
                                random_state=random_state)
    parts = list()
    for p in part_ids:
        iterable, iterable_ = tee(iterable)
        parts.append(selection_iterator(iterable_, p))
    return parts
Project: ProtScan    Author: gianlucacorrado
def balanced_split(sequences, bin_sites, n_splits,
                   random_state=1234):
    """Balanced split over binding/non-binding sequences."""
    # find the transcript names of positive and negatives
    sequences, sequences_ = tee(sequences)
    pos_ids = list()
    neg_ids = list()
    for i, (attr, _) in enumerate(sequences_):
        tr_name = attr['tr_name']
        is_binding = bin_sites.get(tr_name, False)
        if is_binding:
            pos_ids.append(i)
        else:
            neg_ids.append(i)

    random.seed(random_state)
    random.shuffle(pos_ids)
    random.shuffle(neg_ids)

    pos_split_points = \
        [int(len(pos_ids) * (float(i) / n_splits)) for i in range(1, n_splits)]
    neg_split_points = \
        [int(len(neg_ids) * (float(i) / n_splits)) for i in range(1, n_splits)]

    parts = list()
    for pos, neg in izip(np.split(pos_ids, pos_split_points),
                         np.split(neg_ids, neg_split_points)):
        sequences, sequences_ = tee(sequences)
        parts.append(selection_iterator(
            sequences_, np.concatenate([pos, neg])))
    return parts
Project: ProtScan    Author: gianlucacorrado
def balanced_fraction(sequences, bin_sites, opt_fraction=1.0,
                      random_state=1234):
    """Balanced sample of sequences (over binding/non-binding)."""
    # find the transcript names of positive and negatives
    sequences, sequences_ = tee(sequences)
    pos_names = list()
    neg_names = list()
    for attr, _ in sequences_:
        tr_name = attr['tr_name']
        is_binding = bin_sites.get(tr_name, False)
        if is_binding:
            pos_names.append(tr_name)
        else:
            neg_names.append(tr_name)
    # sample from positives and negatives
    selected = list()
    random.seed(random_state)
    k_pos = max(1, int(opt_fraction * len(pos_names)))
    selected.extend(random.sample(pos_names, k_pos))
    k_neg = max(1, int(opt_fraction * len(neg_names)))
    selected.extend(random.sample(neg_names, k_neg))
    # yield only sequences in selected
    for attr, s in sequences:
        tr_name = attr['tr_name']
        if tr_name in selected:
            yield attr, s
Project: ProtScan    Author: gianlucacorrado
def cross_vote(self, sequences, bin_sites, fit_batch_size=500,
                   pre_batch_size=200, max_splits=100000,
                   active_learning=False, random_state=1234, n_jobs=-1):
        """2-fold cross fit and vote."""
        votes = dict()
        part1, part2 = balanced_split(sequences, bin_sites, n_splits=2,
                                      random_state=random_state)

        part1, part1_ = tee(part1)
        part2, part2_ = tee(part2)

        # fold 1
        logger.debug("Fold 1")
        tr, te = part1, part2
        self._fit(tr, bin_sites, fit_batch_size, max_splits, active_learning,
                  random_state, n_jobs)
        part_votes = self.vote(
            te, pre_batch_size, max_splits, random_state, n_jobs)
        votes.update(part_votes)

        # fold 2
        logger.debug("Fold 2")
        tr, te = part2_, part1_
        self._fit(tr, bin_sites, fit_batch_size, max_splits, active_learning,
                  random_state, n_jobs)
        part_votes = self.vote(
            te, pre_batch_size, max_splits, random_state, n_jobs)
        votes.update(part_votes)
        return votes
Project: Flask_Blog    Author: sugarguo
def parse(self, file, boundary, content_length):
        formstream, filestream = tee(
            self.parse_parts(file, boundary, content_length), 2)
        form = (p[1] for p in formstream if p[0] == 'form')
        files = (p[1] for p in filestream if p[0] == 'file')
        return self.cls(form), self.cls(files)
Project: code    Author: ActiveState
def pairwise(iterable):
    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
    # from the itertools module documentation recipe

    a, b = tee(iterable)
    next(b, None)
    return izip(a, b)
Project: code    Author: ActiveState
def __init__(self, iterable):
        self._a, self._b = tee(iter(iterable), 2)
        self._previous = None
        self._peeked   = self._b.next()
Project: ravel    Author: ravel-net
def pairwise(iterable):
    a, b = tee(iterable)
    next(b, None)
    return izip(a, b)
Project: tableschema-elasticsearch-py    Author: frictionlessdata
def write(self, bucket, doc_type, rows, primary_key, update=False, as_generator=False):

        if primary_key is None or len(primary_key) == 0:
            raise ValueError('primary_key cannot be an empty list')

        def actions(rows_, doc_type_, primary_key_, update_):
            if update_:
                for row_ in rows_:
                    yield {
                        '_op_type': 'update',
                        '_index': bucket,
                        '_type': doc_type_,
                        '_id': self.generate_doc_id(row_, primary_key_),
                        '_source': {
                            'doc': row_,
                            'doc_as_upsert': True
                        }
                    }
            else:
                for row_ in rows_:
                    yield {
                        '_op_type': 'index',
                        '_index': bucket,
                        '_type': doc_type_,
                        '_id': self.generate_doc_id(row_, primary_key_),
                        '_source': row_
                    }

        iterables = itertools.tee(rows)
        actions_iterable = actions(iterables[0], doc_type, primary_key, update)

        iter = zip(streaming_bulk(self.__es, actions=actions_iterable), iterables[1])

        if as_generator:
            for result, row in iter:
                yield row
        else:
            collections.deque(iter, maxlen=0)

        self.__es.indices.flush(bucket)
Project: swjtu-pyscraper    Author: Desgard
def parse(self, file, boundary, content_length):
        formstream, filestream = tee(
            self.parse_parts(file, boundary, content_length), 2)
        form = (p[1] for p in formstream if p[0] == 'form')
        files = (p[1] for p in filestream if p[0] == 'file')
        return self.cls(form), self.cls(files)
Project: TrackToTrip    Author: ruipgil
def pairwise(iterable):
    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
    now, nxt = tee(iterable)
    next(nxt, None)
    return izip(now, nxt)
Project: datapipelines-python    Author: meraki-analytics
def _pairwise(iterable: Iterable[T]) -> Iterable[Tuple[T, T]]:
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)
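For reference, a quick sketch of what _pairwise yields (our addition, not from the project):

print(list(_pairwise([1, 2, 3, 4])))
# -> [(1, 2), (2, 3), (3, 4)]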
Project: django-souvenirs    Author: appsembler
def _usage_for_periods(periods):
    """
    Generate a sequence of dictionaries of usage data corresponding to periods,
    each of which should be a tuple of (start, end) datetimes, where start is
    inclusive and end is exclusive.

    Each dictionary in the generated sequence has this form:

        {
            period: {
                start: datetime,
                end: datetime,
            }
            usage: {
                registered_users: int,
                activated_users: int,
                active_users: int,
            }
        }

    """
    rp, ap, periods = itertools.tee(periods, 3)
    ir = (registered_users_as_of(end) for start, end in rp)
    ia = (count_active_users(*p) for p in ap)
    for p, r, active in izip(periods, ir, ia):
        start, end = p
        registered, activated = r
        yield dict(
            period=dict(
                start=start,
                end=end,
            ),
            usage=dict(
                registered_users=registered,
                activated_users=activated,
                active_users=active,
            ),
        )
Project: zanph    Author: zanph
def parse(self, file, boundary, content_length):
        formstream, filestream = tee(
            self.parse_parts(file, boundary, content_length), 2)
        form = (p[1] for p in formstream if p[0] == 'form')
        files = (p[1] for p in filestream if p[0] == 'file')
        return self.cls(form), self.cls(files)
Project: iosxr-ansible    Author: ios-xr
def get_next(iterable):
    item, next_item = itertools.tee(iterable, 2)
    next_item = itertools.islice(next_item, 1, None)
    return zip_longest(item, next_item)
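A usage sketch (our addition, assuming zip_longest is itertools.zip_longest, which the original module presumably imports elsewhere):

print(list(get_next('abc')))
# -> [('a', 'b'), ('b', 'c'), ('c', None)]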
Project: hostapd-mana    Author: adde88
def nsmallest(n, iterable, key=None):
    """Find the n smallest elements in a dataset.

    Equivalent to:  sorted(iterable, key=key)[:n]
    """
    # Short-cut for n==1 is to use min() when len(iterable)>0
    if n == 1:
        it = iter(iterable)
        head = list(islice(it, 1))
        if not head:
            return []
        if key is None:
            return [min(chain(head, it))]
        return [min(chain(head, it), key=key)]

    # When n>=size, it's faster to use sorted()
    try:
        size = len(iterable)
    except (TypeError, AttributeError):
        pass
    else:
        if n >= size:
            return sorted(iterable, key=key)[:n]

    # When key is none, use simpler decoration
    if key is None:
        it = izip(iterable, count())                        # decorate
        result = _nsmallest(n, it)
        return map(itemgetter(0), result)                   # undecorate

    # General case, slowest method
    in1, in2 = tee(iterable)
    it = izip(imap(key, in1), count(), in2)                 # decorate
    result = _nsmallest(n, it)
    return map(itemgetter(2), result)                       # undecorate
Project: hostapd-mana    Author: adde88
def nlargest(n, iterable, key=None):
    """Find the n largest elements in a dataset.

    Equivalent to:  sorted(iterable, key=key, reverse=True)[:n]
    """

    # Short-cut for n==1 is to use max() when len(iterable)>0
    if n == 1:
        it = iter(iterable)
        head = list(islice(it, 1))
        if not head:
            return []
        if key is None:
            return [max(chain(head, it))]
        return [max(chain(head, it), key=key)]

    # When n>=size, it's faster to use sorted()
    try:
        size = len(iterable)
    except (TypeError, AttributeError):
        pass
    else:
        if n >= size:
            return sorted(iterable, key=key, reverse=True)[:n]

    # When key is none, use simpler decoration
    if key is None:
        it = izip(iterable, count(0,-1))                    # decorate
        result = _nlargest(n, it)
        return map(itemgetter(0), result)                   # undecorate

    # General case, slowest method
    in1, in2 = tee(iterable)
    it = izip(imap(key, in1), count(0,-1), in2)             # decorate
    result = _nlargest(n, it)
    return map(itemgetter(2), result)                       # undecorate
Project: wltrace    Author: jhshi
def pairwise(it):
    a, b = itertools.tee(it)
    next(b, None)
    return itertools.izip(a, b)
Project: pyconjp-website    Author: pyconjp
def pairwise(iterable):
    a, b = itertools.tee(iterable)
    b.next()
    return itertools.izip_longest(a, b)
Project: phredutils    Author: doctaphred
def filters(iterable, *predicates):
    """Filter the iterable on each given predicate.

    >>> div_by_two = lambda x: not x % 2
    >>> div_by_three = lambda x: not x % 3
    >>> twos, threes = filters(range(10), div_by_two, div_by_three)
    >>> list(twos)
    [0, 2, 4, 6, 8]
    >>> list(threes)
    [0, 3, 6, 9]
    """
    tees = tee(iterable, len(predicates))
    return tuple(filter(pred, t) for pred, t in zip(predicates, tees))