Python heapq module: nlargest() example source code

The following code examples, extracted from open-source Python projects, illustrate how to use heapq.nlargest().
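As a warm-up, here is a minimal standalone sketch of the call itself (illustrative values, not from any project below): heapq.nlargest(n, iterable, key=None) returns the n largest elements as a list in descending order.

import heapq

scores = {'ann': 12, 'bob': 7, 'eve': 31, 'sam': 7}
# three largest values
print(heapq.nlargest(3, scores.values()))         # [31, 12, 7]
# three highest-scoring keys, ranked by their value
print(heapq.nlargest(3, scores, key=scores.get))  # ['eve', 'ann', 'bob'] ('bob' wins the tie by insertion order)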

Project: Python-Scripts-Repo-on-Data-Science    Author: qalhata    | project source | file source
def extractFeatures(self, article, n, customStopWords=None):
        # pass in article as a tuple ( text, title)
        text = article[0]
        # extract the text
        title = article[1]
        # extract the title
        sentences = sent_tokenize(text)
        # split text into sentences
        word_sent = [word_tokenize(a.lower()) for a in sentences]
        # split sentences into words
        self._freq = self._compute_frequencies(word_sent, customStopWords)
        # calculate word freq using member func created above
        if n < 0:
            # how many features (words) to return - a negative number
            # means no feature (word) selection: return all of them
            return nlargest(len(self._freq),
                            self._freq, key=self._freq.get)
        else:
            # here we say if the calling function has asked for a subset
            # then return only the 'n' largest features, i.e. the
            # most important words ( important == frequent, less stopwords)
            return nlargest(n, self._freq, key=self._freq.get)
Project: Python-Scripts-Repo-on-Data-Science    Author: qalhata    | project source | file source
def summarize(self, article, n):
        text = article[0]
        title = article[1]
        sentences = sent_tokenize(text)
        word_sent = [word_tokenize(s.lower()) for s in sentences]
        self._freq = self._compute_frequencies(word_sent)
        ranking = defaultdict(int)
        for i, sentence in enumerate(word_sent):
            for word in sentence:
                if word in self._freq:
                    ranking[i] += self._freq[word]
        sentences_index = nlargest(n, ranking, key=ranking.get)
        return [sentences[j] for j in sentences_index]

Project: ltls    Author: kjasinska    | project source | file source
def create_ranking2(edge_weight, k, adj, num):
    sink = len(adj)
    heaps = [[] for i in xrange(sink + 1)]
    heaps[0] = [(0, [])]

    for current in xrange(sink):
        for child in adj[current]:
            for length, path in heaps[current]:
                new_path = list(path)
                new_path.append(current)
                # this could be done more efficiently with heapq.heapreplace
                ew = edge_weight[0, num[(current, child)]]
                heapq.heappush(heaps[child], (length + ew, new_path))
                heaps[child] = heapq.nlargest(k, heaps[child])
                # TODO: how should paths of equal length be ranked?
    # result: heaps[sink]
    return [(length, tuple(zip(nodes, nodes[1:] + [sink]))) for length, nodes in heaps[sink]]
Project: MIT-Thesis    Author: alec-heif    | project source | file source
def top(self, num, key=None):
        """
        Get the top N elements from an RDD.

        .. note:: This method should only be used if the resulting array is expected
            to be small, as all the data is loaded into the driver's memory.

        .. note:: It returns the list sorted in descending order.

        >>> sc.parallelize([10, 4, 2, 12, 3]).top(1)
        [12]
        >>> sc.parallelize([2, 3, 4, 5, 6], 2).top(2)
        [6, 5]
        >>> sc.parallelize([10, 4, 2, 12, 3]).top(3, key=str)
        [4, 3, 2]
        """
        def topIterator(iterator):
            yield heapq.nlargest(num, iterator, key=key)

        def merge(a, b):
            return heapq.nlargest(num, a + b, key=key)

        return self.mapPartitions(topIterator).reduce(merge)
Project: desert-mirage    Author: valentour    | project source | file source
def nth_largest(n, iter_list):
    """``O(nlogn)`` time if ``n`` is median. 
    Better if largest or smallest.

    Notes
    -----
    Adopted and/or modified from reference(s):
    FogleBird on stackoverflow.com/questions/1034846/
    """
    length = len(iter_list)
    if n >= length:
        return heapq.nlargest(length, iter_list)[-1]
    return heapq.nlargest(n, iter_list)[-1]

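A quick usage sketch of nth_largest as defined above, with illustrative values:

# assumes nth_largest() from the snippet above is in scope
print(nth_largest(1, [5, 1, 8, 3]))   # 8 -- the largest
print(nth_largest(3, [5, 1, 8, 3]))   # 3 -- the 3rd largest
print(nth_largest(9, [5, 1, 8, 3]))   # 1 -- n past the end falls back to the minimum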
Project: pandachaika    Author: pandabuilder    | project source | file source
def get_scored_matches(word: str, possibilities: List[str], n: int=3, cutoff: float=0.6) -> List[Tuple[float, str]]:
    if not n > 0:
        raise ValueError("n must be > 0: %r" % (n,))
    if not (0.0 <= cutoff <= 1.0):
        raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff,))
    result = []
    s: SequenceMatcher = SequenceMatcher()
    s.set_seq2(word)
    for x in possibilities:
        s.set_seq1(x)
        if s.real_quick_ratio() >= cutoff and s.quick_ratio() >= cutoff and s.ratio() >= cutoff:
            result.append((s.ratio(), x))

    # Move the best scorers to head of list
    result = heapq.nlargest(n, result)
    # Strip scores for the best n matches
    return result
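A usage sketch with made-up strings, assuming get_scored_matches and difflib's SequenceMatcher are in scope as above:

print(get_scored_matches('appel', ['apple', 'ape', 'peach'], n=2))
# [(0.8, 'apple'), (0.75, 'ape')] -- 'peach' falls below the 0.6 cutoff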
Project: dl4nlp    Author: yohokuno    | project source | file source
def build_dictionary(sentences, size):
    """
    Create dictionary containing most frequent words in the sentences
    :param sentences: sequence of sentence that contains words
        Caution: the sequence might be exhausted after calling this function!
    :param size: size of dictionary you want
    :return: dictionary that maps word to index (starting from 1)
    """
    dictionary = defaultdict(int)
    for sentence in sentences:
        for token in sentence:
            dictionary[token] += 1
    frequent_pairs = nlargest(size, dictionary.items(), itemgetter(1))
    words, frequencies = zip(*frequent_pairs)
    result = {word: index + 1 for index, word in enumerate(words)}
    return result
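A small usage sketch with made-up sentences (assumes the imports this snippet relies on: defaultdict, nlargest, itemgetter):

sentences = [['the', 'cat'], ['the', 'dog'], ['the', 'cat', 'sat']]
print(build_dictionary(sentences, 2))   # {'the': 1, 'cat': 2}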
Project: minke    Author: DistrictDataLabs    | project source | file source
def keyphrases(self, N=20, fileids=None, categories=None):
        """
        Returns the top N keyphrases grouped by document id.
        TODO: this currently ignores fileids/categories.
        """
        if not self.tfidfs or not self.lexicon or not self.fileids:
            raise ValueError("Must call the score method first!")

        for idx, doc in enumerate(self.tfidfs):
            fileid = self.fileids[idx]

            # Get the top N terms by TF-IDF score
            scores = [
                (self.lexicon[wid], score)
                for wid, score in heapq.nlargest(N, doc, key=itemgetter(1))
            ]

            yield fileid, scores
Project: pyxem    Author: pyxem    | project source | file source
def correlate_library(image, library, n_largest):
    """Correlates all simulated diffraction templates in a DiffractionLibrary
    with a particular experimental diffraction pattern (image) stored as a
    numpy array.
    """
    i = 0
    out_arr = np.zeros((n_largest * len(library), 5))
    for key in library.keys():
        if not n_largest:
            n_largest = len(library[key])
        correlations = dict()
        for orientation, diffraction_pattern in library[key].items():
            correlation = correlate(image, diffraction_pattern)
            correlations[orientation] = correlation
        res = nlargest(n_largest, correlations.items(), key=itemgetter(1))
        for j in np.arange(n_largest):
            out_arr[j + i*n_largest][0] = i
            out_arr[j + i*n_largest][1] = res[j][0][0]
            out_arr[j + i*n_largest][2] = res[j][0][1]
            out_arr[j + i*n_largest][3] = res[j][0][2]
            out_arr[j + i*n_largest][4] = res[j][1]
        i = i + 1
    return out_arr
Project: respeaker_virtualenv    Author: respeaker    | project source | file source
def determine_intent(self, utterance, num_results=1):
        """
        Given an utterance, provide a valid intent.

        :param utterance: an ascii or unicode string representing natural language speech

        :param num_results: a maximum number of results to be returned.

        :return: A generator that yields dictionaries.
        """
        intents = []
        for domain in self.domains:
            gen = self.domains[domain].determine_intent(utterance=utterance,
                                                        num_results=1)
            for intent in gen:
                intents.append(intent)

        intents = heapq.nlargest(
            num_results, intents, key=lambda domain: domain['confidence'])
        for intent in intents:
            yield intent
Project: atap    Author: foxbook    | project source | file source
def nbest_centrality(G, metric, n=10, attr="centrality", **kwargs):
    # Compute the centrality scores for each vertex
    scores = metric(G, **kwargs)

    # Set the score as a property on each node
    nx.set_node_attributes(G, attr, scores)

    # Filter scores (do not include in book)
    ntypes = nx.get_node_attributes(G, 'type')
    phrases = [
        item for item in scores.items()
        if ntypes.get(item[0], None) == "keyphrase"
    ]

    # Find the top n scores and print them along with their index
    topn = heapq.nlargest(n, phrases, key=itemgetter(1))
    for idx, item in enumerate(topn):
        print("{}. {}: {:0.4f}".format(idx+1, *item))

    return G
Project: Chiaki-Nanami    Author: Ikusaba-san    | project source | file source
def newusers(self, ctx, *, count=5):
        """Tells you the newest members of the server.

        This is useful to check if any suspicious members have joined.

        The minimum is 3 members. If no number is given I'll show the last 5 members.
        """
        human_delta = time.human_timedelta
        count = max(count, 3)
        members = heapq.nlargest(count, ctx.guild.members, key=attrgetter('joined_at'))

        names = map(str, members)
        values = (
            (f'**Joined:** {human_delta(member.joined_at)}\n'
             f'**Created:** {human_delta(member.created_at)}\n{"-" * 40}')
            for member in members
        )
        entries = zip(names, values)

        title = f'The {formats.pluralize(**{"newest members": len(members)})}'
        pages = EmbedFieldPages(ctx, entries, lines_per_page=5, colour=0x00FF00, title=title)
        await pages.interact()
Project: lsdc    Author: febert    | project source | file source
def largest_export_versions(n):
  """Creates a filter that keeps the largest n export versions.

  Args:
    n: number of versions to keep.

  Returns:
    A filter function that keeps the n largest paths.
  """
  def keep(paths):
    heap = []
    for idx, path in enumerate(paths):
      if path.export_version is not None:
        heapq.heappush(heap, (path.export_version, idx))
    keepers = [paths[i] for _, i in heapq.nlargest(n, heap)]
    return sorted(keepers)

  return keep
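A usage sketch with a hypothetical Path record standing in for the real path objects, which carry an export_version attribute:

from collections import namedtuple

Path = namedtuple('Path', ['path', 'export_version'])  # hypothetical stand-in

keep = largest_export_versions(2)  # filter defined above
paths = [Path('/m/1', 1), Path('/m/3', 3), Path('/m/2', 2), Path('/m/x', None)]
print(keep(paths))
# [Path(path='/m/2', export_version=2), Path(path='/m/3', export_version=3)]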
Project: Surprise    Author: NicolasHug    | project source | file source
def estimate(self, u, i):

        if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
            raise PredictionImpossible('User and/or item is unknown.')

        x, y = self.switch(u, i)

        neighbors = [(self.sim[x, x2], r) for (x2, r) in self.yr[y]]
        k_neighbors = heapq.nlargest(self.k, neighbors, key=lambda t: t[0])

        # compute weighted average
        sum_sim = sum_ratings = actual_k = 0
        for (sim, r) in k_neighbors:
            if sim > 0:
                sum_sim += sim
                sum_ratings += sim * r
                actual_k += 1

        if actual_k < self.min_k:
            raise PredictionImpossible('Not enough neighbors.')

        est = sum_ratings / sum_sim

        details = {'actual_k': actual_k}
        return est, details
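To make the weighted-average step concrete, a small sketch with made-up similarities and ratings:

import heapq

neighbors = [(0.9, 4.0), (0.5, 2.0), (-0.1, 5.0)]  # (similarity, rating) pairs
k_neighbors = heapq.nlargest(2, neighbors, key=lambda t: t[0])
sum_sim = sum(sim for sim, r in k_neighbors if sim > 0)          # 0.9 + 0.5 = 1.4
sum_ratings = sum(sim * r for sim, r in k_neighbors if sim > 0)  # 3.6 + 1.0 = 4.6
print(sum_ratings / sum_sim)  # ~3.29, the similarity-weighted estimate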
Project: AlgorithmsByPython    Author: Jack-Lee-Hiter    | project source | file source
def GetLeastNumbers(self, tinput, k):
        import heapq
        if tinput is None or k <= 0 or len(tinput) < k:
            return []
        output = []
        for number in tinput:
            if len(output) < k:
                output.append(number)
            else:
                # Variant 1: keep output ascending with nsmallest and
                # replace its last (largest) element:
                # output = heapq.nsmallest(k, output)
                # if number >= output[-1]:
                #     continue
                # else:
                #     output[-1] = number
                # Variant 2: keep output descending with nlargest and
                # replace its first (largest) element:
                output = heapq.nlargest(k, output)
                if number >= output[0]:
                    continue
                else:
                    output[0] = number
        return output[::-1]     # if ordering is not required, just return output
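A usage sketch, assuming the method above sits on a hypothetical Solution class:

s = Solution()  # hypothetical holder class for the method above
print(s.GetLeastNumbers([4, 5, 1, 6, 2, 7, 3, 8], 4))   # [1, 2, 3, 4]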
Project: pytesseractID    Author: iChenwin    | project source | file source
def findIDcnt(contours):
    # compute the bounding-box width of every contour
    widths = []
    for idx, cnt in enumerate(contours):
        x, y, width, height = cv2.boundingRect(cnt)
        widths.insert(idx, width)

    # keep the three largest widths (the ID-number region is the widest)
    IDList = heapq.nlargest(3, widths)
    # map those widths back to their contours
    IDcnts = []
    for idx, item in enumerate(IDList):
        index = widths.index(item)
        IDcnts.insert(idx, contours[index])
    # print IDcnts

    return IDcnts

Project: knowledge-graph-keras    Author: eshijia    | project source | file source
def make_submit(self, model, submit_file):
        data = self.eval_sets().values()[0]
        target_lines = list()
        answers = np.asarray([[idx] for idx in self.entity.keys()])
        for i, d in enumerate(data):
            num_candidate = len(self.entity)
            index_entities = xrange(num_candidate)

            terms = d.split('\t')
            subjects = np.asarray([[terms[0]]] * num_candidate)
            relations = np.asarray([[terms[1]]] * num_candidate)

            sims = model.predict([subjects, relations, answers], batch_size=num_candidate).flatten()
            print(i)
            r = rankdata(sims, method='ordinal')
            index_candidates = nlargest(200, index_entities, key=lambda j: r[j])
            one_line = ' '.join([str(index_candidate) for index_candidate in index_candidates])
            target_lines.append(one_line + '\n')
        submit_file.writelines(target_lines)
Project: knowledge-graph-keras    Author: eshijia    | project source | file source
def make_submit_rt(self, model, submit_file):
        data = self.eval_sets_rt().values()[0]
        target_lines = list()
        answers = np.asarray([[idx] for idx in self.entity.keys()])
        for i, d in enumerate(data):
            num_candidate = len(self.entity)
            index_entities = xrange(num_candidate)

            terms = d.split('\t')
            relations = np.asarray([[terms[0]]] * num_candidate)
            objects = np.asarray([[terms[1]]] * num_candidate)

            sims = model.predict_rt([answers, relations, objects], batch_size=num_candidate).flatten()
            print(i)
            r = rankdata(sims, method='ordinal')
            index_candidates = nlargest(200, index_entities, key=lambda j: r[j])
            one_line = ' '.join([str(index_candidate) for index_candidate in index_candidates])
            target_lines.append(one_line + '\n')
        submit_file.writelines(target_lines)
Project: pipenv    Author: pypa    | project source | file source
def extractBests(query, choices, processor=default_processor, scorer=default_scorer, score_cutoff=0, limit=5):
    """Get a list of the best matches to a collection of choices.

    Convenience function for getting the choices with best scores.

    Args:
        query: A string to match against
        choices: A list or dictionary of choices, suitable for use with
            extract().
        processor: Optional function for transforming choices before matching.
            See extract().
        scorer: Scoring function for extract().
        score_cutoff: Optional argument for score threshold. No matches with
            a score less than this number will be returned. Defaults to 0.
        limit: Optional maximum for the number of elements returned. Defaults
            to 5.

    Returns: A list of (match, score) tuples.
    """

    best_list = extractWithoutOrder(query, choices, processor, scorer, score_cutoff)
    return heapq.nlargest(limit, best_list, key=lambda i: i[1]) if limit is not None else \
        sorted(best_list, key=lambda i: i[1], reverse=True)
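This helper backs fuzzywuzzy's process.extractBests; a hedged usage sketch through that public entry point (exact scores depend on the scorer):

from fuzzywuzzy import process

choices = ['New York Jets', 'New York Giants', 'Dallas Cowboys']
print(process.extractBests('new york', choices, score_cutoff=50, limit=2))
# e.g. [('New York Jets', 76), ('New York Giants', 71)] -- scores vary by scorer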
Project: pyspark    Author: v-v-vishnevskiy    | project source | file source
def top(self, num, key=None):
        """
        Get the top N elements from an RDD.

        Note: It returns the list sorted in descending order.

        >>> sc.parallelize([10, 4, 2, 12, 3]).top(1)
        [12]
        >>> sc.parallelize([2, 3, 4, 5, 6], 2).top(2)
        [6, 5]
        >>> sc.parallelize([10, 4, 2, 12, 3]).top(3, key=str)
        [4, 3, 2]
        """
        def topIterator(iterator):
            yield heapq.nlargest(num, iterator, key=key)

        def merge(a, b):
            return heapq.nlargest(num, a + b, key=key)

        return self.mapPartitions(topIterator).reduce(merge)
Project: talktown    Author: james-owen-ryan    | project source | file source
def _choose_vacant_home_or_vacant_lot(self):
        """Choose a vacant home to move into or a vacant lot to build on.

        Currently, a person scores all the vacant homes/lots in town and then selects
        one of the top three. TODO: Probabilistically select from all homes/lots using the
        scores to derive likelihoods of selecting each.
        """
        home_and_lot_scores = self._rate_all_vacant_homes_and_vacant_lots()
        if len(home_and_lot_scores) >= 3:
            # Pick from top three
            top_three_choices = heapq.nlargest(3, home_and_lot_scores, key=home_and_lot_scores.get)
            if random.random() < 0.6:
                choice = top_three_choices[0]
            elif random.random() < 0.9:
                choice = top_three_choices[1]
            else:
                choice = top_three_choices[2]
        elif home_and_lot_scores:
            choice = list(home_and_lot_scores)[0]
        else:
            choice = None
        return choice
Project: talktown    Author: james-owen-ryan    | project source | file source
def _init_acquire_currently_occupied_lot(self):
        """If there are no vacant lots in town, acquire a lot and demolish the home currently on it."""
        lot_scores = self._rate_all_occupied_lots()
        if len(lot_scores) >= 3:
            # Pick from top three
            top_three_choices = heapq.nlargest(3, lot_scores, key=lot_scores.get)
            if random.random() < 0.6:
                choice = top_three_choices[0]
            elif random.random() < 0.9:
                choice = top_three_choices[1]
            else:
                choice = top_three_choices[2]
        elif lot_scores:
            choice = max(lot_scores, key=lot_scores.get)
        else:
            raise Exception("A company attempted to secure an *occupied* lot in town but somehow could not.")
        return choice
Project: watcher    Author: nosmokingbandit    | project source | file source
def extractBests(query, choices, processor=None, scorer=None, score_cutoff=0, limit=5):
    """Get a list of the best matches to a collection of choices.

    Convenience function for getting the choices with best scores.

    Args:
        query: A string to match against
        choices: A list or dictionary of choices, suitable for use with
            extract().
        processor: Optional function for transforming choices before matching.
            See extract().
        scorer: Scoring function for extract().
        score_cutoff: Optional argument for score threshold. No matches with
            a score less than this number will be returned. Defaults to 0.
        limit: Optional maximum for the number of elements returned. Defaults
            to 5.

    Returns: A list of (match, score) tuples.
    """

    best_list = extractWithoutOrder(query, choices, processor, scorer, score_cutoff)
    return heapq.nlargest(limit, best_list, key=lambda i: i[1]) if limit is not None else \
        sorted(best_list, key=lambda i: i[1], reverse=True)
Project: dissertation    Author: james-owen-ryan    | project source | file source
def _init_acquire_currently_occupied_lot(self):
        """If there are no vacant lots in town, acquire a lot and demolish the home currently on it."""
        lot_scores = self._rate_all_occupied_lots()
        if len(lot_scores) >= 3:
            # Pick from top three
            top_three_choices = heapq.nlargest(3, lot_scores, key=lot_scores.get)
            if random.random() < 0.6:
                choice = top_three_choices[0]
            elif random.random() < 0.9:
                choice = top_three_choices[1]
            else:
                choice = top_three_choices[2]
        elif lot_scores:
            choice = max(lot_scores, key=lot_scores.get)
        else:
            raise Exception("A company attempted to secure an *occupied* lot in town but somehow could not.")
        return choice
Project: python-    Author: secondtonone1    | project source | file source
def most_common(self, n=None):
        '''List the n most common elements and their counts from the most
        common to the least.  If n is None, then list all element counts.

        >>> Counter('abcdeabcdabcaba').most_common(3)
        [('a', 5), ('b', 4), ('c', 3)]

        '''
        # Emulate Bag.sortedByCount from Smalltalk
        if n is None:
            return sorted(self.items(), key=_itemgetter(1), reverse=True)
        return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
Project: kinect-2-libras    Author: inessadl    | project source | file source
def most_common(self, n=None):
        '''List the n most common elements and their counts from the most
        common to the least.  If n is None, then list all element counts.

        >>> Counter('abcdeabcdabcaba').most_common(3)
        [('a', 5), ('b', 4), ('c', 3)]

        '''
        # Emulate Bag.sortedByCount from Smalltalk
        if n is None:
            return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
        return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
Project: health-mosconi    Author: GNUHealth-Mosconi    | project source | file source
def global_search(cls, text, limit, menu='ir.ui.menu'):
        """
        Search on models for text including menu
        Returns a list of tuples (ratio, model, model_name, id, name, icon).
        The size of the list is limited to limit
        """
        pool = Pool()
        ModelAccess = pool.get('ir.model.access')

        if not limit > 0:
            raise ValueError('limit must be > 0: %r' % (limit,))

        models = cls.search(['OR',
                ('global_search_p', '=', True),
                ('model', '=', menu),
                ])
        access = ModelAccess.get_access([m.model for m in models])
        s = StringMatcher()
        if isinstance(text, str):
            text = text.decode('utf-8')
        s.set_seq2(text)

        def generate():
            for model in models:
                if not access[model.model]['read']:
                    continue
                Model = pool.get(model.model)
                if not hasattr(Model, 'search_global'):
                    continue
                for record, name, icon in Model.search_global(text):
                    if isinstance(name, str):
                        name = name.decode('utf-8')
                    s.set_seq1(name)
                    yield (s.ratio(), model.model, model.rec_name,
                        record.id, name, icon)
        return heapq.nlargest(int(limit), generate())
Project: histwords    Author: williamleif    | project source | file source
def closest(self, w, n=10):
        """
        Assumes the vectors have been normalized.
        """
        scores = self.m.dot(self.represent(w))
        return heapq.nlargest(n, zip(scores, self.iw))
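A toy sketch of this dot-product ranking, with a made-up normalized embedding matrix (m and iw stand in for the instance attributes):

import heapq
import numpy as np

iw = ['cat', 'dog', 'car']                          # index-to-word list
m = np.array([[1.0, 0.0], [0.8, 0.6], [0.0, 1.0]])  # unit rows = normalized vectors
query = m[iw.index('cat')]                          # what represent('cat') would return
scores = m.dot(query)                               # cosine similarities
print(heapq.nlargest(2, zip(scores, iw)))           # [(1.0, 'cat'), (0.8, 'dog')]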
Project: histwords    Author: williamleif    | project source | file source
def closest_contexts(self, w, n=10):
        scores = self.ec.m.dot(self.ew.represent(w))
        pairs = zip(scores, self.ec.iw)[1:]
        return heapq.nlargest(n, pairs)
Project: histwords    Author: williamleif    | project source | file source
def closest_contexts(self, w, n=10):
        """
        Assumes the vectors have been normalized.
        """
        scores = self.represent(w)
        return heapq.nlargest(n, zip(scores.data, [self.ic[i] for i in scores.indices]))
Project: histwords    Author: williamleif    | project source | file source
def closest(self, w, n=10):
        """
        Assumes the vectors have been normalized.
        """
        scores = self.m.dot(self.represent(w).T).T.tocsr()
        return heapq.nlargest(n, zip(scores.data, [self.iw[i] for i in scores.indices]))
Project: histwords    Author: williamleif    | project source | file source
def closest(self, w, n=10):
        """
        Assumes the vectors have been normalized.
        """
        scores = self.m.dot(self.represent(w))
        return heapq.nlargest(n, zip(scores, self.iw))
Project: histwords    Author: williamleif    | project source | file source
def closest(self, w, n=10):
        """
        Assumes the vectors have been normalized.
        """
        if self.oov(w):
            return []
        scores = self.m.dot(self.represent(w).T).T.tocsr()
        return heapq.nlargest(n, zip(scores.data, [self.iw[i] for i in scores.indices]))
Project: histwords    Author: williamleif    | project source | file source
def closest_first_order(self, w, n=10):
        if self.oov(w):
            return []
        scores = self.m[self.wi[w], :]
        return heapq.nlargest(n, zip(scores.data, [self.iw[i] for i in scores.indices]))
Project: hostapd-mana    Author: adde88    | project source | file source
def most_common(self, n=None):
        '''List the n most common elements and their counts from the most
        common to the least.  If n is None, then list all element counts.

        >>> Counter('abcdeabcdabcaba').most_common(3)
        [('a', 5), ('b', 4), ('c', 3)]

        '''
        # Emulate Bag.sortedByCount from Smalltalk
        if n is None:
            return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
        return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
Project: nstock    Author: ybenitezf    | project source | file source
def most_frequent_terms(self, fieldname, number=5, prefix=''):
        """Returns the top 'number' most frequent terms in the given field as a
        list of (frequency, text) tuples.
        """

        gen = ((terminfo.weight(), text) for text, terminfo
               in self.iter_prefix(fieldname, prefix))
        return nlargest(number, gen)
Project: nstock    Author: ybenitezf    | project source | file source
def most_distinctive_terms(self, fieldname, number=5, prefix=''):
        """Returns the top 'number' terms with the highest `tf*idf` scores as
        a list of (score, text) tuples.
        """

        N = float(self.doc_count())
        gen = ((terminfo.weight() * log(N / terminfo.doc_frequency()), text)
               for text, terminfo in self.iter_prefix(fieldname, prefix))
        return nlargest(number, gen)
Project: nstock    Author: ybenitezf    | project source | file source
def top_fragments(fragments, count, scorer, order, minscore=1):
    scored_fragments = ((scorer(f), f) for f in fragments)
    scored_fragments = nlargest(count, scored_fragments)
    best_fragments = [sf for score, sf in scored_fragments if score >= minscore]
    best_fragments.sort(key=order)
    return best_fragments
Project: ltls    Author: kjasinska    | project source | file source
def create_ranking3(edge_weight, k, adj, num):
    sink = len(adj)
    EMPTY = -2
    ROOT = -1
    MIN_LENGTH = float('-inf')
    # heaps = [[(0, EMPTY, 0) for j in range(k)] for i in xrange(sink + 1)]
    heaps = [[(MIN_LENGTH, EMPTY, 0) for j in range(k + 1)] for i in xrange(sink + 1)]
    heaps[0][0] = (0, ROOT, 0)
    # forward
    for current in xrange(sink):
        new_rank = 0
        for length, parent, rank in heaps[current]:
            if parent != EMPTY:
                for child in adj[current]:
                    ew = edge_weight[0, num[(current, child)]]
                    new_length = length + ew
                    # heapq.heapreplace(heaps[child], (new_length, current, new_rank))
                    heapq.heappush(heaps[child], (new_length, current, new_rank))
                    heaps[child] = heapq.nlargest(k, heaps[child])
            new_rank += 1
    # backward
    ranking = []
    for rank in xrange(k):
        path = []
        current = sink
        current_rank = rank
        while current != ROOT:
            path.append(current)
            _, current, current_rank = heaps[current][current_rank]
        length, _, _ = heaps[sink][rank]
        path = list(reversed(path))
        path = tuple(zip(path[:-1], path[1:]))
        ranking.append((length, path))
    return ranking
Project: RSeQC    Author: MonashBioinformaticsPlatform    | project source | file source
def S_diff(lst):
    '''Given a list of int or float, calculate S_diff and S_point'''

    S_avg = sum(lst) / len(lst)
    S_dist = [i - S_avg for i in lst]  # distance to average
    S_cum = [0]  # cumulative sums
    for i in range(len(S_dist)):
        S_cum.append(S_cum[i] + S_dist[i])
    return [nlargest(1,range(0,len(S_cum)),key=lambda i: S_cum[i]),(max(S_cum) - min(S_cum))]
    #return the index of maximum_diff index, and maximum_diff
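A quick sketch of the output on a series with one obvious shift (illustrative numbers, assuming S_diff as defined above):

lst = [10, 10, 10, 2, 2, 2]
print(S_diff(lst))   # [[3], 12.0] -- the cumulative sum peaks at index 3, the change point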
Project: wort2vek    Author: HPI-DeepLearning    | project source | file source
def similar_to_vec(self, v, N=10):
      sims = self._vecs.dot(v)
      sims = heapq.nlargest(N, zip(sims,self._vocab,self._vecs))
      return sims
Project: wort2vek    Author: HPI-DeepLearning    | project source | file source
def most_similar(self, word, N=10):
      w = self._vocab.index(word)
      sims = self._vecs.dot(self._vecs[w])
      sims = heapq.nlargest(N, zip(sims,self._vocab))
      return sims
Project: wort2vek    Author: HPI-DeepLearning    | project source | file source
def analogy(self, pos1, neg1, pos2,N=10,mult=True):
      wvecs, vocab = self._vecs, self._vocab
      p1 = vocab.index(pos1)
      p2 = vocab.index(pos2)
      n1 = vocab.index(neg1)
      if mult:
         p1,p2,n1 = [(1+wvecs.dot(wvecs[i]))/2 for i in (p1,p2,n1)]
         if N == 1:
            return max(((v,w) for v,w in izip((p1 * p2 / n1),vocab) if w not in [pos1,pos2,neg1]))
         return heapq.nlargest(N,((v,w) for v,w in izip((p1 * p2 / n1),vocab) if w not in [pos1,pos2,neg1]))
      else:
         p1,p2,n1 = [(wvecs.dot(wvecs[i])) for i in (p1,p2,n1)]
         if N == 1:
            return max(((v,w) for v,w in izip((p1 + p2 - n1),vocab) if w not in [pos1,pos2,neg1]))
         return heapq.nlargest(N,((v,w) for v,w in izip((p1 + p2 - n1),vocab) if w not in [pos1,pos2,neg1]))
Project: ivaochdoc    Author: ivaoch    | project source | file source
def most_common(self, n=None):
        '''List the n most common elements and their counts from the most
        common to the least.  If n is None, then list all element counts.

        >>> Counter('abcdeabcdabcaba').most_common(3)
        [('a', 5), ('b', 4), ('c', 3)]

        '''
        # Emulate Bag.sortedByCount from Smalltalk
        if n is None:
            return sorted(self.items(), key=_itemgetter(1), reverse=True)
        return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
Project: crypto-forcast    Author: 7yl4r    | project source | file source
def run(self):
        top_10 = nlargest(10, self._input_iterator())
        with self.output().open('w') as out_file:
            for streams, artist in top_10:
                out_line = '\t'.join([
                    str(self.date_interval.date_a),
                    str(self.date_interval.date_b),
                    artist,
                    str(streams)
                ])
                out_file.write((out_line + '\n'))
Project: Intranet-Penetration    Author: yuxiaokui    | project source | file source
def most_common(self, n=None):
        '''List the n most common elements and their counts from the most
        common to the least.  If n is None, then list all element counts.

        >>> Counter('abcdeabcdabcaba').most_common(3)
        [('a', 5), ('b', 4), ('c', 3)]

        '''
        # Emulate Bag.sortedByCount from Smalltalk
        if n is None:
            return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
        return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
Project: MKFQ    Author: maojingios    | project source | file source
def most_common(self, n=None):
        '''List the n most common elements and their counts from the most
        common to the least.  If n is None, then list all element counts.

        >>> Counter('abcdeabcdabcaba').most_common(3)
        [('a', 5), ('b', 4), ('c', 3)]

        '''
        # Emulate Bag.sortedByCount from Smalltalk
        if n is None:
            return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
        return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
Project: hakkuframework    Author: 4shadoww    | project source | file source
def most_common(self, n=None):
        '''List the n most common elements and their counts from the most
        common to the least.  If n is None, then list all element counts.

        >>> Counter('abcdeabcdabcaba').most_common(3)
        [('a', 5), ('b', 4), ('c', 3)]

        '''
        # Emulate Bag.sortedByCount from Smalltalk
        if n is None:
            return sorted(self.items(), key=_itemgetter(1), reverse=True)
        return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
Project: lexsub    Author: orenmel    | project source | file source
def vec_to_str(subvec, max_n):

    sub_list_sorted = heapq.nlargest(max_n, subvec, key=lambda x: x[1])
    sub_strs = [' '.join([word, wf2ws(weight)]) for word, weight in sub_list_sorted]
    return '\t'.join(sub_strs)