Python difflib 模块,SequenceMatcher() 实例源码


项目    作者:corpnewt    | 项目源码 | 文件源码
def search(searchTerm, list, keyName : str = None, numMatches : int = 3):
    """Rank the entries of ``list`` by similarity to ``searchTerm``.

    Args:
        searchTerm: Text to look for; compared case-insensitively.
        list: Sequence of candidates. Entries are strings, or mappings when
            ``keyName`` is given.
        keyName: When set, each entry is indexed with this key to obtain the
            text that is compared.
        numMatches: Maximum number of results to return.

    Returns:
        ``None`` for an empty ``list``; otherwise up to ``numMatches`` dicts
        ``{'Item': entry, 'Ratio': float}`` ordered from best to worst match.
    """
    if len(list) < 1:
        return None
    wanted = searchTerm.lower()
    searchList = []
    for item in list:
        # Only index into the entry when a key was requested; plain entries
        # are compared directly.
        if keyName:
            testName = item[keyName]
        else:
            testName = item
        matchRatio = difflib.SequenceMatcher(None, wanted, testName.lower()).ratio()
        searchList.append({ 'Item' : item, 'Ratio' : matchRatio })
    # Best ratio first; the sort is stable, so ties keep their input order.
    searchList.sort(key=lambda x: x['Ratio'], reverse=True)
    # Slicing already copes with numMatches exceeding the number of results.
    return searchList[:numMatches]
项目:CodingDojo    作者:ComputerSocietyUNB    | 项目源码 | 文件源码
def validate(self, password, user=None):
        """Reject ``password`` when it closely resembles an attribute of ``user``.

        Every attribute named in ``self.user_attributes`` is read from ``user``;
        the whole value and each piece obtained by splitting it on runs of
        non-word characters are compared case-insensitively with the password
        using ``SequenceMatcher.quick_ratio()``.

        Returns ``None`` when no user is given or nothing is too similar.
        Raises ``ValidationError`` as soon as one comparison exceeds
        ``self.max_similarity``.
        """
        if not user:
            # Nothing to compare against.
            return

        for attribute_name in self.user_attributes:
            value = getattr(user, attribute_name, None)
            if not value or not isinstance(value, string_types):
                # Missing, empty or non-text attributes cannot be compared.
                continue
            # Raw string: '\W' in a plain literal is an invalid escape sequence.
            value_parts = re.split(r'\W+', value) + [value]
            for value_part in value_parts:
                if SequenceMatcher(a=password.lower(), b=value_part.lower()).quick_ratio() > self.max_similarity:
                    verbose_name = force_text(user._meta.get_field(attribute_name).verbose_name)
                    raise ValidationError(
                        _("The password is too similar to the %(verbose_name)s."),
                        code='password_too_similar',
                        params={'verbose_name': verbose_name},
                    )
项目:NarshaTech    作者:KimJangHyeon    | 项目源码 | 文件源码
def validate(self, password, user=None):
        """Reject ``password`` when it closely resembles an attribute of ``user``.

        Every attribute named in ``self.user_attributes`` is read from ``user``;
        the whole value and each piece obtained by splitting it on runs of
        non-word characters are compared case-insensitively with the password
        using ``SequenceMatcher.quick_ratio()``.

        Returns ``None`` when no user is given or nothing is too similar.
        Raises ``ValidationError`` as soon as one comparison exceeds
        ``self.max_similarity``.
        """
        if not user:
            # Nothing to compare against.
            return

        for attribute_name in self.user_attributes:
            value = getattr(user, attribute_name, None)
            if not value or not isinstance(value, string_types):
                # Missing, empty or non-text attributes cannot be compared.
                continue
            # Raw string: '\W' in a plain literal is an invalid escape sequence.
            value_parts = re.split(r'\W+', value) + [value]
            for value_part in value_parts:
                if SequenceMatcher(a=password.lower(), b=value_part.lower()).quick_ratio() > self.max_similarity:
                    verbose_name = force_text(user._meta.get_field(attribute_name).verbose_name)
                    raise ValidationError(
                        _("The password is too similar to the %(verbose_name)s."),
                        code='password_too_similar',
                        params={'verbose_name': verbose_name},
                    )
项目:scm-workbench    作者:barry-scott    | 项目源码 | 文件源码
def filecompare( self, filename_left, filename_right ):
        """Diff two inputs line by line and feed the result to ``self.text_body``.

        Each argument is either a list of lines, used as given, or a filename
        whose contents are read with ``wb_read_file.readFileContentsAsUnicode``
        and split on newlines.

        Returns 1 after all opcodes have been processed, or 0 when one of the
        files cannot be read (the error is printed).
        Raises ``ValueError`` if the matcher yields an unknown opcode tag.
        """
        if isinstance( filename_left, list ):
            lines_left = filename_left
        else:
            try:
                lines_left = wb_read_file.readFileContentsAsUnicode( filename_left ).split('\n')

            except IOError as e:
                print( 'Error opening %s\n%s' % (filename_left, e) )
                return 0

        if isinstance( filename_right, list ):
            lines_right = filename_right
        else:
            try:
                lines_right = wb_read_file.readFileContentsAsUnicode( filename_right ).split('\n')

            except IOError as e:
                print( 'Error opening %s\n%s' % (filename_right, e) )
                return 0

        # Strip line endings so they do not take part in the comparison.
        lines_left = [eolRemoval( line ) for line in lines_left]
        lines_right = [eolRemoval( line ) for line in lines_right]

        matcher = difflib.SequenceMatcher( isLineJunk, lines_left, lines_right )
        for tag, left_lo, left_hi, right_lo, right_hi in matcher.get_opcodes():
            if tag == 'replace':
                self.fancy_replace( lines_left, left_lo, left_hi, lines_right, right_lo, right_hi )
            elif tag == 'delete':
                self.dump( self.text_body.addDeletedLine, lines_left, left_lo, left_hi )
            elif tag == 'insert':
                self.dump( self.text_body.addInsertedLine, lines_right, right_lo, right_hi )
            elif tag == 'equal':
                self.dump( self.text_body.addNormalLine, lines_left, left_lo, left_hi )
            else:
                raise ValueError( 'unknown tag ' + str( tag ) )

        return 1

# Need to strip any \n or \r that is on the end of the line.
项目:Scrum    作者:prakharchoudhary    | 项目源码 | 文件源码
def validate(self, password, user=None):
        """Reject ``password`` when it closely resembles an attribute of ``user``.

        Every attribute named in ``self.user_attributes`` is read from ``user``;
        the whole value and each piece obtained by splitting it on runs of
        non-word characters are compared case-insensitively with the password
        using ``SequenceMatcher.quick_ratio()``.

        Returns ``None`` when no user is given or nothing is too similar.
        Raises ``ValidationError`` as soon as one comparison reaches
        ``self.max_similarity``.
        """
        if not user:
            # Nothing to compare against.
            return

        for attribute_name in self.user_attributes:
            value = getattr(user, attribute_name, None)
            if not value or not isinstance(value, string_types):
                # Missing, empty or non-text attributes cannot be compared.
                continue
            value_parts = re.split(r'\W+', value) + [value]
            for value_part in value_parts:
                if SequenceMatcher(a=password.lower(), b=value_part.lower()).quick_ratio() >= self.max_similarity:
                    try:
                        verbose_name = force_text(user._meta.get_field(attribute_name).verbose_name)
                    except FieldDoesNotExist:
                        # Not a model field: fall back to the raw attribute name.
                        verbose_name = attribute_name
                    raise ValidationError(
                        _("The password is too similar to the %(verbose_name)s."),
                        code='password_too_similar',
                        params={'verbose_name': verbose_name},
                    )
项目:django    作者:alexsukhrin    | 项目源码 | 文件源码
def validate(self, password, user=None):
        """Reject ``password`` when it closely resembles an attribute of ``user``.

        Every attribute named in ``self.user_attributes`` is read from ``user``;
        the whole value and each piece obtained by splitting it on runs of
        non-word characters are compared case-insensitively with the password
        using ``SequenceMatcher.quick_ratio()``.

        Returns ``None`` when no user is given or nothing is too similar.
        Raises ``ValidationError`` as soon as one comparison reaches
        ``self.max_similarity``.
        """
        if not user:
            # Nothing to compare against.
            return

        for attribute_name in self.user_attributes:
            value = getattr(user, attribute_name, None)
            if not value or not isinstance(value, string_types):
                # Missing, empty or non-text attributes cannot be compared.
                continue
            value_parts = re.split(r'\W+', value) + [value]
            for value_part in value_parts:
                if SequenceMatcher(a=password.lower(), b=value_part.lower()).quick_ratio() >= self.max_similarity:
                    try:
                        verbose_name = force_text(user._meta.get_field(attribute_name).verbose_name)
                    except FieldDoesNotExist:
                        # Not a model field: fall back to the raw attribute name.
                        verbose_name = attribute_name
                    raise ValidationError(
                        _("The password is too similar to the %(verbose_name)s."),
                        code='password_too_similar',
                        params={'verbose_name': verbose_name},
                    )
项目:Taigabot    作者:FrozenPigs    | 项目源码 | 文件源码
def get_matching_blocks(self):
        """Return difflib's matching blocks with the short ones filtered out.

        A block is kept when its length exceeds the effective threshold, or
        when its length is zero (difflib's terminating sentinel block). The
        effective threshold is ``self.threshold`` capped at a quarter of the
        shorter sequence's length.
        """
        # The cap must use the shorter of BOTH sequences; comparing len(self.b)
        # with itself ignored self.a and could filter out every real match.
        size = min(len(self.a), len(self.b))
        threshold = min(self.threshold, size / 4)
        actual = difflib.SequenceMatcher.get_matching_blocks(self)
        return [item for item in actual
                if item[2] > threshold
                or not item[2]]
项目:pandachaika    作者:pandabuilder    | 项目源码 | 文件源码
def get_scored_matches(word: str, possibilities: List[str], n: int=3, cutoff: float=0.6) -> List[Tuple[float, str]]:
    """Return the best ``n`` close matches for ``word`` together with their scores.

    Works like ``difflib.get_close_matches`` but keeps the similarity ratio.

    Args:
        word: The text to match against.
        possibilities: Candidate strings.
        n: Maximum number of matches returned; must be greater than 0.
        cutoff: Minimum ratio in ``[0.0, 1.0]`` a candidate needs to qualify.

    Returns:
        ``(ratio, candidate)`` tuples, highest ratio first.

    Raises:
        ValueError: If ``n`` or ``cutoff`` is out of range.
    """
    if not n > 0:
        raise ValueError("n must be > 0: %r" % (n,))
    if not (0.0 <= cutoff <= 1.0):
        raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff,))
    result = []
    s: SequenceMatcher = SequenceMatcher()
    # Without loading the sequences the matcher compares two empty strings
    # and reports 1.0 for every candidate.
    s.set_seq2(word)
    for x in possibilities:
        s.set_seq1(x)
        # Cheap upper bounds first; the exact ratio is computed only if both pass.
        if s.real_quick_ratio() >= cutoff and s.quick_ratio() >= cutoff and s.ratio() >= cutoff:
            result.append((s.ratio(), x))

    # Keep only the n best scorers, best first; the scores stay in the result.
    result = heapq.nlargest(n, result)
    return result
项目:aurora    作者:carnby    | 项目源码 | 文件源码
def build_token_counts(characterizer, texts):
    """Tokenize every entry of ``texts`` and return a ``Counter``.

    A ``Tokenizer`` built from ``characterizer`` is trained on the ``'text'``
    field of all entries, and each entry then gets a ``'tokens'`` field.

    NOTE(review): the bodies of the ``if``/``for`` statements below are missing
    from this listing, so the counting logic is not visible and the block does
    not parse as shown. ``seq_matcher`` is created but not used in the visible
    lines. Restore the missing statements from the upstream project.
    """
    tokenizer = Tokenizer(characterizer=characterizer)
    tokenizer.train([t['text'] for t in texts])

    token_counts = Counter()
    seq_matcher = difflib.SequenceMatcher()

    for t in texts:
        t['tokens'] = tokenizer.tokenize(t['text'])
        # NOTE(review): body missing - presumably skips entries without tokens.
        if not t['tokens']:

        if 'urls' in t['entities'] and t['entities']['urls']:
            #TODO: replace those urls instead of adding them
            # NOTE(review): loop body missing.
            for url in t['entities']['urls']:

        # NOTE(review): body missing.
        if t['__is_rt__']:


    return token_counts
项目:Gypsy    作者:benticarlos    | 项目源码 | 文件源码
def validate(self, password, user=None):
        """Reject ``password`` when it closely resembles an attribute of ``user``.

        Every attribute named in ``self.user_attributes`` is read from ``user``;
        the whole value and each piece obtained by splitting it on runs of
        non-word characters are compared case-insensitively with the password
        using ``SequenceMatcher.quick_ratio()``.

        Returns ``None`` when no user is given or nothing is too similar.
        Raises ``ValidationError`` as soon as one comparison exceeds
        ``self.max_similarity``.
        """
        if not user:
            # Nothing to compare against.
            return

        for attribute_name in self.user_attributes:
            value = getattr(user, attribute_name, None)
            if not value or not isinstance(value, string_types):
                # Missing, empty or non-text attributes cannot be compared.
                continue
            # Raw string: '\W' in a plain literal is an invalid escape sequence.
            value_parts = re.split(r'\W+', value) + [value]
            for value_part in value_parts:
                if SequenceMatcher(a=password.lower(), b=value_part.lower()).quick_ratio() > self.max_similarity:
                    verbose_name = force_text(user._meta.get_field(attribute_name).verbose_name)
                    raise ValidationError(
                        _("The password is too similar to the %(verbose_name)s."),
                        code='password_too_similar',
                        params={'verbose_name': verbose_name},
                    )
项目:wanblog    作者:wanzifa    | 项目源码 | 文件源码
def validate(self, password, user=None):
        """Reject ``password`` when it closely resembles an attribute of ``user``.

        Every attribute named in ``self.user_attributes`` is read from ``user``;
        the whole value and each piece obtained by splitting it on runs of
        non-word characters are compared case-insensitively with the password
        using ``SequenceMatcher.quick_ratio()``.

        Returns ``None`` when no user is given or nothing is too similar.
        Raises ``ValidationError`` as soon as one comparison exceeds
        ``self.max_similarity``.
        """
        if not user:
            # Nothing to compare against.
            return

        for attribute_name in self.user_attributes:
            value = getattr(user, attribute_name, None)
            if not value or not isinstance(value, string_types):
                # Missing, empty or non-text attributes cannot be compared.
                continue
            # Raw string: '\W' in a plain literal is an invalid escape sequence.
            value_parts = re.split(r'\W+', value) + [value]
            for value_part in value_parts:
                if SequenceMatcher(a=password.lower(), b=value_part.lower()).quick_ratio() > self.max_similarity:
                    verbose_name = force_text(user._meta.get_field(attribute_name).verbose_name)
                    raise ValidationError(
                        _("The password is too similar to the %(verbose_name)s."),
                        code='password_too_similar',
                        params={'verbose_name': verbose_name},
                    )
项目:orizonhub    作者:gumblex    | 项目源码 | 文件源码
def sededit(a, b, context=0):
    """Take two strings and output a sed-like diff.

    Args:
        a: Original string.
        b: Edited string.
        context: Number of unchanged characters to keep on each side of the
            changed region.

    Returns:
        ``''`` when the strings are equal; otherwise ``'s/OLD/NEW/'`` covering
        the span from the first to the last change, anchored with ``^`` / ``$``
        when the span touches the start / end of ``a``. The text is inserted
        verbatim; regex metacharacters are not escaped.
    """
    if a == b:
        return ''
    a_len = len(a)
    b_len = len(b)
    # Start from an "empty" span and widen it to cover every changed opcode.
    start1, end1, start2, end2 = a_len, 0, b_len, 0
    s = difflib.SequenceMatcher(None, a, b)
    for tag, i1, i2, j1, j2 in s.get_opcodes():
        if tag == 'equal':
            continue
        elif tag == 'insert':
            # A pure insertion has an empty span in `a`; take one neighbouring
            # character on each side so the pattern is not empty.
            ins = 1
        else:
            ins = 0
        start1 = max(min(i1-context-ins, start1), 0)
        start2 = max(min(j1-context-ins, start2), 0)
        end1 = min(max(i2+context+ins, end1), a_len)
        end2 = min(max(j2+context+ins, end2), b_len)
    return 's/%s%s%s/%s/' % (
            ('' if start1 else '^'), a[start1:end1],
            ('$' if end1 == a_len else ''), b[start2:end2])
项目:autobib    作者:jdumas    | 项目源码 | 文件源码
def pick_best(title, item1, item2):
    """Pick best record among two items with identical scores.

    The first entry of each item's ``'title'`` list is compared
    case-insensitively with ``title``; the closer one wins. An item without a
    title loses to the other one. On a tie the item whose ``'type'`` scores
    higher with ``score_type`` is preferred, otherwise ``item2``.
    """
    def compare(x):
        return difflib.SequenceMatcher(None, title.lower(), x.lower()).ratio()
    if not item1['title']:
        return item2
    elif not item2['title']:
        # item2 has nothing to compare, so item1 is the only usable record.
        return item1
    r1 = compare(item1['title'][0])
    r2 = compare(item2['title'][0])
    if r1 > r2:
        return item1
    elif r2 > r1:
        return item2
    else:
        # Try to find other discriminating criteria... e.g. prefer journal-articles
        if score_type(item1["type"]) > score_type(item2["type"]):
            return item1
        else:
            return item2
项目:tabmaster    作者:NicolasMinghetti    | 项目源码 | 文件源码
def validate(self, password, user=None):
        """Reject ``password`` when it closely resembles an attribute of ``user``.

        Every attribute named in ``self.user_attributes`` is read from ``user``;
        the whole value and each piece obtained by splitting it on runs of
        non-word characters are compared case-insensitively with the password
        using ``SequenceMatcher.quick_ratio()``.

        Returns ``None`` when no user is given or nothing is too similar.
        Raises ``ValidationError`` as soon as one comparison exceeds
        ``self.max_similarity``.
        """
        if not user:
            # Nothing to compare against.
            return

        for attribute_name in self.user_attributes:
            value = getattr(user, attribute_name, None)
            if not value or not isinstance(value, string_types):
                # Missing, empty or non-text attributes cannot be compared.
                continue
            # Raw string: '\W' in a plain literal is an invalid escape sequence.
            value_parts = re.split(r'\W+', value) + [value]
            for value_part in value_parts:
                if SequenceMatcher(a=password.lower(), b=value_part.lower()).quick_ratio() > self.max_similarity:
                    verbose_name = force_text(user._meta.get_field(attribute_name).verbose_name)
                    raise ValidationError(
                        _("The password is too similar to the %(verbose_name)s."),
                        code='password_too_similar',
                        params={'verbose_name': verbose_name},
                    )
项目:text-matcher    作者:JonathanReeve    | 项目源码 | 文件源码
def get_initial_matches(self):
        """Find the matching n-gram sequences between the two texts.

        Compares ``self.textAgrams`` with ``self.textBgrams`` and returns the
        matching blocks whose size is greater than ``self.threshold``. When at
        least one such block exists, the count is printed.
        """
        sequence = SequenceMatcher(None,self.textAgrams,self.textBgrams)
        matchingBlocks = sequence.get_matching_blocks()

        # Only return the matching sequences that are higher than the
        # threshold given by the user.
        highMatchingBlocks = [match for match in matchingBlocks if match.size > self.threshold]

        numBlocks = len(highMatchingBlocks)

        if numBlocks > 0:
            print('%s total matches found.' % numBlocks, flush=True)

        return highMatchingBlocks
项目:adversarial-squad    作者:robinjia    | 项目源码 | 文件源码
def render_diff(old_text, new_text):
  """Render the difference between two strings as HTML.

  Unchanged text is emitted as-is; deleted text is wrapped in
  ``<span class="delete">`` and inserted text in ``<span class="insert">``.
  A replacement is shown as a deletion followed by an insertion.

  NOTE(review): the text is not HTML-escaped here; escape it beforehand if it
  can contain markup.
  """
  # Debug output kept from the original: shows each value and its type.
  print (old_text, old_text.__class__)
  print (new_text, new_text.__class__)
  sm = difflib.SequenceMatcher(a=old_text, b=new_text)
  out_toks = []
  for opcode, s1, e1, s2, e2 in sm.get_opcodes():
    if opcode == 'equal':
      # Unchanged text goes through untouched.
      out_toks.append(old_text[s1:e1])
    elif opcode == 'insert':
      out_toks.append('<span class="insert">' + new_text[s2:e2] + '</span>')
    elif opcode == 'delete':
      out_toks.append('<span class="delete">' + old_text[s1:e1] + '</span>')
    elif opcode == 'replace':
      out_toks.append('<span class="delete">' + old_text[s1:e1] + '</span>')
      out_toks.append('<span class="insert">' + new_text[s2:e2] + '</span>')
  return ''.join(out_toks)
项目:ims    作者:ims-team    | 项目源码 | 文件源码
def validate(self, password, user=None):
        """Reject ``password`` when it closely resembles an attribute of ``user``.

        Every attribute named in ``self.user_attributes`` is read from ``user``;
        the whole value and each piece obtained by splitting it on runs of
        non-word characters are compared case-insensitively with the password
        using ``SequenceMatcher.quick_ratio()``.

        Returns ``None`` when no user is given or nothing is too similar.
        Raises ``ValidationError`` as soon as one comparison exceeds
        ``self.max_similarity``.
        """
        if not user:
            # Nothing to compare against.
            return

        for attribute_name in self.user_attributes:
            value = getattr(user, attribute_name, None)
            if not value or not isinstance(value, string_types):
                # Missing, empty or non-text attributes cannot be compared.
                continue
            # Raw string: '\W' in a plain literal is an invalid escape sequence.
            value_parts = re.split(r'\W+', value) + [value]
            for value_part in value_parts:
                if SequenceMatcher(a=password.lower(), b=value_part.lower()).quick_ratio() > self.max_similarity:
                    verbose_name = force_text(user._meta.get_field(attribute_name).verbose_name)
                    raise ValidationError(
                        _("The password is too similar to the %(verbose_name)s."),
                        code='password_too_similar',
                        params={'verbose_name': verbose_name},
                    )
项目:lifesoundtrack    作者:MTG    | 项目源码 | 文件源码
def validate(self, password, user=None):
        """Reject ``password`` when it closely resembles an attribute of ``user``.

        Every attribute named in ``self.user_attributes`` is read from ``user``;
        the whole value and each piece obtained by splitting it on runs of
        non-word characters are compared case-insensitively with the password
        using ``SequenceMatcher.quick_ratio()``.

        Returns ``None`` when no user is given or nothing is too similar.
        Raises ``ValidationError`` as soon as one comparison exceeds
        ``self.max_similarity``.
        """
        if not user:
            # Nothing to compare against.
            return

        for attribute_name in self.user_attributes:
            value = getattr(user, attribute_name, None)
            if not value or not isinstance(value, string_types):
                # Missing, empty or non-text attributes cannot be compared.
                continue
            value_parts = re.split(r'\W+', value) + [value]
            for value_part in value_parts:
                if SequenceMatcher(a=password.lower(), b=value_part.lower()).quick_ratio() > self.max_similarity:
                    try:
                        verbose_name = force_text(user._meta.get_field(attribute_name).verbose_name)
                    except FieldDoesNotExist:
                        # Not a model field: fall back to the raw attribute name.
                        verbose_name = attribute_name
                    raise ValidationError(
                        _("The password is too similar to the %(verbose_name)s."),
                        code='password_too_similar',
                        params={'verbose_name': verbose_name},
                    )
项目:onreview    作者:ichi404gh    | 项目源码 | 文件源码
def get_diff_lines(self):
        """Split a code diff into two parallel lists of tagged chunks.

        Returns ``(postdiffs, commentdiffs)``. Each list holds ``(tag, chunk)``
        tuples where ``tag`` is ``'eq'`` for unchanged text and ``'mod'`` for
        changed text; ``postdiffs`` slices the matcher's first sequence and
        ``commentdiffs`` its second sequence (``self.code``).
        """
        import difflib

        postdiffs = list()
        commentdiffs = list()

        # NOTE(review): the first sequence argument is missing from this
        # listing (note the doubled comma), so the line does not parse as
        # shown. Restore it from the upstream project.
        # Whitespace characters are treated as junk by the matcher.
        s = difflib.SequenceMatcher(lambda x: x.isspace(),, self.code)
        for o in s.get_opcodes():
            # o is (tag, a_start, a_end, b_start, b_end).
            if o[0] in ('replace','delete'):
                postdiffs.append(('mod', s.a[o[1]:o[2]]))
            if o[0] in ('replace','insert'):
                commentdiffs.append(('mod', s.b[o[3]:o[4]]))
            if o[0] == 'equal':
                postdiffs.append(('eq', s.a[o[1]:o[2]]))
                commentdiffs.append(('eq', s.b[o[3]:o[4]]))

        return (postdiffs,commentdiffs)
项目:script.module.metadatautils    作者:marcelveldt    | 项目源码 | 文件源码
def get_custom_path(self, searchtitle, title):
        '''Locate the custom folder on disk used as pvrart location.

        Sub-directories of the ``pvr_art_custom_path`` setting are compared
        with ``title`` and ``searchtitle``, first requiring an exact match and
        then progressively looser ones. The first directory that reaches the
        current strictness wins. If nothing matches and ``pvr_art_download`` is
        enabled, a path derived from the normalized title is returned.

        Returns the path with a trailing delimiter, or "" when the feature is
        disabled or nothing applies.
        '''
        title_path = ""
        custom_path = self._mutils.addon.getSetting("pvr_art_custom_path")
        if custom_path and self._mutils.addon.getSetting("pvr_art_custom") == "true":
            delim = "\\" if "\\" in custom_path else "/"
            dirs = xbmcvfs.listdir(custom_path)[0]
            for strictness in [1, 0.95, 0.9, 0.8]:
                if title_path:
                    # A match was found at a stricter level; stop loosening.
                    break
                for directory in dirs:
                    if title_path:
                        break
                    directory = directory.decode("utf-8")
                    curpath = os.path.join(custom_path, directory) + delim
                    for item in [title, searchtitle]:
                        match = SM(None, item, directory).ratio()
                        if match >= strictness:
                            title_path = curpath
            if not title_path and self._mutils.addon.getSetting("pvr_art_download") == "true":
                title_path = os.path.join(custom_path, normalize_string(title)) + delim
        return title_path
项目:django-open-lecture    作者:DmLitov4    | 项目源码 | 文件源码
def validate(self, password, user=None):
        """Reject ``password`` when it closely resembles an attribute of ``user``.

        Every attribute named in ``self.user_attributes`` is read from ``user``;
        the whole value and each piece obtained by splitting it on runs of
        non-word characters are compared case-insensitively with the password
        using ``SequenceMatcher.quick_ratio()``.

        Returns ``None`` when no user is given or nothing is too similar.
        Raises ``ValidationError`` as soon as one comparison exceeds
        ``self.max_similarity``.
        """
        if not user:
            # Nothing to compare against.
            return

        for attribute_name in self.user_attributes:
            value = getattr(user, attribute_name, None)
            if not value or not isinstance(value, string_types):
                # Missing, empty or non-text attributes cannot be compared.
                continue
            # Raw string: '\W' in a plain literal is an invalid escape sequence.
            value_parts = re.split(r'\W+', value) + [value]
            for value_part in value_parts:
                if SequenceMatcher(a=password.lower(), b=value_part.lower()).quick_ratio() > self.max_similarity:
                    verbose_name = force_text(user._meta.get_field(attribute_name).verbose_name)
                    raise ValidationError(
                        _("The password is too similar to the %(verbose_name)s."),
                        code='password_too_similar',
                        params={'verbose_name': verbose_name},
                    )
项目    作者:orione7    | 项目源码 | 文件源码
def ratio(s1, s2):
    """Return the similarity of ``s1`` and ``s2`` scaled to 0-100.

    Both inputs are first passed through ``utils.make_type_consistent``. An
    empty input yields 0; otherwise the ``SequenceMatcher`` ratio is multiplied
    by 100 and handed to ``utils.intr``.

    Raises:
        TypeError: If either argument is ``None``.
    """
    if s1 is None:
        raise TypeError("s1 is None")
    if s2 is None:
        raise TypeError("s2 is None")

    left, right = utils.make_type_consistent(s1, s2)
    if not (len(left) and len(right)):
        return 0

    similarity = SequenceMatcher(None, left, right).ratio()
    return utils.intr(100 * similarity)

# todo: skip duplicate indexes for a little more speed
项目:ComicReader.bundle    作者:coryo    | 项目源码 | 文件源码
def P_update_tree(self, user, archive_path):  # private, plex can't use _var
        """update the cache of the dir read state for everything between cb_path and archive_path.

        ``Prefs['cb_path']`` is diffed against ``archive_path``; for every
        inserted span the first path component below the base is resolved and,
        unless it is the base directory itself, ``self.dir_read_state`` is
        called for it. Any exception is logged and not propagated.
        """
        Log.Debug('updating tree {}'.format(archive_path))
        base = Prefs['cb_path']
        x = difflib.SequenceMatcher(a=base, b=archive_path)
        for tag, i1, i2, j1, j2 in x.get_opcodes():
            if tag == 'insert':
                try:
                    diff = os.path.split(archive_path[j1:j2])[0]
                    # Normalize separators, then take the first component after
                    # the leading separator.
                    d = diff.replace('\\', '/').split('/')[1]
                    path = os.path.join(base, d)
                    Log.Debug('archive root: {}'.format(path))
                    if os.path.abspath(base) == os.path.abspath(path):
                        Log.Debug('item is in root dir. skipping.')
                    else:
                        state = self.dir_read_state(user, path, True)
                except Exception as e:
                    # Best effort: a failed refresh must not break the caller.
                    Log.Error('P_update_tree {}'.format(e))
项目:travlr    作者:gauravkulkarni96    | 项目源码 | 文件源码
def validate(self, password, user=None):
        """Reject ``password`` when it closely resembles an attribute of ``user``.

        Every attribute named in ``self.user_attributes`` is read from ``user``;
        the whole value and each piece obtained by splitting it on runs of
        non-word characters are compared case-insensitively with the password
        using ``SequenceMatcher.quick_ratio()``.

        Returns ``None`` when no user is given or nothing is too similar.
        Raises ``ValidationError`` as soon as one comparison exceeds
        ``self.max_similarity``.
        """
        if not user:
            # Nothing to compare against.
            return

        for attribute_name in self.user_attributes:
            value = getattr(user, attribute_name, None)
            if not value or not isinstance(value, string_types):
                # Missing, empty or non-text attributes cannot be compared.
                continue
            # Raw string: '\W' in a plain literal is an invalid escape sequence.
            value_parts = re.split(r'\W+', value) + [value]
            for value_part in value_parts:
                if SequenceMatcher(a=password.lower(), b=value_part.lower()).quick_ratio() > self.max_similarity:
                    verbose_name = force_text(user._meta.get_field(attribute_name).verbose_name)
                    raise ValidationError(
                        _("The password is too similar to the %(verbose_name)s."),
                        code='password_too_similar',
                        params={'verbose_name': verbose_name},
                    )
项目:Eagle    作者:magerx    | 项目源码 | 文件源码
def reset(self):
        """Resets thread data model.

        Every per-thread attribute is put back to its initial value and a
        fresh ``difflib.SequenceMatcher`` is created. ``self.shared`` is bound
        to the module-level ``shared`` object.
        """

        self.disableStdOut = False
        self.hashDBCursor = None
        self.inTransaction = False
        self.lastComparisonPage = None
        self.lastComparisonHeaders = None
        self.lastErrorPage = None
        self.lastHTTPError = None
        self.lastRedirectMsg = None
        self.lastQueryDuration = 0
        self.lastRequestMsg = None
        self.lastRequestUID = 0
        self.lastRedirectURL = None
        self.resumed = False
        self.retriesCount = 0
        self.seqMatcher = difflib.SequenceMatcher(None)
        self.shared = shared
        self.valueStack = []
项目:algo-trading-pipeline    作者:NeuralKnot    | 项目源码 | 文件源码
def get_relevant_entities(self, google_cloud_entities, target_entities, target_wikipedia_urls):
        """Return the entities that match one of the targets.

        An entity is selected when its ``wikipedia_url`` equals one of
        ``target_wikipedia_urls`` (case-insensitive), or when any
        space-separated word of its name is more than 70% similar to any word
        of a target entity name. Each entity is returned at most once, in input
        order.

        NOTE(review): the statement bodies and the name lookup were missing
        from the listing this was restored from; ``google_cloud_entity.name``
        and the append/continue/break statements are reconstructed - confirm
        against the upstream project.
        """
        entities_to_return = []
        target_wikipedia_urls_lower = [target_wikipedia_url.lower() for target_wikipedia_url in target_wikipedia_urls]

        for google_cloud_entity in google_cloud_entities:
            # Look at Wikipedia URLs
            if google_cloud_entity.wikipedia_url and google_cloud_entity.wikipedia_url.lower() in target_wikipedia_urls_lower:
                entities_to_return.append(google_cloud_entity)
                continue

            # Look at names
            a = google_cloud_entity.name.lower().split(" ")
            for target_entity in target_entities:
                b = target_entity.lower().split(" ")

                if google_cloud_entity in entities_to_return:
                    # Already matched by an earlier target.
                    break

                for google_cloud_entity_part in a:
                    for target_entity_part in b:
                        ratio = SequenceMatcher(None, google_cloud_entity_part, target_entity_part).ratio()

                        if ratio > 0.7:
                            entities_to_return.append(google_cloud_entity)
                            break

                    if google_cloud_entity in entities_to_return:
                        break

        return entities_to_return
项目:Flask_Blog    作者:sugarguo    | 项目源码 | 文件源码
def closest_rule(self, adapter):
        """Return the rule of ``adapter`` that best matches this object.

        Rules are scored mostly (98%) by how similar their endpoint is to
        ``self.endpoint``; 1% each is added when ``self.values`` is a subset of
        the rule's arguments and when ``self.method`` is among the rule's
        methods.

        Returns ``None`` when there is no adapter or it has no rules.
        """
        def score_rule(rule):
            return sum([
                0.98 * difflib.SequenceMatcher(
                    None, rule.endpoint, self.endpoint
                ).ratio(),
                0.01 * bool(set(self.values or ()).issubset(rule.arguments)),
                0.01 * bool(rule.methods and self.method in rule.methods)
            ])

        # max() raises on an empty sequence, hence the explicit rules check.
        if adapter and adapter.map._rules:
            return max(adapter.map._rules, key=score_rule)
        else:
            return None
项目:plexMusicPlayer    作者:Tyzer34    | 项目源码 | 文件源码
def similar(a, b):
    """Return the ``SequenceMatcher`` similarity ratio of ``a`` and ``b``."""
    matcher = SequenceMatcher(None, a, b)
    return matcher.ratio()
项目:swjtu-pyscraper    作者:Desgard    | 项目源码 | 文件源码
def closest_rule(self, adapter):
        """Return the rule of ``adapter`` that best matches this object.

        Rules are scored mostly (98%) by how similar their endpoint is to
        ``self.endpoint``; 1% each is added when ``self.values`` is a subset of
        the rule's arguments and when ``self.method`` is among the rule's
        methods.

        Returns ``None`` when there is no adapter or it has no rules.
        """
        def score_rule(rule):
            return sum([
                0.98 * difflib.SequenceMatcher(
                    None, rule.endpoint, self.endpoint
                ).ratio(),
                0.01 * bool(set(self.values or ()).issubset(rule.arguments)),
                0.01 * bool(rule.methods and self.method in rule.methods)
            ])

        # max() raises on an empty sequence, hence the explicit rules check.
        if adapter and adapter.map._rules:
            return max(adapter.map._rules, key=score_rule)
        else:
            return None
项目:health-mosconi    作者:GNUHealth-Mosconi    | 项目源码 | 文件源码
def global_search(cls, text, limit, menu=''):
        """
        Search on models for text including menu
        Returns a list of tuple (ratio, model, model_name, id, name, icon)
        The size of the list is limited to limit

        Raises ValueError when limit is not greater than 0.

        NOTE(review): ``cls.search(...)``, the ``continue`` statements, the
        ``set_seq*`` calls and ``record.id`` were missing from the listing this
        was restored from and are reconstructed - confirm against the upstream
        project.
        """
        pool = Pool()
        ModelAccess = pool.get('ir.model.access')

        if not limit > 0:
            raise ValueError('limit must be > 0: %r' % (limit,))

        models = cls.search(['OR',
                ('global_search_p', '=', True),
                ('model', '=', menu),
                ])
        access = ModelAccess.get_access([m.model for m in models])
        s = StringMatcher()
        if isinstance(text, str):
            text = text.decode('utf-8')
        s.set_seq2(text)

        def generate():
            for model in models:
                if not access[model.model]['read']:
                    # The current user may not read this model.
                    continue
                Model = pool.get(model.model)
                if not hasattr(Model, 'search_global'):
                    continue
                for record, name, icon in Model.search_global(text):
                    if isinstance(name, str):
                        name = name.decode('utf-8')
                    s.set_seq1(name)
                    yield (s.ratio(), model.model, model.rec_name,
                        record.id, name, icon)
        # Only the `limit` highest-ranked results are kept.
        return heapq.nlargest(int(limit), generate())
项目:zanph    作者:zanph    | 项目源码 | 文件源码
def closest_rule(self, adapter):
        """Return the rule of ``adapter`` that best matches this object.

        Rules are scored mostly (98%) by how similar their endpoint is to
        ``self.endpoint``; 1% each is added when ``self.values`` is a subset of
        the rule's arguments and when ``self.method`` is among the rule's
        methods.

        Returns ``None`` when there is no adapter or it has no rules.
        """
        def score_rule(rule):
            return sum([
                0.98 * difflib.SequenceMatcher(
                    None, rule.endpoint, self.endpoint
                ).ratio(),
                0.01 * bool(set(self.values or ()).issubset(rule.arguments)),
                0.01 * bool(rule.methods and self.method in rule.methods)
            ])

        # max() raises on an empty sequence, hence the explicit rules check.
        if adapter and adapter.map._rules:
            return max(adapter.map._rules, key=score_rule)
        else:
            return None
项目:release-script    作者:mitodl    | 项目源码 | 文件源码
def match_user(slack_users, author_name, threshold=0.6):
    """
    Do a fuzzy match of author name to full name. If it matches, return a formatted Slack handle. Else return original
    full name.

    Args:
        slack_users (list of dict): A list of slack users from their API
        author_name (str): The commit author's full name
        threshold (float): All matches must be at least this high to pass.

    Returns:
        str: The slack markup for the handle of that author.
             If one can't be found, the author's name is returned unaltered.
    """
    lower_author_name = reformatted_full_name(author_name)

    def match_for_user(slack_user):
        """Get match ratio for slack user, or 0 if below threshold"""
        lower_name = reformatted_full_name(slack_user['profile']['real_name'])
        ratio = SequenceMatcher(a=lower_author_name, b=lower_name).ratio()
        if ratio >= threshold:
            return ratio
        else:
            return 0

    slack_matches = [(slack_user, match_for_user(slack_user)) for slack_user in slack_users]
    slack_matches = [(slack_user, match) for (slack_user, match) in slack_matches if match >= threshold]

    if len(slack_matches) > 0:
        # The highest ratio wins; on ties max() keeps the first user.
        matched_user = max(slack_matches, key=lambda pair: pair[1])[0]
        return "<@{id}>".format(id=matched_user['id'])
    else:
        return author_name
项目:zing    作者:evernote    | 项目源码 | 文件源码
def opcodes(self):
        """Return the opcodes of a ``difflib.SequenceMatcher`` comparison.

        NOTE(review): the two sequences passed to the matcher are missing from
        this listing, so the call below is unclosed and does not parse as
        shown. Restore the arguments from the upstream project.
        """
        sm = difflib.SequenceMatcher(None,
        return sm.get_opcodes()
项目:Sci-Finder    作者:snverse    | 项目源码 | 文件源码
def closest_rule(self, adapter):
        """Return the rule of ``adapter`` that best matches this object.

        Rules are scored mostly (98%) by how similar their endpoint is to
        ``self.endpoint``; 1% each is added when ``self.values`` is a subset of
        the rule's arguments and when ``self.method`` is among the rule's
        methods.

        Returns ``None`` when there is no adapter or it has no rules.
        """
        def _score_rule(rule):
            return sum([
                0.98 * difflib.SequenceMatcher(
                    None, rule.endpoint, self.endpoint
                ).ratio(),
                0.01 * bool(set(self.values or ()).issubset(rule.arguments)),
                0.01 * bool(rule.methods and self.method in rule.methods)
            ])

        # max() raises on an empty sequence, hence the explicit rules check.
        if adapter and adapter.map._rules:
            return max(adapter.map._rules, key=_score_rule)
项目:Sci-Finder    作者:snverse    | 项目源码 | 文件源码
def closest_rule(self, adapter):
        """Return the rule of ``adapter`` that best matches this object.

        Rules are scored mostly (98%) by how similar their endpoint is to
        ``self.endpoint``; 1% each is added when ``self.values`` is a subset of
        the rule's arguments and when ``self.method`` is among the rule's
        methods.

        Returns ``None`` when there is no adapter or it has no rules.
        """
        def _score_rule(rule):
            return sum([
                0.98 * difflib.SequenceMatcher(
                    None, rule.endpoint, self.endpoint
                ).ratio(),
                0.01 * bool(set(self.values or ()).issubset(rule.arguments)),
                0.01 * bool(rule.methods and self.method in rule.methods)
            ])

        # max() raises on an empty sequence, hence the explicit rules check.
        if adapter and adapter.map._rules:
            return max(adapter.map._rules, key=_score_rule)
项目:harbour-sailfinder    作者:DylanVanAssche    | 项目源码 | 文件源码
def closest_rule(self, adapter):
        """Return the rule of ``adapter`` that best matches this object.

        Rules are scored mostly (98%) by how similar their endpoint is to
        ``self.endpoint``; 1% each is added when ``self.values`` is a subset of
        the rule's arguments and when ``self.method`` is among the rule's
        methods.

        Returns ``None`` when there is no adapter or it has no rules.
        """
        def score_rule(rule):
            return sum([
                0.98 * difflib.SequenceMatcher(
                    None, rule.endpoint, self.endpoint
                ).ratio(),
                0.01 * bool(set(self.values or ()).issubset(rule.arguments)),
                0.01 * bool(rule.methods and self.method in rule.methods)
            ])

        # max() raises on an empty sequence, hence the explicit rules check.
        if adapter and adapter.map._rules:
            return max(adapter.map._rules, key=score_rule)
        else:
            return None
项目:harbour-sailfinder    作者:DylanVanAssche    | 项目源码 | 文件源码
def closest_rule(self, adapter):
        """Return the rule of ``adapter`` that best matches this object.

        Rules are scored mostly (98%) by how similar their endpoint is to
        ``self.endpoint``; 1% each is added when ``self.values`` is a subset of
        the rule's arguments and when ``self.method`` is among the rule's
        methods.

        Returns ``None`` when there is no adapter or it has no rules.
        """
        def score_rule(rule):
            return sum([
                0.98 * difflib.SequenceMatcher(
                    None, rule.endpoint, self.endpoint
                ).ratio(),
                0.01 * bool(set(self.values or ()).issubset(rule.arguments)),
                0.01 * bool(rule.methods and self.method in rule.methods)
            ])

        # max() raises on an empty sequence, hence the explicit rules check.
        if adapter and adapter.map._rules:
            return max(adapter.map._rules, key=score_rule)
        else:
            return None
项目:Texty    作者:sarthfrey    | 项目源码 | 文件源码
def closest_rule(self, adapter):
        """Return the rule of ``adapter`` that best matches this object.

        Rules are scored mostly (98%) by how similar their endpoint is to
        ``self.endpoint``; 1% each is added when ``self.values`` is a subset of
        the rule's arguments and when ``self.method`` is among the rule's
        methods.

        Returns ``None`` when there is no adapter or it has no rules.
        """
        def score_rule(rule):
            return sum([
                0.98 * difflib.SequenceMatcher(
                    None, rule.endpoint, self.endpoint
                ).ratio(),
                0.01 * bool(set(self.values or ()).issubset(rule.arguments)),
                0.01 * bool(rule.methods and self.method in rule.methods)
            ])

        # max() raises on an empty sequence, hence the explicit rules check.
        if adapter and adapter.map._rules:
            return max(adapter.map._rules, key=score_rule)
        else:
            return None
项目:epuap-watchdog    作者:ad-m    | 项目源码 | 文件源码
def diff_text(a, b):
    """Render the difference between ``a`` and ``b`` as HTML.

    Removed text is wrapped in ``<strike>``, added text in ``<strong>`` and
    unchanged text is emitted as-is; the joined result is passed to ``safe``.

    NOTE(review): the inputs are interpolated without escaping before being
    handed to ``safe`` - confirm they never carry untrusted markup.
    """
    def render(tag, i1, i2, j1, j2):
        # One HTML fragment per opcode.
        if tag == 'replace':
            return "<strike>%s</strike><strong>%s</strong>" % (a[i1:i2], b[j1:j2])
        if tag == 'delete':
            return "<strike>%s</strike>" % (a[i1:i2], )
        if tag == 'insert':
            return "<strong>%s</strong>" % (b[j1:j2], )
        if tag == 'equal':
            return a[i1:i2]
        raise KeyError(tag)

    matcher = SequenceMatcher(None, a, b)
    fragments = (render(*op) for op in matcher.get_opcodes())
    return safe("".join(fragments))
项目:OPMLtoMM    作者:adxsoft    | 项目源码 | 文件源码
def print_diffs(expected,actual):
    # Prints where ``actual`` stops matching ``expected`` (Python 2 print
    # statements).
    # NOTE(review): this listing is incomplete - ``a``, ``b``, ``aendpos``,
    # ``bendpos``, ``achunk``, ``bchunk`` and ``ctr`` are never assigned in the
    # visible lines and the final ``if`` has no body, so the block does not
    # run as shown. Restore the missing statements from the upstream project.
    s = SequenceMatcher(None,a,b)
    print '\n'
    for block in s.get_matching_blocks():
        # print "a[%d] and b[%d] match for %d elements" % block
        print '\nACTUAL has matching Error at '+str(aendpos)
        # Show up to 100 characters of each side from the mismatch position.
        print 'Expected ='+expected[bendpos:bendpos+100]+'\nFound    ='+actual[aendpos:aendpos+100]
        print 'Matched values from 0 to '+str(aendpos-1)+' are'
        print ' EXPECTED='+bchunk
        print ' ACTUAL  ='+achunk
        print ''
        if ctr==0:

## Unit Tests - OPML to MM conversions
# These tests are designed to run in the local project folder opmltomm
项目:rapier    作者:apigee-labs    | 项目源码 | 文件源码
def similar(self, a, b):
        """Tell whether ``a`` and ``b`` are more similar than ``self.similarity_ratio``."""
        score = SequenceMatcher(None, a, b).ratio()
        return score > self.similarity_ratio
项目:tsubasa-reddit-bot    作者:ArmandSyah    | 项目源码 | 文件源码
def similar(a, b):
    """Return the ``SequenceMatcher`` similarity ratio of ``a`` and ``b``."""
    matcher = SequenceMatcher(None, a, b)
    return matcher.ratio()
项目:bpy_lambda    作者:bcongdon    | 项目源码 | 文件源码
def get_best_similar(data):
    """Find the entry of a pool that is most similar to a key's text.

    Args:
        data: A ``(key, use_similar, similar_pool)`` tuple. ``key[1]`` is the
            text to match, ``use_similar`` the minimum ratio accepted and
            ``similar_pool`` an iterable of candidate strings.

    Returns:
        ``(key, best)`` where ``best`` is the candidate with the highest ratio
        of at least ``use_similar`` (the later one on ties), or ``None`` when
        no candidate qualifies.
    """
    import difflib
    key, use_similar, similar_pool = data

    # try to find some close key in existing messages...
    # Optimized code inspired by difflib.get_close_matches (as we only need the best match).
    # We also consider to never make a match when len differs more than -len_key / 2, +len_key * 2 (which is valid
    # as long as use_similar is not below ~0.7).
    # Gives an overall ~20% of improvement!
    tmp = None
    s = difflib.SequenceMatcher()
    # The matcher has to be loaded with the key and each candidate; otherwise
    # it compares two empty sequences and reports 1.0 for everything.
    s.set_seq2(key[1])
    len_key = len(key[1])
    min_len = len_key // 2
    max_len = len_key * 2
    for x in similar_pool:
        if min_len < len(x) < max_len:
            s.set_seq1(x)
            if s.real_quick_ratio() >= use_similar and s.quick_ratio() >= use_similar:
                sratio = s.ratio()
                if sratio >= use_similar:
                    tmp = x
                    # Raise the bar so only an equal or better candidate replaces it.
                    use_similar = sratio
    return key, tmp
项目:base_function    作者:Rockyzsu    | 项目源码 | 文件源码
def similar(a, b):
    """Return the ``SequenceMatcher`` similarity ratio of ``a`` and ``b``."""
    matcher = SequenceMatcher(None, a, b)
    return matcher.ratio()
项目:RPoint    作者:george17-meet    | 项目源码 | 文件源码
def closest_rule(self, adapter):
        """Return the rule of ``adapter`` that best matches this object.

        Rules are scored mostly (98%) by how similar their endpoint is to
        ``self.endpoint``; 1% each is added when ``self.values`` is a subset of
        the rule's arguments and when ``self.method`` is among the rule's
        methods.

        Returns ``None`` when there is no adapter or it has no rules.
        """
        def _score_rule(rule):
            return sum([
                0.98 * difflib.SequenceMatcher(
                    None, rule.endpoint, self.endpoint
                ).ratio(),
                0.01 * bool(set(self.values or ()).issubset(rule.arguments)),
                0.01 * bool(rule.methods and self.method in rule.methods)
            ])

        # max() raises on an empty sequence, hence the explicit rules check.
        if adapter and adapter.map._rules:
            return max(adapter.map._rules, key=_score_rule)
项目:isni-reconcile    作者:cmh2166    | 项目源码 | 文件源码
def ratio(s1, s2):
    """Return the similarity of ``s1`` and ``s2`` scaled to 0-100.

    Both inputs are first passed through ``utils.make_type_consistent``; the
    ``SequenceMatcher`` ratio is multiplied by 100 and handed to
    ``utils.intr``.
    """
    left, right = utils.make_type_consistent(s1, s2)
    similarity = SequenceMatcher(None, left, right).ratio()
    return utils.intr(100 * similarity)
项目:isni-reconcile    作者:cmh2166    | 项目源码 | 文件源码
def partial_ratio(s1, s2):
    """Return the ratio of the most similar substring
    as a number between 0 and 100.

    The shorter input is compared against windows of the longer input that
    have the shorter one's length, each window aligned on one of the matching
    blocks. The best window score is returned; a score above 0.995 returns 100
    immediately.
    """
    s1, s2 = utils.make_type_consistent(s1, s2)

    if len(s1) <= len(s2):
        shorter = s1
        longer = s2
    else:
        shorter = s2
        longer = s1

    m = SequenceMatcher(None, shorter, longer)
    blocks = m.get_matching_blocks()

    # each block represents a sequence of matching characters in a string
    # of the form (idx_1, idx_2, len)
    # the best partial match will block align with at least one of those blocks
    #   e.g. shorter = "abcd", longer = XXXbcdeEEE
    #   block = (1,3,3)
    #   best score === ratio("abcd", "Xbcd")
    scores = []
    for block in blocks:
        # Slide the window so the block lines up; clamp at the start of `longer`.
        long_start = block[1] - block[0] if (block[1] - block[0]) > 0 else 0
        long_end = long_start + len(shorter)
        long_substr = longer[long_start:long_end]

        m2 = SequenceMatcher(None, shorter, long_substr)
        r = m2.ratio()
        if r > .995:
            return 100
        else:
            scores.append(r)

    # get_matching_blocks() always ends with a sentinel block, so `scores`
    # is never empty here.
    return utils.intr(100 * max(scores))

# Advanced Scoring Functions #
项目:isni-reconcile    作者:cmh2166    | 项目源码 | 文件源码
def closest_rule(self, adapter):
        """Return the rule of ``adapter`` that best matches this object.

        Rules are scored mostly (98%) by how similar their endpoint is to
        ``self.endpoint``; 1% each is added when ``self.values`` is a subset of
        the rule's arguments and when ``self.method`` is among the rule's
        methods.

        Returns ``None`` when there is no adapter or it has no rules.
        """
        def score_rule(rule):
            return sum([
                0.98 * difflib.SequenceMatcher(
                    None, rule.endpoint, self.endpoint
                ).ratio(),
                0.01 * bool(set(self.values or ()).issubset(rule.arguments)),
                0.01 * bool(rule.methods and self.method in rule.methods)
            ])

        # max() raises on an empty sequence, hence the explicit rules check.
        if adapter and adapter.map._rules:
            return max(adapter.map._rules, key=score_rule)
        else:
            return None
项目:isni-reconcile    作者:cmh2166    | 项目源码 | 文件源码
def get_matching_blocks(self):
        """Return difflib's matching blocks with the short ones filtered out.

        A block is kept when its length exceeds the effective threshold, or
        when its length is zero (difflib's terminating sentinel block). The
        effective threshold is ``self.threshold`` capped at a quarter of the
        shorter sequence's length.
        """
        # The cap must use the shorter of BOTH sequences; comparing len(self.b)
        # with itself ignored self.a and could filter out every real match.
        size = min(len(self.a), len(self.b))
        threshold = min(self.threshold, size / 4)
        actual = difflib.SequenceMatcher.get_matching_blocks(self)
        return [item for item in actual
                if item[2] > threshold
                or not item[2]]
项目:speech-to-text    作者:pluteski    | 项目源码 | 文件源码
def ratcliff_obershelp_similarity(a, b):
    """
    A kind of approximate string matching.
    Computes the generalized Ratcliff/Obershelp similarity of two strings
    as the number of matching characters divided by the total number of characters in the two strings.
    Matching characters are those in the longest common subsequence plus,
    recursively matching characters in the unmatched region on either side of the longest common subsequence.

    Returns the ratio as a float, or None when either input is empty or None.
    """
    if a and b:
        return SequenceMatcher(None, a, b).ratio()
    else:
        return None