Python regex module: finditer() code examples

The following 19 code examples, extracted from open-source Python projects, illustrate how to use regex.finditer().
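
A minimal standalone sketch (not taken from any of the projects below): regex.finditer() returns an iterator of match objects, one per non-overlapping match, each exposing group() and span().

import regex

for m in regex.finditer(r'\d+', 'a1b22c333'):
    print(m.group(0), m.span())   # -> 1 (1, 2), then 22 (3, 5), then 333 (6, 9)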

Project: dotnet-binary-deserializer    Author: koutto
def build_partial_stringtable(self, inband_elements):
        """
        Use extracted in-band elements to populate partial StringTable with correct index
        """
        # Find reference max index into decoded data
        max_index = 1
        regex = re.compile(r'\[\[VALUE_0x([0-9a-fA-F]+)\]\]')
        for match in regex.finditer(self.output):
            if int(match.group(1), 16) > max_index:
                max_index = int(match.group(1), 16)

        # Compute beginning index of partial StringTable
        begin_index = max_index - (len(inband_elements)-1)*2

        # Build partial StringTable
        partial_stringtable = collections.OrderedDict()
        for i in range(begin_index, max_index+1, 2):
            partial_stringtable[i] = inband_elements.pop(0)

        return partial_stringtable
Project: dotnet-binary-deserializer    Author: koutto
def extract_inband_dictionary_from_xml(self):
        """
        Extract known elements from the StringTable that appear inside the XML.
        They must follow the syntax [[VALUE|ST_0xXX]].
        These elements are meant to be converted to binary.
        """
        inband_dictionary = {}

        # Find all reference to in-band dictionary into xml
        regex = re.compile(r'\[\[(.*?)\|ST_0x([0-9a-fA-F]+)\]\]')
        for match in regex.finditer(self.input):
            inband_dictionary[int(match.group(2), 16)] = match.group(1)

        # Replace [[VALUE|ST_0xXX]] by [[VALUE_0xXX]] into xml
        regex = re.compile(r'\[\[(?P<value>.*?)\|ST_0x(?P<number>[0-9a-fA-F]+)\]\]')
        self.input = re.sub(regex, r'[[VALUE_0x\g<number>]]', self.input)

        #print self.input
        return inband_dictionary
Project: panphon    Author: dmort27
def map_to_dogol_prime(self, s):
        """Map a string to Dogolpolsky' classes

        Args:
            s (unicode): IPA word

        Returns:
            (unicode): word with all segments collapsed to D' classes
        """
        segs = []
        for seg in self.fm.seg_regex.finditer(s):
            fts = self.fm.fts(seg.group(0))
            for mask, label in self.dogol_prime:
                if fts >= mask:
                    segs.append(label)
                    break
        return ''.join(segs)
Project: oa_qian    Author: sunqb
def test_finditer(self):
        it = regex.finditer(r":+", "a:b::c:::d")
        self.assertEqual([item[0] for item in it], [':', '::', ':::'])
Project: oa_qian    Author: sunqb
def test_bug_581080(self):
        it = regex.finditer(r"\s", "a b")
        self.assertEqual(it.next().span(), (1, 2))
        self.assertRaises(StopIteration, lambda: it.next())

        scanner = regex.compile(r"\s").scanner("a b")
        self.assertEqual(scanner.search().span(), (1, 2))
        self.assertEqual(scanner.search(), None)
Project: oa_qian    Author: sunqb
def test_bug_817234(self):
        it = regex.finditer(r".*", "asdf")
        self.assertEqual(it.next().span(), (0, 4))
        self.assertEqual(it.next().span(), (4, 4))
        self.assertRaises(StopIteration, lambda: it.next())
Project: oa_qian    Author: sunqb
def test_zerowidth(self):
        # Issue 3262.
        self.assertEqual(regex.split(r"\b", "a b"), ['a b'])
        self.assertEqual(regex.split(r"(?V1)\b", "a b"), ['', 'a', ' ', 'b',
          ''])

        # Issue 1647489.
        self.assertEqual(regex.findall(r"^|\w+", "foo bar"), ['', 'foo',
          'bar'])
        self.assertEqual([m[0] for m in regex.finditer(r"^|\w+", "foo bar")],
          ['', 'foo', 'bar'])
        self.assertEqual(regex.findall(r"(?r)^|\w+", "foo bar"), ['bar', 'foo',
          ''])
        self.assertEqual([m[0] for m in regex.finditer(r"(?r)^|\w+",
          "foo bar")], ['bar', 'foo', ''])
        self.assertEqual(regex.findall(r"(?V1)^|\w+", "foo bar"), ['', 'foo',
          'bar'])
        self.assertEqual([m[0] for m in regex.finditer(r"(?V1)^|\w+",
          "foo bar")], ['', 'foo', 'bar'])
        self.assertEqual(regex.findall(r"(?rV1)^|\w+", "foo bar"), ['bar',
          'foo', ''])
        self.assertEqual([m[0] for m in regex.finditer(r"(?rV1)^|\w+",
          "foo bar")], ['bar', 'foo', ''])

        self.assertEqual(regex.split("", "xaxbxc"), ['xaxbxc'])
        self.assertEqual([m for m in regex.splititer("", "xaxbxc")],
          ['xaxbxc'])

        self.assertEqual(regex.split("(?r)", "xaxbxc"), ['xaxbxc'])
        self.assertEqual([m for m in regex.splititer("(?r)", "xaxbxc")],
          ['xaxbxc'])

        self.assertEqual(regex.split("(?V1)", "xaxbxc"), ['', 'x', 'a', 'x',
          'b', 'x', 'c', ''])
        self.assertEqual([m for m in regex.splititer("(?V1)", "xaxbxc")], ['',
          'x', 'a', 'x', 'b', 'x', 'c', ''])

        self.assertEqual(regex.split("(?rV1)", "xaxbxc"), ['', 'c', 'x', 'b',
          'x', 'a', 'x', ''])
        self.assertEqual([m for m in regex.splititer("(?rV1)", "xaxbxc")], ['',
          'c', 'x', 'b', 'x', 'a', 'x', ''])
Project: oa_qian    Author: sunqb
def test_bug_10328(self):
        # Issue 10328.
        pat = regex.compile(r'(?mV0)(?P<trailing_ws>[ \t]+\r*$)|(?P<no_final_newline>(?<=[^\n])\Z)')
        self.assertEqual(pat.subn(lambda m: '<' + m.lastgroup + '>',
          'foobar '), ('foobar<trailing_ws>', 1))
        self.assertEqual([m.group() for m in pat.finditer('foobar ')], [' ',
          ''])
        pat = regex.compile(r'(?mV1)(?P<trailing_ws>[ \t]+\r*$)|(?P<no_final_newline>(?<=[^\n])\Z)')
        self.assertEqual(pat.subn(lambda m: '<' + m.lastgroup + '>',
          'foobar '), ('foobar<trailing_ws><no_final_newline>', 2))
        self.assertEqual([m.group() for m in pat.finditer('foobar ')], [' ',
          ''])
Project: backrefs    Author: facelessuser
def find_flags(self, s, quotes, re_verbose, re_version):
            """Find verbose and Unicode flags."""

            new = []
            start = 0
            verbose_flag = re_verbose
            version_flag = re_version
            avoid = quotes + self.groups
            avoid.sort()
            if version_flag and verbose_flag:
                return bool(verbose_flag), version_flag
            for a in avoid:
                new.append(s[start:a[0] + 1])
                start = a[1]
            new.append(s[start:])
            for m in self._regex_flags.finditer(self._empty.join(new)):
                if m.group(2):
                    if self._verbose_flag in m.group(2):
                        verbose_flag = True
                    if self._V0 in m.group(2):
                        version_flag = V0
                    elif self._V1 in m.group(2):
                        version_flag = V1
                if version_flag and verbose_flag:
                    break
            return bool(verbose_flag), version_flag if version_flag else regex.DEFAULT_VERSION
Project: backrefs    Author: facelessuser
def finditer(
        pattern, string, flags=0, pos=None, endpos=None, overlapped=False,
        partial=False, concurrent=None, **kwargs
    ):
        """Wrapper for `finditer`."""

        return regex.finditer(
            _apply_search_backrefs(pattern, flags), string,
            flags, pos, endpos, overlapped, partial, concurrent, **kwargs
        )
Project: bubblesub    Author: rr-
def spell_check_plain_text(dictionary, text):
    text = regex.sub(
        r'\\[Nnh]',
        '  ',  # two spaces so that matches maintain their position in the text
        text)

    for match in regex.finditer(r'\p{L}[\p{L}\p{P}]*\p{L}|\p{L}', text):
        if not dictionary.check(match.group(0)):
            yield (match.start(), match.end())
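
A quick illustration, not taken from the bubblesub project: with the third-party regex module, the Unicode-property pattern above treats letters joined by internal punctuation as one word, so the checker sees "don't" as a single token.

import regex

words = [m.group(0) for m in regex.finditer(r"\p{L}[\p{L}\p{P}]*\p{L}|\p{L}", "don't  stop")]
print(words)  # -> ["don't", 'stop']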
Project: panphon    Author: dmort27
def segment_text(text, seg_regex=SEG_REGEX):
    """Return an iterator of segments in the text.

    Args:
        text (unicode): string of IPA Unicode text
        seg_regex (_regex.Pattern): compiled regex defining a segment (base +
                                    modifiers)

    Return:
        generator: segments in the input text
    """
    for m in seg_regex.finditer(text):
        yield m.group(0)
Project: panphon    Author: dmort27
def fts(s):
    """Given string `s` with +/-[alphabetical sequence]s, return list of features.

    Args:
        s (str): string with segments of the sort "+son -syl 0cor"

    Return:
        list: list of (value, feature) tuples
    """
    return [m.groups() for m in FT_REGEX.finditer(s)]
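
For illustration only: FT_REGEX is defined elsewhere in panphon, and the stand-in below merely assumes a pattern like the one used in ftstr2dict further down; with that assumption, the call yields (value, feature) pairs.

import regex

FT_REGEX = regex.compile(r'([-+0])(\w+)')  # hypothetical stand-in for panphon's FT_REGEX

print([m.groups() for m in FT_REGEX.finditer('+son -syl 0cor')])
# -> [('+', 'son'), ('-', 'syl'), ('0', 'cor')]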
Project: panphon    Author: dmort27
def pat(p):
    """Given a string `p` with feature matrices (features grouped with square
    brackets into segments, return a list of sets of (value, feature) tuples.

    Args:
        p (str): feature matrices as a string

    Return:
        list: list of sets of (value, feature) tuples
    """
    pattern = []
    for matrix in [m.group(0) for m in MT_REGEX.finditer(p)]:
        segment = set([m.groups() for m in FT_REGEX.finditer(matrix)])
        pattern.append(segment)
    return pattern
Project: panphon    Author: dmort27
def segs(self, word):
        """Returns a list of segments from a word

        Args:
            word (unicode): input word as Unicode IPA string

        Returns:
            list: list of strings corresponding to segments found in `word`
        """
        return [m.group('all') for m in self.seg_regex.finditer(word)]
Project: panphon    Author: dmort27
def filter_string(self, word):
        """Return a string like the input but containing only legal IPA segments

        Args:
            word (unicode): input string to be filtered

        Returns:
            unicode: string identical to `word` but with invalid IPA segments
                     absent

        """
        segs = [m.group(0) for m in self.seg_regex.finditer(word)]
        return ''.join(segs)
Project: panphon    Author: dmort27
def __init__(self, names, features={}, ftstr='', weights=None):
        """Construct a `Segment` object

        Args:
            names (list): ordered list of feature names
            features (dict): name-value pairs for specified features
            ftstr (unicode): a string, each /(+|0|-)\w+/ sequence of which is
                             interpreted as a feature specification
            weights (list): ordered list of feature weights/saliences
        """
        self.n2s = {-1: '-', 0: '0', 1: '+'}
        self.s2n = {k: v for (v, k) in self.n2s.items()}
        self.names = names
        """Set a feature specification"""
        self.data = {}
        for name in names:
            if name in features:
                self.data[name] = features[name]
            else:
                self.data[name] = 0
        for m in re.finditer(r'(\+|0|-)(\w+)', ftstr):
            v, k = m.groups()
            self.data[k] = self.s2n[v]
        if weights:
            self.weights = weights
        else:
            self.weights = [1 for _ in names]
Project: panphon    Author: dmort27
def ftstr2dict(ftstr):
    fts = {}
    for m in re.finditer(r'([-0+])(\w+)', ftstr):
        v, k = m.groups()
        fts[k] = {'-': -1, '0': 0, '+': 1}[v]
    return fts
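
A short usage sketch, not part of the panphon source, assuming `import re` is in scope:

print(ftstr2dict('+son -syl 0cor'))
# -> {'son': 1, 'syl': -1, 'cor': 0}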
Project: oa_qian    Author: sunqb
def test_copy(self):
        # PatternObjects are immutable, therefore there's no need to clone them.
        r = regex.compile("a")
        self.assert_(copy.copy(r) is r)
        self.assert_(copy.deepcopy(r) is r)

        # MatchObjects are normally mutable because the target string can be
        # detached. However, after the target string has been detached, a
        # MatchObject becomes immutable, so there's no need to clone it.
        m = r.match("a")
        self.assert_(copy.copy(m) is not m)
        self.assert_(copy.deepcopy(m) is not m)

        self.assert_(m.string is not None)
        m2 = copy.copy(m)
        m2.detach_string()
        self.assert_(m.string is not None)
        self.assert_(m2.string is None)

        # The following behaviour matches that of the re module.
        it = regex.finditer(".", "ab")
        it2 = copy.copy(it)
        self.assertEqual(it.next().group(), "a")
        self.assertEqual(it2.next().group(), "b")

        # The following behaviour matches that of the re module.
        it = regex.finditer(".", "ab")
        it2 = copy.deepcopy(it)
        self.assertEqual(it.next().group(), "a")
        self.assertEqual(it2.next().group(), "b")

        # The following behaviour is designed to match that of copying 'finditer'.
        it = regex.splititer(" ", "a b")
        it2 = copy.copy(it)
        self.assertEqual(it.next(), "a")
        self.assertEqual(it2.next(), "b")

        # The following behaviour is designed to match that of copying 'finditer'.
        it = regex.splititer(" ", "a b")
        it2 = copy.deepcopy(it)
        self.assertEqual(it.next(), "a")
        self.assertEqual(it2.next(), "b")