Python regex module: findall() example source code

We extracted the following 26 code examples from open-source Python projects to illustrate how to use regex.findall().
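
A quick orientation before the examples: a minimal sketch (ours, not from any of the projects below) of how regex.findall()'s return shape depends on the number of capture groups in the pattern:

import regex

print(regex.findall(":+", "a:b::c"))       # no group: [':', '::']
print(regex.findall("(:+)", "a:b::c"))     # one group: the group's captures
print(regex.findall("(:)(:*)", "a:b::c"))  # two groups: [(':', ''), (':', ':')]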

Project: oa_qian | Author: sunqb
def test_re_findall(self):
        self.assertEqual(regex.findall(":+", "abc"), [])
        self.assertEqual(regex.findall(":+", "a:b::c:::d"), [':', '::', ':::'])
        self.assertEqual(regex.findall("(:+)", "a:b::c:::d"), [':', '::',
          ':::'])
        self.assertEqual(regex.findall("(:)(:*)", "a:b::c:::d"), [(':', ''),
          (':', ':'), (':', '::')])

        self.assertEqual(regex.findall(r"\((?P<test>.{0,5}?TEST)\)",
          "(MY TEST)"), ["MY TEST"])
        self.assertEqual(regex.findall(r"\((?P<test>.{0,3}?TEST)\)",
          "(MY TEST)"), ["MY TEST"])
        self.assertEqual(regex.findall(r"\((?P<test>.{0,3}?T)\)", "(MY T)"),
          ["MY T"])

        self.assertEqual(regex.findall(r"[^a]{2}[A-Z]", "\n  S"), ['  S'])
        self.assertEqual(regex.findall(r"[^a]{2,3}[A-Z]", "\n  S"), ['\n  S'])
        self.assertEqual(regex.findall(r"[^a]{2,3}[A-Z]", "\n   S"), ['   S'])

        self.assertEqual(regex.findall(r"X(Y[^Y]+?){1,2}( |Q)+DEF",
          "XYABCYPPQ\nQ DEF"), [('YPPQ\n', ' ')])

        self.assertEqual(regex.findall(r"(\nTest(\n+.+?){0,2}?)?\n+End",
          "\nTest\nxyz\nxyz\nEnd"), [('\nTest\nxyz\nxyz', '\nxyz')])
Project: oa_qian | Author: sunqb
def test_overlapped(self):
        self.assertEqual(regex.findall(r"..", "abcde"), ['ab', 'cd'])
        self.assertEqual(regex.findall(r"..", "abcde", overlapped=True), ['ab',
          'bc', 'cd', 'de'])
        self.assertEqual(regex.findall(r"(?r)..", "abcde"), ['de', 'bc'])
        self.assertEqual(regex.findall(r"(?r)..", "abcde", overlapped=True),
          ['de', 'cd', 'bc', 'ab'])
        self.assertEqual(regex.findall(r"(.)(-)(.)", "a-b-c", overlapped=True),
          [("a", "-", "b"), ("b", "-", "c")])

        self.assertEqual([m[0] for m in regex.finditer(r"..", "abcde")], ['ab',
          'cd'])
        self.assertEqual([m[0] for m in regex.finditer(r"..", "abcde",
          overlapped=True)], ['ab', 'bc', 'cd', 'de'])
        self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde")],
          ['de', 'bc'])
        self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde",
          overlapped=True)], ['de', 'cd', 'bc', 'ab'])

        self.assertEqual([m.groups() for m in regex.finditer(r"(.)(-)(.)",
          "a-b-c", overlapped=True)], [("a", "-", "b"), ("b", "-", "c")])
        self.assertEqual([m.groups() for m in regex.finditer(r"(?r)(.)(-)(.)",
          "a-b-c", overlapped=True)], [("b", "-", "c"), ("a", "-", "b")])
Project: BarcSeek | Author: NCBI-Hackathons
def barcode_to_regex(barcode: str, error_rate: Optional[int]=None):
    """Convert a barcode string to a regex pattern
    barcode [str]           The barcode string to turn into a regex
    error_rate [int]=None   The error rate"""
    pattern = '' # type: str
    umi = regex.findall(r'(N+)', barcode, regex.IGNORECASE) # type: List[str]
    umi_lengths = tuple(map(len, umi)) # type: Tuple[int, ...]
    filtered_barcode = filter(None, barcode.upper().split('N')) # type: filter
    for index, subpattern in enumerate(filtered_barcode): # type: int, str
        barcode_pattern = '(' + subpattern + ')' # type: str
        if error_rate:
            barcode_pattern += '{e<=' + str(error_rate) + '}'
        pattern += barcode_pattern
        try:
            umi_pattern = '(' + ''.join(itertools.repeat('[ACGT]', umi_lengths[index])) + ')' # type: str
        except IndexError:
            break
        else:
            if error_rate:
                umi_pattern += '{e<=' + str(error_rate) + '}'
            pattern += umi_pattern
    find_barcode = regex.compile(r'%s' % pattern, regex.ENHANCEMATCH)
    return find_barcode
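
A hedged usage sketch (assuming the snippet's imports, i.e. regex, itertools and typing, are in place): for a hypothetical barcode 'ACGTNNNNACGT' with one error allowed, the function yields one fuzzy group per barcode/UMI segment:

find_barcode = barcode_to_regex('ACGTNNNNACGT', error_rate=1)
print(find_barcode.pattern)
# (ACGT){e<=1}([ACGT][ACGT][ACGT][ACGT]){e<=1}(ACGT){e<=1}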
Project: panphon | Author: dmort27
def compile_regex_from_str(self, pat):
        """Given a string describing features masks for a sequence of segments,
        return a compiled regex matching the corresponding strings.

        Args:
            ft_str (str): feature masks, each enclosed in square brackets, in
            which the features are delimited by any standard delimiter.

        Returns:
           Pattern: regular expression pattern equivalent to `ft_str`
        """
        s2n = {'-': -1, '0': 0, '+': 1}
        seg_res = []
        for mat in re.findall(r'\[[^]]+\]+', pat):
            ft_mask = {k: s2n[v] for (v, k) in re.findall(r'([+-])(\w+)', mat)}
            segs = self.all_segs_matching_fts(ft_mask)
            seg_res.append('({})'.format('|'.join(segs)))
        regexp = ''.join(seg_res)
        return re.compile(regexp)
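
The mask-parsing stage can be exercised on its own; a minimal sketch with a hypothetical feature-mask string (all_segs_matching_fts belongs to the surrounding class, so only the mask-to-dict step is shown):

import re

pat = '[+voi -nas][+syl]'  # hypothetical feature masks
s2n = {'-': -1, '0': 0, '+': 1}
for mat in re.findall(r'\[[^]]+\]+', pat):
    print({k: s2n[v] for (v, k) in re.findall(r'([+-])(\w+)', mat)})
# {'voi': 1, 'nas': -1}
# {'syl': 1}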
Project: open-syllabus-project | Author: davidmcclure
def tokenize_field(value):

    """
    Extract normalized tokens from a field.

    Args:
        value (str): The field value.

    Returns:
        list: The cleaned tokens.
    """

    # Extract tokens.
    tokens = regex.findall(r'\p{L}{2,}', value.lower())

    # Remove articles.
    tokens = [t for t in tokens if t not in [
        'a', 'an', 'the', 'and',
    ]]

    return tokens
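
A hedged usage sketch (assuming import regex at module level): stop-list words are dropped, and \p{L}{2,} already excludes single-letter tokens:

print(tokenize_field('The Quick Brown Fox And The Hound'))
# ['quick', 'brown', 'fox', 'hound']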
Project: ShadowSocksShare-OpenShift | Author: the0demiurge
def request_url(url, headers=None, name=''):
    print('req', url)

    data = set()
    servers = list()
    try:
        response = requests.get(url, headers=headers, verify=False).text
        data.update(map(lambda x: re.sub(r'\s', '', x), re.findall('ssr?://[a-zA-Z0-9=]+', response)))
        soup = BeautifulSoup(response, 'html.parser')
        title = soup.find('title').text

        info = {'message': '', 'url': url, 'name': str(title)}
        for i, server in enumerate(data):
            try:
                servers.append(parse(server, ' '.join([title, name, str(i)])))
            except Exception as e:
                logging.exception(e, stack_info=False)
                print('URL:', url, 'SERVER', server)
    except Exception as e:
        print(url)
        logging.exception(e, stack_info=False)
        return [], {'message': str(e), 'url': '', 'name': ''}
    return servers, info
Project: siamese_sentiment | Author: jcavalieri8619
def generate_char_list(string, strip_html=True):
    if strip_html:
        s = strip_html_tags(string.lower())
    else:
        s = string.lower()
    normalized_string = regex.sub(r'\s+', r' ', s)  # change any kind of whitespace to a single space

    list_norm_chars = regex.findall(r"\w|[?!'#@$:\"&*=,]", normalized_string)
    return list_norm_chars
Project: siamese_sentiment | Author: jcavalieri8619
def generate_word_list(string, strip_html=True):
    if strip_html:
        s = strip_html_tags(string.lower())
    else:
        s = string.lower()

    normalized_string = regex.sub(r"\s+", r' ', s)  # change any kind of whitespace to a single space

    # list of all words seen during training, including strings like '!!!', '??', '....',
    # as repeated punctuation tends to carry more than its grammatical meaning
    list_normalized_string = regex.findall(r"\b\w+[']?\w*\b|\!+|\?+|\.{3,}", normalized_string)
    return list_normalized_string
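
A hedged usage sketch (with strip_html=False, so the strip_html_tags helper defined elsewhere in the project is not needed, and assuming import regex):

print(generate_word_list('So good!!! Really...', strip_html=False))
# ['so', 'good', '!!!', 'really', '...']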
Project: ine5430-gomoku | Author: igoryamamoto
def countOccurrences(text, searchFor):
    '''
        Count all occurrences of the string "searchFor" in the text "text"
    '''
    # overlapped=True is specific to the third-party regex module; the stdlib
    # re.findall() does not accept an overlapped argument.
    return len(regex.findall(searchFor, text, overlapped=True))
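
For example, overlapped matching counts runs that non-overlapped matching would miss (assuming import regex):

print(countOccurrences('aaaa', 'aa'))  # 3: matches at offsets 0, 1 and 2
# without overlapped=True this would be 2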
Project: oa_qian | Author: sunqb
def test_weakref(self):
        s = 'QabbbcR'
        x = regex.compile('ab+c')
        y = proxy(x)
        if x.findall('QabbbcR') != y.findall('QabbbcR'):
            self.fail()
Project: oa_qian | Author: sunqb
def test_bug_1661(self):
        # Verify that flags do not get silently ignored with compiled patterns
        pattern = regex.compile('.')
        self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT,
          lambda: regex.match(pattern, 'A', regex.I))
        self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT,
          lambda: regex.search(pattern, 'A', regex.I))
        self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT,
          lambda: regex.findall(pattern, 'A', regex.I))
        self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT,
          lambda: regex.compile(pattern, regex.I))
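
A minimal sketch of the behavior this test pins down; the exact wording of the regex module's error message is paraphrased in the comment:

import regex

pattern = regex.compile('.')
try:
    regex.findall(pattern, 'A', regex.I)
except ValueError as e:
    print(e)  # complains that flags cannot be used with a compiled pattern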
Project: oa_qian | Author: sunqb
def test_bug_117612(self):
        self.assertEqual(regex.findall(r"(a|(b))", "aba"), [('a', ''), ('b',
          'b'), ('a', '')])
Project: oa_qian | Author: sunqb
def test_re_groupref(self):
        self.assertEqual(regex.match(r'^(\|)?([^()]+)\1$', '|a|')[:], ('|a|',
          '|', 'a'))
        self.assertEqual(regex.match(r'^(\|)?([^()]+)\1?$', 'a')[:], ('a',
          None, 'a'))
        self.assertEqual(regex.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
        self.assertEqual(regex.match(r'^(\|)?([^()]+)\1$', '|a'), None)
        self.assertEqual(regex.match(r'^(?:(a)|c)(\1)$', 'aa')[:], ('aa', 'a',
          'a'))
        self.assertEqual(regex.match(r'^(?:(a)|c)(\1)?$', 'c')[:], ('c', None,
          None))

        self.assertEqual(regex.findall("(?i)(.{1,40}?),(.{1,40}?)(?:;)+(.{1,80}).{1,40}?\\3(\ |;)+(.{1,80}?)\\1",
          "TEST, BEST; LEST ; Lest 123 Test, Best"), [('TEST', ' BEST',
          ' LEST', ' ', '123 ')])
Project: oa_qian | Author: sunqb
def test_word_class(self):
        self.assertEqual(regex.findall(ur"(?u)\w+",
          u" \u0939\u093f\u0928\u094d\u0926\u0940,"),
          [u'\u0939\u093f\u0928\u094d\u0926\u0940'])
        self.assertEqual(regex.findall(ur"(?u)\W+",
          u" \u0939\u093f\u0928\u094d\u0926\u0940,"), [u' ', u','])
        self.assertEqual(regex.split(ur"(?uV1)\b",
          u" \u0939\u093f\u0928\u094d\u0926\u0940,"), [u' ',
          u'\u0939\u093f\u0928\u094d\u0926\u0940', u','])
        self.assertEqual(regex.split(ur"(?uV1)\B",
          u" \u0939\u093f\u0928\u094d\u0926\u0940,"), [u'', u' \u0939',
          u'\u093f', u'\u0928', u'\u094d', u'\u0926', u'\u0940,', u''])
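
These assertions use Python 2 ur"..." literals; a hedged Python 3 equivalent of the first one (str patterns default to Unicode, so the (?u) flag and u prefixes are unnecessary):

import regex

print(regex.findall(r"\w+", " \u0939\u093f\u0928\u094d\u0926\u0940,"))
# ['हिन्दी']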
Project: oa_qian | Author: sunqb
def test_search_anchor(self):
        self.assertEqual(regex.findall(r"\G\w{2}", "abcd ef"), ['ab', 'cd'])
Project: oa_qian | Author: sunqb
def test_zerowidth(self):
        # Issue 3262.
        self.assertEqual(regex.split(r"\b", "a b"), ['a b'])
        self.assertEqual(regex.split(r"(?V1)\b", "a b"), ['', 'a', ' ', 'b',
          ''])

        # Issue 1647489.
        self.assertEqual(regex.findall(r"^|\w+", "foo bar"), ['', 'foo',
          'bar'])
        self.assertEqual([m[0] for m in regex.finditer(r"^|\w+", "foo bar")],
          ['', 'foo', 'bar'])
        self.assertEqual(regex.findall(r"(?r)^|\w+", "foo bar"), ['bar', 'foo',
          ''])
        self.assertEqual([m[0] for m in regex.finditer(r"(?r)^|\w+",
          "foo bar")], ['bar', 'foo', ''])
        self.assertEqual(regex.findall(r"(?V1)^|\w+", "foo bar"), ['', 'foo',
          'bar'])
        self.assertEqual([m[0] for m in regex.finditer(r"(?V1)^|\w+",
          "foo bar")], ['', 'foo', 'bar'])
        self.assertEqual(regex.findall(r"(?rV1)^|\w+", "foo bar"), ['bar',
          'foo', ''])
        self.assertEqual([m[0] for m in regex.finditer(r"(?rV1)^|\w+",
          "foo bar")], ['bar', 'foo', ''])

        self.assertEqual(regex.split("", "xaxbxc"), ['xaxbxc'])
        self.assertEqual([m for m in regex.splititer("", "xaxbxc")],
          ['xaxbxc'])

        self.assertEqual(regex.split("(?r)", "xaxbxc"), ['xaxbxc'])
        self.assertEqual([m for m in regex.splititer("(?r)", "xaxbxc")],
          ['xaxbxc'])

        self.assertEqual(regex.split("(?V1)", "xaxbxc"), ['', 'x', 'a', 'x',
          'b', 'x', 'c', ''])
        self.assertEqual([m for m in regex.splititer("(?V1)", "xaxbxc")], ['',
          'x', 'a', 'x', 'b', 'x', 'c', ''])

        self.assertEqual(regex.split("(?rV1)", "xaxbxc"), ['', 'c', 'x', 'b',
          'x', 'a', 'x', ''])
        self.assertEqual([m for m in regex.splititer("(?rV1)", "xaxbxc")], ['',
          'c', 'x', 'b', 'x', 'a', 'x', ''])
Project: oa_qian | Author: sunqb
def test_grapheme(self):
        self.assertEqual(regex.match(ur"(?u)\X", u"\xE0").span(), (0, 1))
        self.assertEqual(regex.match(ur"(?u)\X", u"a\u0300").span(), (0, 2))

        self.assertEqual(regex.findall(ur"(?u)\X",
          u"a\xE0a\u0300e\xE9e\u0301"), [u'a', u'\xe0', u'a\u0300', u'e',
          u'\xe9', u'e\u0301'])
        self.assertEqual(regex.findall(ur"(?u)\X{3}",
          u"a\xE0a\u0300e\xE9e\u0301"), [u'a\xe0a\u0300', u'e\xe9e\u0301'])
        self.assertEqual(regex.findall(ur"(?u)\X", u"\r\r\n\u0301A\u0301"),
          [u'\r', u'\r\n', u'\u0301', u'A\u0301'])
Project: backrefs | Author: facelessuser
def findall(
        pattern, string, flags=0, pos=None, endpos=None, overlapped=False,
        concurrent=None, **kwargs
    ):
        """Wrapper for `findall`."""

        return regex.findall(
            _apply_search_backrefs(pattern, flags), string,
            flags, pos, endpos, overlapped, concurrent, **kwargs
        )
Project: wikt2pron | Author: abuccts
def expand_template(self, text):
        """Expand IPA Template through Wiktionary API.

        Used to expand ``{{*-IPA}}`` template in parser and return IPA list.

        Parameters
        ----------
        text : string
            String of template text inside "{{" and "}}".

        Returns
        -------
        list of string
            List of expanded IPA text.

        Examples
        --------
        >>> parser = Parser()
        >>> template = "{{la-IPA|eccl=yes|th?saurus}}"
        >>> parser.expand_template(template)
        ['/t?e??sau?.rus/', '[t?e??sau?.r?s]', '/te?sau?.rus/']
        """
        self.param["text"] = text.encode("utf-8")
        res = urlopen(self.api, urlencode(self.param).encode()).read()
        content = json.loads(res.decode("utf-8"))
        html = content["expandtemplates"]["wikitext"]
        # Use BeautifulSoup instead of raw regex expr
        # return self.regex["IPA"].findall(html)
        soup = BeautifulSoup(html, "html.parser")
        span = soup.find_all("span", {"class": "IPA"})
        return list(map(lambda x: x.text, span))
Project: wikt2pron | Author: abuccts
def parse(self, wiki_text, title=None):
        """Parse Wiktionary wiki text.

        Split Wiktionary wiki text into different languages and return the
        parsed IPA results.

        Parameters
        ----------
        wiki_text : string
            String of Wiktionary wiki text, from XML dump or Wiktionary API.
        title : string
            String of wiki entry title.

        Returns
        -------
        dict
            Dict of parsed IPA results.
            Key: language name; Value: list of IPA text.
        """
        self.title = title
        parse_result = {}
        h2_lst = self.regex["h2"].findall(wiki_text)
        if self.lang and self.lang not in h2_lst:
            parse_result = {self.lang: "Language not found."}
            return parse_result
        h2_split = self.regex["h2"].split(wiki_text)
        i = 0
        while i < len(h2_split):
            if h2_split[i] in h2_lst:
                if not self.lang or h2_split[i] == self.lang:
                    pronunciation = self.parse_detail(h2_split[i+1])
                    if not pronunciation:
                        pronunciation = "IPA not found."
                    parse_result[h2_split[i]] = pronunciation
                i += 1
            i += 1
        return parse_result
Project: wikt2pron | Author: abuccts
def parse_detail(self, wiki_text, depth=3):
        """Parse the section of a certain language in wiki text.

        Parse pronunciation section of the certain language recursively.

        Parameters
        ----------
        wiki_text : string
            String of wiki text in a language section.
        depth : int
            Integer indicating the heading depth of the pronunciation section.

        Returns
        -------
        list of dict
            List of extracted IPA text in
            ``{"IPA": "", "X-SAMPA": "", "lang": ""}`` format.
        """
        parse_result = []
        detail_lst = self.regex["h" + str(depth)].findall(wiki_text)
        detail_split = self.regex["h" + str(depth)].split(wiki_text)
        # Guard against exceeding the maximum recursion depth.
        if len(detail_split) > 99999:
            return "Maximum recursion depth exceeded in wiki text."
        i = 0
        while i < len(detail_split):
            if detail_split[i] in detail_lst:
                header_name = detail_split[i].lower()
                if header_name == "pronunciation":
                    parse_result += \
                        self.parse_pronunciation(detail_split[i+1])
                elif ("etymology" in header_name and
                      header_name != "etymology"):
                    parse_result += \
                        self.parse_detail(detail_split[i+1], depth=4)
                i += 1
            i += 1
        return parse_result
Project: ShadowSocksShare-OpenShift | Author: the0demiurge
def get_href(string, pattern='.*'):
    # Note: the variable-width lookbehind (\s+) requires the third-party
    # regex module; the stdlib re supports only fixed-width lookbehinds.
    found = re.findall(r'(?<=<a\s+href=")[^"]+(?=">%s</a>)' % pattern, string)
    if found:
        return found[0]
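
A hedged usage sketch with a hypothetical anchor tag:

print(get_href('<a href="https://example.com">home</a>', 'home'))
# https://example.com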
Project: oa_qian | Author: sunqb
def test_named_lists(self):
        options = [u"one", u"two", u"three"]
        self.assertEqual(regex.match(ur"333\L<bar>444", u"333one444",
          bar=options).group(), u"333one444")
        self.assertEqual(regex.match(ur"(?i)333\L<bar>444", u"333TWO444",
          bar=options).group(), u"333TWO444")
        self.assertEqual(regex.match(ur"333\L<bar>444", u"333four444",
          bar=options), None)

        options = ["one", "two", "three"]
        self.assertEqual(regex.match(r"333\L<bar>444", "333one444",
          bar=options).group(), "333one444")
        self.assertEqual(regex.match(r"(?i)333\L<bar>444", "333TWO444",
          bar=options).group(), "333TWO444")
        self.assertEqual(regex.match(r"333\L<bar>444", "333four444",
          bar=options), None)

        self.assertEqual(repr(type(regex.compile(r"3\L<bar>4\L<bar>+5",
          bar=["one", "two", "three"]))), self.PATTERN_CLASS)

        self.assertEqual(regex.findall(r"^\L<options>", "solid QWERT",
          options=set(['good', 'brilliant', '+s\\ol[i}d'])), [])
        self.assertEqual(regex.findall(r"^\L<options>", "+solid QWERT",
          options=set(['good', 'brilliant', '+solid'])), ['+solid'])

        options = [u"STRASSE"]
        self.assertEqual(regex.match(ur"(?fiu)\L<words>",
          u"stra\N{LATIN SMALL LETTER SHARP S}e", words=options).span(), (0,
          6))

        options = [u"STRASSE", u"stress"]
        self.assertEqual(regex.match(ur"(?fiu)\L<words>",
          u"stra\N{LATIN SMALL LETTER SHARP S}e", words=options).span(), (0,
          6))

        options = [u"stra\N{LATIN SMALL LETTER SHARP S}e"]
        self.assertEqual(regex.match(ur"(?fiu)\L<words>", u"STRASSE",
          words=options).span(), (0, 7))

        options = ["kit"]
        self.assertEqual(regex.search(ur"(?iu)\L<words>", u"SKITS",
          words=options).span(), (1, 4))
        self.assertEqual(regex.search(ur"(?iu)\L<words>",
          u"SK\N{LATIN CAPITAL LETTER I WITH DOT ABOVE}TS",
          words=options).span(), (1, 4))

        self.assertEqual(regex.search(ur"(?fiu)\b(\w+) +\1\b",
          u" stra\N{LATIN SMALL LETTER SHARP S}e STRASSE ").span(), (1, 15))
        self.assertEqual(regex.search(ur"(?fiu)\b(\w+) +\1\b",
          u" STRASSE stra\N{LATIN SMALL LETTER SHARP S}e ").span(), (1, 15))

        self.assertEqual(regex.search(r"^\L<options>$", "", options=[]).span(),
          (0, 0))
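
A hedged Python 3 sketch of the \L<name> named-list feature exercised above (the Python 2 u/ur prefixes are not needed):

import regex

print(regex.findall(r"\L<words>", "one two four", words=["one", "two", "three"]))
# ['one', 'two']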
Project: wikt2pron | Author: abuccts
def transliterate(text):
    def repl1(match):
        c, d = match.group(1), match.group(2)
        if d == "":
            return c + "a"
        return c + d
    def repl2(match):
        opt, first, second, third = \
            match.group(1), match.group(2), match.group(3), match.group(4)
        if (re.match("[" + special_cons + "]", first) and \
            re.match("?", second) and \
            (first + second + third) not in perm_cl.keys()) or \
            re.match("?[???]", first + second):
            return "a" + opt + first + second + third
        return "" + opt + first + second + third
    def repl3(match):
        succ, prev = match.group(1), match.group(2)
        if succ + prev == "a":
            return succ + "??" + prev
        if succ == "" and re.match("[" + vowel + "]", prev):
            return succ + "?" + prev
        if succ in nasal_assim.keys():
            return succ + nasal_assim[succ] + prev
        return succ + "n" + prev
    def repl4(match):
        k = match.group()
        if k in conv.keys():
            return conv[k]
        return k

    text = re.sub("([" + all_cons + "]??)([" + vowel + "?]?)", repl1, text)

    for word in re.findall("[?-?a]+", text):
        orig_word = str(word)
        rev_word = word[::-1]
        rev_word = re.sub("^a(??)([" + all_cons + "])(.)(.?)", repl2, rev_word)
        while re.match(syncope_pattern, rev_word):
            rev_word = re.sub(syncope_pattern, r"\1\2\3\4", rev_word)
        rev_word = re.sub("(.?)?(.)", repl3, rev_word)
        text = re.sub(orig_word, rev_word[::-1], text)

    text = re.sub(".??", repl4, text)
    text = re.sub("a([iu])?", r"a?\1", text)
    text = re.sub("???", repl4, text)
    return unicodedata.normalize("NFC", text)
Project: ShadowSocksShare-OpenShift | Author: the0demiurge
def request_iss(url='http://ss.ishadowx.com/'):
    print('req iss...')

    try:
        data = requests.get(url)
        soup = BeautifulSoup(data.text, 'html.parser')
    except Exception as e:
        logging.exception(e, stack_info=True)
        return [], {'message': str(e), 'url': '', 'name': ''}

    try:

        info = {
            'message': soup.find('div', attrs={'id': 'portfolio'}).find('div', attrs={'class': 'section-title text-center center'}).text,
            'name': 'ishadowx',
            'url': url}

        '''servers[-1]['name'] = tmp[0]
        servers[-1]['server'] = tmp[0]
        servers[-1]['server_port'] = tmp[0]
        servers[-1]['password'] = tmp[0]
        servers[-1]['method'] = tmp[0]
        servers[-1]['ssr_protocol'] = tmp[0]
        servers[-1]['obfs'] = tmp[0]'''

        soup = BeautifulSoup(data.text, 'html.parser')
        server_data = soup.find_all('div', attrs={'class': 'hover-text'})
        servers = list()
    except Exception as e:
        logging.exception(e, stack_info=True)
        return [], {'message': str(e), 'url': '', 'name': ''}

    for i, server in enumerate(server_data):
        try:
            servers.append(dict())
            # reassigning server_data here does not affect the outer loop,
            # which still iterates over the original find_all() result
            server_data = server.text.strip().split('\n')
            servers[-1]['server'] = server_data[0].split(':')[-1].strip()
            servers[-1]['server_port'] = re.findall(r'\d+', server_data[1])[0]
            servers[-1]['remarks'] = ' '.join(['ss.ishadowx.com', str(i)])
            servers[-1]['password'] = server_data[2].split(':')[-1].strip()
            servers[-1]['method'] = server_data[3].split(':')[-1].strip()
            if 'QR' not in server_data[4]:
                servers[-1]['ssr_protocol'], servers[-1]['obfs'] = server_data[4].strip().split(maxsplit=1)
                servers[-1]['remarks'] = ' '.join([servers[-1]['remarks'], 'SSR'])
        except Exception as e:
            logging.exception(e, stack_info=True)
    return servers, info
Project: ShadowSocksShare-OpenShift | Author: the0demiurge
def request_newpac(url='https://github.com/Alvin9999/new-pac/wiki/ss%E5%85%8D%E8%B4%B9%E8%B4%A6%E5%8F%B7'):
    data = requests.get(url)
    soup = BeautifulSoup(data.text, 'html.parser')

    ss_list = list()

    for i in soup.find_all('p'):
        if re.match('\<p\>\s*???\d+[^:?]*[:?]', str(i)):
            ss_list.append(str(i))

    servers = list()
    for i in ss_list:
        servers.append(dict())
        servers[-1]['string'] = i
        # name
        tmp = re.findall('???\d+[^:?]*(?=\s*[:?])', i)
        if tmp:
            servers[-1]['remarks'] = tmp[0]

        # server
        tmp = re.findall('(?<=???\s*\d+[^:?]*[:?]\s*[^a-zA-Z0-9_]*)[\w\d\.]+', i)
        if tmp:
            servers[-1]['server'] = tmp[0]

        # server_port
        tmp = re.findall('(?<=??\s*[^:?]*[:?]\s*[^a-zA-Z0-9_]*)\d+', i)
        if tmp:
            servers[-1]['server_port'] = tmp[0]

        # password
        tmp = re.findall('(?<=??\s*[^:?]*[:?]\s*[^a-zA-Z0-9_]*)[a-zA-Z\d\.\+\-_\*\\/]+', i)
        if tmp:
            servers[-1]['password'] = tmp[0]

        # method
        tmp = re.findall('(?<=???[??]\s*[^:?]*[:?]\s*[^a-zA-Z0-9_]*)[a-zA-Z\d\.\+\-_\*\\/]+', i)
        if tmp:
            servers[-1]['method'] = tmp[0]

        # ssr_protocol
        tmp = re.findall('(?<=SSR??\s*[^:?]*[:?]\s*[^a-zA-Z_0-9]*)[a-zA-Z\d\.\+\-_\*\\/]+', i)
        if tmp:
            servers[-1]['ssr_protocol'] = tmp[0]

        # obfs
        tmp = re.findall('(?<=??\s*[^:?]*[:?]\s*[^a-zA-Z0-9_]*)[a-zA-Z\d\.\+\-_\*\\/]+', i)
        if tmp:
            servers[-1]['obfs'] = tmp[0]
    info = {'message': '', 'name': 'new-pac', 'url': url}
    return servers, info