Python regex 模块,IGNORECASE 实例源码

我们从Python开源项目中,提取了以下23个代码示例,用于说明如何使用regex.IGNORECASE

项目:DrQA    作者:facebookresearch    | 项目源码 | 文件源码
def __init__(self, **kwargs):
    """Compile the tokenizer's master pattern and read its options.

    Args:
        annotators: None or empty set (only tokenizes).
        substitutions: if true, normalizes some token types (e.g. quotes).
    """
    # One alternative per token class; the sub-patterns are read from
    # attributes on ``self`` (DIGIT, TITLE, ...). The ``ellipses`` group is
    # written ``(?<name>...)``, which the ``regex`` module accepts alongside
    # the ``(?P<name>...)`` spelling used by the other groups.
    self._regexp = regex.compile(
        '(?P<digit>%s)|(?P<title>%s)|(?P<abbr>%s)|(?P<neg>%s)|(?P<hyph>%s)|'
        '(?P<contr1>%s)|(?P<alphanum>%s)|(?P<contr2>%s)|(?P<sdquote>%s)|'
        '(?P<edquote>%s)|(?P<ssquote>%s)|(?P<esquote>%s)|(?P<dash>%s)|'
        '(?<ellipses>%s)|(?P<punct>%s)|(?P<nonws>%s)' %
        (self.DIGIT, self.TITLE, self.ABBRV, self.NEGATION, self.HYPHEN,
         self.CONTRACTION1, self.ALPHA_NUM, self.CONTRACTION2,
         self.START_DQUOTE, self.END_DQUOTE, self.START_SQUOTE,
         self.END_SQUOTE, self.DASH, self.ELLIPSES, self.PUNCT,
         self.NON_WS),
        flags=regex.IGNORECASE | regex.UNICODE | regex.MULTILINE
    )
    # ``annotators`` may be None (see Args), so test truthiness rather than
    # calling len() on it, which would raise TypeError for None.
    annotators = kwargs.get('annotators')
    if annotators:
        logger.warning('%s only tokenizes! Skipping annotators: %s',
                       type(self).__name__, annotators)
    # This tokenizer never annotates, whatever was requested.
    self.annotators = set()
    self.substitutions = kwargs.get('substitutions', True)
项目:BarcSeek    作者:NCBI-Hackathons    | 项目源码 | 文件源码
def barcode_to_regex(barcode: str, error_rate: Optional[int]=None):
    """Convert a barcode string to a regex pattern

    The barcode is upper-cased and split into consecutive runs. A run of
    ``N`` becomes a capture group of that many ``[ACGT]`` classes; any other
    run becomes a capture group of the literal bases. Groups are emitted in
    the order the runs appear, so a barcode may start or end with either
    kind of run.

    barcode [str]           The barcode string to turn into a regex
    error_rate [int]=None   The error rate; when truthy, every group gets
                            the fuzzy suffix ``{e<=error_rate}``

    Returns the pattern compiled with ``regex.ENHANCEMATCH``.
    """
    fuzzy_suffix = '{e<=' + str(error_rate) + '}' if error_rate else ''
    groups = []
    # groupby yields alternating N / non-N runs in their original order.
    # The previous index-based pairing of literals with N runs put the groups
    # in the wrong order whenever the barcode began with N.
    for is_umi, run in itertools.groupby(barcode.upper(),
                                         key=lambda base: base == 'N'):
        run = ''.join(run)
        if is_umi:
            group = '(' + '[ACGT]' * len(run) + ')'
        else:
            group = '(' + run + ')'
        groups.append(group + fuzzy_suffix)
    return regex.compile(''.join(groups), regex.ENHANCEMATCH)
项目:DrQA_cn    作者:AmoseKang    | 项目源码 | 文件源码
def __init__(self, **kwargs):
    """Compile the tokenizer's master pattern and read its options.

    Args:
        annotators: None or empty set (only tokenizes).
        substitutions: if true, normalizes some token types (e.g. quotes).
    """
    # One alternative per token class; the sub-patterns are read from
    # attributes on ``self`` (DIGIT, TITLE, ...). The ``ellipses`` group is
    # written ``(?<name>...)``, which the ``regex`` module accepts alongside
    # the ``(?P<name>...)`` spelling used by the other groups.
    self._regexp = regex.compile(
        '(?P<digit>%s)|(?P<title>%s)|(?P<abbr>%s)|(?P<neg>%s)|(?P<hyph>%s)|'
        '(?P<contr1>%s)|(?P<alphanum>%s)|(?P<contr2>%s)|(?P<sdquote>%s)|'
        '(?P<edquote>%s)|(?P<ssquote>%s)|(?P<esquote>%s)|(?P<dash>%s)|'
        '(?<ellipses>%s)|(?P<punct>%s)|(?P<nonws>%s)' %
        (self.DIGIT, self.TITLE, self.ABBRV, self.NEGATION, self.HYPHEN,
         self.CONTRACTION1, self.ALPHA_NUM, self.CONTRACTION2,
         self.START_DQUOTE, self.END_DQUOTE, self.START_SQUOTE,
         self.END_SQUOTE, self.DASH, self.ELLIPSES, self.PUNCT,
         self.NON_WS),
        flags=regex.IGNORECASE | regex.UNICODE | regex.MULTILINE
    )
    # ``annotators`` may be None (see Args), so test truthiness rather than
    # calling len() on it, which would raise TypeError for None.
    annotators = kwargs.get('annotators')
    if annotators:
        logger.warning('%s only tokenizes! Skipping annotators: %s',
                       type(self).__name__, annotators)
    # This tokenizer never annotates, whatever was requested.
    self.annotators = set()
    self.substitutions = kwargs.get('substitutions', True)
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_constants(self):
    """Fail unless each one-letter flag alias equals its long-named flag."""
    alias_pairs = (
        ('I', 'IGNORECASE'),
        ('L', 'LOCALE'),
        ('M', 'MULTILINE'),
        ('S', 'DOTALL'),
        ('X', 'VERBOSE'),
    )
    # Attributes are looked up pair by pair, in the listed order.
    for short_name, long_name in alias_pairs:
        if getattr(regex, short_name) != getattr(regex, long_name):
            self.fail()
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_ascii_and_unicode_flag(self):
    """Check case-insensitive and word-class matching under the ASCII and
    UNICODE flags, given as arguments or inline, and that mixing them fails.
    """
    # Unicode patterns.
    for mode in (0, regex.UNICODE):
        folded = regex.compile(u'\xc0', mode | regex.IGNORECASE)
        self.assertEqual(bool(folded.match(u'\xe0')), True)
        word = regex.compile(u'\w', mode)
        self.assertEqual(bool(word.match(u'\xe0')), True)

    # ASCII requested via the flags argument or inline (?a): no match.
    ascii_cases = (
        (u'\xc0', regex.ASCII | regex.IGNORECASE),
        (u'(?a)\xc0', regex.IGNORECASE),
        (u'\w', regex.ASCII),
    )
    for source, mode in ascii_cases:
        self.assertEqual(regex.compile(source, mode).match(u'\xe0'), None)
    self.assertEqual(regex.compile(u'(?a)\w').match(u'\xe0'), None)

    # String patterns.
    for mode in (0, regex.ASCII):
        folded = regex.compile('\xc0', mode | regex.IGNORECASE)
        self.assertEqual(folded.match('\xe0'), None)
        # Compiled without ``mode``, as in the original test.
        word = regex.compile('\w')
        self.assertEqual(word.match('\xe0'), None)

    def compile_mixed():
        return regex.compile('(?au)\w')

    self.assertRaisesRegex(ValueError, self.MIXED_FLAGS, compile_mixed)
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def calc_unwanted_chars_re(self):
    """Compile the patterns stored on the instance for unwanted text.

    ``unwanted_chars_re`` matches runs of characters that are neither
    AlNum nor listed in ``self._safe_chars``. When ``self._stop_words`` is
    truthy, ``unwanted_chars_and_words_re`` additionally matches any of the
    stop words not adjacent to an AlNum character; otherwise it is None.
    """
    escaped_safe = re.escape(self._safe_chars or '')
    chars_source = u'[^\p{{AlNum}}{safe_chars}]+'.format(safe_chars=escaped_safe)
    self.unwanted_chars_re = re.compile(chars_source, re.IGNORECASE)

    if not self._stop_words:
        self.unwanted_chars_and_words_re = None
        return

    # \L<stop_words> is a named list, supplied through the keyword argument.
    words_source = u'|(?<!\p{AlNum})(?:\L<stop_words>)(?!\p{AlNum})'
    self.unwanted_chars_and_words_re = re.compile(
        chars_source + words_source,
        re.IGNORECASE,
        stop_words=self._stop_words,
    )
项目:DrQA    作者:facebookresearch    | 项目源码 | 文件源码
def regex_match(text, pattern):
    """Test if a regex pattern is contained within a text.

    Args:
        text: string to search.
        pattern: regular expression source; compiled with IGNORECASE,
            UNICODE and MULTILINE.

    Returns:
        True if the pattern matches anywhere in ``text``. False if it does
        not match or if the pattern cannot be compiled.
    """
    try:
        compiled = re.compile(
            pattern,
            flags=re.IGNORECASE | re.UNICODE | re.MULTILINE,
        )
    except Exception:
        # An uncompilable pattern counts as "no match". Catching Exception
        # rather than BaseException lets KeyboardInterrupt and SystemExit
        # propagate.
        return False
    return compiled.search(text) is not None
项目:DrQA    作者:facebookresearch    | 项目源码 | 文件源码
def regex_match_score(prediction, pattern):
    """Check if the prediction matches the given regular expression.

    Args:
        prediction: string to test.
        pattern: regular expression source; compiled with IGNORECASE,
            UNICODE and MULTILINE.

    Returns:
        True if the pattern matches at the start of ``prediction``
        (``match`` semantics). False if it does not, or if the pattern
        cannot be compiled, in which case a warning is logged.
    """
    try:
        compiled = re.compile(
            pattern,
            flags=re.IGNORECASE | re.UNICODE | re.MULTILINE
        )
    except Exception:
        # Exception rather than BaseException, so KeyboardInterrupt and
        # SystemExit still propagate. ``warning`` replaces the deprecated
        # ``warn`` alias.
        logger.warning('Regular expression failed to compile: %s', pattern)
        return False
    return compiled.match(prediction) is not None
项目:DrQA    作者:facebookresearch    | 项目源码 | 文件源码
def __init__(self, **kwargs):
    """Compile the two-alternative token pattern.

    Args:
        annotators: None or empty set (only tokenizes).
    """
    # Two capture groups: self.ALPHA_NUM first, then self.NON_WS.
    self._regexp = regex.compile(
        '(%s)|(%s)' % (self.ALPHA_NUM, self.NON_WS),
        flags=regex.IGNORECASE | regex.UNICODE | regex.MULTILINE
    )
    # ``annotators`` may be None (see Args), so test truthiness rather than
    # calling len() on it, which would raise TypeError for None.
    annotators = kwargs.get('annotators')
    if annotators:
        logger.warning('%s only tokenizes! Skipping annotators: %s',
                       type(self).__name__, annotators)
    # This tokenizer never annotates, whatever was requested.
    self.annotators = set()
项目:jok3r    作者:koutto    | 项目源码 | 文件源码
def replaceURL(self, url):
    """
    Replace [URL]

    Every case-insensitive occurrence of the tag in self.parsed_cmdline is
    replaced by ``url``, inserted verbatim.
    """
    # Raw string: '\[' is an invalid escape in a normal string literal.
    pattern = re.compile(r'\[URL\]', re.IGNORECASE)
    # A callable replacement is not parsed as a template, so backslashes
    # in ``url`` (e.g. "\n", "\1") stay literal.
    self.parsed_cmdline = pattern.sub(lambda _match: url, self.parsed_cmdline)
项目:jok3r    作者:koutto    | 项目源码 | 文件源码
def replaceHOST(self, host):
    """
    Replace [HOST]

    Every case-insensitive occurrence of the tag in self.parsed_cmdline is
    replaced by ``host``, inserted verbatim.
    """
    # Raw string: '\[' is an invalid escape in a normal string literal.
    pattern = re.compile(r'\[HOST\]', re.IGNORECASE)
    # A callable replacement is not parsed as a template, so backslashes
    # in ``host`` stay literal.
    self.parsed_cmdline = pattern.sub(lambda _match: host, self.parsed_cmdline)
项目:jok3r    作者:koutto    | 项目源码 | 文件源码
def replaceIP(self, ip):
    """
    Replace [IP]

    Every case-insensitive occurrence of the tag in self.parsed_cmdline is
    replaced by ``ip``, inserted verbatim.
    """
    # Raw string: '\[' is an invalid escape in a normal string literal.
    pattern = re.compile(r'\[IP\]', re.IGNORECASE)
    # A callable replacement is not parsed as a template, so backslashes
    # in ``ip`` stay literal.
    self.parsed_cmdline = pattern.sub(lambda _match: ip, self.parsed_cmdline)
项目:jok3r    作者:koutto    | 项目源码 | 文件源码
def replacePORT(self, port):
    """
    Replace [PORT]

    Every case-insensitive occurrence of the tag in self.parsed_cmdline is
    replaced by ``str(port)``, inserted verbatim.
    """
    # Raw string: '\[' is an invalid escape in a normal string literal.
    pattern = re.compile(r'\[PORT\]', re.IGNORECASE)
    port_text = str(port)
    # A callable replacement is not parsed as a template, so the text is
    # inserted exactly as given.
    self.parsed_cmdline = pattern.sub(lambda _match: port_text, self.parsed_cmdline)
项目:jok3r    作者:koutto    | 项目源码 | 文件源码
def replacePROTOCOL(self, protocol):
    """
    Replace [PROTOCOL]

    Every case-insensitive occurrence of the tag in self.parsed_cmdline is
    replaced by ``protocol``, inserted verbatim.
    """
    # Raw string: '\[' is an invalid escape in a normal string literal.
    pattern = re.compile(r'\[PROTOCOL\]', re.IGNORECASE)
    # A callable replacement is not parsed as a template, so backslashes
    # in ``protocol`` stay literal.
    self.parsed_cmdline = pattern.sub(lambda _match: protocol, self.parsed_cmdline)
项目:jok3r    作者:koutto    | 项目源码 | 文件源码
def replaceOUTPUT(self, output_file):
    """
    Replace [OUTPUT] if present
    Otherwise, add at the end of the command: 2>&1 | tee [OUTPUT]

    In both cases the file name is wrapped in double quotes and inserted
    verbatim.
    """
    # Raw string: '\[' is an invalid escape in a normal string literal.
    pattern = re.compile(r'\[OUTPUT\]', re.IGNORECASE)
    quoted = '"{0}"'.format(output_file)
    if pattern.search(self.parsed_cmdline):
        # A callable replacement is not parsed as a template, so a path
        # with backslashes (e.g. C:\tmp\new.txt) is kept as-is.
        self.parsed_cmdline = pattern.sub(lambda _match: quoted, self.parsed_cmdline)
    else:
        self.parsed_cmdline += ' 2>&1 | tee ' + quoted
项目:jok3r    作者:koutto    | 项目源码 | 文件源码
def replaceOUTPUTDIR(self, output_dir):
    """
    Replace [OUTPUTDIR] if present

    Every case-insensitive occurrence of the tag in self.parsed_cmdline is
    replaced by ``output_dir``, inserted verbatim.
    """
    # Raw string: '\[' is an invalid escape in a normal string literal.
    pattern = re.compile(r'\[OUTPUTDIR\]', re.IGNORECASE)
    # A callable replacement is not parsed as a template; a plain string
    # such as C:\Users\out would raise "bad escape" or be altered.
    self.parsed_cmdline = pattern.sub(lambda _match: output_dir, self.parsed_cmdline)
项目:jok3r    作者:koutto    | 项目源码 | 文件源码
def replaceTOOLBOXDIR(self, toolbox_dir):
    """
    Replace [TOOLBOXDIR] (toolbox directory)

    Every case-insensitive occurrence of the tag in self.parsed_cmdline is
    replaced by ``toolbox_dir``, inserted verbatim.
    """
    # Raw string: '\[' is an invalid escape in a normal string literal.
    pattern = re.compile(r'\[TOOLBOXDIR\]', re.IGNORECASE)
    # A callable replacement is not parsed as a template, so backslashes
    # in the path stay literal.
    self.parsed_cmdline = pattern.sub(lambda _match: toolbox_dir, self.parsed_cmdline)
项目:jok3r    作者:koutto    | 项目源码 | 文件源码
def replaceWORDLISTSDIR(self, wordlists_dir):
    """
    Replace [WORDLISTSDIR] (wordlists directory)

    Every case-insensitive occurrence of the tag in self.parsed_cmdline is
    replaced by ``wordlists_dir``, inserted verbatim.
    """
    # Raw string: '\[' is an invalid escape in a normal string literal.
    pattern = re.compile(r'\[WORDLISTSDIR\]', re.IGNORECASE)
    # A callable replacement is not parsed as a template, so backslashes
    # in the path stay literal.
    self.parsed_cmdline = pattern.sub(lambda _match: wordlists_dir, self.parsed_cmdline)
项目:jok3r    作者:koutto    | 项目源码 | 文件源码
def replaceSpecificTags(self, service, specific_args):
    """
    Replace specific tags (depends on the selected service)
    eg. for http :
    [SSL option="value"]
    [CMS cms1="val" cms2="val" ... default="val"]

    For every tag listed in Constants.SPECIFIC_TOOL_OPTIONS[service], each
    occurrence in self.parsed_cmdline is rewritten according to the tag's
    option type:

    - 'boolean': replaced by the tag's ``option`` value when
      specific_args[tag] is truthy, removed otherwise.
    - 'list_member': replaced by the value whose name equals
      specific_args[tag]; when nothing is selected, by the value named
      ``default`` (matched case-insensitively), or removed if the tag has
      no default.

    Tags of any other option type are left untouched. Tag names are
    matched case-insensitively.
    """
    for tag in Constants.SPECIFIC_TOOL_OPTIONS[service].keys():
        option_type = SpecificOptions.specificOptionType(service, tag)
        selected = specific_args[tag] if tag in specific_args.keys() else None

        if option_type == 'boolean':
            # The tag name is escaped so it is always matched literally.
            pattern = re.compile(
                r'\[' + re.escape(tag.upper())
                + r'''\s+option\s*=\s*['"](?P<option>.*?)['"]\s*\]''',
                re.IGNORECASE)
            if selected:
                # Callable replacement: each occurrence yields its own
                # option text, inserted verbatim. A command line without
                # the tag is simply left unchanged.
                self.parsed_cmdline = pattern.sub(
                    lambda found: found.group('option'), self.parsed_cmdline)
            else:
                self.parsed_cmdline = pattern.sub('', self.parsed_cmdline)

        elif option_type == 'list_member':
            # The group repeats, so ``capturesdict`` returns every
            # name/value pair of the tag as two parallel lists.
            pattern = regex.compile(
                r'\[' + regex.escape(tag.upper())
                + r'''(?:\s+(?P<name>\w+)\s*=\s*['"](?P<value>[ a-zA-Z0-9_,;:-]*)['"])+\s*\]''',
                regex.IGNORECASE)

            def pick_member(found, selected=selected):
                """Return the replacement text for one matched tag."""
                captures = found.capturesdict()
                names, values = captures['name'], captures['value']
                if selected:
                    if selected in names:
                        return values[names.index(selected)]
                    # NOTE(review): a selection that is not listed leaves
                    # the tag in place, as before (the old code got there
                    # through a swallowed ValueError). Confirm whether
                    # falling back to ``default`` is wanted instead.
                    return found.group(0)
                lowered = [name.lower() for name in names]
                if 'default' in lowered:
                    # Index the lowered list, so "Default" is found too.
                    return values[lowered.index('default')]
                return ''

            self.parsed_cmdline = pattern.sub(pick_member, self.parsed_cmdline)
项目:DrQA_cn    作者:AmoseKang    | 项目源码 | 文件源码
def regex_match(text, pattern):
    """Test if a regex pattern is contained within a text.

    Args:
        text: string to search.
        pattern: regular expression source; compiled with IGNORECASE,
            UNICODE and MULTILINE.

    Returns:
        True if the pattern matches anywhere in ``text``. False if it does
        not match or if the pattern cannot be compiled.
    """
    try:
        compiled = re.compile(
            pattern,
            flags=re.IGNORECASE | re.UNICODE | re.MULTILINE,
        )
    except Exception:
        # An uncompilable pattern counts as "no match". Catching Exception
        # rather than BaseException lets KeyboardInterrupt and SystemExit
        # propagate.
        return False
    return compiled.search(text) is not None
项目:DrQA_cn    作者:AmoseKang    | 项目源码 | 文件源码
def regex_match_score(prediction, pattern):
    """Check if the prediction matches the given regular expression.

    Args:
        prediction: string to test.
        pattern: regular expression source; compiled with IGNORECASE,
            UNICODE and MULTILINE.

    Returns:
        True if the pattern matches at the start of ``prediction``
        (``match`` semantics). False if it does not, or if the pattern
        cannot be compiled, in which case a warning is logged.
    """
    try:
        compiled = re.compile(
            pattern,
            flags=re.IGNORECASE | re.UNICODE | re.MULTILINE
        )
    except Exception:
        # Exception rather than BaseException, so KeyboardInterrupt and
        # SystemExit still propagate. ``warning`` replaces the deprecated
        # ``warn`` alias.
        logger.warning('Regular expression failed to compile: %s', pattern)
        return False
    return compiled.match(prediction) is not None
项目:DrQA_cn    作者:AmoseKang    | 项目源码 | 文件源码
def __init__(self, **kwargs):
    """Compile the two-alternative token pattern.

    Args:
        annotators: None or empty set (only tokenizes).
    """
    # Two capture groups: self.ALPHA_NUM first, then self.NON_WS.
    self._regexp = regex.compile(
        '(%s)|(%s)' % (self.ALPHA_NUM, self.NON_WS),
        flags=regex.IGNORECASE | regex.UNICODE | regex.MULTILINE
    )
    # ``annotators`` may be None (see Args), so test truthiness rather than
    # calling len() on it, which would raise TypeError for None.
    annotators = kwargs.get('annotators')
    if annotators:
        logger.warning('%s only tokenizes! Skipping annotators: %s',
                       type(self).__name__, annotators)
    # This tokenizer never annotates, whatever was requested.
    self.annotators = set()
项目:memex-dossier-open    作者:dossier    | 项目源码 | 文件源码
def make_xpath_ranges(html, phrase):
    '''Given a HTML string and a `phrase`, build a regex to find offsets
    for the phrase, and then build a list of `XPathRange` objects for
    it.  If this fails, return empty list.

    Each returned item is a dict with `start` and `end` entries, each
    holding a `node` xpath and an `idx` offset. An empty `html`, a phrase
    that cannot be decoded as UTF-8, or a failure while computing the
    xpaths all yield an empty list.

    '''
    if not html:
        return []
    if not isinstance(phrase, unicode):
        try:
            phrase = phrase.decode('utf8')
        except Exception:
            # The message has a %r placeholder, so pass the phrase for it;
            # without an argument the literal "%r" was logged.
            logger.info('failed %r.decode("utf8")', phrase, exc_info=True)
            return []
    # NOTE(review): the phrase is compiled as-is, so regex metacharacters
    # in it are live and an invalid pattern raises here instead of
    # returning [] -- confirm that this is intended.
    phrase_re = re.compile(
        phrase, flags=re.UNICODE | re.IGNORECASE | re.MULTILINE)
    # (start, end) character offsets of every non-overlapping match.
    spans = [match.span()
             for match in phrase_re.finditer(html, overlapped=False)]

    # Convert the character offsets to xpath ranges; any failure in the
    # helper is logged and reported as "no ranges".
    try:
        xpath_ranges = list(char_offsets_to_xpaths(html, spans))
    except Exception:
        logger.info('failed to get xpaths', exc_info=True)
        return []

    # Falsy entries produced by the helper are skipped.
    return [
        dict(
            start=dict(node=xpath_range.start_xpath,
                       idx=xpath_range.start_offset + 1),
            end=dict(node=xpath_range.end_xpath,
                     idx=xpath_range.end_offset))
        for xpath_range in xpath_ranges
        if xpath_range
    ]