Python unicodedata 模块,lookup() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用unicodedata.lookup()

项目:civet    作者:TheJacksonLaboratory    | 项目源码 | 文件源码
def cleanup_command_line():
    """Replace typographic dashes and quotes in ``sys.argv`` with ASCII.

    Every argument is decoded with ``sys.stdin.encoding``, the characters
    named in the table below are substituted, and the re-encoded result is
    written back into ``sys.argv`` in place.  Nothing is done when stdin
    reports no encoding or plain ``'ascii'``.

    Relies on the Python 2 ``unicode`` type and ``dict.iteritems``.
    """
    stdin_encoding = sys.stdin.encoding
    if not stdin_encoding or stdin_encoding == 'ascii':
        return

    # Unicode character name -> ASCII replacement text.
    ascii_equivalents = {
        'EN DASH': '-',
        'EM DASH': '--',
        'LEFT DOUBLE QUOTATION MARK': '"',
        'RIGHT DOUBLE QUOTATION MARK': '"',
        'LEFT SINGLE QUOTATION MARK': "'",
        'RIGHT SINGLE QUOTATION MARK': "'",
    }

    for position, raw_arg in enumerate(sys.argv):
        text = unicode(raw_arg, stdin_encoding)
        for char_name, plain in ascii_equivalents.iteritems():
            text = text.replace(unicodedata.lookup(char_name), plain)
        # Unencodable characters are replaced instead of raising; whether
        # 'strict' would be the better choice was left open by the author.
        sys.argv[position] = text.encode(stdin_encoding, 'replace')
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def parse_repl_named_char(source):
    """Parse a ``{NAME}`` named character in a replacement string.

    Returns the code point of the named character.  When the braces are
    missing or unbalanced, the source position is restored and ``None`` is
    returned.  An unknown name raises ``error``.
    """
    start = source.pos
    if source.match("{"):
        char_name = source.get_while(ALPHA | set(" "))

        if source.match("}"):
            try:
                char = unicodedata.lookup(char_name)
            except KeyError:
                raise error("undefined character name", source.string,
                  source.pos)
            return ord(char)

    # Not a well-formed named character: rewind so the caller can retry.
    source.pos = start
    return None
项目:leetcode    作者:thomasyimgit    | 项目源码 | 文件源码
def unicode_name_matches(self, text):
        u"""Complete a backslash-prefixed Unicode character name.

        The text after the last backslash is looked up as a character name,
        e.g. ``\\GREEK SMALL LETTER ETA`` -> ``η``.

        A match is reported only when the character can be part of a valid
        Python 3 identifier (combining characters included).

        Returns ``(matched_text, [character])``, or ``('', [])`` when
        nothing matches.  Used on Python 3 only.
        """
        backslash_at = text.rfind('\\')
        if backslash_at == -1:
            return u'', []

        name = text[backslash_at + 1:]
        try:
            char = unicodedata.lookup(name)
        except KeyError:
            return u'', []

        # Prefixing 'a' lets combining characters qualify as well.
        if ('a' + char).isidentifier():
            return '\\' + name, [char]
        return u'', []
项目:zippy    作者:securesystemslab    | 项目源码 | 文件源码
def _token_splittable(token):
    """
    Tell whether a token name may be split into several tokens.

    Tokens containing an underscore, tokens naming a Greek letter, and
    tokens of length one or less are not splittable. This supports the
    implicit conversion of expressions like 'xyz' into 'x*y*z'.
    """
    if '_' in token:
        return False
    try:
        unicodedata.lookup('GREEK SMALL LETTER ' + token)
    except KeyError:
        # Not a Greek letter name: splittable when longer than one character.
        return len(token) > 1
    return False
项目:espedite    作者:idimitrakopoulos    | 项目源码 | 文件源码
def __init__(self, msg='', maxspin=0, minspin=10, speed=5):
        """Set up the spinner state, then initialise the thread.

        msg     -- message stored in ``self.msg`` (printed first, if any)
        maxspin -- stored as ``self.max``
        minspin -- stored as ``self.min``
        speed   -- number of spins per second; determines ``self.waittime``
        """
        self.out = sys.stdout
        self.msg = msg
        # Complete printed string
        self.string = ''
        # Count of a spin
        self.count = 0
        self.flag = False
        self.max = maxspin
        self.min = minspin
        # Each spin has four frames, hence the factor of 4.
        self.waittime = 1.0 / float(speed * 4)
        if os.name == 'posix':
            dash = unicodedata.lookup('FIGURE DASH')
        else:
            # The unicode dash character does not show
            # up properly in Windows console.
            dash = u'-'
        self.spinchars = (dash, u'\\ ', u'| ', u'/ ')
        threading.Thread.__init__(self, None, None, "Spin Thread")
项目:chalktalk_docs    作者:loremIpsum1771    | 项目源码 | 文件源码
def escape(m):
    """Return the character denoted by one matched backslash escape.

    ``m`` is a regex match whose group 0 is the whole escape (starting with
    a backslash) and whose group 1 is the text after the backslash.
    Handles simple escapes, ``\\x``/``\\u``/``\\U`` hex escapes,
    ``\\N{NAME}`` and octal escapes.  Raises ``ValueError`` for an unknown
    character name or an invalid octal escape.
    """
    whole, tail = m.group(0, 1)
    assert whole.startswith("\\")

    simple = simple_escapes.get(tail)
    if simple is not None:
        return simple
    if tail.startswith("x"):
        return chr(convert_hex(tail, 2))
    if tail.startswith('u'):
        return unichr(convert_hex(tail, 4))
    if tail.startswith('U'):
        return unichr(convert_hex(tail, 8))
    if tail.startswith('N'):
        import unicodedata
        # Drop the leading 'N' and the final character around the name.
        char_name = tail[1:-1]
        try:
            return unicodedata.lookup(char_name)
        except KeyError:
            raise ValueError("undefined character name %r" % char_name)

    # Anything else is treated as an octal escape.
    try:
        return chr(int(tail, 8))
    except ValueError:
        raise ValueError("invalid octal string escape ('\\%s')" % tail)
项目:web_ctp    作者:molebot    | 项目源码 | 文件源码
def test_aliases(self):
        # Exercise a hard-coded sample of NameAliases.txt entries: each alias
        # must resolve to its code point, differ from the primary name, and
        # be unknown to the 3.2.0 database.  The sample needs updating when
        # aliases are added (or the file could be downloaded and parsed
        # instead).  See #12753.
        alias_table = (
            ('LATIN CAPITAL LETTER GHA', 0x01A2),
            ('LATIN SMALL LETTER GHA', 0x01A3),
            ('KANNADA LETTER LLLA', 0x0CDE),
            ('LAO LETTER FO FON', 0x0E9D),
            ('LAO LETTER FO FAY', 0x0E9F),
            ('LAO LETTER RO', 0x0EA3),
            ('LAO LETTER LO', 0x0EA5),
            ('TIBETAN MARK BKA- SHOG GI MGO RGYAN', 0x0FD0),
            ('YI SYLLABLE ITERATION MARK', 0xA015),
            ('PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET', 0xFE18),
            ('BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS', 0x1D0C5),
        )
        for alias_name, code in alias_table:
            char = chr(code)
            self.checkletter(alias_name, char)
            primary_name = unicodedata.name(char)
            self.assertNotEqual(primary_name, alias_name)
            self.assertEqual(unicodedata.lookup(alias_name),
                             unicodedata.lookup(primary_name))
            with self.assertRaises(KeyError):
                unicodedata.ucd_3_2_0.lookup(alias_name)
项目:web_ctp    作者:molebot    | 项目源码 | 文件源码
def test_named_sequences_full(self):
        # Check every entry of NamedSequences.txt for the database version
        # reported by unicodedata.unidata_version; skip when the file
        # cannot be retrieved.
        url = ("http://www.unicode.org/Public/%s/ucd/NamedSequences.txt" %
               unicodedata.unidata_version)
        try:
            testdata = support.open_urlresource(url, encoding="utf-8",
                                                check=check_version)
        except (IOError, HTTPException):
            self.skipTest("Could not retrieve " + url)
        self.addCleanup(testdata.close)
        for raw_line in testdata:
            entry = raw_line.strip()
            # Only non-blank, non-comment lines carry "name;codepoints".
            if entry and not entry.startswith('#'):
                seqname, hex_codes = entry.split(';')
                expected = ''.join(map(chr, (int(h, 16) for h in hex_codes.split())))
                self.assertEqual(unicodedata.lookup(seqname), expected)
                with self.assertRaises(SyntaxError):
                    self.checkletter(seqname, None)
                with self.assertRaises(KeyError):
                    unicodedata.ucd_3_2_0.lookup(seqname)
项目:tichu-tournament    作者:aragos    | 项目源码 | 文件源码
def start_unichar(self, attr):
        """Handle a ``<unichar/>`` tag by emitting the character it denotes.

        ``attr`` maps attribute names to values.  The character comes from
        ``name`` (a Unicode character name) or ``code`` (an expression that
        evaluates to a code point).  Problems are reported through
        ``self._syntax_error``; after a bad ``name`` or ``code`` a NUL
        character is emitted in place of the requested one.  With no usable
        attribute nothing is emitted.  The ``unichar`` tag is always pushed.
        """
        if 'name' in attr:
            if 'code' in attr:
                self._syntax_error('<unichar/> invalid with both name and code attributes')
            try:
                v = unicodedata.lookup(attr['name'])
            except KeyError:
                self._syntax_error('<unichar/> invalid name attribute\n"%s"' % ascii(attr['name']))
                v = '\0'
        elif 'code' in attr:
            try:
                # SECURITY: eval() runs arbitrary expressions taken from the
                # markup.  Kept for compatibility; do not feed untrusted
                # input through this tag.
                v = int(eval(attr['code']))
                v = chr(v) if isPy3 else unichr(v)
            except Exception:
                # Exception (not a bare except) so that SystemExit and
                # KeyboardInterrupt are not swallowed.
                self._syntax_error('<unichar/> invalid code attribute %s' % ascii(attr['code']))
                v = '\0'
        else:
            v = None
            if attr:
                self._syntax_error('<unichar/> invalid attribute %s' % list(attr.keys())[0])

        if v is not None:
            self.handle_data(v)
        self._push('unichar',_selfClosingTag='unichar')
项目:Repobot    作者:Desgard    | 项目源码 | 文件源码
def unicode_name_matches(self, text):
        u"""Match LaTeX-like syntax for Unicode characters, based on the
        name of the character.

        Example: ``\\GREEK SMALL LETTER ETA`` -> ``η``

        Only characters that are valid in a Python 3 identifier match,
        including combining characters that would combine into one.

        Returns ``(matched_text, [character])`` or ``('', [])``.
        Used on Python 3 only.
        """
        no_match = (u'', [])
        last_backslash = text.rfind('\\')
        if last_backslash < 0:
            return no_match

        candidate = text[last_backslash + 1:]
        try:
            found = unicodedata.lookup(candidate)
        except KeyError:
            return no_match

        # allow combining chars: test the character after a leading letter
        if not ('a' + found).isidentifier():
            return no_match
        return '\\' + candidate, [found]
项目:ouroboros    作者:pybee    | 项目源码 | 文件源码
def test_aliases(self):
        # Verify a fixed sample of aliases from NameAliases.txt.  Extend the
        # sample when new aliases appear, or download and parse the file
        # instead.  See #12753.
        sample = [
            ('LATIN CAPITAL LETTER GHA', 0x01A2),
            ('LATIN SMALL LETTER GHA', 0x01A3),
            ('KANNADA LETTER LLLA', 0x0CDE),
            ('LAO LETTER FO FON', 0x0E9D),
            ('LAO LETTER FO FAY', 0x0E9F),
            ('LAO LETTER RO', 0x0EA3),
            ('LAO LETTER LO', 0x0EA5),
            ('TIBETAN MARK BKA- SHOG GI MGO RGYAN', 0x0FD0),
            ('YI SYLLABLE ITERATION MARK', 0xA015),
            ('PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET', 0xFE18),
            ('BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS', 0x1D0C5),
        ]
        for alias, codepoint in sample:
            character = chr(codepoint)
            self.checkletter(alias, character)
            # The alias must not be the character's primary name ...
            official = unicodedata.name(character)
            self.assertNotEqual(official, alias)
            # ... yet both names must resolve to the same character.
            self.assertEqual(unicodedata.lookup(alias),
                             unicodedata.lookup(official))
            # The 3.2.0 database is expected not to know the alias.
            with self.assertRaises(KeyError):
                unicodedata.ucd_3_2_0.lookup(alias)
项目:ouroboros    作者:pybee    | 项目源码 | 文件源码
def test_named_sequences_full(self):
        # Check all named sequences listed in NamedSequences.txt for the
        # database version in unicodedata.unidata_version; the test is
        # skipped when the file cannot be fetched.
        url = ("http://www.pythontest.net/unicode/%s/NamedSequences.txt" %
               unicodedata.unidata_version)
        try:
            testdata = support.open_urlresource(url, encoding="utf-8",
                                                check=check_version)
        except (OSError, HTTPException):
            self.skipTest("Could not retrieve " + url)
        self.addCleanup(testdata.close)
        for raw_line in testdata:
            record = raw_line.strip()
            is_data = bool(record) and not record.startswith('#')
            if not is_data:
                continue
            seqname, hex_fields = record.split(';')
            chars = [chr(int(field, 16)) for field in hex_fields.split()]
            self.assertEqual(unicodedata.lookup(seqname), ''.join(chars))
            with self.assertRaises(SyntaxError):
                self.checkletter(seqname, None)
            with self.assertRaises(KeyError):
                unicodedata.ucd_3_2_0.lookup(seqname)
项目:Menotexport    作者:Xunius    | 项目源码 | 文件源码
def _greekletters(letterlist):
    """Append lowercase and capitalised Greek-letter macros to
    ``_default_macro_list``.

    For each name in ``letterlist`` two ``(macro_name, character)`` pairs
    are appended: the name as given with the small letter, and the name
    with its first character upper-cased with the capital letter.
    """
    # Small forms that use a symbol name instead of "GREEK SMALL LETTER".
    small_name_overrides = {
        'EPSILON': "GREEK LUNATE EPSILON SYMBOL",
        'PHI': "GREEK PHI SYMBOL",
    }
    for letter in letterlist:
        upper_name = letter.upper()
        if upper_name == 'LAMBDA':
            # The Unicode character name is spelled LAMDA.
            upper_name = 'LAMDA'
        small_name = small_name_overrides.get(
            upper_name, "GREEK SMALL LETTER " + upper_name)
        _default_macro_list.append((letter, unicodedata.lookup(small_name)))
        capitalised = letter[0].upper() + letter[1:]
        _default_macro_list.append(
            (capitalised, unicodedata.lookup("GREEK CAPITAL LETTER " + upper_name)))
项目:Tobo-Cogs    作者:Tobotimus    | 项目源码 | 文件源码
def _check_files():
    """Create the triggers file if it is not valid JSON, otherwise upgrade it.

    Upgrade (backwards compatibility): every emoji entry under
    ``text_triggers`` and ``user_triggers`` that ``lookup`` accepts as a
    name is replaced by the looked-up value; other entries are left as
    they are.  The file is then saved again.
    """
    if not dataIO.is_valid_json(TRIGGERS_PATH):
        _LOGGER.info("Creating json: " + TRIGGERS_PATH)
        dataIO.save_json(TRIGGERS_PATH, DEFAULT_SETTINGS)
        return

    triggers = dataIO.load_json(TRIGGERS_PATH)
    for section in ('text_triggers', 'user_triggers'):
        for key, emoji_list in triggers[section].items():
            for idx, entry in enumerate(emoji_list):
                try:
                    converted = lookup(entry)
                except KeyError:
                    # lookup() does not know this entry as a name: keep it.
                    continue
                emoji_list[idx] = converted
            triggers[section][key] = emoji_list
    dataIO.save_json(TRIGGERS_PATH, triggers)
项目:Python-iBeacon-Scan    作者:NikNitro    | 项目源码 | 文件源码
def _token_splittable(token):
    """
    Decide whether a token name can be broken up into multiple tokens.

    A token is splittable when it has no underscore, is not the name of a
    Greek letter, and is longer than one character. Used to implicitly
    turn expressions like 'xyz' into 'x*y*z'.
    """
    if '_' in token:
        return False
    try:
        unicodedata.lookup('GREEK SMALL LETTER ' + token)
    except KeyError:
        names_greek_letter = False
    else:
        names_greek_letter = True
    return (not names_greek_letter) and len(token) > 1
项目:kbe_server    作者:xiaohaoppy    | 项目源码 | 文件源码
def test_aliases(self):
        # Aliases sampled from NameAliases.txt must be usable with lookup().
        # Keep this sample current as aliases are added, or switch to
        # downloading and parsing the file.  See #12753.
        known_aliases = {
            'LATIN CAPITAL LETTER GHA': 0x01A2,
            'LATIN SMALL LETTER GHA': 0x01A3,
            'KANNADA LETTER LLLA': 0x0CDE,
            'LAO LETTER FO FON': 0x0E9D,
            'LAO LETTER FO FAY': 0x0E9F,
            'LAO LETTER RO': 0x0EA3,
            'LAO LETTER LO': 0x0EA5,
            'TIBETAN MARK BKA- SHOG GI MGO RGYAN': 0x0FD0,
            'YI SYLLABLE ITERATION MARK': 0xA015,
            'PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET': 0xFE18,
            'BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS': 0x1D0C5,
        }
        for alias in sorted(known_aliases, key=known_aliases.get):
            # Sorted by code point: the same order as the original list.
            target = chr(known_aliases[alias])
            self.checkletter(alias, target)
            name = unicodedata.name(target)
            self.assertNotEqual(name, alias)
            self.assertEqual(unicodedata.lookup(alias),
                             unicodedata.lookup(name))
            with self.assertRaises(KeyError):
                unicodedata.ucd_3_2_0.lookup(alias)
项目:kbe_server    作者:xiaohaoppy    | 项目源码 | 文件源码
def test_named_sequences_full(self):
        # Walk the whole NamedSequences.txt file for the database version in
        # unicodedata.unidata_version and check each sequence; skip the test
        # when the download fails.
        url = ("http://www.unicode.org/Public/%s/ucd/NamedSequences.txt" %
               unicodedata.unidata_version)
        try:
            testdata = support.open_urlresource(url, encoding="utf-8",
                                                check=check_version)
        except (OSError, HTTPException):
            self.skipTest("Could not retrieve " + url)
        self.addCleanup(testdata.close)
        for text in testdata:
            text = text.strip()
            if text == '' or text.startswith('#'):
                # blank line or comment
                continue
            seqname, codes = text.split(';')
            sequence = ''
            for code in codes.split():
                sequence += chr(int(code, 16))
            self.assertEqual(unicodedata.lookup(seqname), sequence)
            with self.assertRaises(SyntaxError):
                self.checkletter(seqname, None)
            with self.assertRaises(KeyError):
                unicodedata.ucd_3_2_0.lookup(seqname)
项目:blender    作者:gastrodia    | 项目源码 | 文件源码
def unicode_name_matches(self, text):
        u"""Match LaTeX-like syntax for Unicode characters, keyed on the
        character's name.

        Example: ``\\GREEK SMALL LETTER ETA`` -> ``η``

        Works only for characters valid in a Python 3 identifier, or
        combining characters that combine to form a valid identifier.

        Returns ``(matched_text, [character])``; ``('', [])`` if no match.
        Used on Python 3 only.
        """
        head, backslash, name = text.rpartition('\\')
        if backslash:
            try:
                symbol = unicodedata.lookup(name)
            except KeyError:
                symbol = None
            # allow combining chars by testing after a leading letter
            if symbol is not None and ('a' + symbol).isidentifier():
                return '\\' + name, [symbol]
        return u'', []
项目:yatta_reader    作者:sound88    | 项目源码 | 文件源码
def unicode_name_matches(self, text):
        u"""Complete ``\\CHARACTER NAME`` to the named Unicode character.

        Example: ``\\GREEK SMALL LETTER ETA`` -> ``η``

        The name is everything after the last backslash in ``text``.  Only
        characters usable in a Python 3 identifier are offered (combining
        characters included).

        Returns ``(matched_text, [character])`` or ``('', [])``.
        Used on Python 3 only.
        """
        position = text.rfind('\\')
        if position <= -1:
            return u'', []

        char_name = text[position + 1:]
        try:
            character = unicodedata.lookup(char_name)
        except KeyError:
            character = None

        if character is None:
            return u'', []
        # A leading 'a' makes combining characters acceptable too.
        usable = ('a' + character).isidentifier()
        return ('\\' + char_name, [character]) if usable else (u'', [])
项目:code    作者:ActiveState    | 项目源码 | 文件源码
def insert_accented(self, c, accent):
        """Insert the accented form of letter ``c`` at the insertion cursor.

        ``accent`` is the accent part of a Unicode character name (as in
        "latin small letter e with <accent>").  Returns ``"break"`` when an
        accented character was inserted, so that Tk stops handling the
        event and does not insert the plain letter as well.  Returns
        ``None`` when ``c`` is not alphabetic or no such character exists.
        """
        if not c.isalpha():
            return None
        cap = 'capital' if c.isupper() else 'small'
        try:
            accented = lookup("latin %s letter %c with %s" % (cap, c, accent))
        except KeyError:
            # No precomposed character with that name.
            return None
        self.insert(INSERT, accented)
        return "break"
项目:bittyband    作者:yam655    | 项目源码 | 文件源码
def expand_unicode(s):
    """Convert a Unicode reference into the character it denotes.

    Accepted forms are ``\\uXXXX`` / ``\\UXXXXXXXX`` (hexadecimal code
    point) and ``\\N{NAME}`` (Unicode character name).  Any other string is
    returned unchanged.

    Raises ``ValueError`` for a malformed hexadecimal code point and
    ``ConfigError`` for an unknown character name.
    """
    if s.startswith((r'\u', r'\U')):
        # Skip the two-character prefix: int() cannot parse "\u0041" itself.
        return chr(int(s[2:], 16))
    if s.startswith(r'\N{'):
        name = s[3:-1]
        try:
            return unicodedata.lookup(name)
        except KeyError:
            raise ConfigError("Failed to find unicode value with name {}\n".format(name))
    return s
项目:llk    作者:Tycx2ry    | 项目源码 | 文件源码
def u(s):
        """Build a Unicode string from ``s``, expanding Unicode escapes.

        Intended to behave like a ``u'<string>'`` literal in Python 2.x.
        It is regexp-based and therefore not completely robust: matches of
        ``_U16_RE`` and ``_U32_RE`` are replaced by the character with the
        matched hex value, matches of ``_UNAME_RE`` by the named character.
        """
        def _from_hex(match):
            return unichr(int(match.group('hexval'), 16))

        def _from_name(match):
            return unicodedata.lookup(match.group('name'))

        text = re.sub(_U16_RE, _from_hex, unicode(s))
        text = re.sub(_U32_RE, _from_hex, text)
        text = re.sub(_UNAME_RE, _from_name, text)
        return text
项目:spiderfoot    作者:wi-fi-analyzer    | 项目源码 | 文件源码
def u(s):
        """Return ``s`` as a Unicode string with its Unicode escapes decoded.

        Meant to mimic ``u'<string>'`` from Python 2.x.  Being a set of
        simple regexp substitutions (``_U16_RE``, ``_U32_RE``,
        ``_UNAME_RE``, applied in that order) it is not completely robust.
        """
        def hex_to_char(m):
            # 'hexval' holds the hexadecimal code point.
            code = int(m.group('hexval'), 16)
            return unichr(code)

        def name_to_char(m):
            # 'name' holds the Unicode character name.
            return unicodedata.lookup(m.group('name'))

        result = unicode(s)
        result = re.sub(_U16_RE, hex_to_char, result)
        result = re.sub(_U32_RE, hex_to_char, result)
        return re.sub(_UNAME_RE, name_to_char, result)
项目:coquery    作者:gkunter    | 项目源码 | 文件源码
def dia_to_unicode(s):
    """
    Convert CELEX diacritic encodings in a string to Unicode.

    A diacritic marker applies to the next ordinary character: the
    matching combining mark is placed after that character and the result
    is NFC-normalized. Of several markers in a row only the last one is
    used; a marker with no character after it is dropped.

    Parameters
    ----------
    s : string
        A string containing CELEX diacritics (see CELEX/english/eol/README
        for details)

    Returns
    -------
    s : string
        The corresponding unicode string
    """
    # CELEX marker -> Unicode combining character.
    combining_marks = {
        marker: unicodedata.lookup(name)
        for marker, name in (
            ("#", "COMBINING ACUTE ACCENT"),
            ("`", "COMBINING GRAVE ACCENT"),
            ('"', "COMBINING DIAERESIS"),
            ("^", "COMBINING CIRCUMFLEX ACCENT"),
            (",", "COMBINING CEDILLA"),
            ("~", "COMBINING TILDE"),
            ("@", "COMBINING RING ABOVE"),
        )
    }

    pieces = []
    pending_mark = None
    for ch in s:
        mark = combining_marks.get(ch)
        if mark is not None:
            # Remember the mark until the character it modifies is seen.
            pending_mark = mark
            continue
        pieces.append(ch)
        if pending_mark:
            pieces.append(pending_mark)
            pending_mark = None
    # Compose base characters with their combining marks.
    return unicodedata.normalize("NFC", "".join(pieces))
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def parse_named_char(source, info, in_set):
    """Parse a ``{NAME}`` named character.

    Returns the character item built by ``make_character`` for the named
    code point.  When the braces are missing or unbalanced, the source
    position is restored and a character item for a literal "N" is
    returned.  An unknown name raises ``error``.
    """
    start = source.pos
    if source.match("{"):
        char_name = source.get_while(NAMED_CHAR_PART)
        closed = source.match("}")
        if closed:
            try:
                code = ord(unicodedata.lookup(char_name))
                return make_character(info, code, in_set)
            except KeyError:
                raise error("undefined character name", source.string,
                  source.pos)

    # Not a named character after all: rewind and treat it as plain "N".
    source.pos = start
    return make_character(info, ord("N"), in_set)
项目:backrefs    作者:facelessuser    | 项目源码 | 文件源码
def unicode_name(self, name):
        """Insert Unicode value by its name.

        Code points up to 0xFF are returned as a three-digit octal escape
        (backslash included); larger ones as the character itself.
        """
        code_point = ord(unicodedata.lookup(name))
        if code_point <= 0xFF:
            return '\\%03o' % code_point
        return compat.uchr(code_point)
项目:zippy    作者:securesystemslab    | 项目源码 | 文件源码
def U(name):
        """Return the Unicode character called ``name``, or None if not found.

        A failed lookup also appends a line to the module-level
        ``unicode_warnings`` string.
        """
        global unicode_warnings
        try:
            return unicodedata.lookup(name)
        except KeyError:
            unicode_warnings += 'No \'%s\' in unicodedata\n' % name
            return None
项目:zippy    作者:securesystemslab    | 项目源码 | 文件源码
def test_ascii_letters(self):
        """Round-trip each lowercase ASCII letter through lookup() and name()."""
        import unicodedata

        # range() excludes its stop value; + 1 makes the loop include "z".
        for code in range(ord("a"), ord("z") + 1):
            name = "LATIN SMALL LETTER %s" % chr(code).upper()
            char = unicodedata.lookup(name)
            self.assertEqual(unicodedata.name(char), name)
项目:zippy    作者:securesystemslab    | 项目源码 | 文件源码
def test_bmp_characters(self):
        """Check that lookup() inverts name() for every named character
        below U+10000."""
        import unicodedata
        # Tally of named characters checked (not used further in this block).
        count = 0
        for codepoint in range(0x10000):
            ch = chr(codepoint)
            char_name = unicodedata.name(ch, None)
            if char_name is None:
                # No name for this code point: nothing to look up.
                continue
            self.assertEqual(unicodedata.lookup(char_name), ch)
            count += 1
项目:zippy    作者:securesystemslab    | 项目源码 | 文件源码
def test_errors(self):
        """Check the exceptions raised by name() and lookup() on bad input."""
        import unicodedata
        # (expected exception, callable, positional arguments)
        failing_calls = (
            (TypeError, unicodedata.name, ()),
            (TypeError, unicodedata.name, ('xx',)),
            (TypeError, unicodedata.lookup, ()),
            (KeyError, unicodedata.lookup, ('unknown',)),
        )
        for expected_exc, func, args in failing_calls:
            self.assertRaises(expected_exc, func, *args)
项目:oil    作者:oilshell    | 项目源码 | 文件源码
def test_ascii_letters(self):
        """Round-trip each lowercase ASCII letter through lookup() and name()."""
        import unicodedata

        # xrange() excludes its stop value; + 1 makes the loop include "z".
        for code in xrange(ord("a"), ord("z") + 1):
            name = "LATIN SMALL LETTER %s" % chr(code).upper()
            char = unicodedata.lookup(name)
            self.assertEqual(unicodedata.name(char), name)
项目:oil    作者:oilshell    | 项目源码 | 文件源码
def test_bmp_characters(self):
        """Check that lookup() inverts name() for every named character
        below U+10000."""
        import unicodedata
        # Tally of named characters checked (not used further in this block).
        count = 0
        for codepoint in xrange(0x10000):
            ch = unichr(codepoint)
            char_name = unicodedata.name(ch, None)
            if char_name is None:
                # No name for this code point: nothing to look up.
                continue
            self.assertEqual(unicodedata.lookup(char_name), ch)
            count += 1
项目:oil    作者:oilshell    | 项目源码 | 文件源码
def test_errors(self):
        """Check the exceptions raised by name() and lookup() on bad input."""
        import unicodedata
        # (expected exception, callable, positional arguments)
        failing_calls = (
            (TypeError, unicodedata.name, ()),
            (TypeError, unicodedata.name, (u'xx',)),
            (TypeError, unicodedata.lookup, ()),
            (KeyError, unicodedata.lookup, (u'unknown',)),
        )
        for expected_exc, func, args in failing_calls:
            self.assertRaises(expected_exc, func, *args)
项目:python2-tracer    作者:extremecoders-re    | 项目源码 | 文件源码
def test_ascii_letters(self):
        """Check lookup()/name() agreement for all 26 lowercase ASCII letters."""
        import unicodedata

        first, last = ord("a"), ord("z")
        # The stop value is exclusive, so go one past "z" to include it.
        for code in xrange(first, last + 1):
            name = "LATIN SMALL LETTER %s" % chr(code).upper()
            found = unicodedata.lookup(name)
            self.assertEqual(unicodedata.name(found), name)
项目:python2-tracer    作者:extremecoders-re    | 项目源码 | 文件源码
def test_bmp_characters(self):
        """For each code point below U+10000 that has a name, verify that
        looking the name up gives the character back."""
        import unicodedata
        # Number of named characters seen (only tallied in this block).
        count = 0
        for char in map(unichr, xrange(0x10000)):
            name = unicodedata.name(char, None)
            if name is not None:
                found = unicodedata.lookup(name)
                self.assertEqual(found, char)
                count += 1
项目:python2-tracer    作者:extremecoders-re    | 项目源码 | 文件源码
def test_errors(self):
        """name() and lookup() must reject missing or invalid arguments."""
        import unicodedata
        name, lookup = unicodedata.name, unicodedata.lookup
        # Missing argument, then a two-character string, for name().
        self.assertRaises(TypeError, name)
        self.assertRaises(TypeError, name, u'xx')
        # Missing argument, then an unknown character name, for lookup().
        self.assertRaises(TypeError, lookup)
        self.assertRaises(KeyError, lookup, u'unknown')
项目:orizonhub    作者:gumblex    | 项目源码 | 文件源码
def cmd_do(expr, msg=None):
    """Answer a "do" command with an emoticon, a character or its name.

    Resolution order for ``expr`` (lower-cased first):

    1. a key of the built-in action table -> its text;
    2. ``'help'`` -> comma-separated list of the action keys;
    3. a Unicode character name -> that character;
    4. otherwise, for input of at most 10 characters, the Unicode names of
       the characters of the original (not lower-cased) input, joined by
       ", "; longer input yields ``'Something happened.'``.

    ``msg`` is accepted but not used.
    """
    # NOTE: the '?' runs in the literals below appear to be non-ASCII
    # characters lost to an encoding problem; they are kept as found.  Only
    # the 'shrug' entry, whose opening quote was missing, has been restored.
    actions = collections.OrderedDict((
        ('shrug', '¯\\_(ツ)_/¯'),
        ('lenny', '( ?° ?? ?°)'),
        ('flip', '??°?°??? ???'),
        ('homo', '?????o???'),
        ('look', '?_?'),
        ('cn', '[citation needed]'),
        ('boom', '??'),
        ('tweet', '??'),
        ('blink', '??'),
        ('see-no-evil', '??'),
        ('hear-no-evil', '??'),
        ('speak-no-evil', '??'),
        ('evil', '??????'),
        ('table', '(?>_<)?</?lq??>'),
        ('release-upgrade', '????'),
        ('however', ('???????????\n??????????\n'
                     'Something happened\n???????\n'
                     '?????????????\n???????\n???????')),
        ('mac', ('?????\n????\n???????\n????\n'
                 '?????\n??????\n??\n????'))
    ))
    origexpr = expr
    expr = expr.lower()

    res = actions.get(expr)
    if res:
        return res
    if expr == 'help':
        return ', '.join(actions.keys())

    try:
        return unicodedata.lookup(expr)
    except KeyError:
        pass

    if len(expr) <= 10:
        # Describe each character of the input as typed.
        return ', '.join(unicodedata.name(ch) for ch in origexpr)
    return 'Something happened.'
项目:facebook-bulk-group-inviter    作者:danielireson    | 项目源码 | 文件源码
def _get_base_character(c):
    """Return the character named by the part of ``c``'s Unicode name that
    precedes ' WITH ' (the whole name when ' WITH ' does not occur)."""
    full_name = unicodedata.name(unicode(c))
    # partition() yields the whole name when the separator is absent.
    base_name = full_name.partition(' WITH ')[0]
    return unicodedata.lookup(base_name)
项目:web_ctp    作者:molebot    | 项目源码 | 文件源码
def test_ascii_letters(self):
        """Round-trip each lowercase ASCII letter through lookup() and name()."""
        # range() excludes its stop value; + 1 makes the loop include "z".
        for code in range(ord("a"), ord("z") + 1):
            name = "LATIN SMALL LETTER %s" % chr(code).upper()
            char = unicodedata.lookup(name)
            self.assertEqual(unicodedata.name(char), name)
项目:web_ctp    作者:molebot    | 项目源码 | 文件源码
def test_bmp_characters(self):
        """Check that lookup() inverts name() for every named character
        below U+10000."""
        for codepoint in range(0x10000):
            ch = chr(codepoint)
            char_name = unicodedata.name(ch, None)
            if char_name is None:
                # No name for this code point: nothing to look up.
                continue
            self.assertEqual(unicodedata.lookup(char_name), ch)
项目:web_ctp    作者:molebot    | 项目源码 | 文件源码
def test_named_sequences_sample(self):
        # Spot-check a handful of named sequences.  See #12753.
        expected_by_name = (
            ('LATIN SMALL LETTER R WITH TILDE', '\u0072\u0303'),
            ('TAMIL SYLLABLE SAI', '\u0BB8\u0BC8'),
            ('TAMIL SYLLABLE MOO', '\u0BAE\u0BCB'),
            ('TAMIL SYLLABLE NNOO', '\u0BA3\u0BCB'),
            ('TAMIL CONSONANT KSS', '\u0B95\u0BCD\u0BB7\u0BCD'),
        )
        for sequence_name, expected in expected_by_name:
            # lookup() resolves the sequence name ...
            self.assertEqual(unicodedata.lookup(sequence_name), expected)
            # ... checkletter() is expected to raise SyntaxError for it ...
            with self.assertRaises(SyntaxError):
                self.checkletter(sequence_name, None)
            # ... and the 3.2.0 database is expected not to know it.
            with self.assertRaises(KeyError):
                unicodedata.ucd_3_2_0.lookup(sequence_name)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_unicode(self):
        # See GH 6885 - get_dummies chokes on unicode values
        import unicodedata
        plain_e = 'e'
        e_acute = unicodedata.lookup('LATIN SMALL LETTER E WITH ACUTE')
        values = [plain_e, e_acute, e_acute]
        result = get_dummies(values, prefix='letter', sparse=self.sparse)
        # One indicator column per distinct value, named "letter_<value>".
        expected = DataFrame({
            'letter_e': {0: 1.0, 1: 0.0, 2: 0.0},
            u('letter_%s') % e_acute: {0: 0.0, 1: 1.0, 2: 1.0},
        })
        assert_frame_equal(result, expected)
项目:ninja_generator    作者:rampantpixels    | 项目源码 | 文件源码
def normalize_char(c):
  """Return the character named by the part of ``c``'s Unicode name before
  ' WITH'; return ``c`` unchanged when it has no name, the name has no
  ' WITH' part, or the shortened name is unknown."""
  try:
    full_name = unicodedata.name( unicode(c) )
  except ( ValueError, KeyError ):
    return c
  cut = full_name.find( ' WITH' )
  if cut == -1:
    return c
  try:
    return unicodedata.lookup( full_name[:cut] )
  except ( ValueError, KeyError ):
    return c
项目:snake    作者:AnonymousDapper    | 项目源码 | 文件源码
def unicode(self, name):
        """Return whatever ``lookup`` yields for ``name``.

        Thin wrapper: any exception raised by ``lookup`` propagates.
        """
        character = lookup(name)
        return character

    # Safe, fast math parser
项目:pefile.pypy    作者:cloudtracer    | 项目源码 | 文件源码
def test_ascii_letters(self):
        """lookup() and name() must agree on all 26 lowercase ASCII letters."""
        import unicodedata

        # One past "z": xrange() stops before its second argument.
        stop = ord("z") + 1
        for code in xrange(ord("a"), stop):
            name = "LATIN SMALL LETTER %s" % chr(code).upper()
            resolved = unicodedata.lookup(name)
            self.assertEqual(unicodedata.name(resolved), name)
项目:pefile.pypy    作者:cloudtracer    | 项目源码 | 文件源码
def test_bmp_characters(self):
        """Every named character below U+10000 must be found again by
        looking up its name."""
        import unicodedata
        # Running total of named characters (only tallied in this block).
        count = 0
        code = 0
        while code < 0x10000:
            char = unichr(code)
            code += 1
            name = unicodedata.name(char, None)
            if name is None:
                continue
            self.assertEqual(unicodedata.lookup(name), char)
            count += 1
项目:pefile.pypy    作者:cloudtracer    | 项目源码 | 文件源码
def test_errors(self):
        """Bad calls to name() and lookup() must raise the right exception."""
        import unicodedata
        # TypeError cases: no argument at all, or a two-character string
        # passed to name().
        for func, args in ((unicodedata.name, ()),
                           (unicodedata.name, (u'xx',)),
                           (unicodedata.lookup, ())):
            self.assertRaises(TypeError, func, *args)
        # An unknown character name is a KeyError.
        self.assertRaises(KeyError, unicodedata.lookup, u'unknown')
项目:ouroboros    作者:pybee    | 项目源码 | 文件源码
def test_ascii_letters(self):
        """lookup() and name() must agree on all 26 lowercase ASCII letters."""
        # The stop value of range() is exclusive, hence ord("z") + 1.
        letters = [chr(code) for code in range(ord("a"), ord("z") + 1)]
        for letter in letters:
            name = "LATIN SMALL LETTER %s" % letter.upper()
            resolved = unicodedata.lookup(name)
            self.assertEqual(unicodedata.name(resolved), name)
项目:ouroboros    作者:pybee    | 项目源码 | 文件源码
def test_bmp_characters(self):
        """Every named character below U+10000 must be found again by
        looking up its name."""
        for char in map(chr, range(0x10000)):
            name = unicodedata.name(char, None)
            if name is not None:
                found = unicodedata.lookup(name)
                self.assertEqual(found, char)
项目:ouroboros    作者:pybee    | 项目源码 | 文件源码
def test_named_sequences_sample(self):
        # Check a few named sequences against their expected code points.
        # See #12753.
        samples = {
            'LATIN SMALL LETTER R WITH TILDE': '\u0072\u0303',
            'TAMIL SYLLABLE SAI': '\u0BB8\u0BC8',
            'TAMIL SYLLABLE MOO': '\u0BAE\u0BCB',
            'TAMIL SYLLABLE NNOO': '\u0BA3\u0BCB',
            'TAMIL CONSONANT KSS': '\u0B95\u0BCD\u0BB7\u0BCD',
        }
        for seqname in ('LATIN SMALL LETTER R WITH TILDE',
                        'TAMIL SYLLABLE SAI',
                        'TAMIL SYLLABLE MOO',
                        'TAMIL SYLLABLE NNOO',
                        'TAMIL CONSONANT KSS'):
            codepoints = samples[seqname]
            self.assertEqual(unicodedata.lookup(seqname), codepoints)
            with self.assertRaises(SyntaxError):
                self.checkletter(seqname, None)
            with self.assertRaises(KeyError):
                unicodedata.ucd_3_2_0.lookup(seqname)