Python unicodedata 模块,normalize() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用unicodedata.normalize()

项目:alfred-mpd    作者:deanishe    | 项目源码 | 文件源码
def fold_to_ascii(self, text):
        """Fold ``text`` down to its closest plain-ASCII spelling.

        .. versionadded:: 1.3

        .. note:: Only a subset of European languages is covered.

        Text for which ``isascii(text)`` is true is returned untouched.
        Otherwise every character is looked up in ``ASCII_REPLACEMENTS``
        (falling back to the character itself), the result is
        NFKD-normalized and anything that still cannot be encoded as
        ASCII is dropped.

        :param text: text to convert
        :type text: ``unicode``
        :returns: text containing only ASCII characters
        :rtype: ``unicode``

        """
        if not isascii(text):
            substituted = ''.join(ASCII_REPLACEMENTS.get(ch, ch) for ch in text)
            decomposed = unicodedata.normalize('NFKD', substituted)
            # 'ignore' silently discards whatever has no ASCII encoding
            return unicode(decomposed.encode('ascii', 'ignore'))
        return text
项目:aiodownload    作者:jelloslinger    | 项目源码 | 文件源码
def clean_filename(filename):
    """Return a sanitized filename (replace / strip out illegal characters).

    Every character is first looked up in ``REPLACEMENT_CHAR`` (characters
    without an entry are kept as they are), the result is NFKD-normalized,
    and only ASCII letters, digits and the characters ``-_.() `` are kept.

    :param filename: string used for a filename
    :type filename: str

    :return: sanitized filename
    :rtype: str
    """
    # Build the whitelist once; the original re-formatted it for every
    # single character of the input.
    allowed = frozenset('-_.() ' + string.ascii_letters + string.digits)
    replaced = ''.join(REPLACEMENT_CHAR.get(c, c) for c in filename)

    return ''.join(
        c for c in unicodedata.normalize('NFKD', replaced)
        # NFKD splits accented letters into base letter + combining mark;
        # the combining marks are dropped here.
        if not unicodedata.combining(c) and c in allowed
    )
项目:dabdabrevolution    作者:harryparkdotio    | 项目源码 | 文件源码
def filename(self):
        """ Client-side name of the file, normalized so that it is safe to
            use on the file system. An empty result is reported as 'empty'.

            The final name contains only ASCII letters, digits, dashes,
            underscores and dots. Accents are removed where possible, runs
            of whitespace/dashes collapse to a single dash, leading or
            trailing dots and dashes are stripped, and the result is cut
            to 255 characters.
        """
        raw = self.raw_filename
        if not isinstance(raw, unicode):
            raw = raw.decode('utf8', 'ignore')
        # Decompose, then drop everything that has no ASCII representation
        ascii_only = normalize('NFKD', raw).encode('ASCII', 'ignore').decode('ASCII')
        # Treat backslashes as path separators and keep the last component
        base = os.path.basename(ascii_only.replace('\\', os.path.sep))
        kept = re.sub(r'[^a-zA-Z0-9-_.\s]', '', base).strip()
        dashed = re.sub(r'[-\s]+', '-', kept).strip('.-')
        return dashed[:255] or 'empty'
项目:Gank-Alfred-Workflow    作者:hujiaweibujidao    | 项目源码 | 文件源码
def fold_to_ascii(self, text):
        """Fold ``text`` down to its closest plain-ASCII spelling.

        .. versionadded:: 1.3

        .. note:: Only a subset of European languages is covered.

        Text for which ``isascii(text)`` is true is returned untouched.
        Otherwise every character is looked up in ``ASCII_REPLACEMENTS``
        (falling back to the character itself), the result is
        NFKD-normalized and anything that still cannot be encoded as
        ASCII is dropped.

        :param text: text to convert
        :type text: ``unicode``
        :returns: text containing only ASCII characters
        :rtype: ``unicode``

        """
        if not isascii(text):
            substituted = ''.join(ASCII_REPLACEMENTS.get(ch, ch) for ch in text)
            decomposed = unicodedata.normalize('NFKD', substituted)
            # 'ignore' silently discards whatever has no ASCII encoding
            return unicode(decomposed.encode('ascii', 'ignore'))
        return text
项目:Flask_Blog    作者:sugarguo    | 项目源码 | 文件源码
def decode_as_string(text, encoding=None):
    """
    Return ``text`` as a unicode string in composed (NFC) form.

    The text parameter should be an encoded byte string; a value that is
    already unicode is not decoded again.  When no encoding is given the
    module-level ``_console_encoding`` is used; an explicit encoding
    overrides it, which is needed for SVN --xml output.

    --xml should be UTF-8 (SVN Issue 2938) the discussion on the Subversion
    DEV List from 2007 seems to indicate the same.
    """
    codec = _console_encoding if encoding is None else encoding

    # text should be a byte string; unicode input skips the decode step
    decoded = text if isinstance(text, unicode) else text.decode(codec)

    return unicodedata.normalize('NFC', decoded)
项目:Location_Assistance    作者:KamalAwasthi    | 项目源码 | 文件源码
def delete_friends(request):
    """Remove one friend from a user's stored friend list.

    Reads ``username`` and ``friendUsername`` from ``request.POST``, loads
    the ``FriendList`` row for that username, rewrites its ``friendList``
    field as a JSON array without the named friend and saves the row.

    :param request: request object exposing the submitted form as ``POST``
    :returns: ``HttpResponse`` whose body is the JSON list of the remaining
        friends, or ``[]`` when the update failed for any reason
    """
    current_username = request.POST.get('username')
    current_friendName = request.POST.get('friendUsername')
    remaining = []
    try:
        existingUser = FriendList.objects.get(user__username=current_username)
        for friend in existingUser.getfoo():
            # Stored names are folded to plain ASCII before the comparison
            friend = unicodedata.normalize('NFKD', friend).encode('ascii', 'ignore')
            if friend != current_friendName:
                remaining.append(friend)
        existingUser.friendList = json.dumps(remaining)
        existingUser.save()
    except Exception:
        # Best-effort by design: any failure yields an empty list.  Unlike
        # the previous bare ``except:``, KeyboardInterrupt and SystemExit
        # are no longer swallowed.
        remaining = []
    return HttpResponse(json.dumps(remaining))
项目:linter    作者:ethz-asl    | 项目源码 | 文件源码
def GetLineWidth(line):
  """Computes how many display columns a line occupies.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    For Unicode strings, the column count after NFC normalization, where
    wide/fullwidth East Asian characters count as two columns and
    combining characters count as zero.  For any other input, len(line).
  """
  if not isinstance(line, unicode):
    return len(line)

  columns = 0
  for char in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(char) in ('W', 'F'):
      columns += 2
    elif not unicodedata.combining(char):
      columns += 1
  return columns
项目:noc-orchestrator    作者:DirceuSilvaLabs    | 项目源码 | 文件源码
def append_utf8(self, text):
        """Append ``text`` to the existing file at ``self.filepath`` as UTF-8.

        The text is NFKD-normalized before it is written.  An ``IOError``
        is raised when ``file_exists`` reports that the target is missing.
        Every exception is re-raised; when ``DEBUG_FLAG`` is set a
        diagnostic message is written to stderr first.
        """
        try:
            from Naked.toolshed.system import file_exists
            if not file_exists(self.filepath):
                raise IOError("The file specified for the text append does not exist (Naked.toolshed.file.py:append_utf8).")
            import unicodedata
            import codecs
            # NFKD normalization of the unicode data before write
            normalized = unicodedata.normalize('NFKD', text)
            with codecs.open(self.filepath, mode='a', encoding="utf_8") as out_file:
                out_file.write(normalized)
        except Exception as err:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to append text to the file with the append_utf8 method (Naked.toolshed.file.py).")
            raise err

    #------------------------------------------------------------------------------
    # [ gzip method (writer) ]
    #   writes data to gzip compressed file
    #   Note: adds .gz extension to filename if user did not specify it in the FileWriter class constructor
    #   Note: uses compresslevel = 6 as default to balance speed and compression level (which in general is not significantly less than 9)
    #   Tests: test_IO.py :: test_file_gzip_ascii_readwrite, test_file_gzip_utf8_readwrite,
    #               test_file_gzip_utf8_readwrite_explicit_decode
    #------------------------------------------------------------------------------
项目:noc-orchestrator    作者:DirceuSilvaLabs    | 项目源码 | 文件源码
def gzip(self, text, compression_level=6):
        """Write ``text`` to a gzip-compressed file at ``self.filepath``.

        ``.gz`` is appended to ``self.filepath`` (and stored back on the
        instance) when the path does not already end with it.  If the
        first write raises ``UnicodeEncodeError`` the text is
        NFKD-normalized, encoded as UTF-8 and written again.  Any other
        exception is re-raised; when ``DEBUG_FLAG`` is set a diagnostic
        message is written to stderr first.
        """
        try:
            import gzip
            if not self.filepath.endswith(".gz"):
                self.filepath += ".gz"
            with gzip.open(self.filepath, 'wb', compresslevel=compression_level) as gz_out:
                gz_out.write(text)
        except UnicodeEncodeError:
            import unicodedata
            import codecs
            # NFKD normalization of the unicode data before write
            normalized = unicodedata.normalize('NFKD', text)
            payload = codecs.encode(normalized, "utf_8")
            with gzip.open(self.filepath, 'wb', compresslevel=compression_level) as gz_out:
                gz_out.write(payload)
        except Exception as err:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: unable to gzip compress the file with the gzip method (Naked.toolshed.file.py).")
            raise err

    #------------------------------------------------------------------------------
    # [ write method ]
    #   Universal text file writer that writes with the system default encoding, or as utf-8 encoded unicode if a UnicodeEncodeError is thrown
    #   Tests: test_IO.py :: test_file_ascii_readwrite, test_file_ascii_readwrite_missing_file,
    #    test_file_utf8_write_raises_unicodeerror
    #------------------------------------------------------------------------------
项目:noc-orchestrator    作者:DirceuSilvaLabs    | 项目源码 | 文件源码
def readlines_utf8(self):
        """Read the UTF-8 file at ``self.filepath`` as a list of lines.

        Each line is NFKD-normalized before it is added to the returned
        list.  Any exception is re-raised; when ``DEBUG_FLAG`` is set a
        diagnostic message is written to stderr first.
        """
        try:
            import codecs
            import unicodedata
            with codecs.open(self.filepath, encoding='utf-8', mode='r') as reader:
                # NFKD normalization of the unicode data before use
                return [unicodedata.normalize('NFKD', raw_line) for raw_line in reader]
        except Exception as err:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: unable to read lines in the unicode file with the readlines_utf8 method (Naked.toolshed.file.py)")
            raise err

    #------------------------------------------------------------------------------
    # [ read_gzip ] (byte string)
    #   reads data from a gzip compressed file
    #   returns the decompressed binary data from the file
    #   Note: if decompressing unicode file, set encoding="utf-8"
    #   Tests: test_IO.py :: test_file_gzip_ascii_readwrite, test_file_gzip_utf8_readwrite,
    #              test_file_read_gzip_missing_file
    #------------------------------------------------------------------------------
项目:noc-orchestrator    作者:DirceuSilvaLabs    | 项目源码 | 文件源码
def read_utf8(self):
        """Return the NFKD-normalized text of the UTF-8 file at ``self.filepath``.

        An ``IOError`` raised while opening the file, and any exception
        raised while reading it, is re-raised; when ``DEBUG_FLAG`` is set a
        diagnostic message is written to stderr first.  The file handle is
        always closed once it has been opened.
        """
        try:
            import codecs
            handle = codecs.open(self.filepath, encoding='utf_8', mode='r')
        except IOError as open_err:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to open file for read with read_utf8() method (Naked.toolshed.file.py).")
            raise open_err
        try:
            import unicodedata
            # NFKD normalization of the unicode data before it is returned
            return unicodedata.normalize('NFKD', handle.read())
        except Exception as read_err:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to read the file with UTF-8 encoding using the read_utf8() method (Naked.toolshed.file.py).")
            raise read_err
        finally:
            handle.close()
项目:noc-orchestrator    作者:DirceuSilvaLabs    | 项目源码 | 文件源码
def append_utf8(self, text):
        """Append ``text`` to the existing file at ``self.filepath`` as UTF-8.

        The text is NFKD-normalized before it is written.  An ``IOError``
        is raised when ``file_exists`` reports that the target is missing.
        Every exception is re-raised; when ``DEBUG_FLAG`` is set a
        diagnostic message is written to stderr first.
        """
        try:
            from Naked.toolshed.system import file_exists
            if not file_exists(self.filepath):
                raise IOError("The file specified for the text append does not exist (Naked.toolshed.file.py:append_utf8).")
            import unicodedata
            import codecs
            # NFKD normalization of the unicode data before write
            normalized = unicodedata.normalize('NFKD', text)
            with codecs.open(self.filepath, mode='a', encoding="utf_8") as out_file:
                out_file.write(normalized)
        except Exception as err:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to append text to the file with the append_utf8 method (Naked.toolshed.file.py).")
            raise err

    #------------------------------------------------------------------------------
    # [ gzip method (writer) ]
    #   writes data to gzip compressed file
    #   Note: adds .gz extension to filename if user did not specify it in the FileWriter class constructor
    #   Note: uses compresslevel = 6 as default to balance speed and compression level (which in general is not significantly less than 9)
    #   Tests: test_IO.py :: test_file_gzip_ascii_readwrite, test_file_gzip_utf8_readwrite,
    #               test_file_gzip_utf8_readwrite_explicit_decode
    #------------------------------------------------------------------------------
项目:noc-orchestrator    作者:DirceuSilvaLabs    | 项目源码 | 文件源码
def gzip(self, text, compression_level=6):
        """Write ``text`` to a gzip-compressed file at ``self.filepath``.

        ``.gz`` is appended to ``self.filepath`` (and stored back on the
        instance) when the path does not already end with it.  If the
        first write raises ``UnicodeEncodeError`` the text is
        NFKD-normalized, encoded as UTF-8 and written again.  Any other
        exception is re-raised; when ``DEBUG_FLAG`` is set a diagnostic
        message is written to stderr first.
        """
        try:
            import gzip
            if not self.filepath.endswith(".gz"):
                self.filepath += ".gz"
            with gzip.open(self.filepath, 'wb', compresslevel=compression_level) as gz_out:
                gz_out.write(text)
        except UnicodeEncodeError:
            import unicodedata
            import codecs
            # NFKD normalization of the unicode data before write
            normalized = unicodedata.normalize('NFKD', text)
            payload = codecs.encode(normalized, "utf_8")
            with gzip.open(self.filepath, 'wb', compresslevel=compression_level) as gz_out:
                gz_out.write(payload)
        except Exception as err:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: unable to gzip compress the file with the gzip method (Naked.toolshed.file.py).")
            raise err

    #------------------------------------------------------------------------------
    # [ write method ]
    #   Universal text file writer that writes with the system default encoding, or as utf-8 encoded unicode if a UnicodeEncodeError is thrown
    #   Tests: test_IO.py :: test_file_ascii_readwrite, test_file_ascii_readwrite_missing_file,
    #    test_file_utf8_write_raises_unicodeerror
    #------------------------------------------------------------------------------
项目:noc-orchestrator    作者:DirceuSilvaLabs    | 项目源码 | 文件源码
def write_utf8(self, text):
        """Write NFKD-normalized ``text`` to ``self.filepath`` as UTF-8.

        The file is opened in ``'w'`` mode.  An ``IOError`` raised while
        opening the file, and any exception raised while writing, is
        re-raised; when ``DEBUG_FLAG`` is set a diagnostic message is
        written to stderr first.  The file handle is always closed once it
        has been opened.
        """
        try:
            import codecs
            handle = codecs.open(self.filepath, encoding='utf_8', mode='w')
        except IOError as open_err:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to open file for write with the write_utf8() method (Naked.toolshed.file.py).")
            raise open_err
        try:
            import unicodedata
            # NFKD normalization of the unicode data before write
            handle.write(unicodedata.normalize('NFKD', text))
        except Exception as write_err:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to write UTF-8 encoded text to file with the write_utf8() method (Naked.toolshed.file.py).")
            raise write_err
        finally:
            handle.close()

#------------------------------------------------------------------------------
# [ FileReader class ]
#  reads data from local files
#  filename assigned in constructor (inherited from IO class interface)
#------------------------------------------------------------------------------
项目:noc-orchestrator    作者:DirceuSilvaLabs    | 项目源码 | 文件源码
def readlines_utf8(self):
        """Read the UTF-8 file at ``self.filepath`` as a list of lines.

        Each line is NFKD-normalized before it is added to the returned
        list.  Any exception is re-raised; when ``DEBUG_FLAG`` is set a
        diagnostic message is written to stderr first.
        """
        try:
            import codecs
            import unicodedata
            with codecs.open(self.filepath, encoding='utf-8', mode='r') as reader:
                # NFKD normalization of the unicode data before use
                return [unicodedata.normalize('NFKD', raw_line) for raw_line in reader]
        except Exception as err:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: unable to read lines in the unicode file with the readlines_utf8 method (Naked.toolshed.file.py)")
            raise err

    #------------------------------------------------------------------------------
    # [ read_gzip ] (byte string)
    #   reads data from a gzip compressed file
    #   returns the decompressed binary data from the file
    #   Note: if decompressing unicode file, set encoding="utf-8"
    #   Tests: test_IO.py :: test_file_gzip_ascii_readwrite, test_file_gzip_utf8_readwrite,
    #              test_file_read_gzip_missing_file
    #------------------------------------------------------------------------------
项目:noc-orchestrator    作者:DirceuSilvaLabs    | 项目源码 | 文件源码
def read_utf8(self):
        """Return the NFKD-normalized text of the UTF-8 file at ``self.filepath``.

        An ``IOError`` raised while opening the file, and any exception
        raised while reading it, is re-raised; when ``DEBUG_FLAG`` is set a
        diagnostic message is written to stderr first.  The file handle is
        always closed once it has been opened.
        """
        try:
            import codecs
            handle = codecs.open(self.filepath, encoding='utf_8', mode='r')
        except IOError as open_err:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to open file for read with read_utf8() method (Naked.toolshed.file.py).")
            raise open_err
        try:
            import unicodedata
            # NFKD normalization of the unicode data before it is returned
            return unicodedata.normalize('NFKD', handle.read())
        except Exception as read_err:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to read the file with UTF-8 encoding using the read_utf8() method (Naked.toolshed.file.py).")
            raise read_err
        finally:
            handle.close()
项目:workflows.kyoyue    作者:wizyoung    | 项目源码 | 文件源码
def fold_to_ascii(self, text):
        """Fold ``text`` down to its closest plain-ASCII spelling.

        .. versionadded:: 1.3

        .. note:: Only a subset of European languages is covered.

        Text for which ``isascii(text)`` is true is returned untouched.
        Otherwise every character is looked up in ``ASCII_REPLACEMENTS``
        (falling back to the character itself), the result is
        NFKD-normalized and anything that still cannot be encoded as
        ASCII is dropped.

        :param text: text to convert
        :type text: ``unicode``
        :returns: text containing only ASCII characters
        :rtype: ``unicode``

        """
        if not isascii(text):
            substituted = ''.join(ASCII_REPLACEMENTS.get(ch, ch) for ch in text)
            decomposed = unicodedata.normalize('NFKD', substituted)
            # 'ignore' silently discards whatever has no ASCII encoding
            return unicode(decomposed.encode('ascii', 'ignore'))
        return text
项目:alphy    作者:maximepeschard    | 项目源码 | 文件源码
def fold_to_ascii(self, text):
        """Fold ``text`` down to its closest plain-ASCII spelling.

        .. versionadded:: 1.3

        .. note:: Only a subset of European languages is covered.

        Text for which ``isascii(text)`` is true is returned untouched.
        Otherwise every character is looked up in ``ASCII_REPLACEMENTS``
        (falling back to the character itself), the result is
        NFKD-normalized and anything that still cannot be encoded as
        ASCII is dropped.

        :param text: text to convert
        :type text: ``unicode``
        :returns: text containing only ASCII characters
        :rtype: ``unicode``

        """
        if not isascii(text):
            substituted = ''.join(ASCII_REPLACEMENTS.get(ch, ch) for ch in text)
            decomposed = unicodedata.normalize('NFKD', substituted)
            # 'ignore' silently discards whatever has no ASCII encoding
            return unicode(decomposed.encode('ascii', 'ignore'))
        return text
项目:CodingDojo    作者:ComputerSocietyUNB    | 项目源码 | 文件源码
def sanitize_separators(value):
    """
    Rewrite a localized number string so that it uses '.' as the decimal
    separator and carries no thousand separators, based on the active
    format settings. Used with form field input.

    Values are returned unchanged unless ``settings.USE_L10N`` is on and
    the value is a string.
    """
    if not (settings.USE_L10N and isinstance(value, six.string_types)):
        return value

    parts = []
    decimal_separator = get_format('DECIMAL_SEPARATOR')
    if decimal_separator in value:
        # Keep the fractional digits aside; they are re-attached at the end
        value, decimals = value.split(decimal_separator, 1)
        parts.append(decimals)
    if settings.USE_THOUSAND_SEPARATOR:
        thousand_sep = get_format('THOUSAND_SEPARATOR')
        # Special case where we suspect a dot meant decimal separator (see #22171)
        dot_looks_decimal = (
            thousand_sep == '.' and value.count('.') == 1
            and len(value.split('.')[-1]) != 3
        )
        if not dot_looks_decimal:
            # Strip the separator both as configured and in its NFKD form
            for replacement in {
                    thousand_sep, unicodedata.normalize('NFKD', thousand_sep)}:
                value = value.replace(replacement, '')
    parts.append(value)
    return '.'.join(reversed(parts))
项目:CodingDojo    作者:ComputerSocietyUNB    | 项目源码 | 文件源码
def chars(self, num, truncate=None, html=False):
        """
        Return the wrapped text cut down to at most ``num`` characters.

        ``truncate`` optionally supplies the marker that signals the text
        was shortened; it is passed on to ``add_truncation_text`` and the
        truncation helpers. With ``html`` set, the HTML-aware helper is
        used instead of the plain-text one.
        """
        length = int(num)
        text = unicodedata.normalize('NFC', self._wrapped)

        # Room left for real text: the limit minus the number of
        # non-combining characters in the truncation marker.
        truncate_len = length
        for char in self.add_truncation_text('', truncate):
            if unicodedata.combining(char):
                continue
            truncate_len -= 1
            if truncate_len == 0:
                break

        if not html:
            return self._text_chars(length, truncate, text, truncate_len)
        return self._truncate_html(length, truncate, text, truncate_len, False)
项目:enigma2    作者:OpenLD    | 项目源码 | 文件源码
def getLcdPiconName(serviceName):
    """Look up the LCD picon for a service.

    Tries, in order: the first ten ':'-separated fields of the service
    reference joined with '_', the same reference with fields 0 and 2
    forced to '1', and finally a simplified ASCII form of the channel name
    (also without a trailing 'hd').  Returns whatever ``findLcdPicon``
    returned for the last attempt made.
    """
    # remove the path and name fields, and replace ':' by '_'
    sname = '_'.join(GetWithAlternative(serviceName).split(':', 10)[:10])
    pngname = findLcdPicon(sname)
    if not pngname:
        fields = sname.split('_', 3)
        if len(fields) > 2:
            # fallback to 1 for services with different service types
            fields[2] = '1'
        if fields:
            # fallback to 1 for other reftypes
            fields[0] = '1'
        pngname = findLcdPicon('_'.join(fields))
    if not pngname:
        # picon by channel name
        name = ServiceReference(serviceName).getServiceName()
        name = unicodedata.normalize('NFKD', unicode(name, 'utf_8', errors='ignore')).encode('ASCII', 'ignore')
        name = name.replace('&', 'and').replace('+', 'plus').replace('*', 'star')
        name = re.sub('[^a-z0-9]', '', name.lower())
        if name:
            pngname = findLcdPicon(name)
            if not pngname and len(name) > 2 and name.endswith('hd'):
                pngname = findLcdPicon(name[:-2])
    return pngname
项目:enigma2    作者:OpenLD    | 项目源码 | 文件源码
def getPiconLName(serviceName):
    """Look up the large picon for a service.

    Tries, in order: the first ten ':'-separated fields of the service
    reference joined with '_', the same reference with field 2 forced to
    '1' (unless it is '2'), and finally a simplified ASCII form of the
    channel name (also without a trailing 'hd').  Returns the result of
    the last lookup attempted.
    """
    #remove the path and name fields, and replace ':' by '_'
    sname = '_'.join(GetWithAlternative(serviceName).split(':', 10)[:10])
    pngname = findPiconL(sname)
    if not pngname:
        fields = sname.split('_', 3)
        if len(fields) > 2 and fields[2] != '2':
            #fallback to 1 for tv services with nonstandard servicetypes
            fields[2] = '1'
            pngname = findPiconL('_'.join(fields))
    if not pngname: # picon by channel name
        name = ServiceReference(serviceName).getServiceName()
        name = unicodedata.normalize('NFKD', unicode(name, 'utf_8', errors='ignore')).encode('ASCII', 'ignore')
        excludeChars = ['/', '\\', '\'', '"', '`', '?', ' ', '(', ')', ':', '<', '>', '|', '.', '\n']
        # The characters must be escaped before they go into the character
        # class: unescaped, the listed backslash merely escaped the quote
        # that follows it, so backslashes were never removed.
        name = re.sub('[%s]' % re.escape(''.join(excludeChars)), '', name)
        name = name.replace('&', 'and')
        name = name.replace('+', 'plus')
        name = name.replace('*', 'star')
        name = name.lower()
        if len(name) > 0:
            # NOTE(review): the name-based lookups use findPicon, not
            # findPiconL as above -- confirm this is intentional.
            pngname = findPicon(name)
            if not pngname and len(name) > 2 and name.endswith('hd'):
                pngname = findPicon(name[:-2])
    return pngname
项目:fscan    作者:danielmoraes    | 项目源码 | 文件源码
def weekday_portuguese_to_english(string):
    """Translate a Portuguese weekday name into its English name.

    The input is lower-cased and stripped, dashes and commas become
    spaces, accents are removed, and only the first space-separated token
    is matched against the known full and abbreviated weekday spellings.
    Returns None when the token is not recognized.
    """
    cleaned = string.lower().strip().replace("-", " ")
    # Decompose, then drop the nonspacing marks (the accents)
    cleaned = ''.join(ch for ch in unicodedata.normalize('NFD', cleaned)
                      if unicodedata.category(ch) != 'Mn')
    token = cleaned.replace(",", " ").split(" ")[0]

    spellings_by_day = (
        ("Sunday", (u"dom", u"domingo")),
        ("Monday", (u"seg", u"segunda", u"segunda-feira")),
        ("Tuesday", (u"ter", u"terca", u"terça", u"terca-feira", u"terça-feira")),
        ("Wednesday", (u"qua", u"quarta", u"quarta-feira")),
        ("Thursday", (u"qui", u"quinta", u"quinta-feira")),
        ("Friday", (u"sex", u"sexta", u"sexta-feira")),
        ("Saturday", (u"sab", u"sáb", u"sabado", u"sábado")),
    )
    for english_name, spellings in spellings_by_day:
        if token in spellings:
            return english_name
    return None
项目:script.skin.helper.skinbackup    作者:marcelveldt    | 项目源码 | 文件源码
def normalize_string(text):
    '''Strip or replace special characters, then return the text as
    NFKD-normalized unicode.

    Slashes and backslashes become dashes; ``: < > * ? | ( ) "`` are
    removed; surrounding whitespace and trailing dots are trimmed.
    Non-unicode input is decoded as UTF-8 before normalization.'''
    substitutions = (
        (":", ""), ("/", "-"), ("\\", "-"), ("<", ""), (">", ""), ("*", ""),
        ("?", ""), ('|', ""), ('(', ""), (')', ""), ("\"", ""),
    )
    for unwanted, substitute in substitutions:
        text = text.replace(unwanted, substitute)
    text = text.strip().rstrip('.')
    if not isinstance(text, unicode):
        text = text.decode("utf-8")
    return unicodedata.normalize('NFKD', text)
项目:GAMADV-XTD    作者:taers232c    | 项目源码 | 文件源码
def to_unicode(source, encoding="utf-8", param="value"):
    """Coerce bytes/unicode input to unicode.

    :arg source:
        source bytes/unicode to process.

    :arg encoding:
        encoding used to decode bytes instances; must be truthy.

    :param param:
        optional name of variable/noun to reference when raising errors.

    :raises ExpectedStringError:
        if source is neither unicode nor bytes.

    :returns:
        * unicode strings are returned unchanged.
        * bytes strings are returned decoded using *encoding*.
    """
    assert encoding
    if isinstance(source, unicode):
        return source
    if not isinstance(source, bytes):
        raise ExpectedStringError(source, param)
    return source.decode(encoding)
项目:MOSFiT    作者:guillochon    | 项目源码 | 文件源码
def slugify(value, allow_unicode=False):
    """Turn ``value`` into a slug usable as a filename.

    The value is converted with ``str``.  Unless 'allow_unicode' is True it
    is reduced to ASCII (NFKD-decomposed, non-ASCII dropped); otherwise it
    is NFKC-normalized.  Characters that aren't alphanumerics,
    underscores, hyphens or whitespace are removed, leading and trailing
    whitespace is stripped, and runs of whitespace/hyphens become a single
    hyphen.
    """
    import unicodedata
    text = str(value)
    if not allow_unicode:
        text = unicodedata.normalize('NFKD', text)
        text = text.encode('ascii', 'ignore').decode('ascii')
    else:
        text = unicodedata.normalize('NFKC', text)
    stripped = re.sub(r'[^\w\s-]', '', text).strip()
    return re.sub(r'[-\s]+', '-', stripped)


# Below from
# http://stackoverflow.com/questions/2333872/atomic-writing-to-file-with-python
项目:GoToMeetingTools    作者:plongitudes    | 项目源码 | 文件源码
def fold_to_ascii(self, text):
        """Fold ``text`` down to its closest plain-ASCII spelling.

        .. versionadded:: 1.3

        .. note:: Only a subset of European languages is covered.

        Text for which ``isascii(text)`` is true is returned untouched.
        Otherwise every character is looked up in ``ASCII_REPLACEMENTS``
        (falling back to the character itself), the result is
        NFKD-normalized and anything that still cannot be encoded as
        ASCII is dropped.

        :param text: text to convert
        :type text: ``unicode``
        :returns: text containing only ASCII characters
        :rtype: ``unicode``

        """
        if not isascii(text):
            substituted = ''.join(ASCII_REPLACEMENTS.get(ch, ch) for ch in text)
            decomposed = unicodedata.normalize('NFKD', substituted)
            # 'ignore' silently discards whatever has no ASCII encoding
            return unicode(decomposed.encode('ascii', 'ignore'))
        return text
项目:lagbot    作者:mikevb1    | 项目源码 | 文件源码
async def charinfo(self, ctx, *, chars):
        """Get unicode character info."""
        # Declared ``async`` because the body awaits ``ctx.send``; with a
        # plain ``def`` this block is a SyntaxError.
        # The docstring above is left untouched in case the command
        # framework surfaces it as help text.
        if not chars:
            return
        chars = unicodedata.normalize('NFC', chars)
        if len(chars) > 25:
            await ctx.send('Too many emoji.')
            return
        embed = discord.Embed()
        for char in chars:
            # Code point as bare hex digits (no '0x' prefix)
            uc = hex(ord(char))[2:]
            name = unicodedata.name(char, 'unknown')
            # Blank characters would be invisible in the embed, so show a
            # quoted space instead
            if name in {'SPACE', 'EM QUAD', 'EN QUAD'} or ' SPACE' in name:
                char = '" "'
            # Up to four hex digits fit a \uXXXX escape, longer ones need \UXXXXXXXX
            short = len(uc) <= 4
            code = f'`\\{"u" if short else "U"}{uc.lower().zfill(4 if short else 8)}`'
            embed.add_field(name=name,
                            value=f'{char} [{code}](http://www.fileformat.info/info/unicode/char/{uc}/index.htm)')
        await ctx.send(embed=embed)
项目:behelper    作者:istommao    | 项目源码 | 文件源码
def fold_to_ascii(self, text):
        """Fold ``text`` down to its closest plain-ASCII spelling.

        .. versionadded:: 1.3

        .. note:: Only a subset of European languages is covered.

        Text for which ``isascii(text)`` is true is returned untouched.
        Otherwise every character is looked up in ``ASCII_REPLACEMENTS``
        (falling back to the character itself), the result is
        NFKD-normalized and anything that still cannot be encoded as
        ASCII is dropped.

        :param text: text to convert
        :type text: ``unicode``
        :returns: text containing only ASCII characters
        :rtype: ``unicode``

        """
        if not isascii(text):
            substituted = ''.join(ASCII_REPLACEMENTS.get(ch, ch) for ch in text)
            decomposed = unicodedata.normalize('NFKD', substituted)
            # 'ignore' silently discards whatever has no ASCII encoding
            return unicode(decomposed.encode('ascii', 'ignore'))
        return text
项目:radar    作者:amoose136    | 项目源码 | 文件源码
def fold_to_ascii(self, text):
        """Fold ``text`` down to its closest plain-ASCII spelling.

        .. versionadded:: 1.3

        .. note:: Only a subset of European languages is covered.

        Text for which ``isascii(text)`` is true is returned untouched.
        Otherwise every character is looked up in ``ASCII_REPLACEMENTS``
        (falling back to the character itself), the result is
        NFKD-normalized and anything that still cannot be encoded as
        ASCII is dropped.

        :param text: text to convert
        :type text: ``unicode``
        :returns: text containing only ASCII characters
        :rtype: ``unicode``

        """
        if not isascii(text):
            substituted = ''.join(ASCII_REPLACEMENTS.get(ch, ch) for ch in text)
            decomposed = unicodedata.normalize('NFKD', substituted)
            # 'ignore' silently discards whatever has no ASCII encoding
            return unicode(decomposed.encode('ascii', 'ignore'))
        return text
项目:Mmrz-Sync    作者:zhanglintc    | 项目源码 | 文件源码
def filename(self):
        ''' Client-side name of the file, normalized so that it is safe to
            use on the file system. An empty result is reported as 'empty'.

            The final name contains only ASCII letters, digits, dashes,
            underscores and dots. Accents are removed where possible, runs
            of whitespace/dashes collapse to a single dash, leading or
            trailing dots and dashes are stripped, and the result is cut
            to 255 characters.
        '''
        raw = self.raw_filename
        if not isinstance(raw, unicode):
            raw = raw.decode('utf8', 'ignore')
        # Decompose, then drop everything that has no ASCII representation
        decomposed = normalize('NFKD', raw)
        ascii_only = decomposed.encode('ASCII', 'ignore').decode('ASCII')
        # Treat backslashes as path separators and keep the last component
        base = os.path.basename(ascii_only.replace('\\', os.path.sep))
        kept = re.sub(r'[^a-zA-Z0-9-_.\s]', '', base).strip()
        dashed = re.sub(r'[-\s]+', '-', kept).strip('.-')
        return dashed[:255] or 'empty'
项目:ChemDataExtractor    作者:mcs07    | 项目源码 | 文件源码
def __init__(self, form='NFKC', strip=True, collapse=True, hyphens=False, quotes=False, ellipsis=False,
                 slashes=False, tildes=False):
        """Store the normalization options on the instance.

        :param string form: Normal form for unicode normalization.
        :param bool strip: Whether to strip whitespace from start and end.
        :param bool collapse: Whether to collapse all whitespace (tabs, newlines) down to single spaces.
        :param bool hyphens: Whether to normalize all hyphens, minuses and dashes to the ASCII hyphen-minus character.
        :param bool quotes: Whether to normalize all apostrophes, quotes and primes to the ASCII quote character.
        :param bool ellipsis: Whether to normalize ellipses to three full stops.
        :param bool slashes: Whether to normalize slash characters to the ASCII slash character.
        :param bool tildes: Whether to normalize tilde characters to the ASCII tilde character.
        """
        # Unicode normal form and whitespace handling
        self.form, self.strip, self.collapse = form, strip, collapse
        # Switches for the individual character families
        self.hyphens, self.quotes, self.ellipsis = hyphens, quotes, ellipsis
        self.slashes, self.tildes = slashes, tildes
项目:plugin.audio.spotify    作者:marcelveldt    | 项目源码 | 文件源码
def normalize_string(text):
    """Strip or replace special characters, then return the text in NFKD form.

    Slashes and backslashes become dashes; ``: < > * ? | ( ) "`` are
    removed; surrounding whitespace and trailing dots are trimmed.  The
    result is passed through ``try_decode`` before normalization.
    """
    import unicodedata
    substitutions = (
        (":", ""), ("/", "-"), ("\\", "-"), ("<", ""), (">", ""), ("*", ""),
        ("?", ""), ('|', ""), ('(', ""), (')', ""), ("\"", ""),
    )
    for unwanted, substitute in substitutions:
        text = text.replace(unwanted, substitute)
    text = text.strip().rstrip('.')
    return unicodedata.normalize('NFKD', try_decode(text))
项目:ynm3k    作者:socrateslee    | 项目源码 | 文件源码
def filename(self):
        ''' Client-side name of the file, normalized so that it is safe to
            use on the file system. An empty result is reported as 'empty'.

            The final name contains only ASCII letters, digits, dashes,
            underscores and dots. Accents are removed where possible, runs
            of whitespace/dashes collapse to a single dash, leading or
            trailing dots and dashes are stripped, and the result is cut
            to 255 characters.
        '''
        raw = self.raw_filename
        if not isinstance(raw, unicode):
            raw = raw.decode('utf8', 'ignore')
        # Decompose, then drop everything that has no ASCII representation
        decomposed = normalize('NFKD', raw)
        ascii_only = decomposed.encode('ASCII', 'ignore').decode('ASCII')
        # Treat backslashes as path separators and keep the last component
        base = os.path.basename(ascii_only.replace('\\', os.path.sep))
        kept = re.sub(r'[^a-zA-Z0-9-_.\s]', '', base).strip()
        dashed = re.sub(r'[-\s]+', '-', kept).strip('.-')
        return dashed[:255] or 'empty'
项目:alfred-workflows    作者:arthurhammer    | 项目源码 | 文件源码
def fold_to_ascii(self, text):
        """Return ``text`` with non-ASCII characters folded to ASCII.

        .. versionadded:: 1.3

        .. note:: This only works for a subset of European languages.

        Text for which ``isascii`` is true is returned unchanged. Otherwise
        each character with an entry in ``ASCII_REPLACEMENTS`` is swapped
        for that entry, the result is NFKD-decomposed, and whatever still
        cannot be encoded as ASCII is dropped.

        :param text: text to convert
        :type text: ``unicode``
        :returns: text containing only ASCII characters
        :rtype: ``unicode``

        """
        if not isascii(text):
            substituted = ''.join(ASCII_REPLACEMENTS.get(ch, ch) for ch in text)
            decomposed = unicodedata.normalize('NFKD', substituted)
            return unicode(decomposed.encode('ascii', 'ignore'))
        return text
项目:alfred-zebra    作者:r0x73    | 项目源码 | 文件源码
def fold_to_ascii(self, text):
        """Fold ``text`` down to its closest ASCII-only form.

        .. versionadded:: 1.3

        .. note:: This only works for a subset of European languages.

        Already-ASCII input (per ``isascii``) is handed back untouched.
        Other input is first mapped through ``ASCII_REPLACEMENTS``, then
        NFKD-normalised and encoded to ASCII with unencodable characters
        ignored.

        :param text: text to convert
        :type text: ``unicode``
        :returns: text containing only ASCII characters
        :rtype: ``unicode``

        """
        if isascii(text):
            return text
        mapped = [ASCII_REPLACEMENTS.get(char, char) for char in text]
        normalised = unicodedata.normalize('NFKD', ''.join(mapped))
        ascii_bytes = normalised.encode('ascii', 'ignore')
        return unicode(ascii_bytes)
项目:warriorframework    作者:warriorframework    | 项目源码 | 文件源码
def filename(self):
        """ Sanitised version of ``self.raw_filename``.

            Non-unicode input is decoded as UTF-8, ignoring bad bytes. After
            NFKD decomposition all non-ASCII characters are dropped, which
            removes accents where possible. Backslashes count as path
            separators and only the base name is kept. Anything that is not
            an ASCII letter, digit, dash, underscore, dot or whitespace is
            removed; whitespace and dash runs become one dash; leading and
            trailing dots or dashes are stripped. At most 255 characters are
            returned, or ``'empty'`` when the cleaned name is empty.
        """
        name = self.raw_filename
        if not isinstance(name, unicode):
            name = name.decode('utf8', 'ignore')
        name = normalize('NFKD', name)
        name = name.encode('ASCII', 'ignore')
        name = name.decode('ASCII')
        name = name.replace('\\', os.path.sep)
        name = os.path.basename(name)
        name = re.sub(r'[^a-zA-Z0-9-_.\s]', '', name)
        name = name.strip()
        name = re.sub(r'[-\s]+', '-', name)
        name = name.strip('.-')
        return name[:255] if name else 'empty'
项目:warriorframework    作者:warriorframework    | 项目源码 | 文件源码
def filename(self):
        """ Client-supplied file name, cleaned up for use on a file system.

            The raw name is decoded from UTF-8 if it is not already unicode,
            NFKD-decomposed and stripped of every non-ASCII character (so
            accents disappear where possible). Only the final path component
            is used, with backslashes treated as separators. Characters
            outside ASCII letters, digits, dashes, underscores, dots and
            whitespace are deleted, whitespace/dash runs are replaced by a
            single dash, and leading or trailing dots and dashes are removed.
            The name is limited to 255 characters; an empty result is
            reported as ``'empty'``.
        """
        disallowed = r'[^a-zA-Z0-9-_.\s]'
        separators = r'[-\s]+'

        source = self.raw_filename
        if isinstance(source, unicode):
            decoded = source
        else:
            decoded = source.decode('utf8', 'ignore')
        folded = normalize('NFKD', decoded).encode('ASCII', 'ignore').decode('ASCII')
        base = os.path.basename(folded.replace('\\', os.path.sep))
        cleaned = re.sub(disallowed, '', base).strip()
        cleaned = re.sub(separators, '-', cleaned).strip('.-')
        return cleaned[:255] or 'empty'
项目:touch-pay-client    作者:HackPucBemobi    | 项目源码 | 文件源码
def urlify(s, maxlen=80, keep_underscores=False):
    """
    Converts incoming string to a simplified ASCII subset.

    The input is lower-cased and NFKD-normalized, HTML entities are removed,
    whitespace runs become hyphens and characters outside the allowed set
    are dropped. Any run of two or more hyphens/underscores is collapsed to
    a single hyphen, leading and trailing hyphens are stripped, and the
    result is truncated to ``maxlen`` characters.

    :param s: value to convert; it is passed through ``to_unicode`` first
    :param maxlen: maximum length of the returned string
    :param keep_underscores: if true, underscores are retained in the
        string; otherwise they are translated to hyphens (default)
    :returns: the simplified string
    """
    # All patterns are raw strings: sequences such as ``\w`` and ``\s`` are
    # invalid escapes in ordinary string literals and trigger warnings on
    # current Python versions.
    s = to_unicode(s)                     # to unicode
    s = s.lower()                         # to lowercase
    s = unicodedata.normalize('NFKD', s)  # replace special characters
    s = to_native(s, charset='ascii', errors='ignore')       # encode as ASCII
    s = re.sub(r'&\w+?;', '', s)          # strip html entities
    if keep_underscores:
        s = re.sub(r'\s+', '-', s)        # whitespace to hyphens
        s = re.sub(r'[^\w\-]', '', s)
        # strip all but alphanumeric/underscore/hyphen
    else:
        s = re.sub(r'[\s_]+', '-', s)     # whitespace & underscores to hyphens
        s = re.sub(r'[^a-z0-9\-]', '', s)  # strip all but alphanumeric/hyphen
    s = re.sub(r'[-_][-_]+', '-', s)      # collapse strings of hyphens
    s = s.strip('-')                      # remove leading and trailing hyphens
    return s[:maxlen]                     # enforce maximum length
项目:ln2sql    作者:FerreroJeremy    | 项目源码 | 文件源码
def remove_accents(self, string):
        """Return ``str(string)`` NFKD-decomposed with combining marks removed."""
        decomposed = unicodedata.normalize('NFKD', str(string))
        return "".join(ch for ch in decomposed if unicodedata.combining(ch) == 0)
项目:ln2sql    作者:FerreroJeremy    | 项目源码 | 文件源码
def remove_accents(self, string):
        """Strip accents: decompose ``str(string)`` (NFKD) and drop combining characters."""
        base_chars = []
        for char in unicodedata.normalize('NFKD', str(string)):
            if unicodedata.combining(char):
                continue  # combining mark, i.e. the accent part
            base_chars.append(char)
        return "".join(base_chars)
项目:ln2sql    作者:FerreroJeremy    | 项目源码 | 文件源码
def remove_accents(self, string):
        """Return the text of ``string`` without combining marks.

        The value is converted with ``str()``, decomposed to NFKD, and every
        character that ``unicodedata.combining`` reports as combining is
        left out of the result.
        """
        as_text = str(string)
        nfkd_text = unicodedata.normalize('NFKD', as_text)
        kept = [symbol for symbol in nfkd_text if not unicodedata.combining(symbol)]
        return "".join(kept)
项目:ln2sql    作者:FerreroJeremy    | 项目源码 | 文件源码
def remove_accents(self, string):
        """Drop combining characters from the NFKD form of ``str(string)``."""
        def is_base(char):
            # combining() is 0 for characters that are not combining marks
            return not unicodedata.combining(char)

        return "".join([c for c in unicodedata.normalize('NFKD', str(string)) if is_base(c)])
项目:alelo_ofx    作者:dantas    | 项目源码 | 文件源码
def _convert_transaction(transaction):
    """Convert one transaction mapping to an ``STMTTRN`` element.

    The ``date`` entry is formatted as ``YYYYMMDDHHMMSS`` and used for both
    ``DTPOSTED`` and ``FITID``. ``TRNAMT`` is ``signal`` concatenated with
    ``amount``. ``MEMO`` is the description NFD-decomposed and encoded to
    ASCII with unencodable characters dropped.

    :param transaction: mapping with ``date``, ``signal``, ``amount`` and
        ``description`` entries
    :returns: whatever ``dict2xml.convert`` returns for the element
    """
    timestamp = transaction['date'].strftime("%Y%m%d%H%M%S")
    signed_amount = transaction['signal'] + transaction['amount']
    decomposed = unicodedata.normalize('NFD', transaction['description'])
    memo = decomposed.encode('ascii', 'ignore')
    return dict2xml.convert("STMTTRN", {
        "DTPOSTED": timestamp,
        "FITID": timestamp,
        "TRNAMT": signed_amount,
        "MEMO": memo,
    })
项目:alfred-mpd    作者:deanishe    | 项目源码 | 文件源码
def decode(self, text, encoding=None, normalization=None):
        """Return ``text`` as normalised unicode.

        When ``encoding`` and/or ``normalization`` is not given (or is
        otherwise falsy), the ``input_encoding`` and ``normalization``
        values stored on this object are used instead.

        :param text: string
        :type text: encoded or Unicode string. If ``text`` is already a
            Unicode string, it will only be normalised.
        :param encoding: The text encoding to use to decode ``text`` to
            Unicode.
        :type encoding: ``unicode`` or ``None``
        :param normalization: The normalisation form to apply to ``text``.
        :type normalization: ``unicode`` or ``None``
        :returns: decoded and normalised ``unicode``

        :class:`Workflow` uses "NFC" normalisation by default. This is the
        standard for Python and will work well with data from the web (via
        :mod:`~workflow.web` or :mod:`json`).

        OS X, on the other hand, uses "NFD" normalisation (nearly), so data
        coming from the system (e.g. via :mod:`subprocess` or
        :func:`os.listdir`/:mod:`os.path`) may not match. You should either
        normalise this data, too, or change the default normalisation used by
        :class:`Workflow`.

        """
        if not encoding:
            encoding = self._input_encoding
        if not normalization:
            normalization = self._normalizsation
        # Only byte strings need decoding; unicode input is just normalised.
        decoded = text if isinstance(text, unicode) else unicode(text, encoding)
        return unicodedata.normalize(normalization, decoded)
项目:alfred-mpd    作者:deanishe    | 项目源码 | 文件源码
def uni(s):
        """Decode ``s`` from UTF-8 and return it in NFD-normalised form."""
        return normalize('NFD', s.decode('utf-8'))
项目:alfred-mpd    作者:deanishe    | 项目源码 | 文件源码
def text(self):
        """Response body decoded to unicode and NFC-normalised.

        When ``self.encoding`` is falsy, ``self.content`` is returned
        as-is, without decoding.

        :returns: Body of HTTP response
        :rtype: :class:`unicode` or :class:`str`

        """
        if not self.encoding:
            return self.content
        decoded = unicode(self.content, self.encoding)
        return unicodedata.normalize('NFC', decoded)
项目:python-    作者:secondtonone1    | 项目源码 | 文件源码
def decompose(path):
    """Return ``path`` in Unicode Normalization Form D.

    Text paths are normalised directly. Other values are decoded as UTF-8,
    normalised and re-encoded to UTF-8; if a ``UnicodeError`` occurs along
    the way the value reached at that point is returned.
    """
    if not isinstance(path, six.text_type):
        try:
            path = path.decode('utf-8')
            path = unicodedata.normalize('NFD', path)
            path = path.encode('utf-8')
        except UnicodeError:
            # Not UTF-8
            pass
        return path
    return unicodedata.normalize('NFD', path)
项目:my-first-blog    作者:AnkurBegining    | 项目源码 | 文件源码
def check_nfc(label):
    """Raise ``IDNAError`` unless ``label`` is already in Normalization Form C."""
    normalized = unicodedata.normalize('NFC', label)
    if label == normalized:
        return
    raise IDNAError('Label must be in Normalization Form C')
项目:my-first-blog    作者:AnkurBegining    | 项目源码 | 文件源码
def uts46_remap(domain, std3_rules=True, transitional=False):
    """Re-map the characters in the string according to UTS46 processing.

    Every character is looked up in ``uts46data``. Depending on the status
    stored in its row the character is kept, replaced by the row's mapping,
    dropped, or rejected. The remapped text is returned in NFC.

    :param domain: text to remap
    :param bool std3_rules: when true, code points with status ``"3"`` are
        rejected; when false they are kept (row has no mapping) or replaced
        by their mapping
    :param bool transitional: when true, code points with status ``"D"``
        are replaced by their mapping instead of being kept
    :returns: the remapped, NFC-normalised string
    :raises InvalidCodepoint: if a character is not allowed, or its table
        lookup fails
    """
    from .uts46data import uts46data
    pieces = []
    try:
        for pos, char in enumerate(domain):
            code_point = ord(char)
            # Code points below 256 index the table directly; larger ones
            # are located with a bisect over the rows (presumably sorted by
            # starting code point - confirm against uts46data).
            uts46row = uts46data[code_point if code_point < 256 else
                bisect.bisect_left(uts46data, (code_point, "Z")) - 1]
            status = uts46row[1]
            # Rows only carry a third element when a mapping exists.
            replacement = uts46row[2] if len(uts46row) == 3 else None
            # Status "3" rows are disallowed under STD3 rules, so they are
            # only accepted when std3_rules is off.
            if (status == "V" or
                    (status == "D" and not transitional) or
                    (status == "3" and not std3_rules and replacement is None)):
                pieces.append(char)
            elif replacement is not None and (status == "M" or
                    (status == "3" and not std3_rules) or
                    (status == "D" and transitional)):
                pieces.append(replacement)
            elif status != "I":
                # Anything else that is not ignorable is rejected; reuse the
                # IndexError handler below to build the error.
                raise IndexError()
        return unicodedata.normalize("NFC", u"".join(pieces))
    except IndexError:
        raise InvalidCodepoint(
            "Codepoint {0} not allowed at position {1} in {2}".format(
            _unot(code_point), pos + 1, repr(domain)))