Python codecs 模块,utf_8_decode() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用codecs.utf_8_decode()

项目:python-    作者:secondtonone1    | 项目源码 | 文件源码
def _buffer_decode(self, input, errors, final):
        if self.first:
            if len(input) < 3:
                if codecs.BOM_UTF8.startswith(input):
                    # not enough data to decide if this really is a BOM
                    # => try again on the next call
                    return ("", 0)
                else:
                    self.first = 0
            else:
                self.first = 0
                if input[:3] == codecs.BOM_UTF8:
                    (output, consumed) = \
                       codecs.utf_8_decode(input[3:], errors, final)
                    return (output, consumed+3)
        return codecs.utf_8_decode(input, errors, final)
项目:centos-base-consul    作者:zeroc0d3lab    | 项目源码 | 文件源码
def __init__(self, stream):
        self.name = None
        self.stream = None
        self.stream_pointer = 0
        self.eof = True
        self.buffer = ''
        self.pointer = 0
        self.full_buffer = unicode('')
        self.full_pointer = 0
        self.raw_buffer = None
        self.raw_decode = codecs.utf_8_decode
        self.encoding = 'utf-8'
        self.index = 0
        self.line = 0
        self.column = 0

        self.stream = stream
        self.name = getattr(stream, 'name', '<file>')
        self.eof = False
        self.raw_buffer = None

        while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2):
            self.update_raw()
        self.update(1)
项目:ivaochdoc    作者:ivaoch    | 项目源码 | 文件源码
def _buffer_decode(self, input, errors, final):
        if self.first:
            if len(input) < 3:
                if codecs.BOM_UTF8.startswith(input):
                    # not enough data to decide if this really is a BOM
                    # => try again on the next call
                    return ("", 0)
                else:
                    self.first = 0
            else:
                self.first = 0
                if input[:3] == codecs.BOM_UTF8:
                    (output, consumed) = \
                       codecs.utf_8_decode(input[3:], errors, final)
                    return (output, consumed+3)
        return codecs.utf_8_decode(input, errors, final)
项目:zippy    作者:securesystemslab    | 项目源码 | 文件源码
def _buffer_decode(self, input, errors, final):
        if self.first:
            if len(input) < 3:
                if codecs.BOM_UTF8.startswith(input):
                    # not enough data to decide if this really is a BOM
                    # => try again on the next call
                    return ("", 0)
                else:
                    self.first = 0
            else:
                self.first = 0
                if input[:3] == codecs.BOM_UTF8:
                    (output, consumed) = \
                       codecs.utf_8_decode(input[3:], errors, final)
                    return (output, consumed+3)
        return codecs.utf_8_decode(input, errors, final)
项目:zippy    作者:securesystemslab    | 项目源码 | 文件源码
def test_decode_unicode(self):
        # Most decoders don't accept unicode input
        decoders = [
            codecs.utf_7_decode,
            codecs.utf_8_decode,
            codecs.utf_16_le_decode,
            codecs.utf_16_be_decode,
            codecs.utf_16_ex_decode,
            codecs.utf_32_decode,
            codecs.utf_32_le_decode,
            codecs.utf_32_be_decode,
            codecs.utf_32_ex_decode,
            codecs.latin_1_decode,
            codecs.ascii_decode,
            codecs.charmap_decode,
        ]
        if hasattr(codecs, "mbcs_decode"):
            decoders.append(codecs.mbcs_decode)
        for decoder in decoders:
            self.assertRaises(TypeError, decoder, "xxx")
项目:news-for-good    作者:thecodinghub    | 项目源码 | 文件源码
def _buffer_decode(self, input, errors, final):
        if self.first:
            if len(input) < 3:
                if codecs.BOM_UTF8.startswith(input):
                    # not enough data to decide if this really is a BOM
                    # => try again on the next call
                    return ("", 0)
                else:
                    self.first = 0
            else:
                self.first = 0
                if input[:3] == codecs.BOM_UTF8:
                    (output, consumed) = \
                       codecs.utf_8_decode(input[3:], errors, final)
                    return (output, consumed+3)
        return codecs.utf_8_decode(input, errors, final)
项目:Tencent_Cartoon_Download    作者:Fretice    | 项目源码 | 文件源码
def _buffer_decode(self, input, errors, final):
        if self.first:
            if len(input) < 3:
                if codecs.BOM_UTF8.startswith(input):
                    # not enough data to decide if this really is a BOM
                    # => try again on the next call
                    return ("", 0)
                else:
                    self.first = 0
            else:
                self.first = 0
                if input[:3] == codecs.BOM_UTF8:
                    (output, consumed) = \
                       codecs.utf_8_decode(input[3:], errors, final)
                    return (output, consumed+3)
        return codecs.utf_8_decode(input, errors, final)
项目:fieldsight-kobocat    作者:awemulya    | 项目源码 | 文件源码
def _buffer_decode(self, input, errors, final):
        if self.first:
            if len(input) < 3:
                if codecs.BOM_UTF8.startswith(input):
                    # not enough data to decide if this really is a BOM
                    # => try again on the next call
                    return ("", 0)
                else:
                    self.first = 0
            else:
                self.first = 0
                if input[:3] == codecs.BOM_UTF8:
                    (output, consumed) = \
                       codecs.utf_8_decode(input[3:], errors, final)
                    return (output, consumed+3)
        return codecs.utf_8_decode(input, errors, final)
项目:islam-buddy    作者:hamir    | 项目源码 | 文件源码
def determine_encoding(self):
        # type: () -> None
        while not self.eof and (self.raw_buffer is None or
                                len(self.raw_buffer) < 2):
            self.update_raw()
        if isinstance(self.raw_buffer, binary_type):
            if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
                self.raw_decode = codecs.utf_16_le_decode  # type: ignore
                self.encoding = 'utf-16-le'
            elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
                self.raw_decode = codecs.utf_16_be_decode  # type: ignore
                self.encoding = 'utf-16-be'
            else:
                self.raw_decode = codecs.utf_8_decode  # type: ignore
                self.encoding = 'utf-8'
        self.update(1)

    # 4 if 32 bit unicode supported, 2 e.g. on MacOS (issue 56)
项目:web_ctp    作者:molebot    | 项目源码 | 文件源码
def test_decode_unicode(self):
        # Most decoders don't accept unicode input
        decoders = [
            codecs.utf_7_decode,
            codecs.utf_8_decode,
            codecs.utf_16_le_decode,
            codecs.utf_16_be_decode,
            codecs.utf_16_ex_decode,
            codecs.utf_32_decode,
            codecs.utf_32_le_decode,
            codecs.utf_32_be_decode,
            codecs.utf_32_ex_decode,
            codecs.latin_1_decode,
            codecs.ascii_decode,
            codecs.charmap_decode,
        ]
        if hasattr(codecs, "mbcs_decode"):
            decoders.append(codecs.mbcs_decode)
        for decoder in decoders:
            self.assertRaises(TypeError, decoder, "xxx")
项目:es_email_intel    作者:xujun10110    | 项目源码 | 文件源码
def send_mail(json_string):
    # Extract sender and subject
    json_blob = json.loads(json_string)
    sender = json_blob['headers']['From']
    sender = re.sub('^.*\<', '', sender)
    EMAIL_TO = re.sub('\>.*$', '', sender)
    if BB_DEBUG: syslog.syslog(syslog.LOG_ERR, 'Invoked send_mail(json_string) for '+EMAIL_TO)
    subj = common_functions.extract_subject(json_blob['headers'])
    if BB_DEBUG: syslog.syslog(syslog.LOG_ERR, 'Invoked send_mail(json_string) subject '+subj)
    #SUBJECT = 'Extracted IOCs for: '+subj.decode("utf-8", "ignore")
    SUBJECT = 'Extracted IOCs for: '+str(codecs.utf_8_decode(subj.encode('utf8'))[0])
    if BB_DEBUG: syslog.syslog(syslog.LOG_ERR, 'Invoked send_mail(json_string) subject '+SUBJECT)

    msg = MIMEText(json2string(json_string), _charset='utf-8')

    msg['Subject'] = SUBJECT 
    msg['From'] = EMAIL_FROM
    msg['To'] = EMAIL_TO
    if BB_DEBUG: syslog.syslog(syslog.LOG_ERR, 'Invoked send_mail(json_string) msg composed ')

    server = smtplib.SMTP(EMAIL_SERVER)
    server.sendmail(EMAIL_FROM, EMAIL_TO, msg.as_string())
    if BB_DEBUG: syslog.syslog(syslog.LOG_ERR, 'Finished')
项目:CloudPrint    作者:William-An    | 项目源码 | 文件源码
def _buffer_decode(self, input, errors, final):
        if self.first:
            if len(input) < 3:
                if codecs.BOM_UTF8.startswith(input):
                    # not enough data to decide if this really is a BOM
                    # => try again on the next call
                    return ("", 0)
                else:
                    self.first = 0
            else:
                self.first = 0
                if input[:3] == codecs.BOM_UTF8:
                    (output, consumed) = \
                       codecs.utf_8_decode(input[3:], errors, final)
                    return (output, consumed+3)
        return codecs.utf_8_decode(input, errors, final)
项目:ouroboros    作者:pybee    | 项目源码 | 文件源码
def _buffer_decode(self, input, errors, final):
        if self.first:
            if len(input) < 3:
                if codecs.BOM_UTF8.startswith(input):
                    # not enough data to decide if this really is a BOM
                    # => try again on the next call
                    return ("", 0)
                else:
                    self.first = 0
            else:
                self.first = 0
                if input[:3] == codecs.BOM_UTF8:
                    (output, consumed) = \
                       codecs.utf_8_decode(input[3:], errors, final)
                    return (output, consumed+3)
        return codecs.utf_8_decode(input, errors, final)
项目:ouroboros    作者:pybee    | 项目源码 | 文件源码
def test_decode_unicode(self):
        # Most decoders don't accept unicode input
        decoders = [
            codecs.utf_7_decode,
            codecs.utf_8_decode,
            codecs.utf_16_le_decode,
            codecs.utf_16_be_decode,
            codecs.utf_16_ex_decode,
            codecs.utf_32_decode,
            codecs.utf_32_le_decode,
            codecs.utf_32_be_decode,
            codecs.utf_32_ex_decode,
            codecs.latin_1_decode,
            codecs.ascii_decode,
            codecs.charmap_decode,
        ]
        if hasattr(codecs, "mbcs_decode"):
            decoders.append(codecs.mbcs_decode)
        for decoder in decoders:
            self.assertRaises(TypeError, decoder, "xxx")
项目:python-    作者:secondtonone1    | 项目源码 | 文件源码
def decode(input, errors='strict'):
    return codecs.utf_8_decode(input, errors, True)
项目:python-    作者:secondtonone1    | 项目源码 | 文件源码
def decode(input, errors='strict'):
    prefix = 0
    if input[:3] == codecs.BOM_UTF8:
        input = input[3:]
        prefix = 3
    (output, consumed) = codecs.utf_8_decode(input, errors, True)
    return (output, consumed+prefix)
项目:python-    作者:secondtonone1    | 项目源码 | 文件源码
def decode(self, input, errors='strict'):
        if len(input) < 3:
            if codecs.BOM_UTF8.startswith(input):
                # not enough data to decide if this is a BOM
                # => try again on the next call
                return ("", 0)
        elif input[:3] == codecs.BOM_UTF8:
            self.decode = codecs.utf_8_decode
            (output, consumed) = codecs.utf_8_decode(input[3:],errors)
            return (output, consumed+3)
        # (else) no BOM present
        self.decode = codecs.utf_8_decode
        return codecs.utf_8_decode(input, errors)

### encodings module API
项目:kinect-2-libras    作者:inessadl    | 项目源码 | 文件源码
def decode(input, errors='strict'):
    return codecs.utf_8_decode(input, errors, True)
项目:kinect-2-libras    作者:inessadl    | 项目源码 | 文件源码
def decode(input, errors='strict'):
    prefix = 0
    if input[:3] == codecs.BOM_UTF8:
        input = input[3:]
        prefix = 3
    (output, consumed) = codecs.utf_8_decode(input, errors, True)
    return (output, consumed+prefix)
项目:kinect-2-libras    作者:inessadl    | 项目源码 | 文件源码
def _buffer_decode(self, input, errors, final):
        if self.first:
            if len(input) < 3:
                if codecs.BOM_UTF8.startswith(input):
                    # not enough data to decide if this really is a BOM
                    # => try again on the next call
                    return (u"", 0)
                else:
                    self.first = None
            else:
                self.first = None
                if input[:3] == codecs.BOM_UTF8:
                    (output, consumed) = codecs.utf_8_decode(input[3:], errors, final)
                    return (output, consumed+3)
        return codecs.utf_8_decode(input, errors, final)
项目:kinect-2-libras    作者:inessadl    | 项目源码 | 文件源码
def decode(self, input, errors='strict'):
        if len(input) < 3:
            if codecs.BOM_UTF8.startswith(input):
                # not enough data to decide if this is a BOM
                # => try again on the next call
                return (u"", 0)
        elif input[:3] == codecs.BOM_UTF8:
            self.decode = codecs.utf_8_decode
            (output, consumed) = codecs.utf_8_decode(input[3:],errors)
            return (output, consumed+3)
        # (else) no BOM present
        self.decode = codecs.utf_8_decode
        return codecs.utf_8_decode(input, errors)

### encodings module API
项目:swjtu-pyscraper    作者:Desgard    | 项目源码 | 文件源码
def decode(input, errors='strict'):
    prefix = 0
    if input[:3] == codecs.BOM_UTF8:
        input = input[3:]
        prefix = 3
    (output, consumed) = codecs.utf_8_decode(input, errors, True)
    return (output, consumed+prefix)
项目:swjtu-pyscraper    作者:Desgard    | 项目源码 | 文件源码
def _buffer_decode(self, input, errors, final):
        if self.first:
            if len(input) < 3:
                if codecs.BOM_UTF8.startswith(input):
                    # not enough data to decide if this really is a BOM
                    # => try again on the next call
                    return (u"", 0)
                else:
                    self.first = None
            else:
                self.first = None
                if input[:3] == codecs.BOM_UTF8:
                    (output, consumed) = codecs.utf_8_decode(input[3:], errors, final)
                    return (output, consumed+3)
        return codecs.utf_8_decode(input, errors, final)
项目:swjtu-pyscraper    作者:Desgard    | 项目源码 | 文件源码
def decode(self, input, errors='strict'):
        if len(input) < 3:
            if codecs.BOM_UTF8.startswith(input):
                # not enough data to decide if this is a BOM
                # => try again on the next call
                return (u"", 0)
        elif input[:3] == codecs.BOM_UTF8:
            self.decode = codecs.utf_8_decode
            (output, consumed) = codecs.utf_8_decode(input[3:],errors)
            return (output, consumed+3)
        # (else) no BOM present
        self.decode = codecs.utf_8_decode
        return codecs.utf_8_decode(input, errors)

### encodings module API
项目:swjtu-pyscraper    作者:Desgard    | 项目源码 | 文件源码
def decode(input, errors='strict'):
    return codecs.utf_8_decode(input, errors, True)
项目:noc-orchestrator    作者:DirceuSilvaLabs    | 项目源码 | 文件源码
def determine_encoding(self):
        while not self.eof and len(self.raw_buffer) < 2:
            self.update_raw()
        if not isinstance(self.raw_buffer, unicode):
            if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
                self.raw_decode = codecs.utf_16_le_decode
                self.encoding = 'utf-16-le'
            elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
                self.raw_decode = codecs.utf_16_be_decode
                self.encoding = 'utf-16-be'
            else:
                self.raw_decode = codecs.utf_8_decode
                self.encoding = 'utf-8'
        self.update(1)
项目:noc-orchestrator    作者:DirceuSilvaLabs    | 项目源码 | 文件源码
def determine_encoding(self):
        while not self.eof and len(self.raw_buffer) < 2:
            self.update_raw()
        if not isinstance(self.raw_buffer, unicode):
            if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
                self.raw_decode = codecs.utf_16_le_decode
                self.encoding = 'utf-16-le'
            elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
                self.raw_decode = codecs.utf_16_be_decode
                self.encoding = 'utf-16-be'
            else:
                self.raw_decode = codecs.utf_8_decode
                self.encoding = 'utf-8'
        self.update(1)
项目:petronia    作者:groboclown    | 项目源码 | 文件源码
def determine_encoding(self):
        while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2):
            self.update_raw()
        if isinstance(self.raw_buffer, bytes):
            if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
                self.raw_decode = codecs.utf_16_le_decode
                self.encoding = 'utf-16-le'
            elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
                self.raw_decode = codecs.utf_16_be_decode
                self.encoding = 'utf-16-be'
            else:
                self.raw_decode = codecs.utf_8_decode
                self.encoding = 'utf-8'
        self.update(1)
项目:hostapd-mana    作者:adde88    | 项目源码 | 文件源码
def decode(input, errors='strict'):
    return codecs.utf_8_decode(input, errors, True)
项目:hostapd-mana    作者:adde88    | 项目源码 | 文件源码
def decode(input, errors='strict'):
    prefix = 0
    if input[:3] == codecs.BOM_UTF8:
        input = input[3:]
        prefix = 3
    (output, consumed) = codecs.utf_8_decode(input, errors, True)
    return (output, consumed+prefix)
项目:hostapd-mana    作者:adde88    | 项目源码 | 文件源码
def _buffer_decode(self, input, errors, final):
        if self.first:
            if len(input) < 3:
                if codecs.BOM_UTF8.startswith(input):
                    # not enough data to decide if this really is a BOM
                    # => try again on the next call
                    return (u"", 0)
                else:
                    self.first = None
            else:
                self.first = None
                if input[:3] == codecs.BOM_UTF8:
                    (output, consumed) = codecs.utf_8_decode(input[3:], errors, final)
                    return (output, consumed+3)
        return codecs.utf_8_decode(input, errors, final)
项目:hostapd-mana    作者:adde88    | 项目源码 | 文件源码
def decode(self, input, errors='strict'):
        if len(input) < 3:
            if codecs.BOM_UTF8.startswith(input):
                # not enough data to decide if this is a BOM
                # => try again on the next call
                return (u"", 0)
        elif input[:3] == codecs.BOM_UTF8:
            self.decode = codecs.utf_8_decode
            (output, consumed) = codecs.utf_8_decode(input[3:],errors)
            return (output, consumed+3)
        # (else) no BOM present
        self.decode = codecs.utf_8_decode
        return codecs.utf_8_decode(input, errors)

### encodings module API
项目:Sci-Finder    作者:snverse    | 项目源码 | 文件源码
def decode(input, errors='strict'):
    prefix = 0
    if input[:3] == codecs.BOM_UTF8:
        input = input[3:]
        prefix = 3
    (output, consumed) = codecs.utf_8_decode(input, errors, True)
    return (output, consumed+prefix)
项目:Sci-Finder    作者:snverse    | 项目源码 | 文件源码
def _buffer_decode(self, input, errors, final):
        if self.first:
            if len(input) < 3:
                if codecs.BOM_UTF8.startswith(input):
                    # not enough data to decide if this really is a BOM
                    # => try again on the next call
                    return (u"", 0)
                else:
                    self.first = None
            else:
                self.first = None
                if input[:3] == codecs.BOM_UTF8:
                    (output, consumed) = codecs.utf_8_decode(input[3:], errors, final)
                    return (output, consumed+3)
        return codecs.utf_8_decode(input, errors, final)
项目:Sci-Finder    作者:snverse    | 项目源码 | 文件源码
def decode(self, input, errors='strict'):
        if len(input) < 3:
            if codecs.BOM_UTF8.startswith(input):
                # not enough data to decide if this is a BOM
                # => try again on the next call
                return (u"", 0)
        elif input[:3] == codecs.BOM_UTF8:
            self.decode = codecs.utf_8_decode
            (output, consumed) = codecs.utf_8_decode(input[3:],errors)
            return (output, consumed+3)
        # (else) no BOM present
        self.decode = codecs.utf_8_decode
        return codecs.utf_8_decode(input, errors)

### encodings module API
项目:Sci-Finder    作者:snverse    | 项目源码 | 文件源码
def decode(input, errors='strict'):
    return codecs.utf_8_decode(input, errors, True)
项目:Sci-Finder    作者:snverse    | 项目源码 | 文件源码
def decode(input, errors='strict'):
    prefix = 0
    if input[:3] == codecs.BOM_UTF8:
        input = input[3:]
        prefix = 3
    (output, consumed) = codecs.utf_8_decode(input, errors, True)
    return (output, consumed+prefix)
项目:Sci-Finder    作者:snverse    | 项目源码 | 文件源码
def decode(self, input, errors='strict'):
        if len(input) < 3:
            if codecs.BOM_UTF8.startswith(input):
                # not enough data to decide if this is a BOM
                # => try again on the next call
                return (u"", 0)
        elif input[:3] == codecs.BOM_UTF8:
            self.decode = codecs.utf_8_decode
            (output, consumed) = codecs.utf_8_decode(input[3:],errors)
            return (output, consumed+3)
        # (else) no BOM present
        self.decode = codecs.utf_8_decode
        return codecs.utf_8_decode(input, errors)

### encodings module API
项目:Sci-Finder    作者:snverse    | 项目源码 | 文件源码
def decode(input, errors='strict'):
    return codecs.utf_8_decode(input, errors, True)
项目:ivaochdoc    作者:ivaoch    | 项目源码 | 文件源码
def decode(input, errors='strict'):
    return codecs.utf_8_decode(input, errors, True)
项目:ivaochdoc    作者:ivaoch    | 项目源码 | 文件源码
def decode(input, errors='strict'):
    prefix = 0
    if input[:3] == codecs.BOM_UTF8:
        input = input[3:]
        prefix = 3
    (output, consumed) = codecs.utf_8_decode(input, errors, True)
    return (output, consumed+prefix)
项目:ivaochdoc    作者:ivaoch    | 项目源码 | 文件源码
def decode(self, input, errors='strict'):
        if len(input) < 3:
            if codecs.BOM_UTF8.startswith(input):
                # not enough data to decide if this is a BOM
                # => try again on the next call
                return ("", 0)
        elif input[:3] == codecs.BOM_UTF8:
            self.decode = codecs.utf_8_decode
            (output, consumed) = codecs.utf_8_decode(input[3:],errors)
            return (output, consumed+3)
        # (else) no BOM present
        self.decode = codecs.utf_8_decode
        return codecs.utf_8_decode(input, errors)

### encodings module API
项目:RPoint    作者:george17-meet    | 项目源码 | 文件源码
def decode(input, errors='strict'):
    prefix = 0
    if input[:3] == codecs.BOM_UTF8:
        input = input[3:]
        prefix = 3
    (output, consumed) = codecs.utf_8_decode(input, errors, True)
    return (output, consumed+prefix)
项目:RPoint    作者:george17-meet    | 项目源码 | 文件源码
def _buffer_decode(self, input, errors, final):
        if self.first:
            if len(input) < 3:
                if codecs.BOM_UTF8.startswith(input):
                    # not enough data to decide if this really is a BOM
                    # => try again on the next call
                    return (u"", 0)
                else:
                    self.first = None
            else:
                self.first = None
                if input[:3] == codecs.BOM_UTF8:
                    (output, consumed) = codecs.utf_8_decode(input[3:], errors, final)
                    return (output, consumed+3)
        return codecs.utf_8_decode(input, errors, final)
项目:RPoint    作者:george17-meet    | 项目源码 | 文件源码
def decode(self, input, errors='strict'):
        if len(input) < 3:
            if codecs.BOM_UTF8.startswith(input):
                # not enough data to decide if this is a BOM
                # => try again on the next call
                return (u"", 0)
        elif input[:3] == codecs.BOM_UTF8:
            self.decode = codecs.utf_8_decode
            (output, consumed) = codecs.utf_8_decode(input[3:],errors)
            return (output, consumed+3)
        # (else) no BOM present
        self.decode = codecs.utf_8_decode
        return codecs.utf_8_decode(input, errors)

### encodings module API
项目:RPoint    作者:george17-meet    | 项目源码 | 文件源码
def decode(input, errors='strict'):
    return codecs.utf_8_decode(input, errors, True)
项目:AshsSDK    作者:thehappydinoa    | 项目源码 | 文件源码
def determine_encoding(self):
        while not self.eof and len(self.raw_buffer) < 2:
            self.update_raw()
        if not isinstance(self.raw_buffer, unicode):
            if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
                self.raw_decode = codecs.utf_16_le_decode
                self.encoding = 'utf-16-le'
            elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
                self.raw_decode = codecs.utf_16_be_decode
                self.encoding = 'utf-16-be'
            else:
                self.raw_decode = codecs.utf_8_decode
                self.encoding = 'utf-8'
        self.update(1)
项目:habilitacion    作者:GabrielBD    | 项目源码 | 文件源码
def decode(input, errors='strict'):
    prefix = 0
    if input[:3] == codecs.BOM_UTF8:
        input = input[3:]
        prefix = 3
    (output, consumed) = codecs.utf_8_decode(input, errors, True)
    return (output, consumed+prefix)
项目:habilitacion    作者:GabrielBD    | 项目源码 | 文件源码
def _buffer_decode(self, input, errors, final):
        if self.first:
            if len(input) < 3:
                if codecs.BOM_UTF8.startswith(input):
                    # not enough data to decide if this really is a BOM
                    # => try again on the next call
                    return (u"", 0)
                else:
                    self.first = None
            else:
                self.first = None
                if input[:3] == codecs.BOM_UTF8:
                    (output, consumed) = codecs.utf_8_decode(input[3:], errors, final)
                    return (output, consumed+3)
        return codecs.utf_8_decode(input, errors, final)
项目:habilitacion    作者:GabrielBD    | 项目源码 | 文件源码
def decode(self, input, errors='strict'):
        if len(input) < 3:
            if codecs.BOM_UTF8.startswith(input):
                # not enough data to decide if this is a BOM
                # => try again on the next call
                return (u"", 0)
        elif input[:3] == codecs.BOM_UTF8:
            self.decode = codecs.utf_8_decode
            (output, consumed) = codecs.utf_8_decode(input[3:],errors)
            return (output, consumed+3)
        # (else) no BOM present
        self.decode = codecs.utf_8_decode
        return codecs.utf_8_decode(input, errors)

### encodings module API