Python codecs 模块,BOM_UTF16 实例源码

我们从Python开源项目中,提取了以下11个代码示例,用于说明如何使用codecs.BOM_UTF16

项目:pmi_sprint_reporter    作者:cumc-dbmi    | 项目源码 | 文件源码
def remove_bom(filename):
    """Open *filename* in binary mode and skip a leading byte order mark.

    UTF-8, UTF-16 and UTF-32 BOMs are recognised in both little- and
    big-endian byte order.

    :param filename: path of the file to open
    :return: an open binary file object positioned just after the BOM (or
        at offset 0 when the file has none); the caller is responsible for
        closing it. ``None`` is returned when *filename* is not an existing
        regular file.
    """
    if not os.path.isfile(filename):
        return None

    # The UTF-32 marks must be tested before the UTF-16 ones: the UTF-32-LE
    # BOM (FF FE 00 00) begins with the UTF-16-LE BOM (FF FE).
    known_boms = (
        codecs.BOM_UTF32_LE,
        codecs.BOM_UTF32_BE,
        codecs.BOM_UTF8,
        codecs.BOM_UTF16_LE,
        codecs.BOM_UTF16_BE,
    )

    f = open(filename, 'rb')
    try:
        # the longest BOM is 4 bytes, so 4 bytes are enough to decide
        header = f.read(4)
        bom_len = next(
            (len(bom) for bom in known_boms if header.startswith(bom)), 0)
        # position the stream on the first byte after the BOM
        f.seek(bom_len)
    except Exception:
        # do not leak the handle when the header cannot be read
        f.close()
        raise
    return f
项目:touch-pay-client    作者:HackPucBemobi    | 项目源码 | 文件源码
def export(self):
    """Serialise the rows as tab-separated values in UTF-16.

    When ``self.rows`` is truthy, a single UTF-16 BOM and a header line
    built from ``self.rows.colnames`` are emitted first. Every record
    returned by ``self.represented()`` then follows as one TSV line.

    NOTE(review): the body calls ``.decode`` on the buffer contents and on
    ``str(col)``, i.e. it relies on Python 2 byte-string semantics.

    :return: the accumulated output, passed through ``str()``
    """
    import codecs
    import csv

    line_buf = StringIO()
    result = StringIO()
    tsv = csv.writer(line_buf, delimiter='\t')

    def flush_line():
        # Move the line the csv writer just produced into the result:
        # re-encode it from UTF-8 to UTF-16, drop the first two bytes (the
        # BOM the "utf-16" codec prepends on every encode call) and reset
        # the scratch buffer for the next line.
        encoded = line_buf.getvalue().decode("utf8").encode("utf-16")
        result.write(encoded[2:])
        line_buf.truncate(0)

    if self.rows:
        # exactly one BOM, at the very start of the output
        result.write(codecs.BOM_UTF16)
        tsv.writerow(
            [to_unicode(name, "utf8") for name in self.rows.colnames])
        flush_line()

    for record in self.represented():
        tsv.writerow(
            [str(cell).decode('utf8').encode("utf-8") for cell in record])
        flush_line()

    return str(result.getvalue())
项目:true_review_web2py    作者:lucadealfaro    | 项目源码 | 文件源码
def export(self):
    """Build a UTF-16 encoded, tab-separated dump of the rows.

    A UTF-16 BOM plus a header line (from ``self.rows.colnames``) is written
    only when ``self.rows`` is truthy; one line per record returned by
    ``self.represented()`` follows.

    NOTE(review): uses ``unicode``, ``cStringIO`` and ``str.decode``, so this
    is Python 2 code.

    :return: the accumulated output, passed through ``str()``
    """
    import csv

    scratch = cStringIO.StringIO()
    output = cStringIO.StringIO()
    tsv_writer = csv.writer(scratch, delimiter='\t')

    if self.rows:
        import codecs
        # one BOM for the whole document
        output.write(codecs.BOM_UTF16)
        header = [unicode(name).encode("utf8") for name in self.rows.colnames]
        tsv_writer.writerow(header)
        # [2:] removes the BOM that the "utf-16" codec adds to each encode
        output.write(scratch.getvalue().decode("utf8").encode("utf-16")[2:])
        scratch.truncate(0)

    for record in self.represented():
        cells = [str(value).decode('utf8').encode("utf-8") for value in record]
        tsv_writer.writerow(cells)
        # same UTF-8 -> UTF-16 conversion as for the header line
        output.write(scratch.getvalue().decode("utf8").encode("utf-16")[2:])
        scratch.truncate(0)

    return str(output.getvalue())
项目:spc    作者:whbrewer    | 项目源码 | 文件源码
def export(self):
    """Export the rows as UTF-16 encoded tab-separated text.

    If ``self.rows`` is truthy the output starts with a UTF-16 BOM and a
    header line made of ``self.rows.colnames``. Each record yielded by
    ``self.represented()`` is appended as a further line.

    NOTE(review): depends on Python 2 names and semantics (``unicode``,
    ``cStringIO``, ``str.decode``).

    :return: the accumulated output, passed through ``str()``
    """
    import codecs
    import csv

    row_buffer = cStringIO.StringIO()
    document = cStringIO.StringIO()
    writer = csv.writer(row_buffer, delimiter='\t')

    def transfer_row():
        # Take the UTF-8 line currently held in the row buffer, convert it
        # to UTF-16 without the per-call BOM (first two bytes), append it to
        # the document and empty the row buffer.
        utf16 = row_buffer.getvalue().decode("utf8").encode("utf-16")
        document.write(utf16[2:])
        row_buffer.truncate(0)

    if self.rows:
        document.write(codecs.BOM_UTF16)
        writer.writerow(
            [unicode(name).encode("utf8") for name in self.rows.colnames])
        transfer_row()

    for record in self.represented():
        writer.writerow(
            [str(field).decode('utf8').encode("utf-8") for field in record])
        transfer_row()

    return str(document.getvalue())
项目:kolibri    作者:learningequality    | 项目源码 | 文件源码
def _wmic_output():
    """
    Returns the output from running the built-in `wmic` command.

    Redirects the output of `wmic` to a temporary file and then reads it back in.
    This would be cleaner if done using subprocess, but attempting to capture
    `stdout` internally led to freezing under Windows XP. (This may have been
    happening because the script is not being run as a main process.)

    The temporary file is removed whether or not the command succeeds.

    :return: the decoded text written by `wmic`
    :raises Exception: if the command exits with a non-zero status
    """

    # choose a unique file name (re-entrant/thread-safe/crash-safe)
    output_path = os.path.join(
        tempfile.gettempdir(),
        "kolibri_disks-{}.txt".format(uuid.uuid4())
    )

    # pipe output from the WMIC command to the temp file; the path is quoted
    # because the temp directory may contain spaces
    cmd = 'wmic logicaldisk list full /format:csv > "{}"'.format(output_path)

    try:
        return_code = os.system(cmd)
        if return_code:
            raise Exception("Could not run command '{}'".format(cmd))

        # output from WMIC is ostensibly UTF-16
        with open(output_path, 'rb') as f:
            bin_output = f.read()
    finally:
        # clean up the temp file on every path; the shell redirect may have
        # created it even when the command itself failed
        if os.path.exists(output_path):
            os.remove(output_path)

    # The very first time WMIC is run on a windows machine, the output gets mangled.
    # The BOM is replaced by WMIC's initialization message, so we need to put it back.
    # (On all subsequent runs, these next lines do nothing.)
    INIT_MSG = "Please wait while WMIC is being installed.".encode('ascii')  # Yes, ascii.
    bin_output = bin_output.replace(INIT_MSG, codecs.BOM_UTF16)

    # finally, decode the well-formatted UTF-16 byte string
    return bin_output.decode('utf-16')
项目:Problematica-public    作者:TechMaz    | 项目源码 | 文件源码
def export(self):
    """Return the rows as a UTF-16, tab-delimited document.

    The header (a UTF-16 BOM followed by the names in ``self.rows.colnames``)
    is only produced when ``self.rows`` is truthy. The records come from
    ``self.represented()`` and are written one per line.

    NOTE(review): Python 2 only as written (``unicode``, ``cStringIO``,
    ``str.decode``).

    :return: the accumulated output, passed through ``str()``
    """
    import csv

    pending = cStringIO.StringIO()
    collected = cStringIO.StringIO()
    tsv = csv.writer(pending, delimiter='\t')

    if self.rows:
        import codecs
        collected.write(codecs.BOM_UTF16)
        tsv.writerow(
            [unicode(label).encode("utf8") for label in self.rows.colnames])
        header_text = pending.getvalue().decode("utf8")
        # skip the two BOM bytes the codec emits; the BOM was written above
        collected.write(header_text.encode("utf-16")[2:])
        pending.truncate(0)

    for record in self.represented():
        tsv.writerow(
            [str(item).decode('utf8').encode("utf-8") for item in record])
        line_text = pending.getvalue().decode("utf8")
        # skip the two BOM bytes the codec emits on every encode call
        collected.write(line_text.encode("utf-16")[2:])
        pending.truncate(0)

    return str(collected.getvalue())
项目:OpenDoor    作者:stanislav-web    | 项目源码 | 文件源码
def decode(str, errors='strict'):
    """
    Decode a byte string into text.

    A leading UTF-8 or UTF-16 byte order mark selects the codec and is not
    part of the result; data without a BOM is decoded as UTF-8. When that
    fails the data is decoded as cp1251, and if that fails as well an empty
    string is returned, so this function never raises.

    :param bytes str: input byte string (the name shadows the builtin and is
        kept only for backward compatibility)
    :param str errors: error handling scheme for the UTF decoders
    :return: the decoded text, or "" when nothing could be decoded
    """

    # Work on an alias so the parameter is never mistaken for the builtin.
    data = str
    try:
        if len(data) < 3 and codecs.BOM_UTF8.startswith(data):
            # empty input or a truncated UTF-8 BOM: there is no text yet
            return ""
        if data.startswith(codecs.BOM_UTF8):
            return data[len(codecs.BOM_UTF8):].decode('utf-8', errors)
        if data.startswith((codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE)):
            # the "utf-16" codec consumes the BOM and picks the byte order
            return data.decode('utf-16', errors)
        # no BOM present
        return data.decode('utf-8', errors)
    except Exception:
        # Deliberately broad: callers use this as a best-effort decoder, and
        # non-text content (images, files, etc) also ends up here.
        try:
            return data.decode('cp1251')
        except Exception:
            return ""
项目:rekall-agent-server    作者:rekall-innovations    | 项目源码 | 文件源码
def export(self):
    """Produce the TSV export of the rows, encoded as UTF-16.

    With a truthy ``self.rows`` the result begins with a UTF-16 BOM and a
    header line taken from ``self.rows.colnames``. The lines that follow are
    the records returned by ``self.represented()``.

    NOTE(review): ``.decode`` is called on the buffer contents and on
    ``str(col)``, which assumes Python 2 byte strings.

    :return: the accumulated output, passed through ``str()``
    """
    import csv

    staging = StringIO()
    export_buf = StringIO()
    tsv_writer = csv.writer(staging, delimiter='\t')

    if self.rows:
        import codecs
        # the only BOM in the output
        export_buf.write(codecs.BOM_UTF16)
        column_names = [to_unicode(name, "utf8") for name in self.rows.colnames]
        tsv_writer.writerow(column_names)
        header_bytes = staging.getvalue().decode("utf8").encode("utf-16")
        # the codec prefixes its own 2-byte BOM; leave it out
        export_buf.write(header_bytes[2:])
        staging.truncate(0)

    for record in self.represented():
        values = [str(cell).decode('utf8').encode("utf-8") for cell in record]
        tsv_writer.writerow(values)
        line_bytes = staging.getvalue().decode("utf8").encode("utf-16")
        # the codec prefixes its own 2-byte BOM; leave it out
        export_buf.write(line_bytes[2:])
        staging.truncate(0)

    return str(export_buf.getvalue())
项目:slugiot-client    作者:slugiot    | 项目源码 | 文件源码
def export(self):
    """Encode the rows as tab-separated UTF-16 text.

    ``self.rows.colnames`` supplies the header line, which is written after
    a UTF-16 BOM and only when ``self.rows`` is truthy. Every record from
    ``self.represented()`` becomes one further line.

    NOTE(review): written against Python 2 (``unicode``, ``cStringIO``,
    ``str.decode``).

    :return: the accumulated output, passed through ``str()``
    """
    import codecs
    import csv

    current_line = cStringIO.StringIO()
    whole_file = cStringIO.StringIO()
    tsv = csv.writer(current_line, delimiter='\t')

    def commit_line():
        # Append the pending UTF-8 line to the output as UTF-16. The first
        # two bytes of each encode result are the codec's BOM and are cut
        # off; afterwards the line buffer is emptied for reuse.
        as_text = current_line.getvalue().decode("utf8")
        whole_file.write(as_text.encode("utf-16")[2:])
        current_line.truncate(0)

    if self.rows:
        whole_file.write(codecs.BOM_UTF16)
        tsv.writerow(
            [unicode(heading).encode("utf8") for heading in self.rows.colnames])
        commit_line()

    for record in self.represented():
        tsv.writerow(
            [str(entry).decode('utf8').encode("utf-8") for entry in record])
        commit_line()

    return str(whole_file.getvalue())
项目:misp-modules    作者:MISP    | 项目源码 | 文件源码
def get_decoded_header(header, value):
    """Decode the first chunk of an RFC 2047 encoded header value to text.

    Only a leading byte order mark is dropped. Removing the BOM byte pattern
    wherever it occurs would corrupt UTF-16/UTF-32 payloads in which those
    bytes happen to appear at an unaligned offset.

    :param header: header name; not used by the body, kept for interface
        compatibility
    :param value: raw header value as accepted by ``decode_header``
    :return: the decoded text. Bytes whose charset is ``None``, or
        ``unknown-8bit`` data that none of the UTF codecs accepts, are
        returned as ``str(bytes)`` so the analyst can decode them manually.
    :raises LookupError: if the declared charset is not known to Python
    :raises UnicodeDecodeError: if the payload is invalid for its charset
    """
    subject, encoding = decode_header(value)[0]
    # NOTE(review): for bytes this strips at byte level, which can remove a
    # trailing 0x20/0x0A byte that belongs to a UTF-16/32 code unit.
    subject = subject.strip()  # extra whitespace will mess up encoding
    if not isinstance(subject, bytes):
        # decode_header() returns plain text when nothing was encoded
        return subject

    if encoding == 'utf-8':
        # "utf-8-sig" skips one leading BOM when present
        return subject.decode('utf-8-sig')
    if encoding in ('utf-16', 'utf-32'):
        # these codecs consume a leading BOM of either byte order themselves
        return subject.decode(encoding)
    # Try various UTF decodings for any unknown 8bit encodings
    if encoding == 'unknown-8bit':
        # 32 before 16 so it raises errors
        for candidate in ('utf-8-sig', 'utf-32', 'utf-16'):
            try:
                return subject.decode(candidate)
            except UnicodeDecodeError:
                continue
        # If none of those encoding work return it in RFC2047 format
        return str(subject)
    # Provide RFC2047 format string if encoding is a unknown encoding
    # Better to have the analyst decode themselves than to provide a mangled string
    if encoding is None:
        return str(subject)
    return subject.decode(encoding)
项目:StuffShare    作者:StuffShare    | 项目源码 | 文件源码
def export(self):
    """Generate a UTF-16 encoded TSV representation of the rows.

    A truthy ``self.rows`` yields a UTF-16 BOM and a header line built from
    ``self.rows.colnames``. The body consists of one line per record
    returned by ``self.represented()``.

    NOTE(review): requires Python 2 (``unicode``, ``cStringIO``,
    ``str.decode``).

    :return: the accumulated output, passed through ``str()``
    """
    import csv

    work = cStringIO.StringIO()
    done = cStringIO.StringIO()
    tsv_writer = csv.writer(work, delimiter='\t')

    if self.rows:
        import codecs
        done.write(codecs.BOM_UTF16)
        names = [unicode(name).encode("utf8") for name in self.rows.colnames]
        tsv_writer.writerow(names)
        utf16_header = work.getvalue().decode("utf8").encode("utf-16")
        # drop the codec's own BOM; one was already written above
        done.write(utf16_header[2:])
        work.truncate(0)

    for record in self.represented():
        fields = [str(field).decode('utf8').encode("utf-8") for field in record]
        tsv_writer.writerow(fields)
        utf16_line = work.getvalue().decode("utf8").encode("utf-16")
        # drop the BOM the codec adds on every encode call
        done.write(utf16_line[2:])
        work.truncate(0)

    return str(done.getvalue())