Python htmlentitydefs 模块,entitydefs 字典实例源码

我们从Python开源项目中,提取了以下25个代码示例,用于说明如何使用htmlentitydefs.entitydefs(这是一个将实体名映射到其替换文本的字典,而不是函数)。

项目:TGC.bundle    作者:bubonic    | 项目源码 | 文件源码
def handle_entityref(self, ref):
            """Resolve a named entity reference and merge it into the recorded data.

            The resulting text is stored in ``self.c``.  The five names
            ``lt``, ``gt``, ``quot``, ``amp`` and ``apos`` are kept as
            ``&name;``; other names are looked up in
            ``htmlentitydefs.entitydefs`` and emitted as a UTF-8 encoded
            character, or left as ``&name;`` when the name is unknown.

            When ``self.data`` is non-empty and ``self.recording`` is true,
            the text is appended to the last item of ``self.data``; the
            joined string is also stored in ``self.newdata`` and
            ``self.switch`` is set to 1.
            """
            # called for each entity reference, e.g. for "&copy;", ref will be "copy"
            if ref in ('lt', 'gt', 'quot', 'amp', 'apos'):
                text = '&%s;' % ref
            else:
                # entity resolution graciously donated by Aaron Swartz
                def name2cp(k):
                    import htmlentitydefs
                    k = htmlentitydefs.entitydefs[k]
                    if k.startswith("&#") and k.endswith(";"):
                        return int(k[2:-1]) # not in latin-1
                    return ord(k)
                try:
                    # resolve once and reuse the code point below
                    codepoint = name2cp(ref)
                except KeyError:
                    # unknown entity name: emit the reference unchanged
                    text = "&%s;" % ref
                else:
                    text = unichr(codepoint).encode('utf-8')
            self.c = text
            if self.data and self.recording:
                # glue the text onto the most recent data chunk
                last = self.data.pop()
                self.newdata = ''.join([last,self.c])
                self.data.append(self.newdata)
                self.switch = 1
项目:TGC.bundle    作者:bubonic    | 项目源码 | 文件源码
def handle_entityref(self, ref):
            """Resolve a named entity reference and merge it into the recorded data.

            The resulting text is stored in ``self.c``.  The five names
            ``lt``, ``gt``, ``quot``, ``amp`` and ``apos`` are kept as
            ``&name;``; other names are looked up in
            ``htmlentitydefs.entitydefs`` and emitted as a UTF-8 encoded
            character, or left as ``&name;`` when the name is unknown.

            When ``self.data`` is non-empty and ``self.recording`` is true,
            the text is appended to the last item of ``self.data``; the
            joined string is also stored in ``self.newdata`` and
            ``self.switch`` is set to 1.
            """
            # called for each entity reference, e.g. for "&copy;", ref will be "copy"
            if ref in ('lt', 'gt', 'quot', 'amp', 'apos'):
                text = '&%s;' % ref
            else:
                # entity resolution graciously donated by Aaron Swartz
                def name2cp(k):
                    import htmlentitydefs
                    k = htmlentitydefs.entitydefs[k]
                    if k.startswith("&#") and k.endswith(";"):
                        return int(k[2:-1]) # not in latin-1
                    return ord(k)
                try:
                    # resolve once and reuse the code point below
                    codepoint = name2cp(ref)
                except KeyError:
                    # unknown entity name: emit the reference unchanged
                    text = "&%s;" % ref
                else:
                    text = unichr(codepoint).encode('utf-8')
            self.c = text
            if self.data and self.recording:
                # glue the text onto the most recent data chunk
                last = self.data.pop()
                self.newdata = ''.join([last,self.c])
                self.data.append(self.newdata)
                self.switch = 1
项目:plugin.video.streamondemand-pureita    作者:orione7    | 项目源码 | 文件源码
def __init__(self, html=0):
        """Create the tree builder and hook this object up to an sgmlop parser.

        Raises RuntimeError when the ``sgmlop`` module cannot be imported.
        When *html* is true, the definitions from
        ``htmlentitydefs.entitydefs`` are merged into ``self.entitydefs``.
        """
        try:
            import sgmlop
        except ImportError:
            raise RuntimeError("sgmlop parser not available")
        self.__builder = ElementTree.TreeBuilder()
        if html:
            # NOTE(review): update() mutates the dict in place; if
            # ``entitydefs`` is a class-level attribute the change is
            # visible to every instance -- confirm.
            from htmlentitydefs import entitydefs as html_entities
            self.entitydefs.update(html_entities)
        # this object receives the parser's callbacks
        parser = sgmlop.XMLParser()
        self.__parser = parser
        parser.register(self)

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.
项目:xspfy    作者:sepehr    | 项目源码 | 文件源码
def handle_entityref(self, ref):
        """Resolve a named entity reference and add it to the current element.

        Does nothing when ``self.elementstack`` is empty.  The five names
        ``lt``, ``gt``, ``quot``, ``amp`` and ``apos`` are kept as
        ``&name;``; other names are resolved to a UTF-8 encoded character,
        or left as ``&name;`` when the name is unknown.  The text is
        appended to the list at index 2 of the top ``elementstack`` entry.
        """
        # called for each entity reference, e.g. for '&copy;', ref will be 'copy'
        if not self.elementstack: return
        if _debug: sys.stderr.write('entering handle_entityref with %s\n' % ref)
        if ref in ('lt', 'gt', 'quot', 'amp', 'apos'):
            text = '&%s;' % ref
        else:
            # entity resolution graciously donated by Aaron Swartz
            def name2cp(k):
                import htmlentitydefs
                if hasattr(htmlentitydefs, 'name2codepoint'): # requires Python 2.3
                    return htmlentitydefs.name2codepoint[k]
                k = htmlentitydefs.entitydefs[k]
                if k.startswith('&#') and k.endswith(';'):
                    return int(k[2:-1]) # not in latin-1
                return ord(k)
            try:
                # resolve once and reuse the code point below
                codepoint = name2cp(ref)
            except KeyError:
                # unknown entity name: emit the reference unchanged
                text = '&%s;' % ref
            else:
                text = unichr(codepoint).encode('utf-8')
        self.elementstack[-1][2].append(text)
项目:kinect-2-libras    作者:inessadl    | 项目源码 | 文件源码
def __init__(self, html=0, target=None, encoding=None):
        """Create the tree builder and initialise the xmllib parser base.

        When *html* is true, the definitions from
        ``htmlentitydefs.entitydefs`` are merged into ``self.entitydefs``
        before ``xmllib.XMLParser.__init__`` runs.  *target* and *encoding*
        are accepted but not used by this method.
        """
        self.__builder = ElementTree.TreeBuilder()
        if html:
            # NOTE(review): update() mutates the dict in place; if
            # ``entitydefs`` is a class-level attribute the change is
            # visible to every instance -- confirm.
            from htmlentitydefs import entitydefs as html_entities
            self.entitydefs.update(html_entities)
        xmllib.XMLParser.__init__(self)

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.
项目:darkc0de-old-stuff    作者:tuwid    | 项目源码 | 文件源码
def handle_entityref(self, ref):
        """Append the entity reference *ref* to ``self.result``.

        Names present in ``entitydefs`` are re-emitted as ``&name;``.  Any
        other name is appended as ``xssescape('&name')`` (without a
        trailing semicolon).
        """
        if ref not in entitydefs:
            # unknown name: escape it instead of emitting a live entity
            self.result += xssescape('&%s' % ref)
            return
        self.result += '&%s;' % ref
项目:nstock    作者:ybenitezf    | 项目源码 | 文件源码
def name2cp(k):
    """Return the code point (an int) for the HTML entity name *k*.

    ``'apos'`` is answered directly.  Otherwise the name is looked up in
    ``htmlentitydefs.name2codepoint`` when that table exists, falling back
    to decoding the ``htmlentitydefs.entitydefs`` entry.  Raises KeyError
    for a name missing from the table that is consulted.
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"): # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]
    definition = htmlentitydefs.entitydefs[k]
    is_char_ref = definition.startswith("&#") and definition.endswith(";")
    if is_char_ref:
        return int(definition[2:-1]) # not in latin-1
    decoded, _consumed = codecs.latin_1_decode(definition)
    return ord(decoded)
项目:ExptWizNote    作者:Ext4FAT    | 项目源码 | 文件源码
def name2cp(k):
    """Return the code point (an int) for the HTML entity name *k*.

    ``'apos'`` is answered directly.  Otherwise the name is looked up in
    ``htmlentitydefs.name2codepoint`` when that table exists, falling back
    to decoding the ``htmlentitydefs.entitydefs`` entry.  Raises KeyError
    for a name missing from the table that is consulted.
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"): # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]
    definition = htmlentitydefs.entitydefs[k]
    is_char_ref = definition.startswith("&#") and definition.endswith(";")
    if is_char_ref:
        return int(definition[2:-1]) # not in latin-1
    decoded, _consumed = codecs.latin_1_decode(definition)
    return ord(decoded)
项目:true_review_web2py    作者:lucadealfaro    | 项目源码 | 文件源码
def handle_entityref(self, ref):
        """Append the entity reference *ref* to ``self.result``.

        Nothing is appended while the last item of ``self.in_disallowed``
        is true.  Otherwise names present in ``entitydefs`` are re-emitted
        as ``&name;`` and any other name is appended as
        ``xssescape('&name')`` (without a trailing semicolon).
        """
        if self.in_disallowed[-1]:
            return
        if ref not in entitydefs:
            # unknown name: escape it instead of emitting a live entity
            self.result += xssescape('&%s' % ref)
            return
        self.result += '&%s;' % ref
项目:spc    作者:whbrewer    | 项目源码 | 文件源码
def handle_entityref(self, ref):
        """Append the entity reference *ref* to ``self.result``.

        Nothing is appended while ``self.in_disallowed`` is true.
        Otherwise names present in ``entitydefs`` are re-emitted as
        ``&name;`` and any other name is appended as
        ``xssescape('&name')`` (without a trailing semicolon).
        """
        if self.in_disallowed:
            return
        if ref not in entitydefs:
            # unknown name: escape it instead of emitting a live entity
            self.result += xssescape('&%s' % ref)
            return
        self.result += '&%s;' % ref
项目:catchup4kodi    作者:catchup4kodi    | 项目源码 | 文件源码
def name2cp(k):
    """Return the code point (an int) for the HTML entity name *k*.

    ``'apos'`` is answered directly.  Otherwise the name is looked up in
    ``htmlentitydefs.name2codepoint`` when that table exists, falling back
    to decoding the ``htmlentitydefs.entitydefs`` entry.  Raises KeyError
    for a name missing from the table that is consulted.
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"): # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]
    definition = htmlentitydefs.entitydefs[k]
    is_char_ref = definition.startswith("&#") and definition.endswith(";")
    if is_char_ref:
        return int(definition[2:-1]) # not in latin-1
    decoded, _consumed = codecs.latin_1_decode(definition)
    return ord(decoded)
项目:Problematica-public    作者:TechMaz    | 项目源码 | 文件源码
def handle_entityref(self, ref):
        """Append the entity reference *ref* to ``self.result``.

        Nothing is appended while the last item of ``self.in_disallowed``
        is true.  Otherwise names present in ``entitydefs`` are re-emitted
        as ``&name;`` and any other name is appended as
        ``xssescape('&name')`` (without a trailing semicolon).
        """
        if self.in_disallowed[-1]:
            return
        if ref not in entitydefs:
            # unknown name: escape it instead of emitting a live entity
            self.result += xssescape('&%s' % ref)
            return
        self.result += '&%s;' % ref
项目:localdocindex    作者:stcioc    | 项目源码 | 文件源码
def name2cp(k):
    """Return the code point (an int) for the HTML entity name *k*.

    ``'apos'`` is answered directly.  Otherwise the name is looked up in
    ``htmlentitydefs.name2codepoint`` when that table exists, falling back
    to decoding the ``htmlentitydefs.entitydefs`` entry.  Raises KeyError
    for a name missing from the table that is consulted.
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"): # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]
    definition = htmlentitydefs.entitydefs[k]
    is_char_ref = definition.startswith("&#") and definition.endswith(";")
    if is_char_ref:
        return int(definition[2:-1]) # not in latin-1
    decoded, _consumed = codecs.latin_1_decode(definition)
    return ord(decoded)
项目:plugin.video.streamondemand-pureita    作者:orione7    | 项目源码 | 文件源码
def __init__(self, html=0):
        """Create the tree builder and initialise the xmllib parser base.

        When *html* is true, the definitions from
        ``htmlentitydefs.entitydefs`` are merged into ``self.entitydefs``
        before ``xmllib.XMLParser.__init__`` runs.
        """
        self.__builder = ElementTree.TreeBuilder()
        if html:
            # NOTE(review): update() mutates the dict in place; if
            # ``entitydefs`` is a class-level attribute the change is
            # visible to every instance -- confirm.
            from htmlentitydefs import entitydefs as html_entities
            self.entitydefs.update(html_entities)
        xmllib.XMLParser.__init__(self)

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.
项目:web3py    作者:web2py    | 项目源码 | 文件源码
def handle_entityref(self, ref):
        """Append the entity reference *ref* to ``self.result``.

        Nothing is appended while the last item of ``self.in_disallowed``
        is true.  Otherwise names present in ``entitydefs`` are re-emitted
        as ``&name;`` and any other name is appended as
        ``xssescape('&name')`` (without a trailing semicolon).
        """
        if self.in_disallowed[-1]:
            return
        if ref not in entitydefs:
            # unknown name: escape it instead of emitting a live entity
            self.result += xssescape('&%s' % ref)
            return
        self.result += '&%s;' % ref
项目:slugiot-client    作者:slugiot    | 项目源码 | 文件源码
def handle_entityref(self, ref):
        """Append the entity reference *ref* to ``self.result``.

        Nothing is appended while the last item of ``self.in_disallowed``
        is true.  Otherwise names present in ``entitydefs`` are re-emitted
        as ``&name;`` and any other name is appended as
        ``xssescape('&name')`` (without a trailing semicolon).
        """
        if self.in_disallowed[-1]:
            return
        if ref not in entitydefs:
            # unknown name: escape it instead of emitting a live entity
            self.result += xssescape('&%s' % ref)
            return
        self.result += '&%s;' % ref
项目:griffith    作者:Strit    | 项目源码 | 文件源码
def convert_entities(text):
    """Replace the entity references found in *text* by ``ENTITY``.

    Each match is handled as follows:

    * a name found in ``htmlentitydefs.entitydefs`` is replaced by its
      definition decoded as UTF-8, falling back to latin-1;
    * a numeric reference (``&#NNN;`` or ``&#xHHH;``, including a
      definition that is itself such a reference) is replaced by
      ``unichr`` of the number; a malformed or out-of-range number is
      left in the text unchanged instead of raising;
    * anything else yields ``None`` from the callback.

    *text* is returned as is when ``ENTITY`` does not match at all.
    """

    def conv(match):
        entity = match.group(0)
        ent_code = htmlentitydefs.entitydefs.get(entity[1:-1], None)
        if ent_code:
            try:
                entity = unicode(ent_code, 'UTF-8')
            except UnicodeDecodeError:
                entity = unicode(ent_code, 'latin-1')
            except Exception as ex:
                print("error occurred while converting entity %s: %s" % (entity, ex))

            # check if it still needs conversion
            if not ENTITY.search(entity):
                return entity

        if entity[1:2] == '#':
            code = entity[2:-1]
            base = 10
            # accept both "&#x41;" and "&#X41;"
            if code[:1] in ('x', 'X'):
                code = code[1:]
                base = 16
            try:
                return unichr(int(code, base))
            except (ValueError, OverflowError):
                # empty, non-numeric or out-of-range code: keep the
                # reference as it was rather than abort the substitution
                return entity
        # NOTE(review): returning None appears to make re.sub drop the
        # matched reference from the output -- confirm this is intended
        # for names that are not in entitydefs.
        return None

    if not ENTITY.search(text):
        return text
    return ENTITY.sub(conv, text)
项目:script.reddit.reader    作者:gedisony    | 项目源码 | 文件源码
def name2cp(k):
    """Return the code point (an int) for the HTML entity name *k*.

    ``'apos'`` is answered directly.  Otherwise the name is looked up in
    ``htmlentitydefs.name2codepoint`` when that table exists, falling back
    to decoding the ``htmlentitydefs.entitydefs`` entry.  Raises KeyError
    for a name missing from the table that is consulted.
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"): # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]
    definition = htmlentitydefs.entitydefs[k]
    is_char_ref = definition.startswith("&#") and definition.endswith(";")
    if is_char_ref:
        return int(definition[2:-1]) # not in latin-1
    decoded, _consumed = codecs.latin_1_decode(definition)
    return ord(decoded)
项目:empyrion-python-api    作者:huhlig    | 项目源码 | 文件源码
def __init__(self, html=0, target=None, encoding=None):
        """Create the tree builder and initialise the xmllib parser base.

        When *html* is true, the definitions from
        ``htmlentitydefs.entitydefs`` are merged into ``self.entitydefs``
        before ``xmllib.XMLParser.__init__`` runs.  *target* and *encoding*
        are accepted but not used by this method.
        """
        self.__builder = ElementTree.TreeBuilder()
        if html:
            # NOTE(review): update() mutates the dict in place; if
            # ``entitydefs`` is a class-level attribute the change is
            # visible to every instance -- confirm.
            from htmlentitydefs import entitydefs as html_entities
            self.entitydefs.update(html_entities)
        xmllib.XMLParser.__init__(self)

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.
项目:googMeow    作者:aaaddress1    | 项目源码 | 文件源码
def name2cp(k):
    """Return the code point (an int) for the HTML entity name *k*.

    ``'apos'`` is answered directly.  Otherwise the name is looked up in
    ``htmlentitydefs.name2codepoint`` when that table exists, falling back
    to decoding the ``htmlentitydefs.entitydefs`` entry.  Raises KeyError
    for a name missing from the table that is consulted.
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"): # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]
    definition = htmlentitydefs.entitydefs[k]
    is_char_ref = definition.startswith("&#") and definition.endswith(";")
    if is_char_ref:
        return int(definition[2:-1]) # not in latin-1
    decoded, _consumed = codecs.latin_1_decode(definition)
    return ord(decoded)
项目:gitsome    作者:donnemartin    | 项目源码 | 文件源码
def name2cp(k):
    """Return the code point (an int) for the HTML entity name *k*.

    ``'apos'`` is answered directly.  Otherwise the name is looked up in
    ``htmlentitydefs.name2codepoint`` when that table exists, falling back
    to decoding the ``htmlentitydefs.entitydefs`` entry.  Raises KeyError
    for a name missing from the table that is consulted.
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"): # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]
    definition = htmlentitydefs.entitydefs[k]
    is_char_ref = definition.startswith("&#") and definition.endswith(";")
    if is_char_ref:
        return int(definition[2:-1]) # not in latin-1
    decoded, _consumed = codecs.latin_1_decode(definition)
    return ord(decoded)
项目:machine-learning-python    作者:pspxiaochen    | 项目源码 | 文件源码
def lookslikehtml(s):
        """Heuristically decide whether the string *s* is HTML.

        Returns 1 when *s* contains a close tag or an entity reference,
        every tag name is listed in ``_HTMLSanitizer.acceptable_elements``
        (compared in lower case) and every named entity is a key of
        ``entitydefs``.  Returns None otherwise.
        """
        # must have a close tag or an entity reference to qualify
        has_close_tag = re.search(r'</(\w+)>', s)
        if not has_close_tag and not re.search(r'&#?\w+;', s):
            return

        # all tags must be in a restricted subset of valid HTML tags
        for tag in re.findall(r'</?(\w+)', s):
            if tag.lower() not in _HTMLSanitizer.acceptable_elements:
                return

        # all entities must have been defined as valid HTML entities
        for entity in re.findall(r'&(\w+);', s):
            if entity not in entitydefs:
                return

        return 1
项目:machine-learning-python    作者:pspxiaochen    | 项目源码 | 文件源码
def lookslikehtml(s):
        """Heuristically decide whether the string *s* is HTML.

        Returns 1 when *s* contains a close tag or an entity reference,
        every tag name is listed in ``_HTMLSanitizer.acceptable_elements``
        (compared in lower case) and every named entity is a key of
        ``entitydefs``.  Returns None otherwise.
        """
        # must have a close tag or an entity reference to qualify
        has_close_tag = re.search(r'</(\w+)>', s)
        if not has_close_tag and not re.search(r'&#?\w+;', s):
            return

        # all tags must be in a restricted subset of valid HTML tags
        for tag in re.findall(r'</?(\w+)', s):
            if tag.lower() not in _HTMLSanitizer.acceptable_elements:
                return

        # all entities must have been defined as valid HTML entities
        for entity in re.findall(r'&(\w+);', s):
            if entity not in entitydefs:
                return

        return 1
项目:mbox-to-csv    作者:jarrodparkes    | 项目源码 | 文件源码
def name2cp(k):
    """Return the code point (an int) for the HTML entity name *k*.

    ``'apos'`` is answered directly.  Otherwise the name is looked up in
    ``htmlentitydefs.name2codepoint`` when that table exists, falling back
    to decoding the ``htmlentitydefs.entitydefs`` entry.  Raises KeyError
    for a name missing from the table that is consulted.
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"): # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]
    definition = htmlentitydefs.entitydefs[k]
    is_char_ref = definition.startswith("&#") and definition.endswith(";")
    if is_char_ref:
        return int(definition[2:-1]) # not in latin-1
    decoded, _consumed = codecs.latin_1_decode(definition)
    return ord(decoded)
项目:StuffShare    作者:StuffShare    | 项目源码 | 文件源码
def handle_entityref(self, ref):
        """Append the entity reference *ref* to ``self.result``.

        Nothing is appended while ``self.in_disallowed`` is true.
        Otherwise names present in ``entitydefs`` are re-emitted as
        ``&name;`` and any other name is appended as
        ``xssescape('&name')`` (without a trailing semicolon).
        """
        if self.in_disallowed:
            return
        if ref not in entitydefs:
            # unknown name: escape it instead of emitting a live entity
            self.result += xssescape('&%s' % ref)
            return
        self.result += '&%s;' % ref