我们从 Python 开源项目中提取了以下 25 个代码示例，用于说明如何使用 htmlentitydefs.entitydefs（它是一个将 HTML 实体名映射为其替换文本的字典，而不是函数）。
def handle_entityref(self, ref):
    """Handle a named entity reference and merge it into the recorded data.

    Called for each entity reference, e.g. for "&copy;", ref will be "copy".

    The five XML-predefined entities (lt, gt, quot, amp, apos) are kept in
    their escaped ``&name;`` form.  Any other name is looked up in
    ``htmlentitydefs.entitydefs`` and emitted as a UTF-8 encoded byte string;
    names missing from that table are passed through unchanged as ``&name;``.

    The resulting text is stored in ``self.c``.  When ``self.data`` is
    non-empty and ``self.recording`` is true, the text is appended to the
    last item of ``self.data`` (the joined value is also kept in
    ``self.newdata``) and ``self.switch`` is set to 1.
    """
    if ref in ('lt', 'gt', 'quot', 'amp', 'apos'):
        text = '&%s;' % ref
    else:
        # entity resolution graciously donated by Aaron Swartz
        def name2cp(k):
            import htmlentitydefs
            k = htmlentitydefs.entitydefs[k]
            if k.startswith("&#") and k.endswith(";"):
                return int(k[2:-1])  # not in latin-1
            return ord(k)

        # Resolve once and reuse the result (the lookup used to run twice:
        # once to probe for KeyError and once more to build the text).
        try:
            codepoint = name2cp(ref)
        except KeyError:
            text = "&%s;" % ref
        else:
            text = unichr(codepoint).encode('utf-8')

    self.c = text
    if self.data and self.recording:
        # Glue the entity text onto the most recent data chunk.
        last = self.data.pop()
        self.newdata = ''.join([last, self.c])
        self.data.append(self.newdata)
        # NOTE(review): the source formatting was flattened; this assignment
        # is assumed to belong to the `if` body -- confirm.
        self.switch = 1
def __init__(self, html=0):
    """Set up the tree builder and an sgmlop parser that reports to it.

    html -- when true, the HTML entity table from ``htmlentitydefs`` is
            merged into ``self.entitydefs``.

    Raises RuntimeError if the ``sgmlop`` module cannot be imported.
    """
    try:
        import sgmlop
    except ImportError:
        raise RuntimeError("sgmlop parser not available")

    self.__builder = ElementTree.TreeBuilder()

    if html:
        # NOTE(review): update() mutates self.entitydefs in place; if that
        # dict lives on the class it is shared by all instances -- confirm.
        from htmlentitydefs import entitydefs as html_entities
        self.entitydefs.update(html_entities)

    # Create the low-level parser, then register this object with it.
    self.__parser = parser = sgmlop.XMLParser()
    parser.register(self)

##
# Feeds data to the parser.
#
# @param data Encoded data.
def handle_entityref(self, ref):
    """Append the text for a named entity reference to the current element.

    Called for each entity reference, e.g. for '&copy;', ref will be 'copy'.

    Does nothing when ``self.elementstack`` is empty.  The five
    XML-predefined entities (lt, gt, quot, amp, apos) are kept in their
    escaped ``&name;`` form.  Any other known HTML entity is emitted as a
    UTF-8 encoded byte string, and unknown names are passed through
    unchanged as ``&name;``.  The text is appended to the list stored at
    index 2 of the top entry of ``self.elementstack``.
    """
    if not self.elementstack:
        return
    if _debug:
        sys.stderr.write('entering handle_entityref with %s\n' % ref)
    if ref in ('lt', 'gt', 'quot', 'amp', 'apos'):
        text = '&%s;' % ref
    else:
        # entity resolution graciously donated by Aaron Swartz
        def name2cp(k):
            import htmlentitydefs
            if hasattr(htmlentitydefs, 'name2codepoint'):  # requires Python 2.3
                return htmlentitydefs.name2codepoint[k]
            k = htmlentitydefs.entitydefs[k]
            if k.startswith('&#') and k.endswith(';'):
                return int(k[2:-1])  # not in latin-1
            return ord(k)

        # Resolve once and reuse the result (the lookup used to run twice:
        # once to probe for KeyError and once more to build the text).
        try:
            codepoint = name2cp(ref)
        except KeyError:
            text = '&%s;' % ref
        else:
            text = unichr(codepoint).encode('utf-8')
    self.elementstack[-1][2].append(text)
def __init__(self, html=0, target=None, encoding=None):
    """Set up the tree builder, then initialise the xmllib parser base.

    html     -- when true, the HTML entity table from ``htmlentitydefs`` is
                merged into ``self.entitydefs`` before the base initialiser
                runs.
    target   -- accepted but not used by this method.
    encoding -- accepted but not used by this method.
    """
    self.__builder = ElementTree.TreeBuilder()

    if html:
        # NOTE(review): update() mutates self.entitydefs in place; if that
        # dict lives on the class it is shared by all instances -- confirm.
        from htmlentitydefs import entitydefs as html_entities
        self.entitydefs.update(html_entities)

    xmllib.XMLParser.__init__(self)

##
# Feeds data to the parser.
#
# @param data Encoded data.
def handle_entityref(self, ref):
    """Append an entity reference to ``self.result``.

    Names present in ``entitydefs`` are written back as ``&name;``.  Any
    other name is written as ``'&' + name`` (no trailing semicolon) after
    being passed through ``xssescape``.
    """
    if ref not in entitydefs:
        self.result += xssescape('&%s' % ref)
        return
    self.result += '&%s;' % ref
def name2cp(k):
    """Return the integer code point for the HTML entity name *k*.

    'apos' is answered directly.  Other names are looked up in
    ``htmlentitydefs.name2codepoint`` when the module provides it, and
    otherwise derived from ``htmlentitydefs.entitydefs``.

    Raises KeyError when the name is not in the table consulted.
    """
    if k == 'apos':
        # Answered directly, without consulting the htmlentitydefs tables.
        return ord("'")

    if not hasattr(htmlentitydefs, "name2codepoint"):
        # Fallback for interpreters without name2codepoint: entitydefs holds
        # either a "&#NNN;" character reference or a single latin-1 character.
        replacement = htmlentitydefs.entitydefs[k]
        if replacement.startswith("&#") and replacement.endswith(";"):
            return int(replacement[2:-1])  # not in latin-1
        return ord(codecs.latin_1_decode(replacement)[0])

    return htmlentitydefs.name2codepoint[k]  # requires Python 2.3
def handle_entityref(self, ref):
    """Append an entity reference to ``self.result`` unless suppressed.

    Nothing is written while the last entry of ``self.in_disallowed`` is
    true.  Otherwise names present in ``entitydefs`` are written back as
    ``&name;`` and any other name is written as ``'&' + name`` (no trailing
    semicolon) after being passed through ``xssescape``.
    """
    if self.in_disallowed[-1]:
        return

    if ref in entitydefs:
        rendered = '&%s;' % ref
    else:
        rendered = xssescape('&%s' % ref)
    self.result += rendered
def handle_entityref(self, ref):
    """Append an entity reference to ``self.result`` unless suppressed.

    Nothing is written while ``self.in_disallowed`` is truthy.  Otherwise
    names present in ``entitydefs`` are written back as ``&name;`` and any
    other name is written as ``'&' + name`` (no trailing semicolon) after
    being passed through ``xssescape``.
    """
    if self.in_disallowed:
        return

    if ref in entitydefs:
        rendered = '&%s;' % ref
    else:
        rendered = xssescape('&%s' % ref)
    self.result += rendered
def __init__(self, html=0):
    """Set up the tree builder, then initialise the xmllib parser base.

    html -- when true, the HTML entity table from ``htmlentitydefs`` is
            merged into ``self.entitydefs`` before the base initialiser
            runs.
    """
    self.__builder = ElementTree.TreeBuilder()

    if html:
        # NOTE(review): update() mutates self.entitydefs in place; if that
        # dict lives on the class it is shared by all instances -- confirm.
        from htmlentitydefs import entitydefs as html_entities
        self.entitydefs.update(html_entities)

    xmllib.XMLParser.__init__(self)

##
# Feeds data to the parser.
#
# @param data Encoded data.
def convert_entities(text):
    """Replace entity references in *text* with unicode characters.

    Relies on the module-level ``ENTITY`` pattern (defined elsewhere in the
    file; presumably it matches ``&name;`` and ``&#NNN;`` style references
    -- confirm).  If ``ENTITY`` finds nothing in *text*, the input is
    returned unchanged; otherwise every match is rewritten by ``conv``.
    """
    def conv(match):
        """Return the replacement for one matched entity reference."""
        ents = match.group(0)
        # Strip the leading '&' and trailing ';' to get the entity name.
        ent_code = htmlentitydefs.entitydefs.get(ents[1:-1])
        if ent_code:
            try:
                ents = unicode(ent_code, 'UTF-8')
            except UnicodeDecodeError:
                ents = unicode(ent_code, 'latin-1')
            except Exception as ex:
                print("error occurred while converting entity %s: %s" % (ents, ex))

            # check if it still needs conversion (entitydefs may map a name
            # to another "&#NNN;" reference instead of a literal character)
            if not ENTITY.search(ents):
                return ents

        if ents[1] == '#':
            # Numeric character reference: decimal, or hex with an 'x' prefix.
            code = ents[2:-1]
            base = 10
            if code[0] == 'x':
                code = code[1:]
                base = 16
            return unichr(int(code, base))

        # NOTE(review): an unresolvable named reference yields None, which
        # appears to make re.sub drop the reference from the output --
        # confirm this is intended.
        return None

    in_entity = ENTITY.search(text)
    if not in_entity:
        return text
    return in_entity.re.sub(conv, text)
def lookslikehtml(s):
    """Return 1 if *s* looks like HTML, otherwise None.

    Three checks are applied in order:
      1. *s* must contain a close tag or an entity reference;
      2. every tag name (lower-cased) must be listed in
         ``_HTMLSanitizer.acceptable_elements``;
      3. every named entity must be a key of ``entitydefs``.
    """
    # must have a close tag or an entity reference to qualify
    has_close_tag = re.search(r'</(\w+)>', s)
    if not has_close_tag and not re.search(r'&#?\w+;', s):
        return None

    # all tags must be in a restricted subset of valid HTML tags
    for tag in re.findall(r'</?(\w+)', s):
        if tag.lower() not in _HTMLSanitizer.acceptable_elements:
            return None

    # all entities must have been defined as valid HTML entities
    for entity in re.findall(r'&(\w+);', s):
        if entity not in entitydefs:
            return None

    return 1