Python HTMLParser.HTMLParser 模块,feed() 实例源码

我们从Python开源项目中,提取了以下25个代码示例,用于说明如何使用HTMLParser.HTMLParser.feed()

项目:kivy_gosh    作者:mcroni    | 项目源码 | 文件源码
def remove(self, item):
        """
        Delete item from this container, locating it with self.index.

        The position is obtained from self.index(item) instead of
        list.remove, so the comparison used by self.index decides which
        element is dropped (the intent is an identity based counterpart
        of list.remove). Any error raised by self.index propagates.

        Example:

        data = '<a><b></b><b></b></a>'
        html = Html()
        dom = html.feed(data)

        for root, ind in dom.sail_with_root():
            if ind.name == 'b':
                root.remove(ind)

        print dom

        Expected output:

        <a ></a>
        """

        del self[self.index(item)]
项目:kivy_gosh    作者:mcroni    | 项目源码 | 文件源码
def take(self, *args):
        """
        Return the first object produced by self.match(*args).

        args are (key, value) pairs, (key0, value0), (key1, value1), ...,
        forwarded unchanged to self.match.

        Returns None when self.match yields nothing.

        Example:

        data = '<a><b id="foo" size="1"></b></a>'
        html = Html()
        dom = html.feed(data)

        print dom.take(('id', 'foo'))
        print dom.take(('id', 'foo'), ('size', '2'))
        """

        # The builtin next() with a default works on Python 2.6+ and 3,
        # unlike the Python 2 only iterator method seq.next().
        return next(iter(self.match(*args)), None)
项目:kivy_gosh    作者:mcroni    | 项目源码 | 文件源码
def walk_with_root(self):
        """
        Iterate like sail_with_root, expanding each object into a triple.

        For every (root, ind) pair produced by self.sail_with_root() this
        generator yields

            ((root, root.name, root.attr), (ind, ind.name, ind.attr))

        Example:

        html = Html()
        data = '<body><em>alpha</em></body>'
        dom = html.feed(data)

        for (root, name, attr), (ind, name, attr) in dom.walk_with_root():
            print root, name, ind, name

        Output:

        <em >alpha</em> 1 alpha 1
        <body ><em >alpha</em></body> em <em >alpha</em> em
        <body ><em >alpha</em></body> body <body ><em >alpha</em></body> body
        """

        for parent, node in self.sail_with_root():
            parent_info = (parent, parent.name, parent.attr)
            node_info = (node, node.name, node.attr)
            yield (parent_info, node_info)
项目:kivy_gosh    作者:mcroni    | 项目源码 | 文件源码
def __init__(self, data):
        """
        Create a node that holds character data.

        data -- the characters; stored unchanged as self.data.

        The base initializer is called as Root.__init__(self, DATA).
        NOTE(review): DATA is presumably the node name used for text
        nodes; confirm against Root.__init__.

        Example:

        html = Html()
        data = '<body><em>alpha</em></body>'
        dom = html.feed(data)
        x = dom.fst('em')
        x.append(Data('\nbeta'))

        It outputs.

        <body ><em >alpha
        beta</em></body>
        """

        Root.__init__(self, DATA)
        self.data = data
项目:wikilinks    作者:trovdimi    | 项目源码 | 文件源码
def feed(self, data):
        """
        Reset the parser, then hand data to HTMLParser.feed.

        self.reset() runs before every call, so each call starts from a
        freshly reset parser. Nothing is returned.
        """
        self.reset()
        HTMLParser.feed(self, data)
项目:wikilinks    作者:trovdimi    | 项目源码 | 文件源码
def feed(self, data):
        """
        Reset the parser, then hand data to HTMLParser.feed.

        self.reset() runs before every call, so each call starts from a
        freshly reset parser. Nothing is returned.
        """
        self.reset()
        HTMLParser.feed(self, data)
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def feed(self, chars):
        """
        Feed chars to HTMLParser.feed unless parsing already finished.

        When self.phase is self.TERMINATED or self.FOUND,
        self._terminate() is called before the data is passed on.
        NOTE(review): _terminate presumably raises to stop the parse;
        confirm in the class definition.

        Returns the result of HTMLParser.feed(self, chars).
        """
        # [8]
        if self.phase in (self.TERMINATED, self.FOUND):
            self._terminate()

        result = HTMLParser.feed(self, chars)
        return result
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def findHTMLMeta(stream):
    """Look for a meta http-equiv tag with the YADIS header name.

    The stream is read CHUNK_SIZE at a time and every chunk is fed to a
    YadisHTMLParser. The parser signals completion by raising ParseDone,
    whose first argument is the located URI (or None).

    @param stream: Source of the html text
    @type stream: Object that implements a read() method that works
        like file.read

    @return: The URI from which to fetch the XRDS document
    @rtype: str

    @raises MetaNotFound: raised with the content that was
        searched as the first parameter.
    """
    parser = YadisHTMLParser()
    chunks = []

    while True:
        chunk = stream.read(CHUNK_SIZE)
        if not chunk:
            # End of file
            break

        chunks.append(chunk)
        try:
            parser.feed(chunk)
        except HTMLParseError:
            # HTML parse error, so bail; keep the rest of the stream so
            # MetaNotFound carries the complete content.
            chunks.append(stream.read())
            break
        except ParseDone as why:
            # "as" binding and .args work on Python 2.6+ and Python 3.
            uri = why.args[0]
            if uri is None:
                # Parse finished, but we may need the rest of the file
                chunks.append(stream.read())
                break
            return uri

    content = ''.join(chunks)
    raise MetaNotFound(content)
项目:mdNotes    作者:EternityForest    | 项目源码 | 文件源码
def feed(self, in_html):
        """
        Parse in_html and return the accumulated output.

        self.output is reset to an empty string, HTMLParser.feed is run
        on in_html, and self.output is returned afterwards.
        NOTE(review): the handler methods presumably append to
        self.output while parsing; confirm in the class definition.
        """
        self.output = ""
        HTMLParser.feed(self, in_html)
        return self.output
项目:mdNotes    作者:EternityForest    | 项目源码 | 文件源码
def html_to_md(h):
    """Return the result of feeding h to a new MyHTMLParser instance."""
    return MyHTMLParser().feed(h)
项目:kivy_gosh    作者:mcroni    | 项目源码 | 文件源码
def sail(self):
        """
        Iterate over every object nested inside this one.

        For each direct child the generator first yields everything
        produced by the child's own sail() and then the child itself, so
        nested objects come out before the object that contains them.
        The children are taken from a slice copy (self[:]), presumably
        so the container can be modified while it is being traversed.

        Example:
        data = '<a> <b> </b> </a>'

        html = Html()
        dom = html.feed(data)

        for ind in dom.sail():
            print type(ind),',', ind.name

        It would output.

        <class 'ehp.Root'> , a
        <class 'ehp.Root'> , b
        """

        for child in self[:]:
            for descendant in child.sail():
                yield descendant

            yield child
项目:kivy_gosh    作者:mcroni    | 项目源码 | 文件源码
def index(self, item):
        """
        Return the position of item among the direct children.

        Unlike list.index, elements are compared with "is" (identity),
        not with ==. ValueError is raised when item is not found.

        Example:

        data = '<a><b></b><b></b></a>'
        html = Html()
        dom = html.feed(data)

        for root, ind in dom.sail_with_root():
            print root.name, ind.name, root.index(ind)


        It would print.

        a b 0
        a b 1
         a 0

        The line where it appears ' a 0' corresponds to the
        outmost object. The outmost object is an instance of Root
        that contains all the other objects.
        """

        for position, candidate in enumerate(self):
            if candidate is item:
                return position

        raise ValueError
项目:kivy_gosh    作者:mcroni    | 项目源码 | 文件源码
def find(self, name, *args):
        """
        Yield every object from self.sail() whose name equals name.

        Each extra argument is a (key, value) pair; an object is only
        yielded when ind.attr[key] equals value for all of them.

        Example 1:

        data = '<a><b></b><b></b></a>'
        html = Html()
        dom = html.feed(data)

        for ind in dom.find('b'):
            print ind

        It should print.

        <b ></b>
        <b ></b>

        Example 2.

        data = '<body> <p> alpha. </p> <p style="color:green"> beta.</p> </body>'
        html = Html()
        dom  = html.feed(data)

        for ind in dom.find('p', ('style', 'color:green')):
            print ind

        Output.


        <p style="color:green" > beta.</p>
        """

        for node in self.sail():
            if node.name == name and not any(
                    node.attr[key] != value for key, value in args):
                yield node
项目:kivy_gosh    作者:mcroni    | 项目源码 | 文件源码
def find_with_root(self, name, *args):
        """
        Like find, but yields (root, ind) pairs.

        The pairs come from self.sail_with_root(); a pair is yielded
        when ind.name equals name and ind.attr[key] equals value for
        every (key, value) pair in args.

        from ehp import *

        html = Html()
        dom = html.feed('''<body> <p> alpha </p> <p> beta </p> </body>''')

        for root, ind in dom.find_with_root('p'):
            root.remove(ind)

        print dom

        It would output.

        <body >   </body>
        """

        for parent, node in self.sail_with_root():
            if node.name == name and not any(
                    node.attr[key] != value for key, value in args):
                yield (parent, node)
项目:kivy_gosh    作者:mcroni    | 项目源码 | 文件源码
def match(self, *args):
        """
        Yield every object from self.sail() whose attributes match.

        args are (key, value) pairs, (key0, value0), (key1, value1), ...;
        an object is yielded when ind.attr[key] equals value for all of
        them. With no pairs every object is yielded.

        Example:

        data = '<a size="1"><b size="1"></b></a>'
        html = Html()
        dom = html.feed(data)

        for ind in dom.match(('size', '1')):
            print ind

        It would print.

        <b size="1" ></b>
        <a size="1" ><b size="1" ></b></a>
        """

        for node in self.sail():
            mismatch = any(node.attr[key] != value for key, value in args)
            if not mismatch:
                yield node
项目:kivy_gosh    作者:mcroni    | 项目源码 | 文件源码
def match_with_root(self, *args):
        """
        Like match, but yields (root, ind) pairs.

        The pairs come from self.sail_with_root(); a pair is yielded
        when ind.attr[key] equals value for every (key, value) pair in
        args.

        Example:

        from ehp import *

        html = Html()
        dom  = html.feed('''<body> <p style="color:black"> xxx </p> 
                         <p style = "color:black"> mmm </p></body>''')

        for root, ind in dom.match_with_root(('style', 'color:black')):
            del ind.attr['style']

        item = dom.fst('body')
        item.attr['style'] = 'color:black'

        print dom

        Output.

        <body style="color:black" > <p > xxx </p> 
                         <p > mmm </p></body>
        """

        for parent, node in self.sail_with_root():
            mismatch = any(node.attr[key] != value for key, value in args)
            if not mismatch:
                yield (parent, node)
项目:kivy_gosh    作者:mcroni    | 项目源码 | 文件源码
def text(self):
        """
        Return the character data found inside this object as a string.

        This delegates to self.join('', DATA): the objects whose name is
        DATA are concatenated with an empty delimiter.


        Example:

        html = Html()
        data = '<body><em>This is all the text.</em></body>'
        dom = html.feed(data)

        print dom.fst('em').text()

        It outputs.

        This is all the text.

        Notice that if you call text() on an item with
        children then it returns all the *printable* characters
        for that node.
        """

        return self.join('', DATA)
项目:kivy_gosh    作者:mcroni    | 项目源码 | 文件源码
def walk(self):
        """
        Iterate like sail, yielding (ind, ind.name, ind.attr) triples.

        Example:

        html = Html()
        data = '<body> <em> This is all the text.</em></body>'
        dom = html.feed(data)

        for ind, name, attr in dom.walk():
            print 'TAG:', ind
            print 'NAME:', name
            print 'ATTR:', attr

        It should print.

        TAG:  
        NAME: 1
        ATTR: 
        TAG:  This is all the text.
        NAME: 1
        ATTR: 
        TAG: <em > This is all the text.</em>
        NAME: em
        ATTR: 
        TAG: <body > <em > This is all the text.</em></body>
        NAME: body
        ATTR: 
        """

        for node in self.sail():
            triple = (node, node.name, node.attr)
            yield triple
项目:kivy_gosh    作者:mcroni    | 项目源码 | 文件源码
def fromfile(self, filename):
        """
        Build a structure from the contents of a file.

        filename -- path of the file to read (opened in text mode 'r').

        Returns whatever self.feed returns for the file contents.
        Errors from open() or read() propagate to the caller.
        """

        # The with block closes the file even if read() raises.
        with open(filename, 'r') as fd:
            data = fd.read()

        return self.feed(data)
项目:micro-blog    作者:nickChenyx    | 项目源码 | 文件源码
def feed(self, chars):
        """
        Feed chars to HTMLParser.feed unless parsing already finished.

        When self.phase is self.TERMINATED or self.FOUND,
        self._terminate() is called before the data is passed on.
        NOTE(review): _terminate presumably raises to stop the parse;
        confirm in the class definition.

        Returns the result of HTMLParser.feed(self, chars).
        """
        # [8]
        if self.phase in (self.TERMINATED, self.FOUND):
            self._terminate()

        result = HTMLParser.feed(self, chars)
        return result
项目:micro-blog    作者:nickChenyx    | 项目源码 | 文件源码
def findHTMLMeta(stream):
    """Look for a meta http-equiv tag with the YADIS header name.

    The stream is read CHUNK_SIZE at a time and every chunk is fed to a
    YadisHTMLParser. The parser signals completion by raising ParseDone,
    whose first argument is the located URI (or None).

    @param stream: Source of the html text
    @type stream: Object that implements a read() method that works
        like file.read

    @return: The URI from which to fetch the XRDS document
    @rtype: str

    @raises MetaNotFound: raised with the content that was
        searched as the first parameter.
    """
    parser = YadisHTMLParser()
    chunks = []

    while True:
        chunk = stream.read(CHUNK_SIZE)
        if not chunk:
            # End of file
            break

        chunks.append(chunk)
        try:
            parser.feed(chunk)
        except HTMLParseError:
            # HTML parse error, so bail; keep the rest of the stream so
            # MetaNotFound carries the complete content.
            chunks.append(stream.read())
            break
        except ParseDone as why:
            # "as" binding and .args work on Python 2.6+ and Python 3.
            uri = why.args[0]
            if uri is None:
                # Parse finished, but we may need the rest of the file
                chunks.append(stream.read())
                break
            return uri

    content = ''.join(chunks)
    raise MetaNotFound(content)
项目:Hawkeye    作者:tozhengxq    | 项目源码 | 文件源码
def feed(self, chars):
        """
        Feed chars to HTMLParser.feed unless parsing already finished.

        When self.phase is self.TERMINATED or self.FOUND,
        self._terminate() is called before the data is passed on.
        NOTE(review): _terminate presumably raises to stop the parse;
        confirm in the class definition.

        Returns the result of HTMLParser.feed(self, chars).
        """
        # [8]
        if self.phase in (self.TERMINATED, self.FOUND):
            self._terminate()

        result = HTMLParser.feed(self, chars)
        return result
项目:Hawkeye    作者:tozhengxq    | 项目源码 | 文件源码
def findHTMLMeta(stream):
    """Look for a meta http-equiv tag with the YADIS header name.

    The stream is read CHUNK_SIZE at a time and every chunk is fed to a
    YadisHTMLParser. The parser signals completion by raising ParseDone,
    whose first argument is the located URI (or None).

    @param stream: Source of the html text
    @type stream: Object that implements a read() method that works
        like file.read

    @return: The URI from which to fetch the XRDS document
    @rtype: str

    @raises MetaNotFound: raised with the content that was
        searched as the first parameter.
    """
    parser = YadisHTMLParser()
    chunks = []

    while True:
        chunk = stream.read(CHUNK_SIZE)
        if not chunk:
            # End of file
            break

        chunks.append(chunk)
        try:
            parser.feed(chunk)
        except HTMLParseError:
            # HTML parse error, so bail; keep the rest of the stream so
            # MetaNotFound carries the complete content.
            chunks.append(stream.read())
            break
        except ParseDone as why:
            # "as" binding and .args work on Python 2.6+ and Python 3.
            uri = why.args[0]
            if uri is None:
                # Parse finished, but we may need the rest of the file
                chunks.append(stream.read())
                break
            return uri

    content = ''.join(chunks)
    raise MetaNotFound(content)
项目:kivy_gosh    作者:mcroni    | 项目源码 | 文件源码
def join(self, delim, *args):
        """
        Concatenate the objects from self.sail() whose name is in args.

        delim -- string written in front of every selected object,
                 including the first one, so a non-empty delim also
                 starts the result.
        args  -- the names to select.

        Returns a string; it is empty when nothing is selected.

        Example 1:

        html = Html()
        data = '<a><b> This is cool. </b><b> That is. </b></a>'
        dom = html.feed(data)

        print dom.join('', 'b')
        print type(dom.join('b'))

        It would print.

        <b > This is cool. </b><b > That is. </b>
        <type 'str'>

        Example 2:

        html = Html()
        data = '<a><b> alpha</b><c>beta</c> <b>gamma</a>'
        dom = html.feed(data)

        print dom.join('', 'b', 'c')

        It would print.

        <b > alpha</b><c >beta</c><b >gamma</b>

        Example 3:


        html = Html()
        data = '<a><b>alpha</b><c>beta</c><b>gamma</a>'
        dom = html.feed(data)

        print dom.join('\n', DATA)

        It would print (after the leading delimiter).

        alpha
        beta
        gamma
        """

        # Collect the pieces and join once instead of rebuilding the
        # whole string on every iteration.
        pieces = ['%s%s' % (delim, node)
                  for node in self.sail() if node.name in args]

        return ''.join(pieces)
项目:kivy_gosh    作者:mcroni    | 项目源码 | 文件源码
def fst(self, name, *args):
        """
        Return the first object produced by self.find(name, *args).

        name -- the name to look for.
        args -- optional (key, value) attribute pairs, forwarded
                unchanged to self.find.

        Returns None when self.find yields nothing.

        Example 1:

        html = Html()
        data = '<body> <em> Cool. </em></body>'
        dom = html.feed(data)

        print dom.fst('em')

        It outputs.

        <em > Cool. </em>

        Example 2:

        data = '<body> <p> alpha. </p> <p style="color:green"> beta.</p> </body>'
        html = Html()
        dom  = html.feed(data)

        for ind in dom.find('p', ('style', 'color:green')):
            print ind

        print dom.fst('p', ('style', 'color:green'))
        print dom.fst_with_root('p', ('style', 'color:green'))

        Output:

        <p style="color:green" > beta.</p>
        <p style="color:green" > beta.</p>
        (<ehp.Tag object at 0xb7216c0c>, <ehp.Tag object at 0xb7216d24>)
        """

        # The builtin next() with a default works on Python 2.6+ and 3,
        # unlike the Python 2 only iterator method seq.next().
        return next(iter(self.find(name, *args)), None)