Python html.parser 模块,HTMLParser.feed() 实例源码

我们从Python开源项目中,提取了以下17个代码示例,用于说明如何使用 html.parser.HTMLParser 的 feed() 方法

项目:libiocage    作者:iocage    | 项目源码 | 文件源码
def _get_eol_list(self) -> typing.List[str]:
    """Download the FreeBSD EOL page and return the parsed RELEASE names."""
    request = urllib.request.Request(
        self.eol_url,
        headers={"Accept-Charset": "utf-8"}
    )
    with urllib.request.urlopen(request) as response:  # nosec: B310

        if response.getcode() != 200:  # noqa: T484
            # Non-200 answer: surface a warning and report no EOL releases.
            iocage.lib.errors.DistributionEOLWarningDownloadFailed(
                logger=self.logger,
                level="warning"
            )
            return []

        eol_parser = EOLParser()
        page_text = response.read().decode("utf-8", "ignore")
        eol_parser.feed(page_text)
        eol_parser.close()

        return eol_parser.eol_releases
项目:zippy    作者:securesystemslab    | 项目源码 | 文件源码
def _run_check(self, source, expected_events, collector=None):
        if collector is None:
            collector = self.get_collector()
        parser = collector
        for s in source:
            parser.feed(s)
        parser.close()
        events = parser.get_events()
        if events != expected_events:
            self.fail("received events did not match expected events\n"
                      "Expected:\n" + pprint.pformat(expected_events) +
                      "\nReceived:\n" + pprint.pformat(events))
项目:zippy    作者:securesystemslab    | 项目源码 | 文件源码
def _parse_error(self, source):
    """Assert that feeding *source* to a fresh collector raises HTMLParseError."""
    def parse(source=source):
        collector = self.get_collector()
        collector.feed(source)
        collector.close()
    self.assertRaises(html.parser.HTMLParseError, parse)
项目:web_ctp    作者:molebot    | 项目源码 | 文件源码
def _run_check(self, source, expected_events, collector=None):
        if collector is None:
            collector = self.get_collector()
        parser = collector
        for s in source:
            parser.feed(s)
        parser.close()
        events = parser.get_events()
        if events != expected_events:
            self.fail("received events did not match expected events\n"
                      "Expected:\n" + pprint.pformat(expected_events) +
                      "\nReceived:\n" + pprint.pformat(events))
项目:web_ctp    作者:molebot    | 项目源码 | 文件源码
def _parse_error(self, source):
    """Check that parsing *source* with a new collector raises HTMLParseError."""
    def parse(source=source):
        p = self.get_collector()
        p.feed(source)
        p.close()
    self.assertRaises(html.parser.HTMLParseError, parse)
项目:fondamentibook    作者:xelatihy    | 项目源码 | 文件源码
def parse(html):
    """Parse the HTML text *html* and return the root of the element tree."""
    # NOTE(review): the parameter name shadows the stdlib ``html`` module.
    tree_builder = _MyHTMLParser()
    tree_builder.feed(html)
    return tree_builder.root
项目:Mac-Python-3.X    作者:L1nwatch    | 项目源码 | 文件源码
def main():
    """Read sheet001.htm, parse it with ToolHireParser and print the dates."""
    # Use a context manager so the file handle is closed deterministically;
    # the original leaked the handle returned by open().
    with open("sheet001.htm") as htm_file:
        htm = htm_file.read()
    parser = ToolHireParser()
    parser.feed(htm)
    print(parser.dates)
项目:ouroboros    作者:pybee    | 项目源码 | 文件源码
def _run_check(self, source, expected_events, collector=None):
        if collector is None:
            collector = self.get_collector()
        parser = collector
        for s in source:
            parser.feed(s)
        parser.close()
        events = parser.get_events()
        if events != expected_events:
            self.fail("received events did not match expected events" +
                      "\nSource:\n" + repr(source) +
                      "\nExpected:\n" + pprint.pformat(expected_events) +
                      "\nReceived:\n" + pprint.pformat(events))
项目:ouroboros    作者:pybee    | 项目源码 | 文件源码
def _parse_error(self, source):
    """Parsing *source* must raise HTMLParseError and a DeprecationWarning."""
    def parse(source=source):
        collector = self.get_collector()
        collector.feed(source)
        collector.close()
    with self.assertRaises(html.parser.HTMLParseError):
        with self.assertWarns(DeprecationWarning):
            parse()
项目:ouroboros    作者:pybee    | 项目源码 | 文件源码
def test_convert_charrefs_dropped_text(self):
    # Regression test for bpo-23144: with convert_charrefs=True every
    # buffered event must be emitted even without an explicit .close()
    # (before the fix, "bar & baz" went missing).
    parser = EventCollector(convert_charrefs=True)
    parser.feed("foo <a>link</a> bar &amp; baz")
    expected = [
        ('data', 'foo '), ('starttag', 'a', []), ('data', 'link'),
        ('endtag', 'a'), ('data', ' bar & baz'),
    ]
    self.assertEqual(parser.get_events(), expected)
项目:tools    作者:freedict    | 项目源码 | 文件源码
def extract_links(from_string):
    """Return a list with all links contained in the HTML page passed as input
    parameter."""
    extractor = LinkExtractor()
    extractor.feed(from_string)
    return extractor.links
项目:kbe_server    作者:xiaohaoppy    | 项目源码 | 文件源码
def _run_check(self, source, expected_events, collector=None):
        if collector is None:
            collector = self.get_collector()
        parser = collector
        for s in source:
            parser.feed(s)
        parser.close()
        events = parser.get_events()
        if events != expected_events:
            self.fail("received events did not match expected events" +
                      "\nSource:\n" + repr(source) +
                      "\nExpected:\n" + pprint.pformat(expected_events) +
                      "\nReceived:\n" + pprint.pformat(events))
项目:kbe_server    作者:xiaohaoppy    | 项目源码 | 文件源码
def _parse_error(self, source):
    """Feeding *source* must raise HTMLParseError under a DeprecationWarning."""
    def parse(source=source):
        p = self.get_collector()
        p.feed(source)
        p.close()
    with self.assertRaises(html.parser.HTMLParseError):
        with self.assertWarns(DeprecationWarning):
            parse()
项目:pygogapi    作者:Yepoleb    | 项目源码 | 文件源码
def find_scripts(site):
    """Parse the HTML page *site* and return the scripts found by ScriptParser."""
    script_parser = ScriptParser()
    script_parser.feed(site)
    return script_parser.scripts
项目:bpy_lambda    作者:bcongdon    | 项目源码 | 文件源码
def execute(self, context):
    """Download, unpack and register the Blender binaries for every platform
    listed on download.blender.org for the running Blender version.

    Returns the Blender operator status set ``{'FINISHED'}``.
    """
    import html.parser
    import urllib.request

    remote_platforms = []

    ps = context.scene.ge_publish_settings

    # create lib folder if not already available
    lib_path = bpy.path.abspath(ps.lib_path)
    if not os.path.exists(lib_path):
        os.makedirs(lib_path)

    print("Retrieving list of platforms from blender.org...", end=" ", flush=True)

    class AnchorParser(html.parser.HTMLParser):
        # Collect the href of every <a> tag that points at a Blender archive.
        def handle_starttag(self, tag, attrs):
            if tag == 'a':
                for key, value in attrs:
                    if key == 'href' and value.startswith('blender'):
                        remote_platforms.append(value)

    url = 'http://download.blender.org/release/Blender' + bpy.app.version_string.split()[0]
    parser = AnchorParser()
    data = urllib.request.urlopen(url).read()
    # BUG FIX: str(data) on bytes yields the "b'...'" repr with escaped
    # quotes, corrupting the markup fed to the parser; decode instead.
    parser.feed(data.decode('utf-8', 'ignore'))

    print("done", flush=True)

    print("Downloading files (this will take a while depending on your internet connection speed).", flush=True)
    for i in remote_platforms:
        src = '/'.join((url, i))
        dst = os.path.join(lib_path, i)

        # Strip archive suffixes (.zip/.tar/.bz2) to get the unpack directory.
        dst_dir = '.'.join([i for i in dst.split('.') if i not in {'zip', 'tar', 'bz2'}])
        if not os.path.exists(dst) and not os.path.exists(dst.split('.')[0]):
            print("Downloading " + src + "...", end=" ", flush=True)
            urllib.request.urlretrieve(src, dst)
            print("done", flush=True)
        else:
            print("Reusing existing file: " + dst, flush=True)

        print("Unpacking " + dst + "...", end=" ", flush=True)
        if os.path.exists(dst_dir):
            # Remove a stale unpack so the archive extracts into a clean tree.
            shutil.rmtree(dst_dir)
        shutil.unpack_archive(dst, dst_dir)
        print("done", flush=True)

    print("Creating platform from libs...", flush=True)
    bpy.ops.scene.publish_auto_platforms()
    return {'FINISHED'}
项目:open-ledger    作者:creativecommons    | 项目源码 | 文件源码
def serialize(result):
    """For a given Met result, map that to our database.

    Returns the ``models.Image`` instance (saved when possible), or ``None``
    when the result contains no usable image URL.
    """
    imageinfos = result['ImageInfo']
    thumbnail = None
    url = None
    for info in imageinfos:
        if info['PrimaryDisplay']:
            # The primary display entry supplies both thumbnail and full image.
            thumbnail = ENDPOINT_BASE_IMAGE_URL + info['Thumbnail']
            url = ENDPOINT_BASE_IMAGE_URL + info['LargeWebsite']
            break
    if not url:
        log.warning("Did not get an image URL for %s", result)
        return
    image = models.Image(url=url)
    image.provider = PROVIDER_NAME
    image.source = SOURCE_NAME

    # Creator might be spread over a few tombstone fields; join all matches.
    tombstone = result['Tombstone']
    creator_names = []
    for t in tombstone:
        if t['Name'] in CREATOR_LABELS:
            val = t['Value']
            parser = CreatorParser()
            parser.feed(val)
            creator_names.append(" ".join(parser.out))
    if creator_names:
        image.creator = ", ".join(creator_names)

    image.thumbnail = thumbnail
    image.license = "cc0"
    image.license_version = '1.0'
    image.foreign_identifier = result['CollectionObject']['CRDID']
    image.foreign_landing_url = FOREIGN_LANDING_BASE_URL + str(image.foreign_identifier)
    image.title = result['CollectionObject']['Title']
    image.identifier = signals.create_identifier(image.url)
    image.last_synced_with_source = timezone.now()
    try:
        image.save()
        log.info("Adding image %s-%s (%s) identifier %s", image.title, image.creator, image.foreign_identifier, image.identifier)
    except IntegrityError as e:
        # Duplicate row: log it and fall through, still returning the instance.
        # (log.warn is a deprecated alias of log.warning.)
        log.warning(e)
    return image
项目:blender-addons    作者:scorpion81    | 项目源码 | 文件源码
def execute(self, context):
    """Download, unpack and register the Blender binaries for every platform
    listed on download.blender.org for the running Blender version.

    Returns the Blender operator status set ``{'FINISHED'}``.
    """
    import html.parser
    import urllib.request

    remote_platforms = []

    ps = context.scene.ge_publish_settings

    # create lib folder if not already available
    lib_path = bpy.path.abspath(ps.lib_path)
    if not os.path.exists(lib_path):
        os.makedirs(lib_path)

    print("Retrieving list of platforms from blender.org...", end=" ", flush=True)

    class AnchorParser(html.parser.HTMLParser):
        # Collect the href of every <a> tag that points at a Blender archive.
        def handle_starttag(self, tag, attrs):
            if tag == 'a':
                for key, value in attrs:
                    if key == 'href' and value.startswith('blender'):
                        remote_platforms.append(value)

    url = 'http://download.blender.org/release/Blender' + bpy.app.version_string.split()[0]
    parser = AnchorParser()
    data = urllib.request.urlopen(url).read()
    # BUG FIX: str(data) on bytes yields the "b'...'" repr with escaped
    # quotes, corrupting the markup fed to the parser; decode instead.
    parser.feed(data.decode('utf-8', 'ignore'))

    print("done", flush=True)

    print("Downloading files (this will take a while depending on your internet connection speed).", flush=True)
    for i in remote_platforms:
        src = '/'.join((url, i))
        dst = os.path.join(lib_path, i)

        # Strip archive suffixes (.zip/.tar/.bz2) to get the unpack directory.
        dst_dir = '.'.join([i for i in dst.split('.') if i not in {'zip', 'tar', 'bz2'}])
        if not os.path.exists(dst) and not os.path.exists(dst.split('.')[0]):
            print("Downloading " + src + "...", end=" ", flush=True)
            urllib.request.urlretrieve(src, dst)
            print("done", flush=True)
        else:
            print("Reusing existing file: " + dst, flush=True)

        print("Unpacking " + dst + "...", end=" ", flush=True)
        if os.path.exists(dst_dir):
            # Remove a stale unpack so the archive extracts into a clean tree.
            shutil.rmtree(dst_dir)
        shutil.unpack_archive(dst, dst_dir)
        print("done", flush=True)

    print("Creating platform from libs...", flush=True)
    bpy.ops.scene.publish_auto_platforms()
    return {'FINISHED'}