Python HTMLParser.HTMLParser 模块,__init__() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用HTMLParser.HTMLParser.__init__()

项目:GAMADV-XTD    作者:taers232c    | 项目源码 | 文件源码
def __init__(self, f, fieldnames=None, encoding=UTF8, **kwds):
    """Wrap ``f`` in a ``csv.reader`` and determine the field names.

    f          -- input handed to ``csv.reader``; it is wrapped in
                  ``UTF8Recoder`` first when ``encoding`` differs from ``UTF8``
    fieldnames -- explicit field names; when empty or None the first row
                  read from the input is used instead
    encoding   -- encoding of ``f``
    kwds       -- extra keyword arguments forwarded to ``csv.reader``
    """
    self.encoding = encoding
    try:
      source = f if self.encoding == UTF8 else UTF8Recoder(f, encoding)
      self.reader = csv.reader(source, dialect=csv.excel, **kwds)
      if fieldnames:
        self.fieldnames = fieldnames
      else:
        self.fieldnames = self.reader.next()
        # Strip a UTF-8 byte order mark from the start of the first header cell.
        if self.fieldnames and self.fieldnames[0].startswith(codecs.BOM_UTF8):
          self.fieldnames[0] = self.fieldnames[0].replace(codecs.BOM_UTF8, u'', 1)
    except (csv.Error, StopIteration):
      # Unreadable or empty input: carry on with no fields.
      self.fieldnames = []
    except LookupError as e:
      Cmd.Backup()
      usageErrorExit(e)
    self.numfields = len(self.fieldnames)
项目:DataTree    作者:tvgrabbers    | 项目源码 | 文件源码
def __init__(self, data_def = None, warnaction = "default", warngoal = sys.stderr, caller_id = 0):
        """Set up the converter state and optionally convert ``data_def``.

        data_def   -- data definition; only a dict is stored and converted
                      (through ``convert_data_def``), anything else leaves
                      an empty definition
        warnaction -- warning action handed to the shared ``_Warnings`` object
        warngoal   -- target passed to ``_Warnings`` when it is first created
        caller_id  -- id under which the warning action is registered
        """
        self.tree_lock = RLock()
        with self.tree_lock:
            self.dtc = DataTreeConstants()
            self.known_urlid = (0, 4, 11, 14)
            self.known_linkid = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
            self.errorcode = dte.dtDataDefOK
            self.caller_id = caller_id
            self.cdata_def = {}
            self.ddtype = ""
            # The _Warnings object is shared through the DataTreeGrab module.
            module = sys.modules['DataTreeGrab']
            if module._warnings is None:
                module._warnings = _Warnings(warnaction, warngoal, caller_id)

            elif caller_id not in module._warnings._ids or warnaction is not None:
                module._warnings.set_warnaction(warnaction, caller_id)

            if isinstance(data_def, dict):
                self.data_def = data_def
                self.convert_data_def()

            else:
                self.data_def = {}
项目:DataTree    作者:tvgrabbers    | 项目源码 | 文件源码
def __init__(self, dtree, data = None, parent = None, key = None):
        """Create a node for ``data`` and, for containers, one child node per item.

        A list becomes a "list" node with children keyed by position, a dict
        becomes a "dict" node with children keyed by the dict keys, and any
        other value is stored directly on a "value" node.
        """
        self.type = "value"
        self.key = key
        self.keys = []
        self.key_index = {}
        self.value = None
        DATAnode.__init__(self, dtree, parent)
        with self.node_lock:
            if isinstance(data, list):
                self.type = "list"
                for position, element in enumerate(data):
                    JSONnode(self.dtree, element, self, position)

            elif isinstance(data, dict):
                self.type = "dict"
                for name, element in data.items():
                    JSONnode(self.dtree, element, self, name)

            else:
                self.type = "value"
                self.value = data
项目:DataTree    作者:tvgrabbers    | 项目源码 | 文件源码
def __init__(self, data, output = sys.stdout, warnaction = "default", warngoal = sys.stderr, caller_id = 0):
        """Build a node tree from ``data`` with a ``JSONnode`` root keyed 'ROOT'.

        ``output``, ``warnaction``, ``warngoal`` and ``caller_id`` are passed
        straight on to ``DATAtree.__init__``. When building the tree fails a
        warning is issued and ``start_node`` becomes a ``NULLnode``.
        """
        DATAtree.__init__(self, output, warnaction, warngoal, caller_id)
        with self.tree_lock:
            self.tree_type ='json'
            self.extract_from_parent = True
            self.data = data
            # Read the json data into the tree
            try:
                self.root = JSONnode(self, data, key = 'ROOT')
                self.start_node = self.root

            # Not a bare except: KeyboardInterrupt and SystemExit must propagate.
            except Exception:
                self.warn('Unable to parse the JSON data. Invalid dataset!', dtDataWarning, 1)
                self.start_node = NULLnode()

# end JSONtree
项目:DataTree    作者:tvgrabbers    | 项目源码 | 文件源码
def __init__(self, data_def, data = None, warnaction = "default", warngoal = sys.stderr, caller_id = 0):
        """Initialise the tree state, load ``data_def`` and optionally ``data``.

        data_def   -- data definition, handed to ``init_data_def``
        data       -- optional data, handed to ``init_data`` unless it is None
        warnaction -- warning action for the shared ``_Warnings`` object
        warngoal   -- target passed to ``_Warnings`` when it is first created
        caller_id  -- id under which the warning action is registered
        """
        self.tree_lock = RLock()
        with self.tree_lock:
            self.dtc = DataTreeConstants()
            self.ddconv = DataDef_Convert(warnaction = warnaction , warngoal = warngoal, caller_id = caller_id)
            self.caller_id = caller_id
            self.print_tags = False
            self.print_searchtree = False
            self.show_result = False
            self.fle = sys.stdout
            # The _Warnings object is shared through the DataTreeGrab module.
            module = sys.modules['DataTreeGrab']
            if module._warnings is None:
                module._warnings = _Warnings(warnaction, warngoal, caller_id)

            else:
                module._warnings.set_warnaction(warnaction, caller_id)

            self.searchtree = None
            self.timezone = pytz.utc
            self.errorcode = dte.dtDataInvalid
            self.result = []
            self.data_def = None
            self.init_data_def(data_def)
            # Identity test: a falsy but real dataset (e.g. an empty string) is still loaded.
            if data is not None:
                self.init_data(data)
项目:true_review_web2py    作者:lucadealfaro    | 项目源码 | 文件源码
def __init__(self, *components, **attributes):
        """
        Args:
            components: the items to nest inside this element; a single
                list or tuple argument is unpacked into the items
            attributes: the attributes to store on this element

        Raises:
            SyntaxError: when the tag ends in '/' and components are given
        """

        is_standalone = self.tag[-1:] == '/'
        if is_standalone and components:
            raise SyntaxError('<%s> tags cannot have components'
                              % self.tag)
        single_sequence = (len(components) == 1
                           and isinstance(components[0], (list, tuple)))
        nested = components[0] if single_sequence else components
        self.components = list(nested)
        self.attributes = attributes
        self._fixup()
        # converts special attributes in components attributes
        self.parent = None
        for child in self.components:
            self._setnode(child)
        self._postprocessing()
项目:true_review_web2py    作者:lucadealfaro    | 项目源码 | 文件源码
def __init__(self, data, **args):
        """Store ``data`` and ``args`` and fill in the default menu attributes.

        A default is only assigned (through item assignment on ``self``)
        when its key is not already present in the given attributes.
        """
        self.data = data
        self.attributes = args
        self.components = []
        defaults = (
            ('_class', 'web2py-menu web2py-menu-vertical'),
            ('ul_class', 'web2py-menu-vertical'),
            ('li_class', 'web2py-menu-expand'),
            ('li_first', 'web2py-menu-first'),
            ('li_last', 'web2py-menu-last'),
            ('li_active', 'web2py-menu-active'),
            ('mobile', False),
        )
        for key, value in defaults:
            if key not in self.attributes:
                self[key] = value
项目:true_review_web2py    作者:lucadealfaro    | 项目源码 | 文件源码
def __init__(self,
                 text, extra=None, allowed=None, sep='p',
                 url=None, environment=None, latex='google',
                 autolinks='default',
                 protolinks='default',
                 class_prefix='',
                 id_prefix='markmin_',
                 **kwargs):
        """Store the markup text and its rendering options on the instance.

        ``extra`` and ``allowed`` fall back to a new empty dict when falsy.
        A ``url`` that compares equal to True is replaced by the global
        ``URL``; any other value is stored unchanged. Unrecognised keyword
        arguments are kept in ``self.kwargs``.
        """
        self.text = text
        self.extra = extra or {}
        self.allowed = allowed or {}
        self.sep = sep
        if url == True:
            self.url = URL
        else:
            self.url = url
        self.environment = environment
        self.latex = latex
        # Link handling options
        self.autolinks = autolinks
        self.protolinks = protolinks
        # Prefixes
        self.class_prefix = class_prefix
        self.id_prefix = id_prefix
        self.kwargs = kwargs
项目:spc    作者:whbrewer    | 项目源码 | 文件源码
def __init__(self, *components, **attributes):
        """
        :param *components: the items to nest inside this element; a single
            list or tuple argument is unpacked into the items
        :param **attributes: the attributes to store on this element

        :raises SyntaxError: when the tag ends in '/' and components are given
        """

        if components and self.tag[-1:] == '/':
            raise SyntaxError('<%s> tags cannot have components'
                              % self.tag)
        wraps_sequence = (len(components) == 1
                          and isinstance(components[0], (list, tuple)))
        if wraps_sequence:
            items = components[0]
        else:
            items = components
        self.components = list(items)
        self.attributes = attributes
        self._fixup()
        # converts special attributes in components attributes
        self.parent = None
        for item in self.components:
            self._setnode(item)
        self._postprocessing()
项目:spc    作者:whbrewer    | 项目源码 | 文件源码
def __init__(self, data, **args):
        """Keep ``data`` and ``args`` and add the default menu attributes.

        Each default below is assigned through item assignment on ``self``,
        but only when the caller did not supply that key.
        """
        self.data = data
        self.attributes = args
        self.components = []
        default_attributes = [
            ('_class', 'web2py-menu web2py-menu-vertical'),
            ('ul_class', 'web2py-menu-vertical'),
            ('li_class', 'web2py-menu-expand'),
            ('li_first', 'web2py-menu-first'),
            ('li_last', 'web2py-menu-last'),
            ('li_active', 'web2py-menu-active'),
            ('mobile', False),
        ]
        for name, default in default_attributes:
            if name in self.attributes:
                continue
            self[name] = default
项目:spc    作者:whbrewer    | 项目源码 | 文件源码
def __init__(self, text, extra=None, allowed=None, sep='p',
                 url=None, environment=None, latex='google',
                 autolinks='default',
                 protolinks='default',
                 class_prefix='',
                 id_prefix='markmin_'):
        """Store the markup text and its rendering options on the instance.

        ``extra`` and ``allowed`` fall back to a new empty dict when falsy.
        A ``url`` that compares equal to True is replaced by the global
        ``URL``; any other value is stored unchanged.
        """
        self.text = text
        self.sep = sep
        self.extra = extra or {}
        self.allowed = allowed or {}
        if url == True:
            self.url = URL
        else:
            self.url = url
        self.environment = environment
        self.latex = latex
        self.autolinks, self.protolinks = autolinks, protolinks
        self.class_prefix, self.id_prefix = class_prefix, id_prefix
项目:Problematica-public    作者:TechMaz    | 项目源码 | 文件源码
def __init__(self, *components, **attributes):
        """
        Args:
            components: the items to nest inside this element; a single
                list or tuple argument is unpacked into the items
            attributes: the attributes to store on this element

        Raises:
            SyntaxError: when the tag ends in '/' and components are given
        """

        tag_is_closed = self.tag[-1:] == '/'
        if tag_is_closed and components:
            raise SyntaxError('<%s> tags cannot have components'
                              % self.tag)
        if len(components) != 1 or not isinstance(components[0], (list, tuple)):
            self.components = list(components)
        else:
            self.components = list(components[0])
        self.attributes = attributes
        self._fixup()
        # converts special attributes in components attributes
        self.parent = None
        for component in self.components:
            self._setnode(component)
        self._postprocessing()
项目:Problematica-public    作者:TechMaz    | 项目源码 | 文件源码
def __init__(self, data, **args):
        """Store ``data`` and ``args`` and fill in the default menu attributes.

        A default is only assigned (through item assignment on ``self``)
        when its key is missing from the given attributes.
        """
        self.data = data
        self.attributes = args
        self.components = []
        fallbacks = (
            ('_class', 'web2py-menu web2py-menu-vertical'),
            ('ul_class', 'web2py-menu-vertical'),
            ('li_class', 'web2py-menu-expand'),
            ('li_first', 'web2py-menu-first'),
            ('li_last', 'web2py-menu-last'),
            ('li_active', 'web2py-menu-active'),
            ('mobile', False),
        )
        for attribute, fallback in fallbacks:
            if attribute not in self.attributes:
                self[attribute] = fallback
项目:Problematica-public    作者:TechMaz    | 项目源码 | 文件源码
def __init__(self,
                 text, extra=None, allowed=None, sep='p',
                 url=None, environment=None, latex='google',
                 autolinks='default',
                 protolinks='default',
                 class_prefix='',
                 id_prefix='markmin_',
                 **kwargs):
        """Store the markup text and its rendering options on the instance.

        ``extra`` and ``allowed`` fall back to a new empty dict when falsy.
        Only the literal ``True`` for ``url`` selects the global ``URL``;
        any other value is stored unchanged. Unrecognised keyword arguments
        are kept in ``self.kwargs``.
        """
        self.text = text
        self.extra = extra or {}
        self.allowed = allowed or {}
        self.sep = sep
        if url is True:
            self.url = URL
        else:
            self.url = url
        self.environment = environment
        self.latex = latex
        self.autolinks, self.protolinks = autolinks, protolinks
        self.class_prefix, self.id_prefix = class_prefix, id_prefix
        self.kwargs = kwargs
项目:microscan    作者:MiniSafe    | 项目源码 | 文件源码
def parseLink(html=""):
    """Return the set of ``href`` values of all ``<a>`` tags in ``html``.

    Both ``<a ...>`` and self-closing ``<a .../>`` tags are considered.
    An ``href`` attribute without a value is ignored.
    """
    # The parser module was renamed between Python 2 and Python 3.
    try:
        from HTMLParser import HTMLParser
    except ImportError:
        from html.parser import HTMLParser

    class Html(HTMLParser):
        def __init__(self):
            HTMLParser.__init__(self)
            self.result = set()

        def getResult(self):
            return self.result

        def handle_startendtag(self, tag, attrs):
            self.handle_starttag(tag, attrs)
            self.handle_endtag(tag)

        def handle_starttag(self, tag, attrs):
            if tag == 'a':
                for key, value in attrs:
                    # A bare ``href`` attribute is reported with value None.
                    if key == "href" and value is not None:
                        self.result.add(value)

    h = Html()
    h.feed(html)
    return h.result
项目:veripress    作者:veripress    | 项目源码 | 文件源码
def __init__(self):
        """Initialize the base parser and the header-tracking attributes."""
        # Compare the version tuple; sys.version is a free-form string that
        # is not meant to be parsed.
        if sys.version_info[0] >= 3:
            # Python 3.x
            super().__init__(convert_charrefs=False)
        else:
            # use HTMLParser.__init__ because HTMLParser is an 'old' style class, which cannot be passed to super()
            # see http://codependentcodr.blogspot.com/2012/02/python-htmlparser-and-super.html
            HTMLParser.__init__(self)

        self._root = _HtmlHeaderNode(level=0)  # root node with no data of itself, only 'children' matters
        self._curr_node = self._root  # most recently handled header node
        self._in_header = False
        self._header_id_count = {}  # record header ids to avoid collisions
        self._html = ''  # full HTML string parsed
        self._temp_start_tag = ''  # temporary HTML start tag of this current header node
项目:search-plugins    作者:qbittorrent    | 项目源码 | 文件源码
def __init__(self, results, url):
            """Initialise the parser and reset all per-result bookkeeping.

            ``results`` and ``url`` are stored as given; every other
            attribute starts out empty, zero or ``True`` as listed below.
            """
            HTMLParser.__init__(self)
            self.results, self.url = results, url
            self.current_item = {} # One torrent result
            self.add_query = True
            # Meta data bookkeeping
            self.torrent_info_index = 0 # Count of the meta data encountered
            self.torrent_info_array = []
            self.meta_data_grabbing = 0
            self.meta_data_array = []
            # Numeric fields start at zero
            self.torrent_no_files = self.torrent_date_added = self.torrent_popularity = 0
            # Text fields start out empty
            self.mangnet_link = self.desc_link = self.torrent_name = ""
项目:slugiot-client    作者:slugiot    | 项目源码 | 文件源码
def __init__(self, *components, **attributes):
        """
        Args:
            components: the items to nest inside this element; a single
                list or tuple argument is unpacked into the items
            attributes: the attributes to store on this element

        Raises:
            SyntaxError: when the tag ends in '/' and components are given
        """

        if self.tag[-1:] == '/':
            if components:
                raise SyntaxError('<%s> tags cannot have components'
                                  % self.tag)
        only_one = len(components) == 1
        if only_one and isinstance(components[0], (list, tuple)):
            components = components[0]
        self.components = list(components)
        self.attributes = attributes
        self._fixup()
        # converts special attributes in components attributes
        self.parent = None
        for node in self.components:
            self._setnode(node)
        self._postprocessing()
项目:slugiot-client    作者:slugiot    | 项目源码 | 文件源码
def __init__(self, data, **args):
        """Keep ``data`` and ``args`` and add the default menu attributes.

        Each default is assigned through item assignment on ``self`` and
        only when the caller did not supply that key.
        """
        self.data = data
        self.attributes = args
        self.components = []
        for option, default in (
                ('_class', 'web2py-menu web2py-menu-vertical'),
                ('ul_class', 'web2py-menu-vertical'),
                ('li_class', 'web2py-menu-expand'),
                ('li_first', 'web2py-menu-first'),
                ('li_last', 'web2py-menu-last'),
                ('li_active', 'web2py-menu-active'),
                ('mobile', False)):
            if option not in self.attributes:
                self[option] = default
项目:slugiot-client    作者:slugiot    | 项目源码 | 文件源码
def __init__(self,
                 text, extra=None, allowed=None, sep='p',
                 url=None, environment=None, latex='google',
                 autolinks='default',
                 protolinks='default',
                 class_prefix='',
                 id_prefix='markmin_',
                 **kwargs):
        """Store the markup text and its rendering options on the instance.

        ``extra`` and ``allowed`` fall back to a new empty dict when falsy.
        A ``url`` that compares equal to True is replaced by the global
        ``URL``; any other value is stored unchanged. Unrecognised keyword
        arguments are kept in ``self.kwargs``.
        """
        self.text, self.sep = text, sep
        self.extra = extra or {}
        self.allowed = allowed or {}
        if url == True:
            self.url = URL
        else:
            self.url = url
        self.environment = environment
        self.latex = latex
        self.autolinks = autolinks
        self.protolinks = protolinks
        self.class_prefix = class_prefix
        self.id_prefix = id_prefix
        self.kwargs = kwargs
项目:grasp    作者:textgain    | 项目源码 | 文件源码
def __init__(self, model, label, data=[]):
        """ Returns a new Model calibrated on the given data,
            which is a set of (vector, label)-tuples.

            For every percentage p in 0.00, 0.01, ..., 1.00 the calibrated
            value is precomputed by linear interpolation and stored in
            self._f[p].
        """
        self._model = model
        self._label = label
        # Isotonic regression:
        y = ((model.predict(v)[label], label == x) for v, x in data)
        y = sorted(y) # monotonic
        # Materialize before testing for emptiness: on Python 3, zip()
        # returns an iterator that is always truthy, so the fallback for
        # empty data would never apply and y[0] would raise IndexError.
        y = list(zip(*y)) or ((), ())
        x = list(y[0])
        y = list(y[1])
        y = pav(y)
        # Anchor the curve at (0, 0) and (1, 1).
        x = [0] + x + [1]
        y = [0] + y + [1]
        f = {}
        i = 0
        # Linear interpolation:
        for p in range(100 + 1):
            p *= 0.01
            # Advance to the first point at or beyond p.
            while x[i] < p:
                i += 1
            f[p] = (y[i-1] * (x[i] - p) + y[i] * (p - x[i-1])) / (x[i] - x[i-1])
        self._f = f
项目:grasp    作者:textgain    | 项目源码 | 文件源码
def __init__(self, path='WordNet-3.0'):
        """ Opens the WordNet database from the given path 
            (that contains dict/index.noun, dict/data.noun, ...)

            The four data files stay open and are kept in self._f; the
            index files are read once and closed again.
        """
        self._f = {} # {'n': <open file 'dict/data.noun'>}

        for k, v in (('n', 'noun'), ('v', 'verb'), ('a', 'adj' ), ('r', 'adv' )):

            f = cd(path, 'dict',  'data.%s' % v)
            f = open(f, 'rb')
            self._f[k] = f

            f = cd(path, 'dict', 'index.%s' % v)
            # The context manager closes the index file even if a line
            # fails to parse.
            with open(f, 'r') as f:
                for s in f:
                    # Lines starting with a space are skipped.
                    if not s.startswith(' '):
                        s = s.strip()
                        s = s.split(' ')
                        # The third field gives how many trailing fields to keep.
                        p = s[-int(s[2]):]
                        w = s[0]
                        w = w.replace('_', ' ')
                        self[w, k] = p # {('grasp', 'n'): (offset1, ...)}
项目:smartschool    作者:asifkodur    | 项目源码 | 文件源码
def __init__(self):
        """Initialise the parser and reset the table-scraping state."""
        HTMLParser.__init__(self)

        # Collected data; F_DATA keeps the whole table data of the web page
        self.F_DATA = []
        self.TABLE = []
        self.ROW = []

        # Counters and current positions all start at zero
        self.table_no = self.row_no = self.col_no = 0
        self.current_table = self.current_row = self.current_col = 0

        self.col_data = ''
        self.print_flag = self.pagination = False

        # Set to true only while processing the first page of the output,
        # because the links to the next report pages are fetched from it
        self.first_page = False
        # Stores the total pages found in the first page
        self.page_link_list = []
项目:asyncmultitasks    作者:willwinworld    | 项目源码 | 文件源码
def get_links(html):
    """Collect the ``href`` of every ``<a>`` start tag found in ``html``.

    The collected list is printed between two marker lines and returned.
    """
    class URLSeeker(HTMLParser):
        def __init__(self):
            # Call the base initialiser explicitly.
            HTMLParser.__init__(self)
            self.urls = []

        def handle_starttag(self, tag, attrs):
            if tag != 'a':
                return
            href = dict(attrs).get('href')
            # Missing or empty href values are skipped.
            if href:
                self.urls.append(href)

    seeker = URLSeeker()
    seeker.feed(html)
    marker = '@@'*20
    print(marker)
    print(seeker.urls)
    print(marker)
    return seeker.urls
项目:kivy_gosh    作者:mcroni    | 项目源码 | 文件源码
def __init__(self, data):
        """
        Create a node that holds character data.

        The node is initialised through Root.__init__ with the DATA
        constant, and the given characters are kept in self.data.

        Example:

        html = Html()
        data = '<body><em>alpha</em></body>'
        dom = html.feed(data)
        x = dom.fst('em')
        x.append(Data('\\nbeta'))

        It outputs.

        <body ><em >alpha
        beta</em></body>
        """

        Root.__init__(self, DATA)
        self.data = data
项目:GAMADV-X    作者:taers232c    | 项目源码 | 文件源码
def __init__(self, f, fieldnames=None, encoding=UTF8, **kwds):
    """Create the underlying ``csv.reader`` and work out the field names.

    The input ``f`` is wrapped in ``UTF8Recoder`` unless ``encoding`` equals
    ``UTF8``. When no ``fieldnames`` are given, the first row is read and
    used, with a leading UTF-8 byte order mark removed from its first cell.
    ``kwds`` are forwarded to ``csv.reader``.
    """
    self.encoding = encoding
    try:
      if self.encoding != UTF8:
        stream = UTF8Recoder(f, encoding)
      else:
        stream = f
      self.reader = csv.reader(stream, dialect=csv.excel, **kwds)
      if fieldnames:
        self.fieldnames = fieldnames
      else:
        self.fieldnames = self.reader.next()
        has_bom = bool(self.fieldnames) and self.fieldnames[0].startswith(codecs.BOM_UTF8)
        if has_bom:
          self.fieldnames[0] = self.fieldnames[0].replace(codecs.BOM_UTF8, u'', 1)
    except (csv.Error, StopIteration):
      # Nothing usable could be read: fall back to an empty field list.
      self.fieldnames = []
    except LookupError as e:
      Cmd.Backup()
      usageErrorExit(e)
    self.numfields = len(self.fieldnames)
项目:Bahubali---DDOS-Toolkit    作者:navanchauhan    | 项目源码 | 文件源码
def __init__(self, parser, name, attrs=None, parent=None,
                 previous=None):
        """Basic constructor.

        parser   -- object supplying ``isSelfClosingTag`` and the entity
                    conversion flags copied onto this tag
        name     -- tag name
        attrs    -- list of (key, value) pairs; defaults to an empty list
        parent   -- passed on to ``setup``
        previous -- passed on to ``setup``
        """

        # We don't actually store the parser object: that lets extracted
        # chunks be garbage-collected
        self.parserClass = parser.__class__
        self.isSelfClosing = parser.isSelfClosingTag(name)
        self.name = name
        if attrs is None:
            attrs = []
        self.attrs = attrs
        self.contents = []
        self.setup(parent, previous)
        self.hidden = False
        self.containsSubstitutions = False
        self.convertHTMLEntities = parser.convertHTMLEntities
        self.convertXMLEntities = parser.convertXMLEntities
        self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities

        def convert(kval):
            "Converts HTML, XML and numeric entities in the attribute value."
            k, val = kval
            if val is None:
                return kval
            # Raw string: "\d" and "\w" are invalid escapes in a plain literal.
            return (k, re.sub(r"&(#\d+|#x[0-9a-fA-F]+|\w+);",
                              self._convertEntities, val))
        # Build a real list; map() would leave a lazy iterator on Python 3.
        self.attrs = [convert(kval) for kval in self.attrs]
项目:Bahubali---DDOS-Toolkit    作者:navanchauhan    | 项目源码 | 文件源码
def __init__(self, name=None, attrs={}, text=None, **kwargs):
        """Store the name, attribute and text criteria on the instance.

        A string ``attrs`` is treated as the value for the 'class' keyword.
        Keyword arguments are merged into a copy of ``attrs``, or used on
        their own when ``attrs`` is empty.
        """
        self.name = name
        self.text = text
        if isString(attrs):
            kwargs['class'] = attrs
            attrs = None
        if kwargs and attrs:
            # Merge into a copy so the caller's dict is left untouched.
            merged = attrs.copy()
            merged.update(kwargs)
            attrs = merged
        elif kwargs:
            attrs = kwargs
        self.attrs = attrs
项目:Bahubali---DDOS-Toolkit    作者:navanchauhan    | 项目源码 | 文件源码
def __init__(self, source):
        """Start out as an empty list and remember ``source``."""
        # Initialise this instance, not a throwaway list.
        list.__init__(self)
        self.source = source

# Now, some helper functions.
项目:Bahubali---DDOS-Toolkit    作者:navanchauhan    | 项目源码 | 文件源码
def __init__(self, soup):
        """Initialise the base HTMLParser and keep a reference to ``soup``."""
        HTMLParser.__init__(self)
        self.soup = soup

    # We inherit feed() and reset().
项目:Bahubali---DDOS-Toolkit    作者:navanchauhan    | 项目源码 | 文件源码
def reset(self):
        """Re-initialise this object as the root tag and clear the parse state."""
        # NOTE(review): the second ``self`` is presumably Tag's ``parser``
        # argument -- confirm against Tag.__init__.
        Tag.__init__(self, self, self.ROOT_TAG_NAME)
        self.hidden = 1
        self.builder.reset()
        self.currentData = []
        self.currentTag = None
        self.tagStack = []
        self.quoteStack = []
        # Push this object itself as the first tag.
        self.pushTag(self)
项目:Bahubali---DDOS-Toolkit    作者:navanchauhan    | 项目源码 | 文件源码
def __init__(self, markup, overrideEncodings=[],
                 smartQuotesTo='xml', isHTML=False):
        """Try a series of encodings to turn ``markup`` into unicode.

        markup            -- the document; an empty string or a unicode
                             object is stored without any conversion
        overrideEncodings -- encodings to try first, in the given order
        smartQuotesTo     -- stored on the instance as ``smartQuotesTo``
        isHTML            -- passed on to ``_detectEncoding``

        The result is stored in ``self.unicode``; it is whatever the last
        ``_convertFrom`` attempt returned, which may be a falsy value when
        every attempt failed.
        """
        self.declaredHTMLEncoding = None
        self.markup, documentEncoding, sniffedEncoding = \
                     self._detectEncoding(markup, isHTML)
        self.smartQuotesTo = smartQuotesTo
        self.triedEncodings = []
        # Nothing to decode for an empty string or an existing unicode object.
        if markup == '' or isinstance(markup, unicode):
            self.originalEncoding = None
            self.unicode = unicode(markup)
            return

        u = None
        # First the caller-supplied encodings ...
        for proposedEncoding in overrideEncodings:
            u = self._convertFrom(proposedEncoding)
            if u: break
        # ... then the two encodings returned by _detectEncoding.
        if not u:
            for proposedEncoding in (documentEncoding, sniffedEncoding):
                u = self._convertFrom(proposedEncoding)
                if u: break

        # If no luck and we have auto-detection library, try that:
        if not u and chardet and not isinstance(self.markup, unicode):
            u = self._convertFrom(chardet.detect(self.markup)['encoding'])

        # As a last resort, try utf-8 and windows-1252:
        if not u:
            for proposed_encoding in ("utf-8", "windows-1252"):
                u = self._convertFrom(proposed_encoding)
                if u: break

        self.unicode = u
        # NOTE(review): originalEncoding is presumably set by _convertFrom on
        # success -- confirm; here it is only cleared when nothing worked.
        if not u: self.originalEncoding = None
项目:stockcmd    作者:zzzaaa12    | 项目源码 | 文件源码
def __init__(self, argv):
        """Store ``argv`` and start with empty stock, query and result state.

        The user stock list comes from ``USER_STOCK_LIST`` and the query URL
        is built from ``TWSE_SERVER``.
        """
        self.argv = argv
        self.user_stock_list = USER_STOCK_LIST
        self.twse_url = TWSE_SERVER + '/stock/api/getStockInfo.jsp?ex_ch='
        # Lists filled in later
        self.stock_list = []
        self.query_list = []
        self.data = []
        # Text buffers
        self.stock_query_str = self.json_data = ''

    # append stock in monitor mode
项目:stockcmd    作者:zzzaaa12    | 项目源码 | 文件源码
def __init__(self):
        """Initialise the parser with no collected data and an item limit of 15."""
        HTMLParser.__init__(self)
        self.item_index, self.item_limit = 0, 15
        self.data = []
项目:darkc0de-old-stuff    作者:tuwid    | 项目源码 | 文件源码
def __init__(self, q, start, proxy=None, check=True, callback=None):
        """Initialise the parser and store the search parameters.

        ``q`` is stored URL-quoted and ``start`` is converted with ``int``;
        the remaining arguments are stored as given.
        """
        HTMLParser.__init__(self)
        quoted_query = quote(q)
        first_result = int(start)
        self.__q = quoted_query
        self.__start = first_result
        self.__s = 0
        self.__buffer = ""
        self.__callback = callback
        self.__check = check
        self.__proxy = proxy
项目:darkc0de-old-stuff    作者:tuwid    | 项目源码 | 文件源码
def __init__(self, ldomain, scandpth, lps):
        """Initialise the parser with ``ldomain`` as the only known URL.

        ``scandpth`` is stored as ``depth`` and ``lps`` as ``max_span``.
        """
        HTMLParser.__init__(self)
        start_url = ldomain
        self.url = start_url
        # The start URL is the first entry in both bookkeeping structures.
        self.db = {start_url: 1}
        self.node = [start_url]

        self.depth, self.max_span = scandpth, lps
        self.links_found = 0
项目:GAMADV-XTD    作者:taers232c    | 项目源码 | 文件源码
def __init__(self):
    """Initialise the base HTMLParser and start with an empty text list."""
    HTMLParser.__init__(self)
    self.__text = []
项目:GAMADV-XTD    作者:taers232c    | 项目源码 | 文件源码
def __init__(self, f, encoding):
    """Wrap ``f`` in the ``codecs`` stream reader registered for ``encoding``."""
    self.reader = codecs.getreader(encoding)(f)
项目:GAMADV-XTD    作者:taers232c    | 项目源码 | 文件源码
def __init__(self, f, encoding, **kwds):
    """Set up a ``csv.writer`` that writes into an in-memory queue.

    ``f`` is kept as the target stream, together with ``encoding`` and an
    incremental encoder for it. ``kwds`` are forwarded to ``csv.writer``.
    """
    import cStringIO
    # Redirect output to a queue
    queue = cStringIO.StringIO()
    self.queue = queue
    self.writer = csv.writer(queue, **kwds)
    self.stream = f
    self.encoding = encoding
    make_encoder = codecs.getincrementalencoder(self.encoding)
    self.encoder = make_encoder()
项目:GAMADV-XTD    作者:taers232c    | 项目源码 | 文件源码
def __init__(self, f, fieldnames, encoding, **kwds):
    """Initialise the parent class, then install a ``UnicodeWriter`` as ``self.writer``.

    ``f``, ``fieldnames`` and ``kwds`` go to the parent initialiser; ``f``,
    ``encoding`` and ``kwds`` are used to build the ``UnicodeWriter``.
    """
    super(UnicodeDictWriter, self).__init__(f, fieldnames, **kwds)
    # NOTE(review): presumably replaces the writer created by the parent
    # class (csv.DictWriter?) -- confirm against the class definition.
    self.writer = UnicodeWriter(f, encoding, **kwds)

# Open a CSV file, get optional arguments [charset <String>] [columndelimiter <Character>] [quotechar <Character>] [fields <FieldNameList>]
项目:wikilinks    作者:trovdimi    | 项目源码 | 文件源码
def __init__(self):
        """Initialise the parser with nothing collected and a zero table counter."""
        HTMLParser.__init__(self)
        self.table_counter = 0
        self.fed_text = None
        self.fed = []
项目:wikilinks    作者:trovdimi    | 项目源码 | 文件源码
def __init__(self):
        """Initialise the parser and reset every collection, flag and counter."""
        HTMLParser.__init__(self)
        # Collected output
        self.fed = []
        self.fed_in_section = []
        self.fed_text = None
        # Counters
        self.table_counter = self.navbox_counter = 0
        # State flags, all initially off
        self.section_found = self.section_name = False
        self.lead_found = False
        self.tracking_link = self.tracking_see_also = False
        self.in_section = False
        # self.paragraph_found = False
        # self.paragraph_counter = 0
项目:mooder    作者:phith0n    | 项目源码 | 文件源码
def __init__(self, allows = []):
        """Initialise the parser; a non-empty ``allows`` replaces ``allow_tags``.

        With an empty (falsy) ``allows`` the existing ``allow_tags`` value
        is kept.
        """
        HTMLParser.__init__(self)
        self.allow_tags = allows or self.allow_tags
        self.result, self.start, self.data = [], [], []
项目:xiaodi    作者:shenaishiren    | 项目源码 | 文件源码
def __init__(self, allows = []):
        """Initialise the parser and the result buffers.

        A non-empty ``allows`` is stored as ``allow_tags``; otherwise the
        existing ``allow_tags`` value is kept.
        """
        HTMLParser.__init__(self)
        if allows:
            self.allow_tags = allows
        else:
            self.allow_tags = self.allow_tags
        self.data = []
        self.start = []
        self.result = []
项目:mx    作者:graalvm    | 项目源码 | 文件源码
def __init__(self):
        """Initialise the base HTMLParser; no extra state is added."""
        HTMLParser.__init__(self)
项目:mx    作者:graalvm    | 项目源码 | 文件源码
def __init__(self):
        """Initialise the base HTMLParser and start with an empty ``files`` list."""
        HTMLParser.__init__(self)
        self.files = []
项目:DataTree    作者:tvgrabbers    | 项目源码 | 文件源码
def __init__(self, warnaction = None, warngoal = sys.stderr, caller_id = 0):
        """Create the warning registry and register ``caller_id``.

        warnaction -- action passed to ``set_warnaction``; None selects "default"
        warngoal   -- stored on the instance as ``warngoal``
        caller_id  -- first id in ``_ids``; also passed to ``set_warnaction``
        """
        self.warn_lock = RLock()
        self.onceregistry = {}
        self.filters = []
        # The id list starts out empty, so the caller id can be stored
        # directly without a membership test.
        self._ids = [caller_id]
        self.warngoal = warngoal
        if warnaction is None:
            warnaction = "default"

        self.set_warnaction(warnaction, caller_id)
项目:DataTree    作者:tvgrabbers    | 项目源码 | 文件源码
def __init__(self, dtree, parent = None):
        """Create a node in ``dtree`` and attach it to ``parent`` when given.

        dtree  -- the tree this node belongs to
        parent -- parent node; None makes this node a root. Only a
                  ``DATAnode`` parent gets this node appended as a child.
        """
        self.node_lock = RLock()
        with self.node_lock:
            self.dtc = DataTreeConstants()
            self.children = []
            self.dtree = dtree
            self.parent = parent
            self.value = None
            self.child_index = 0
            self.level = 0
            self.links = {"values": {}, "nodes": {}}
            self.end_links = {"values": {}, "nodes": {}}

            # Identity test: a node is a root exactly when it has no parent.
            self.is_root = self.parent is None
            # Walk up the parent chain to find the root node.
            n = self
            while not n.is_root:
                n = n.parent

            self.root = n
            if isinstance(parent, DATAnode):
                self.parent.append_child(self)
                self.level = parent.level + 1
项目:DataTree    作者:tvgrabbers    | 项目源码 | 文件源码
def __init__(self, dtree, data = None, parent = None):
        """Create a tag node from a tag name or a [tag, attributes] list.

        A string ``data`` is used as the tag name. A list supplies the tag
        name as its first item and, optionally, a list or tuple of
        (name, value) pairs as its second item. ``attr_names`` lists 'class'
        and 'id' first (when present), then the remaining attribute names.
        """
        self.tag = u''
        self.text = u''
        self.tail = u''
        self.attributes = {}
        self.attr_names = []
        DATAnode.__init__(self, dtree, parent)
        with self.node_lock:
            if isinstance(data, (str, unicode)):
                self.tag = data.lower().strip()

            elif isinstance(data, list):
                if data:
                    self.tag = data[0].lower().strip()

                if len(data) > 1 and isinstance(data[1], (list, tuple)):
                    for pair in data[1]:
                        value = pair[1]
                        # Only text values are stripped; others are stored as-is.
                        if isinstance(value, (str, unicode)):
                            value = value.strip()

                        self.attributes[pair[0].lower().strip()] = value

                    for preferred in ('class', 'id'):
                        if preferred in self.attributes:
                            self.attr_names.append(preferred)

                    for name in self.attributes:
                        if name not in self.attr_names:
                            self.attr_names.append(name)
项目:DataTree    作者:tvgrabbers    | 项目源码 | 文件源码
def __init__(self, data, autoclose_tags=[], print_tags = False, output = sys.stdout, warnaction = "default", warngoal = sys.stderr, caller_id = 0):
        """Parse the HTML text ``data`` into a node tree.

        data           -- the page text fed to the parser
        autoclose_tags -- stored on the instance as ``autoclose_tags``
        print_tags     -- stored on the instance as ``print_tags``
        output, warnaction, warngoal, caller_id
                       -- passed straight on to ``DATAtree.__init__``

        When parsing fails a warning is issued and ``start_node`` becomes a
        ``NULLnode``.
        """
        HTMLParser.__init__(self)
        DATAtree.__init__(self, output, warnaction, warngoal, caller_id)
        with self.tree_lock:
            self.tree_type ='html'
            self.print_tags = print_tags
            self.autoclose_tags = autoclose_tags
            self.is_tail = False
            self.root = HTMLnode(self, 'root')
            self.current_node = self.root
            self.last_node = None
            self.text = u''
            self.open_tags = {}
            self.count_tags(data)
            # read the html page into the tree
            try:
                # Cover for incomplete reads where the essential body part is retrieved:
                # append the closing tag for any of these that was opened but not closed.
                for ctag in ('body', 'BODY', 'html', 'HTML', 'xml', 'XML'):
                    if u'<%s>' % (ctag, ) in data and u'</%s>' % (ctag, ) not in data:
                        data = u'%s</%s>' % (data, ctag)

                self.feed(data)
                self.reset()
                self.start_node = self.root

            # Not a bare except: KeyboardInterrupt and SystemExit must propagate.
            except Exception:
                self.warn('Unable to parse the HTML data. Invalid dataset!', dtDataWarning, 1)
                self.start_node = NULLnode()