Python lxml.etree 模块,parse() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用lxml.etree.parse()

项目:dati-ckan-docker    作者:italia    | 项目源码 | 文件源码
def schematron(cls, schema):
        """Compile a Schematron schema into an ``etree.XSLT`` object.

        The schema is run through three bundled ISO Schematron stylesheets
        (include, abstract expand, SVRL for XSLT 1) in that order, and the
        result of the last step is wrapped in ``etree.XSLT``.

        :param schema: a file name or a readable file object, which is parsed
            with ``etree.parse``; any other object is handed to the first
            stylesheet unchanged (presumably an already parsed tree --
            confirm against callers).
        :returns: the ``etree.XSLT`` built from the transformed schema.
        """
        transforms = [
            "xml/schematron/iso_dsdl_include.xsl",
            "xml/schematron/iso_abstract_expand.xsl",
            "xml/schematron/iso_svrl_for_xslt1.xsl",
            ]
        # The Python 2 only ``file`` builtin was tested here.  Duck-typing on
        # ``read`` keeps open files working, runs on Python 3 as well, and
        # additionally accepts a plain path string.
        if isinstance(schema, str) or hasattr(schema, "read"):
            compiled = etree.parse(schema)
        else:
            compiled = schema
        for filename in transforms:
            with resource_stream(
                    __name__, filename) as stream:
                xform = etree.XSLT(etree.parse(stream))
                # Each stylesheet consumes the output of the previous one.
                compiled = xform(compiled)
        return etree.XSLT(compiled)
项目:core-framework    作者:RedhawkSDR    | 项目源码 | 文件源码
def parse(inFileName):
    """Parse ``inFileName`` and return the object built from its root element.

    The class used to build the object is the one ``get_root_tag`` reports
    for the root element; when it reports none, ``softPkg`` is used.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    _tag, root_class = get_root_tag(root_element)
    if root_class is None:
        root_class = softPkg
    built = root_class.factory()
    built.build(root_element)
    # Drop the document reference so Python can collect the DOM.
    document = None
    return built
项目:core-framework    作者:RedhawkSDR    | 项目源码 | 文件源码
def parse(inFileName):
    """Parse ``inFileName`` and return the object built from its root element.

    The class used to build the object is the one ``get_root_tag`` reports
    for the root element; when it reports none, ``devicepkg`` is used.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    _tag, root_class = get_root_tag(root_element)
    if root_class is None:
        root_class = devicepkg
    built = root_class.factory()
    built.build(root_element)
    # Drop the document reference so Python can collect the DOM.
    document = None
    return built
项目:core-framework    作者:RedhawkSDR    | 项目源码 | 文件源码
def parse(inFileName):
    """Parse ``inFileName`` and return the object built from its root element.

    The class used to build the object is the one ``get_root_tag`` reports
    for the root element; when it reports none, ``deviceconfiguration`` is
    used.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    _tag, root_class = get_root_tag(root_element)
    if root_class is None:
        root_class = deviceconfiguration
    built = root_class.factory()
    built.build(root_element)
    # Drop the document reference so Python can collect the DOM.
    document = None
    return built
项目:core-framework    作者:RedhawkSDR    | 项目源码 | 文件源码
def parse(inFileName):
    """Parse ``inFileName`` and return the object built from its root element.

    The class used to build the object is the one ``get_root_tag`` reports
    for the root element; when it reports none,
    ``domainmanagerconfiguration`` is used.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    _tag, root_class = get_root_tag(root_element)
    if root_class is None:
        root_class = domainmanagerconfiguration
    built = root_class.factory()
    built.build(root_element)
    # Drop the document reference so Python can collect the DOM.
    document = None
    return built
项目:core-framework    作者:RedhawkSDR    | 项目源码 | 文件源码
def parse(inFileName):
    """Parse ``inFileName`` and return the object built from its root element.

    The class used to build the object is the one ``get_root_tag`` reports
    for the root element; when it reports none, ``profile`` is used.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    _tag, root_class = get_root_tag(root_element)
    if root_class is None:
        root_class = profile
    built = root_class.factory()
    built.build(root_element)
    # Drop the document reference so Python can collect the DOM.
    document = None
    return built
项目:core-framework    作者:RedhawkSDR    | 项目源码 | 文件源码
def parse(inFileName):
    """Parse ``inFileName`` and return the object built from its root element.

    The class used to build the object is the one ``get_root_tag`` reports
    for the root element; when it reports none, ``softwareassembly`` is used.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    _tag, root_class = get_root_tag(root_element)
    if root_class is None:
        root_class = softwareassembly
    built = root_class.factory()
    built.build(root_element)
    # Drop the document reference so Python can collect the DOM.
    document = None
    return built
项目:core-framework    作者:RedhawkSDR    | 项目源码 | 文件源码
def parse(inFileName):
    """Parse ``inFileName`` and return the object built from its root element.

    The class used to build the object is the one ``get_root_tag`` reports
    for the root element; when it reports none, ``softwarecomponent`` is
    used.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    _tag, root_class = get_root_tag(root_element)
    if root_class is None:
        root_class = softwarecomponent
    built = root_class.factory()
    built.build(root_element)
    # Drop the document reference so Python can collect the DOM.
    document = None
    return built
项目:core-framework    作者:RedhawkSDR    | 项目源码 | 文件源码
def parse(inFileName):
    """Parse ``inFileName`` and return the object built from its root element.

    The class used to build the object is the one ``get_root_tag`` reports
    for the root element; when it reports none, ``properties`` is used.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    _tag, root_class = get_root_tag(root_element)
    if root_class is None:
        root_class = properties
    built = root_class.factory()
    built.build(root_element)
    # Drop the document reference so Python can collect the DOM.
    document = None
    return built
项目:aniwall    作者:worron    | 项目源码 | 文件源码
def _load_image_data(self, file_, source):
        """Build an ``ImageData`` for ``file_`` from the SVG tags in ``source``.

        Elements are looked up by ``id`` inside the document's default
        namespace: ``transform``, ``background`` and then ``color1``,
        ``color2``, ... until the first missing index.
        """
        document = etree.parse(source, self.parser)
        svg_root = document.getroot()
        namespace = "{%s}" % svg_root.nsmap[None]

        def by_id(element_id):
            # First element (any tag in the default namespace) with this id.
            return svg_root.find(".//%s*[@id='%s']" % (namespace, element_id))

        result = ImageData(file_, document)
        result.set_transform(by_id("transform"))
        result.set_background(by_id("background"))

        for number in count(1):
            color_id = "color" + str(number)
            element = by_id(color_id)
            if element is None:
                # Colour ids are consumed consecutively; stop at the first gap.
                break
            result.set_color(element, color_id)

        return result
项目:ssbio    作者:SBRG    | 项目源码 | 文件源码
def pdb_chain_stoichiometry_biomolone(pdbid):
    """Return the chain stoichiometry of biological assembly 1 as a dict.

    Documented procedure:

    1. Download the PDB entry, parse its header and build the biomolecule
       when one is provided.
    2. Count how often each chain appears in biomolecule #1.
    3. Convert each chain id to a UniProt id.
    4. Return the resulting dictionary.

    NOTE: as it appears here the function body is a stub and returns None.

    Args:
        pdbid (str): 4 character PDB ID

    Returns:
        dict: {(ChainID, UniProtID): number of occurrences}
    """
    return None
项目:PyGenAlg    作者:RaphDeau    | 项目源码 | 文件源码
def addParamFile(cls, paramsFile):
        """Load parameter definitions from an XML file onto ``cls``.

        For every ``<Parameter name="...">`` child of the document root:

        * ``cls.<NAME>_LABEL`` is set to the parameter name;
        * a ``<Check_Method>`` child has its text executed (it is expected to
          define a function) and that function is bound to
          ``cls.<NAME>_CHECK_METHOD``;
        * a ``<NeededAttributes>`` child sets ``cls.<attr name>`` for each
          ``<Attribute>`` it contains, using the attribute text as a Python
          expression;
        * any other child sets ``cls.<NAME>_<TAG>`` from its text, evaluated
          as a Python expression.

        The parameter name is then appended to ``cls.ALL_PARAMS``.

        SECURITY: text taken from the XML file is executed as Python code.
        Only load parameter files from a trusted source.

        :param paramsFile: XML source accepted by ``etree.parse``.
        :raises PYGA_ParametersError: if a parameter name is already present
            in ``cls.ALL_PARAMS``.
        """
        paramXmlRoot = etree.parse(paramsFile)
        # One explicit namespace shared by every exec() call.  Relying on the
        # implicit function locals does not work on Python 3.13+, where a
        # function defined by one exec() is no longer visible to the next.
        namespace = {"cls": cls}
        for param in paramXmlRoot.getroot():
            if param.tag != "Parameter":
                continue
            paramLabel = param.get("name")
            prefix = paramLabel.upper() + "_"
            # A plain attribute assignment needs no generated code (and stays
            # correct when the label contains quotes or backslashes).
            setattr(cls, prefix + "LABEL", paramLabel)
            for element in param:
                if element.tag == "Check_Method":
                    # Define the check function, then bind it by name.
                    exec(element.text, globals(), namespace)
                    methodName = element.text.split(' ')[1].split('(')[0]
                    exec("cls." + prefix + element.tag.upper() + "=" + methodName,
                         globals(), namespace)
                elif element.tag != "NeededAttributes":
                    exec("cls." + prefix + element.tag.upper() + "=" + element.text,
                         globals(), namespace)
                else:
                    for attribute in element:
                        if attribute.tag == "Attribute":
                            exec("cls." + attribute.get("name") + "=" + attribute.text,
                                 globals(), namespace)
            if paramLabel not in cls.ALL_PARAMS:
                cls.ALL_PARAMS.append(paramLabel)
            else:
                # %-formatting replaces the Python 2 only ``unicode()`` call.
                raise PYGA_ParametersError("ERROR: Parameter %s defined twice." % paramLabel)
项目:EventStoryLine    作者:tommasoc80    | 项目源码 | 文件源码
def produce_output(inputf, outfile):
    """Append one PRECONDITION line per generated event pair to ``outfile``.

    ``inputf`` is parsed with blank text removed; event mentions, events per
    sentence and event pairs are derived from it through
    ``extract_event_CAT``, ``event_sentence`` and ``generate_event_pairs``.
    Each pair is written as
    ``<tokens of first>_...\\t<tokens of second>_...\\tPRECONDITION``.

    :param inputf: XML source accepted by ``etree.parse``.
    :param outfile: path of the file to append to.  It is only opened when
        there is at least one pair to write.
    """
    ecbplus = etree.parse(inputf, etree.XMLParser(remove_blank_text=True))

    event_mentions = extract_event_CAT(ecbplus)
    event_per_sentence = event_sentence(ecbplus, event_mentions)
    event_pairs = generate_event_pairs(event_per_sentence)

    lines = [
        "_".join(event_mentions[pair[0]]) + "\t" + "_".join(event_mentions[pair[1]]) + "\tPRECONDITION" + "\n"
        for pairs in event_pairs.values()
        for pair in pairs
    ]

    # Open the file once instead of once per line; skip it entirely when
    # there is nothing to write so that no empty file gets created.
    if lines:
        with open(outfile, "a") as output:
            output.writelines(lines)
项目:llk    作者:Tycx2ry    | 项目源码 | 文件源码
def __init__(self, file_like) :
        """Parse a GEXF document and extract its meta and graph objects.

        The root element must be ``gexf`` (namespace stripped, lower-cased);
        otherwise the problem is reported through ``msg_unexpected_tag`` and
        construction stops.  ``self.gexf_obj`` is built from the ``meta``
        child and ``self.graph_obj`` from the ``graph`` child; a ``graph``
        that appears before any ``meta`` is reported and stops processing.

        :param file_like: XML source accepted by ``etree.parse``.
        """
        parser = etree.XMLParser(ns_clean=True)
        tree = etree.parse(file_like, parser)
        gexf_xml = tree.getroot()
        tag = self.ns_clean(gexf_xml.tag).lower()
        # ``<>`` only exists in Python 2; ``!=`` works on both.
        if tag != "gexf" :
            self.msg_unexpected_tag("gexf", tag)
            return
        self.gexf_obj = None
        for child in gexf_xml :
            tag = self.ns_clean(child.tag).lower()
            if tag == "meta" :
                meta_xml = child
                self.gexf_obj = self.extract_gexf_obj(meta_xml)
            if tag == "graph" :
                graph_xml = child
                # Identity test: the graph needs the meta object built first.
                if self.gexf_obj is None :
                    self.msg_unexpected_tag("meta", tag)
                    return
                self.graph_obj = self.extract_graph_obj(graph_xml)
项目:xmlschema    作者:brunato    | 项目源码 | 文件源码
def test_to_dict_from_etree(self):
        """Decode already parsed example trees and compare with the references.

        For both the vehicles and the collection example: decoding without a
        namespace map must differ from the reference dict, while decoding
        with ``self.namespaces`` (through the schema and through
        ``xmlschema.to_dict``) must equal it.
        """
        vehicles_tree = _ElementTree.parse('examples/vehicles/vehicles.xml')
        collection_tree = _ElementTree.parse('examples/collection/collection.xml')

        cases = (
            (self.vh_schema, vehicles_tree, _VEHICLES_DICT),
            (self.col_schema, collection_tree, _COLLECTION_DICT),
        )
        for schema, tree, expected in cases:
            # Without a namespace map (e.g. XSI unmapped) the result differs.
            self.assertNotEqual(schema.to_dict(tree), expected)

            decoded = schema.to_dict(tree, namespaces=self.namespaces)
            self.assertEqual(decoded, expected)

            decoded = xmlschema.to_dict(tree, schema.url, namespaces=self.namespaces)
            self.assertEqual(decoded, expected)
项目:health-stats    作者:ex-nerd    | 项目源码 | 文件源码
def parse_log(self, path):
        """Load the records of an Apple Health export archive into the database.

        ``apple_health_export/export.xml`` is read from the zip file at
        ``path``; every ``Record`` element is converted with
        ``self.parse_record`` and the truthy results are kept.  Existing rows
        of ``self.SOURCE`` between the earliest and latest event time are
        flushed, then the new events are merged and committed.

        :param path: path to the export zip archive.
        """
        session = DBSession()

        # This file is big enough (and compressed) that we might as well just parse
        # it once and worry about saving memory if/when that becomes an issue.
        hk_events = []
        with ZipFile(path, 'r') as zfile:
            # Close the member handle as soon as the tree has been built.
            with zfile.open('apple_health_export/export.xml') as xfile:
                root = etree.parse(xfile).getroot()
            for record in root.iterfind('.//Record'):
                event = self.parse_record(record)
                if event:
                    hk_events.append(event)

        # min()/max() raise ValueError on an empty sequence, so only touch
        # existing rows when the export actually produced events.
        if hk_events:
            # find earliest/latest and delete any existing rows from this range
            times = [e.time for e in hk_events]
            self._flush_old_data(session, self.SOURCE, min(times), max(times))
            session.commit()

            # Now load the parsed events
            for event in hk_events:
                session.merge(event)
        print("Adding {} events".format(len(hk_events)))
        session.commit()
项目:simLAB    作者:kamwar    | 项目源码 | 文件源码
def readXml(self, simType):
        """Return ``(path, root)`` for the SIM file description of ``simType``.

        The XML file sits next to this module: ``sim_files_3g.xml`` when
        ``simType`` equals ``types.TYPE_USIM``, ``sim_files_2g.xml``
        otherwise.  When the file exists it is parsed with blank text
        removed; when it does not, a warning is logged and a fresh empty
        ``sim_3G`` / ``sim_2G`` root element is returned instead.

        :returns: tuple of the file path and the root element.
        """
        is_usim = simType == types.TYPE_USIM
        file_name = "sim_files_3g.xml" if is_usim else "sim_files_2g.xml"
        path = os.path.join(os.path.dirname(__file__), file_name)

        # The previously created (and never used) ElementTree is gone.
        if os.path.exists(path):
            parser = etree.XMLParser(remove_blank_text=True)
            root = etree.parse(path, parser).getroot()
        else:
            logging.warning("File %s not exists", path)
            logging.info("Create xml")
            root = etree.Element('sim_3G' if is_usim else 'sim_2G')
        return path, root
项目:PySIGNFe    作者:thiagopena    | 项目源码 | 文件源码
def _le_xml(self, arquivo):
        """Read XML into ``self._xml`` and report whether anything was read.

        ``arquivo`` may be XML text, a path to an XML file, or a non-string
        object, which is first serialised with ``etree.tounicode``.  Text
        containing ``NAMESPACE_NFSE`` is passed through ``por_acentos``; text
        containing ``<`` is treated as XML content, anything else as a file
        path.  The opening declaration is removed with ``tira_abertura``
        before parsing.

        :returns: ``False`` when ``arquivo`` is ``None``, ``True`` otherwise.
        """
        if arquivo is None:
            return False

        # After this conversion ``arquivo`` is always a string, which is why
        # the former non-string fallback branches could never run.
        if not isinstance(arquivo, basestring):
            arquivo = etree.tounicode(arquivo)

        if NAMESPACE_NFSE in arquivo:
            arquivo = por_acentos(arquivo)

        if u'<' in arquivo:
            texto = arquivo
        else:
            # No markup: treat the value as a path; ``with`` closes the file
            # even when reading fails.
            with open(arquivo) as arq:
                texto = arq.read()

        self._xml = etree.fromstring(tira_abertura(texto))
        return True
项目:PySIGNFe    作者:thiagopena    | 项目源码 | 文件源码
def validar(self):
        """Validate ``self.xml`` against the schema and collect the messages.

        Error-log entries of level 1 are appended to ``self.alertas`` and
        entries of level 2 to ``self.erros``, with the NF-e namespace prefix
        removed from the message text.

        :returns: the schema's ``error_log``.
        """
        caminho = self.caminho_esquema + self.arquivo_esquema

        # It is important to remove the encoding declaration here to avoid
        # unicode-to-ascii conversion errors.
        conteudo = tira_abertura(self.xml).encode(u'utf-8')

        esquema = etree.XMLSchema(etree.parse(caminho))
        documento = etree.fromstring(conteudo)

        if not esquema.validate(documento):
            for entrada in esquema.error_log:
                if entrada.level not in (1, 2):
                    continue
                destino = self.alertas if entrada.level == 1 else self.erros
                destino.append(entrada.message.replace('{http://www.portalfiscal.inf.br/nfe}', ''))

        return esquema.error_log
项目:spiderfoot    作者:wi-fi-analyzer    | 项目源码 | 文件源码
def __init__(self, file_like) :
        """Parse a GEXF document and extract its meta and graph objects.

        The root element must be ``gexf`` (namespace stripped, lower-cased);
        otherwise the problem is reported through ``msg_unexpected_tag`` and
        construction stops.  ``self.gexf_obj`` is built from the ``meta``
        child and ``self.graph_obj`` from the ``graph`` child; a ``graph``
        that appears before any ``meta`` is reported and stops processing.

        :param file_like: XML source accepted by ``etree.parse``.
        """
        parser = etree.XMLParser(ns_clean=True)
        tree = etree.parse(file_like, parser)
        gexf_xml = tree.getroot()
        tag = self.ns_clean(gexf_xml.tag).lower()
        # ``<>`` only exists in Python 2; ``!=`` works on both.
        if tag != "gexf" :
            self.msg_unexpected_tag("gexf", tag)
            return
        self.gexf_obj = None
        for child in gexf_xml :
            tag = self.ns_clean(child.tag).lower()
            if tag == "meta" :
                meta_xml = child
                self.gexf_obj = self.extract_gexf_obj(meta_xml)
            if tag == "graph" :
                graph_xml = child
                # Identity test: the graph needs the meta object built first.
                if self.gexf_obj is None :
                    self.msg_unexpected_tag("meta", tag)
                    return
                self.graph_obj = self.extract_graph_obj(graph_xml)
项目:warriorframework    作者:warriorframework    | 项目源码 | 文件源码
def xml_to_xsd_validation(file_xml, file_xsd):
        """ Verify that the XML complies with the XSD
        Arguments:
            1. file_xml: Input xml file
            2. file_xsd: xsd file the xml is validated against
        Return:
            True when the xml file validates against the xsd, False when
            either file fails to parse or the validation assertion fails
        """
        try:
            print_info("Validating:{0}".format(file_xml))
            print_info("xsd_file:{0}".format(file_xsd))
            xml_doc = parse(file_xml)
            xsd_doc = parse(file_xsd)
            xmlschema = XMLSchema(xsd_doc)
            xmlschema.assert_(xml_doc)
            return True

        except XMLSyntaxError as err:
            print_error("PARSING ERROR:{0}".format(err))
            return False

        # "except X, err" is Python 2 only syntax; "as" works on 2.6+ and 3.
        except AssertionError as err:
            print_error("Incorrect XML schema: {0}".format(err))
            return False
项目:warriorframework    作者:warriorframework    | 项目源码 | 文件源码
def getChildTextbyParentAttribute(datafile, pnode, patt, pattval, cnode):
    """
    Searches the XML file for the first parent node whose attribute has a
    specific value, then returns the text of the requested child node.
    datafile = xml file searched
    pnode = parent node
    patt = parent node attribute
    pattval = parent node attribute value
    cnode = child node

    Returns the child text, None when the matching parent has no such child,
    and False when no parent matches.
    """
    root = ElementTree.parse(datafile).getroot()
    for parent in root.findall(pnode):
        if parent.get(patt) != pattval:
            continue
        # Only the first matching parent is ever inspected.
        child = parent.find(cnode)
        return None if child is None else child.text
    return False
项目:warriorframework    作者:warriorframework    | 项目源码 | 文件源码
def getChildTextbyParentTag(datafile, pnode, cnode):
    """
    Searches the XML file for the first parent node, finds the child node
    under it and returns the child's text.
    datafile = xml file searched
    pnode = parent node
    cnode = child node

    Returns False when either the parent or the child cannot be found.
    """
    parent = ElementTree.parse(datafile).getroot().find(pnode)
    if parent is None:
        # could not find pnode in the provided file
        return False
    child = parent.find(cnode)
    if child is None:
        # could not find cnode under the given pnode
        return False
    return child.text
项目:warriorframework    作者:warriorframework    | 项目源码 | 文件源码
def getChildTextbyOtherChild(datafile, pnode, cnode, cvalue, rnode):
    """
    Searches the XML file for parent nodes. For each one, checks the text of
    its first child node; when it matches, looks up the reference child and
    returns that child's text.
    datafile = xml file searched
    pnode = parent node
    cnode = child node
    cvalue = child node value
    rnode = reference node

    Returns the text of the first reference node found under a matching
    parent, or False when there is none. Parents that lack the child node
    are skipped instead of raising AttributeError.
    """
    root = ElementTree.parse(datafile).getroot()

    for node in root.findall(pnode):
        child = node.find(cnode)
        # find() returns None for a missing child; guard before using .text
        if child is None or child.text != cvalue:
            continue
        reference = node.find(rnode)
        if reference is not None:
            return reference.text
    return False
项目:warriorframework    作者:warriorframework    | 项目源码 | 文件源码
def verifyParentandChildrenMatch(datafile, pnode, cnode, cvalue, rnode, rvalue):
    """
    Searches the XML file for parent nodes. For each one, checks the text of
    its first child node; when it matches, checks whether the reference
    child's text matches as well.
    datafile = xml file searched
    pnode = parent node
    cnode = child node
    cvalue = child node value
    rnode = reference node
    rvalue = reference node value

    Returns True as soon as one parent satisfies both conditions, False
    otherwise. Parents that lack the child node are skipped instead of
    raising AttributeError.
    """
    root = ElementTree.parse(datafile).getroot()
    for node in root.findall(pnode):
        child = node.find(cnode)
        # find() returns None for a missing child; guard before using .text
        if child is None or child.text != cvalue:
            continue
        reference = node.find(rnode)
        if reference is not None and reference.text == rvalue:
            return True
    return False
项目:warriorframework    作者:warriorframework    | 项目源码 | 文件源码
def getElementsListWithTagAttribValueMatch(datafile, tag, attrib, value):
    """
    Parses an xml document and collects every sub element, at any depth
    below the root, that has the given tag and carries the given attribute
    with the given value.

    Returns a list of matching elements (empty when nothing matches).

    Arguments:
    datafile = input xml file to be parsed.
    tag = tag of the sub-elements to search for.
    attrib = attribute name the sub-element must have.
    value = value that attribute must have.
    """
    query = ".//%s[@%s='%s']" % (tag, attrib, value)
    document_root = ElementTree.parse(datafile).getroot()
    return list(document_root.iterfind(query))
项目:warriorframework    作者:warriorframework    | 项目源码 | 文件源码
def getElementListWithSpecificXpath(datafile, xpath):
    """
    Parses an xml document and collects every element that matches the
    given xpath, evaluated relative to the document root.

    Returns a list of matching elements (empty when nothing matches).

    Arguments:
    datafile = input xml file to be parsed
    xpath = a valid xml path value as supported by python, refer https://docs.python.org/2/library/xml.etree.elementtree.html
    """
    document_root = ElementTree.parse(datafile).getroot()
    return list(document_root.iterfind(xpath))
项目:warriorframework    作者:warriorframework    | 项目源码 | 文件源码
def getConfigElementTextWithSpecificXpath(datafile, xpath):
    """
    Parses an xml document, takes the text of the first element matching
    the xpath as the path of a second xml file (a netconf config file),
    parses that file and serialises its 'config' element.

    Returns the serialised 'config' element.

    Arguments:
    datafile = input xml file to be parsed
    xpath = a valid xml path value as supported by python, refer https://docs.python.org/2/library/xml.etree.elementtree.html
    """
    outer_root = ElementTree.parse(datafile).getroot()
    config_path = outer_root.find(xpath).text

    # find() on the parsed tree searches relative to the config file's root.
    config_element = ElementTree.parse(config_path).find('config')
    return ElementTree.tostring(config_element)
项目:warriorframework    作者:warriorframework    | 项目源码 | 文件源码
def getChildElementWithSpecificXpath(start, xpath):
    """
    Takes an xml file or a parent element and finds the first child
    matching the given xpath.

    Returns the child element, None when nothing matches the xpath, and
    False when no starting node could be determined (file not found or
    unsupported input type).

    Arguments:
    start = xml file or parent element
    xpath = a valid xml path value as supported by python, refer https://docs.python.org/2/library/xml.etree.elementtree.html
    """
    node = False
    if isinstance(start, ElementTree.Element):
        node = start
    # Duck-typing on ``read`` replaces the Python 2 only ``file`` builtin.
    elif isinstance(start, str) or hasattr(start, "read"):
        # check if file exist here
        if file_Utils.fileExists(start):
            node = ElementTree.parse(start).getroot()
        else:
            print_warning('The file={0} is not found.'.format(start))
    # The former test "is not False or is not None" was always true, so a
    # missing node ended in ``False.find(...)`` and an AttributeError.
    # Identity checks are used because a childless Element is falsy.
    if node is False or node is None:
        return False
    return node.find(xpath)
项目:warriorframework    作者:warriorframework    | 项目源码 | 文件源码
def get_element_by_attribute(xml_file, tag_name, attr_name, attr_value):
    """
    Gets the first element with matching tag_name whose attribute attr_name
    equals attr_value. Returns False when no element matches.
    """
    document = minidom.parse(xml_file)
    for candidate in document.getElementsByTagName(tag_name):
        if candidate.getAttribute(attr_name) == attr_value:
            return candidate
    return False
项目:warriorframework    作者:warriorframework    | 项目源码 | 文件源码
def del_tags_from_xml(xml, tag_list=[]):
    """
        It deletes the tags either by their names or xpath

        Arguments:
            1.xml: It takes xml file path or xml string as input
            2.tag_list: It contains list of tags which needs to be removed.
              An entry containing 'xpath=' is used as an xpath (with that
              marker removed); any other entry is searched by tag name at
              any depth.
        Returns:
            It returns xml string
    """
    if os.path.exists(xml):
        root = ElementTree.parse(xml).getroot()
    else:
        root = ElementTree.fromstring(xml)
    for tag in tag_list:
        if 'xpath=' in tag:
            # str.strip('xpath=') removes any of the characters x,p,a,t,h,=
            # from BOTH ends (e.g. 'xpath=.//path' became './/'), so the
            # marker is removed explicitly instead.
            xpath = tag.replace('xpath=', '', 1)
        else:
            xpath = ".//{0}".format(tag)
        req_tags = getChildElementsListWithSpecificXpath(root, xpath)
        recursive_delete_among_children(root, req_tags)

    xml_string = ElementTree.tostring(root, encoding='utf-8', method='xml')
    return xml_string
项目:warriorframework    作者:warriorframework    | 项目源码 | 文件源码
def convert_xml_to_list_of_dict(file_name):
    """
        Converts an xml file to a list of dictionaries, one per child of the
        root element.
        Arguments:
            file_name : It takes xml file path as input
        Returns:
            list_of_dict: list of ordered dictionaries where keys are the
            tag names of a child's sub-elements and values are the
            respective text of each tag.
    """
    root = ElementTree.parse(file_name).getroot()
    return [
        OrderedDict((field.tag, field.text) for field in record)
        for record in root
    ]

#2016/06/22 ymizugaki add begin
项目:PcbDraw    作者:yaqwsx    | 项目源码 | 文件源码
def extract_svg_content(filename):
    """Return the top-level elements of an SVG file with uniquified ids.

    Every ``id`` except ``origin`` gets a fresh prefix, and ``#id``
    references in the file text are rewritten to match.  Namespaces are
    stripped from all tags.  Root children whose tag is ``title`` or
    ``desc`` (or whose tag is falsy) are left out of the result.

    :param filename: path of the SVG file.
    :returns: list of the remaining child elements of the root.
    """
    prefix = unique_prefix() + "_"
    root = etree.parse(filename).getroot()
    # We have to ensure all Ids in SVG are unique. Let's make it nasty by
    # collecting all ids and doing search & replace
    # Potentially dangerous (can break user text)
    # iter() replaces getiterator(), which is deprecated in lxml and was
    # removed from xml.etree in Python 3.9.
    ids = [
        el.attrib["id"]
        for el in root.iter()
        if "id" in el.attrib and el.attrib["id"] != "origin"
    ]
    with open(filename) as f:
        content = f.read()
    for i in ids:
        content = content.replace("#"+i, "#" + prefix + i)
    root = etree.fromstring(content)
    # Remove SVG namespace to ease our lives and change ids
    for el in root.iter():
        if "id" in el.attrib and el.attrib["id"] != "origin":
            el.attrib["id"] = prefix + el.attrib["id"]
        if '}' in str(el.tag):
            el.tag = el.tag.split('}', 1)[1]
    return [ x for x in root if x.tag and x.tag not in ["title", "desc"]]
项目:ceiba-dl    作者:lantw44    | 项目源码 | 文件源码
def file(self, path, output, args={}, progress_callback=lambda *x: None):
        """Download ``path`` (relative to ``self.file_url``) into ``output``.

        ``args`` is recorded in ``self.web_cache[path]`` and, when non-empty,
        appended to the URL as a query string.  Transfer progress is
        reported through ``progress_callback``.

        :raises ServerError: if the response code is not 200.
        """
        self.logger.debug('??????????')
        self.web_cache[path] = dict(args)
        url = urllib.parse.urljoin(self.file_url, urllib.parse.quote(path))
        if args:
            url = url + '?' + urllib.parse.urlencode(args)
        self.logger.debug('HTTP ?????{}'.format(url))

        # Applied in this exact order on the shared curl handle.
        options = (
            (pycurl.URL, url),
            (pycurl.COOKIE, self.web_cookie),
            (pycurl.NOBODY, False),
            (pycurl.NOPROGRESS, False),
            (pycurl.WRITEDATA, output),
            (pycurl.HEADERFUNCTION, lambda *x: None),
            (pycurl.XFERINFOFUNCTION, progress_callback),
        )
        for option, value in options:
            self.curl.setopt(option, value)
        self.curl.perform()

        response_code = self.curl.getinfo(pycurl.RESPONSE_CODE)
        if response_code != 200:
            raise ServerError(response_code)
项目:ceiba-dl    作者:lantw44    | 项目源码 | 文件源码
def file_size(self, path, args={}):
        """Return the download size reported for ``path`` under ``self.file_url``.

        The request is made with ``NOBODY`` set, so no body is fetched.
        ``args`` is recorded in ``self.web_cache[path]`` and, when non-empty,
        appended to the URL as a query string.

        :returns: curl's ``CONTENT_LENGTH_DOWNLOAD`` value.
        :raises ServerError: if the response code is not 200.
        """
        self.logger.debug('????????????')
        self.web_cache[path] = dict(args)
        url = urllib.parse.urljoin(self.file_url, urllib.parse.quote(path))
        if args:
            url = url + '?' + urllib.parse.urlencode(args)
        self.logger.debug('HTTP ?????{}'.format(url))

        # Applied in this exact order on the shared curl handle.
        options = (
            (pycurl.URL, url),
            (pycurl.COOKIE, self.web_cookie),
            (pycurl.NOBODY, True),
            (pycurl.NOPROGRESS, True),
            (pycurl.WRITEDATA, io.BytesIO()),
            (pycurl.HEADERFUNCTION, lambda *x: None),
            (pycurl.XFERINFOFUNCTION, lambda *x: None),
        )
        for option, value in options:
            self.curl.setopt(option, value)
        self.curl.perform()

        response_code = self.curl.getinfo(pycurl.RESPONSE_CODE)
        if response_code != 200:
            raise ServerError(response_code)
        return self.curl.getinfo(pycurl.CONTENT_LENGTH_DOWNLOAD)
项目:ceiba-dl    作者:lantw44    | 项目源码 | 文件源码
def web_redirect(self, path, args={}):
        """Request ``path`` under ``self.web_url`` and return its redirect target.

        ``args`` is recorded in ``self.web_cache[path]`` and, when non-empty,
        appended to the URL as a query string.  The response body is
        discarded; only the headers are inspected.

        :returns: the value of the ``Location`` header, or ``None`` when the
            response has no such header.
        :raises ServerError: if the response code is not 302.
        """
        self.logger.debug('????????????')
        self.web_cache[path] = dict(args)
        url = urllib.parse.urljoin(self.web_url, urllib.parse.quote(path))
        if len(args) > 0:
            url += '?' + urllib.parse.urlencode(args)
        self.logger.debug('HTTP ?????{}'.format(url))
        headers = io.BytesIO()
        self.curl.setopt(pycurl.URL, url)
        self.curl.setopt(pycurl.COOKIE, self.web_cookie)
        self.curl.setopt(pycurl.NOBODY, False)
        self.curl.setopt(pycurl.NOPROGRESS, True)
        self.curl.setopt(pycurl.WRITEDATA, NoneIO())
        self.curl.setopt(pycurl.HEADERFUNCTION, headers.write)
        self.curl.setopt(pycurl.XFERINFOFUNCTION, lambda *x: None)
        self.curl.perform()
        status = self.curl.getinfo(pycurl.RESPONSE_CODE)
        if status != 302:
            raise ServerError(status)
        for header_line in headers.getvalue().split(b'\r\n'):
            # HTTP field names are case-insensitive (and lower-cased by
            # HTTP/2), so do not insist on the exact spelling "Location".
            name, separator, value = header_line.partition(b':')
            if separator and name.lower() == b'location':
                return value.strip().decode()
        return None
项目:rainmap-lite    作者:cldrn    | 项目源码 | 文件源码
def execute(path, cmd, uuid):
    """Run an nmap command and render its XML output as an HTML report.

    The command line ``<path> <cmd> -oA <OUTPUT_PATH>/<uuid>`` is parsed by
    ``NmapOptions`` and executed without a shell.  Afterwards
    ``<OUTPUT_PATH>/<uuid>.xml`` is transformed with the stylesheet obtained
    from the node two siblings before the document root, and the result is
    written to ``<OUTPUT_PATH>/<uuid>.html``.

    :param path: nmap executable to run.
    :param cmd: nmap arguments.
    :param uuid: base name for the output files.
    """
    filename  = "%s/%s" % (OUTPUT_PATH, uuid)
    nmap_cmd = '%s %s -oA %s' % (path, cmd, filename)
    ops = NmapOptions()
    ops.parse_string(nmap_cmd)
    proc = subprocess.Popen(ops.render(), shell=False)
    proc.wait()

    print('\n[%s] Finished execution of command "%s"' % (datetime.datetime.now(), cmd))

    dom = ET.parse("%s.xml" % filename)
    # NOTE(review): presumably this node is the xml-stylesheet processing
    # instruction of the nmap report -- confirm.
    xsl_filename = dom.getroot().getprevious().getprevious().parseXSL() # need to add error checking
    transform = ET.XSLT(xsl_filename)
    html = transform(dom)
    # ``with`` closes (and flushes) the report; it was left open before.
    with open('%s.html' % filename, 'w') as html_file:
        html.write(html_file)

    print('[%s] HTML report generated (%s.html)' % (datetime.datetime.now(), filename))
项目:FineResources    作者:JumeiRdGroup    | 项目源码 | 文件源码
def remove_resource_value(issue, filepath, ignore_layouts_value):
    """
    Read an xml file and remove the elements which are unused, then save the
    file back to the filesystem.

    Nothing is done when ``filepath`` does not exist, or when
    ``ignore_layouts_value`` is not False and the first element of the issue
    is of type 'layout'.  For every ``(tag, name)`` entry in
    ``issue.elements`` all elements with that tag and ``name`` attribute are
    removed, at any depth, and the file is rewritten.
    """
    # NOTE(review): the original comment here was garbled; it appears to be
    # about skipping layout resources when ignore_layouts_value is set.
    # A formatted print() call gives the same output on Python 2 and 3.
    print("remove_resource_value()...%s --> %s" % (issue.elements[0][0], filepath))
    if os.path.exists(filepath) and (ignore_layouts_value is False or issue.elements[0][0] != 'layout'):
        doCheck(filepath=filepath,issue=issue)
        for element in issue.elements:
            print('removing {0} from resource {1}'.format(element, filepath))
            parser = etree.XMLParser(remove_blank_text=False, remove_comments=False,
                                     remove_pis=False, strip_cdata=False, resolve_entities=False)
            tree = etree.parse(filepath, parser)
            root = tree.getroot()
            for unused_value in root.findall('.//{0}[@name="{1}"]'.format(element[0], element[1])):
                # './/' also matches nested elements, and remove() only
                # accepts direct children, so remove from the real parent.
                unused_value.getparent().remove(unused_value)
            with open(filepath, 'wb') as resource:
                tree.write(resource, encoding='utf-8', xml_declaration=True)
项目:isni-reconcile    作者:cmh2166    | 项目源码 | 文件源码
def _lxml_default_loader(href, parse, encoding=None, parser=None):
    """Load the resource ``href`` for an XInclude directive.

    :param href: file path, or URL when it contains ``://``.
    :param parse: ``"xml"`` to parse the resource and return its root
        element; any other value returns the decoded text.
    :param encoding: text encoding, ``utf-8`` when not given.
    :param parser: parser handed to ``etree.parse`` for XML resources.
    :returns: the root element or the decoded text.
    """
    if parse == "xml":
        return etree.parse(href, parser).getroot()

    f = urlopen(href) if "://" in href else open(href, 'rb')
    try:
        raw = f.read()
    finally:
        # Close the handle even when reading fails.
        f.close()
    return raw.decode(encoding or 'utf-8')

##
# Wrapper for ET compatibility - drops the parser
项目:isni-reconcile    作者:cmh2166    | 项目源码 | 文件源码
def _wrap_et_loader(loader):
    """Adapt a three-argument loader to the four-argument loader interface.

    The returned callable accepts ``parser`` but does not forward it.
    """
    def load(href, parse, encoding=None, parser=None):
        # ``parser`` is accepted for signature compatibility only.
        loaded = loader(href, parse, encoding)
        return loaded
    return load


##
# Expand XInclude directives.
#
# @param elem Root element.
# @param loader Optional resource loader.  If omitted, it defaults
#     to {@link default_loader}.  If given, it should be a callable
#     that implements the same interface as <b>default_loader</b>.
# @throws FatalIncludeError If the function fails to include a given
#     resource, or if the tree contains malformed XInclude elements.
# @throws IOError If the function fails to load a given resource.
# @returns the node or its replacement if it was an XInclude node
项目:Taigabot    作者:FrozenPigs    | 项目源码 | 文件源码
def _lxml_default_loader(href, parse, encoding=None, parser=None):
    """Load an XInclude target and return it in the form requested.

    :param href: Location of the resource.  Handed straight to
        ``etree.parse`` for XML.  For anything else it is opened with
        ``urlopen`` when it contains ``"://"`` and with ``open`` in binary
        mode when it does not.
    :param parse: ``"xml"`` to get a parsed root element; any other value
        to get the resource content as decoded text.
    :param encoding: Encoding used to decode non-XML resources.  Falls back
        to UTF-8 when empty or ``None``.
    :param parser: Optional parser passed to ``etree.parse``; not used for
        non-XML resources.
    :returns: The root element when ``parse == "xml"``, otherwise the
        decoded text of the resource.
    """
    if parse == "xml":
        return etree.parse(href, parser).getroot()

    if "://" in href:
        f = urlopen(href)
    else:
        f = open(href, 'rb')
    # try/finally instead of ``with``: the handle is released even when
    # read() raises, and it works whether or not the object returned by
    # urlopen() supports the context-manager protocol.
    try:
        data = f.read()
    finally:
        f.close()
    return data.decode(encoding or 'utf-8')

##
# Wrapper for ET compatibility - drops the parser
项目:Taigabot    作者:FrozenPigs    | 项目源码 | 文件源码
def _wrap_et_loader(loader):
    """Adapt a three-argument loader to the four-argument loader interface.

    The returned callable accepts ``parser`` so it can be called like the
    lxml loader, but it never forwards that argument to ``loader``.
    """
    def load(href, parse, encoding=None, parser=None):
        # ``parser`` is deliberately dropped: ``loader`` does not accept it.
        resource = loader(href, parse, encoding)
        return resource
    return load


##
# Expand XInclude directives.
#
# @param elem Root element.
# @param loader Optional resource loader.  If omitted, it defaults
#     to {@link default_loader}.  If given, it should be a callable
#     that implements the same interface as <b>default_loader</b>.
# @throws FatalIncludeError If the function fails to include a given
#     resource, or if the tree contains malformed XInclude elements.
# @throws IOError If the function fails to load a given resource.
# @returns the node or its replacement if it was an XInclude node
项目:Taigabot    作者:FrozenPigs    | 项目源码 | 文件源码
def simpleparsefile():
    # Doctest holder: the function has no executable body, the docstring
    # below is the test.  The example hands a
    # SimpleXMLTreeBuilder.TreeBuilder to ElementTree.parse and compares the
    # tree written to sys.stdout with the literal XML that follows.
    # NOTE(review): normalize_crlf is defined elsewhere; presumably it
    # normalizes line endings in the parsed tree -- confirm against its
    # definition.
    """
    Test the xmllib-based parser.

    >>> from elementtree import SimpleXMLTreeBuilder
    >>> parser = SimpleXMLTreeBuilder.TreeBuilder()
    >>> tree = ElementTree.parse("samples/simple.xml", parser)
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout)
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    """

# doesn't work with lxml.etree
项目:Taigabot    作者:FrozenPigs    | 项目源码 | 文件源码
def parsefile():
    # Doctest holder: the function has no executable body, the docstring
    # below is the test.  Two samples are parsed from file objects opened
    # explicitly in binary mode -- samples/simple.xml and the namespaced
    # samples/simple-ns.xml -- and each tree is written via stdout() and
    # compared with the literal XML that follows it.
    # NOTE(review): stdout() is a helper defined elsewhere in the file; its
    # exact behaviour is not visible here.
    """
    Test parsing from file.  Note that we're opening the files in
    here; by default, the 'parse' function opens the file in binary
    mode, and doctest doesn't filter out carriage returns.

    >>> file = open("samples/simple.xml", "rb")
    >>> tree = ElementTree.parse(file)
    >>> file.close()
    >>> tree.write(stdout())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element/>
    </root>
    >>> file = open("samples/simple-ns.xml", "rb")
    >>> tree = ElementTree.parse(file)
    >>> file.close()
    >>> tree.write(stdout())
    <root xmlns="http://namespace/">
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element/>
    </root>
    """
项目:merlin    作者:CSTR-Edinburgh    | 项目源码 | 文件源码
def add_projection_indices(uttlist, token_xpath, attrib_name, outdir):
    '''
    Number the nodes matching <token_xpath> across a list of utterance files.

    Each file in <uttlist> is parsed, any existing <attrib_name> attribute is
    removed from every node, and the root node is given <attrib_name>="0".
    Nodes selected by <token_xpath> then receive consecutive integer values
    (stored as strings) starting at 1; the counter carries over from one file
    to the next, so values are unique across the whole list.  Every tree is
    written, pretty-printed as UTF-8, into <outdir> under the input file's
    base name.

    Taken from: ~/proj/dnn_tts/script/add_token_index.py
    '''
    next_index = 1
    for utt_path in uttlist:
        tree = etree.parse(utt_path)

        ## start from a clean slate: drop stale values of the target attribute
        for element in tree.xpath('//*'):
            if attrib_name in element.attrib:
                del element.attrib[attrib_name]

        ## '0' is the default 'n/a' value -- *some* ancestor of every node
        ## then carries the attribute to fall back on
        tree.getroot().set(attrib_name, '0')

        for token in tree.xpath(token_xpath):
            token.set(attrib_name, str(next_index))
            next_index += 1

        destination = os.path.join(outdir, os.path.basename(utt_path))
        tree.write(destination, encoding='utf-8', pretty_print=True)
项目:edxcut    作者:mitodl    | 项目源码 | 文件源码
def list_courses(self):
        '''
        List courses available in Studio site

        Calls self.ensure_studio_site(), fetches "<BASE>/home/" through
        self.ses, parses the response body as HTML and collects every
        <li class="course-item"> element.

        Returns a dict holding two parallel lists:
          'xml'        -- the matching <li> elements
          'course_ids' -- each element's "data-course-key" attribute
                          (None where the attribute is absent)

        When self.verbose is set, each course id is printed as it is found.
        '''
        self.ensure_studio_site()
        url = "%s/home/" % self.BASE
        ret = self.ses.get(url)
        parser = etree.HTMLParser()
        xml = etree.parse(StringIO(ret.content), parser).getroot()
        courses = []
        course_ids = []
        for course in xml.findall('.//li[@class="course-item"]'):
            cid = course.get("data-course-key")
            if self.verbose:
                # Function-call form: same output as the print statement on
                # Python 2 for a single argument, and valid on Python 3.
                print(cid)  # etree.tostring(course)
            courses.append(course)
            course_ids.append(cid)
        return {'xml': courses,
                'course_ids': course_ids,
                }
项目:edxcut    作者:mitodl    | 项目源码 | 文件源码
def _get_block_child_info_from_content_preview(self, block_id):
        '''
        Build the child-info dict for a block from its container preview.

        The block's "container_preview" view is fetched with self.get_xblock,
        its 'html' entry is parsed as HTML, and the "data-locator" attribute
        of every <li class="studio-xblock-wrapper is-draggable"> is taken as
        a child id.  Each child is then fetched with self.get_xblock.

        Returns {'children': [child xblocks], 'child_ids': [child ids]}.
        '''
        preview = self.get_xblock(usage_key=block_id, view="container_preview")
        markup = preview['html']
        root = etree.parse(StringIO(markup), etree.HTMLParser()).getroot()
        wrappers = root.findall('.//li[@class="studio-xblock-wrapper is-draggable"]')
        # Ids are collected first, then the children are fetched in that order.
        child_ids = [wrapper.get('data-locator') for wrapper in wrappers]
        children = [self.get_xblock(usage_key=child_id) for child_id in child_ids]
        return {'children': children,
                'child_ids': child_ids,
                }
项目:edxcut    作者:mitodl    | 项目源码 | 文件源码
def process_file(self, fn):
        '''
        Build a test specification from a course XML file and print it as YAML.

        Every <problem> element in the file becomes one test entry with:
          'url_name'  -- the problem's url_name attribute
          'responses' -- the correct_answer attribute of each <textline>
                         found under a <customresponse> of that problem
          'expected'  -- 'correct' repeated once per response

        The 'config' section is filled from self.optargs: username, password,
        course_id and site_base_url are copied when their value is truthy.
        The number of tests is reported on stderr; the YAML goes to stdout.

        fn is passed to etree.parse unchanged.
        '''
        xml = etree.parse(fn).getroot()
        tests = []
        for problem in xml.findall('.//problem'):
            url_name = problem.get('url_name')
            responses = []
            for cr in problem.findall('.//customresponse'):
                for line in cr.findall('.//textline'):
                    responses.append(line.get('correct_answer'))
            test = {'url_name': url_name, 'responses': responses, 'expected': ['correct'] * len(responses)}
            tests.append(test)
        sys.stderr.write("%d tests added\n" % len(tests))

        cut_spec = {'config': {}, 'tests': tests}
        config_keys = ["username", "password", "course_id", "site_base_url"]
        for ck in config_keys:
            val = getattr(self.optargs, ck)
            if val:
                cut_spec['config'][ck] = val

        # Function-call form: same output as the print statement on Python 2
        # for a single argument, and valid syntax on Python 3.
        print(yaml.dump(cut_spec))
项目:cfdilib    作者:Vauxoo    | 项目源码 | 文件源码
def get_documentation(self, element, namespace=None, schema_str=None):
        """**Helper method:** look up the schema documentation text for an
        element given in `Clark's Notation`_ (`{url:schema}Element`), e.g. as
        it appears in an error message from the validate method.

        The schema is always read from ``self.schema``; the element is turned
        into an XPath query by ``self.get_element_from_clark``.

        :param str element: Element string following the Clark's Notation
        :param dict namespace: Prefix-to-URI mapping used to evaluate the
            XPath query; defaults to ``xs`` bound to the XML Schema namespace
        :param schema_str: Accepted but not used by this method

        :returns: The text of the first match, or an empty string when
            nothing matches or the match has no text
        :rtype: unicode

        .. _`Clark's Notation`: http://effbot.org/zone/element-namespaces.htm
        """
        ns_map = namespace
        if ns_map is None:
            ns_map = {'xs': 'http://www.w3.org/2001/XMLSchema'}
        schema_tree = etree.parse(StringIO(self.schema))
        query = self.get_element_from_clark(element)
        matches = schema_tree.xpath(query, namespaces=ns_map)
        if not matches:
            return ''
        # A match without text yields None (or ''); report both as ''.
        return matches[0].text or ''
项目:dati-ckan-docker    作者:italia    | 项目源码 | 文件源码
def _transform_to_html(self, content, xslt_package=None, xslt_path=None):
        """Render an XML document to HTML through an XSLT stylesheet.

        :param content: XML document as text; it is encoded to UTF-8 before
            being parsed.
        :param xslt_package: Package the stylesheet resource is loaded from;
            defaults to this module's ``__name__``.
        :param xslt_path: Resource path of the stylesheet within that
            package; defaults to the gemini2 HTML stylesheet.
        :returns: The transformed document serialized with
            ``etree.tostring(..., pretty_print=True)``.

        Side effect: sets the ``Content-Type`` and ``Content-Length`` headers
        on the ``response`` object in scope.
        """
        xslt_package = xslt_package or __name__
        xslt_path = xslt_path or \
            '../templates/ckanext/spatial/gemini2-html-stylesheet.xsl'

        # TODO: optimise -- read and compile the transform only once rather
        # than at each request
        with resource_stream(xslt_package, xslt_path) as style:
            style_xml = etree.parse(style)
            transformer = etree.XSLT(style_xml)

        xml = etree.parse(StringIO(content.encode('utf-8')))
        html = transformer(xml)

        # Serialize before touching the headers so the length can be taken
        # from the body that is actually returned.
        result = etree.tostring(html, pretty_print=True)

        response.headers['Content-Type'] = 'text/html; charset=utf-8'
        # Content-Length must describe the serialized HTML, not the XML that
        # was fed into the transform (the two differ in size).
        response.headers['Content-Length'] = len(result)

        return result