HTML4J - Java HTML 解析类库


BSD
跨平台
Java

软件简介

HTML4J 是一个用于解析 HTML 的 Java 类库。示例代码:

    Reader re = ...
    // Create the document
    HTMLDoc doc = new HTMLDoc();
    // Load its content
    doc.load(re);
    // Get the HTML
    HTMLFragment html = doc.getHTML();
    // Create a 'date' meta-tag
    HTMLTag tag = HTMLTag.parse("<meta name=\"date\" content=21/01/2001>");
    // Insert it just before the title
    html.insertBefore(html.findTagByName("title"), tag);
    // Create a paragraph
    tag = HTMLTag.create("p");
    // Insert '<p>Paragraph</p>' just before a tag with id="someid"
    html.insertBefore(html.getIdFinder("someid").getTag().getPosition(),
        tag.toString("Paragraph"));
    // Create an anchor to foo.html
    HTMLTag anchor = HTMLTag.parse("<a href=\"foo.html\">");
    // We could also do a 'HTMLTag.create("a")' and then set the 'href'
    // attribute using getAttributes().setAttribute("href", "foo.html")
    //
    // Now we get a tag block with id="otherid"
    tag = html.getIdFinder("otherid").getTagBlock();
    // Replace the tag that has id="otherid" with the same tag
    // wrapped in the foo.html anchor
    html.replace(tag.getBlockPosition(), anchor.toString(tag));
    // For example, if the 'otherid' tag was '<img id="otherid" src="something.jpg">',
    // then the result would be:
    //   '<a href="foo.html"><img id="otherid" src="something.jpg"></a>'
    //
    tag = html.getTagByName("meta");
    // We just got the first 'meta' tag found in the document, and now we
    // set its name attribute to 'last_update', and its value
    // (the 'content' attribute) to "20/01/2001"
    tag.getAttributes().setAttribute("name", "last_update");
    tag.getAttributes().setAttribute("content", "20/01/2001");
    // Commit the changes to the 'meta' tag to the document
    html.update(tag);