Java 类org.jsoup.parser.Parser 实例源码

项目:Babler    文件:YouTubeCaptionsScraper.java   
/**
 * Fetches the captions/transcript for a given video and saves them.
 * <p>
 * The text of every {@code text} element in the returned caption XML is
 * concatenated (without separators) and handed to a {@code FileSaver}.
 *
 * @param videoID id of the video whose captions are fetched
 * @param lang language the captions should be in; it is converted with
 *             {@code LanguageCode.convertIso2toIso1} before being used
 * @throws IOException if executing the request or reading the response fails
 */
public void getAndSaveTranscript(String videoID, String lang) throws IOException {

    lang = LanguageCode.convertIso2toIso1(lang);

    String url = captionEndPoint+"lang="+lang+"&v="+videoID;
    GetMethod get = new GetMethod(url);
    String xmlData;
    try {
        this.client.executeMethod(get);
        xmlData = get.getResponseBodyAsString();
    } finally {
        // Always hand the connection back, even when the request fails.
        get.releaseConnection();
    }

    //parse XML
    Document doc = Jsoup.parse(xmlData, "", Parser.xmlParser());
    // StringBuilder avoids re-copying the whole transcript for every caption line.
    StringBuilder allCaps = new StringBuilder();
    for (Element e : doc.select("text")) {
        allCaps.append(e.text());
    }

    FileSaver file = new FileSaver(allCaps.toString(), lang, "youtube_caps", url, videoID);
    file.save(logDb);

}
项目:Babler    文件:YouTubeCaptionsScraper.java   
/**
 * Checks whether a given video has captions in the target language, as
 * identified by the user who entered them.
 * <p>
 * The captions index for the video is fetched and every {@code track}
 * element's {@code lang_code} attribute is converted with
 * {@code LanguageCode.convertIso1toIso2} and compared against {@code lang}.
 *
 * @param videoID id of the video to check
 * @param lang target language code
 * @return true if at least one caption track matches {@code lang}
 * @throws IOException if executing the request or reading the response fails
 */
public boolean videoHasCaptionsInLanguage(String videoID, String lang) throws IOException {
    //visit captions index
    GetMethod get = new GetMethod(captionsIndex+videoID);
    String xmlData;
    try {
        this.client.executeMethod(get);
        xmlData = get.getResponseBodyAsString();
    } finally {
        // Always hand the connection back, even when the request fails.
        get.releaseConnection();
    }

    //parse XML
    Document doc = Jsoup.parse(xmlData, "", Parser.xmlParser());

    //iterate over all captions
    for (Element e : doc.select("track")) {
        String langCode = e.attr("lang_code");
        String fixedLangCode = LanguageCode.convertIso1toIso2(langCode);
        if(fixedLangCode.equals(lang))
            return true;
    }

    return false;
}
项目:vue-gwt    文件:TemplateParser.java   
/**
 * Parses the given HTML template and returns a result object holding the
 * extracted expressions together with the transformed HTML.
 * <p>
 * The template is parsed with tag and attribute case preserved. The processed
 * template stored in the result is the inner HTML of the parsed document's body.
 *
 * @param htmlTemplate the HTML template to process, as a String
 * @param context context of the Component currently being processed
 * @return a {@link TemplateParserResult} containing the processed template and expressions
 */
public TemplateParserResult parseHtmlTemplate(String htmlTemplate,
    TemplateParserContext context)
{
    this.context = context;

    // Both flags set: keep the case of tag names and of attribute names.
    ParseSettings caseSensitive = new ParseSettings(true, true);
    Document templateDocument = Parser.htmlParser()
        .settings(caseSensitive)
        .parseInput(htmlTemplate, "");

    result = new TemplateParserResult();
    processImports(templateDocument);
    processNode(templateDocument);
    result.setProcessedTemplate(templateDocument.body().html());

    return result;
}
项目:KSAnime    文件:MAL.java   
/**
 * Downloads the news feed at {@code BASE_URL + NEWS_ENDPOINT}, parses it as
 * XML and converts every {@code item} element into a {@code News} entry.
 *
 * @param voids unused
 * @return the entries collected so far; empty when the fetch fails with an
 *         {@link IOException}, which is logged rather than rethrown
 */
@Override
protected ArrayList<News> doInBackground(Void ...voids) {
    final ArrayList<News> fetched = new ArrayList<News>();
    try {
        final Document feed = Jsoup.connect(BASE_URL + NEWS_ENDPOINT)
                .parser(Parser.xmlParser())
                .get();
        for (final Element item : feed.select("item")) {
            final News entry = new News();
            entry.title = item.select("title").text();
            entry.description = item.select("description").text();
            entry.thumbnail = item.select("media|thumbnail").text();
            entry.pubDate = item.select("pubDate").text();
            entry.link = item.select("link").text();
            fetched.add(entry);
        }
    } catch (IOException e){
        Log.e(TAG, "FetchNews error", e);
    }
    return fetched;
}
项目:wiki2mongo    文件:MarkupCleaner.java   
/**
 * Strips markup from the given text.
 * <p>
 * First every match of the pre/source/syntax/comment/monoTag/fakeTag matchers
 * is replaced by a single space, in that order. The remainder is parsed as XML,
 * the content of a fixed set of elements is replaced by a space, and the
 * document's text is returned.
 *
 * @param markup text to clean
 * @return the cleaned text; if parsing fails the stack trace is printed and
 *         the text as it was after the matcher replacements is returned
 */
public String removeHtmlTags(String markup) {

    String stripped = markup;
    stripped = preMatcher.matcher(stripped).replaceAll(" ");
    stripped = sourceMatcher.matcher(stripped).replaceAll(" ");
    stripped = syntaxMatcher.matcher(stripped).replaceAll(" ");
    stripped = commentMatcher.matcher(stripped).replaceAll(" ");
    stripped = monoTagMatcher.matcher(stripped).replaceAll(" ");
    stripped = fakeTagMatcher.matcher(stripped).replaceAll(" ");

    try {
        Document parsed = Jsoup.parse(stripped, "", Parser.xmlParser());
        // NOTE(review): "blockqoute" looks like a typo for "blockquote" - confirm before changing.
        parsed.select("math, gallery, ref, br, ins, s, del, tt, blockqoute, table").html(" ");
        return parsed.text();
    } catch(Exception e) {
        e.printStackTrace();
        return stripped;
    }
}
项目:lavaplayer    文件:NicoAudioSourceManager.java   
/**
 * Loads a track by requesting the thumb-info XML for the given video id and
 * passing the parsed document to {@code extractTrackFromXml}.
 *
 * @param videoId id appended to the getthumbinfo endpoint
 * @return the track extracted from the response document
 * @throws FriendlyException wrapping any {@link IOException}, including a
 *         response status other than 200
 */
private AudioTrack loadTrack(String videoId) {
  checkLoggedIn();

  // Both resources are closed in reverse order when the block exits.
  try (HttpInterface httpInterface = getHttpInterface();
       CloseableHttpResponse response = httpInterface.execute(new HttpGet("http://ext.nicovideo.jp/api/getthumbinfo/" + videoId))) {
    int status = response.getStatusLine().getStatusCode();
    if (status != 200) {
      throw new IOException("Unexpected response code from video info: " + status);
    }

    Document thumbInfo = Jsoup.parse(response.getEntity().getContent(), StandardCharsets.UTF_8.name(), "", Parser.xmlParser());
    return extractTrackFromXml(videoId, thumbInfo);
  } catch (IOException e) {
    throw new FriendlyException("Error occurred when extracting video info.", SUSPICIOUS, e);
  }
}
项目:substitution-schedule-parser    文件:IndiwareMobileParser.java   
/**
 * Returns the short names of all classes listed in the first plan file that
 * can be downloaded.
 * <p>
 * Plan files are probed day by day, starting four days before today and
 * going up to (but excluding) {@code MAX_DAYS} days ahead. Responses with
 * status 404 or 300 are skipped; any other HTTP error is rethrown.
 *
 * @return the class names of the first available plan, or an empty list if
 *         no plan could be fetched
 */
@Override public List<String> getAllClasses() throws IOException, JSONException, CredentialInvalidException {
    String baseurl = data.getString(PARAM_BASEURL) + "/";

    for (int i = -4; i < MAX_DAYS; i++) {
        LocalDate date = LocalDate.now().plusDays(i);
        String dateStr = DateTimeFormat.forPattern("yyyyMMdd").print(date);
        // The timestamp query parameter makes every request URL unique.
        String url = baseurl + "mobdaten/PlanKl" + dateStr + ".xml?_=" + System.currentTimeMillis();
        try {
            String xml = httpGet(url, "UTF-8");
            Document doc = Jsoup.parse(xml, url, Parser.xmlParser());

            List<String> classes = new ArrayList<>();
            for (Element klasse:doc.select("Klassen > Kl")) {
                // A <Kl> entry without a <Kurz> child has no name to report; skip it
                // instead of failing the whole lookup with a NullPointerException.
                Element kurz = klasse.select("Kurz").first();
                if (kurz != null) {
                    classes.add(kurz.text());
                }
            }
            return classes;
        } catch (HttpResponseException e) {
            if (e.getStatusCode() != 404 && e.getStatusCode() != 300) throw e;
        }
    }
    return new ArrayList<>();
}
项目:common    文件:DataUtilTest.java   
/**
 * Parses a page whose meta tag declares the charset "iso-8" and checks that
 * the document is still produced and serialized as expected.
 */
@Test
public void wrongMetaCharsetFallback() {
    final String expected = "<html>\n" +
                            " <head>\n" +
                            "  <meta charset=\"iso-8\">\n" +
                            " </head>\n" +
                            " <body></body>\n" +
                            "</html>";
    try {
        final byte[] pageBytes = "<html><head><meta charset=iso-8></head><body></body></html>".getBytes("UTF-8");

        Document parsed = DataUtil.parseByteData(ByteBuffer.wrap(pageBytes), null, "http://example.com", Parser.htmlParser());

        assertEquals(expected, parsed.toString());
    } catch( UnsupportedEncodingException ex ) {
        fail(ex.getMessage());
    }
}
项目:CrawlerPack    文件:Ch5Coz4.java   
/**
 * Demo: fetches JSON from {@code url}, converts it to XML, prints that XML,
 * then parses it with jsoup's XML parser and prints the parsed document.
 */
public static void normalXmlParse(){
    final String xml = CrawlerPack.jsonToXml(CrawlerPack.getFromRemote(url));

    // Result of converting the original JSON to XML.
    System.out.println( "原始XML" ) ;
    System.out.println( xml );

    final Document jsoupDoc = Jsoup.parse(xml, "", Parser.xmlParser());
    jsoupDoc.charset(StandardCharsets.UTF_8);

    // What happened? Print the document after parsing so it can be compared.
    System.out.println( "轉換後XML" ) ;
    System.out.println(jsoupDoc.toString());
}
项目:Munch    文件:ImportOpmlInteractor.java   
/**
 * Loads an OPML document, from {@code mUrl} when it is set and otherwise from
 * {@code mFile}, and stores its {@code outline} elements in {@code mOpmlItems}.
 *
 * @param strings unused
 * @return {@code "success"}, or the exception message when loading fails
 */
@Override
protected String doInBackground(String... strings) {
    Document opml;
    try {
        // NOTE(review): the URL branch uses the XML parser, while the file branch
        // uses Jsoup.parse(File, String) - confirm this difference is intended.
        opml = (mUrl != null)
                ? Jsoup.connect(mUrl).parser(Parser.xmlParser()).get()
                : Jsoup.parse(mFile, "UTF-8");
    } catch (IOException e) {
        e.printStackTrace();
        return e.getMessage();
    }
    if (opml != null) {
        mOpmlItems = opml.select("outline");
    }
    return "success";
}
项目:kongfzCrawler    文件:FileUtils.java   
/**
 * Reads the file at the given path and parses it as a UTF-8 XML document.
 * <p>
 * The input stream is closed before the method returns.
 *
 * @param filepath path of the XML file to read
 * @return the parsed document, or {@code null} if the file does not exist or
 *         parsing fails (a message and the stack trace are printed)
 */
public static Document getDocument(String filepath) {
    Document doc = null;
    // try-with-resources closes the stream on every path; the original leaked it.
    try (InputStream is = new FileInputStream(filepath)) {
        doc = Jsoup.parse(is, "UTF-8", "", Parser.xmlParser());
    } catch (FileNotFoundException e1) {
        System.out.println("FileUtils: no such XML file path exists");
        e1.printStackTrace();
        return null;
    } catch (Exception e) {
        System.out.println("Parse file to XML Document error!");
        e.printStackTrace();
    }
    return doc;
}
项目:gerrit-ci    文件:JenkinsJobParser.java   
/**
 * Extracts the user-entered branch pattern from a job configuration XML.
 * <p>
 * The pattern is read from the first
 * gerritProjects / GERRITPROJECT_TAG / branches / BRANCH_TAG / pattern
 * element, its first and last characters are dropped, and the lookahead
 * fragments added after user input are undone.
 *
 * @param jobXml job configuration XML
 * @return the reconstructed branch regex, or {@code null} when an expected
 *         element is missing or the pattern is too short to trim
 */
private static String getBranchRegex(String jobXml) {
    try {
        String storedPattern = Jsoup.parse(jobXml, "", Parser.xmlParser())
                .getElementsByTag("gerritProjects").get(0)
                .getElementsByTag(GERRITPROJECT_TAG).get(0)
                .getElementsByTag("branches").get(0)
                .getElementsByTag(BRANCH_TAG).get(0)
                .getElementsByTag("pattern").get(0).html();

        // Drop the leading "^" and the trailing "$".
        String unanchored = storedPattern.substring(1, storedPattern.length() - 1);

        // Undo the regex sections that are added after the user's input.
        return unanchored
                .replace("(?!refs/meta/)", "")
                .replace("(?!refs/)", "refs/heads/");
    } catch (IndexOutOfBoundsException e) {
        return null;
    }
}
项目:web-data-extractor    文件:JerryExtractor.java   
/**
 * Parses the input as XML and renders it according to {@code outType}.
 *
 * @param str markup to parse
 * @return the document's HTML for {@code TYPE_HTML}; its text for
 *         {@code TYPE_TEXT} and for any other value
 */
private String parse(String str) {
    Document document = Jsoup.parse(str, "", Parser.xmlParser());
    switch (outType) {
        case TYPE_HTML:
            return document.html();
        case TYPE_TEXT:
        default:
            // Text output is also the fallback for unknown types.
            return document.text();
    }
}
项目:neembuu-uploader    文件:BoxDotComAccount.java   
/**
 * Requests an auth token for the current {@code ticket} and stores it as an
 * encrypted property under {@code KEY_AUTH_TOKEN}. Other fields of the
 * response could be read here as well.
 *
 * @throws Exception if the request or reading the response fails
 */
private void getUserInfo() throws Exception {
    // Request shape:
    // https://www.box.net/api/1.0/rest?action=get_auth_token&api_key=<key>&ticket=<ticket>

    NULogger.getLogger().log(Level.INFO, "{0} Getting auth token value............", getClass());

    final String authTokenUrl = "https://www.box.net/api/1.0/rest?action=get_auth_token&api_key=vkf3k5dh0tg1ibvcikjcp8sx0f89d14u&ticket=" + ticket;
    httpGet = new NUHttpGet(authTokenUrl);
    httpResponse = httpclient.execute(httpGet, httpContext);
    responseString = EntityUtils.toString(httpResponse.getEntity());

    doc = Jsoup.parse(responseString, "", Parser.xmlParser());
    final String authToken = doc.select("response auth_token").text();

    // NOTE(review): this writes the auth token to the log in clear text - confirm that is acceptable.
    NULogger.getLogger().log(Level.INFO, "{0} Auth_token : {1}", new Object[]{getClass(), authToken});
    properties().setEncryptedProperty(KEY_AUTH_TOKEN, authToken);
}
项目:astor    文件:DataUtilTest.java   
/**
 * Parses a page whose meta tag declares the charset "iso-8" and checks that
 * the document is still produced and serialized as expected.
 */
@Test
public void wrongMetaCharsetFallback() {
    final String expected = "<html>\n" +
                            " <head>\n" +
                            "  <meta charset=\"iso-8\">\n" +
                            " </head>\n" +
                            " <body></body>\n" +
                            "</html>";
    try {
        final byte[] pageBytes = "<html><head><meta charset=iso-8></head><body></body></html>".getBytes("UTF-8");

        Document parsed = DataUtil.parseByteData(ByteBuffer.wrap(pageBytes), null, "http://example.com", Parser.htmlParser());

        assertEquals(expected, parsed.toString());
    } catch( UnsupportedEncodingException ex ) {
        fail(ex.getMessage());
    }
}
项目:astor    文件:UrlConnectTest.java   
/**
 * Requests a URL that answers 200 with no content, once with the default
 * parser and once with the XML parser, and checks that the response can be
 * parsed and reports status 200 both times.
 */
@Test
public void handles200WithNoContent() throws IOException {
    final String noContentUrl = "http://direct.infohound.net/tools/200-no-content.pl";

    // Default parser.
    Connection.Response res = Jsoup.connect(noContentUrl)
        .userAgent(browserUa)
        .execute();
    res.parse(); // parsing the empty body must not throw
    assertEquals(200, res.statusCode());

    // Explicit XML parser.
    res = Jsoup.connect(noContentUrl)
        .parser(Parser.xmlParser())
        .userAgent(browserUa)
        .execute();
    res.parse(); // parsing the empty body must not throw
    assertEquals(200, res.statusCode());
}
项目:astor    文件:UrlConnectTest.java   
/**
 * Requests a URL that answers 200 with no content, once with the default
 * parser and once with the XML parser, and checks that the response can be
 * parsed and reports status 200 both times.
 */
@Test
public void handles200WithNoContent() throws IOException {
    final String noContentUrl = "http://direct.infohound.net/tools/200-no-content.pl";

    // Default parser.
    Connection.Response res = Jsoup.connect(noContentUrl)
        .userAgent(browserUa)
        .execute();
    res.parse(); // parsing the empty body must not throw
    assertEquals(200, res.statusCode());

    // Explicit XML parser.
    res = Jsoup.connect(noContentUrl)
        .parser(Parser.xmlParser())
        .userAgent(browserUa)
        .execute();
    res.parse(); // parsing the empty body must not throw
    assertEquals(200, res.statusCode());
}
项目:quiz_helper    文件:YoudaoOnline.java   
/**
 * Looks up a word and collects the result into a {@code YoudaoResult}.
 * <p>
 * The response is parsed as XML. The phonetic symbol and return phrase are
 * read with {@code getSingleQueryResult}; translations come from
 * {@code translation > content} elements; web translations map each
 * {@code web-translation} key to its trimmed {@code trans > value} texts,
 * in document order.
 *
 * @param key word to look up; trimmed before it is formatted into {@code BASE_URL}
 * @return the populated result
 * @throws IOException if the request fails
 */
public static YoudaoResult getDefinition(String key) throws IOException{
    final String queryUrl = String.format(BASE_URL, key.trim());
    final Document response = Jsoup.connect(queryUrl)
            .userAgent("Mozilla")
            .cookie("auth", "token")
            .timeout(2000)
            .parser(Parser.xmlParser())
            .get();

    final String phoneticSymbol = getSingleQueryResult(response, "phonetic-symbol");
    final String phrase = getSingleQueryResult(response, "return-phrase");

    final List<String> translations = new ArrayList<String>();
    for (Element content : response.select("translation > content")) {
        translations.add(content.text());
    }

    // LinkedHashMap keeps the entries in the order they appear in the response.
    final Map<String, List<String>> webTranslations = new LinkedHashMap<>();
    for (Element entry : response.select("web-translation")) {
        final String entryKey = getSingleQueryResult(entry, "key");
        final List<String> entryValues = new ArrayList<>();
        for (Element value : entry.select("trans > value")) {
            entryValues.add(value.text().trim());
        }
        webTranslations.put(entryKey, entryValues);
    }

    final YoudaoResult youdaoResult = new YoudaoResult();
    youdaoResult.phonetic = phoneticSymbol;
    youdaoResult.returnPhrase = phrase;
    youdaoResult.translation = translations;
    youdaoResult.webTranslation = webTranslations;
    return youdaoResult;
}
项目:solo-spring    文件:ArticleController.java   
/**
 * Prepends a table of contents to an article's content.
 * <p>
 * Every h1-h5 heading in the article gets an anchor {@code span} inserted
 * before it, and a list linking to those anchors (plus the plugin stylesheet
 * link) is placed in front of the article body. The result is written back
 * to the article under {@code Article.ARTICLE_CONTENT}.
 *
 * @param data event data holding the article under {@code Article.ARTICLE}
 * @throws EventException declared by the handler contract
 */
public void action(JSONObject data) throws EventException {
    final JSONObject article = data.optJSONObject(Article.ARTICLE);
    final String originalContent = article.optString(Article.ARTICLE_CONTENT);

    final Document doc = Jsoup.parse(originalContent, StringUtils.EMPTY, Parser.htmlParser());
    doc.outputSettings().prettyPrint(false);

    final StringBuilder toc = new StringBuilder();
    toc.append("<link rel=\"stylesheet\" type=\"text/css\" href=\"")
            .append(Latkes.getStaticServePath())
            .append("/plugins/list/style.css\" />");

    final Elements headings = doc.select("h1, h2, h3, h4, h5");

    toc.append("<ul class='b3-solo-list'>");
    for (int i = 0; i < headings.size(); i++) {
        final Element heading = headings.get(i);
        final String tag = heading.tagName().toLowerCase();
        final String anchorId = "b3_solo_" + tag + "_" + i;

        // Anchor target placed directly in front of the heading.
        heading.before("<span id='" + anchorId + "'></span>");

        // NOTE(review): the heading text is inserted into the HTML without escaping - confirm this is safe.
        toc.append("<li class='b3-solo-list-").append(tag)
                .append("'><a href='#").append(anchorId)
                .append("'>").append(heading.text()).append("</a></li>");
    }
    toc.append("</ul>");

    final Element body = doc.getElementsByTag("body").get(0);
    toc.append(body.html());

    article.put(Article.ARTICLE_CONTENT, toc.toString());
}
项目:libris    文件:URLGoogleImageSearcher.java   
/**
 * Parses the search response page (using {@code link} as the base URI) and
 * builds the search result from the direct children of its body.
 *
 * @param response HTML of the result page
 * @return result holding best guess, links, descriptions, titles and similar images
 */
@Override
protected IGoogleImageSearchResult parseResult(String response) {
    final String baseUri = link.toString();
    final Elements bodyChildren = Parser.parse(response, baseUri).body().children();

    return new GoogleImageSearchResult.Builder()
            .addBestGuess(retrieveBestGuessFromHTML(bodyChildren))
            .addLinks(retrieveLinksFromHTML(bodyChildren))
            .addDescriptions(retrieveDescriptionFromHTML(bodyChildren))
            .addTitles(retrieveTitleFromHTML(bodyChildren))
            .addSimilarImages(retrieveSimilarImageFromHTML(bodyChildren))
            .build();
}
项目:wulkanowy    文件:Login.java   
/**
 * Extracts the symbol from a certificate document.
 * <p>
 * All colons are removed from the input before it is parsed as XML, so a
 * prefixed name such as {@code saml:AttributeValue} becomes
 * {@code samlAttributeValue}. The symbol is the text of the second
 * {@code samlAttributeValue} found under the element whose
 * {@code AttributeName} is {@code UserInstance}.
 *
 * @param certificate certificate XML
 * @return the symbol, or an empty string when fewer than two values are present
 */
public String findSymbolInCertificate(String certificate) {
    Elements els = Jsoup.parse(certificate.replace(":", ""), "", Parser.xmlParser())
            .select("[AttributeName=\"UserInstance\"] samlAttributeValue");

    // Index 1 is read below, so a single match must be treated as "not found"
    // as well; checking only isEmpty() let get(1) throw IndexOutOfBoundsException.
    if (els.size() < 2) {
        return "";
    }

    return els.get(1).text();
}
项目:CodeMineProject1    文件:ComputerPartActivity.java   
/**
 * Downloads the page at {@code params[0]}, parses it as UTF-8 XML and stores
 * the scraped raw data and URLs in {@code rawData} and {@code urls}.
 * <p>
 * An {@link IOException} is printed and otherwise ignored, leaving the
 * fields untouched.
 *
 * @param params {@code params[0]} is the URL to fetch
 * @return always {@code null}
 */
@Override
protected String doInBackground(String... params) {
    System.setProperty("http.agent", "Chrome");
    // try-with-resources closes the network stream; the original never closed it.
    try (java.io.InputStream page = new URL(params[0]).openStream()) {
        Document doc = Jsoup.parse(page, "UTF-8", "", Parser.xmlParser());
        rawData = PartPickerScraper.getRawData(doc);
        urls = PartPickerScraper.getUrlsFromDoc(doc);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return null;
}
项目:ankihelper    文件:YoudaoOnline.java   
/**
 * Looks up a word and collects the result into a {@code YoudaoResult}.
 * <p>
 * The response is parsed as XML. The phonetic symbol and return phrase are
 * read with {@code getSingleQueryResult}; translations come from
 * {@code translation > content} elements; web translations map each
 * {@code web-translation} key to its trimmed {@code trans > value} texts,
 * in document order.
 *
 * @param key word to look up; trimmed before it is formatted into {@code BASE_URL}
 * @return the populated result
 * @throws IOException if the request fails
 */
public static YoudaoResult getDefinition(String key) throws IOException{
    final String queryUrl = String.format(BASE_URL, key.trim());
    final Document response = Jsoup.connect(queryUrl)
            .userAgent("Mozilla")
            .cookie("auth", "token")
            .timeout(2000)
            .parser(Parser.xmlParser())
            .get();

    final String phoneticSymbol = getSingleQueryResult(response, "phonetic-symbol");
    final String phrase = getSingleQueryResult(response, "return-phrase");

    final List<String> translations = new ArrayList<String>();
    for (Element content : response.select("translation > content")) {
        translations.add(content.text());
    }

    // LinkedHashMap keeps the entries in the order they appear in the response.
    final Map<String, List<String>> webTranslations = new LinkedHashMap<>();
    for (Element entry : response.select("web-translation")) {
        final String entryKey = getSingleQueryResult(entry, "key");
        final List<String> entryValues = new ArrayList<>();
        for (Element value : entry.select("trans > value")) {
            entryValues.add(value.text().trim());
        }
        webTranslations.put(entryKey, entryValues);
    }

    final YoudaoResult youdaoResult = new YoudaoResult();
    youdaoResult.phonetic = phoneticSymbol;
    youdaoResult.returnPhrase = phrase;
    youdaoResult.translation = translations;
    youdaoResult.webTranslation = webTranslations;
    return youdaoResult;
}
项目:knotx    文件:DefaultFormSimplifier.java   
/**
 * Replaces the children of the fragment's first element with copies of the
 * given document's child nodes and returns the resulting markup.
 *
 * @param content fragment markup, parsed as XML; its first child element is emptied
 * @param scriptContentDocument document whose child nodes are copied into that element
 * @return the HTML of the rebuilt fragment
 */
private String getFragmentContent(String content, Document scriptContentDocument) {
  // The second argument of Jsoup.parse(String, String, Parser) is the base URI,
  // not a charset, so an empty base URI is passed instead of "UTF-8".
  Document resultDocument = Jsoup.parse(content, "", Parser.xmlParser());
  Element scriptTag = resultDocument.child(0).empty();
  scriptContentDocument.childNodesCopy().forEach(scriptTag::appendChild);

  return resultDocument.html();
}
项目:knotx    文件:ActionKnotProxyVerticleTest.java   
/**
 * Normalizes markup so that two documents can be compared as strings.
 * <p>
 * Newlines are removed, whitespace between tags and around {@code {...}}
 * blocks adjacent to tags is dropped, and the result is re-serialized by
 * jsoup's XML parser using {@code OUTPUT_SETTINGS}.
 *
 * @param text markup to normalize
 * @return the trimmed, normalized markup
 */
private String clean(String text) {
  String cleanText = text.replace("\n", "").replaceAll(">(\\s)+<", "><")
      .replaceAll(">(\\s)+\\{", ">{").replaceAll("\\}(\\s)+<", "}<");
  // The second argument of Jsoup.parse(String, String, Parser) is the base URI,
  // not a charset, so an empty base URI is passed instead of "UTF-8".
  return Jsoup.parse(cleanText, "", Parser.xmlParser())
      .outputSettings(OUTPUT_SETTINGS)
      .html()
      .trim();
}
项目:lavaplayer    文件:YoutubeAudioTrack.java   
/**
 * Resolves the DASH URL through the cipher manager, downloads the DASH
 * document and extracts the track formats from it.
 *
 * @param dashUrl DASH URL as found for the track
 * @param httpInterface HTTP interface used for the requests
 * @param playerScript player script passed to the cipher manager
 * @return the formats read from the DASH document
 * @throws Exception on request failure, including a response status other than 200
 */
private List<YoutubeTrackFormat> loadTrackFormatsFromDash(String dashUrl, HttpInterface httpInterface, String playerScript) throws Exception {
  String validDashUrl = sourceManager.getCipherManager().getValidDashUrl(httpInterface, playerScript, dashUrl);

  try (CloseableHttpResponse dashResponse = httpInterface.execute(new HttpGet(validDashUrl))) {
    int status = dashResponse.getStatusLine().getStatusCode();
    if (status != 200) {
      throw new IOException("Invalid status code for track info page response: " + status);
    }

    return loadTrackFormatsFromDashDocument(
        Jsoup.parse(dashResponse.getEntity().getContent(), CHARSET, "", Parser.xmlParser()));
  }
}
项目:site    文件:RegistrationService.java   
/**
 * Converts HTML text to plain text: all tags except <code>br</code> are
 * stripped, entities such as {@code &quot;} are unescaped, and the remaining
 * <code>br</code> tags are replaced by {@code "\r\n"}.
 *
 * @param htmlText the HTML text to convert
 * @return the trimmed body content as plain text
 */
String htmlTextToPlainText(final String htmlText) {
    // Cleaner that lets only <br> through.
    final Cleaner brOnlyCleaner = new Cleaner(Whitelist.none().addTags("br"));
    final Document stripped = brOnlyCleaner.clean(Jsoup.parse(htmlText));
    stripped.outputSettings()
            .prettyPrint(false)
            .escapeMode(EscapeMode.xhtml)
            .charset(StandardCharsets.UTF_8);

    final String bodyHtml = stripped.body().html().trim();
    final String unescaped = Parser.unescapeEntities(bodyHtml, true);
    return unescaped.replaceAll("<br(?: ?/)?>", "\r\n");
}
项目:cms    文件:GeneralFileHandlerServiceImpl.java   
/**
 * Finds the {@code href} of the first {@code resource} inside a
 * {@code resources} element of the given XML.
 * <p>
 * When several {@code resources} elements contain a {@code resource}, the
 * value from the last one wins.
 *
 * @param xml XML to search
 * @return the href, or {@code null} if no {@code resources} element contains
 *         a {@code resource}
 */
@Override
public String findParentFile(String xml) {
    String ret = null;
    Document doc = Jsoup.parse(xml, "", Parser.xmlParser());
    for (Element e : doc.select("resources")) {
        // first() yields null for an empty <resources>; the original get(0)
        // threw IndexOutOfBoundsException in that case.
        Element firstResource = e.select("resource").first();
        if (firstResource != null) {
            ret = firstResource.attr("href");
        }
    }
    return ret;
}
项目:rules_closure    文件:Html5PrinterTest.java   
/**
 * Parses the given HTML from its UTF-8 bytes with the HTML parser and turns
 * off pretty printing and indentation on the returned document.
 *
 * @param html markup to parse
 * @return the parsed document
 * @throws IOException if reading the byte stream fails
 */
private static Document parse(String html) throws IOException {
  ByteArrayInputStream htmlBytes = new ByteArrayInputStream(html.getBytes(UTF_8));
  Document doc = Jsoup.parse(htmlBytes, null, "", Parser.htmlParser());
  doc.outputSettings().indentAmount(0).prettyPrint(false);
  return doc;
}
项目:DramaNLP    文件:CoreTEIUrlReader.java   
/**
 * Reads a TEI document from the given stream (parsed as UTF-8 XML) and
 * fills the JCas from it.
 * <p>
 * The first element matching {@code TEI > text} is traversed with a
 * {@code Visitor}; afterwards annotations are derived from selected
 * elements, acts/scenes and the cast are read, and several annotation
 * types are trimmed.
 *
 * @param jcas the JCas to populate
 * @param file stream containing the TEI XML
 * @param drama passed on to {@code readCast}
 */
@Override
public void getNext(final JCas jcas, InputStream file, Drama drama) throws IOException, CollectionException {
    Document doc = Jsoup.parse(file, "UTF-8", "", Parser.xmlParser());

    Visitor vis = new Visitor(jcas);

    // NOTE(review): first() returns null when there is no TEI > text element,
    // which would make the traverse call below fail - confirm inputs always have one.
    Element root = doc.select("TEI > text").first();
    root.traverse(vis);
    // The returned JCas is not used here.
    vis.getJCas();

    // presumably each call creates annotations of the given class for the elements
    // matching the selector - verify against select2Annotation
    select2Annotation(jcas, root, vis.getAnnotationMap(), "speaker", Speaker.class, null);
    select2Annotation(jcas, root, vis.getAnnotationMap(), "stage", StageDirection.class, null);
    select2Annotation(jcas, root, vis.getAnnotationMap(), "sp", Utterance.class, null,
            new Select2AnnotationCallback<Utterance>() {
                @Override
                public void call(Utterance annotation, Element xmlElement) {
                    // Copy the space-separated values of the "who" attribute into the
                    // xml ids of every Speaker covered by this utterance.
                    Collection<Speaker> speakers = JCasUtil.selectCovered(Speaker.class, annotation);
                    for (Speaker sp : speakers) {
                        String[] whos = xmlElement.attr("who").split(" ");
                        sp.setXmlId(new StringArray(jcas, whos.length));
                        for (int i = 0; i < whos.length; i++)
                            sp.setXmlId(i, whos[i]);
                    }
                }
            });
    select2Annotation(jcas, root, vis.getAnnotationMap(), "l", Speech.class, null);

    readActsAndScenes(jcas, root, vis.getAnnotationMap(), true);

    readCast(jcas, drama, doc);

    // Each select result is copied into a new list before it is trimmed.
    AnnotationUtil.trim(new ArrayList<Figure>(JCasUtil.select(jcas, Figure.class)));
    AnnotationUtil.trim(new ArrayList<Speech>(JCasUtil.select(jcas, Speech.class)));
    AnnotationUtil.trim(new ArrayList<Utterance>(JCasUtil.select(jcas, Utterance.class)));
    AnnotationUtil.trim(new ArrayList<Scene>(JCasUtil.select(jcas, Scene.class)));
    AnnotationUtil.trim(new ArrayList<Act>(JCasUtil.select(jcas, Act.class)));

}
项目:DramaNLP    文件:FolgerReader.java   
/**
 * Reads a TEI document from the given stream (parsed as UTF-8 XML) and
 * traverses the first element matching {@code TEI > text > body} with a
 * {@code FolgerVisitor}, logging progress at INFO level.
 *
 * @param jcas the JCas handed to the visitor
 * @param file stream containing the XML
 * @param drama not used by this method
 */
@Override
public void getNext(JCas jcas, InputStream file, Drama drama) throws IOException, CollectionException {

    getLogger().log(Level.INFO, "Now parsing XML document");
    Document teiDocument = Jsoup.parse(file, "UTF-8", "", Parser.xmlParser());

    Visitor visitor = new FolgerVisitor(jcas);
    Element body = teiDocument.select("TEI > text > body").first();

    getLogger().log(Level.INFO, "Traversing XML nodes");
    body.traverse(visitor);
    // The visitor's JCas is fetched but not used further in this method.
    jcas = visitor.getJCas();

    getLogger().log(Level.INFO, "Finished Traversing");
}
项目:substitution-schedule-parser    文件:WinterShParser.java   
/**
 * Builds an {@code AdditionalInfo} from a feed XML.
 * <p>
 * The text is taken from the first {@code item description} element and the
 * title is extended with the text of the first {@code pubDate} element. When
 * the description equals the fixed "no notices" sentence, the info is marked
 * as having no information.
 *
 * @param xml feed XML
 * @return the populated info object
 */
@NotNull static AdditionalInfo handleXML(String xml) {
    AdditionalInfo info = new AdditionalInfo();
    info.setTitle(TITLE);

    Document feed = Jsoup.parse(xml, "", Parser.xmlParser());
    String description = feed.select("item description").first().text();
    boolean nothingToReport =
            "Zurzeit gibt es keine Hinweise auf witterungsbedingten Unterrichtsausfall.".equals(description);
    if (nothingToReport) {
        info.setHasInformation(false);
    }

    String published = feed.select("pubDate").first().text();
    info.setTitle(TITLE + " (Stand: " + published + ")");
    info.setText(description);

    return info;
}
项目:substitution-schedule-parser    文件:IndiwareParser.java   
/**
 * Parses one response page and adds the contained day(s) to the schedule.
 * <p>
 * A response containing {@code <html} or {@code <table} is treated as HTML,
 * anything else as XML. For HTML, an optional embedded-content selector
 * picks the elements to parse; otherwise a page with more than one
 * {@code .vpfuer} element is split into one document per schedule.
 *
 * @param v schedule the parsed days are added to
 * @param response page content
 * @throws IOException if the configured selector matches no element
 */
void parseIndiwarePage(SubstitutionSchedule v, String response) throws JSONException, IOException {
    final boolean html = response.contains("<html") || response.contains("<table");
    final Element doc = html
            ? Jsoup.parse(response)
            : Jsoup.parse(response, "", Parser.xmlParser());

    if (html && data.has(PARAM_EMBEDDED_CONTENT_SELECTOR)) {
        String selector = data.getString(PARAM_EMBEDDED_CONTENT_SELECTOR);
        Elements matches = doc.select(selector);
        if (matches.isEmpty()) throw new IOException("No elements found using " + selector);
        for (Element match : matches) {
            v.addDay(parseIndiwareDay(match, true));
        }
        return;
    }

    if (html && doc.select(".vpfuer").size() > 1) {
        // Several schedules follow each other on one page; the part before the
        // first marker (index 0) is skipped.
        String[] parts = doc.html().split("<span class=\"vpfuer\">");
        for (int i = 1; i < parts.length; i++) {
            v.addDay(parseIndiwareDay(Jsoup.parse(parts[i]), true));
        }
        return;
    }

    v.addDay(parseIndiwareDay(doc, html));
}
项目:substitution-schedule-parser    文件:IndiwareMobileDemoTest.java   
/**
 * Parses the bundled indiware-mobile sample file and checks the date, the
 * last-change timestamp and the number of substitutions of the parsed day.
 */
@Test
public void demoTest() throws IOException, JSONException {
    final String sampleXml = readResource("/indiware-mobile/indiware-mobile.xml");
    final Document sample = Jsoup.parse(sampleXml, "", Parser.xmlParser());

    final SubstitutionScheduleDay parsedDay = IndiwareMobileParser.parseDay(sample, new ColorProvider());

    assertEquals(new LocalDate(2017, 6, 21), parsedDay.getDate());
    assertEquals(new LocalDateTime(2017, 6, 20, 10, 28), parsedDay.getLastChange());
    assertEquals(192, parsedDay.getSubstitutions().size());
}
项目:substitution-schedule-parser    文件:IndiwareDemoTest.java   
/**
 * Checks that parsing the XML fixture and the HTML fixture yields equal days.
 */
@Test
public void testEquals() throws IOException, JSONException {
    // XML variant: parsed with the XML parser, html flag off.
    SubstitutionScheduleDay fromXml =
            parser.parseIndiwareDay(Jsoup.parse(xml, "", Parser.xmlParser()), false);
    // HTML variant: parsed with the default parser, html flag on.
    SubstitutionScheduleDay fromHtml =
            parser.parseIndiwareDay(Jsoup.parse(html), true);

    assertEquals(fromXml, fromHtml);
}
项目:common    文件:HttpConnection.java   
/**
 * Creates a request with the default settings: GET method, 3000 ms timeout,
 * 1 MB maximum body size, redirects followed, gzip accepted, no data and the
 * HTML parser.
 */
private Request() {
    // What is sent.
    this.method = Method.GET;
    this.data = new ArrayList<Connection.KeyVal>();
    this.headers.put("Accept-Encoding", "gzip");

    // How the exchange behaves.
    this.timeoutMilliseconds = 3000;
    this.maxBodySizeBytes = 1024 * 1024; // 1MB
    this.followRedirects = true;

    // How the response is parsed.
    this.parser = Parser.htmlParser();
}
项目:common    文件:Element.java   
/**
 * Appends inner HTML to this element. The supplied HTML is parsed with this
 * element as context, and each resulting node is added after the existing children.
 * @param html HTML to add inside this element, after the existing HTML; must not be null
 * @return this element
 * @see #html(String)
 */
public Element append(String html) {
    Validate.notNull(html);

    List<Node> parsed = Parser.parseFragment(html, this, baseUri());
    Node[] newChildren = parsed.toArray(new Node[parsed.size()]);
    addChildren(newChildren);
    return this;
}
项目:common    文件:Element.java   
/**
 * Prepends inner HTML to this element. The supplied HTML is parsed with this
 * element as context, and the resulting nodes are inserted at index 0, before the existing children.
 * @param html HTML to add inside this element, before the existing HTML; must not be null
 * @return this element
 * @see #html(String)
 */
public Element prepend(String html) {
    Validate.notNull(html);

    List<Node> parsed = Parser.parseFragment(html, this, baseUri());
    Node[] newChildren = parsed.toArray(new Node[parsed.size()]);
    addChildren(0, newChildren);
    return this;
}
项目:common    文件:Node.java   
/**
 * Parses the given HTML and inserts the resulting nodes into this node's
 * parent at the given child index.
 *
 * @param index child index in the parent at which the nodes are inserted
 * @param html HTML to parse; must not be null (as must the parent node)
 */
private void addSiblingHtml(int index, String html) {
    Validate.notNull(html);
    Validate.notNull(parentNode);

    // The parent serves as parse context only when it is an Element.
    final Node parent = parent();
    Element context = null;
    if (parent instanceof Element) {
        context = (Element) parent;
    }

    List<Node> siblings = Parser.parseFragment(html, context, baseUri());
    parentNode.addChildren(index, siblings.toArray(new Node[siblings.size()]));
}
项目:common    文件:UrlConnectTest.java   
/**
 * Fetches an XML URL with the HTML parser explicitly requested and checks
 * that the HTML tree builder is used and the XML is wrapped in an HTML document.
 */
@Test
public void fetchHandlesXmlAsHtmlWhenParserSet() throws IOException {
    // XML would be auto-detected and parsed with the XML parser, unless the HTML parser is explicitly requested
    final Connection con = Jsoup.connect("http://direct.infohound.net/tools/parse-xml.xml")
        .parser(Parser.htmlParser());
    final Document doc = con.get();

    assertTrue(con.request().parser().getTreeBuilder() instanceof HtmlTreeBuilder);

    final String expected = "<html> <head></head> <body> <xml> <link>one <table> Two </table> </xml> </body> </html>";
    assertEquals(expected, StringUtil.normaliseWhitespace(doc.outerHtml()));
}