Java 类org.jsoup.safety.Whitelist 实例源码

项目:crawler-jsoup-maven    文件:JsoupTest.java   
/**
 * Demo: builds a small document, edits one element in place, wraps another,
 * prints the result and then prints a sanitized copy of it.
 */
public static void main(String[] args) {
    // Source fragment: a <div> nested inside a <span>.
    String markup = "<span><div>test</div></span>";
    Document parsed = Jsoup.parse(markup);

    // Replace the div's content, then add a leading and a trailing paragraph.
    Element target = parsed.select("div").first();
    target.html("<p>lorem ipsum</p>");
    target.prepend("<p>First</p>");
    target.append("<p>Last</p>");
    // The div now holds: <p>First</p><p>lorem ipsum</p><p>Last</p>

    // NOTE(review): appendElement takes a tag name, yet the whole markup string is passed here -- confirm intent.
    target.appendElement(markup);

    // Wrap the first <span> in a list item that contains a link.
    Element outerSpan = parsed.select("span").first();
    outerSpan.wrap("<li><a href='http://example.com/'></a></li>");
    System.out.println(parsed.html());

    // Sanitize the edited document with the relaxed whitelist, without pretty-printing.
    String sanitized = Jsoup.clean(parsed.html(), "", Whitelist.relaxed(), new OutputSettings().prettyPrint(false));
    System.out.println(sanitized);
}
项目:docx4j-template    文件:XHTMLDocumentHandler.java   
/**
 * Demo: cleans the same anchor markup with each predefined whitelist, from the
 * strictest (none) to the most permissive (relaxed), printing every result
 * separated by a line of asterisks.
 */
public static void main(String[] args) {
    String baseUri = "http://www.baidu.com";
    String html = "<a href=\"http://www.baidu.com/gaoji/preferences.html\"name=\"tj_setting\">搜索设置</a>";

    Whitelist[] policies = {
        Whitelist.none(),
        Whitelist.simpleText(),
        Whitelist.basic(),
        Whitelist.basicWithImages(),
        Whitelist.relaxed()
    };

    for (int i = 0; i < policies.length; i++) {
        // The separator goes between results only, never after the last one.
        if (i > 0) {
            System.out.println("*******");
        }
        System.out.println(Jsoup.clean(html, baseUri, policies[i]));
    }
}
项目:shoucang    文件:ReplyService.java   
/**
 * Creates and persists a reply written by the given user.
 * The reply content is sanitized with the basic-with-images whitelist and then
 * passed through {@code updateAtUser} before it is stored.
 *
 * @param replyDTO the submitted reply data; its user id is overwritten with the id of {@code user}
 * @param user     the author of the reply
 * @return the object returned by the repository's save call
 */
public Reply createReply(ReplyDTO replyDTO, User user) {
    replyDTO.setUserId(user.getId());
    Reply reply = replyDTO.toReply();

    // Sanitize first, then let updateAtUser post-process the safe markup.
    String sanitized = Jsoup.clean(reply.getContent(), Whitelist.basicWithImages());
    reply.setContent(updateAtUser(sanitized));
    reply.setStatus(ReplyStatus.ACTIVE);

    Reply saved = replyRepository.save(reply);

    // The user is attached only after saving, right before the post-creation hook runs.
    reply.setUser(user);
    afterCreatingReply(reply);

    return saved;
}
项目:jaffa-framework    文件:AuditLogger.java   
/**
 * Reduces a value to a short single-line excerpt of at most 100 characters.
 * <p>
 * For the {@code "html"} format the input is parsed, every {@code <br>} is turned into a space,
 * all tags are stripped, entities are unescaped, and the result is trimmed with CR/LF removed.
 * For any other format the input is trimmed and cut at the first line break.
 * If no cleaned value was produced, the raw input (truncated to 100 characters) is returned.
 *
 * @param dataFormat the format of {@code input}; only {@code "html"} is treated specially
 * @param input      the value to shorten
 * @return the shortened value
 */
private String trimValue(String dataFormat, String input) {
    String cleaned = null;
    if ("html".equals(dataFormat)) {
        Document document = Jsoup.parse(input);
        if (document != null) {
            document.outputSettings(new Document.OutputSettings().prettyPrint(false));
            // Tag each <br> with a literal backslash-n token; the token is replaced by a space below.
            document.select("br").append("\\n");
            String s = org.jsoup.parser.Parser.unescapeEntities(Jsoup.clean(document.html().replaceAll("\\\\n", " "), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)), false);
            if (s != null) {
                cleaned = s.trim().replaceAll("\r", "").replaceAll("\n","");
            }
        }
    } else {
        cleaned = input.trim();
        if (this.eolPattern == null) {
            // Match CR or LF only. Inside a character class '|' is a literal, so the former
            // "[\r|\n]" also cut the value at the first pipe character.
            this.eolPattern = Pattern.compile("[\r\n]");
        }
        Matcher m = this.eolPattern.matcher(cleaned);
        if (m.find()) {
            // Keep only the first line.
            cleaned = cleaned.subSequence(0, m.start()).toString();
        }
    }
    return cleaned != null ? (cleaned.length() > 100 ? cleaned.substring(0, 100) : cleaned) : (input.length() > 100 ? input.substring(0, 100) : input);
}
项目:TeamNote    文件:QualityUtilImpl.java   
/**
 * Checks text for sensitive words.
 * <p>
 * The word list is read as UTF-8 from {@code source}, one entry per line; the last character
 * of every line is dropped before the entry is stored (presumably a trailing delimiter --
 * confirm against the word-list file). The content is stripped of all HTML, tokenized, and
 * each token is looked up in the word list.
 *
 * @param userId  not used by this method
 * @param content the (possibly HTML) text to check
 * @return {@code 0} if any token is a sensitive word, {@code 1} otherwise
 * @throws IOException if the word list cannot be read
 */
public int checkTextContent(int userId, String content) throws IOException {
    HashSet<String> sensitiveWords = new HashSet<String>();
    // try-with-resources: the original never closed these streams.
    try (InputStream fis = new FileInputStream(source);
         InputStreamReader isr = new InputStreamReader(fis, Charset.forName("UTF-8"));
         BufferedReader br = new BufferedReader(isr)) {
        String line;
        while ((line = br.readLine()) != null) {
            // An empty line would make substring(0, -1) throw; it carries no word, so skip it.
            if (line.isEmpty()) {
                continue;
            }
            sensitiveWords.add(line.substring(0, line.length() - 1));
        }
    }

    Result result = ToAnalysis.parse(Jsoup.clean(content, Whitelist.none()));
    List<Term> termList = result.getTerms();
    for (Term term : termList) {
        if (sensitiveWords.contains(term.getName())) {
            return 0;
        }
    }
    return 1;
}
项目:yadaframework    文件:YadaWebUtil.java   
/**
 * Sanitizes HTML, keeping only a fixed set of tags plus any caller-supplied extras.
 * Tags kept by default: b, em, i, strong, u, br, cite, p, img, li, ul, ol, sup, sub, s.
 * The {@code style} attribute is kept on {@code p}; {@code src}, {@code style} and {@code class}
 * are kept on {@code img}; {@code href} and {@code target} are kept on {@code a} when "a" is
 * among the extra tags.
 *
 * @param content html content
 * @param extraTags any other tags that should be kept, e.g. "a"
 * @return the sanitized body HTML
 */
public String cleanContent(String content, String ... extraTags) {
    // simpleText() allows only b, em, i, strong, u; everything else is added on top of it.
    Whitelist policy = Whitelist.simpleText()
        .addTags("br", "cite", "em", "i", "p", "strong", "img", "li", "ul", "ol", "sup", "sub", "s")
        .addTags(extraTags)
        .addAttributes("p", "style") // needed for left/right alignment
        .addAttributes("img", "src", "style", "class");

    boolean linksRequested = Arrays.asList(extraTags).contains("a");
    if (linksRequested) {
        policy.addAttributes("a", "href", "target");
    }

    Document dirty = Jsoup.parseBodyFragment(content, "");
    Document sanitized = new Cleaner(policy).clean(dirty);
    sanitized.outputSettings().escapeMode(EscapeMode.xhtml); // does not escape UTF-8 characters
    return sanitized.body().html();
}
项目:SOCVFinder    文件:ChatRoom.java   
/**
 * Produces a chat-session answer for a message that matched no known command.
 *
 * @param message  the message to answer
 * @param userName stored as the session's default customer id before responding
 * @return a fixed apology when the chat session is missing or the answer is unusable
 *         (null, mentions "google", or contains a {@code <search>} tag); otherwise the
 *         session's answer with every {@code <br/>} turned into a newline
 */
public String getUnkownCommandResponse(String message,String userName) {
    if (chatSession == null) {
        return "Sorry I did not recognize your command and the AI functions are disabled";
    }

    MagicStrings.default_Customer_id = userName;
    String reply = chatSession.multisentenceRespond(message);

    boolean unusable = reply == null
            || reply.toLowerCase().contains("google")
            || reply.contains("<search>");
    if (unusable) {
        return "Sorry, I do not know";
    }

    // Long answers without any line break are sanitized and given a short lead-in line.
    boolean longSingleLine = reply.length() > 250 && !reply.contains("\n");
    String answer = longSingleLine
            ? "Well\n" + Jsoup.clean(reply, Whitelist.basic())
            : reply;

    return answer.replace("<br/>", "\n");
}
项目:Quoter-Android    文件:QODFragment.java   
/**
 * Extracts the quote of the day and its author from a JSON response whose HTML payload sits
 * under {@code parse -> text -> "*"}, stores both in shared preferences and refreshes the view.
 * The quote is read from the first row and the author from the second row of the centred,
 * full-width table in that HTML.
 *
 * @param response the JSON response to read
 * @throws JSONException if an expected key is missing
 */
private void parseQodResponse(JSONObject response) throws JSONException {
    String content = response.getJSONObject("parse").getJSONObject("text").getString("*");

    Elements rows = Jsoup.parse(content)
            .select("table[style=\"text-align:center; width:100%\"]")
            .select("tr");
    Elements quoteCells = rows.get(0).select("td");
    Elements authorCells = rows.get(1).select("td");

    // Strip every tag, then let Html.fromHtml decode what is left; '~' is removed from the author.
    Whitelist textOnly = Whitelist.none();
    String newQuote = Html.fromHtml(Jsoup.clean(quoteCells.toString(), textOnly)).toString();
    String newAuthor = Html.fromHtml(Jsoup.clean(authorCells.toString(), textOnly).replace("~", "")).toString();

    Quote qod = sharedPrefStorage.getQod();

    // Show the "refreshing" hint only when the stored quote differs from the fetched one.
    boolean unchanged = qod.getQuoteText().equals(newQuote) && qod.getQuoteAuthor().equals(newAuthor);
    if (!unchanged) {
        Snackbar.make(binding.coordinatorLayout, getString(R.string.str_Refreshing), Snackbar.LENGTH_SHORT).show();
    }

    sharedPrefStorage.setQodText(newQuote);
    sharedPrefStorage.setQodAuthor(newAuthor);

    setQuoteOfTheDayTextAndAuthor(qod);
}
项目:lucene4ir    文件:TRECAquaintDocumentIndexer.java   
/**
 * Creates the indexer and prepares a whitelist that keeps the document-structure tags
 * (docno, doc, headline, text, date_time, slug) in addition to the relaxed defaults.
 *
 * @param indexPath       passed to the superclass constructor
 * @param tokenFilterFile passed to the superclass constructor
 * @param positional      passed to the superclass constructor
 */
public TRECAquaintDocumentIndexer(String indexPath, String tokenFilterFile, boolean positional){
    super(indexPath, tokenFilterFile, positional);

    try {
        whiteList = Whitelist.relaxed();
        whiteList.addTags("docno", "doc", "headline", "text", "date_time", "slug");
    } catch (Exception e){
        // Best effort: report the problem and carry on with construction.
        String report = " caught a " + e.getClass() +
                "\n with message: " + e.getMessage();
        System.out.println(report);
    }

    doc = new Document();
    initFields();
    initAQUAINTDoc();
}
项目:shoucang    文件:ReplyService.java   
/**
 * Creates and persists a reply written by the given user.
 * The reply content is sanitized with the basic-with-images whitelist and then
 * passed through {@code updateAtUser} before it is stored.
 *
 * @param replyDTO the submitted reply data; its user id is overwritten with the id of {@code user}
 * @param user     the author of the reply
 * @return the object returned by the repository's save call
 */
public Reply createReply(ReplyDTO replyDTO, User user) {
    replyDTO.setUserId(user.getId());
    Reply reply = replyDTO.toReply();

    // Sanitize first, then let updateAtUser post-process the safe markup.
    String sanitized = Jsoup.clean(reply.getContent(), Whitelist.basicWithImages());
    reply.setContent(updateAtUser(sanitized));
    reply.setStatus(ReplyStatus.ACTIVE);

    Reply saved = replyRepository.save(reply);

    // The user is attached only after saving, right before the post-creation hook runs.
    reply.setUser(user);
    afterCreatingReply(reply);

    return saved;
}
项目:mesfavoris    文件:JavadocCommentProvider.java   
/**
 * Returns the Javadoc of a member as plain text: the HTML rendering is stripped of all tags
 * and every resulting line is trimmed.
 *
 * @param member the member whose Javadoc is read
 * @return the plain-text Javadoc, or {@code null} when no content reader is available or
 *         reading fails
 */
private String getJavadocCommentAsText(IMember member) {
    try (Reader htmlReader = JavadocContentAccess.getHTMLContentReader(member, true, true)) {
        if (htmlReader == null) {
            return null;
        }
        String html = CharStreams.toString(htmlReader);
        // Pretty-printing is switched off so the cleaner does not re-indent the text.
        String plainText = Jsoup.clean(html, "", Whitelist.none(), new OutputSettings().prettyPrint(false));

        // Trim each line and join the lines again with '\n'.
        try (BufferedReader lineReader = new BufferedReader(new StringReader(plainText))) {
            return lineReader.lines()
                    .map(String::trim)
                    .collect(Collectors.joining("\n"));
        }
    } catch (JavaModelException | IOException e) {
        return null;
    }
}
项目:Portofino    文件:AbstractCrudAction.java   
/**
 * Copies the create/edit form into the persistent object; the form is expected to have been
 * validated already. Every editable rich-text (HTML) field is then re-read from the object,
 * cleaned according to the {@link #getWhitelist() whitelist} and written back. If cleaning
 * fails for any reason, the HTML-escaped text is stored instead.
 */
protected void writeFormToObject() {
    form.writeToObject(object);
    for(TextField richTextField : FormUtil.collectEditableRichTextFields(form)) {
        //TODO in bulk edit mode, the field should be skipped altogether if the checkbox is not checked.
        PropertyAccessor accessor = richTextField.getPropertyAccessor();
        String submitted = (String) accessor.get(object);
        String sanitized;
        try {
            sanitized = Jsoup.clean(submitted, getWhitelist());
        } catch (Throwable t) {
            logger.error("Could not clean HTML, falling back to escaped text", t);
            sanitized = StringEscapeUtils.escapeHtml(submitted);
        }
        accessor.set(object, sanitized);
    }
}
项目:News    文件:LibertyTimes.java   
/**
 * Normalizes article markup and reduces it to paragraphs, spans, tables and images.
 *
 * @param rs the raw article HTML
 * @return the cleaned HTML
 */
protected String cleaner(String rs) {
    String prepared = rs
            .replace(" 廣告","")
            // lazy-loaded images carry their URL in data-original
            .replace("data-original=","src=")
            // opens an HTML comment at this marker (presumably to drop what follows -- confirm)
            .replace("相關新聞", "<!--");

    Whitelist allowed = new Whitelist()
            .addTags("p", "span")
            .addTags("table","tbody","tr","td")
            .addTags("img")
            .addAttributes("img", "src");

    return Jsoup.clean(prepared, allowed);
}
项目:News    文件:OrientalDaily.java   
/**
 * Normalizes article markup and reduces it to paragraphs, bold text and images.
 *
 * @param rs the raw article HTML
 * @return the cleaned HTML
 */
protected String cleaner(String rs) {
    String prepared = rs
            // make site-relative image URLs absolute
            .replace("src=\"/cnt", "src=\"http://orientaldaily.on.cc/cnt")
            // h3 headings become bold paragraphs
            .replace("<h3>","<p><b>")
            .replace("</h3>","</b></p>")
            // both markers are turned into the start of an HTML comment
            .replace("<!--AD-->","<!--")
            .replace("<div id=\"articleNav\">","<!--");

    Whitelist allowed = new Whitelist()
            .addTags("p","b")
            .addTags("img")
            .addAttributes("img", "src");

    return Jsoup.clean(prepared, allowed);
}
项目:News    文件:HKAppleDaily.java   
/**
 * Removes the article-end image URL and reduces the markup to paragraphs, tables and images.
 *
 * @param rs the raw article HTML
 * @return the cleaned HTML
 */
protected String cleaner(String rs) {
    // Blank out the URL of the "end of article" image.
    String prepared = rs.replace("https://staticlayout.appledaily.hk/web_images/layout/art_end.gif","");

    Whitelist allowed = new Whitelist()
            .addTags("p")
            .addTags("table","tbody","tr","td")
            .addTags("img")
            .addAttributes("img", "src");

    return Jsoup.clean(prepared, allowed);
}
项目:appverse-server    文件:JSONHtmlXssSerializer.java   
/**
 * Removes all HTML markup from the value after canonicalizing it and dropping NUL characters.
 * The value is printed to standard output before and after stripping.
 *
 * @param value the string to strip; may be {@code null}
 * @return the stripped string, or {@code null} when {@code value} is {@code null}
 */
public String stripXSS( String value )
{
    if( value == null )
    {
        return null;
    }

    System.out.println("STRIP XSS -> ["+value+"]");

    // Use the ESAPI library to avoid encoded attacks.
    String canonical = ESAPI.encoder().canonicalize( value );

    // Avoid null characters
    String withoutNulls = canonical.replaceAll("\0", "");

    // Clean out HTML
    String stripped = Jsoup.clean(withoutNulls, Whitelist.none());
    System.out.println("STRIPED XSS -> ["+stripped+"]");
    return stripped;
}
项目:appverse-server    文件:JSONStringXSSDeserializer.java   
/**
 * Removes all HTML markup from the supplied string after canonicalizing it and dropping
 * NUL characters.
 * @param value The string containing possible html tags; may be {@code null}.
 * @return The string without html tags, or {@code null} when {@code value} is {@code null}.
 */
private String stripXSS( String value )
{
    if( value == null )
    {
        return null;
    }

    // Use the ESAPI library to avoid encoded attacks.
    String canonical = ESAPI.encoder().canonicalize( value );

    // Avoid null characters
    String withoutNulls = canonical.replaceAll("\0", "");

    // Tags are removed outright rather than escaped: a <script> tag disappears
    // instead of becoming &lt;script&gt;.
    return Jsoup.clean(withoutNulls, Whitelist.none());
}
项目:expper    文件:ReplyService.java   
/**
 * Creates and persists a reply written by the given user.
 * The reply content is sanitized with the basic-with-images whitelist and then
 * passed through {@code updateAtUser} before it is stored.
 *
 * @param replyDTO the submitted reply data; its user id is overwritten with the id of {@code user}
 * @param user     the author of the reply
 * @return the object returned by the repository's save call
 */
public Reply createReply(ReplyDTO replyDTO, User user) {
    replyDTO.setUserId(user.getId());
    Reply reply = replyDTO.toReply();

    // Sanitize first, then let updateAtUser post-process the safe markup.
    String sanitized = Jsoup.clean(reply.getContent(), Whitelist.basicWithImages());
    reply.setContent(updateAtUser(sanitized));
    reply.setStatus(ReplyStatus.ACTIVE);

    Reply saved = replyRepository.save(reply);

    // The user is attached only after saving, right before the post-creation hook runs.
    reply.setUser(user);
    afterCreatingReply(reply);

    return saved;
}
项目:manydesigns.cn    文件:AbstractCrudAction.java   
/**
 * Copies the create/edit form into the persistent object; the form is expected to have been
 * validated already. The submitted value of every editable rich-text (HTML) field is then
 * cleaned according to the {@link #getWhitelist() whitelist} and written to the object.
 * If cleaning fails for any reason, the HTML-escaped text is stored instead.
 */
protected void writeFormToObject() {
    form.writeToObject(object);
    for(TextField richTextField : getEditableRichTextFields()) {
        PropertyAccessor accessor = richTextField.getPropertyAccessor();
        String submitted = richTextField.getStringValue();
        String sanitized;
        try {
            sanitized = Jsoup.clean(submitted, getWhitelist());
        } catch (Throwable t) {
            logger.error("Could not clean HTML, falling back to escaped text", t);
            sanitized = StringEscapeUtils.escapeHtml(submitted);
        }
        accessor.set(object, sanitized);
    }
}
项目:android-opensource-library-56    文件:SanitizeActivity.java   
/**
 * Sets up the sanitize demo screen: pre-fills the input field with markup that carries an
 * {@code onclick} handler and wires the button to show the sanitized version of the input.
 */
@Override
protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_sanitize);

    final EditText rawField = (EditText) findViewById(R.id.input_text);
    rawField.setText("<p><a href='http://example.com/' onclick='doAttack()'>Link</a></p>");
    final EditText resultField = (EditText) findViewById(R.id.sanitized_text);

    OnClickListener sanitizeOnClick = new OnClickListener() {
        @Override
        public void onClick(View v) {
            // Clean whatever is currently typed in, using the basic whitelist.
            String rawMarkup = rawField.getText().toString();
            resultField.setText(Jsoup.clean(rawMarkup, Whitelist.basic()));
        }
    };
    findViewById(R.id.sanitize_button).setOnClickListener(sanitizeOnClick);
}
项目:sipsoup    文件:JsoupParseTest.java   
/**
 * Demo: prints the document parsed from a plain-text string, the validity of that string
 * against the empty whitelist, and the result of parsing {@code null}.
 */
public static void main(String[] args) {
    Document fromPlainText = Jsoup.parse("test");
    System.out.println(fromPlainText);

    // Original author's note on this check: "not useful".
    boolean plainTextValid = Jsoup.isValid("test", Whitelist.none());
    System.out.println(plainTextValid);

    // NOTE(review): parse is called with null here -- presumably to probe null handling; confirm.
    Document fromNull = Jsoup.parse(null);
    System.out.println(fromNull);
}
项目:q-mail    文件:HtmlSanitizer.java   
/**
 * Builds the body cleaner from the relaxed whitelist extended with the {@code font} tag,
 * table layout attributes, {@code class}/{@code style}/{@code id} on every tag and the
 * {@code cid} and {@code data} protocols for image sources; also creates the head cleaner.
 */
HtmlSanitizer() {
    Whitelist allowed = Whitelist.relaxed();
    allowed.addTags("font");
    allowed.addAttributes("table", "align", "bgcolor", "border", "cellpadding", "cellspacing", "width");
    allowed.addAttributes(":all", "class", "style", "id");
    allowed.addProtocols("img", "src", "http", "https", "cid", "data");

    cleaner = new Cleaner(allowed);
    headCleaner = new HeadCleaner();
}
项目:ripme    文件:FuraffinityRipper.java   
/**
 * Fetches an image page and returns its title followed by its description as plain text.
 * Line breaks of the description ({@code <br>} and {@code <p>}) are preserved as
 * platform line separators.
 *
 * @param page the URL of the image page
 * @return the page's {@code og:title} content, a newline and the description text, or
 *         {@code null} when the page cannot be fetched or holds no description
 */
public String getDescription(String page) {
    try {
        // Fetch the image page
        Response resp = Http.url(page)
                .referrer(this.url)
                .response();
        cookies.putAll(resp.cookies());

        // Parse the response once and reuse both the document and the selection;
        // the original parsed and selected twice.
        Document documentz = resp.parse();
        Elements els = documentz.select("td[class=alt1][width=\"70%\"]");
        if (els.size() == 0) {
            logger.debug("No description at " + page);
            throw new IOException("No description found");
        }
        logger.debug("Description found!");
        Element ele = els.get(0); // This is where the description is.
        // Would break completely if FurAffinity changed site layout.
        documentz.outputSettings(new Document.OutputSettings().prettyPrint(false));
        // Mark line breaks with literal backslash-n tokens; they become real separators below.
        ele.select("br").append("\\n");
        ele.select("p").prepend("\\n\\n");
        logger.debug("Returning description at " + page);
        String tempPage = Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
        return documentz.select("meta[property=og:title]").attr("content") + "\n" + tempPage; // Overridden saveText takes first line and makes it the file name.
    } catch (IOException ioe) {
        logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
        return null;
    }
}
项目:Equella    文件:FlickrSearchResultsSection.java   
/**
 * Cleans the given text with the simple-text whitelist.
 *
 * @param original the text to clean
 * @return {@code original} unchanged when {@code Check.isEmpty} reports it empty,
 *         otherwise the cleaned text
 */
public String stripHtmlFrom(String original)
{
    // Nothing to clean: hand the input straight back.
    if( Check.isEmpty(original) )
    {
        return original;
    }

    return Jsoup.clean(original, Whitelist.simpleText());
}
项目:shoucang    文件:PostService.java   
/**
 * Creates a post from the article found at the DTO's URL. The article is fetched through
 * {@code getWebPost}; its content is sanitized with the relaxed whitelist and its title is
 * limited to 255 characters before the post is tagged and saved.
 *
 * @param postDTO the submitted post data
 * @return the saved post, or an empty Optional when the article could not be fetched
 * @throws JSONException if the fetched JSON lacks an expected key
 */
@Transactional
@Timed
@Caching(evict = {
    @CacheEvict(value = TagService.CACHE_COUNT_USER, key = "#postDTO.userId.toString().concat('_posts_count')"),
    @CacheEvict(value = CACHE_COUNT_USER_TAG_POSTS, key = "#postDTO.userId.toString().concat('_tags_posts_count')", allEntries = true),
})
public Optional<Post> createPost(PostDTO postDTO) throws JSONException {
    Post post = postMapper.postDTOToPost(postDTO);
    String response = getWebPost(String.format(SERVER_URL, post.getUrl()));
    if (response == null) {
        return Optional.empty();
    }

    JSONObject article = new JSONObject(response);

    // Filter html tags
    String safeContent = Jsoup.clean(article.getString("content"), Whitelist.relaxed());

    // Store the title, then cut the stored value down to at most 255 characters.
    post.setTitle(article.getString("title"));
    post.setTitle(post.getTitle().substring(0, Math.min(255, post.getTitle().length())));
    post.setContent(safeContent);
    post.setDomain(article.getString("host"));

    updateTags(post, null);
    saveNewPost(post);

    return Optional.of(post);
}
项目:shoucang    文件:RabbitConsumer.java   
/**
 * Gets the full text of a post: fetches the article through the post service, sanitizes its
 * content with the relaxed whitelist and saves it on the stored copy of the post.
 * Nothing is saved when the fetch returns nothing or the post no longer exists; any failure
 * is logged and not propagated.
 *
 * @param post the post whose full text should be fetched
 */
public void getArticle(Post post) {
    log.debug("Handle crawling article full text from source site, id=" + post.getId() + " , url=" + post.getUrl());

    try {
        String result = postService.getWebPost(String.format(PostService.SERVER_URL, post.getUrl()));
        if (result == null) {
            log.error("Failed to get article full text, id=" + post.getId());
            return;
        }

        // Re-read the post so the content is written to its current stored state.
        Post resultPost = postRepository.findOne(post.getId());
        if (resultPost == null) {
            log.warn("Cancel crawling article full text of post id=" + post.getId() + ", because the post does not exist.");
            return;
        }

        JSONObject json = new JSONObject(result);
        String content = json.getString("content");
        content = Jsoup.clean(content, Whitelist.relaxed());
        resultPost.setContent(content);

        postService.saveNewPost(resultPost);
    } catch (Exception e) {
        // Hand the exception to the logger so the stack trace is recorded, not only the message.
        log.error("Failed to resolve article full text, id=" + post.getId() + ", url=" + post.getUrl() + ", exception: " + e.getMessage(), e);
    }
}
项目:gitplex-mit    文件:DefaultMarkdownManager.java   
/**
 * Stores the contributed extensions and HTML transformers and builds the whitelist used for
 * sanitizing: the configured safe tags, attributes and protocols, relative links preserved,
 * and every {@code data-*} attribute accepted.
 *
 * @param contributedExtensions the contributed extensions
 * @param htmlTransformers      the HTML transformers
 */
@Inject
public DefaultMarkdownManager(Set<Extension> contributedExtensions, Set<HtmlTransformer> htmlTransformers) {
    this.contributedExtensions = contributedExtensions;
    this.htmlTransformers = htmlTransformers;

    whiteList = new Whitelist() {

        @Override
        protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) {
            // data-* attributes are always accepted; everything else follows the normal rules.
            return attr.getKey().startsWith("data-") || super.isSafeAttribute(tagName, el, attr);
        }

    };

    whiteList.addTags(SAFE_TAGS);

    whiteList.addAttributes("a", "href", "title");
    whiteList.addAttributes("img", "align", "alt", "height", "src", "title", "width");
    whiteList.addAttributes("div", "itemscope", "itemtype");
    whiteList.addAttributes(":all", SAFE_ATTRIBUTES);

    whiteList.addProtocols("a", "href", SAFE_ANCHOR_SCHEMES);
    whiteList.addProtocols("blockquote", "cite", "http", "https");
    whiteList.addProtocols("cite", "cite", "http", "https");
    whiteList.addProtocols("img", "src", "http", "https");
    whiteList.addProtocols("q", "cite", "http", "https");

    whiteList.preserveRelativeLinks(true);
}
项目:etomica    文件:SimClassInfo.java   
/**
 * Builds the info record for a class from its canonical name and its runtime Javadoc comment,
 * sanitized with the basic whitelist. A class without Javadoc gets an empty comment.
 *
 * @param cls the class to describe
 * @return the info record for {@code cls}
 */
public static SimClassInfo forClass(Class<?> cls) {
    Optional<ClassJavadoc> classDoc = RuntimeJavadoc.getJavadoc(cls);
    String rawComment = classDoc
            .map(ClassJavadoc::getComment)
            .map(Comment::toString)
            .orElse("");
    return new SimClassInfo(cls.getCanonicalName(), Jsoup.clean(rawComment, Whitelist.basic()));
}
项目:matrix-appservice-email    文件:GmailClientFormatter.java   
/**
 * Extracts the message from the first {@code div[dir='ltr']} of the e-mail body, drops
 * trailing {@code <br>} children and sanitizes the rest with the basic whitelist.
 *
 * @param content the HTML of the e-mail
 * @return the sanitized message, or an empty string when no such div exists
 */
@Override
protected String formatHtml(String content) {
    Element messageDiv = Jsoup.parse(content).body().select("div[dir='ltr']").first();
    if (messageDiv == null) {
        log.warn("Found no valid content in e-mail from Gmail, returning empty");
        return "";
    }

    // Peel off <br> elements from the end until another element (or nothing) is last.
    while (messageDiv.children().size() > 0) {
        Element trailing = messageDiv.children().last();
        if (!trailing.is("br")) {
            break;
        }
        trailing.remove();
    }

    return Jsoup.clean(messageDiv.html(), Whitelist.basic());
}
项目:matrix-appservice-email    文件:ThunderbirdClientFormatter.java   
/**
 * Removes the quoted parts ({@code blockquote[cite]} and {@code div.moz-cite-prefix}) from the
 * e-mail body, drops trailing {@code <br>} children and sanitizes the rest with the basic
 * whitelist.
 *
 * @param content the HTML of the e-mail
 * @return the sanitized message
 */
@Override
protected String formatHtml(String content) {
    Element messageBody = Jsoup.parse(content).body();
    messageBody.select("blockquote[cite]").remove();
    messageBody.select("div.moz-cite-prefix").remove();

    // Peel off <br> elements from the end until another element (or nothing) is last.
    while (messageBody.children().size() > 0) {
        Element trailing = messageBody.children().last();
        if (!trailing.is("br")) {
            break;
        }
        trailing.remove();
    }

    return Jsoup.clean(messageBody.html(), Whitelist.basic());
}
项目:ugc-bot-redux    文件:AdminPanelService.java   
/**
 * Requests the console log view for the given sub id and returns its body with all HTML
 * tags removed, without pretty-printing. A rate-limiter permit is acquired first.
 *
 * @param subId the value sent as the {@code SUBID} request parameter
 * @return the tag-free console text
 * @throws IOException declared by this method; see {@code validateSessionAndGet}
 */
@Retryable(backoff = @Backoff(2000L))
public String getServerConsole(String subId) throws IOException {
    rateLimiter.acquire();

    String consoleMarkup = validateSessionAndGet(
        Jsoup.connect(SUB_URL)
            .userAgent(Constants.USER_AGENT)
            .data("view", "console_log")
            .data("SUBID", subId)
            .timeout(TIMEOUT)
    ).body().toString();

    Document.OutputSettings rawOutput = new Document.OutputSettings().prettyPrint(false);
    return Jsoup.clean(consoleMarkup, "", Whitelist.none(), rawOutput);
}
项目:Peking-University-Open-Research-Data-Platform    文件:MarkupChecker.java   
/**
 * Sanitizes HTML with jsoup's basic-with-images whitelist, extended with headings
 * (h1-h3), kbd, hr, s, del and image maps (map/area with their attributes).
 *   http://jsoup.org/cookbook/cleaning-html/whitelist-sanitizer
 * @param unsafe the HTML to sanitize; may be {@code null}
 * @return the sanitized HTML, or {@code null} when {@code unsafe} is {@code null}
 */
public static String sanitizeBasicHTML(String unsafe){
    if (unsafe == null){
        return null;
    }

    // basic includes: a, b, blockquote, br, cite, code, dd, dl, dt, em, i, li, ol, p, pre, q, small, span, strike, strong, sub, sup, u, ul
    Whitelist wl = Whitelist.basicWithImages();
    wl.addTags( "h1", "h2", "h3", "kbd", "hr", "s", "del","map","area");

    // attributes needed for image maps
    wl.addAttributes("img", "usemap");
    wl.addAttributes("map", "name");
    wl.addAttributes("area", "shape","coords","href","title","alt");

    return Jsoup.clean(unsafe, wl);
}
项目:fiware-sinfonier    文件:Client.java   
/**
 * Sanitizes text with the basic whitelist and, when the result lacks a line break and has
 * more than MAX_WORDS_PER_LINE words, re-flows it: words are joined with single spaces, a
 * line separator is inserted once a line has grown past MAX_WORDS_PER_LINE words, and one
 * is appended after the last word.
 *
 * @param str the text to sanitize; may be {@code null}
 * @return the sanitized (and possibly re-flowed) text, or an empty string for {@code null}
 */
private String sanitise(String str) {
  if (str == null) {
    return "";
  }

  String cleaned = Jsoup.clean(str, Whitelist.basic());
  String separator = System.getProperty("line.separator");

  boolean lacksLineBreak = !cleaned.contains("\n") || !cleaned.contains(separator);
  if (!lacksLineBreak) {
    return cleaned;
  }

  String[] words = StringUtils.split(cleaned);
  if (words.length <= MAX_WORDS_PER_LINE) {
    return cleaned;
  }

  StringBuilder wrapped = new StringBuilder();
  int wordsOnLine = 0;
  for (String word : words) {
    // Start a new line once the current one has exceeded the limit.
    if (wordsOnLine > MAX_WORDS_PER_LINE) {
      wrapped.append(separator);
      wordsOnLine = 0;
    }
    wrapped.append(word).append(" ");
    wordsOnLine++;
  }

  // The re-flowed text always ends with a separator after its last word.
  if (words.length > 0) {
    wrapped.append(separator);
  }

  return wrapped.toString();
}
项目:interview-preparation    文件:App.java   
/**
 * Demo: loads index.html, prints it, cleans it with an extended relaxed whitelist and
 * prints the cleaned document.
 */
public static void main( String[] args )
{
    // load html from file
    Document dirtyDoc = loadHtmlFromFile("index.html", "utf-8");

    // nothing to do without a document
    if (dirtyDoc == null) {
        LogUtils.d(CLS_NAME, "main", "document is null");
        return;
    }

    /* the dirty html */
    System.out.println("===BEFORE===");
    System.out.println(dirtyDoc.html());

    /* relaxed whitelist plus meta, title, script and iframe */
    Whitelist allowList = Whitelist.relaxed();
    allowList.addTags("meta", "title", "script", "iframe");
    allowList.addAttributes("meta", "charset");
    allowList.addAttributes("iframe", "src");
    allowList.addProtocols("iframe", "src", "http", "https");

    /* clean the dirty doc */
    Document cleanDoc = new Cleaner(allowList).clean(dirtyDoc);

    /* the clean one */
    System.out.println("===AFTER===");
    System.out.println(cleanDoc.html());
}
项目:GoogleIndexRetriever    文件:GoogleSearch.java   
/**
 * Strips potential XSS content from the value: NUL characters are dropped and all HTML
 * markup is removed.
 * @param value the string to strip; may be {@code null}
 * @return the stripped string, or {@code null} when {@code value} is {@code null}
 */
public String stripXSS( String value )
{
    if( value != null )
    {
        // Avoid null characters
        String withoutNulls = value.replaceAll("\0", "");

        // Clean out HTML
        return Jsoup.clean( withoutNulls, Whitelist.none() );
    }
    return null;
}
项目:imageboard    文件:BelchanBot.java   
/**
 * Sends one chat message that lists the given posts: board, post number, subject, the
 * message cleaned with the simple-text whitelist, and a link to the thread.
 *
 * @param chatId     the chat to send to
 * @param postsAfter the posts to list
 */
private void sendPostsInChat(Long chatId, List<Post> postsAfter) {
    StringBuilder text = new StringBuilder();
    for (Post post : postsAfter) {
        Board board = boardService.getBoard(post.getBoardid());
        text.append("\nBoard is /").append(board.getName())
            .append("/ ").append(board.getDesc())
            .append("\nPOST № ").append(post.getId())
            .append("\nSubject  ").append(post.getSubject())
            .append("\nMessage ").append(Jsoup.clean(post.getMessage(), Whitelist.simpleText()))
            .append("\nURL : http://belchan.org/").append(board.getName())
            .append("/res/");

        // The link uses the parent id; when that is 0 the post's own id is used instead.
        int threadId = post.getParentid();
        if (threadId == 0) {
            threadId = post.getId();
        }
        text.append(threadId).append(".html\n\n");
    }
    sendMessage(chatId, text.toString());
}
项目:bpm    文件:SourceSpigotMc.java   
/**
 * Converts HTML to plain text while keeping line breaks: a newline follows every
 * {@code <br>} and two newlines precede every {@code <p>}; all tags are then removed.
 *
 * @param html the HTML to convert; may be {@code null}
 * @return the plain text, or {@code null} when {@code html} is {@code null}
 */
protected static String br2nl(String html) {
    if (html == null) {
        return null;
    }

    Document parsed = Jsoup.parse(html);
    parsed.outputSettings(new Document.OutputSettings().prettyPrint(false));

    // Insert literal backslash-n tokens as placeholders for the line breaks.
    parsed.select("br").append("\\n");
    parsed.select("p").prepend("\\n\\n");

    // Turn the placeholders into real newlines before the tags are stripped.
    String withNewlines = parsed.html().replaceAll("\\\\n", "\n");
    return Jsoup.clean(withNewlines, "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
}
项目:base    文件:StringUtil.java   
/**
 * Checks whether the input contains markup, judged by validating it against a whitelist
 * that allows no tags at all.
 *
 * @param str input String; may be {@code null}
 *
 * @return {@code false} for a null or empty input; otherwise {@code true} when the input is
 *         not valid against the empty whitelist
 */
public static boolean containsHtml( String str )
{
    if ( Strings.isNullOrEmpty( str ) )
    {
        return false;
    }
    return !Jsoup.isValid( str, Whitelist.none() );
}
项目:shoucang    文件:PostService.java   
/**
 * Creates a post from the article found at the DTO's URL. The article is fetched through
 * {@code getWebPost}; its content is sanitized with the relaxed whitelist and its title is
 * limited to 255 characters before the post is tagged and saved.
 *
 * @param postDTO the submitted post data
 * @return the saved post, or an empty Optional when the article could not be fetched
 * @throws JSONException if the fetched JSON lacks an expected key
 */
@Transactional
@Timed
@Caching(evict = {
    @CacheEvict(value = TagService.CACHE_COUNT_USER, key = "#postDTO.userId.toString().concat('_posts_count')"),
    @CacheEvict(value = CACHE_COUNT_USER_TAG_POSTS, key = "#postDTO.userId.toString().concat('_tags_posts_count')", allEntries = true),
})
public Optional<Post> createPost(PostDTO postDTO) throws JSONException {
    Post post = postMapper.postDTOToPost(postDTO);
    String response = getWebPost(String.format(SERVER_URL, post.getUrl()));
    if (response == null) {
        return Optional.empty();
    }

    JSONObject article = new JSONObject(response);

    // Filter html tags
    String safeContent = Jsoup.clean(article.getString("content"), Whitelist.relaxed());

    // Store the title, then cut the stored value down to at most 255 characters.
    post.setTitle(article.getString("title"));
    post.setTitle(post.getTitle().substring(0, Math.min(255, post.getTitle().length())));
    post.setContent(safeContent);
    post.setDomain(article.getString("host"));

    updateTags(post, null);
    saveNewPost(post);

    return Optional.of(post);
}
项目:shoucang    文件:RabbitConsumer.java   
/**
 * Gets the full text of a post: fetches the article through the post service, sanitizes its
 * content with the relaxed whitelist and saves it on the stored copy of the post.
 * Nothing is saved when the fetch returns nothing or the post no longer exists; any failure
 * is logged and not propagated.
 *
 * @param post the post whose full text should be fetched
 */
public void getArticle(Post post) {
    log.debug("Handle crawling article full text from source site, id=" + post.getId() + " , url=" + post.getUrl());

    try {
        String result = postService.getWebPost(String.format(PostService.SERVER_URL, post.getUrl()));
        if (result == null) {
            log.error("Failed to get article full text, id=" + post.getId());
            return;
        }

        // Re-read the post so the content is written to its current stored state.
        Post resultPost = postRepository.findOne(post.getId());
        if (resultPost == null) {
            log.warn("Cancel crawling article full text of post id=" + post.getId() + ", because the post does not exist.");
            return;
        }

        JSONObject json = new JSONObject(result);
        String content = json.getString("content");
        content = Jsoup.clean(content, Whitelist.relaxed());
        resultPost.setContent(content);

        postService.saveNewPost(resultPost);
    } catch (Exception e) {
        // Hand the exception to the logger so the stack trace is recorded, not only the message.
        log.error("Failed to resolve article full text, id=" + post.getId() + ", url=" + post.getUrl() + ", exception: " + e.getMessage(), e);
    }
}