Java class org.apache.commons.io.input.BOMInputStream example source code
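org.apache.commons.io.input.BOMInputStream (Apache Commons IO) wraps another InputStream, detects a leading byte-order mark and, unless told otherwise, removes it from the data it returns, while hasBOM(), getBOM() and getBOMCharsetName() report what was found. The project examples below all build on that pattern; as a quick orientation, here is a minimal sketch of it (the class name, file-path parameter and UTF-8 fallback are illustrative, not taken from any of the projects):

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.commons.io.ByteOrderMark;
import org.apache.commons.io.input.BOMInputStream;

public class BomReaderSketch {

    // Open a file, strip any leading BOM, and choose the reader charset from it.
    public static BufferedReader open(String path) throws IOException {
        BOMInputStream in = new BOMInputStream(new FileInputStream(path), false,
                ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE);
        // hasBOM() peeks at the first bytes; because "include" is false the
        // marker itself is not part of what the reader will see.
        String charset = in.hasBOM() ? in.getBOMCharsetName() : StandardCharsets.UTF_8.name();
        return new BufferedReader(new InputStreamReader(in, charset));
    }
}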

Project: Gargoyle    File: XmlFileReadModel.java
@Override
public List<String> readLines(URL url) throws IOException {

    ByteArrayOutputStream out = new ByteArrayOutputStream();

    try (InputStream in = new BOMInputStream(url.openStream())) {

        int tmp = -1;
        while ((tmp = in.read()) != -1) {
            out.write(tmp);
        }
    }

    String string = out.toString();
    LOGGER.debug(string);
    XMLDiffFormatter xmlFormatter = new XMLDiffFormatter();
    String format = xmlFormatter.format(string);

    return Stream.of(format.split("\n")).collect(Collectors.toList());
}
Project: DigitalMediaServer    File: PlaylistFolder.java
private BufferedReader getBufferedReader() throws IOException {
    String extension;
    Charset charset;
    if (FileUtil.isUrl(uri)) {
        extension = FileUtil.getUrlExtension(uri).toLowerCase(PMS.getLocale());
    } else {
        extension = FileUtil.getExtension(uri).toLowerCase(PMS.getLocale());
    }
    if (extension != null && (extension.equals("m3u8") || extension.equals("cue"))) {
        charset = StandardCharsets.UTF_8;
    } else {
        charset = StandardCharsets.ISO_8859_1;
    }
    if (FileUtil.isUrl(uri)) {
        return new BufferedReader(new InputStreamReader(new BOMInputStream(new URL(uri).openStream()), charset));
    } else {
        File playlistfile = new File(uri);
        if (playlistfile.length() < 10000000) {
            return new BufferedReader(new InputStreamReader(new BOMInputStream(new FileInputStream(playlistfile)), charset));
        }
    }
    return null;
}
Project: mojito    File: CommandHelper.java
/**
 * Gets the content of the file at the given {@link java.nio.file.Path}, using the charset
 * indicated by the BOM if one is present and UTF-8 otherwise.
 *
 * @param path path of the file to read
 * @return the file content
 * @throws CommandException if the file content cannot be read
public String getFileContent(Path path) throws CommandException {
    try {
        File file = path.toFile();
        BOMInputStream inputStream = new BOMInputStream(FileUtils.openInputStream(file), false, boms);
        String fileContent;
        if (inputStream.hasBOM()) {
            fileContent = IOUtils.toString(inputStream, inputStream.getBOMCharsetName());
        } else {
            fileContent = IOUtils.toString(inputStream, StandardCharsets.UTF_8);
        }
        return fileContent;
    } catch (IOException e) {
        throw new CommandException("Cannot get file content for path: " + path.toString(), e);
    }
}
Project: dss    File: TestBOM.java
@Test
public void test() throws IOException {
    ApacheCommonsUtils acu = new ApacheCommonsUtils();

    FileInputStream fis = new FileInputStream(new File("src/test/resources/lotl_utf-8-sansbom.xml"));
    FileInputStream fisBom = new FileInputStream(new File("src/test/resources/lotl_utf-8.xml"));

    assertNotEquals(acu.toBase64(acu.toByteArray(fis)), acu.toBase64(acu.toByteArray(fisBom)));

    fis = new FileInputStream(new File("src/test/resources/lotl_utf-8-sansbom.xml"));
    fisBom = new FileInputStream(new File("src/test/resources/lotl_utf-8.xml"));

    BOMInputStream bomIS = new BOMInputStream(fis);
    BOMInputStream bomISSkipped = new BOMInputStream(fisBom);

    assertEquals(acu.toBase64(acu.toByteArray(bomIS)), acu.toBase64(acu.toByteArray(bomISSkipped)));
}
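The two resource files above differ only in the presence of a UTF-8 BOM, so their raw bytes differ while their BOMInputStream-wrapped contents match. The same property can be shown without files; in this sketch the payload string and class name are illustrative, and only the three BOM bytes (EF BB BF) are fixed by the UTF-8 encoding:

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.BOMInputStream;

public class BomEquivalenceDemo {
    public static void main(String[] args) throws IOException {
        byte[] payload = "<lotl/>".getBytes(StandardCharsets.UTF_8);   // illustrative content
        byte[] withBom = new byte[payload.length + 3];
        withBom[0] = (byte) 0xEF;                                      // UTF-8 BOM bytes
        withBom[1] = (byte) 0xBB;
        withBom[2] = (byte) 0xBF;
        System.arraycopy(payload, 0, withBom, 3, payload.length);

        // The raw arrays differ, but after BOMInputStream strips the marker the
        // streamed contents are identical -- the property the test above asserts.
        byte[] a = IOUtils.toByteArray(new BOMInputStream(new ByteArrayInputStream(payload)));
        byte[] b = IOUtils.toByteArray(new BOMInputStream(new ByteArrayInputStream(withBom)));
        System.out.println(Arrays.equals(a, b));                       // prints: true
    }
}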
Project: crawler-commons    File: SiteMapParser.java
/**
 * Process a text-based Sitemap. Text sitemaps only list URLs, without
 * priorities, last-modified dates, etc.
 *
 * @param sitemapUrl
 *            URL to sitemap file
 * @param stream
 *            content stream
 * @return The site map
 * @throws IOException
 *             if there is an error reading in the site map content
 */
protected SiteMap processText(URL sitemapUrl, InputStream stream) throws IOException {
    LOG.debug("Processing textual Sitemap");

    SiteMap textSiteMap = new SiteMap(sitemapUrl);
    textSiteMap.setType(SitemapType.TEXT);

    BOMInputStream bomIs = new BOMInputStream(stream);
    @SuppressWarnings("resource")
    BufferedReader reader = new BufferedReader(new InputStreamReader(bomIs, UTF_8));

    String line;
    int i = 1;
    while ((line = reader.readLine()) != null) {
        if (line.length() > 0 && i <= MAX_URLS) {
            addUrlIntoSitemap(line, textSiteMap, null, null, null, i++);
        }
    }
    textSiteMap.setProcessed(true);

    return textSiteMap;
}
Project: crawler-commons    File: SiteMapParser.java
/**
 * Decompress the gzipped content and process the resulting XML Sitemap.
 * 
 * @param url
 *            - URL of the gzipped content
 * @param response
 *            - Gzipped content
 * @return the site map
 * @throws UnknownFormatException
 *             if there is an error parsing the gzip
 * @throws IOException
 *             if there is an error reading in the gzip {@link java.net.URL}
 */
protected AbstractSiteMap processGzippedXML(URL url, byte[] response) throws IOException, UnknownFormatException {

    LOG.debug("Processing gzipped XML");

    InputStream is = new ByteArrayInputStream(response);

    // Remove .gz ending
    String xmlUrl = url.toString().replaceFirst("\\.gz$", "");
    LOG.debug("XML url = {}", xmlUrl);

    BOMInputStream decompressed = new BOMInputStream(new GZIPInputStream(is));
    InputSource in = new InputSource(decompressed);
    in.setSystemId(xmlUrl);
    return processXml(url, in);
}
Project: easyjasub    File: InputTextSubFile.java
public InputTextSubFile(SubtitleFileType inputFormat, String fileName,
        InputStream is) throws InputTextSubException, IOException {
    try {
        tto = createFormat(inputFormat).parseFile(fileName,
                new BOMInputStream(is));
    } catch (FatalParsingException ex) {
        throw new InputTextSubException(
                "Parse error returned by subtitle read library", ex);
    }
    captions = new ArrayList<InputSubtitleLine>(tto.captions.size());
    for (Caption caption : tto.captions.values()) {
        InputSubtitleLine line = new InputSubtitleLine();
        line.setContent(caption.content);
        line.setStartTime(new SubtitleFileTimeWrapper(caption.start)
                .getMSeconds());
        line.setEndTime(new SubtitleFileTimeWrapper(caption.end)
                .getMSeconds());
        captions.add(line);
    }
}
Project: org.fastnate    File: AbstractCsvReader.java
/**
 * Opens a CSV file.
 *
 * If the given file name ends with ".gz", the file is decompressed with a {@link GZIPInputStream} before reading.
 *
 * @param importFile
 *            the CSV file
 * @return a list reader
 * @throws IOException
 *             on I/O errors
 */
@SuppressWarnings("resource")
protected CsvListReader openCsvListReader(final File importFile) throws IOException {
    // Open file
    InputStream fileStream = new FileInputStream(importFile);

    // Check for compressed file
    if (importFile.getName().toLowerCase().endsWith(".gz")) {
        fileStream = new GZIPInputStream(fileStream);
    }

    // Guess the encoding
    final BOMInputStream inputStream = new BOMInputStream(fileStream, false, ByteOrderMark.UTF_8,
            ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
    final String charset;
    if (inputStream.hasBOM()) {
        charset = inputStream.getBOMCharsetName();
        log.info("BOM detected. Using {} as encoding", charset);
    } else {
        charset = getDefaultEncoding().toString();
        log.info("No BOM detected. Assuming {} as encoding", charset);
    }
    final Reader reader = new InputStreamReader(inputStream, charset);
    return new CsvListReader(reader, new CsvPreference.Builder(CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE)
            .skipComments(new CommentMatches("(//|/\\*|#|;).*")).build());
}
Project: dclib    File: ConverterService.java
/**
 * Simple invocation. Loads the template and data from files, runs the conversion process
 * and returns an in-memory model containing the results, or null if there was a problem.
 * Problems and progress are reported live to the given reporter.
 * @param templateFile the name of the template file to use
 * @param dataFile  the name of the data file to process
 * @param reporter the message reporter
 * @param debug set to true to enable voluminous debug messages
 * @param allowNullRows set to true to allow output even if some rows don't match
 * @return the result model, or null if processing failed
 * @throws IOException
 */
public Model simpleConvert(String templateFile, String dataFile, ProgressMonitorReporter reporter, boolean debug, boolean allowNullRows) throws IOException {
    Template template = TemplateFactory.templateFrom(templateFile, dc);

    File dataFileF = new File(dataFile);
    String filename = dataFileF.getName();
    String filebasename = NameUtils.removeExtension(filename);
    put(ConverterProcess.FILE_NAME, filename);
    put(ConverterProcess.FILE_BASE_NAME, filebasename);
    InputStream is = new BOMInputStream( new FileInputStream(dataFileF) );

    ConverterProcess process = new ConverterProcess(dc, is);
    process.setDebug(debug);
    process.setTemplate( template );
    process.setMessageReporter( reporter );
    process.setAllowNullRows(allowNullRows);
    boolean ok = process.process();

    return ok ?  process.getModel() : null;
}
Project: rosa    File: JsonldJenaUtils.java
/**
 * Generate a single Jena model aggregated from several different RDF files
 * loaded from the classpath
 * @param aggr String array containing the names of all relevant RDF files ("name.extension")
 * @param lang the RDF serialization language of the input files
 * @return the aggregated model
 * @throws IOException
 */
public static Model generateAggregateModel(String[] aggr, String lang) 
        throws IOException {

    Model model = ModelFactory.createDefaultModel();
    Model subModel = ModelFactory.createDefaultModel();

    for (int i=0; i<aggr.length; i++) {
        InputStream in = (JsonldJenaUtils.class).getClassLoader().getResourceAsStream(
                aggr[i]);
        BOMInputStream bIn = new BOMInputStream(in, false);

        subModel.read(bIn, null, lang);
        model = model.add(subModel);

        subModel.removeAll();
        bIn.close();
        in.close();
    }

    return model;

}
Project: elasticsearch-river-remote    File: SiteMapParser.java
/**
 * Decompress the gzipped content and process the resulting XML Sitemap.
 * 
 * @param url - URL of the gzipped content
 * @param response - Gzipped content
 * @throws MalformedURLException
 * @throws IOException
 * @throws UnknownFormatException
 */
private AbstractSiteMap processGzip(URL url, byte[] response) throws MalformedURLException, IOException,
        UnknownFormatException {

    logger.debug("Processing gzip");

    AbstractSiteMap smi;

    InputStream is = new ByteArrayInputStream(response);

    // Remove .gz ending
    String xmlUrl = url.toString().replaceFirst("\\.gz$", "");

    logger.debug("XML url = " + xmlUrl);

    BOMInputStream decompressed = new BOMInputStream(new GZIPInputStream(is));
    InputSource in = new InputSource(decompressed);
    in.setSystemId(xmlUrl);
    smi = processXml(url, in);
    decompressed.close();
    return smi;
}
Project: file-type-plugin    File: FileType.java
private String showByteOfMark(InputStream source) throws IOException {
  ByteOrderMark detectedBOM = new BOMInputStream(source).getBOM();
  if (detectedBOM == null) {
    return "";
  }
  String bom = detectedBOM.toString();
  FileType.logger.log(Level.INFO, "BOM: {0}", bom);
  return " w/ " + bom;
}
Project: instalint    File: FileMetadata.java
private static InputStream streamFile(File file) {
  try {
    return new BOMInputStream(new FileInputStream(file),
      ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
  } catch (FileNotFoundException e) {
    throw new IllegalStateException("File not found: " + file.getAbsolutePath(), e);
  }
}
Project: file-format-streaming-converter    File: XlsxToCsvConverterTest.java
private CSVParser createCsvParser(String inputFileName, String delimiter) throws IOException {
    BufferedReader reader = new BufferedReader(new InputStreamReader(new BOMInputStream(new FileInputStream(inputFileName)), Charsets.UTF_8));
    CSVFormat format = CSVFormat.newFormat(delimiter.charAt(0))
            .withSkipHeaderRecord()
            .withIgnoreEmptyLines()
            .withAllowMissingColumnNames()
            .withQuote('"')
            .withHeader();
    return new CSVParser(reader, format);
}
Project: jijimaku    File: SubtitleFile.java
public SubtitleFile(String fileName, String fileContents, String stylesStr) throws IOException, FatalParsingException {
  LOGGER.debug("Parsing subtitle file {}", fileName);

  TimedTextFileFormat timedTextFormat;
  switch (FilenameUtils.getExtension(fileName)) {
    case "ass":
      timedTextFormat = new FormatASS();
      break;
    case "srt":
      timedTextFormat = new FormatSRT();
      break;
    default:
      LOGGER.error("invalid subtitle file extension file: {}", fileName);
      throw new UnexpectedError();
  }

  // Convert String to InputStream to match subtitleFile API
  byte[] byteData = fileContents.getBytes("UTF-8");
  // Must use BOMInputStream otherwise files with a BOM will break
  // => http://stackoverflow.com/questions/4897876/reading-utf-8-bom-marker
  try (BOMInputStream inputStream = new BOMInputStream(new ByteArrayInputStream(byteData))) {
    timedText = timedTextFormat.parseFile(fileName, inputStream, StandardCharsets.UTF_8);
  }

  if (timedText.warnings.length() > "List of non fatal errors produced during parsing:\n\n".length()) {
    LOGGER.warn("There was some warnings during parsing. See logs.");
    LOGGER.debug("Got warnings: {}", "\n" + timedText.warnings);
  }

  styles = parseStyles(stylesStr);
  timedText.styling = styles;
  timedText.description = JIJIMAKU_SIGNATURE;
  annotationCaptions = new TreeMap<>();

  // Initialization: add jijimaku mark and set style to Default
  addJijimakuMark();
  timedText.captions.values().stream().forEach(c -> c.style = styles.get("Default"));

  captionIter = timedText.captions.entrySet().iterator();
}
Project: mojito    File: CommandHelper.java
/**
 * Writes the content into a file using the same format (BOM and charset) as the source file
 *
 * @param content content to be written
 * @param path path to the file
 * @param sourceFileMatch the source file whose BOM and encoding are preserved
 * @throws CommandException
 */
public void writeFileContent(String content, Path path, FileMatch sourceFileMatch) throws CommandException {
    try {
        File outputFile = path.toFile();
        BOMInputStream inputStream = new BOMInputStream(FileUtils.openInputStream(sourceFileMatch.getPath().toFile()), false, boms);
        if (inputStream.hasBOM()) {
            FileUtils.writeByteArrayToFile(outputFile, inputStream.getBOM().getBytes());
            FileUtils.writeByteArrayToFile(outputFile, content.getBytes(inputStream.getBOMCharsetName()), true);
        } else {
            FileUtils.writeStringToFile(outputFile, content, StandardCharsets.UTF_8);
        }
    } catch (IOException e) {
        throw new CommandException("Cannot write file content in path: " + path.toString(), e);
    }
}
Project: georocket    File: MimeTypeUtils.java
/**
 * Read the first bytes of the given file and try to determine the file
 * format. Read up to 100 KB before giving up.
 * @param f the file to read
 * @return the file format (or <code>null</code> if the format
 * could not be determined)
 * @throws IOException if the input stream could not be read
 */
public static String detect(File f) throws IOException {
  if (!f.exists()) {
    return null;
  }
  try (BufferedInputStream bis = new BufferedInputStream(new BOMInputStream(
      new FileInputStream(f)))) {
    return determineFileFormat(bis);
  }
}
Project: Open-Clinica-Data-Uploader    File: UploadController.java
private Path saveFile(MultipartFile file) throws IOException {
    // Get the filename and build the local file path
    String filename = file.getOriginalFilename();
    String directory = System.getProperty("java.io.tmpdir");
    String filepath = Paths.get(directory, filename).toString();

    // Save the file locally
    try (BufferedOutputStream stream =
                 new BufferedOutputStream(new FileOutputStream(new File(filepath)));
         BOMInputStream bis = new BOMInputStream(file.getInputStream(), false)) {
        IOUtils.copy(bis, stream);
    }
    return Paths.get(filepath);
}
Project: webz-server    File: FileDownloaderWithBOM.java
public FileDownloaderWithBOM(WebzInputStreamDownloader downloader, String defaultEncoding) throws IOException, WebzException {

    this.bomIn = new BOMInputStream(downloader.getInputStream(), false, ALL_BOMS);
    this.downloader = new FileDownloader(downloader.getFileSpecific(), bomIn);
    ByteOrderMark bom = bomIn.getBOM();

    if (bom == null) {
        actualEncoding = defaultEncoding;
        actualNumberOfBytes = downloader.getFileSpecific().getNumberOfBytes();
    } else {
        actualEncoding = bom.getCharsetName();
        actualNumberOfBytes = downloader.getFileSpecific().getNumberOfBytes() - bom.length();
    }
    reader = new InputStreamReader(bomIn, actualEncoding);
}
Project: spring-usc    File: EncodingDetector.java
public static InputStreamReader getInputStreamReader(File file, String encoding) throws IOException {

    FileInputStream fis = new FileInputStream(file);
    logger.debug("Reading file: " + file + " using encoding: " + encoding);
    BOMInputStream bis = new BOMInputStream(fis); //So that we can remove the BOM
    return new InputStreamReader(bis, encoding);
}
Project: dwca-io    File: DwcMetaFiles.java
/**
 * Read the provided meta descriptor (e.g. meta.xml) and return a {@link Archive}.
 * @param metaDescriptor the meta descriptor as an {@link InputStream}
 * @return a new {@link Archive}, never null
 * @throws SAXException
 * @throws IOException
 * @throws UnsupportedArchiveException
 */
public static Archive fromMetaDescriptor(InputStream metaDescriptor) throws SAXException, IOException, UnsupportedArchiveException {
  Archive archive = new Archive();
  try (BOMInputStream bomInputStream = new BOMInputStream(metaDescriptor)) {
    SAXParser p = SAX_FACTORY.newSAXParser();
    MetaXMLSaxHandler mh = new MetaXMLSaxHandler(archive);
    p.parse(bomInputStream, mh);
  } catch (ParserConfigurationException e) {
    throw new SAXException(e);
  }
  return archive;
}
Project: digidoc4j    File: AsicContainerParser.java
private void extractMimeType(ZipEntry entry) {
  try {
    InputStream zipFileInputStream = getZipEntryInputStream(entry);
    BOMInputStream bomInputStream = new BOMInputStream(zipFileInputStream);
    DSSDocument document = new InMemoryDocument(bomInputStream);
    mimeType = StringUtils.trim(IOUtils.toString(getDocumentBytes(document), "UTF-8"));
    extractAsicEntry(entry, document);
  } catch (IOException e) {
    logger.error("Error parsing container mime type: " + e.getMessage());
    throw new TechnicalException("Error parsing container mime type: " + e.getMessage(), e);
  }
}
Project: srclib-java    File: Resolver.java
/**
 * Tries to fetch the POM model from Maven Central for a given dependency
 * @param dependency the dependency to fetch the model for
 * @return the POM model if found and valid
 * @throws IOException
 * @throws XmlPullParserException
 */
private static Model fetchModel(RawDependency dependency)
        throws IOException, XmlPullParserException {

    // Get the url to the POM file for this artifact
    String url = "http://central.maven.org/maven2/"
            + dependency.groupID.replace('.', '/') + '/' + dependency.artifactID + '/'
            + dependency.version + '/' + dependency.artifactID + '-' + dependency.version + ".pom";
    InputStream input = new BOMInputStream(new URL(url).openStream());

    MavenXpp3Reader xpp3Reader = new MavenXpp3Reader();
    Model model = xpp3Reader.read(input);
    input.close();
    return model;
}
Project: commons-csv    File: CSVParserTest.java
@Test
public void testBOMInputStream_ParserWithInputStream() throws IOException {
    try (final BOMInputStream inputStream = createBOMInputStream("CSVFileParser/bom.csv");
            final CSVParser parser = CSVParser.parse(inputStream, UTF_8, CSVFormat.EXCEL.withHeader())) {
        for (final CSVRecord record : parser) {
            final String string = record.get("Date");
            Assert.assertNotNull(string);
            // System.out.println("date: " + record.get("Date"));
        }
    }
}
Project: es6draft    File: ChakraTest.java
private static Charset charsetFor(BOMInputStream bis) throws IOException {
    ByteOrderMark bom = bis.getBOM();
    if (ByteOrderMark.UTF_8.equals(bom)) {
        return StandardCharsets.UTF_8;
    }
    if (ByteOrderMark.UTF_16LE.equals(bom)) {
        return StandardCharsets.UTF_16LE;
    }
    if (ByteOrderMark.UTF_16BE.equals(bom)) {
        return StandardCharsets.UTF_16BE;
    }
    return StandardCharsets.UTF_8;
}
Project: olca-modules    File: AbstractImport.java
public void run(File file, Seq seq, IDatabase database) throws Exception {
    this.seq = seq;
    this.database = database;
    CsvPreference pref = new CsvPreference.Builder('"', ';', "\n").build();
    try (FileInputStream fis = new FileInputStream(file);
            // exclude the byte order mark, if there is any
            BOMInputStream bom = new BOMInputStream(fis, false,
                    ByteOrderMark.UTF_8);
            InputStreamReader reader = new InputStreamReader(bom, "utf-8");
            BufferedReader buffer = new BufferedReader(reader);
            CsvListReader csvReader = new CsvListReader(buffer, pref)) {
        importFile(csvReader, database);
    }
}
Project: olca-modules    File: Maps.java
private static CsvListReader createReader(InputStream stream)
        throws Exception {
    CsvPreference pref = new CsvPreference.Builder('"', ';', "\n").build();
    // exclude the byte order mark, if there is any
    BOMInputStream bom = new BOMInputStream(stream, false,
            ByteOrderMark.UTF_8);
    InputStreamReader reader = new InputStreamReader(bom, "utf-8");
    BufferedReader buffer = new BufferedReader(reader);
    CsvListReader csvReader = new CsvListReader(buffer, pref);
    return csvReader;
}
Project: storm-crawler    File: CharsetIdentification.java
/**
 * Detects a BOM, if present, and returns the corresponding charset name (or null if none)
 */
private static String getCharsetFromBOM(final byte[] byteData) {
    BOMInputStream bomIn = new BOMInputStream(new ByteArrayInputStream(
            byteData));
    try {
        ByteOrderMark bom = bomIn.getBOM();
        if (bom != null) {
            return bom.getCharsetName();
        }
    } catch (IOException e) {
        return null;
    }
    return null;
}
Project: pentaho-kettle    File: CsvInput.java
String[] readFieldNamesFromFile( String fileName, CsvInputMeta csvInputMeta ) throws KettleException {
  String delimiter = environmentSubstitute( csvInputMeta.getDelimiter() );
  String enclosure = environmentSubstitute( csvInputMeta.getEnclosure() );
  String realEncoding = environmentSubstitute( csvInputMeta.getEncoding() );

  try ( FileObject fileObject = KettleVFS.getFileObject( fileName, getTransMeta() );
      BOMInputStream inputStream =
          new BOMInputStream( KettleVFS.getInputStream( fileObject ), ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE,
              ByteOrderMark.UTF_16BE ) ) {
    InputStreamReader reader = null;
    if ( Utils.isEmpty( realEncoding ) ) {
      reader = new InputStreamReader( inputStream );
    } else {
      reader = new InputStreamReader( inputStream, realEncoding );
    }
    EncodingType encodingType = EncodingType.guessEncodingType( reader.getEncoding() );
    String line =
        TextFileInput.getLine( log, reader, encodingType, TextFileInputMeta.FILE_FORMAT_UNIX, new StringBuilder(
            1000 ) );
    String[] fieldNames =
        CsvInput.guessStringsFromLine( log, line, delimiter, enclosure, csvInputMeta.getEscapeCharacter() );
    if ( !Utils.isEmpty( csvInputMeta.getEnclosure() ) ) {
      removeEnclosure( fieldNames, csvInputMeta.getEnclosure() );
    }
    trimFieldNames( fieldNames );
    return fieldNames;
  } catch ( IOException e ) {
    throw new KettleFileException( BaseMessages.getString( PKG, "CsvInput.Exception.CreateFieldMappingError" ), e );
  }
}
Project: elasticsearch-river-remote    File: SiteMapParser.java
/**
 * Process a text-based Sitemap. Text sitemaps only list URLs, without priorities, last-modified dates, etc.
 *
 * @param content the sitemap content
 * @param sitemapUrl URL of the sitemap file
 * @return the site map
 * @throws IOException
 */
private SiteMap processText(byte[] content, String sitemapUrl) throws IOException {

    logger.debug("Processing textual Sitemap");

    SiteMap textSiteMap = new SiteMap(sitemapUrl);
    textSiteMap.setType(SitemapType.TEXT);

    BOMInputStream bomIs = new BOMInputStream(new ByteArrayInputStream(content));
    @SuppressWarnings("resource")
    BufferedReader reader = new BufferedReader(new InputStreamReader(bomIs));

    String line;

    int i = 1;
    while ((line = reader.readLine()) != null) {
        if (line.length() > 0 && i <= MAX_URLS) {
            try {
                URL url = new URL(line);
                boolean valid = urlIsLegal(textSiteMap.getBaseUrl(), url.toString());

                if (valid || !strict) {
                    if (logger.isDebugEnabled()) {
                        StringBuffer sb = new StringBuffer("  ");
                        sb.append(i).append(". ").append(url);
                        logger.debug(sb.toString());
                    }
                    i++;
                    SiteMapURL surl = new SiteMapURL(url, valid);
                    textSiteMap.addSiteMapUrl(surl);
                }
            } catch (MalformedURLException e) {
                logger.debug("Bad URL [" + line + "].");
            }
        }
    }
    textSiteMap.setProcessed(true);
    return textSiteMap;
}
Project: languagetool    File: Main.java
private void loadFile(File file) {
  try (FileInputStream inputStream = new FileInputStream(file)) {
    BOMInputStream bomIn = new BOMInputStream(inputStream, false,
            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE,
            ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_32LE);
    String charsetName;
    if (bomIn.hasBOM()) {
      bom = bomIn.getBOM();
      charsetName = bom.getCharsetName();
    } else {
      // No BOM found
      bom = null;
      charsetName = null;
    }
    String fileContents = StringTools.readStream(bomIn, charsetName);
    textArea.setText(fileContents);
    currentFile = file;
    updateTitle();
    if(recentFiles.contains(file.getAbsolutePath())) {
      recentFiles.remove(file.getAbsolutePath());
    }
    recentFiles.add(file.getAbsolutePath());
    localStorage.saveProperty("recentFiles", recentFiles);
    updateRecentFilesMenu();
  } catch (IOException e) {
    Tools.showError(e);
  }
}
Project: languagetool    File: Main.java
private InputStreamReader getInputStreamReader(String filename, String encoding) throws IOException {
  String charsetName = encoding != null ? encoding : Charset.defaultCharset().name();
  InputStream is = System.in;
  if (!isStdIn(filename)) {
    is = new FileInputStream(new File(filename));
    BOMInputStream bomIn = new BOMInputStream(is, true, ByteOrderMark.UTF_8,
      ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE,
      ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_32LE);
    if (bomIn.hasBOM() && encoding == null) {
      charsetName = bomIn.getBOMCharsetName();
    }
    is = bomIn;
  }
  return new InputStreamReader(new BufferedInputStream(is), charsetName);
}
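The snippet above passes true as the second BOMInputStream constructor argument ("include"), which keeps the BOM bytes in the stream while still exposing the detected mark through hasBOM()/getBOMCharsetName(); most of the other examples on this page pass false, or use the single-argument constructor, to strip it. A small self-contained sketch of the difference (class name and sample bytes are illustrative):

import java.io.ByteArrayInputStream;
import java.io.IOException;

import org.apache.commons.io.ByteOrderMark;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.BOMInputStream;

public class BomIncludeFlagDemo {
    public static void main(String[] args) throws IOException {
        byte[] data = { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF, (byte) 'A' };   // UTF-8 BOM + "A"

        // include = false: the BOM is detected but removed from the stream.
        BOMInputStream exclude = new BOMInputStream(new ByteArrayInputStream(data), false, ByteOrderMark.UTF_8);
        System.out.println(exclude.hasBOM());                        // true
        System.out.println(IOUtils.toByteArray(exclude).length);     // 1 ("A" only)

        // include = true: the BOM is still reported, but its bytes stay in the stream.
        BOMInputStream include = new BOMInputStream(new ByteArrayInputStream(data), true, ByteOrderMark.UTF_8);
        System.out.println(include.getBOMCharsetName());             // UTF-8
        System.out.println(IOUtils.toByteArray(include).length);     // 4 (BOM + "A")
    }
}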
Project: sparql-generate    File: LocatorURLAccept.java
private TypedInputStream openConnectionCheckRedirects(URLConnection c) throws IOException {
    boolean redir;
    int redirects = 0;
    InputStream in = null;
    String contentType = null;
    String contentEncoding = null;
    do {
        if (c instanceof HttpURLConnection) {
            ((HttpURLConnection) c).setInstanceFollowRedirects(false);
        }
        // We want to open the input stream before getting headers
        // because getHeaderField() et al swallow IOExceptions.
        in = new BufferedInputStream(new BOMInputStream(c.getInputStream()));
        contentType = c.getContentType();
        contentEncoding = c.getContentEncoding();
        redir = false;
        if (c instanceof HttpURLConnection) {
            HttpURLConnection http = (HttpURLConnection) c;
            int stat = http.getResponseCode();
            if (stat >= 300 && stat <= 307 && stat != 306
                    && stat != HttpURLConnection.HTTP_NOT_MODIFIED) {
                URL base = http.getURL();
                String loc = http.getHeaderField("Location");
                URL target = null;
                if (loc != null) {
                    target = new URL(base, loc);
                }
                http.disconnect();
                // Redirection should be allowed only for HTTP and HTTPS
                // and should be limited to 5 redirections at most.
                if (target == null
                        || !(target.getProtocol().equals("http") || target.getProtocol().equals("https"))
                        || c.getURL().getProtocol().equals("https") && target.getProtocol().equals("http")
                        || redirects >= 5) {
                    throw new SecurityException("illegal URL redirect");
                }
                redir = true;
                c = target.openConnection();
                redirects++;
            }
        }
    } while (redir);
    if(contentType==null) {
        contentType = "text/plain";
    }
    return new TypedInputStream(in, contentType, contentEncoding);
}
Project: AniML    File: DataTable.java
public static DataTable loadCSV(String fileName, String formatType, VariableType[] colTypesOverride, String[] colNamesOverride, boolean hasHeaderRow) {
    try {
        // use apache commons io + csv to load but convert to list of String[]
        // byte-order markers are handled if present at start of file.
        FileInputStream fis = new FileInputStream(fileName);
        final Reader reader = new InputStreamReader(new BOMInputStream(fis), "UTF-8");
        CSVFormat format;
        if ( formatType==null ) {
            format = hasHeaderRow ? CSVFormat.RFC4180.withHeader() : CSVFormat.RFC4180;
        }
        else {
            switch ( formatType.toLowerCase() ) {
                case "tsv":
                    format = hasHeaderRow ? CSVFormat.TDF.withHeader() : CSVFormat.TDF;
                    break;
                case "mysql":
                    format = hasHeaderRow ? CSVFormat.MYSQL.withHeader() : CSVFormat.MYSQL;
                    break;
                case "excel":
                    format = hasHeaderRow ? CSVFormat.EXCEL.withHeader() : CSVFormat.EXCEL;
                    break;
                case "rfc4180":
                default:
                    format = hasHeaderRow ? CSVFormat.RFC4180.withHeader() : CSVFormat.RFC4180;
                    break;
            }
        }
        final CSVParser parser = new CSVParser(reader, format);
        List<String[]> rows = new ArrayList<>();
        int numHeaderNames = parser.getHeaderMap().size();
        try {
            for (final CSVRecord record : parser) {
                String[] row = new String[record.size()];
                for (int j = 0; j<record.size(); j++) {
                    row[j] = record.get(j);
                }
                rows.add(row);
            }
        }
        finally {
            parser.close();
            reader.close();
        }

        VariableType[] actualTypes = computeColTypes(rows, numHeaderNames);

        Set<String> colNameSet = parser.getHeaderMap().keySet();
        String[] colNames = colNameSet.toArray(new String[colNameSet.size()]);
        if ( colNamesOverride!=null ) {
            colNames = colNamesOverride;
        }
        if ( colTypesOverride!=null ) {
            actualTypes = colTypesOverride;
        }
        return fromStrings(rows, actualTypes, colNames, false);
    }
    catch (Exception e) {
        throw new IllegalArgumentException("Can't open and/or read "+fileName, e);
    }
}
Project: AniML    File: DataTable.java
public static DataTable loadCSV(String fileName, VariableType[] colTypes, boolean hasHeaderRow) {
        int numCols = colTypes.length;
        try {
            final FileInputStream fis = new FileInputStream(fileName);
            final Reader r = new InputStreamReader(new BOMInputStream(fis), "UTF-8");
            final BufferedReader bf = new BufferedReader(r);
            List<int[]> rows = new ArrayList<>();
            String line;
            String[] colNames = null;
            if ( hasHeaderRow ) {
                line=bf.readLine();
                if ( line!=null ) {
                    line = line.trim();
                    if ( line.length()>0 ) {
                        colNames = line.split(",");
                        for (int i = 0; i<colNames.length; i++) {
                            colNames[i] = colNames[i].trim();
                        }
                    }
                }
            }
            int n = 0;
            while ( (line=bf.readLine())!=null ) {
                if ( n>0 && n % 10000 == 0 ) System.out.println(n);
                line = line.trim();
                if ( line.length()==0 ) continue;
                int[] row = new int[numCols];
                int comma = line.indexOf(',', 0);
                int prev = 0;
                int col = 0;
                while ( comma>=0 ) {
                    String v = line.substring(prev, comma);
                    row[col] = getValue(colTypes[col], v);

                    prev = comma+1;
                    comma = line.indexOf(',', comma+1);
                    col++;
                }
                // grab last element after last comma
                String lastv = line.substring(prev, line.length());
                row[col] = getValue(colTypes[col], lastv);

//              System.out.println();
                rows.add(row);
                n++;
            }

            DataTable data = new DataTable(rows, colTypes, colNames, null);
            return data;
        }
        catch (IOException ioe) {
            throw new IllegalArgumentException("Can't open and/or read "+fileName, ioe);
        }
    }
Project: reference-ccda-validator    File: ReferenceCCDAValidationService.java
private List<RefCCDAValidationResult> runValidators(String validationObjective, String referenceFileName,
                                                    MultipartFile ccdaFile) throws SAXException, Exception {
    List<RefCCDAValidationResult> validatorResults = new ArrayList<>();
    InputStream ccdaFileInputStream = null;
    try {
        ccdaFileInputStream = ccdaFile.getInputStream();
        String ccdaFileContents = IOUtils.toString(new BOMInputStream(ccdaFileInputStream));

        List<RefCCDAValidationResult> mdhtResults = doMDHTValidation(validationObjective, referenceFileName, ccdaFileContents);
        if(mdhtResults != null && !mdhtResults.isEmpty()) {
            logger.info("Adding MDHT results");
            validatorResults.addAll(mdhtResults);
        }

        boolean isSchemaErrorInMdhtResults = mdhtResultsHaveSchemaError(mdhtResults);
        boolean isObjectiveAllowingVocabularyValidation = objectiveAllowsVocabularyValidation(validationObjective);
        if (!isSchemaErrorInMdhtResults && isObjectiveAllowingVocabularyValidation) {
            List<RefCCDAValidationResult> vocabResults = doVocabularyValidation(validationObjective, referenceFileName, ccdaFileContents);
            if(vocabResults != null && !vocabResults.isEmpty()) {
                logger.info("Adding Vocabulary results");
                validatorResults.addAll(vocabResults);
            }

            if (objectiveAllowsContentValidation(validationObjective)) {
                List<RefCCDAValidationResult> contentResults = doContentValidation(validationObjective, referenceFileName, ccdaFileContents);
                if (contentResults != null && !contentResults.isEmpty()) {
                    logger.info("Adding Content results");
                    validatorResults.addAll(contentResults);
                }
            } else {
                logger.info("Skipping Content validation due to: "
                        + "validationObjective (" + (validationObjective != null ? validationObjective : "null objective")
                        + ") is not relevant or valid for Content validation");
            }
        } else {
            String separator = !isObjectiveAllowingVocabularyValidation && isSchemaErrorInMdhtResults ? " and " : "";
            logger.info("Skipping Vocabulary (and thus Content) validation due to: " 
                    + (isObjectiveAllowingVocabularyValidation ? "" : "validationObjective POSTed: " 
                    + (validationObjective != null ? validationObjective : "null objective") + separator) 
                    + (isSchemaErrorInMdhtResults ? "C-CDA Schema error(s) found" : ""));
        }
    } catch (IOException e) {
        throw new RuntimeException("Error getting CCDA contents from provided file", e);
    } finally {
        closeFileInputStream(ccdaFileInputStream);
    }
    return validatorResults;
}
Project: gtfs-lib    File: Entity.java
/**
 * The main entry point into an Entity.Loader. Interprets each row of a CSV file within a zip file as a single
 * GTFS entity, and loads them into a table.
 *
 * @param zip the zip file from which to read a table
 */
public void loadTable(ZipFile zip) throws IOException {
    ZipEntry entry = zip.getEntry(tableName + ".txt");
    if (entry == null) {
        Enumeration<? extends ZipEntry> entries = zip.entries();
        // check if table is contained within sub-directory
        while (entries.hasMoreElements()) {
            ZipEntry e = entries.nextElement();
            if (e.getName().endsWith(tableName + ".txt")) {
                entry = e;
                feed.errors.add(new TableInSubdirectoryError(tableName, entry.getName().replace(tableName + ".txt", "")));
            }
        }
        /* This GTFS table did not exist in the zip. */
        if (this.isRequired()) {
            feed.errors.add(new MissingTableError(tableName));
        } else {
            LOG.info("Table {} was missing but it is not required.", tableName);
        }

        if (entry == null) return;
    }
    LOG.info("Loading GTFS table {} from {}", tableName, entry);
    InputStream zis = zip.getInputStream(entry);
    // skip any byte order mark that may be present. Files must be UTF-8,
    // but the GTFS spec says that "files that include the UTF byte order mark are acceptable"
    InputStream bis = new BOMInputStream(zis);
    CsvReader reader = new CsvReader(bis, ',', Charset.forName("UTF8"));
    this.reader = reader;
    boolean hasHeaders = reader.readHeaders();
    if (!hasHeaders) {
        feed.errors.add(new EmptyTableError(tableName));
    }
    while (reader.readRecord()) {
        // reader.getCurrentRecord() is zero-based and does not include the header line, keep our own row count
        if (++row % 500000 == 0) {
            LOG.info("Record number {}", human(row));
        }
        loadOneRow(); // Call subclass method to produce an entity from the current row.
    }
    if (row == 0) {
        feed.errors.add(new EmptyTableError(tableName));
    }
}
Project: eMonocot    File: BOMIgnoringBufferedReaderFactory.java
@Override
public BufferedReader create(Resource resource, String encoding)
        throws UnsupportedEncodingException, IOException {
    BOMInputStream bomInputStream = new BOMInputStream(resource.getInputStream());
    return new BufferedReader(new InputStreamReader(bomInputStream, encoding));
}
Project: powop    File: BOMIgnoringBufferedReaderFactory.java
@Override
public BufferedReader create(Resource resource, String encoding)
        throws UnsupportedEncodingException, IOException {
    BOMInputStream bomInputStream = new BOMInputStream(resource.getInputStream());
    return new BufferedReader(new InputStreamReader(bomInputStream, encoding));
}
Project: spring-usc    File: EncodingDetector.java
public static InputStreamReader getInputStreamReader(InputStream is, String encoding) throws IOException {

    logger.debug("Reading stream: using encoding: " + encoding);
    BOMInputStream bis = new BOMInputStream(is); //So that we can remove the BOM
    return new InputStreamReader(bis, encoding);
}