Java 类org.apache.hadoop.util.LineReader 实例源码

项目:hadoop    文件:TestKeyValueTextInputFormat.java   
public void testUTF8() throws Exception {
  LineReader in = null;

  try {
    in = makeStream("abcd\u20acbdcd\u20ac");
    Text line = new Text();
    in.readLine(line);
    assertEquals("readLine changed utf8 characters",
                 "abcd\u20acbdcd\u20ac", line.toString());
    in = makeStream("abc\u200axyz");
    in.readLine(line);
    assertEquals("split on fake newline", "abc\u200axyz", line.toString());
  } finally {
    if (in != null) {
      in.close();
    }
  }
}
项目:hadoop    文件:TestKeyValueTextInputFormat.java   
public void testNewLines() throws Exception {
  LineReader in = null;
  try {
    in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
    Text out = new Text();
    in.readLine(out);
    assertEquals("line1 length", 1, out.getLength());
    in.readLine(out);
    assertEquals("line2 length", 2, out.getLength());
    in.readLine(out);
    assertEquals("line3 length", 0, out.getLength());
    in.readLine(out);
    assertEquals("line4 length", 3, out.getLength());
    in.readLine(out);
    assertEquals("line5 length", 4, out.getLength());
    in.readLine(out);
    assertEquals("line5 length", 5, out.getLength());
    assertEquals("end of file", 0, in.readLine(out));
  } finally {
    if (in != null) {
      in.close();
    }
  }
}
项目:hadoop    文件:TestTextInputFormat.java   
/**
 * Test readLine for correct interpretation of maxLineLength
 * (returned string should be clipped at maxLineLength, and the
 * remaining bytes on the same line should be thrown out).
 * Also check that returned value matches the string length.
 * Varies buffer size to stress test.
 *
 * @throws Exception
 */
@Test (timeout=5000)
public void testMaxLineLength() throws Exception {
  final String STR = "a\nbb\n\nccc\rdddd\r\neeeee";
  final int STRLENBYTES = STR.getBytes().length;
  Text out = new Text();
  for (int bufsz = 1; bufsz < STRLENBYTES+1; ++bufsz) {
    LineReader in = makeStream(STR, bufsz);
    int c = 0;
    c += in.readLine(out, 1);
    assertEquals("line1 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    assertEquals("line2 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    assertEquals("line3 length, bufsz: "+bufsz, 0, out.getLength());
    c += in.readLine(out, 3);
    assertEquals("line4 length, bufsz: "+bufsz, 3, out.getLength());
    c += in.readLine(out, 10);
    assertEquals("line5 length, bufsz: "+bufsz, 4, out.getLength());
    c += in.readLine(out, 8);
    assertEquals("line5 length, bufsz: "+bufsz, 5, out.getLength());
    assertEquals("end of file, bufsz: " +bufsz, 0, in.readLine(out));
    assertEquals("total bytes, bufsz: "+bufsz, c, STRLENBYTES);
  }
}
项目:hadoop    文件:TestMRKeyValueTextInputFormat.java   
@Test
public void testNewLines() throws Exception {
  LineReader in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
  Text out = new Text();
  in.readLine(out);
  assertEquals("line1 length", 1, out.getLength());
  in.readLine(out);
  assertEquals("line2 length", 2, out.getLength());
  in.readLine(out);
  assertEquals("line3 length", 0, out.getLength());
  in.readLine(out);
  assertEquals("line4 length", 3, out.getLength());
  in.readLine(out);
  assertEquals("line5 length", 4, out.getLength());
  in.readLine(out);
  assertEquals("line5 length", 5, out.getLength());
  assertEquals("end of file", 0, in.readLine(out));
}
项目:hadoop    文件:MultiFileWordCount.java   
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {

  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  boolean skipFirstLine = false;

  //open the file
  fileIn = fs.open(path);
  if (startOffset != 0) {
    skipFirstLine = true;
    --startOffset;
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  if (skipFirstLine) {  // skip first line and re-establish "startOffset".
    startOffset += reader.readLine(new Text(), 0,
                (int)Math.min((long)Integer.MAX_VALUE, end - startOffset));
  }
  this.pos = startOffset;
}
项目:SparkSeq    文件:SingleFastqInputFormat.java   
public SingleFastqRecordReader(Configuration conf, FileSplit split) throws IOException {
    file = split.getPath();
    start = split.getStart();
    end = start + split.getLength();

    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(file);

    CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec        = codecFactory.getCodec(file);

    if (codec == null) { // no codec.  Uncompressed file.
        positionAtFirstRecord(fileIn);
        inputStream = fileIn;
    } else {
        // compressed file
        if (start != 0) {
            throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")");
        }

        inputStream = codec.createInputStream(fileIn);
        end = Long.MAX_VALUE; // read until the end of the file
    }

    lineReader = new LineReader(inputStream);
}
项目:ditb    文件:CompactionTool.java   
/**
 * Returns a split for each store files directory using the block location
 * of each file as locality reference.
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  List<InputSplit> splits = new ArrayList<InputSplit>();
  List<FileStatus> files = listStatus(job);

  Text key = new Text();
  for (FileStatus file: files) {
    Path path = file.getPath();
    FileSystem fs = path.getFileSystem(job.getConfiguration());
    LineReader reader = new LineReader(fs.open(path));
    long pos = 0;
    int n;
    try {
      while ((n = reader.readLine(key)) > 0) {
        String[] hosts = getStoreDirHosts(fs, path);
        splits.add(new FileSplit(path, pos, n, hosts));
        pos += n;
      }
    } finally {
      reader.close();
    }
  }

  return splits;
}
项目:aliyun-oss-hadoop-fs    文件:TestKeyValueTextInputFormat.java   
public void testUTF8() throws Exception {
  LineReader in = null;

  try {
    in = makeStream("abcd\u20acbdcd\u20ac");
    Text line = new Text();
    in.readLine(line);
    assertEquals("readLine changed utf8 characters",
                 "abcd\u20acbdcd\u20ac", line.toString());
    in = makeStream("abc\u200axyz");
    in.readLine(line);
    assertEquals("split on fake newline", "abc\u200axyz", line.toString());
  } finally {
    if (in != null) {
      in.close();
    }
  }
}
项目:aliyun-oss-hadoop-fs    文件:TestKeyValueTextInputFormat.java   
public void testNewLines() throws Exception {
  LineReader in = null;
  try {
    in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
    Text out = new Text();
    in.readLine(out);
    assertEquals("line1 length", 1, out.getLength());
    in.readLine(out);
    assertEquals("line2 length", 2, out.getLength());
    in.readLine(out);
    assertEquals("line3 length", 0, out.getLength());
    in.readLine(out);
    assertEquals("line4 length", 3, out.getLength());
    in.readLine(out);
    assertEquals("line5 length", 4, out.getLength());
    in.readLine(out);
    assertEquals("line5 length", 5, out.getLength());
    assertEquals("end of file", 0, in.readLine(out));
  } finally {
    if (in != null) {
      in.close();
    }
  }
}
项目:aliyun-oss-hadoop-fs    文件:TestTextInputFormat.java   
/**
 * Test readLine for correct interpretation of maxLineLength
 * (returned string should be clipped at maxLineLength, and the
 * remaining bytes on the same line should be thrown out).
 * Also check that returned value matches the string length.
 * Varies buffer size to stress test.
 *
 * @throws Exception
 */
@Test (timeout=5000)
public void testMaxLineLength() throws Exception {
  final String STR = "a\nbb\n\nccc\rdddd\r\neeeee";
  final int STRLENBYTES = STR.getBytes().length;
  Text out = new Text();
  for (int bufsz = 1; bufsz < STRLENBYTES+1; ++bufsz) {
    LineReader in = makeStream(STR, bufsz);
    int c = 0;
    c += in.readLine(out, 1);
    assertEquals("line1 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    assertEquals("line2 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    assertEquals("line3 length, bufsz: "+bufsz, 0, out.getLength());
    c += in.readLine(out, 3);
    assertEquals("line4 length, bufsz: "+bufsz, 3, out.getLength());
    c += in.readLine(out, 10);
    assertEquals("line5 length, bufsz: "+bufsz, 4, out.getLength());
    c += in.readLine(out, 8);
    assertEquals("line5 length, bufsz: "+bufsz, 5, out.getLength());
    assertEquals("end of file, bufsz: " +bufsz, 0, in.readLine(out));
    assertEquals("total bytes, bufsz: "+bufsz, c, STRLENBYTES);
  }
}
项目:aliyun-oss-hadoop-fs    文件:TestMRKeyValueTextInputFormat.java   
@Test
public void testNewLines() throws Exception {
  LineReader in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
  Text out = new Text();
  in.readLine(out);
  assertEquals("line1 length", 1, out.getLength());
  in.readLine(out);
  assertEquals("line2 length", 2, out.getLength());
  in.readLine(out);
  assertEquals("line3 length", 0, out.getLength());
  in.readLine(out);
  assertEquals("line4 length", 3, out.getLength());
  in.readLine(out);
  assertEquals("line5 length", 4, out.getLength());
  in.readLine(out);
  assertEquals("line5 length", 5, out.getLength());
  assertEquals("end of file", 0, in.readLine(out));
}
项目:aliyun-oss-hadoop-fs    文件:MultiFileWordCount.java   
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {

  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  boolean skipFirstLine = false;

  //open the file
  fileIn = fs.open(path);
  if (startOffset != 0) {
    skipFirstLine = true;
    --startOffset;
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  if (skipFirstLine) {  // skip first line and re-establish "startOffset".
    startOffset += reader.readLine(new Text(), 0,
                (int)Math.min((long)Integer.MAX_VALUE, end - startOffset));
  }
  this.pos = startOffset;
}
项目:SOAPgaea    文件:WholeGenomeShare.java   
public static boolean distributeCache(String chrList, Job job, String cacheName)
        throws IOException, URISyntaxException {
    job.addCacheFile(new URI(chrList + "#" + cacheName));

    Configuration conf = job.getConfiguration();
    Path refPath = new Path(chrList);
    FileSystem fs = refPath.getFileSystem(conf);
    FSDataInputStream refin = fs.open(refPath);
    LineReader in = new LineReader(refin);
    Text line = new Text();

    String chrFile = "";
    String[] chrs = new String[3];
    while ((in.readLine(line)) != 0) {
        chrFile = line.toString();
        chrs = chrFile.split("\t");
        File fileTest = new File(chrs[1]);
        if (fileTest.isFile()) {
            chrs[1] = "file://" + chrs[1];
        }
        job.addCacheFile(new URI(chrs[1] + "#" + chrs[0]));
    }
    in.close();
    refin.close();
    return true;
}
项目:SOAPgaea    文件:WholeGenomeShare.java   
protected void loadChromosomeList(Path refPath) throws NumberFormatException, IOException{
    Configuration conf = new Configuration();
    FileSystem fs = refPath.getFileSystem(conf);
    FSDataInputStream refin = fs.open(refPath);
    LineReader in = new LineReader(refin);
    Text line = new Text();

    String chrFile = "";
    String[] chrs = new String[3];
    while((in.readLine(line)) != 0){
        chrFile = line.toString();
        chrs = chrFile.split("\t");

        // insert chr
        if(!addChromosome(chrs[0])) {
            in.close();
            throw new RuntimeException("map Chromosome "+chrs[1]+" Failed.");
        }
        setChromosome(chrs[1],chrs[0],Integer.parseInt(chrs[2]));
    }
    in.close();
}
项目:SOAPgaea    文件:FastqQualityControlReporterIO.java   
public void readFromHdfs(Path path, Configuration conf,
        FastqQualityControlReport report) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    FSDataInputStream FSinput = fs.open(path);

    LineReader lineReader = new LineReader(FSinput, conf);
    Text line = new Text();
    int sampleID = 0, i,cnt;
    while ((lineReader.readLine(line)) != 0) {
        sampleID = report.addCount(line.toString());
        if(report.isPartitionNull())
            continue;

        for (i = 0; i < FastqQualityControlReport.BASE_STATIC_COUNT; i++) {
            cnt = lineReader.readLine(line);
            if(cnt == 0)
                continue;
            report.addBaseByPosition(sampleID, i, line.toString());
        }
    }

    lineReader.close();
}
项目:SOAPgaea    文件:WholeGenomeResultReport.java   
@Override
public void parseReport(LineReader lineReader, Text line, ReferenceShare genome) throws IOException {
    super.parseReport(lineReader, line, genome);
    String lineString = line.toString();
    if(lineString.contains("Cover Information")) {
        if(lineReader.readLine(line) > 0 && line.getLength() != 0) {
            String[] splitArray = line.toString().split("\t");
            WholeGenomeCoverReport coverReport = null;
            for(String keyValue : splitArray) {
                if(keyValue.split(" ").length == 1) {
                    String chrName = keyValue;
                    if(!coverReports.containsKey(chrName)) {
                        ChromosomeInformationShare chrInfo = genome.getChromosomeInfo(chrName);
                        coverReport = new WholeGenomeCoverReport(chrInfo);
                        coverReports.put(chrName, coverReport);
                    } else {
                        coverReport = coverReports.get(chrName);
                    }
                } else {
                    assert coverReport != null;
                    coverReport.parse(keyValue, genome);
                }
            }
        }
    }
}
项目:FEL    文件:WikipediaDocnoMapping.java   
/**
 * Creates a mappings file from the contents of a flat text file containing docid to docno
 * mappings. This method is used by {@link WikipediaDocnoMappingBuilder} internally.
 *
 * @param inputFile flat text file containing docid to docno mappings
 * @param outputFile output mappings file
 * @throws IOException
 */
static public void writeDocnoMappingData(FileSystem fs, String inputFile, int n, String outputFile) throws IOException {
    LOG.info("Writing " + n + " docids to " + outputFile);
    LineReader reader = new LineReader(fs.open(new Path(inputFile)));

    int cnt = 0;
    Text line = new Text();

    FSDataOutputStream out = fs.create(new Path(outputFile), true);
    out.writeInt(n);
    for (int i = 0; i < n; i++) {
        reader.readLine(line);
        String[] arr = line.toString().split("\\t");
        out.writeInt(Integer.parseInt(arr[0]));
        cnt++;
        if (cnt % 100000 == 0) {
            LOG.info(cnt + " articles");
        }
    }
    out.close();
    reader.close();
    LOG.info("Done!");
}
项目:big-c    文件:TestKeyValueTextInputFormat.java   
public void testUTF8() throws Exception {
  LineReader in = null;

  try {
    in = makeStream("abcd\u20acbdcd\u20ac");
    Text line = new Text();
    in.readLine(line);
    assertEquals("readLine changed utf8 characters",
                 "abcd\u20acbdcd\u20ac", line.toString());
    in = makeStream("abc\u200axyz");
    in.readLine(line);
    assertEquals("split on fake newline", "abc\u200axyz", line.toString());
  } finally {
    if (in != null) {
      in.close();
    }
  }
}
项目:big-c    文件:TestKeyValueTextInputFormat.java   
public void testNewLines() throws Exception {
  LineReader in = null;
  try {
    in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
    Text out = new Text();
    in.readLine(out);
    assertEquals("line1 length", 1, out.getLength());
    in.readLine(out);
    assertEquals("line2 length", 2, out.getLength());
    in.readLine(out);
    assertEquals("line3 length", 0, out.getLength());
    in.readLine(out);
    assertEquals("line4 length", 3, out.getLength());
    in.readLine(out);
    assertEquals("line5 length", 4, out.getLength());
    in.readLine(out);
    assertEquals("line5 length", 5, out.getLength());
    assertEquals("end of file", 0, in.readLine(out));
  } finally {
    if (in != null) {
      in.close();
    }
  }
}
项目:big-c    文件:TestTextInputFormat.java   
/**
 * Test readLine for correct interpretation of maxLineLength
 * (returned string should be clipped at maxLineLength, and the
 * remaining bytes on the same line should be thrown out).
 * Also check that returned value matches the string length.
 * Varies buffer size to stress test.
 *
 * @throws Exception
 */
@Test (timeout=5000)
public void testMaxLineLength() throws Exception {
  final String STR = "a\nbb\n\nccc\rdddd\r\neeeee";
  final int STRLENBYTES = STR.getBytes().length;
  Text out = new Text();
  for (int bufsz = 1; bufsz < STRLENBYTES+1; ++bufsz) {
    LineReader in = makeStream(STR, bufsz);
    int c = 0;
    c += in.readLine(out, 1);
    assertEquals("line1 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    assertEquals("line2 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    assertEquals("line3 length, bufsz: "+bufsz, 0, out.getLength());
    c += in.readLine(out, 3);
    assertEquals("line4 length, bufsz: "+bufsz, 3, out.getLength());
    c += in.readLine(out, 10);
    assertEquals("line5 length, bufsz: "+bufsz, 4, out.getLength());
    c += in.readLine(out, 8);
    assertEquals("line5 length, bufsz: "+bufsz, 5, out.getLength());
    assertEquals("end of file, bufsz: " +bufsz, 0, in.readLine(out));
    assertEquals("total bytes, bufsz: "+bufsz, c, STRLENBYTES);
  }
}
项目:big-c    文件:TestMRKeyValueTextInputFormat.java   
@Test
public void testNewLines() throws Exception {
  LineReader in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
  Text out = new Text();
  in.readLine(out);
  assertEquals("line1 length", 1, out.getLength());
  in.readLine(out);
  assertEquals("line2 length", 2, out.getLength());
  in.readLine(out);
  assertEquals("line3 length", 0, out.getLength());
  in.readLine(out);
  assertEquals("line4 length", 3, out.getLength());
  in.readLine(out);
  assertEquals("line5 length", 4, out.getLength());
  in.readLine(out);
  assertEquals("line5 length", 5, out.getLength());
  assertEquals("end of file", 0, in.readLine(out));
}
项目:big-c    文件:MultiFileWordCount.java   
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {

  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  boolean skipFirstLine = false;

  //open the file
  fileIn = fs.open(path);
  if (startOffset != 0) {
    skipFirstLine = true;
    --startOffset;
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  if (skipFirstLine) {  // skip first line and re-establish "startOffset".
    startOffset += reader.readLine(new Text(), 0,
                (int)Math.min((long)Integer.MAX_VALUE, end - startOffset));
  }
  this.pos = startOffset;
}
项目:LCIndex-HBase-0.94.16    文件:CompactionTool.java   
/**
 * Returns a split for each store files directory using the block location
 * of each file as locality reference.
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  List<InputSplit> splits = new ArrayList<InputSplit>();
  List<FileStatus> files = listStatus(job);

  Text key = new Text();
  for (FileStatus file: files) {
    Path path = file.getPath();
    FileSystem fs = path.getFileSystem(job.getConfiguration());
    LineReader reader = new LineReader(fs.open(path));
    long pos = 0;
    int n;
    try {
      while ((n = reader.readLine(key)) > 0) {
        String[] hosts = getStoreDirHosts(fs, path);
        splits.add(new FileSplit(path, pos, n, hosts));
        pos += n;
      }
    } finally {
      reader.close();
    }
  }

  return splits;
}
项目:hadoop-2.6.0-cdh5.4.3    文件:TestKeyValueTextInputFormat.java   
public void testUTF8() throws Exception {
  LineReader in = null;

  try {
    in = makeStream("abcd\u20acbdcd\u20ac");
    Text line = new Text();
    in.readLine(line);
    assertEquals("readLine changed utf8 characters",
                 "abcd\u20acbdcd\u20ac", line.toString());
    in = makeStream("abc\u200axyz");
    in.readLine(line);
    assertEquals("split on fake newline", "abc\u200axyz", line.toString());
  } finally {
    if (in != null) {
      in.close();
    }
  }
}
项目:hadoop-2.6.0-cdh5.4.3    文件:TestKeyValueTextInputFormat.java   
public void testNewLines() throws Exception {
  LineReader in = null;
  try {
    in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
    Text out = new Text();
    in.readLine(out);
    assertEquals("line1 length", 1, out.getLength());
    in.readLine(out);
    assertEquals("line2 length", 2, out.getLength());
    in.readLine(out);
    assertEquals("line3 length", 0, out.getLength());
    in.readLine(out);
    assertEquals("line4 length", 3, out.getLength());
    in.readLine(out);
    assertEquals("line5 length", 4, out.getLength());
    in.readLine(out);
    assertEquals("line5 length", 5, out.getLength());
    assertEquals("end of file", 0, in.readLine(out));
  } finally {
    if (in != null) {
      in.close();
    }
  }
}
项目:hadoop-2.6.0-cdh5.4.3    文件:TestTextInputFormat.java   
/**
 * Test readLine for correct interpretation of maxLineLength
 * (returned string should be clipped at maxLineLength, and the
 * remaining bytes on the same line should be thrown out).
 * Also check that returned value matches the string length.
 * Varies buffer size to stress test.
 *
 * @throws Exception
 */
@Test (timeout=5000)
public void testMaxLineLength() throws Exception {
  final String STR = "a\nbb\n\nccc\rdddd\r\neeeee";
  final int STRLENBYTES = STR.getBytes().length;
  Text out = new Text();
  for (int bufsz = 1; bufsz < STRLENBYTES+1; ++bufsz) {
    LineReader in = makeStream(STR, bufsz);
    int c = 0;
    c += in.readLine(out, 1);
    assertEquals("line1 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    assertEquals("line2 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    assertEquals("line3 length, bufsz: "+bufsz, 0, out.getLength());
    c += in.readLine(out, 3);
    assertEquals("line4 length, bufsz: "+bufsz, 3, out.getLength());
    c += in.readLine(out, 10);
    assertEquals("line5 length, bufsz: "+bufsz, 4, out.getLength());
    c += in.readLine(out, 8);
    assertEquals("line5 length, bufsz: "+bufsz, 5, out.getLength());
    assertEquals("end of file, bufsz: " +bufsz, 0, in.readLine(out));
    assertEquals("total bytes, bufsz: "+bufsz, c, STRLENBYTES);
  }
}
项目:hadoop-2.6.0-cdh5.4.3    文件:TestMRKeyValueTextInputFormat.java   
@Test
public void testNewLines() throws Exception {
  LineReader in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
  Text out = new Text();
  in.readLine(out);
  assertEquals("line1 length", 1, out.getLength());
  in.readLine(out);
  assertEquals("line2 length", 2, out.getLength());
  in.readLine(out);
  assertEquals("line3 length", 0, out.getLength());
  in.readLine(out);
  assertEquals("line4 length", 3, out.getLength());
  in.readLine(out);
  assertEquals("line5 length", 4, out.getLength());
  in.readLine(out);
  assertEquals("line5 length", 5, out.getLength());
  assertEquals("end of file", 0, in.readLine(out));
}
项目:hadoop-2.6.0-cdh5.4.3    文件:MultiFileWordCount.java   
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {

  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  boolean skipFirstLine = false;

  //open the file
  fileIn = fs.open(path);
  if (startOffset != 0) {
    skipFirstLine = true;
    --startOffset;
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  if (skipFirstLine) {  // skip first line and re-establish "startOffset".
    startOffset += reader.readLine(new Text(), 0,
                (int)Math.min((long)Integer.MAX_VALUE, end - startOffset));
  }
  this.pos = startOffset;
}
项目:hadoop-2.6.0-cdh5.4.3    文件:TestTextInputFormat.java   
/**
 * Test readLine for correct interpretation of maxLineLength
 * (returned string should be clipped at maxLineLength, and the
 * remaining bytes on the same line should be thrown out).
 * Also check that returned value matches the string length.
 * Varies buffer size to stress test.
 *
 * @throws Exception
 */
@Test
public void testMaxLineLength() throws Exception {
  final String STR = "a\nbb\n\nccc\rdddd\r\neeeee";
  final int STRLENBYTES = STR.getBytes().length;
  Text out = new Text();
  for (int bufsz = 1; bufsz < STRLENBYTES+1; ++bufsz) {
    LineReader in = makeStream(STR, bufsz);
    int c = 0;
    c += in.readLine(out, 1);
    assertEquals("line1 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    assertEquals("line2 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    assertEquals("line3 length, bufsz: "+bufsz, 0, out.getLength());
    c += in.readLine(out, 3);
    assertEquals("line4 length, bufsz: "+bufsz, 3, out.getLength());
    c += in.readLine(out, 10);
    assertEquals("line5 length, bufsz: "+bufsz, 4, out.getLength());
    c += in.readLine(out, 8);
    assertEquals("line5 length, bufsz: "+bufsz, 5, out.getLength());
    assertEquals("end of file, bufsz: " +bufsz, 0, in.readLine(out));
    assertEquals("total bytes, bufsz: "+bufsz, c, STRLENBYTES);
  }
}
项目:hadoop-2.6.0-cdh5.4.3    文件:TestKeyValueTextInputFormat.java   
public void testNewLines() throws Exception {
  LineReader in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
  Text out = new Text();
  in.readLine(out);
  assertEquals("line1 length", 1, out.getLength());
  in.readLine(out);
  assertEquals("line2 length", 2, out.getLength());
  in.readLine(out);
  assertEquals("line3 length", 0, out.getLength());
  in.readLine(out);
  assertEquals("line4 length", 3, out.getLength());
  in.readLine(out);
  assertEquals("line5 length", 4, out.getLength());
  in.readLine(out);
  assertEquals("line5 length", 5, out.getLength());
  assertEquals("end of file", 0, in.readLine(out));
}
项目:hadoop-2.6.0-cdh5.4.3    文件:TestMRKeyValueTextInputFormat.java   
public void testNewLines() throws Exception {
  LineReader in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
  Text out = new Text();
  in.readLine(out);
  assertEquals("line1 length", 1, out.getLength());
  in.readLine(out);
  assertEquals("line2 length", 2, out.getLength());
  in.readLine(out);
  assertEquals("line3 length", 0, out.getLength());
  in.readLine(out);
  assertEquals("line4 length", 3, out.getLength());
  in.readLine(out);
  assertEquals("line5 length", 4, out.getLength());
  in.readLine(out);
  assertEquals("line5 length", 5, out.getLength());
  assertEquals("end of file", 0, in.readLine(out));
}
项目:HIndex    文件:CompactionTool.java   
/**
 * Returns a split for each store files directory using the block location
 * of each file as locality reference.
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  List<InputSplit> splits = new ArrayList<InputSplit>();
  List<FileStatus> files = listStatus(job);

  Text key = new Text();
  for (FileStatus file: files) {
    Path path = file.getPath();
    FileSystem fs = path.getFileSystem(job.getConfiguration());
    LineReader reader = new LineReader(fs.open(path));
    long pos = 0;
    int n;
    try {
      while ((n = reader.readLine(key)) > 0) {
        String[] hosts = getStoreDirHosts(fs, path);
        splits.add(new FileSplit(path, pos, n, hosts));
        pos += n;
      }
    } finally {
      reader.close();
    }
  }

  return splits;
}
项目:hadoop-EAR    文件:HarIndex.java   
/**
 * Constructor that reads the contents of the index file.
 * @param in An input stream to the index file.
 * @param max The size of the index file.
 * @throws IOException
 */
public HarIndex(InputStream in, long max) throws IOException {
  LineReader lineReader = new LineReader(in);
  Text text = new Text();
  long nread = 0;
  while (nread < max) {
    int n = lineReader.readLine(text);
    nread += n;
    String line = text.toString();
    try {
      parseLine(line);
    } catch (UnsupportedEncodingException e) {
      throw new IOException("UnsupportedEncodingException after reading " +
                            nread + "bytes");
    }
  }
}
项目:hadoop-EAR    文件:HarFileSystem.java   
public int getHarVersion() throws IOException {
  FSDataInputStream masterIn = fs.open(masterIndex);
  LineReader lmaster = new LineReader(masterIn, getConf());
  Text line = new Text();
  lmaster.readLine(line);
  try {
    masterIn.close();
  } catch(IOException e){
    //disregard it.
    // its a read.
  }
  String versionLine = line.toString();
  String[] arr = versionLine.split(" ");
  int version = Integer.parseInt(arr[0]);
  return version;
}
项目:hadoop-plus    文件:TestKeyValueTextInputFormat.java   
public void testNewLines() throws Exception {
  LineReader in = null;
  try {
    in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
    Text out = new Text();
    in.readLine(out);
    assertEquals("line1 length", 1, out.getLength());
    in.readLine(out);
    assertEquals("line2 length", 2, out.getLength());
    in.readLine(out);
    assertEquals("line3 length", 0, out.getLength());
    in.readLine(out);
    assertEquals("line4 length", 3, out.getLength());
    in.readLine(out);
    assertEquals("line5 length", 4, out.getLength());
    in.readLine(out);
    assertEquals("line5 length", 5, out.getLength());
    assertEquals("end of file", 0, in.readLine(out));
  } finally {
    if (in != null) {
      in.close();
    }
  }
}
项目:hops    文件:TestKeyValueTextInputFormat.java   
@Test
public void testUTF8() throws Exception {
  LineReader in = null;

  try {
    in = makeStream("abcd\u20acbdcd\u20ac");
    Text line = new Text();
    in.readLine(line);
    assertEquals("readLine changed utf8 characters",
                 "abcd\u20acbdcd\u20ac", line.toString());
    in = makeStream("abc\u200axyz");
    in.readLine(line);
    assertEquals("split on fake newline", "abc\u200axyz", line.toString());
  } finally {
    if (in != null) {
      in.close();
    }
  }
}
项目:hadoop-plus    文件:TestTextInputFormat.java   
/**
 * Test readLine for correct interpretation of maxLineLength
 * (returned string should be clipped at maxLineLength, and the
 * remaining bytes on the same line should be thrown out).
 * Also check that returned value matches the string length.
 * Varies buffer size to stress test.
 *
 * @throws Exception
 */
@Test (timeout=5000)
public void testMaxLineLength() throws Exception {
  final String STR = "a\nbb\n\nccc\rdddd\r\neeeee";
  final int STRLENBYTES = STR.getBytes().length;
  Text out = new Text();
  for (int bufsz = 1; bufsz < STRLENBYTES+1; ++bufsz) {
    LineReader in = makeStream(STR, bufsz);
    int c = 0;
    c += in.readLine(out, 1);
    assertEquals("line1 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    assertEquals("line2 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    assertEquals("line3 length, bufsz: "+bufsz, 0, out.getLength());
    c += in.readLine(out, 3);
    assertEquals("line4 length, bufsz: "+bufsz, 3, out.getLength());
    c += in.readLine(out, 10);
    assertEquals("line5 length, bufsz: "+bufsz, 4, out.getLength());
    c += in.readLine(out, 8);
    assertEquals("line5 length, bufsz: "+bufsz, 5, out.getLength());
    assertEquals("end of file, bufsz: " +bufsz, 0, in.readLine(out));
    assertEquals("total bytes, bufsz: "+bufsz, c, STRLENBYTES);
  }
}
项目:hadoop-plus    文件:TestMRKeyValueTextInputFormat.java   
@Test
public void testNewLines() throws Exception {
  LineReader in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
  Text out = new Text();
  in.readLine(out);
  assertEquals("line1 length", 1, out.getLength());
  in.readLine(out);
  assertEquals("line2 length", 2, out.getLength());
  in.readLine(out);
  assertEquals("line3 length", 0, out.getLength());
  in.readLine(out);
  assertEquals("line4 length", 3, out.getLength());
  in.readLine(out);
  assertEquals("line5 length", 4, out.getLength());
  in.readLine(out);
  assertEquals("line5 length", 5, out.getLength());
  assertEquals("end of file", 0, in.readLine(out));
}
项目:hadoop-plus    文件:MultiFileWordCount.java   
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {

  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  boolean skipFirstLine = false;

  //open the file
  fileIn = fs.open(path);
  if (startOffset != 0) {
    skipFirstLine = true;
    --startOffset;
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  if (skipFirstLine) {  // skip first line and re-establish "startOffset".
    startOffset += reader.readLine(new Text(), 0,
                (int)Math.min((long)Integer.MAX_VALUE, end - startOffset));
  }
  this.pos = startOffset;
}
项目:pbase    文件:CompactionTool.java   
/**
 * Returns a split for each store files directory using the block location
 * of each file as locality reference.
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  List<InputSplit> splits = new ArrayList<InputSplit>();
  List<FileStatus> files = listStatus(job);

  Text key = new Text();
  for (FileStatus file: files) {
    Path path = file.getPath();
    FileSystem fs = path.getFileSystem(job.getConfiguration());
    LineReader reader = new LineReader(fs.open(path));
    long pos = 0;
    int n;
    try {
      while ((n = reader.readLine(key)) > 0) {
        String[] hosts = getStoreDirHosts(fs, path);
        splits.add(new FileSplit(path, pos, n, hosts));
        pos += n;
      }
    } finally {
      reader.close();
    }
  }

  return splits;
}