Java 类weka.core.converters.ConverterUtils.DataSink 实例源码

项目:CraTer    文件:InsMerge.java   
/**
     * <p>Merges the ARFF datasets listed in {@code path} into a single dataset and writes the
     * result to a new ARFF file whose name starts with {@code dirpath}.
     * </p>
     * <p>The merged dataset is created with the attributes returned by {@code getStandAttrs()}
     * and uses its last attribute as the class. The output file is named
     * {@code dirpath + "total3500" + <current time in millis> + ".arff"}.
     * </p>
     * @param path paths of the ARFF files to merge, processed in array order
     * @param dirpath prefix prepended verbatim to the output file name; it must already end
     *                with a path separator if it is meant to denote a directory
     * @throws Exception propagated from reading a source file or writing the merged file
     */
    public static void getIns(String[] path, String dirpath) throws Exception{

        /* Empty dataset that accumulates the instances of every source file. */
        Instances total = new Instances("total3500", getStandAttrs(), 1);
        total.setClassIndex(total.numAttributes() - 1);

        for (String file : path) {
            // Each source dataset is only needed while it is being copied, so it is kept in a
            // loop-local variable instead of an array that would pin all of them in memory.
            Instances part = DataSource.read(file);
            part.setClassIndex(part.numAttributes() - 1);

            total.addAll(part);
            System.out.println("adding " + file + " " + part.numInstances());
        }

        /* The timestamp keeps successive runs from overwriting each other's output. */
        String totalName = dirpath + "total3500" + System.currentTimeMillis() + ".arff";

        DataSink.write(totalName, total);
        System.out.println("Writing the data into [" + totalName + "] successfully.\n");
    }
项目:CraTer    文件:RandomGenerator.java   
/**
 * <p>Generates a random sample of {@code num} instances from the ARFF file at {@code path},
 * keeping the InTrace/OutTrace proportion of the original data, and writes it to
 * {@code "files/generated/" + filterName(path) + rand + ".arff"}.
 * </p>
 * <p>The data is shuffled with {@code new Random(rand)}, split into its "InTrace" and "OutTrace"
 * instances (instances with any other class label are ignored), and the first
 * {@code (int) (num * inTraceRatio)} InTrace instances plus the remaining
 * {@code num - thatCount} OutTrace instances are copied into the sample.
 * </p>
 * @param path original arff file to be sampled
 * @param rand random seed used for shuffling; also appended to the output file name
 * @param num the number of instances to select
 * @throws IndexOutOfBoundsException if the file does not hold enough InTrace or OutTrace
 *         instances to build a sample of {@code num} instances
 * @throws Exception propagated from reading the source file or writing the sample
 * */
public static void generateARFF(String path, int rand, int num) throws Exception{
    /* Read the original dataset; the last attribute is the class. */
    Instances data = DataSource.read(path);
    data.setClassIndex(data.numAttributes()-1);

    /* Shuffle so that taking a prefix of each class below yields a random sample. */
    data.randomize(new Random(rand));

    /* dataIn collects the instances of the InTrace class. */
    Instances dataIn = new Instances("dataIn", InsMerge.getStandAttrs(), 1);
    dataIn.setClassIndex(dataIn.numAttributes() - 1);

    /* dataOut collects the instances of the OutTrace class. */
    Instances dataOut = new Instances("dataOut", InsMerge.getStandAttrs(), 1);
    dataOut.setClassIndex(dataOut.numAttributes() - 1);

    /* Partition the shuffled data by class label in a single pass (order within each class is kept). */
    for(int i=0; i<data.numInstances(); i++){
        String label = data.get(i).stringValue(data.classAttribute());
        if("InTrace".equals(label)){
            dataIn.add(data.get(i));
        } else if("OutTrace".equals(label)){
            dataOut.add(data.get(i));
        }
    }

    /* In/Out ratio of the original dataset. */
    int inTrace = dataIn.numInstances();
    int outTrace = dataOut.numInstances();
    double ratioI = inTrace*1.0/(outTrace + inTrace);

    /* Number of instances to take from each class. */
    int intrace = (int) (num * ratioI);
    int outtrace = num - intrace;

    /* Fail before copying, with a readable message, when a class cannot supply its share. */
    if(intrace > inTrace || outtrace > outTrace){
        throw new IndexOutOfBoundsException("Cannot sample " + num + " instances from " + path
                + ": need " + intrace + " InTrace and " + outtrace + " OutTrace instances, but only "
                + inTrace + " and " + outTrace + " are available");
    }

    /* New dataset holding the sample.
       NOTE(review): the relation name "dataIn" looks like a copy-paste leftover; it is kept
       unchanged so that generated files do not change. */
    Instances train = new Instances("dataIn", InsMerge.getStandAttrs(), 1);
    train.setClassIndex(train.numAttributes() - 1);

    /* Take the first intrace instances of the shuffled InTrace data. */
    for(int i=0; i<intrace; i++){
        train.add(dataIn.get(i));
    }

    /* Take the first outtrace instances of the shuffled OutTrace data. */
    for(int j=0; j<outtrace; j++){
        train.add(dataOut.get(j));
    }

    /* Save the sample under files/generated/, named after the source file and the seed. */
    String filename = "files/generated/" + filterName(path) + rand + ".arff";
    DataSink.write(filename, train);

}