/**
 * Assembles a single command line {@link Option} from its individual parts.
 *
 * @param name         long name of the option; also used as the argument name when {@code hasArg} is true
 * @param shortName    short name of the option, or {@code null} to register no short name
 * @param description  description attached to the option
 * @param hasArg       whether the option takes an argument
 * @param min          minimum number of argument values (only used when {@code hasArg} is true)
 * @param max          maximum number of argument values (only used when {@code hasArg} is true)
 * @param required     whether the option is marked as required
 * @param defaultValue default argument value, or {@code null} for none (only used when {@code hasArg} is true)
 * @return the created option
 */
protected static Option buildOption(String name, String shortName, String description, boolean hasArg, int min, int max, boolean required, String defaultValue) {
  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(name);
  option.withDescription(description);
  option.withRequired(required);

  if (shortName != null) {
    option.withShortName(shortName);
  }

  // Flag-style option: nothing more to configure.
  if (!hasArg) {
    return option.create();
  }

  ArgumentBuilder argument = new ArgumentBuilder().withName(name).withMinimum(min).withMaximum(max);
  if (defaultValue != null) {
    argument = argument.withDefault(defaultValue);
  }
  option.withArgument(argument.create());

  return option.create();
}
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create(); Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create(); Option vectorOpt = obuilder.withLongName("vector").withRequired(false).withArgument( abuilder.withName("v").withMinimum(1).withMaximum(1).create()).withDescription( "The vector implementation to use.").withShortName("v").create(); Option helpOpt = DefaultOptionCreator.helpOption(); Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption( vectorOpt).withOption(helpOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } Path input = new Path(cmdLine.getValue(inputOpt, "testdata").toString()); Path output = new Path(cmdLine.getValue(outputOpt, "output").toString()); String vectorClassName = cmdLine.getValue(vectorOpt, "org.apache.mahout.math.RandomAccessSparseVector").toString(); //runJob(input, output, vectorClassName); } catch (OptionException e) { InputDriver.log.error("Exception parsing command line: ", e); CommandLineUtil.printHelp(group); } }
/**
 * Builds the default option for specifying the job input directory: optional,
 * short name "i", taking exactly one value. Intended for all clustering jobs
 * plus others.
 *
 * @return a builder pre-configured for the input option
 */
public static DefaultOptionBuilder inputOption() {
  ArgumentBuilder pathArg = new ArgumentBuilder().withName(INPUT_OPTION).withMinimum(1).withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(INPUT_OPTION);
  option.withShortName("i");
  option.withRequired(false);
  option.withArgument(pathArg.create());
  option.withDescription("Path to job input directory.");
  return option;
}
/**
 * Builds the default option for specifying the initial clusters input
 * directory: required, short name "c", taking exactly one value. Intended for
 * FuzzyKmeans and Kmeans.
 *
 * @return a builder pre-configured for the clusters-in option
 */
public static DefaultOptionBuilder clustersInOption() {
  ArgumentBuilder pathArg =
      new ArgumentBuilder().withName(CLUSTERS_IN_OPTION).withMinimum(1).withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(CLUSTERS_IN_OPTION);
  option.withShortName("c");
  option.withRequired(true);
  option.withArgument(pathArg.create());
  option.withDescription(
      "The path to the initial clusters directory. Must be a SequenceFile of some type of Cluster");
  return option;
}
/**
 * Builds the default option for specifying the output directory: optional,
 * short name "o", taking exactly one value. Intended for all clustering jobs
 * plus others.
 *
 * @return a builder pre-configured for the output option
 */
public static DefaultOptionBuilder outputOption() {
  ArgumentBuilder pathArg = new ArgumentBuilder().withName(OUTPUT_OPTION).withMinimum(1).withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(OUTPUT_OPTION);
  option.withShortName("o");
  option.withRequired(false);
  option.withArgument(pathArg.create());
  option.withDescription("The directory pathname for output.");
  return option;
}
/**
 * Builds the default flag option for overwriting the output directory:
 * optional, short name "ow", no argument. Intended for all clustering jobs.
 *
 * @return a builder pre-configured for the overwrite option
 */
public static DefaultOptionBuilder overwriteOption() {
  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(OVERWRITE_OPTION);
  option.withShortName("ow");
  option.withRequired(false);
  option.withDescription("If present, overwrite the output directory before running job");
  return option;
}
/**
 * Builds the default option for choosing the distance measure class: optional,
 * short name "dm", taking exactly one value that defaults to the class name of
 * {@code SquaredEuclideanDistanceMeasure}. Intended for Canopy, FuzzyKmeans,
 * Kmeans and MeanShift.
 *
 * @return a builder pre-configured for the distance measure option
 */
public static DefaultOptionBuilder distanceMeasureOption() {
  ArgumentBuilder classNameArg = new ArgumentBuilder()
      .withName(DISTANCE_MEASURE_OPTION)
      .withDefault(SquaredEuclideanDistanceMeasure.class.getName())
      .withMinimum(1)
      .withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(DISTANCE_MEASURE_OPTION);
  option.withShortName("dm");
  option.withRequired(false);
  option.withArgument(classNameArg.create());
  option.withDescription("The classname of the DistanceMeasure. Default is SquaredEuclidean");
  return option;
}
/**
 * Builds the default option for choosing sequential or parallel execution:
 * optional, short name "xm", taking exactly one value that defaults to
 * {@code MAPREDUCE_METHOD}. Intended for Canopy, FuzzyKmeans, Kmeans,
 * MeanShift and Dirichlet.
 *
 * @return a builder pre-configured for the execution method option
 */
public static DefaultOptionBuilder methodOption() {
  ArgumentBuilder methodArg = new ArgumentBuilder()
      .withName(METHOD_OPTION)
      .withDefault(MAPREDUCE_METHOD)
      .withMinimum(1)
      .withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(METHOD_OPTION);
  option.withShortName("xm");
  option.withRequired(false);
  option.withArgument(methodArg.create());
  option.withDescription(
      "The execution method to use: sequential or mapreduce. Default is mapreduce");
  return option;
}
/**
 * Builds the default option for the T1 threshold: required, taking exactly one
 * value. The same constant is registered as both long and short name. Intended
 * for Canopy and MeanShift.
 *
 * @return a builder pre-configured for the T1 option
 */
public static DefaultOptionBuilder t1Option() {
  ArgumentBuilder thresholdArg = new ArgumentBuilder().withName(T1_OPTION).withMinimum(1).withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(T1_OPTION);
  option.withRequired(true);
  option.withArgument(thresholdArg.create());
  option.withDescription("T1 threshold value");
  option.withShortName(T1_OPTION);
  return option;
}
/**
 * Builds the default option for the T2 threshold: required, taking exactly one
 * value. The same constant is registered as both long and short name. Intended
 * for Canopy and MeanShift.
 *
 * @return a builder pre-configured for the T2 option
 */
public static DefaultOptionBuilder t2Option() {
  ArgumentBuilder thresholdArg = new ArgumentBuilder().withName(T2_OPTION).withMinimum(1).withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(T2_OPTION);
  option.withRequired(true);
  option.withArgument(thresholdArg.create());
  option.withDescription("T2 threshold value");
  option.withShortName(T2_OPTION);
  return option;
}
/**
 * Builds the default option for the T3 (Reducer T1) threshold: optional,
 * taking exactly one value. The same constant is registered as both long and
 * short name. Intended for Canopy.
 *
 * @return a builder pre-configured for the T3 option
 */
public static DefaultOptionBuilder t3Option() {
  ArgumentBuilder thresholdArg = new ArgumentBuilder().withName(T3_OPTION).withMinimum(1).withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(T3_OPTION);
  option.withRequired(false);
  option.withArgument(thresholdArg.create());
  option.withDescription("T3 (Reducer T1) threshold value");
  option.withShortName(T3_OPTION);
  return option;
}
/**
 * Builds the default option for the T4 (Reducer T2) threshold: optional,
 * taking exactly one value. The same constant is registered as both long and
 * short name. Intended for Canopy.
 *
 * @return a builder pre-configured for the T4 option
 */
public static DefaultOptionBuilder t4Option() {
  ArgumentBuilder thresholdArg = new ArgumentBuilder().withName(T4_OPTION).withMinimum(1).withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(T4_OPTION);
  option.withRequired(false);
  option.withArgument(thresholdArg.create());
  option.withDescription("T4 (Reducer T2) threshold value");
  option.withShortName(T4_OPTION);
  return option;
}
/**
 * Builds the default option for the cluster filter: optional, taking exactly
 * one value.
 *
 * <p>Two short names are registered, in this order: "cf" and then the same
 * constant that is used as the long name.
 *
 * @return a DefaultOptionBuilder for the clusterFilter option
 */
public static DefaultOptionBuilder clusterFilterOption() {
  ArgumentBuilder filterArg =
      new ArgumentBuilder().withName(CLUSTER_FILTER_OPTION).withMinimum(1).withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(CLUSTER_FILTER_OPTION);
  option.withShortName("cf");
  option.withRequired(false);
  option.withArgument(filterArg.create());
  option.withDescription("Cluster filter suppresses small canopies from mapper");
  // Second short name, kept deliberately so existing invocations keep working.
  option.withShortName(CLUSTER_FILTER_OPTION);
  return option;
}
/** * Returns a default command line option for specification of max number of * iterations. Used by Dirichlet, FuzzyKmeans, Kmeans, LDA */ public static DefaultOptionBuilder maxIterationsOption() { // default value used by LDA which overrides withRequired(false) return new DefaultOptionBuilder() .withLongName(MAX_ITERATIONS_OPTION) .withRequired(true) .withShortName("x") .withArgument( new ArgumentBuilder().withName(MAX_ITERATIONS_OPTION) .withDefault("-1").withMinimum(1).withMaximum(1).create()) .withDescription("The maximum number of iterations."); }
/**
 * Builds the default option for the number of clusters to create: optional,
 * short name "k", taking exactly one value (the argument itself is also named
 * "k"). Intended for Dirichlet, FuzzyKmeans and Kmeans.
 *
 * @return a builder pre-configured for the number-of-clusters option
 */
public static DefaultOptionBuilder numClustersOption() {
  ArgumentBuilder countArg = new ArgumentBuilder().withName("k").withMinimum(1).withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(NUM_CLUSTERS_OPTION);
  option.withRequired(false);
  option.withArgument(countArg.create());
  option.withDescription("The number of clusters to create");
  option.withShortName("k");
  return option;
}
/**
 * Builds the default option for the convergence delta: optional, short name
 * "cd", taking exactly one value with a default of "0.5". Intended for
 * FuzzyKmeans, Kmeans and MeanShift.
 *
 * @return a builder pre-configured for the convergence delta option
 */
public static DefaultOptionBuilder convergenceOption() {
  ArgumentBuilder deltaArg = new ArgumentBuilder()
      .withName(CONVERGENCE_DELTA_OPTION)
      .withDefault("0.5")
      .withMinimum(1)
      .withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(CONVERGENCE_DELTA_OPTION);
  option.withShortName("cd");
  option.withRequired(false);
  option.withArgument(deltaArg.create());
  option.withDescription("The convergence delta value. Default is 0.5");
  return option;
}
/**
 * Builds the default option for the maximum number of reducers: optional,
 * short name "r", taking exactly one value with a default of "2". Intended for
 * Dirichlet, FuzzyKmeans, Kmeans and LDA.
 *
 * @return a builder pre-configured for the max-reducers option
 * @deprecated no replacement is named here — TODO(review): document the preferred alternative
 */
@Deprecated
public static DefaultOptionBuilder numReducersOption() {
  ArgumentBuilder reducersArg = new ArgumentBuilder()
      .withName(MAX_REDUCERS_OPTION)
      .withDefault("2")
      .withMinimum(1)
      .withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(MAX_REDUCERS_OPTION);
  option.withShortName("r");
  option.withRequired(false);
  option.withArgument(reducersArg.create());
  option.withDescription("The number of reduce tasks. Defaults to 2");
  return option;
}
/**
 * Builds the default flag option that requests clustering after the
 * iterations: optional, short name "cl", no argument. Intended for all
 * clustering jobs except LDA.
 *
 * @return a builder pre-configured for the clustering option
 */
public static DefaultOptionBuilder clusteringOption() {
  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(CLUSTERING_OPTION);
  option.withShortName("cl");
  option.withRequired(false);
  option.withDescription("If present, run clustering after the iterations have taken place");
  return option;
}
/**
 * Builds the default option for naming a Lucene analyzer class: optional,
 * short name "an", taking exactly one value that defaults to the class name of
 * {@code DefaultAnalyzer}.
 *
 * @return {@link DefaultOptionBuilder} pre-configured for the analyzer option
 */
public static DefaultOptionBuilder analyzerOption() {
  ArgumentBuilder classNameArg = new ArgumentBuilder()
      .withName(ANALYZER_NAME_OPTION)
      .withDefault(DefaultAnalyzer.class.getName())
      .withMinimum(1)
      .withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(ANALYZER_NAME_OPTION);
  option.withRequired(false);
  option.withDescription("If present, the name of a Lucene analyzer class to use");
  option.withArgument(classNameArg.create());
  option.withShortName("an");
  return option;
}
/**
 * Builds the default option for the emitMostLikely flag: optional, short name
 * "e", taking exactly one value with a default of "true". Intended for
 * Dirichlet and FuzzyKmeans.
 *
 * @return a builder pre-configured for the emit-most-likely option
 */
public static DefaultOptionBuilder emitMostLikelyOption() {
  ArgumentBuilder flagArg = new ArgumentBuilder()
      .withName(EMIT_MOST_LIKELY_OPTION)
      .withDefault("true")
      .withMinimum(1)
      .withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(EMIT_MOST_LIKELY_OPTION);
  option.withShortName("e");
  option.withRequired(false);
  option.withArgument(flagArg.create());
  option.withDescription(
      "True if clustering should emit the most likely point only, "
          + "false for threshold clustering. Default is true");
  return option;
}
/**
 * Builds the default option for the clustering threshold: optional, short
 * name "t", taking exactly one value with a default of "0". Intended for
 * Dirichlet and FuzzyKmeans.
 *
 * @return a builder pre-configured for the threshold option
 */
public static DefaultOptionBuilder thresholdOption() {
  ArgumentBuilder thresholdArg = new ArgumentBuilder()
      .withName(THRESHOLD_OPTION)
      .withDefault("0")
      .withMinimum(1)
      .withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(THRESHOLD_OPTION);
  option.withShortName("t");
  option.withRequired(false);
  option.withArgument(thresholdArg.create());
  option.withDescription("The pdf threshold used for cluster determination. Default is 0");
  return option;
}
/**
 * Builds the default option for choosing the kernel profile class: optional,
 * short name "kp", taking exactly one value that defaults to the class name of
 * {@code TriangularKernelProfile}.
 *
 * @return a builder pre-configured for the kernel profile option
 */
public static DefaultOptionBuilder kernelProfileOption() {
  ArgumentBuilder classNameArg = new ArgumentBuilder()
      .withName(KERNEL_PROFILE_OPTION)
      .withDefault(TriangularKernelProfile.class.getName())
      .withMinimum(1)
      .withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(KERNEL_PROFILE_OPTION);
  option.withShortName("kp");
  option.withRequired(false);
  option.withArgument(classNameArg.create());
  option.withDescription(
      "The classname of the IKernelProfile. Default is TriangularKernelProfile");
  return option;
}
/**
 * Builds the default option that declares the input directory as already
 * containing MeanShiftCanopies: optional, short name "ic".
 *
 * <p>NOTE(review): the description reads like a presence-only flag, but the
 * option is configured to take exactly one argument value.
 *
 * @return a builder pre-configured for the input-is-canopies option
 */
public static DefaultOptionBuilder inputIsCanopiesOption() {
  ArgumentBuilder valueArg = new ArgumentBuilder()
      .withName(MeanShiftCanopyDriver.INPUT_IS_CANOPIES_OPTION)
      .withMinimum(1)
      .withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(MeanShiftCanopyDriver.INPUT_IS_CANOPIES_OPTION);
  option.withShortName("ic");
  option.withRequired(false);
  option.withArgument(valueArg.create());
  option.withDescription("If present, the input directory already contains MeanShiftCanopies");
  return option;
}
/**
 * Builds the default option for the outlier threshold: optional, taking
 * exactly one value. The same constant is registered as both long and short
 * name. Intended for cluster classification.
 *
 * @return a builder pre-configured for the outlier threshold option
 */
public static DefaultOptionBuilder outlierThresholdOption() {
  ArgumentBuilder thresholdArg =
      new ArgumentBuilder().withName(OUTLIER_THRESHOLD).withMinimum(1).withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(OUTLIER_THRESHOLD);
  option.withRequired(false);
  option.withArgument(thresholdArg.create());
  option.withDescription("Outlier threshold value");
  option.withShortName(OUTLIER_THRESHOLD);
  return option;
}
/**
 * Builds the default option for the number of reduce tasks: optional, short
 * name "r", taking exactly one value with a default of "2".
 *
 * @return a builder pre-configured for the number-of-reducers option
 */
public static DefaultOptionBuilder numReducersOption() {
  ArgumentBuilder reducersArg = new ArgumentBuilder()
      .withName(NUM_REDUCERS)
      .withDefault("2")
      .withMinimum(1)
      .withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(NUM_REDUCERS);
  option.withShortName("r");
  option.withRequired(false);
  option.withArgument(reducersArg.create());
  option.withDescription("The number of reduce tasks. Defaults to 2");
  return option;
}
/**
 * Builds the default option for the minimum cluster size in MinHash
 * clustering: optional, short name "mcs", taking exactly one value with a
 * default of "10".
 *
 * @return a builder pre-configured for the minimum cluster size option
 */
public static DefaultOptionBuilder minClusterSizeOption() {
  ArgumentBuilder sizeArg = new ArgumentBuilder()
      .withName(MIN_CLUSTER_SIZE)
      .withDefault("10")
      .withMinimum(1)
      .withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(MIN_CLUSTER_SIZE);
  option.withRequired(false);
  option.withArgument(sizeArg.create());
  option.withDescription("Minimum points inside a cluster");
  option.withShortName("mcs");
  return option;
}
/**
 * Builds the default option for the type of hash used in MinHash clustering:
 * optional, short name "ht", taking exactly one value with a default of
 * "murmur". The description lists linear, polynomial and murmur as the
 * available types; this builder itself does not validate the value.
 *
 * @return a builder pre-configured for the hash type option
 */
public static DefaultOptionBuilder hashTypeOption() {
  ArgumentBuilder typeArg = new ArgumentBuilder()
      .withName(HASH_TYPE)
      .withDefault("murmur")
      .withMinimum(1)
      .withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(HASH_TYPE);
  option.withRequired(false);
  option.withArgument(typeArg.create());
  option.withDescription(
      "Type of hash function to use. Available types: (linear, polynomial, murmur) ");
  option.withShortName("ht");
  return option;
}
/**
 * Builds the default option for the minimum size of a vector to be hashed:
 * optional, short name "mvs", taking exactly one value with a default of "5".
 *
 * @return a builder pre-configured for the minimum vector size option
 */
public static DefaultOptionBuilder minVectorSizeOption() {
  ArgumentBuilder sizeArg = new ArgumentBuilder()
      .withName(MIN_VECTOR_SIZE)
      .withDefault("5")
      .withMinimum(1)
      .withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(MIN_VECTOR_SIZE);
  option.withRequired(false);
  option.withArgument(sizeArg.create());
  option.withDescription("Minimum size of vector to be hashed");
  option.withShortName("mvs");
  return option;
}
/**
 * Builds the default option for the number of hash functions used in MinHash
 * clustering: optional, short name "nh", taking exactly one value with a
 * default of "10".
 *
 * @return a builder pre-configured for the number-of-hash-functions option
 */
public static DefaultOptionBuilder numHashFunctionsOption() {
  ArgumentBuilder countArg = new ArgumentBuilder()
      .withName(NUM_HASH_FUNCTIONS)
      .withDefault("10")
      .withMinimum(1)
      .withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(NUM_HASH_FUNCTIONS);
  option.withRequired(false);
  option.withArgument(countArg.create());
  option.withDescription("Number of hash functions to be used");
  option.withShortName("nh");
  return option;
}
/**
 * Builds the default option for the number of key groups used in MinHash
 * clustering: optional, short name "kg", taking exactly one value with a
 * default of "2".
 *
 * @return a builder pre-configured for the key groups option
 */
public static DefaultOptionBuilder keyGroupsOption() {
  ArgumentBuilder countArg = new ArgumentBuilder()
      .withName(KEY_GROUPS)
      .withDefault("2")
      .withMinimum(1)
      .withMaximum(1);

  DefaultOptionBuilder option = new DefaultOptionBuilder();
  option.withLongName(KEY_GROUPS);
  option.withRequired(false);
  option.withArgument(countArg.create());
  option.withDescription("Number of key groups to be used");
  option.withShortName("kg");
  return option;
}