/****
* @author YangXin
* @info Clusters news articles using canopy generation and k-means clustering
*/
package unitNine;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.lucene.analysis.Analyzer;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.vectorizer.DictionaryVectorizer;
import org.apache.mahout.vectorizer.DocumentProcessor;
import org.apache.mahout.vectorizer.tfidf.TFIDFConverter;
public class ReutersToSparseVectors {
public static void main(String args[]) throws Exception {
int minSupport = 5;
int minDf = 5;
int maxDFPercent = 95;
int maxNGramSize = 1;
float minLLRValue = 50;
int reduceTasks = 1;
int chunkSize &#
使用canopy生成和k-means聚类对新闻进行聚类
最新推荐文章于 2024-03-08 23:06:38 发布