A Simple Lucene Search Example


Project environment:

1. Import the required jar packages

IKAnalyzer3.2.0Stable.jar
lucene-analyzers-3.0.1.jar
lucene-core-3.0.1.jar
lucene-highlighter-3.0.1.jar
lucene-memory-3.0.1.jar

 

2. Write a demo

a. Create an entity:

DROP TABLE IF EXISTS `article`;
CREATE TABLE `article` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `title` varchar(20) DEFAULT NULL,
  `content` varchar(5000) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

 

public class Article {

    private int id;
    private String title;
    private String content;

    // getters, setters, and toString omitted
}
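In this demo the test code simply constructs Article objects by hand, but in a real project the data to be indexed would normally come from the article table above. The following is only a rough sketch of that step, not part of the original example: the class name ArticleLoader, the JDBC URL, and the credentials are placeholders you would replace with your own.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.List;

// Hypothetical loader: maps rows of the `article` table to Article objects with plain JDBC.
public class ArticleLoader {

    public static List<Article> loadArticles() throws Exception {
        List<Article> articles = new ArrayList<Article>();
        Class.forName("com.mysql.jdbc.Driver"); // register the driver (needed for older JDBC drivers)
        Connection conn = DriverManager.getConnection(
                "jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=utf8", // placeholder URL
                "root", "password");                                                       // placeholder credentials
        try {
            PreparedStatement ps = conn.prepareStatement("SELECT id, title, content FROM article");
            ResultSet rs = ps.executeQuery();
            while (rs.next()) {
                Article article = new Article();
                article.setId(rs.getInt("id"));              // maps `id` int(11)
                article.setTitle(rs.getString("title"));     // maps `title` varchar(20)
                article.setContent(rs.getString("content")); // maps `content` varchar(5000)
                articles.add(article);
            }
            rs.close();
            ps.close();
        } finally {
            conn.close();
        }
        return articles;
    }
}

Each Article returned here could then be handed to the save method of the IndexDao shown below.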

b. Create helper classes that provide the common methods:

// A class that wraps paged query results:
@SuppressWarnings("rawtypes")
public class QueryResult {

    private List list;  // the current page of results
    private int count;  // total number of matches

    // constructor (list, count), getters and setters omitted
}

// A class that creates the index directory, analyzer, IndexWriter and IndexSearcher:
public class LuceneUtils {

    private static Directory directory;          // directory where the index is stored
    private static Analyzer analyzer;            // analyzer (tokenizer)
    private static IndexWriter indexWriter;      // initialized at startup; used to build the index
    private static IndexSearcher indexSearcher;  // used for queries

    static {
        try {
            // Load lucene.properties, which contains the index directory path, e.g. path=D:\\IindexSearch
            Properties prop = new Properties();
            InputStream inStream = LuceneUtils.class.getClassLoader().getResourceAsStream("lucene.properties");
            // InputStream inStream = ClassLoader.getSystemResourceAsStream("lucene.properties");
            prop.load(inStream);

            directory = FSDirectory.open(new File(prop.getProperty("path")));
            analyzer = new StandardAnalyzer(Version.LUCENE_30);
            // initialized when the application starts
            indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);

            // close resources when the JVM exits
            Runtime.getRuntime().addShutdownHook(new Thread() {
                public void run() {
                    try {
                        indexWriter.close();
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            });
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static Document objectToDocument(Object obj) {
        Article article = (Article) obj;
        // convert the Article to a Lucene Document
        Document doc = new Document();
        String idStr = NumericUtils.intToPrefixCoded(article.getId());
        doc.add(new Field("id", idStr, Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("title", article.getTitle(), Store.YES, Index.ANALYZED));
        doc.add(new Field("content", article.getContent(), Store.YES, Index.ANALYZED));
        return doc;
    }

    public static Object documentToObject(Document doc) {
        Article article = new Article();
        // convert the Document back to an Article;
        // the prefix-coded id string is converted back to an int
        int id = NumericUtils.prefixCodedToInt(doc.get("id"));
        article.setId(id);
        article.setTitle(doc.get("title"));
        article.setContent(doc.get("content"));
        return article;
    }

    public static IndexWriter getIndexWriter() {
        return indexWriter;
    }

    public static IndexSearcher getIndexSearch() {
        // open a searcher for running queries
        try {
            indexSearcher = new IndexSearcher(directory);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return indexSearcher;
    }

    public static Directory getDirectory() {
        return directory;
    }

    public static Analyzer getAnalyzer() {
        return analyzer;
    }
}

c. Create the CRUD methods for the index:

public class IndexDao {

    /**
     * Save to the index.
     */
    public void save(Article article) {
        try {
            // convert the Article to a Document
            Document doc = LuceneUtils.objectToDocument(article);

            // write it to the index
            IndexWriter indexWriter = LuceneUtils.getIndexWriter();
            indexWriter.addDocument(doc);
            indexWriter.commit();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Delete from the index. A Term identifies a keyword contained in the specified field.
     */
    public void delete(Article article) {
        String idStr = NumericUtils.intToPrefixCoded(article.getId());
        Term term = new Term("id", idStr);
        try {
            IndexWriter indexWriter = LuceneUtils.getIndexWriter();
            indexWriter.deleteDocuments(term); // delete the documents that contain the given Term
            indexWriter.commit();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Update the index.
     */
    public void update(Article article) {
        // create the Term that identifies the document
        String idStr = NumericUtils.intToPrefixCoded(article.getId());
        Term term = new Term("id", idStr);

        // prepare the new Document
        Document doc = LuceneUtils.objectToDocument(article);
        try {
            IndexWriter indexWriter = LuceneUtils.getIndexWriter();
            indexWriter.updateDocument(term, doc); // replace the documents matching the Term with the new Document
            indexWriter.commit();

            // equivalent to: delete first, then add
            // indexWriter.deleteDocuments(term);
            // indexWriter.addDocument(doc);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Query the index.
     */
    public QueryResult query(String queryString, int first, int max) {
        IndexSearcher indexSearcher = null;
        try {
            // MultiFieldQueryParser allows querying across multiple fields
            int totail = first + max;
            // 1. parse the query string into a Query object
            QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_30,
                    new String[] { "id", "title", "content" }, LuceneUtils.getAnalyzer());
            Query query = parser.parse(queryString);

            // 2. run the search
            indexSearcher = LuceneUtils.getIndexSearch();
            // sort condition: by id, ascending
            Sort sort = new Sort(new SortField("id", SortField.INT));
            TopDocs topDocs = indexSearcher.search(query, null, totail, sort); // return at most the top n hits
            int count = topDocs.totalHits;           // total number of matches
            ScoreDoc[] scoreDoc = topDocs.scoreDocs; // at most the top n result docs

            // build the highlighter: prefix, suffix, and summary (fragment) size
            Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
            Scorer scorer = new QueryScorer(query); // scores fragments against the query
            Highlighter highlighter = new Highlighter(formatter, scorer);
            highlighter.setTextFragmenter(new SimpleFragmenter(100)); // summary size

            // 3. extract the data for the requested page
            int endIndex = Math.min(totail, scoreDoc.length);
            List<Article> list = new ArrayList<Article>();
            for (int i = first; i < endIndex; i++) { // start at `first` so paging works
                // float score = scoreDoc[i].score; // relevance score
                int docId = scoreDoc[i].doc;
                Document doc = indexSearcher.doc(docId);

                // highlight the title; returns null when the keyword is not found in it
                String text = highlighter.getBestFragment(LuceneUtils.getAnalyzer(), "title", doc.get("title"));
                if (text != null) {
                    doc.getField("title").setValue(text);
                }
                // convert the Document back to an Article
                Article article = (Article) LuceneUtils.documentToObject(doc);
                list.add(article);
            }
            QueryResult queryResult = new QueryResult(list, count);
            return queryResult;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}

d. Test the dao methods:

// the tests below assume a field holding the dao under test
private IndexDao indexDao = new IndexDao();

@Test
public void testSave() {
    // create and index 20 sample articles
    Article article = new Article();
    for (int i = 0; i < 20; i++) {
        article.setId(i);
        article.setTitle("Lucene搜索的方式"); // "Ways of searching with Lucene"
        article.setContent("全文检索是计算机程序通过扫描文章中的每一个词,对每一个词建立一个索引,指明该词在文章中出现的次数和位置。"); // a short passage describing full-text indexing
        indexDao.save(article);
    }
}

@Test
public void testDelete() {
    Article article = new Article();
    article.setId(1);
    indexDao.delete(article);
}

@Test
public void testUpdate() {
    // update the article with id 1
    Article article = new Article();
    article.setId(1);
    article.setTitle("Lucene搜索的方式");
    article.setContent("跟新索引库测试是否正确"); // "update the index and check that it works"
    indexDao.update(article);
}

@Test
@SuppressWarnings("unchecked")
public void testQuery() {
    String queryString = "Lucene";
    QueryResult queryResult = indexDao.query(queryString, 0, 10);
    System.out.println("count---------->" + queryResult.getCount());
    List<Article> list = (List<Article>) queryResult.getList();
    for (Article article : list) {
        System.err.println("list--------->" + article.toString());
    }
}
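The query method takes a start position (first) and a page size (max). If you prefer to think in terms of a page number and page size, the small helper below shows one way to wire them together. It is only an illustrative sketch: the names PageQueryDemo and queryPage are mine, not part of the original code, and it relies only on the QueryResult getters used above.

import java.util.List;

// Hypothetical helper: translates (pageNo, pageSize) into the first/max
// arguments expected by IndexDao.query and prints the current page.
public class PageQueryDemo {

    @SuppressWarnings("unchecked")
    public static void queryPage(IndexDao indexDao, String queryString, int pageNo, int pageSize) {
        int first = (pageNo - 1) * pageSize; // index of the first hit on this page
        QueryResult result = indexDao.query(queryString, first, pageSize);

        int totalPages = (result.getCount() + pageSize - 1) / pageSize; // ceiling division
        System.out.println("page " + pageNo + " of " + totalPages
                + ", total hits: " + result.getCount());

        for (Article article : (List<Article>) result.getList()) {
            System.out.println(article.toString());
        }
    }

    public static void main(String[] args) {
        queryPage(new IndexDao(), "Lucene", 1, 10); // first page, 10 hits per page
    }
}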

 

This completes the basic Lucene example. What follows is a template I put together after refactoring and encapsulating the code above during my own use. The add, delete, and update operations share a lot of duplicated code, so I extracted the repeated parts into common methods and exposed the parts that vary, namely how entities are converted to and from documents, through an interface with abstract methods; a caller that needs to index its own entities simply implements and overrides those interface methods. The encapsulation is based mainly on the template method design pattern.

The code is as follows:

public class ComsLuceneUtils {

    public interface CallBackQuery {

        // non-paged version
        // public abstract List documentToObject(Document doc, List list) throws Exception;

        // paged version
        public abstract QueryResult documentToObject(Document doc, List list, int count) throws Exception;

        public abstract Document objectToDocument(Object obj) throws Exception;
    }

    /**
     * Save to the index.
     * callBack: supplies the object-to-Document conversion.
     */
    public static void save(Article article, CallBackQuery callBack) {
        try {
            // convert the Article to a Document
            Document doc = callBack.objectToDocument(article);

            // write it to the index
            IndexWriter indexWriter = LuceneUtils.getIndexWriter();
            indexWriter.addDocument(doc);
            indexWriter.commit();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Delete from the index. A Term identifies a keyword contained in the specified field.
     */
    public static void delete(Integer id) {
        String idStr = NumericUtils.intToPrefixCoded(id);
        Term term = new Term("id", idStr);
        try {
            IndexWriter indexWriter = LuceneUtils.getIndexWriter();
            indexWriter.deleteDocuments(term); // delete the documents that contain the given Term
            indexWriter.commit();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Update the index.
     * callBack: supplies the object-to-Document conversion.
     */
    public static void update(Article article, CallBackQuery callBack) {
        // create the Term that identifies the document
        String idStr = NumericUtils.intToPrefixCoded(article.getId());
        Term term = new Term("id", idStr);
        try {
            // convert the object to a Document
            Document doc = callBack.objectToDocument(article);

            IndexWriter indexWriter = LuceneUtils.getIndexWriter();
            indexWriter.updateDocument(term, doc); // replace the documents matching the Term with the new Document
            indexWriter.commit();

            // equivalent to: delete first, then add
            // indexWriter.deleteDocuments(term);
            // indexWriter.addDocument(doc);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Paged query method.
     *
     * queryString: the query string
     * first: start position (for paging)
     * max: maximum number of results (for paging)
     * parameter: the fields to query
     * callback: supplies the Document-to-object conversion
     * QueryResult: the returned result set
     */
    public static QueryResult query(String queryString, int first, int max, String[] parameter, CallBackQuery callback) {
        IndexSearcher indexSearcher = null;
        try {
            // MultiFieldQueryParser allows querying across multiple fields
            int totail = first + max;
            // 1. parse the query string into a Query object
            QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_30, parameter, LuceneUtils.getAnalyzer());
            Query query = parser.parse(queryString);

            // 2. run the search
            indexSearcher = LuceneUtils.getIndexSearch();
            // sort condition: by id, ascending
            Sort sort = new Sort(new SortField("id", SortField.INT));
            TopDocs topDocs = indexSearcher.search(query, null, totail, sort); // return at most the top n hits
            int count = topDocs.totalHits;           // total number of matches
            ScoreDoc[] scoreDoc = topDocs.scoreDocs; // at most the top n result docs

            // build the highlighter: prefix, suffix, and summary (fragment) size
            Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
            Scorer scorer = new QueryScorer(query); // scores fragments against the query
            Highlighter highlighter = new Highlighter(formatter, scorer);
            highlighter.setTextFragmenter(new SimpleFragmenter(100)); // summary size

            // 3. extract the data for the requested page
            int endIndex = Math.min(totail, scoreDoc.length);
            List<Article> list = new ArrayList<Article>();
            for (int i = first; i < endIndex; i++) { // start at `first` so paging works
                // float score = scoreDoc[i].score; // relevance score
                int docId = scoreDoc[i].doc;
                Document doc = indexSearcher.doc(docId);

                // highlight every queried field; getBestFragment returns null when the keyword is not found in it
                if (parameter.length > 0) {
                    for (int j = 0; j < parameter.length; j++) {
                        String text = highlighter.getBestFragment(LuceneUtils.getAnalyzer(), parameter[j], doc.get(parameter[j]));
                        if (text != null) {
                            doc.getField(parameter[j]).setValue(text);
                        }
                    }
                }

                // let the callback convert the Document and add it to the list
                callback.documentToObject(doc, list, count);
            }

            // return the result set
            QueryResult queryResult = new QueryResult(list, count);
            return queryResult;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}

Test method:

@Test
@SuppressWarnings("unchecked")
public void testQuery() {
    String queryString = "Lucene";
    String[] param = new String[] { "id", "title", "content" };

    try {
        QueryResult queryResult = ComsLuceneUtils.query(queryString, 0, 20, param,
                new cn.net.yixun.util.ComsLuceneUtils.CallBackQuery() {
            public QueryResult documentToObject(Document doc, List list, int count) throws Exception {
                // convert the Document back to an Article; the prefix-coded id string becomes an int again
                Article article = new Article();
                int id = NumericUtils.prefixCodedToInt(doc.get("id"));
                article.setId(id);
                article.setTitle(doc.get("title"));
                article.setContent(doc.get("content"));
                list.add(article);
                QueryResult queryResult = new QueryResult(list, count);
                return queryResult;
            }

            public Document objectToDocument(Object obj) throws Exception {
                return null; // not needed for a query
            }
        });

        List<Article> list = (List<Article>) queryResult.getList();
        for (Article article : list) {
            System.err.println("list--------->" + article.toString());
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
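The test above only exercises the query path, so its callback leaves objectToDocument returning null. To reuse the same template for saving and updating as well, a single callback class can implement both conversions. The sketch below is only an illustration (the class name ArticleCallBack is mine, not part of the original code); the field mapping simply mirrors LuceneUtils.objectToDocument shown earlier.

import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.util.NumericUtils;

// Hypothetical reusable callback for Article: implements both directions of the
// conversion so the same instance can be passed to save/update and to query.
public class ArticleCallBack implements ComsLuceneUtils.CallBackQuery {

    @SuppressWarnings("unchecked")
    public QueryResult documentToObject(Document doc, List list, int count) throws Exception {
        Article article = new Article();
        article.setId(NumericUtils.prefixCodedToInt(doc.get("id")));
        article.setTitle(doc.get("title"));
        article.setContent(doc.get("content"));
        list.add(article);
        return new QueryResult(list, count);
    }

    public Document objectToDocument(Object obj) throws Exception {
        Article article = (Article) obj;
        Document doc = new Document();
        // mirror the field mapping used in LuceneUtils.objectToDocument
        doc.add(new Field("id", NumericUtils.intToPrefixCoded(article.getId()), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("title", article.getTitle(), Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("content", article.getContent(), Field.Store.YES, Field.Index.ANALYZED));
        return doc;
    }
}

With such a class in place, indexing and querying both go through the template, for example ComsLuceneUtils.save(article, new ArticleCallBack()) and ComsLuceneUtils.query(queryString, 0, 20, param, new ArticleCallBack()).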