lucene 全文检索数据库

北斗狼 贡献于2012-09-06

作者 user  创建于2010-05-27 05:03:00   修改者user  修改于2010-05-27 05:03:00字数6687

文档摘要:我们以前经常碰到搜索数据库的内容;用like %的sql语句; 如果数据量大而且多表查询时; 用lucene2那就可以解决速度问题。 lucene2搜索photo表的title,username,tagname,descr内容; 用一个例题来说明更直观;
关键词:

我们以前经常碰到搜索数据库的内容;用like %的sql语句; 如果数据量大而且多表查询时; 用lucene2那就可以解决速度问题。 lucene2搜索photo表的title,username,tagname,desr内容; 用一个例题来说明更直观; 此例题能搜索中文分词; (需要mysql5的jdbc包和lucene2的包): 1、数据库我用mysql5;建一个photo表;数据库名是test。 photo表有一下几个字段: CREATE TABLE `photo` ( `photo_id` int(11) NOT NULL auto_increment, `title` varchar(11) default NULL, `address` varchar(50) default NULL, `descr` text, `user_id` int(11) default NULL, `user_name` varchar(11) default NULL, `upload_time` date default NULL, `tag_name` varchar(11) default NULL, PRIMARY KEY (`photo_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=REDUNDANT; 2、java文件有4个: 文件Photo.java是数据库的photo表的操作文件; 内容如下: import java.sql.Connection; import java.util.ArrayList; import java.util.Date; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; public class Photo {  private long photoId;  private String title;  private String description;  private String address;  private String userName;  private long userId;  private String tag;  private Date date;  public String getAddress() {   return address;  }  public void setAddress(String address) {   this.address = address;  }  public String getDescription() {   return description;  }  public void setDescription(String description) {   this.description = description;  }  public long getPhotoId() {   return photoId;  }  public void setPhotoId(long photoId) {   this.photoId = photoId;  }  public String getTag() {   return tag;  }  public void setTag(String tag) {   this.tag = tag;  }  public String getTitle() {   return title;  }  public void setTitle(String title) {   this.title = title;  }  public long getUserId() {   return userId;  }  public void setUserId(long userId) {   this.userId = userId;  }  public String getUserName() {   return userName;  }  public void setUserName(String userName) {   this.userName = userName;  }  public static Photo[] loadPhotos(Connection con) throws Exception {   ArrayList list = new ArrayList();   PreparedStatement pstm = null;   
ResultSet rs = null;   String sql = "select photo_id,title,address,descr,user_id,user_name,upload_time,tag_name from photo";   try {    pstm = con.prepareStatement(sql);    rs = pstm.executeQuery();    while (rs.next()) {     Photo photo = new Photo();     photo.setPhotoId(rs.getLong(1));     photo.setTitle(rs.getString(2));     photo.setAddress(rs.getString(3));     photo.setDescription(rs.getString(4));     photo.setUserId(rs.getLong(5));     photo.setUserName(rs.getString(6));     photo.setDate(rs.getTimestamp(7));     photo.setTag(rs.getString(8));          list.add(photo);    }    System.out.println("com.upolestar.kmpm.po.Photo.java  ========"+list.size());   } catch (SQLException e) {    e.printStackTrace();   } finally {    if (rs != null) {     rs.close();    }    if (pstm != null) {     pstm.close();    }   }   return (Photo[]) list.toArray(new Photo[list.size()]);  }  public Date getDate() {   return date;  }  public void setDate(Date date) {   this.date = date;  } } 文件IndexerFile.java是把数据库的内容备份成索引文件到磁盘中去; 内容如下: package com.upolestar.kmpm.service; import java.io.IOException; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import com.upolestar.kmpm.po.Photo; public class IndexerFile {  public static int indexFile(String indexDir, Photo[] list)    throws IOException {   IndexWriter writer = new IndexWriter(indexDir, new StandardAnalyzer(),     true);   writer.setUseCompoundFile(false);   for (int i = 0; i < list.length; i++) {    Document doc = new Document();    doc.add(new Field("photoId", String.valueOf(list[i].getPhotoId()),      Field.Store.YES, Field.Index.NO));    if (list[i].getTitle() != null)     doc.add(new Field("title", list[i].getTitle(), Field.Store.YES,       Field.Index.TOKENIZED));    if (list[i].getDescription() != null)     doc.add(new Field("description", list[i].getDescription(),       
Field.Store.YES, Field.Index.TOKENIZED));    doc.add(new Field("address", list[i].getAddress(), Field.Store.YES,      Field.Index.NO));    doc.add(new Field("userName", list[i].getUserName(),      Field.Store.YES, Field.Index.TOKENIZED));    doc.add(new Field("userId", String.valueOf(list[i].getUserId()),      Field.Store.YES, Field.Index.NO));    if (list[i].getTag().length() > 0)     doc.add(new Field("tag", list[i].getTag(), Field.Store.YES,       Field.Index.TOKENIZED));    doc.add(new Field("uploadTime", list[i].getDate().toLocaleString(), Field.Store.YES,      Field.Index.TOKENIZED));    writer.addDocument(doc);   }   int numIndexed = writer.docCount();   writer.optimize();   writer.close();   return numIndexed;  } } 文件SearcherFile.java是搜索磁盘索引文件内容的; 内容如下: package com.upolestar.kmpm.service; import java.io.IOException; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.search.Hits; import org.apache.lucene.search.Query; import org.apache.lucene.search.Searcher; public class SearcherFile {  public static void search(Searcher searcher, String[] q)    throws IOException, ParseException {   Analyzer analyzer = new StandardAnalyzer();   String[] fields = { "title", "description", "tag", "userName" };   Query query = MultiFieldQueryParser.parse(q, fields, analyzer);   Hits hits = searcher.search(query);   System.out.println("SearcherFile======"+hits.length());   for (int i = 0; i < hits.length(); i++) {    Document doc = hits.doc(i);    System.out.println(doc.get("photoId") + "==="      + doc.get("uploadTime")+ "==="      + doc.get("title")+ "==="      + doc.get("description")+ "==="      + doc.get("tag")+ "==="      + doc.get("userName"));   }  } } 文件test.java是操作的主文件; 内容如下: package com.upolestar.kmpm.test; import java.io.IOException; 
import java.sql.Connection; import java.sql.SQLException; import java.util.Date; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Searcher; import com.upolestar.kmpm.po.Photo; import com.upolestar.kmpm.service.IndexerFile; import com.upolestar.kmpm.service.SearcherFile; public class Test {  public final static String indexDir = "D:\\TestLucene";  private static Connection getConnection() {   Connection conn = null;   String url = "jdbc:mysql://localhost:3306/opencms";   String userName = "root";   String password = "1111";   try {    Class.forName("com.mysql.jdbc.Driver");    conn = java.sql.DriverManager      .getConnection(url, userName, password);   } catch (Exception e) {    e.printStackTrace();    System.out.println("Error Trace in getConnection() : "      + e.getMessage());   }   return conn;  }  public static void main(String[] args) throws IOException, ParseException,    SQLException {   index();// 做索引   Searcher searcher = null;   try {    searcher = new IndexSearcher(indexDir);    search(searcher);// 搜索   } catch (Exception e) {    e.printStackTrace();   } finally {    if (searcher != null)     searcher.close();   }  }  public static void search(Searcher searcher) throws IOException,    ParseException {   // 以下是搜索的关键词   String[] q = { "SVN", "捱三", "null", "null" };   long start = new Date().getTime();   SearcherFile.search(searcher, q);   long end = new Date().getTime();   System.out.println("花费时间:" + (double) (end - start) / 1000 + "秒");  }  public static void index() throws SQLException {   Connection conn = null;   try {    conn = getConnection();    Photo[] list = Photo.loadPhotos(conn);    IndexerFile.indexFile(indexDir, list);   } catch (Exception e) {    e.printStackTrace();   } finally {    if (conn != null) {     conn.close();    }   }  } }   已经测试过!!

下载文档到电脑,查找使用更方便

文档的实际排版效果,会与网站的显示效果略有不同!!

需要 3 金币 [ 分享文档获得金币 ] 0 人已下载

下载文档