现在的位置: 首页 > 综合 > 正文

Lucene 3.6 的学习研究

2014年06月27日 ⁄ 综合 ⁄ 共 7318字 ⁄ 字号 评论关闭

以前做项目的时候，同事用到了 Lucene，觉得很神奇。这几天正好有闲功夫，给自己充下电。以下是一个 Lucene 3.6 的实例，由 HelloLucene 和 LuceneUtils 两个类组成，废话少说，直接上代码：

package com.chen.lucene.action;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.NRTCachingDirectory;
import org.apache.lucene.util.Version;

public class HelloLucene{

 final static String PATH = "F:/workspaceA/testLucene/luceneIndex";

 public static final Version version = LuceneUtils.version;
 
 /* create index */
 public static void create() throws IOException
 {

  Directory fsDir = FSDirectory.open(new File(PATH));
  NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 5.0, 60.0);

  IndexWriterConfig conf = new IndexWriterConfig(version, new StandardAnalyzer(version));
  conf.setMergeScheduler(cachedFSDir.getMergeScheduler());
  IndexWriter writer = new IndexWriter(cachedFSDir, conf);

  Document doc = new Document();
  // 注意:filed实例在多次添加的时候可以重用,节约构造field实例的时间。
  doc.add(new Field("name", "vincent", Field.Store.YES, Field.Index.NOT_ANALYZED));
  doc.add(new Field("info", "this is a good boy 1", Field.Store.YES, Field.Index.ANALYZED));
  // System.out.println("toString>>>>"+doc.toString());
  // Documen相当于数据库里行的概念,这里创建了1行数据
  Document doc2 = new Document();
  /*
   * Field.Store.YES:存储字段值(未分词前的字段值) Field.Store.NO:不存储,存储与索引没有关系
   * Field.Store.COMPRESS:压缩存储,用于长文本或二进制,但性能受损 Field.Index.ANALYZED:分词建索引
   * Field
   * Index.ANALYZED_NO_NORMS:分词建索引,但是Field的值不像通常那样被保存,而是只取一个byte,这样节约存储空间
   * Field.Index.NOT_ANALYZED:不分词且索引
   * Field.Index.NOT_ANALYZED_NO_NORMS:不分词建索引,Field的值去一个byte保存
   */
  doc2.add(new Field("name", "vincent2", Field.Store.YES, Field.Index.NOT_ANALYZED));
  doc2.add(new Field("info", "this is a good boy 2", Field.Store.YES, Field.Index.ANALYZED));
  doc2.add(new Field("info2", "this is a good boy info2", Field.Store.YES, Field.Index.ANALYZED));
  writer.addDocument(doc);
  writer.addDocument(doc2);
  writer.commit();
  writer.close();
 }

 /* search index */
 public static void search(String key, String value) throws CorruptIndexException, IOException, ParseException
 {
  IndexSearcher searcher = null;
  // 创建QueryParser对象,第一个参数表示Lucene的版本,第二个表示搜索Field的字段,第三个表示搜索使用分词器
  QueryParser qp = new QueryParser(version, key, new StandardAnalyzer(version));
  searcher = new IndexSearcher(IndexReader.open(FSDirectory.open(new File(PATH))));
  Query tq = qp.parse(value);
  TotalHitCountCollector results = new TotalHitCountCollector();
  searcher.search(tq, results); 
  // new function
  System.out.println(results.getTotalHits());
  for (int i = 0; i < results.getTotalHits(); i++)
  {
   System.out.println(searcher.doc(i).get("info")); // new function
  }
 }

 /**
  * 在多个字段查找同一个值
  */
 public static void searchList() throws Exception
 {
  // 查询的字符串:输入不存在的字符串是查询不到的,如:中国
  String queryString = "boy";
  // 查询字段集合
  String[] queryFileds = { "info", "info2" };
  IndexSearcher searcher = LuceneUtils.createIndexSearcher();
  Query query = LuceneUtils.createQuery(queryFileds, queryString);
  // 在搜索器中进行查询
  // 对查询内容进行过滤
  Filter filter = null;
  // 一次在索引器查询多少条数据
  int queryCount = 100;

  TopDocs results = searcher.search(query, filter, queryCount);
  System.out.println("总符合: " + results.totalHits + "条数!");

  // 显示记录
  for (ScoreDoc sr : results.scoreDocs)
  {
   // 文档编号
   int docID = sr.doc;
   // 真正的内容
   Document doc = searcher.doc(docID);
   System.out.println("inof = " + doc.get("info"));
   System.out.println("info2 = " + doc.get("info2"));

  }
 }

 /*
  * lucene复合条件查询,即 and or 等 BooleanClause.Occur.MUST表示and
  * BooleanClause.Occur.MUST_NOT表示not BooleanClause.Occur.SHOULD表示or.
  */
 public static void searchQuery() throws Exception
 {
  IndexSearcher searcher = LuceneUtils.createIndexSearcher();

  String[] queries = { "vincent", "boy" };
  String[] fields = { "name", "info" };
  BooleanClause.Occur[] clauses = { BooleanClause.Occur.MUST, BooleanClause.Occur.MUST };
  Query query = MultiFieldQueryParser.parse(version, queries, fields, clauses, new StandardAnalyzer(version));

  TopDocs results = searcher.search(query, null, 100);
  System.out.println("总符合: " + results.totalHits + "条数!");

  // 显示记录
  for (ScoreDoc sr : results.scoreDocs)
  {
   // 文档编号
   int docID = sr.doc;
   // 真正的内容
   Document doc = searcher.doc(docID);

   System.out.println("name = " + doc.get("name"));
   System.out.println("inof = " + doc.get("info"));
   System.out.println("info2 = " + doc.get("info2"));

  }
 }

 public static void main(String[] args) throws Exception
 {
//  HelloLucene.create();
  HelloLucene.search("info2", "boy");
  HelloLucene.searchQuery();
  HelloLucene.searchList();
 }

}

 

package com.chen.lucene.action;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class LuceneUtils
{
 // 当前目录位置
 public static final String USERDIR = System.getProperty("user.dir");
 // 存放索引的目录
 private static final String INDEXPATH = USERDIR + File.separator + "index";
 private static final String INDEXPATH2 = "F:/workspaceA/testLucene/luceneIndex";

 // 使用版本
 public static final Version version = Version.LUCENE_35;

 /** * 获取分词器 * */
 public static Analyzer getAnalyzer()
 {
  // 分词器
  Analyzer analyzer = new StandardAnalyzer(version);
  return analyzer;
 }

 /***************************************************************************
  * * 创建一个索引器的操作类 *
  * 
  * @param openMode *
  * @return *
  * @throws Exception
  */
 public static IndexWriter createIndexWriter(OpenMode openMode) throws Exception
 { // 索引存放位置设置
  Directory dir = FSDirectory.open(new File(INDEXPATH));
  // 索引配置类设置
  IndexWriterConfig iwc = new IndexWriterConfig(version, getAnalyzer());
  iwc.setOpenMode(openMode);
  IndexWriter writer = new IndexWriter(dir, iwc);
  return writer;
 }

 /**
  * * * 创建一个搜索的索引器 *
  * 
  * @throws IOException *
  * @throws CorruptIndexException *
  */
 public static IndexSearcher createIndexSearcher() throws CorruptIndexException, IOException
 {
  IndexReader reader = IndexReader.open(FSDirectory.open(new File(INDEXPATH2)));
  IndexSearcher searcher = new IndexSearcher(reader);
  return searcher;
 }

 /**
  * * 创建一个查询器 *
  * 
  * @param queryFileds
  *            在哪些字段上进行查询 *
  * @param queryString
  *            查询内容 *
  * @return *
  * @throws ParseException
  */
 public static Query createQuery(String[] queryFileds, String queryString) throws ParseException
 {
  QueryParser parser = new MultiFieldQueryParser(version, queryFileds, getAnalyzer());
  Query query = parser.parse(queryString);
  return query;
 }
}

抱歉!评论已关闭.