以前做项目的时候,同事用到了 Lucene,觉得很神奇。这几天正好有闲工夫,给自己充下电。以下是一个 Lucene 3.6 的实例,废话少说,直接上代码:
package com.chen.lucene.action;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.NRTCachingDirectory;
import org.apache.lucene.util.Version;
/**
 * Minimal Lucene 3.x walkthrough: builds a two-document index on disk and runs
 * three kinds of searches against it, printing the results to standard output.
 */
public class HelloLucene{
    /** File-system location of the index used by {@link #create()} and {@link #search(String, String)}. */
    final static String PATH = "F:/workspaceA/testLucene/luceneIndex";
    /** Lucene compatibility version, taken from {@code LuceneUtils}. */
    public static final Version version = LuceneUtils.version;
    /** Maximum number of hits requested per query by the multi-field and boolean searches. */
    private static final int MAX_HITS = 100;

    /**
     * Creates the index at {@link #PATH} and adds two sample documents to it.
     *
     * <p>The writer is rolled back if anything fails before the commit and closed
     * otherwise, and the directory is always closed, so a failed run does not
     * leave the writer (and its write lock) open.
     *
     * @throws IOException if the index directory cannot be opened or written
     */
    public static void create() throws IOException
    {
        Directory fsDir = FSDirectory.open(new File(PATH));
        // 5.0 / 60.0 are passed as the max merge size and max cached size (MB).
        NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 5.0, 60.0);
        try
        {
            IndexWriterConfig conf = new IndexWriterConfig(version, new StandardAnalyzer(version));
            conf.setMergeScheduler(cachedFSDir.getMergeScheduler());
            IndexWriter writer = new IndexWriter(cachedFSDir, conf);
            boolean committed = false;
            try
            {
                // A Document is the equivalent of a database row; this is row 1.
                // Note: Field instances can be reused across repeated additions
                // to save the cost of constructing them.
                Document doc = new Document();
                doc.add(new Field("name", "vincent", Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.add(new Field("info", "this is a good boy 1", Field.Store.YES, Field.Index.ANALYZED));

                /*
                 * Field.Store.YES: store the original (un-tokenized) field value.
                 * Field.Store.NO: do not store the value; storing is independent of indexing.
                 * Field.Index.ANALYZED: tokenize the value and index the tokens.
                 * Field.Index.ANALYZED_NO_NORMS: tokenize and index, without storing norms.
                 * Field.Index.NOT_ANALYZED: index the value as a single term, without tokenizing.
                 * Field.Index.NOT_ANALYZED_NO_NORMS: as NOT_ANALYZED, without storing norms.
                 */
                Document doc2 = new Document();
                doc2.add(new Field("name", "vincent2", Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc2.add(new Field("info", "this is a good boy 2", Field.Store.YES, Field.Index.ANALYZED));
                doc2.add(new Field("info2", "this is a good boy info2", Field.Store.YES, Field.Index.ANALYZED));

                writer.addDocument(doc);
                writer.addDocument(doc2);
                writer.commit();
                committed = true;
            }
            finally
            {
                if (committed)
                {
                    writer.close();
                }
                else
                {
                    // Discard the partial batch instead of committing it on close.
                    writer.rollback();
                }
            }
        }
        finally
        {
            cachedFSDir.close();
        }
    }

    /**
     * Searches a single field of the index at {@link #PATH} and prints the total
     * hit count followed by the stored {@code info} value of every matching document.
     *
     * @param key   name of the field to search
     * @param value query text, parsed with {@link QueryParser} and a {@link StandardAnalyzer}
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException           if the index cannot be read
     * @throws ParseException        if {@code value} is not a valid query
     */
    public static void search(String key, String value) throws CorruptIndexException, IOException, ParseException
    {
        // QueryParser arguments: Lucene version, field to search, analyzer for the query text.
        QueryParser qp = new QueryParser(version, key, new StandardAnalyzer(version));
        Query tq = qp.parse(value);

        Directory dir = FSDirectory.open(new File(PATH));
        try
        {
            IndexReader reader = IndexReader.open(dir);
            try
            {
                IndexSearcher searcher = new IndexSearcher(reader);

                // First pass only counts the matches.
                TotalHitCountCollector counter = new TotalHitCountCollector();
                searcher.search(tq, counter);
                int total = counter.getTotalHits();
                System.out.println(total);
                if (total == 0)
                {
                    return;
                }

                // Second pass fetches the matching document ids. A loop counter is
                // not a document id, so the hits must come from the ScoreDocs.
                TopDocs hits = searcher.search(tq, total);
                for (ScoreDoc hit : hits.scoreDocs)
                {
                    System.out.println(searcher.doc(hit.doc).get("info"));
                }
            }
            finally
            {
                reader.close();
            }
        }
        finally
        {
            dir.close();
        }
    }

    /**
     * Looks up the same value ("boy") in several fields ({@code info}, {@code info2})
     * and prints the stored {@code info} and {@code info2} values of each hit.
     *
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    public static void searchList() throws Exception
    {
        // Query text; a term that is not in the index yields no hits.
        String queryString = "boy";
        // Fields to search.
        String[] queryFileds = { "info", "info2" };
        IndexSearcher searcher = LuceneUtils.createIndexSearcher();
        try
        {
            Query query = LuceneUtils.createQuery(queryFileds, queryString);
            // No filtering of the results.
            Filter filter = null;
            TopDocs results = searcher.search(query, filter, MAX_HITS);
            printHits(searcher, results, "info", "info2");
        }
        finally
        {
            // Release the reader opened by LuceneUtils.createIndexSearcher().
            searcher.getIndexReader().close();
        }
    }

    /**
     * Runs a compound boolean query: {@code name} must match "vincent" AND
     * {@code info} must match "boy". Prints the stored {@code name}, {@code info}
     * and {@code info2} values of each hit.
     *
     * <p>{@code BooleanClause.Occur.MUST} means AND, {@code MUST_NOT} means NOT and
     * {@code SHOULD} means OR.
     *
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    public static void searchQuery() throws Exception
    {
        IndexSearcher searcher = LuceneUtils.createIndexSearcher();
        try
        {
            String[] queries = { "vincent", "boy" };
            String[] fields = { "name", "info" };
            BooleanClause.Occur[] clauses = { BooleanClause.Occur.MUST, BooleanClause.Occur.MUST };
            Query query = MultiFieldQueryParser.parse(version, queries, fields, clauses, new StandardAnalyzer(version));
            TopDocs results = searcher.search(query, null, MAX_HITS);
            printHits(searcher, results, "name", "info", "info2");
        }
        finally
        {
            // Release the reader opened by LuceneUtils.createIndexSearcher().
            searcher.getIndexReader().close();
        }
    }

    /**
     * Prints the total hit count, then for every returned hit one
     * {@code "<field> = <stored value>"} line per requested field.
     */
    private static void printHits(IndexSearcher searcher, TopDocs results, String... fields) throws IOException
    {
        System.out.println("总符合: " + results.totalHits + "条数!");
        for (ScoreDoc sr : results.scoreDocs)
        {
            // sr.doc is the document id; searcher.doc() loads its stored fields.
            Document doc = searcher.doc(sr.doc);
            for (String field : fields)
            {
                System.out.println(field + " = " + doc.get(field));
            }
        }
    }

    /**
     * Runs the three search examples in sequence.
     *
     * @param args unused
     * @throws Exception if any search fails
     */
    public static void main(String[] args) throws Exception
    {
        // Uncomment to build the index before the searches below are run.
        // HelloLucene.create();
        HelloLucene.search("info2", "boy");
        HelloLucene.searchQuery();
        HelloLucene.searchList();
    }
}
package com.chen.lucene.action;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Shared factory helpers for the Lucene 3.x examples: analyzer, index writer,
 * index searcher and multi-field query construction.
 */
public class LuceneUtils
{
    /** Value of the {@code user.dir} system property. */
    public static final String USERDIR = System.getProperty("user.dir");
    /** Index directory used by {@link #createIndexWriter(OpenMode)}: {@code <user.dir>/index}. */
    private static final String INDEXPATH = USERDIR + File.separator + "index";
    /**
     * Index directory used by {@link #createIndexSearcher()}.
     * NOTE(review): this differs from INDEXPATH, so a writer and a searcher obtained
     * from this class operate on different indexes - confirm that is intended.
     */
    private static final String INDEXPATH2 = "F:/workspaceA/testLucene/luceneIndex";
    /** Lucene compatibility version used for analyzers, parsers and writer configuration. */
    public static final Version version = Version.LUCENE_35;

    /**
     * Returns a new analyzer.
     *
     * @return a fresh {@link StandardAnalyzer} for {@link #version}
     */
    public static Analyzer getAnalyzer()
    {
        return new StandardAnalyzer(version);
    }

    /**
     * Creates an index writer on {@code <user.dir>/index}.
     *
     * <p>If the writer cannot be constructed, the directory opened for it is
     * closed again before the exception propagates. On success the caller should
     * close the returned writer when done with it.
     *
     * @param openMode how the index is opened (create, append, or create-or-append)
     * @return a new writer configured with {@link #getAnalyzer()} and {@code openMode}
     * @throws Exception if the directory or the writer cannot be opened
     */
    public static IndexWriter createIndexWriter(OpenMode openMode) throws Exception
    {
        // Where the index is stored.
        Directory dir = FSDirectory.open(new File(INDEXPATH));
        boolean created = false;
        try
        {
            // Writer configuration.
            IndexWriterConfig iwc = new IndexWriterConfig(version, getAnalyzer());
            iwc.setOpenMode(openMode);
            IndexWriter writer = new IndexWriter(dir, iwc);
            created = true;
            return writer;
        }
        finally
        {
            if (!created)
            {
                dir.close();
            }
        }
    }

    /**
     * Creates a searcher over the index at the fixed location {@code INDEXPATH2}.
     *
     * <p>If the reader cannot be opened, the directory opened for it is closed
     * again before the exception propagates. On success the caller should close
     * the searcher's underlying reader when done with it.
     *
     * @return a searcher backed by a newly opened {@link IndexReader}
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException           if the index cannot be read
     */
    public static IndexSearcher createIndexSearcher() throws CorruptIndexException, IOException
    {
        Directory dir = FSDirectory.open(new File(INDEXPATH2));
        boolean opened = false;
        try
        {
            IndexReader reader = IndexReader.open(dir);
            IndexSearcher searcher = new IndexSearcher(reader);
            opened = true;
            return searcher;
        }
        finally
        {
            if (!opened)
            {
                dir.close();
            }
        }
    }

    /**
     * Builds a query that looks for the same text in several fields.
     *
     * @param queryFileds names of the fields to search
     * @param queryString query text
     * @return the query produced by a {@link MultiFieldQueryParser} over {@code queryFileds}
     * @throws ParseException if {@code queryString} is not a valid query
     */
    public static Query createQuery(String[] queryFileds, String queryString) throws ParseException
    {
        QueryParser parser = new MultiFieldQueryParser(version, queryFileds, getAnalyzer());
        return parser.parse(queryString);
    }
}