本文建立一个基本的Lucene例子,测试一下。
注释我都加在代码里面了。
LuceneIndex.java
功能:建立索引
- import java.io.BufferedReader;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.InputStreamReader;
- import java.io.Reader;
- import java.util.Date;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.index.IndexWriter.MaxFieldLength;
- public class LuceneIndex {
- public static void main(String[] args) throws Exception {
- // 声明一个对象
- LuceneIndex indexer = new LuceneIndex();
- // 建立索引
- Date start = new Date();
- indexer.writeToIndex();
- Date end = new Date();
- System.out.println("建立索引用时" + (end.getTime() - start.getTime()) + "毫秒");
- //这一步很关键,往往缺少这一步,而使得存放索引的目录中缺少文件
- indexer.close();
- }
- public LuceneIndex() {
- try {
- writer = new IndexWriter(Constants.INDEX_STORE_PATH,
- new StandardAnalyzer(), true,new MaxFieldLength(999999999));//大小:999999999
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- // 索引器
- private IndexWriter writer = null;
- // 将要建立索引的文件构造成一个Document对象,并添加一个域"content"
- private Document getDocument(File f) throws Exception {
- Document doc = new Document();
- FileInputStream is = new FileInputStream(f);
- Reader reader = new BufferedReader(new InputStreamReader(is));
- doc.add(new Field("contents", reader));
- doc.add(new Field("path", f.getAbsolutePath(),Field.Store.YES,Field.Index.NOT_ANALYZED));
- return doc;
- }
- public void writeToIndex() throws Exception {
- File folder = new File(Constants.INDEX_FILE_PATH);
- if (folder.isDirectory()) {
- String[] files = folder.list();
- for (int i = 0; i < files.length; i++) {
- File file = new File(folder, files[i]);
- Document doc = getDocument(file);
- System.out.println("正在建立索引 : " + file + "");
- writer.addDocument(doc);
- }
- }
- }
- public void close() throws Exception {
- writer.close();
- }
- }
LuceneSearch.java
功能:检索索引并打印结果(测试)
- import java.util.Date;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.queryParser.MultiFieldQueryParser;
- import org.apache.lucene.search.BooleanClause;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.TopDocCollector;
- public class LuceneSearch {
- public static void main(String[] args) throws Exception {
- LuceneSearch test = new LuceneSearch();
- ScoreDoc[] h = null;
- h = test.search("关键字");
- test.printResult(h);
- h = null;
- h = test.search("adddd");
- test.printResult(h);
- }
- public LuceneSearch() {
- try {
- searcher = new IndexSearcher(Constants.INDEX_STORE_PATH);
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- // 声明一个IndexSearcher对象
- private IndexSearcher searcher = null;
- public final ScoreDoc[] search(String keyword) {
- System.out.println("正在检索关键字 : " + keyword);
- try {
- BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
- TopDocCollector collector = new TopDocCollector(10); // 启用这个
- Query query = MultiFieldQueryParser.parse(new String[]{keyword}, new String[] { "contents" }, new StandardAnalyzer() );
- //Query query = MultiFieldQueryParser.parse(keyword, new String[] { "contents" }, clauses,new StandardAnalyzer() );
- Date start = new Date();
- searcher.search(query, collector); // 作为参数
- ScoreDoc[] hits = collector.topDocs().scoreDocs; // 拿到结果
- Date end = new Date();
- System.out.println("检索完成,用时" + (end.getTime() - start.getTime()) + "毫秒");
- return hits;
- } catch (Exception e) {
- e.printStackTrace();
- return null;
- }
- }
- public void printResult(ScoreDoc[] h) {
- if (h == null || h.length == 0) {
- System.out.println("对不起,没有找到您要的结果。");
- }
- else
- {
- for (int i = 0; i < h.length; i++) {
- try {
- int num = h[i].doc; // 一个内部编号
- Document doc = searcher.doc(num); // 通过编号,拿到文档
- System.out.print("这是第" + i + "个检索到的结果,文档编号为:" + num + "文件名为:");
- System.out.println("PATH路径:" + doc.get("path"));
- //doc.setBoost(0.7f);
- System.out.println("该文档的得分是" + doc.getBoost());
- System.out.println("CONTENTS内容:" + doc.get("contents"));
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- }
- System.out.println("--------------------------");
- }
- }