Lucene 3.6 的学习研究

现在的位置: 首页 > 综合 > 正文

Lucene 3.6 的学习研究

2014年06月27日 ⁄ 综合 ⁄ 共 7318字 ⁄ 字号小中大 ⁄ 评论关闭

以前做项目的时候，同事用到了 Lucene，觉得很神奇。这几天正好有空，给自己充充电。以下是一个基于 Lucene 3.6 的实例，包含 HelloLucene（建索引与查询）和 LuceneUtils（工具类）两个类。废话少说，直接上代码：

package com.chen.lucene.action;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.NRTCachingDirectory;
import org.apache.lucene.util.Version;

/**
 * Minimal Lucene 3.x walkthrough: writes a two-document index to disk and runs
 * three kinds of searches against it, printing the results to standard output.
 */
public class HelloLucene
{

  /** File-system directory that holds the index written by {@link #create()}. */
  final static String PATH = "F:/workspaceA/testLucene/luceneIndex";

  /** Lucene compatibility version, taken from {@code LuceneUtils.version}. */
  public static final Version version = LuceneUtils.version;

  /**
   * Creates the index under {@link #PATH} and adds two sample documents.
   *
   * <p>The writer is closed in a {@code finally} block so that a failure while
   * adding or committing does not leave the writer (and the index write lock it
   * holds) open.
   *
   * @throws IOException if the index directory cannot be opened or written
   */
  public static void create() throws IOException
  {
    Directory fsDir = FSDirectory.open(new File(PATH));
    NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 5.0, 60.0);

    IndexWriterConfig conf = new IndexWriterConfig(version, new StandardAnalyzer(version));
    conf.setMergeScheduler(cachedFSDir.getMergeScheduler());
    IndexWriter writer = new IndexWriter(cachedFSDir, conf);
    try
    {
      // A Document plays the role of a row in a database table; this is row 1.
      // Note: Field instances may be reused across documents to save the cost
      // of constructing them again.
      Document doc = new Document();
      doc.add(new Field("name", "vincent", Field.Store.YES, Field.Index.NOT_ANALYZED));
      doc.add(new Field("info", "this is a good boy 1", Field.Store.YES, Field.Index.ANALYZED));

      /*
       * Field.Store.YES: keep the original (un-tokenized) value in the index.
       * Field.Store.NO:  do not keep the value; storing is independent of indexing.
       * Field.Index.ANALYZED:              tokenize the value, then index it.
       * Field.Index.ANALYZED_NO_NORMS:     as ANALYZED, but norms are not stored
       *                                    for the field, which saves space.
       * Field.Index.NOT_ANALYZED:          index the whole value as a single term.
       * Field.Index.NOT_ANALYZED_NO_NORMS: as NOT_ANALYZED, without norms.
       */
      Document doc2 = new Document();
      doc2.add(new Field("name", "vincent2", Field.Store.YES, Field.Index.NOT_ANALYZED));
      doc2.add(new Field("info", "this is a good boy 2", Field.Store.YES, Field.Index.ANALYZED));
      doc2.add(new Field("info2", "this is a good boy info2", Field.Store.YES, Field.Index.ANALYZED));

      writer.addDocument(doc);
      writer.addDocument(doc2);
      writer.commit();
    }
    finally
    {
      writer.close();
    }
  }

  /**
   * Searches a single field and prints the hit count followed by the stored
   * {@code info} value of every matching document.
   *
   * <p>The hits are fetched through {@link TopDocs} so that the documents
   * printed are the ones that matched the query. Looking documents up by the
   * loop counter would instead print the first N documents of the index,
   * whether or not they matched.
   *
   * @param key   name of the field to search
   * @param value query text, parsed with the standard query-parser syntax
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException           if the index cannot be read
   * @throws ParseException        if {@code value} is not a valid query
   */
  public static void search(String key, String value) throws CorruptIndexException, IOException, ParseException
  {
    // QueryParser arguments: Lucene version, default field to search, analyzer.
    QueryParser qp = new QueryParser(version, key, new StandardAnalyzer(version));
    // Parse before opening the index so that a bad query leaves nothing to clean up.
    Query tq = qp.parse(value);

    IndexReader reader = IndexReader.open(FSDirectory.open(new File(PATH)));
    try
    {
      IndexSearcher searcher = new IndexSearcher(reader);

      // First pass: count the matches only.
      TotalHitCountCollector counter = new TotalHitCountCollector();
      searcher.search(tq, counter);
      int total = counter.getTotalHits();
      System.out.println(total);

      // Second pass: fetch the matching documents. Skipped for zero hits
      // because the searcher rejects a non-positive result size.
      if (total > 0)
      {
        TopDocs hits = searcher.search(tq, total);
        for (ScoreDoc hit : hits.scoreDocs)
        {
          System.out.println(searcher.doc(hit.doc).get("info"));
        }
      }
    }
    finally
    {
      reader.close();
    }
  }

  /**
   * Looks up the same value in several fields at once and prints the stored
   * {@code info} and {@code info2} values of each hit.
   *
   * @throws Exception if the index cannot be opened or the query cannot be parsed
   */
  public static void searchList() throws Exception
  {
    // Query text; terms that are not in the index produce no hits.
    String queryString = "boy";
    // Fields to search.
    String[] queryFields = { "info", "info2" };
    Query query = LuceneUtils.createQuery(queryFields, queryString);
    // No additional filtering of the results.
    Filter filter = null;
    // Maximum number of hits to retrieve in one search.
    int queryCount = 100;

    IndexSearcher searcher = LuceneUtils.createIndexSearcher();
    try
    {
      TopDocs results = searcher.search(query, filter, queryCount);
      System.out.println("总符合: " + results.totalHits + "条数！");

      for (ScoreDoc sr : results.scoreDocs)
      {
        // sr.doc is the document id; load the stored fields through it.
        Document doc = searcher.doc(sr.doc);
        System.out.println("inof = " + doc.get("info"));
        System.out.println("info2 = " + doc.get("info2"));
      }
    }
    finally
    {
      // Release the index files held by the reader behind the searcher.
      searcher.getIndexReader().close();
    }
  }

  /**
   * Runs a compound query with one clause per field and prints the stored
   * {@code name}, {@code info} and {@code info2} values of each hit.
   *
   * <p>{@code BooleanClause.Occur.MUST} means AND, {@code MUST_NOT} means NOT
   * and {@code SHOULD} means OR.
   *
   * @throws Exception if the index cannot be opened or the query cannot be parsed
   */
  public static void searchQuery() throws Exception
  {
    String[] queries = { "vincent", "boy" };
    String[] fields = { "name", "info" };
    BooleanClause.Occur[] clauses = { BooleanClause.Occur.MUST, BooleanClause.Occur.MUST };
    Query query = MultiFieldQueryParser.parse(version, queries, fields, clauses, new StandardAnalyzer(version));

    IndexSearcher searcher = LuceneUtils.createIndexSearcher();
    try
    {
      TopDocs results = searcher.search(query, null, 100);
      System.out.println("总符合: " + results.totalHits + "条数！");

      for (ScoreDoc sr : results.scoreDocs)
      {
        // sr.doc is the document id; load the stored fields through it.
        Document doc = searcher.doc(sr.doc);
        System.out.println("name = " + doc.get("name"));
        System.out.println("inof = " + doc.get("info"));
        System.out.println("info2 = " + doc.get("info2"));
      }
    }
    finally
    {
      // Release the index files held by the reader behind the searcher.
      searcher.getIndexReader().close();
    }
  }

  /**
   * Runs the three search demos against an existing index.
   *
   * @param args ignored
   * @throws Exception if any of the searches fails
   */
  public static void main(String[] args) throws Exception
  {
    // The index must already exist; enable the next line once to build it.
    // HelloLucene.create();
    HelloLucene.search("info2", "boy");
    HelloLucene.searchQuery();
    HelloLucene.searchList();
  }

}

package com.chen.lucene.action;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Static factory helpers for the Lucene objects used by the examples: an
 * analyzer, an index writer, an index searcher and a multi-field query.
 */
public class LuceneUtils
{
  /** Working directory of the running JVM (the {@code user.dir} system property). */
  public static final String USERDIR = System.getProperty("user.dir");

  /** Index location used by {@link #createIndexWriter(OpenMode)}. */
  private static final String INDEXPATH = USERDIR + File.separator + "index";

  /** Index location used by {@link #createIndexSearcher()}. */
  private static final String INDEXPATH2 = "F:/workspaceA/testLucene/luceneIndex";

  /** Lucene compatibility version passed to analyzers, parsers and writer configs. */
  public static final Version version = Version.LUCENE_35;

  /**
   * Returns the analyzer used for both indexing and query parsing.
   *
   * @return a new {@link StandardAnalyzer} for {@link #version}
   */
  public static Analyzer getAnalyzer()
  {
    return new StandardAnalyzer(version);
  }

  /**
   * Opens an index writer on the directory named by {@code INDEXPATH}.
   *
   * @param openMode open mode to set on the writer configuration
   * @return a new writer on that directory
   * @throws Exception if the directory or the writer cannot be opened
   */
  public static IndexWriter createIndexWriter(OpenMode openMode) throws Exception
  {
    // Writer configuration: version, analyzer and the requested open mode.
    IndexWriterConfig config = new IndexWriterConfig(version, getAnalyzer());
    config.setOpenMode(openMode);
    return new IndexWriter(FSDirectory.open(new File(INDEXPATH)), config);
  }

  /**
   * Opens a searcher over the index stored in {@code INDEXPATH2}.
   *
   * @return a new searcher backed by a freshly opened reader
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException           if the index cannot be read
   */
  public static IndexSearcher createIndexSearcher() throws CorruptIndexException, IOException
  {
    return new IndexSearcher(IndexReader.open(FSDirectory.open(new File(INDEXPATH2))));
  }

  /**
   * Builds a query that looks for the same text in several fields.
   *
   * @param queryFileds fields to search
   * @param queryString query text
   * @return the parsed query
   * @throws ParseException if {@code queryString} cannot be parsed
   */
  public static Query createQuery(String[] queryFileds, String queryString) throws ParseException
  {
    return new MultiFieldQueryParser(version, queryFileds, getAnalyzer()).parse(queryString);
  }
}

【上篇】Spring配置之OpenSessionInViewFilter
【下篇】CSS3实现背景颜色渐变摘抄

作者: blasphemy

该日志由 blasphemy 于10年前发表在综合分类下，最后更新于 2014年06月27日.
转载请注明: Lucene 3.6 的学习研究 | 学步园 +复制链接

抱歉!评论已关闭.

学步园

Lucene 3.6 的学习研究

作者: blasphemy

书签

最新文章New

本站推荐

返回首页