现在的位置: 首页 > 综合 > 正文

Lucene学习总结:Lucene搜索过程解析

2012年08月17日 ⁄ 综合 ⁄ 共 4125字 ⁄ 字号 评论关闭
首先,在项目中先引用Lucene.Net.Dll
 然后建立一个(创建索引的页面)index.aspx
<div>
    <%-- Rebuild-index panel: the button posts back to btnNew_Click in the code-behind. --%>
    <h3>新建索引</h3>
    <p> 使用该功能会删除原有的索引文件,对所有新闻建立新的索引,随着新闻的越来越多 耗时会越来越多
    </p>
    <p>
        <asp:Button ID="btnNew" runat="server" Text="新建索引" OnClick="btnNew_Click" />
    </p>
</div>
 
然后,在index.aspx.cs文件中
//创建索引文件,并写入索引数据
 

View Code

/// <summary>
/// Click handler for the "rebuild index" button: recreates the Lucene index
/// from the rows returned by <c>Contents.GetContentsByCategory(18)</c>.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
protected void btnNew_Click(object sender, EventArgs e)
{
    // Rows to index.
    IList<ContentsModel> contents = Contents.GetContentsByCategory(18);
    // Index location is read from the "indexDir" appSetting in web.config.
    string indexDir = ConfigurationManager.AppSettings["indexDir"];
    Analyzer analyzer = new StandardAnalyzer(global::Lucene.Net.Util.Version.LUCENE_29);

    // create == true: an existing index in indexDir is replaced.
    IndexWriter writer = new IndexWriter(indexDir, analyzer, true);
    try
    {
        // Configure buffering/merging BEFORE adding documents; setting these
        // after the loop is too late to influence how this run is written.
        writer.SetMergeFactor(30);
        writer.SetMaxBufferedDocs(30);

        foreach (ContentsModel item in contents)
        {
            if (IsContentLegal(item))
            {
                writer.AddDocument(GetDocument(item));
            }
        }

        writer.Optimize();
    }
    catch (Exception ex)
    {
        // The old index has already been overwritten at this point, so a
        // failure must not be swallowed silently: record it and let it surface.
        System.Diagnostics.Trace.TraceError("Index rebuild failed: " + ex);
        throw;
    }
    finally
    {
        // Always release the index write lock.
        writer.Close();
    }
}
//过滤不符合的数据
 

View Code

/// <summary>
/// Returns true when the item can be indexed, i.e. it is non-null and has
/// both a title and a body.
/// </summary>
/// <param name="contentInfo">Candidate content row; may be null.</param>
private bool IsContentLegal(ContentsModel contentInfo)
{
    return contentInfo != null
        && contentInfo.Title != null
        && contentInfo.ContentBody != null;
}
将各个字段(Field)组装成一个Lucene文档(Document),供上面的IndexWriter写入索引文件
View Code

 /// <summary>
 /// Builds the Lucene <c>Document</c> for one content row.
 /// </summary>
 /// <param name="contentModel">Row to convert; Title and ContentBody must be non-null.</param>
 /// <returns>A document with four stored-only fields and two stored, analyzed fields.</returns>
 private Document GetDocument(ContentsModel contentModel)
 {
     Document doc = new Document();

     // Stored but not indexed: only used to render the result page.
     doc.Add(new Field("cId", contentModel.Id.ToString(), Field.Store.YES, Field.Index.NO));
     // Thumbnail is not covered by IsContentLegal; Field rejects a null value,
     // so a missing thumbnail is stored as an empty string.
     doc.Add(new Field("content_thumbnail", contentModel.Thumbnail ?? string.Empty, Field.Store.YES, Field.Index.NO));
     doc.Add(new Field("issue_datetime", contentModel.Issue.ToString(), Field.Store.YES, Field.Index.NO));
     doc.Add(new Field("category_id", contentModel.CategoryId.ToString(), Field.Store.YES, Field.Index.NO));

     // Stored and analyzed: these are the searchable fields.
     doc.Add(new Field("content_title", contentModel.Title.ToString(), Field.Store.YES, Field.Index.ANALYZED));
     doc.Add(new Field("content_body", contentModel.ContentBody.ToString(), Field.Store.YES, Field.Index.ANALYZED));

     return doc;
 }
 
搜索结果显示在Result.aspx页面
  

View Code

<div>
<%-- One entry per hit on the current page. The title is stored content, so it is HTML-encoded before being written into the page. --%>
<%for (int i = 0; i < hitsOfpage.Count; i++){ %>
<ul>
<li>
<span><%=Server.HtmlEncode(hitsOfpage[i].Get("content_title"))%></span>
</li>
</ul>
<%} %>
</div>
 
最后,在搜索结果页Result.aspx.cs
//本来有分页的,但我的分页用的是pager类,没有用sql存储,所以就不把分页代码贴出来了。
View Code

 /// <summary>
 /// Code-behind for Result.aspx: runs the search for the "q" request value and
 /// exposes the hits of the requested page to the markup.
 /// </summary>
 public partial class Result : System.Web.UI.Page
 {
     // Number of hits shown per page.
     const int PAGE_SIZE = 7;

     protected int totalPageCount;
     protected int currentPageIndex;
     protected string searchp;

     // Per-request list of the documents on the current page. This must be an
     // instance field: a static list would be shared (and cleared/refilled)
     // by concurrent requests from different users.
     protected List<Document> hitsOfpage = new List<Document>();

     /// <summary>
     /// Reads "q" and "page" from the request, runs the search and fills
     /// <c>hitsOfpage</c>, <c>currentPageIndex</c> and <c>totalPageCount</c>.
     /// </summary>
     protected void Page_Load(object sender, EventArgs e)
     {
         searchp = Request["q"];

         // Missing, non-numeric or non-positive page values all fall back to page 1
         // (TryParse leaves 0 in pageIndex on failure).
         string pagenum = Request.QueryString["page"] ?? "0";
         int pageIndex;
         if (!int.TryParse(pagenum.Trim(), out pageIndex) || pageIndex < 1)
         {
             pageIndex = 1;
         }

         // A missing or blank query is treated as "no results" instead of being
         // passed to SearchQuery, which cannot handle it.
         bool hasQuery = searchp != null && searchp.Trim().Length > 0;
         Hits hits = hasQuery ? SearchQuery(searchp) : null;
         int count = hits == null ? 0 : hits.Length();

         // Ceiling division: a partial last page still counts as a page.
         totalPageCount = (count + PAGE_SIZE - 1) / PAGE_SIZE;

         // Clamp to the last page; this is 0 when there are no hits.
         currentPageIndex = Math.Min(pageIndex, totalPageCount);

         int startPos = Math.Max((currentPageIndex - 1) * PAGE_SIZE, 0);
         int endPos = Math.Min(currentPageIndex * PAGE_SIZE - 1, count - 1);

         hitsOfpage.Clear();
         for (int i = startPos; i <= endPos; i++)
         {
             hitsOfpage.Add(hits.Doc(i));
         }
     }

 

//根据搜索条件查询索引文件返回hits集合
View Code

   /// <summary>
   /// Searches the index for <paramref name="word"/> in the title and body fields.
   /// </summary>
   /// <param name="word">Raw search text entered by the user; treated as plain text, not query syntax.</param>
   /// <returns>The hits for the query. The underlying searcher stays open because Hits loads documents lazily.</returns>
   private Hits SearchQuery(string word)
   {
       // Index location from the "indexDir" appSetting.
       string indexDir = ConfigurationManager.AppSettings["indexDir"];
       // Fields searched by the multi-field parser.
       string[] fields = new string[] { "content_title", "content_body" };

       // Use the same analyzer version as at index time (LUCENE_29) so query
       // terms are tokenized the same way as the indexed text.
       StandardAnalyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
       MultiFieldQueryParser queryParser =
           new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_29, fields, analyzer);

       // User input is not query syntax: escape the parser's special characters,
       // and lower-case the text so the keywords AND / OR / NOT are read as
       // ordinary words (the analyzer lower-cases terms anyway).
       string safeText = MultiFieldQueryParser.Escape(word).ToLowerInvariant();

       // Parse before opening the reader so a parse failure cannot leak it.
       Query query = queryParser.Parse(safeText);

       IndexReader reader = IndexReader.Open(indexDir, true);
       IndexSearcher searcher = new IndexSearcher(reader);
       return searcher.Search(query);
   }
 
2011-11-28补充:后来发现搜索时如果输入查询运算符(例如 +、-、AND 等)会抛出异常。原因是QueryParser会把这些字符当作查询语法来解析,所以改为先对输入分词,再用BooleanQuery把各个词组合起来:

/// <summary>
/// Searches the "content_title" field for documents containing every token
/// of <paramref name="querystring"/>.
/// </summary>
/// <param name="querystring">Raw search text; null is treated as empty.</param>
/// <returns>The hits for the combined query. The underlying searcher stays open because Hits loads documents lazily.</returns>
private Hits SearchQuery(string querystring)
{
    string indexDir = ConfigurationManager.AppSettings["indexDir"];

    // Same analyzer version as at index time so tokens match the indexed terms.
    Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);

    // Build one MUST clause per token. Tokens are turned into TermQuery
    // directly instead of being fed back through QueryParser, so no input
    // can be interpreted as query syntax.
    BooleanQuery bquery = new BooleanQuery();
    TokenStream ts = analyzer.TokenStream("content_title", new StringReader(querystring ?? string.Empty));
    try
    {
        Lucene.Net.Analysis.Token token;
        while ((token = ts.Next()) != null)
        {
            bquery.Add(new TermQuery(new Term("content_title", token.TermText())), BooleanClause.Occur.MUST);
        }
    }
    finally
    {
        ts.Close();
    }

    // NOTE(review): this maps indexDir as a virtual path, while the indexing
    // code passes the raw setting to IndexWriter; confirm both resolve to the
    // same directory.
    IndexReader reader = IndexReader.Open(Server.MapPath(indexDir), true);
    IndexSearcher searcher = new IndexSearcher(reader);
    return searcher.Search(bquery);
}

抱歉!评论已关闭.