Lucene学习总结：Lucene搜索过程解析

现在的位置: 首页 > 综合 > 正文

RSS

Lucene学习总结：Lucene搜索过程解析

2012年08月17日 ⁄ 综合 ⁄ 共 4125字 ⁄ 字号小中大 ⁄ 评论关闭

首先,在项目中先引用Lucene.Net.Dll

然后建立一个(创建索引的页面)index.aspx

<%-- Index maintenance panel: a warning paragraph followed by the btnNew button, whose server-side click handler is btnNew_Click. --%>
<div>

<p> 使用该功能会删除原有的索引文件，对所有新闻建立新的索引,随着新闻的越来越多耗时会越来越多

</p>

<p>

<asp:button id="btnNew" runat="server" text="新建索引" onclick="btnNew_Click">

</asp:button></p>

</div>

然后,在index.aspx.cs文件中

//创建索引文件,并写入索引数据

View Code

/// <summary>
/// Rebuilds the search index from scratch: loads the content rows of category 18
/// and writes one Lucene document per valid row into the directory named by the
/// "indexDir" app setting.
/// </summary>
/// <param name="sender">The control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void btnNew_Click(object sender, EventArgs e)
        {
            // Rows to be indexed.
            IList<ContentsModel> contents = Contents.GetContentsByCategory(18);
            // Index location, configured in web.config.
            string indexDir = ConfigurationManager.AppSettings["indexDir"];
            Analyzer analyzer = new StandardAnalyzer(global::Lucene.Net.Util.Version.LUCENE_29);
            // Third argument (create) is true: an existing index in indexDir is replaced.
            IndexWriter writer = new IndexWriter(indexDir, analyzer, true);
            try
            {
                // Tuning has to be applied before documents are added; set afterwards
                // it cannot influence how those documents were buffered and merged.
                writer.SetMergeFactor(30);
                writer.SetMaxBufferedDocs(30);

                foreach (var item in contents)
                {
                    if (IsContentLegal(item))
                    {
                        writer.AddDocument(GetDocument(item));
                    }
                }

                writer.Optimize();
            }
            catch (Exception ex)
            {
                // Still best-effort (the page does not fail), but the failure is now
                // recorded instead of being silently discarded.
                System.Diagnostics.Trace.TraceError("Index rebuild failed: " + ex);
            }
            finally
            {
                // Always release the index write lock, even when indexing failed.
                writer.Close();
            }
        }

//过滤不符合的数据

View Code

/// <summary>
/// Decides whether a content row can be indexed.
/// </summary>
/// <param name="contentInfo">The content row to check; may be null.</param>
/// <returns>
/// true when the row exists and both its Title and ContentBody are non-null;
/// otherwise false.
/// </returns>
private bool IsContentLegal(ContentsModel contentInfo)
        {
            // "&&" had been HTML-escaped to "&amp;&amp;", which is not valid C#.
            return contentInfo != null &&
                   contentInfo.Title != null &&
                   contentInfo.ContentBody != null;
        }

将索引列写入索引文件中

View Code

 /// <summary>
 /// Converts one content row into a Lucene document.
 /// </summary>
 /// <param name="contentModel">The row to convert; Title and ContentBody must be non-null.</param>
 /// <returns>
 /// A document with four stored-only fields (cId, content_thumbnail, issue_datetime,
 /// category_id) and two stored, analyzed fields (content_title, content_body).
 /// </returns>
 private Document GetDocument(ContentsModel contentModel)
        {
            Document doc = new Document();

            // Stored but not indexed: only used to render the result page.
            doc.Add(new Field("cId", contentModel.Id.ToString(), Field.Store.YES, Field.Index.NO));
            // Thumbnail is not covered by the Title/ContentBody validity check, and Field
            // rejects a null value, so a missing thumbnail is stored as an empty string.
            doc.Add(new Field("content_thumbnail", contentModel.Thumbnail ?? string.Empty, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("issue_datetime", contentModel.Issue.ToString(), Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("category_id", contentModel.CategoryId.ToString(), Field.Store.YES, Field.Index.NO));

            // Stored and indexed: these are the searchable fields.
            doc.Add(new Field("content_title", contentModel.Title.ToString(), Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("content_body", contentModel.ContentBody.ToString(), Field.Store.YES, Field.Index.ANALYZED));

            return doc;
        }

搜索结果显示页面 Result.aspx

View Code

<%-- Renders one <ul> per document in hitsOfpage, showing its stored "content_title" field. --%>
<%-- NOTE(review): the title is written with <%= %> and is not HTML-encoded here; confirm stored titles are trusted. --%>
<div>
<%for (int i = 0; i < hitsOfpage.Count; i++){ %>
<ul>
<li>
<span><%=hitsOfpage[i].Get("content_title")%></span>
</li>
 </ul>
<%} %>
</div>

最后,在搜索结果页Result.aspx.cs

//本来有分页的，但我的分页用的是 pager 类，没有用 SQL 存储，所以就不把分页代码贴出来了。

View Code

 /// <summary>
 /// Code-behind of the search result page (Result.aspx).
 /// </summary>
 public partial class Result : System.Web.UI.Page
    {
        // Number of hits shown per result page.
        const int PAGE_SIZE = 7;

        // Paging state and the raw query text.
        protected int totalPageCount;
        protected int currentPageIndex;
        protected string searchp;

        // Documents of the page being rendered; read by the Result.aspx markup.
        // Deliberately an instance field: a static list would be shared by all
        // concurrent requests, letting one request clear or overwrite another's hits.
        protected List<Document> hitsOfpage = new List<Document>();
 /// <summary>
 /// Reads the query ("q") and page number ("page") from the request, runs the
 /// search, and fills hitsOfpage with the documents of the requested page.
 /// Also sets currentPageIndex and totalPageCount.
 /// </summary>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Event data (unused).</param>
 protected void Page_Load(object sender, EventArgs e)
        {
            searchp = Request["q"];

            // A missing or non-numeric "page" makes TryParse yield 0, which the
            // lower bound below turns into the first page.
            int pageIndex;
            int.TryParse((Request.QueryString["page"] ?? string.Empty).Trim(), out pageIndex);
            if (pageIndex < 1)
            {
                pageIndex = 1;
            }

            hitsOfpage.Clear();

            // No query text: show an empty result instead of handing null/blank
            // input to the search code.
            if (searchp == null || searchp.Trim().Length == 0)
            {
                currentPageIndex = 0;
                totalPageCount = 0;
                return;
            }

            Hits hits = SearchQuery(searchp);
            int count = hits.Length();

            // Paging: ceil(count / PAGE_SIZE), computed once.
            totalPageCount = (count + PAGE_SIZE - 1) / PAGE_SIZE;
            // Clamp to the last page; this is 0 when there are no hits.
            currentPageIndex = Math.Min(pageIndex, totalPageCount);

            int startPos = Math.Max((currentPageIndex - 1) * PAGE_SIZE, 0);
            int endPos = Math.Min(currentPageIndex * PAGE_SIZE - 1, count - 1);
            for (int i = startPos; i <= endPos; i++)
            {
                hitsOfpage.Add(hits.Doc(i));
            }
        }

//根据搜索条件查询索引文件返回hits集合

View Code

   /// <summary>
   /// Parses <paramref name="word"/> with the Lucene query syntax and searches the
   /// "content_title" and "content_body" fields of the index.
   /// </summary>
   /// <param name="word">Query text entered by the user.</param>
   /// <returns>The hits matching the parsed query.</returns>
   private Hits SearchQuery(string word)
        {
            // Index location, configured in web.config.
            string indexDir = ConfigurationManager.AppSettings["indexDir"];
            // Fields the query is matched against.
            string[] field = new string[] { "content_title", "content_body" };

            // NOTE(review): reader and searcher are never closed here; the returned Hits
            // is read by the caller afterwards, so ownership/closing needs to be decided
            // at the call site - confirm.
            IndexReader reader = IndexReader.Open(indexDir, true);
            IndexSearcher searcher = new IndexSearcher(reader);

            // Same analyzer version as the one the index is built with, so query text
            // is tokenized the same way as the indexed text.
            Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_29, field, analyzer);
            Query query = queryParser.Parse(word);

            return searcher.Search(query);
        }

2011-11-28 补充：后来发现搜索时如果输入运算符会抛出异常，找到原因后改为先对输入分词、再用 BooleanQuery 组合查询：

/// <summary>
/// Tokenizes <paramref name="querystring"/> with the analyzer and searches
/// "content_title" for documents containing every resulting token.
/// The raw input is never interpreted as query syntax.
/// </summary>
/// <param name="querystring">Query text entered by the user; null is treated as empty.</param>
/// <returns>The hits matching all tokens of the query text.</returns>
private Hits SearchQuery(string querystring)
{
    string indexDir = ConfigurationManager.AppSettings["indexDir"];
    // Same analyzer version as the one the index is built with.
    Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);

    // Build the BooleanQuery: one required clause per analyzed token.
    // NOTE(review): only "content_title" is searched here, whereas the parser-based
    // variant also searched "content_body" - confirm this is intended.
    BooleanQuery bquery = new BooleanQuery();
    TokenStream ts = analyzer.TokenStream(null, new StringReader(querystring ?? string.Empty));
    try
    {
        Lucene.Net.Analysis.Token token;
        while ((token = ts.Next()) != null)
        {
            // The token is already analyzed, so it is used as a term directly rather
            // than being sent through the query parser again.
            Query query = new TermQuery(new Lucene.Net.Index.Term("content_title", token.TermText()));
            bquery.Add(query, BooleanClause.Occur.MUST);
        }
    }
    finally
    {
        ts.Close();
    }

    // NOTE(review): the path is mapped with Server.MapPath here, while the indexing
    // code passes the raw "indexDir" value to IndexWriter - confirm both resolve to
    // the same directory.
    IndexReader reader = IndexReader.Open(Server.MapPath(indexDir), true);
    IndexSearcher searcher = new IndexSearcher(reader);

    return searcher.Search(bquery);
}

【上篇】ASP.NET开发人员需要学习ASP.NET MVC么？
【下篇】Hello World !

作者: lawless

该日志由 lawless 于12年前发表在综合分类下，最后更新于 2012年08月17日.
转载请注明: Lucene学习总结：Lucene搜索过程解析 | 学步园 +复制链接

抱歉!评论已关闭.

学步园

Lucene学习总结：Lucene搜索过程解析

作者: lawless

书签

最新文章New

本站推荐

返回首页