现在的位置: 首页 > 综合 > 正文

Lucene.NET 开发实现

2013年03月27日 ⁄ 综合 ⁄ 共 9682字 ⁄ 字号 评论关闭

最近在帮一个朋友的忙,为他们的一个软件设计架构。该应用程序的某个核心逻辑涉及的数据量较大,客户对查询性能的要求又很高。这种需求除了在数据库设计上要考虑水平分表、分区视图之类的方案,在程序中也要考虑效率问题,于是决定使用LUCENE.NET为核心数据建立索引文件,做“假”全文搜索。这样就算数据量有千万级别,查询也能在几秒钟内完成,对性能还是有很大帮助。之前没有仔细了解过LUCENE方面的技术,正好借此学习一下。

LUCENE.NET是从JAVA版Lucene移植到.NET平台上的开源技术,技术资料也很丰富。

以下是创建索引代码:

  /// <summary>
  /// Loads auto-part records page by page through <c>CustomerQuery.QueryAutoParts</c>,
  /// keeps the ones accepted by <c>IsValidProduct</c>, and hands the collected list
  /// to <c>Write</c> so the index is built from it.
  /// </summary>
  public static void Run()
        {
            // The paging state is passed by ref, so the query may update it between iterations.
            QryPage pager = new QryPage
            {
                PerPageSize = 350,
                PageNumber = 0,
                PageCount = 10000,
                NeedInitPageNo = false
            };
            List<AutoParts> collected = new List<AutoParts>();

            for (; pager.PageNumber < pager.PageCount; pager.PageNumber++)
            {
                // Fetch the data to be indexed for the current page.
                IList<AutoParts> pageItems = new CustomerQuery().QueryAutoParts(new AutoPartDTO(), ref pager);
                foreach (AutoParts item in pageItems)
                {
                    if (IsValidProduct(item))
                    {
                        collected.Add(item);
                    }
                }
            }

            // Write search item index to file.
            Write(collected);
        }

        /// <summary>
        /// Writes the given auto parts to the index by delegating to <c>build</c>.
        /// </summary>
        /// <param name="packages">Auto parts to be indexed.</param>
        public static void Write(List<AutoParts> packages)
        {
            build(packages);
        }

        /// <summary>
        /// Creates a fresh index at <c>Common.ProductIndexPath</c> (the writer is opened
        /// with create = true), adds one document per entry of
        /// <paramref name="packages"/>, optimizes the index and logs the elapsed time.
        /// </summary>
        /// <param name="packages">Auto parts to be indexed.</param>
        /// <remarks>
        /// Failures while indexing are reported to the console and not rethrown.
        /// The writer is always closed, so a failure does not leave the index locked.
        /// </remarks>
        public static void build( List<AutoParts> packages)
        {
            var writer = new IndexWriter(Common.ProductIndexPath, new EsayTooAnalyzer(), true);
            try
            {
                DateTime start = System.DateTime.Now;
                try
                {
                    writer.SetMaxFieldLength(1000);
                    writer.SetUseCompoundFile(true);
                    Logger.Info("Indexing to directory '" + Common.ProductIndexPath + "'...");
                    indexDocs(writer, packages);

                    Logger.Info("Optimizing...");
                    writer.Optimize();
                }
                finally
                {
                    // Close even when indexing or optimizing fails; otherwise the
                    // writer keeps its lock on the index directory.
                    writer.Close();
                }

                DateTime end = System.DateTime.Now;

                // Ticks are 100 ns units, so a raw tick difference is not milliseconds;
                // report the real elapsed milliseconds instead.
                Logger.Info((end - start).TotalMilliseconds + " total milliseconds");
            }
            catch (Exception e)
            {
                Console.WriteLine(e.Message);
            }
        }

        /// <summary>
        /// Replaces the indexed document of an auto part: deletes every document whose
        /// "id" term equals <c>dto.Id</c>, then adds a fresh document built from
        /// <paramref name="dto"/> and optimizes the index.
        /// </summary>
        /// <param name="dto">The auto part whose index entry is refreshed.</param>
        /// <remarks>
        /// Errors are written to the console and not rethrown.
        /// </remarks>
        public static void UpdateIndex(AutoParts dto)
        {
            try
            {
                Term tm = new Term("id", dto.Id.ToString());

                // Delete through a reader and close it BEFORE opening the writer:
                // closing commits the deletion and releases the lock the reader
                // acquired. Leaving it open makes the delete appear to have no effect
                // and blocks the writer below.
                var reader = IndexReader.Open(Common.ProductIndexPath);
                try
                {
                    reader.DeleteDocuments(tm);
                }
                finally
                {
                    reader.Close();
                }

                var writer = new IndexWriter(Common.ProductIndexPath, new EsayTooAnalyzer(), false);
                try
                {
                    AddDocument(dto, writer);
                    writer.Optimize();
                }
                finally
                {
                    // Always release the writer, even when adding the document fails.
                    writer.Close();
                }
            }
            catch (Exception e)
            {
                Console.WriteLine("添加索引出错,配件ID:" + dto.Id + "\n");
                Console.Write(e.Message);
            }
        }
        /// <summary>
        /// Appends one auto part to the existing index (the writer is opened with
        /// create = false) and optimizes the index afterwards.
        /// </summary>
        /// <param name="dto">The auto part to add.</param>
        /// <remarks>
        /// On failure the error is written to the console and the original exception
        /// is rethrown. The writer is closed in every case.
        /// </remarks>
        public static void AddDocument(AutoParts dto)
        {
            try
            {
                var writer = new IndexWriter(Common.ProductIndexPath, new EsayTooAnalyzer(), false);
                try
                {
                    AddDocument(dto, writer);
                    writer.Optimize();
                }
                finally
                {
                    // Release the writer even when adding or optimizing throws,
                    // so the index is not left locked.
                    writer.Close();
                }
            }
            catch (Exception e)
            {
                Console.WriteLine("添加索引出错,配件ID:"+dto.Id+"\n");
                Console.WriteLine(e.Message);

                throw;
            }
        }

        /// <summary>
        /// Builds a Lucene document from <paramref name="package"/> and adds it through
        /// <paramref name="getWriter"/>.
        /// </summary>
        /// <param name="package">The auto part whose values are copied into the document.</param>
        /// <param name="getWriter">An open index writer; it is not closed here.</param>
        /// <remarks>
        /// "id", "CarCategoryId", "Name", "Code" and "ModifiedTime" are indexed as single
        /// untokenized terms, "CarTypeTags" is tokenized by the writer's analyzer, and all
        /// remaining fields are stored only (not searchable).
        /// </remarks>
        private static void AddDocument(AutoParts package, IndexWriter getWriter)
        {
            Document doc = new Document();

            // Searchable key fields (exact-match terms).
            doc.Add(new Field("id", package.Id.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
            doc.Add(new Field("CarCategoryId", package.CarCategoryId.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
            doc.Add(new Field("Name", package.Name, Field.Store.YES, Field.Index.UN_TOKENIZED));
            doc.Add(new Field("Code", package.Code, Field.Store.YES, Field.Index.UN_TOKENIZED));

            // Stored-only payload fields.
            doc.Add(new Field("FSPrice", package.FSPrice, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("YCPrice", package.YCPrice, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("YCCost", package.YCCost, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("YCSupplier", package.YCSupplier, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("FCPrice", package.FCPrice, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("FCCost", package.FCCost, Field.Store.YES, Field.Index.NO));
            // Fixed: this field previously stored package.FCCost (copy-paste slip).
            doc.Add(new Field("FCSupplier", package.FCSupplier, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("CCPrice", package.CCPrice, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("CCCost", package.CCCost, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("CCSupplier", package.CCSupplier, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("Repire", package.Repire, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("AskPriceInfo", package.AskPriceInfo, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("AskCustomer", package.AskCustomer, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("Description", package.Description, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("Picture1", package.Picture1.ToString(), Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("Picture2", package.Picture2.ToString(), Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("Picture3", package.Picture3.ToString(), Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("IsAvaliable", package.IsAvaliable.ToString(), Field.Store.YES, Field.Index.NO));

            // Tokenized so individual tags can be matched.
            doc.Add(new Field("CarTypeTags", package.CarTypeTags, Field.Store.YES, Field.Index.TOKENIZED));

            // NOTE(review): ToShortDateString() is culture-dependent and its output may not
            // sort chronologically as text; confirm this suits range queries on this field.
            doc.Add(new Field("ModifiedTime", package.ModifiedTime.ToShortDateString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
            doc.Add(new Field("ModifiedBy", package.ModifiedBy, Field.Store.YES, Field.Index.NO));

            getWriter.AddDocument(doc);
        }

        /// <summary>
        /// Adds every entry of <paramref name="packages"/> to the index through
        /// <paramref name="writer"/>, printing a running 1-based counter to the console.
        /// </summary>
        /// <param name="writer">An open index writer.</param>
        /// <param name="packages">Auto parts to be indexed.</param>
        /// <remarks>
        /// The first exception stops the loop; its message is written to the console
        /// and the exception is not rethrown.
        /// </remarks>
        private static void indexDocs(IndexWriter writer, List<AutoParts> packages)
        {
            try
            {
                for (int position = 0; position < packages.Count; position++)
                {
                    Console.WriteLine("生成索引顺序" + (position + 1));
                    AddDocument(packages[position], writer);
                }
            }
            catch (Exception error)
            {
                Console.Write(error.Message);
            }
        }
        /// <summary>
        /// Decides whether an auto part should be indexed.
        /// The current implementation accepts every item.
        /// </summary>
        /// <param name="autoParts">The candidate item (not inspected).</param>
        /// <returns>Always <c>true</c>.</returns>
        private static bool IsValidProduct(AutoParts autoParts)
        {
            const bool accepted = true;
            return accepted;
        }
    }

  其中更新索引的方法还在调试,因为发现删除索引的操作不成功。

 下面是查询的核心算法,其中也包含了分页查询,完全可以按照与数据库一致的方式来进行查询,核心数据的底层查询按如下方式实现即可:

/// <summary>
/// Runs a paged search against the index at <c>Common.ProductIndexPath</c>.
/// </summary>
/// <param name="dto">Search criteria; turned into a query by <c>CreateQuery</c> and
/// wrapped with the filters from <c>CreateFilter</c>.</param>
/// <param name="page">Paging state. A <c>PageNumber</c> of 0 is treated as page 1;
/// <c>TotalCount</c> and <c>PageCount</c> are filled in from the search result.</param>
/// <returns>
/// The hits of the requested page. When the search itself fails, the error is written
/// to the console and an empty list is returned.
/// </returns>
public static List<AutoPartDTO> Query(QueryCritiriaDTO dto, ref QryPage page)// int pageIndex, int pageSize, out int totalRec)
        {
            if (page.PageNumber == 0)
            {
                page.PageNumber = 1;
            }

            Sort sort = new Sort(new SortField("id", SortField.DOC, false));

            Query query = CreateQuery(dto);
            MutiFilter filter = CreateFilter(dto);

            query = filter.getFilterQuery(query);

            var productIndexReader = IndexReader.Open(Common.ProductIndexPath);
            IndexSearcher searcher = new IndexSearcher(productIndexReader);
            try
            {
                // Fetch enough top hits to cover every page up to the requested one.
                TopDocs topDocs = searcher.Search(query, null, page.PageNumber * page.PerPageSize, sort);
                page.TotalCount = topDocs.totalHits;
                page.PageCount = (int)Math.Ceiling((decimal)page.TotalCount / (decimal)page.PerPageSize);

                // With at most one page, every returned hit belongs to the result.
                if (page.PageCount == 1 || page.PageCount == 0)
                {
                    return TopDocs2Data(searcher, topDocs.scoreDocs);
                }

                return TopDocs2Data(searcher, topDocs.scoreDocs, page);
            }
            catch (Exception e)
            {
                Console.WriteLine("查询出错");
                Console.WriteLine(e.Message);
                return new List<AutoPartDTO>();
            }
            finally
            {
                searcher.Close();

                // A searcher built on an existing reader does not close that reader,
                // so it must be closed here to avoid leaking index file handles.
                productIndexReader.Close();
            }
        }

        /// <summary>
        /// Translates the search criteria into a boolean query in which every
        /// supplied criterion is a required (MUST) clause.
        /// </summary>
        /// <param name="dto">Search criteria; unset criteria add no clause.</param>
        /// <returns>The combined query; it has no clauses when no criterion is set.</returns>
        private static Query CreateQuery(QueryCritiriaDTO dto)
        {
            var criteria = new BooleanQuery();

            // Category values -1 and 0 are skipped (presumably "not selected" markers).
            if (!(dto.CatetoryL3 == -1 || dto.CatetoryL3 == 0))
            {
                Term categoryTerm = new Term("CarCategoryId", dto.CatetoryL3.ToString());
                criteria.Add(new TermQuery(categoryTerm), BooleanClause.Occur.MUST);
            }

            if (!(dto.CatetoryL4 == -1 || dto.CatetoryL4 == 0))
            {
                // Fuzzy match against the tokenized tag field, minimum similarity 0.3.
                Term tagTerm = new Term("CarTypeTags", dto.CatetoryL4.ToString());
                criteria.Add(new FuzzyQuery(tagTerm, 0.3f), BooleanClause.Occur.MUST);
            }

            if (string.IsNullOrEmpty(dto.Name) == false)
            {
                Term nameTerm = new Term("Name", dto.Name);
                criteria.Add(new TermQuery(nameTerm), BooleanClause.Occur.MUST);
            }

            if (string.IsNullOrEmpty(dto.Code) == false)
            {
                Term codeTerm = new Term("Code", dto.Code);
                criteria.Add(new TermQuery(codeTerm), BooleanClause.Occur.MUST);
            }

            if (string.IsNullOrEmpty(dto.SupplierId) == false)
            {
                // NOTE(review): the indexing code shown with this class writes no
                // "SupplierId" field; confirm that this clause can ever match.
                Term supplierTerm = new Term("SupplierId", dto.SupplierId);
                criteria.Add(new TermQuery(supplierTerm), BooleanClause.Occur.MUST);
            }

            return criteria;
        }

        /// <summary>
        /// Builds the filter set for a search. A "ModifiedTime" range filter is added
        /// only when both <c>dto.Start</c> and <c>dto.End</c> differ from
        /// <c>CP.Utils.DateTimeUtil.MIN_DATETIME</c>.
        /// </summary>
        /// <param name="dto">Search criteria carrying the optional date range.</param>
        /// <returns>A filter set, possibly without any filter.</returns>
        private static MutiFilter CreateFilter(QueryCritiriaDTO dto)
        {
            MutiFilter filters = new MutiFilter();

            bool rangeMissing = dto.Start == CP.Utils.DateTimeUtil.MIN_DATETIME
                                || dto.End == CP.Utils.DateTimeUtil.MIN_DATETIME;
            if (rangeMissing)
            {
                return filters;
            }

            // Both bounds are formatted the same way as the indexed "ModifiedTime" value.
            string lowerBound = dto.Start.ToShortDateString();
            string upperBound = dto.End.ToShortDateString();
            filters.AddRangeFilter("ModifiedTime", lowerBound, upperBound);

            return filters;
        }

        #region 获取最终的数据
        /// <summary>
        /// Converts the hits of the requested page into DTOs.
        /// </summary>
        /// <param name="searcher">Searcher used to load each hit's stored document.</param>
        /// <param name="scoreDoc">All hits returned by the search, in result order.</param>
        /// <param name="page">Paging state; <c>PageNumber</c>, <c>PerPageSize</c> and
        /// <c>TotalCount</c> determine which slice of the hits is converted.</param>
        /// <returns>One DTO (Id and Name only) per hit on the requested page.</returns>
        private static List<AutoPartDTO> TopDocs2Data(IndexSearcher searcher, ScoreDoc[] scoreDoc, QryPage page)// int pageIndex, int pageSize, int totalRec)
        {
            int first = (page.PageNumber - 1) * page.PerPageSize;
            // The last page may be partial, so never read past the total hit count.
            int last = Math.Min(page.PageNumber * page.PerPageSize, page.TotalCount);

            List<AutoPartDTO> results = new List<AutoPartDTO>();
            int position = first;
            while (position < last)
            {
                Document stored = searcher.Doc(scoreDoc[position].doc);
                results.Add(new AutoPartDTO
                {
                    Id = long.Parse(stored.Get("id")),
                    Name = stored.Get("Name")
                });
                position++;
            }

            return results;
        }
        /// <summary>
        /// Converts every given hit into a DTO.
        /// </summary>
        /// <param name="searcher">Searcher used to load each hit's stored document.</param>
        /// <param name="docs">Hits returned by the search; may be null or empty.</param>
        /// <returns>
        /// One DTO (Id and Name only) per hit. An empty list, never null, when there are
        /// no hits, matching what <c>Query</c> returns on its error path.
        /// </returns>
        private static List<AutoPartDTO> TopDocs2Data(IndexSearcher searcher,  ScoreDoc[] docs)
        {
            List<AutoPartDTO> list = new List<AutoPartDTO>();

            // Return an empty list rather than null so callers can enumerate the
            // result without a null check.
            if (docs == null || docs.Length == 0)
            {
                return list;
            }

            foreach (ScoreDoc sd in docs)
            {
                Document doc = searcher.Doc(sd.doc);
                AutoPartDTO autoPartDto = new AutoPartDTO() { };

                autoPartDto.Id = long.Parse(doc.Get("id"));
                autoPartDto.Name = doc.Get("Name");

                list.Add(autoPartDto);
            }
            return list;
        }
        #endregion
    }
    /// <summary>
    /// Shared settings for the index code.
    /// </summary>
    public class Common
    {
        /// <summary>
        /// Full path of the index directory: "auto.index" under the application's
        /// base directory.
        /// </summary>
        public static string ProductIndexPath
        {
            get { return IndexStoredDirectory; }
        }

        // Path.Combine inserts the directory separator only when the base directory
        // does not already end with one, so the result no longer depends on a
        // trailing separator being present.
        private static readonly string IndexStoredDirectory =
            System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "auto.index");

    }

    /// <summary>
    /// Collects Lucene filters and applies all of them to a query by nesting
    /// <c>FilteredQuery</c> wrappers, one per filter.
    /// </summary>
    public class MutiFilter
    {
        // Filters in the order they were added.
        private readonly List<Filter> filterList = new List<Filter>();

        public MutiFilter()
        {
        }

        /// <summary>
        /// Adds a filter that keeps only documents containing the exact term.
        /// </summary>
        /// <param name="Field">Name of the indexed field.</param>
        /// <param name="Value">Term text that must be present in the field.</param>
        public void AddFilter(String Field, String Value)
        {
            TermQuery exactMatch = new TermQuery(new Term(Field, Value));
            // Appended to the list, so several filters can be stacked.
            filterList.Add(new QueryFilter(exactMatch));
        }

        /// <summary>
        /// Adds an inclusive term-range filter on one field.
        /// </summary>
        /// <param name="Field">Name of the indexed field.</param>
        /// <param name="start">Lower bound term text.</param>
        /// <param name="end">Upper bound term text.</param>
        public void AddRangeFilter(string Field, string start, string end)
        {
            Term lower = new Term(Field, start);
            Term upper = new Term(Field, end);
            RangeQuery range = new RangeQuery(lower, upper, true);
            // Appended to the list, so several filters can be stacked.
            filterList.Add(new QueryFilter(range));
        }

        /// <summary>
        /// Wraps <paramref name="query"/> once per collected filter.
        /// </summary>
        /// <param name="query">The query to restrict.</param>
        /// <returns>The wrapped query, or the unchanged query when no filter was added.</returns>
        public Query getFilterQuery(Query query)
        {
            Query wrapped = query;
            foreach (Filter current in filterList)
            {
                // Each filter narrows the result of the query built so far.
                wrapped = new FilteredQuery(wrapped, current);
            }
            return wrapped;
        }

    }
   
    /// <summary>
    /// Character tokenizer that treats the ASCII comma and the space as the only
    /// separators; every other character is part of a token.
    /// </summary>
    public class EsayTooTokenizer : CharTokenizer
    {
        public EsayTooTokenizer(TextReader reader)
            : base(reader)
        {
        }

        /// <summary>
        /// Tells the base tokenizer whether <paramref name="c"/> belongs to a token.
        /// </summary>
        /// <param name="c">The character being examined.</param>
        /// <returns><c>false</c> for ',' and ' ', <c>true</c> otherwise.</returns>
        protected override bool IsTokenChar(char c)
        {
            return c != ',' && c != ' ';
        }
    }

    /// <summary>
    /// Minimal custom analyzer: every field is tokenized by <c>EsayTooTokenizer</c>
    /// with no further token filtering.
    /// </summary>
    public class EsayTooAnalyzer : Analyzer
    {
        public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
        {
            TokenStream tokens = new EsayTooTokenizer(reader);
            return tokens;
        }
    }

  

抱歉!评论已关闭.