lucene 搜索入门实例 | 学步园

返回顶部
查看留言
转到底部

现在的位置: 首页 > 综合 > 正文

lucene 搜索入门实例

2013年08月05日 ⁄ 综合 ⁄ 共 6107字 ⁄ 字号小中大 ⁄ 评论关闭

Y_indexer.java  建索引
package com.hapark.lucene;
import java.io.BufferedReader;   
import java.io.File;   
import java.io.FileInputStream;   
import java.io.IOException;   
import java.io.InputStreamReader;   
import java.util.ArrayList;
import java.util.Date;   
  
import org.apache.lucene.analysis.Analyzer;   
import org.apache.lucene.analysis.standard.StandardAnalyzer;   
import org.apache.lucene.document.Document;   
import org.apache.lucene.document.Field;   
import org.apache.lucene.index.IndexWriter;   
public class Y_indexer {
      public static void main(String[] args) throws Exception {   
            /* 指明要索引文件夹的位置,这里是D盘的y文件夹下 */  
            File fileDir = new File("d://y");   
      
            /* 这里放索引文件的位置 */  
            File indexDir = new File("d://index");   
            Analyzer luceneAnalyzer = new StandardAnalyzer();   
            IndexWriter indexWriter = new IndexWriter(indexDir, luceneAnalyzer,   
                    true); 
            ArrayList list = new ArrayList();
            getList(fileDir, list);
            File[] textFiles = (File[])list.toArray(new File[0]);
            System.out.println(textFiles.length);
            long startTime = new Date().getTime();   
               
            //增加document到索引去    
            for (int i = 0; i < textFiles.length; i++) {   
                if (textFiles[i].isFile()   
                        && textFiles[i].getName().endsWith(".html")) {   
                    System.out.println("File " + textFiles[i].getCanonicalPath()   
                            + "正在被索引....");   
                    String temp = FileReaderAll(textFiles[i].getCanonicalPath(),   
                            "GBK");   
                    System.out.println(temp);   
                    Document document = new Document();   
                    Field FieldPath = new Field("path", textFiles[i].getPath(),   
                            Field.Store.YES, Field.Index.NO);   
                    Field FieldBody = new Field("body", temp, Field.Store.YES,   
                            Field.Index.TOKENIZED,   
                            Field.TermVector.WITH_POSITIONS_OFFSETS);   
                    Field FieldTitle = new Field("title", temp, Field.Store.YES,   
                            Field.Index.TOKENIZED,   
                            Field.TermVector.WITH_POSITIONS_OFFSETS);   
                    document.add(FieldPath);   
                    document.add(FieldBody);   
                    document.add(FieldTitle);
                    indexWriter.addDocument(document);   
                }   
            }   
            //optimize()方法是对索引进行优化    
            indexWriter.optimize();   
            indexWriter.close();   
               
            //测试一下索引的时间    
            long endTime = new Date().getTime();   
            System.out   
                    .println("这花费了"  
                            + (endTime - startTime)   
                            + " 毫秒来把文档增加到索引里面去!"  
                            + fileDir.getPath());   
        }  
      
      
      /**
       * 多层文件夹
       * @param file
       * @param list
       */
      public static void getList(File file, ArrayList list){
          if(file.isDirectory() && file.getName().indexOf(".") != 0){
              
              File [] file2 = file.listFiles();
              for(int i= 0; i< file2.length; i++)
              getList(file2[i], list);
          }else{
          list.add(file);
          }
      }
      
        public static String FileReaderAll(String FileName, String charset)   
                throws IOException {   
            BufferedReader reader = new BufferedReader(new InputStreamReader(   
                    new FileInputStream(FileName), charset));   
            String line = new String();   
            String temp = new String();   
               
            while ((line = reader.readLine()) != null) {   
                temp += line;   
            }   
            reader.close();   
            return temp;   
        }   
    }  

Y_searcher.java  Lucene 搜索
package com.hapark.lucene;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
public class Y_searcher {
      
     public List search(){
         List searchResult = new ArrayList();//创建一个List接口的一个实例类ArrayList类 
         try{
         Hits hits = null;   
         String key = "苏";   
         Query query = null;   
         IndexSearcher searcher = new IndexSearcher("d://yuyang"); 
            Analyzer analyzer = new StandardAnalyzer();   //创建一个Analyzer接口的一个实例类StandardAnalyzer 
           
                QueryParser qp = new QueryParser("title", analyzer);   
                query = qp.parse(key);   
           
            if (searcher != null) {   
                  Date start=new Date();
                hits = searcher.search(query);   //遍历hist结果的length 
                if(hits.length()==0){
                     System.out.println("对不起。没你想要的结果!");
                }
                else{
                for(int i=0;i<hits.length();i++){ 
                     Date end=new Date();
                  //  System.out.println("找到:" + hits.length() + " Totalresult!");    
                  System.out.println("文件的路径:"+hits.doc(i).get("path"));
                   // System.out.println("内容:"+hits.doc(i).get("body")); 
                    System.out.println(hits.doc(i).get("title"));
                    System.out.println("检索完成，用时" + (end.getTime() - start.getTime()) + "毫秒");
                } 
                }
            }
         }
                catch(ParseException ex){
                      
                  }
                catch(IOException e){
                    
                }
                return searchResult;
                }   
     public static void main(String args[]){
              Y_searcher y_s=new Y_searcher();
              y_s.search();
     }
}

返回

【上篇】如何快速生成100万不重复的8位编号
【下篇】ARM浮点运算

作者: fraternal

该日志由 fraternal 于11年前发表在综合分类下，最后更新于 2013年08月05日.
转载请注明: lucene 搜索入门实例 | 学步园 +复制链接

抱歉!评论已关闭.

返回首页

Copyright © 2013-2018 学步园保留所有权利.
软文销售 QQ客服：2265327166

点击这里给我发消息

（其他合作也可洽谈）