- Y_indexer.java 建索引
- package com.hapark.lucene;
- import java.io.BufferedReader;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.IOException;
- import java.io.InputStreamReader;
- import java.util.ArrayList;
- import java.util.Date;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.IndexWriter;
- public class Y_indexer {
- public static void main(String[] args) throws Exception {
- /* 指明要索引文件夹的位置,这里是D盘的y文件夹下 */
- File fileDir = new File("d://y");
- /* 这里放索引文件的位置 */
- File indexDir = new File("d://index");
- Analyzer luceneAnalyzer = new StandardAnalyzer();
- IndexWriter indexWriter = new IndexWriter(indexDir, luceneAnalyzer,
- true);
- ArrayList list = new ArrayList();
- getList(fileDir, list);
- File[] textFiles = (File[])list.toArray(new File[0]);
- System.out.println(textFiles.length);
- long startTime = new Date().getTime();
- //增加document到索引去
- for (int i = 0; i < textFiles.length; i++) {
- if (textFiles[i].isFile()
- && textFiles[i].getName().endsWith(".html")) {
- System.out.println("File " + textFiles[i].getCanonicalPath()
- + "正在被索引....");
- String temp = FileReaderAll(textFiles[i].getCanonicalPath(),
- "GBK");
- System.out.println(temp);
- Document document = new Document();
- Field FieldPath = new Field("path", textFiles[i].getPath(),
- Field.Store.YES, Field.Index.NO);
- Field FieldBody = new Field("body", temp, Field.Store.YES,
- Field.Index.TOKENIZED,
- Field.TermVector.WITH_POSITIONS_OFFSETS);
- Field FieldTitle = new Field("title", temp, Field.Store.YES,
- Field.Index.TOKENIZED,
- Field.TermVector.WITH_POSITIONS_OFFSETS);
- document.add(FieldPath);
- document.add(FieldBody);
- document.add(FieldTitle);
- indexWriter.addDocument(document);
- }
- }
- //optimize()方法是对索引进行优化
- indexWriter.optimize();
- indexWriter.close();
- //测试一下索引的时间
- long endTime = new Date().getTime();
- System.out
- .println("这花费了"
- + (endTime - startTime)
- + " 毫秒来把文档增加到索引里面去!"
- + fileDir.getPath());
- }
- /**
- * 多层文件夹
- * @param file
- * @param list
- */
- public static void getList(File file, ArrayList list){
- if(file.isDirectory() && file.getName().indexOf(".") != 0){
- File [] file2 = file.listFiles();
- for(int i= 0; i< file2.length; i++)
- getList(file2[i], list);
- }else{
- list.add(file);
- }
- }
- public static String FileReaderAll(String FileName, String charset)
- throws IOException {
- BufferedReader reader = new BufferedReader(new InputStreamReader(
- new FileInputStream(FileName), charset));
- String line = new String();
- String temp = new String();
- while ((line = reader.readLine()) != null) {
- temp += line;
- }
- reader.close();
- return temp;
- }
- }
- Y_searcher.java Lucene 搜索
- package com.hapark.lucene;
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.Date;
- import java.util.List;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.queryParser.ParseException;
- import org.apache.lucene.queryParser.QueryParser;
- import org.apache.lucene.search.Hits;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- public class Y_searcher {
- public List search(){
- List searchResult = new ArrayList();//创建一个List接口的一个实例类ArrayList类
- try{
- Hits hits = null;
- String key = "苏";
- Query query = null;
- IndexSearcher searcher = new IndexSearcher("d://yuyang");
- Analyzer analyzer = new StandardAnalyzer(); //创建一个Analyzer接口的一个实例类StandardAnalyzer
- QueryParser qp = new QueryParser("title", analyzer);
- query = qp.parse(key);
- if (searcher != null) {
- Date start=new Date();
- hits = searcher.search(query); //遍历hist结果的length
- if(hits.length()==0){
- System.out.println("对不起。没你想要的结果!");
- }
- else{
- for(int i=0;i<hits.length();i++){
- Date end=new Date();
- // System.out.println("找到:" + hits.length() + " Totalresult!");
- System.out.println("文件的路径:"+hits.doc(i).get("path"));
- // System.out.println("内容:"+hits.doc(i).get("body"));
- System.out.println(hits.doc(i).get("title"));
- System.out.println("检索完成,用时" + (end.getTime() - start.getTime()) + "毫秒");
- }
- }
- }
- }
- catch(ParseException ex){
- }
- catch(IOException e){
- }
- return searchResult;
- }
- public static void main(String args[]){
- Y_searcher y_s=new Y_searcher();
- y_s.search();
- }
- }