现在的位置: 首页 > 综合 > 正文

Lucene过滤查询实例

2012年06月22日 | 综合 | 共 7427字 | 字号 评论关闭
  1 import java.io.IOException;
  2 import java.util.BitSet;
  3 
  4 import org.apache.lucene.analysis.standard.StandardAnalyzer;
  5 import org.apache.lucene.document.Document;
  6 import org.apache.lucene.document.Field;
  7 import org.apache.lucene.index.IndexReader;
  8 import org.apache.lucene.index.IndexWriter;
  9 import org.apache.lucene.index.Term;
 10 import org.apache.lucene.index.TermDocs;
 11 import org.apache.lucene.search.Filter;
 12 import org.apache.lucene.search.Hits;
 13 import org.apache.lucene.search.IndexSearcher;
 14 import org.apache.lucene.search.RangeQuery;
 15 
 16 /**
 17  * 测试过滤Filter
 18  * @author Administrator
 19  *
 20  */
 21 
 22 
 23 public class FilterTest {
 24 
 25  private static final String INDEX_STORE_PATH="d:\\test";
 26  private static final int SECURITY_ADVANCE=0;
 27  private static final int SECURITY_MIDDLE=1;
 28  private static final int SECURITY_NORMAL=2;
 29  
 30  public static void main(String[] args)throws IOException
 31  {
 32   //indexwriter(INDEX_STORE_PATH);
 33   //read(INDEX_STORE_PATH);
 34   System.out.println("****************************");
 35   search(INDEX_STORE_PATH);
 36  }
 37  
 38  /*
 39   * 建立索引
 40   */
 41  public static void indexwriter(String path) throws IOException
 42  {
 43   //建立文档
 44   Document doc1=new Document();
 45   Field f1=new Field("booknumber","0000003",Field.Store.YES,Field.Index.UN_TOKENIZED);
 46         Field f2=new Field("bookname","论宇宙非对称模型",Field.Store.YES,Field.Index.TOKENIZED);
 47         Field f3=new Field("publishdate","1970-01-01",Field.Store.YES,
 48     Field.Index.UN_TOKENIZED);
 49         Field f4=new Field("securitylevel",SECURITY_ADVANCE+"",Field.Store.YES,
 50     Field.Index.UN_TOKENIZED);
 51         doc1.add(f1);
 52         doc1.add(f2);
 53         doc1.add(f3);
 54         doc1.add(f4);
 55         //-----
 56         Document doc2=new Document();
 57    f1=new Field("booknumber","0000005",Field.Store.YES,Field.Index.UN_TOKENIZED);
 58          f2=new Field("bookname","钢铁战士",Field.Store.YES,Field.Index.TOKENIZED);
 59          f3=new Field("publishdate","1995-01-01",Field.Store.YES,
 60     Field.Index.UN_TOKENIZED);
 61          f4=new Field("securitylevel",SECURITY_MIDDLE+"",Field.Store.YES,
 62     Field.Index.UN_TOKENIZED);
 63         doc2.add(f1);
 64         doc2.add(f2);
 65         doc2.add(f3);
 66         doc2.add(f4);
 67         //----
 68         Document doc3=new Document();
 69   f1=new Field("booknumber","0000001",Field.Store.YES,Field.Index.UN_TOKENIZED);
 70         f2=new Field("bookname","相对论",Field.Store.YES,Field.Index.TOKENIZED);
 71         f3=new Field("publishdate","1970-01-01",Field.Store.YES,
 72     Field.Index.UN_TOKENIZED);
 73         f4=new Field("securitylevel",SECURITY_ADVANCE+"",Field.Store.YES,
 74     Field.Index.UN_TOKENIZED);
 75        doc3.add(f1);
 76        doc3.add(f2);
 77        doc3.add(f3);
 78        doc3.add(f4);
 79        //----
 80        Document doc4=new Document();
 81     f1=new Field("booknumber","0000006",Field.Store.YES,Field.Index.UN_TOKENIZED);
 82        f2=new Field("bookname","黑猫警长",Field.Store.YES,Field.Index.TOKENIZED);
 83        f3=new Field("publishdate","1999-01-01",Field.Store.YES,
 84     Field.Index.UN_TOKENIZED);
 85        f4=new Field("securitylevel",SECURITY_NORMAL+"",Field.Store.YES,
 86     Field.Index.UN_TOKENIZED);
 87       doc4.add(f1);
 88       doc4.add(f2);
 89       doc4.add(f3);
 90       doc4.add(f4);
 91       //----
 92       Document doc5=new Document();
 93    f1=new Field("booknumber","0000004",Field.Store.YES,Field.Index.UN_TOKENIZED);
 94       f2=new Field("bookname","原子弹的爆炸模拟",Field.Store.YES,Field.Index.TOKENIZED);
 95       f3=new Field("publishdate","1995-01-01",Field.Store.YES,
 96     Field.Index.UN_TOKENIZED);
 97       f4=new Field("securitylevel",SECURITY_ADVANCE+"",Field.Store.YES,
 98     Field.Index.UN_TOKENIZED);
 99      doc5.add(f1);
100      doc5.add(f2);
101      doc5.add(f3);
102      doc5.add(f4);
103      //----
104      Document doc6=new Document();
105    f1=new Field("booknumber","0000007",Field.Store.YES,Field.Index.UN_TOKENIZED);
106      f2=new Field("bookname","钢铁是怎么炼成的",Field.Store.YES,Field.Index.TOKENIZED);
107      f3=new Field("publishdate","1995-08-01",Field.Store.YES,
108     Field.Index.UN_TOKENIZED);
109      f4=new Field("securitylevel",SECURITY_MIDDLE+"",Field.Store.YES,
110     Field.Index.UN_TOKENIZED);
111     doc6.add(f1);
112     doc6.add(f2);
113     doc6.add(f3);
114     doc6.add(f4);
115      //----
116     Document doc7=new Document();
117    f1=new Field("booknumber","0000004",Field.Store.YES,Field.Index.UN_TOKENIZED);
118     f2=new Field("bookname","白毛女",Field.Store.YES,Field.Index.TOKENIZED);
119     f3=new Field("publishdate","1995-01-01",Field.Store.YES,
120     Field.Index.UN_TOKENIZED);
121     f4=new Field("securitylevel",SECURITY_NORMAL+"",Field.Store.YES,
122     Field.Index.UN_TOKENIZED);
123    doc7.add(f1);
124    doc7.add(f2);
125    doc7.add(f3);
126    doc7.add(f4);
127    
128  //建立索引
129  IndexWriter writer=new IndexWriter( path ,new StandardAnalyzer(),true);
130  writer.setUseCompoundFile(false);
131  writer.addDocument(doc1);
132  writer.addDocument(doc2);
133  writer.addDocument(doc3);
134  writer.addDocument(doc4);
135  writer.addDocument(doc5);
136  writer.addDocument(doc6);
137  writer.addDocument(doc7);
138  writer.close();
139  System.out.printf("建立索引完毕!\n");
140  }
141  /*
142   * IndexReader阅读索引
143   */
144  public static void read(String path) throws IOException
145  {
146   IndexReader reader=IndexReader.open(path);
147   for(int i=0;i<reader.numDocs();i++)
148   {
149    Document doc=reader.document(i);
150    System.out.print("书号:");
151    System.out.println(doc.get("booknumber"));
152    System.out.print("书名:");
153    System.out.println(doc.get("bookname"));
154    System.out.print("发布日期:");
155    System.out.println(doc.get("publishdate"));
156    System.out.print("安全级别:");
157    int leve=Integer.parseInt(doc.get("securitylevel"));
158    switch(leve)
159    {
160    case SECURITY_ADVANCE:
161     System.out.println("高级");
162     break;
163    case SECURITY_MIDDLE:
164     System.out.println("中级");
165     break;
166    case SECURITY_NORMAL:
167     System.out.println("低级");
168     break;
169    default:
170     break;
171    }
172    System.out.println("-------------------------");
173   }
174  }
175  
176    /*
177     * 过滤检索
178     */
179  public static void search(String path) throws IOException
180  {
181   //构建一个RangeQuery
182   Term begin=new Term("publishdate","1900-01-01");
183   Term end=new Term("publishdate","2010-01-01");
184   RangeQuery query=new RangeQuery(begin,end,true);
185   //
186   IndexSearcher searcher=new IndexSearcher(path);
187   //使用AdvancedSecurity来做过滤器
188   AdvancedSecurity filter=new AdvancedSecurity();
189   //使用字符过滤
190   /*String text="钢铁";
191   TestFilter filter=new TestFilter(text);  */
192   Hits hits=searcher.search(query, filter);
193   //
194   for(int i=0;i<hits.length();i++)
195   {
196    Document doc=hits.doc(i);
197    System.out.print("书号:");
198    System.out.println(doc.get("booknumber"));
199    System.out.print("书名:");
200    System.out.println(doc.get("bookname"));
201    System.out.print("发布日期:");
202    System.out.println(doc.get("publishdate"));
203    System.out.print("安全级别:");
204    int leve=Integer.parseInt(doc.get("securitylevel"));
205    switch(leve)
206    {
207    case SECURITY_ADVANCE:
208     System.out.println("高级");
209     break;
210    case SECURITY_MIDDLE:
211     System.out.println("中级");
212     break;
213    case SECURITY_NORMAL:
214     System.out.println("低级");
215     break;
216    default:
217     break;
218    }
219    System.out.println("-------------------------");
220      }
221  }
222  }
223 
224 
225 class AdvancedSecurity extends Filter  //过滤安全级别为高级级别的
226 {
227  public static final int SECURITY_ADVACED=0;
228  //继承自Filter的抽象类
229  public BitSet bits(IndexReader reader) throws IOException
230  {
231   //初始化一个Bitset对象
232   final BitSet bits=new BitSet(reader.maxDoc());
233   //将整个集合至于全部true
234   //以至于全部文档都可以被检索
235   bits.set(0,bits.size()-1);
236   
237   //构造一个Trem对象,代表安全级别
238   Term term=new Term("securitylevel",SECURITY_ADVACED+"");
239   //从检索中取出高级级别的文档
240   TermDocs termdocs=reader.termDocs(term);
241   
242   //遍历每个文档
243   while(termdocs.next())
244    bits.set(termdocs.doc(), false);
245   return bits;
246   
247  }
248 }
249 
250 class TestFilter extends Filter   //过滤包含某个关键字的文档
251 {
252  public String FilterText;
253  //重构
254  public  TestFilter(String text)
255  {
256   //
257   this.FilterText=text;
258  }
259  
260 
261  
262  public  BitSet bits(IndexReader reader) throws IOException
263  {
264   final BitSet bits=new BitSet(reader.maxDoc());
265   bits.set(0,bits.size()-1);
266   //
267   Term term=new Term("bookname",FilterText);
268   TermDocs tdocs=reader.termDocs(term);
269   
270   //遍历每个文档
271   while(tdocs.next())
272    bits.set(tdocs.doc(), false);
273   
274   return bits;
275  }
276 }

 

抱歉!评论已关闭.