现在的位置: 首页 > 综合 > 正文

Lucene3.4索引文件创建过程(有源码)

2014年08月21日 ⁄ 综合 ⁄ 共 1840字 ⁄ 字号 评论关闭
public class Indexer {
	private IndexWriter writer;
	
	 * main(这里用一句话描述这个方法的作用)   
	public static void main(String[] args) throws IOException {
		String indexDir = "F:/lucene/indexDir";		//指定目录创建索引
		String dataDir = "F:/lucene/dataDir";			//存储数据的目录	
		long beginIndexTime = System.currentTimeMillis();
		Indexer indexer = new Indexer( indexDir );		//生成索引写入器
		int numIndexed = 0;
		
		try {
			numIndexed = indexer.index( dataDir, new TextFilesFilter() );	//开始索引文件
		} catch (Exception e) {
			// TODO: handle exception
			e.printStackTrace();
		}finally {
			indexer.close();
		}
		
		long endIndexTime = System.currentTimeMillis();
		
		System.out.println( "Indexing " + numIndexed + " files took " + (endIndexTime - beginIndexTime) + "miliseconds" );

	}

	public Indexer(String indexDir) throws IOException {
		Directory dir = FSDirectory.open( new File( indexDir ) );		//建立磁盘索引
		writer = new IndexWriter( dir, new StandardAnalyzer( Version.LUCENE_34 ),  IndexWriter.MaxFieldLength.UNLIMITED );
	}

	public void close() throws CorruptIndexException, IOException {
		writer.close();
	}
	
	public int index( String dataDir, FileFilter filter ) throws IOException {
		File[] files = new File( dataDir ).listFiles();
		
		for (File f : files) {
			if ( !f.isDirectory() && 
				 !f.isHidden() &&
				 f.exists() && 
				 f.canRead() &&
				 ( filter == null || filter.accept( f ) ) ) {
				 indexFile( f );
			}
		}
		
		return writer.numDocs();
	}
	
	private void indexFile( File f ) throws IOException {
		System.out.println( "Indexing " + f.getCanonicalPath() );
		Document doc = getDocument( f );
		writer.addDocument( doc );
	}
	
	private Document getDocument( File f ) throws IOException {
		Document doc = new Document();
		doc.add( new Field( "contents" , new FileReader( f )) );
		doc.add( new Field( "filename", f.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED ) );
		doc.add( new Field( "fullpath", f.getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED ) );
		
		return doc;
	}
	
	private static class TextFilesFilter implements FileFilter{

		/* (non-Javadoc)   
		* @see java.io.FileFilter#accept(java.io.File)   
		*/
		@Override
		public boolean accept(File pathname) {
			// TODO Auto-generated method stub
			return pathname.getName().endsWith( ".txt" );
		}
		
	}
}

抱歉!评论已关闭.