现在的位置: 首页 > 综合 > 正文

solr创建文件索引代码

2018年04月18日 ⁄ 综合 ⁄ 共 3630字 ⁄ 字号 评论关闭

废话就不多说了,直接上代码,其中索引的字段需要自己在配置文件中配置,这个相信不用多说了吧。

package com.search.commons;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.ResourceBundle;

import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;


/**
 * Indexes files into Solr through the extracting request handler ({@code /update/extract}).
 *
 * <p>Intended usage, per the original author: query the file paths of each document category
 * (e.g. environmental-impact-assessment approvals, reports, ...) with SQL, collect them in a
 * list, and call this class once per file against the matching core so that every category gets
 * its own index. Searching is done with the companion class SolrJSearcheDemo.
 *
 * @author Administrator
 */
public class TestCreate {
    private static final Logger log = Logger.getLogger(TestCreate.class);

    /** Request handler path the files are posted to. */
    private static final String EXTRACT_HANDLER = "/update/extract";

    /** Marker returned by {@link #getFileContentType(String)} for extensions that are not indexed. */
    private static final String UNSUPPORTED_TYPE = "othertype";

    /** Compact timestamp layout, e.g. {@code 20140626151906}. */
    private static final String COMPACT_PATTERN = "yyyyMMddHHmmss";

    /** Dashed timestamp layout, e.g. {@code 2014-06-26 15:19:06}. */
    private static final String DASHED_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /**
     * Sends one file to Solr for text extraction and indexing, then commits.
     *
     * <p>Files whose extension is not recognised by {@link #getFileContentType(String)} and files
     * that do not exist on disk are skipped without contacting Solr.
     *
     * @param id          value stored in the document's {@code id} field
     * @param fileurl     path of the file to index
     * @param filename    file name; only its extension is used, to pick the content type
     * @param homename    core name, appended to the configured server URL
     * @param mytitle     value stored in the {@code mytitle} field
     * @param savetime    timestamp in {@code yyyyMMddHHmmss} or {@code yyyy-MM-dd HH:mm:ss} form;
     *                    stored in {@code mytime} as epoch milliseconds
     * @param myindextype value stored in the {@code myindextype} field
     * @param myyears     value stored in the {@code myyears} field
     * @throws IllegalArgumentException if {@code savetime} cannot be parsed
     * @throws Exception if building or sending the update request fails
     */
    public static void indexFilesSolr(String id, String fileurl, String filename, String homename,
            String mytitle, String savetime, String myindextype, String myyears) throws Exception {
        String contenttype = getFileContentType(filename);
        if (UNSUPPORTED_TYPE.equals(contenttype)) {
            return;
        }

        File file = new File(fileurl);
        if (!file.exists()) {
            log.warn("Skipping index, file does not exist: " + fileurl);
            return;
        }

        // Convert the timestamp before opening a connection so bad input fails early.
        String mytime = dataTurntoLong(savetime);

        ContentStreamUpdateRequest up = new ContentStreamUpdateRequest(EXTRACT_HANDLER);
        up.addFile(file, contenttype);
        // literal.<field> attaches a fixed value to the extracted document.
        up.setParam("literal.id", id);
        up.setParam("literal.mytitle", mytitle);
        up.setParam("literal.mytime", mytime);
        up.setParam("literal.myindextype", myindextype);
        up.setParam("literal.myyears", myyears);
        // Map the extracted body text onto the "content" field.
        up.setParam("fmap.content", "content");
        up.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);

        SolrServer solr = new HttpSolrServer(getServerurl() + homename);
        try {
            log.info("开始建索引:" + fileurl);
            solr.request(up);
            log.info("结束建索引:" + fileurl);
        } finally {
            // Release the HTTP client owned by this server instance; one is created per call.
            solr.shutdown();
        }
    }

    /**
     * Converts a timestamp string to epoch milliseconds, returned as a decimal string.
     *
     * <p>Accepts either {@code yyyyMMddHHmmss} or {@code yyyy-MM-dd HH:mm:ss}; the dashed layout
     * is chosen when the input contains a {@code '-'}. The value is interpreted in the JVM's
     * default time zone.
     *
     * @param date the timestamp text
     * @return milliseconds since the epoch, as a string
     * @throws IllegalArgumentException if {@code date} is null or matches neither layout
     */
    public static String dataTurntoLong(String date) {
        if (date == null) {
            throw new IllegalArgumentException("date must not be null");
        }
        String text = date.trim();
        String pattern = text.indexOf('-') >= 0 ? DASHED_PATTERN : COMPACT_PATTERN;
        try {
            // SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
            Date parsed = new SimpleDateFormat(pattern).parse(text);
            return String.valueOf(parsed.getTime());
        } catch (ParseException e) {
            throw new IllegalArgumentException(
                    "Unparseable date \"" + date + "\"; expected " + COMPACT_PATTERN
                            + " or " + DASHED_PATTERN, e);
        }
    }

    /**
     * Reads the Solr base URL from the {@code serverurl} key of the {@code solrserver}
     * resource bundle.
     *
     * @return the configured server URL
     */
    public static String getServerurl() {
        ResourceBundle res = ResourceBundle.getBundle("solrserver");
        return res.getString("serverurl");
    }

    /**
     * Maps a file name to a MIME content type based on its extension (case-insensitive).
     *
     * @param filename the file name
     * @return the content type for xlsx, pdf, doc, txt, xls, docx, ppt and pptx files, or
     *         {@code "othertype"} for anything else, including null or extension-less names
     */
    public static String getFileContentType(String filename) {
        if (filename == null) {
            return UNSUPPORTED_TYPE;
        }
        int dot = filename.lastIndexOf('.');
        if (dot < 0) {
            // No extension at all: do not compare the whole name against the extension list.
            return UNSUPPORTED_TYPE;
        }
        String extension = filename.substring(dot + 1).toLowerCase(Locale.ROOT);
        switch (extension) {
            case "xlsx":
                return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
            case "pdf":
                return "application/pdf";
            case "doc":
                return "application/msword";
            case "txt":
                return "text/plain";
            case "xls":
                return "application/vnd.ms-excel";
            case "docx":
                return "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
            case "ppt":
                return "application/vnd.ms-powerpoint";
            case "pptx":
                return "application/vnd.openxmlformats-officedocument.presentationml.presentation";
            default:
                return UNSUPPORTED_TYPE;
        }
    }

    /**
     * Asks the platform to probe the content type of a file and logs the result.
     *
     * @param paths path of the file to probe
     * @return the probed content type, or {@code null} if it could not be determined or an
     *         I/O error occurred
     */
    public static String getContentType(String paths) {
        Path path = Paths.get(paths);
        String contentType = null;
        try {
            contentType = Files.probeContentType(path);
        } catch (IOException e) {
            log.error("Failed to probe content type of " + paths, e);
        }
        log.info("文件类型 : " + contentType);
        return contentType;
    }

    /**
     * Demo entry point: indexes every entry of {@code D:\logs} into the {@code filecore} core,
     * alternating the index type between "B" (even positions) and "A" (odd positions).
     */
    public static void main(String args[]) {
        File dir = new File("D:\\logs");
        String[] files = dir.list();
        if (files == null) {
            // File.list() returns null when the path is missing or is not a directory.
            log.warn("Nothing to index, not a readable directory: " + dir);
            return;
        }
        for (int i = 0; i < files.length; i++) {
            String name = files[i];
            String path = "D:/logs/" + name;
            String type = (i % 2 == 0) ? "B" : "A";
            try {
                indexFilesSolr(path, path, name, "filecore", name, "2014-06-26 15:19:06", type, "200" + i);
            } catch (Exception e) {
                // Keep going so one bad file does not abort the whole run.
                log.error("Failed to index " + path, e);
            }
        }
    }
}

    

【上篇】
【下篇】

抱歉!评论已关闭.