转载自 http://martin3000.iteye.com/blog/1328833
使用 DataImportHandler(DIH)进行简单的数据导入还是比较有效的,特别是针对简单的数据库表,DIH 可以把完全导入和增量导入合并成一条查询语句,非常方便。我的使用方式如下所示:
1. 配置 solrconfig.xml(注册 DataImportHandler)
Xml代码
<!-- Registers DataImportHandler under the handler name "/dataimport". -->
<!-- The "config" default is the path of the data-config.xml file that describes what to import. -->
- <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
- <lst name="defaults">
- <str name="config">/home/tomcat/bin/solr/conf/data-config.xml</str>
- </lst>
- </requestHandler>
2.添加data-config文件
data-config.xml
Xml代码
<!-- DIH import definition: one JDBC data source (MySQL) and one entity read from the "hot" table. -->
- <dataConfig>
<!-- NOTE(review): batchSize="-1" is presumably the DIH setting that streams MySQL results row by row; confirm against the DIH documentation. -->
- <dataSource type="JdbcDataSource"
- driver="com.mysql.jdbc.Driver"
- url="jdbc:mysql://127.0.0.1/db"
- user="root"
- password="pass"
- batchSize="-1"/>
- <document>
<!-- A single query serves both full and incremental imports: -->
<!--   * when the request's "clean" parameter is anything other than 'false', the first condition is true for every row, so all rows are selected; -->
<!--   * when clean=false, only rows whose timestamp is later than the recorded last_index_time are selected. -->
- <entity name="id" pk="id"
- query="select id,username,text,cat from hot where '${dataimporter.request.clean}' != 'false' OR timestamp > '${dataimporter.last_index_time}'">
<!-- Each field maps a selected column ("column") to an index field name ("name"). -->
- <field column="id" name="id"/>
- <field column="text" name="text"/>
- <field column="username" name="username_s"/>
- <field column="cat" name="cat_t"/>
- </entity>
- </document>
- </dataConfig>
3.让DIH周期性的运行
修改dataimport.properties文件,这个是自动生成的,同在solr/conf下,添加参数
interval:周期运行的间隔时间,单位为分钟
syncEnabled=1:打开周期运行
params:每次周期运行时实际调用的 URL;所谓周期运行,就是周期性地访问这个 URL
Java代码
# Scheduler-related keys added by hand to the auto-generated dataimport.properties file:
#   interval    - minutes between scheduled runs
#   syncEnabled - 1 turns periodic running on
#   params      - the URL path and query string requested on every run
#   server / port / webapp - presumably combined with params to form the full URL; confirm against HTTPPostScheduler
# The last_index_time entries are part of the generated content of this file.
- #Wed Dec 28 09:29:42 UTC 2011
- port=8983
- interval=5
- last_index_time=2011-12-28 09\:29\:26
- syncEnabled=1
- webapp=solr
- id.last_index_time=2011-12-28 09\:29\:26
- server=127.0.0.1
- params=/select?qt\=/dataimport&command\=full-import&clean\=false&commit\=true&optimize\=false
到此还不能周期运行。solr 的 wiki 中有一段实现这个功能的代码,但并没有加入到 solr 的发行包中,于是我们需要自己编译这段代码,打包后放到 webapp/solr/WEB-INF/lib 中,并在 web.xml 中注册下面的监听器才行。
Xml代码
- <web-app>
<!-- Registers the scheduler's ServletContextListener so that it is notified when the web application starts and stops. -->
- <listener>
- <listener-class>org.apache.solr.handler.dataimport.scheduler.ApplicationListener</listener-class>
- </listener>
- ...
- </web-app>
以下是solr wiki上周期运行的代码,我已打好包,放在附件里。
Java代码
- package org.apache.solr.handler.dataimport.scheduler;
- import java.io.FileInputStream;
- import java.io.FileNotFoundException;
- import java.io.IOException;
- import java.util.Properties;
- import org.apache.solr.core.SolrResourceLoader;
- import org.slf4j.Logger;
- import org.slf4j.LoggerFactory;
- public class SolrDataImportProperties {
- private Properties properties;
- public static final String SYNC_ENABLED = "syncEnabled";
- public static final String SYNC_CORES = "syncCores";
- public static final String SERVER = "server";
- public static final String PORT = "port";
- public static final String WEBAPP = "webapp";
- public static final String PARAMS = "params";
- public static final String INTERVAL = "interval";
- private static final Logger logger = LoggerFactory.getLogger(SolrDataImportProperties.class);
- public SolrDataImportProperties(){
- // loadProperties(true);
- }
- public void loadProperties(boolean force){
- try{
- SolrResourceLoader loader = new SolrResourceLoader(null);
- logger.info("Instance dir = " + loader.getInstanceDir());
- String configDir = loader.getConfigDir();
- configDir = SolrResourceLoader.normalizeDir(configDir);
- if(force || properties == null){
- properties = new Properties();
- String dataImportPropertiesPath = configDir + "\\dataimport.properties";
- FileInputStream fis = new FileInputStream(dataImportPropertiesPath);
- properties.load(fis);
- }
- }catch(FileNotFoundException fnfe){
- logger.error("Error locating DataImportScheduler dataimport.properties file", fnfe);
- }catch(IOException ioe){
- logger.error("Error reading DataImportScheduler dataimport.properties file", ioe);
- }catch(Exception e){
- logger.error("Error loading DataImportScheduler properties", e);
- }
- }
- public String getProperty(String key){
- return properties.getProperty(key);
- }
- }
Java代码
- package org.apache.solr.handler.dataimport.scheduler;
- import java.util.Calendar;
- import java.util.Date;
- import java.util.Timer;
- import javax.servlet.ServletContext;
- import javax.servlet.ServletContextEvent;
- import javax.servlet.ServletContextListener;
- import org.slf4j.Logger;
- import org.slf4j.LoggerFactory;
- public class ApplicationListener implements ServletContextListener {
- private static final Logger logger = LoggerFactory.getLogger(ApplicationListener.class);
- @Override
- public void contextDestroyed(ServletContextEvent servletContextEvent) {
- ServletContext servletContext = servletContextEvent.getServletContext();
- // get our timer from the context
- Timer timer = (Timer)servletContext.getAttribute("timer");
- // cancel all active tasks in the timers queue
- if (timer != null)
- timer.cancel();
- // remove the timer from the context
- servletContext.removeAttribute("timer");
- }
- @Override
- public void contextInitialized(ServletContextEvent servletContextEvent) {
- ServletContext servletContext = servletContextEvent.getServletContext();
- try{
- // create the timer and timer task objects
- Timer timer = new Timer();
- HTTPPostScheduler task = new HTTPPostScheduler(servletContext.getServletContextName(), timer);
- // get our interval from HTTPPostScheduler
- int interval = task.getIntervalInt();
- // get a calendar to set the start time (first run)
- Calendar calendar = Calendar.getInstance();
- // set the first run to now + interval (to avoid fireing while the app/server is starting)
- calendar.add(Calendar.MINUTE, interval);
- Date startTime = calendar.getTime();
- // schedule the task
- timer.scheduleAtFixedRate(task, startTime, 1000 * 60 * interval);
- // save the timer in context
- servletContext.setAttribute("timer", timer);
- } catch (Exception e) {
- if(e.getMessage().endsWith("disabled")){
- logger.info("Schedule disabled");