现在的位置: 首页 > 综合 > 正文

java用Digester解析xml文件——高效率的xml解析

2015年02月03日 ⁄ 综合 ⁄ 共 7080字 ⁄ 字号 评论关闭

  Digester不是jdk里面自带的,有依赖包:commons-beanutils.jar、commons-collections.jar、commons-digester.jar、commons-logging-1.1.3.jar。下载地址:点击打开链接

  Digester底层采用SAX解析XML文件,所以很自然地,对象转换由"事件"驱动,即在识别出特定XML元素时(实际被细分为begin、body、end、finish四个时点),将执行特定的动作,比如创建特定的Java对象,或调用特定对象的方法等。此处的XML元素根据匹配模式(matching pattern)识别,而相关操作由规则(rule)定义。在转换过程中,Digester维持了一个对象栈,可以看作对象转换的工作台,用来存放转换中生成的、或是为转换临时创建的Java对象。对输入XML文件作了一趟完整的扫描后,对象栈的栈顶元素即为目标对象。由于Digester屏蔽了SAX解析的细节,使用者仅需关注转换操作本身,大大简化了转换操作。Digester按规则调用方法时,是通过Java反射机制来完成的。

  本文示例的项目源代码下载地址:点击打开链接

  上xml文件内容:

<?xml version="1.0" encoding="UTF-8"?>

<CCMS_DATA>
  <CCMS_MAXAMOUNT_DATA>
    <ROW>
      <SYSCODE>BEPS</SYSCODE>
      <SENDBANK>ALL</SENDBANK>
      <RECVBANK>ALL</RECVBANK>
      <MSGTYPE>ALL</MSGTYPE>
      <BIZTYPE>0</BIZTYPE>
      <AMTLMT>9999999999999.99</AMTLMT>
      <CHKLEVEL>10</CHKLEVEL>
    </ROW>
    <ROW>
      <SYSCODE>BEPS</SYSCODE>
      <SENDBANK>ALL</SENDBANK>
      <RECVBANK>ALL</RECVBANK>
      <MSGTYPE>PKG001</MSGTYPE>
      <BIZTYPE>0</BIZTYPE>
      <AMTLMT>50000.00</AMTLMT>
      <CHKLEVEL>11</CHKLEVEL>
    </ROW>
    <ROW>
      <SYSCODE>BEPS</SYSCODE>
      <SENDBANK>ALL</SENDBANK>
      <RECVBANK>ALL</RECVBANK>
      <MSGTYPE>PKG003</MSGTYPE>
      <BIZTYPE>0</BIZTYPE>
      <AMTLMT>9999999999999.99</AMTLMT>
      <CHKLEVEL>11</CHKLEVEL>
    </ROW>
  </CCMS_MAXAMOUNT_DATA>

</CCMS_DATA>

  上代码:

  entity/bean层:

package com.bosspay.entity;

/**
 * Plain data holder for one amount-limit record.
 *
 * <p>All business fields are kept as strings. Each field is exposed through a
 * conventional getter/setter pair so that reflection-based tools can populate it.
 */
public class DicAmtLimit {

    /** Identifier of the record. */
    private long id;

    public long getId() {
        return id;
    }

    public void setId(long id) {
        this.id = id;
    }

    /** System code of the record. */
    private String syscode;

    public String getSyscode() {
        return syscode;
    }

    public void setSyscode(String syscode) {
        this.syscode = syscode;
    }

    /** Value of the {@code mt} column. */
    private String mt;

    public String getMt() {
        return mt;
    }

    public void setMt(String mt) {
        this.mt = mt;
    }

    /** Value of the {@code txtp} column. */
    private String txtp;

    public String getTxtp() {
        return txtp;
    }

    public void setTxtp(String txtp) {
        this.txtp = txtp;
    }

    /** Value of the {@code sndrbk} column. */
    private String sndrbk;

    public String getSndrbk() {
        return sndrbk;
    }

    public void setSndrbk(String sndrbk) {
        this.sndrbk = sndrbk;
    }

    /** Value of the {@code rcvbk} column. */
    private String rcvbk;

    public String getRcvbk() {
        return rcvbk;
    }

    public void setRcvbk(String rcvbk) {
        this.rcvbk = rcvbk;
    }

    /** Value of the {@code chcklvl} column. */
    private String chcklvl;

    public String getChcklvl() {
        return chcklvl;
    }

    public void setChcklvl(String chcklvl) {
        this.chcklvl = chcklvl;
    }

    /** Value of the {@code amtupperlmt} column, stored as text. */
    private String amtupperlmt;

    public String getAmtupperlmt() {
        return amtupperlmt;
    }

    public void setAmtupperlmt(String amtupperlmt) {
        this.amtupperlmt = amtupperlmt;
    }
}

  dao层:

package com.bosspay.dao;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;

import com.bosspay.common.DataUtil;
import com.bosspay.entity.DicAmtLimit;

/**
 * Data-access object that writes {@link DicAmtLimit} records into the
 * {@code DIC_AMT_LIMIT} table using JDBC batch inserts.
 */
public class DicAmtLimitDao {

    public static final String TABLE_NAME = "DIC_AMT_LIMIT";

    public static final String ID = "id";

    public static final String SYSCODE = "syscode";
    public static final String MT = "mt";
    public static final String TXTP = "txtp";
    public static final String SNDRBK = "sndrbk";
    public static final String RCVBK = "rcvbk";
    public static final String CHCKLVL = "chcklvl";
    public static final String AMTUPPERLMT = "amtupperlmt";

    /** Parameterized insert statement; the id column is intentionally not part of it. */
    private static final String INSERT_SQL = "insert into " + TABLE_NAME + "("
        + SYSCODE + "," + MT + "," + TXTP + "," + SNDRBK + "," + RCVBK + "," + CHCKLVL + "," + AMTUPPERLMT
        + ")values(?,?,?,?,?,?,?)";

    private static final DicAmtLimitDao instance = new DicAmtLimitDao();

    /**
     * Returns the shared DAO instance.
     *
     * @return the shared instance
     */
    public static DicAmtLimitDao getInstance() {
        return instance;
    }

    /**
     * Inserts all given records in a single JDBC batch.
     *
     * <p>The statement and connection are always released, even when the batch fails.
     * Failures are printed to standard error and reported through the return value
     * instead of being propagated.
     *
     * @param dicAmtLimits records to insert; {@code null} or an empty list means there is
     *                     nothing to do and no connection is opened
     * @return {@code true} if the batch was executed (or there was nothing to insert),
     *         {@code false} if any step failed
     */
    public boolean insertBatch(List<DicAmtLimit> dicAmtLimits) {
        if (dicAmtLimits == null || dicAmtLimits.isEmpty()) {
            return true;
        }

        Connection conn = null;
        PreparedStatement stm = null;
        try {
            conn = DataUtil.getInstance().getConnection();
            stm = conn.prepareStatement(INSERT_SQL);
            for (DicAmtLimit dicAmtLimit : dicAmtLimits) {
                stm.setString(1, dicAmtLimit.getSyscode());
                stm.setString(2, dicAmtLimit.getMt());
                stm.setString(3, dicAmtLimit.getTxtp());
                stm.setString(4, dicAmtLimit.getSndrbk());
                stm.setString(5, dicAmtLimit.getRcvbk());
                stm.setString(6, dicAmtLimit.getChcklvl());
                stm.setString(7, dicAmtLimit.getAmtupperlmt());
                stm.addBatch();
            }
            stm.executeBatch();
            return true;
        } catch (Exception e) {
            // Kept broad because the exceptions thrown by DataUtil are not visible here.
            e.printStackTrace();
            return false;
        } finally {
            release(stm, conn);
        }
    }

    /**
     * Releases the JDBC resources, never throwing.
     *
     * <p>When a statement exists both resources go through {@code DataUtil.close}, exactly
     * as before. When only the connection was obtained it is closed directly, so that
     * {@code DataUtil.close} is never handed a {@code null} statement (whether it tolerates
     * one is not visible from this class).
     */
    private void release(PreparedStatement stm, Connection conn) {
        try {
            if (stm != null) {
                DataUtil.getInstance().close(stm, conn);
            } else if (conn != null) {
                conn.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}

  处理层(我自定义的,其实这层可以和dao层合并成一层):

package com.bosspay.processor;
import java.util.ArrayList;
import java.util.List;

import com.bosspay.dao.DicAmtLimitDao;
import com.bosspay.entity.DicAmtLimit;


/**
 * Buffers {@link DicAmtLimit} records and writes them to the database in batches.
 *
 * <p>Records are collected through {@link #addDicAmtLimit(DicAmtLimit)}; once the buffer
 * holds {@code BATCH_SIZE} entries it is flushed before the next record is added.
 * {@link #lastInsert()} flushes whatever is left.
 */
public class DicAmtLimitProcessor {
    /** Number of buffered records that triggers a flush. */
    private static final int BATCH_SIZE = 5000;

    private final List<DicAmtLimit> dicAmtLimits = new ArrayList<>();
    private final DicAmtLimitDao dicAmtLimitDao = DicAmtLimitDao.getInstance();
    /** Total number of records received so far. */
    private long num;

    /**
     * Adds one record to the buffer, flushing the buffer first if it is already full.
     *
     * @param dicAmtLimit the record to buffer
     */
    public void addDicAmtLimit(DicAmtLimit dicAmtLimit) {
        if (dicAmtLimits.size() >= BATCH_SIZE) {
            flush();
            System.out.println("当前处理的数据有"+num+"条!");
        }
        dicAmtLimits.add(dicAmtLimit);
        num += 1;
    }

    /**
     * Flushes the remaining buffered records and prints the total count.
     *
     * <p>The buffer is emptied afterwards, so calling this method again does not insert
     * the same records a second time.
     */
    public void lastInsert() {
        flush();
        System.out.println("总共处理的数据有"+num+"条!");
    }

    /** Writes the buffered records, if any, and empties the buffer. */
    private void flush() {
        if (dicAmtLimits.isEmpty()) {
            return;
        }
        dicAmtLimitDao.insertBatch(dicAmtLimits);
        dicAmtLimits.clear();
    }
}

  解析规则层:

package com.bosspay.processor;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.digester.Digester;
import org.xml.sax.SAXException;

import com.bosspay.entity.DicAmtLimit;



/**
 * Parses a CCMS amount-limit XML file with Commons Digester and hands every
 * {@code ROW} element to a {@link DicAmtLimitProcessor} as a {@link DicAmtLimit}.
 */
public class DicAmtLimitParse {
    private static final String CCMS_MAXAMOUNT_DATA = "CCMS_DATA/CCMS_MAXAMOUNT_DATA";
    private static final String ROW = CCMS_MAXAMOUNT_DATA + "/ROW";

    private Digester digester = new Digester();
    /** Whether the rules have already been registered on the current {@link #digester}. */
    private boolean rulesConfigured;

    /**
     * Parses the XML file at the given path.
     *
     * <p>The parsing rules are registered only once per Digester instance; registering them
     * again on every call would make each rule fire several times on later parses. The
     * input stream is closed even when parsing fails.
     *
     * <p>NOTE(review): the Digester documentation recommends a separate Digester per
     * document; reusing this instance after a failed parse may leave state on its stack.
     *
     * @param path path of the XML file to parse
     * @throws IOException  if the file cannot be opened or read
     * @throws SAXException if the XML cannot be parsed
     */
    public void parse(String path) throws IOException, SAXException {
        if (!rulesConfigured) {
            configureRules();
            rulesConfigured = true;
        }
        try (InputStream stream = new FileInputStream(new File(path))) {
            // This object sits at the bottom of the stack so that the set-next rule on
            // CCMS_MAXAMOUNT_DATA can call clearList on it.
            digester.push(this);
            digester.parse(stream);
        }
    }

    /** Registers the element-to-object mapping rules on the current Digester. */
    private void configureRules() {
        // NOTE(review): this only turns off DTD validation. If the XML may come from an
        // untrusted source, confirm that DOCTYPE/external entities are disabled as well.
        digester.setValidating(false);

        // On <CCMS_MAXAMOUNT_DATA>: create a DicAmtLimitProcessor and push it on the stack.
        digester.addObjectCreate(CCMS_MAXAMOUNT_DATA, DicAmtLimitProcessor.class);
        // On <ROW>: create a DicAmtLimit and push it on the stack.
        digester.addObjectCreate(ROW, DicAmtLimit.class);

        // For each child element of ROW, call the named property's setter on the object
        // at the top of the stack (the DicAmtLimit) with the element's text.
        digester.addBeanPropertySetter(ROW + "/SYSCODE", "syscode");
        digester.addBeanPropertySetter(ROW + "/SENDBANK", "sndrbk");
        digester.addBeanPropertySetter(ROW + "/RECVBANK", "rcvbk");
        digester.addBeanPropertySetter(ROW + "/MSGTYPE", "mt");
        digester.addBeanPropertySetter(ROW + "/BIZTYPE", "txtp");
        digester.addBeanPropertySetter(ROW + "/AMTLMT", "amtupperlmt");
        digester.addBeanPropertySetter(ROW + "/CHKLEVEL", "chcklvl");

        // At </ROW>: pass the DicAmtLimit to addDicAmtLimit on the object below it
        // (the DicAmtLimitProcessor).
        digester.addSetNext(ROW, "addDicAmtLimit");
        // At </CCMS_MAXAMOUNT_DATA>: pass the DicAmtLimitProcessor to clearList on the
        // object below it (this parser).
        digester.addSetNext(CCMS_MAXAMOUNT_DATA, "clearList");
    }

    /**
     * Callback invoked by Digester at the end of {@code CCMS_MAXAMOUNT_DATA}; flushes the
     * records still buffered in the processor.
     *
     * @param xmlProcessor the processor that collected the rows of the finished element
     */
    public void clearList(DicAmtLimitProcessor xmlProcessor) {
        xmlProcessor.lastInsert();
    }

    public Digester getDigester() {
        return digester;
    }

    /**
     * Replaces the Digester used for parsing. The rules are registered on the new
     * instance the next time {@link #parse(String)} is called.
     *
     * @param digester the Digester to use from now on
     */
    public void setDigester(Digester digester) {
        this.digester = digester;
        this.rulesConfigured = false;
    }
}

  要注意的是,digester实际上就是设置解析规则,当碰到与规则匹配的节点时就执行对应的方法,当xml结构重复的时候它就重复执行对应的方法。有的时候可能会碰到这样的情况:xml转换成java对象时属性重复(xml元素重复),这时我们可以修改java bean属性的setter方法,用数组、集合或者字符串拼接等方法存储重复属性的值,反正digester只认setter方法,就算没有对应的属性也行。下面我用代码加注释来说明:

xml文件:

    <ROW>
      <mmbCd>10086</mmbCd>
      <mmbCd>10010</mmbCd>
      <mmbCd>10000</mmbCd>
    </ROW>

bean中:
/**
 * Directly participating institution codes, separated by commas.
 */
private String mmbCds = "";// only mmbCds is declared; there is no mmbCd field

// Setter for the "mmbCd" property

/**
 * Appends one institution code to the comma-separated {@code mmbCds} value.
 * The first code is stored as-is; every later code is preceded by a comma.
 *
 * @param mmbCd the institution code to append
 */
public void setMmbCd(String mmbCd) {
    final boolean firstValue = this.mmbCds.isEmpty();
    this.mmbCds = firstValue ? mmbCd : this.mmbCds + "," + mmbCd;
}

规则设置时:

// Map every <mmbCd> child of ROW to the bean's "mmbCd" property, so its setter runs once per element.
digester.addBeanPropertySetter(ROW+"/mmbCd", "mmbCd");

解析生成的对象mmbCds属性的值是 10086,10010,10000 。

digester用的是反射机制,所以我们可以根据反射原理适当更改代码,将复杂的结构简单解析。

抱歉!评论已关闭.