长江证券分析师做过题为“交易数据反映的信息及预测性”的研究,见:http://www.docin.com/p-68475999.html。
笔者试图套用该文的模型来研究高频数据。期货数据形如:
....
/*
 * Tick-data preparation for the one-minute regression inputs.
 *
 * Reads the tick dataset quote20120731 and builds test, test1, ..., test8 in
 * turn. test8 holds one row per minute with the log return r and the
 * volume-based regressors vol1-vol6, vdir*, vdense* and ratio*.
 * Parameter definitions: http://www.docin.com/p-68475999.html
 *
 * Block comments are used on purpose: a "* ... ;" comment ends at its first
 * semicolon, so a semicolon inside the comment text would leave the rest of
 * the text behind as a stray (invalid) statement.
 *
 * NOTE(review): grouping uses hour/minute/second only, so the input is
 * presumably a single contract on a single trading day - confirm.
 */

/* test: parse the tick time and compute tick-to-tick differences. */
data test;
    set quote20120731;
    format date yymmdd10.;
    time1 = input(time, time8.);    /* read time with the TIME8. informat, e.g. 10:00:00 */
    format time1 time8.;
    hour   = hour(time1);
    minute = minute(time1);
    second = second(time1);
    dif_price  = dif(price);        /* price change versus the previous tick  */
    dif_amount = dif(amount);       /* amount change versus the previous tick */
    drop time;
run;

/*
 * test1: remove call-auction ticks, i.e. everything in 09:00-09:30 (the
 * 09:30 minute included) and everything in hour 15 after minute 15.
 * NOTE(review): ticks before 09:00 or from 16:00 onward are not removed, and
 * dif_price/dif_amount of the first kept tick still refer to a deleted
 * tick - confirm both are intended.
 */
data test1;
    set test;
    if hour = 9 and minute <= 30 then delete;
    if hour = 15 and minute > 15 then delete;
run;

/*
 * test2: exactly one row per second.
 * dif_price and dif_amount are summed over all ticks that share the same
 * hour/minute/second; the other columns are taken from the last tick of that
 * second. Collapsing here matters: if several rows per second survived, each
 * of them would carry the per-second sum and the sums taken in test4 and
 * test6 would count it more than once.
 */
proc sort data=test1 out=test2 equals;   /* EQUALS keeps tick order within a second */
    by hour minute second;
run;

data test2(keep=futurecode date time1 price amount hour minute second
                dif_price dif_amount);
    set test2;
    by hour minute second;
    retain sec_dif_price sec_dif_amount;
    if first.second then do;
        sec_dif_price  = .;
        sec_dif_amount = .;
    end;
    /* SUM() skips missing arguments; the total stays missing only when every
       tick of the second has a missing difference. */
    sec_dif_price  = sum(sec_dif_price, dif_price);
    sec_dif_amount = sum(sec_dif_amount, dif_amount);
    if last.second;
    dif_price  = sec_dif_price;
    dif_amount = sec_dif_amount;
run;

/*
 * test3: price direction of each second.
 * flag = 1 for an up move, -1 for a down move, 0 for no change.
 * A missing dif_price is flagged 0; a bare "dif_price < 0" test would be
 * true for missing values and flag them as down moves.
 */
data test3;
    set test2;
    if missing(dif_price) then flag = 0;
    else flag = sign(dif_price);
run;

/*
 * test4: cumulative price change (ps) and amount change (vol) per second and
 * direction. Every selected column is a grouping key, so nothing is remerged;
 * with one input row per second each group holds a single row.
 */
proc sql;
    create table test4 as
    select futurecode, date, time1, price, amount, hour, minute, second, flag,
           sum(dif_price)  as ps,
           sum(dif_amount) as vol
    from test3
    group by futurecode, date, time1, price, amount, hour, minute, second, flag;
quit;

/*
 * test5: split ps/vol into up-move columns (psp, volp) and down-move columns
 * (psn, voln). Rows with flag = 0 leave all four missing.
 */
data test5;
    set test4;
    if flag = 1 then do;
        psp  = ps;
        volp = vol;
    end;
    else if flag = -1 then do;
        psn  = ps;
        voln = vol;
    end;
    drop ps vol;
run;

/*
 * test6: per-minute totals of the four directional columns.
 * The SELECT list names columns that are not in GROUP BY, so PROC SQL
 * remerges the per-minute sums onto every row of test5 (one row per second).
 * This is deliberate: it keeps the per-second price available for test7.
 */
proc sql;
    create table test6 as
    select futurecode, date, time1, price, amount, hour, minute, second, flag,
           sum(psp)  as psp,
           sum(volp) as volp,
           sum(psn)  as psn,
           sum(voln) as voln
    from test5
    group by hour, minute;
quit;

/* test7: keep the last row of each minute; its price is used for the return. */
proc sort data=test6;
    by time1;
run;

data test7;
    set test6;
    by hour minute;
    if last.minute then output;
run;

/*
 * test8: regression variables, one row per minute.
 *   r              one-minute log return
 *   vol1-vol3      log total volume at lag 0, 1, 2
 *   vol4-vol6      first differences of vol1-vol3
 *   vdir*          (volp - voln) / vol at lag 0, 1, 2
 *   vdense*        vol / (psp + psn) at lag 0, 1, 2
 *   ratio*         (volp / psp) / (voln / psn) at lag 0, 1, 2
 * LAG and DIF are called unconditionally on every row so that their queues
 * advance one minute at a time.
 * NOTE(review): psp/volp or psn/voln are missing for a minute that has no
 * move in that direction, which makes vol and everything derived from it
 * missing for that minute - confirm those minutes should drop out.
 */
data test8;
    set test7;
    r = dif(log(price));
    psn = abs(psn);             /* down-move price change as a positive magnitude */
    vol = volp + voln;
    vol1 = log(vol);
    vol2 = log(lag(vol));
    vol3 = log(lag2(vol));
    vol4 = dif(vol1);
    vol5 = dif(vol2);
    vol6 = dif(vol3);
    VDIR = (volp - voln) / vol;
    vdir1 = lag(vdir);
    vdir2 = lag2(vdir);
    vdense = vol / (psp + psn);
    vdense1 = lag(vdense);
    vdense2 = lag2(vdense);
    ratio = (volp / psp) / (voln / psn);
    ratio1 = lag(ratio);
    ratio2 = lag2(ratio);
run;
/*
 * Regression models: five least-squares fits of the one-minute return r on
 * the regressors in test8. The results, including ODS graphics, are written
 * to the RTF file below.
 */
ods rtf file="e:\result.rtf";
ods graphics on;

proc reg data=test8;
    model r = vol1-vol3;            /* numbered range list: vol1 vol2 vol3 */
    model r = vol4-vol6;            /* numbered range list: vol4 vol5 vol6 */
    model r = vdir vdir1 vdir2;
    model r = vdense vdense1 vdense2;
    model r = ratio ratio1 ratio2;
run;
quit;   /* PROC REG is interactive: end it before the ODS destination is closed */

ods graphics off;
ods rtf close;