现在的位置: 首页 > 综合 > 正文

sas探索交易数据信息

2018年10月21日 ⁄ 综合 ⁄ 共 2102字 ⁄ 字号 评论关闭

长江证券的分析员做过一项题为“交易数据反映的信息及预测性”的研究,见:http://www.docin.com/p-68475999.html

笔者试图套用文章的模型进行高频数据的研究:期货数据形如:

....

* Load the main-contract quotes and derive the per-row working fields;
data test (drop=time);
   set quote20120731;

   * Read the time field with the time8. informat and show it as hh:mm:ss, e.g. 10:00:00;
   time1 = input(time, time8.);
   format date yymmdd10. time1 time8.;

   * Clock components of each row;
   hour   = hour(time1);
   minute = minute(time1);
   second = second(time1);

   * Row-over-row change in price and in amount (missing on the first row);
   dif_price  = dif(price);
   dif_amount = dif(amount);
run;
* Remove call-auction rows: everything with hour 9 and minute up to and including 30,
  and everything with hour 15 and minute above 15.
  NOTE(review): rows with hour below 9 or above 15 are kept - confirm none exist;
data test1;
   set test;
   if (hour = 9 and minute <= 30) or (hour = 15 and minute > 15) then delete;
run;
* One row per second. dif_price and dif_amount become the totals of the tick-level
  changes inside the second, and the remaining columns come from the last tick of
  that second in input order.
  The earlier SELECT DISTINCT listed columns that were not in GROUP BY, so PROC SQL
  remerged the totals and kept one row per distinct tick, each carrying the
  whole-second totals. Any later sum over those rows counted a second once per tick.
  When every second holds a single tick the result is the same as before;

* Stable sort (EQUALS) so ticks keep their input order inside each second;
proc sort data=test1 (keep=futurecode date time1 price amount hour minute second
                           dif_price dif_amount)
          out=test2 equals;
   by hour minute second;
run;

data test2 (drop=tick_dif_price tick_dif_amount);
   set test2 (rename=(dif_price=tick_dif_price dif_amount=tick_dif_amount));
   by hour minute second;
   retain dif_price dif_amount;

   * Restart the running totals at the first tick of each second;
   if first.second then call missing(dif_price, dif_amount);

   * SUM() skips missing arguments and stays missing only when every tick is missing,
     which matches the SQL SUM aggregate that was used before;
   dif_price  = sum(dif_price,  tick_dif_price);
   dif_amount = sum(dif_amount, tick_dif_amount);

   * Emit only the last tick of the second, now carrying the totals;
   if last.second;
run;

* Classify the price direction of each row: flag=1 price rose, flag=-1 price fell,
  flag=0 price unchanged or price change not available;
data test3;
   set test2;

   * SAS orders a missing value below every number, so a plain dif_price < 0 test
     labels a missing change as a fall. Treat missing as no movement instead;
   if missing(dif_price) then flag = 0;
   else flag = sign(dif_price);
run;
* Total price change (ps) and total volume change (vol) per second and direction.
  The select list carries columns that are not in GROUP BY, so PROC SQL remerges
  the group totals onto every input row instead of collapsing the group;
proc sql;
   create table test4 as
   select t3.futurecode,
          t3.date,
          t3.time1,
          t3.price,
          t3.amount,
          t3.hour,
          t3.minute,
          t3.second,
          t3.flag,
          sum(t3.dif_price)  as ps,
          sum(t3.dif_amount) as vol
   from test3 as t3
   group by t3.hour, t3.minute, t3.second, t3.flag;
quit;
* Split the directional totals into separate columns ahead of the modelling step.
  Upward rows (flag=1) fill psp and volp, downward rows (flag=-1) fill psn and voln,
  and rows with any other flag leave all four missing. The per-minute totals are
  computed afterwards in PROC SQL;
data test5 (drop=ps vol);
   set test4;
   select (flag);
      when (1) do;
         psp  = ps;
         volp = vol;
      end;
      when (-1) do;
         psn  = ps;
         voln = vol;
      end;
      otherwise;
   end;
run;

* Per-minute totals of the directional price and volume columns. Non-grouped columns
  are selected as well, so PROC SQL remerges the minute totals onto every row of the
  minute rather than returning one row per minute;
proc sql;
   create table test6 as
   select t5.futurecode,
          t5.date,
          t5.time1,
          t5.price,
          t5.amount,
          t5.hour,
          t5.minute,
          t5.second,
          t5.flag,
          sum(t5.psp)  as psp,
          sum(t5.volp) as volp,
          sum(t5.psn)  as psn,
          sum(t5.voln) as voln
   from test5 as t5
   group by t5.hour, t5.minute;
quit;
* Keep one row per minute - the last one in time1 order - so that its price can be
  used for the return calculation;
proc sort data=test6;
   by time1;
run;

data test7;
   set test6;
   by hour minute;
   * Subsetting IF: only the closing row of each minute is written out;
   if last.minute;
run;

* Build the regression variables from the one-row-per-minute table.
  Parameter definitions follow http://www.docin.com/p-68475999.html;
data test8;
   set test7;

   * Log return between consecutive rows;
   r = dif(log(price));

   * A minute without any upward (or any downward) rows arrives with missing
     directional totals. Those mean zero movement and zero volume, so use 0 instead
     of letting vol, vdir, vdense and all of their lags turn missing;
   psp  = coalesce(psp, 0);
   volp = coalesce(volp, 0);
   psn  = abs(coalesce(psn, 0));
   voln = coalesce(voln, 0);

   * Directional volume and its log, current and lagged by one and two rows.
     vol2 and vol3 lag the log itself, which equals the log of the lagged volume.
     The log is taken only for positive volume to avoid an invalid-argument error.
     All LAG and DIF calls stay unconditional so their queues advance on every row;
   vol = volp + voln;
   if vol > 0 then vol1 = log(vol);
   vol2 = lag(vol1);
   vol3 = lag2(vol1);
   vol4 = dif(vol1);
   vol5 = dif(vol2);
   vol6 = dif(vol3);

   * Volume direction: net directional volume as a share of the total.
     Left missing when the total is zero;
   if vol ne 0 then VDIR = (volp - voln) / vol;
   vdir1 = lag(vdir);
   vdir2 = lag2(vdir);

   * Volume density: volume per unit of absolute price movement.
     Left missing when there was no price movement;
   if (psp + psn) ne 0 then vdense = vol / (psp + psn);
   vdense1 = lag(vdense);
   vdense2 = lag2(vdense);

   * Up-side volume per unit of price rise relative to the down-side equivalent.
     Left missing whenever one of the divisors would be zero;
   if psp ne 0 and psn ne 0 and voln ne 0 then ratio = (volp / psp) / (voln / psn);
   ratio1 = lag(ratio);
   ratio2 = lag2(ratio);
run;

* Fit five regressions of the return r and send the output, with ODS graphics,
  to an RTF file;
ods rtf file="e:\result.rtf";
ods graphics on;
proc reg data=test8;
   * vol1-vol3 and vol4-vol6 are numbered range lists, i.e. vol1 vol2 vol3 and
     vol4 vol5 vol6;
   model  r = vol1-vol3;
   model  r = vol4-vol6;
   model  r = vdir vdir1 vdir2;
   model  r = vdense vdense1 vdense2;
   model  r = ratio ratio1 ratio2;
run;
* PROC REG is interactive and keeps running after RUN. End it explicitly so the
  procedure has finished before the ODS destination is closed;
quit;
ods graphics off;
ods rtf close;

 

抱歉!评论已关闭.