现在的位置: 首页 > 综合 > 正文

缺失值添加帖子汇总

2018年10月21日 ⁄ 综合 ⁄ 共 2176字 ⁄ 字号 评论关闭

https://communities.sas.com/thread/35155

https://communities.sas.com/message/129069#129069

https://communities.sas.com/message/109396#109396

https://communities.sas.com/message/121311#121311

https://communities.sas.com/message/114467#114467

https://communities.sas.com/message/132287#132287

1.补齐缺失日期的数据

/* Sample data: one row per ProductArea / Product_Level_1 / month, with the
   quantity requested for that month. */
data have;
  /* The two character keys are read with list input. Request_Period_DT is read
     with the ANYDTDTE. informat written without a colon modifier, i.e. as
     formatted input rather than modified list input.
     NOTE(review): this presumably relies on the informat's default width and on
     the column alignment of the data lines below - confirm before re-spacing
     them. */
  input (ProductArea Product_Level_1) ($)
         Request_Period_DT anydtdte. Request_Quantity;
  /* Display the stored date value as month and year. */
  format Request_Period_DT monyy7.;
  cards;
AAA  bbb  nov-2010 100
AAA  bbb  mar-2011  80
AAA  bbb  apr-2011 100
AAA  bbb  may-2011  90
AAA  bbb  sep-2011 200
AAA  ccc  jun-2011 100
AAA  ccc  jul-2011  50
AAA  ccc  aug-2011  80
AAA  ccc  sep-2011  90
BBB  ddd  jul-2011 100
BBB  eee  mar-2011  80
BBB  eee  apr-2011 100
BBB  eee  may-2011  90
;
 

/* Create a file with one record for each ProductArea / Product_Level_1
   combination. NODUPKEY keeps a single observation per BY-value combination;
   the result is written to MISSING so HAVE itself is left untouched. */
proc sort data=have out=missing nodupkey;
  by ProductArea Product_Level_1;
run;
 

/* Reporting window: the first and the last date of interest. Both are stored
   as plain text so they can be turned into date literals with
   "&first."d and "&last."d. */
%let first = 01jun2011;
%let last  = 01oct2011;
 

/* Expand MISSING so that every ProductArea / Product_Level_1 combination
   carries one zero-quantity record for each month start (day 1) that lies
   between &first and &last, inclusive. */
data missing;
  set missing;
  by ProductArea Product_Level_1;
  Request_Quantity = 0;

  /* Find the earliest first-of-month that is not before &first: snap to the
     start of &first's month and, if that lands before &first, move on one
     month. */
  Request_Period_DT = intnx('month', "&first."d, 0, 'beginning');
  if Request_Period_DT < "&first."d then
    Request_Period_DT = intnx('month', Request_Period_DT, 1, 'beginning');

  /* Step one month at a time instead of testing every calendar day. */
  do while (Request_Period_DT <= "&last."d);
    output;
    Request_Period_DT = intnx('month', Request_Period_DT, 1, 'beginning');
  end;
run;
 

/* Combine the zero-filled skeleton (MISSING) with the real data (HAVE).
   HAVE is listed second, so for keys present in both data sets its
   Request_Quantity replaces the placeholder 0. */
data want;
  merge missing (in=from_skeleton)
        have    (in=from_have);
  by ProductArea Product_Level_1 Request_Period_DT;
  /* Keep rows that come from either input. */
  if from_skeleton or from_have;
run;

/* Alternative approach: PROC TIMESERIES. */

/* Order HAVE by the BY variables and the time ID used in the step below. */
proc sort data=have;
  by ProductArea Product_Level_1 Request_Period_DT;
run;

/* Build a monthly series per ProductArea / Product_Level_1 between the START=
   and END= dates; ACCUMULATE=TOTAL sums quantities that fall in the same
   month and SETMISSING=0 writes 0 where a month has no value. */
proc timeseries data=have out=want;
  by ProductArea Product_Level_1;
  id Request_Period_DT
     interval=month
     accumulate=total
     setmissing=0
     start="01NOV2010"d
     end="01SEP2011"d;
  var Request_Quantity;
run;

2.补齐每组中缺失的 var 取值

/* Sample data: the Var values observed for each Year / Month / Day / Hour.
   Some hours lack one or more of the letters A-E. */
data have;
  input Year Month Day Hour Var $;
  datalines;
2011     1     1     1     A
2011     1     1     1     B
2011     1     1     1     C
2011     1     1     1     D
2011     1     1     1     E
2011     1     1     2     A
2011     1     1     2     B
2011     1     1     2     D
2011     1     1     3     C
2011     1     1     3     D
2011     1     1     3     E
2011     1     1     4     A
2011     1     1     4     B
2011     1     1     4     D
2011     1     1     4     E
;
/* For every Year / Month / Day / Hour group, copy the existing rows and then
   append one row for each letter of 'ABCDE' that does not occur as Var in
   that group. Working variables start with "_" and are dropped on output. */
data want (drop=_:);
  length _t _gap $40;

  /* Pass 1: collect the Var values present in the current hour. Each
     iteration of the data step handles exactly one hour, so the collector is
     cleared here, before the group is read. */
  _t = ' ';
  do until (last.hour);
    set have;
    by year month day hour;
    _t = cats(_t, var);
  end;

  /* Letters of 'ABCDE' that were not seen in this hour. */
  _gap = compress('ABCDE', _t);

  /* Pass 2: re-read the same hour and write its rows through unchanged. */
  do until (last.hour);
    set have;
    by year month day hour;
    output;
  end;

  /* After the hour's last real row, emit one filler row per absent letter;
     the key variables still hold the values of that last row. */
  do _i = 1 to lengthn(_gap);
    var = substr(_gap, _i, 1);
    output;
  end;
run;

proc print data=want;
run;
/* Alternative approach: PROC SUMMARY with a CLASSDATA= table. */

/* CLASS lists the Var levels to be used as the CLASSDATA= table.
   NOTE(review): this list includes F, while the data-step approach in this
   file fills only A-E - confirm which set is intended. */
data class;
   /* Never executed at run time; it only makes Var take the attributes it
      has in HAVE when the step is compiled. */
   if 0 then set have (keep=var);
   input var @@;
   datalines;
A B C D E F  
;
run;
/* Summarise HAVE per BY group with Var as the class variable, taking the
   class levels from the CLASS data set as well, and write the result to
   EXPAND. "year--hour" names the variables from year through hour in the
   order they are stored in HAVE. */
proc summary data=have classdata=class nway;
   by year--hour;
   class var;
   output out=expand;
run;

proc print data=expand;
run;

抱歉!评论已关闭.