现在的位置: 首页 > 综合 > 正文

Longitudinal data: from incomplete to complete sequence

2018年10月23日 ⁄ 综合 ⁄ 共 3265字 ⁄ 字号 评论关闭

http://www.mathkb.com/Uwe/Forum.aspx/sas/3866/Longitudinal-data-from-incomplete-to-complete-sequence

该问题主要针对有缺失年份(月,日)的时间序列数据,对缺失年份的数据进行填补。

形如:将表一数据,通过填补得到表二数据

TABLE 1
patid  drug  date    daysupply
1      A     199901  20
1      A     199902   0
1      A     199903  10
1      A     199904  20
1      A     199906  20
1      A     199907   0
...
TABLE 2
patid  drug  date    daysupply
1      A     199901  20
1      A     199902   0
1      A     199903  10
1      A     199904  20
1      A     199905   0   <- Added
1      A     199906  20
1      A     199907   0

....

下面是牛人解决方法,案例数据有所出入,但疏通同归。

Philip_Crane

 

/* Read data convert the year and month to a SAS date to make gaps */
/* that may span years easier to handle.                               */

data test(drop=year mth);
  length drug $ 1.;
  input patid
      drug
      yymm
      days    ;
  year = floor(yymm / 100);
  mth  = mod(yymm,10);
  date = mdy(mth,1,year);
cards;
1      A     199901  20
1      A     199902   0
1      A     199903  10
1      A     199904  20
1      A     199906  20
1      A     199907   0
2      A     199901  20
2      A     199904  20
3      A     199809  13
3      A     199902  17
run;

/* Sort and add a record counter.                               */

proc sort data=test;
  by patid drug date;
run;

data test;
  set test;
  by patid drug date;
  cnt = _n_;
run;

/* Combine the dataset with its self to build records with a  */
/* from and to date.                                            */

proc sql;
  create table test as
  select l.patid,
       l.drug,
       l.date        as date_to,
       l.days        as days_to,
       r.date        as date_frm,
       r.days        as days_frm
    from test      l
    left join
       test      r
      on l.patid = r.patid
     and l.drug = r.drug
   where l.cnt = r.cnt + 1
   order by patid,
          drug,
          date_frm
       ;
quit;

/* Identify the gaps between the from and to dates and output   */
/* records with zero day count.                                 */

data test(keep=patid drug date days gap);
  set test;
  by patid drug date_frm;
  gap = intck('month',date_frm,date_to);
  date = date_frm;
  days = days_frm;
  output;
  if gap > 1
     then do count = 1 to gap - 1;
      date = intnx('month',date_frm,count);
      days = 0;
      output;
      end;
  if last.drug
     then do;
        date = date_to;
        days = days_to;
        output;
      end;
run;

proc print data=test;
  format date ddmmyys10.;
run;

 Sigurd Hermansen的思路

/* Longitudinal study framework. */
data test;
 input patid  drug: $1.  date: yymmn6.   daysupply ;
cards;
1      A     199901  20
1      C     199902   0
2      B     199903  10
2      D     199904  20
3      C     199906  20
3      A     199907   0
;
run;
/* Informats prove particularly useful in INPUT statements. */
/* Dates */
data dates (drop=i);
 do i=0 to 11;
   date=intnx('month','01Jan1999'd,i);
       put date mmddyy10. ;
       output;
 end;
run;
/* Functions and loops facilitate construction of frameworks. */
proc sql;
/* Patient-Drug combinations linked to dates. */
create table frame as
select t1.patid,t1.drug,t1.date format=yymmn6.,
      coalesce(t2.daySupply,0) as daySupply
from (select * from /*Cartesian product */
     (select distinct patid,drug,daySupply from test) as t1,
     (select date from dates)
       ) as t1
         left join
         test as t2
         on t1.patid=t2.patid and t1.drug=t2.drug and t1.date=t2.date
;
quit;
/* SAS SQL includes different joins, including a Cartesian product and a
LEFT JOIN. The different joins have specific functions, and the functions
cover the range of operations a programmer needs to combine data from
different sources. The COALESCE() function and formats display data
appropriately. */

Kevin Roland Viel

这个只解决了1999年情况

data _null_;
 call symput("start",  input ( "01Jan1999" , date9. ) );
 call symput("end",    input ( "31Dec2002" , date9. ) );
run;

data filled_supply;
 array mon ( &start. : &end. ) _temporary_ ;

 do until ( last.drug ) ;
   set supply end = end ;
       by patid drug ;
   mon( date ) = daysupply ;
 end;

 do _n_ = 0 to 11 ;
   date = intnx( "month" , "01Jan1999"d , _n_ ) ;
       if mon ( date ) ne . then daysupply = mon ( date ) ;
         else daysupply = 0 ;
       output;
 end ;
run ;

 

data _null_;

一直没发现proc 也有这功能 

data have;
  input obs id:$1. week $ value;
  cards;
1 A week1 30
2 A week3 60
3 B week2 10
4 C week1 20
;;;;
run;
data week;
  if 0 then set have(keep=week);
  do week='week1','week2','week3','week4';
     output;
     end;
  stop;
run;
proc summary nway data=have classdata=week;
  by id;
  class week;
  freq value;
  output out=expanded(drop=_:) n(value)=;
run;
proc print;
run;

 

抱歉!评论已关闭.