现在的位置: 首页 > 综合 > 正文

缺失值简单插补方法

2018年10月23日 ⁄ 综合 ⁄ 共 1509字 ⁄ 字号 评论关闭

1.利用均值/最小值/最大值等进行插补

/* Method 1: per-patient mean imputation.
   Reads _temp_ and writes sample, which carries every input column plus a
   new column "value". Where _value_ is the standard missing value (.),
   "value" receives the mean of _value_ within the same patient group;
   otherwise it is a copy of _value_. Selecting detail columns together with
   a summary function under GROUP BY relies on PROC SQL remerging the group
   statistic back onto each row. */
proc sql noprint;
   create table sample as
   select src.*,
          case
             when src._value_ = . then mean(src._value_)
             else src._value_
          end as value
   from _temp_ as src
   group by src.patient;
quit;

2.利用出现频率最高的数值(众数)的均值/最小值/最大值等进行插补

/* Method 2: imputation from the most frequent value(s) of each patient.
   Reads _temp_; writes the helper tables FreqVals and Target and the
   result table sample. */
proc sql noprint;
   /* Step 1: count how many times each non-missing value occurs
      within each patient. */
   create table FreqVals as
   select patient,
          count(_value_) as frq,
          _value_
   from _temp_
   where _value_ is not missing
   group by patient, _value_;

   /* Step 2: keep, per patient, the value(s) whose count equals the
      patient's highest count, then average them so that ties collapse
      into a single replacement value per patient. */
   create table Target as
   select patient,
          mean(_value_) as _value_
   from (
           select distinct patient, _value_
           from FreqVals
           group by patient
           having frq = max(frq)
        ) as modes
   group by patient
   order by patient;

   /* Step 3: attach the replacement to every input row and fill
      "value" from it only where the original _value_ is missing (.). */
   create table sample as
   select e.*,
          case
             when e._value_ = . then f._value_
             else e._value_
          end as value
   from _temp_ as e
        left join Target as f
        on e.patient = f.patient
   order by patient, time;
quit;

3.利用趋势进行插补,包括延后/提前/左右临近值插补

/* Method 3: trend-based imputation (carry neighbouring observations).
   _temp_ is sorted and rewritten in place and gains two helper columns,
   BackWard and ForWard; the result table sample drops both helpers and
   adds the column "value". */

/* Pass 1: walk each patient's rows from the latest time to the earliest,
   so BackWard holds the nearest non-missing _value_ at the same or a
   later time point. */
proc sort data=_temp_;
   by patient descending time;
run;

data _temp_;
   set _temp_;
   by patient descending time;
   retain BackWard;
   /* Reset at each new patient so values never leak across patients. */
   if first.patient then BackWard = .;
   if _value_ ne . then BackWard = _value_;
run;

/* Pass 2: walk each patient's rows in chronological order, so ForWard
   holds the nearest non-missing _value_ at the same or an earlier
   time point. */
proc sort data=_temp_;
   by patient time;
run;

data _temp_;
   set _temp_;
   by patient time;
   retain ForWard;
   if first.patient then ForWard = .;
   if _value_ ne . then ForWard = _value_;
run;

/* Pass 3: prefer the observed value, then the carried-forward value;
   when neither is available fall back to max(ForWard, BackWard). */
data sample (drop=ForWard BackWard);
   set _temp_;
   if _value_ ne . then value = _value_;
   else if ForWard ne . then value = ForWard;
   else value = max(ForWard, BackWard);
run;

4.基于样本均值和标准差的缺失值随机插补,类似于多重插补

/* Method 4: random imputation around the per-patient mean.
   Reads _temp_ (sorted in place by patient), writes the helper table Target
   with the per-patient mean (m) and standard deviation (s) of _value_, and
   writes the result table sample = all input columns plus "value".
   A missing _value_ (.) is replaced by m + s * rannor(0), i.e. a draw from a
   normal distribution centred on the patient's mean; rannor(0) seeds from
   the system clock, so results are not reproducible between runs. */
proc sort data=_temp_;
   by patient;
run;

proc means data=_temp_ noprint;
   by patient;
   var _value_;
   output out=Target mean=m std=s;
run;

proc sql noprint;
   create table sample as
   select e.*,
          case
             /* s is missing when a patient has fewer than two non-missing
                values; without the coalesce the whole expression would
                evaluate to missing and the row would stay unimputed.
                Treating the spread as 0 falls back to the patient mean. */
             when e._value_ = . then f.m + coalesce(f.s, 0) * rannor(0)
             else e._value_
          end as value
   from _temp_ as e
        left join Target as f
        on f.patient eq e.patient
   order by patient, time;
quit;

代码摘自<A SAS® Macro for Single Imputation>一文

抱歉!评论已关闭.