1.利用均值/最小值/最大值等进行插补
/*
 * Method 1: impute missing _value_ with a per-patient summary statistic.
 *
 * Input : _temp_  (needs at least patient and _value_)
 * Output: sample  (every column of _temp_ plus the imputed column `value`)
 *
 * MEAN can be swapped for MIN, MAX, etc. Because the SELECT mixes detail
 * columns with an aggregate, PROC SQL remerges the per-patient statistic
 * back onto every row of that patient.
 * A patient with no observed values at all keeps a missing `value`.
 */
proc sql noprint;
    create table sample as
    select *,
           /* IS MISSING also matches special missing values (.A-.Z, ._);
              comparing against `.` alone would leave those rows unimputed. */
           case
               when _value_ is missing then mean(_value_)
               else _value_
           end as value
    from _temp_
    group by patient;
quit;
2.利用最常出现的数据的均值/最小值/最大值等进行插补
/*
 * Method 2: impute missing _value_ with the mean of the patient's most
 * frequently occurring value(s).
 *
 * Input : _temp_    (needs patient, time and _value_)
 * Output: FreqVals  (count of each observed value per patient)
 *         Target    (one row per patient: the replacement value)
 *         sample    (every column of _temp_ plus the imputed column `value`)
 *
 * A patient with no observed values has no row in Target, so the left join
 * leaves `value` missing for that patient.
 */
proc sql noprint;
    /* Step 1: count how often each non-missing value occurs per patient. */
    create table FreqVals as
    select patient,
           freq(_value_) as frq,
           _value_
    from _temp_
    where _value_ is not null
    group by patient, _value_;

    /* Step 2: keep the value(s) tied for the highest count within each
       patient, then average them (MEAN can be swapped for MIN, MAX, etc.). */
    create table Target as
    select patient,
           mean(_value_) as _value_
    from (select distinct patient, _value_
          from FreqVals
          group by patient
          having frq eq max(frq))
    group by patient
    order by patient;

    /* Step 3: replace missing values with the patient's target value.
       IS MISSING matches the same rows that step 1 excluded, including
       special missing values (.A-.Z, ._). */
    create table sample as
    select e.*,
           case
               when e._value_ is missing then f._value_
               else e._value_
           end as value
    from _temp_ as e
    left join Target as f
        on f.patient eq e.patient
    order by patient, time;
quit;
3.利用趋势进行插补,包括延后/提前/左右临近值插补
/*
 * Method 3: impute by trend - carry the last observation forward and, where
 * no earlier observation exists, carry the next observation backward.
 *
 * Input : _temp_  (needs patient, time and _value_)
 * Output: sample  (columns of _temp_ plus the imputed column `value`)
 *
 * Side effect: _temp_ itself is rewritten. It ends up sorted by patient, time
 * and gains the helper columns BackWard and ForWard (dropped only from sample).
 */

/* Pass 1: walk each patient from latest to earliest time so that BackWard
   holds the nearest observed value at or after the current row. */
proc sort data=_temp_;
    by patient descending time;
run;

data _temp_;
    set _temp_;
    by patient descending time;
    retain BackWard;
    if first.patient then BackWard = .;   /* do not leak values across patients */
    /* missing() also rejects special missing values (.A-.Z, ._), which a
       comparison against `.` would have carried along as if observed. */
    if not missing(_value_) then BackWard = _value_;
run;

/* Pass 2: walk each patient in time order so that ForWard holds the nearest
   observed value at or before the current row. */
proc sort data=_temp_;
    by patient time;
run;

data _temp_;
    set _temp_;
    by patient time;
    retain ForWard;
    if first.patient then ForWard = .;
    if not missing(_value_) then ForWard = _value_;
run;

/* Final step: observed value first, then the forward-carried value, then the
   backward-carried one. MAX ignores missing arguments, and ForWard is missing
   whenever the last branch is reached, so it yields BackWard there. */
data sample (drop=ForWard BackWard);
    set _temp_;
    if not missing(_value_) then value = _value_;
    else value = ForWard;
    if missing(value) then value = max(ForWard, BackWard);
run;
4.基于样本均值的缺失值随机插补,类似于多重插补
/*
 * Method 4: random imputation around the per-patient mean (similar in spirit
 * to multiple imputation): each missing _value_ is replaced by
 * mean + std * (standard normal draw).
 *
 * Input : _temp_  (needs patient, time and _value_)
 * Output: Target  (PROC MEANS output: per-patient mean `m` and std `s`)
 *         sample  (every column of _temp_ plus the imputed column `value`)
 *
 * Side effect: _temp_ is re-sorted by patient (required for BY processing).
 */
proc sort data=_temp_;
    by patient;
run;

proc means data=_temp_ noprint;
    var _value_;
    by patient;
    output out=Target mean=m std=s;
run;

proc sql noprint;
    create table sample as
    select e.*,
           /* IS MISSING also matches special missing values (.A-.Z, ._).
              coalesce(f.s, 0): when the std is missing, fall back to the
              plain mean instead of producing a missing result.
              rannor(0): a zero seed is taken from the system clock, so the
              imputed values differ from run to run. */
           case
               when e._value_ is missing then f.m + coalesce(f.s, 0) * rannor(0)
               else e._value_
           end as value
    from _temp_ as e
    left join Target as f
        on f.patient eq e.patient
    order by patient, time;
quit;
代码摘自<A SAS® Macro for Single Imputation>一文