现在的位置: 首页 > 综合 > 正文

分类数据之logistic回归

2018年10月20日 ⁄ 综合 ⁄ 共 4234字 ⁄ 字号 评论关闭
/*
 * Logistic regression for categorical data, part 1: binary response.
 * CORONARY holds grouped counts. ca is the binary response, sex and
 * ecg are binary predictors, all three coded 0/1. count is the cell
 * frequency and is used as the FREQ variable by the procedures that
 * analyse this data set.
 * One observation is written per data line.
 */
data coronary;
   input sex ecg ca count;
   datalines;
0 0 0 11
0 0 1 4
0 1 0 10
0 1 1 8
1 0 0 9
1 0 1 9
1 1 0 6
1 1 1 21
;
/*
 * Main-effects logistic model of ca on sex and ecg.
 *
 *   descending  - reverses the response ordering so that ca=1 receives
 *                 ordered value 1, i.e. the probability of ca=1 is
 *                 the one being modeled.
 *   freq count  - each input row stands for COUNT observations.
 *   scale=none aggregate
 *               - prints the deviance and Pearson goodness-of-fit
 *                 statistics. With SCALE=NONE no overdispersion
 *                 correction is applied.
 *   output      - writes the input rows plus the predicted probability
 *                 (variable PROB) to data set PREDICT.
 *
 * A block comment is used on purpose: a comment statement that starts
 * with an asterisk ends at the first semicolon, so explanatory text
 * containing a semicolon would be parsed as SAS code.
 */
proc logistic data=coronary descending;
   freq count;
   model ca=sex ecg / scale=none aggregate;
   output out=predict pred=prob;
run;

/* List the predicted probabilities. */
proc print data=predict;
run;

/*
 * Same response and predictors with the sex*ecg interaction added.
 * ODS SELECT restricts the printed output of the next procedure to
 * the fit statistics and the parameter estimates.
 *
 * DATA= is spelled out: without it the procedure reads the most
 * recently created data set, which at this point in the script is
 * PREDICT (written by the OUTPUT statement of the preceding step),
 * not CORONARY.
 */
ods select FitStatistics ParameterEstimates;
proc logistic data=coronary descending;
   freq count;
   model ca=sex ecg sex*ecg;
run;

/*
 * Part 2: character-valued predictors handled by the CLASS statement.
 * sentence (y/n) is the binary response, type (nrb/other) and
 * prior (some/none) are two-level character predictors, and count is
 * the cell frequency.
 * One observation is written per data line.
 */
data sentence;
   input type $ prior $ sentence $ count;
   datalines;
nrb some y 42
nrb some n 109
nrb none y 17
nrb none n 75
other some y 33
other some n 175
other none y 53
other none n 359
;
/*
 * Main-effects model for sentence with reference-cell coding.
 *
 *   class ... / param=ref
 *        - builds 0/1 design variables for the classification
 *          variables, each level compared with a reference level.
 *   prior(ref=first)
 *        - takes the first level in sorted order as the reference.
 *          The values of prior are 'none' and 'some', so the
 *          reference level is 'none'. type carries no REF= option and
 *          therefore uses the procedure default.
 *   scale=none aggregate
 *        - prints the deviance and Pearson goodness-of-fit statistics
 *          without applying a dispersion correction.
 */
proc logistic data=sentence descending;
   freq count;
   class type prior(ref=first) / param=ref;
   model sentence = type prior / aggregate scale=none;
run;
/*
 * Goodness of fit of the reduced model that contains type only.
 * ODS SELECT keeps just the goodness-of-fit table.
 * AGGREGATE=(type prior) defines the subpopulations by both variables
 * although prior is not a model effect, so the statistics are computed
 * on the same cells as for the two-variable model.
 * DATA= is named explicitly, like the other SENTENCE analyses, so the
 * step does not depend on which data set was created last.
 */
ods select GoodnessOfFit;
proc logistic data=sentence descending;
   class type prior (ref=first) / param=ref;
   freq count;
   model sentence = type / scale=none aggregate=(type prior);
run;
/*
 * Refit the main-effects model and print only four tables: class
 * level information, goodness of fit, parameter estimates and odds
 * ratio estimates.
 * prior uses 'none' as its reference level. No PARAM= option is
 * given, so the CLASS variables get the procedure's default coding.
 */
ods select ClassLevelInfo
           GoodnessOfFit
           ParameterEstimates
           OddsRatios;
proc logistic data=sentence descending;
   freq count;
   class type prior(ref='none');
   model sentence = type prior / aggregate scale=none;
run;

/*
 * Part 3: qualitative (nominal) predictors.
 * diagnosis (complicated/uncomplicated) and treatment (A/B/C) are
 * character predictors, response (cured/not) is the outcome and count
 * is the cell frequency. diagnosis is read with a $13. informat
 * because its values exceed the default character length.
 * One observation is written per data line.
 */
data uti;
   input diagnosis : $13. treatment $ response $ count;
   datalines;
complicated A cured 78
complicated A not 28
complicated B cured 101
complicated B not 11
complicated C cured 68
complicated C not 46
uncomplicated A cured 40
uncomplicated A not 5
uncomplicated B cured 54
uncomplicated B not 5
uncomplicated C cured 34
uncomplicated C not 6
;
run;
/*
 * Model with both main effects and their interaction
 * (diagnosis|treatment expands to diagnosis, treatment and
 * diagnosis*treatment). Only the fit statistics table is printed.
 * DATA= is named explicitly so the step does not depend on which data
 * set happens to be the most recently created one.
 */
ods select FitStatistics;
proc logistic data=uti;
   freq count;
   class diagnosis treatment /param=ref;
   model response = diagnosis|treatment;
run;

/*
 * Main-effects model for response. Prints the fit statistics, the
 * goodness-of-fit statistics, the Type 3 tests of effects and the
 * odds ratio estimates.
 * DATA= is named explicitly so the step does not depend on which data
 * set happens to be the most recently created one.
 * NOTE(review): recent SAS/STAT releases appear to list the Type 3
 * table under the ODS name Type3. Confirm that TypeIII is accepted by
 * the release in use.
 */
ods select FitStatistics GoodnessOfFit
           TypeIII OddsRatios;
proc logistic data=uti;
   freq count;
   class diagnosis treatment;
   model response = diagnosis treatment /
         scale=none aggregate;
run;

/*
 * Profile-likelihood confidence intervals for the main-effects model.
 *   clodds=pl - confidence intervals for the odds ratios
 *   clparm=pl - confidence intervals for the parameters
 * ODS SELECT keeps only those two tables.
 * DATA= is named explicitly so the step does not depend on which data
 * set happens to be the most recently created one.
 */
ods select ClparmPL CloddsPL;
proc logistic data=uti;
   freq count;
   class diagnosis treatment;
   model response = diagnosis treatment /
         scale=none aggregate clodds=pl clparm=pl;
run;

/*
 * Custom hypothesis tests with CONTRAST statements. Each row of
 * coefficients is one row of the contrast matrix applied to the
 * treatment parameters. Under PARAM=REF with the default reference
 * level there are two of them, one for A and one for B.
 *   'B versus A' - coefficients -1 1 give B minus A. ESTIMATE=EXP
 *                  also reports the exponentiated contrast.
 *   'A'          - tests the A parameter alone.
 *   'joint test' - two rows, tests both treatment parameters at once.
 * ODS SELECT keeps only the contrast test and estimate tables.
 * DATA= is named explicitly so the step does not depend on which data
 * set happens to be the most recently created one.
 */
ods select ContrastTest ContrastEstimate;
proc logistic data=uti;
   freq count;
   class diagnosis treatment /param=ref;
   model response = diagnosis treatment;
   contrast 'B versus A' treatment -1 1
            / estimate=exp;
   contrast 'A' treatment 1 0;
   contrast 'joint test' treatment 1 0,
                         treatment 0 1;
run;

/*
 * Part 4: continuous and ordinal predictors.
 * Replaces the earlier grouped CORONARY data set with one row per
 * subject: sex, ecg, age and the binary response ca. In these data
 * ecg takes the values 0, 1 and 2. The trailing @@ holds the input
 * line so that several observations are read from each data line.
 */
data coronary;
   input sex ecg age ca @@ ;
   datalines;
0 0 28 0 1 0 42 1 0 1 46 0 1 1 45 0
0 0 34 0 1 0 44 1 0 1 48 1 1 1 45 1
0 0 38 0 1 0 45 0 0 1 49 0 1 1 45 1
0 0 41 1 1 0 46 0 0 1 49 0 1 1 46 1
0 0 44 0 1 0 48 0 0 1 52 0 1 1 48 1
0 0 45 1 1 0 50 0 0 1 53 1 1 1 57 1
0 0 46 0 1 0 52 1 0 1 54 1 1 1 57 1
0 0 47 0 1 0 52 1 0 1 55 0 1 1 59 1
0 0 50 0 1 0 54 0 0 1 57 1 1 1 60 1
0 0 51 0 1 0 55 0 0 2 46 1 1 1 63 1
0 0 51 0 1 0 59 1 0 2 48 0 1 2 35 0
0 0 53 0 1 0 59 1 0 2 57 1 1 2 37 1
0 0 55 1 1 1 32 0 0 2 60 1 1 2 43 1
0 0 59 0 1 1 37 0 1 0 30 0 1 2 47 1
0 0 60 1 1 1 38 1 1 0 34 0 1 2 48 1
0 1 32 1 1 1 38 1 1 0 36 1 1 2 49 0
0 1 33 0 1 1 42 1 1 0 38 1 1 2 58 1
0 1 35 0 1 1 43 0 1 0 39 0 1 2 59 1
0 1 39 0 1 1 43 1 1 0 42 0 1 2 60 1
0 1 40 0 1 1 44 1
;
run;
/*
 * Forward selection over the main effects, squared terms and two-way
 * interactions of sex, ecg and age.
 *   selection=forward - forward selection. BACKWARD and STEPWISE are
 *                       the other methods mentioned in the original
 *                       notes.
 *   include=3         - the first three effects listed in the MODEL
 *                       statement (sex, ecg, age) stay in every model.
 *   details           - prints the details of each selection step.
 *   lackfit           - requests the Hosmer and Lemeshow
 *                       goodness-of-fit test.
 */
proc logistic data=coronary descending;
   model ca=sex ecg age ecg*ecg age*age
         sex*ecg sex*age ecg*age /
         selection=forward include=3 details lackfit;
run;

/*
 * Main-effects model. UNITS age=10 requests the odds ratio for a
 * 10-unit change in age instead of a 1-unit change.
 * DATA= is named explicitly, as in the step above, so the procedure
 * does not depend on which data set was created last.
 */
proc logistic data=coronary descending;
   model ca=sex ecg age;
   units age=10;
run;

/*
 * Part 5: regression diagnostics.
 * UTI2 stores the urinary tract infection data in events/trials form,
 * one row per diagnosis x treatment group: response is the number of
 * cured subjects and trials is the group size (for example 78 cured
 * out of 78 + 28 = 106 for complicated/A).
 * The character lengths are declared up front, so plain list input
 * can be used in the INPUT statement.
 */
data uti2;
   length diagnosis $13 treatment $8;
   input diagnosis $ treatment $ response trials;
   datalines;
complicated A 78 106
complicated B 101 112
complicated C 68 114
uncomplicated A 40 45
uncomplicated B 54 59
uncomplicated C 34 40
;
/*
 * Influence diagnostics for the events/trials data.
 * First step: main-effects model, INFLUENCE prints the regression
 * diagnostics for each observation.
 */
proc logistic data=uti2;
   class diagnosis treatment / param=ref;
   model response/trials = diagnosis treatment / influence;
run;

/*
 * Second step: model with diagnosis only. The goodness-of-fit
 * subpopulations are still defined by treatment and diagnosis, and
 * IPLOTS adds index plots of the diagnostics.
 */
proc logistic data=uti2;
   class diagnosis treatment / param=ref;
   model response/trials = diagnosis /
         influence iplots
         scale=none aggregate=(treatment diagnosis);
run;

/*
 * Exact logistic regression example data.
 * time (early/delayed/late) and group (antidote/control) are the
 * predictors, status (severe/not) is the response and count is the
 * cell frequency. Some cells have a count of zero.
 * One observation is written per data line.
 */
data liver;
   input time $ group $ status $ count;
   datalines;
early antidote severe 6
early antidote not 12
early control severe 6
early control not 2
delayed antidote severe 3
delayed antidote not 4
delayed control severe 3
delayed control not 0
late antidote severe 5
late antidote not 1
late control severe 6
late control not 0
;
/*
 * Exact logistic regression of status on time and group, with
 * reference levels 'early' and 'control' under reference-cell coding.
 *   exact 'Model 1' ... / estimate=both
 *        - exact analysis of the intercept, time and group.
 *          ESTIMATE=BOTH requests both the exact parameter estimates
 *          and the exact odds ratios.
 *   exact 'Joint Test' ... / joint
 *        - joint exact test of the time and group effects.
 * DATA= is named explicitly so the step does not depend on which data
 * set happens to be the most recently created one.
 */
proc logistic data=liver descending;
   freq count;
   class time(ref='early') group(ref='control') /param=ref;
   model status = time group / scale=none aggregate clparm=wald;
   exact 'Model 1' intercept time group / estimate=both;
   exact 'Joint Test' time group / joint;
run;

【上篇】
【下篇】

抱歉!评论已关闭.