现在的位置: 首页 > 综合 > 正文

抽样之 PROC SURVEYSELECT

2018年10月23日 ⁄ 综合 ⁄ 共 5173字 ⁄ 字号 评论关闭

现摘录 SAS 文档中 PROC SURVEYSELECT 的示例代码:

 

/* Generate Sampling Frame -------------------------------------*/

/* Ten hand-entered customer records, read with list input.          */
/* CustomerID and Usage are numeric; State and Type are character.   */
/* CustomerID is displayed with the SSN11. format.                   */
data Customer1;
   format CustomerID ssn11.;
   input CustomerID
         State $
         Type  $
         Usage;
   datalines;
416874322 AL New 839
288139763 GA Old 224
339008654 GA Old 2451
118980542 GA New 349
421670342 FL New 562
623189201 SC New 68
324550324 FL Old 137
832902397 AL Old 1563
586450178 GA New 615
801245317 SC New 728
;
run;
/* Simulate the bulk of the sampling frame: one run of customers per */
/* State x Type stratum, generated in the order GA, AL, FL, SC with  */
/* 'New' before 'Old' inside each state.                             */
/*                                                                   */
/*   CustomerID : floor(1e9 * uniform(0,1)) pseudo-random identifier */
/*   Usage      : exp(Mu + Sigma * standard normal)                  */
/*                New customers use Mu=5,   Sigma=1.35               */
/*                Old customers use Mu=5.2, Sigma=1.4                */
/*                                                                   */
/* NOTE: a RANxxx function honors only the seed supplied on its      */
/* first execution in a DATA step, so a single seed per function is  */
/* written here; varying the literal in later calls would not start  */
/* a new stream.                                                     */
data Customer2;
   format CustomerID SSN11.;
   length State $ 2 Type $ 3;
   drop n StratumSize Mu Sigma;

   do State = 'GA', 'AL', 'FL', 'SC';
      do Type = 'New', 'Old';

         /* Number of simulated customers in this stratum. */
         select (State || Type);
            when ('GANew') StratumSize = 3486;
            when ('GAOld') StratumSize = 1938;
            when ('ALNew') StratumSize = 1237;
            when ('ALOld') StratumSize =  705;
            when ('FLNew') StratumSize = 2169;
            when ('FLOld') StratumSize = 1369;
            when ('SCNew') StratumSize = 1682;
            when ('SCOld') StratumSize =  875;
            otherwise      StratumSize =    0;
         end;

         /* Lognormal parameters depend only on the customer type. */
         if Type = 'New' then do;
            Mu    = 5;
            Sigma = 1.35;
         end;
         else do;
            Mu    = 5.2;
            Sigma = 1.4;
         end;

         /* Draw order per row (uniform, then normal) is kept fixed so */
         /* the generated values follow one reproducible sequence.     */
         do n = 1 to StratumSize;
            CustomerID = floor(1e9 * ranuni(123));
            Usage      = exp(Mu + Sigma * rannor(456));
            output;
         end;

      end;
   end;
run;

/* Reorder the simulated customers by their pseudo-random ID so they */
/* are no longer grouped in the order they were generated.           */
proc sort data=Customer2;
   by CustomerID;
run;

/* Build the sampling frame: Customer1 rows followed by Customer2    */
/* rows. Usage is displayed as a whole number and floored at zero.   */
data Customers;
   set Customer1 Customer2;
   format Usage 6.0;
   if Usage < 0 then Usage = 0;
run;   /* explicit step boundary instead of relying on the next PROC */

/* Preview the first ten rows of the combined frame. */
title  'Customer Satisfaction Survey';
title2 'First 10 Observations';
proc print data=Customers (obs=10);
run;

/* Simple Random Sampling --------------------------------------*/

/* Draw a simple random sample (without replacement) of 100          */
/* customers from the whole frame; the fixed seed makes the          */
/* selection reproducible.                                           */
title2 'Simple Random Sampling';
proc surveyselect
      data     = Customers
      out      = SampleSRS
      method   = srs
      sampsize = 100
      seed     = 39647;
run;

/* List the first 20 selected customers. */
title2 'Sample of 100 Customers, Selected by SRS';
title3 '(First 20 Observations)';
proc print data=SampleSRS (obs=20);
run;

/* Stratified Sampling -----------------------------------------*/

/* The STRATA statement below expects the input grouped by the       */
/* stratification variables, so sort the frame by State and Type.    */
proc sort data=Customers;
   by State Type;
run;

/* Cross-tabulate the strata to show how many customers each holds.  */
title2 'Strata of Customers';
proc freq data=Customers;
   tables State * Type;
run;

/* Select 15 customers by simple random sampling within every        */
/* State x Type stratum.                                             */
title2 'Stratified Sampling';
proc surveyselect
      data     = Customers
      out      = SampleStrata
      method   = srs
      sampsize = 15
      seed     = 1953;
   strata State Type;
run;

/* List the first 30 selected customers. */
title2 'Sample Selected by Stratified Design';
title3 '(First 30 Observations)';
proc print data=SampleStrata (obs=30);
run;

/* Stratified Sampling with Control Sorting --------------------*/

/* Systematic sampling at a 2% rate within each State, with the      */
/* units ordered by Type and Usage (control sorting) before          */
/* selection.                                                        */
/* NOTE(review): per the PROC SURVEYSELECT documentation, CONTROL    */
/* sorting without OUTSORT= replaces the input data set with the     */
/* sorted one - confirm before relying on the row order of Customers */
/* after this step.                                                  */
title2 'Stratified Sampling with Control Sorting';
proc surveyselect
      data     = Customers
      out      = SampleControl
      method   = sys
      samprate = .02
      seed     = 1234;
   strata  State;
   control Type Usage;
run;
/* Proportional Allocation Among Strata ------------------------*/

/* Allocate a total sample size of 1000 across the State x Type      */
/* strata in proportion to stratum size. NOSAMPLE requests the       */
/* allocation only: no units are selected, and the OUT= data set     */
/* receives the allocation results.                                  */
title  'Customer Satisfaction Survey';
title2 'Proportional Allocation';
proc surveyselect
      data     = Customers
      out      = SampleSizes
      sampsize = 1000;
   strata State Type / alloc=prop nosample;
run;

/* Show the allocated sample sizes. */
proc print data=SampleSizes;
run;
/* Replicated Sampling -----------------------------------------*/

/* Four replicates of a sequential sample stratified by State.       */
/* The parenthesized list gives one sample size per stratum, taken   */
/* in the order the State strata appear in the input data set.       */
/* Units are control-sorted by Type and Usage within each State.     */
title  'Customer Satisfaction Survey';
title2 'Replicated Sampling';
proc surveyselect
      data     = Customers
      out      = SampleRep
      method   = seq
      sampsize = (8 12 20 10)
      reps     = 4
      seed     = 40070;
   strata  State;
   control Type Usage;
run;

/* Print only the rows selected from the 'AL' stratum. */
title2 'Sample Selected by Replicated Design';
title3 '(First Stratum)';
proc print data=SampleRep;
   where State = 'AL';
run;

/* Sampling Frame ----------------------------------------------*/

/* Travel-expense frame: (ID, Amount) pairs, several per data line   */
/* (the trailing @@ holds the line across iterations).               */
/* Level buckets each Amount:                                        */
/*    below 500   -> '1_Low'                                         */
/*    above 1500  -> '3_High'                                        */
/*    otherwise   -> '2_Avg'                                         */
/* The literals are blank-padded to a common width of six.           */
data TravelExpense;
   input ID $ Amount @@;
   select;
      when (Amount < 500)  Level='1_Low ';
      when (Amount > 1500) Level='3_High';
      otherwise            Level='2_Avg ';
   end;
   datalines;
110  237.18   002  567.89   234  118.50
743   74.38   411 1287.23   782  258.10
216  325.36   174  218.38   568 1670.80
302  134.71   285 2020.70   314   47.80
139 1183.45   775  330.54   425  780.10
506  895.80   239  620.10   011  420.18
672  979.66   142  810.25   738  670.85
192  314.58   243   87.50   263 1893.40
496  753.30   332  540.65   486 2580.35
614  230.56   654  185.60   308  688.43
784  505.14   017  205.48   162  650.42
289 1348.34   691   30.50   545 2214.80
517  940.35   382  217.85   024  142.90
478  806.90   107  560.72
;

/* Group the frame by expense level so Level can serve as the        */
/* stratification variable in a later step, then list the frame.     */
proc sort data=TravelExpense;
   by Level;
run;

title1 'Travel Expense Audit';
proc print data=TravelExpense;
run;

/* PPS (Dollar-Unit) Sampling ----------------------------------*/

/* Dollar-unit sampling: within each Level stratum, select expense   */
/* records with probability proportional to Amount (the SIZE         */
/* variable). The list gives one sample size per stratum, in the     */
/* order the strata appear in the input data set.                    */
title2 'Stratified PPS (Dollar-Unit) Sampling';
proc surveyselect
      data     = TravelExpense
      out      = AuditSample
      method   = pps
      sampsize = (6 10 4)
      seed     = 47279;
   strata Level;
   size   Amount;
run;

/* List every selected expense record. */
title2 'Sample Selected by Stratified PPS Design';
proc print data=AuditSample;
run;

 

/* Sampling Frame ----------------------------------------------*/

/* Hospital frame: (Hospital, Type, SizeMeasure) triples, several    */
/* per data line (the trailing @@ holds the line across iterations). */
/* Size buckets SizeMeasure:                                         */
/*    below 20    -> 'Small'                                         */
/*    below 50    -> 'Medium'                                        */
/*    otherwise   -> 'Large'                                         */
/* The literals are blank-padded to a common width of six.           */
data HospitalFrame;
   input Hospital $ Type $ SizeMeasure @@;
   select;
      when (SizeMeasure < 20) Size='Small ';
      when (SizeMeasure < 50) Size='Medium';
      otherwise               Size='Large ';
   end;
   datalines;
034 Rural  0.870   107 Rural  1.316
079 Rural  2.127   223 Rural  3.960
236 Rural  5.279   165 Rural  5.893
086 Rural  0.501   141 Rural 11.528
042 Urban  3.104   124 Urban  4.033
006 Urban  4.249   261 Urban  4.376
195 Urban  5.024   190 Urban 10.373
038 Urban 17.125   083 Urban 40.382
259 Urban 44.942   129 Urban 46.702
133 Urban 46.992   218 Urban 48.231
026 Urban 61.460   058 Urban 65.931
119 Urban 66.352
;

/* List the complete hospital sampling frame. */
title1 'Hospital Utilization Survey';
title2 'Sampling Frame, Region 1';
proc print data=HospitalFrame;
run;

/* PPS Selection of Two Units Per Stratum ----------------------*/

/* Brewer's PPS method within each Type x Size stratum, using        */
/* SizeMeasure as the size variable. No sample size is given         */
/* because the method selects two units per stratum.                 */
/* NOTSORTED accepts strata that are grouped in the input but not    */
/* in sorted order.                                                  */
title2 'Stratified PPS Sampling';
proc surveyselect
      data   = HospitalFrame
      out    = SampleHospitals
      method = pps_brewer
      seed   = 48702;
   strata Type Size notsorted;
   size   SizeMeasure;
run;

/* List the selected hospitals. */
title2 'Sample Selected by Stratified PPS Design';
proc print data=SampleHospitals;
run;

抱歉!评论已关闭.