现摘录SAS文档中的代码:
/* Generate Sampling Frame -------------------------------------*/
/* Ten hand-entered customer records that head the sampling frame.
   Columns read per record: CustomerID, State, Type, Usage.
   CustomerID is displayed with the SSN11. format. */
data Customer1;
   attrib CustomerID format=SSN11.;
   input CustomerID State $ Type $ Usage;
   cards;
416874322 AL New 839
288139763 GA Old 224
339008654 GA Old 2451
118980542 GA New 349
421670342 FL New 562
623189201 SC New 68
324550324 FL Old 137
832902397 AL Old 1563
586450178 GA New 615
801245317 SC New 728
;
run;
/* Simulate the bulk of the sampling frame: 13,461 customers spread over
   eight State x Type strata.

   For every simulated customer:
     CustomerID = floor(1e9 * uniform draw)
     Usage      = exp(mean + sd * normal draw), with
                  (mean, sd) = (5, 1.35) for 'New' and (5.2, 1.4) for 'Old'.

   Strata are generated in the order GA, AL, FL, SC and, within a state,
   New before Old. RANUNI is called before RANNOR for each record, so the
   sequence of random draws stays fixed.

   Seeds: per the SAS documentation for the legacy RANUNI/RANNOR functions,
   the first seed executed in a DATA step (123 here) initializes the single
   random stream and later seed arguments are not used, so one literal per
   call site is sufficient. */
data Customer2;
   format CustomerID SSN11.;
   length state $ 2 Type $ 3;

   /* Stratum definitions; nCust is filled row by row (state, then type). */
   array stName[4]  $ 2 _temporary_ ('GA' 'AL' 'FL' 'SC');
   array tyName[2]  $ 3 _temporary_ ('New' 'Old');
   array nCust[4,2]     _temporary_ (3486 1938
                                     1237  705
                                     2169 1369
                                     1682  875);
   array logMean[2]     _temporary_ (5 5.2);
   array logSD[2]       _temporary_ (1.35 1.4);

   do s = 1 to dim(stName);
      state = stName[s];
      do t = 1 to dim(tyName);
         Type = tyName[t];
         do n = 1 to nCust[s, t];
            CustomerID = floor(1e9 * ranuni(123));
            Usage = exp(logMean[t] + logSD[t] * rannor(456));
            output;
         end;
      end;
   end;

   /* Loop counters are working variables only. */
   drop s t n;
run;
/* Order the simulated customers by ID, then stack them beneath the
   hand-entered records to form the complete sampling frame. */
proc sort data=Customer2;
   by CustomerID;
run;

data Customers;
   set Customer1
       Customer2;
   format Usage 6.0;
   /* Floor Usage at zero. A missing Usage compares below 0 in SAS, so it
      is set to 0 as well, exactly as an "if Usage < 0" test would do. */
   Usage = max(Usage, 0);
run;
/* List the first ten records of the frame under the survey titles. */
proc print data=Customers (obs=10);
   title1 'Customer Satisfaction Survey';
   title2 'First 10 Observations';
run;
/* Simple Random Sampling --------------------------------------*/
/* Select 100 customers from the whole frame by simple random sampling
   (no STRATA statement), then list the first 20 selected records. */
proc surveyselect data=Customers out=SampleSRS
                  method=srs sampsize=100 seed=39647;
   title2 'Simple Random Sampling';
run;

proc print data=SampleSRS (obs=20);
   title2 'Sample of 100 Customers, Selected by SRS';
   title3 '(First 20 Observations)';
run;
/* Stratified Sampling -----------------------------------------*/
/* The STRATA statement below expects the frame grouped by State and Type. */
proc sort data=Customers;
   by State Type;
run;

/* Cross-tabulate the strata to show how many customers each one holds. */
proc freq data=Customers;
   title2 'Strata of Customers';
   tables State * Type;
run;

/* Simple random sampling within strata, using the same sample size (15)
   for every State x Type stratum. */
proc surveyselect data=Customers out=SampleStrata
                  method=srs sampsize=15 seed=1953;
   title2 'Stratified Sampling';
   strata State Type;
run;

proc print data=SampleStrata (obs=30);
   title2 'Sample Selected by Stratified Design';
   title3 '(First 30 Observations)';
run;
/* Stratified Sampling with Control Sorting --------------------*/
/* Systematic selection at a 2% rate within each State, with Type and
   Usage named as control-sorting variables.
   NOTE(review): no OUTSORT= data set is given, so the control-sorted frame
   is expected to replace Customers -- confirm against the PROC SURVEYSELECT
   documentation if later steps depend on the original record order. */
proc surveyselect data=Customers out=SampleControl
                  method=sys samprate=.02 seed=1234;
   title2 'Stratified Sampling with Control Sorting';
   strata State;
   control Type Usage;
run;
/* Proportional Allocation Among Strata ------------------------*/
/* Allocate a total sample of 1000 across the State x Type strata in
   proportion to stratum size. NOSAMPLE requests the allocation only, so
   SampleSizes receives the allocated sizes rather than selected units. */
proc surveyselect data=Customers out=SampleSizes
                  sampsize=1000;
   title1 'Customer Satisfaction Survey';
   title2 'Proportional Allocation';
   strata State Type / alloc=prop nosample;
run;

proc print data=SampleSizes;
run;
/* Replicated Sampling -----------------------------------------*/
/* Four replicate samples drawn by sequential selection within State,
   with Type and Usage as control variables. The four sample sizes in the
   list are given one per State stratum. */
proc surveyselect data=Customers out=SampleRep
                  method=seq sampsize=(8 12 20 10)
                  reps=4 seed=40070;
   title1 'Customer Satisfaction Survey';
   title2 'Replicated Sampling';
   strata State;
   control Type Usage;
run;

/* Show the selected records of the 'AL' stratum only. */
proc print data=SampleRep;
   title2 'Sample Selected by Replicated Design';
   title3 '(First Stratum)';
   where State = 'AL';
run;
/* Sampling Frame ----------------------------------------------*/
/* Travel-expense sampling frame. Each input line holds several
   (ID, Amount) pairs, read with the double trailing @ line-hold.
   Level classifies each record by Amount:
     below 500 -> '1_Low ', above 1500 -> '3_High', otherwise '2_Avg '. */
data TravelExpense;
   input ID $ Amount @@;
   select;
      when (Amount < 500)  Level = '1_Low ';
      when (Amount > 1500) Level = '3_High';
      otherwise            Level = '2_Avg ';
   end;
   cards;
110 237.18 002 567.89 234 118.50
743 74.38 411 1287.23 782 258.10
216 325.36 174 218.38 568 1670.80
302 134.71 285 2020.70 314 47.80
139 1183.45 775 330.54 425 780.10
506 895.80 239 620.10 011 420.18
672 979.66 142 810.25 738 670.85
192 314.58 243 87.50 263 1893.40
496 753.30 332 540.65 486 2580.35
614 230.56 654 185.60 308 688.43
784 505.14 017 205.48 162 650.42
289 1348.34 691 30.50 545 2214.80
517 940.35 382 217.85 024 142.90
478 806.90 107 560.72
;
run;

/* Group the frame by Level so it can be used as a stratification variable. */
proc sort data=TravelExpense;
   by Level;
run;

proc print data=TravelExpense;
   title 'Travel Expense Audit';
run;
/* PPS (Dollar-Unit) Sampling ----------------------------------*/
/* PPS selection within each Level stratum, using Amount as the size
   measure. The three sample sizes in the list are given one per Level. */
proc surveyselect data=TravelExpense out=AuditSample
                  method=pps sampsize=(6 10 4) seed=47279;
   title2 'Stratified PPS (Dollar-Unit) Sampling';
   strata Level;
   size Amount;
run;

proc print data=AuditSample;
   title2 'Sample Selected by Stratified PPS Design';
run;
/* Sampling Frame ----------------------------------------------*/
/* Hospital sampling frame. Each input line holds several
   (Hospital, Type, SizeMeasure) triples, read with the double trailing @.
   Size classifies each hospital by SizeMeasure:
     below 20 -> 'Small ', below 50 -> 'Medium', otherwise 'Large '. */
data HospitalFrame;
   input Hospital $ Type $ SizeMeasure @@;
   select;
      when (SizeMeasure < 20) Size = 'Small ';
      when (SizeMeasure < 50) Size = 'Medium';
      otherwise               Size = 'Large ';
   end;
   cards;
034 Rural 0.870 107 Rural 1.316
079 Rural 2.127 223 Rural 3.960
236 Rural 5.279 165 Rural 5.893
086 Rural 0.501 141 Rural 11.528
042 Urban 3.104 124 Urban 4.033
006 Urban 4.249 261 Urban 4.376
195 Urban 5.024 190 Urban 10.373
038 Urban 17.125 083 Urban 40.382
259 Urban 44.942 129 Urban 46.702
133 Urban 46.992 218 Urban 48.231
026 Urban 61.460 058 Urban 65.931
119 Urban 66.352
;
run;

proc print data=HospitalFrame;
   title 'Hospital Utilization Survey';
   title2 'Sampling Frame, Region 1';
run;
/* PPS Selection of Two Units Per Stratum ----------------------*/
/* Brewer's PPS method within Type x Size strata, with SizeMeasure as the
   size measure. NOTSORTED is used because the frame is grouped by stratum
   in input order rather than sorted by the STRATA variables. */
proc surveyselect data=HospitalFrame out=SampleHospitals
                  method=pps_brewer seed=48702;
   title2 'Stratified PPS Sampling';
   strata Type Size notsorted;
   size SizeMeasure;
run;

proc print data=SampleHospitals;
   title2 'Sample Selected by Stratified PPS Design';
run;