现在的位置: 首页 > 综合 > 正文

SAS中的HASH语句及其常见应用

2018年10月22日 ⁄ 综合 ⁄ 共 5105字 ⁄ 字号 评论关闭

首先，使用HASH对象有以下几个优点：

• Key lookup occurs in memory, avoiding costly disk access.
• When a key lookup occurs, only a small subset of the records are searched.
• The key and data parts of a record can consist of more than one value, removing the need to format and concatenate values to construct the key and data parts.
• The hash object allocates memory as records are added. That is, the hash object only allocates as much memory as it needs and the number of records that can be stored is only limited by the amount of memory available to SAS.
• When loading a hash object from a data set, the data set need not be sorted or indexed.

下面介绍如何定义hash对象

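The hash table methods are the functions it can perform, namely:

• DefineKey – define the set of key variables.
• DefineData – define the set of data (satellite) variables stored with each key.
• DefineDone – complete the definition; if the dataset: argument tag was given, the table is loaded from that data set.
• Add – insert the key and its data if the key is not already in the table.
• Replace – overwrite the data if the key is already in the table; otherwise insert the entry.
• Remove – delete the entry with the given key.
• Find – search for the key; if found, copy its data into the host variables.
• Check – search for the key and only report (return code 0) whether it was found.
• Output – write the current contents of the table to a SAS data set.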

例:

1.HASH OBJECT LOAD AND LOOKUP

/* Lookup table: one row per participant with gender and assigned treatment. */
data participants;
   input name $ gender :$1. treatment $;
   datalines;
John M Placebo
Ronald M Drug-A
Barbara F Drug-B
Alice F Drug-A
;
run;

/* Transaction table: each raw line holds one date followed by four
   name/weight pairs; every pair is written out as its own observation. */
data weight (drop = pair);
   input date :date9. @;            /* hold the line so the pairs can be read from it */
   do pair = 1 to 4;
      input name $ weight @;
      output;
   end;
   /* For brevity, only two dates are listed below */
   datalines;
05May2006 Barbara 125 Alice 130 Ronald 170 John 160
04Jun2006 Barbara 122 Alice 133 Ronald 168 John 155
;
run;

/* Join WEIGHT to PARTICIPANTS through an in-memory hash lookup keyed on NAME. */
data results;
   /* Host variables for the hash key and data items must exist in the PDV. */
   length name treatment $ 8 gender $ 1;
   if _n_ = 1 then do;
      /* Build the table once; dataset: loads it from PARTICIPANTS at defineDone. */
      declare hash roster (dataset: 'participants');
      roster.defineKey ('name');
      roster.defineData ('gender', 'treatment');
      roster.defineDone ();
   end;
   set weight;
   /* find() returns 0 on a hit and fills GENDER and TREATMENT;
      observations without a matching participant are discarded. */
   if roster.find () ne 0 then delete;
run;

proc print data = results;
   var date name gender weight treatment;
   format date DATE9.;
run;

2.ADD, REPLACE, AND OUTPUT

/* One row per goal: the scorer and the period/time at which it was scored. */
data goals;
   input player $ when & $9.;
   datalines;
Hill 1st 01:24
Jones 1st 09:43
Santos 1st 12:45
Santos 2nd 00:42
Santos 2nd 03:46
Jones 2nd 11:15
;
run;

/* Collapse GOALS to one row per player, accumulating a comma-separated
   list of goal times in a hash table, then dump the table to GOAL_SUMMARY. */
data _null_;
   length goals_list $ 64;
   if _n_ = 1 then do;
      declare hash scorers ();
      scorers.defineKey ('player');
      scorers.defineData ('player', 'goals_list');
      scorers.defineDone ();
   end;
   set goals end = last_row;
   if scorers.find () = 0 then do;
      /* Player already stored: find() loaded the current list, so extend it
         and write the longer list back over the old entry. */
      goals_list = trim (goals_list) || ', ' || when;
      scorers.replace ();
   end;
   else do;
      /* First goal seen for this player: start a new list. */
      goals_list = when;
      scorers.add ();
   end;
   /* After the final input row, write the data items of every entry. */
   if last_row then scorers.output (dataset: 'goal_summary');
run;

proc print data = goal_summary;
run;

HASH的应用

1.HITER: HASH ITERATOR OBJECT

/* Ten key/satellite pairs in no particular key order. */
data sample ;
   input k sat ;
   datalines ;
185 01
971 02
400 03
260 04
922 05
970 06
543 07
532 08
050 09
067 10
;
run ;

/* Hash iterator object: walk an ordered hash table forwards and backwards. */
data _null_ ;
   /* Never executed; only makes the compiler create K and SAT in the PDV. */
   if 0 then set sample ;
   /* ordered: 'a' keeps the entries in ascending key order. */
   dcl hash tbl (dataset: 'sample', hashexp: 8, ordered: 'a') ;
   dcl hiter cursor ('tbl') ;
   tbl.DefineKey ('k') ;
   tbl.DefineData ('sat', 'k') ;
   tbl.DefineDone () ;

   /* Forward pass: first() fetches the entry with the smallest key into the
      host variables K and SAT; next() then steps through the keys in
      ascending order until it returns a non-zero code. */
   rc = cursor.first () ;
   do while (rc = 0) ;
      put k = z3. +1 sat = z2. ;
      rc = cursor.next () ;
   end ;

   put 13 * '-' ;

   /* Backward pass: last() fetches the entry with the largest key and
      prev() steps towards the smallest one. */
   rc = cursor.last () ;
   do while (rc = 0) ;
      put k = z3. +1 sat = z2. ;
      rc = cursor.prev () ;
   end ;
   stop ;
run ;

/* Array Sorting
   Sorts the parallel temporary arrays A (keys) and B (satellites) by loading
   them into an ordered hash table and reading the table back through an
   iterator.  Entries with a key that is already in the table are skipped, so
   only the first N_UNIQUE cells of A and B are rewritten by each pass; the
   remaining cells keep their previous values. */
data _null_ ;
   array a (-100000 : 100000) _temporary_ ;
   array b (-100000 : 100000) _temporary_ ;
   /* Fill A with pseudo-random integers and B with the original positions. */
   do j = lbound (a) to hbound (a) ;
      a (j) = ceil ( ranuni (1) * 1e5 ) ;
      b (j) = j ;
   end ;
   /* Host variables for the hash key (KA) and satellite (SB). */
   length ka 8 sb 8 ;
   /* The ordered: value must be written with plain ASCII quotes;
      typographic quotes are not a valid SAS string literal. */
   declare hash hh (hashexp: 0, ordered: 'a') ;
   declare hiter hi ( 'hh' ) ;
   hh.DefineKey ( 'ka' ) ;
   hh.DefineData ( 'ka' , 'sb' ) ;
   hh.DefineDone () ;
   /* Load the table, keeping only the first occurrence of each key. */
   do j = lbound (a) to hbound (a) ;
      ka = a (j) ;
      if hh.check () = 0 then continue ;   /* key already stored: skip it */
      sb = b (j) ;
      n_unique ++ 1 ;
      hh.add () ;
   end ;
   /* Sort ascending: read the entries from the smallest key upwards. */
   rc = hi.first () ;
   do j = lbound (a) by 1 while ( rc = 0 ) ;
      a (j) = ka ;
      b (j) = sb ;
      rc = hi.next () ;
   end ;
   /* Sort descending: read the entries from the largest key downwards,
      overwriting the result of the ascending pass. */
   rc = hi.last () ;
   do j = lbound (a) by 1 while ( rc = 0 ) ;
      a (j) = ka ;
      b (j) = sb ;
      rc = hi.prev () ;
   end ;
   stop ;
run ;

 

2.Summarizing Without Summary

/* Test data: for each K1 from 1,000,000 down to 1 (K2 is the zero-padded
   character form of K1) write between one and six rows, NUM = 1, 2, ... */
data input ;
   do k1 = 1e6 to 1 by -1 ;
      k2 = put (k1, z7.) ;
      do num = 1 to ceil (ranuni (1) * 6) ;
         output ;
      end ;
   end ;
run ;

/* Reference solution: PROC SUMMARY totals NUM for every K1*K2 combination. */
proc summary data = input nway ;
   class k1 k2 ;
   var num ;
   output out = summ_sum (drop = _:) sum = sum ;
run ;

/* Hash solution: keep one running total per (K1, K2) key in memory and
   write the whole table to HASH_SUM once the input is exhausted. */
data _null_ ;
   /* Never executed; only puts the INPUT variables into the PDV at compile time. */
   if 0 then set input ;
   dcl hash totals (hashexp: 16) ;
   totals.definekey  ('k1', 'k2') ;
   totals.definedata ('k1', 'k2', 'sum') ;
   totals.definedone () ;
   do until (last_row) ;
      set input end = last_row ;
      /* Key not stored yet: start its total at zero.  Otherwise find()
         has just loaded the running total into SUM. */
      if totals.find () ne 0 then sum = 0 ;
      sum + num ;
      totals.replace () ;
   end ;
   rc = totals.output (dataset: 'hash_sum') ;
run ;

3.SPLITTING A SAS FILE DYNAMICALLY USING THE .OUTPUT() METHOD

/* Example 1: split the data into several data sets according to ID */
data sample ;
input id transid amt ;
cards ;
1 11 40
1 11 26
1 12 97
2 13 5
2 13 7
2 14 22
3 14 1
4 15 43
4 15 81
5 11 86
5 11 85
;
run ;
/* PROC SQL approach: build the list of output data set names (OUT<id>) and
   the matching WHEN clauses as macro variables, then route every row of
   SAMPLE with a SELECT block. */
proc sql noprint ;
   /* One data set name per distinct ID. */
   select distinct 'OUT' || put (id, best.-l)
   into : dslist
   separated by ' '
   from sample ;
   /* One WHEN clause per distinct ID.  DISTINCT keeps the generated code
      from repeating the same clause once for every input row, which would
      needlessly lengthen (and, for large inputs, overflow) the macro variable. */
   select distinct 'WHEN (' || put (id, best.-l) || ') OUTPUT OUT' || put (id, best.-l)
   into : whenlist
   separated by ';'
   from sample ;
quit ;
proc sort data = sample ;
   by id transid amt ;
run ;
data &dslist ;
   set sample ;
   select ( id ) ;
   &whenlist ;
   otherwise ;
   end;
run ;
/* Hash approach: collect one BY group of SAMPLE (which must be sorted by ID)
   per step iteration in a hash table, then let the .output() method create
   the data set OUT<id>; its name is built at run time from the current ID. */
data _null_ ;
   /* The declaration runs at the top of every step iteration, i.e. once per
      BY group; the intent is that each group is collected in its own table. */
   dcl hash grp (ordered: 'a') ;
   /* _N_ is part of the key so that rows with identical ID, TRANSID and AMT
      still get distinct keys within a group. */
   grp.definekey  ('id', 'transid', 'amt', '_n_') ;
   grp.definedata ('id', 'transid', 'amt') ;
   grp.definedone () ;
   /* Read one complete BY group; _N_ counts the rows read within it. */
   do _n_ = 1 by 1 until (last.id) ;
      set sample ;
      by id ;
      grp.add () ;
   end ;
   grp.output (dataset: 'OUT' || put (id, best.-l)) ;
run ;

/* Example 2: splitting data that is not sorted by ID */
data sample ;
   input id transid amt ;
   datalines ;
5 11 86
2 14 22
1 12 97
3 14 1
4 15 43
2 13 5
2 13 7
1 11 40
4 15 81
5 11 85
1 11 26
;
run ;

/* Hash of hashes: the outer table BY_ID is keyed on ID and stores, as a data
   item, a reference to an inner table BUCKET that collects the rows of that ID. */
data _null_ ;
   dcl hash by_id (ordered: 'a') ;
   dcl hiter walker ('by_id') ;
   by_id.definekey  ('id') ;
   by_id.definedata ('id', 'bucket') ;
   by_id.definedone () ;
   /* Declares BUCKET as a hash-type variable so that it can be named as a
      data item of the outer table. */
   dcl hash bucket () ;

   /* _N_ numbers the input rows and is part of the inner key, so rows that
      share ID and TRANSID are still stored as separate entries. */
   do _n_ = 1 by 1 until (last_row) ;
      set sample end = last_row ;
      if by_id.find () ne 0 then do ;
         /* First row of a new ID: create its inner table and register it
            in the outer table. */
         bucket = _new_ hash (ordered: 'a') ;
         bucket.definekey  ('id', 'transid', '_n_') ;
         bucket.definedata ('id', 'transid', 'amt') ;
         bucket.definedone () ;
         by_id.replace () ;
      end ;
      /* Here BUCKET refers to the inner table of the current ID, either
         just created or retrieved by find(). */
      bucket.replace () ;
   end ;

   /* Walk the outer table; each step loads ID and its inner table, which
      is then written out as data set out<id>. */
   rc = walker.next () ;
   do while (rc = 0) ;
      bucket.output (dataset: 'out' || put (id, best.-L)) ;
      rc = walker.next () ;
   end ;
   stop ;
run ;

 

 

声明：本文主要摘录自《Hash Component Objects: Dynamic Data Storage and Table Look-Up》和《Data Step Hash Objects as Programming Tools》。

相关文献:

《How to Implement the SAS® DATA Step Hash Object》

《An Introduction to SAS® Hash Programming Techniques》

《Getting Started with the DATA Step Hash Object》

抱歉!评论已关闭.