/* Session-wide listing settings: suppress the date stamp (nodate) and page
   numbers (nonumber), left-align output (nocenter), and use "-" as the
   page-break delimiter (formdlim). */
options nodate nonumber nocenter formdlim="-";

/***********************************************
   Recoding variables
***********************************************/
/* Sample data: one row per family with twelve monthly income values.
   A value of -999 is the sentinel that later steps recode to missing. */
data faminc;
  input famid
        faminc1-faminc12;
  datalines;
1 3281 3413 3114 2500 2700 3500 3114 -999 3514 1282 2434 2818
2 4042 3084 3108 3150 -999 3100 1531 2914 3819 4124 4274 4471
3 6015 6123 6113 -999 6100 6200 6186 6132 -999 4231 6039 6215
;
run;
/* Recode the -999 sentinel to missing, one variable at a time
   (the long-hand version, before arrays are introduced). */
data recode_missing;
  set faminc;
  if faminc1  = -999 then call missing(faminc1);
  if faminc2  = -999 then call missing(faminc2);
  if faminc3  = -999 then call missing(faminc3);
  if faminc4  = -999 then call missing(faminc4);
  if faminc5  = -999 then call missing(faminc5);
  if faminc6  = -999 then call missing(faminc6);
  if faminc7  = -999 then call missing(faminc7);
  if faminc8  = -999 then call missing(faminc8);
  if faminc9  = -999 then call missing(faminc9);
  if faminc10 = -999 then call missing(faminc10);
  if faminc11 = -999 then call missing(faminc11);
  if faminc12 = -999 then call missing(faminc12);
run;

/* List the recoded data with horizontal column headings and no Obs column. */
proc print data=recode_missing noobs heading=h;
run;
/* Same recode as the long-hand step, expressed as a loop over an array
   that spans the twelve monthly income variables. */
data recode_missing;
  set faminc;
  array monthly{*} faminc1-faminc12;
  do k = 1 to dim(monthly);
    if monthly{k} = -999 then call missing(monthly{k});
  end;
  drop k;  /* loop counter is scaffolding only */
run;

/****************************************************************
Applying the same math computation to many variables at a time
*****************************************************************/
/* Sample data: four item scores per row. */
data score;
  input item1-item4;
  datalines;
-2   1   -3   0
-1   2   -2   1
 0  -1   -3  -1
;
run;
/* Reverse the sign of every item score by looping over an array. */
data score_array1;
  set score;
  array scores{*} item1-item4;
  do k = 1 to dim(scores);
    scores{k} = -scores{k};
  end;
  drop k;
run;

proc print data=score_array1;
run;

/************************************************************
   computing new variables
************************************************************/
/* Compute a 10% tax on each monthly income, written out one assignment
   per month (the long-hand version, for contrast with the array step). */
data tax_manual;
  set recode_missing;
  taxinc1  = 0.10 * faminc1;
  taxinc2  = 0.10 * faminc2;
  taxinc3  = 0.10 * faminc3;
  taxinc4  = 0.10 * faminc4;
  taxinc5  = 0.10 * faminc5;
  taxinc6  = 0.10 * faminc6;
  taxinc7  = 0.10 * faminc7;
  taxinc8  = 0.10 * faminc8;
  taxinc9  = 0.10 * faminc9;
  taxinc10 = 0.10 * faminc10;
  taxinc11 = 0.10 * faminc11;
  taxinc12 = 0.10 * faminc12;
run;

/* Show the second half of the year: incomes next to their taxes. */
proc print data=tax_manual heading=h noobs;
  var famid
      faminc6-faminc12
      taxinc6-taxinc12;
run;

/* Compute a 10% tax on each monthly income using two parallel arrays:
   one over the existing income variables, one that creates the new tax
   variables. The loop counter is dropped so it does not end up in the
   output dataset as a meaningless constant (13 on every row), in line
   with the other array loops in this program. */
data tax_array;
  set recode_missing;
  array inc(12) faminc1-faminc12; /* existing variables */
  array tax(12) taxinc1-taxinc12; /* new variables */
  do month = 1 to dim(inc);
    tax[month] = inc[month]*0.1;
  end;
  drop month; /* loop counter only; not data */
run;
proc print data=tax_array noobs;
  var famid faminc1-faminc3 taxinc1-taxinc3;
run;

/***********************************************************
Revisiting recoding variables - 
two variations of the example on recoding
************************************************************/
/* Recode variation 1: build the array from the name-prefix list
   "faminc:" and let dim() supply the loop bound. */
data test;
  set faminc;
  array monthly{*} faminc:;
  do k = 1 to dim(monthly);
    if monthly{k} = -999 then call missing(monthly{k});
  end;
  drop k;
run;

/* Recode variation 2: build the array from _numeric_, i.e. every numeric
   variable known at this point in the step. That list includes famid,
   so a famid of -999 would be recoded as well. */
data test;
  set faminc;
  array nums{*} _numeric_;
  do k = 1 to dim(nums);
    if nums{k} = -999 then call missing(nums{k});
  end;
  drop k;
run;

/********************************************************
Identify patterns across variables using arrays
*********************************************************/
/* Count, per family, how many of the twelve monthly incomes are missing.
   nmiss starts at 0 on every row and is incremented once per missing
   month. The loop counter is dropped so it does not end up in the output
   dataset as a meaningless constant, in line with the other array loops
   in this program. */
data mspatterns;
  set recode_missing;
  array inc(12) faminc1-faminc12; /* existing vars */
  nmiss = 0;
  do i = 1 to dim(inc);
    if inc(i) = . then nmiss = nmiss + 1;
  end;
  drop i; /* loop counter only; not data */
run;
proc print data=mspatterns noobs heading=H;
  var famid faminc1-faminc12 nmiss;
run;

/**********************************************************
Reshaping wide to long
**********************************************************/
/* Sample wide-format data: one row per family, one income column per year. */
data wide;
  input famid
        faminc96 faminc97 faminc98;
  datalines;
1 40000 40500 41000 
2 45000 45400 45800 
3 75000 76000 77000 
;
run;
/* Wide-to-long by hand: for each input row, write three output rows,
   one per year, each carrying that year's income in faminc. */
data long_manual;
  set wide;
  year = 96; faminc = faminc96; output;
  year = 97; faminc = faminc97; output;
  year = 98; faminc = faminc98; output;
run;

proc print data=long_manual;
  var famid year faminc;
run;

/* Deliberate counter-example: with the first two OUTPUT statements
   disabled, only the final assignment pair is written, so each family
   yields a single row with year=98. */
data problem;
  set wide;
  year = 96; faminc = faminc96;
  /* output;  (disabled on purpose) */
  year = 97; faminc = faminc97;
  /* output;  (disabled on purpose) */
  year = 98; faminc = faminc98;
  output;
run;

proc print data=problem;
  var famid year faminc;
run;

/* Wide-to-long with an array whose subscripts are the years themselves
   (96..98), so the loop variable doubles as the year column. The wide
   income columns are removed from the output. */
data long_array (drop = faminc96-faminc98);
  set wide;
  array yearly_inc{96:98} faminc96-faminc98;
  do year = lbound(yearly_inc) to hbound(yearly_inc);
    faminc = yearly_inc{year};
    output;
  end;
run;

proc print data=long_array;
run;

/* Sample wide-format data with three yearly measures per family:
   income and spending (numeric) and a debt indicator (character). */
data multi_wide;
  input famid
        faminc96 faminc97 faminc98
        spend96 spend97 spend98
        debt96 $ debt97 $ debt98 $;
  datalines;
1 40000 40500 41000 38000 39000 40000 yes yes no 
2 45000 45400 45800 42000 43000 44000 yes no  no 
3 75000 76000 77000 70000 71000 72000 no  no  no 
;
run;
/* Wide-to-long for three measures at once. Each array is subscripted by
   year (96..98), so one loop pulls that year's income, spending and debt
   flag into the long-format columns faminc, spend and debt.
   All three sets of wide columns are dropped: the original DROP list
   omitted debt96-debt98, which left stale wide columns in the long
   dataset. The character array is declared with $ to make its type
   explicit. */
data multi_long;
  set multi_wide;
  length debt $ 3;
  array Afaminc(96:98) faminc96-faminc98;
  array Aspend(96:98) spend96-spend98;
  array Adebt(96:98) $ debt96-debt98;
  do year = 96 to 98;
   faminc = Afaminc[year];
   spend = Aspend[year];
   debt = Adebt[year];
   output;
  end;
  drop faminc96-faminc98 spend96-spend98 debt96-debt98;
run;
proc print data=multi_long;
  var famid year faminc spend debt;
run;

/* Sample data mixing character and numeric measures at three time points.
   The LENGTH statement comes first so the names are stored with 24
   characters. */
data character;
  length name_old name_now name_future $ 24;
  input id
        name_old $ name_now $ name_future $
        inc_old inc_now inc_future;
  datalines;
1  Ramon  Martin  Martin_Sheen  23000  50000  700000
2  John  Johnnie  J_boy         10000  20000  600000
3  Mary_Cathleen  Bo  Bo_Derek  15000  40000  250000
;
run;

proc print data=character;
run;

/* Wide-to-long for a character and a numeric measure together: one
   output row per time point (1..3) with that point's name and income. */
data character_array;
  set character;
  length name $ 24;
  array names{3} $ name_old name_now name_future;
  array incomes{3} inc_old inc_now inc_future;
  do time = 1 to dim(names);
    name   = names{time};
    income = incomes{time};
    output;
  end;
run;
/* Label the numeric time index (1/2/3) as old/now/future when printing. */
proc format;
  value t_format
    1 = 'old'
    2 = 'now'
    3 = 'future';
run;

proc print data=character_array;
  var id time name income;
  format time t_format.;
run;

/***************************************************
Understanding the BY-group variables first. and last.,
and the retain statement
****************************************************/
/* Sample data: unsorted ids with a measurement that is often missing. */
data missings;
  input id
        measurement;
  datalines;
1  .
1  2
3  .
2  3
3  4
2  .
3  .
1  .
3  5
3  6
;
run;
/* RETAIN demo: new_meas starts at 0 and keeps its value across rows,
   so it always holds the most recent non-missing measurement. */
data ex_retain;
  set missings;
  retain new_meas 0;
  if measurement ^= . then do;
    new_meas = measurement;
  end;
run;

proc print data=ex_retain;
run;

/* Counter-example: with RETAIN disabled, new_meas is reset to missing at
   the start of every row, so it is only populated on rows whose own
   measurement is non-missing. */
data ex_retain;
  set missings;
  /* retain new_meas 0;  (disabled on purpose) */
  if measurement ^= . then do;
    new_meas = measurement;
  end;
run;

proc print data=ex_retain;
run;

/* RETAIN demo 2: running total of the non-missing measurements.
   new1 starts at 0 and is never missing, so sum() leaves it unchanged
   on rows where measurement is missing. */
data ex2_retain;
  set missings;
  retain new1 0;
  new1 = sum(new1, measurement);
run;

proc print data=ex2_retain;
run;

/* Counter-example: with RETAIN disabled, new1 is missing at the start of
   every row, and missing + measurement is missing, so new1 stays missing
   throughout. The + operator is kept on purpose; it is what propagates
   the missing value. */
data ex2_retain;
  set missings;
  /* retain new1 0;  (disabled on purpose) */
  if measurement ^= . then do;
    new1 = new1 + measurement;
  end;
run;

proc print data=ex2_retain;
run;

/* Sort by id so the DATA step can process the rows in BY groups. */
proc sort data=missings out=sort_miss;
  by id;
run;

/* Copy the temporary first.id / last.id indicators (1 or 0) into
   permanent variables so they can be inspected in the printout. */
data ex1;
  set sort_miss;
  by id;
  first = first.id;
  last  = last.id;
run;

proc print data=ex1;
run;

/* Sample data: children within families, unsorted. The LENGTH statement
   declares kidname as a 4-character variable before it is read. */
data kids;
  length kidname $ 4;
  input famid
        kidname
        birth_order
        wt;
  datalines;
1 Beth 1  60
1 Barb 3  20
4 Sam  1 100
4 Stu  2  90
1 Bob  2  40
3 Pete 1  60
3 Phil 3  20
2 Andy 1  80
3 Pam  2  40
2 Al   2  50
2 Ann  3  20
;
run;

/* Sort by family so the running totals can be computed per BY group. */
proc sort data=kids out=sort_kids;
  by famid;
run;

/* Running per-family weight total, child count, and mean weight.
   sumwt and count are retained across rows and restarted on the first
   row of each family. */
data retain1;
  set sort_kids;
  by famid;
  retain sumwt count;
  if first.famid then do;
    /* first child of a family: start the totals from this row */
    sumwt = wt;
    count = 1;
  end;
  else do;
    sumwt = sumwt + wt;
    count = count + 1;
  end;
  meanwt = sumwt / count;
run;

proc print data=retain1;
  var famid kidname wt sumwt count meanwt;
run;

/* Keep only the last row of each family, which carries the final
   totals accumulated in retain1. */
data retain2;
  set retain1;
  by famid;
  if last.famid;  /* subsetting IF: other rows are not written */
run;

proc print data=retain2;
  var famid sumwt meanwt;
run;

/*************************************
reshaping long to wide using arrays
*************************************/
/* Starting point: the long-format data built earlier. */
proc print data=long_array;
run;

/* Sort by family so the rows can be processed in BY groups. */
proc sort data=long_array out=long_sort;
  by famid;
run;

/* Long-to-wide, intermediate view: the yearly columns are retained and
   filled in one row at a time. Every row is written (the last.famid
   filter is disabled on purpose), so the printout shows the wide columns
   filling up across each family's rows. */
data wide_array;
  set long_sort;
  by famid;
  retain faminc96-faminc98;
  array yearly_inc{96:98} faminc96-faminc98;
  /* clear the retained columns at the start of each family */
  if first.famid then call missing(of yearly_inc{*});
  yearly_inc{year} = faminc;  /* the year value selects the target column */
  /* if last.famid then output;  (disabled on purpose) */
  drop year faminc;
run;

proc print data=wide_array noobs;
run;

/* Long-to-wide, final version: same logic as the intermediate step, but
   only the last row of each family is written, giving one fully
   populated wide row per family. */
data wide_array;
  set long_sort;
  by famid;
  retain faminc96-faminc98;
  array yearly_inc{96:98} faminc96-faminc98;
  /* clear the retained columns at the start of each family */
  if first.famid then call missing(of yearly_inc{*});
  yearly_inc{year} = faminc;  /* the year value selects the target column */
  if last.famid;              /* subsetting IF: keep the completed row only */
  drop year faminc;
run;

proc print data=wide_array noobs;
run;

/********************************************************
Comparisons across observations using arrays
********************************************************/
/* Sample data: unsorted person ids, each row holding one topicA value
   (-1, 0 or 1). */
data real_life;
  input person
        topicA;
  datalines;
1   0  
1   1  
3  -1  
1   0  
2   0  
1   1  
2  -1  
2  -1  
3   0  
3   1  
4   0  
1   1  
4   1  
4   0  
2  -1  
4   0  
4   0  
1  -1  
;
run;

/* Sort by person so the rows can be numbered within each BY group. */
proc sort data=real_life out=sort_real;
  by person;
run;

/* Number each person's rows 1, 2, 3, ... in their sorted order. */
data count_real;
  set sort_real;
  by person;
  retain count;
  if first.person then count = 1;
  else count = count + 1;
run;

proc print data=count_real noobs;
run;

/* Long-to-wide for topicA: each person's rows are spread across
   topicA_1..topicA_6, using the within-person row number (count) as the
   column index. Only the last row per person is written. */
data wide_real;
  set count_real;
  by person;
  array slots{6} topicA_1-topicA_6;
  retain topicA_1-topicA_6;
  if first.person then do i = 1 to 6;
    slots{i} = .;  /* clear the retained columns for a new person */
  end;
  slots{count} = topicA;  /* the count value selects the target column */
  if last.person;         /* subsetting IF: keep the completed row only */
run;

proc print data=wide_real noobs;
  var person topicA_1-topicA_6;
run;

/* Flag persons who gave the same non-missing topicA value three times
   in a row. flagA is 1 if any window of three adjacent slots holds
   identical non-missing values, otherwise 0. */
data three;
  set wide_real;
  array slots{6} topicA_1-topicA_6;
  do i = 2 to 5;
    /* slot i is non-missing and equals both neighbours, so all three
       are non-missing and identical */
    if slots{i} ^= . and slots{i-1} = slots{i} and slots{i+1} = slots{i}
      then flagA = 1;
  end;
  if flagA = . then flagA = 0;
run;

proc print data=three noobs;
  var person topicA_1-topicA_6 flagA;
run;
