UCLA Academic Technology Services HomeServicesClassesContactJobs
Search

SAS Textbook Examples
An Introduction to Categorical Analysis by Alan Agresti
Chapter 7 - Building and Applying Logit and Log-linear Models

We will present both the SAS proc catmod and proc genmod code in this chapter most of the time.

Table 7.2 based on the drug data set, table 7.1 on page 178.

data drug;
  input R G A C M count;
  label R='Race'
        G='Gender' 
        A='alcohol' 
        C='smoke'
        M='marijuana';
cards;
1  1  1   1   1   405
1  1  1   1   0   268 
1  1  1   0   1    13
1  1  1   0   0   218
1  1  0   1   1     1
1  1  0   1   0    17
1  1  0   0   1     1
1  1  0   0   0   117
1  0  1   1   1   453
1  0  1   1   0   228 
1  0  1   0   1    28
1  0  1   0   0   201
1  0  0   1   1     1
1  0  0   1   0    17
1  0  0   0   1     1
1  0  0   0   0   133
0  1  1   1   1    23
0  1  1   1   0    23 
0  1  1   0   1     2
0  1  1   0   0    19
0  1  0   1   1     0
0  1  0   1   0     1
0  1  0   0   1     0
0  1  0   0   0    12
0  0  1   1   1    30
0  0  1   1   0    19 
0  0  1   0   1     1
0  0  1   0   0    18
0  0  0   1   1     1
0  0  0   1   0     8
0  0  0   0   1     0
0  0  0   0   0    17
;
run;
The catmod proc will drop any observations where the count is zero. In order to solve this problem we create a new count variable where we add a very small amount (in this case .0005) to all the counts. This will not change any of the Maximum Likelihood estimates, it will simply ensure that all the observations are used in the calculations.
data drugmore;
  set drug;
  count1= count+.0005;
run;
Table 7.2, p. 179 using proc catmod.
ods listing close;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin g|r a c m g r ;
  ods output anova=temp1;
run;
quit;
data temp1;
  set temp1;
  model = 'RG_A_C_M';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin g|r|a|c|m @2;
  ods output anova=temp2;
run;
quit;
data temp2;
  set temp2;
  model = 'homogeneous';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin g|r|a|c|m @3;
  ods output anova=temp3;
run;
quit;
data temp3;
  set temp3;
  model = 'three-way';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|c r|m g|a g|c g|m  a|m c|m;
  ods output anova=temp4;
run;
quit;
data temp4;
  set temp4;
  model = 'homogeneous - AC';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|c r|m g|a g|c g|m a|c  c|m;
  ods output anova=temp5;
run;
quit;
data temp5;
  set temp5;
  model = 'homogeneous - AM';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|c r|m g|a g|c g|m a|c a|m ;
  ods output anova=temp6;
run;
quit;
data temp6;
  set temp6;
  model = 'homogeneous - CM';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|c r|m  g|c g|m a|c a|m c|m;
  ods output anova=temp7;
run;
quit;
data temp7;
  set temp7;
  model = 'homogeneous - AG';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|c r|m g|a g|c g|m a|c a|m c|m;
  ods output anova=temp8;
run;
quit;
data temp8;
  set temp8;
  model = 'homogeneous - AR';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|c r|m g|a g|m a|c a|m c|m;
  ods output anova=temp9;
run;
quit;
data temp9;
  set temp9;
  model = 'homogeneous - CG';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|m g|a g|c g|m a|c a|m c|m;
  ods output anova=temp10;
run;
quit;
data temp10;
  set temp10;
  model = 'homogeneous - CR';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|c r|m g|a g|c  a|c a|m c|m;
  ods output anova=temp11;
run;
quit;
data temp11;
  set temp11;
  model = 'homogeneous - GM';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|c  g|a g|c g|m a|c a|m c|m;
  ods output anova=temp12;
run;
quit;
data temp12;
  set temp12;
  model = 'homogeneous - MR';
run;
 
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|m g|a g|m a|c a|m c|m;
  ods output anova=temp13;
run;
quit;
data temp13;
  set temp13;
  model = 'RG_RA_RM_GA_GM_AC_AM_CM';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a  g|a g|m a|c a|m c|m;
  ods output anova=temp14;
run;
quit;
data temp14;
  set temp14;
  model = 'RG_RA_GA_GM_AC_AM_CM';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin g|r r|a  g|a a|c a|m c|m;
  ods output anova=temp15;
run;
quit;
data temp15;
  set temp15;
  model = 'RG_RA_RM_GA_AC_AM_CM';
run;
ods output close;
ods listing;
data combo;
  set temp1 temp2 temp3 temp4 temp5 temp6 temp7 TEMP8 TEMP9 TEMP10 TEMP11 temp12 temp13 temp14 temp15;
  rename chisq=G2;
  rename probchisq=P_value;
run;
proc print data=combo noobs;
  where source='Likelihood Ratio';
  var model G2 df P_value;
run;
Model                            G2     DF    P_value

RG_A_C_M                    1325.09     25    <.0001
homogeneous                   15.32     16    0.5011
three-way                      5.25      6    0.5119
homogeneous - AC             201.19     17    <.0001
homogeneous - AM             106.93     17    <.0001
homogeneous - CM             513.45     17    <.0001
homogeneous - AG              18.70     17    0.3460
homogeneous - AR              20.31     17    0.2589
homogeneous - CG              16.30     17    0.5026
homogeneous - CR              15.77     17    0.5404
homogeneous - GM              25.14     17    0.0915
homogeneous - MR              18.91     17    0.3337
RG_RA_RM_GA_GM_AC_AM_CM       16.72     18    0.5425
RG_RA_GA_GM_AC_AM_CM          19.89     19    0.4012
RG_RA_RM_GA_AC_AM_CM          28.79     20    0.0920
The code using proc genmod is shown below.
proc genmod data = drug;
  class a c r g  m ;
  model count =a c r|g  m /dist = poi link = log ;
  ods output modelfit = mf1;
run;

proc genmod data = drug;
  class a c r g m ;
  model count = a|c a|r  a|g a|m c|r  c|g c|m r|g r|m g|m
                /dist = poi link = log ;
 ods output modelfit = mf2;
run;
proc genmod data = drug;
  class a c r g m;
    model count =  g|r|a|c|m @2 /dist = poi link = log ;
	ods output modelfit = mf2;
run;

proc genmod data = drug;
  class a c r g m;
    model count =  g|r|a|c|m @3 /dist = poi link = log ;
	ods output modelfit = mf3;
run;

proc genmod data = drug;/*-ac*/
  class a c r g m;
  model count = r|g r|a r|c r|m g|a g|c g|m  a|m c|m /dist = poi link = log ;
 ods output modelfit = mf4;
run;
proc genmod data = drug;/*-am*/
  class a c r g m;
  model count = r|g r|a r|c r|m g|a g|c g|m a|c  c|m /dist = poi link = log ;
 ods output modelfit = mf5;
run;

proc genmod data = drug;/*-cm*/
  class a c r g m;
  model count =r|g r|a r|c r|m g|a g|c g|m a|c a|m  /dist = poi link = log ;
 ods output modelfit = mf6;
run;
proc genmod data = drug;/*-ag*/
  class a c r g m;
  model count= r|g r|a r|c r|m  g|c g|m a|c a|m c|m/dist = poi link = log ;
 ods output modelfit = mf7;
run;
proc genmod data = drug;/*-ar*/
  class a c r g m;
  model count= r|g r|c r|m g|a g|c g|m a|c a|m c|m /dist = poi link = log ;
 ods output modelfit = mf8;
run;
proc genmod data = drug;/*-cg*/
  class a c r g m;
  model count= r|g r|a r|c r|m g|a g|m a|c a|m c|m /dist = poi link = log ;
 ods output modelfit = mf9;
run;
proc genmod data = drug;/*-cr*/
  class a c r g m;
  model count=r|g r|a r|m g|a g|c g|m a|c a|m c|m /dist = poi link = log ;
 ods output modelfit = mf10;
run;
proc genmod data = drug;/*-gm*/
 class a c r g m;
  model count=r|g r|a r|c r|m g|a g|c  a|c a|m c|m /dist = poi link = log ;
 ods output modelfit = mf11;
run;
proc genmod data = drug;/*-mr*/
 class a c r g m;
  model count= r|g r|a r|c  g|a g|c g|m a|c a|m c|m /dist = poi link = log ;
 ods output modelfit = mf12;
run;
proc genmod data = drug;/*ac am cm ag ar gm gr mr*/
  class a c r g m;
  model count = r|g r|a r|m g|a g|m a|c a|m c|m
				/dist = poi link = log ;
 ods output modelfit = mf13;
run;
proc genmod data = drug;/*ac am cm ag ar gm gr*/
   class a c r g m;
  model count =r|g r|a  g|a g|m a|c a|m c|m 
				/dist = poi link = log ;
 ods output modelfit = mf14;
run;
proc genmod data = drug;/*ac am cm ag ar gr */
  class a c r g m;
  model count = g|r r|a  g|a a|c a|m c|m
				/dist = poi link = log ;
 ods output modelfit = mf15;
run;
Inputting Survey data, table 7.3, p. 181.
data sex;
  input premar birth count @@;
  assoc = premar*birth; 
cards;
1 4  38   1 3  60   1 2 68   1 1 81
2 4  14   2 3  29   2 2 26   2 1 24
3 4  42   3 3  74   3 2 41   3 1 18
4 4 157   4 3 161   4 2 57   4 1 36
;
run;
Table 7.3, p. 181 and the goodness-of-fit statistics in the second paragraph.
proc genmod data = sex;
  class premar birth;
  model count = premar birth /dist = poi link = log ;
  output out = table7_3 pred=pred STDRESCHI = r;
run;
proc format;
  value premar 1='Always wrong' 2='Usually Wrong' 3='Sometimes Wrong' 4='Never wrong';
  value birth 1='Strongly Disagree' 2='Disagree' 3='Agree' 4='Strongly Agree';
run;
proc print data = table7_3 noobs;
  format premar premar. birth birth.;
run;
Criteria For Assessing Goodness Of Fit

Criterion                 DF           Value        Value/DF

Deviance                   9        127.6529         14.1837
Scaled Deviance            9        127.6529         14.1837
Pearson Chi-Square         9        128.6836         14.2982
Scaled Pearson X2          9        128.6836         14.2982
Log Likelihood                     2983.6850
   
 premar         birth                count    assoc      pred         r

Always wrong       Strongly Agree         38        4      66.951    -4.83965
Always wrong       Agree                  60        3      86.423    -4.11671
Always wrong       Disagree               68        2      51.214     3.07671
Always wrong       Strongly Disagree      81        1      42.411     7.60318
Usually Wrong      Strongly Agree         14        8      25.208    -2.75682
Usually Wrong      Agree                  29        6      32.540    -0.81148
Usually Wrong      Disagree               26        4      19.283     1.81148
Usually Wrong      Strongly Disagree      24        2      15.969     2.32832
Sometimes Wrong    Strongly Agree         42       12      47.435    -1.02637
Sometimes Wrong    Agree                  74        9      61.231     2.24730
Sometimes Wrong    Disagree               41        6      36.285     0.97623
Sometimes Wrong    Strongly Disagree      18        3      30.049    -2.68175
Never wrong        Strongly Agree        157       16     111.405     6.78455
Never wrong        Agree                 161       12     143.806     2.38456
Never wrong        Disagree               57        8      85.218    -4.60386
Never wrong        Strongly Disagree      36        4      70.571    -6.06333
Using proc catmod, we get the same predicted values and the likelihood ratio test statistic.
proc catmod data=sex;
  weight count;
  model premar*birth= _response_/ predict=freq noprofile noiter noresponse ;
  loglin premar birth;
run;
quit;
     Maximum Likelihood Analysis of Variance

Source               DF   Chi-Square    Pr > ChiSq
--------------------------------------------------
premar                3       212.44        <.0001
birth                 3        66.20        <.0001

Likelihood Ratio      9       127.65        <.0001
 
             Maximum Likelihood Predicted Values for Frequencies

                   -------Observed------    ------Predicted------
                                Standard                 Standard
premar    birth    Frequency       Error    Frequency       Error    Residual
-----------------------------------------------------------------------------
1         1               81    8.597365     42.41145    3.835381    38.58855
1         2               68    7.937662     51.21382    4.314471    16.78618
1         3               60    7.490815     86.42333    6.095867    -26.4233
1         4               38    6.036605      66.9514    5.130779    -28.9514
2         1               24    4.835077     15.96868     1.94804    8.031317
2         2               26    5.026925     19.28294    2.265306    6.717063
2         3               29    5.300169     32.53996    3.516572    -3.53996
2         4               14    3.713265     25.20842    2.827013    -11.2084
3         1               18    4.201203      30.0486    2.981299    -12.0486
3         2               41    6.259766      36.2851    3.396646    4.714903
3         3               74    8.251448      61.2311    4.989831     12.7689
3         4               42    6.332064     47.43521    4.118554    -5.43521
4         1               36    5.882213     70.57127    5.716958    -34.5713
4         2               57    7.313779     85.21814    6.309364    -28.2181
4         3              161    11.53289     143.8056    8.335372    17.19438
4         4              157    11.41846      111.405    7.268973    45.59503
Linear-by-linear association in the Survey data.

The ML estimate of the association parameter, beta, and its standard error are the estimate and standard error of assoc in the table labeled Analysis of Parameter Estimates which is followed by the Wald test and 95% Wald confidence interval, middle of p. 183.

proc genmod data=sex; 
  format premar premar. birth birth.;
  class premar birth;
  model count = premar birth assoc/ dist=poi link=log ;
  output out=temp p=predict ;
run;
The GENMOD Procedure

      Model Information
Data Set              WORK.SEX
Distribution           Poisson
Link Function              Log
Dependent Variable       count
Observations Used           16
                        Class Level Information

Class       Levels    Values
premar           4    Always wrong Never wrong Sometimes Wrong Usually
                      Wrong
birth            4    Agree Disagree Strongly Agree Strongly Disagree
           Criteria For Assessing Goodness Of Fit

Criterion                 DF           Value        Value/DF
Deviance                   8         11.5337          1.4417
Scaled Deviance            8         11.5337          1.4417
Pearson Chi-Square         8         11.5085          1.4386
Scaled Pearson X2          8         11.5085          1.4386
Log Likelihood                     3041.7446

Algorithm converged.
                              Analysis Of Parameter Estimates

                                           Standard       Wald 95%          Chi-
Parameter                    DF  Estimate     Error   Confidence Limits   Square  Pr > ChiSq
Intercept                     1    2.4609    0.1323    2.2015    2.7202   345.91      <.0001
premar     Always wrong       1    1.6460    0.1347    1.3819    1.9100   149.24      <.0001
premar     Never wrong        1   -0.1077    0.1988   -0.4974    0.2820     0.29      0.5880
premar     Sometimes Wrong    1   -0.1241    0.1481   -0.4143    0.1661     0.70      0.4021
premar     Usually Wrong      0    0.0000    0.0000    0.0000    0.0000      .         .
birth      Agree              1   -0.7245    0.1620   -1.0420   -0.4070    20.00      <.0001
birth      Disagree           1   -0.4641    0.1195   -0.6984   -0.2298    15.08      0.0001
birth      Strongly Agree     1   -1.8797    0.2491   -2.3679   -1.3914    56.94      <.0001
birth      Strongly Disagre   0    0.0000    0.0000    0.0000    0.0000      .         .
assoc                         1    0.2858    0.0282    0.2305    0.3412   102.46      <.0001
Scale                         0    1.0000    0.0000    1.0000    1.0000
NOTE: The scale parameter was held fixed.
Table 7.4, p. 184.
proc print data = temp;
  format premar premar. birth birth.;
  var premar birth count predict;  
run;
Obs        premar         birth                count    predict

  1    Always wrong       Strongly Agree         38      29.094
  2    Always wrong       Agree                  60      69.396
  3    Always wrong       Disagree               68      67.654
  4    Always wrong       Strongly Disagree      81      80.857
  5    Usually Wrong      Strongly Agree         14      17.600
  6    Usually Wrong      Agree                  29      31.543
  7    Usually Wrong      Disagree               26      23.107
  8    Usually Wrong      Strongly Disagree      24      20.750
  9    Sometimes Wrong    Strongly Agree         42      48.773
 10    Sometimes Wrong    Agree                  74      65.681
 11    Sometimes Wrong    Disagree               41      36.152
 12    Sometimes Wrong    Strongly Disagree      18      24.394
 13    Never wrong        Strongly Agree        157     155.533
 14    Never wrong        Agree                 161     157.379
 15    Never wrong        Disagree               57      65.088
 16    Never wrong        Strongly Disagree      36      33.000
Section 7.2.3 using the contrast statement in proc genmod to obtain the likelihood-ratio test and Wald test statistics.
proc genmod data = sex; 
   class premar birth;
   model  count = premar birth assoc / dist=poi link=log ;
   output out = table7_4 pred = pred;
   contrast "asso" premar 0 birth 0 assoc 1 ;
   contrast "asso_wald" premar 0 birth 0 assoc 1 /wald;
run;
                    Contrast Results

                          Chi-
Contrast         DF     Square    Pr > ChiSq    Type
asso              1     116.12        <.0001    LR
asso_wald         1     102.46        <.0001    Wald
Inputting Job Satisfaction data, table 7.5, p. 186.
data cmh;
input gender $ income satisf count @@;
cards;
F  3 1 1    F 3  3 3    F  3 4 11    F  3 5 2
F 10 1 2    F 10 3 3    F 10 4 17    F 10 5 3
F 20 1 0    F 20 3 1    F 20 4  8    F 20 5 5
F 35 1 0    F 35 3 2    F 35 4  4    F 35 5 2
M  3 1 1    M  3 3 1    M  3 4  2    M  3 5 1
M 10 1 0    M 10 3 3    M 10 4  5    M 10 5 1
M 20 1 0    M 20 3 0    M 20 4  7    M 20 5 3
M 35 1 0    M 35 3 1    M 35 4  9    M 35 5 6
;
run;
Proc catmod will drop any observations where the count equals zero. To fix this problem we simply create a new count variable where we add a very small amount (in this case .00001) to all the counts. This will not change any of the Maximum Likelihood estimates, it will simply ensure that all the observations are used in the calculations.
data cmh1;
  set cmh;
  count1 = count + .00001;
run;
The Likelihood Ratio statistic in the output is the G^2 statistic for the model (GI, GS), Section 7.3.2, p. 186.
proc catmod data=cmh1;
  weight count1;
  model gender*income*satisf= _response_ /noiter noprofile noparm noresponse;
  loglin gender|income gender|satisf ;
  ods output anova=temp1;
run;
quit;
data _null_;
  set temp1;
  if source = 'Likelihood Ratio' then call symput('chisq1', chisq);
  if source = 'Likelihood Ratio' then call symput('df1', df);
run;
%put chisq=&chisq1 and df=&df1; /* this will make the two number appear in the log file. */
The CATMOD Procedure
                           Data Summary

Response           gender*income*satisf     Response Levels      32
Weight Variable    count1                   Populations           1
Data Set           CMH1                     Total Frequency     104
Frequency Missing  0                        Observations         32
                   Maximum Likelihood Analysis
           Maximum likelihood computations converged.
     Maximum Likelihood Analysis of Variance

Source               DF   Chi-Square    Pr > ChiSq
--------------------------------------------------
gender                1         2.72        0.0991
income                3         2.47        0.4808
gender*income         3        11.52        0.0092
satisf                3        49.51        <.0001
gender*satisf         3         1.28        0.7330

Likelihood Ratio     18        19.37        0.3696
This can also be produced by proc genmod.
proc genmod data= cmh;
  class gender income satisf;
   model count = gender|income gender|satisf  /dist = poi link = log;
run;
Note 1: We have created two macro variables in order to use a Chi-squared test to compare the two models.

Note 2: In the previous data step we created a new count variable in order to get rid of cells with zero entries. However, if the amount added to all the counts was too small proc catmod might have problems with collinearity when calculating the degrees of freedom for the G^2 statistic. Our suggestion is to try adding various amounts to all the counts starting with .1 then .01 then .001 and so on until the degrees of freedom start to drop. The point is that in order to get the best approximation for the G^2 statistic you want to add the smallest possible amount without losing degrees of freedom due to collinearity.

data cmh2;
  set cmh1;
  count2=count+.001;
run;
proc catmod data=cmh2;
  weight count2;
  model gender*income*satisf= _response_ /noiter noprofile noparm noresponse;
  loglin gender|income gender|satisf income|satisf;
  ods output anova=temp2;
run;
quit;
data _null_;
  set temp2;
  if source = 'Likelihood Ratio' then call symput('chisq2', chisq);
  if source = 'Likelihood Ratio' then call symput('df2', df);
run;
%put chisq=&chisq2 and df=&df2; /* this will make the numbers appear in the log file */
The CATMOD Procedure
                           Data Summary
Response           gender*income*satisf     Response Levels      32
Weight Variable    count2                   Populations           1
Data Set           CMH2                     Total Frequency 104.032
Frequency Missing  0                        Observations         32
                   Maximum Likelihood Analysis
           Maximum likelihood computations converged.
     Maximum Likelihood Analysis of Variance

Source               DF   Chi-Square    Pr > ChiSq
--------------------------------------------------
gender                1         1.58        0.2089
income                3         0.99        0.8025
gender*income         3        10.47        0.0150
satisf                3        31.38        <.0001
gender*satisf         3         0.11        0.9902
income*satisf         9         6.46        0.6932

Likelihood Ratio      9         7.07        0.6301
The following is the code for the proc genmod.
proc genmod data= cmh;
  class gender income satisf;
  model count = gender|income gender|satisf income|satisf  / dist = poi link = log;
run;
Using a Chi-squared test to compare the two models, (GI, GS, IS) to (GI, GS), section 7.3.2, p. 186. We should notice that the comparison of the two models can be done by using contrast statement. But, with this particular model,  it didn't converge because that the negative Hessian is not positive definite and this prevents SAS further estimate any other statements. This is why we have to do the comparison manually.
data chitest;
  chi_diff= &chisq1-&chisq2;
  df_diff= &df1-&df2;
  p_value = 1-cdf('CHISQ', (&chisq1-&chisq2), (&df1-&df2) );
run;
proc print data = chitest;
run; 
Obs    chi_diff    df_diff    p_value
 1      12.3000       9       0.19692
Generating the scores {1, 2, 3, 4} for income and satisfaction as well as creating the association variable, p. 187.
data score;
  set cmh1;
  if income=3 then score1 = 1;
  if income=10 then score1= 2;
  if income=20 then score1=3;
  if income=35 then score1=4;
  score2 = 1;
  if satisf ne 1 then score2 = satisf - 1;
  assoc = score1*score2;
run;
The estimate and standard error for assoc in the output is the ML estimate and ASE for beta-hat followed by the Wald test and 95% Wald confidence interval, p. 187 and 188.
proc genmod data= score;
  class  female income satisf  ;
  model count = female|income female|satisf  assoc / expected dist = poi link = log;
  contrast "assoc" assoc 1;
  contrast "assoc_wald" assoc 1 /wald;
run;
The GENMOD Procedure

           Criteria For Assessing Goodness Of Fit

Criterion                 DF           Value        Value/DF

Deviance                  17         12.3268          0.7251
Scaled Deviance           17         12.3268          0.7251
Pearson Chi-Square        17         12.8853          0.7580
Scaled Pearson X2         17         12.8853          0.7580
Log Likelihood                       70.2108

Algorithm converged.

                                Analysis Of Parameter Estimates

                                          Standard   Wald 95% Confidence      Chi-
Parameter                 DF   Estimate      Error          Limits          Square   Pr > ChiSq
Intercept                  1    -5.2238     2.3367    -9.8036    -0.6440      5.00       0.0254
female          0          1     0.8040     0.5420    -0.2583     1.8663      2.20       0.1380
female          1          0     0.0000     0.0000     0.0000     0.0000       .          .
income          3          1     4.2551     1.4863     1.3421     7.1681      8.20       0.0042
income          10         1     3.5434     1.0790     1.4285     5.6582     10.78       0.0010
income          20         1     1.7928     0.6845     0.4512     3.1343      6.86       0.0088
income          35         0     0.0000     0.0000     0.0000     0.0000       .          .
female*income   0    3     1    -1.8948     0.6889    -3.2450    -0.5446      7.56       0.0060
female*income   0    10    1    -1.6969     0.5912    -2.8557    -0.5381      8.24       0.0041
female*income   0    20    1    -1.0186     0.6013    -2.1971     0.1598      2.87       0.0902
female*income   0    35    0     0.0000     0.0000     0.0000     0.0000       .          .
female*income   1    3     0     0.0000     0.0000     0.0000     0.0000       .          .
female*income   1    10    0     0.0000     0.0000     0.0000     0.0000       .          .
female*income   1    20    0     0.0000     0.0000     0.0000     0.0000       .          .
female*income   1    35    0     0.0000     0.0000     0.0000     0.0000       .          .
satisf          1          1     1.0097     1.1165    -1.1785     3.1980      0.82       0.3658
satisf          3          1     1.4297     0.8213    -0.1800     3.0393      3.03       0.0817
satisf          4          1     2.1307     0.5169     1.1176     3.1439     16.99       <.0001
satisf          5          0     0.0000     0.0000     0.0000     0.0000       .          .
female*satisf   0    1     1    -0.3044     1.2714    -2.7962     2.1875      0.06       0.8108
female*satisf   0    3     1     0.0210     0.7408    -1.4310     1.4730      0.00       0.9774
female*satisf   0    4     1    -0.1920     0.5105    -1.1924     0.8085      0.14       0.7069
female*satisf   0    5     0     0.0000     0.0000     0.0000     0.0000       .          .
female*satisf   1    1     0     0.0000     0.0000     0.0000     0.0000       .          .
female*satisf   1    3     0     0.0000     0.0000     0.0000     0.0000       .          .
female*satisf   1    4     0     0.0000     0.0000     0.0000     0.0000       .          .
female*satisf   1    5     0     0.0000     0.0000     0.0000     0.0000       .          .
assoc                      1     0.3878     0.1547     0.0846     0.6910      6.28       0.0122
Scale                      0     1.0000     0.0000     1.0000     1.0000

                   Contrast Results

                           Chi-
Contrast          DF     Square    Pr > ChiSq    Type

assoc              1       7.04        0.0080    LR
assoc_wald         1       6.28        0.0122    Wald
Section 7.3.5, section 7.3.6 and section 7.3.7 on page 188 and page 190. The sample correlation between income and job satisfaction for female and male is shown below.
proc sort data = score;
by female;
proc corr data = score noprob nosimple;
  by female;
  weight count;
  var score1 score2 ;
run;
The CORR Procedure

   2        Variables:    score1   score2
     Weight Variable:     count
female=0
Pearson Correlation Coefficients, N = 16
              score1        score2
score1       1.00000       0.38145
score2       0.38145       1.00000

female=1

Pearson Correlation Coefficients, N = 16

              score1        score2
score1       1.00000       0.17086
score2       0.17086       1.00000
Means by gender and by levels of income using the scores {1, 2, 3, 4} for job satisfaction, p. 189.
proc sort data=score out=sorted;
  by gender income;
run;
proc means data=sorted mean;
  freq count;
  class gender income;
  var score2;
run;
gender            income           N Obs            Mean
--------------------------------------------------------
F                      3              17       2.8235294

                      10              25       2.8400000

                      20              14       3.2857143

                      35               8       3.0000000

M                      3               5       2.6000000

                      10               9       2.7777778

                      20              10       3.3000000

                      35              16       3.3125000
--------------------------------------------------------
Generalized correlation statistic in the middle of page 188. SAS computes three different generalized Cochran-Mantel-Haenszel Statistics. The first line of output below is the first generalization in section 7.3.5.   The result on testing difference among the four income levels in their mean job satisfaction (result on page 189, the third paragraph) is also shown below. The last line of the output is the result on page 190 on general association statistic.
proc freq data = score;
  weight count;
  tables female*score1*score2 /cmh noprint;
run;
The FREQ Procedure

Summary Statistics for score1 by score2
Controlling for female

  Cochran-Mantel-Haenszel Statistics (Based on Table Scores)

Statistic    Alternative Hypothesis    DF       Value      Prob
---------------------------------------------------------------
    1        Nonzero Correlation        1      6.6235    0.0101
    2        Row Mean Scores Differ     3      9.2259    0.0264
    3        General Association        9     10.2001    0.3345

Total Sample Size = 104
Using the scores {3, 10, 20, 35} for income and {1, 3, 4, 5} for satisfaction, we have the generalized correlation different from what we have above.
proc freq data = score;
  weight count;
  tables female*income*satisf /cmh noprint;
run;
The FREQ Procedure

Summary Statistics for income by satisf
Controlling for female

  Cochran-Mantel-Haenszel Statistics (Based on Table Scores)

Statistic    Alternative Hypothesis    DF       Value      Prob
---------------------------------------------------------------
    1        Nonzero Correlation        1      6.1563    0.0131
    2        Row Mean Scores Differ     3      9.0342    0.0288
    3        General Association        9     10.2001    0.3345

Total Sample Size = 104
Section 7.4.2 Clinical Trials Example. Inputting Clinical Trial data, table 7.7, p. 193.
data trial;
  input center treat response count;
cards;
       1         1         1         0  
       1         1         0         5  
       1         0         1         0  
       1         0         0         9  
       2         1         1         1  
       2         1         0        12  
       2         0         1         0  
       2         0         0        10  
       3         1         1         0  
       3         1         0         7  
       3         0         1         0  
       3         0         0         5  
       4         1         1         6  
       4         1         0         3  
       4         0         1         2  
       4         0         0         6  
       5         1         1         5  
       5         1         0         9  
       5         0         1         2  
       5         0         0        12  
;
run;
Allowing zero cell entries. proc genmod example on page 193.
proc genmod data = trial descending;
   class center treat;
   weight count;
   model response = center treat /dist=multinomial ;
run;
                               Analysis Of Parameter Estimates

                                     Standard     Wald 95% Confidence       Chi-
Parameter          DF    Estimate       Error           Limits            Square    Pr > ChiSq

Intercept1          1     -0.4763      0.5058     -1.4677      0.5151       0.89        0.3463
center        1     1    -25.5795    172969.8     -339040    338989.0       0.00        0.9999
center        2     1     -2.1802      1.1327     -4.4003      0.0399       3.70        0.0543
center        3     1    -25.8964    187690.2     -367892    367840.1       0.00        0.9999
center        4     1      1.0631      0.7011     -0.3110      2.4373       2.30        0.1294
center        5     0      0.0000      0.0000      0.0000      0.0000        .           .
treat         0     1     -1.5460      0.7017     -2.9212     -0.1708       4.85        0.0276
treat         1     0      0.0000      0.0000      0.0000      0.0000        .           .
Scale               0      1.0000      0.0000      1.0000      1.0000
The results by adding a constant less than .001 and by collapsing center 1, center 2 and center 3.
data trial_add;
  set trial;
  count1 = count + .001;
run;
proc genmod data=trial_add ;
  class center treat ;
  weight count1;
  model response = center treat / dist=multinomial type3;
run;

data collapse;
  set trial_add;
  if center = 1 or center = 2 or center = 3 then ccenter = 1;
  else if center =4 then ccenter= 2;
  else if center = 5 then ccenter = 3;
run;

proc genmod data=collapse ;
  class ccenter treat ;
  weight count1;
  model response = ccenter treat / dist=multinomial type3;
run;
Model 1:

           Criteria For Assessing Goodness Of Fit

Criterion                 DF           Value        Value/DF

Log Likelihood                      -28.9239

                               Analysis Of Parameter Estimates

                                     Standard     Wald 95% Confidence       Chi-
Parameter          DF    Estimate       Error           Limits            Square    Pr > ChiSq

Intercept1          1      0.4765      0.5057     -0.5147      1.4678       0.89        0.3461
center        1     1      7.6727     22.3676    -36.1669     51.5123       0.12        0.7316
center        2     1      2.1783      1.1318     -0.0400      4.3966       3.70        0.0543
center        3     1      7.8259     22.3681    -36.0147     51.6666       0.12        0.7264
center        4     1     -1.0628      0.7009     -2.4366      0.3111       2.30        0.1295
center        5     0      0.0000      0.0000      0.0000      0.0000        .           .
treat         0     1      1.5447      0.7013      0.1702      2.9191       4.85        0.0276
treat         1     0      0.0000      0.0000      0.0000      0.0000        .           .
Scale               0      1.0000      0.0000      1.0000      1.0000

      LR Statistics For Type 3 Analysis

                            Chi-
Source             DF     Square    Pr > ChiSq

center              4      23.36        0.0001
treat               1       5.48        0.0192

Model 2:

         Criteria For Assessing Goodness Of Fit

Criterion                 DF           Value        Value/DF

Log Likelihood                      -29.6055

                               Analysis Of Parameter Estimates

                                     Standard     Wald 95% Confidence       Chi-
Parameter          DF    Estimate       Error           Limits            Square    Pr > ChiSq

Intercept1          1      0.4726      0.5053     -0.5178      1.4631       0.87        0.3496
ccenter       1     1      2.8953      1.1144      0.7111      5.0796       6.75        0.0094
ccenter       2     1     -1.0645      0.7017     -2.4398      0.3108       2.30        0.1293
ccenter       3     0      0.0000      0.0000      0.0000      0.0000        .           .
treat         0     1      1.5574      0.6994      0.1867      2.9282       4.96        0.0260
treat         1     0      0.0000      0.0000      0.0000      0.0000        .           .
Scale               0      1.0000      0.0000      1.0000      1.0000

      LR Statistics For Type 3 Analysis

                            Chi-
Source             DF     Square    Pr > ChiSq

ccenter             2      22.00        <.0001
treat               1       5.61        0.0179

How to cite this page

Report an error on this page

UCLA Researchers are invited to our Statistical Consulting Services
We recommend others to our list of Other Resources for Statistical Computing Help
These pages are Copyrighted (c) by UCLA Academic Technology Services


The content of this web site should not be construed as an endorsement of any particular web site, book, or software product by the University of California