* An example of apply Goodness-of-fit test to fixed-effects only model of nest survival
* This is an extension of work provided in:
* Sturdivant, R.X., J.J. Rotella, R.E. Russell.  2007.  A smoothed residual based
*   goodness-of-fit statistic for nest-survival models.  Studies in Avian Biology 34:45-54.
;

data work.mall; set Sasuser.Mall2000nd;
DATA pred ;
    SET mall ;
    int=1 ; * add a column of 1's for use as intercept in GOF
            * ADD INTERACTIONS to DATA HERE if any are used in model;
run ;

%MACRO u1kern ;
*options nonotes ;

PROC IML ;
    USE piest ;
        read all var {ifate} into yvec ;  * NAME OF RESPONSE HERE ;
        read all var {pred} into pihat ;
    CLOSE piest ;
    USE west ;
        read all var {pred} into wvec ;
    CLOSE west ;

    ehat = yvec-pihat ;
    what = diag(wvec) ;
    winv = inv(what) ;
    n = nrow(pihat) ;
    getwt =  ceil(0*sqrt(n))+1 ;     * SELECT BANDWIDTH HERE ;

*  KERNEL SMOOTH ROUTINE ;

    wtmat = J(n,n) ;
    rx=J(n,1) ;
    do i=1 to n ;
            x = abs(pihat[i] - pihat);
            rx[rank(x)]=x;
            bw = rx[getwt] ;
            if bw = 0 then do ;
                bw = 0.0000000000001 ;
            end ;
            wtmat[,i] = x / bw ;
    end ;

        * UNIFORM (-a,a) ;
    ukern = t(wtmat<1) ;
    icolsum = 1/ukern[,+] ;
    uwt = ukern # icolsum ;

        * CUBIC  ;
    ctemp = 1 - (t(wtmat))##3 ;
    ckern = ukern # ctemp ;
    icolsum = 1/ckern[,+] ;
    cwt = ckern # icolsum ;

        * NORMAL ;
    nkern = pdf('norm',t(2*wtmat)) ;
    icolsum = 1/nkern[,+] ;
    nwt = nkern # icolsum ;

* MOMENTS and TEST STATISTICS;

    USE pred ;
    * INCLUDE ALL PREDICTORS IN ORDER OF MODEL w INT first ;
        read all var{int sage PctGr4} into Q ;  
    CLOSE pred ;

    USE betahat ;
        read all var {Estimate} into betahat ;  
    CLOSE betahat ;
    
    * CREATE g vector and M matrix ;

    mymat = inv( t(Q)*what*Q) ;

    M = what * Q * mymat * t(Q) ;
    g = J(n,1,0) ;

    * CALCULATE TEST STATISTICS;
    
    im = I(nrow(M))-M ;

    midunif = t(uwt)*uwt ;
    midcube = t(cwt)*cwt ;
    midnorm = t(nwt)*nwt ;

    aunif = t(im)*midunif*im ;
    acube = t(im)*midcube*im ;
    anorm = t(im)*midnorm*im ;

    bunif = 2*t(im)*midunif*g ;
    bcube = 2*t(im)*midcube*g ;
    bnorm = 2*t(im)*midnorm*g ;

    Tuni = t(ehat)*midunif*ehat ;
    Tc = t(ehat)*midcube*ehat ;
    Tn = t(ehat)*midnorm*ehat ;

    * CALCULATE EXPECTED VALUES ;
    eunif = trace( aunif*what) + t(g)*midunif*g ;
    ecube = trace( acube*what) + t(g)*midcube*g ;
    enorm = trace( anorm*what) + t(g)*midnorm*g ;

    * CALCULATE VARIANCE ;
    temp1 = wvec#(1-6*wvec) ;
    temp3 = pihat#(1-pihat)#(1-2*pihat) ;

    tempu = (vecdiag(aunif))##2 ;
    tempc = (vecdiag(acube))##2 ;
    tempn = (vecdiag(anorm))##2 ;

    v1unif = sum(tempu#temp1) ;
    v2unif = 2* trace(aunif*what*aunif*what) ;
    v3unif = t(bunif)*what*bunif ;
    v4unif = 2*sum( (vecdiag(aunif))#bunif#temp3 ) ;

    v1cube = sum(tempc#temp1) ;
    v2cube = 2* trace(acube*what*acube*what) ;
    v3cube = t(bcube)*what*bcube ;
    v4cube = 2*sum( (vecdiag(acube))#bcube#temp3 ) ;

    v1norm = sum(tempn#temp1) ;
    v2norm = 2* trace(anorm*what*anorm*what) ;
    v3norm = t(bnorm)*what*bnorm ;
    v4norm = 2*sum( (vecdiag(anorm))#bnorm#temp3 ) ;

    vunif = v1unif + v2unif + v3unif + v4unif ;
    vcube = v1cube + v2cube + v3cube + v4cube ;
    vnorm = v1norm + v2norm + v3norm + v4norm ;

    cubestat = (Tc-ecube)/sqrt(vcube) ;
    normstat = (Tn-enorm)/sqrt(vnorm) ;
    unifstat = (Tuni-eunif)/sqrt(vunif) ;

    punif = 2*(1-probnorm(abs(unifstat))) ;
    pcube = 2*(1-probnorm(abs(cubestat))) ;
    pnorm = 2*(1-probnorm(abs(normstat))) ;

print Tc ecube vcube pcube ;
print Tuni eunif vunif punif ;
print Tn enorm vnorm pnorm;

quit ; run ;
%MEND ;


/* Run a fixed-effects model of interest */

Proc Nlmixed data=Mall tech=quanew method=gauss maxiter=1000;
parms B0=0, B1=0, B2=0;
      p=1;
         do i=0 TO t-1;
            logit=B0+B1*(SAge+i)+B2*PctGr4;
            p=p*(exp(logit)/(1+exp(logit)));
         end;
model ifate~binomial(1,p);
RUN;
/*  ADD THESE THREE LINES TO THE MODEL FIT */
predict p*(1-p) out=west ;
predict p out=piest ;
ods output ParameterEstimates=betahat ;
RUN;
%u1kern ; run ;
* the calculated statistics and moments for the chosen bandwidth
*  and the cubic kernel appear on the 1st row of output on the
*  final page of output
* e.g., for this model:
*                                   Tc     ecube     vcube     pcube
*                            199.71518 199.07884 68.441508 0.9386893
* Where 'pcube' is the P-value