************SAS CODE - CONTINUOUS ASPES METHOD;
/*
Variable definitions
y = Outcome of interest
Ma = Actual mediator
Mp = Predicted mediator
T = Dummy variable to identify treatment group
Mp_T = Interaction term between T and Mp (defined at analysis stage - Part 2)
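x: = Baseline student level covariates (the trailing colon is the SAS name-prefix list: every variable whose name begins with x)
z: = Baseline student level covariates (every variable whose name begins with z)
w: = Baseline student level covariates (every variable whose name begins with w)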
*/
*========================================================
PREPARING Mp: Predicted Mediator
STEPS:
The following example SAS code can be used to conduct the following:
(1) Randomly partition the sample into ten mutually-exclusive cross-validation groups.
(2) For treatment group members, use OLS regression to model the relationship between
baseline characteristics and the mediator of interest ten times, each time
leaving out one of the ten cross-validation groups and using the remaining
90 percent subsample for prediction.
(3) For each of the ten cross-validation groups, construct the predicted mediator using
the parameters obtained from the model estimation that excluded their group.
This step provides each participant with a continuous predicted mediator value
based on baseline characteristics.
*========================================================;
/* Read the raw analysis file into WORK.mock_data. */
PROC IMPORT DATAFILE="mock_data.xlsx" OUT=mock_data DBMS = EXCEL REPLACE; RUN;

/* Attach a uniform random key to every record.
   The fixed seed makes the draw repeatable from run to run. */
DATA mock_data;
    SET mock_data;
    sortvar = RANUNI(123456);
RUN;

/* Put the records in random order. The grouping below is based on record
   position, so without this sort the groups would follow the file order
   instead of being a random partition. */
PROC SORT DATA = mock_data;
    BY sortvar;
RUN;

*Create Prediction Groups, continuous ASPES;
/* Assign consecutive tenths of the shuffled file to CV_group = 1, 2, ...
   NOBS= supplies the record count, END= flags the last record.
   Macro variables written for later steps (once, on the last record):
     sam_size            = number of records in mock_data
     number_of_CV_groups = CV_group of the last record, i.e. the highest group number
   CALL SYMPUTX stores the numbers without leading or trailing blanks. */
DATA mock_data;
    SET mock_data NOBS = n_records END = last_record;
    samplesize = n_records;
    CV_group = 1 + FLOOR((_N_ - 1) / (samplesize * 0.10));
    IF last_record THEN DO;
        CALL SYMPUTX("sam_size", n_records);
        CALL SYMPUTX("number_of_CV_groups", CV_group);
    END;
RUN;
/* Keep only the treatment-group records (T = 1). */
DATA mock_treatment;
    SET mock_data (WHERE = (t = 1));
RUN;

/* Tabulate how many treatment-group records fall in each cross-validation
   group. MISSING also counts records with a missing CV_group. */
PROC FREQ DATA = mock_treatment;
    TABLES CV_group / MISSING LIST;
RUN;
%MACRO PRED;
/*
PRED: build the out-of-sample predicted mediator Mp for every record.

Inputs (read from the calling environment):
  number_of_CV_groups  global macro variable, upper bound of the loop
  mock_treatment       treatment-group records, used to fit the models
  mock_data            all records, the records that get scored
Output:
  Predicted_All        the scored records of every group stacked together,
                       with the score variable renamed from Model1 to Mp

For each group i the mediator model is fitted on the treatment-group records
that are NOT in group i, and its coefficients are then used to score the
records of mock_data that ARE in group i.
*/
/* Keep the loop index private so that a macro variable named i in the
   calling environment is not overwritten. */
%LOCAL i;
/* Start from an empty accumulator data set. */
DATA predicted_All ;STOP;RUN;
%DO i = 1 %TO &number_of_CV_groups;
/* Empty the work data sets first so that nothing from an earlier iteration
   or an earlier run is carried along if one of the steps below fails. */
DATA subsamp2a;STOP;RUN;
DATA subsamp2b;STOP;RUN;
DATA predicted&i ;STOP;RUN;
/* Dataset to Predict FROM: treatment-group records outside group i */
DATA subsamp2a;
SET mock_treatment;
IF CV_group ~= &i * 1;
RUN;
/* Dataset to Predict TO: all records in group i */
DATA subsamp2b;
SET mock_data;
IF CV_group = &i * 1;
RUN;
/* Fit the mediator model and save the parameter estimates in OUTEST. */
PROC REG DATA = subsamp2a NOPRINT OUTEST = Est_predicted&i;
MODEL Ma = x: z: w: /P;
RUN;
QUIT;
/* Apply the saved estimates to the held-out records. */
PROC SCORE DATA= subsamp2b SCORE = Est_predicted&i OUT= predicted&i TYPE = PARMS;
VAR x: z: w: ;
RUN;
/* Append the scored group, renaming the score variable Model1 to Mp. */
DATA Predicted_All;
SET Predicted_All predicted&i (RENAME = (Model1 = Mp));
RUN;
%END;
%MEND PRED;
%PRED;
*========================================================
Compare the distribution of the predicted mediator to the actual mediator.
*========================================================;
/* Descriptive statistics for the predicted (Mp) and actual (Ma) mediator,
   computed over every record in Predicted_All. */
PROC UNIVARIATE DATA=Predicted_All;
    VAR Mp Ma;
RUN;
*========================================================
Estimate a linear relationship between actual and predicted mediator values for the treatment group (dropping the constant term).
Graph the relationship between actual and predicted values of the mediator for the treatment group.
*========================================================;
/* Regress the actual mediator on the predicted mediator without an intercept,
   using treatment-group records only, and request only the fit plot. */
PROC REG DATA=Predicted_All PLOTS(ONLY)=FITPLOT;
    WHERE T = 1;
    MODEL Ma = Mp / NOINT;
RUN;
QUIT;
*========================================================
Estimate the Relationship between the Predicted Mediator and Effect Size.
*========================================================;
*Generate treatment X Mp interaction;
/* Copy Predicted_All and add Mp_T, the product of the treatment dummy
   and the predicted mediator. */
DATA subsamp3;
    SET Predicted_All;
    Mp_T = Mp * T;
RUN;
*Estimate the Relationship between the Predicted Mediator and Effect Size;
/*
We can interpret the model coefficients as follows:
beta_3 (the coefficient on Mp_T) is the indirect effect of the treatment on the impact, operating through the mediator, and
beta_2 + beta_3 * Mp (beta_2 is the coefficient on T) is the total effect of treatment on the impact at a given value of Mp
*/
/* Outcome model: y on the predicted mediator, the treatment dummy, their
   interaction, and the baseline covariates. Plots are switched off. */
PROC REG DATA=subsamp3 PLOTS=NONE;
    MODEL y = Mp T Mp_T
              x: z: w:;
RUN;
QUIT;