**************************************************************************; ** Macro name : CRTFASTGEEPWR **; ** Version number : 02.04 **; ** Authors : Ying Zhang **; ** John Preisser. **; ** Date : Jan-22-2024. **; ** SAS version : 9.4 or higher or SAS studo **; ** Description : CRTFASTGEEPWR is a sas macro compute GEE power for **; ** binary/Count/Continuous outcomes and **; ** complete/incomplete multiple-CRT based on the **; ** general GEE power method proposed by Rochon and **; ** in the JSS fast GEE power manuscript; **; ** In this version 02.02, we calculate the power **; ** base on cohort study with drop out. Also edit the **; ** matrix generation for the BE and PD compared to 02.01; ** The version 02.03 enable the calculation of power. **; **. for the the exist of non-missing CP size step **; ** in the design with missing the other steps **; ** as well as the exist of step with no maintenance **; ** period in extended incremental effect model **; ** The version 02.04 enable the check of DP with 2 **; ** according the CP location with 0 **; ** Parameters : See full list of parameters in the manuscript and **; ** the specifications below **; ** Outputs: : T: Number of periods, a scalar **; ** S: Number of sequences (steps), a scalar **; ** clusters: Number of clusters in the CRT **; ** df: Degrees of freedom in the test of the **; ** intervention effects **; ** theta: Value of period effect parameters in **; ** marginal model **; ** totaln:Total number of the participants in the CRT **; ** Dist: The distribution for the outcomes **; ** link: The link function used in the GEE model **; ** stddel:Estimated standardized intervention effects **; ** |\delta|/\sqrt{var(\hat{\delta})} **; ** zpower: Power using the z-test **; ** tpower: Power using the t-test **; ** **; ** **; ** **; ** This program is free software: you can redistribute it and/or modify **; ** it under the terms of the GNU General Public License as published by **; ** the Free Software Foundation, version 3 of the License. **; ** **; ** This program is distributed in the hope that it will be useful, **; ** but WITHOUT ANY WARRANTY, without even the implied warranty of **; ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the **; ** GNU General Public License for more details. **; ** **; ** You should have received a copy of the GNU General Public License **; ** along with this program. If not, see N[j2] then part_Ij2 =I(N[j1])[,1:N[j2]]; else part_Ij2 =I(N[j2])[1:N[j1],]; R_offdiag[first[j1]:last[j1],first[j2]:last[j2]] = J(N[j1],N[j2],1)*alpha2 +(alpha3-alpha2)*part_Ij2; end; end; R = R_offdiag +R_offdiag` + R_diag; return( R ); /* return the inverse of Ri */ finish; /************************************************************************************** **the SAS module for the proportional decay corr, type=PD ***************************** ***************************************************************************************/ start proportional_decay(alpha0,rho,rho1, T,N); /*generate a block EX correlation*/ blockR = J(N[1],N[1],1); do j1=2 to T; blockR = block(blockR, J(N[j1],N[j1],1)); end; R_diag = alpha0*blockR + (1-alpha0)*I(sum(N)); R_offdiag = J(sum(N),sum(N),0); last = cusum(N); first = last - N + 1; do j1=1 to T; j1_p1 = j1+1; do j2 = j1_p1 to T; if N[j1]>N[j2] then part_Ij2 =I(N[j1])[,1:N[j2]]; else part_Ij2 =I(N[j2])[1:N[j1],]; *R_offdiag[first[j1]:last[j1],first[j2]:last[j2]] = (J(N[j1],N[j2],1)*alpha0 +part_Ij2*(1-alpha0))*rho##abs(j2-j1); * edit it into the general PD version as in Stat power paper; R_offdiag[first[j1]:last[j1],first[j2]:last[j2]] = alpha0*J(N[j1],N[j2],1)*rho##abs(j2-j1) +part_Ij2*(rho1##abs(j2-j1)-alpha0*rho##abs(j2-j1)); end; end; R = R_offdiag +R_offdiag` + R_diag; return( R ); /* return the inverse of Ri */ finish; /**************************************************************************************** **the SAS module to chech frechet bound ************************************************* *****************************************************************************************/ start check_frechet(mu,R); dim1 = ncol(R); Do j = 1 to dim1-1; Do k = j+1 to dim1; flag = 0; if R[j,k] >=min(sqrt(mu[j]*(1-mu[k])/(mu[k]*(1-mu[j]))),sqrt(mu[k]*(1-mu[j])/(mu[j]*(1-mu[k]))))then do; flag =2; /*print j k flag;*/ return(flag); goto theend ;end; else if R[j,k] <=max(-sqrt(mu[j]*mu[k]/((1-mu[k])*(1-mu[j]))),-sqrt(((1-mu[k])*(1-mu[j]))/(mu[j]*mu[k]))) then do; flag =1; /*print j k flag;*/ return(flag); goto theend; end; end; end; return(flag); theend:finish check_frechet; /***************************************************************************************** **Generate vectors and means in marginal mean model based on all design inputs ********** *****************************************************************************************/ DesignPattern = &DesignPattern; S = nrow(DesignPattern); T = ncol(DesignPattern); *print S T; ref_cluster_period_size = &CP_size_matrix; sum_n = sum(&m`*ref_cluster_period_size); *print sum_n; ind_CP_size_missing= (min(ref_cluster_period_size) =0);/*indicator of the incomplete CRT*/ loc_no_obs = loc(ref_cluster_period_size=0); /*One consistency check about the missing locations in DP and CP*/ ind_missing_DP_matrix = (max(&DesignPattern)=2); ind_missing_CP_matrix = (min(&CP_size_matrix)=0); if ind_missing_CP_matrix=1 | ind_missing_DP_matrix=1 then do; do i =1 to S; if min(&CP_size_matrix[i,])=0 then do; idx_missing_CP_i = loc(&CP_size_matrix[i,]=0); missing_DP_i_value = DesignPattern[i,idx_missing_CP_i]; *print i idx_missing_CP_i missing_DP_i_value; if min(missing_DP_i_value)^=2 | max(missing_DP_i_value)^=2 then do; Print "ERROR: The missing cluster periods are different from the CP_size_matrix and design pattern matrix"; abort; end; end; end; end; if ind_CP_size_missing=1 then do; /*if there is a missing in the design, do the loop*/ do step =1 to S; if (min(ref_cluster_period_size[step,]) =0) then do; /*if there is a missing in the step, do the loop*/ loc_no_obs_step = loc(ref_cluster_period_size[step,]=0); ref_cluster_period_size[step,loc_no_obs_step] = 1;/*assuming there is one observation in missing period in the reference complete CRT for incomplete CRTs*/ *print ref_cluster_period_size; %if %upcase(&corr_type) =BE | %upcase(&corr_type) =PD %then %do; ref_cluster_period_size[step,loc_no_obs_step] = max(&CP_size_matrix[step,]);/*assuming the in-consistent cohort size for incomplete closted-cohort CRT*/ %end; end; end; end; /*Obtain the marignal model parameters from the inputs*/ delta = δ /* intervention para,eter */ beta= &beta_period_effects; /* periods effect parameter*/ theta = beta//delta;/* set the total parameters theta;*/ length_beta = nrow(beta); length_theta = nrow(theta); /***************************************************************************************** **decide the period effect in the design & consistency check 1 ********** *****************************************************************************************/ %if %upcase(&period_effect_type) = CAT %then %do; /*Choose the categorical period effect model, there are T period effects */ time_effect = I(T); %end; %else %if %upcase(&period_effect_type) = LIN %then %do; /*Choose the continuous period effect model, a intercept and a slope for time. */ time_effect = J(T,1,1)||(0:(T-1))`; %end; ncol_tf = ncol(time_effect); /*************Two consistency checks about the number of parameters and chosen period effects model*/ if length_beta ^= ncol_tf then Print "ERROR: The length of periods effect parameter does no fit with the periods effect model"; %if (%upcase(&period_effect_type)=CAT) %then %do; if ((T+1) ^= length_theta) then Print "ERROR:The number of period T is not the same with the length of categorical periods effect"; %end; /********************************************************************************************** **Calculate Model-based variance by sequence and design pattern & consistency check ********** **********************************************************************************************/ completeness_matrix = J(S,T,1);/*Generate the completeness matrix by the design pattern matrix, with the same dim*/ ind_missing_DP_matrix = (max(&DesignPattern)=2); if ind_missing_DP_matrix=1 then do; idx_missing = loc(DesignPattern=2); completeness_matrix[idx_missing] = 0; /*One consistency check about the missing locations*/ if ncol(loc_no_obs) ^= ncol(idx_missing) then Print "ERROR: The missing cluster periods are different from the CP_size_matrix and design pattern matrix"; end; XWXsum=J(length_theta,length_theta,0); /* The dim of XWXsum depends on how to period effect*/ /*In categorical period effect model, there are T period effects, so the dim of XWXsum is (T+1 * T+1) */ /*In continuous period effect model, there are 2 period-effects, so the dim of XWXsum is (3* 3) */ design_pattern = J(S,T,0); totaln = 0; do i =1 to S; /*generate the XWXsum by adding XWXi by each sequence*/ Ni = ref_cluster_period_size[i,]; totaln = totaln + sum(&m[i,]*&CP_size_matrix[i,]); /*The total number of subjects are based on the real clutster period size matrix and how many clusters in the sequence*/ /********************************************************************************************** **generate correlation matrix by selected type *********************************************** **********************************************************************************************/ %if %upcase(&corr_type) = ED %then %do; alpha0 = &alpha0; r_0 = &r0 ; R = exp_decay(alpha0,r_0, T,Ni); %let ctype = exponential decay correlation structure; %let c_param = (alpha0,r0):(&alpha0, &r0); %end;%else %if %upcase(&corr_type) = BE %then %do; N = Ni[1,1]; /*Closed_cohort: assume same cluster-period size for each cluster*/ alpha1 = &alpha1; alpha2 = &alpha2; alpha3 = &alpha3; R = block_exchange_R_inv(alpha1, alpha2, alpha3,T,Ni); %let ctype = block exchangeable correlation structure; %let c_param = (alpha1,alpha2,alpha3):(&alpha1, &alpha2, &alpha3); %end; %else %if %upcase(&corr_type) = PD %then %do; N = Ni[1,1]; /*Closed_cohort: assume same cluster-period size for each cluster*/ alpha0 = &alpha0; r0 = &r0; r1 = &r1; R = proportional_decay(alpha0, r0,r1,T,Ni); %let ctype = proportional decay correlation structure; %let c_param = (alpha0,r0,r1):(&alpha0, &r0,&r1); %end;%else %if %upcase(&corr_type) = NE %then %do; R = nested_exchangeable(&alpha1,&alpha2,T, Ni); %let ctype = nested exchangeable correlation structure; %let c_param = (alpha1,alpha2):(&alpha1, &alpha2); %end;%else print "Error Correlation structure specification in the input"; /********************************************************************************************* **generate intervention effects by selected type ********************************************* **********************************************************************************************/ %if %upcase(&intervention_effect_type) = AVE %then %do; /*Choose the average intervention effects model */ Ui = DesignPattern[i,] ; /* Ui is trt sequence received by all clusters in sequence i;*/ %end; %else %if %upcase(&intervention_effect_type) = INC %then %do; /*Choose the incremental intervention effects model, usual in SWCRT*/ locs = loc(DesignPattern[i,]=1); Ui = J(1,(locs[1]-1),0)||((1:(T-locs[1]+1))/&max_intervention_period) ; /* Ui is trt sequence received by all clusters in sequence i;*/ %end; %else %if %upcase(&intervention_effect_type) = INC_EX %then %do; /*Choose the extended incremental intervention effects model, usual in SWCRT and each sequence must have the maintainance period */ locs = loc(DesignPattern[i,]=1); /*In the maintainance period, treatment effect is 1, otherwise is the same with incremental effect model*/ n_maintainance_p =(T-locs[1]+1-&max_intervention_period); if n_maintainance_p>0 then Ui = J(1,(locs[1]-1),0)|| ((1:&max_intervention_period)/&max_intervention_period)||J(1,(T-locs[1]+1-&max_intervention_period),1) ; else Ui = J(1,(locs[1]-1),0)|| ((1:(T-locs[1]+1))/&max_intervention_period) ; %end; Ui = Ui`; /********************************************************************************************** **get the incidence matrices, Ki by each sequence *********************************** **********************************************************************************************/ completeness_seq_i = completeness_matrix[i,]; /*expand the completeness sequence into matrix by the cluster period in the sth step*/ block_completeness_matrix = completeness_seq_i[1]*I(Ni[1]); /*repeat the design matrix row by cluster-period size*/ do j=2 to T; block_completeness_matrix = block(block_completeness_matrix, completeness_seq_i[j]*I(Ni[j]));/*for the block matrix, the row with all zeros indicating periods without observations*/ end; Ki= block_completeness_matrix[loc(block_completeness_matrix[+,]=1),]; /*delete the rows with all zeros */ /********************************************************************************************* **Obtain the Xi, Di,Ai in the power calculation in GEE analysis by outcome types based ***** **on the reference hypothetical complete multi-period CRT ******* **********************************************************************************************/ design_matrix_i = (time_effect||Ui); /*append the time effect with intervention effect*/ Xi = design_matrix_i[1,]@J(Ni[1],1,1); /* initialize Xi with the first period */ do j = 2 to T; Xi = Xi//(design_matrix_i[j,]@J(Ni[j],1,1)); /* generate Xi by varying cluster-period size Ni */ end; /*obtain the mean and variance matrix by responses types and link functions*/ gi = Xi*theta; /********************************************************************************************************************* **Obtain the Xi, Di,Ai in the power calculation in GEE analysis for count outcomes with log or identity link********* **********************************************************************************************************************/ %if %upcase(&Dist) = POISSON %then %do;/*for count outcomes*/ mui = exp(gi); /*log link as the default link function*/ Di = diag(mui); Dist = "POISSON"; Link = "LOG";/*canonical link*/ %if %upcase(&Link) = IDENTITY %then %do; Link = "IDENTITY"; mui = gi; /*outcome mean under identity link */ /*internal consistency checks for count outcomes*/ if min(mui) <=0 then print "ERROR: The identity link of count outcome does not work because the mean is negative"; Di = I(nrow(mui)); %end; Ai_sqrt = diag(sqrt(mui));/*Ai_sqrt is the square root of variance matrix Ai, which is sqrt(mu) in the count outcomes */ %end; /*************************************************************************************************************************** **Obtain the Xi, Di,Ai in the power calculation in GEE analysis for continuous outcomes with log or identity link********* ***************************************************************************************************************************/ %else %if %upcase(&Dist) = NORMAL %then %do; mui = gi; /*identity link as the default link function*/ Di = I(nrow(mui)); Dist = "NORMAL"; LINK = "IDENTITY";/*canonical link*/ %if %upcase(&Link) = LOG %then %do; Link = "LOG"; mui = exp(gi); /*outcome mean under log link */ /*internal consistency checks for normal outcomes*/ Di = diag(mui); %end; Ai_sqrt = I(nrow(mui));/*Ai_sqrt is the square root of variance matrix Ai, which is sqrt(mu) in the count outcomes */ %end; /************************************************************************************************************************** **Obtain the Xi, Di,Ai in the power calculation in GEE analysis for binary outcomes with logit log, identity link********* ***************************************************************************************************************************/ %else %if %upcase(&Dist) = BINARY %then %do; /*for binary outcomes*/ /*check the Frechet bound for binary outcomes*/ Xi_star = Ki*Xi; gi_star = Xi_star*theta; Dist = "BINARY"; mui_star = exp(gi_star)/(1 + exp(gi_star)); /*logit link as the default link function*/ Ri_star = Ki*R*Ki`; flag = check_frechet(mui_star,Ri_star);/*The frechet boundary is examinated based on the true incomplete average mean vector mui_star*/ if flag>0 then do; print "ERROR: Correlation out of Frechet bound";end; mui = exp(gi)/(1 + exp(gi)); /*logit link as the default link function*/ *print mui; Di = diag(mui#(1-mui)); Link = "LOGIT";/*canonical link*/ %if %upcase(&link) = LOG %then %do; Link = "LOG"; mui = exp(gi); /*outcome mean under log link */ /*internal consistency checks for binary outcomes*/ if max(mui) >=1 then Print "ERROR: The log link of binary outcome does not work as mean is larger than 1"; Di = diag(mui); %end; %else %if %upcase(&Link) = IDENTITY %then %do; Link = "IDENTITY"; mui = gi; /*outcome mean under identity link */ /*internal consistency checks for binary outcomes*/ if max(mui) >=1 | min(mui) <=0 then print "ERROR: The identity link of binary outcome does not work as mean is larger than 1 or smaller than 0"; Di = I(nrow(mui)); %end; Ai_sqrt = diag(sqrt(mui#(1-mui)));/*Ai_sqrt is the square root of variance matrix Ai, which is sqrt(mu*(1-mu)) in the binary outcomes */ *print mui Ai_sqrt Di ; %end; ****************************************************************************************************************; **Obtain the Xi_star, Di_star,Ai_star in the power calculation in GEE analysis by outcome types for *****; **the incomplete multi-period CRT with the incidence matrix ***********************; ****************************************************************************************************************; *print mui; Xi_star = Ki*Xi; Ai_sqrt_star = Ki*Ai_sqrt*Ki`; Di_star = Ki*Di*Ki`; test =Ki*R*Ki`; * print test; Wi_star = Di_star*inv(Ai_sqrt_star*Ki*R*Ki`*Ai_sqrt_star)*Di_star; * print Xi_star Wi_star; XWXi = t(Xi_star)*Wi_star*Xi_star; XWXsum = XWXsum + XWXi*&m[i,];/* specify the number of clusters per sequence */ *print XWXsum; end; phi = φ /*set the dispersion parameters*/ modvar = phi*inv(XWXsum);/*calculate the model based variance*/ vardel = modvar[length_theta, length_theta]; /*pick the variance of intervention effect*/ ****************************************************************************************************************; **Print out the fast GEE power using z and t test and all the outputs about the CRT. *****; ****************************************************************************************************************; /*add one safety check for the number of sample size in the macro*/ *print totaln sum_n; if totaln ^= sum_n then print "Errors in the power calculation loop, please check"; else do; alpha=α /*get value from macro*/ twotailprob=(1-alpha/2); ztwotail=probit(twotailprob); stddel = round(abs(delta)/sqrt(vardel),0.0001); zpower_value = stddel - ztwotail; zpower = round(probnorm(zpower_value),0.0001); clusters = sum(&m); * p is the number of parameters estimated; df = clusters - length_theta; /*df impacted by choice of modeling time effect */ %if &df_choice=2 %then %do; df = clusters-2;%end; ttwotail = tinv(1-alpha/2, df); tpower_value = stddel - ttwotail; tpower = round(probt(tpower_value,df),0.0001); /********************************************************************/ %let name_dist = %qsysfunc(compress(&DIST,%str(%"))); title "The fast GEE power of &name_dist outcomes with &ctype and &c_param"; %if %upcase(&intervention_effect_type)=AVE %then %do; title2 "Under average intervention effects model and delta = &delta"; %end; %if %upcase(&intervention_effect_type)=INC %then %do; title2 "Under incremental intervention effects model and delta = &delta"; %end; %if %upcase(&intervention_effect_type)=INC_EX %then %do; title2 "Under extended incremental intervention effects model and delta = &delta"; %end; print T S clusters df theta totaln dist link stddel zpower tpower ; end; quit; %mend;