#ipw: An R Package for Inverse Probability Weighting #by Willem M. van der Wal and Ronald B. Geskus ############################################################################# #example 1: causal effect of income on health #load package library("ipw") #simulate data with continuous confounder and outcome, binomial exposure #marginal causal effect of exposure on outcome: 10 #set random seed for reproducibility set.seed(16) n <- 1000 simdat <- data.frame(l = rnorm(n, 10, 5)) a.lin <- simdat$l - 10 pa <- exp(a.lin)/(1 + exp(a.lin)) simdat$a <- rbinom(n, 1, prob = pa) simdat$y <- 10*simdat$a + 0.5*simdat$l + rnorm(n, -10, 5) simdat[1:5,] #estimate ipw weights temp <- ipwpoint( exposure = a, family = "binomial", link = "logit", numerator = ~ 1, denominator = ~ l, data = simdat) summary(temp$ipw.weights) #plot inverse probability weights graphics.off() ipwplot(weights = temp$ipw.weights, logscale = FALSE, main = "Stabilized weights", xlim = c(0, 8)) #examine numerator and denominator models summary(temp$num.mod) summary(temp$den.mod) #Paste inverse probability weights simdat$sw <- temp$ipw.weights #marginal structural model for the causal effect of a on y #corrected for confounding by l using inverse probability weighting #with robust standard error from the survey package msm <- (svyglm(y ~ a, design = svydesign(~ 1, weights = ~ sw, data = simdat))) coef(msm) confint(msm) ############################################################################# #example 2: causal effect of HAART use on mortality in HIV patients #load package, load haartdat, briefly explore haartdat library("ipw") data("haartdat") haartdat[1:10,] #estimate and examine (stabilized) inverse probability weights temp <- ipwtm( exposure = haartind, family = "survival", numerator = ~ sex + age, denominator = ~ cd4.sqrt + sex + age, id = patient, tstart = tstart, timevar = fuptime, type = "first", data = haartdat) summary(temp$ipw.weights) #for comparison, estimate and examine unstabilized weights temp.unstab <- ipwtm( exposure = haartind, family = "survival", denominator = ~ cd4.sqrt, id = patient, tstart = tstart, timevar = fuptime, type = "first", data = haartdat) summary(temp.unstab$ipw.weights) #plot (stabilized) inverse probability weights graphics.off() ipwplot(weights = temp$ipw.weights, timevar = haartdat$fuptime, binwidth = 100, ylim = c(-1.5, 1.5), main = "Stabilized inverse probability weights") #estimate inverse probability of censoring weights temp2 <- ipwtm( exposure = dropout, family = "survival", numerator = ~ sex + age, denominator = ~ sex + age + cd4.sqrt, id = patient, tstart = tstart, timevar = fuptime, type = "first", data = haartdat) #plot inverse probability of censoring weights graphics.off() ipwplot(weights = temp2$ipw.weights, timevar = haartdat$fuptime, binwidth = 100, ylim = c(-1.5, 1.5), main = "Stabilized inverse probability of censoring weights") ###to plot both sets of weights together ##graphics.off() ##par(mfrow = c(2,1)) ##ipwplot(weights = temp$ipw.weights, timevar = haartdat$fuptime, ## binwidth = 100, ylim = c(-1.5, 1.5), main = "Stabilized weights") ##ipwplot(weights = temp2$ipw.weights, timevar = haartdat$fuptime, ## binwidth = 100, ylim = c(-1.5, 1.5), main = "Stabilized weights") #MSM summary(coxph(Surv(tstart, fuptime, event) ~ haartind + cluster(patient), data = haartdat, weights = temp$ipw.weights*temp2$ipw.weights)) #uncorrected model summary(coxph(Surv(tstart, fuptime, event) ~ haartind, data = haartdat)) ############################################################################# #example 3: causal effect of active tuberculosis on mortality in HIV patients #load package ipw, time fixed data and time varying data, explore data library("ipw") data("basdat") data("timedat") basdat[1:4,] timedat[1:10,] #processing of the original data table(duplicated(timedat[,c("id", "fuptime")])) #check for ties timedat$cd4.sqrt <- sqrt(timedat$cd4count) #take square root of CD4 timedat <- merge(timedat, basdat[,c("id", "Ttb")], by = "id", all.x = TRUE) #add time of first TB to dataframe timedat$tb.lag <- ifelse(with(timedat, !is.na(Ttb) & fuptime > Ttb), 1, 0) #compute TB status one day before fuptime #fit longitudinal CD4 model cd4.lme <- lme(cd4.sqrt ~ fuptime + tb.lag, random = ~ fuptime | id, data = timedat) #longitudinal CD4-model #build new dataset: rows corresponding to time points at which TB-status switches, and individual end times times <- sort(unique(c(basdat$Ttb, basdat$Tend))) startstop <- data.frame( id = rep(basdat$id, each = length(times)), fuptime = rep(times, nrow(basdat))) startstop <- merge(startstop, basdat, by = "id", all.x = TRUE) #add baseline data to dataframe startstop <- startstop[with(startstop, fuptime <= Tend),] #limit individual follow-up using Tend startstop$tstart <- tstartfun(id, fuptime, startstop) #compute tstart (see ?tstartfun) startstop$tb <- ifelse(with(startstop, !is.na(Ttb) & fuptime >= Ttb), 1, 0) #indicate B status startstop$tb.lag <- ifelse(with(startstop, !is.na(Ttb) & fuptime > Ttb), 1, 0) #indicate TB status at previous time point startstop$event <- ifelse(with(startstop, !is.na(Tdeath) & fuptime >= Tdeath), 1, 0) #indicate death startstop$cd4.sqrt <- predict(cd4.lme, newdata = data.frame(id = startstop$id, fuptime = startstop$fuptime, tb.lag = startstop$tb.lag)) #impute CD4, based on TB status at previous time point. Note that lvcf could have been used also, but using the lme model smooths the measurements. #compute and plot inverse probability weights temp <- ipwtm( exposure = tb, family = "survival", numerator = ~ 1, denominator = ~ cd4.sqrt, id = id, tstart = tstart, timevar = fuptime, type = "first", data = startstop) summary(temp$ipw.weights) ipwplot(weights = temp$ipw.weights, timevar = startstop$fuptime, binwidth = 100, main = "Stabilized weights", xlab = "Days since HIV seroconversion", ylab = "Logarithm of weights", xaxt = "n") axis(side = 1, at = c(0, 5, 10, 15, 20, 25, 30, 35), labels = as.character(c(0, 5, 10, 15, 20, 25, 30, 35)*100)) #models summary(coxph(Surv(tstart, fuptime, event) ~ tb + cluster(id), data = startstop, weights = temp$ipw.weights)) #IPW-fitted MSM, using cluster() to obtain robust standard error estimate summary(coxph(Surv(tstart, fuptime, event) ~ tb, data = startstop)) #unadjusted summary(coxph(Surv(tstart, fuptime, event) ~ tb + cd4.sqrt, data = startstop)) #adjusted using conditioning: part of the effect of TB is adjusted away