First, let’s read our data into R again.
library(readstata13)
dep=read.dta13("~/Documents/TEACHING/SHARE-Child data/SC_anonymized_forAmy_20180314.dta")
## Warning in read.dta13("~/Documents/TEACHING/SHARE-Child data/SC_anonymized_forAmy_20180314.dta"):
## timepoint:
## Factor codes of type double or float detected - no labels assigned.
## Set option nonint.factors to TRUE to assign labels anyway.
#PHQ>9 a sign of potential moderate to severe depression
dep$depressed=dep$phqTotal>9; dep$y=as.numeric(dep$depressed)
dep$edu=cut(dep$momEduc,breaks=c(-.001,12,16))
dep$age=cut(dep$momAge,breaks=c(15,30,41))
dep$pid=dep$anon_id
depfinal=dep[,c(3,8,9:11)] #pick off ID, time, outcome, edu, and age
In SHARE-Child Pakistan, we have missing data on the depression outcome measured longitudinally. Our data are structured so that we have one line per person for each measurement occasion.
head(depfinal)
## timepoint y edu age pid
## 1 0 0 (-0.001,12] (15,30] 1
## 2 1 0 (-0.001,12] (15,30] 1
## 3 2 0 (-0.001,12] (15,30] 1
## 4 0 1 (-0.001,12] (15,30] 2
## 5 1 0 (-0.001,12] (15,30] 2
## 6 2 1 (-0.001,12] (15,30] 2
The variable anon_id
indicates the observations from each participant.
To use MICE, we want to have the data structured so that we have only one line per person. Because education and age do not change within subject in these data, it is fairly straightforward to reshape the data into one line per person format.
depwide=reshape(depfinal,timevar="timepoint",idvar=c("pid","edu","age"),
direction="wide")
head(depwide) #check vs previous output this happened correctly
## edu age pid y.0 y.1 y.2
## 1 (-0.001,12] (15,30] 1 0 0 0
## 4 (-0.001,12] (15,30] 2 1 0 1
## 7 (-0.001,12] (15,30] 3 0 NA NA
## 10 (-0.001,12] (30,41] 4 1 0 0
## 13 (-0.001,12] (15,30] 5 1 0 0
## 16 (-0.001,12] (15,30] 6 1 0 1
md.pattern(depwide)
## edu age pid y.0 y.2 y.1
## 126 1 1 1 1 1 1 0
## 19 1 1 1 1 1 0 1
## 7 1 1 1 1 0 1 1
## 22 1 1 1 1 0 0 2
## 0 0 0 0 29 41 70
Here we see that there are 126 people with no missing data, 19 who are missing the 3 month follow-up only, 7 who are only missing the 6 month follow-up, and 22 who are missing both post-baseline measurements.
#depwide =1 if only missing y.1, 2 if only missing y.2, 3 if missing both
depwide$pattern=as.numeric(is.na(depwide$y.1))+2*as.numeric(is.na(depwide$y.2))
To look at predictors of the patterns of missing data, we can fit a multinomial logistic regression model to examine predictors of missingness of each pattern (relative to having complete data).
library(nnet)
genl=multinom(pattern~age+edu+y.0,data=depwide)
## # weights: 20 (12 variable)
## initial value 241.215219
## iter 10 value 147.031226
## iter 20 value 146.817039
## final value 146.816150
## converged
summary(genl)
## Call:
## multinom(formula = pattern ~ age + edu + y.0, data = depwide)
##
## Coefficients:
## (Intercept) age(30,41] edu(12,16] y.0
## 1 -2.150815 -0.34978759 0.3270265 0.5326544
## 2 -3.092709 0.55694120 -11.1688829 0.4078089
## 3 -1.798004 0.06510159 0.9183336 -0.3385712
##
## Std. Errors:
## (Intercept) age(30,41] edu(12,16] y.0
## 1 0.3999866 0.6751067 0.6922712 0.4997496
## 2 0.6257071 0.8753350 255.8445676 0.7907378
## 3 0.3405420 0.5716863 0.5571427 0.4830832
##
## Residual Deviance: 293.6323
## AIC: 317.6323
summary(genl)$coefficients/summary(genl)$standard.errors
## (Intercept) age(30,41] edu(12,16] y.0
## 1 -5.377217 -0.5181219 0.47239647 1.0658425
## 2 -4.942743 0.6362606 -0.04365495 0.5157321
## 3 -5.279829 0.1138764 1.64829151 -0.7008549
That multinomial logistic regression model does not reveal strong predictors of missingness. While it is not possible to rule out MNAR, we do not see strong evidence of any observed variables (including baseline depression) predicting missingness.
depimp <- mice(depwide, m=20, printFlag=FALSE, maxit = 40, seed=2525)
# The output depimp contains m=20 completed datasets
#look at first few lines of imputed data and original data
cbind(head(complete(depimp,1)),head(depwide))
## edu age pid y.0 y.1 y.2 pattern edu age pid y.0
## 1 (-0.001,12] (15,30] 1 0 0 0 0 (-0.001,12] (15,30] 1 0
## 2 (-0.001,12] (15,30] 2 1 0 1 0 (-0.001,12] (15,30] 2 1
## 3 (-0.001,12] (15,30] 3 0 1 1 3 (-0.001,12] (15,30] 3 0
## 4 (-0.001,12] (30,41] 4 1 0 0 0 (-0.001,12] (30,41] 4 1
## 5 (-0.001,12] (15,30] 5 1 0 0 0 (-0.001,12] (15,30] 5 1
## 6 (-0.001,12] (15,30] 6 1 0 1 0 (-0.001,12] (15,30] 6 1
## y.1 y.2 pattern
## 1 0 0 0
## 2 0 1 0
## 3 NA NA 3
## 4 0 0 0
## 5 0 0 0
## 6 0 1 0
You can see the imputed values in the first imputation for person 3, who was missing both follow-up times.
# note the .imp value is the number of the imputation
head(complete(depimp,action='long'))
## .imp .id edu age pid y.0 y.1 y.2 pattern
## 1 1 1 (-0.001,12] (15,30] 1 0 0 0 0
## 2 1 2 (-0.001,12] (15,30] 2 1 0 1 0
## 3 1 3 (-0.001,12] (15,30] 3 0 1 1 3
## 4 1 4 (-0.001,12] (30,41] 4 1 0 0 0
## 5 1 5 (-0.001,12] (15,30] 5 1 0 0 0
## 6 1 6 (-0.001,12] (15,30] 6 1 0 1 0
tail(complete(depimp,action='long'))
## .imp .id edu age pid y.0 y.1 y.2 pattern
## 3475 20 169 (12,16] (15,30] 169 0 1 0 0
## 3476 20 170 (-0.001,12] (15,30] 170 1 1 1 0
## 3477 20 171 (12,16] (30,41] 171 0 0 0 0
## 3478 20 172 (-0.001,12] (30,41] 172 1 1 0 0
## 3479 20 173 (12,16] (30,41] 173 1 1 1 3
## 3480 20 174 (-0.001,12] (15,30] 174 0 0 0 0
In order to fit a GEE to these data, we need to transform the data structure back into “long” format from “wide” format. We can do this reconfiguration as follows.
depimp.data=as.list(1:20)
for (i in 1:20) {
depimp.data[[i]]=complete(depimp,action=i)
}
# transpose data and add time variable back
depimp.data2 <- lapply(depimp.data,reshape,varying=4:6,v.names="dep",idvar="pid",timevar="followup",times=c(0,1,2),direction="long")
depimp.data2 <- lapply(depimp.data2,FUN=function(u){ u[order(u$pid),]})
library(geepack)
depimp.gee=lapply(depimp.data2,
FUN=function(u){geeglm(dep~as.factor(followup)+edu,
id=as.factor(pid), family=binomial(),
corstr="exchangeable",waves=followup,data=u)})
depimp.gee
## [[1]]
##
## Call:
## geeglm(formula = dep ~ as.factor(followup) + edu, family = binomial(),
## data = u, id = as.factor(pid), waves = followup, corstr = "exchangeable")
##
## Coefficients:
## (Intercept) as.factor(followup)1 as.factor(followup)2
## -0.15348187 -0.77529060 -0.55790229
## edu(12,16]
## -0.05465437
##
## Degrees of Freedom: 522 Total (i.e. Null); 518 Residual
##
## Scale Link: identity
## Estimated Scale Parameters: [1] 1.000106
##
## Correlation: Structure = exchangeable Link = identity
## Estimated Correlation Parameters:
## alpha
## 0.180043
##
## Number of clusters: 174 Maximum cluster size: 3
##
##
## [[2]]
##
## Call:
## geeglm(formula = dep ~ as.factor(followup) + edu, family = binomial(),
## data = u, id = as.factor(pid), waves = followup, corstr = "exchangeable")
##
## Coefficients:
## (Intercept) as.factor(followup)1 as.factor(followup)2
## -0.15037304 -0.80394537 -1.08061614
## edu(12,16]
## -0.07632113
##
## Degrees of Freedom: 522 Total (i.e. Null); 518 Residual
##
## Scale Link: identity
## Estimated Scale Parameters: [1] 1.000036
##
## Correlation: Structure = exchangeable Link = identity
## Estimated Correlation Parameters:
## alpha
## 0.1513148
##
## Number of clusters: 174 Maximum cluster size: 3
##
##
## [[3]]
##
## Call:
## geeglm(formula = dep ~ as.factor(followup) + edu, family = binomial(),
## data = u, id = as.factor(pid), waves = followup, corstr = "exchangeable")
##
## Coefficients:
## (Intercept) as.factor(followup)1 as.factor(followup)2
## -0.15218713 -0.83287058 -1.32896924
## edu(12,16]
## -0.06344385
##
## Degrees of Freedom: 522 Total (i.e. Null); 518 Residual
##
## Scale Link: identity
## Estimated Scale Parameters: [1] 0.9998686
##
## Correlation: Structure = exchangeable Link = identity
## Estimated Correlation Parameters:
## alpha
## 0.1576116
##
## Number of clusters: 174 Maximum cluster size: 3
##
##
## [[4]]
##
## Call:
## geeglm(formula = dep ~ as.factor(followup) + edu, family = binomial(),
## data = u, id = as.factor(pid), waves = followup, corstr = "exchangeable")
##
## Coefficients:
## (Intercept) as.factor(followup)1 as.factor(followup)2
## -0.1405558 -0.6108859 -1.3682973
## edu(12,16]
## -0.1453150
##
## Degrees of Freedom: 522 Total (i.e. Null); 518 Residual
##
## Scale Link: identity
## Estimated Scale Parameters: [1] 1.000156
##
## Correlation: Structure = exchangeable Link = identity
## Estimated Correlation Parameters:
## alpha
## 0.09879622
##
## Number of clusters: 174 Maximum cluster size: 3
##
##
## [[5]]
##
## Call:
## geeglm(formula = dep ~ as.factor(followup) + edu, family = binomial(),
## data = u, id = as.factor(pid), waves = followup, corstr = "exchangeable")
##
## Coefficients:
## (Intercept) as.factor(followup)1 as.factor(followup)2
## -0.1883719 -0.7477744 -1.0815044
## edu(12,16]
## 0.1908969
##
## Degrees of Freedom: 522 Total (i.e. Null); 518 Residual
##
## Scale Link: identity
## Estimated Scale Parameters: [1] 0.9991441
##
## Correlation: Structure = exchangeable Link = identity
## Estimated Correlation Parameters:
## alpha
## 0.185012
##
## Number of clusters: 174 Maximum cluster size: 3
##
##
## [[6]]
##
## Call:
## geeglm(formula = dep ~ as.factor(followup) + edu, family = binomial(),
## data = u, id = as.factor(pid), waves = followup, corstr = "exchangeable")
##
## Coefficients:
## (Intercept) as.factor(followup)1 as.factor(followup)2
## -0.15690289 -0.83279168 -1.21788771
## edu(12,16]
## -0.03071406
##
## Degrees of Freedom: 522 Total (i.e. Null); 518 Residual
##
## Scale Link: identity
## Estimated Scale Parameters: [1] 1.000196
##
## Correlation: Structure = exchangeable Link = identity
## Estimated Correlation Parameters:
## alpha
## 0.1295885
##
## Number of clusters: 174 Maximum cluster size: 3
##
##
## [[7]]
##
## Call:
## geeglm(formula = dep ~ as.factor(followup) + edu, family = binomial(),
## data = u, id = as.factor(pid), waves = followup, corstr = "exchangeable")
##
## Coefficients:
## (Intercept) as.factor(followup)1 as.factor(followup)2
## -0.1849023 -0.8334190 -1.1146190
## edu(12,16]
## 0.1656098
##
## Degrees of Freedom: 522 Total (i.e. Null); 518 Residual
##
## Scale Link: identity
## Estimated Scale Parameters: [1] 0.9993827
##
## Correlation: Structure = exchangeable Link = identity
## Estimated Correlation Parameters:
## alpha
## 0.1386259
##
## Number of clusters: 174 Maximum cluster size: 3
##
##
## [[8]]
##
## Call:
## geeglm(formula = dep ~ as.factor(followup) + edu, family = binomial(),
## data = u, id = as.factor(pid), waves = followup, corstr = "exchangeable")
##
## Coefficients:
## (Intercept) as.factor(followup)1 as.factor(followup)2
## -0.1380775 -0.9227384 -1.4491035
## edu(12,16]
## -0.1627455
##
## Degrees of Freedom: 522 Total (i.e. Null); 518 Residual
##
## Scale Link: identity
## Estimated Scale Parameters: [1] 0.9998302
##
## Correlation: Structure = exchangeable Link = identity
## Estimated Correlation Parameters:
## alpha
## 0.1665335
##
## Number of clusters: 174 Maximum cluster size: 3
##
##
## [[9]]
##
## Call:
## geeglm(formula = dep ~ as.factor(followup) + edu, family = binomial(),
## data = u, id = as.factor(pid), waves = followup, corstr = "exchangeable")
##
## Coefficients:
## (Intercept) as.factor(followup)1 as.factor(followup)2
## -0.1801525 -0.8625564 -1.0160018
## edu(12,16]
## 0.1338792
##
## Degrees of Freedom: 522 Total (i.e. Null); 518 Residual
##
## Scale Link: identity
## Estimated Scale Parameters: [1] 0.9990598
##
## Correlation: Structure = exchangeable Link = identity
## Estimated Correlation Parameters:
## alpha
## 0.2055832
##
## Number of clusters: 174 Maximum cluster size: 3
##
##
## [[10]]
##
## Call:
## geeglm(formula = dep ~ as.factor(followup) + edu, family = binomial(),
## data = u, id = as.factor(pid), waves = followup, corstr = "exchangeable")
##
## Coefficients:
## (Intercept) as.factor(followup)1 as.factor(followup)2
## -0.15426757 -0.74704567 -1.21794117
## edu(12,16]
## -0.04879318
##
## Degrees of Freedom: 522 Total (i.e. Null); 518 Residual
##
## Scale Link: identity
## Estimated Scale Parameters: [1] 0.9998467
##
## Correlation: Structure = exchangeable Link = identity
## Estimated Correlation Parameters:
## alpha
## 0.1557326
##
## Number of clusters: 174 Maximum cluster size: 3
##
##
## [[11]]
##
## Call:
## geeglm(formula = dep ~ as.factor(followup) + edu, family = binomial(),
## data = u, id = as.factor(pid), waves = followup, corstr = "exchangeable")
##
## Coefficients:
## (Intercept) as.factor(followup)1 as.factor(followup)2
## -0.15674859 -0.89190973 -1.36763035
## edu(12,16]
## -0.03160435
##
## Degrees of Freedom: 522 Total (i.e. Null); 518 Residual
##
## Scale Link: identity
## Estimated Scale Parameters: [1] 0.9999681
##
## Correlation: Structure = exchangeable Link = identity
## Estimated Correlation Parameters:
## alpha
## 0.1599415
##
## Number of clusters: 174 Maximum cluster size: 3
##
##
## [[12]]
##
## Call:
## geeglm(formula = dep ~ as.factor(followup) + edu, family = binomial(),
## data = u, id = as.factor(pid), waves = followup, corstr = "exchangeable")
##
## Coefficients:
## (Intercept) as.factor(followup)1 as.factor(followup)2
## -0.1350008 -0.8629177 -1.4493535
## edu(12,16]
## -0.1843211
##
## Degrees of Freedom: 522 Total (i.e. Null); 518 Residual
##
## Scale Link: identity
## Estimated Scale Parameters: [1] 0.9996987
##
## Correlation: Structure = exchangeable Link = identity
## Estimated Correlation Parameters:
## alpha
## 0.1526383
##
## Number of clusters: 174 Maximum cluster size: 3
##
##
## [[13]]
##
## Call:
## geeglm(formula = dep ~ as.factor(followup) + edu, family = binomial(),
## data = u, id = as.factor(pid), waves = followup, corstr = "exchangeable")
##
## Coefficients:
## (Intercept) as.factor(followup)1 as.factor(followup)2
## -0.1378185 -0.8627547 -0.9845721
## edu(12,16]
## -0.1649201
##
## Degrees of Freedom: 522 Total (i.e. Null); 518 Residual
##
## Scale Link: identity
## Estimated Scale Parameters: [1] 1.000149
##
## Correlation: Structure = exchangeable Link = identity
## Estimated Correlation Parameters:
## alpha
## 0.1846641
##
## Number of clusters: 174 Maximum cluster size: 3
##
##
## [[14]]
##
## Call:
## geeglm(formula = dep ~ as.factor(followup) + edu, family = binomial(),
## data = u, id = as.factor(pid), waves = followup, corstr = "exchangeable")
##
## Coefficients:
## (Intercept) as.factor(followup)1 as.factor(followup)2
## -0.1436969 -0.8041486 -0.9531188
## edu(12,16]
## -0.1238652
##
## Degrees of Freedom: 522 Total (i.e. Null); 518 Residual
##
## Scale Link: identity
## Estimated Scale Parameters: [1] 1.000352
##
## Correlation: Structure = exchangeable Link = identity
## Estimated Correlation Parameters:
## alpha
## 0.2006754
##
## Number of clusters: 174 Maximum cluster size: 3
##
##
## [[15]]
##
## Call:
## geeglm(formula = dep ~ as.factor(followup) + edu, family = binomial(),
## data = u, id = as.factor(pid), waves = followup, corstr = "exchangeable")
##
## Coefficients:
## (Intercept) as.factor(followup)1 as.factor(followup)2
## -0.162583871 -0.832772912 -1.328824507
## edu(12,16]
## 0.009214223
##
## Degrees of Freedom: 522 Total (i.e. Null); 518 Residual
##
## Scale Link: identity
## Estimated Scale Parameters: [1] 0.9999835
##
## Correlation: Structure = exchangeable Link = identity
## Estimated Correlation Parameters:
## alpha
## 0.1428201
##
## Number of clusters: 174 Maximum cluster size: 3
##
##
## [[16]]
##
## Call:
## geeglm(formula = dep ~ as.factor(followup) + edu, family = binomial(),
## data = u, id = as.factor(pid), waves = followup, corstr = "exchangeable")
##
## Coefficients:
## (Intercept) as.factor(followup)1 as.factor(followup)2
## -0.1348556 -0.8335516 -1.4913498
## edu(12,16]
## -0.1853785
##
## Degrees of Freedom: 522 Total (i.e. Null); 518 Residual
##
## Scale Link: identity
## Estimated Scale Parameters: [1] 0.9997413
##
## Correlation: Structure = exchangeable Link = identity
## Estimated Correlation Parameters:
## alpha
## 0.133694
##
## Number of clusters: 174 Maximum cluster size: 3
##
##
## [[17]]
##
## Call:
## geeglm(formula = dep ~ as.factor(followup) + edu, family = binomial(),
## data = u, id = as.factor(pid), waves = followup, corstr = "exchangeable")
##
## Coefficients:
## (Intercept) as.factor(followup)1 as.factor(followup)2
## -0.1825086 -0.7756921 -1.3295410
## edu(12,16]
## 0.1477374
##
## Degrees of Freedom: 522 Total (i.e. Null); 518 Residual
##
## Scale Link: identity
## Estimated Scale Parameters: [1] 1.000435
##
## Correlation: Structure = exchangeable Link = identity
## Estimated Correlation Parameters:
## alpha
## 0.146908
##
## Number of clusters: 174 Maximum cluster size: 3
##
##
## [[18]]
##
## Call:
## geeglm(formula = dep ~ as.factor(followup) + edu, family = binomial(),
## data = u, id = as.factor(pid), waves = followup, corstr = "exchangeable")
##
## Coefficients:
## (Intercept) as.factor(followup)1 as.factor(followup)2
## -0.1274522 -0.9234518 -1.4920918
## edu(12,16]
## -0.2379322
##
## Degrees of Freedom: 522 Total (i.e. Null); 518 Residual
##
## Scale Link: identity
## Estimated Scale Parameters: [1] 0.999736
##
## Correlation: Structure = exchangeable Link = identity
## Estimated Correlation Parameters:
## alpha
## 0.1347016
##
## Number of clusters: 174 Maximum cluster size: 3
##
##
## [[19]]
##
## Call:
## geeglm(formula = dep ~ as.factor(followup) + edu, family = binomial(),
## data = u, id = as.factor(pid), waves = followup, corstr = "exchangeable")
##
## Coefficients:
## (Intercept) as.factor(followup)1 as.factor(followup)2
## -0.1878199 -0.6111493 -1.0164787
## edu(12,16]
## 0.1865089
##
## Degrees of Freedom: 522 Total (i.e. Null); 518 Residual
##
## Scale Link: identity
## Estimated Scale Parameters: [1] 0.999227
##
## Correlation: Structure = exchangeable Link = identity
## Estimated Correlation Parameters:
## alpha
## 0.1673019
##
## Number of clusters: 174 Maximum cluster size: 3
##
##
## [[20]]
##
## Call:
## geeglm(formula = dep ~ as.factor(followup) + edu, family = binomial(),
## data = u, id = as.factor(pid), waves = followup, corstr = "exchangeable")
##
## Coefficients:
## (Intercept) as.factor(followup)1 as.factor(followup)2
## -0.16448151 -0.69151764 -1.04770363
## edu(12,16]
## 0.02242106
##
## Degrees of Freedom: 522 Total (i.e. Null); 518 Residual
##
## Scale Link: identity
## Estimated Scale Parameters: [1] 1.000015
##
## Correlation: Structure = exchangeable Link = identity
## Estimated Correlation Parameters:
## alpha
## 0.162133
##
## Number of clusters: 174 Maximum cluster size: 3
depimp.parms <- lapply(depimp.gee,coefficients)
depimp.ses <- lapply(depimp.gee,FUN=function(u){
sqrt(diag(u$geese$vbeta))
})
For this GEE, we cannot use MICE for the pooling, but we can use the mi.inference function from the norm
package.
library(norm)
mi.parms = mi.inference(depimp.parms,depimp.ses,confidence=0.95)
mi.results = cbind(mi.parms$est,mi.parms$std.err,mi.parms$lower,mi.parms$upper, mi.parms$df)
colnames(mi.results) <- c("Est","StdErr","Lower","Upper","DF")
You practice! Interpret the results from this GEE using MI. Do our answers mirror those in the complete case analysis?
mi.results
## Est StdErr Lower Upper
## (Intercept) -0.15661195 0.1623232 -0.4747640 0.1615401
## as.factor(followup)1 -0.80295921 0.2396829 -1.2733186 -0.3325998
## as.factor(followup)2 -1.19467533 0.3304510 -1.8541355 -0.5352152
## edu(12,16] -0.03268707 0.3008989 -0.6241564 0.5587823
## DF
## (Intercept) 85893.28091
## as.factor(followup)1 965.56001
## as.factor(followup)2 67.68586
## edu(12,16] 416.61284