# R code for chapter 54 repeated_measures_mixed_models_gee # Book details: # Health Science Statistics using R and R Commander # by Robin Beaumont # Paperback - ISBN 9781907904318 # http://www.amazon.co.uk/Health-Science-Statistics-using-Commander/dp/190790431X # or direct from publisher at: # http://www.scionpublishing.com/ # book website: http://www.robin-beaumont.co.uk/rbook ########################################################### # # This file is PART 1 of the chapter # # web: load("http://www.robin-beaumont.co.uk/virtualclassroom/stats/statistics2/data/lmm3/bb_wide.rdata") #### summary(bb_wide) # need to rename variables one off: # names(bb_wide)[names(bb_wide)=="bdi_3"]<-"bdi_3m" # factor(bb_wide,labels= c("bdi_pre","bdi_2m", "bdi_3", "bdi_5m", "bdi_8m")) names(bb_wide) # [1] "subject" "drug" "length" "treat" "bdi_pre" "bdi_2m" "bdi_3m" # [8] "bdi_5m" "bdi_8m" attach(bb_wide) # boxplot of entire sample boxplot(bdi_pre, bdi_2m, bdi_3m, bdi_5m, bdi_8m , col = "lavender", main= "entire sample", data = bb_wide) # now boxplots with the subset commands - do not work!! # boxplot(bdi_pre, bdi_2m, bdi_3m, bdi_5m, bdi_8m , # col = "lavender", main= "TAU sample", data = bb_wide[bb_wide$treat == "TAU",]) # boxplot(bdi_pre, bdi_2m, bdi_3, bdi_5m, bdi_8m , col = "lavender", # main= "BtheB sample", subset= treat =="BtheB") # above subset commands do not work!! # So try alternative way: # use two subsets of the data using the 'with' command' with( subset(bb_wide, treat == "BtheB"), boxplot(bdi_pre, bdi_2m, bdi_3m, bdi_5m, bdi_8m , col = "lavender", main= "BtheBdata sample") ) with ( subset(bb_wide, treat == "TAU"), boxplot(bdi_pre, bdi_2m, bdi_3m, bdi_5m, bdi_8m , col = "lavender", main= "TAU sample") ) ############ now profile plots ############ need to get all the dbi observation ina single column # for the profile plots # install.packages("reshape2") library(reshape2) bb_long<- melt(bb_wide, id.vars = c("subject","drug","length","treat", "bdi_pre"), measure.vars= c("bdi_pre", "bdi_2m","bdi_3m","bdi_5m","bdi_8m" ), variable.name= "time", value.name="bdi" ) bb_long # the above works but you can leave out all the measure vars and let it default to them. # now create a new variable month bb_long$month[bb_long$time=="bdi_pre"] <- 0 bb_long$month[bb_long$time=="bdi_2m"] <- 2 bb_long$month[bb_long$time=="bdi_3m"] <- 3 bb_long$month[bb_long$time=="bdi_5m"] <- 5 bb_long$month[bb_long$time=="bdi_8m"] <- 8 attach(bb_long) head(bb_long) names (bb_long) # [1] "subject" "drug" "length" "treat" "time" "bdi" "month" # not necessary to order data for lme4 lmer function only for # the gee analysis?? results checked bb_long <- bb_long[order(bb_long$subject,bb_long$month),] bb_long # install.packages("lattice") library(lattice) xyplot(bdi ~ month | treat, data = bb_long) xyplot(bdi ~ month | treat, data = bb_long, type="b") xyplot(bdi ~ month | treat, data = bb_long, type="b",col= "grey", alpha = 0.2) # for lines with no markers use lower case L, "l" for type xyplot(bdi ~ month | treat, data = bb_long, type="l",col= "grey", alpha = 0.2) names(bb_wide) # [1] "subject" "drug" "length" "treat" "bdi_pre" "bdi_2m" "bdi_3m" # [8] "bdi_5m" "bdi_8m" ################ now compare correlations # psych library # install.packages("psych") library(psych) corr.test(bb_wide[bb_wide$treat == "TAU",5:9], adjust="none") corr.test(bb_wide[bb_wide$treat == "BtheB",5:9], adjust="none") # and overall corr.test(bb_wide[,5:9], adjust="none") ################################### ########### model development at last # Naive analysis bb_wrong <- lm(bdi ~ bdi_pre + month +treat + drug + length, data=bb_long[bb_long$month != 0,]) summary(bb_wrong) ############ install.packages("nlme") library(nlme) bb_wrong2 <- gls(bdi ~ bdi_pre + month +treat + drug + length, data=bb_long[bb_long$month != 0,], method ="ML", na.action= na.omit) summary(bb_wrong2) bb_intercept_only <- gls(bdi ~ 1, data=bb_long[bb_long$month != 0,], method ="ML", na.action= na.omit) summary(bb_intercept_only) # install.packages("lme4") library(lme4) # note in the lmer() function # add verbose = TRUE to get the iteration history bb_rintercept <- lmer(bdi ~ bdi_pre + month +treat + drug + length + (1|subject), data=bb_long[bb_long$month != 0,], REML = FALSE, na.action = na.exclude) bb_rintercept summary(bb_rintercept) # confidence intervals - only works if latest version installed # fixed effects pr01 <- profile(bb_rintercept) pr01 confint(pr01) # from version 1 of lme4 can simply do: confint(bb_rintercept) ######### versions of lme4 >1 do not provide standard errors for fixed effects # need to use a separate package lmerTest install.packages("lmerTest") library(lmerTest) anova(bb_rintercept) #random effects - interval for each level (subject) dotplot(ranef(bb_rintercept, condVar = TRUE)) # redo it to see the subject numbers by reducing font size dotplot(ranef(bb_rintercept, condVar = TRUE),scales=list(cex=.4, col="blue")) # can also find the extreme values using: #note: ‘postVar’ is deprecated: please use ‘condVar’ ran_cc<- as.data.frame(ranef(bb_rintercept, condVar = TRUE)[["subject"]]) #selects the subject column in the list # ran_cc does contain rownames = subject id ran_cc[abs(ran_cc$'(Intercept)') > 12,] # works '(intercept)' is the default name! # below also works and produces same result: excess_rintercept <- ran_cc[(ran_cc$'(Intercept)' > 12 | ran_cc$'(Intercept)' < -12) ,] excess_rintercept # but does not contain row names? ######### # see whats going on # we can create a two column dataframe showing random effect parameter and subject no. # subjects 91, 97 and 100 missing as only single observations for them and no parameter estimate as.integer(rownames(ran_cc)) # works misses out 91, 97 and 100 = subjects dim(ran_cc) # list up to 97 ran2_cc <- cbind(ran_cc, as.integer(rownames(ran_cc))) ran2_cc # give the columns sensible names colnames(ran2_cc) <- c( 'raneffect', 'subjectid') ran2_cc # show it str(ran2_cc) #################### # graphing the regressions for each subject based on the model ## stage one draw the points for each subject library (lattice) xyplot( bdi ~ month | subject ) # does a simple one ## stage two obtain the coefficients from the model # and convert to a dataframe vals1 <- coef(bb_rintercept)[["subject"]] vals1 <- as.data.frame(vals1) vals1 # now have a dataframe with columns representing names names(vals1) # show names of the columns str(vals1) # show the structure of the vals1 object yslope = as.data.frame(cbind(vals1[,1],vals1[,3], as.integer(rownames(vals1)))) # get rid of the parentheses in the name and give other columns names colnames(yslope) <-c("intercept","month", "subid") str(yslope) # but this vector has 97 rows; needs 100 to coinside with the subject levels # add another column with 1 to 100 temp1 <- data.frame(subid= 1:100) temp1 yslope <- merge(yslope, temp1 , by="subid", all = TRUE) yslope yslope[is.na(yslope)] <- 0 yslope ############################## ############################# # below shows regression line not taking into account bdi_pre # not shown in chapter. xyplot(bdi ~ month | subject, bb_long, aspect="xy", type = c("g", "p"), coef.list = yslope[,2:3], panel = function(..., coef.list) { panel.xyplot(...) panel.abline(as.numeric(coef.list[packet.number(),] )) } ) ################## # Now adding the actual bdi_pre value to the intercept # to create correctly positioned line # lattice.options(default.theme = modifyList(standard.theme(color = FALSE), list(strip.background = list(col = "transparent")))) xyplot(bdi ~ month | subject, bb_long, aspect="xy", type = c("p"), pch=4, grid= FALSE, cex=.5, par.strip.text=list(cex=0.5), coef.list = yslope[,2:3], #provides raw intercept + slope coefficiants panel = function(x,y,..., coef.list) { yadd <- 0.64 *y[1] + (coef.list[packet.number(),1]) xadd <- (coef.list[packet.number(),2]) panel.xyplot(x,y,...) panel.abline(a = yadd[1], b= xadd ) panel.text(5, 40, paste(y , "\n ", round(yadd[1],1)), cex = .5, font = 1) # shows bdi_pre } ) # above produces graph but gives a warning error: # Error in x$y1 : REAL() can only be applied to a 'numeric', not a 'NULL' # alternative to above xyplot(bdi ~ month | subject, bb_long, aspect="xy", type = c("p"), pch=4, grid= FALSE, cex=.5, par.strip.text=list(cex=0.5), coef.list = yslope[,2:3], #provides raw intercept + slope coefficiants panel = function(x,y,..., coef.list) { yadd <- 0.64 *y[1] + (coef.list[packet.number(),1]) xadd <- (coef.list[packet.number(),2]) panel.xyplot(x,y,...) panel.abline(a = yadd, b= xadd ) panel.text(5, 40, paste(y , "\n ", round(yadd[1],1)), cex = .5, font = 1) # shows bdi_pre } ) # the above does not give a warning error: # single plots work: xyplot(bdi ~ month, bb_long, aspect="fill", groups=subject, grid= FALSE, cex=.5, ylim=c(-20, 60), coef.list = yslope[,2:3], #provides raw intercept + slope coefficiants panel = function(x,y,coef.list, ...) { panel.xyplot(x,y) panel.superpose(x, y, coef.list, ..., panel.groups = function(x,y, coef.list, group.number,...) { yadd <- 0.64 *y[1] + (coef.list[group.number,1]) xadd <- (coef.list[group.number,2]) alpha =0.6 # panel.text(5,300 - 3*group.number, group.number, cex=.5) panel.abline(a = yadd, b= xadd ) } #end panel groups function ) # end panel groups function } # end panel function ) # end xyplot xyplot(bdi ~ month | treat + drug , bb_long, aspect="fill", groups=subject, grid= FALSE, cex=.5, ylim=c(-20, 60), coef.list = yslope[,2:3], #provides raw intercept + slope coefficiants panel = function(x,y,coef.list, ...) { panel.xyplot(x,y) panel.superpose(x, y, coef.list, ..., panel.groups = function(x,y, coef.list, group.number,...) { yadd <- 0.64 *y[1] + (coef.list[group.number,1]) xadd <- (coef.list[group.number,2]) alpha =0.6 # panel.text(5,300 - 3*group.number, group.number, cex=.5) panel.abline(a = yadd, b= xadd ) } #end panel groups function ) # end panel groups function } # end panel function ) # end xyplot ################# # end of section 54.10 (page 313) # continues in repeated_measures_mixed_models_gee_pt2