# R code for chapter chapter 54 repeated_measures_mixed_models_gee # Book details: # Health Science Statistics using R and R Commander # by Robin Beaumont # Paperback - ISBN 9781907904318 # http://www.amazon.co.uk/Health-Science-Statistics-using-Commander/dp/190790431X # or direct from publisher at: # http://www.scionpublishing.com/ # book website: http://www.robin-beaumont.co.uk/rbook ########################################################### # This file is PART 2 of the chapter. Section 54.11 to end # This file is PART 2 of the chapter. Section 54.11 to end ########################################################### # dataset: load(file="D:\\web_sites_mine\\HIcourseweb new\\book2data\\bb_long.rdata") # install.packages("lattice") library(lattice) # install.packages("lme4") library(lme4) ############### ############### three models to produce the regression lines # with all predictors # also use na.action = na.omit) below # 1|subject in previous file was the random intercept model bb_rintercept <- lmer(bdi ~ bdi_pre + month +treat + drug + length + (1|subject),data=bb_long[bb_long$month != 0,], REML = FALSE, na.action= na.omit) bb_rintercept ### # now(month|subject) gives different intercept and different slope value for each patient bb_rintercept_slope <- lmer(bdi ~ bdi_pre + month +treat + drug + length + (month|subject), data=bb_long[bb_long$month != 0,], REML = FALSE, na.action= na.omit) bb_rintercept_slope ##### ## The above allows the two random terms to be correlated # to stop this correlation we include each term # in a separate bracket: bb_rintercept_slope_ind <- lmer(bdi ~ bdi_pre + month +treat + drug + length+ (1|subject) + (0 +month|subject), data=bb_long[bb_long$month != 0,], REML = FALSE, na.action= na.omit) bb_rintercept_slope_ind # to obtain standard errors: # install.packages("lmerTest") #only need above line of code first time library(lmerTest) anova(bb_rintercept) # defined above # compare models using lower case anova() in the lmerTest package anova(bb_rintercept, bb_rintercept_slope) # ## graphing mutliple regression lines section of chapter . . . ############### two new models for use to produce the regression lines # which only use significant predictors bdi_pre + month bb_rintercept2 <- lmer(bdi ~ bdi_pre + month +(1|subject), data=bb_long[bb_long$month != 0,], REML = FALSE, na.action= na.omit) summary(bb_rintercept2) # note bb_long[bb_long$month != 0,] has 400 rows # number of bdi na's in it = 120 # so total number of possible estimates =280 bb_rintercept_slope2 <- lmer(bdi ~ bdi_pre + month + (month|subject), data=bb_long[bb_long$month != 0,], REML = FALSE, na.action= na.omit) bb_rintercept_slope2 ## ######################### using sarkers general approach but with model coefficiants ########################## temp1 <- as.data.frame(coef(bb_rintercept2)[[1]]) # removes the attribute problem row.names(temp1) # works returns subject no temp1 <- cbind(temp1, subject = as.numeric(row.names(temp1))) temp1 str(temp1) temp2 <- as.data.frame(coef(bb_rintercept_slope2)[[1]]) # removes the attribute problem row.names(temp2) # works returns subject no temp2 <- cbind(temp2, subject = as.numeric(row.names(temp2))) temp2 str(temp2) ## now we need to convert the original dataset to have the same form, by selecting only # above columns to leave us with month, subject, bdi # originaldata <- bb_long[,c("month", "subject", "bdi")] # str(originaldata) ############ # combining as columns allcoefs <- merge(temp1, temp2, by="subject", all = TRUE) allcoefs names(allcoefs) #[1] "subject" "(Intercept).x" "bdi_pre.x" "month.x" "(Intercept).y" "bdi_pre.y" "month.y" names(allcoefs) <- c("subject", "ri_intercept", "ri_bdi_pre", "ri_month", "rs_intercept", "rs_bdi_pre", "rs_month") str(allcoefs) # check subject is a number not a factor # now create a column with 1 to 100 for the calculated coefficiants and null values subjectsall <- data.frame(subject = 1:100) str(subjectsall) # can have problems if subject is a factor optional line below converts it's labels to numeric # allcoefs$subject <- as.numeric(as.character(allcoefs$subject)) str(allcoefs) # checking subject is a number in allcoefs allcoefs allcoefs2 <- merge(subjectsall, allcoefs, by="subject", all = TRUE) names(allcoefs2) ####### xyplot(bdi ~ month | subject, data= bb_long, layout=c(25,4), aspect="fill", type = c("p"), pch=4, grid= FALSE, cex=.6, par.strip.text=list(cex=0.6), coef.list = allcoefs2, # provides raw intercept + slope x2 panel = function(x,y,..., coef.list, subscripts ) { yadd1 <- y * (coef.list[packet.number(),3]) + (coef.list[packet.number(),2]) xadd1<- coef.list[packet.number(),4] panel.xyplot(x,y,...) # abline only works if you specify the first element of x, # i.e. the first of the 5 repeated bdi measures for each panel = yadd[1] panel.abline(a=yadd1[1], b= xadd1 ) yadd2 <- y * (coef.list[packet.number(),6]) + (coef.list[packet.number(),5]) xadd2<- coef.list[packet.number(),7] panel.abline(a=yadd2[1], b= xadd2 ) # below adds slope ce panel.text(5, 40, paste("r slope", "\n ", round(xadd2[1],3)), cex = .5, font = 1) } ) ######################################## # single plot development # need line below to stop error orruring allcoefs2[is.na(allcoefs2)] <- 0 xyplot(bdi ~ month, bb_long, aspect="fill", groups=subject, grid= FALSE, cex=.5, ylim=c(0, 60), coef.list = allcoefs2, #provides raw intercept + slope coefficiants panel = function(x,y,coef.list, ...) { panel.xyplot(x,y) panel.superpose(x, y, coef.list, ..., panel.groups = function(x,y, col, coef.list, group.number, col.symbol,...) { na.action = na.exclude yadd2 <- y[1] * coef.list[group.number,6] + coef.list[group.number,5] xadd2<- coef.list[group.number,7] panel.abline(a=yadd2, b= xadd2 ) panel.text(5,300 - 3*group.number, xadd2, cex=.5) } #end panel groups function ) # end panel groups function } # end panel function ) # end xyplot ################# gee analysis # install.packages("geepack") library(geepack) bb_gee_indeptest <- geeglm(bdi ~ bdi_pre + month + treat + drug + length, data=bb_long[bb_long$month != 0,], id=subject, family=gaussian, corstr = "independence") summary(bb_gee_indeptest) bb_gee_exch <- geeglm(bdi ~ bdi_pre + month + treat + drug + length, data=bb_long[bb_long$month != 0,], id=subject, family=gaussian, corstr = "exchangable") summary(bb_gee_exch) bb_gee_ar1 <- geeglm(bdi ~ bdi_pre + month + treat + drug + length, data=bb_long[bb_long$month != 0,], id=subject, family=gaussian, corstr = "ar1") summary(bb_gee_ar1) bb_gee_unstruct <- geeglm(bdi ~ bdi_pre + month + treat + drug + length, data=bb_long[bb_long$month != 0,], id=subject, family=gaussian, corstr = "unstructured") summary(bb_gee_unstruct) coef(summary(bb_gee_unstruct)) coef(bb_gee_unstruct) bb_gee_unstruct$geese$alpha # compare different models # Can't compare different geeglms with same fixed parameter: # next line says all same # anova(bb_gee_indep, bb_gee_exch, bb_gee_ar1,bb_gee_unstruct ) # see below for solution # some extractor functions ignore # bb_gee_unstruct$geese$alpha # bb_gee_indep$geese$vbeta.naiv # bb_gee_indep$geese$vbeta # bb_gee_indep$geese$zcor ##### comparing working correlation matrices in Gee # MESS library install.packages("MESS") library(MESS) QIC(bb_gee_indeptest, bb_gee_exch, bb_gee_ar1, bb_gee_unstruct) #To get naive and robust se's to compare differences install.packages("gee") library(gee) bb_gee2_indeptest <- gee(bdi ~ bdi_pre + month + treat + drug + length, data=bb_long[bb_long$month != 0,], id=subject, family=gaussian, corstr = "independence") summary(bb_gee2_indeptest) bb_gee2_exch2 <- gee(bdi ~ bdi_pre + month + treat + drug + length, data=bb_long[bb_long$month != 0,], id=subject, family=gaussian, corstr = "exchangeable") summary(bb_gee2_exch2)