# R code for chapter chapter 54 repeated_measures_mixed_models_gee
# Book details:
# Health Science Statistics using R and R Commander
# by Robin Beaumont
# Paperback - ISBN 9781907904318
# http://www.amazon.co.uk/Health-Science-Statistics-using-Commander/dp/190790431X
# or direct from publisher at: 
# http://www.scionpublishing.com/
# book website: http://www.robin-beaumont.co.uk/rbook
###########################################################
# This file is PART 2 of the chapter. Section 54.11 to end
# This file is PART 2 of the chapter. Section 54.11 to end
###########################################################
# dataset:
load(file="D:\\web_sites_mine\\HIcourseweb new\\book2data\\bb_long.rdata")
# install.packages("lattice")
library(lattice)
# install.packages("lme4")
library(lme4)

###############
############### three  models  to produce the regression lines
#  with all predictors 
#  also  use na.action = na.omit) below
# 1|subject in previous file was the random intercept model

bb_rintercept <- lmer(bdi ~ bdi_pre + month +treat + drug + length + (1|subject),data=bb_long[bb_long$month != 0,], 
REML = FALSE, na.action= na.omit)
bb_rintercept
###
# now(month|subject) gives different intercept and different slope value for each patient
bb_rintercept_slope <- 
lmer(bdi ~ bdi_pre + month +treat + drug + length + (month|subject),
data=bb_long[bb_long$month != 0,], 
REML = FALSE, na.action= na.omit)
bb_rintercept_slope
#####
## The above allows the two random terms to be correlated
# to stop this correlation we include each term
# in a separate bracket: 
bb_rintercept_slope_ind <- 
lmer(bdi ~ bdi_pre + month +treat + drug + length+
(1|subject) + (0 +month|subject),
data=bb_long[bb_long$month != 0,], 
REML = FALSE, na.action= na.omit)
bb_rintercept_slope_ind
# to obtain standard errors:

# install.packages("lmerTest")  
#only need above line of code first time
library(lmerTest)
anova(bb_rintercept) # defined above
# compare models using lower case anova() in the lmerTest package
anova(bb_rintercept, bb_rintercept_slope)
#
## graphing mutliple regression lines section of chapter . . .
############### two new models for use to produce the regression lines
#  which only use significant predictors bdi_pre + month
bb_rintercept2 <- 
lmer(bdi ~ bdi_pre + month +(1|subject),
data=bb_long[bb_long$month != 0,], REML = FALSE, na.action= na.omit)
summary(bb_rintercept2)
# note bb_long[bb_long$month != 0,] has 400 rows
# number of bdi na's in it = 120
# so total number of possible estimates =280 
bb_rintercept_slope2 <- lmer(bdi ~ bdi_pre + month  + (month|subject),
data=bb_long[bb_long$month != 0,], REML = FALSE, na.action= na.omit)
bb_rintercept_slope2
##
######################### using sarkers general approach but with model coefficiants
##########################  

temp1 <- as.data.frame(coef(bb_rintercept2)[[1]]) # removes the attribute problem
row.names(temp1) # works returns subject no
temp1 <- cbind(temp1, subject = as.numeric(row.names(temp1)))
temp1
str(temp1)

temp2 <- as.data.frame(coef(bb_rintercept_slope2)[[1]]) # removes the attribute problem
row.names(temp2) # works returns subject no
temp2 <- cbind(temp2, subject = as.numeric(row.names(temp2)))
temp2
str(temp2)

## now we need to convert the original dataset to have the same form, by selecting only 
# above columns to leave us with month, subject, bdi
# originaldata <-  bb_long[,c("month", "subject", "bdi")]
# str(originaldata) 
############
# combining as columns 

allcoefs <- merge(temp1, temp2, by="subject", all = TRUE)
allcoefs
 names(allcoefs)
#[1] "subject"  "(Intercept).x" "bdi_pre.x" "month.x" "(Intercept).y" "bdi_pre.y" "month.y"      
names(allcoefs) <- c("subject", "ri_intercept", "ri_bdi_pre",   "ri_month", 
					  "rs_intercept", "rs_bdi_pre",   "rs_month")
str(allcoefs)  # check subject is a number not a factor

# now create a column with 1 to 100 for the calculated coefficiants and null values
subjectsall <- data.frame(subject = 1:100)
str(subjectsall) 
# can have problems if subject is a factor  optional line below converts it's labels to numeric
# allcoefs$subject <- as.numeric(as.character(allcoefs$subject))
str(allcoefs) # checking subject is a number in allcoefs 
allcoefs
allcoefs2 <- merge(subjectsall, allcoefs, by="subject", all = TRUE) 
names(allcoefs2)
#######

xyplot(bdi ~ month | subject, data= bb_long, layout=c(25,4),
			aspect="fill", type = c("p"), pch=4, grid= FALSE, cex=.6,
			 par.strip.text=list(cex=0.6),
             coef.list = allcoefs2, # provides raw intercept + slope x2
             panel = function(x,y,..., coef.list, subscripts ) 
				{
				yadd1 <- y * (coef.list[packet.number(),3]) + (coef.list[packet.number(),2])
				xadd1<- coef.list[packet.number(),4] 
				panel.xyplot(x,y,...)
				# abline only works if you specify the first element of x,
				# i.e. the first of the 5 repeated bdi measures for each panel = yadd[1]
                 panel.abline(a=yadd1[1], b= xadd1 )
				 yadd2 <- y * (coef.list[packet.number(),6]) + (coef.list[packet.number(),5])
				 xadd2<- coef.list[packet.number(),7]
				panel.abline(a=yadd2[1], b= xadd2 ) # below adds slope ce
				panel.text(5, 40, paste("r slope", "\n ", round(xadd2[1],3)), cex = .5, font = 1) 
				}
			)
			
########################################
# single plot  development

# need line below to stop error orruring
allcoefs2[is.na(allcoefs2)] <- 0

xyplot(bdi ~ month,  bb_long, aspect="fill", groups=subject,
grid= FALSE, cex=.5, ylim=c(0, 60),
coef.list = allcoefs2, #provides raw intercept + slope coefficiants
	panel = function(x,y,coef.list,    ...) { 
	panel.xyplot(x,y)
	panel.superpose(x, y, coef.list, ...,
	panel.groups = function(x,y, col, coef.list, group.number,  col.symbol,...) {
	na.action = na.exclude
	yadd2 <- y[1] * coef.list[group.number,6] + coef.list[group.number,5]
	xadd2<- coef.list[group.number,7]
	panel.abline(a=yadd2, b= xadd2 )  
	  panel.text(5,300 - 3*group.number, xadd2, cex=.5)
		} #end panel groups function
		) # end panel groups function
	} # end panel function
) # end xyplot

################# gee analysis
# install.packages("geepack")
library(geepack)

bb_gee_indeptest <- geeglm(bdi ~ bdi_pre + month + treat + drug + length,
data=bb_long[bb_long$month != 0,], id=subject, family=gaussian,
corstr = "independence")
summary(bb_gee_indeptest)

bb_gee_exch <- geeglm(bdi ~ bdi_pre + month + treat + drug + length,
data=bb_long[bb_long$month != 0,], id=subject, family=gaussian,
corstr = "exchangable")
summary(bb_gee_exch)

bb_gee_ar1 <- geeglm(bdi ~ bdi_pre + month  + treat + drug + length,
data=bb_long[bb_long$month != 0,], id=subject, family=gaussian,
corstr = "ar1")
summary(bb_gee_ar1)

bb_gee_unstruct <- geeglm(bdi ~ bdi_pre + month  + treat + drug + length,
data=bb_long[bb_long$month != 0,], id=subject, family=gaussian,
corstr = "unstructured")
summary(bb_gee_unstruct)
coef(summary(bb_gee_unstruct))
coef(bb_gee_unstruct)
bb_gee_unstruct$geese$alpha

#  compare different models 
# Can't compare different geeglms with same fixed parameter:
# next line says all same
# anova(bb_gee_indep, bb_gee_exch, bb_gee_ar1,bb_gee_unstruct )
#  see below for solution
#  some extractor functions ignore
# bb_gee_unstruct$geese$alpha
# bb_gee_indep$geese$vbeta.naiv
# bb_gee_indep$geese$vbeta
# bb_gee_indep$geese$zcor

##### comparing working correlation matrices in Gee
# MESS library
install.packages("MESS")
library(MESS)

QIC(bb_gee_indeptest, bb_gee_exch, bb_gee_ar1, bb_gee_unstruct)

#To get naive and robust se's to compare differences 
install.packages("gee")

library(gee)
bb_gee2_indeptest <- gee(bdi ~ bdi_pre + month + treat + drug + length,
data=bb_long[bb_long$month != 0,], id=subject, family=gaussian,
corstr = "independence")
summary(bb_gee2_indeptest)

bb_gee2_exch2 <- gee(bdi ~ bdi_pre + month + treat + drug + length,
data=bb_long[bb_long$month != 0,], id=subject, family=gaussian,
corstr = "exchangeable")
summary(bb_gee2_exch2)