# R code for chapter 54 repeated_measures_mixed_models_gee
# Book details:
# Health Science Statistics using R and R Commander
# by Robin Beaumont
# Paperback - ISBN 9781907904318
# http://www.amazon.co.uk/Health-Science-Statistics-using-Commander/dp/190790431X
# or direct from publisher at: 
# http://www.scionpublishing.com/
# book website: http://www.robin-beaumont.co.uk/rbook
###########################################################
# 
# This file is PART 1 of the chapter
# 
# web: load("http://www.robin-beaumont.co.uk/virtualclassroom/stats/statistics2/data/lmm3/bb_wide.rdata")
####
summary(bb_wide)
#  need to rename variables one off:
# names(bb_wide)[names(bb_wide)=="bdi_3"]<-"bdi_3m"
# factor(bb_wide,labels= c("bdi_pre","bdi_2m", "bdi_3", "bdi_5m", "bdi_8m"))

names(bb_wide)
# [1] "subject" "drug"    "length"  "treat"   "bdi_pre" "bdi_2m"  "bdi_3m"  
# [8] "bdi_5m"  "bdi_8m"
attach(bb_wide)
# boxplot of entire sample
boxplot(bdi_pre, bdi_2m, bdi_3m, bdi_5m, bdi_8m ,  col = "lavender", 
main= "entire sample", data = bb_wide)
#  now boxplots with the subset commands - do not work!!
# boxplot(bdi_pre, bdi_2m, bdi_3m, bdi_5m, bdi_8m ,  
# col = "lavender", main= "TAU sample", data = bb_wide[bb_wide$treat == "TAU",])
# boxplot(bdi_pre, bdi_2m, bdi_3, bdi_5m, bdi_8m ,  col = "lavender", 
# main= "BtheB sample", subset= treat =="BtheB")
# above subset commands do not work!!
# So try alternative way:
# use two subsets of the data using the 'with' command'
with(
subset(bb_wide,  treat == "BtheB"),
boxplot(bdi_pre, bdi_2m, bdi_3m, bdi_5m, bdi_8m ,  col = "lavender", 
main= "BtheBdata sample")
)
with ( 
subset(bb_wide,  treat == "TAU"),
boxplot(bdi_pre, bdi_2m, bdi_3m, bdi_5m, bdi_8m ,  col = "lavender", 
main= "TAU sample")
)

############ now profile plots
############  need to get all the dbi observation ina single column
#           for the profile plots
# 
install.packages("reshape2")
library(reshape2)

bb_long<- melt(bb_wide, id.vars = c("subject","drug","length","treat", "bdi_pre"),
measure.vars= c("bdi_pre", "bdi_2m","bdi_3m","bdi_5m","bdi_8m" ),
variable.name= "time", value.name="bdi"  )
bb_long
# the above works but you can leave out all the measure vars and let it default to them.

# now create a new variable month
bb_long$month[bb_long$time=="bdi_pre"] <- 0
bb_long$month[bb_long$time=="bdi_2m"] <- 2
bb_long$month[bb_long$time=="bdi_3m"] <- 3
bb_long$month[bb_long$time=="bdi_5m"] <- 5
bb_long$month[bb_long$time=="bdi_8m"] <- 8
attach(bb_long)

head(bb_long)
names (bb_long)
# [1] "subject" "drug"    "length"  "treat"   "time"    "bdi"     "month"  

#  not necessary to order data for lme4 lmer function only for 
#  the gee analysis??  results checked 
bb_long <- bb_long[order(bb_long$subject,bb_long$month),]
bb_long

# install.packages("lattice")
library(lattice)

xyplot(bdi ~ month | treat, data = bb_long)
xyplot(bdi ~ month | treat, data = bb_long, type="b")
xyplot(bdi ~ month | treat, data = bb_long, type="b",col= "grey", alpha = 0.2)
# for lines with no markers use lower case L, "l" for type
xyplot(bdi ~ month | treat, data = bb_long, type="l",col= "grey", alpha = 0.2)

names(bb_wide)
# [1] "subject" "drug"    "length"  "treat"   "bdi_pre" "bdi_2m"  "bdi_3m"  
# [8] "bdi_5m"  "bdi_8m"
################ now compare correlations
# psych library
# install.packages("psych")
library(psych)
corr.test(bb_wide[bb_wide$treat == "TAU",5:9], adjust="none") 
corr.test(bb_wide[bb_wide$treat == "BtheB",5:9], adjust="none") 
# and overall
corr.test(bb_wide[,5:9], adjust="none") 
###################################
########### model development at last
# Naive analysis
bb_wrong <- lm(bdi ~ bdi_pre + month +treat + drug + length,
data=bb_long[bb_long$month != 0,])
summary(bb_wrong)
############
install.packages("nlme")
library(nlme)
bb_wrong2 <- gls(bdi ~ bdi_pre + month +treat + drug + length,
data=bb_long[bb_long$month != 0,], method ="ML", na.action= na.omit)
summary(bb_wrong2)

bb_intercept_only <- gls(bdi ~ 1,
data=bb_long[bb_long$month != 0,], method ="ML", na.action= na.omit)
summary(bb_intercept_only)
#
 install.packages("lme4")
library(lme4)
# note in the lmer() function
# add verbose = TRUE to get the iteration history

bb_rintercept <- lmer(bdi ~ bdi_pre + month +treat + drug + length + (1|subject),
data=bb_long[bb_long$month != 0,], REML = FALSE, na.action = na.exclude)
bb_rintercept
summary(bb_rintercept)

# confidence intervals - only works if latest version installed
# fixed effects
pr01 <- profile(bb_rintercept)
pr01
confint(pr01)
# from version 1 of lme4 can simply do:
confint(bb_rintercept)

#########  versions of lme4 >1 do not provide standard errors for fixed effects
# need to use a separate package lmerTest
 install.packages("lmerTest")
library(lmerTest)
anova(bb_rintercept)

#random effects - interval for each level (subject)
dotplot(ranef(bb_rintercept, condVar = TRUE))

# redo it to see the subject numbers by reducing font size
dotplot(ranef(bb_rintercept, condVar = TRUE),scales=list(cex=.4, col="blue"))

#  can also find the extreme values using: #note: ‘postVar’ is deprecated: please use ‘condVar’
ran_cc<- as.data.frame(ranef(bb_rintercept, condVar = TRUE)[["subject"]]) #selects the subject column in the list
# ran_cc does contain rownames = subject id
ran_cc[abs(ran_cc$'(Intercept)') > 12,]  # works '(intercept)' is the default name!
# below also works and produces same result:
excess_rintercept <- ran_cc[(ran_cc$'(Intercept)' > 12 | ran_cc$'(Intercept)' < -12) ,] 
excess_rintercept # but does not contain row names?
#########
# see whats going on 
# we can create a two column dataframe showing random effect parameter and subject no.
# subjects 91, 97 and 100 missing as only single observations for them and no parameter estimate
as.integer(rownames(ran_cc)) # works misses out 91, 97 and 100 = subjects
dim(ran_cc) # list up to 97
ran2_cc <- cbind(ran_cc, as.integer(rownames(ran_cc)))
ran2_cc
# give the columns sensible names
colnames(ran2_cc) <- c( 'raneffect', 'subjectid')
ran2_cc  # show it
str(ran2_cc)

####################
# graphing the regressions for each subject based on the model
## stage one draw the points for each subject
 library (lattice)
xyplot( bdi ~ month | subject ) # does a simple one
## stage two obtain the coefficients from the model 
# and convert to a dataframe
vals1 <- coef(bb_rintercept)[["subject"]]
vals1 <- as.data.frame(vals1)
vals1 # now have a dataframe with columns representing names
names(vals1) # show names of the columns
str(vals1) # show the structure of the vals1 object

yslope = as.data.frame(cbind(vals1[,1],vals1[,3],  as.integer(rownames(vals1))))
# get rid of the parentheses in the name and give other columns names
colnames(yslope) <-c("intercept","month",  "subid")
str(yslope) 
# but this vector has 97 rows; needs 100 to coinside with the subject levels
# add another column with 1 to 100 
temp1 <- data.frame(subid= 1:100)
temp1
yslope <- merge(yslope, temp1 , by="subid", all = TRUE) 
yslope
yslope[is.na(yslope)] <- 0
yslope
##############################

#############################
# below shows regression line not taking into account bdi_pre 
# not shown in chapter. 
xyplot(bdi ~ month | subject, bb_long, aspect="xy", type = c("g", "p"), 
             coef.list = yslope[,2:3],
             panel = function(..., coef.list) 
 {
panel.xyplot(...)
panel.abline(as.numeric(coef.list[packet.number(),] ))
} )
##################
# Now adding the actual bdi_pre value to the intercept 
# to create correctly positioned line
# lattice.options(default.theme = modifyList(standard.theme(color = FALSE), list(strip.background = list(col = "transparent"))))
			 xyplot(bdi ~ month | subject, bb_long, aspect="xy", 
			 type = c("p"), pch=4, grid= FALSE, cex=.5,
			 par.strip.text=list(cex=0.5),
             coef.list = yslope[,2:3], #provides raw intercept + slope coefficiants
             panel = function(x,y,..., coef.list) 
				{ 
				yadd <- 0.64 *y[1]  + (coef.list[packet.number(),1])
				xadd <- (coef.list[packet.number(),2])
                 panel.xyplot(x,y,...)
                 panel.abline(a = yadd[1], b= xadd )
				 panel.text(5, 40, paste(y , "\n ", round(yadd[1],1)), cex = .5, font = 1) 
				 # shows bdi_pre
				}
			)
# above produces graph but gives a warning error:
# Error in x$y1 : REAL() can only be applied to a 'numeric', not a 'NULL'			
# alternative to above
			 xyplot(bdi ~ month | subject, bb_long, aspect="xy", 
			 type = c("p"), pch=4, grid= FALSE, cex=.5,
			 par.strip.text=list(cex=0.5),
             coef.list = yslope[,2:3], #provides raw intercept + slope coefficiants
             panel = function(x,y,..., coef.list) 
				{ 
				yadd <- 0.64 *y[1]  + (coef.list[packet.number(),1])
				xadd <- (coef.list[packet.number(),2])
                 panel.xyplot(x,y,...)
                 panel.abline(a = yadd, b= xadd )
				 panel.text(5, 40, paste(y , "\n ", round(yadd[1],1)), cex = .5, font = 1) 
				 # shows bdi_pre
				}
			)			
# the above does not give a warning error:

			# single plots  work:
xyplot(bdi ~ month, bb_long, aspect="fill", groups=subject,
grid= FALSE, cex=.5, ylim=c(-20, 60),
coef.list = yslope[,2:3], #provides raw intercept + slope coefficiants
	panel = function(x,y,coef.list,    ...) { 
	panel.xyplot(x,y)
	panel.superpose(x, y, coef.list, ...,
	panel.groups = function(x,y, coef.list, group.number,...) {
		yadd <- 0.64 *y[1] + (coef.list[group.number,1])
		xadd <- (coef.list[group.number,2])
		alpha =0.6
		# panel.text(5,300 - 3*group.number, group.number, cex=.5)
		panel.abline(a = yadd, b= xadd )
		} #end panel groups function
		) # end panel groups function
	} # end panel function
) # end xyplot			


xyplot(bdi ~ month | treat + drug , bb_long, aspect="fill", groups=subject,
grid= FALSE, cex=.5, ylim=c(-20, 60),
coef.list = yslope[,2:3], #provides raw intercept + slope coefficiants
	panel = function(x,y,coef.list,    ...) { 
	panel.xyplot(x,y)
	panel.superpose(x, y, coef.list, ...,
	panel.groups = function(x,y, coef.list, group.number,...) {
		yadd <- 0.64 *y[1] + (coef.list[group.number,1])
		xadd <- (coef.list[group.number,2])
		alpha =0.6
		# panel.text(5,300 - 3*group.number, group.number, cex=.5)
		panel.abline(a = yadd, b= xadd )
		} #end panel groups function
		) # end panel groups function
	} # end panel function
) # end xyplot	
#################
# end of section 54.10 (page 313)
# continues in repeated_measures_mixed_models_gee_pt2