# chapter: Comparing several independent categories: Contingency tables
# Taking the first table as an example, this is how you would calculate the
# chi-square statistic the long way round, i.e. from scratch:
# Contribution of a single cell to the Pearson chi-square statistic:
# (observed - expected)^2 / expected.
chi_contribution <- function(observed, expected) {
  (observed - expected)^2 / expected
}

# Each call is chi_contribution(observed count, expected count) for one cell
# of the 3 x 4 table; cellRC is the cell in row R, column C. The expected
# counts are typed in by hand to one decimal place.
cell34 <- chi_contribution(31, 26.9)
cell34
cell33 <- chi_contribution(7, 5.3)
cell33
cell32 <- chi_contribution(9, 12.1)
cell32
cell31 <- chi_contribution(28, 30.8)
cell31
cell24 <- chi_contribution(31, 37.7)
cell24
cell23 <- chi_contribution(8, 7.4)
cell23
cell22 <- chi_contribution(22, 16.9)
cell22
cell21 <- chi_contribution(44, 43.1)
cell21
cell14 <- chi_contribution(476, 473.4)
cell14
cell13 <- chi_contribution(90, 92.4)
cell13
cell12 <- chi_contribution(211, 213.0)
cell12
cell11 <- chi_contribution(543, 541.2)
cell11

# The chi-square statistic is the sum of the twelve cell contributions.
totalchi <- sum(
  cell34, cell33, cell32, cell31,
  cell24, cell23, cell22, cell21,
  cell14, cell13, cell12, cell11
)
totalchi

# To get the p-value take the area under the chi-square curve to the right of
# totalchi, with df = (rows - 1) * (columns - 1) = (3 - 1) * (4 - 1) = 6.
# lower.tail = FALSE asks for that upper-tail area directly, which avoids the
# rounding error of computing 1 - pchisq(...) when the p-value is very small.
pchisq(totalchi, df = 6, lower.tail = FALSE)

# chi square with counts
# matrix() fills column by column, so the counts are entered one column
# (antibiotic) at a time:
#   first column  (Amoxicillin)  = 144, 96   (column total 240)
#   second column (Erythromycin) = 160, 80   (column total 240)
# you do not need to enter the totals
thedata <- matrix(
  c(144, 96, 160, 80),
  nrow = 2, ncol = 2,
  dimnames = list(
    outcome = c("improved at 5 days", "NO improvement at 5 days"),
    antibiotic = c("Amoxicillin", "Erythromycin")
  )
)
thedata # get a printout of the data

# For a 2 x 2 table chisq.test() applies the continuity correction by default
# (correct = TRUE); this affects the statistic and p-value, not the tables
# printed below.
result <- chisq.test(thedata)
result$observed
result$expected
# The component is called "residuals" (Pearson residuals); spell the name out
# in full instead of relying on partial matching by `$`.
result$residuals



# Blood group by condition table, entered one row at a time (byrow = TRUE).
# The rows run severe, mild, absent and the columns run O, AB, B, A
# (for example 543 is the absent / blood group A count), so the labels have
# to be supplied in exactly that order. Listing them the other way round
# (absent..severe, A..O) silently attaches the wrong label to every count.
d <- matrix(
  c(
    31, 7, 9, 28,
    31, 8, 22, 44,
    476, 90, 211, 543
  ),
  ncol = 4, byrow = TRUE
)
col_names <- c("O", "AB", "B", "A")
row_names <- c("severe", "mild", "absent")
dimnames(d) <- list(condition = row_names, blood_group = col_names)
d

result <- chisq.test(d)
result$observed
result$expected
# Pearson residuals: (observed - expected) / sqrt(expected). The component is
# named "residuals"; use the full name rather than `$` partial matching.
result$residuals
# Squared residuals are the per-cell contributions to chi-square ...
result$residuals^2
# ... and their sum is the chi-square statistic itself.
sum(result$residuals^2)

## also calculate a p value by simulation (better technique if expected cell count is < 5)
# The simulated p-value changes slightly from run to run unless set.seed()
# is called beforehand.
result <- chisq.test(d, simulate.p.value = TRUE, B = 10000)
result

###############
# Section 5 larger tables

# matrix( c(col 1 data, col 2 data etc),  nrow = 3, ncol=4) 
# define the names for each level
# Name the rows and then the columns. In this version of the table the severe
# group is given a very high count for blood group A (58) and a very low count
# for blood group O (1).
# If you don't have the vcd package installed un-comment the next line
# install.packages("vcd")
library(vcd)
# Counts typed in one condition (row) at a time, hence byrow = TRUE;
# within each row the blood groups run A, B, AB, O.
thedata <- matrix(
  c(
    543, 211, 90, 476,
    44, 22, 8, 31,
    58, 9, 7, 1
  ),
  nrow = 3, ncol = 4, byrow = TRUE,
  dimnames = list(
    condition = c("absent", "mild", "severe"),
    blood_group = c("A", "B", "AB", "O")
  )
)
# print the table to check the data entry
thedata
# association plot with shading switched on
assoc(thedata, shade = TRUE, main = "blood group and illness")
# same plot using the shading_max shading function
assoc(thedata, gp = shading_max)
##
##

########### alternatives using raw data
# Read the raw data and build the contingency table from it with table().
# NOTE(review): presumably one row per patient with a condition column and a
# blood-group column -- verify against the file.
mydataframe <- read.delim(
  "http://www.robin-beaumont.co.uk/virtualclassroom/book2data/chiq1_daniel_bg_condition.dat",
  header = TRUE
)
names(mydataframe)
mydataframe
# correct = FALSE switches off the continuity correction (which chisq.test
# only ever applies to 2 x 2 tables).
myresult <- chisq.test(table(mydataframe), correct = FALSE)
myresult
myresult$observed
myresult$expected
# The component is named "residuals"; use the full name rather than relying
# on `$` partial matching.
myresult$residuals
library(vcd)
# mosaic plot shaded by Pearson residuals
mosaic(table(mydataframe), residuals_type = "pearson", gp = shading_Friendly)

# Also from the noncentral effect size chapter:
###### Cohen's w = sqrt(chisquare / n), where n is the total number of observations
# n is taken from the observed table instead of being hard-coded (it was
# typed in as 1500), so the calculation stays correct if the data change.
w_from_chi <- sqrt(myresult$statistic / sum(myresult$observed))
w_from_chi

###########