# chapter: Comparing several independent categories: Contingency tables

# Chi-square the long way round ----
# Taking the first table as an example of how you would do it from scratch:
# each cell contributes (observed - expected)^2 / expected.
# The cell counts match the 3 x 4 table `d` built further down, with
# cellRC indexing rows 1 = absent, 2 = mild, 3 = severe and
# columns 1 = A, 2 = B, 3 = AB, 4 = O.
# The expected counts are typed in to one decimal place, so the total is
# close to, but not exactly, the statistic chisq.test() reports for `d`.
cell34 <- (31 - 26.9)^2 / 26.9
cell34
cell33 <- (7 - 5.3)^2 / 5.3
cell33
cell32 <- (9 - 12.1)^2 / 12.1
cell32
cell31 <- (28 - 30.8)^2 / 30.8
cell31
cell24 <- (31 - 37.7)^2 / 37.7
cell24
cell23 <- (8 - 7.4)^2 / 7.4
cell23
cell22 <- (22 - 16.9)^2 / 16.9
cell22
cell21 <- (44 - 43.1)^2 / 43.1
cell21
cell14 <- (476 - 473.4)^2 / 473.4
cell14
cell13 <- (90 - 92.4)^2 / 92.4
cell13
cell12 <- (211 - 213.0)^2 / 213.0
cell12
cell11 <- (543 - 541.2)^2 / 541.2
cell11

totalchi <- cell34 + cell33 + cell32 + cell31 +
  cell24 + cell23 + cell22 + cell21 +
  cell14 + cell13 + cell12 + cell11
totalchi

# To get the p-value consider the area to the right of totalchi under the
# chi-square curve; df = (3 - 1) * (4 - 1) = 6.
# lower.tail = FALSE asks for that upper tail directly instead of
# computing 1 - (lower tail).
pchisq(totalchi, df = 6, lower.tail = FALSE)

# Chi-square with counts: 2 x 2 table ----
# matrix() fills column by column:
#   first column  = 144, 96   (column total 240)
#   second column = 160, 80   (column total 240)
# You do not need to enter the totals.
thedata <- matrix(
  c(144, 96, 160, 80),
  nrow = 2,
  ncol = 2,
  dimnames = list(
    outcome = c("improved at 5 days", "NO improvement at 5 days"),
    antibiotic = c("Amoxicillin", "Erythromycin")
  )
)
thedata # get a printout of the data

# For a 2 x 2 table chisq.test() applies the continuity correction by
# default (correct = TRUE).
result <- chisq.test(thedata)
result$observed
result$expected
# Spelled out in full: `$residual` only worked through partial matching.
result$residuals

# Chi-square with counts: 3 x 4 table (condition by blood group) ----
# The counts are entered row by row (byrow = TRUE) in the order
# severe, mild, absent, and within each row in the order O, AB, B, A.
# The labels below must follow that entry order; labelling the rows
# absent/mild/severe and the columns A/B/AB/O would attach the wrong
# names to these counts.
d <- matrix(
  c(
    31, 7, 9, 28,
    31, 8, 22, 44,
    476, 90, 211, 543
  ),
  ncol = 4,
  byrow = TRUE
)
col_names <- c("O", "AB", "B", "A")
row_names <- c("severe", "mild", "absent")
dimnames(d) <- list(condition = row_names, blood_group = col_names)
d

result <- chisq.test(d)
result$observed
result$expected
result$residuals
# Squared Pearson residuals are the per-cell contributions; their sum is
# the chi-square statistic.
result$residuals^2
sum(result$residuals^2)

## also calculate a p value by simulation
## (better technique if expected cell count is < 5)
# The simulated p-value varies a little from run to run because no
# random seed is set.
result <- chisq.test(d, simulate.p.value = TRUE, B = 10000)
result

###############
# Section 5 larger tables
# matrix( c(col 1 data, col 2 data etc),
#         nrow = 3, ncol = 4)
# Define the names for each level: name the rows and then the columns.
# In this version of the table the severe group is given a very high
# proportion of blood group A and a very low proportion of blood group O.

# If you don't have the vcd package installed un-comment the next line
# install.packages("vcd")
library(vcd)

# matrix() fills column by column, so each triple below is one blood group
# (A, B, AB, O) listed as absent, mild, severe.
thedata <- matrix(
  c(543, 44, 58, 211, 22, 9, 90, 8, 7, 476, 31, 1),
  nrow = 3,
  ncol = 4,
  dimnames = list(
    condition = c("absent", "mild", "severe"),
    blood_group = c("A", "B", "AB", "O")
  )
)
# get a print of the data
thedata

# Association plots from vcd, first with default shading and a title,
# then with the shading_max scheme.
assoc(thedata, main = "blood group and illness", shade = TRUE)
assoc(thedata, gp = shading_max)

## ########### alternatives using raw data
# Reads a tab-delimited file over HTTP, so this part needs network access.
# NOTE(review): presumably one row per subject with a blood-group column
# and a condition column - confirm against the file.
mydataframe <- read.delim(
  "http://www.robin-beaumont.co.uk/virtualclassroom/book2data/chiq1_daniel_bg_condition.dat",
  header = TRUE
)
names(mydataframe)
mydataframe

# table() cross-tabulates the columns into a contingency table.
# correct = FALSE switches off the continuity correction (which
# chisq.test() only applies to 2 x 2 tables in any case).
myresult <- chisq.test(table(mydataframe), correct = FALSE)
myresult
myresult$observed
myresult$expected
# Spelled out in full: `$residual` only worked through partial matching.
myresult$residuals

# Harmless if vcd is already attached.
library(vcd)
mosaic(table(mydataframe), residuals_type = "pearson", gp = shading_Friendly)

# Also from the noncentral effect size chapter:
###### Cohen's w = sqrt(chi-square / n)
# n is the total count in the tested table rather than a hard-coded 1500,
# so the effect size stays correct if the data file changes.
# unname() drops the "X-squared" label inherited from the test statistic.
w_from_chi <- sqrt(unname(myresult$statistic) / sum(myresult$observed))
w_from_chi
###########