# R code for chapter 46: creating datasets and distributions in R Commander
# and R
# Book details:
# http://www.amazon.co.uk/Health-Science-Statistics-using-Commander/dp/190790431X

# Example of using NCStats to expand table data ----

# But first an example from the epitools package.
# NOTE(review): `mydataframe` is not created in this script; it is presumably
# the active dataset (with `count` and `cat` columns) - confirm before running.
# NOTE(review): expand.table() is not loaded by any library() call in this
# file; it presumably requires the epitools package to be attached first.
names(mydataframe)
tab2 <- table(count = mydataframe$count, cat = mydataframe$cat)
tab2
expand.table(as.table(tab2))

# NCStats ----
# NCStats has dependencies; because it is hosted on the rforge site,
# install.packages() does not seem to pick them up automatically, so they are
# installed explicitly first.
install.packages("sciplot", dependencies = TRUE)
install.packages("plotrix", dependencies = TRUE)
install.packages("gplots", dependencies = TRUE)
install.packages("TeachingDemos", dependencies = TRUE)
install.packages(
  "NCStats",
  repos = "http://www.rforge.net/",
  dependencies = TRUE
)

library(TeachingDemos)
library(gplots)
library(plotrix)
library(sciplot)
library(NCStats)

# Works in NCStats ----

# Produces a single column with 328 "value1" and 1033 "value2" entries
# (factor levels).
d <- matrix(c(328, 1033), nrow = 2, byrow = TRUE)
colnames(d) <- "count"
rownames(d) <- c("value1", "value2")
rawd <- expandTable(d, "thevalue")
rawd

# Produces a single column with 328 1's and 1033 2's as numbers.
d <- matrix(c(328, 1033), nrow = 2, byrow = TRUE)
rownames(d) <- c(1, 2) # for factor levels use "factorlevel1" etc
rawd <- expandTable(d, "value")
rawd

# More complex example: 4 groups, smoker incidence ----
library(NCStats)
d <- matrix(c(83, 3, 90, 3, 129, 7, 70, 12), nrow = 4, byrow = TRUE)
colnames(d) <- c("smoker", "nonsmoker")
rownames(d) <- c("g1", "g2", "g3", "g4")
rawd <- expandTable(d, c("group", "smoker status")) # rownames; colnames
rawd

# More complex example: condition by blood group (Daniel example) ----
library(NCStats)
d <- matrix(
  c(31, 7, 9, 28, 31, 8, 22, 44, 476, 90, 211, 543),
  ncol = 4,
  byrow = TRUE
)
d
colnames(d) <- c("O", "AB", "B", "A")
rownames(d) <- c("severe", "mild", "absent")
rawd <- expandTable(d, c("condition", "blood_group")) # rownames; colnames

# Cohen's 1960 paper introducing the Kappa measure of agreement ----
# between raters, used in the Levels of agreement chapter:
d <- matrix(c(88, 14, 18, 10, 40, 10, 2, 6, 12), nrow = 3, byrow = TRUE)
colnames(d) <- c("schizophrenic", "neurotic", "brain damage")
rownames(d) <- c("schizophrenic", "neurotic", "brain damage")
rawd <- expandTable(d, c("judge A", "judge B")) # rownames; colnames
rawd
# Save the expanded data as a tab-separated file in the working directory.
write.table(rawd, "cohen_1960.dat", sep = "\t", row.names = FALSE)

# NOTE(review): expand.table() is not provided by any package loaded in this
# file; presumably it comes from epitools - confirm it is attached.
df <- expand.table(d)
df

## Another Chi-square test example from chisq.test
x <- matrix(c(12, 5, 7, 7), ncol = 2)
chi2.ex <- chisq.test(x)
chi2.ex
plot(chi2.ex)

# Source code for expandTable() from the NCStats package ----

#' Expand a table of counts into one row per counted observation
#'
#' Local copy of `expandTable()` from the NCStats package. A one-way table
#' (a single row or a single column) becomes a one-column data frame in which
#' each row name is repeated according to its count. A two-way table becomes a
#' two-column data frame (row label, column label) in which each combination
#' is repeated according to its cell count; the labels are then passed through
#' `type.convert()`.
#'
#' @param x A matrix or table of counts with dimnames. For one-way input the
#'   row names (after transposing a single-row input) supply the values.
#' @param var.names Optional character vector of column names for the result:
#'   at most one name for one-way input, exactly two (row variable, column
#'   variable) for two-way input. If `NULL`, the default names are kept.
#' @param ... Further arguments forwarded to `type.convert()` (two-way input
#'   only).
#' @return A data frame with one row per counted observation.
expandTable_RB <- function(x, var.names = NULL, ...) {
  nr <- nrow(x)
  nc <- ncol(x)

  if (nr == 1 || nc == 1) {
    # One-way table: work with a single column of counts.
    if (nr == 1) {
      x <- t(x)
    }
    if (length(var.names) > 1) {
      stop("Too many var.names given.", call. = FALSE)
    }
    df <- data.frame(rep(rownames(x), x))
    # Only rename when a name was supplied; assigning NULL would strip the
    # column name from the data frame.
    if (!is.null(var.names)) {
      names(df) <- var.names
    }
  } else {
    # Two-way table: validate the names before doing any expansion work.
    if (!is.null(var.names)) {
      if (length(var.names) < 2) {
        stop("Too few var.names given.", call. = FALSE)
      } else if (length(var.names) > 2) {
        stop("Too many var.names given.", call. = FALSE)
      }
    }

    # Long format: one row per cell, with the count in column "Freq".
    counts <- as.data.frame.table(x)

    # Repeat each cell's row index `Freq` times, in cell order, and drop the
    # count column.
    idx <- rep(seq_len(nrow(counts)), times = counts[["Freq"]])
    df <- counts[idx, names(counts) != "Freq", drop = FALSE]

    # Turn the labels back into the most appropriate type.
    # NOTE(review): recent R versions are believed to warn when `as.is` is not
    # supplied to type.convert(); callers can pass it through `...` - confirm.
    for (j in seq_along(df)) {
      df[[j]] <- type.convert(as.character(df[[j]]), ...)
    }
    rownames(df) <- NULL

    if (!is.null(var.names)) {
      names(df) <- var.names
    }
  }

  df
}

# end