# R code for chapter 46: creating datasets and distributions in R Commander and R
# Book details:
# http://www.amazon.co.uk/Health-Science-Statistics-using-Commander/dp/190790431X
#############
###############
###
# example of using NCStats to expand table data

# But first, an example using expand.table() from the epitools package.
# expand.table() is not a base R function, so epitools has to be attached
# before the call at the end of this section can be resolved.
library(epitools)

# NOTE: `mydataframe` is not created anywhere in this script; it must already
# exist in the workspace and have the columns `count` and `cat`.
names(mydataframe)
# Cross-tabulate the two columns; the argument names (count, cat) become the
# names of the table's dimensions.
tab2 <- table(count = mydataframe$count, cat = mydataframe$cat)
tab2
# Expand the cross-tabulation back into individual records.
expand.table(as.table(tab2))

#####################
##  NCStats
# NCStats has dependencies. Because the package lives on the RForge site,
# they do not seem to be picked up automatically, so each one is installed
# explicitly before NCStats itself.
########
invisible(lapply(
  c("sciplot", "plotrix", "gplots", "TeachingDemos"),
  install.packages,
  dependencies = TRUE
))
install.packages("NCStats", repos = "http://www.rforge.net/",
                 dependencies = TRUE)

# Attach the dependencies first and NCStats last.
invisible(lapply(
  c("TeachingDemos", "gplots", "plotrix", "sciplot", "NCStats"),
  library,
  character.only = TRUE
))

# Works with NCStats
###################
# A one-column table of counts: produces a single column with 328 copies of
# "value1" and 1033 copies of "value2" as factor levels.

d <- matrix(
  c(328, 1033),
  ncol = 1,
  dimnames = list(c("value1", "value2"), "count")
)
rawd <- expandTable(d, "thevalue")
rawd

###################
# Same counts, but labelled 1 and 2: produces a single column with 328 1's
# and 1033 2's as numbers.

# Row labels are stored as the strings "1" and "2";
# for factor levels use labels such as "factorlevel1" instead.
d <- matrix(
  c(328, 1033),
  ncol = 1,
  dimnames = list(c("1", "2"), NULL)
)
rawd <- expandTable(d, "value")
rawd
############

# More complex example: smoker incidence in 4 groups
library(NCStats)
# One row per group; the columns hold the smoker and nonsmoker counts.
d <- matrix(
  c( 83,  3,
     90,  3,
    129,  7,
     70, 12),
  ncol = 2, byrow = TRUE,
  dimnames = list(paste0("g", 1:4), c("smoker", "nonsmoker"))
)
# Variable names are supplied in the order: rownames, then colnames.
rawd <- expandTable(d, c("group", "smoker status"))
rawd
#################
# More complex example: condition by blood group (Daniel example)
library(NCStats)
# Three rows of counts, four columns.
d <- matrix(
  c( 31,  7,   9,  28,
     31,  8,  22,  44,
    476, 90, 211, 543),
  nrow = 3, byrow = TRUE
)
d
# Label the rows (condition) and columns (blood group) in one step.
dimnames(d) <- list(
  c("severe", "mild", "absent"),
  c("O", "AB", "B", "A")
)
# Variable names are supplied in the order: rownames, then colnames.
rawd <- expandTable(d, c("condition", "blood_group"))

#################
# Agreement table from Cohen's 1960 paper introducing the Kappa measure of
# agreement between raters; used in the Levels of agreement chapter.
# Rows and columns share the same three category labels.
d <- matrix(
  c(88, 14, 18,
    10, 40, 10,
     2,  6, 12),
  ncol = 3, byrow = TRUE,
  dimnames = rep(list(c("schizophrenic", "neurotic", "brain damage")), 2)
)
# Variable names are supplied in the order: rownames, then colnames.
rawd <- expandTable(d, c("judge A", "judge B"))
rawd
# Save the expanded data as a tab-separated file without row names.
write.table(rawd, file = "cohen_1960.dat", sep = "\t", row.names = FALSE)

############################
# Expand the current matrix `d` with expand.table().
# NOTE(review): expand.table() is presumably the epitools function -- confirm
# that the package providing it is attached before running this line.
df <- expand.table(d)
df
## Another chi-square test example, from chisq.test
# 2 x 2 table of counts, filled column by column.
x <- matrix(c(12, 5, 7, 7), nrow = 2)
chi2.ex <- chisq.test(x)
chi2.ex
# NOTE(review): plotting a test result relies on a plot method supplied by an
# attached package (presumably NCStats) -- confirm.
plot(chi2.ex)

############################
# Adapted from the source code of expandTable() in the NCStats package.
#
# Expands a table of counts into a data frame holding one row for every
# counted observation.
#
# Arguments:
#   x          A matrix or table of counts with at least two dimensions.
#              A two-dimensional input with a single row or a single column
#              is treated as the frequency table of ONE variable whose
#              categories are the row names (after transposing a single-row
#              input). Anything else is treated as a cross-classification
#              with one output column per dimension.
#   var.names  Optional character vector of column names for the result:
#              at most one name for a one-variable table, otherwise exactly
#              one name per dimension of `x`. When NULL, the columns are
#              called Var1, Var2, ...
#   ...        Further arguments passed to type.convert() when converting
#              the category labels of a cross-classification. `as.is`
#              defaults to TRUE here unless the caller supplies it.
#
# Value:
#   A data frame with one row per observation. For a cross-classification,
#   labels that look numeric are converted to numbers by type.convert().
#   For a one-variable table the labels are stored as produced by
#   data.frame() and are NOT passed through type.convert().
#
# Errors:
#   Stops if `x` has fewer than two dimensions, if any count is missing or
#   negative, or if the number of `var.names` does not fit the table.
expandTable_RB <- function(x, var.names = NULL, ...) {
  dims <- dim(x)
  if (length(dims) < 2L) {
    stop("'x' must be a matrix or table with at least two dimensions.",
         call. = FALSE)
  }

  # rep() would silently treat a missing count as 1 and fails with an obscure
  # message on negative counts, so reject both up front.
  checked_counts <- function(counts) {
    if (anyNA(counts) || any(counts < 0)) {
      stop("Counts in 'x' must be non-negative and not missing.",
           call. = FALSE)
    }
    counts
  }

  if (length(dims) == 2L && (dims[1L] == 1L || dims[2L] == 1L)) {
    # --- frequency table of a single variable ---
    if (length(var.names) > 1L) {
      stop("Too many var.names given.", call. = FALSE)
    }
    if (dims[1L] == 1L) {
      x <- t(x)
    }
    # Without row names there is nothing to replicate; fall back to the same
    # default labels (A, B, ...) that as.data.frame.table() would invent.
    if (is.null(rownames(x))) {
      x <- provideDimnames(x)
    }
    df <- data.frame(rep(rownames(x), checked_counts(as.vector(x))))
    # Assigning NULL names would leave a data frame without column names,
    # so use the same default as the cross-classification branch.
    names(df) <- if (is.null(var.names)) "Var1" else var.names
  } else {
    # --- cross-classification: one output column per dimension ---
    cells <- as.data.frame.table(x)
    # Repeat every cell's row index as often as its count, then pick all
    # rows in one indexing step instead of rbind()-ing one piece per cell.
    reps <- rep(seq_len(nrow(cells)),
                times = checked_counts(cells[["Freq"]]))
    df <- cells[reps, names(cells) != "Freq", drop = FALSE]

    # type.convert() warns on R >= 4.0 when `as.is` is not specified and then
    # uses TRUE; pass that value explicitly unless the caller chose one.
    convert_args <- list(...)
    if (!"as.is" %in% names(convert_args)) {
      convert_args[["as.is"]] <- TRUE
    }
    for (j in seq_along(df)) {
      df[[j]] <- do.call(
        type.convert,
        c(list(as.character(df[[j]])), convert_args)
      )
    }
    rownames(df) <- NULL

    if (!is.null(var.names)) {
      if (length(var.names) < ncol(df)) {
        stop("Too few var.names given.", call. = FALSE)
      } else if (length(var.names) > ncol(df)) {
        stop("Too many var.names given.", call. = FALSE)
      }
      names(df) <- var.names
    }
  }
  df
}


###############   end