desctab by Daisung Jang

desctab is a function that generates descriptive statistics tables in close-to-APA-format.

Given a dataframe, desctab selects the numeric variables, and then generates an excel file that contains Ns, means, and SDs for each variable as well as a correlation matrix. It also retains the table as an object in R.

Suggestions for improvement are welcome:

desctab <- function(x) {
    # borrows heavily from the corstarsl function
    # http://myowelt.blogspot.com/2008/04/beautiful-correlation-tables-in-r.html
    
    x <- Filter(is.numeric, x)
    
    require(Hmisc)
    require(openxlsx)
    x <- as.matrix(x)
    R <- rcorr(x)$r
    p <- rcorr(x)$P
    
    ## define notions for significance levels; spacing is important.
    mystars <- ifelse(p < 0.01, "** ", ifelse(p < 0.05, "* ", ifelse(p < 0.1, "† ", 
        " ")))
    ## trunctuate the matrix that holds the correlations to two decimal
    R <- format(round(cbind(rep(-1.11, ncol(x)), R), 2))[, -1]
    
    ## build a new matrix that includes the correlations with their apropriate stars
    Rnew <- matrix(paste(R, mystars, sep = ""), ncol = ncol(x))
    diag(Rnew) <- paste(diag(R), " ", sep = "")
    rownames(Rnew) <- colnames(x)
    colnames(Rnew) <- paste(colnames(x), "", sep = "")
    
    ## remove upper triangle
    Rnew <- as.matrix(Rnew)
    Rnew[upper.tri(Rnew, diag = TRUE)] <- ""
    Rnew <- as.data.frame(Rnew)
    
    ## create two versions of the table, one with just stars and another with just
    ## numbers
    Rnew <- cbind(Rnew[1:length(Rnew) - 1])
    temptab_star <- sapply(Rnew, function(x) gsub("[^\\*|^\\†]+", "", x))
    temptab_num <- sapply(Rnew, function(x) gsub("[^-|0-9|\\.|\\s]+", "", x))
    
    # create indexes for the desired order
    index <- order(c(1:ncol(temptab_num), 1:ncol(temptab_star)))  ## taken from here: https://stackoverflow.com/questions/24576548/merge-interleave-dataframes-in-r
    
    # cbind, interleaving columns
    cortab <- cbind(temptab_num, temptab_star)[, index]
    
    # generate column numbers
    colnum <- paste("(", seq(1:length(temptab_num[1, ])), ")", sep = "")
    
    colnames(cortab) <- c(rbind(colnum, rep("", ncol(temptab_star))))
    
    # Create a summary table and write to an Excel file, adapted from:
    # https://stackoverflow.com/questions/50471794/sapply-retain-column-names
    
    sfsum = function(x) {
        mean = round(mean(x, na.rm = T), 2)
        sd = round(sd(x, na.rm = T), 2)
        n = sum(!is.na(x))
        return(c(N = n, Mean = mean, SD = sd))  #For column names
    }
    
    # Combine the two tables and add some useful notes.
    
    tab1 <- t(sapply(as.data.frame(x), sfsum, USE.NAMES = TRUE))  #USE.NAMES = TRUE to get names on top
    
    tab2 <- cbind(tab1, cortab)
    
    tab2 <- as.data.frame(tab2)
    
    tab2$Variables <- rownames(tab2)
    
    tab2 <- tab2[, c(which(colnames(tab2) == "Variables"), which(colnames(tab2) != 
        "Variables"))]
    
    varnum <- paste(seq.int(nrow(tab2)), ". ", sep = "")
    
    tab2$Variables <- paste(varnum, tab2$Variables, sep = "")
    
    number <- rcorr(x)$n
    
    tabmin <- min(number)
    
    tabmax <- max(number)
    
    ns <- paste("The N for the correlation table ranges from ", tabmin, " to ", tabmax, 
        ".", sep = "")
    
    note <- paste("Note. ", ns, " † p < .10, * p < .05, ** p < .01.", sep = "")
    
    noteline <- cbind(note, t(rep(NA, (ncol(tab2) - 1))))
    
    colnames(noteline) <- colnames(tab2)
    
    tab2 <- (rbind(tab2, noteline))
    
    write.xlsx(tab2, "descriptives.xlsx", sheetName = "descriptives", row.names = F)
    
    wb <- loadWorkbook("descriptives.xlsx")
    
    l1 <- seq(5, ncol(tab2), by = 2)
    
    for (i in l1) {
        j = i + 1
        mergeCells(wb, 1, cols = i:j, rows = 1)
    }
    
    saveWorkbook(wb, "descriptives.xlsx", overwrite = TRUE)
    
    rownames(tab2) <- c()
    
    excelcolnums <- c(rbind(colnum, rep("", ncol(temptab_star))))
    
    colnames(tab2) <- c("Variables", "N", "Mean", "SD", excelcolnums)
    
    return(tab2)
}
library(knitr)
data("iris")

descriptives <- desctab(iris)
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: openxlsx
kable(descriptives)
Variables N Mean SD (1) (2) (3)
1. Sepal.Length 150 5.84 0.83
2. Sepal.Width 150 3.06 0.44 -0.12
3. Petal.Length 150 3.76 1.77 0.87 ** -0.43 **
4. Petal.Width 150 1.2 0.76 0.82 ** -0.37 ** 0.96 **
Note. The N for the correlation table ranges from 150 to 150. † p < .10, * p < .05, ** p < .01. NA NA NA NA NA NA NA NA NA