## ----setup, include=FALSE------------------------------------------------
knitr::opts_chunk$set(echo = TRUE,
                      warning = FALSE,
                      message = FALSE)

## ----eval = FALSE--------------------------------------------------------
#  install.packages("cvequality")

## ------------------------------------------------------------------------
# read in the data, we obtained this from the HistData package
GaltonFamilies <- read.csv("GaltonFamilies.csv")

library(knitr)
kable(head(GaltonFamilies), caption = "Preview of first few rows of the GaltonFamilies data")

## ------------------------------------------------------------------------
library(ggplot2)
library(ggbeeswarm)
ggplot(GaltonFamilies, 
       aes(gender, 
           childHeight)) +
  geom_boxplot() +
  geom_quasirandom(alpha = 0.05) +
  theme_bw()

## ------------------------------------------------------------------------
# test for difference in CVs between boys and girls
library(cvequality)
child_height_by_gender_cv_test <- 
with(GaltonFamilies, 
     asymptotic_test(childHeight,
                     gender))

## ------------------------------------------------------------------------

child_height_by_gender_cv_test_MSLRT <- 
with(GaltonFamilies, 
     mslr_test(nr = 1e4, 
               childHeight,
               gender))

## ---- echo = FALSE-------------------------------------------------------
# make a table to display the results
child_test_summary <- 
  data.frame(`Test name` = c("asymptotic", 
                           "M-SLRT"),
             `Test statistic` = c(child_height_by_gender_cv_test$D_AD,
                                  child_height_by_gender_cv_test_MSLRT$MLRT),
             `p-value` = c(child_height_by_gender_cv_test$p_value,
                                  child_height_by_gender_cv_test_MSLRT$p_value),
             check.names = FALSE)

kable(child_test_summary, 
      caption =  "Summary of tests on the equality of CVs in child heights by gender")

## ------------------------------------------------------------------------
# read in the data, we obtained this from the archdata package
handaxes <- read.csv("Handaxes.csv")

# take a quick look
kable(head(handaxes), 
      caption = "Preview of first few rows of the handaxes data")

## ------------------------------------------------------------------------
library(dplyr)
library(tidyr)
handaxes_reshape <- 
  handaxes %>% 
  select(c(L, L1, B, B1, B2, T, T1)) %>% 
  gather(variable, value) %>% 
  na.omit

# take a quick look
kable(head(handaxes_reshape), 
      caption = "Preview of first few rows of the handaxes data")

## ------------------------------------------------------------------------
ggplot(handaxes_reshape, 
       aes(variable, 
           value)) +
  geom_boxplot() +
  geom_quasirandom(alpha = 0.02) +
  theme_bw()

## ------------------------------------------------------------------------
handaxes_asymptotic_test <- 
with(handaxes_reshape, 
     asymptotic_test(value, 
                     variable))

handaxes_mlrt_test <- 
with(handaxes_reshape, 
     mslr_test(nr = 1e4, 
               value, 
               variable))

## ---- echo = FALSE-------------------------------------------------------
# make a table to display the results
handaxe_test_summary <- 
  data.frame(`Test name` = c("asymptotic", 
                           "M-SLRT"),
             `Test statistic` = c(handaxes_asymptotic_test$D_AD,
                                  handaxes_mlrt_test$MLRT),
             `p-value` = c(handaxes_asymptotic_test$p_value,
                           handaxes_mlrt_test$p_value),
             check.names = FALSE)

kable(handaxe_test_summary, caption =  "Summary of tests on the equality of CVs in measurements of different variables of handaxe size")

## ------------------------------------------------------------------------
miller <- data.frame(test = c('ELISA', 'WEHI', '`Viral inhibition`'),
                     Mean = c(6.8, 8.5, 6.0),
                     CV =   c(0.090, 0.462, 0.340),
                     N =    c(5, 5, 5))

## ------------------------------------------------------------------------
# compute SD from mean and cv
miller$SD <- with(miller, CV * Mean)

## ------------------------------------------------------------------------
ggplot(miller,
       aes(test,
           Mean)) +
  # points to show mean values
  geom_point(size = 4) +
  # lines to show standard deviations
  geom_linerange(aes(ymin = Mean - SD,
                     ymax = Mean + SD)) +
  theme_bw()

## ------------------------------------------------------------------------
miller_asymptotic_test2 <- 
asymptotic_test2(k = nrow(miller), 
                 n = miller$N, 
                 s = miller$SD, 
                 x = miller$Mean)


## ------------------------------------------------------------------------
miller_mlrt_test2 <-
mslr_test2(nr = 1e4, 
           n = miller$N, 
           s = miller$SD, 
           x = miller$Mean)


## ------------------------------------------------------------------------
set.seed(42)
df <- data.frame(x = c(rnorm(20, 5, 2), 
                       rnorm(20, 5, 4)), 
                 y = c(rep(1, 20), 
                       rep(2, 20)))

## ------------------------------------------------------------------------
set.seed(42)
mslr_test(nr = 10000, x = df$x, y = df$y)

## ------------------------------------------------------------------------
set.seed(42)
mslr_test(nr = 10000, x = df$x, y = df$y)

## ------------------------------------------------------------------------
# repeat it n times to see how the results vary
n <- 10
reps <- replicate(n, {set.seed(42); mslr_test(nr = 10000, x = df$x, y = df$y)})

# take a look
stat <- unlist(reps[seq(1, length(reps), 2)])
pvals <- unlist(reps[seq(2, length(reps), 2)])

how_many_unique_values <- data.frame(unique_stat_values = length(unique(stat)),
                                     unique_p_values = length(unique(pvals)))
kable(how_many_unique_values)