## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>") oldopts <- options(digits = 3, scipen = 999) print_num <- function(x) print(sprintf("%.3f", x)) ## ----install, eval = FALSE---------------------------------------------------- # install.packages("devtools") # devtools::install_github("queelius/algebraic.mle") ## ----setup, include = FALSE--------------------------------------------------- library(algebraic.dist) library(algebraic.mle) library(numDeriv) library(MASS) library(ggplot2) library(tibble) library(mvtnorm) set.seed(1234) ## ----------------------------------------------------------------------------- fit_normal <- function(data) { sigma <- function(data) { mean((data - mean(data))^2) } loglik <- function(par, data) { n <- length(data) -n / 2 * log(2 * pi * par[2]) - 1 / (2 * par[2]) * (sum(data^2) - 2 * par[1] * sum(data) + n * par[1]^2) } par.hat <- c(mu = mean(data), var = sigma(data)) H <- numDeriv::hessian(func = loglik, x = par.hat, data = data) algebraic.mle::mle( theta.hat = par.hat, loglike = loglik(par.hat, data), score = numDeriv::grad(func = loglik, x = par.hat, data = data), sigma = MASS::ginv(-H), info = -H, obs = NULL, nobs = length(data), superclasses = c("mle_normal")) } ## ----theta-samp-mc------------------------------------------------------------ theta_samp_mc <- function(n, theta, B = 10000) { mu <- theta[1] var <- theta[2] mles <- matrix(NA, nrow = B, ncol = 2) for (i in 1:B) { d <- rnorm(n, mean = mu, sd = sqrt(var)) mles[i, ] <- params(fit_normal(d)) } colnames(mles) <- c("mu", "var") mles } ## ----mc-samp, cache=TRUE------------------------------------------------------ # Set up the parameters of a simulation set.seed(913254) n <- 70 mu <- 1 var <- 1 B <- 1000 theta <- c(mu, var) mles <- theta_samp_mc(n = n, theta = theta, B = B) head(mles) ## ----fig.width=4, fig.height=4, echo=FALSE, fig.align="center", fig.cap="Sampling distribution of the MLEs.", warning=FALSE, cache=TRUE---- tmp <- colMeans(mles) # countour plot of the sampling distribution `mles` and with the point of the # true mean and variance, theta = (mu, var), shown. label it "true parameter" ggplot(as_tibble(mles), aes(x = mu, y = var)) + labs(x = expression(mu), y = expression(sigma^2)) + theme_bw() + geom_hline(yintercept = var) + geom_vline(xintercept = mu) + geom_point(aes(x = tmp[1], y = tmp[2]), color = "green") + geom_point(aes(x = mu, y = var), color = "red") + stat_density2d(aes(fill = ..level..), geom = "polygon", alpha = 0.2) + scale_fill_gradient(low = "blue", high = "red") ## ----------------------------------------------------------------------------- theta.mc <- algebraic.dist::empirical_dist(mles) ## ----------------------------------------------------------------------------- (mu.mc <- mean(theta.mc)) ## ----expectation-tests, cache = TRUE------------------------------------------ # should sum to 1 expectation(theta.mc, function(x) 1) # mean expectation(theta.mc, function(x) x) # variance of (mu, var) expectation(theta.mc, function(x) (x - mu.mc)^2) # kurtosis of (mu, var) expectation(theta.mc, function(x) (x - mu.mc)^4) / expectation(theta.mc, function(x) (x - mu.mc)^2)^2 # skewness of mu and var -- should be (0, 0) expectation(theta.mc, function(x) ((x - mu.mc) / theta)^3) # covariance of (mu, var) -- should be around 0 expectation(theta.mc, function(x) (x[1] - mu.mc[1]) * (x[2] - mu.mc[2])) ## ----------------------------------------------------------------------------- bias.mle_normal <- function(x, par = NULL, ...) { if (is.null(par)) { par <- params(x) } c(mu = 0, var = -(1 / nobs(x)) * par[2]) } ## ----bias-1------------------------------------------------------------------- # first, we sample some data from the true distribution data <- rnorm(n = n, mean = mu, sd = sqrt(var)) # now we fit it to the normal distribution theta.hat <- fit_normal(data) # now we compute the bias, first using the asymptotic theory bias(theta.hat, theta) # now using the empirical sampling distribution expectation(theta.mc, function(x) x - theta) # mean(theta.mc) - theta ## ----bias-code, cache = TRUE-------------------------------------------------- N <- 1000 ns <- seq(10, 500, 10) bias_var <- numeric(length(ns)) j <- 1 for (n in ns) { vars <- numeric(length(N)) for (i in 1:N) { d <- rnorm(n = n, mean = mu, sd = sqrt(var)) fit <- fit_normal(d) vars[i] <- params(fit)[2] } bias_var[j] <- mean(vars) - var j <- j + 1 } ## ----bias-plot, echo=FALSE, fig.height=4, fig.width=6------------------------- plot(ns, bias_var, type = "l", xlab = "Sample size", ylab = "Bias(Var)") # plot the asymptotic bias lines(ns, -(1 / ns) * var, lty = 2) ## ----------------------------------------------------------------------------- round(vcov(theta.hat), digits=3) round(vcov(theta.mc), digits=3) ## ----confint-1---------------------------------------------------------------- confint(theta.hat) ## ----covearge-prob, cache = TRUE---------------------------------------------- N <- 1000 ns <- c(seq(50, 200, 50), 300, 600, 1000) coverage_prob <- matrix(NA, nrow=length(ns), ncol=2) j <- 1 for (n in ns) { count1 <- 0L count2 <- 0L for (i in 1:N) { d <- rnorm(n = n, mean = mu, sd = sqrt(var)) fit <- fit_normal(d) ci <- confint(fit) if (ci[1, 1] <= mu && mu <= ci[1, 2]) { count1 <- count1 + 1 } if (ci[2, 1] <= var && var <= ci[2, 2]) { count2 <- count2 + 1 } } coverage_prob[j, 1] <- count1 / N coverage_prob[j, 2] <- count2 / N j <- j + 1 } ## ----cov-plot, echo = FALSE, fig.height = 4, fig.width = 5, cache = TRUE------ plot(ns, coverage_prob[, 2], type = "l", xlab = "Sample size", col = "blue", ylab = "Coverage") lines(ns, coverage_prob[, 1], col = "green") legend("bottomright", legend = c("mu", "var"), col = c("blue", "green"), lty = 1) abline(h = 0.95, lty = 2) ## ----mse-1-------------------------------------------------------------------- mse.hat <- mse(theta.hat, theta) mse.mc <- matrix(expectation(theta.mc, function(x) (x - theta) %*% t(x - theta)), nrow = 2) round(mse.hat, digits = 3) round(mse.mc, digits = 3) ## ----------------------------------------------------------------------------- # temporarily show more digits in the numbers/outputs for this code block op <- options(digits = 12) # mse(mu) expectation(theta.mc, function(x) (x[1] - mu)^2) # variance(mu) (mu.var <- expectation(theta.mc, function(x) (x[1] - mean(theta.mc)[1])^2)) b <- expectation(theta.mc, function(x) x[1] - mu) # mse = bias^2 + variance b^2 + mu.var options(op) ## ----mse-sample-size, cache = TRUE-------------------------------------------- ns <- seq(25, 200, 25) mses.mc <- matrix(NA, nrow = length(ns), ncol = 2) mses.hat <- matrix(NA, nrow = length(ns), ncol = 2) mses.hat.hat <- matrix(NA, nrow = length(ns), ncol = 2) j <- 1 for (n in ns) { theta.n <- empirical_dist(theta_samp_mc(n = n, theta = theta, B = B)) # Use mean() on the sampled distribution for MSE calculation samples <- obs(theta.n) mse.mu.n <- mean((samples[, 1] - mu)^2) mse.var.n <- mean((samples[, 2] - var)^2) data <- rnorm(n = n, mean = mu, sd = sqrt(var)) fit <- fit_normal(data) mses.mc[j, ] <- c(mse.mu.n, mse.var.n) mses.hat[j, ] <- diag(mse(fit, theta)) mses.hat.hat[j, ] <- diag(mse(fit)) j <- j + 1 } ## ----mse-plots, echo = FALSE, fig.height = 4, fig.width = 6------------------- plot(ns, mses.mc[, 1], type = "l", col = rgb(0,0,1,0.5), ylab = "MSE(mu)", xlab = "Sample size") # blue with transparency lines(ns, mses.hat[, 1], col = rgb(0,1,0,0.5)) # green with transparency lines(ns, mses.hat.hat[, 1], col = rgb(1,0,0,0.5)) # red with transparency legend("topright", legend = c("MC", "Asymptotic", "Asymptotic (estimate)"), col = c(rgb(0,0,1,0.5), rgb(0,1,0,0.5), rgb(1,0,0,0.5)), lty = 1) plot(ns, mses.mc[, 2], type = "l", col = rgb(0,0,1,0.5), ylab = "MSE(var)", xlab = "Sample size") # blue with transparency lines(ns, mses.hat[, 2], col = rgb(0,1,0,0.5)) # green with transparency lines(ns, mses.hat.hat[, 2], col = rgb(1,0,0,0.5)) # red with transparency legend("topright", legend = c("MC", "Asymptotic", "Asymptotic (estimate)"), col = c(rgb(0,0,1,0.5), rgb(0,1,0,0.5), rgb(1,0,0,0.5)), lty = 1) ## ----mle-boot, cache=TRUE----------------------------------------------------- # Simulate a sample of n observations from a normal with mean 1 and variance 2. library(boot) theta.boot <- mle_boot(boot( data = data, statistic = function(x, ind) { params(fit_normal(x[ind])) }, R = B)) ## ----------------------------------------------------------------------------- params(theta.boot) confint(theta.boot) ## ----------------------------------------------------------------------------- theta.b <- empirical_dist(theta.boot$t) ## ----expectation-tests-boot, cache = TRUE------------------------------------- # should sum to 1 expectation(theta.b, function(x) 1) # mean (mu.b <- mean(theta.b)) # variance of (mu, var) expectation(theta.b, function(x) (x - mu.b)^2) # kurtosis of (mu, var) expectation(theta.b, function(x) (x - mu.b)^4) / expectation(theta.b, function(x) (x - mu.b)^2)^2 # skewness of mu and var -- should be (0, 0) expectation(theta.b, function(x) ((x - mu.b) / theta)^3) # covariance of (mu, var) -- should be around 0 expectation(theta.b, function(x) (x[1] - mu.b[1]) * (x[2] - mu.b[2])) ## ----bias-2------------------------------------------------------------------- bias(theta.boot) expectation(theta.mc, function(x) x - theta) bias(theta.hat, theta) ## ----------------------------------------------------------------------------- round(vcov(theta.b), digits = 3) round(vcov(theta.mc), digits = 3) round(vcov(theta.hat), digits = 3) ## ----coverage-prob-boot, cache = TRUE----------------------------------------- N <- 100 ns <- c(50, 100) coverage_prob <- matrix(NA, nrow=length(ns), ncol=2) j <- 1 for (n in ns) { count1 <- 0L count2 <- 0L for (i in 1:N) { d <- rnorm(n = n, mean = mu, sd = sqrt(var)) fit.boot <- mle_boot(boot( data = d, statistic = function(x, ind) { params(fit_normal(x[ind])) }, R = 250)) ci <- confint(fit.boot) if (ci[1, 1] <= mu && mu <= ci[1, 2]) { count1 <- count1 + 1 } if (ci[2, 1] <= var && var <= ci[2, 2]) { count2 <- count2 + 1 } } coverage_prob[j, 1] <- count1 / N coverage_prob[j, 2] <- count2 / N #cat("n = ", n, ", coverage = ", coverage_prob[j, ], "\n") j <- j + 1 } ## ----cov-plot-boot, echo = FALSE, fig.height = 4, fig.width = 5, cache = TRUE---- plot(ns, coverage_prob[, 2], type = "l", xlab = "Sample size", col = "blue", ylab = "Coverage") lines(ns, coverage_prob[, 1], col = "green") legend("bottomright", legend = c("mu", "var"), col = c("blue", "green"), lty = 1) abline(h = 0.95, lty = 2) ## ----------------------------------------------------------------------------- samp <- function(n, par) rnorm(n = n, mean = par[1], sd = sqrt(par[2])) pred(x = theta.hat, samp = samp) ## ----------------------------------------------------------------------------- par <- params(theta.hat) mu.hat <- par[1] var.hat <- par[2] c(mu.hat, qnorm(c(.025,.975), mean = mu.hat, sd = sqrt(var.hat))) ## ----------------------------------------------------------------------------- N <- 100 r <- 5 samp <- rnorm(N, mean = theta[1], sd = sqrt(theta[2])) samp.sub <- matrix(samp, nrow = r) mles.sub <- list(length = r) for (i in 1:r) mles.sub[[i]] <- fit_normal(samp.sub[i,]) mle.wt <- mle_weighted(mles.sub) mle <- fit_normal(samp) ## ----------------------------------------------------------------------------- params(mle.wt) vcov(mle.wt) ## ----------------------------------------------------------------------------- params(mle) vcov(mle) ## ----include = FALSE---------------------------------------------------------- options(oldopts)