## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 8, fig.height = 5, warning = FALSE, message = FALSE ) ## ----libraries---------------------------------------------------------------- library(FFdownload) library(dplyr) library(tidyr) library(purrr) library(ggplot2) ## ----download, cache=TRUE----------------------------------------------------- outd <- file.path(tempdir(), paste0("ffap_", format(Sys.time(), "%H%M%S"))) outf <- tempfile(fileext = ".RData") inputlist <- c( "25_Portfolios_5x5", "100_Portfolios_10x10", "F-F_Research_Data_Factors", "F-F_Research_Data_5_Factors_2x3", "F-F_Momentum_Factor" ) FFdata <- FFdownload( output_file = outf, tempd = outd, exclude_daily = TRUE, download = TRUE, download_only = FALSE, inputlist = inputlist, format = "tbl", na_values = c(-99, -999, -99.99), return_data = TRUE ) ## ----factors------------------------------------------------------------------ # FF5 factors: Mkt.RF, SMB, HML, RMW, CMA, RF (start: Jul 1963) ff5_raw <- FFdata$`x_F-F_Research_Data_5_Factors_2x3`$monthly$Temp2 # FF3 factors back to Jul 1926 (for time-series tests) ff3_raw <- FFdata$`x_F-F_Research_Data_Factors`$monthly$Temp2 # Momentum factor mom_raw <- FFdata$`x_F-F_Momentum_Factor`$monthly$Temp2 # Combined factor table (full history where all factors are available) factors <- ff5_raw |> select(date, Mkt.RF, SMB, HML, RMW, CMA, RF) |> left_join(mom_raw |> select(date, Mom), by = "date") |> filter(!is.na(Mom)) # Mom starts Jan 1927; FF5 starts Jul 1963 → ~Jul 1963 # Factor table with FF3 history (no RMW/CMA/Mom) factors_ff3hist <- ff3_raw |> select(date, Mkt.RF, SMB, HML, RF) cat("Factor sample:", format(min(factors$date)), "to", format(max(factors$date)), "—", nrow(factors), "months\n") cat("FF3 history :", format(min(factors_ff3hist$date)), "to", format(max(factors_ff3hist$date)), "—", nrow(factors_ff3hist), "months\n") ## ----portfolios--------------------------------------------------------------- # Helper: find the value-weighted returns sub-table by name pattern find_vw <- function(monthly_list) { nm <- names(monthly_list) vw_nm <- grep("value_weight", nm, ignore.case = TRUE, value = TRUE) if (length(vw_nm) == 0) stop("Cannot find value-weighted sub-table. Available: ", paste(nm, collapse = ", ")) monthly_list[[vw_nm[1]]] } # 25 portfolios pf25_vw <- find_vw(FFdata$x_25_Portfolios_5x5$monthly) pf25_cols <- setdiff(names(pf25_vw), "date") # 25 portfolio column names stopifnot(length(pf25_cols) == 25) # 100 portfolios pf100_vw <- find_vw(FFdata$x_100_Portfolios_10x10$monthly) pf100_cols <- setdiff(names(pf100_vw), "date") # 100 portfolio column names stopifnot(length(pf100_cols) == 100) cat("25-pf column names (first 6):", paste(head(pf25_cols, 6), collapse=", "), "\n") ## ----panels------------------------------------------------------------------- # Long panel for 25 portfolios — join with FF3 history pf25_long <- pf25_vw |> pivot_longer(-date, names_to = "portfolio", values_to = "ret") |> left_join(factors_ff3hist, by = "date") |> mutate( ret_excess = ret - RF, # Position-based size (S1=small → S5=large) and BM (B1=growth → B5=value) labels pf_pos = as.integer(factor(portfolio, levels = pf25_cols)), size_q = ceiling(pf_pos / 5), bm_q = ((pf_pos - 1) %% 5) + 1, size_lbl = factor(size_q, labels = c("Small","S2","S3","S4","Large")), bm_lbl = factor(bm_q, labels = c("Growth","B2","B3","B4","Value")) ) |> filter(!is.na(ret_excess), !is.na(Mkt.RF)) # Long panel for 100 portfolios — join with FF5+Mom (1963+) pf100_long <- pf100_vw |> pivot_longer(-date, names_to = "portfolio", values_to = "ret") |> left_join(factors, by = "date") |> mutate( ret_excess = ret - RF, pf_pos = as.integer(factor(portfolio, levels = pf100_cols)), size_q = ceiling(pf_pos / 10), bm_q = ((pf_pos - 1) %% 10) + 1 ) |> filter(!is.na(ret_excess), !is.na(Mom)) # FF5+Mom sample (Jul 1963+) cat("25-pf panel: ", nrow(pf25_long), "rows,", n_distinct(pf25_long$portfolio), "portfolios\n") cat("100-pf panel:", nrow(pf100_long), "rows,", n_distinct(pf100_long$portfolio), "portfolios\n") ## ----ts_capm------------------------------------------------------------------ ts_capm <- pf25_long |> nest_by(portfolio, size_q, bm_q, size_lbl, bm_lbl) |> mutate( fit = list(lm(ret_excess ~ Mkt.RF, data = data)), alpha = coef(fit)[["(Intercept)"]], alpha_se = summary(fit)$coefficients["(Intercept)", "Std. Error"], alpha_t = summary(fit)$coefficients["(Intercept)", "t value"], beta_mkt = coef(fit)[["Mkt.RF"]], r2 = summary(fit)$r.squared, n_obs = nrow(data) ) |> ungroup() |> select(-data, -fit) cat("CAPM — mean |alpha|:", round(mean(abs(ts_capm$alpha)), 3), "%/month\n") cat(" significant alphas (|t| > 2):", sum(abs(ts_capm$alpha_t) > 2), "of", nrow(ts_capm), "\n") ## ----plot_capm_alpha, fig.height=4-------------------------------------------- ggplot(ts_capm, aes(x = bm_lbl, y = size_lbl, fill = alpha)) + geom_tile(colour = "white", linewidth = 0.5) + geom_text(aes(label = sprintf("%.2f\n(%.1f)", alpha, alpha_t)), size = 2.8, lineheight = 0.9) + scale_fill_gradient2(low = "#4575b4", mid = "white", high = "#d73027", midpoint = 0, name = "α (%/mo)") + scale_x_discrete(position = "top") + labs(title = "CAPM Alphas — 25 Size×BM Portfolios", subtitle = "Cell: alpha (t-stat); sample: Jul 1926 – present", x = "Book-to-Market Quintile", y = "Size Quintile") + theme_minimal(base_size = 11) + theme(axis.text.x = element_text(angle = 0)) ## ----ts_ff3------------------------------------------------------------------- ts_ff3 <- pf25_long |> nest_by(portfolio, size_q, bm_q, size_lbl, bm_lbl) |> mutate( fit = list(lm(ret_excess ~ Mkt.RF + SMB + HML, data = data)), alpha = coef(fit)[["(Intercept)"]], alpha_se = summary(fit)$coefficients["(Intercept)", "Std. Error"], alpha_t = summary(fit)$coefficients["(Intercept)", "t value"], b_mkt = coef(fit)[["Mkt.RF"]], b_smb = coef(fit)[["SMB"]], b_hml = coef(fit)[["HML"]], r2 = summary(fit)$r.squared ) |> ungroup() |> select(-data, -fit) cat("FF3 — mean |alpha|:", round(mean(abs(ts_ff3$alpha)), 3), "%/month\n") cat(" significant alphas (|t| > 2):", sum(abs(ts_ff3$alpha_t) > 2), "of", nrow(ts_ff3), "\n") ## ----plot_ff3_alpha, fig.height=4--------------------------------------------- ggplot(ts_ff3, aes(x = bm_lbl, y = size_lbl, fill = alpha)) + geom_tile(colour = "white", linewidth = 0.5) + geom_text(aes(label = sprintf("%.2f\n(%.1f)", alpha, alpha_t)), size = 2.8, lineheight = 0.9) + scale_fill_gradient2(low = "#4575b4", mid = "white", high = "#d73027", midpoint = 0, name = "α (%/mo)") + scale_x_discrete(position = "top") + labs(title = "FF3 Alphas — 25 Size×BM Portfolios", subtitle = "Cell: alpha (t-stat); sample: Jul 1926 – present", x = "Book-to-Market Quintile", y = "Size Quintile") + theme_minimal(base_size = 11) ## ----plot_alpha_comparison---------------------------------------------------- bind_rows( ts_capm |> mutate(model = "CAPM"), ts_ff3 |> mutate(model = "FF3") ) |> mutate( sig = ifelse(abs(alpha_t) > 2, "p < 0.05", "n.s."), label = paste0("S", size_q, "B", bm_q) ) |> ggplot(aes(x = reorder(label, bm_q + (size_q - 1) * 5), y = alpha, colour = model, shape = sig)) + geom_hline(yintercept = 0, linetype = "dashed", colour = "grey50") + geom_errorbar(aes(ymin = alpha - 2 * alpha_se, ymax = alpha + 2 * alpha_se), width = 0.3, alpha = 0.5) + geom_point(size = 2.5) + scale_colour_manual(values = c(CAPM = "#e41a1c", FF3 = "#377eb8")) + scale_shape_manual(values = c("p < 0.05" = 16, "n.s." = 1)) + labs(title = "Pricing Errors: CAPM vs FF3", subtitle = "Portfolios ordered by BM within size quintile; bars = ±2 SE", x = "Portfolio (S = size, B = book-to-market quintile)", y = "Alpha (%/month)", colour = "Model", shape = "") + theme_bw(base_size = 10) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, size = 7)) ## ----grs_function------------------------------------------------------------- grs_test <- function(returns_xmat, factors_xmat) { # returns_xmat : T × N matrix of excess portfolio returns (no NAs) # factors_xmat : T × K matrix of factor excess returns T <- nrow(returns_xmat); N <- ncol(returns_xmat); K <- ncol(factors_xmat) stopifnot(T > N + K + 1) # OLS: Y = X*B + E where X = [1 | factors] X <- cbind(1, factors_xmat) B <- solve(crossprod(X), crossprod(X, returns_xmat)) # (K+1) × N E <- returns_xmat - X %*% B # T × N residuals alpha <- B[1, ] # N intercepts # MLE residual covariance Sigma <- crossprod(E) / T # Factor Sharpe ratio squared mu_f <- colMeans(factors_xmat) Omega <- crossprod(scale(factors_xmat, center = TRUE, scale = FALSE)) / T sr2 <- as.numeric(t(mu_f) %*% solve(Omega) %*% mu_f) # GRS statistic grs_stat <- ((T - N - K) / N) * as.numeric(t(alpha) %*% solve(Sigma) %*% alpha) / (1 + sr2) p_val <- pf(grs_stat, df1 = N, df2 = T - N - K, lower.tail = FALSE) list(GRS = round(grs_stat, 3), p_value = round(p_val, 4), df1 = N, df2 = T - N - K, mean_abs_alpha = round(mean(abs(alpha)), 4)) } ## ----grs_run------------------------------------------------------------------ # Build wide matrices aligned to the same dates pf25_wide <- pf25_long |> select(date, portfolio, ret_excess) |> pivot_wider(names_from = portfolio, values_from = ret_excess) # Merge factors on same dates and drop any rows with NAs pf25_merged <- pf25_wide |> left_join(factors_ff3hist |> select(date, Mkt.RF, SMB, HML), by = "date") |> drop_na() ret_mat_25 <- as.matrix(pf25_merged[, pf25_cols]) capm_f_mat <- as.matrix(pf25_merged[, "Mkt.RF", drop = FALSE]) ff3_f_mat <- as.matrix(pf25_merged[, c("Mkt.RF","SMB","HML")]) grs_capm <- grs_test(ret_mat_25, capm_f_mat) grs_ff3 <- grs_test(ret_mat_25, ff3_f_mat) tibble( Model = c("CAPM", "FF3"), `GRS statistic` = c(grs_capm$GRS, grs_ff3$GRS), `p-value` = c(grs_capm$p_value, grs_ff3$p_value), `F(N, T-N-K)` = c(sprintf("F(%d,%d)", grs_capm$df1, grs_capm$df2), sprintf("F(%d,%d)", grs_ff3$df1, grs_ff3$df2)), `Mean |α| (%/mo)` = c(grs_capm$mean_abs_alpha, grs_ff3$mean_abs_alpha) ) |> knitr::kable(digits = 4, align = "lrrrr", caption = "GRS Test — 25 Size×BM Portfolios") ## ----momentum----------------------------------------------------------------- # WML time series wml <- mom_raw |> select(date, Mom) |> left_join(factors_ff3hist, by = "date") |> filter(!is.na(Mom), !is.na(Mkt.RF)) # Average WML return with t-statistic wml_mean <- mean(wml$Mom) wml_t <- t.test(wml$Mom)$statistic # CAPM alpha for WML fit_wml_capm <- lm(Mom ~ Mkt.RF, data = wml) # FF3 alpha for WML fit_wml_ff3 <- lm(Mom ~ Mkt.RF + SMB + HML, data = wml) mom_summary <- tibble( Measure = c("Mean WML return", "CAPM alpha", "FF3 alpha"), `Estimate (%/mo)` = c(wml_mean, coef(fit_wml_capm)["(Intercept)"], coef(fit_wml_ff3) ["(Intercept)"]), `t-statistic` = c(wml_t, summary(fit_wml_capm)$coef["(Intercept)", "t value"], summary(fit_wml_ff3) $coef["(Intercept)", "t value"]) ) knitr::kable(mom_summary, digits = 3, align = "lrr", caption = "Momentum (WML) — Mean Return and Pricing Errors") ## ----momentum_plot, fig.height=4---------------------------------------------- # Rolling 5-year average WML return wml |> mutate(date_num = as.numeric(as.Date(date)), roll_mean = zoo::rollapply(Mom, width = 60, FUN = mean, fill = NA, align = "right")) |> ggplot(aes(x = as.Date(date))) + geom_col(aes(y = Mom), fill = "grey80", width = 20) + geom_line(aes(y = roll_mean), colour = "#e41a1c", linewidth = 1) + geom_hline(yintercept = 0, linetype = "dashed") + labs(title = "WML Momentum Factor", subtitle = "Bars = monthly return; red line = 60-month rolling average", x = NULL, y = "Return (%/month)") + theme_bw() ## ----fmb_helpers-------------------------------------------------------------- # Newey-West standard error for the mean of a time series nw_se <- function(x, lags = NULL) { x <- x[!is.na(x)] T <- length(x) if (is.null(lags)) lags <- floor(4 * (T / 100)^(2/9)) xd <- x - mean(x) v <- sum(xd^2) / T if (lags > 0) { for (l in seq_len(lags)) { v <- v + 2 * (1 - l / (lags + 1)) * sum(xd[(l+1):T] * xd[1:(T-l)]) / T } } sqrt(v / T) } # Full FMB procedure # returns_mat : T × N matrix of excess returns # factors_mat : T × K matrix of factors fmb <- function(returns_mat, factors_mat, factor_names = NULL) { T <- nrow(returns_mat); N <- ncol(returns_mat); K <- ncol(factors_mat) if (is.null(factor_names)) factor_names <- colnames(factors_mat) # ── First pass: full-sample OLS betas ─────────────────────────────────────── X_ts <- cbind(1, factors_mat) B_ts <- solve(crossprod(X_ts), crossprod(X_ts, returns_mat)) betas <- t(B_ts[-1, , drop = FALSE]) # N × K # ── Second pass: monthly cross-sectional regressions ──────────────────────── X_cs <- cbind(1, betas) # N × (K+1) gammas <- matrix(NA_real_, nrow = T, ncol = K + 1) for (t in seq_len(T)) { y_t <- returns_mat[t, ] ok <- !is.na(y_t) if (sum(ok) > K + 1) { gammas[t, ] <- lm.fit(X_cs[ok, , drop = FALSE], y_t[ok])$coefficients } } # ── Average lambdas + Newey-West t-stats ──────────────────────────────────── lambda_names <- c("Intercept", factor_names) lam_mean <- colMeans(gammas, na.rm = TRUE) lam_se <- apply(gammas, 2, nw_se) lam_t <- lam_mean / lam_se tibble( term = lambda_names, lambda = lam_mean, nw_se = lam_se, t_stat = lam_t, signif = case_when( abs(lam_t) > 3.29 ~ "***", abs(lam_t) > 2.58 ~ "**", abs(lam_t) > 1.96 ~ "*", TRUE ~ "" ) ) } ## ----fmb_data----------------------------------------------------------------- # Wide matrices aligned to the same dates (FF5+Mom sample, Jul 1963+) pf100_wide <- pf100_long |> select(date, portfolio, ret_excess) |> pivot_wider(names_from = portfolio, values_from = ret_excess) pf100_merged <- pf100_wide |> left_join(factors, by = "date") |> drop_na() ret100 <- as.matrix(pf100_merged[, pf100_cols]) f_capm <- as.matrix(pf100_merged[, "Mkt.RF", drop = FALSE]) f_ff3 <- as.matrix(pf100_merged[, c("Mkt.RF","SMB","HML")]) f_ff4 <- as.matrix(pf100_merged[, c("Mkt.RF","SMB","HML","Mom")]) f_ff5 <- as.matrix(pf100_merged[, c("Mkt.RF","SMB","HML","RMW","CMA")]) cat("FMB sample:", format(min(pf100_merged$date)), "to", format(max(pf100_merged$date)), "—", nrow(pf100_merged), "months\n") cat("100 portfolios x", ncol(ret100), "columns (check: should be 100)\n") ## ----fmb_run------------------------------------------------------------------ res_capm <- fmb(ret100, f_capm, "Mkt.RF") res_ff3 <- fmb(ret100, f_ff3, c("Mkt.RF","SMB","HML")) res_ff4 <- fmb(ret100, f_ff4, c("Mkt.RF","SMB","HML","Mom")) res_ff5 <- fmb(ret100, f_ff5, c("Mkt.RF","SMB","HML","RMW","CMA")) ## ----fmb_table---------------------------------------------------------------- # All factor names across all models all_terms <- c("Intercept","Mkt.RF","SMB","HML","Mom","RMW","CMA") fmt_col <- function(res, model_name) { res |> mutate(cell = sprintf("%.3f%s (%.2f)", lambda, signif, t_stat)) |> select(term, cell) |> right_join(tibble(term = all_terms), by = "term") |> mutate(cell = replace_na(cell, "\u2014")) |> rename(!!model_name := cell) } fmb_table <- fmt_col(res_capm, "CAPM") |> left_join(fmt_col(res_ff3, "FF3"), by = "term") |> left_join(fmt_col(res_ff4, "FF4"), by = "term") |> left_join(fmt_col(res_ff5, "FF5"), by = "term") |> rename(Factor = term) knitr::kable(fmb_table, align = "lcccc", caption = paste( "Fama-MacBeth Results — 100 Size x BM Portfolios.", "Entries: lambda estimate (significance stars) (NW t-stat).", "*, **, *** = |t| > 1.96, 2.58, 3.29.")) ## ----fmb_plot, fig.height=5--------------------------------------------------- # Lambda point estimates with 95% NW confidence intervals bind_rows( res_capm |> mutate(model = "CAPM"), res_ff3 |> mutate(model = "FF3"), res_ff4 |> mutate(model = "FF4 (Carhart)"), res_ff5 |> mutate(model = "FF5") ) |> filter(term != "Intercept") |> mutate( lo = lambda - 1.96 * nw_se, hi = lambda + 1.96 * nw_se, term = factor(term, levels = c("Mkt.RF","SMB","HML","Mom","RMW","CMA")), model = factor(model, levels = c("CAPM","FF3","FF4 (Carhart)","FF5")) ) |> ggplot(aes(x = term, y = lambda, colour = model, ymin = lo, ymax = hi)) + geom_hline(yintercept = 0, linetype = "dashed", colour = "grey50") + geom_errorbar(position = position_dodge(width = 0.5), width = 0.25, alpha = 0.7) + geom_point(position = position_dodge(width = 0.5), size = 3) + scale_colour_brewer(palette = "Set1") + labs(title = "FMB Risk Prices λ — 100 Size×BM Portfolios", subtitle = "Point = NW mean; bars = 95% NW confidence interval", x = "Factor", y = "Risk price λ (%/month)", colour = "Model") + theme_bw()