\documentclass[11pt]{article} \usepackage{graphicx} \usepackage{amssymb} \usepackage{epstopdf} \usepackage{caption} \usepackage{amsmath, amsthm} %%%%%%%%%%%%%%% MUST BE ADDED \usepackage{supertabular} \usepackage{wasysym} \usepackage{setspace} \usepackage{Sweave} \usepackage{tabularx} \newcolumntype{Y}{>{\footnotesize\raggedright\arraybackslash}X} %\singlespacing \onehalfspacing %\doublespacing \usepackage{natbib} %\usepackage{color} %\definecolor{MyDarkGreen}{rgb}{0.0,0.4,0.0} %\definecolor{MyDarkRed}{rgb}{0.4,0.0,0.0} %\usepackage[colorlinks=true, urlcolor= MyDarkGreen, linkcolor= MyDarkRed ]{hyperref} \usepackage{hyperref} \DeclareCaptionLabelSeparator{space} \DeclareGraphicsRule{.tif}{png}{.png}{`convert #1 `dirname #1`/`basename #1 .tif`.png} \textwidth = 6.5 in \textheight = 9 in \oddsidemargin = 0.0 in \evensidemargin = 0.0 in \topmargin = 0.0 in \headheight = 0.0 in \headsep = 0.0 in \parskip = 0.2in \parindent = 0.0in \newtheorem{theorem}{Theorem} \newtheorem{corollary}[theorem]{Corollary} \newtheorem{definition}{Definition} \newcommand{\ve}{\varepsilon} \newcommand{\wt}{\widetilde} \newcommand{\wh}{\widehat} \newcommand{\0}{\mathbf{0}} %\newcommand{\Fv}{\mathbf{F}} %\newcommand{\Lv}{\mathbf{L}} %\newcommand{\Ev}{\mathbf{E}} \newcommand{\st}{\mathrm{ \:\: s.t. \:\: }} \newcommand{\bs}[1]{\boldsymbol{#1}} \newcommand{\bm}[1]{\mbox{\boldmath$#1$}} \newcommand{\mr}[1]{\mathrm{#1}} \newcommand{\mb}[1]{\mathbf{#1}} \newcommand{\smss}[1]{^{_{#1}}} \newcommand{\apri}{\smss{\,(-)}} \newcommand{\apos}{\smss{\,(+)}} \newcommand{\betaup}{\rotatebox[origin=c]{12}{$\beta$}} \newcommand{\ttb}{\hspace{-0.01cm}} \newcommand{\diag}{\mathsf{diag}} \newcommand{\minz}{\mathsf{min}} \newcommand{\maxz}{\mathsf{max}} \newcommand{\zsin}{\mathsf{sin}} \newcommand{\zcos}{\mathsf{cos}} \newcommand{\SE}{\mathsf{SE}} \newcommand{\range}{\mathsf{range}} \newcommand{\ndxrng}[2]{#1 \,\!\! : \,\!\! #2} \newenvironment{DZcaption}[2]% {\begin{list}{}{\leftmargin#1\rightmargin#2}\item{}}% {\end{list}} \begin{document} %\VignetteIndexEntry{mactivate-examples-2} %% this is like a shebang %%% %\XVignetteDepends{mvtnorm} %% this is like a shebang %%% %\XVignetteDepends{maps} %% this is like a shebang \title{Package \texttt{mactivate} \\ Examples II} \author{Dave Zes} \maketitle \section{Example: Medium Data, Numerical Inputs, Numerical Response} <>= options(width=63) options(prompt=" ") options(continue=" ") @ <>= library(mactivate) set.seed(777) d <- 56 N <- 100000 X <- matrix(rnorm(N*d, 0, 1), N, d) #### colnames(X) <- paste0("x", I(1:d)) ############# primary effects b <- rep_len( c(-1/4, 1/4), d ) ########### ystar <- X %*% b + 1/3 * (X[ , 11]+1) * (X[ , 12]-1) * (X[ , 33]+1) - 1/2 * (X[ , 30]+0) * (X[ , 44]+1) * (X[ , 45]-0) * (X[ , 56]-1) + 1/3 * (X[ , 6]+1) * (X[ , 47]-1) - 1/2 * (X[ , 1]-1) * (X[ , 32]+0) * (X[ , 33]+1) * (X[ , 34]-0) * (X[ , 45]-0) * (X[ , 51]-1) m_tot <- 12 ############# xs1 <- "y ~ . + x11:x12:x33 + x30:x44:x45:x56 + x6:x47 + x1:x32:x33:x34:x45:x51" xs2 <- "y ~ . + x11*x12*x33 + x30*x44*x45*x56 + x6*x47 + x1*x32*x33*x34*x45*x51" xnotQuiteTrue_formula <- eval(parse(text=xs1)) xtrue_formula <- eval(parse(text=xs2)) xnoint_formula <- eval(parse(text="y ~ .")) yerrs <- rnorm(N, 0, 3) y <- ystar + yerrs ## y <- (y - mean(y)) / sd(y) ########## standardize X Xall <- t( ( t(X) - apply(X, 2, mean) ) / apply(X, 2, sd) ) yall <- y Nall <- N ####### fold index xxfoldNumber <- rep_len(1:2, N) ufolds <- sort(unique(xxfoldNumber)) ; ufolds ############### predict ############### predict dfx <- data.frame("y"=yall, Xall) ################### incorrectly fit LM: no interactions xlm <- lm(xnoint_formula , data=dfx) summary(xlm) yhat <- predict(xlm, newdata=dfx) sqrt( mean( (yall - yhat)^2 ) ) ################### incorrectly fit LM: no lower-order interactions xlm <- lm(xnotQuiteTrue_formula , data=dfx) summary(xlm) yhat <- predict(xlm, newdata=dfx) sqrt( mean( (yall - yhat)^2 ) ) ################### correctly fit LM xlm <- lm(xtrue_formula, data=dfx) summary(xlm) yhat <- predict(xlm, newdata=dfx) sqrt( mean( (yall - yhat)^2 ) ) ################ fit using hybrid m-activation ###### takes about 1.5-3 hours xcmact_hybrid <- f_control_mactivate( param_sensitivity = 10^12, bool_free_w = TRUE, w0_seed = 0.01, w_col_search = "alternate", max_internal_iter = 500, ##### ss_stop = 10^(-14), ### escape_rate = 1.003, #### 1.002, Wadj = 1/1, force_tries = 0, lambda = 0/10000, ### hybrid only tol = 10^(-14) ### hybrid only ) #### Fit Uall <- Xall xthis_fold <- ufolds[ 1 ] xndx_test <- which( xxfoldNumber %in% xthis_fold ) xndx_train <- setdiff( 1:Nall, xndx_test ) X_train <- Xall[ xndx_train, , drop=FALSE ] y_train <- yall[ xndx_train ] xxnow <- Sys.time() xxls_out <- f_fit_hybrid_01( X = X_train, y = y_train, m_tot = m_tot, U = X_train, m_start = 1, mact_control = xcmact_hybrid, verbosity = 1 ) cat( difftime(Sys.time(), xxnow, units="mins"), "\n" ) ######### check test error U_test <- Xall[ xndx_test, , drop=FALSE ] X_test <- Xall[ xndx_test, , drop=FALSE ] y_test <- yall[ xndx_test ] yhatTT <- matrix(NA, length(xndx_test), m_tot+1) for(iimm in 0:m_tot) { yhat_fold <- predict(object=xxls_out, X0=X_test, U0=U_test, mcols=iimm ) yhatTT[ , iimm + 1 ] <- yhat_fold } errs_by_m <- NULL for(iimm in 1:ncol(yhatTT)) { yhatX <- yhatTT[ , iimm] errs_by_m[ iimm ] <- sqrt(mean( (y_test - yhatX)^2 )) cat(iimm, "::", errs_by_m[ iimm ]) } ##### plot test RMSE vs m plot(0:(length(errs_by_m)-1), errs_by_m, type="l", xlab="m", ylab="RMSE Cost") #### 11 best ################## known 'true' for non zero-centered is xtrue_formula_use <- xtrue_formula xthis_fold <- ufolds[ 1 ] xndx_test <- which( xxfoldNumber %in% xthis_fold ) xndx_train <- setdiff( 1:Nall, xndx_test ) xlm <- lm(xtrue_formula_use , data=dfx[ xndx_train, ]) yhat <- predict(xlm, newdata=dfx[ xndx_test, ]) sqrt( mean( (y_test - yhat)^2 ) ) ############### ####### Let's dig in ####### We can use train W to construct test Xstar ####### Check which columns drive our response, y X_test <- Xall[ xndx_test, ] y_test <- yall[ xndx_test ] Xstar_test <- f_mactivate(U=X_test, W=xxls_out[[ length(xxls_out) ]][[ "What" ]]) Xi <- cbind(X_test, Xstar_test) xlm <- lm(y_test ~ Xi) sumxlm <- summary(xlm) @ <>= load(file="xlm4summary.RData") @ \newpage <>= print(sumxlm) @ <>= ########### Looks like cols 1,2,4,6 are where all the action is ########### (in other word, mactivate didn't track signal on passes 3 and 5) bWhat <- xxls_out[[ length(xxls_out) ]][[ "What" ]][ , c(1,2,4,6) ] bWhat wwmag <- apply(bWhat, 1, function(x) { return(sum(abs(x)))} ) ; wwmag plot(wwmag, type="h", lwd=4, main="W Coefficient Total Magnitute vs Input Term", xlab="Column of U (in this case, same as X)", ylab="Sum of magnitudes in fitted W", cex.lab=1.3 ) @ \begin{center} \includegraphics[width=15cm]{big_01_wmag.png} \end{center} <>= ########################### ########## these are the terms in X that ########## appear to have a polynomial contribution ########## to our response, y xxhigh <- which(wwmag > 0.5) ; xxhigh ###################### @ <>= ###### no need to show this ######## we can try refitting using only contributing inputs m_tot <- 8 X_train <- Xall[ xndx_train, ] y_train <- yall[ xndx_train ] xcmact_hybrid <- f_control_mactivate( param_sensitivity = 10^12, bool_free_w = TRUE, w0_seed = 0.01, w_col_search = "alternate", max_internal_iter = 500, ss_stop = 10^(-14), escape_rate = 1.001, Wadj = 1/1, force_tries = 0, lambda = 0/10000, tol = 10^(-14) ) xxnow <- Sys.time() xxls_out_lean <- f_fit_hybrid_01( X = X_train, y = y_train, m_tot = m_tot, U = X_train[ , xxhigh ], ### only consider 'significant' contributors m_start = 1, mact_control = xcmact_hybrid, verbosity = 1 ) cat( difftime(Sys.time(), xxnow, units="mins"), "\n" ) @ \section{Example: Orthopedic Sales Data, Numerical Inputs, Numerical Response, 10-Fold CV} <>= data(df_hospitals_ortho) xvars <- attr(df_hospitals_ortho, "modelvars") xmx <- as.matrix(df_hospitals_ortho[ , xvars]) old_par <- par(mfrow=c(3,4)) for(jj in 1:ncol(xmx)) { hist(xmx[ ,jj], main=colnames(xmx)[jj]) } ######## pause and look ################# transform xlog_list <- c( "tot_sales", "tot_knee", "tot_hip", "beds", "rehab_beds", "outpatient_visits", "adm_costs", "revenue_inpatient" ) ymx <- xmx for(jj in 1:ncol(xmx)) { ymx[ ,jj] <- log( xmx[ ,jj] + 1 ) } for(jj in 1:ncol(ymx)) { hist(ymx[ ,jj], main=colnames(ymx)[jj]) } ######## pause and look #### standardize ymx_stnd <- t( ( t( ymx ) - apply(ymx, 2, mean)) / apply(ymx, 2, sd) ) for(jj in 1:ncol(ymx_stnd)) { hist(ymx_stnd[ ,jj], main=colnames(ymx_stnd)[jj]) } ######## pause and look ############# let's fit an MLR model (in place, no train-test) ydf_stnd <- as.data.frame(ymx_stnd) xlm <- lm( tot_sales ~ . , data=ydf_stnd ) summary(xlm) yhat <- xlm$fitted yy <- ydf_stnd[ , "tot_sales" ] rmse <- sqrt( mean( (yy - yhat)^2 ) ) ; rmse 1 - rmse^2 / 1 #### r-squared ######## now let's break out m-activation using 10-fold CV library(mactivate) yall <- ymx_stnd[ , "tot_sales" ] Xall <- ymx_stnd[ , -1 ] Uall <- Xall xfolds <- rep_len( 1:10, length(yall) ) ## 10-fold CV m_tot <- 5 xmcont <- f_control_mactivate( param_sensitivity = 10^11, w0_seed = 0.1, max_internal_iter = 500, w_col_search = "one", bool_headStart = FALSE, ss_stop = 10^(-11), escape_rate = 1.001, step_size = 1/100, Wadj = 1/1, force_tries = 0, lambda = 0, tol = 10^(-8) ) ufolds <- sort(unique(xfolds)) yout <- numeric(length(yall)) #### takes about 5-10 minutes for(iif in 1:length(ufolds)) { xmask_fold <- xfolds %in% ufolds[ iif ] xxls_out <- f_fit_hybrid_01( X=Xall[ !xmask_fold, ], y=yall[ !xmask_fold ], m_tot=m_tot, U = Xall[ !xmask_fold, ], m_start = 1, mact_control = xmcont, verbosity = 0 ) xxls_out yhatG <- predict( object=xxls_out, X0=Xall[ xmask_fold, ], U0=Uall[ xmask_fold, ], mcols=m_tot ) yout[ xmask_fold ] <- yhatG cat("Done this fold:", iif, "\n\n") } mact_rmse <- sqrt( mean( (yall - yout)^2 ) ) ; mact_rmse cat("TT R2:", 1 - mact_rmse^2 / 1, "\n") #### m-activation R^2 par(old_par) @ \section{Example: Medium Data, Numerical Inputs, Dichotomous Response} <>= library(mactivate) set.seed(777) d <- 25 N <- 100000 X <- matrix(rnorm(N*d, 0, 1), N, d) #### colnames(X) <- paste0("x", I(1:d)) ############# primary effects b <- rep_len( c(-1/4, 1/4), d ) ystar <- X %*% b + 1/3 * (X[ , 1]+1) * (X[ , 2]-1) * (X[ , 3]+1) - 1/2 * (X[ , 3]+0) * (X[ , 4]+1) * (X[ , 5]-0) * (X[ , 6]-1) + 1/3 * (X[ , 6]+1) * (X[ , 7]-1) - 1/2 * (X[ , 1]-1) * (X[ , 2]+0) * (X[ , 3]+1) * (X[ , 4]-0) * (X[ , 5]-0) * (X[ , 7]-1) m_tot <- 10 ############# xs1 <- "y ~ . + x1:x2:x3 + x3:x4:x5:x6 + x6:x7 + x1:x2:x3:x4:x5:x7" xs2 <- "y ~ . + x1*x2*x3 + x3*x4*x5*x6 + x6*x7 + x1*x2*x3*x4*x5*x7" xnotQuiteTrue_formula <- eval(parse(text=xs1)) xtrue_formula <- eval(parse(text=xs2)) xnoint_formula <- eval(parse(text="y ~ .")) ysigmoid <- 1 / (1 + exp(-ystar)) range(ysigmoid) y <- rbinom(size=1, n=N, prob=ysigmoid) ########## standardize X Xall <- t( ( t(X) - apply(X, 2, mean) ) / apply(X, 2, sd) ) yall <- y Nall <- N ####### fold index xxfoldNumber <- rep_len(1:2, N) ufolds <- sort(unique(xxfoldNumber)) ; ufolds ############### predict ############### predict dfx <- data.frame("y"=yall, Xall) ################### xglm <- glm(xnoint_formula , data=dfx, family=binomial(link="logit")) summary(xglm) yhat <- predict(xglm, newdata=dfx, type="response") mean( f_logit_cost(y=yall, yhat=yhat) ) ####### known true when zero centered xglm <- glm(xnotQuiteTrue_formula , data=dfx, family=binomial(link="logit")) summary(xglm) yhat <- predict(xglm, newdata=dfx, type="response") mean( f_logit_cost(y=yall, yhat=yhat) ) ####### known true when not zero centered xglm <- glm(xtrue_formula , data=dfx, family=binomial(link="logit")) summary(xglm) yhat <- predict(xglm, newdata=dfx, type="response") mean( f_logit_cost(y=yall, yhat=yhat) ) ################################ alternate ################################ alternate -- about 1.5 hours xcmact_gradient <- f_control_mactivate( param_sensitivity = 10^9, bool_free_w = TRUE, w0_seed = 0.05, w_col_search = "alternate", bool_headStart = TRUE, ss_stop = 10^(-9), ### escape_rate = 1.003, Wadj = 1/1, force_tries = 0 ) Uall <- Xall xthis_fold <- ufolds[ 1 ] xndx_test <- which( xxfoldNumber %in% xthis_fold ) xndx_train <- setdiff( 1:Nall, xndx_test ) X_train <- Xall[ xndx_train, , drop=FALSE ] y_train <- yall[ xndx_train ] xxnow <- Sys.time() xxls_out <- f_fit_gradient_logistic_01( X = X_train, y = y_train, m_tot = m_tot, U = X_train, m_start = 1, mact_control = xcmact_gradient, verbosity = 1 ) cat( difftime(Sys.time(), xxnow, units="mins"), "\n" ) U_test <- Xall[ xndx_test, , drop=FALSE ] X_test <- Xall[ xndx_test, , drop=FALSE ] y_test <- yall[ xndx_test ] yhatTT <- matrix(NA, length(xndx_test), m_tot+1) for(iimm in 0:m_tot) { yhat_fold <- predict(object=xxls_out, X0=X_test, U0=U_test, mcols=iimm ) yhatTT[ , iimm + 1 ] <- yhat_fold[[ "p0hat" ]] } errs_by_m <- NULL for(iimm in 1:ncol(yhatTT)) { yhatX <- yhatTT[ , iimm] errs_by_m[ iimm ] <- mean( f_logit_cost(y=y_test, yhat=yhatX) ) cat(iimm, "::", errs_by_m[ iimm ]) } ##### plot test Logit vs m plot(0:(length(errs_by_m)-1), errs_by_m, type="l", xlab="m", ylab="Logit Cost") ################## use known 'true' in glm() xglm <- glm(xtrue_formula , data=dfx[ xndx_train, ], family=binomial(link="logit")) yhat <- predict(xglm, newdata=dfx[ xndx_test, ], type="response") mean( f_logit_cost(y=y_test, yhat=yhat) ) @ %\bibliographystyle{plainnat} %\bibliographystyle{jes} %\bibliographystyle{abbrv} %\bibliography{mactivation_01} \end{document}