## ---- echo=FALSE, message=FALSE, warning=FALSE-------------------------------- par( mgp=c(1.8,0.8,0), mar=c(2.5, 2.5,2,0.1) ) knitr::opts_chunk$set(fig.width = 8) ## ---- message=FALSE, warning=FALSE-------------------------------------------- library( mvtnorm ) library( ggplot2 ) library( dplyr ) library( hettx ) library( tidyr ) library( purrr ) data( ToyData ) ## ---- echo=TRUE--------------------------------------------------------------- data( ToyData ) head( ToyData ) td = gather( ToyData, x1, x2, x3, x4, key="X", value="value" ) td = gather( td, Y, tau, key="outcome", value="value2" ) ggplot( td, aes( x=value, y=value2, col=as.factor(Z) ) ) + facet_grid( outcome ~ X, scales="free" ) + geom_point( alpha=0.5, size=0.5) + geom_smooth( method="loess", se=FALSE ) + labs( x="Covariates", y="" ) ## ---- echo=TRUE--------------------------------------------------------------- mean( ToyData$tau ) ## ---- echo=TRUE--------------------------------------------------------------- par( mfrow=c(1,2) ) ll0 = lm( Y ~ Z, data=ToyData ) plot( ecdf( resid(ll0)[ToyData$Z==1] ), pch=".", main="Marginal CDFs of \n treatment and control") plot( ecdf( resid(ll0)[ToyData$Z==0] ), pch=".", col="red", add=TRUE ) ll1 = lm( Y ~ Z + x1 + x2 + x3 + x4, data=ToyData ) plot( ecdf( resid(ll1)[ToyData$Z==1] ), pch=".", main="Residual CDFs of \n treatment and control" ) plot( ecdf( resid(ll1)[ToyData$Z==0] ), pch=".", col="red", add=TRUE ) ## ---- echo=TRUE--------------------------------------------------------------- M0 <- lm( Y ~ Z * (x1+x2+x3), data=ToyData ) round( coef( M0 ), digits=1 ) ## ---- echo=TRUE, results='asis'----------------------------------------------- B <- 20 grid.size = 11 ## ---- echo=TRUE, cache=FALSE-------------------------------------------------- tst1 = detect_idiosyncratic( Y ~ Z, data=ToyData, B=B, grid.size = grid.size, verbose=FALSE ) summary( tst1 ) ## ---- echo=TRUE, cache=FALSE-------------------------------------------------- tst2 = detect_idiosyncratic( Y ~ Z, data=ToyData, control.formula = ~ x1 + x2 + x3 + x4, B=B, test.stat="SKS.stat.cov", verbose=FALSE ) summary( tst2 ) ## ---- echo=TRUE, cache=FALSE-------------------------------------------------- tst2b = detect_idiosyncratic( Y ~ Z, data=ToyData, control.formula = ~ x3 + x4, B=B, test.stat="SKS.stat.cov", verbose=FALSE ) summary( tst2b ) ## ---- echo=TRUE, cache=FALSE-------------------------------------------------- N = nrow(ToyData) ToyData$x4 = rnorm( N ) tst1b = detect_idiosyncratic( Y ~ Z, data=ToyData, control.formula = ~ x4, B=B, test.stat="SKS.stat.cov", verbose=FALSE ) summary( tst1b ) ## ----ideo_beyond_systematic, echo=TRUE, cache=FALSE--------------------------- B = 20 tst3a1 = detect_idiosyncratic( Y ~ Z, data=ToyData, interaction.formula = ~ x1, B=B, test.stat="SKS.stat.int.cov", verbose=FALSE ) summary( tst3a1 ) ## ---- echo=TRUE, cache=FALSE-------------------------------------------------- tst3a2 <- detect_idiosyncratic( Y ~ Z, data=ToyData, interaction.formula = ~ x1, control.formula = ~ x3 + x4, B=B, test.stat="SKS.stat.int.cov", verbose=FALSE ) summary( tst3a2 ) ## ---- echo=TRUE, cache=FALSE-------------------------------------------------- tst3a2b <- detect_idiosyncratic( Y ~ Z, data=ToyData, control.formula = ~ x2 + x3 + x4, interaction.formula = ~ x1, B=B, test.stat="SKS.stat.int.cov", verbose=FALSE ) summary( tst3a2b ) ## ----beyond_x1_x2, echo=TRUE, cache=FALSE------------------------------------- tst3b <- detect_idiosyncratic( Y ~ Z, data=ToyData, interaction.formula = ~ x1 + x2, B=B, test.stat="SKS.stat.int.cov", verbose=FALSE ) summary( tst3b ) ## ---- echo=TRUE--------------------------------------------------------------- tst3c <- detect_idiosyncratic( Y ~ Z, data=ToyData, interaction.formula = ~ x1 + x2, control.formula = ~ x3 + x4, B=B, test.stat="SKS.stat.int.cov", verbose=FALSE ) summary( tst3c ) ## ----full_correction, echo=TRUE, cache=FALSE---------------------------------- tst3d <- detect_idiosyncratic( Y ~ Z, data=ToyData, interaction.formula = ~ x1 + x2 + x3 + x4, B=B, test.stat="SKS.stat.int.cov", verbose=FALSE ) summary( tst3d ) ## ----------------------------------------------------------------------------- get.p.value( tst1b ) ## ----display, echo=TRUE------------------------------------------------------- tests = list( no_cov=tst1, useless_cov=tst1b, all_covariates=tst2, non_tx_covariates_only=tst2b, het_beyond_x1 = tst3a1, het_beyond_x1_with_x3_x4_cov=tst3a2, het_beyond_x1_with_all_cov=tst3a2, het_beyond_x1_x2=tst3b, het_beyond_x1_x2_with_cov=tst3c, het_beyond_all=tst3d ) agg.res = purrr::map( tests, get.p.value ) %>% purrr::map( as.list ) agg.res = bind_rows( agg.res, .id = "test" ) agg.res ## ----cautionary_tale, echo=TRUE----------------------------------------------- ll1 = lm( Y ~ Z + x1 + x2 + x3 + x4, data=ToyData ) print( summary( ll1 ) ) ## ----cautionary_tale_2, echo=TRUE--------------------------------------------- plot( ecdf( resid(ll1)[ToyData$Z==1] ), pch=".", main="Residual CDFs of treatment and control" ) plot( ecdf( resid(ll1)[ToyData$Z==0] ), pch=".", col="red", add=TRUE ) ## ----------------------------------------------------------------------------- variance.ratio.test( ToyData$Y, ToyData$Z ) ## ----------------------------------------------------------------------------- test.stat.info()