## ----style, echo = FALSE, results = 'asis'------------------------------------
BiocStyle::markdown()

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  class.source = "watch-out",
  comment = "#>")

## ----setup, eval=TRUE, echo=FALSE, include=FALSE-------------------------------
library(ggplot2)
require(ipsRdbs)
knitr::opts_chunk$set(eval = FALSE)
figpath <- system.file("figures", package = "ipsRdbs")
print(figpath)

## ----coverplot, eval=TRUE, echo=FALSE, fig.cap="ipsRdbs book cover", fig.width=1.5, fig.height=2.5----
library(ipsRdbs)
figpath <- system.file("figures", package = "ipsRdbs")
knitr::include_graphics(paste0(figpath, "/cover.jpg"))

## ----echo=TRUE, eval=FALSE-----------------------------------------------------
# install.packages("ipsRdbs", dependencies=TRUE)

## ------------------------------------------------------------------------------
# library(ipsRdbs)
# ls("package:ipsRdbs")

## ----eval=TRUE-----------------------------------------------------------------
head(beanie)
summary(beanie)
plot(beanie$age, beanie$value, xlab="Age", ylab="Value", pch="*", col="red")

## ----bill, eval=TRUE-----------------------------------------------------------
head(bill)
summary(bill)
library(ggplot2)
gg <- ggplot2::ggplot(data=bill, aes(x=age, y=wealth)) +
  geom_point(aes(col=region, size=wealth)) +
  geom_smooth(method="loess", se=FALSE) +
  xlim(c(7, 102)) +
  ylim(c(1, 37)) +
  labs(subtitle="Wealth vs Age of Billionaires",
       y="Wealth (Billion US $)", x="Age",
       title="Scatterplot",
       caption = "Source: Fortune Magazine, 1992.")
plot(gg)

## ----bodyfat, eval=TRUE--------------------------------------------------------
summary(bodyfat)
plot(bodyfat$Skinfold, bodyfat$Bodyfat, xlab="Skin", ylab="Fat")
plot(bodyfat$Skinfold, log(bodyfat$Bodyfat), xlab="Skin", ylab="log Fat")
plot(log(bodyfat$Skinfold), log(bodyfat$Bodyfat), xlab="log Skin", ylab="log Fat")
# Keep the transformed variables in the data set
bodyfat$logskin <- log(bodyfat$Skinfold)
bodyfat$logbfat <- log(bodyfat$Bodyfat)
# Create a grouped variable
bodyfat$cutskin <- cut(log(bodyfat$Skinfold), breaks=6)
boxplot(data=bodyfat, Bodyfat~cutskin, col=2:7)
require(ggplot2)
p2 <- ggplot(data=bodyfat, aes(x=cutskin, y=logbfat)) +
  geom_boxplot(col=2:7) +
  stat_summary(fun=mean, geom="line", aes(group=1), col="blue", linewidth=1) +
  labs(x="Skinfold", y="Percentage of log bodyfat",
       title="Boxplot of log-bodyfat percentage vs grouped log-skinfold")
plot(p2)
n <- nrow(bodyfat)
x <- bodyfat$logskin
y <- bodyfat$logbfat
xbar <- mean(x)
ybar <- mean(y)
sx2 <- var(x)
sy2 <- var(y)
sxy <- cov(x, y)
r <- cor(x, y)
print(list(n=n, xbar=xbar, ybar=ybar, sx2=sx2, sy2=sy2, sxy=sxy, r=r))
hatbeta1 <- r * sqrt(sy2/sx2)       # calculates estimate of the slope
hatbeta0 <- ybar - hatbeta1 * xbar  # calculates estimate of the intercept
rs <- y - hatbeta0 - hatbeta1 * x   # calculates residuals
s2 <- sum(rs^2)/(n-2)               # calculates estimate of sigma^2
s2
bfat.lm <- lm(logbfat ~ logskin, data=bodyfat)
### Check the diagnostics
plot(bfat.lm$fit, bfat.lm$res, xlab="Fitted values", ylab="Residuals", pch="*")
abline(h=0)
### Should be a random scatter
qqnorm(bfat.lm$res, col=2)
qqline(bfat.lm$res, col="blue")
# All points should be on the straight line
summary(bfat.lm)
anova(bfat.lm)
plot(bodyfat$logskin, bodyfat$logbfat, xlab="log Skin", ylab="log Fat")
abline(bfat.lm, col=7)
title("Scatter plot with the fitted linear regression line")
# 95% CI for beta1:
# 0.88225 + c(-1, 1) * qt(0.975, df=100) * 0.02479
# round(0.88225 + c(-1, 1) * qt(0.975, df=100) * 0.02479, 2)
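# Optional cross-check (a sketch, not part of the original analysis): the
# hand-computed estimates and interval above should agree with R's built-in
# summaries of the fitted model.
cbind(hand = c(hatbeta0, hatbeta1), lm = coef(bfat.lm))
confint(bfat.lm, level = 0.95)  # compare with the hand-computed 95% CI for the slope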
# To test H0: beta1 = 1:
tstat <- (0.88225 - 1)/0.02479
pval <- 2 * (1 - pt(abs(tstat), df=100))
x <- seq(from=-5, to=5, length=500)
y <- dt(x, df=100)
plot(x, y, xlab="", ylab="", type="l")
title("T-density with df=100")
abline(v=abs(tstat))
abline(h=0)
x1 <- seq(from=abs(tstat), to=10, length=100)
y1 <- rep(0, length=100)
x2 <- x1
y2 <- dt(x1, df=100)
segments(x1, y1, x2, y2)
abline(h=0)
# Predict at a new value of Skinfold=70
# Create a new data set called newx
newx <- data.frame(logskin=log(70))
a <- predict(bfat.lm, newdata=newx, se.fit=TRUE)
# Confidence interval for the mean of log bodyfat at skinfold=70
a <- predict(bfat.lm, newdata=newx, interval="confidence")
# a
#           fit      lwr     upr
# [1,] 2.498339 2.474198 2.52248
# Prediction interval for a future log bodyfat at skinfold=70
a <- predict(bfat.lm, newdata=newx, interval="prediction")
a
#           fit      lwr      upr
# [1,] 2.498339 2.333783 2.662895
# Confidence intervals for the mean
pred.bfat.clim <- predict(bfat.lm, data=bodyfat, interval="confidence")
# Prediction intervals for a future observation
pred.bfat.plim <- suppressWarnings(predict(bfat.lm, data=bodyfat, interval="prediction"))
plot(bodyfat$logskin, bodyfat$logbfat, xlab="log Skin", ylab="log Fat")
abline(bfat.lm, col=5)
lines(log(bodyfat$Skinfold), pred.bfat.clim[,2], lty=2, col=2)
lines(log(bodyfat$Skinfold), pred.bfat.clim[,3], lty=2, col=2)
lines(log(bodyfat$Skinfold), pred.bfat.plim[,2], lty=4, col=3)
lines(log(bodyfat$Skinfold), pred.bfat.plim[,3], lty=4, col=3)
title("Scatter plot with the fitted line and prediction intervals")
symb <- c("Fitted line", "95% CI for mean", "95% CI for observation")
## legend(locator(1), legend = symb, lty = c(1, 2, 4), col = c(5, 2, 3))
# Shows where we predicted earlier
abline(v=log(70))
summary(bfat.lm)
anova(bfat.lm)

## ----bombhits, eval=TRUE-------------------------------------------------------
summary(bombhits)
# Create a vector of data
x <- c(rep(0, 229), rep(1, 211), rep(2, 93), rep(3, 35), rep(4, 7), 5)
y <- c(229, 211, 93, 35, 7, 1)  # Frequencies
rel_freq <- y/576
xbar <- mean(x)
pois_prob <- dpois(x=0:5, lambda=xbar)
fit_freq <- pois_prob * 576
# Check
cbind(x=0:5, obs_freq=y, rel_freq=round(rel_freq, 4),
      Poisson_prob=round(pois_prob, 4), fit_freq=round(fit_freq, 1))
obs_freq <- y
xuniques <- 0:5
a <- data.frame(xuniques=0:5, obs_freq=y, fit_freq=fit_freq)
barplot(rbind(obs_freq, fit_freq), args.legend = list(x = "topright"),
        xlab="No of bomb hits", names.arg = xuniques, beside=TRUE,
        col=c("darkblue", "red"), legend=c("Observed", "Fitted"),
        main="Observed and Poisson distribution fitted frequencies for the number of bomb hits in London")
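# Optional sketch (not in the original analysis): a chi-squared goodness-of-fit
# statistic for the Poisson fit. One degree of freedom is lost for estimating
# lambda, so df = 6 - 1 - 1 = 4. The last cells have small expected counts,
# so the chi-squared approximation is rough here.
chi_stat <- sum((obs_freq - fit_freq)^2/fit_freq)
pchisq(chi_stat, df = 4, lower.tail = FALSE)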
## ----eval=TRUE-----------------------------------------------------------------
summary(cement)
boxplot(data=cement, strength~gauger, col=1:3)
boxplot(data=cement, strength~breaker, col=1:3)

## ----eval=TRUE, message=FALSE, warning=FALSE-------------------------------------
summary(cheese)
pairs(cheese)
GGally::ggpairs(data=cheese)
cheese.lm <- lm(Taste ~ AceticAcid + LacticAcid + logH2S, data=cheese, subset=2:30)
# Check the diagnostics
plot(cheese.lm$fit, cheese.lm$res, xlab="Fitted values", ylab="Residuals")
abline(h=0)
# Should be a random scatter
qqnorm(cheese.lm$res, col=2)
qqline(cheese.lm$res, col="blue")
summary(cheese.lm)
cheese.lm2 <- lm(Taste ~ LacticAcid + logH2S, data=cheese)
# Check the diagnostics
plot(cheese.lm2$fit, cheese.lm2$res, xlab="Fitted values", ylab="Residuals")
abline(h=0)
qqnorm(cheese.lm2$res, col=2)
qqline(cheese.lm2$res, col="blue")
summary(cheese.lm2)
# How can we predict?
newcheese <- data.frame(AceticAcid = 300, LacticAcid = 1.5, logH2S = 4)
cheese.pred <- predict(cheese.lm2, newdata=newcheese, se.fit=TRUE)
cheese.pred
# Obtain confidence interval
cheese.pred$fit + c(-1, 1) * qt(0.975, df=27) * cheese.pred$se.fit
# Using R to predict
cheese.pred.conf.limits <- predict(cheese.lm2, newdata=newcheese, interval="confidence")
cheese.pred.conf.limits
# How to find prediction interval
cheese.pred.pred.limits <- predict(cheese.lm2, newdata=newcheese, interval="prediction")
cheese.pred.pred.limits

## ----emissions, eval=TRUE--------------------------------------------------------
summary(emissions)
rawdata <- emissions[, c(8, 4:7)]
pairs(rawdata)
# Fit the model on the raw scale
raw.lm <- lm(ADR37 ~ ADR27 + CS505 + T867 + H505, data=rawdata)
old.par <- par(no.readonly = TRUE)
par(mfrow=c(2,1))
plot(raw.lm$fit, raw.lm$res, xlab="Fitted values", ylab="Residuals",
     main="Anscombe plot")
abline(h=0)
qqnorm(raw.lm$res, main="Normal probability plot", col=2)
qqline(raw.lm$res, col="blue")
# summary(raw.lm)
logdata <- log(rawdata)
# This only logs the values but not the column names!
# We can use the following command to change the column names,
# or you can use fix(logdata) to do it.
dimnames(logdata)[[2]] <- c("logADR37", "logCS505", "logT867", "logH505", "logADR27")
pairs(logdata)
log.lm <- lm(logADR37 ~ logADR27 + logCS505 + logT867 + logH505, data=logdata)
plot(log.lm$fit, log.lm$res, xlab="Fitted values", ylab="Residuals",
     main="Anscombe plot")
abline(h=0)
qqnorm(log.lm$res, main="Normal probability plot", col=2)
qqline(log.lm$res, col="blue")
summary(log.lm)
log.lm2 <- lm(logADR37 ~ logADR27 + logT867 + logH505, data=logdata)
summary(log.lm2)
plot(log.lm2$fit, log.lm2$res, xlab="Fitted values", ylab="Residuals",
     main="Anscombe plot")
abline(h=0)
qqnorm(log.lm2$res, main="Normal probability plot", col=2)
qqline(log.lm2$res, col="blue")
par(old.par)

#####################################
# Multicollinearity analysis
#####################################
mod.adr27 <- lm(logADR27 ~ logT867 + logCS505 + logH505, data=logdata)
summary(mod.adr27)  # Multiple R^2 = 0.9936
mod.t867 <- lm(logT867 ~ logADR27 + logH505 + logCS505, data=logdata)
summary(mod.t867)   # Multiple R^2 = 0.977
mod.cs505 <- lm(logCS505 ~ logADR27 + logH505 + logT867, data=logdata)
summary(mod.cs505)  # Multiple R^2 = 0.9837
mod.h505 <- lm(logH505 ~ logADR27 + logCS505 + logT867, data=logdata)
summary(mod.h505)   # Multiple R^2 = 0.5784
# Variance inflation factors
vifs <- c(0.9936, 0.977, 0.9837, 0.5784)
vifs <- 1/(1-vifs)
# Condition numbers
X <- logdata       # X is a copy of logdata
X[,1] <- 1         # the first column of X is 1; this is for the intercept
X <- as.matrix(X)  # coerces X to be a matrix
xtx <- t(X) %*% X  # gives X^T X
eigenvalues <- eigen(xtx)$values
kappa <- max(eigenvalues)/min(eigenvalues)
kappa <- sqrt(kappa)  # kappa = 244 is much LARGER than 30!
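# Optional cross-checks (a sketch; the car package is not a stated dependency
# here, so that call is left commented): base R's kappa() returns the condition
# number of X directly, and car::vif(log.lm) would reproduce the variance
# inflation factors computed by hand above.
base::kappa(X, exact = TRUE)  # should agree with sqrt(max/min eigenvalue of X^T X)
# car::vif(log.lm)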
### Validation statistic
# Fit the log.lm2 model with the first 45 observations,
# use the fitted model to predict the remaining 9 observations,
# and calculate the mean square error validation statistic.
log.lmsub <- lm(logADR37 ~ logADR27 + logT867 + logH505, data=logdata, subset=1:45)
# Now predict all 54 observations using the fitted model
mod.pred <- predict(log.lmsub, logdata, se.fit=TRUE)
mod.pred$fit  # provides all the 54 predicted values
logdata$pred <- mod.pred$fit
# Get only the last 9
a <- logdata[46:54, ]
validation.residuals <- a$logADR37 - a$pred
validation.stat <- mean(validation.residuals^2)
validation.stat
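# For context (a sketch, not in the original analysis): compare the
# out-of-sample validation statistic with the in-sample mean squared error
# of the same fitted model; the validation statistic is typically larger.
insample.stat <- mean(residuals(log.lmsub)^2)
insample.stat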
## ----eval=TRUE-----------------------------------------------------------------
summary(err_age)

## ----ffood, eval=TRUE------------------------------------------------------------
summary(ffood)
# 95% confidence interval for the mean waiting time using the t-distribution
a <- c(ffood$AM, ffood$PM)
mean(a) + c(-1, 1) * qt(0.975, df=19) * sqrt(var(a))/sqrt(20)
# Two sample t-test for the difference between morning and afternoon times
t.test(ffood$AM, ffood$PM)
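# Optional cross-check (sketch): the hand-computed interval above should match
# the confidence interval reported by a one-sample t-test on the combined data.
t.test(a)$conf.int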
## ----gasmileage, eval=TRUE-------------------------------------------------------
summary(gasmileage)
y <- c(22, 26, 28, 24, 29, 29, 32, 28, 23, 24)
xx <- c(1, 1, 2, 2, 2, 3, 3, 3, 4, 4)
# Plot the observations
plot(xx, y, col="red", pch="*", xlab="Model", ylab="Mileage")

#### Method 1: Hand calculation
ni <- c(2, 3, 3, 2)
means <- tapply(y, xx, mean)
vars <- tapply(y, xx, var)
round(rbind(means, vars), 2)
sum(y^2)                                # gives 7115
totalSS <- sum(y^2) - 10 * (mean(y))^2  # gives 92.5
RSSf <- sum(vars*(ni-1))                # gives 31.17
groupSS <- totalSS - RSSf               # gives 61.33
meangroupSS <- groupSS/3                # gives 20.44
meanErrorSS <- RSSf/6                   # gives 5.194
Fvalue <- meangroupSS/meanErrorSS       # gives 3.936
pvalue <- 1-pf(Fvalue, df1=3, df2=6)

#### Method 2: Illustrate using dummy variables
# Create the design matrix X for the full regression model
g <- 4
n1 <- 2
n2 <- 3
n3 <- 3
n4 <- 2
n <- n1+n2+n3+n4
X <- matrix(0, ncol=g, nrow=n)        # Set X as a zero matrix initially
X[1:n1, 1] <- 1                       # Determine the first column of X
X[(n1+1):(n1+n2), 2] <- 1             # the 2nd column
X[(n1+n2+1):(n1+n2+n3), 3] <- 1       # the 3rd
X[(n1+n2+n3+1):(n1+n2+n3+n4), 4] <- 1 # the 4th

#### Fitting the full model ####
# Estimation
XtXinv <- solve(t(X)%*%X)
betahat <- XtXinv %*% t(X) %*% y  # Estimation of the coefficients
Yhat <- X %*% betahat             # Fitted Y values
Resids <- y - Yhat                # Residuals
SSE <- sum(Resids^2)              # Error sum of squares
S2hat <- SSE/(n-g)                # Estimate of sigma-square; mean square for error
Sigmahat <- sqrt(S2hat)

#### Fitting the reduced model -- the 4 means are equal ####
Xr <- matrix(1, ncol=1, nrow=n)
kr <- dim(Xr)[2]
# Estimation
Varr <- solve(t(Xr)%*%Xr)
hbetar <- solve(t(Xr)%*%Xr) %*% t(Xr) %*% y  # Estimation of the coefficients
hYr <- Xr %*% hbetar                         # Fitted Y values
Resir <- y - hYr                             # Residuals
SSEr <- sum(Resir^2)                         # Total sum of squares

#### F-test for comparing the reduced model with the full model ####
FStat <- ((SSEr-SSE)/(g-kr))/(SSE/(n-g))   # The test statistic of the F-test
alpha <- 0.05
Critical_value_F <- qf(1-alpha, g-kr, n-g) # The critical constant of the F-test
pvalue_F <- 1-pf(FStat, g-kr, n-g)         # p-value of the F-test

modelA <- c(22, 26)
modelB <- c(28, 24, 29)
modelC <- c(29, 32, 28)
modelD <- c(23, 24)
SSerror <- sum((modelA-mean(modelA))^2) + sum((modelB-mean(modelB))^2) +
  sum((modelC-mean(modelC))^2) + sum((modelD-mean(modelD))^2)
SStotal <- sum((y-mean(y))^2)
SSgroup <- SStotal - SSerror

#### Method 3: Use the built-in function lm directly ####
aa <- "modelA"
bb <- "modelB"
cc <- "modelC"
dd <- "modelD"
Expl <- c(aa, aa, bb, bb, bb, cc, cc, cc, dd, dd)
is.factor(Expl)
Expl <- factor(Expl)
model1 <- lm(y~Expl)
summary(model1)
anova(model1)
### Alternatively ###
xxf <- factor(xx)
is.factor(xxf)
model2 <- lm(y~xxf)
summary(model2)
anova(model2)

## ----possum, eval=TRUE------------------------------------------------------------
head(possum)
dim(possum)
summary(possum)
## Code lines used in the book
## Create a new data set
poss <- possum
poss$region <- factor(poss$Location)
levels(poss$region) <- c("W.A", "S.A", "N.T", "QuL", "NSW", "Vic", "Tas")
colnames(poss) <- c("y", "z", "Location", "x")
head(poss)
# Draw side by side boxplots
boxplot(y~x, data=poss, col=2:8, xlab="region", ylab="Weight")
# Obtain scatter plot
# Start with a skeleton plot
plot(poss$z, poss$y, type="n", xlab="Length", ylab="Weight")
# Add points for the seven regions
for (i in 1:7) {
  points(poss$z[poss$Location==i], poss$y[poss$Location==i],
         type="p", pch=as.character(i), col=i)
}
## Add legends
legend(x=76, y=4.2, legend=paste(as.character(1:7), levels(poss$x)),
       lty=1:7, col=1:7)
# Start modelling
# Fit the model with interaction
poss.lm1 <- lm(y~z+x+z:x, data=poss)
summary(poss.lm1)
plot(poss$z, poss$y, type="n", xlab="Length", ylab="Weight")
for (i in 1:7) {
  lines(poss$z[poss$Location==i], poss.lm1$fit[poss$Location==i],
        type="l", lty=i, col=i, lwd=1.8)
  points(poss$z[poss$Location==i], poss$y[poss$Location==i],
         type="p", pch=as.character(i), col=i)
}
poss.lm0 <- lm(y~z, data=poss)
# The seven separate regression lines were drawn in the loop above;
# abline adds the single common fitted line ignoring region.
abline(poss.lm0, lwd=3, col=9)
legend(x=76, y=4.2, legend=paste(as.character(1:7), levels(poss$x)),
       lty=1:7, col=1:7)
n <- length(possum$Body_Weight)
# Wrong model since Location is not a numeric covariate
wrong.lm <- lm(Body_Weight~Location, data=possum)
summary(wrong.lm)
nis <- table(possum$Location)
meanwts <- tapply(possum$Body_Weight, possum$Location, mean)
varwts <- tapply(possum$Body_Weight, possum$Location, var)
datasums <- data.frame(nis=nis, mean=meanwts, var=varwts)
modelss <- sum(datasums[,2] * (meanwts - mean(meanwts))^2)
residss <- sum((datasums[,2] - 1) * varwts)
fvalue <- (modelss/6) / (residss/94)
fcritical <- qf(0.95, df1=6, df2=94)
x <- seq(from=0, to=12, length=200)
y <- df(x, df1=6, df2=94)
plot(x, y, type="l", xlab="", ylab="Density of F(6, 94)", col=4)
abline(v=fcritical, lty=3, col=3)
abline(v=fvalue, lty=2, col=2)
pvalue <- 1-pf(fvalue, df1=6, df2=94)
### Doing the above in R
# Convert the Location column to a factor
localpossum <- possum
localpossum$Location <- as.factor(localpossum$Location)
summary(localpossum)  # Now Location is a factor
# Put the identifiability constraint:
options(contrasts=c("contr.treatment", "contr.poly"))
# Change to have easier column names
colnames(localpossum) <- c("y", "z", "x")
# Fit model M1
possum.lm1 <- lm(y~x, data=localpossum)
summary(possum.lm1)
anova(possum.lm1)
possum.lm2 <- lm(y~z, data=localpossum)
summary(possum.lm2)
anova(possum.lm2)
# Include both location and length but no interaction
possum.lm3 <- lm(y~x+z, data=localpossum)
summary(possum.lm3)
anova(possum.lm3)
# Include interaction effect
possum.lm4 <- lm(y~x+z+x:z, data=localpossum)
summary(possum.lm4)
anova(possum.lm4)
anova(possum.lm2, possum.lm3)
# Check the diagnostics for M3
plot(possum.lm3$fit, possum.lm3$res, xlab="Fitted values", ylab="Residuals",
     main="Anscombe plot")
abline(h=0)
qqnorm(possum.lm3$res, main="Normal probability plot", col=2)
qqline(possum.lm3$res, col="blue")
rm(localpossum)
rm(poss)

## ----puffin, eval=TRUE-------------------------------------------------------------
head(puffin)
dim(puffin)
summary(puffin)
pairs(puffin)
puffin$sqrtfreq <- sqrt(puffin$Nesting_Frequency)
puff.sqlm <- lm(sqrtfreq ~ Grass_Cover + Mean_Soil_Depth + Slope_Angle +
                  Distance_from_Edge, data=puffin)
old.par <- par(no.readonly = TRUE)
par(mfrow=c(2,1))
qqnorm(puff.sqlm$res, main="Normal probability plot", col=2)
qqline(puff.sqlm$res, col="blue")
plot(puff.sqlm$fit, puff.sqlm$res, xlab="Fitted values", ylab="Residuals",
     main="Anscombe plot", col="red")
abline(h=0)
summary(puff.sqlm)
par(old.par)

#####################################
# F-test for two betas at the same time
#####################################
puff.sqlm2 <- lm(sqrtfreq ~ Mean_Soil_Depth + Distance_from_Edge, data=puffin)
anova(puff.sqlm)
anova(puff.sqlm2)
fval <- 1/2 * (14.245-12.756)/0.387  # 1.924
qf(0.95, 2, 33)      # 3.28
1 - pf(fval, 2, 33)  # 0.162
anova(puff.sqlm2, puff.sqlm)

## ----rice, eval=TRUE---------------------------------------------------------------
summary(rice)
plot(rice$Days, rice$Yield, pch="*", xlab="Days", ylab="Yield")
rice$daymin31 <- rice$Days - 31
rice.lm <- lm(Yield ~ daymin31, data=rice)
summary(rice.lm)
# Check the diagnostics
plot(rice.lm$fit, rice.lm$res, xlab="Fitted values", ylab="Residuals")
abline(h=0)
# Should be a random scatter
# Needs a quadratic term
qqnorm(rice.lm$res, col=2)
qqline(rice.lm$res, col="blue")
rice.lm2 <- lm(Yield ~ daymin31 + I(daymin31^2), data=rice)
old.par <- par(no.readonly = TRUE)
par(mfrow=c(1, 2))
plot(rice.lm2$fit, rice.lm2$res, xlab="Fitted values", ylab="Residuals")
abline(h=0)
# Should be a random scatter
# Much better plot!
qqnorm(rice.lm2$res, col=2)
qqline(rice.lm2$res, col="blue")
summary(rice.lm2)
par(old.par)  # par(mfrow=c(1,1))
plot(rice$Days, rice$Yield, xlab="Days", ylab="Yield")
lines(rice$Days, rice.lm2$fit, lty=1, col=3)
rice.lm3 <- lm(Yield ~ daymin31 + I(daymin31^2) + I(daymin31^3), data=rice)
# Check the diagnostics
summary(rice.lm3)  # Will print the summary of the fitted model
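# Optional sketch (not in the original code): the three nested polynomial
# models can be compared with sequential F-tests in a single call.
anova(rice.lm, rice.lm2, rice.lm3)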
#### Predict at a new value of Days = 32.1465
# Create a new data set called new
new <- data.frame(daymin31 = 32.1465 - 31)
a <- predict(rice.lm2, newdata=new, se.fit=TRUE)
# Confidence interval for the mean of rice yield at Days = 32.1465
a <- predict(rice.lm2, newdata=new, interval="confidence")
a
#           fit      lwr      upr
# [1,] 3676.766 3511.904 3841.628
# Prediction interval for a future yield at Days = 32.1465
b <- predict(rice.lm2, newdata=new, interval="prediction")
b
#           fit      lwr      upr
# [1,] 3676.766 3206.461 4147.071

## ----wgain, eval=TRUE-------------------------------------------------------------
summary(wgain)
# 95% confidence interval for mean weight gain
x <- wgain$final - wgain$initial
mean(x) + c(-1, 1) * qt(0.975, df=67) * sqrt(var(x)/68)
# t-test of whether the mean difference equals 0
t.test(x)

## ----butterfly, eval=TRUE----------------------------------------------------------
butterfly(color = 6)
old.par <- par(no.readonly = TRUE)
par(mfrow=c(2, 2))
butterfly(a=10, b=1.5, color = "seagreen")
butterfly(color = 6)
butterfly(a=5, b=5, color=2)
butterfly(a=20, b=4, color = "blue")
par(old.par)  # par(mfrow=c(1, 1))

## ----Montyplot, eval=TRUE, echo=FALSE, fig.cap="Monty Hall game", fig.width=2, fig.height=1----
library(ipsRdbs)
figpath <- system.file("figures", package = "ipsRdbs")
knitr::include_graphics(paste0(figpath, "/monty.png"))

## -----------------------------------------------------------------------------------
# monty("stay", print_games = FALSE)
# monty("switch", print_games = FALSE)
# monty("random", print_games = FALSE)

## ----cltforBernoulli, eval=TRUE------------------------------------------------------
a <- see_the_clt_for_Bernoulli()
old.par <- par(no.readonly = TRUE)
par(mfrow=c(2, 3))
a30 <- see_the_clt_for_Bernoulli(nsize=30)
a50 <- see_the_clt_for_Bernoulli(nsize=50)
a100 <- see_the_clt_for_Bernoulli(nsize=100)
a500 <- see_the_clt_for_Bernoulli(nsize=500)
a1000 <- see_the_clt_for_Bernoulli(nsize=1000)
a5000 <- see_the_clt_for_Bernoulli(nsize=5000)
par(old.par)

## ----seethecltforuniform, eval=TRUE--------------------------------------------------
a <- see_the_clt_for_uniform()
old.par <- par(no.readonly = TRUE)
par(mfrow=c(2, 3))
a1 <- see_the_clt_for_uniform(nsize=1)
a2 <- see_the_clt_for_uniform(nsize=2)
a3 <- see_the_clt_for_uniform(nsize=5)
a4 <- see_the_clt_for_uniform(nsize=10)
a5 <- see_the_clt_for_uniform(nsize=20)
a6 <- see_the_clt_for_uniform(nsize=50)
par(old.par)
ybars <- see_the_clt_for_uniform(nsize=12)
zbars <- (ybars - mean(ybars))/sd(ybars)
k <- 100
u <- seq(from=min(zbars), to=max(zbars), length=k)
ecdf <- rep(NA, k)
# The source was truncated mid-line here; the loop below completes the
# empirical CDF calculation in the way implied by the preceding lines.
for (i in 1:k) ecdf[i] <- length(zbars[zbars < u[i]])/length(zbars)
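# Optional sketch (an assumed continuation, not in the truncated source):
# plot the empirical CDF of the standardised means against the standard
# normal CDF; close agreement illustrates the CLT.
plot(u, ecdf, type="l", xlab="z", ylab="CDF",
     main="Empirical CDF vs standard normal CDF")
lines(u, pnorm(u), col=2, lty=2)
legend("topleft", legend=c("Empirical", "N(0,1)"), col=1:2, lty=1:2)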