## ----include = FALSE---------------------------------------------------------- have_packages = TRUE knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = have_packages # https://r-pkgs.org/vignettes.html#sec-vignettes-eval-option ) fig_width = 7 fig_height = 7/3 origpar = par() # Install locally # devtools::install_local( R'(C:\Users\James.Thorson\Desktop\Git\mvtweedie)', force=TRUE ) # Build and PDF # setwd(R'(C:\Users\James.Thorson\Desktop\Git\mvtweedie)'); devtools::build_rmd("vignettes/Introduction.Rmd"); rmarkdown::render( "vignettes/Introduction.Rmd", rmarkdown::pdf_document()) ## ----eval=TRUE, include=TRUE, echo=FALSE, message=FALSE, warning=FALSE, fig.width=fig_width, fig.height=fig_height---- set.seed(101) library(viridisLite) library(tweedie) library(abind) library(raster) library(plotrix) plot_histogram <- function( x, freq = TRUE, breaks = "Sturges", y_buffer = 0.05, ylim = NULL, xlim = NULL, main = "", col = "lightgrey", bty = "o", add = FALSE, do_plot = TRUE, ...){ # Modify default inputs if( !is.list(x) ){ if( is.vector(x) ){ x = list( x ) } if( is.matrix(x) ){ tmp = list() for( cI in 1:ncol(x) ){ tmp[[cI]] = x[,cI] } x = tmp } } if( length(col)==1 & length(x)>1 ) col = rep(col,length(x)) # Figure out ylim Hist = NULL if(is.null(ylim)) ylim = c(NA, 0) if(is.null(xlim)){ xlim_to_use = c(NA, NA) }else{ xlim_to_use = xlim } for(i in 1:length(x)){ Hist[[i]] = hist( x[[i]], breaks=breaks, plot=FALSE ) if(is.na(ylim[1]) & freq==TRUE) ylim[2] = max(ylim[2], max(Hist[[i]]$counts)*(1+y_buffer) ) if(is.na(ylim[1]) & freq==FALSE) ylim[2] = max(ylim[2], max(Hist[[i]]$density)*(1+y_buffer) ) if(is.null(xlim)) xlim_to_use = range( c(xlim_to_use,Hist[[i]]$breaks), na.rm=TRUE) } if(is.na(ylim[1])) ylim[1] = 0 # Plot if( do_plot==TRUE ){ for(i in 1:length(x)){ hist( x[[i]], breaks=breaks, freq=freq, ylim=ylim, xlim=xlim_to_use, col=col[i], main=main, add=ifelse(i==1,add,TRUE), ...) } if( bty=="o" ) box() } # Return stuff Return = list("Hist"=Hist, "ylim"=ylim) return( invisible(Return) ) } y = x = seq(0,1,length=100) n_prey = 100 prey_cz = cbind( "x" = c(0.6, 0.2, 0.2), "y" = c(0.5, 0.8, 0.5), "sd" = c(0.2, 0.2, 0.2), "mean" = c(1.2, 1.2, 1.2), "cv" = c(1, 1, 1) ) site_cz = cbind( "x" = c(0.2, 0.5, 0.8), "y" = c(0.8, 0.5, 0.2), "radius" = c(0.050, 0.075, 0.100) ) get_density = function(x, y, alpha_z){ sqrt( dnorm(x,alpha_z[1],alpha_z[3])/dnorm(alpha_z[1],alpha_z[1],alpha_z[3]) * dnorm(y,alpha_z[2],alpha_z[3])/dnorm(alpha_z[2],alpha_z[2],alpha_z[3]) ) } D_xyc = abind( outer(x, y, FUN=get_density, alpha_z = prey_cz[1,]), outer(x, y, FUN=get_density, alpha_z = prey_cz[2,]), outer(x, y, FUN=get_density, alpha_z = prey_cz[3,]), along=3 ) simulate_prey = function(n=1000, alpha_z){ loc_nz = matrix(nrow=0,ncol=2) while(nrow(loc_nz)runif(1)){ loc_nz = rbind(loc_nz,xy) } } # Simulate weights weight_n = rgamma(n=n, shape=1/alpha_z['cv']^2, scale=alpha_z['mean']*alpha_z['cv']^2) response = cbind("x"=loc_nz[,1], "y"=loc_nz[,2], "weight"=weight_n) return(response) } n_d = 1000 n_r = 1000 separate_zeros = TRUE par(mfrow=c(1,3), mar=c(2,2,2,1), oma=c(0,3.5,0,0), mgp=c(2,0.5,0), tck=-0.02, yaxs="i" ) # First row Y_rcc = array(NA, dim=c(n_r,3,3), dimnames=list(NULL,"Site"=NULL,"Prey"=NULL) ) for( c1 in 1:3 ){ D = sp::SpatialPointsDataFrame( coords=expand.grid(x,y), data=data.frame("D"=as.vector(D_xyc[,,c1])) ) Raster_layer = raster( D, nrows=length(x), ncols=length(x) ) R = rasterize( x=D@coords, y=Raster_layer, field=as.numeric(D@data[,1]), fun=mean ) plot(1, type="n", xlim=c(0,1), ylim=c(0,1), xaxs="i", yaxs="i", xlab="", ylab="") contour( R, add=TRUE, levels=seq(0,1,by=0.25) ) P = simulate_prey(n=n_prey, alpha_z=prey_cz[c1,]) points( x=P[,1], y=P[,2], col=viridis(3)[c1], pch=20, cex=sqrt(P[,3]) ) #drawc1rcle( x=site_cz[c1,'x'], y=site_cz[c1,'y'], # radius=site_cz[c1,'radius'], fillColor=rainbow(3,alpha=0.2)[c1]) for(c2 in 1:nrow(site_cz)){ draw.circle( x=site_cz[c2,'x'], y=site_cz[c2,'y'], radius=site_cz[c2,'radius'], col=rainbow(3,alpha=0.2)[c2], border=rgb(0,0,0,0)) dist_vec = RANN::nn2( data=site_cz[c2,c('x','y'),drop=FALSE], query=P[,c('x','y')] )$nn.dist[,1] Y_rcc[1,c2,c1] = sum(P[which(dist_vecmaxD_c[c1],maxD_c[c1],X) if( separate_zeros==TRUE ){ Zero_c = colMeans(X==0) X = ifelse(X==0,-2,X) out = plot_histogram( X, breaks=c(-2,-1,seq(0,maxD_c[c1],length=20)), do_plot=FALSE, freq=FALSE ) #, ylim=c(0,maxY_c[c1]) ) maxY_c[c1] = max( c(maxY_c[c1], 1.2*out$ylim[2], 1.2*Zero_c) ) out = plot_histogram( X, breaks=c(-2,-1,seq(0,maxD_c[c1],length=20)), col=viridis(3,alpha=0.2), border=NA, freq=FALSE, xlim=c(0,maxD_c[c1]), ylim=c(0,maxY_c[c1]) ) points( x=rep(0,3), y=Zero_c, col=viridis(3), pch=15, cex=2 ) abline( v=Y_rcc[1,c1,], col=viridis(3), lty="dotted", lwd=2 ) mtext( side=3, text=paste0("Site ",toupper(letters[c1])), col=rainbow(3)[c1], font=2 ) }else{ out = plot_histogram( X, breaks=c(-2,-1,seq(0,maxD_c[c1],length=20)), col=viridis(3,alpha=0.2), border=NA, freq=FALSE, xlim=c(0,maxD_c[c1]) ) maxY_c[c1] = max( c(maxY_c[c1], 1.2*out$ylim[2]) ) } legend("topright", bty="n", fill=viridis(3), legend=formatC(colMeans(Y_rcc[,c1,])/sum(colMeans(Y_rcc[,c1,])),format="f",digits=2), title="Proportion" ) if(c1==1) mtext( side=2, text="Sampling from\ntrue distribution", line=2 ) } ## ----eval=TRUE, include=TRUE, echo=FALSE, warning=FALSE, fig.width=fig_width, fig.height=fig_height---- par(mfrow=c(1,3), mar=c(2,2,2,1), oma=c(0,3.5,0,0), mgp=c(2,0.5,0), tck=-0.02, yaxs="i" ) ## 3rd row #density_zcc = array(NA, dim=c(length(d_z),3,3), dimnames=list("Density"=NULL,"Site"=NULL,"Prey"=NULL) ) #bsample_zcc = rsample_zcc = array(NA, dim=c(1000,3,3), dimnames=list("Density"=NULL,"Site"=NULL,"Prey"=NULL) ) #for( c1 in 1:3 ){ # d_z = seq(0, maxD_c[c1], length=n_d) # while(any(is.na(rsample_zcc[,c1,]))){ # for( c2 in 1:3 ){ # density_zcc[,c1,c2] = dtweedie( d_z, mu=mu_cc[c1,c2], power=p_cc[c1,c2], phi=phi_cc[c1,c2] ) # rsample = rtweedie( dim(rsample_zcc)[1], mu=mu_cc[c1,c2], power=p_cc[c1,c2], phi=phi_cc[c1,c2] ) # bsample_zcc[,c1,c2] = rsample_zcc[,c1,c2] = ifelse( is.na(rsample_zcc[,c1,c2]), rsample, rsample_zcc[,c1,c2] ) # } # rsample_zcc[,c1,] = rsample_zcc[,c1,] * outer( ifelse(rowSums(rsample_zcc[,c1,])==0,NA,1), rep(1,3) ) # } # if( separate_zeros==TRUE ){ # plot( 1, type="n", xlim=range(d_z), xlab="Prey biomass", ylab="", ylim=c(0,maxY_c[c1]) ) # , ylim=c(0,1.05*max(density_zcc[,c1,])) # points( x=rep(0,3), y=density_zcc[1,c1,], col=viridis(3), pch=20, cex=3 ) # matplot( x=d_z[-1], y=density_zcc[-1,c1,], col=viridis(3), lty="solid", type="l", add=TRUE, lwd=3 ) # mtext( side=3, text=paste0("Site ",toupper(letters[c1])), col=rainbow(3)[c1], font=2 ) # #abline( v=Y_rcc[1,c1,], col=viridis(3), lty="dotted", lwd=2 ) # }else{ # X = bsample_zcc[,c1,] # X = ifelse(X>maxD_c[c1],maxD_c[c1],X) # plot_histogram( X, breaks=c(-2,-1,seq(0,maxD_c[c1],length=20)), col=viridis(3,alpha=0.2), border=NA, freq=FALSE, xlim=c(0,maxD_c[c1]), ylim=c(0,maxY_c[c1]) ) # } # legend("topright", bty="n", fill=viridis(3), legend=formatC(mu_cc[c1,]/sum(mu_cc[c1,]),format="f",digits=2), title="Proportion" ) # if(c1==1) mtext( side=2, text="Tweedie:\ndistribution of density", line=2 ) #} # 4th row density_zcc = array(NA, dim=c(length(d_z),3,3), dimnames=list("Density"=NULL,"Site"=NULL,"Prey"=NULL) ) bsample_zcc = rsample_zcc = array(NA, dim=c(1000,3,3), dimnames=list("Density"=NULL,"Site"=NULL,"Prey"=NULL) ) for( c1 in 1:3 ){ d_z = seq(0, maxD_c[c1], length=n_d) while(any(is.na(rsample_zcc[,c1,]))){ for( c2 in 1:3 ){ density_zcc[,c1,c2] = dtweedie( d_z, mu=mu_cc[c1,c2], power=pprime_cc[c1,c2], phi=phiprime_cc[c1,c2] ) rsample = rtweedie( dim(rsample_zcc)[1], mu=mu_cc[c1,c2], power=pprime_cc[c1,c2], phi=phiprime_cc[c1,c2] ) bsample_zcc[,c1,c2] = rsample_zcc[,c1,c2] = ifelse( is.na(rsample_zcc[,c1,c2]), rsample, rsample_zcc[,c1,c2] ) } rsample_zcc[,c1,] = rsample_zcc[,c1,] * outer( ifelse(rowSums(rsample_zcc[,c1,])==0,NA,1), rep(1,3) ) } if( separate_zeros==TRUE ){ plot( 1, type="n", xlim=range(d_z), xlab="Prey biomass", ylab="", ylim=c(0,maxY_c[c1]) ) # , ylim=c(0,1.05*max(density_zcc[,c1,])) points( x=rep(0,3), y=density_zcc[1,c1,], col=viridis(3), pch=20, cex=3 ) matplot( x=d_z[-1], y=density_zcc[-1,c1,], col=viridis(3), lty="solid", type="l", add=TRUE, lwd=3 ) mtext( side=3, text=paste0("Site ",toupper(letters[c1])), col=rainbow(3)[c1], font=2 ) #abline( v=Y_rcc[1,c1,], col=viridis(3), lty="dotted", lwd=2 ) }else{ X = bsample_zcc[,c1,] X = ifelse(X>maxD_c[c1],maxD_c[c1],X) plot_histogram( X, breaks=c(-2,-1,seq(0,maxD_c[c1],length=20)), col=viridis(3,alpha=0.2), border=NA, freq=FALSE, xlim=c(0,maxD_c[c1]), ylim=c(0,maxY_c[c1]) ) } legend("topright", bty="n", fill=viridis(3), legend=formatC(mu_cc[c1,]/sum(mu_cc[c1,]),format="f",digits=2), title="Proportion" ) if(c1==1) mtext( side=2, text="mvtweedie GLM:\ndistribution of density", line=2 ) } ## ----eval=TRUE, echo=TRUE, message=FALSE, fig.width=6, fig.height=6, out.width = "75%"---- # Loading package library(mvtweedie) # load data set data( Middleton_Island_TUPU, package="mvtweedie" ) Middleton_Island_TUPU$Year = as.numeric(as.character( Middleton_Island_TUPU$Year_factor )) # Illustrate format knitr::kable( head(Middleton_Island_TUPU), digits=1, row.names=FALSE) ## ----eval=TRUE, echo=TRUE, message=FALSE, fig.width=6, fig.height=6, out.width = "75%"---- # Run Tweedie GLM library(mgcv) gam0 = gam( formula = Response ~ 0 + group + s(Year,by=group) + s(Year), data = Middleton_Island_TUPU, family = tw ) # Load class to enable predict.mvtweedie mygam = gam0 class(mygam) = c( "mvtweedie", class(mygam) ) ## ----eval=TRUE, echo=TRUE, message=FALSE, fig.width=fig_width, fig.height=fig_width, warning=FALSE---- # Predict values newdata = expand.grid( "group" = levels(Middleton_Island_TUPU$group), "Year" = 1978:2018 ) pred = predict( mygam, se.fit = TRUE, newdata = newdata ) newdata = cbind( newdata, fit=pred$fit, se.fit=pred$se.fit ) newdata$lower = newdata$fit - newdata$se.fit newdata$upper = newdata$fit + newdata$se.fit # Plot library(ggplot2) theme_set(theme_bw()) ggplot(newdata, aes(Year, fit)) + geom_pointrange(aes(ymin = lower, ymax = upper)) + facet_wrap(vars(group)) + ylim(0,1) + labs(y="Predicted proportion") ## ----eval=TRUE, echo=TRUE, message=FALSE, fig.width=fig_width, fig.height=fig_width, warning=FALSE---- library(pdp) # Make function to interface with pdp pred.fun = function( object, newdata ){ predict( object = object, category_name = "group", newdata = newdata ) } # Calculate partial dependence # approx = TRUE gives effects for average of other covariates # approx = FALSE gives each pdp curve Partial = partial( object = mygam, pred.var = c( "Year", "group"), pred.fun = pred.fun, train = mygam$model, approx = TRUE ) # Lattice plots as default option library( lattice ) plotPartial( Partial ) # using in ggplot2 ggplot(data=Partial, aes(x=Year, y=yhat)) + # , group=yhat.id geom_line( ) + facet_grid( vars(group) ) + labs(y="Predicted proportion") ## ----eval=TRUE, echo=TRUE, message=FALSE, fig.width=6, fig.height=6, warning=FALSE---- # Load data data(southeast_alaska_wolf) groups = c("Black tailed deer","Marine mammal", "Mountain goat", "Beaver") southeast_alaska_wolf = subset( southeast_alaska_wolf, group %in% groups ) # southeast_alaska_wolf$group = factor(southeast_alaska_wolf$group) # Illustrate format knitr::kable( head(southeast_alaska_wolf), digits=1, row.names=FALSE) ## ----eval=TRUE, echo=TRUE, message=FALSE, fig.width=fig_width, fig.height=fig_width, out.width = "75%", warning=FALSE---- # Formula Formula = Response ~ 0 + group + s(Latitude,Longitude,m=c(1,0.5),bs="ds") + s(Latitude,Longitude,by=group,m=c(1,0.5),bs="ds") # Using mgcv gam_wolf = gam( formula = Formula, data = southeast_alaska_wolf, family = tw ) class(gam_wolf) = c( "mvtweedie", class(gam_wolf) ) ## ----eval=TRUE, echo=TRUE----------------------------------------------------- predict_grid <- function( model, exclude_terms = NULL, length_out = 50, values = NULL, ... ){ if( !any(c("gam","glmmTMB") %in% class(model)) ){ stop("`predict_grid` only implemented for mgcv and glmmTMB") } n_terms <- length(model[["var.summary"]]) term_list <- list() for (term in 1:n_terms) { term_summary <- model[["var.summary"]][[term]] term_name <- names(model[["var.summary"]])[term] if (term_name %in% names(values)) { new_term <- values[[which(names(values) == term_name)]] if (is.null(new_term)) { new_term <- model[["var.summary"]][[term]][[1]] } } else { if (is.numeric(term_summary)) { min_value <- min(term_summary) max_value <- max(term_summary) new_term <- seq(min_value, max_value, length.out = length_out) } else if (is.factor(term_summary)) { new_term <- levels(term_summary) } else { stop("The terms are not numeric or factor.\n") } } term_list <- append(term_list, list(new_term)) names(term_list)[term] <- term_name } new_data <- expand.grid(term_list) class(model) = c( "mvtweedie", class(model) ) pred <- predict( model, newdata = new_data, se.fit = TRUE, ...) predicted <- as.data.frame(pred) predictions <- cbind(new_data, predicted) return(predictions) } ## ----eval=TRUE, echo=TRUE, message=FALSE, fig.width=fig_width, warning=FALSE---- library(rnaturalearth) library(raster) library(sf) library(dplyr) # Predict raster on map pred_wolf = predict_grid( gam_wolf, length_out = 100 ) pred_wolf$cv.fit = pred_wolf$se.fit / pred_wolf$fit # Map oceanmap layer US_high = ne_countries(scale=50, country="united states of america") st_box = st_polygon( list(cbind( x=c(-140,-125,-124,-140,-140), y=c(50,50,60,60,50))) ) st_box = st_sfc(st_box, crs=st_crs(US_high) ) wmap = st_intersection( US_high, st_box ) oceanmap = st_difference( st_as_sfc(st_bbox(wmap)), wmap ) sf.isl <- data.frame(island = c("Baranof", "Chichagof", "Admiralty"), lat = c(57.20583, 57.88784, 57.59644), lon = c(-135.1866, -136.0024, -134.5776)) %>% st_as_sf(., coords = c("lon", "lat"), crs = 4326) mask.land = ne_countries(scale=50, country="united states of america", returnclass = 'sf') %>% st_set_crs(., 4326) %>% st_cast(., "POLYGON") %>% st_join(., sf.isl) %>% filter(!is.na(island)) # Make figure my_breaks = c(0.02,0.1,0.25,0.5,0.75) ggplot(oceanmap) + geom_tile(data=pred_wolf, aes(x=Longitude,y=Latitude,fill=fit)) + geom_sf() + geom_sf(data = mask.land, inherit.aes = FALSE, fill = "darkgrey") + coord_sf(xlim=range(pred_wolf$Longitude), ylim=range(pred_wolf$Latitude), expand = FALSE) + facet_wrap(vars(group), ncol = 5) + scale_fill_gradient2(name = "Proportion", trans = "sqrt", breaks = my_breaks) + scale_y_continuous(breaks = seq(55,59,2)) + scale_x_continuous(breaks = c(-135, -133.5, -132)) + theme(axis.text = element_text(size = 7)) ## ----include = FALSE---------------------------------------------------------- par(origpar)