## ----prelim, echo = FALSE, results="hide"------------------------------------- library( knitr) opts_chunk$set(cache=TRUE, message = FALSE, comment = "", dev="pdf", dpi=300, fig.show = "hold", fig.align = "center") ## ----setup1, eval=FALSE------------------------------------------------------- # install.packages( "stockR") ## ----setup2------------------------------------------------------------------- library( stockR) ## ----setSeed------------------------------------------------------------------ set.seed( 747) #a 747 is a big plane ## ----simData------------------------------------------------------------------ #number of individuals N <- 100 #number of markers to measure on each of the individuals M <- 5000 #number of sampling groups (same as number of individuals) S <- N #number of stocks K <- 3 #simulate the data myData <- sim.stock.data( nAnimal=N, nSNP=M, nSampleGrps=N, K=K) ## ----lookAtData--------------------------------------------------------------- #the dimensions of the data dim( myData) #the number of fish ncol( myData) #the number of markers nrow( myData) #summary of the stock sizes table( attributes( myData)$grps) #the individuals are ordered by stock membership attributes( myData)$grps #in this case the third stock is under-represented in the entire data set. On #average, there will be equal numbers though. ## ----lookAtData2-------------------------------------------------------------- #a quick look at the data -- first 5 markers and 3 individuals myData[1:5,1:3] #so, all individuals have two allele copies (homozygous) for the third loci #the second fish is heterozygous for the allele at the first loci. #and so on. ## ----findStocks--------------------------------------------------------------- #find the stocks in the data stocks <- stockSTRUCTURE( myData, K=3) #K-EM estimation is default #the (posterior) membership probability to each stock stocks$postProb #shows high discrimination, which could be erroneous (bootstrap soon) #hard classificaiton of individuals to stocks stocks$hardClass <- apply( stocks$postProb, 1, which.max) #Bootstrap to see about uncertainty in assignment #for serious applications many more than B=25 will be needed. #only 2 cores used to pass CRAN's arbitrary checks stocks$boot <- stockBOOT( stocks, B=25, mc.cores=2) #Assignment accounting for uncertainty. #Could also look at other model quantities (e.g. allele frequencties) stocks$uncertClass <- apply( stocks$boot$postProbs, 1:2, quantile, probs=c(0.05,0.5,0.95)) #careful inspection of this object will give the lower and #upper 90% confidence intervals and the median #e.g. for the 99th individual print( round( stocks$uncertClass[,99,], 3)) #showing that there is some uncertainty about which #stock this indivudal belongs to. ## ----sampGrps----------------------------------------------------------------- #number of sampling groups (same as number of individuals) S <- 15 #there are now 15 sampling groups (100 individuals will be #distributed between them). #number of stocks K <- 3 #simulate the data myData <- sim.stock.data( nAnimal=N, nSNP=M, nSampleGrps=S, K=K) #find stocks in data stocks <- stockSTRUCTURE( myData, K=3, sample.grps=attributes(myData)$sampleGrps) #once again, the stocks have been found (up to labelling) #the simulated values attributes( myData)$grps #the data-derived values (as hard clustered) apply( stocks$postProbs, 1, which.max) #Bootstrap to see about uncertainty in assignment #for serious applications many more than B=25 will be needed. #only 2 cores used to pass CRAN's arbitrary checks stocks$boot <- stockBOOT( stocks, B=25, mc.cores=2) #Assignment accounting for uncertainty. #Could also look at other model quantities (e.g. allele frequencties) stocks$uncertClass <- apply( stocks$boot$postProbs, 1:2, quantile, probs=c(0.05,0.5,0.95)) #careful inspection of this object will give the lower and #upper 90% confidence intervals and the median print( round( stocks$uncertClass, 3)) #Showing that there is almost no uncertainty about which #stock indivudals belong to. #There is lots of information in this data, especially when #genetic information between individuals in the same sampling #group is utilised. ## ----miss--------------------------------------------------------------------- myData <- sim.stock.data( nAnimal=N, nSNP=M, nSampleGrps=N, K=K) #add some missing data to simulation. There are 30% randomly missing scores. totMark <- prod( dim( myData)) myData[sample( 1:totMark, size=floor( 0.3*totMark))] <- NA #find stocks in data stocks <- stockSTRUCTURE( myData, K=3) #once again, the stocks have been found (up to labelling) #the simulated values attributes( myData)$grps #the data-derived values (as hard clustered) apply( stocks$postProbs, 1, which.max) #bootstrap not performed for this example ## ----Tidy, eval=FALSE--------------------------------------------------------- # #THIS IS SUGGESTIVE ONLY. # #You may wish to tidy your workspace. # #rm( list=ls()) ## ----sessionInfo, results = "asis", echo = FALSE------------------------------ toLatex(sessionInfo())