## ---- echo = FALSE------------------------------------------------------------ knitr::opts_chunk$set(collapse = TRUE, comment = "#>") ## ---- echo=TRUE, message=FALSE, knitr::opts_chunk$set(cache=TRUE)------------- library(blink) data(RLdata500) head(RLdata500) ## ----------------------------------------------------------------------------- # X.c contains the categorical variables # X.s contains the string variables # p.c is the number of categorical variables # p.s contains the number of string variables X.c <- RLdata500[c("by","bm","bd")] X.c <- as.matrix(RLdata500[,"bd"],ncol=1) p.c <- ncol(X.c) X.s <- as.matrix(RLdata500[-c(2,4,7)]) p.s <- ncol(X.s) ## ----------------------------------------------------------------------------- # File number identifier # Note: Recall that X.c and X.s include all files "stacked" on top of each other. # The vector below keeps track of which rows of X.c and X.s are in which files. file.num <- rep(c(1,2,3),c(200,150,150)) ## ----------------------------------------------------------------------------- # Subjective choices for distortion probability prior a <-1 b <- 999 ## ----------------------------------------------------------------------------- d <- function(string1,string2){adist(string1,string2)} ## ----------------------------------------------------------------------------- c <- 1 ## ----results="hide"----------------------------------------------------------- library(knitr) library(blink) library(plyr) Sys.setenv(TMPDIR="/tmp/") configure.vars="TMPDIR=/tmp/" lam.gs <- rl.gibbs(file.num=file.num,X.s=X.s,X.c=X.c,num.gs=2,a=a,b=b,c=c,d=d, M=500) #system.time(lam.gs <- rl.gibbs(file.num=file.num,X.s=X.s,X.c=X.c,num.gs=2,a=a,b=b,c=c,d=d, M=500)) ## ---- fig.show="hold", fig.cap="The red line is the ground truth (450), which is not close to the estimate (500) since we only ran 10 Gibbs sampling iterations."---- #estLink <- tempfile(pattern = "lam.gs") estLink <- lam.gs estPopSize <- apply(estLink , 1, function(x) {length(unique(x))}) plot(density(estPopSize),xlim=c(300,500),main="",lty=1, "Observed Population Size", ylim= c(0,1)) abline(v=450,col="red") abline(v=mean(estPopSize),col="black",lty=2) mean(estPopSize) sd(estPopSize)