## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----eval=FALSE---------------------------------------------------------------
#  install.packages("devtools")
#  library(devtools)
#  install_github("lhe17/nebula")

## ----echo=TRUE----------------------------------------------------------------
library(nebula)
data(sample_data)

## ----echo=TRUE----------------------------------------------------------------
dim(sample_data$count)

## ----echo=TRUE----------------------------------------------------------------
sample_data$count[1:5,1:5]

## ----echo=TRUE----------------------------------------------------------------
head(sample_data$sid)
table(sample_data$sid)

## ----echo=TRUE----------------------------------------------------------------
head(sample_data$pred)
df = model.matrix(~X1+X2+cc, data=sample_data$pred)
head(df)

## ----echo=TRUE----------------------------------------------------------------
re = nebula(sample_data$count,sample_data$sid,pred=df,ncore=1)
re

## ----eval=FALSE,echo=TRUE-----------------------------------------------------
#  data_g = group_cell(count=sample_data$count,id=sample_data$sid,pred=df)
#  re = nebula(data_g$count,data_g$id,pred=data_g$pred)

## ----eval=FALSE,echo=TRUE-----------------------------------------------------
#  library(Matrix)
#  # An example of using the library size of each cell as the scaling factor
#  re = nebula(sample_data$count,sample_data$sid,pred=df,offset=Matrix::colSums(sample_data$count))

## ----echo=TRUE,eval=FALSE-----------------------------------------------------
#  library(nebula)
#  data("sample_seurat")
#  seuratdata <- scToNeb(obj = sample_seurat, assay = "RNA", id = "replicate", pred = c("celltype","tech"), offset="nCount_RNA")
#  ## Make sure that the variables do not contain NA; Otherwise, df would have fewer rows.
#  df = model.matrix(~celltype+tech, data=seuratdata$pred)
#  ## include only the first two cell types in the model to avoid separation due to too many binary variables
#  data_g = group_cell(count=seuratdata$count,id=seuratdata$id,pred=df[,c("(Intercept)","celltypeactivated_stellate","techcelseq2","techfluidigmc1","techindrop", "techsmartseq2")],offset=seuratdata$offset)
#  re = nebula(data_g$count,data_g$id,pred=data_g$pred,offset=data_g$offset)

## ----eval=TRUE,echo=TRUE------------------------------------------------------
re_ln = nebula(sample_data$count,sample_data$sid,pred=df,offset=sample_data$offset,method='LN',ncore=1)
re_hl = nebula(sample_data$count,sample_data$sid,pred=df,offset=sample_data$offset,method='HL',ncore=1)
## compare the estimated overdispersions
cbind(re_hl$overdispersion,re_ln$overdispersion)

## ----eval=TRUE,echo=TRUE------------------------------------------------------
## compare the p-values for testing the predictors using NEBULA-LN and NEBULA-HL
cbind(re_hl$summary[,10:12],re_ln$summary[,10:12])

## ----eval=TRUE,echo=TRUE------------------------------------------------------
re = nebula(sample_data$count,sample_data$sid,pred=df,offset=sample_data$offset,model='PMM',ncore=1)

## ----echo=FALSE,results='asis'------------------------------------------------
knitr::kable(re$summary)

## ----eval=FALSE,echo=TRUE-----------------------------------------------------
#  re = nebula(sample_data$count,sample_data$sid,pred=df,offset=sample_data$offset,model='NBLMM',reml=1,ncore=1)

## ----eval=TRUE,echo=TRUE------------------------------------------------------
df = model.matrix(~X1+X2+cc, data=sample_data$pred)
re_ln = nebula(sample_data$count,sample_data$sid,pred=df,offset=sample_data$offset,method='LN',covariance=TRUE,ncore=1)
cov= matrix(NA,4,4)
cov[lower.tri(cov,diag=T)] = as.numeric(re_ln$covariance[1,])
cov[upper.tri(cov)] = t(cov)[upper.tri(cov)]
cov

## ----eval=TRUE,echo=TRUE------------------------------------------------------
df = model.matrix(~X1+X2+cc, data=sample_data$pred)
## the gene to test
gene_i = 1
## output covariance
re_ln = nebula(sample_data$count,sample_data$sid,pred=df,offset=sample_data$offset,method='LN',covariance=TRUE,ncore=1)
## recover the covariance matrix
cov= matrix(NA,4,4)
cov[lower.tri(cov,diag=T)] = as.numeric(re_ln$covariance[gene_i,])
cov[upper.tri(cov)] = t(cov)[upper.tri(cov)]
## build the contrast vector
contrast = c(0,1,-1,0)
## testing the hypothesis
eff = sum(contrast*re_ln$summary[gene_i,1:4])
p = pchisq(eff^2/(t(contrast)%*%cov%*%contrast),1,lower.tail=FALSE)
p

## ----eval=FALSE,echo=TRUE-----------------------------------------------------
#  re = nebula(sample_data$count,sample_data$sid,pred=df,offset=sample_data$offset)
#  pres = nbresidual(re,count=sample_data$count,id=sample_data$sid,pred=df,offset=sample_data$offset)

## ----eval=FALSE,echo=TRUE-----------------------------------------------------
#  re = nebula(sample_data$count,sample_data$sid,pred=df,offset=sample_data$offset,output_re=TRUE)

## ----eval=FALSE,echo=TRUE-----------------------------------------------------
#  pres = nbresidual(re,count=sample_data$count,id=sample_data$sid,pred=df,offset=sample_data$offset,conditional=TRUE)