## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"--------------------
# Apply the Bioconductor LaTeX style; the chunk options above insert the
# output into the document as-is.
BiocStyle::latex()

## ----setup, include=FALSE, cache=FALSE-------------------------------------
library(knitr)
# Chunk-wide figure defaults: 70% of \maxwidth, centred.
knitr::opts_chunk$set(
  out.width = "0.7\\maxwidth",
  fig.align = "center"
)

## ----load_bsgenome, message=FALSE------------------------------------------
library(BSgenome.Hsapiens.UCSC.hg38)
# Short handle to the hg38 BSgenome object exported by the package above.
# Later chunks pass it to predictBranchpoints() as `BSgenome = g`.
g <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38

## ----load_branchpointer, message=FALSE-------------------------------------
library(branchpointer)

## ----read_exon_gtf, message=FALSE, eval=FALSE------------------------------
# exons <- gtfToExons("gencode.v26.annotation.gtf")

## ----read_exon_small, message=FALSE----------------------------------------
# Path to the reduced GENCODE v26 GTF that ships in branchpointer's extdata.
smallExons <- system.file(
  "extdata", "gencode.v26.annotation.small.gtf",
  package = "branchpointer"
)
# Exon annotation object; every later chunk passes it as `exons = exons`.
exons <- gtfToExons(smallExons)

## ----show_introns, message=FALSE-------------------------------------------
# Path to the example intron query table that ships in branchpointer's extdata.
queryIntronFile <- system.file(
  "extdata", "intron_example.txt",
  package = "branchpointer"
)
# Read it as a plain tab-delimited table purely to preview the input layout.
queryIntronTable <- read.delim(queryIntronFile)
head(queryIntronTable)

## ----read_introns, message=FALSE-------------------------------------------
# Read the same intron file as "region" queries against the exon annotation.
queryIntron <- readQueryFile(
  queryIntronFile,
  queryType = "region",
  exons = exons
)
head(queryIntron)

## ----make_intron, message=FALSE--------------------------------------------
# Build a query directly from the annotation for a single exon id.
queryIntronFromGTF <- makeBranchpointWindowForExons(
  "ENSE00000939171.1",
  idType = "exon_id",
  exons = exons
)
head(queryIntronFromGTF)

# Several ids can be supplied at once as a character vector. This call
# overwrites the single-id result assigned above.
queryIntronFromGTF <- makeBranchpointWindowForExons(
  c("ENSE00000939171.1", "ENSE00001814242.1"),
  idType = "exon_id",
  exons = exons
)
head(queryIntronFromGTF)

## ----predict_introns, message=FALSE----------------------------------------
# Run the prediction on the intron region queries, passing the hg38 BSgenome
# object `g`.
branchpointPredictionsIntron <- predictBranchpoints(
  queryIntron,
  queryType = "region",
  BSgenome = g
)
head(branchpointPredictionsIntron)

## ----brca2-plot------------------------------------------------------------
# Plot the window for the second intron query id, without the mutated panel
# and with the structure panel.
plotBranchpointWindow(
  queryIntron$id[2],
  branchpointPredictionsIntron,
  probabilityCutoff = 0.52,
  plotMutated = FALSE,
  plotStructure = TRUE,
  exons = exons
)

## ----show_snp, message=FALSE-----------------------------------------------
# Path to the example SNP query table that ships in branchpointer's extdata.
querySNPFile <- system.file(
  "extdata", "SNP_example.txt",
  package = "branchpointer"
)
# Read it as a plain tab-delimited table purely to preview the input layout.
querySNPTable <- read.delim(querySNPFile)
head(querySNPTable)

## ----read_snp, message=FALSE-----------------------------------------------
# Read the SNP file as "SNP" queries against the exon annotation, with
# filtering switched on.
querySNP <- readQueryFile(
  querySNPFile,
  queryType = "SNP",
  exons = exons,
  filter = TRUE
)
head(querySNP)

## ----read_snp_mart, message=FALSE------------------------------------------
library(biomaRt)
# Connect to the Ensembl SNP mart at www.ensembl.org (needs network access).
mart <- useMart(
  "ENSEMBL_MART_SNP",
  dataset = "hsapiens_snp",
  host = "www.ensembl.org"
)
# Build SNP queries from two rs ids via the mart, with filtering switched off.
# This replaces the `querySNP` read from file in the previous chunk.
querySNP <- makeBranchpointWindowForSNP(
  c("rs587776767", "rs786205083"),
  mart.snp = mart,
  exons = exons,
  filter = FALSE
)
head(querySNP)

## ----snp_att_fa, message=FALSE, eval=FALSE---------------------------------
# branchpointPredictionsSNP <- predictBranchpoints(querySNP,
#                                         queryType = "SNP",
#                                         genome = "GRCh38.primary_assembly.genome.fa",
#                                         bedtoolsLocation="/Apps/bedtools2/bin/bedtools")

## ----snp_att_BS, message=FALSE---------------------------------------------
# Predictions for the SNP queries, passing the hg38 BSgenome object `g`.
branchpointPredictionsSNP <- predictBranchpoints(
  querySNP,
  queryType = "SNP",
  BSgenome = g
)
head(branchpointPredictionsSNP)

# Summarise the predictions for each query SNP.
querySNPSummary <- predictionsToSummary(
  querySNP,
  branchpointPredictionsSNP
)
head(querySNPSummary)

## ----rs587776767-plot------------------------------------------------------
# Plot the window for the second SNP query id, with both the mutated panel
# and the structure panel.
plotBranchpointWindow(
  querySNP$id[2],
  branchpointPredictionsSNP,
  probabilityCutoff = 0.52,
  plotMutated = TRUE,
  plotStructure = TRUE,
  exons = exons
)

## ----run times, message=FALSE, eval=FALSE----------------------------------
# 
# # Step times for annotating branchpoints in introns:
# gtfToExons()
# # user  system elapsed
# # 41.385   3.848  47.096
# 
# # Set 1. 294 lincRNA introns on chr22:
# makeBranchpointWindowForExons()
# # user  system elapsed
# # 0.196   0.024   0.226
# predictBranchpoints()
# # user  system elapsed
# # 208.934   4.157 225.849
# 
# # Set 2. 3693 protein coding exons on chr22:
# makeBranchpointWindowForExons()
# # user  system elapsed
# # 0.245   0.013   0.261
# predictBranchpoints()
# # user   system  elapsed
# # 2332.519   38.266 2482.032
# 
# # Step times for annotating branchpoints with SNPs:
# # 29899 GWAS SNPs
# readQueryFile(filter = TRUE)
# # user  system elapsed
# # 5.997   1.608   7.773
# readQueryFile(filter = FALSE)
# # user  system elapsed
# # 1.744   0.427   2.339
# 
# # 298 filtered SNPs
# predictBranchpoints()
# # user  system elapsed
# # 172.495   2.485 181.876
# 
# predictionsToSummary()
# # user  system elapsed
# # 0.057   0.003   0.061

## ----sessionInfo, eval=TRUE------------------------------------------------
# Record the R version and attached/loaded packages used to build this document.
utils::sessionInfo()

