## ----echo = FALSE, message = FALSE--------------------------------------------
# NOTE(review): this file looks like R code extracted from an R Markdown
# vignette (knitr-style `## ----<chunk options>----` headers) -- confirm.
# Only this first chunk contains live code; every `eval=FALSE` chunk below is
# kept fully commented out, so the examples that query remote BioMart
# services are never executed when this script is sourced.

# Very wide console so that wide tables are printed without wrapping.
options(width = 750)

# Chunk options only matter while knitting; guard the call so the script can
# still be sourced on a machine where knitr is not installed.
if (requireNamespace("knitr", quietly = TRUE)) {
  knitr::opts_chunk$set(
    comment = "#>",
    error = FALSE,
    tidy = FALSE
  )
}

## ----eval=FALSE---------------------------------------------------------------
# # install the biomaRt package
# # (the former biocLite.R installer script is defunct; use BiocManager)
# # install.packages("BiocManager")
# # BiocManager::install("biomaRt")
# # load biomaRt
# library(biomaRt)
# # look at top 10 databases
# head(biomaRt::listMarts(host = "https://www.ensembl.org"), 10)

## ----eval=FALSE---------------------------------------------------------------
# head(biomaRt::listDatasets(
#   biomaRt::useMart("ENSEMBL_MART_ENSEMBL",
#                    host = "https://www.ensembl.org")), 10)

## ----eval=FALSE---------------------------------------------------------------
# head(biomaRt::listAttributes(biomaRt::useDataset(
#   dataset = "hsapiens_gene_ensembl",
#   mart    = biomaRt::useMart("ENSEMBL_MART_ENSEMBL",
#                              host = "https://www.ensembl.org"))), 10)

## ----eval=FALSE---------------------------------------------------------------
# head(biomaRt::listFilters(biomaRt::useDataset(
#   dataset = "hsapiens_gene_ensembl",
#   mart    = biomaRt::useMart("ENSEMBL_MART_ENSEMBL",
#                              host = "https://www.ensembl.org"))), 10)

## ----eval=FALSE---------------------------------------------------------------
# # 1) select a mart and data set
# mart <- biomaRt::useDataset(
#   dataset = "hsapiens_gene_ensembl",
#   mart    = biomaRt::useMart("ENSEMBL_MART_ENSEMBL",
#                              host = "https://www.ensembl.org"))
#
# # 2) run a biomart query using the getBM() function
# # and specify the attributes and filter arguments
# geneSet <- "GUCA2A"
#
# resultTable <- biomaRt::getBM(
#   attributes = c("start_position", "end_position", "description"),
#   filters    = "hgnc_symbol",
#   values     = geneSet,
#   mart       = mart)
#
# resultTable

## ----eval=FALSE---------------------------------------------------------------
# # load the biomartr package
# library(biomartr)
#
# # list all available databases
# biomartr::getMarts()

## ----eval=FALSE---------------------------------------------------------------
# head(biomartr::getDatasets(mart = "ENSEMBL_MART_ENSEMBL"), 5)

## ----eval=FALSE---------------------------------------------------------------
# tail(biomartr::getDatasets(mart = "ENSEMBL_MART_ENSEMBL"), 38)

## ----eval=FALSE---------------------------------------------------------------
# # show all elements of the data.frame
# options(tibble.print_max = Inf)
# # list all available attributes for dataset: hsapiens_gene_ensembl
# head(biomartr::getAttributes(mart    = "ENSEMBL_MART_ENSEMBL",
#                              dataset = "hsapiens_gene_ensembl"), 10)

## ----eval=FALSE---------------------------------------------------------------
# # show all elements of the data.frame
# options(tibble.print_max = Inf)
# # list all available filters for dataset: hsapiens_gene_ensembl
# head(biomartr::getFilters(mart    = "ENSEMBL_MART_ENSEMBL",
#                           dataset = "hsapiens_gene_ensembl"), 10)

## ----eval=FALSE---------------------------------------------------------------
# # show all elements of the data.frame
# options(tibble.print_max = Inf)
# # retrieving all available datasets and biomart connections for
# # a specific query organism (scientific name)
# biomartr::organismBM(organism = "Homo sapiens")

## ----eval=FALSE---------------------------------------------------------------
# # show all elements of the data.frame
# options(tibble.print_max = Inf)
# # return available attributes for "Homo sapiens"
# head(biomartr::organismAttributes("Homo sapiens"), 20)

## ----eval=FALSE---------------------------------------------------------------
# # show all elements of the data.frame
# options(tibble.print_max = Inf)
# # search for attribute topic "id"
# head(biomartr::organismAttributes("Homo sapiens", topic = "id"), 20)

## ----eval=FALSE---------------------------------------------------------------
# # show all elements of the data.frame
# options(tibble.print_max = Inf)
# # search for attribute topic "homolog"
# head(biomartr::organismAttributes("Homo sapiens", topic = "homolog"), 20)

## ----eval=FALSE---------------------------------------------------------------
# # show all elements of the data.frame
# options(tibble.print_max = Inf)
# # search for attribute topic "dn"
# head(biomartr::organismAttributes("Homo sapiens", topic = "dn"))

## ----eval=FALSE---------------------------------------------------------------
# # show all elements of the data.frame
# options(tibble.print_max = Inf)
# # search for attribute topic "ds"
# head(biomartr::organismAttributes("Homo sapiens", topic = "ds"))

## ----eval=FALSE---------------------------------------------------------------
# # show all elements of the data.frame
# options(tibble.print_max = Inf)
# # return available filters for "Homo sapiens"
# head(biomartr::organismFilters("Homo sapiens"), 20)

## ----eval=FALSE---------------------------------------------------------------
# # show all elements of the data.frame
# options(tibble.print_max = Inf)
# # search for filter topic "id"
# head(biomartr::organismFilters("Homo sapiens", topic = "id"), 20)

## ----eval=FALSE---------------------------------------------------------------
# # show all elements of the data.frame
# options(tibble.print_max = Inf)
#
# head(biomartr::organismAttributes("Homo sapiens", topic = "id"))

## ----eval=FALSE---------------------------------------------------------------
# # show all elements of the data.frame
# options(tibble.print_max = Inf)
# # retrieve the proteome of Homo sapiens from refseq
# file_path <- biomartr::getProteome(
#   db       = "refseq",
#   organism = "Homo sapiens",
#   path     = file.path("_ncbi_downloads", "proteomes"))
#
# Hsapiens_proteome <- biomartr::read_proteome(file_path, format = "fasta")
#
# # remove splice variants from id
# # (keep only the part of each of the first five names before the first ".")
# gene_set <- vapply(
#   strsplit(Hsapiens_proteome@ranges@NAMES[1:5], ".", fixed = TRUE),
#   function(x) x[1],
#   character(1))
#
# result_BM <- biomartr::biomart(
#   # gene ids were taken from the proteome retrieved with
#   # biomartr::getProteome()
#   genes      = gene_set,
#   # marts were selected with biomartr::getMarts()
#   mart       = "ENSEMBL_MART_ENSEMBL",
#   # datasets were selected with biomartr::getDatasets()
#   dataset    = "hsapiens_gene_ensembl",
#   # attributes were selected with biomartr::getAttributes()
#   attributes = c("ensembl_gene_id", "ensembl_peptide_id"),
#   # specify what ID type was stored in the fasta file retrieved with
#   # biomartr::getProteome()
#   filters    = "refseq_peptide")
#
# result_BM

## ----eval=FALSE---------------------------------------------------------------
# # show all elements of the data.frame
# options(tibble.print_max = Inf)
# # search for GO terms of an example Homo sapiens gene
# GO_tbl <- biomartr::getGO(organism = "Homo sapiens",
#                           genes    = "GUCA2A",
#                           filters  = "hgnc_symbol")
#
# GO_tbl