## ----vignette-options, include = FALSE---------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>") ## ----setup-------------------------------------------------------------------- library(fastRanges) library(GenomicRanges) library(S4Vectors) data("fast_ranges_example", package = "fastRanges") query <- fast_ranges_example$query subject <- fast_ranges_example$subject ## ----inspect-example---------------------------------------------------------- names(fast_ranges_example) query subject ## ----example-files------------------------------------------------------------ system.file("extdata", "query_peaks.bed", package = "fastRanges") system.file("extdata", "subject_genes.bed", package = "fastRanges") ## ----core-overlaps------------------------------------------------------------ hits <- fast_find_overlaps(query, subject, threads = 2) hits ## ----core-overlaps-inspect---------------------------------------------------- cbind( query_id = S4Vectors::queryHits(hits), subject_id = S4Vectors::subjectHits(hits) ) ## ----core-counts-------------------------------------------------------------- counts <- fast_count_overlaps(query, subject, threads = 2) counts ## ----core-any----------------------------------------------------------------- any_hits <- fast_overlaps_any(query, subject, threads = 2) any_hits ## ----compatibility-check------------------------------------------------------ ref <- GenomicRanges::findOverlaps(query, subject, ignore.strand = FALSE) identical( cbind(S4Vectors::queryHits(hits), S4Vectors::subjectHits(hits)), cbind(S4Vectors::queryHits(ref), S4Vectors::subjectHits(ref)) ) ## ----return-types------------------------------------------------------------- ret_classes <- c( hits = class(fast_find_overlaps(query, subject, threads = 1))[1], count = class(fast_count_overlaps(query, subject, threads = 1))[1], any = class(fast_overlaps_any(query, subject, threads = 1))[1], nearest = class(fast_nearest(query, subject))[1], join = class(fast_overlap_join(query, subject, threads = 1))[1], coverage = class(fast_coverage(query))[1] ) ret_classes ## ----index-build-------------------------------------------------------------- subject_index <- fast_build_index(subject) subject_index ## ----index-use---------------------------------------------------------------- hits_from_index <- fast_find_overlaps(query, subject_index, threads = 2) identical( cbind(S4Vectors::queryHits(hits_from_index), S4Vectors::subjectHits(hits_from_index)), cbind(S4Vectors::queryHits(hits), S4Vectors::subjectHits(hits)) ) ## ----join-inner--------------------------------------------------------------- joined_inner <- fast_overlap_join(query, subject, join = "inner", threads = 2) head(joined_inner) ## ----join-left---------------------------------------------------------------- joined_left <- fast_left_overlap_join(query, subject, threads = 2) head(joined_left) ## ----join-semi-anti----------------------------------------------------------- semi_tbl <- fast_semi_overlap_join(query, subject, threads = 2) anti_tbl <- fast_anti_overlap_join(query, subject, threads = 2) head(semi_tbl) head(anti_tbl) ## ----nearest-queries---------------------------------------------------------- nearest_tbl <- fast_nearest(query, subject) head(as.data.frame(nearest_tbl)) ## ----directional-queries------------------------------------------------------ fast_precede(query, subject) fast_follow(query, subject) ## ----grouped-setup------------------------------------------------------------ set.seed(1) S4Vectors::mcols(subject)$group <- sample( c("promoter", "enhancer"), length(subject), replace = TRUE ) S4Vectors::mcols(subject)$score <- seq_len(length(subject)) ## ----grouped-counts----------------------------------------------------------- by_group <- fast_count_overlaps_by_group( query, subject, group_col = "group", threads = 2 ) by_group ## ----grouped-aggregate-------------------------------------------------------- sum_score <- fast_overlap_aggregate( query, subject, value_col = "score", fun = "sum", threads = 2 ) sum_score ## ----self-overlaps------------------------------------------------------------ self_hits <- fast_self_overlaps(query, threads = 2) self_hits ## ----overlap-clusters--------------------------------------------------------- clusters <- fast_cluster_overlaps(query, return = "data.frame") head(clusters) ## ----overlap-windows---------------------------------------------------------- window_counts <- fast_window_count_overlaps( query, subject, window_width = 1000L, step_width = 500L, threads = 2 ) head(window_counts) ## ----range-algebra------------------------------------------------------------ query_reduced <- fast_reduce(query) query_disjoint <- fast_disjoin(query) query_gaps <- fast_gaps(query) c( original = length(query), reduced = length(query_reduced), disjoint = length(query_disjoint), gaps = length(query_gaps) ) ## ----range-set-ops------------------------------------------------------------ union_ranges <- fast_range_union(query, subject) intersect_ranges <- fast_range_intersect(query, subject) setdiff_ranges <- fast_range_setdiff(query, subject) c( union = length(union_ranges), intersect = length(intersect_ranges), setdiff = length(setdiff_ranges) ) ## ----coverage----------------------------------------------------------------- query_cov <- fast_coverage(query) query_cov ## ----tile-coverage------------------------------------------------------------ query_tiles <- fast_tile_coverage( query, tile_width = 1000L, step_width = 1000L ) head(query_tiles) ## ----iterator----------------------------------------------------------------- iter <- fast_find_overlaps_iter(query, subject_index, chunk_size = 2L, threads = 2) fast_iter_has_next(iter) chunk1 <- fast_iter_next(iter) chunk1 fast_iter_has_next(iter) ## ----iterator-reset----------------------------------------------------------- fast_iter_reset(iter) all_iter_hits <- fast_iter_collect(iter) length(all_iter_hits) ## ----index-io----------------------------------------------------------------- tmp_index <- tempfile(fileext = ".rds") fast_save_index(subject_index, tmp_index) loaded_index <- fast_load_index(tmp_index) fast_index_stats(loaded_index) unlink(tmp_index) ## ----benchmark-demo----------------------------------------------------------- base_time <- system.time({ h_base <- GenomicRanges::findOverlaps(query, subject, ignore.strand = FALSE) }) fast_time <- system.time({ h_fast <- fast_find_overlaps(query, subject_index, ignore_strand = FALSE, threads = 2) }) rbind( genomic_ranges = base_time[1:3], fast_ranges = fast_time[1:3] ) c(base_hits = length(h_base), fast_hits = length(h_fast)) ## ----extdata------------------------------------------------------------------ c( list.files(system.file("extdata", package = "fastRanges")), list.files(system.file("benchmarks", package = "fastRanges")) ) ## ----session-info------------------------------------------------------------- sessionInfo()