## ----setup, echo=FALSE----------------------------------------------------- suppressPackageStartupMessages({ library(CNVRanger) library(AnnotationHub) library(regioneR) library(BSgenome.Btaurus.UCSC.bosTau6.masked) library(SummarizedExperiment) library(curatedTCGAData) library(TCGAutils) library(Gviz) }) ## ----oview, echo=FALSE, fig.wide=TRUE-------------------------------------- vign.dir <- system.file("vignettes", package = "CNVRanger") knitr::include_graphics(file.path(vign.dir, "CNVRanger.png")) ## ----input, echo=FALSE, fig.wide=TRUE-------------------------------------- vign.dir <- system.file("vignettes", package = "CNVRanger") knitr::include_graphics(file.path(vign.dir, "Input.png")) ## ----lib------------------------------------------------------------------- library(CNVRanger) ## ----readCalls------------------------------------------------------------- data.dir <- system.file("extdata", package="CNVRanger") call.file <- file.path(data.dir, "Silva16_PONE_CNV_calls.csv") calls <- read.csv(call.file, as.is=TRUE) nrow(calls) head(calls) ## ----nrSamples------------------------------------------------------------- length(unique(calls[,"NE_id"])) ## ----cnvCalls-------------------------------------------------------------- grl <- GenomicRanges::makeGRangesListFromDataFrame(calls, split.field="NE_id", keep.extra.columns=TRUE) grl ## ----sortCalls------------------------------------------------------------- grl <- GenomicRanges::sort(grl) grl ## ----RaggedExperiment------------------------------------------------------ ra <- RaggedExperiment::RaggedExperiment(grl) ra ## ----RaggedExperiment-assay------------------------------------------------ RaggedExperiment::assay(ra[1:5,1:5]) ## ----RaggedExperiment-colData---------------------------------------------- weight <- rnorm(ncol(ra), mean=1100, sd=100) fcr <- rnorm(ncol(ra), mean=6, sd=1.5) RaggedExperiment::colData(ra)$weight <- round(weight, digits=2) RaggedExperiment::colData(ra)$fcr <- round(fcr, digits=2) RaggedExperiment::colData(ra) ## ----summarization, echo=FALSE, out.width = "40%", out.extra='style="float:right; padding:10px"'---- vign.dir <- system.file("vignettes", package = "CNVRanger") knitr::include_graphics(file.path(vign.dir, "Summarization.png")) ## ----cnvrs----------------------------------------------------------------- cnvrs <- populationRanges(grl, density=0.1) cnvrs ## ----cnvrsRO--------------------------------------------------------------- ro.cnvrs <- populationRanges(grl[1:100], mode="RO", ro.thresh=0.51) ro.cnvrs ## ----gistic---------------------------------------------------------------- cnvrs <- populationRanges(grl, density=0.1, est.recur=TRUE) cnvrs ## ----recurr---------------------------------------------------------------- subset(cnvrs, pvalue < 0.05) ## ----recurrViz------------------------------------------------------------- plotRecurrentRegions(cnvrs, genome="bosTau6", chr="chr1") ## ----overlap, echo=FALSE, out.width = "40%", out.extra='style="float:right; padding:10px"'---- vign.dir <- system.file("vignettes", package = "CNVRanger") knitr::include_graphics(file.path(vign.dir, "Overlap.png")) ## ----getBtGenes------------------------------------------------------------ library(AnnotationHub) ah <- AnnotationHub::AnnotationHub() ahDb <- AnnotationHub::query(ah, pattern = c("Bos taurus", "EnsDb")) ahDb ## ----getBtGenes2----------------------------------------------------------- ahEdb <- ahDb[["AH60948"]] bt.genes <- ensembldb::genes(ahEdb) GenomeInfoDb::seqlevelsStyle(bt.genes) <- "UCSC" bt.genes ## ----formatBtGenes--------------------------------------------------------- sel.genes <- subset(bt.genes, seqnames %in% paste0("chr", 1:2)) sel.genes <- subset(sel.genes, gene_biotype == "protein_coding") sel.cnvrs <- subset(cnvrs, seqnames %in% paste0("chr", 1:2)) ## ----ovlpTest, warning=FALSE----------------------------------------------- library(regioneR) library(BSgenome.Btaurus.UCSC.bosTau6.masked) res <- regioneR::overlapPermTest(A=sel.cnvrs, B=sel.genes, ntimes=100, genome="bosTau6", mask=NA, per.chromosome=TRUE, count.once=TRUE) res ## ----permDist-------------------------------------------------------------- summary(res[[1]]$permuted) ## ----vizPermTest----------------------------------------------------------- plot(res) ## ----expression, echo=FALSE, out.width = "40%", out.extra='style="float:right; padding:10px"'---- vign.dir <- system.file("vignettes", package = "CNVRanger") knitr::include_graphics(file.path(vign.dir, "Expression.png")) ## ----rseqdata-------------------------------------------------------------- rseq.file <- file.path(data.dir, "counts_cleaned.txt") rcounts <- read.delim(rseq.file, row.names=1, stringsAsFactors=FALSE) rcounts <- as.matrix(rcounts) dim(rcounts) rcounts[1:5, 1:5] ## ----rse------------------------------------------------------------------- library(SummarizedExperiment) rranges <- GenomicRanges::granges(sel.genes)[rownames(rcounts)] rse <- SummarizedExperiment(assays=list(rcounts=rcounts), rowRanges=rranges) rse ## ----cnvEQTL--------------------------------------------------------------- res <- cnvEQTL(cnvrs, grl, rse, window="1Mbp", verbose=TRUE) res ## ----tcgaSetup, message=FALSE---------------------------------------------- library(curatedTCGAData) gbm <- curatedTCGAData::curatedTCGAData("GBM", assays=c("GISTIC_Peaks", "CNVSNP", "RNASeq2GeneNorm"), dry.run=FALSE) gbm ## ----tcgaGeneAnno, message=FALSE------------------------------------------- library(TCGAutils) gbm <- TCGAutils::symbolsToRanges(gbm, unmapped=FALSE) for(i in 1:3) { rr <- rowRanges(gbm[[i]]) GenomeInfoDb::genome(rr) <- "hg19" GenomeInfoDb::seqlevelsStyle(rr) <- "UCSC" ind <- as.character(seqnames(rr)) %in% c("chr1", "chr2") rowRanges(gbm[[i]]) <- rr gbm[[i]] <- gbm[[i]][ind,] } gbm ## ----gbmIntersect---------------------------------------------------------- gbm <- MultiAssayExperiment::intersectColumns(gbm) TCGAutils::sampleTables(gbm) data(sampleTypes, package="TCGAutils") sampleTypes gbm <- TCGAutils::splitAssays(gbm, sampleCodes="01") gbm ## ----segmean--------------------------------------------------------------- ind <- grep("CNVSNP", names(gbm)) head( mcols(gbm[[ind]]) ) summary( mcols(gbm[[ind]])$Segment_Mean ) ## ----lr2int, eval=FALSE---------------------------------------------------- # round( (2^lr) * 2) ## ----transformToStates----------------------------------------------------- smean <- mcols(gbm[[ind]])$Segment_Mean state <- round(2^smean * 2) state[state > 4] <- 4 mcols(gbm[[ind]])$State <- state mcols(gbm[[ind]]) <- mcols(gbm[[ind]])[,3:1] table(mcols(gbm[[ind]])$State) ## ----gbmEQTL--------------------------------------------------------------- res <- cnvEQTL(cnvrs="01_GBM_GISTIC_Peaks-20160128", calls="01_GBM_CNVSNP-20160128", rcounts="01_GBM_RNASeq2GeneNorm-20160128_ranged", data=gbm, window="1Mbp", verbose=TRUE) res ## ----plotEQTL-------------------------------------------------------------- res[2] (r <- GRanges(names(res)[2])) plotEQTL(cnvr=r, genes=res[[2]], genome="hg19", cn="CN1") ## ----phenotype, echo=FALSE, out.width = "40%", out.extra='style="float:right; padding:10px"'---- vign.dir <- system.file("vignettes", package = "CNVRanger") knitr::include_graphics(file.path(vign.dir, "Phenotype.png")) ## ----readCallsGWAS--------------------------------------------------------- cnv.loc <- file.path(data.dir, "CNVOut.txt") cnv.calls <- read.delim(cnv.loc, as.is=TRUE) head(cnv.calls) ## ----calls2grl------------------------------------------------------------- cnv.calls <- GenomicRanges::makeGRangesListFromDataFrame(cnv.calls, split.field="sample.id", keep.extra.columns=TRUE) sort(cnv.calls) ## ----phenoData------------------------------------------------------------- phen.loc <- file.path(data.dir, "Pheno.txt") phen.df <- read.delim(phen.loc, as.is=TRUE) head(phen.df) ## ----importPhen------------------------------------------------------------ re <- RaggedExperiment::RaggedExperiment(cnv.calls, colData=phen.df) re ## ----map------------------------------------------------------------------- map.loc <- file.path(data.dir, "MapPenn.txt") map.df <- read.delim(map.loc, as.is=TRUE) head(map.df) ## ----importPhenRagged------------------------------------------------------ phen.info <- setupCnvGWAS("example", cnv.out.loc=re, map.loc=map.loc) phen.info ## ----Wdir------------------------------------------------------------------ all.paths <- phen.info$all.paths all.paths ## ----CNVGWASNames---------------------------------------------------------- # Define chr correspondence to numeric chr.code.name <- data.frame( V1 = c(16, 25, 29:31), V2 = c("1A", "4A", "25LG1", "25LG2", "LGE22")) ## ----CNVGWA---------------------------------------------------------------- segs.pvalue.gr <- cnvGWAS(phen.info, chr.code.name=chr.code.name, method.m.test="none") segs.pvalue.gr ## ---- echo=FALSE, fig.cap="Definition of CNV segments. The figure shows construction of a CNV segment from 4 individual CNV calls in a CNV region based on pairwise copy number concordance between adjacent probes.", out.width = '100%'---- knitr::include_graphics(file.path(vign.dir, "CNVsegments.png")) ## ----manh------------------------------------------------------------------ ## Define the chromosome order in the plot order.chrs <- c(1:24, "25LG1", "25LG2", 27:28, "LGE22", "1A", "4A") ## Chromosome sizes chr.size.file <- file.path(data.dir, "Parus_major_chr_sizes.txt") chr.sizes <- scan(chr.size.file) chr.size.order <- data.frame(chr=order.chrs, sizes=chr.sizes, stringsAsFactors=FALSE) ## Plot a pdf file with a manhatthan within the 'Results' workfolder plotManhattan(all.paths, segs.pvalue.gr, chr.size.order, plot.pdf=FALSE) ## ----prodGDS--------------------------------------------------------------- ## Create a GDS file in disk and export the SNP probe positions probes.cnv.gr <- generateGDS(phen.info, chr.code.name=chr.code.name) probes.cnv.gr ## Run GWAS with existent GDS file segs.pvalue.gr <- cnvGWAS(phen.info, chr.code.name=chr.code.name, produce.gds=FALSE) ## ----importLRR------------------------------------------------------------- # List files to import LRR/BAF files <- list.files(data.dir, pattern = "\\.cnv.txt.adjusted$") samples <- sub(".cnv.txt.adjusted$", "", files) samples <- sub("^GT","", samples) sample.files <- data.frame(file.names=files, sample.names=samples) # All missing samples will have LRR = '0' and BAF = '0.5' in all SNPs listed in the GDS file importLrrBaf(all.paths, data.dir, sample.files, verbose=FALSE) # Read the GDS to check if the LRR/BAF nodes were added cnv.gds <- file.path(all.paths[1], "CNV.gds") (genofile <- SNPRelate::snpgdsOpen(cnv.gds, allow.fork=TRUE, readonly=FALSE)) gdsfmt::closefn.gds(genofile) # Run the CNV-GWAS with existent GDS segs.pvalue.gr <- cnvGWAS(phen.info, chr.code.name=chr.code.name, produce.gds=FALSE, run.lrr=TRUE) ## ----sessionInfo----------------------------------------------------------- sessionInfo()