\name{read.Mask} \alias{read.Mask} \alias{read.agpMask} \alias{read.gapMask} \alias{read.liftMask} \alias{read.rmMask} \alias{read.trfMask} \title{Read a mask from a file} \description{ \code{read.agpMask} and \code{read.gapMask} extract the AGAPS mask from an NCBI "agp" file or a UCSC "gap" file, respectively. \code{read.liftMask} extracts the AGAPS mask from a UCSC "lift" file (i.e. a file containing offsets of contigs within sequences). \code{read.rmMask} extracts the RM mask from a RepeatMasker .out file. \code{read.trfMask} extracts the TRF mask from a Tandem Repeats Finder .bed file. } \usage{ read.agpMask(file, seqname="?", mask.width=NA, gap.types=NULL, use.gap.types=FALSE) read.gapMask(file, seqname="?", mask.width=NA, gap.types=NULL, use.gap.types=FALSE) read.liftMask(file, seqname="?", mask.width=NA) read.rmMask(file, seqname="?", mask.width=NA, use.IDs=FALSE) read.trfMask(file, seqname="?", mask.width=NA) } \arguments{ \item{file}{ Either a character string naming a file or a connection open for reading. } \item{seqname}{ The name of the sequence for which the mask must be extracted. If no sequence is specified (i.e. \code{seqname="?"}) then an error is raised and the sequence names found in the file are displayed. If the file doesn't contain any information for the specified sequence, then a warning is issued and an empty mask of width \code{mask.width} is returned. } \item{mask.width}{ The width of the mask to return i.e. the length of the sequence this mask will be put on. See \code{?`\link{MaskCollection-class}`} for more information about the width of a \link{MaskCollection} object. } \item{gap.types}{ \code{NULL} or a character vector containing gap types. Use this argument to filter the assembly gaps that are to be extracted from the "agp" or "gap" file based on their type. Most common gap types are \code{"contig"}, \code{"clone"}, \code{"centromere"}, \code{"telomere"}, \code{"heterochromatin"}, \code{"short_arm"} and \code{"fragment"}. 
With \code{gap.types=NULL}, all the assembly gaps described in the file are extracted. With \code{gap.types="?"}, an error is raised and the gap types found in the file for the specified sequence are displayed. } \item{use.gap.types}{ Whether or not the gap types provided in the "agp" or "gap" file should be used to name the ranges constituting the returned mask. See \code{?`\link{IRanges-class}`} for more information about the names of an \link{IRanges} object. } \item{use.IDs}{ Whether or not the repeat IDs provided in the RepeatMasker .out file should be used to name the ranges constituting the returned mask. See \code{?`\link{IRanges-class}`} for more information about the names of an \link{IRanges} object. } } \seealso{ \link{MaskCollection-class}, \link{IRanges-class} } \examples{ ## --------------------------------------------------------------------- ## A. Extract a mask of assembly gaps ("AGAPS" mask) with read.agpMask() ## --------------------------------------------------------------------- ## Note: The hs_b36v3_chrY.agp file was obtained by downloading, ## extracting and renaming the hs_ref_chrY.agp.gz file from ## ## ftp://ftp.ncbi.nih.gov/genomes/H_sapiens/Assembled_chromosomes/ ## hs_ref_chrY.agp.gz 5 KB 24/03/08 04:33:00 PM ## ## on May 9, 2008. chrY_length <- 57772954 file1 <- system.file("extdata", "hs_b36v3_chrY.agp", package="IRanges") mask1 <- read.agpMask(file1, seqname="chrY", mask.width=chrY_length, use.gap.types=TRUE) mask1 mask1[[1]] mask11 <- read.agpMask(file1, seqname="chrY", mask.width=chrY_length, gap.types=c("centromere", "heterochromatin")) mask11[[1]] ## --------------------------------------------------------------------- ## B. 
Extract a mask of assembly gaps ("AGAPS" mask) with read.liftMask() ## --------------------------------------------------------------------- ## Note: The hg18liftAll.lft file was obtained by downloading, ## extracting and renaming the liftAll.zip file from ## ## http://hgdownload.cse.ucsc.edu/goldenPath/hg18/bigZips/ ## liftAll.zip 03-Feb-2006 11:35 5.5K ## ## on May 8, 2008. file2 <- system.file("extdata", "hg18liftAll.lft", package="IRanges") mask2 <- read.liftMask(file2, seqname="chr1") mask2 if (interactive()) { ## contigs 7 and 8 for chrY are adjacent read.liftMask(file2, seqname="chrY") ## displays the sequence names found in the file read.liftMask(file2) ## specify an unknown sequence name read.liftMask(file2, seqname="chrZ", mask.width=300) } ## --------------------------------------------------------------------- ## C. Extract a RepeatMasker ("RM") or Tandem Repeats Finder ("TRF") ## mask with read.rmMask() or read.trfMask() ## --------------------------------------------------------------------- ## Note: The ce2chrM.fa.out and ce2chrM.bed files were obtained by ## downloading, extracting and renaming the chromOut.zip and ## chromTrf.zip files from ## ## http://hgdownload.cse.ucsc.edu/goldenPath/ce2/bigZips/ ## chromOut.zip 21-Apr-2004 09:05 2.6M ## chromTrf.zip 21-Apr-2004 09:07 182K ## ## on May 7, 2008. 
## Before you can extract a mask with read.rmMask() or read.trfMask(), you ## need to know the length of the sequence that you're going to put the ## mask on: if (interactive()) { library(BSgenome.Celegans.UCSC.ce2) chrM_length <- seqlengths(Celegans)[["chrM"]] ## Read the RepeatMasker .out file for chrM in ce2: file3 <- system.file("extdata", "ce2chrM.fa.out", package="IRanges") RMmask <- read.rmMask(file3, seqname="chrM", mask.width=chrM_length) RMmask ## Read the Tandem Repeats Finder .bed file for chrM in ce2: file4 <- system.file("extdata", "ce2chrM.bed", package="IRanges") TRFmask <- read.trfMask(file4, seqname="chrM", mask.width=chrM_length) TRFmask desc(TRFmask) <- paste(desc(TRFmask), "[period<=12]") TRFmask ## Put the 2 masks on chrM: chrM <- Celegans$chrM masks(chrM) <- RMmask # this would drop all current masks, if any masks(chrM) <- append(masks(chrM), TRFmask) chrM } } \keyword{manip}