\name{extractTranscripts}

\alias{transcriptWidths}
\alias{extractTranscripts}
\alias{transcriptLocs2refLocs}

\title{Extract a set of transcripts}

\description{
  \code{extractTranscripts} allows the user to extract a set of
  transcripts specified by the starts and ends of their exons
  as well as the strand from which the transcript is coming.

  \code{transcriptWidths} only returns the lengths of the
  transcripts (called the "widths" in this context) specified
  by the starts and ends of their exons.

  \code{transcriptLocs2refLocs} converts transcript-based
  locations into reference-based locations.
}

\usage{
  extractTranscripts(x, exonStarts=list(), exonEnds=list(),
                     strand=character(0), reorder.exons.on.minus.strand=FALSE)

  transcriptWidths(exonStarts=list(), exonEnds=list())

  transcriptLocs2refLocs(tlocs, exonStarts=list(), exonEnds=list(),
                         strand=character(0),
                         reorder.exons.on.minus.strand=FALSE)
}

\arguments{
  \item{x}{
    A \link{DNAString} or \link{MaskedDNAString} object.
  }
  \item{exonStarts, exonEnds}{
    The starts and ends of the exons, respectively.

    Each argument can be a list of integer vectors,
    an \link[IRanges]{IntegerList} object,
    or a character vector where each element is a
    comma-separated list of integers.
    In addition, the lists represented by \code{exonStarts}
    and \code{exonEnds} must have the same shape, i.e. they must
    have the same length and their corresponding elements must
    have the same lengths.
    The length of \code{exonStarts} and \code{exonEnds}
    is the number of transcripts.
  }
  \item{strand}{
    A character vector of the same length as \code{exonStarts} and
    \code{exonEnds} specifying, for each transcript, the strand
    (\code{"+"} or \code{"-"}) it comes from.
  }
  \item{reorder.exons.on.minus.strand}{
    \code{TRUE} or \code{FALSE}. Should the order of exons
    for transcripts coming from the minus strand be reversed?
  }
  \item{tlocs}{
    A list of integer vectors. The list itself must have the same
    length as \code{exonStarts} and \code{exonEnds}, i.e. one list
    element per transcript. Each element in \code{tlocs} must contain
    transcript-based locations.
  }
}

\details{
  \code{extractTranscripts} allows the user to extract a set of
  transcripts specified by the starts and ends of their exons
  as well as the strand from which the transcript is coming.
  See \code{\link[GenomicFeatures:extractTranscriptsFromGenome]{extractTranscriptsFromGenome}}
  in the GenomicFeatures package for extracting transcripts from
  a genome.
}

\value{
  A \link{DNAStringSet} object for \code{extractTranscripts}.

  An integer vector for \code{transcriptWidths}.

  A list of integer vectors of the same shape as \code{tlocs}
  for \code{transcriptLocs2refLocs}.
}

\seealso{
  \code{\link[GenomicFeatures:extractTranscriptsFromGenome]{extractTranscriptsFromGenome}},
  \code{\link{reverseComplement}},
  \link{DNAString-class},
  \link{DNAStringSet-class}
}

\examples{
  ## ---------------------------------------------------------------------
  ## A. EXTRACTING WORM TRANSCRIPTS ZC101.3 AND F37B1.1
  ## ---------------------------------------------------------------------

  ## Transcript ZC101.3 (+ strand).
  ##   Exon starts/ends relative to the transcript:
  zc101_rel_starts <- c(1, 488, 654, 996, 1365, 1712, 2163, 2453)
  zc101_rel_ends <- c(137, 578, 889, 1277, 1662, 1870, 2410, 2561)
  ##   Exon starts/ends relative to the chromosome:
  zc101_anchor <- 14678410
  zc101_starts <- zc101_anchor + zc101_rel_starts
  zc101_ends <- zc101_anchor + zc101_rel_ends

  ## Transcript F37B1.1 (- strand).
  ##   Exon starts/ends relative to the transcript:
  f37b1_rel_starts <- c(1, 325)
  f37b1_rel_ends <- c(139, 815)
  ##   Exon starts/ends relative to the chromosome. Note the swap: the
  ##   relative ends give the chromosome starts, and vice versa.
  f37b1_anchor <- 13611188
  f37b1_starts <- f37b1_anchor - f37b1_rel_ends
  f37b1_ends <- f37b1_anchor - f37b1_rel_starts

  ## One integer vector per transcript, plus the matching strands:
  exon_starts <- lapply(list(zc101_starts, f37b1_starts), as.integer)
  exon_ends <- lapply(list(zc101_ends, f37b1_ends), as.integer)
  strands <- c("+", "-")

  library(BSgenome.Celegans.UCSC.ce2)
  ## Both transcripts are on chrII:
  chrII <- Celegans$chrII
  transcripts <- extractTranscripts(chrII,
                                    exonStarts=exon_starts,
                                    exonEnds=exon_ends,
                                    strand=strands)

  ## Same as 'width(transcripts)':
  transcriptWidths(exonStarts=exon_starts, exonEnds=exon_ends)

  ## Convert a few transcript-based locations into reference-based ones:
  some_tlocs <- list(c(1:6, 135:140, 1555:1560),
                     c(1:6, 137:142, 625:630))
  transcriptLocs2refLocs(some_tlocs,
                         exonStarts=exon_starts,
                         exonEnds=exon_ends,
                         strand=strands)

  ## A sanity check: convert every location of both transcripts, then
  ## compare the reference letters found there with the extracted
  ## transcripts.
  all_tlocs <- list(1:1560, 1:630)
  ref_locs <- transcriptLocs2refLocs(all_tlocs,
                                     exonStarts=exon_starts,
                                     exonEnds=exon_ends,
                                     strand=strands)
  stopifnot(chrII[ref_locs[[1]]] == transcripts[[1]])
  stopifnot(complement(chrII)[ref_locs[[2]]] == transcripts[[2]])
}

\keyword{methods}
\keyword{manip}