## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"--------------------
BiocStyle::latex(use.unsrturl=FALSE)

## ----setup_knitr, include=FALSE, cache=FALSE-------------------------------
library(knitr)
opts_chunk$set(cache = TRUE, tidy = FALSE, 
  tidy.opts = list(blank = FALSE, width.cutoff=70), highlight = FALSE, 
  out.width = "7cm", out.height = "7cm", fig.align = "center")

## ----sri_file, eval = TRUE-------------------------------------------------
library(PasillaTranscriptExpr)

data_dir <- system.file("extdata", package = "PasillaTranscriptExpr")

sri <- read.table(paste0(data_dir, "/SraRunInfo.csv"), stringsAsFactors = FALSE,
  sep = ",", header = TRUE)
keep <- grep("CG8144|Untreated-", sri$LibraryName)
sri <- sri[keep, ]

## ----download, eval = FALSE------------------------------------------------
# sra_files <- basename(sri$download_path)
# 
# for(i in 1:nrow(sri))
#   download.file(sri$download_path[i], sra_files[i])
# 

## ----fastqdump, eval = FALSE-----------------------------------------------
# cmd <- paste0("fastq-dump -O ./ --split-3 ", sra_files)
# 
# for(i in 1:length(cmd))
#   system(cmd[i])
# 
# system("gzip *.fastq")

## ----download_fasta, eval = FALSE------------------------------------------
# system("wget ftp://ftp.ensembl.org/pub/release-70/fasta/drosophila_melanogaster/cdna/Drosophila_melanogaster.BDGP5.70.cdna.all.fa.gz")
# system("gunzip Drosophila_melanogaster.BDGP5.70.cdna.all.fa.gz")

## ----download_gtf, eval = FALSE--------------------------------------------
# system("wget ftp://ftp.ensembl.org/pub/release-70/gtf/drosophila_melanogaster/Drosophila_melanogaster.BDGP5.70.gtf.gz")
# system("gunzip Drosophila_melanogaster.BDGP5.70.gtf.gz")

## ----kallisto_metadata, eval = TRUE----------------------------------------
sri$LibraryName <- gsub("S2_DRSC_","",sri$LibraryName)
metadata <- unique(sri[,c("LibraryName", "LibraryLayout", "SampleName",
  "avgLength")])

for(i in seq_len(nrow(metadata))){
  indx <- sri$LibraryName == metadata$LibraryName[i]
  
  if(metadata$LibraryLayout[i] == "PAIRED"){
    metadata$fastq[i] <- paste0(sri$Run[indx], "_1.fastq.gz ",
      sri$Run[indx], "_2.fastq.gz", collapse = " ")
  }else{
    metadata$fastq[i] <- paste0(sri$Run[indx], ".fastq.gz", collapse = " ")
  }
}

metadata$condition <- ifelse(grepl("CG8144_RNAi", metadata$LibraryName),
  "KD", "CTL")
metadata$UniqueName <- paste0(1:nrow(metadata), "_", metadata$SampleName)

## ----kallisto_index, eval = TRUE-------------------------------------------
cDNA_fasta <- "Drosophila_melanogaster.BDGP5.70.cdna.all.fa"
index <- "Drosophila_melanogaster.BDGP5.70.cdna.all.idx"

cmd <- paste("kallisto index -i", index, cDNA_fasta, sep = " ")
cmd

## ----kallisto_index_cmd, eval = FALSE--------------------------------------
# system(cmd)

## ----kallisto_quantification, eval = TRUE----------------------------------
out_dir <- metadata$UniqueName

cmd <- paste("kallisto quant -i", index, "-o", out_dir, "-b 0 -t 5",
  ifelse(metadata$LibraryLayout == "SINGLE",
    paste("--single -l", metadata$avgLength), ""), metadata$fastq)
cmd

## ----kallisto_quantification_cmd, eval = FALSE-----------------------------
# for(i in 1:length(cmd))
#   system(cmd[i])

## ----library_rtracklayer, eval = TRUE, message = FALSE---------------------
library(rtracklayer)

## ----gtf, eval = FALSE-----------------------------------------------------
# gtf_dir <- "Drosophila_melanogaster.BDGP5.70.gtf"
# 
# gtf <- import(gtf_dir)
# 
# gt <- unique(mcols(gtf)[, c("gene_id", "transcript_id")])
# rownames(gt) <- gt$transcript_id
# 

## ----merge_counts, eval = FALSE--------------------------------------------
# samples <- unique(metadata$SampleName)
# 
# counts_list <- lapply(1:length(samples), function(i){
#   indx <- which(metadata$SampleName == samples[i])
# 
#   if(length(indx) == 1){
#     abundance <- read.table(file.path(metadata$UniqueName[indx],
#       "abundance.txt"), header = TRUE, sep = "\t", as.is = TRUE)
#   }else{
#     abundance <- lapply(indx, function(j){
#       abundance_tmp <- read.table(file.path(metadata$UniqueName[j],
#         "abundance.txt"), header = TRUE, sep = "\t", as.is = TRUE)
#       abundance_tmp <- abundance_tmp[, c("target_id", "est_counts")]
#       abundance_tmp
#     })
#     abundance <- Reduce(function(...) merge(..., by = "target_id", all = TRUE,
#       sort = FALSE), abundance)
#     est_counts <- rowSums(abundance[, -1])
#     abundance <- data.frame(target_id = abundance$target_id,
#       est_counts = est_counts, stringsAsFactors = FALSE)
#   }
# 
#   counts <- data.frame(abundance$target_id, abundance$est_counts,
#     stringsAsFactors = FALSE)
#   colnames(counts) <- c("feature_id", samples[i])
#   return(counts)
# })
# 
# counts <- Reduce(function(...) merge(..., by = "feature_id", all = TRUE,
#   sort = FALSE), counts_list)
# 
# ### Add gene IDs
# counts$gene_id <- gt[counts$feature_id, "gene_id"]
# 

## ----metadata, eval = TRUE-------------------------------------------------
metadata <- unique(metadata[, c("LibraryName", "LibraryLayout", "SampleName",
  "condition")])
metadata

## ----metadata_save, eval = FALSE-------------------------------------------
# write.table(metadata, "metadata.txt", quote = FALSE, sep = "\t",
#   row.names = FALSE)

## ----counts_save, eval = FALSE---------------------------------------------
# ### Final counts with columns sorted as in metadata
# counts <- counts[, c("feature_id", "gene_id", metadata$SampleName)]
# write.table(counts, "counts.txt", quote = FALSE, sep = "\t", row.names = FALSE)

## ----sessionInfo-----------------------------------------------------------
sessionInfo()