TSSr is a comprehensive R/Bioconductor package for analyzing transcription start site (TSS) sequencing data. It supports multiple input formats including BAM, BED, BigWig, and TSS tables, and provides a complete workflow from TSS identification through downstream analyses such as core promoter shape analysis, gene annotation, differential expression, and promoter shifting.
If you use TSSr, please cite the following article:
citation("TSSr")
#> To cite package 'TSSr' in publications use:
#>
#> Lu Z, Berry K, Hu Z, Zhan Y, Ahn T, Lin Z (2021). "TSSr: an R package
#> for comprehensive analyses of TSS sequencing data." _NAR Genomics and
#> Bioinformatics_, *3*(4), lqab108. doi:10.1093/nargab/lqab108
#> <https://doi.org/10.1093/nargab/lqab108>.
#>
#> A BibTeX entry for LaTeX users is
#>
#> @Article{,
#> title = {TSSr: an R package for comprehensive analyses of TSS sequencing data},
#> author = {Zhaolian Lu and Keenan Berry and Zhenbin Hu and Yu Zhan and Tae-Hyuk Ahn and Zhenguo Lin},
#> journal = {NAR Genomics and Bioinformatics},
#> year = {2021},
#> volume = {3},
#> number = {4},
#> pages = {lqab108},
#> doi = {10.1093/nargab/lqab108},
#> }For general questions about the usage of TSSr, use the official Bioconductor support forum and tag your question “TSSr”. We strive to answer questions as quickly as possible.
For technical questions, bug reports and suggestions for new features, we refer to the TSSr github page.
Or create a new TSSr object using the constructor function. Note that the input files do not need to exist at object creation time:
# Create a TSSr object using the constructor (files do not need to exist)
newObj <- TSSr(
genomeName = "BSgenome.Scerevisiae.UCSC.sacCer3",
inputFiles = c("S01.sorted.bam", "S02.sorted.bam",
"S03.sorted.bam", "S04.sorted.bam"),
inputFilesType = "bam",
sampleLabels = c("SL01", "SL02", "SL03", "SL04"),
sampleLabelsMerged = c("control", "treat"),
mergeIndex = c(1, 1, 2, 2),
refSource = "saccharomyces_cerevisiae.SGD.gff",
organismName = "saccharomyces cerevisiae"
)
newObj
#> An object of class "TSSr"
#> Slot "genomeName":
#> [1] "BSgenome.Scerevisiae.UCSC.sacCer3"
#>
#> Slot "inputFiles":
#> [1] "S01.sorted.bam" "S02.sorted.bam" "S03.sorted.bam" "S04.sorted.bam"
#>
#> Slot "inputFilesType":
#> [1] "bam"
#>
#> Slot "sampleLabels":
#> [1] "SL01" "SL02" "SL03" "SL04"
#>
#> Slot "sampleLabelsMerged":
#> [1] "control" "treat"
#>
#> Slot "librarySizes":
#> numeric(0)
#>
#> Slot "TSSrawMatrix":
#> data frame with 0 columns and 0 rows
#>
#> Slot "mergeIndex":
#> [1] 1 1 2 2
#>
#> Slot "TSSprocessedMatrix":
#> data frame with 0 columns and 0 rows
#>
#> Slot "tagClusters":
#> list()
#>
#> Slot "consensusClusters":
#> list()
#>
#> Slot "clusterShape":
#> list()
#>
#> Slot "refSource":
#> [1] "saccharomyces_cerevisiae.SGD.gff"
#>
#> Slot "refTable":
#> data frame with 0 columns and 0 rows
#>
#> Slot "organismName":
#> [1] "saccharomyces cerevisiae"
#>
#> Slot "assignedClusters":
#> list()
#>
#> Slot "unassignedClusters":
#> list()
#>
#> Slot "filteredClusters":
#> list()
#>
#> Slot "enhancers":
#> list()
#>
#> Slot "DEtables":
#> list()
#>
#> Slot "TAGtables":
#> list()
#>
#> Slot "PromoterShift":
#> list()Then read in TSS data from the input files (requires actual files):
The
exampleTSSrobject distributed with the package already contains the results of every pipeline step below. Each computational chunk is shown aseval=FALSEto keep the vignette build time small on Bioconductor’s Single Package Builder; the chunk that follows it displays the corresponding pre-computed result fromexampleTSSrso the reader can see what each step produces.
# Merge replicates
mergeSamples(myTSSr)
# Normalization
normalizeTSS(myTSSr)
# TSS filtering
filterTSS(myTSSr, method = "TPM", tpmLow = 0.1)# Result available in the @TSSprocessedMatrix slot
head(slot(myTSSr, "TSSprocessedMatrix"))
#> chr pos strand control treat
#> <char> <int> <char> <num> <num>
#> 1: chrI 1561 + 0.000000 0.194886
#> 2: chrI 5759 + 0.310404 0.000000
#> 3: chrI 5765 + 0.310404 0.000000
#> 4: chrI 5773 + 0.310404 0.000000
#> 5: chrI 5925 + 0.310404 0.000000
#> 6: chrI 6163 + 0.000000 0.389772# TSS clustering
clusterTSS(myTSSr,
method = "peakclu", peakDistance = 100, extensionDistance = 30,
localThreshold = 0.02, clusterThreshold = 1,
useMultiCore = FALSE, numCores = NULL
)
# Aggregating consensus clusters
consensusCluster(myTSSr, dis = 50, useMultiCore = FALSE)# Tag clusters per sample (@tagClusters)
head(slot(myTSSr, "tagClusters")[["control"]])
#> cluster chr start end strand dominant_tss tags tags.dominant_tss
#> <int> <char> <int> <int> <char> <int> <num> <num>
#> 1: 1 chrI 6530 6564 + 6548 9.312118 3.724847
#> 2: 2 chrI 7166 7189 + 7169 1.241616 0.620808
#> 3: 3 chrI 8084 8087 + 8084 1.241616 0.620808
#> 4: 4 chrI 9325 9411 + 9327 4.966464 1.241616
#> 5: 5 chrI 9442 9479 + 9442 3.414443 1.862423
#> 6: 6 chrI 11253 11343 + 11329 40.662913 17.693022
#> q_0.1 q_0.9 interquantile_width
#> <int> <int> <num>
#> 1: 6530 6562 33
#> 2: 7166 7189 24
#> 3: 8084 8087 4
#> 4: 9327 9391 65
#> 5: 9442 9468 27
#> 6: 11275 11329 55
# Consensus clusters across samples (@consensusClusters)
head(slot(myTSSr, "consensusClusters")[["control"]])
#> Key: <subset>
#> cluster chr start end strand dominant_tss tags tags.dominant_tss
#> <int> <char> <int> <int> <char> <int> <num> <num>
#> 1: 385 chrII 126 175 + 150 3.414444 1.241616
#> 2: 386 chrII 5894 5932 + 5930 2.483232 1.552020
#> 3: 388 chrII 8398 8585 + 8549 22.659490 2.793635
#> 4: 389 chrII 8762 8802 + 8769 2.483232 0.620808
#> 5: 391 chrII 9331 9332 + 9332 15.520195 15.209791
#> 6: 392 chrII 9730 9761 + 9761 1.241616 0.620808
#> q_0.1 q_0.9 interquantile_width subset
#> <int> <int> <num> <char>
#> 1: 145 171 27 chrII_+
#> 2: 5894 5932 39 chrII_+
#> 3: 8427 8550 124 chrII_+
#> 4: 8762 8802 41 chrII_+
#> 5: 9332 9332 1 chrII_+
#> 6: 9730 9761 32 chrII_+# Calculating core promoter shape score
shapeCluster(myTSSr,
clusters = "consensusClusters", method = "PSS",
useMultiCore = FALSE, numCores = NULL
)# Shape scores per cluster (@clusterShape)
head(slot(myTSSr, "clusterShape")[["control"]])
#> cluster chr start end strand dominant_tss tags tags.dominant_tss
#> <int> <char> <int> <int> <char> <int> <num> <num>
#> 1: 1 chrI 6530 6564 + 6548 9.312118 3.724847
#> 2: 3 chrI 7166 7189 + 7169 1.241616 0.620808
#> 3: 4 chrI 8084 8087 + 8084 1.241616 0.620808
#> 4: 5 chrI 9325 9411 + 9327 4.966464 1.241616
#> 5: 6 chrI 9442 9479 + 9442 3.414443 1.862423
#> 6: 7 chrI 11253 11343 + 11329 40.662913 17.693022
#> q_0.1 q_0.9 interquantile_width shape.score
#> <int> <int> <num> <num>
#> 1: 6530 6562 33 9.646916
#> 2: 7166 7189 24 6.877444
#> 3: 8084 8087 4 2.000000
#> 4: 9327 9391 65 17.767262
#> 5: 9442 9468 27 7.469695
#> 6: 11275 11329 55 15.740262# Assign clusters to the annotated features
annotateCluster(myTSSr, clusters = "consensusClusters",
filterCluster = TRUE, filterClusterThreshold = 0.02,
annotationType = "genes", upstream = 1000,
upstreamOverlap = 500, downstream = 0)# Clusters assigned to genes (@assignedClusters)
head(slot(myTSSr, "assignedClusters")[["control"]])
#> cluster chr start end strand dominant_tss tags tags.dominant_tss
#> <int> <fctr> <int> <int> <fctr> <int> <num> <num>
#> 1: 5 chrI 9325 9411 + 9327 4.966464 1.241616
#> 2: 6 chrI 9442 9479 + 9442 3.414443 1.862423
#> 3: 7 chrI 11253 11343 + 11329 40.662913 17.693022
#> 4: 33 chrI 31026 31151 + 31108 48.112608 17.072215
#> 5: 34 chrI 31187 31263 + 31242 320.026426 122.609541
#> 6: 36 chrI 31515 31581 + 31521 36.627663 10.553733
#> q_0.1 q_0.9 interquantile_width gene inCoding
#> <int> <int> <num> <char> <char>
#> 1: 9327 9391 65 YAL066W <NA>
#> 2: 9442 9468 27 YAL066W <NA>
#> 3: 11275 11329 55 YAL064W-B <NA>
#> 4: 31108 31145 38 YAL062W <NA>
#> 5: 31212 31242 31 YAL062W <NA>
#> 6: 31521 31572 52 YAL062W <NA># Gene-level differential expression using DESeq2
deGene(myTSSr, comparePairs = list(c("control", "treat")),
pval = 0.01, useMultiCore = FALSE, numCores = NULL)# Differential expression results (@DEtables)
head(slot(myTSSr, "DEtables")[["control_VS_treat"]])
#> $DEtable
#> gene baseMean log2FoldChange lfcSE stat pvalue
#> <char> <num> <num> <num> <num> <num>
#> 1: YBL003C 13513.56774 -4.0059651379 0.1508944 -26.548141509 2.698283e-155
#> 2: YBR040W 4750.45985 10.1149789748 0.4184800 24.170760103 4.517757e-129
#> 3: YBR009C 9982.34378 -3.0663446812 0.1574613 -19.473645377 1.837200e-84
#> 4: YBR054W 9143.51825 -3.2077260836 0.1659926 -19.324510882 3.341355e-83
#> 5: YBR093C 1092.76754 -3.7982888386 0.2058561 -18.451186700 5.101250e-76
#> ---
#> 527: YBR131W 203.77248 -0.0068930210 0.2488917 -0.027694866 9.779055e-01
#> 528: YBR180W 13.20289 0.0218580383 0.8203032 0.026646291 9.787419e-01
#> 529: YBL058W 2917.45302 0.0033335690 0.1546458 0.021556160 9.828020e-01
#> 530: YBR240C 180.85311 -0.0042225868 0.2725510 -0.015492833 9.876390e-01
#> 531: YBR081C 1734.15811 -0.0005699222 0.1854924 -0.003072482 9.975485e-01
#> padj
#> <num>
#> 1: 1.432788e-152
#> 2: 1.199464e-126
#> 3: 3.251843e-82
#> 4: 4.435649e-81
#> 5: 5.417528e-74
#> ---
#> 527: 9.843029e-01
#> 528: 9.843029e-01
#> 529: 9.865177e-01
#> 530: 9.895025e-01
#> 531: 9.975485e-01
#>
#> $DEsig
#> gene baseMean log2FoldChange lfcSE stat pvalue
#> <char> <num> <num> <num> <num> <num>
#> 1: YBL003C 13513.56774 -4.0059651 0.1508944 -26.548142 2.698283e-155
#> 2: YBR040W 4750.45985 10.1149790 0.4184800 24.170760 4.517757e-129
#> 3: YBR009C 9982.34378 -3.0663447 0.1574613 -19.473645 1.837200e-84
#> 4: YBR054W 9143.51825 -3.2077261 0.1659926 -19.324511 3.341355e-83
#> 5: YBR093C 1092.76754 -3.7982888 0.2058561 -18.451187 5.101250e-76
#> ---
#> 169: YBR136W 331.51178 -0.6569472 0.2209313 -2.973537 2.943893e-03
#> 170: YBR173C 6414.63379 0.4457524 0.1499899 2.971883 2.959797e-03
#> 171: YBR182C-A 52.98342 -1.3081856 0.4396566 -2.975471 2.925388e-03
#> 172: YBR142W 864.92689 -0.5375705 0.1811459 -2.967611 3.001240e-03
#> 173: YAL019W 580.44899 -0.6096570 0.2062106 -2.956477 3.111751e-03
#> padj
#> <num>
#> 1: 1.432788e-152
#> 2: 1.199464e-126
#> 3: 3.251843e-82
#> 4: 4.435649e-81
#> 5: 5.417528e-74
#> ---
#> 169: 9.190949e-03
#> 170: 9.190949e-03
#> 171: 9.190949e-03
#> 172: 9.265457e-03
#> 173: 9.551097e-03# Calculate core promoter shifts
shiftPromoter(myTSSr, comparePairs = list(c("control", "treat")),
pval = 0.01)# Promoter shift results (@PromoterShift)
head(slot(myTSSr, "PromoterShift")[["control_VS_treat"]])
#> gene Ds pval padj
#> <char> <char> <char> <num>
#> 1: YBL017C 16.5567506300711 0 0.000000e+00
#> 2: YBR006W -26.3346456736974 0 0.000000e+00
#> 3: YBR023C 18.2077046044299 0 0.000000e+00
#> 4: YBR039W -27.6613679948891 0 0.000000e+00
#> 5: YBR121C -32.7177356221814 0 0.000000e+00
#> 6: YBL067C -25.5609100751329 4.82850376024535e-307 2.253302e-305TSSr provides several export functions for saving results:
# Export TSS tables
exportTSStable(myTSSr, data = "processed")
# Export cluster tables
exportClustersTable(myTSSr, data = "assigned")
# Export to BED format for genome browsers
exportClustersToBed(myTSSr, data = "consensusClusters")
# Export TSS to bedGraph
exportTSStoBedgraph(myTSSr, data = "processed")sessionInfo()
#> R version 4.6.0 RC (2026-04-17 r89917)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.4 LTS
#>
#> Matrix products: default
#> BLAS: /home/biocbuild/bbs-3.23-bioc/R/lib/libRblas.so
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0 LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_GB LC_COLLATE=C
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: America/New_York
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] TSSr_0.99.11
#>
#> loaded via a namespace (and not attached):
#> [1] RColorBrewer_1.1-3 rstudioapi_0.18.0
#> [3] jsonlite_2.0.0 magrittr_2.0.5
#> [5] GenomicFeatures_1.64.0 farver_2.1.2
#> [7] rmarkdown_2.31 BiocIO_1.22.0
#> [9] vctrs_0.7.3 memoise_2.0.1
#> [11] Rsamtools_2.28.0 RCurl_1.98-1.18
#> [13] base64enc_0.1-6 htmltools_0.5.9
#> [15] S4Arrays_1.12.0 progress_1.2.3
#> [17] curl_7.1.0 SparseArray_1.12.2
#> [19] Formula_1.2-5 sass_0.4.10
#> [21] bslib_0.10.0 htmlwidgets_1.6.4
#> [23] Gviz_1.56.0 httr2_1.2.2
#> [25] cachem_1.1.0 GenomicAlignments_1.48.0
#> [27] lifecycle_1.0.5 pkgconfig_2.0.3
#> [29] Matrix_1.7-5 R6_2.6.1
#> [31] fastmap_1.2.0 MatrixGenerics_1.24.0
#> [33] digest_0.6.39 colorspace_2.1-2
#> [35] AnnotationDbi_1.74.0 S4Vectors_0.50.0
#> [37] DESeq2_1.52.0 Hmisc_5.2-5
#> [39] GenomicRanges_1.64.0 RSQLite_2.4.6
#> [41] filelock_1.0.3 httr_1.4.8
#> [43] abind_1.4-8 compiler_4.6.0
#> [45] bit64_4.8.0 htmlTable_2.5.0
#> [47] S7_0.2.2 backports_1.5.1
#> [49] BiocParallel_1.46.0 DBI_1.3.0
#> [51] biomaRt_2.68.0 MASS_7.3-65
#> [53] rappdirs_0.3.4 DelayedArray_0.38.1
#> [55] rjson_0.2.23 tools_4.6.0
#> [57] foreign_0.8-91 otel_0.2.0
#> [59] nnet_7.3-20 glue_1.8.1
#> [61] restfulr_0.0.16 grid_4.6.0
#> [63] checkmate_2.3.4 cluster_2.1.8.2
#> [65] generics_0.1.4 gtable_0.3.6
#> [67] BSgenome_1.80.0 tidyr_1.3.2
#> [69] ensembldb_2.36.0 data.table_1.18.4
#> [71] hms_1.1.4 XVector_0.52.0
#> [73] BiocGenerics_0.58.0 pillar_1.11.1
#> [75] stringr_1.6.0 dplyr_1.2.1
#> [77] BiocFileCache_3.2.0 lattice_0.22-9
#> [79] rtracklayer_1.72.0 bit_4.6.0
#> [81] deldir_2.0-4 biovizBase_1.60.0
#> [83] tidyselect_1.2.1 locfit_1.5-9.12
#> [85] Biostrings_2.80.0 knitr_1.51
#> [87] gridExtra_2.3 IRanges_2.46.0
#> [89] Seqinfo_1.2.0 ProtGenerics_1.44.0
#> [91] SummarizedExperiment_1.42.0 stats4_4.6.0
#> [93] xfun_0.57 Biobase_2.72.0
#> [95] matrixStats_1.5.0 stringi_1.8.7
#> [97] UCSC.utils_1.8.0 lazyeval_0.2.3
#> [99] yaml_2.3.12 evaluate_1.0.5
#> [101] codetools_0.2-20 cigarillo_1.2.0
#> [103] interp_1.1-6 tibble_3.3.1
#> [105] cli_3.6.6 rpart_4.1.27
#> [107] jquerylib_0.1.4 dichromat_2.0-0.1
#> [109] Rcpp_1.1.1-1.1 GenomeInfoDb_1.48.0
#> [111] dbplyr_2.5.2 png_0.1-9
#> [113] XML_3.99-0.23 parallel_4.6.0
#> [115] ggfortify_0.4.19 ggplot2_4.0.3
#> [117] blob_1.3.0 prettyunits_1.2.0
#> [119] latticeExtra_0.6-31 calibrate_1.7.7
#> [121] jpeg_0.1-11 AnnotationFilter_1.36.0
#> [123] bitops_1.0-9 txdbmaker_1.8.0
#> [125] VariantAnnotation_1.58.0 scales_1.4.0
#> [127] purrr_1.2.2 crayon_1.5.3
#> [129] rlang_1.2.0 KEGGREST_1.52.0