This section provides examples of utilizing the report tables for
broader analysis and queries.
Package Growth Over Time
Let’s explore how the number of Bioconductor packages has grown over
time by counting the distinct packages on each git branch:
# Retrieve the "info" table
info <- get_bbs_table("info")
# Tally the distinct packages on each branch, largest branch first
package_counts <- info |>
  distinct(git_branch, Package) |>
  count(git_branch, name = "n_packages") |>
  arrange(desc(n_packages))
# Show the per-branch totals
package_counts
#> # A tibble: 12 × 2
#> git_branch n_packages
#> <chr> <int>
#> 1 devel 3066
#> 2 RELEASE_3_22 2885
#> 3 RELEASE_3_21 2859
#> 4 RELEASE_3_19 2816
#> 5 RELEASE_3_20 2807
#> 6 RELEASE_3_18 2778
#> 7 RELEASE_3_17 2723
#> 8 RELEASE_3_16 2670
#> 9 RELEASE_3_15 2622
#> 10 RELEASE_3_14 2528
#> 11 RELEASE_3_13 2475
#> 12 master 33
# Horizontal bar chart of package counts, branches ordered by their count
package_counts |>
  mutate(git_branch = reorder(git_branch, n_packages)) |>
  ggplot(aes(x = git_branch, y = n_packages)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Number of Packages by Bioconductor Branch",
    x = "Branch",
    y = "Number of Packages"
  ) +
  theme_minimal()

Build Status Distribution
Understanding the distribution of build statuses helps assess
overall system health:
# Retrieve the "build_summary" table
build_summary <- get_bbs_table("build_summary")
# Tally rows per build status, most frequent status first
status_counts <- count(build_summary, status, sort = TRUE)
status_counts
#> # A tibble: 4 × 2
#> status n
#> <chr> <int>
#> 1 OK 13132435
#> 2 WARNINGS 644019
#> 3 ERROR 421850
#> 4 TIMEOUT 17696
# Visualize status distribution as a horizontal bar chart, with bars
# ordered by count and coloured by status.
ggplot(status_counts, aes(x = reorder(status, n), y = n)) +
  geom_col(aes(fill = status)) +
  # The palette names must match the values of `status` exactly. The table
  # reports the plural "WARNINGS", so a "WARNING" key would leave that bar
  # without a mapped colour.
  scale_fill_manual(values = c(
    "OK" = "green3",
    "WARNINGS" = "orange",
    "ERROR" = "red",
    "TIMEOUT" = "darkred"
  )) +
  coord_flip() +
  labs(
    title = "Distribution of Build Statuses",
    x = "Status",
    y = "Count"
  ) +
  theme_minimal() +
  # The axis already names each status, so the fill legend is redundant.
  theme(legend.position = "none")

Build Stage Analysis
Identify which build stages fail most often:
# Tally failed builds (ERROR or TIMEOUT) for every stage/status pair,
# most frequent combination first
failed_builds <- filter(build_summary, status %in% c("ERROR", "TIMEOUT"))
stage_failures <- count(failed_builds, stage, status, sort = TRUE)
stage_failures
#> # A tibble: 8 × 3
#> stage status n
#> <chr> <chr> <int>
#> 1 buildsrc ERROR 221063
#> 2 checksrc ERROR 117481
#> 3 install ERROR 81179
#> 4 checksrc TIMEOUT 9487
#> 5 buildsrc TIMEOUT 8120
#> 6 buildbin ERROR 2127
#> 7 buildbin TIMEOUT 57
#> 8 install TIMEOUT 32
# Stacked bar chart: one bar per build stage, segments split by status
stage_failures |>
  ggplot(aes(x = stage, y = n, fill = status)) +
  geom_col() +
  scale_fill_manual(
    values = c("ERROR" = "red", "TIMEOUT" = "darkred")
  ) +
  labs(
    title = "Build Failures by Stage",
    x = "Build Stage",
    y = "Number of Failures",
    fill = "Status"
  ) +
  theme_minimal()

Most Problematic Packages
Identify the packages with the highest number of failed builds (errors and timeouts):
# Total number of failed builds (ERROR or TIMEOUT) per package, worst first
package_errors <- build_summary |>
  filter(status %in% c("ERROR", "TIMEOUT")) |>
  count(package, name = "total_errors", sort = TRUE)
# The ten packages with the most failed builds
package_errors |>
  head(10)
#> # A tibble: 10 × 2
#> package total_errors
#> <chr> <int>
#> 1 lapmix 2218
#> 2 netZooR 1759
#> 3 hypeR 1689
#> 4 ChemmineOB 1598
#> 5 XNAString 1530
#> 6 Repitools 1518
#> 7 ccfindR 1484
#> 8 gpuMagic 1472
#> 9 slalom 1403
#> 10 Harshlight 1399
Maintainer Analysis
Analyze package maintenance patterns:
# For every maintainer: how many distinct packages they maintain, and a
# comma-separated list of those packages
maintainer_packages <- info |>
  distinct(Maintainer, Package) |>
  group_by(Maintainer) |>
  summarise(
    n_packages = n(),
    packages = toString(Package),
    .groups = "drop"
  ) |>
  arrange(desc(n_packages))
# The ten maintainers responsible for the most packages
maintainer_packages |>
  head(10)
#> # A tibble: 10 × 3
#> Maintainer n_packages packages
#> <chr> <int> <chr>
#> 1 Bioconductor Package Maintainer 72 GSE62944, hgu2beta7, TENxBrainDat…
#> 2 Aaron Lun 62 celldex, chipseqDBData, DropletTe…
#> 3 Hervé Pagès 25 pasillaBamSubset, RNAseqData.HNRN…
#> 4 VJ Carey 25 harbChIP, leeBamViews, MAQCsubset…
#> 5 Laurent Gatto 24 depmap, RforProteomics, CTdata, h…
#> 6 Marcel Ramos 24 curatedTCGAData, SingleCellMultiM…
#> 7 Michael Love 20 airway, fission, macrophage, null…
#> 8 Jianhong Ou 17 ATACseqQC, ATACseqTFEA, ChIPpeakA…
#> 9 Guangchuang Yu 16 ChIPseeker, clusterProfiler, DOSE…
#> 10 Mike Smith 16 BeadArrayUseCases, HD2013SGI, min…
# Histogram of how many packages each maintainer has (one bin per count)
maintainer_packages |>
  ggplot(aes(x = n_packages)) +
  geom_histogram(
    binwidth = 1,
    fill = "steelblue",
    color = "white"
  ) +
  labs(
    title = "Distribution of Packages per Maintainer",
    x = "Number of Packages",
    y = "Number of Maintainers"
  ) +
  theme_minimal()

Temporal Analysis
Analyze build patterns over time:
# Derive a calendar date and a "YYYY-MM" month label from each start time
build_summary <- build_summary |>
  mutate(date = as.Date(startedat)) |>
  mutate(month = format(startedat, "%Y-%m"))
# Number of builds per month; month_date pins each month to its first day
# so it can be plotted on a date axis
monthly_builds <- count(build_summary, month)
monthly_builds <- mutate(
  monthly_builds,
  month_date = as.Date(paste0(month, "-01"))
)
monthly_builds |>
  ggplot(aes(x = month_date, y = n)) +
  geom_line(color = "steelblue", linewidth = 1) +
  geom_point(color = "steelblue") +
  labs(
    title = "Build Activity Over Time",
    x = "Month",
    y = "Number of Builds"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Monthly failure rate: share of builds per month that ended in ERROR or
# TIMEOUT, expressed as a percentage
monthly_errors <- build_summary |>
  group_by(month) |>
  summarise(
    total = n(),
    errors = sum(status %in% c("ERROR", "TIMEOUT")),
    .groups = "drop"
  ) |>
  mutate(
    error_rate = errors / total * 100,
    month_date = as.Date(paste0(month, "-01"))
  )
monthly_errors |>
  ggplot(aes(x = month_date, y = error_rate)) +
  geom_line(color = "red", linewidth = 1) +
  geom_point(color = "red") +
  labs(
    title = "Build Error Rate Over Time",
    x = "Month",
    y = "Error Rate (%)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
