## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>", fig.width = 10, fig.height = 10) ## ----style, echo = FALSE, results = 'asis'------------------------------------ BiocStyle::markdown() ## ----message = FALSE, include = FALSE----------------------------------------- library(net4pg) library(igraph) library(ggplot2) ## ----------------------------------------------------------------------------- incM_filename <- system.file("extdata" , "incM_example" , package = "net4pg" , mustWork = TRUE) rownames_filename <- system.file("extdata" , "peptideIDs_incM_example" , package = "net4pg" , mustWork = TRUE) colnames_filename <- system.file("extdata" , "proteinIDs_incM_example" , package = "net4pg" , mustWork = TRUE) incM <- read_inc_matrix(incM_filename = incM_filename , colnames_filename = colnames_filename , rownames_filename = rownames_filename) ## ----------------------------------------------------------------------------- dim(incM) ## ----------------------------------------------------------------------------- incM_reduced <- reduce_inc_matrix(incM) dim(incM_reduced) # check the size of the reduced incidence matrix ## ----------------------------------------------------------------------------- adjM <- get_adj_matrix(incM_reduced) dim(adjM) # check the size of the adjacency matrix: ## ----------------------------------------------------------------------------- multProteinCC <- get_cc(adjM) ## ----------------------------------------------------------------------------- cc.multProteins <- multProteinCC$ccs length(cc.multProteins) ## ----------------------------------------------------------------------------- # Calculate CCs size and percentage of single- vs multi-protein CCs CCstatsOut <- cc_stats(incM = incM , cc.proteins = cc.multProteins , reducedIncM = TRUE) # Number of single-protein CCs: CCstatsOut$N_singleProtCC # Number of multi-protein CCs CCstatsOut$N_multiProtCC # Total number of CCs totCCs <- CCstatsOut$N_singleProtCC + CCstatsOut$N_multiProtCC totCCs # Percentage of single-protein CCs: PercSingleP <- round(CCstatsOut$N_singleProtCC / totCCs * 100, digits = 2) PercSingleP # View table of CC size distribution CCstatsOut$NproteinsDistribution # Plot CC size distribution plot(factor(CCstatsOut$NproteinsDistribution$N_proteins , levels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", ">10")) , as.numeric(as.vector(CCstatsOut$NproteinsDistribution$N_CC)) , type = "s" , xlab = "N_proteins" , ylab = "N_CCs") ## ----------------------------------------------------------------------------- peptideStatsOut <- peptide_stats(incM = incM) # Number of shared peptides peptideStatsOut$nbShared # Number of specific peptides peptideStatsOut$nbSpecific # Percentage of specific peptides peptideStatsOut$percSpecific ## ----------------------------------------------------------------------------- cc.peptides.incM <- cc_composition(cc.multProteins, incM = incM) ## ---- fig.height=7------------------------------------------------------------ # Generate the bipartite graph prot <- "ENSP261" subgraphCC <- plot_cc(prot = prot , cc.proteins = cc.multProteins , cc.subincM = cc.peptides.incM$cc.subincM , incM = incM , tagProt = "ENSP" , tagContam = "Contam") # Plot it plot.igraph(subgraphCC$g , layout = layout_as_bipartite , edge.width = 1 , edge.arrow.width = 0.3 , vertex.size = 35 , edge.arrow.size = 0.5 , vertex.size2 = 35 , vertex.label.cex = 1 , asp = 0.25 , margin = -0.1) + title(paste0("Protein ", prot, " in CC#", subgraphCC$cc_id), line = -1) ## ----------------------------------------------------------------------------- incM_filename <- system.file("extdata" , "incM_example" , package = "net4pg" , mustWork = TRUE) rownames_filename <- system.file("extdata" , "peptideIDs_incM_example" , package = "net4pg" , mustWork = TRUE) colnames_filename <- system.file("extdata" , "proteinIDs_incM_example" , package = "net4pg" , mustWork = TRUE) incM <- read_inc_matrix(incM_filename = incM_filename , colnames_filename = colnames_filename , rownames_filename = rownames_filename) ## ----------------------------------------------------------------------------- dim(incM) ## ----------------------------------------------------------------------------- # Read input file names exprTranscriptsFile <- system.file("extdata" , "expressed_transcripts.txt" , package = "net4pg" , mustWork = TRUE) protein2transcriptFile <- system.file("extdata" , "protein_to_transcript" , package = "net4pg" , mustWork = TRUE) # Perform filtering incM_filt <- transcriptome_filter(incM , exprTranscriptsFile = exprTranscriptsFile , proteinToTranscriptFile = protein2transcriptFile , tagContam = "Contam" , remove = "sharedOnly") # Check size after transcriptome-informed filtering dim(incM_filt) ## ----------------------------------------------------------------------------- # Reduce incidence matrix size to accelerate downstream computation incM_filt_reduced <- reduce_inc_matrix(incM_filt) # Calculate the adjacency matrix describing protein-to-protein connections adjM_filt <- get_adj_matrix(incM_filt_reduced) # Generate a graph of protein-to-protein connections by shared peptides and # calculate its CCs (i.e., sets of proteins connected by shared peptides multProteinCC_filt <- get_cc(adjM_filt) # Extract the list of vectors enumerating protein members in each CC cc.multProteins_filt <- multProteinCC_filt$ccs # Calculate CCs size and % of single- vs multi-protein CCs obtained after # transcriptome-informed filtering CCstatsOut_filt <- cc_stats(incM = incM_filt , cc.proteins = multProteinCC_filt$ccs , reducedIncM = TRUE) # Number of single-protein CCs CCstatsOut_filt$N_singleProtCC # Number of multi-protein CCs CCstatsOut_filt$N_multiProtCC # Total number of CCs totCCs_filt <- CCstatsOut_filt$N_singleProtCC + CCstatsOut_filt$N_multiProtCC totCCs_filt # Percentage of single-protein CCs PercSingleP_filt <- round(CCstatsOut_filt$N_singleProtCC / totCCs_filt * 100 , digits = 2) # View table of CC size distribution CCstatsOut_filt$NproteinsDistribution # Plot CC size distribution plot(factor(CCstatsOut_filt$NproteinsDistribution$N_proteins , levels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", ">10")) , as.numeric(as.vector(CCstatsOut_filt$NproteinsDistribution$N_CC)) , type = "s" , xlab = "N_proteins" , ylab = "N_CCs") ## ----------------------------------------------------------------------------- comp <- as.data.frame(cbind(as.character(as.vector(c("before_filter" , "after_filter"))) , as.numeric(as.vector(c(PercSingleP , PercSingleP_filt))))) colnames(comp) <- c("Filter", "Perc_SingleP") ggplot(data = comp , aes(x = as.factor(Filter), y = as.numeric(as.vector(Perc_SingleP)))) + geom_bar(stat = "identity") + theme_classic() + xlab("") + ylab("% single-prot CCs") + ylim(0, 100) + coord_flip() + geom_text(aes(label = as.numeric(as.vector(Perc_SingleP))) , hjust = 1.5, color = "white", size = 4) ## ----------------------------------------------------------------------------- old.par <- par(no.readonly = TRUE) # save default par values ymax_before <- as.numeric(as.vector(CCstatsOut$NproteinsDistribution$N_CC)) ymax_after <- as.numeric(as.vector(CCstatsOut_filt$NproteinsDistribution$N_CC)) ymax <- max(max(ymax_before), max(ymax_after)) par(mfrow = c(1, 2)) plot(factor(CCstatsOut$NproteinsDistribution$N_proteins , levels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", ">10")) , as.numeric(as.vector(CCstatsOut$NproteinsDistribution$N_CC)) , type = "s" , xlab = "N_proteins" , ylab = "N_CCs" , ylim = c(0, ymax) , main = "before filtering") plot(factor(CCstatsOut_filt$NproteinsDistribution$N_proteins , levels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", ">10")) , as.numeric(as.vector(CCstatsOut_filt$NproteinsDistribution$N_CC)) , type = "s" , xlab = "N_proteins" , ylab = "N_CCs" , ylim = c(0, ymax) , main = "after filtering") par(old.par) # restore default par values ## ----------------------------------------------------------------------------- peptideStatsOut_filt <- peptide_stats(incM = incM_filt) ## ----------------------------------------------------------------------------- comp <- as.data.frame(cbind( as.character(as.vector(c("before_filter" , "after_filter"))) , as.numeric(as.vector(c(peptideStatsOut$nbShared , peptideStatsOut_filt$nbShared))))) colnames(comp) <- c("Filter", "Perc_sharedPeptides") ggplot(data = comp , aes(x = as.factor(Filter) , y = as.numeric(as.vector(Perc_sharedPeptides)))) + geom_bar(stat = "identity") + theme_classic() + xlab("") + ylab("% shared peptides") + ylim(0, 100) + coord_flip() + geom_text(aes(label = as.numeric(as.vector(Perc_sharedPeptides))) , hjust = 1.5, color = "white", size = 4) ## ---- fig.height=14----------------------------------------------------------- # Extract peptides and peptide-to-protein mappings for each CC after filtering cc.peptides.incM_filt <- cc_composition(cc.multProteins_filt , incM = incM_filt) # Generate a bipartite graph of the CC which contains the protein of interest, # before and after transcriptome-informed filtering. prot <- "ENSP261" subgraphCC_beforeFilter <- plot_cc(prot = prot , cc.proteins = cc.multProteins , cc.subincM = cc.peptides.incM$cc.subincM , incM = incM , tagProt = "ENSP" , tagContam = "Contam") subgraphCC_afterFilter <- plot_cc(prot = prot , cc.proteins = cc.multProteins_filt , cc.subincM = cc.peptides.incM_filt$cc.subincM , incM = incM_filt , tagProt = "ENSP" , tagContam = "Contam") # Plot old.par <- par(no.readonly = TRUE) # save default par values par(mfrow = c(2, 1)) plot.igraph(subgraphCC_beforeFilter$g , layout = layout_as_bipartite , edge.width = 1 , edge.arrow.width = 0.3 , vertex.size = 35 , edge.arrow.size = 0.5 , vertex.size2 = 35 , vertex.label.cex = 1 , asp = 0.45 , margin = -0.1) + title(paste0("Protein " , prot , " in CC #" , subgraphCC_beforeFilter$cc_id , " before filtering") , line = -1) plot.igraph(subgraphCC_afterFilter$g , layout = layout_as_bipartite , edge.width = 1 , edge.arrow.width = 0.3 , vertex.size = 35 , edge.arrow.size = 0.5 , vertex.size2 = 35 , vertex.label.cex = 1 , asp = 0.45 , margin = -0.1) + title(paste0("Protein " , prot, " in CC #" , subgraphCC_beforeFilter$cc_id , " after filtering") , line = -1) par(old.par) # restore default par values ## ----------------------------------------------------------------------------- sessionInfo()