## ----echo=TRUE---------------------------------------------------------------- # Generate toy phenotype and gene expression data sets # This example consists of 40 genes grouped into 5 pathways and 100 patients # grp is a binary trait (e.g., case vs control) # bp is a continuous trait (e.g., blood pressure) set.seed(1234) library(permPATH) n = 100 K = 40 grp = rep(1:0,each=n/2) bp = rnorm(n) g = rep(1:(n/20), rep(20,n/20)) pdat = data.frame(grp, bp, g) rm(grp, bp) expdat = matrix(rnorm(K*n),K,n) ## Assign marker names g1,...,gK to the expression data set and ## patient ids id1,...,idn to the expression and phenotype data gnames = paste("g",1:K,sep="") rownames(expdat) = gnames patid = paste("id",1:n,sep="") rownames(pdat) = patid colnames(expdat) = patid # Group the K genes into M pathways of sizes n1,...,nM M = 5 p = runif(M) p = p/sum(p) nM = rmultinom(1, size=K, prob=p) gset = lapply(nM, function(x){gnames[sample(x)]}) names(gset) = paste("pathway",1:M,sep="") names(gset) # Carry out permutation analysis with grp as the outcome # using the two-sample Wilcoxon test with B=100 random permutations. # The score is the maxmean test statistic res = perm.path(expdat, y=pdat[["grp"]], local.test="wilcoxon", global.test="maxmean", B=100, gset=gset, min.num=2, max.num=50, sort="score") # Output results for top pathways head(res[["res"]]) # Output individual test statistics res[["stats"]] # Carry out permutation analysis with bp as the outcome # using the Spearman test with B=100 random permutations. # The score is the maxmean test statistic res = perm.path(expdat, y=pdat[["bp"]], local.test="spearman", global.test="maxmean", B=100, gset=gset, min.num=2, max.num=50, sort="score") # Output results for top pathways head(res[["res"]]) # Output individual test statistics res[["stats"]] # Carry out permutation analysis with g as the outcome # using the JT test with B=100 random permutations. # The score is the maxmean test statistic res = perm.path(expdat, y=pdat[["g"]], local.test="jt", global.test="maxmean", B=100, gset=gset, min.num=2, max.num=50, sort="score") # Output results for top pathways head(res[["res"]]) # Output individual test statistics res[["stats"]] ## ----echo=TRUE---------------------------------------------------------------- # Generate gene symbols set.seed(1234) library(permPATH) gnames = c("CCL13", "CCL19", "CCL2", "CCL3", "CCL3L1", "CCL4", "CCL5", "CCL7", "CCL8", "CCR1", "CCR2", "CCR3", "CCR5", "CD14", "CD180", "CD2", "CD209", "CD40", "CD44", "CD80", "CD86", "CD8A", "CDC42", "CEBPA", "CSF2", "CXCL1", "CXCL10", "CXCR4", "EIF2AK2", "ELK1", "ERBB2", "FCAR", "HLAA", "HLADQA1", "HLADQB1", "HSPA1A", "IFIT3", "IFNA1", "IFNB1", "IFNG", "IL10", "IL12A", "IL12B", "IL16", "IL1A", "IL1B", "IL2", "IL6", "IL8", "INHBA", "IRF1", "IRF3", "ITGAM", "LTA", "LYN", "MAP3K7", "MAP4K4", "MAPK8", "MAPK8IP3", "MYD88", "NFKB1", "NFKBIA", "NFKBIL1", "NFRKB", "PELI1", "PTGS2", "REL", "RELA", "RIPK2", "SARM1", "STK4", "TAP2", "TGFB1", "TIRAP", "TLR1", "TLR10", "TLR2", "TLR3", "TLR4", "TLR5", "TLR6", "TLR7", "TLR8", "TLR9", "TNF", "UBE2N", "B2M", "RPL13A", "ACTB", "HGDC", "RTC1", "RTC2", "RTC3", "PPC1", "PPC2", "PPC3") # extract pathways available at "http://software.broadinstitute.org/gsea/resources/msigdb/4.0/c2.cp.reactome.v4.0.symbols.gmt" xx = readLines("c2.cp.reactome.v4.0.symbols.gmt") pnames = as.character(sapply(xx, function(x){unlist(strsplit(x, "\t", fixed=TRUE))[1]})) anno = as.character(sapply(xx, function(x){unlist(strsplit(x, "\t", fixed=TRUE))[2]})) gset = lapply(xx, function(x){unlist(strsplit(x, "\t", fixed=TRUE))[-c(1,2)]}) names(gset) = pnames gset = list(gset, pnames, anno) #intersect gene nsymbols with gene symbols from pathways ind = unlist(lapply(gset[[1]], function(x){ifelse(length(intersect(x,gnames))>1, TRUE, FALSE)})) gset[[1]] = gset[[1]][ind] gset[[2]] = gset[[2]][ind] gset[[3]] = gset[[3]][ind] gset[[1]] = lapply(gset[[1]], function(x){intersect(x, gnames)}) names(gset[[1]]) = gset[[2]] names(gset[[3]]) = gset[[2]] #create gene expression data n = 220 K = length(gnames) expdat = matrix(abs(rnorm(K*n)), K, n) rownames(expdat) = gnames patid = paste("id",1:n,sep="") colnames(expdat) = patid grp = rep(1:0,each=n/2) bp = abs(rnorm(n)) g = rep(1:(n/20), rep(20,n/20)) pdat = data.frame(grp, bp, g) rm(grp, bp) # Carry out permutation analysis with grp as the outcome # using the two-sample Wilcoxon test with B=10000 random permutations. # The score is the maxmean test statistic res = perm.path(expdat, y=pdat[["grp"]], local.test="wilcoxon", global.test="maxmean", B=10^4, gset=gset[[1]], min.num=2, max.num=50, sort="score", anno=gset[[3]]) # Output results for top pathways head(res[["res"]]) # Carry out permutation analysis with bp as the outcome # using the Spearman test with B=10000 random permutations. # The score is the maxmean test statistic res = perm.path(expdat, y=pdat[["grp"]], local.test="spearman", global.test="maxmean", B=10^4, gset=gset[[1]], min.num=2, max.num=50, sort="score", anno=gset[[3]]) # Output results for top pathways head(res[["res"]]) # Carry out permutation analysis with grp as the outcome # using the two-sample Wilcoxon test with B=10000 random permutations. # The score is the maxmean test statistic res = perm.path(expdat, y=pdat[["grp"]], local.test="wilcoxon", global.test="maxmean", B=10^4, gset=gset[[1]], min.num=2, max.num=50, sort="score", anno=gset[[3]]) # Output results for top pathways head(res[["res"]]) # Carry out permutation analysis with g as the outcome # using the JT test with B=10000 random permutations. # The score is the maxmean test statistic res = perm.path(expdat, y=pdat[["g"]], local.test="jt", global.test="maxmean", B=10^4, gset=gset[[1]], min.num=2, max.num=50, sort="score", anno=gset[[3]]) # Output results for top pathways head(res[["res"]]) ## ----echo=TRUE---------------------------------------------------------------- library(permPATH) set.seed(1234) n = 100 K = 40 grp = rep(1:0,each=n/2) bp = rnorm(n) pdat = data.frame(grp, bp) rm(grp, bp) expdat = matrix(rnorm(K*n),K,n) ## Assign marker names g1,...,gK to the expression data set and ## patient ids id1,...,idn to the expression and phenotype data gnames = paste("g",1:K,sep="") rownames(expdat) = gnames patid = paste("id",1:n,sep="") rownames(pdat) = patid colnames(expdat) = patid #Group the K genes into M pathways of sizes n1,...,nM M = 5 p = runif(M) p = p/sum(p) nM = rmultinom(1, size=K, prob=p) gset = lapply(nM, function(x){gnames[sample(x)]}) names(gset) = paste("pathway",1:M,sep="") ## Carry out permutation analysis with grp as the outcome ## using the two-sample Wilcoxon with B=100 random permutations res = perm.path(expdat, y=pdat[["grp"]], local.test="wilcoxon", global.test="maxmean", B=100, gset=gset, min.num=2, max.num=50, sort="score") # create an html file #epermPATH2HTML(rstab, dir="/dir/", fname="tophits") sessionInfo()