## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", dev = 'png', eval = F ) ## ----plink_qc----------------------------------------------------------------- # for(i in 1:22) { # bigsnpr::snp_plinkQC(plink.path = '/bin/plink', prefix.in = paste0('euro_only/chr', i), # prefix.out = paste0('euro_only_qc/chr', i), # geno = 0, maf = 0.01, hwe = 1e-10) # } # bigsnpr::snp_plinkQC(plink.path = '/bin/plink', prefix.in = 'euro_only/chrX', # prefix.out = 'euro_only_qc/chrX', # geno = 0, maf = 0.01, hwe = 1e-10) # # bigsnpr::snp_plinkQC(plink.path = '/bin/plink', prefix.in = 'euro_only/chrY', # prefix.out = 'euro_only_qc/chrY', # geno = 0, maf = 0.01, hwe = 1e-10) ## ----gwas_bim_join------------------------------------------------------------ # library(data.table) # system("mkdir plinkRanges") # system("mkdir filtered") # system("mkdir ldak") # # # We assume this contains columns SNPID, CHR19 and BP19 (so-called because of hg19), REF, and ALT # # note that gwas_dat needs to be a data.table # gwas_dat <- fread('gwas_sum_stats.tsv.gz', sep = '\t', header = T) # # # Iterating over chromosomal bim files # for(i in 1:22) { # # bim files have no header # bim_dat <- fread(sprintf('euro_only_qc/chr%d.bim', i), sep = '\t', header = F, col.names = c('Chr', 'ID', 'Cm', 'BP19', 'A1', 'A2') ) # # bim_join <- merge(bim_dat, gwas_dat[CHR19 == i], by.x = 'BP19', by.y = 'BP19') # # # Make sure alleles match, although for two-sided association p-values we don't care whether ref/alt is reversed # bim_join <- bim_join[(REF == A1 & ALT == A2) | (REF == A2 & ALT == A1)] # # bim_join <- bim_join[, .(Chr, BP19, BP19, SNPID)] # # if(any(duplicated(bim_join, by='BP19'))) { # warning(sprintf('%d duplicates removed from output', sum(duplicated(bim_join, by = 'BP19')))) # } # # # Remove duplicates # bim_join <- unique(bim_join, by='BP19') # # fwrite(bim_join, file = sprintf('plinkRanges/chr%d.tsv', i), row.names = F, sep = '\t', col.names = F, quote = F) # } ## ----merge_bim_combined_weights----------------------------------------------- # library(data.table) # # bim_dat <- fread('filtered/chr_all.bim', sep = '\t', header = F, col.names = c('Chr', 'ID', 'Cm', 'BP19', 'A1', 'A2')) # # weights_dat <- fread('ldak/combined_weights.all', sep = ' ', header = T) # # # Drop rows with a missing ID or weight value # weights_dat <- na.omit(weights_dat, cols = c('Predictor', 'Weight')) # # join_dat <- merge(weights_dat, bim_dat[, .(ID, Chr, BP19, A1, A2)], all.x = T, by.x = c('Predictor', 'Chr'), by.y = c('ID', 'Chr'), sort = F) # # fwrite(join_dat, file = 'ldak/combined_weights_meta.all', sep = ' ', col.names = T, row.names = F, quote = F) ## ----------------------------------------------------------------------------- # weights_dat <- fread('ldak/combined_weights_meta.all', sep = ' ', header = T, select = c('Predictor', 'Weight', 'Chr', 'BP19', 'A1', 'A2')) # # # We assume this contains columns SNPID, CHR19 and BP19 (so-called because of hg19), REF, and ALT # gwas_dat <- fread('gwas_sum_stats.tsv.gz', sep = '\t', header = T) # # gwas_dat <- merge(gwas_dat, weights_dat, by.x = c('CHR19', 'BP19'), by.y = c('Chr', 'BP19'), sort = F) # # # Drop rows where the ref/alt allele pairing differs from that already present # gwas_dat <- gwas_dat[((REF == A1 & ALT == A2) | (REF == A2 & ALT == A1))] # # # Drop now-redundant allele columns # gwas_dat[, c('A1', 'A2') := NULL ]