`gclink` performs end-to-end analysis of gene clusters
(e.g., photosynthesis, carbon/nitrogen/sulfur cycling, carotenoid,
antibiotic, or viral genes) from (meta)genomes. It provides:

- `AllGeneNum` and `MinConSeq` parameters
- `gggenes`
# Install from CRAN
install.packages("gclink")

# Install from GitHub
# requireNamespace() only checks whether devtools is installed; unlike
# require(), it does not attach the package to the search path.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
devtools::install_github("LiuyangLee/gclink")
# Case 1: Using blastp result with Full pipeline (Find Cluster + Extract FASTA + Plot Cluster)
library(gclink)

# Load the datasets used in this example
data(blastp_df)
data(seq_data)
data(photosynthesis_gene_list)
data(PGC_group)

# Run the pipeline. levels_gene_group and color_theme are both given with
# 8 entries (presumably paired by position -- confirm in ?gclink).
gc_list <- gclink(
  in_blastp_df = blastp_df,
  in_seq_data = seq_data,
  in_gene_list = photosynthesis_gene_list,
  in_GC_group = PGC_group,
  AllGeneNum = 50,
  MinConSeq = 25,
  apply_length_filter = TRUE,
  down_IQR = 10,
  up_IQR = 10,
  orf_before_first = 0,
  orf_after_last = 0,
  levels_gene_group = c('bch', 'puh', 'puf', 'crt', 'acsF', 'assembly',
                        'regulator', 'hypothetical ORF'),
  color_theme = c('#3BAA51', '#6495ED', '#DD2421', '#EF9320', '#F8EB00',
                  '#FF0683', '#956548', 'grey'),
  genome_subset = NULL
)

# Pull the three result elements out of the returned list
gc_meta <- gc_list[["GC_meta"]]
gc_seq <- gc_list[["GC_seq"]]
gc_plot <- gc_list[["GC_plot"]]

head(gc_meta)  # Cluster metadata
head(gc_seq)   # FASTA sequences
print(gc_plot) # Visualization
blastp_df
qaccver | saccver | pident | length | mismatch | gapopen | qstart | qend | sstart | send | evalue | bitscore |
---|---|---|---|---|---|---|---|---|---|---|---|
Kuafubacteriaceae–GCA_016703535.1—JADJBV010000002.1_67 | enzymerhodopsin_XP_002954798.1_Volvox_carteri | 26.6 | 576 | 343 | 15 | 157 | 666 | 332 | 893 | 8.18e-41 | 161 |
Kuafubacteriaceae–GCA_016703535.1—JADJBV010000002.1_113 | petB_Candidatus_Methylomirabilis_oxyfera_DAMO_1671_MOX | 76.6 | 248 | 58 | 0 | 14 | 261 | 9 | 256 | 5.43e-149 | 417 |
Kuafubacteriaceae–GCA_016703535.1—JADJBV010000002.1_114 | petC_Candidatus_Nitronauta_litoralis_G3M70_16785_NLI | 50.8 | 177 | 73 | 2 | 8 | 184 | 27 | 189 | 3.83e-59 | 184 |
Kuafubacteriaceae–GCA_016703535.1—JADJBV010000002.1_523 | cruC_Humisphaera_borealis_IPV69_18620_HBS | 31.5 | 365 | 208 | 11 | 42 | 378 | 48 | 398 | 1.45e-41 | 151 |
Kuafubacteriaceae–GCA_016703535.1—JADJBV010000002.1_616 | rfpB_KL662192_1_938 | 33.0 | 227 | 137 | 3 | 4 | 223 | 3 | 221 | 2.53e-32 | 124 |
Kuafubacteriaceae–GCA_016703535.1—JADJBV010000002.1_754 | bchI_p_Myxococcota–c_WYAZ01–o_WYAZ01–GCA_016703535.1—JADJBV010000002.1_754 | 100.0 | 343 | 0 | 0 | 1 | 343 | 1 | 343 | 4.73e-249 | 677 |
seq_data
SeqName | Sequence |
---|---|
Houyibacteriaceae–LLY-WYZ-15_3—k141_102864_1 # 3 # 266 # 1 # ID=85_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.807 | CCGGACGCGCCGCCCGCCCCGAAGGCCCCGCCGGCCGCCCCCACCTATCCGCTCGAAGGCGCGCTCGGTATCAGCCGCGTGCGCCTCGTGCGCGCCACGCCCTGCGGCCTCACCGGCCGCGAGCTCGGCGCCGGCGAGGAGGCCCTCCTCGTCCACTTCGACGACGGACGCCCGCCCCTCGCGGTCGCCCCCGACGCGCTCCCGACGCCCCCCGGCGACGGGACGCCCCCCACCGGCGCTCCGCCGGAAGGAGACCCCGCATGA |
Houyibacteriaceae–LLY-WYZ-15_3—k141_102864_2 # 263 # 490 # 1 # ID=85_2;partial=00;start_type=ATG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.737 | ATGACCCGCCCCGAAGACGCCCCGCCCACCCACGAAGCCGCGGACCGCGCCGTGCGCTCCCTCTTCCAGATCGGTCGCCTCTGGGCCTCCCACGGCCTCGAGATGGGTCGCATGACCTTGCGGACCGCCGCCAAGACCCTCGAGAGCACCGCCGAGACCCTCGAGGACCTCTCCCAGCGCGTCGCCCCCGACGACGAGCGCCCCGCGGACGAACGCGCCGCCGACTGA |
Houyibacteriaceae–LLY-WYZ-15_3—k141_102864_3 # 667 # 2184 # -1 # ID=85_3;partial=00;start_type=ATG;rbs_motif=AGGAGG;rbs_spacer=5-10bp;gc_cont=0.775 | ATGAGCGCGATCGAAGGGACCCGGCCTCGGGACGGCGAGGCCCGCATGCCCGTGGAGGCGACCCCCGTGGAGGCCATCGGGGGCCTCGTCGCCCGGGCGCGTGACGCCGGCTTCGACCACGCGGCCCGGCCCCTCGCCGAGCGCGCGGGGCTGCTGCGCGCGCTCGCGGACGCCATCCTCGCCGACGGGGAGGCCATCGTCGCGCTCCTCGAGGAGGAGACGGGCAAGCCGGCGGCGGAGGCGTGGCTCCACGAGGTCGTGCCGACGGCGGACCTCGGGAGCTGGTGGAGCAGCCAGGGGCCGGCGCACCTCGCGACGGAAGCCGTGCGCCTCGACCCGCTCGCCTACCCTGGCAAGCGCGCGCGCGTCGAGGTGGTCCCGCGTGGCGTCGTGGCGCTGATCACGCCTTGGAACTTCCCGGTGGCGATCCCGCTGCGGACGCTCTTCCCGGCGCTCCTCGCGGGCAACGGCGTCGTCTGGAAGCCGTCCGAGCACACGCCGCGGGTGGCGGCGCGCGTGCACGGGATCGTGCGCGAGGTCTTCGGGCCGGACCTGGTCGAGCTGGTGCAGGGCGCCGGCGCGCAGGGGGCGGCGCTGGTCGAGGCGGACGTGGACGCGGTGGTGTTCACGGGCAGCGTGGCGACCGGGCGGAAGGTCGGCGCGGCGGCGGGGCGGGCGCTCACGCCGGCGTCGCTCGAGCTCGGCGGCAAGGACGCGGCCGTGGTGCTCGACGACGCGGACCTGGAGCGCACGGCCCGGGGCCTGCTCTGGGCGGCGATGGCGAACGCGGGGCAGAACTGCGCCGGGCTCGAGCGCGTCTACGCGGTGGCGGAGGTCGCCGGCCCGCTGAAGGCGCGGCTCGGTGAGCTGGCCGGAGAGCTGGTGCCCGGGCGCGACGTGGGGCCGCTGGTGACCGAGGCGCAGCTCGCGACGGTGGAGCGGCACGTGCGCGAGGCGGTCGACGGGGGCGCGGAGGTGCTGGCCGGCGGCGAGCGGCTCGAGCGGGGCGGGCGCTGGTTCGCGCCGACCGTGCTGGCGGAGGTCGAGCCGTCTTCGGCGGCGCTCCGGGAGGAGACGTTCGGGCCGGTGGTCGTCGTGCAGACGGTGGCGGACGAGGCGGCGGCCGTGGCGGCGGCGAACGACTCGCGCTTCGGGCTGACGGCGAGCGTCTGGACGCGGGACGCGGCGCGCGGGGAGGCGGTCGCACGGCGGCTCCGGGCGGGCGTCGTGACGGTGAACAACCACGCCTTCACCGGGGCCATCCCGGCGCTGCCCTGGGGCGGCGTCGGCGAGACGGGCTTCGGGGTGACGAACTCGCCGCACGCGCTCCACGCATTGGTGCGGCCGCGGGCCGTGGTCGTGGACGGCAACGCGCGGCCGGAGCTCTACTGGCACCCCTACGACGAGGCGCTCGAGCGGCTCGGGAAGGGCATGGCGGCGCTCCGCGGCAAGGGCGGGCCGATCACGAAGGTGCGCGCCGTGGCCAGGCTGCTCGGGGCGCTCCGCCGGCGCTTCTGA |
PGC_group
gene | gene_group | gene_label |
---|---|---|
bciE | bci | E |
bchB | bch | B |
bchC | bch | C |
bchD | bch | D |
photosynthesis_gene_list
bciE bchB bchC bchD bchE
GC_meta
gene | qaccver | saccver | pident | length | mismatch | gapopen | qstart | qend | sstart | send | evalue | bitscore | genome | orf | contig | genome_contig | orf_position | gene_cluster | GC_orf_position | GC_present_length | GC_absent_length | GC_length | SeqName | Sequence | start | end | direction | gene_group | gene_label | Pgenome | Pstart | Pend | Pdirection |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
pufC | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864_97 | pufC_Rhodospirillum_centenum_RC1_2101_RCE | 53.1 | 335 | 147 | 7 | 3 | 329 | 6 | 338 | 7.66E-112 | 333 | Houyibacteriaceae–LLY-WYZ-15_3 | k141_102864_97 | k141_102864 | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864 | 97 | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864—1 | 1 | 34 | 2 | 36 | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864_97 # 117640 # 118917 # -1 # ID=85_97;partial=00;start_type=GTG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.710 | GTGAAGAAGATCGCCATCGCCTTCGTGAGCACCTGGCTCCTCATCGGGGCCGTCTACGCCTACGAGCCGACCGAGACCTCGCAGATCGGCGCCGACGGCGTCGCCATGCAGGTCACGCAGACCGAGGACGAGCTCGCCGCGCGCGTGGAGGCGAACACCGTCCCGCCGGCCATCCCGATGCCCCAGAGCAGCGGCGTGCTGGCGGCCGAGGAGTACGAGAACGTGCAGGTCCTCGGCCACCTCAACACGGCCCAGTTCACCCGGCTGATGACCTCCATCACGCTCTGGGTCGCGCCGGAGCAGGGCTGCGCCTACTGCCACAACACGAACAACCTGGCCTCCGACGAGCTCTACACGAAGCGCGTGGCGCGTCGGATGATCCAGATGACCTGGCACATCAACGAGAACTGGCAGTCGCACGTCCAGGAGACCGGCGTGACCTGCTACACGTGCCACCGCGGCAACAACGTGCCCCAGCACATCTGGTTCGAGACGCCGCCCGACGACCACGGCATGGTGGGCTGGCGTGGCTCGCAGAACGCCCCGAACGACCGGACGGGGATCAGCTCCCTGCCGAACGACGTGTTCGAGGTGTTCCTCGAGGAGGACGCGAGCATCCGGGTCCAGTCGGCCGGGGAGGCCTTCCCGAACGAGAACCGCGCGTCCATCAAGCAGGCCGAGTGGACCTATGGGCTGATGATGCACTTCTCCGAGTCGCTCGGGGTGAACTGCACGGCTTGCCACAACTCGCGCTCCTGGAACGACTGGAGCCAGAGCCCGGCCCGCCGCGGGACGGCCTGGCACGGCATCCGGATGGCGCGAAACCTCAACAACCACTGGCTGACGCCGCTGCGCGATCAGTTCCCGCCGAACCGGCTCGGCGAGCTGGGTGACGCCCCGAAGGCCAACTGCGCGACGTGCCACCAGGGCGCGTACCGCCCCCTGCTCGGGCACCGCATGCTCGAGGACTTCCCGTCCCTCGTACGGGCGATGCCGCAGCCCGAGATCGAGCCGGAGCCGGAGCCGGAGCCCGAGCTGGAAGGCGAGGGCGAGGCCGGCGGGCAGCTCGAGCCGGAGGGGGAGGCGCCCGCCGCCGAAGCCCCCGAGGGCACGAACGCTGCGCCGACGGCGATGGCTGCGCCGGCGGCGATGGCCGCTCCGACGGGGATGGCCGCGCCGGCGGCGATGGCTGCGCCGGCGGCGATGGCTGCTCCGGCGGTGGCCGAGCCGACGCCCATGGCCGCGCCGGCGGCGATGGCGGCCCCGGCACCGAACTGA | 117640 | 118917 | -1 | puf | C | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864—1 | 0 | 1277 | FALSE |
pufM | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864_98 | pufM_p_Myxococcota–c_Polyangia–o_Polyangiales–ERR1726576_bin.13—k141_102738_3 | 100 | 437 | 0 | 0 | 1 | 437 | 1 | 437 | 4.73E-308 | 834 | Houyibacteriaceae–LLY-WYZ-15_3 | k141_102864_98 | k141_102864 | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864 | 98 | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864—1 | 2 | 34 | 2 | 36 | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864_98 # 118914 # 120224 # -1 # ID=85_98;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.704 | ATGGCCCGCTACCAGAACATCTTCACGCAGATCCAAGTCGTCGGTCCGCCGGACACGCCGCCGCCGATCGACCCGGACTTCCGTACGAAGAAGACGCGCATGTCGCGGCTCCTCGGGTGGTTCGGCAACCCGCAGATCGGCCCCGTCTACCTGGGCTACACCGGCCTGGCGTCCGCGATCAGCTTCTTCATCGCTTTCGAGATCATCGGGCTCAACATGCTGGCCTCGGTGGACTGGGACGTCGTTCAGTTCATCCGCCAGCTCCCCTGGCTCGCGCTCGAACCGCCCCCGCCCTCTGCCGGGCTCTCCATCCCGACGCTTCAGGAGGGCGGCTGGTGGCTCATGGCCGGCTTCTTCCTCACGGCGTCGGTCATTCTCTGGTGGATTCGCACCTATCGGCGCGCACGCGCCCTGAAGATGGGCACGCACGTCGCGTGGGCCTTCGCCTCGGCGATCTGGCTCTACCTCGTCCTCGGCTTCATTCGCCCCTTGCTGATGGGGAGCTGGGGGGAGGCGGTGCCCTTCGGCATCTTCCCGCACCTCGACTGGACCGCCGCCTTCTCCGTTCGCTACGGCAACCTCTTCTACAACCCCTTCCACTGCCTCTCGATCGTCTTCCTCTACGGGTCGACGCTCCTCTTCGCCATGCACGGCGCGACGGTGCTCGCGCTCGGGCACGTGGGCGGTGAGCGTGAGGTGAGCCAGGTGGTCGACCGCGGCACGGCGGCCGAGCGCGGGGCGCTCTTCTGGCGCTGGACGATGGGCTTCAACGCGACCTTCGAGTCCATCCACCGCTGGGCCTGGTGGTTCGCGGTGCTCACGCCGCTCACCGGAGGCATCGGCATCCTCCTGACCGGCACCGCCGTCGACAACTGGTATCAGTGGGCCGTCGAGCACGACTTCGCGCCGGCCTATGAGGAGTCCTACGAGGTCGTCCCCGACCCGGTCGACGACCCGGCGAACGAGGACCTGCCCGGTATGCGCGGTGAGTCCACCGCGCAGTGGGAGCCGACCCCCTACGTGCCCGCCGAGGAGCCGGAGGCGCCCGAGGATGGTGCGGACGGCGCGGCCGCGGTCGAAGGCGTCGACGCCGAGGGCGGCGAGGATGCCGCCGCGGATCCCGCGAGCGAGGGCACGAGCGGCCAGCCGGAGACCGGCGCCGCGGCCCCGGAGAGCGAGCGCCTTCCGGACGAAGCGGCGGCGGCCGAGCCCGAAGGGGCTGCGCCGGAGCCCGAACCCCCCGCGCCGTCCGAGACGGCTGCCCCGAGCGAACCCGAGGCGCCCAGCGCGATGACCCCGGAGCAACCGTGA | 118914 | 120224 | -1 | puf | M | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864—1 | 1274 | 2584 | FALSE |
pufL | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864_99 | pufL_p_Myxococcota–c_Polyangia–o_Polyangiales–ERR1726567_bin.15—k141_184359_2 | 100 | 275 | 0 | 0 | 1 | 275 | 1 | 275 | 2.63E-214 | 583 | Houyibacteriaceae–LLY-WYZ-15_3 | k141_102864_99 | k141_102864 | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864 | 99 | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864—1 | 3 | 34 | 2 | 36 | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864_99 # 120270 # 121094 # -1 # ID=85_99;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.648 | ATGGGCCTACTGAGCTTCGAGCGGCGATATCGAGTCCGAGGAGGCACGCTCCTCGGGGGCGACCTATTCGATTTCTGGGTCGGGCCCTTCTACGTGGGGCTCTTCGGCGTCACGACGATCTTCTTCACGATCGTCGGCACCGCGCTGATCCTCTGGGAGGCCTCCCGGGGTGACACCTGGAACCCCTGGCTGATCAACATCCAGCCGCCTCCAATCGAGTACGGGCTCGCCTTCGCGCCCCTCGATCAGGGGGGCATCTGGCAGCTGGTCACCATCTGCGCCATCGGCGCCTTCGGATCCTGGGCGCTCCGACAGGCGGAGATCAGCCGCAAGCTCGGCATGGGCTACCACGTGCCCATCGCCTACGGCGTCGCGGTCTTCGCCTACGTCACGCTCGTGGTGATTCGCCCGGTGATGCTGGGCGCCTGGGGCCACGGCTTCCCCTACGGCATCTTCAGCCACCTCGATTGGGTGTCGAACGTCGGGTACCAGTACCTGCACTTCCACTACAACCCGGCCCACATGATCGCGGTGAGCTTCTTCTTCACCACGACGCTCGCGCTCTCCCTCCACGGCGGTTTGATCCTCTCCGCCGTGAATCCGCCGAAGGGAGAGAAGGTGAAGACCGCCGAGTACGAGGACGGGTTCTTCCGTGACCACATCGGCTACTCGATCGGCGCCCTGGGCATTCATCGACTCGGCCTCTTCCTGGCGCTGAGCGCCGGGATCTGGAGCGCGATCTGCATTCTCATCAGCGGCCCGATGTGGACCAAGGGGTGGCCCGAGTGGTGGGACTGGTGGCTCAACCTCCCCGTGTGGAGCTGA | 120270 | 121094 | -1 | puf | L | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864—1 | 2630 | 3454 | FALSE |
bchO | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864_100 | bchO_Pararhodospirillum_photometricum_RSPPHO_00117_RPM | 44.9 | 265 | 144 | 1 | 33 | 295 | 28 | 292 | 6.97E-60 | 194 | Houyibacteriaceae–LLY-WYZ-15_3 | k141_102864_100 | k141_102864 | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864 | 100 | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864—1 | 4 | 34 | 2 | 36 | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864_100 # 121191 # 122102 # -1 # ID=85_100;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.762 | ATGAGCTCGGCCGTCGAAGAGCAGCGCGTCGAGCACCCGCGGGTCGAGCAGCAGCCCATCGAGCAGCAGCGCGTCGAGCACCAGCGCGTCGAGCGTTCGGGCGTGCGGTGGAACGTCGCCCGCCGCGGCGCCGGACCCACGCTCCTGGCGCTCCACGGGACCGGCAGCTCGAGCCGCTCCTTCTGCGCCCTCGCGGCCACGCTCGGTGCTCGCTTCACCGTCGTGGCGCCCGATCTACCCGGCCACGCCGGGAGCCGGATCGATCGCCGCTTCCGCCTCTCGCTCCCCTCGATCGCCGCCGCCCTCGGCGAGCTCATCGAGGCGCTCGCCGTCCAGCCGGCGCTGGTCCTCGCTCACTCCGCGGGCGCGGCGGTGGCGGCGCGCGCCATGCTCGACGGGGCTCTCCGCCCGGCGCTCTTCGTCGGGCTCGGCGCGGCCCTGACGCCCCTCGAGGGGCTCGCCCGGCTCGGCGCGCGCCCGGCGGCCGCGATGCTCGCCCGCTCGCCCATCACGCGGCGGGTGGCGCGCCGGGCTGGAGGCGCCCTCGTCGGACCGATCCTGCGCAGCGTCGGATCCACCGTCGGCCCCGAGGCCACACAGCGCTATCGGGAGCTCGCCCGCGATCCCGCCCACGTCGGGGCGGTCTTCTCGATGCTCGCCCAGTGGGATCTCGACGGGCTCCACGCGGCGCTACCACGCCTGGACGTACCGACCCTGCTCCTCGGCGGCGCCCGCGACGGCGCCACCCCGATCGCCCAGCAGCGCGCCCTCGCACGTCGCCTCCCGGCCGCGCGCGCGCACGTCGTCCTCGGCGCCGGGCACCTGCTCCACGAGGAGCGACCCGCCGAGATCGCGCGCCTCGTCGAGGCCGAGTGGAACAGATTGGACGGCGGTCGTGTCAAAAATGCTTGA | 121191 | 122102 | -1 | bch | O | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864—1 | 3551 | 4462 | FALSE |
bchD | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864_101 | bchD_p_Myxococcota–c_Polyangia–o_Polyangiales–GCA_002699025.1—PABA01000098.1_81 | 100 | 587 | 0 | 0 | 1 | 587 | 1 | 587 | 0 | 1064 | Houyibacteriaceae–LLY-WYZ-15_3 | k141_102864_101 | k141_102864 | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864 | 101 | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864—1 | 5 | 34 | 2 | 36 | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864_101 # 122099 # 123859 # -1 # ID=85_101;partial=00;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.792 | ATGAGCGGCTGGCCCGACGTGGCGCGCGTCGCCGAGCTCCTGAGCGTCGACCCGGACGGCCTCGGAGGCGTGCGCCTGCGGGGTCGCCCGGGGCCGCACCGGCGCCGGGTGCTCGAGTGGGTGCGCGAGAGGCTGGCCCCGGAGGCGCCCTTCCGGCGCCTGCCCGCGCACGTGACCGAGGATCGGCTCCTCGGGGGCCTCGCGCTCGCGGAGACCTTGCGTTCGGGGCGGGCCGTCATGGAGCAGGGCGTGCTCGCGCGGAGCGACGGCGGCCTGCTCGTCGTGGCCATGGCCGAGCGGGCCGAGCGGGAGGTCGTGGCGCACCTCTGCGCGGCCCTCGACCGCGGCGCGATCACCGTCGAACGCGACGGCATGAGCGCCGAGGCGTCCTGCCGCGTGGGCCTCATCGCGCTCGACGAGGGCATCGACGAGGAGCACGTCGACCCGGCGCTCGCCGACCGGCTCGCCTTCGCGCTGGACCTCGACGCGCTCGATCCGCGGGGAGGGGCGGCGCCGGAACACGGACCCGAGGAGGTCGCGCGAGCCCGCGCCCGCCTCCCGCACGTGAGCCTCGGCGACGACATCATCGCGGCCCTCTCGGAGGCGGCCCAGGCCCTCGGCGTGGAGGCGCTCCGGCCGCTCCTGCTCGCGGCGAAGGCGGCCCGCGCGCACGCGGCGCTCCTCGGCCGGACCCGCGTCGAGGAGGAAGACGCCGGGATGGCGGCGCGCCTCGTCCTCGGCCCGAGGGCGACGCGAGCGCCGAGCGCCGAGCCCGAAGAGGCGGCCGAGCGCGAGGCCGAAGAGGGCGACCCCGACCCGGGAGGCGCCGGCGCGGCTGCAGCCGGCGAACGGGCGGACGGCGCCGACGAGGCCCCGCCGGGCGAGGTCCCGCTCGGCGATCTCGTCTTGGCGGCGGCCGAGAGCGGCATCCCGGCGGGGCTGCTCGACGCCCTCGACGTCGGGACCACCCGGCGGGCCGGCGCGACCGGTCGGAGCGGGGCGACGCGCATCGGCCCGAGCGGCGGCCGCCCGGCGGGGACGCGCGCCGCGCCGCCCACCCGAGGCCAGCGCCTGAACGTCGTCGAGACCCTCCGCGCCGCCGCGCCCTGGCAGCGGCTCCGCGGGGGCGGCTTCGGCGCGGGCGTGCGCGTCCGGCCGGAGGACTTCCGTGTCACCCGTCACCGGCAGCCGATCGAGAGCTGCGTGATCTTCGCCGTCGACGCGTCCGGCTCCGCCGCGCTTCGACGCCTGGCCGAGGCGAAGGGCGCCGTCGAGCGCGTGCTCGGCGACTGCTACGTGCGGCGCGACCACGTCGCCCTCGTCGCGTTCCGCCAGGACGGCGCCGAGCTGCTCCTGCCCCCGACGCGCTCCCTCGCCCGCGTGCGTCGCAGCCTGGCTGCCCTCGCCGGCGGCGGCGCGACCCCCCTCGCCGCGGGGATCGACGCCGCCCATCGGCTCGCCCTCGACGCCCGCGG
GCGCGGCCGCGAGCCCATCGTGGTCGTCATGACCGACGGGCGGGCGAACGTGACCCGGGACGGCCGCCGGGACCCCGCGGTCGCCACCACGGACGCCCTCGAGAGCGCGCGCGGGCTCCAGCGAGCCGCCGTGCCGACCCTCTTCCTCGACACGGCCCCACGCCCCCGGCGCCGTGCCCGCGAGCTCGCCGAGGCCATGGACGCCCGCTACCTGCCGCTGCCCTACCTCGACGCGGCGGGGATCTCACGCCACGTCCAAGCGCTCGCCCGCGAGGGAGCCCGATGA | 122099 | 123859 | -1 | bch | D | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864—1 | 4459 | 6219 | FALSE |
bchI | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864_102 | bchI_p_Myxococcota–c_Polyangia–o_Polyangiales–GCA_002699025.1—PABA01000098.1_82 | 100 | 339 | 0 | 0 | 1 | 339 | 1 | 339 | 1.97E-239 | 652 | Houyibacteriaceae–LLY-WYZ-15_3 | k141_102864_102 | k141_102864 | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864 | 102 | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864—1 | 6 | 34 | 2 | 36 | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864_102 # 123863 # 124879 # -1 # ID=85_102;partial=00;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.745 | ATGACGCCCTATCCCTTCACCGCCATCGTCGCGCAGGACGAGCTCAAGCTCGCCCTGCAGATCGCCACCGTCGACCGCAGCATCGGCGGGGTCCTCGCCTTCGGCGACCGCGGCACCGGCAAGTCGACCACCATCCGCGCGCTCGCCCGGCTCCTGCCGCCGATGCGCGTCGTCGCCAGCTGCCCGTACCACTGTGATCCGGCCGACGCGCGCGCTCGCTGTCCGCACTGTGCCGAAGCCGCAGGGGAGCGGGAGGCGATCGAGACGCCCGTGCCGGTCGTGGACCTGCCCCTCGGCGCCACCGAGGATCGCGTCGTCGGCGCGCTCGATCTCGAGGCGGCCCTCACGCGCGGGGAGCGCCGCTTCTCACCGGGCCTGCTCGCCGCGGCGCATCGAGGCTTCCTCTACATCGACGAGGTCAACCTCCTCCCCGATCACCTCGTGGATCTGCTGCTCGACGTCGCGGCCTCGGGCGAGAACGTGGTCGAGCGCGAGGGCCTGAGCGTGCGCCACCCCGCGCGCTTCGTGCTGATCGGCAGCGGAAACCCGGAGGAGGGCGAGCTGCGCCCCCAGCTGCTCGATCGCTTCGGCCTCTCGCTCGAGGTCCGCACGCCGGACGAGGTCGCGACGCGCGTCGAGGTCGTCAAGCGGCGCATGCGCTACGATCAGGACCCGGAGGCCTTCGCGGCCGCCTGGGCGGAGGACGAGGCGGCCCTCATCGTTCGCCTCCGGGACGCGCGGGCGCGCTTGCCCGAGGTGGCCGTCAGCGACGCCGTGATCGAGCGCGCGAGCCGGCTCTGCCAGGCGCTCGGCACCGACGGGCTCCGGGGGGAGCTGACCTTGATCCGCGCCGCGCGCGCGGCCGCCAGCCTCGACGCGCAGCGGGAGGTCGCCGACGTGCACCTCGCCCAGGTCGCCCCCCTCGCGCTCCGCCACCGGCTGCGACGCGCCCCCCTGGACGACGTCGGCTCGGGCGCGCGCGTGCAGAAGGCCGTCGAGGACGTGCTCGGGGGCTGA | 123863 | 124879 | -1 | bch | I | Houyibacteriaceae–LLY-WYZ-15_3—k141_102864—1 | 6223 | 7239 | FALSE |
GC_seq
>pufC_Houyibacteriaceae--LLY-WYZ-15_3---k141_102864---1
GTGAAGAAGATCGCCATCGCCTTCGTGAGCACCTGGCTCCTCATCGGGGCCGTCTACGCCTACGAGCCGACCGAGACCTCGCAGATCGGCGCCGACGGCGTCGCCATGCAGGTCACGCAGACCGAGGACGAGCTCGCCGCGCGCGTGGAGGCGAACACCGTCCCGCCGGCCATCCCGATGCCCCAGAGCAGCGGCGTGCTGGCGGCCGAGGAGTACGAGAACGTGCAGGTCCTCGGCCACCTCAACACGGCCCAGTTCACCCGGCTGATGACCTCCATCACGCTCTGGGTCGCGCCGGAGCAGGGCTGCGCCTACTGCCACAACACGAACAACCTGGCCTCCGACGAGCTCTACACGAAGCGCGTGGCGCGTCGGATGATCCAGATGACCTGGCACATCAACGAGAACTGGCAGTCGCACGTCCAGGAGACCGGCGTGACCTGCTACACGTGCCACCGCGGCAACAACGTGCCCCAGCACATCTGGTTCGAGACGCCGCCCGACGACCACGGCATGGTGGGCTGGCGTGGCTCGCAGAACGCCCCGAACGACCGGACGGGGATCAGCTCCCTGCCGAACGACGTGTTCGAGGTGTTCCTCGAGGAGGACGCGAGCATCCGGGTCCAGTCGGCCGGGGAGGCCTTCCCGAACGAGAACCGCGCGTCCATCAAGCAGGCCGAGTGGACCTATGGGCTGATGATGCACTTCTCCGAGTCGCTCGGGGTGAACTGCACGGCTTGCCACAACTCGCGCTCCTGGAACGACTGGAGCCAGAGCCCGGCCCGCCGCGGGACGGCCTGGCACGGCATCCGGATGGCGCGAAACCTCAACAACCACTGGCTGACGCCGCTGCGCGATCAGTTCCCGCCGAACCGGCTCGGCGAGCTGGGTGACGCCCCGAAGGCCAACTGCGCGACGTGCCACCAGGGCGCGTACCGCCCCCTGCTCGGGCACCGCATGCTCGAGGACTTCCCGTCCCTCGTACGGGCGATGCCGCAGCCCGAGATCGAGCCGGAGCCGGAGCCGGAGCCCGAGCTGGAAGGCGAGGGCGAGGCCGGCGGGCAGCTCGAGCCGGAGGGGGAGGCGCCCGCCGCCGAAGCCCCCGAGGGCACGAACGCTGCGCCGACGGCGATGGCTGCGCCGGCGGCGATGGCCGCTCCGACGGGGATGGCCGCGCCGGCGGCGATGGCTGCGCCGGCGGCGATGGCTGCTCCGGCGGTGGCCGAGCCGACGCCCATGGCCGCGCCGGCGGCGATGGCGGCCCCGGCACCGAACTGA
>pufM_Houyibacteriaceae--LLY-WYZ-15_3---k141_102864---1
ATGGCCCGCTACCAGAACATCTTCACGCAGATCCAAGTCGTCGGTCCGCCGGACACGCCGCCGCCGATCGACCCGGACTTCCGTACGAAGAAGACGCGCATGTCGCGGCTCCTCGGGTGGTTCGGCAACCCGCAGATCGGCCCCGTCTACCTGGGCTACACCGGCCTGGCGTCCGCGATCAGCTTCTTCATCGCTTTCGAGATCATCGGGCTCAACATGCTGGCCTCGGTGGACTGGGACGTCGTTCAGTTCATCCGCCAGCTCCCCTGGCTCGCGCTCGAACCGCCCCCGCCCTCTGCCGGGCTCTCCATCCCGACGCTTCAGGAGGGCGGCTGGTGGCTCATGGCCGGCTTCTTCCTCACGGCGTCGGTCATTCTCTGGTGGATTCGCACCTATCGGCGCGCACGCGCCCTGAAGATGGGCACGCACGTCGCGTGGGCCTTCGCCTCGGCGATCTGGCTCTACCTCGTCCTCGGCTTCATTCGCCCCTTGCTGATGGGGAGCTGGGGGGAGGCGGTGCCCTTCGGCATCTTCCCGCACCTCGACTGGACCGCCGCCTTCTCCGTTCGCTACGGCAACCTCTTCTACAACCCCTTCCACTGCCTCTCGATCGTCTTCCTCTACGGGTCGACGCTCCTCTTCGCCATGCACGGCGCGACGGTGCTCGCGCTCGGGCACGTGGGCGGTGAGCGTGAGGTGAGCCAGGTGGTCGACCGCGGCACGGCGGCCGAGCGCGGGGCGCTCTTCTGGCGCTGGACGATGGGCTTCAACGCGACCTTCGAGTCCATCCACCGCTGGGCCTGGTGGTTCGCGGTGCTCACGCCGCTCACCGGAGGCATCGGCATCCTCCTGACCGGCACCGCCGTCGACAACTGGTATCAGTGGGCCGTCGAGCACGACTTCGCGCCGGCCTATGAGGAGTCCTACGAGGTCGTCCCCGACCCGGTCGACGACCCGGCGAACGAGGACCTGCCCGGTATGCGCGGTGAGTCCACCGCGCAGTGGGAGCCGACCCCCTACGTGCCCGCCGAGGAGCCGGAGGCGCCCGAGGATGGTGCGGACGGCGCGGCCGCGGTCGAAGGCGTCGACGCCGAGGGCGGCGAGGATGCCGCCGCGGATCCCGCGAGCGAGGGCACGAGCGGCCAGCCGGAGACCGGCGCCGCGGCCCCGGAGAGCGAGCGCCTTCCGGACGAAGCGGCGGCGGCCGAGCCCGAAGGGGCTGCGCCGGAGCCCGAACCCCCCGCGCCGTCCGAGACGGCTGCCCCGAGCGAACCCGAGGCGCCCAGCGCGATGACCCCGGAGCAACCGTGA
>pufL_Houyibacteriaceae--LLY-WYZ-15_3---k141_102864---1
ATGGGCCTACTGAGCTTCGAGCGGCGATATCGAGTCCGAGGAGGCACGCTCCTCGGGGGCGACCTATTCGATTTCTGGGTCGGGCCCTTCTACGTGGGGCTCTTCGGCGTCACGACGATCTTCTTCACGATCGTCGGCACCGCGCTGATCCTCTGGGAGGCCTCCCGGGGTGACACCTGGAACCCCTGGCTGATCAACATCCAGCCGCCTCCAATCGAGTACGGGCTCGCCTTCGCGCCCCTCGATCAGGGGGGCATCTGGCAGCTGGTCACCATCTGCGCCATCGGCGCCTTCGGATCCTGGGCGCTCCGACAGGCGGAGATCAGCCGCAAGCTCGGCATGGGCTACCACGTGCCCATCGCCTACGGCGTCGCGGTCTTCGCCTACGTCACGCTCGTGGTGATTCGCCCGGTGATGCTGGGCGCCTGGGGCCACGGCTTCCCCTACGGCATCTTCAGCCACCTCGATTGGGTGTCGAACGTCGGGTACCAGTACCTGCACTTCCACTACAACCCGGCCCACATGATCGCGGTGAGCTTCTTCTTCACCACGACGCTCGCGCTCTCCCTCCACGGCGGTTTGATCCTCTCCGCCGTGAATCCGCCGAAGGGAGAGAAGGTGAAGACCGCCGAGTACGAGGACGGGTTCTTCCGTGACCACATCGGCTACTCGATCGGCGCCCTGGGCATTCATCGACTCGGCCTCTTCCTGGCGCTGAGCGCCGGGATCTGGAGCGCGATCTGCATTCTCATCAGCGGCCCGATGTGGACCAAGGGGTGGCCCGAGTGGTGGGACTGGTGGCTCAACCTCCCCGTGTGGAGCTGA
>bchO_Houyibacteriaceae--LLY-WYZ-15_3---k141_102864---1
ATGAGCTCGGCCGTCGAAGAGCAGCGCGTCGAGCACCCGCGGGTCGAGCAGCAGCCCATCGAGCAGCAGCGCGTCGAGCACCAGCGCGTCGAGCGTTCGGGCGTGCGGTGGAACGTCGCCCGCCGCGGCGCCGGACCCACGCTCCTGGCGCTCCACGGGACCGGCAGCTCGAGCCGCTCCTTCTGCGCCCTCGCGGCCACGCTCGGTGCTCGCTTCACCGTCGTGGCGCCCGATCTACCCGGCCACGCCGGGAGCCGGATCGATCGCCGCTTCCGCCTCTCGCTCCCCTCGATCGCCGCCGCCCTCGGCGAGCTCATCGAGGCGCTCGCCGTCCAGCCGGCGCTGGTCCTCGCTCACTCCGCGGGCGCGGCGGTGGCGGCGCGCGCCATGCTCGACGGGGCTCTCCGCCCGGCGCTCTTCGTCGGGCTCGGCGCGGCCCTGACGCCCCTCGAGGGGCTCGCCCGGCTCGGCGCGCGCCCGGCGGCCGCGATGCTCGCCCGCTCGCCCATCACGCGGCGGGTGGCGCGCCGGGCTGGAGGCGCCCTCGTCGGACCGATCCTGCGCAGCGTCGGATCCACCGTCGGCCCCGAGGCCACACAGCGCTATCGGGAGCTCGCCCGCGATCCCGCCCACGTCGGGGCGGTCTTCTCGATGCTCGCCCAGTGGGATCTCGACGGGCTCCACGCGGCGCTACCACGCCTGGACGTACCGACCCTGCTCCTCGGCGGCGCCCGCGACGGCGCCACCCCGATCGCCCAGCAGCGCGCCCTCGCACGTCGCCTCCCGGCCGCGCGCGCGCACGTCGTCCTCGGCGCCGGGCACCTGCTCCACGAGGAGCGACCCGCCGAGATCGCGCGCCTCGTCGAGGCCGAGTGGAACAGATTGGACGGCGGTCGTGTCAAAAATGCTTGA
>bchD_Houyibacteriaceae--LLY-WYZ-15_3---k141_102864---1
ATGAGCGGCTGGCCCGACGTGGCGCGCGTCGCCGAGCTCCTGAGCGTCGACCCGGACGGCCTCGGAGGCGTGCGCCTGCGGGGTCGCCCGGGGCCGCACCGGCGCCGGGTGCTCGAGTGGGTGCGCGAGAGGCTGGCCCCGGAGGCGCCCTTCCGGCGCCTGCCCGCGCACGTGACCGAGGATCGGCTCCTCGGGGGCCTCGCGCTCGCGGAGACCTTGCGTTCGGGGCGGGCCGTCATGGAGCAGGGCGTGCTCGCGCGGAGCGACGGCGGCCTGCTCGTCGTGGCCATGGCCGAGCGGGCCGAGCGGGAGGTCGTGGCGCACCTCTGCGCGGCCCTCGACCGCGGCGCGATCACCGTCGAACGCGACGGCATGAGCGCCGAGGCGTCCTGCCGCGTGGGCCTCATCGCGCTCGACGAGGGCATCGACGAGGAGCACGTCGACCCGGCGCTCGCCGACCGGCTCGCCTTCGCGCTGGACCTCGACGCGCTCGATCCGCGGGGAGGGGCGGCGCCGGAACACGGACCCGAGGAGGTCGCGCGAGCCCGCGCCCGCCTCCCGCACGTGAGCCTCGGCGACGACATCATCGCGGCCCTCTCGGAGGCGGCCCAGGCCCTCGGCGTGGAGGCGCTCCGGCCGCTCCTGCTCGCGGCGAAGGCGGCCCGCGCGCACGCGGCGCTCCTCGGCCGGACCCGCGTCGAGGAGGAAGACGCCGGGATGGCGGCGCGCCTCGTCCTCGGCCCGAGGGCGACGCGAGCGCCGAGCGCCGAGCCCGAAGAGGCGGCCGAGCGCGAGGCCGAAGAGGGCGACCCCGACCCGGGAGGCGCCGGCGCGGCTGCAGCCGGCGAACGGGCGGACGGCGCCGACGAGGCCCCGCCGGGCGAGGTCCCGCTCGGCGATCTCGTCTTGGCGGCGGCCGAGAGCGGCATCCCGGCGGGGCTGCTCGACGCCCTCGACGTCGGGACCACCCGGCGGGCCGGCGCGACCGGTCGGAGCGGGGCGACGCGCATCGGCCCGAGCGGCGGCCGCCCGGCGGGGACGCGCGCCGCGCCGCCCACCCGAGGCCAGCGCCTGAACGTCGTCGAGACCCTCCGCGCCGCCGCGCCCTGGCAGCGGCTCCGCGGGGGCGGCTTCGGCGCGGGCGTGCGCGTCCGGCCGGAGGACTTCCGTGTCACCCGTCACCGGCAGCCGATCGAGAGCTGCGTGATCTTCGCCGTCGACGCGTCCGGCTCCGCCGCGCTTCGACGCCTGGCCGAGGCGAAGGGCGCCGTCGAGCGCGTGCTCGGCGACTGCTACGTGCGGCGCGACCACGTCGCCCTCGTCGCGTTCCGCCAGGACGGCGCCGAGCTGCTCCTGCCCCCGACGCGCTCCCTCGCCCGCGTGCGTCGCAGCCTGGCTGCCCTCGCCGGCGGCGGCGCGACCCCCCTCGCCGCGGGGATCGACGCCGCCCATCGGCTCGCCCTCGACGCCCGCGGGCGCGGCCGCGAGCCCATCGTGGTCGTCATGACCGACGGGCGGGCGAACGTGACCCGGGACGGCCGCCGGGACCCCGCGGTCGCCACCACGGACGCCCTCGAGAGCGCGCGCGGGCTCCAGCGAGCCGCCGTGCCGACCCTCTTCCTCGACACGGCCCCACGCCCCCGGCGCCGTGCCCGCGAGCTCGCCGAGGCCATGGACGCCCGCTACCTGCCGCTGCCCTACCTCGACGCGGCGGGGATCTCACGCCACGTCCAAGCGCTCGCCCGCGAGGGAGCCCGATGA
>bchI_Houyibacteriaceae--LLY-WYZ-15_3---k141_102864---1
ATGACGCCCTATCCCTTCACCGCCATCGTCGCGCAGGACGAGCTCAAGCTCGCCCTGCAGATCGCCACCGTCGACCGCAGCATCGGCGGGGTCCTCGCCTTCGGCGACCGCGGCACCGGCAAGTCGACCACCATCCGCGCGCTCGCCCGGCTCCTGCCGCCGATGCGCGTCGTCGCCAGCTGCCCGTACCACTGTGATCCGGCCGACGCGCGCGCTCGCTGTCCGCACTGTGCCGAAGCCGCAGGGGAGCGGGAGGCGATCGAGACGCCCGTGCCGGTCGTGGACCTGCCCCTCGGCGCCACCGAGGATCGCGTCGTCGGCGCGCTCGATCTCGAGGCGGCCCTCACGCGCGGGGAGCGCCGCTTCTCACCGGGCCTGCTCGCCGCGGCGCATCGAGGCTTCCTCTACATCGACGAGGTCAACCTCCTCCCCGATCACCTCGTGGATCTGCTGCTCGACGTCGCGGCCTCGGGCGAGAACGTGGTCGAGCGCGAGGGCCTGAGCGTGCGCCACCCCGCGCGCTTCGTGCTGATCGGCAGCGGAAACCCGGAGGAGGGCGAGCTGCGCCCCCAGCTGCTCGATCGCTTCGGCCTCTCGCTCGAGGTCCGCACGCCGGACGAGGTCGCGACGCGCGTCGAGGTCGTCAAGCGGCGCATGCGCTACGATCAGGACCCGGAGGCCTTCGCGGCCGCCTGGGCGGAGGACGAGGCGGCCCTCATCGTTCGCCTCCGGGACGCGCGGGCGCGCTTGCCCGAGGTGGCCGTCAGCGACGCCGTGATCGAGCGCGCGAGCCGGCTCTGCCAGGCGCTCGGCACCGACGGGCTCCGGGGGGAGCTGACCTTGATCCGCGCCGCGCGCGCGGCCGCCAGCCTCGACGCGCAGCGGGAGGTCGCCGACGTGCACCTCGCCCAGGTCGCCCCCCTCGCGCTCCGCCACCGGCTGCGACGCGCCCCCCTGGACGACGTCGGCTCGGGCGCGCGCGTGCAGAAGGCCGTCGAGGACGTGCTCGGGGGCTGA
GC_plot
# Case 2: Using eggNOG result with Full pipeline (Find Cluster + Extract FASTA + Plot Cluster)
library(gclink)

# Load the datasets used in this example
data(eggnog_df)
data(seq_data)
data(KO_group)

# KEGG Orthology identifiers to search for.
# NOTE(review): "K08226" is listed twice -- confirm whether a different KO
# was intended for one of the two entries.
KOs <- c("K02291", "K09844", "K20611", "K13789",
         "K09846", "K08926", "K08927", "K08928",
         "K08929", "K13991", "K04035", "K04039",
         "K11337", "K03404", "K11336", "K04040",
         "K03403", "K03405", "K04037", "K03428",
         "K04038", "K06049", "K10960", "K11333",
         "K11334", "K11335", "K08226", "K08226",
         "K09773")

# Prefix each identifier with "ko:" (the form used in the KEGG_ko column)
rename_KOs <- paste0("ko:", KOs)

# Copy the eggNOG columns into the BLAST-style column names passed to gclink()
eggnog_df$qaccver <- eggnog_df$`#query`
eggnog_df$saccver <- eggnog_df$KEGG_ko
eggnog_df$evalue <- eggnog_df$evalue # no-op: the column is already named evalue
eggnog_df$bitscore <- eggnog_df$score
eggnog_df$gene <- eggnog_df$KEGG_ko

# Run the pipeline. levels_gene_group and color_theme are both given with
# 7 entries (presumably paired by position -- confirm in ?gclink).
gc_list_2 <- gclink(
  in_blastp_df = eggnog_df,
  in_seq_data = seq_data,
  in_gene_list = rename_KOs,
  in_GC_group = KO_group,
  AllGeneNum = 50,
  MinConSeq = 25,
  apply_evalue_filter = FALSE,
  min_evalue = 1,
  apply_score_filter = TRUE,
  min_score = 10,
  orf_before_first = 1,
  orf_after_last = 1,
  levels_gene_group = c('bch', 'puh', 'puf', 'crt',
                        'acsF', 'assembly', 'hypothetical ORF'),
  color_theme = c('#3BAA51', '#6495ED', '#DD2421', '#EF9320',
                  '#F8EB00', '#FF0683', 'grey')
)

# Pull the three result elements out of the returned list
gc_meta_2 <- gc_list_2[["GC_meta"]]
gc_seq_2 <- gc_list_2[["GC_seq"]]
gc_plot_2 <- gc_list_2[["GC_plot"]]

head(gc_meta_2)  # Cluster metadata
head(gc_seq_2)   # FASTA sequences
print(gc_plot_2) # Visualization
eggnog_df
#query | seed_ortholog | evalue | score | eggNOG_OGs | max_annot_lvl | COG_category | Description | Preferred_name | GOs | EC | KEGG_ko | KEGG_Pathway | KEGG_Module | KEGG_Reaction | KEGG_rclass | BRITE | KEGG_TC | CAZy | BiGG_Reaction | PFAMs |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Kuafuiibacteriaceae–GCA_016703535.1—JADJBV010000001.1_1 | 439375.Oant_2732 | 1.57E-45 | 162 | COG3293@1|root,COG3293@2|Bacteria,1PVIT@1224|Proteobacteria,2TURP@28211|Alphaproteobacteria,1J3RT@118882|Brucellaceae | 28211|Alphaproteobacteria | L | Transposase DDE domain | - | - | - | ko:K07492 | - | - | - | - | ko00000 | - | - | - | DDE_Tnp_1,DDE_Tnp_1_2,DUF4096 |
Kuafuiibacteriaceae–GCA_016703535.1—JADJBV010000001.1_2 | 1173264.KI913949_gene2450 | 3.58E-17 | 83.6 | COG3335@1|root,COG3415@1|root,COG3335@2|Bacteria,COG3415@2|Bacteria,1G39S@1117|Cyanobacteria,1HCKE@1150|Oscillatoriales | 1117|Cyanobacteria | L | COGs COG3415 Transposase and inactivated derivatives | - | - | - | ko:K07494 | - | - | - | - | ko00000 | - | - | - | DDE_3,HTH_32,HTH_Tnp_IS630 |
Kuafuiibacteriaceae–GCA_016703535.1—JADJBV010000001.1_3 | 794903.OPIT5_03400 | 3.03E-30 | 114 | COG3335@1|root,COG3335@2|Bacteria | 2|Bacteria | L | DDE superfamily endonuclease | - | - | - | ko:K07494 | - | - | - | - | ko00000 | - | - | - | DDE_3,HTH_Tnp_IS630 |
Kuafuiibacteriaceae–GCA_016703535.1—JADJBV010000001.1_5 | 502025.Hoch_2790 | 2.78E-50 | 191 | 2AY84@1|root,31QA9@2|Bacteria,1QMYF@1224|Proteobacteria,4374U@68525|delta/epsilon subdivisions,2X20E@28221|Deltaproteobacteria,2YWTZ@29|Myxococcales | 28221|Deltaproteobacteria | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
Kuafuiibacteriaceae–GCA_016703535.1—JADJBV010000001.1_11 | 105420.BBPO01000003_gene1121 | 2.00E-11 | 72.8 | COG2887@1|root,COG2887@2|Bacteria,2GJC5@201174|Actinobacteria,2NGJC@228398|Streptacidiphilus | 201174|Actinobacteria | L | Protein of unknown function (DUF2800) | recB | - | - | ko:K07465 | - | - | - | - | ko00000 | - | - | - | PDDEXK_1 |
Kuafuiibacteriaceae–GCA_016703535.1—JADJBV010000001.1_12 | 1122915.AUGY01000071_gene4398 | 2.13E-37 | 152 | COG1201@1|root,COG1201@2|Bacteria,1UHYQ@1239|Firmicutes,4ISB0@91061|Bacilli,277Q5@186822|Paenibacillaceae | 91061|Bacilli | L | helicase superfamily c-terminal domain | - | - | - | - | - | - | - | - | - | - | - | - | DUF1998,Helicase_C |
seq_data
Same as Case 1
KO_group
gene | gene_group | gene_label |
---|---|---|
ko:K04035 | acsF | acsF |
ko:K08226 | assembly | bch2 |
ko:K04039 | bch | B |
ko:K11337 | bch | C |
ko:K03404 | bch | D |
ko:K11336 | bch | F |
ko:K04035 ko:K08226 ko:K04039 ko:K11337 ko:K03404 ko:K11336
GC_meta

Similar to Case 1

#### 2.2 Gene cluster sequence (GC_seq)

Similar to Case 1

#### 2.3 Gene cluster plot (GC_plot)
Full function reference:
# Open the help page for gclink() from the gclink package
?gclink::gclink
If you use `gclink` in your research, please cite:
Li, L., Huang, D., Hu, Y., Rudling, N. M., Canniffe, D. P., Wang, F., & Wang, Y. “Globally distributed Myxococcota with photosynthesis gene clusters illuminate the origin and evolution of a potentially chimeric lifestyle.” Nature Communications (2023), 14, 6450. https://doi.org/10.1038/s41467-023-42193-7
GPL-3 © Liuyang Li