Spaces:
Sleeping
Sleeping
| """ | |
| Codon usage tables for common expression hosts. | |
| Data derived from Kazusa Codon Usage Database and CoCoPUTs. | |
| Frequencies are expressed as fractions (0-1) within each amino acid group. | |
| """ | |
# Standard genetic code: DNA codon -> one-letter amino acid code ('*' = stop).
# Codons of the same amino acid are kept adjacent; the insertion order of this
# dict determines the codon order of any reverse lookup derived from it.
CODON_TO_AA = {
    'TTT': 'F', 'TTC': 'F',  # Phe
    'TTA': 'L', 'TTG': 'L', 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L',  # Leu
    'ATT': 'I', 'ATC': 'I', 'ATA': 'I',  # Ile
    'ATG': 'M',  # Met
    'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',  # Val
    'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'AGT': 'S', 'AGC': 'S',  # Ser
    'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',  # Pro
    'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',  # Thr
    'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',  # Ala
    'TAT': 'Y', 'TAC': 'Y',  # Tyr
    'TAA': '*', 'TAG': '*', 'TGA': '*',  # Stop
    'CAT': 'H', 'CAC': 'H',  # His
    'CAA': 'Q', 'CAG': 'Q',  # Gln
    'AAT': 'N', 'AAC': 'N',  # Asn
    'AAA': 'K', 'AAG': 'K',  # Lys
    'GAT': 'D', 'GAC': 'D',  # Asp
    'GAA': 'E', 'GAG': 'E',  # Glu
    'TGT': 'C', 'TGC': 'C',  # Cys
    'TGG': 'W',  # Trp
    'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'AGA': 'R', 'AGG': 'R',  # Arg
    'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G',  # Gly
}
# Reverse lookup: one-letter amino acid code ('*' for stop) -> list of the
# codons that encode it, in the order they appear in CODON_TO_AA.
AA_TO_CODONS = {}
for codon, aa in CODON_TO_AA.items():
    # setdefault creates the list the first time an amino acid is seen.
    AA_TO_CODONS.setdefault(aa, []).append(codon)
# Per-organism codon usage.
# Outer key: canonical organism name. Inner mapping: codon -> relative
# frequency of that codon within its synonymous family, so the values of each
# family (including the stop codons) add up to 1.0.
CODON_USAGE = {
    "Escherichia coli K12": {
        'TTT': 0.51, 'TTC': 0.49,  # Phe
        'TTA': 0.11, 'TTG': 0.11, 'CTT': 0.10, 'CTC': 0.10, 'CTA': 0.03, 'CTG': 0.55,  # Leu
        'ATT': 0.47, 'ATC': 0.46, 'ATA': 0.07,  # Ile
        'ATG': 1.00,  # Met
        'GTT': 0.28, 'GTC': 0.20, 'GTA': 0.17, 'GTG': 0.35,  # Val
        'TCT': 0.17, 'TCC': 0.15, 'TCA': 0.12, 'TCG': 0.13, 'AGT': 0.13, 'AGC': 0.30,  # Ser
        'CCT': 0.16, 'CCC': 0.10, 'CCA': 0.20, 'CCG': 0.54,  # Pro
        'ACT': 0.19, 'ACC': 0.40, 'ACA': 0.13, 'ACG': 0.28,  # Thr
        'GCT': 0.18, 'GCC': 0.26, 'GCA': 0.23, 'GCG': 0.33,  # Ala
        'TAT': 0.53, 'TAC': 0.47,  # Tyr
        'TAA': 0.61, 'TAG': 0.09, 'TGA': 0.30,  # Stop
        'CAT': 0.52, 'CAC': 0.48,  # His
        'CAA': 0.31, 'CAG': 0.69,  # Gln
        'AAT': 0.39, 'AAC': 0.61,  # Asn
        'AAA': 0.74, 'AAG': 0.26,  # Lys
        'GAT': 0.59, 'GAC': 0.41,  # Asp
        'GAA': 0.68, 'GAG': 0.32,  # Glu
        'TGT': 0.43, 'TGC': 0.57,  # Cys
        'TGG': 1.00,  # Trp
        'CGT': 0.42, 'CGC': 0.37, 'CGA': 0.05, 'CGG': 0.08, 'AGA': 0.04, 'AGG': 0.04,  # Arg
        'GGT': 0.38, 'GGC': 0.40, 'GGA': 0.09, 'GGG': 0.13,  # Gly
    },
    "Homo sapiens": {
        'TTT': 0.45, 'TTC': 0.55,  # Phe
        'TTA': 0.07, 'TTG': 0.13, 'CTT': 0.13, 'CTC': 0.20, 'CTA': 0.07, 'CTG': 0.40,  # Leu
        'ATT': 0.36, 'ATC': 0.48, 'ATA': 0.16,  # Ile
        'ATG': 1.00,  # Met
        'GTT': 0.18, 'GTC': 0.24, 'GTA': 0.11, 'GTG': 0.47,  # Val
        'TCT': 0.18, 'TCC': 0.22, 'TCA': 0.15, 'TCG': 0.06, 'AGT': 0.15, 'AGC': 0.24,  # Ser
        'CCT': 0.28, 'CCC': 0.33, 'CCA': 0.27, 'CCG': 0.12,  # Pro
        'ACT': 0.24, 'ACC': 0.36, 'ACA': 0.28, 'ACG': 0.12,  # Thr
        'GCT': 0.26, 'GCC': 0.40, 'GCA': 0.23, 'GCG': 0.11,  # Ala
        'TAT': 0.43, 'TAC': 0.57,  # Tyr
        'TAA': 0.28, 'TAG': 0.20, 'TGA': 0.52,  # Stop
        'CAT': 0.41, 'CAC': 0.59,  # His
        'CAA': 0.25, 'CAG': 0.75,  # Gln
        'AAT': 0.46, 'AAC': 0.54,  # Asn
        'AAA': 0.42, 'AAG': 0.58,  # Lys
        'GAT': 0.46, 'GAC': 0.54,  # Asp
        'GAA': 0.42, 'GAG': 0.58,  # Glu
        'TGT': 0.45, 'TGC': 0.55,  # Cys
        'TGG': 1.00,  # Trp
        'CGT': 0.08, 'CGC': 0.19, 'CGA': 0.11, 'CGG': 0.21, 'AGA': 0.20, 'AGG': 0.21,  # Arg
        'GGT': 0.16, 'GGC': 0.34, 'GGA': 0.25, 'GGG': 0.25,  # Gly
    },
    "CHO (Chinese Hamster Ovary)": {
        'TTT': 0.44, 'TTC': 0.56,  # Phe
        'TTA': 0.06, 'TTG': 0.12, 'CTT': 0.12, 'CTC': 0.21, 'CTA': 0.07, 'CTG': 0.42,  # Leu
        'ATT': 0.34, 'ATC': 0.51, 'ATA': 0.15,  # Ile
        'ATG': 1.00,  # Met
        'GTT': 0.17, 'GTC': 0.25, 'GTA': 0.10, 'GTG': 0.48,  # Val
        'TCT': 0.19, 'TCC': 0.24, 'TCA': 0.14, 'TCG': 0.05, 'AGT': 0.14, 'AGC': 0.24,  # Ser
        'CCT': 0.29, 'CCC': 0.34, 'CCA': 0.26, 'CCG': 0.11,  # Pro
        'ACT': 0.24, 'ACC': 0.38, 'ACA': 0.27, 'ACG': 0.11,  # Thr
        'GCT': 0.27, 'GCC': 0.42, 'GCA': 0.21, 'GCG': 0.10,  # Ala
        'TAT': 0.42, 'TAC': 0.58,  # Tyr
        'TAA': 0.26, 'TAG': 0.22, 'TGA': 0.52,  # Stop
        'CAT': 0.40, 'CAC': 0.60,  # His
        'CAA': 0.24, 'CAG': 0.76,  # Gln
        'AAT': 0.44, 'AAC': 0.56,  # Asn
        'AAA': 0.40, 'AAG': 0.60,  # Lys
        'GAT': 0.44, 'GAC': 0.56,  # Asp
        'GAA': 0.40, 'GAG': 0.60,  # Glu
        'TGT': 0.44, 'TGC': 0.56,  # Cys
        'TGG': 1.00,  # Trp
        'CGT': 0.07, 'CGC': 0.20, 'CGA': 0.10, 'CGG': 0.22, 'AGA': 0.20, 'AGG': 0.21,  # Arg
        'GGT': 0.15, 'GGC': 0.36, 'GGA': 0.24, 'GGG': 0.25,  # Gly
    },
    "Saccharomyces cerevisiae": {
        'TTT': 0.59, 'TTC': 0.41,  # Phe
        'TTA': 0.28, 'TTG': 0.29, 'CTT': 0.13, 'CTC': 0.06, 'CTA': 0.14, 'CTG': 0.10,  # Leu
        'ATT': 0.46, 'ATC': 0.26, 'ATA': 0.28,  # Ile
        'ATG': 1.00,  # Met
        'GTT': 0.39, 'GTC': 0.21, 'GTA': 0.21, 'GTG': 0.19,  # Val
        'TCT': 0.26, 'TCC': 0.16, 'TCA': 0.21, 'TCG': 0.10, 'AGT': 0.16, 'AGC': 0.11,  # Ser
        'CCT': 0.31, 'CCC': 0.15, 'CCA': 0.42, 'CCG': 0.12,  # Pro
        'ACT': 0.35, 'ACC': 0.22, 'ACA': 0.30, 'ACG': 0.13,  # Thr
        'GCT': 0.38, 'GCC': 0.22, 'GCA': 0.29, 'GCG': 0.11,  # Ala
        'TAT': 0.56, 'TAC': 0.44,  # Tyr
        'TAA': 0.47, 'TAG': 0.23, 'TGA': 0.30,  # Stop
        'CAT': 0.64, 'CAC': 0.36,  # His
        'CAA': 0.69, 'CAG': 0.31,  # Gln
        'AAT': 0.59, 'AAC': 0.41,  # Asn
        'AAA': 0.58, 'AAG': 0.42,  # Lys
        'GAT': 0.65, 'GAC': 0.35,  # Asp
        'GAA': 0.70, 'GAG': 0.30,  # Glu
        'TGT': 0.63, 'TGC': 0.37,  # Cys
        'TGG': 1.00,  # Trp
        'CGT': 0.14, 'CGC': 0.06, 'CGA': 0.07, 'CGG': 0.04, 'AGA': 0.48, 'AGG': 0.21,  # Arg
        'GGT': 0.47, 'GGC': 0.19, 'GGA': 0.22, 'GGG': 0.12,  # Gly
    },
    "Mus musculus": {
        'TTT': 0.44, 'TTC': 0.56,  # Phe
        'TTA': 0.06, 'TTG': 0.13, 'CTT': 0.13, 'CTC': 0.20, 'CTA': 0.08, 'CTG': 0.40,  # Leu
        'ATT': 0.35, 'ATC': 0.50, 'ATA': 0.15,  # Ile
        'ATG': 1.00,  # Met
        'GTT': 0.17, 'GTC': 0.25, 'GTA': 0.11, 'GTG': 0.47,  # Val
        'TCT': 0.19, 'TCC': 0.22, 'TCA': 0.14, 'TCG': 0.05, 'AGT': 0.15, 'AGC': 0.25,  # Ser
        'CCT': 0.30, 'CCC': 0.31, 'CCA': 0.28, 'CCG': 0.11,  # Pro
        'ACT': 0.25, 'ACC': 0.36, 'ACA': 0.28, 'ACG': 0.11,  # Thr
        'GCT': 0.28, 'GCC': 0.39, 'GCA': 0.23, 'GCG': 0.10,  # Ala
        'TAT': 0.43, 'TAC': 0.57,  # Tyr
        'TAA': 0.28, 'TAG': 0.20, 'TGA': 0.52,  # Stop
        'CAT': 0.40, 'CAC': 0.60,  # His
        'CAA': 0.25, 'CAG': 0.75,  # Gln
        'AAT': 0.45, 'AAC': 0.55,  # Asn
        'AAA': 0.41, 'AAG': 0.59,  # Lys
        'GAT': 0.45, 'GAC': 0.55,  # Asp
        'GAA': 0.41, 'GAG': 0.59,  # Glu
        'TGT': 0.45, 'TGC': 0.55,  # Cys
        'TGG': 1.00,  # Trp
        'CGT': 0.08, 'CGC': 0.18, 'CGA': 0.12, 'CGG': 0.20, 'AGA': 0.21, 'AGG': 0.21,  # Arg
        'GGT': 0.17, 'GGC': 0.33, 'GGA': 0.26, 'GGG': 0.24,  # Gly
    },
    "Insect (Spodoptera frugiperda)": {
        'TTT': 0.52, 'TTC': 0.48,  # Phe
        'TTA': 0.10, 'TTG': 0.17, 'CTT': 0.14, 'CTC': 0.15, 'CTA': 0.09, 'CTG': 0.35,  # Leu
        'ATT': 0.43, 'ATC': 0.40, 'ATA': 0.17,  # Ile
        'ATG': 1.00,  # Met
        'GTT': 0.24, 'GTC': 0.22, 'GTA': 0.15, 'GTG': 0.39,  # Val
        'TCT': 0.20, 'TCC': 0.19, 'TCA': 0.17, 'TCG': 0.08, 'AGT': 0.16, 'AGC': 0.20,  # Ser
        'CCT': 0.27, 'CCC': 0.24, 'CCA': 0.33, 'CCG': 0.16,  # Pro
        'ACT': 0.27, 'ACC': 0.30, 'ACA': 0.29, 'ACG': 0.14,  # Thr
        'GCT': 0.29, 'GCC': 0.32, 'GCA': 0.26, 'GCG': 0.13,  # Ala
        'TAT': 0.50, 'TAC': 0.50,  # Tyr
        'TAA': 0.35, 'TAG': 0.25, 'TGA': 0.40,  # Stop
        'CAT': 0.48, 'CAC': 0.52,  # His
        'CAA': 0.38, 'CAG': 0.62,  # Gln
        'AAT': 0.52, 'AAC': 0.48,  # Asn
        'AAA': 0.50, 'AAG': 0.50,  # Lys
        'GAT': 0.52, 'GAC': 0.48,  # Asp
        'GAA': 0.52, 'GAG': 0.48,  # Glu
        'TGT': 0.50, 'TGC': 0.50,  # Cys
        'TGG': 1.00,  # Trp
        'CGT': 0.12, 'CGC': 0.16, 'CGA': 0.13, 'CGG': 0.15, 'AGA': 0.24, 'AGG': 0.20,  # Arg
        'GGT': 0.22, 'GGC': 0.28, 'GGA': 0.28, 'GGG': 0.22,  # Gly
    },
    "Pichia pastoris": {
        'TTT': 0.55, 'TTC': 0.45,  # Phe
        'TTA': 0.15, 'TTG': 0.30, 'CTT': 0.20, 'CTC': 0.10, 'CTA': 0.10, 'CTG': 0.15,  # Leu
        'ATT': 0.45, 'ATC': 0.35, 'ATA': 0.20,  # Ile
        'ATG': 1.00,  # Met
        'GTT': 0.35, 'GTC': 0.25, 'GTA': 0.18, 'GTG': 0.22,  # Val
        'TCT': 0.28, 'TCC': 0.18, 'TCA': 0.20, 'TCG': 0.08, 'AGT': 0.15, 'AGC': 0.11,  # Ser
        'CCT': 0.30, 'CCC': 0.15, 'CCA': 0.40, 'CCG': 0.15,  # Pro
        'ACT': 0.35, 'ACC': 0.25, 'ACA': 0.28, 'ACG': 0.12,  # Thr
        'GCT': 0.38, 'GCC': 0.22, 'GCA': 0.28, 'GCG': 0.12,  # Ala
        'TAT': 0.55, 'TAC': 0.45,  # Tyr
        'TAA': 0.40, 'TAG': 0.25, 'TGA': 0.35,  # Stop
        'CAT': 0.58, 'CAC': 0.42,  # His
        'CAA': 0.60, 'CAG': 0.40,  # Gln
        'AAT': 0.55, 'AAC': 0.45,  # Asn
        'AAA': 0.55, 'AAG': 0.45,  # Lys
        'GAT': 0.60, 'GAC': 0.40,  # Asp
        'GAA': 0.60, 'GAG': 0.40,  # Glu
        'TGT': 0.58, 'TGC': 0.42,  # Cys
        'TGG': 1.00,  # Trp
        'CGT': 0.15, 'CGC': 0.08, 'CGA': 0.10, 'CGG': 0.07, 'AGA': 0.40, 'AGG': 0.20,  # Arg
        'GGT': 0.40, 'GGC': 0.20, 'GGA': 0.25, 'GGG': 0.15,  # Gly
    },
    "Bacillus subtilis": {
        'TTT': 0.57, 'TTC': 0.43,  # Phe
        'TTA': 0.20, 'TTG': 0.15, 'CTT': 0.18, 'CTC': 0.12, 'CTA': 0.08, 'CTG': 0.27,  # Leu
        'ATT': 0.48, 'ATC': 0.38, 'ATA': 0.14,  # Ile
        'ATG': 1.00,  # Met
        'GTT': 0.30, 'GTC': 0.22, 'GTA': 0.22, 'GTG': 0.26,  # Val
        'TCT': 0.22, 'TCC': 0.14, 'TCA': 0.22, 'TCG': 0.10, 'AGT': 0.16, 'AGC': 0.16,  # Ser
        'CCT': 0.22, 'CCC': 0.10, 'CCA': 0.38, 'CCG': 0.30,  # Pro
        'ACT': 0.28, 'ACC': 0.22, 'ACA': 0.32, 'ACG': 0.18,  # Thr
        'GCT': 0.30, 'GCC': 0.18, 'GCA': 0.32, 'GCG': 0.20,  # Ala
        'TAT': 0.58, 'TAC': 0.42,  # Tyr
        'TAA': 0.55, 'TAG': 0.15, 'TGA': 0.30,  # Stop
        'CAT': 0.55, 'CAC': 0.45,  # His
        'CAA': 0.55, 'CAG': 0.45,  # Gln
        'AAT': 0.52, 'AAC': 0.48,  # Asn
        'AAA': 0.70, 'AAG': 0.30,  # Lys
        'GAT': 0.58, 'GAC': 0.42,  # Asp
        'GAA': 0.68, 'GAG': 0.32,  # Glu
        'TGT': 0.52, 'TGC': 0.48,  # Cys
        'TGG': 1.00,  # Trp
        'CGT': 0.25, 'CGC': 0.20, 'CGA': 0.12, 'CGG': 0.10, 'AGA': 0.18, 'AGG': 0.15,  # Arg
        'GGT': 0.30, 'GGC': 0.28, 'GGA': 0.25, 'GGG': 0.17,  # Gly
    },
    "Drosophila melanogaster": {
        'TTT': 0.35, 'TTC': 0.65,  # Phe
        'TTA': 0.05, 'TTG': 0.15, 'CTT': 0.10, 'CTC': 0.18, 'CTA': 0.08, 'CTG': 0.44,  # Leu
        'ATT': 0.30, 'ATC': 0.55, 'ATA': 0.15,  # Ile
        'ATG': 1.00,  # Met
        'GTT': 0.18, 'GTC': 0.28, 'GTA': 0.10, 'GTG': 0.44,  # Val
        'TCT': 0.12, 'TCC': 0.28, 'TCA': 0.10, 'TCG': 0.15, 'AGT': 0.10, 'AGC': 0.25,  # Ser
        'CCT': 0.15, 'CCC': 0.38, 'CCA': 0.25, 'CCG': 0.22,  # Pro
        'ACT': 0.18, 'ACC': 0.45, 'ACA': 0.20, 'ACG': 0.17,  # Thr
        'GCT': 0.20, 'GCC': 0.48, 'GCA': 0.18, 'GCG': 0.14,  # Ala
        'TAT': 0.35, 'TAC': 0.65,  # Tyr
        'TAA': 0.30, 'TAG': 0.22, 'TGA': 0.48,  # Stop
        'CAT': 0.38, 'CAC': 0.62,  # His
        'CAA': 0.30, 'CAG': 0.70,  # Gln
        'AAT': 0.38, 'AAC': 0.62,  # Asn
        'AAA': 0.30, 'AAG': 0.70,  # Lys
        'GAT': 0.42, 'GAC': 0.58,  # Asp
        'GAA': 0.35, 'GAG': 0.65,  # Glu
        'TGT': 0.35, 'TGC': 0.65,  # Cys
        'TGG': 1.00,  # Trp
        'CGT': 0.12, 'CGC': 0.35, 'CGA': 0.12, 'CGG': 0.18, 'AGA': 0.10, 'AGG': 0.13,  # Arg
        'GGT': 0.20, 'GGC': 0.45, 'GGA': 0.22, 'GGG': 0.13,  # Gly
    },
    "Caenorhabditis elegans": {
        'TTT': 0.48, 'TTC': 0.52,  # Phe
        'TTA': 0.10, 'TTG': 0.18, 'CTT': 0.20, 'CTC': 0.18, 'CTA': 0.08, 'CTG': 0.26,  # Leu
        'ATT': 0.38, 'ATC': 0.45, 'ATA': 0.17,  # Ile
        'ATG': 1.00,  # Met
        'GTT': 0.25, 'GTC': 0.28, 'GTA': 0.15, 'GTG': 0.32,  # Val
        'TCT': 0.20, 'TCC': 0.20, 'TCA': 0.18, 'TCG': 0.10, 'AGT': 0.15, 'AGC': 0.17,  # Ser
        'CCT': 0.22, 'CCC': 0.18, 'CCA': 0.40, 'CCG': 0.20,  # Pro
        'ACT': 0.25, 'ACC': 0.30, 'ACA': 0.30, 'ACG': 0.15,  # Thr
        'GCT': 0.28, 'GCC': 0.30, 'GCA': 0.28, 'GCG': 0.14,  # Ala
        'TAT': 0.48, 'TAC': 0.52,  # Tyr
        'TAA': 0.35, 'TAG': 0.25, 'TGA': 0.40,  # Stop
        'CAT': 0.48, 'CAC': 0.52,  # His
        'CAA': 0.45, 'CAG': 0.55,  # Gln
        'AAT': 0.48, 'AAC': 0.52,  # Asn
        'AAA': 0.52, 'AAG': 0.48,  # Lys
        'GAT': 0.52, 'GAC': 0.48,  # Asp
        'GAA': 0.55, 'GAG': 0.45,  # Glu
        'TGT': 0.48, 'TGC': 0.52,  # Cys
        'TGG': 1.00,  # Trp
        'CGT': 0.18, 'CGC': 0.15, 'CGA': 0.18, 'CGG': 0.12, 'AGA': 0.22, 'AGG': 0.15,  # Arg
        'GGT': 0.22, 'GGC': 0.25, 'GGA': 0.35, 'GGG': 0.18,  # Gly
    },
}
# Alias mapping for user convenience: short or common name -> canonical
# organism name used as a key of the codon usage tables.
# Every organism offers its abbreviated binomial (e.g. "H. sapiens"), and the
# names embedded in parenthesised canonical keys resolve on their own.
ORGANISM_ALIASES = {
    "E. coli": "Escherichia coli K12",
    "E.coli": "Escherichia coli K12",
    "Ecoli": "Escherichia coli K12",
    "Escherichia coli": "Escherichia coli K12",
    "Human": "Homo sapiens",
    "H. sapiens": "Homo sapiens",
    "CHO": "CHO (Chinese Hamster Ovary)",
    "Chinese Hamster Ovary": "CHO (Chinese Hamster Ovary)",
    "Yeast": "Saccharomyces cerevisiae",
    "S. cerevisiae": "Saccharomyces cerevisiae",
    "Mouse": "Mus musculus",
    "M. musculus": "Mus musculus",
    "Insect": "Insect (Spodoptera frugiperda)",
    "Sf9": "Insect (Spodoptera frugiperda)",
    "Sf21": "Insect (Spodoptera frugiperda)",
    "Spodoptera frugiperda": "Insect (Spodoptera frugiperda)",
    "S. frugiperda": "Insect (Spodoptera frugiperda)",
    "Pichia": "Pichia pastoris",
    "P. pastoris": "Pichia pastoris",
    "B. subtilis": "Bacillus subtilis",
    "Fruit fly": "Drosophila melanogaster",
    "Drosophila": "Drosophila melanogaster",
    "D. melanogaster": "Drosophila melanogaster",
    "C. elegans": "Caenorhabditis elegans",
    "Worm": "Caenorhabditis elegans",
}
def get_organism_list():
    """List the canonical organism names that have a codon usage table.

    Returns:
        A new list holding the keys of ``CODON_USAGE`` in definition order.
    """
    return [*CODON_USAGE]
def get_codon_table(organism: str) -> dict:
    """Return the codon usage table for an organism.

    Name resolution:
      1. An exact match in ``ORGANISM_ALIASES`` is translated to its
         canonical name; otherwise the name is used as given.
      2. If that name is not a key of ``CODON_USAGE`` and ``organism`` is a
         string, a whitespace-trimmed, case-insensitive match against the
         canonical names and the aliases is attempted (aliases win on a
         collision), so inputs such as ``"human"`` or ``" CHO "`` resolve.

    Args:
        organism: Canonical organism name or one of its aliases.

    Returns:
        A new dict mapping each codon to its frequency for that organism.
        A copy is returned so callers cannot modify the shared table.

    Raises:
        ValueError: If the name cannot be resolved to a known organism.
    """
    # Aliases are checked first, exactly as written by the caller.
    resolved = ORGANISM_ALIASES.get(organism, organism)

    if resolved not in CODON_USAGE and isinstance(organism, str):
        # Lenient fallback, built only on the miss path so exact lookups
        # pay nothing for it.
        folded_names = {name.casefold(): name for name in CODON_USAGE}
        folded_names.update(
            (alias.casefold(), target)
            for alias, target in ORGANISM_ALIASES.items()
        )
        resolved = folded_names.get(organism.strip().casefold(), resolved)

    if resolved not in CODON_USAGE:
        raise ValueError(f"Unknown organism: {resolved}. Available: {get_organism_list()}")

    # Hand out a copy: the module-level table is shared by every caller.
    return dict(CODON_USAGE[resolved])
def get_synonymous_codons(amino_acid: str) -> list:
    """Return all codons encoding a given amino acid.

    Args:
        amino_acid: A key of ``AA_TO_CODONS``.

    Returns:
        A new list with the codons stored for ``amino_acid``, in their stored
        order. A copy is returned so callers may sort, shuffle or filter it
        without altering the shared module-level lookup.

    Raises:
        ValueError: If ``amino_acid`` is not a key of ``AA_TO_CODONS``.
    """
    if amino_acid not in AA_TO_CODONS:
        raise ValueError(f"Unknown amino acid: {amino_acid}")
    # Copy so in-place edits by the caller never leak into AA_TO_CODONS.
    return list(AA_TO_CODONS[amino_acid])