| """ |
| Codon Optimization — optimize CDS codon usage for target organism. |
| |
| Demo-level implementation that replaces rare codons with frequent ones |
| based on the organism's codon usage table. |
| """ |
| from __future__ import annotations |
|
|
| from dataclasses import dataclass, field |
| from typing import Any, Dict, List, Optional |
|
|
| from core.analysis.cai import CODON_TABLES, calculate_cai |
|
|
|
|
| |
# Standard genetic code over the DNA alphabet: codon -> one-letter amino acid,
# with "*" marking a stop codon.
#
# The 64 codons are enumerated with the middle base varying slowest, then the
# first base, then the third base, each running through T, C, A, G.  The
# amino-acid string below lists the translations in exactly that order, one
# 16-letter row per middle base.  Insertion order is significant: it fixes the
# order of the synonymous-codon lists in AA_TO_CODONS.
CODON_TABLE = dict(
    zip(
        (
            first + middle + third
            for middle in "TCAG"
            for first in "TCAG"
            for third in "TCAG"
        ),
        "FFLLLLLLIIIMVVVV"  # middle base T
        "SSSSPPPPTTTTAAAA"  # middle base C
        "YY**HHQQNNKKDDEE"  # middle base A
        "CC*WRRRRSSRRGGGG",  # middle base G
    )
)


# Reverse lookup: amino-acid letter (or "*") -> its codons, listed in the
# order they appear in CODON_TABLE.
AA_TO_CODONS: Dict[str, List[str]] = {}
for codon, aa in CODON_TABLE.items():
    if aa in AA_TO_CODONS:
        AA_TO_CODONS[aa].append(codon)
    else:
        AA_TO_CODONS[aa] = [codon]
|
|
|
|
@dataclass
class OptimizationResult:
    """Outcome of one codon-optimization run.

    Attributes
    ----------
    original_cds : str
        The input coding sequence.
    optimized_cds : str
        The coding sequence after codon replacement.
    original_cai : float
        CAI score recorded for the input sequence.
    optimized_cai : float
        CAI score recorded for the optimized sequence.
    organism : str
        Target organism the optimization was run for.
    codons_changed : int
        Number of codons that were replaced.
    total_codons : int
        Number of codons that were examined.
    changes : List[str]
        Human-readable description of each replacement; empty by default.
    """

    # Sequences before and after optimization.
    original_cds: str
    optimized_cds: str

    # CAI scores before and after optimization.
    original_cai: float
    optimized_cai: float

    organism: str

    # Replacement statistics.
    codons_changed: int
    total_codons: int

    # A fresh list per instance, so results never share a change log.
    changes: List[str] = field(default_factory=list)
|
|
|
|
# Normalized organism name -> key of the codon table used for it.  Only
# "ecoli" has its own entry; every other listed organism is mapped to the
# "human" table in this demo implementation.
_ORGANISM_TABLE_KEYS = {
    "human": "human",
    "mouse": "human",
    "ecoli": "ecoli",
    "cho": "human",
    "yeast": "human",
    "zebrafish": "human",
}

# Codons whose table weight is at least this value are left untouched.
_FREQUENT_WEIGHT = 0.8

# Weight assumed for a codon that is missing from the usage table.
_UNKNOWN_CODON_WEIGHT = 0.5


def _resolve_table_key(organism: str) -> str:
    """Map a free-form organism name to the codon-table key used for it."""
    normalized = organism.lower().replace(" ", "").replace(".", "")
    # Unrecognized organisms fall back to the human table.
    return _ORGANISM_TABLE_KEYS.get(normalized, "human")


def _safe_cai(seq: str, table_key: str) -> float:
    """Return ``calculate_cai(seq, table_key)``, or 0.0 if it raises."""
    try:
        return calculate_cai(seq, table_key)
    except Exception:
        # Deliberately best-effort: a scoring failure must not abort the
        # optimization, so the score is reported as 0.0 instead.
        return 0.0


def optimize_codons(
    cds: str,
    organism: str = "human",
    min_cai_target: float = 0.8,
    strategy: str = "match_host",
) -> OptimizationResult:
    """
    Optimize codon usage of a CDS for the target organism.

    The sequence is upper-cased and ``U`` is converted to ``T``, then it is
    read in-frame from position 0.  Each codon whose usage-table weight is
    below 0.8 is replaced by a synonymous codon with a strictly higher
    weight, chosen according to ``strategy``.  Stop codons and codons that
    are not in the genetic-code table (e.g. containing ``N``) are kept
    as-is.

    Parameters
    ----------
    cds : str
        Coding DNA (or RNA) sequence.
    organism : str
        Target organism name; case, spaces and dots are ignored
        (``"E. coli"`` -> ``"ecoli"``).  Only ``"ecoli"`` selects its own
        table; all other names, known or not, use the ``"human"`` table.
    min_cai_target : float
        Target minimum CAI.  Accepted for interface compatibility; the
        current implementation does not use it.
    strategy : str
        "match_host" — replace with the highest-weight synonymous codon.
        "balance" — replace with the second-highest-weight synonymous
        codon (the only one, if the amino acid has a single codon).
        Any other value, including "harmonize", currently behaves like
        "match_host".

    Returns
    -------
    OptimizationResult
        ``optimized_cds`` is the normalized (upper-case, DNA) sequence with
        replacements applied.  If the input length is not a multiple of
        three, the trailing 1-2 nucleotides are carried over unchanged
        rather than dropped; they are not counted in ``total_codons``.
        CAI values are 0.0 when ``calculate_cai`` fails.
    """
    seq = cds.upper().replace("U", "T")
    table_key = _resolve_table_key(organism)
    # NOTE(review): table is presumably codon -> relative weight in [0, 1];
    # confirm against core.analysis.cai.CODON_TABLES.
    table = CODON_TABLES.get(table_key, CODON_TABLES["human"])

    original_cai = _safe_cai(seq, table_key)

    # Split into complete codons; keep any incomplete tail so it can be
    # re-attached to the output instead of being silently lost.
    in_frame_len = len(seq) - len(seq) % 3
    codons = [seq[i:i + 3] for i in range(0, in_frame_len, 3)]
    tail = seq[in_frame_len:]

    optimized = list(codons)
    changes: List[str] = []

    # Synonymous codons ranked by descending weight, computed once per
    # amino acid.  sorted() is stable, so ties keep AA_TO_CODONS order and
    # rank 0 is the same codon max() would have picked.
    ranked_by_aa = {}

    # "balance" takes the runner-up; every other strategy takes the top.
    wanted_rank = 1 if strategy == "balance" else 0

    for i, codon in enumerate(codons):
        aa = CODON_TABLE.get(codon)
        if aa is None or aa == "*":
            # Unknown codon or stop codon: never rewritten.
            continue

        w = table.get(codon, _UNKNOWN_CODON_WEIGHT)
        if w >= _FREQUENT_WEIGHT:
            continue

        ranked = ranked_by_aa.get(aa)
        if ranked is None:
            ranked = sorted(
                ((c, table.get(c, 0.0)) for c in AA_TO_CODONS.get(aa, [])),
                key=lambda pair: pair[1],
                reverse=True,
            )
            ranked_by_aa[aa] = ranked
        if not ranked:
            continue

        best_codon, best_w = ranked[min(wanted_rank, len(ranked) - 1)]

        # Only swap when the candidate is a genuine improvement.
        if best_codon != codon and best_w > w:
            optimized[i] = best_codon
            changes.append(
                f"Pos {i + 1}: {codon} → {best_codon} ({aa}, {w:.2f} → {best_w:.2f})"
            )

    optimized_seq = "".join(optimized) + tail

    optimized_cai = _safe_cai(optimized_seq, table_key)

    return OptimizationResult(
        original_cds=cds,
        optimized_cds=optimized_seq,
        original_cai=original_cai,
        optimized_cai=optimized_cai,
        organism=organism,
        codons_changed=len(changes),
        total_codons=len(codons),
        changes=changes,
    )
|
|