Spaces:
Running
Running
| """ | |
| File: CodonPostProcessing.py | |
| --------------------------- | |
| Post-processing utilities for codon optimization using DNAChisel. | |
| This module provides sequence polishing capabilities to fix restriction sites, | |
| homopolymers, and other constraints while preserving CAI and GC content. | |
| """ | |
| import warnings | |
| import numpy as np | |
| try: | |
| from dnachisel import ( | |
| DnaOptimizationProblem, | |
| AvoidPattern, | |
| EnforceGCContent, | |
| EnforceTranslation, | |
| CodonOptimize, | |
| ) | |
| DNACHISEL_AVAILABLE = True | |
| except ImportError: | |
| DNACHISEL_AVAILABLE = False | |
| # This warning will be shown when the module is first imported. | |
| warnings.warn( | |
| "DNAChisel is not installed. Post-processing features will be disabled." | |
| ) | |
| def polish_sequence_with_dnachisel( | |
| dna_sequence: str, | |
| protein_sequence: str, | |
| gc_bounds: tuple = (45.0, 55.0), | |
| cai_species: str = "e_coli", | |
| avoid_homopolymers_length: int = 6, | |
| enzymes_to_avoid: list = None | |
| ): | |
| """ | |
| Polishes a DNA sequence using DNAChisel to meet lab synthesis constraints. | |
| """ | |
| if not DNACHISEL_AVAILABLE: | |
| warnings.warn("DNAChisel not available, skipping post-processing.") | |
| return dna_sequence | |
| if enzymes_to_avoid is None: | |
| # Common cloning enzymes | |
| enzymes_to_avoid = ["EcoRI", "XbaI", "SpeI", "PstI", "NotI"] | |
| try: | |
| # Start with the basic, essential constraints | |
| constraints = [ | |
| EnforceTranslation(translation=protein_sequence), | |
| EnforceGCContent(mini=gc_bounds[0] / 100.0, maxi=gc_bounds[1] / 100.0), | |
| ] | |
| # Add enzyme avoidance constraints safely | |
| for enzyme in enzymes_to_avoid: | |
| try: | |
| # This is the modern way to avoid enzyme sites | |
| constraints.append(AvoidPattern.from_enzyme_name(enzyme)) | |
| except Exception: | |
| warnings.warn(f"Could not find enzyme '{enzyme}' in DNAChisel library.") | |
| # Add homopolymer avoidance constraints | |
| for base in "ATGC": | |
| constraints.append(AvoidPattern(base * avoid_homopolymers_length)) | |
| # Define the optimization problem | |
| problem = DnaOptimizationProblem( | |
| sequence=dna_sequence, | |
| constraints=constraints, | |
| objectives=[CodonOptimize(species=cai_species, method="match_codon_usage")] | |
| ) | |
| # Solve the problem | |
| problem.resolve_constraints() | |
| problem.optimize() | |
| # Return the polished sequence | |
| return problem.sequence | |
| except Exception as e: | |
| warnings.warn(f"DNAChisel post-processing failed with an error: {e}") | |
| # Return the original sequence if polishing fails | |
| return dna_sequence | |