Spaces:
Sleeping
Sleeping
| """ | |
| Plasmid domain models. | |
| Covers plasmid backbones and fully assembled plasmids (backbone + mRNA insert). | |
| Assembly strategies and junction logic live in core/optimization/assembly.py; | |
| these are pure data structures. | |
| """ | |
| from __future__ import annotations | |
| import uuid | |
| from dataclasses import dataclass, field | |
| from typing import Any, Dict, List, Literal, Optional, Tuple | |
| from core.models.sequence import SequenceAnnotation, mRNASequence | |
@dataclass
class PlasmidFeature:
    """A named functional element within a plasmid sequence.

    Pure data container. ``start`` and ``end`` are coordinates within the
    linear representation of the plasmid; ``start`` is 0-based.
    """

    label: str
    feature_type: Literal[
        "promoter", "terminator", "ori", "resistance", "tag",
        "cloning_site", "insert", "utr", "cds", "other",
    ]
    start: int  # 0-based, within linear representation
    end: int
    strand: Literal["+", "-", "."] = "+"
    color: Optional[str] = None
    # default_factory gives every instance its own dict (no shared state).
    metadata: Dict[str, Any] = field(default_factory=dict)

    def length(self) -> int:
        """Return the span of the feature, computed as ``end - start``."""
        return self.end - self.start
@dataclass
class PlasmidBackbone:
    """A cloning vector / expression backbone.

    The backbone sequence is stored as linearised DNA. For circular
    representation in the UI, the sequence wraps end-to-start.

    ``cloning_sites`` lists the restriction enzyme names or recombination
    sites present (for QC/assembly checks).
    """

    name: str
    sequence: str  # full circular backbone, linearised
    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    description: Optional[str] = None
    # Functional elements (promoters, resistance, ori, MCS, etc.)
    features: List[PlasmidFeature] = field(default_factory=list)
    # Restriction/recombination sites available for cloning
    cloning_sites: List[str] = field(default_factory=list)
    # Source: "local", "library", or a db_source name
    source: str = "local"
    raw_metadata: Dict[str, Any] = field(default_factory=dict)

    def length(self) -> int:
        """Return the number of characters in ``sequence``."""
        return len(self.sequence)

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the backbone to a plain dictionary.

        Each feature is flattened to its label, type, coordinates, strand and
        color. Feature ``metadata`` and the backbone's ``raw_metadata`` are
        not part of the output.

        Returns:
            A dict with keys ``id``, ``name``, ``description``, ``sequence``,
            ``features``, ``cloning_sites`` and ``source``.
        """
        return {
            "id": self.id,
            "name": self.name,
            "description": self.description,
            "sequence": self.sequence,
            "features": [
                {
                    "label": f.label,
                    "feature_type": f.feature_type,
                    "start": f.start,
                    "end": f.end,
                    "strand": f.strand,
                    "color": f.color,
                }
                for f in self.features
            ],
            # Copy so that mutating the returned dict cannot alter this instance.
            "cloning_sites": list(self.cloning_sites),
            "source": self.source,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "PlasmidBackbone":
        """Build a backbone from a dictionary shaped like ``to_dict`` output.

        Args:
            data: Mapping with required keys ``name`` and ``sequence``.
                Optional keys: ``id`` (a new UUID4 string is generated when
                absent), ``description``, ``features``, ``cloning_sites``
                and ``source`` (defaults to ``"local"``). Each feature entry
                requires ``label``, ``start`` and ``end``.

        Returns:
            A new instance of ``cls``.

        Raises:
            KeyError: If a required key is missing from ``data`` or from one
                of its feature entries.
        """
        features = [
            PlasmidFeature(
                label=f["label"],
                feature_type=f.get("feature_type", "other"),
                start=f["start"],
                end=f["end"],
                strand=f.get("strand", "+"),
                color=f.get("color"),
            )
            for f in data.get("features", [])
        ]
        # Generate a UUID only when the caller did not supply an id.
        backbone_id = data["id"] if "id" in data else str(uuid.uuid4())
        return cls(
            id=backbone_id,
            name=data["name"],
            description=data.get("description"),
            sequence=data["sequence"],
            features=features,
            # Copy so the instance does not share a list with the input dict.
            cloning_sites=list(data.get("cloning_sites", [])),
            source=data.get("source", "local"),
        )
@dataclass
class AssemblyJunction:
    """Records how two parts are joined in an assembly.

    Stores the junction sequence added/used, and the strategy that created it.
    """

    part_a_name: str
    part_b_name: str
    strategy: Literal["restriction", "golden_gate", "gibson", "direct"]
    junction_sequence: str  # the overhang / overlap / linker added
    enzyme: Optional[str] = None  # for restriction/GG assemblies
@dataclass
class AssembledPlasmid:
    """A fully assembled plasmid: backbone + mRNA insert, with provenance.

    ``full_sequence`` is the assembled circular sequence as a flat string.
    ``junctions`` records every join point for audit / re-assembly.

    In QC mode this is populated from the unmodified parts; in Make mode
    the parts are modified and junctions record what was added.
    """

    name: str
    backbone: PlasmidBackbone
    insert: mRNASequence
    assembly_strategy: Literal["restriction", "golden_gate", "gibson"]
    assembly_mode: Literal["qc", "make"]
    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    full_sequence: Optional[str] = None  # None until assembly is run
    features: List[PlasmidFeature] = field(default_factory=list)
    junctions: List[AssemblyJunction] = field(default_factory=list)
    qc_issues: List[str] = field(default_factory=list)  # validation warnings
    notes: Optional[str] = None

    def is_assembled(self) -> bool:
        """Return True once ``full_sequence`` has been set (is not None)."""
        return self.full_sequence is not None

    def length(self) -> Optional[int]:
        """Return the length of ``full_sequence``, or None if it is unset.

        Uses the same ``is not None`` test as ``is_assembled`` so that an
        empty assembled sequence reports 0 rather than None.
        """
        if self.full_sequence is None:
            return None
        return len(self.full_sequence)

    def to_genbank_annotations(self) -> List[Tuple[str, str, int, int, str]]:
        """Return (label, feature_type, start, end, strand) tuples suitable
        for writing a GenBank file via BioPython.

        One tuple is produced per entry in ``features``, in order.
        """
        return [
            (f.label, f.feature_type, f.start, f.end, f.strand)
            for f in self.features
        ]

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the plasmid to a plain dictionary.

        The backbone and insert are referenced by their ``id`` and ``name``
        only; ``features`` and ``junctions`` are not part of the output.
        """
        return {
            "id": self.id,
            "name": self.name,
            "backbone_id": self.backbone.id,
            "backbone_name": self.backbone.name,
            "insert_id": self.insert.id,
            "insert_name": self.insert.name,
            "assembly_strategy": self.assembly_strategy,
            "assembly_mode": self.assembly_mode,
            "full_sequence": self.full_sequence,
            # Copy so that mutating the returned dict cannot alter this instance.
            "qc_issues": list(self.qc_issues),
            "notes": self.notes,
        }