offtargeteffect's picture
Deploy mRNA Design Studio (Docker SDK)
99f834c verified
Raw
History Blame Contribute Delete
5.95 kB
"""
Plasmid domain models.
Covers plasmid backbones and fully assembled plasmids (backbone + mRNA insert).
Assembly strategies and junction logic live in core/optimization/assembly.py;
these are pure data structures.
"""
from __future__ import annotations
import uuid
from dataclasses import dataclass, field
from typing import Any, Dict, List, Literal, Optional, Tuple
from core.models.sequence import SequenceAnnotation, mRNASequence
@dataclass
class PlasmidFeature:
    """
    One labelled functional region of a plasmid sequence.

    ``start`` is a 0-based coordinate within the linear representation of
    the plasmid. ``strand`` defaults to ``"+"``; ``color`` and ``metadata``
    are optional extras that default to ``None`` and an empty dict.
    """

    label: str
    feature_type: Literal[
        "promoter",
        "terminator",
        "ori",
        "resistance",
        "tag",
        "cloning_site",
        "insert",
        "utr",
        "cds",
        "other",
    ]
    start: int  # 0-based, within linear representation
    end: int
    strand: Literal["+", "-", "."] = "+"
    color: Optional[str] = None
    # Each instance gets its own dict (default_factory), never a shared one.
    metadata: Dict[str, Any] = field(default_factory=dict)

    @property
    def length(self) -> int:
        """Span of the feature, computed as ``end - start``."""
        span = self.end - self.start
        return span
@dataclass
class PlasmidBackbone:
    """
    A cloning vector / expression backbone.

    The backbone sequence is stored as linearised DNA. For circular
    representation in the UI, the sequence wraps end-to-start.
    ``cloning_sites`` lists the restriction enzyme names or recombination
    sites present (for QC/assembly checks).
    """

    name: str
    sequence: str  # full circular backbone, linearised
    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    description: Optional[str] = None
    # Functional elements (promoters, resistance, ori, MCS, etc.)
    features: List[PlasmidFeature] = field(default_factory=list)
    # Restriction/recombination sites available for cloning
    cloning_sites: List[str] = field(default_factory=list)
    # Source: "local", "library", or a db_source name
    source: str = "local"
    raw_metadata: Dict[str, Any] = field(default_factory=dict)

    @property
    def length(self) -> int:
        """Number of characters in ``sequence``."""
        return len(self.sequence)

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialise the backbone to a plain dict.

        Emits ``id``, ``name``, ``description``, ``sequence``, ``features``
        (one dict per feature with label, feature_type, start, end, strand,
        color), ``cloning_sites`` and ``source``. Per-feature ``metadata``
        and the backbone's ``raw_metadata`` are not part of the output.

        Returns:
            A new dict whose lists are copies, so editing the result never
            changes this backbone.
        """
        return {
            "id": self.id,
            "name": self.name,
            "description": self.description,
            "sequence": self.sequence,
            "features": [
                {
                    "label": f.label,
                    "feature_type": f.feature_type,
                    "start": f.start,
                    "end": f.end,
                    "strand": f.strand,
                    "color": f.color,
                }
                for f in self.features
            ],
            # Copy: handing out the live list would let callers mutate us.
            "cloning_sites": list(self.cloning_sites),
            "source": self.source,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "PlasmidBackbone":
        """
        Build a backbone from a dict shaped like the output of ``to_dict``.

        ``name`` and ``sequence`` are required; every feature entry needs
        ``label``, ``start`` and ``end``. A missing or ``None`` ``id`` gets a
        fresh UUID; missing or ``None`` ``features`` / ``cloning_sites``
        become empty lists.

        Raises:
            KeyError: if a required key is absent.
        """
        features = [
            PlasmidFeature(
                label=f["label"],
                feature_type=f.get("feature_type", "other"),
                start=f["start"],
                end=f["end"],
                strand=f.get("strand", "+"),
                color=f.get("color"),
            )
            # ``or []`` tolerates an explicit null in the payload.
            for f in (data.get("features") or [])
        ]
        raw_id = data.get("id")
        return cls(
            # Only mint a UUID when no id was supplied; an explicit null is
            # treated like a missing key so ``id`` is always a string.
            id=str(uuid.uuid4()) if raw_id is None else raw_id,
            name=data["name"],
            description=data.get("description"),
            sequence=data["sequence"],
            features=features,
            # Copy so later edits to the caller's list do not leak in here.
            cloning_sites=list(data.get("cloning_sites") or []),
            source=data.get("source", "local"),
        )
@dataclass
class AssemblyJunction:
    """
    Describes one join between two parts of an assembly.

    Holds the names of the two parts, the strategy that produced the join,
    and the junction sequence that was added or used.
    """

    part_a_name: str
    part_b_name: str
    strategy: Literal[
        "restriction",
        "golden_gate",
        "gibson",
        "direct",
    ]
    # The overhang / overlap / linker added at this join.
    junction_sequence: str
    # Enzyme name, for restriction / Golden Gate assemblies.
    enzyme: Optional[str] = None
@dataclass
class AssembledPlasmid:
    """
    A fully assembled plasmid: backbone + mRNA insert, with provenance.

    ``full_sequence`` is the assembled circular sequence as a flat string
    and stays ``None`` until assembly is run. ``junctions`` records every
    join point for audit / re-assembly.

    In QC mode this is populated from the unmodified parts; in Make mode
    the parts are modified and junctions record what was added.
    """

    name: str
    backbone: PlasmidBackbone
    insert: mRNASequence
    assembly_strategy: Literal["restriction", "golden_gate", "gibson"]
    assembly_mode: Literal["qc", "make"]
    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    full_sequence: Optional[str] = None  # None until assembly is run
    features: List[PlasmidFeature] = field(default_factory=list)
    junctions: List[AssemblyJunction] = field(default_factory=list)
    qc_issues: List[str] = field(default_factory=list)  # validation warnings
    notes: Optional[str] = None

    @property
    def is_assembled(self) -> bool:
        """True once ``full_sequence`` has been set (even to an empty string)."""
        return self.full_sequence is not None

    @property
    def length(self) -> Optional[int]:
        """
        Length of ``full_sequence``, or ``None`` when it has not been set.

        Uses the same ``is not None`` test as ``is_assembled`` so the two
        properties agree: an empty assembled sequence has length 0, not None.
        """
        if self.full_sequence is None:
            return None
        return len(self.full_sequence)

    def to_genbank_annotations(self) -> List[Tuple[str, str, int, int, str]]:
        """
        Return (label, feature_type, start, end, strand) tuples suitable
        for writing a GenBank file via BioPython.
        """
        return [
            (f.label, f.feature_type, f.start, f.end, f.strand)
            for f in self.features
        ]

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialise a summary of the plasmid to a plain dict.

        The backbone and insert are referenced by their ``id`` and ``name``
        only; ``features`` and ``junctions`` are not part of the output.
        ``qc_issues`` is copied so editing the result cannot change this
        object.
        """
        return {
            "id": self.id,
            "name": self.name,
            "backbone_id": self.backbone.id,
            "backbone_name": self.backbone.name,
            "insert_id": self.insert.id,
            "insert_name": self.insert.name,
            "assembly_strategy": self.assembly_strategy,
            "assembly_mode": self.assembly_mode,
            "full_sequence": self.full_sequence,
            "qc_issues": list(self.qc_issues),
            "notes": self.notes,
        }