Samad14 commited on
Commit
3caaead
Β·
verified Β·
1 Parent(s): 50c0875

feat: add phylo router (NJ/UPGMA/ML) with PhyML binary

Browse files
Files changed (3) hide show
  1. Dockerfile +27 -0
  2. app/main.py +114 -0
  3. app/routers/phylo.py +430 -0
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ RUN apt-get update && apt-get install -y --no-install-recommends \
4
+ build-essential gcc autoconf automake pkg-config wget && \
5
+ rm -rf /var/lib/apt/lists/*
6
+
7
+ # Build PhyML from source (~2 min)
8
+ RUN wget -qO /tmp/phyml.tar.gz \
9
+ https://github.com/stephaneguindon/phyml/archive/refs/tags/v3.3.20250515.tar.gz && \
10
+ tar xzf /tmp/phyml.tar.gz -C /tmp && \
11
+ cd /tmp/phyml-3.3.20250515 && \
12
+ ./autogen.sh && \
13
+ ./configure --enable-phyml && \
14
+ make -j$(nproc) && \
15
+ make install && \
16
+ cd / && \
17
+ rm -rf /tmp/phyml-3.3.20250515 /tmp/phyml.tar.gz
18
+
19
+ WORKDIR /app
20
+ COPY requirements.txt .
21
+ RUN pip install --no-cache-dir -r requirements.txt
22
+ # primer3-py is optional (requires C compiler) β€” skip silently if it fails
23
+ RUN pip install --no-cache-dir primer3-py>=2.0.3 2>/dev/null || echo "primer3-py skipped (optional)"
24
+ COPY . .
25
+
26
+ EXPOSE 7860
27
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/main.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from dotenv import load_dotenv
3
+ load_dotenv()
4
+
5
+ from fastapi import FastAPI, HTTPException, Request
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+ from fastapi.responses import JSONResponse
8
+ from slowapi import Limiter, _rate_limit_exceeded_handler
9
+ from slowapi.util import get_remote_address
10
+ from slowapi.errors import RateLimitExceeded
11
+ from app.config import settings
12
+ from app.routers import pipelines, pipeline_v2, ai, jobs, share, profile, sequences, uniprot, alignment, structures, pathways, domains, interactions, primers, structure_analysis, phylo
13
+ from app.services.cache import init_redis
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ limiter = Limiter(key_func=get_remote_address, default_limits=["30/minute"])
18
+
19
+ app = FastAPI(title="Bio Nexus API", version="0.2.0")
20
+ app.state.limiter = limiter
21
+ app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
22
+
23
+ PROD_ORIGIN = settings.CORS_ORIGIN
24
+
25
+ app.add_middleware(
26
+ CORSMiddleware,
27
+ allow_origins=[
28
+ "http://localhost:3000",
29
+ "http://localhost:3001",
30
+ PROD_ORIGIN,
31
+ "https://bioai-platform.vercel.app",
32
+ ],
33
+ allow_credentials=True,
34
+ allow_methods=["*"],
35
+ allow_headers=["*"],
36
+ )
37
+
38
+ app.include_router(pipelines.router, prefix="/api/pipelines", tags=["pipelines"])
39
+ app.include_router(pipeline_v2.router, prefix="/api/pipeline/v2", tags=["pipeline_v2"])
40
+ app.include_router(ai.router, prefix="/api/ai", tags=["ai"])
41
+ app.include_router(jobs.router, prefix="/api/jobs", tags=["jobs"])
42
+ app.include_router(share.router, prefix="/api/share", tags=["share"])
43
+ app.include_router(profile.router, prefix="/api/profile", tags=["profile"])
44
+ app.include_router(sequences.router, prefix="/api/sequences", tags=["sequences"])
45
+ app.include_router(uniprot.router, prefix="/api/uniprot", tags=["uniprot"])
46
+ app.include_router(alignment.router, prefix="/api/alignment", tags=["alignment"])
47
+ app.include_router(structures.router, prefix="/api/structures", tags=["structures"])
48
+ app.include_router(pathways.router, prefix="/api/pathways", tags=["pathways"])
49
+ app.include_router(domains.router)
50
+ app.include_router(interactions.router)
51
+ app.include_router(primers.router)
52
+ app.include_router(structure_analysis.router)
53
+ app.include_router(phylo.router)
54
+
55
+ TERMINAL_STATUSES = {"complete", "failed"}
56
+ NON_TERMINAL_STATUSES = {
57
+ "submitted_to_ncbi", "polling_ncbi", "parsing",
58
+ "fetching_uniprot", "fetching_alphafold", "interpreting",
59
+ }
60
+
61
+
62
+ async def _fail_stuck_jobs():
63
+ try:
64
+ import httpx
65
+ from app.config import settings
66
+ headers = {
67
+ "apikey": settings.SUPABASE_SERVICE_ROLE_KEY,
68
+ "Authorization": f"Bearer {settings.SUPABASE_SERVICE_ROLE_KEY}",
69
+ "Content-Type": "application/json",
70
+ "Prefer": "return=minimal",
71
+ }
72
+ url = f"{settings.SUPABASE_URL}/rest/v1/jobs"
73
+ quoted = ",".join(f'"{s}"' for s in NON_TERMINAL_STATUSES)
74
+ select_url = f"{url}?select=id&status=in.({quoted})"
75
+ async with httpx.AsyncClient(timeout=10) as client:
76
+ resp = await client.get(select_url, headers=headers)
77
+ if resp.status_code != 200:
78
+ logger.warning(f"Startup resume: failed to query jobs ({resp.status_code})")
79
+ return
80
+ stuck = resp.json()
81
+ for job in stuck:
82
+ jid = job["id"]
83
+ logger.info(f"Startup resume: marking stuck job {jid} as failed")
84
+ await client.patch(
85
+ f"{url}?id=eq.{jid}",
86
+ headers=headers,
87
+ json={"status": "failed", "error": "Worker lost on restart β€” please re-run"},
88
+ )
89
+ if stuck:
90
+ logger.info(f"Startup resume: marked {len(stuck)} stuck job(s) as failed")
91
+ except Exception as e:
92
+ logger.warning(f"Startup resume: error: {e}")
93
+
94
+
95
+ @app.on_event("startup")
96
+ async def startup():
97
+ init_redis()
98
+ await _fail_stuck_jobs()
99
+
100
+
101
+ @app.get("/health")
102
+ async def health():
103
+ return {"status": "ok"}
104
+
105
+
106
+ @app.exception_handler(Exception)
107
+ async def global_exception_handler(request: Request, exc: Exception):
108
+ if isinstance(exc, HTTPException):
109
+ return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail})
110
+ logger.exception("Unhandled exception")
111
+ return JSONResponse(
112
+ status_code=500,
113
+ content={"detail": "Internal server error"},
114
+ )
app/routers/phylo.py ADDED
@@ -0,0 +1,430 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Phylogenetic tree router β€” three methods:
3
+ nj : Neighbor-Joining via Clustal Omega guide tree (~60s)
4
+ upgma : UPGMA computed locally from Clustal alignment (adds ~0s after MSA)
5
+ ml : Maximum Likelihood via local PhyML binary (~3-5 min, includes bootstrap)
6
+
7
+ PhyML binary must be installed at build time (see Dockerfile).
8
+ Download source from: https://github.com/stephaneguindon/phyml
9
+ Compile: ./configure --enable-phyml && make && make install
10
+
11
+ Job lifecycle (in-memory, thread-safe):
12
+ queued -> msa_running -> msa_done -> tree_running -> complete | error
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import asyncio
18
+ import logging
19
+ import threading
20
+ import time
21
+ import uuid
22
+ from typing import Literal, Optional
23
+
24
+ import httpx
25
+ from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
26
+ from pydantic import BaseModel, Field
27
+
28
+ logger = logging.getLogger(__name__)
29
+ router = APIRouter(prefix="/phylo", tags=["phylo"])
30
+
31
+ # ── EBI base URLs ──────────────────────────────────────────────────────────────
32
+ _EBI_CLUSTALO = "https://www.ebi.ac.uk/Tools/services/rest/clustalo"
33
+ _EMAIL = "bionexus@demo.com"
34
+
35
+ # ── PhyML protein models (most-used first) ────────────────────────────────────
36
+ PROTEIN_MODELS = ["LG", "WAG", "JTT", "Blosum62", "MtREV", "Dayhoff"]
37
+ DNA_MODELS = ["GTR", "HKY85", "K80", "F81", "TN93", "SYM"]
38
+
39
+ # ─── Models ───────────────────────────────────────────────────────────────────
40
+
41
+ Method = Literal["nj", "ml", "upgma"]
42
+ SeqType = Literal["protein", "dna"]
43
+ JobPhase = Literal["queued", "msa_running", "msa_done", "tree_running", "complete", "error"]
44
+
45
+
46
+ class PhyloRequest(BaseModel):
47
+ sequences: list[dict]
48
+ method: Method = "nj"
49
+ seq_type: SeqType = "protein"
50
+ # ML-only options
51
+ model: str = "LG"
52
+ bootstrap: int = Field(100, ge=0, le=1000)
53
+
54
+
55
+ class PhyloJob(BaseModel):
56
+ job_id: str
57
+ method: Method
58
+ seq_type: SeqType
59
+ model: Optional[str]
60
+ bootstrap: Optional[int]
61
+ phase: JobPhase
62
+ aln_fasta: Optional[str] = None
63
+ newick: Optional[str] = None
64
+ stats: Optional[str] = None
65
+ error: Optional[str] = None
66
+ created_at: float = 0.0
67
+ msa_done_at: Optional[float] = None
68
+ done_at: Optional[float] = None
69
+
70
+
71
+ class RunResponse(BaseModel):
72
+ job_id: str
73
+ status: str
74
+
75
+
76
+ # ─── In-memory store ──────────────────────────────────────────────────────────
77
+
78
+ _jobs: dict[str, dict] = {}
79
+ _lock = threading.Lock()
80
+
81
+
82
+ def _init(job_id: str, req: PhyloRequest) -> None:
83
+ with _lock:
84
+ _jobs[job_id] = {
85
+ "job_id": job_id,
86
+ "method": req.method,
87
+ "seq_type": req.seq_type,
88
+ "model": req.model,
89
+ "bootstrap": req.bootstrap,
90
+ "phase": "queued",
91
+ "aln_fasta": None,
92
+ "newick": None,
93
+ "stats": None,
94
+ "error": None,
95
+ "created_at": time.time(),
96
+ "msa_done_at": None,
97
+ "done_at": None,
98
+ "_req": req.model_dump(),
99
+ }
100
+
101
+
102
+ def _patch(job_id: str, **kw) -> None:
103
+ with _lock:
104
+ if job_id in _jobs:
105
+ _jobs[job_id].update(kw)
106
+
107
+
108
+ def _read(job_id: str) -> dict | None:
109
+ with _lock:
110
+ return dict(_jobs[job_id]) if job_id in _jobs else None
111
+
112
+
113
+ # ─── EBI helpers (same pattern as alignment.py) ───────────────────────────────
114
+
115
+ async def _ebi_submit(url: str, data: dict) -> str:
116
+ async with httpx.AsyncClient(timeout=30) as client:
117
+ r = await client.post(url + "/run", data=data)
118
+ r.raise_for_status()
119
+ return r.text.strip()
120
+
121
+
122
+ async def _ebi_poll(url: str, ebi_job: str, interval: float = 2.0, max_polls: int = 150) -> str:
123
+ async with httpx.AsyncClient(timeout=10) as client:
124
+ for _ in range(max_polls):
125
+ await asyncio.sleep(interval)
126
+ r = await client.get(f"{url}/status/{ebi_job}")
127
+ status = r.text.strip()
128
+ if status in ("FINISHED", "FAILED", "ERROR", "NOT_FOUND"):
129
+ return status
130
+ return "TIMEOUT"
131
+
132
+
133
+ async def _ebi_result(url: str, ebi_job: str, result_type: str, retries: int = 3) -> str:
134
+ async with httpx.AsyncClient(timeout=60) as client:
135
+ for attempt in range(retries):
136
+ try:
137
+ r = await client.get(f"{url}/result/{ebi_job}/{result_type}")
138
+ if r.status_code == 200:
139
+ return r.text
140
+ await asyncio.sleep(2 ** attempt)
141
+ except httpx.RequestError:
142
+ await asyncio.sleep(2 ** attempt)
143
+ raise RuntimeError(f"Failed to fetch EBI result {result_type} after {retries} attempts")
144
+
145
+
146
+ # ─── MSA via Clustal Omega ────────────────────────────────────────────────────
147
+
148
+ def _to_fasta(sequences: list[dict]) -> str:
149
+ return "\n".join(f">{s['id']}\n{s['sequence']}" for s in sequences)
150
+
151
+
152
+ async def _run_clustalo(job_id: str, sequences: list[dict], stype: str) -> tuple[str, str] | None:
153
+ _patch(job_id, phase="msa_running")
154
+ fasta = _to_fasta(sequences)
155
+
156
+ try:
157
+ ebi_job = await _ebi_submit(_EBI_CLUSTALO, {
158
+ "email": _EMAIL,
159
+ "sequence": fasta,
160
+ "outfmt": "fa",
161
+ "stype": "protein" if stype == "protein" else "dna",
162
+ })
163
+ logger.info(f"[{job_id}] Clustal Omega job: {ebi_job}")
164
+ except Exception as e:
165
+ _patch(job_id, phase="error", error=f"Clustal Omega submission failed: {e}")
166
+ return None
167
+
168
+ status = await _ebi_poll(_EBI_CLUSTALO, ebi_job)
169
+ if status != "FINISHED":
170
+ _patch(job_id, phase="error", error=f"Clustal Omega ended with status: {status}")
171
+ return None
172
+
173
+ try:
174
+ aln_fasta = await _ebi_result(_EBI_CLUSTALO, ebi_job, "fa")
175
+ nj_newick = await _ebi_result(_EBI_CLUSTALO, ebi_job, "phylotree")
176
+ except Exception as e:
177
+ _patch(job_id, phase="error", error=f"Clustal Omega result fetch failed: {e}")
178
+ return None
179
+
180
+ _patch(job_id, phase="msa_done", aln_fasta=aln_fasta, msa_done_at=time.time())
181
+ return aln_fasta, nj_newick
182
+
183
+
184
+ # ─── UPGMA (pure Python) ──────────────────────────────────────────────────────
185
+
186
+ def _parse_aligned_fasta(fasta: str) -> dict[str, str]:
187
+ seqs: dict[str, str] = {}
188
+ cur = None
189
+ for line in fasta.strip().splitlines():
190
+ stripped = line.strip()
191
+ if stripped.startswith(">"):
192
+ cur = stripped[1:].split()[0]
193
+ seqs[cur] = ""
194
+ elif cur:
195
+ seqs[cur] += stripped
196
+ return seqs
197
+
198
+
199
+ def _p_distance(s1: str, s2: str) -> float:
200
+ pairs = [(a, b) for a, b in zip(s1, s2) if a != "-" and b != "-"]
201
+ if not pairs:
202
+ return 1.0
203
+ return sum(1 for a, b in pairs if a != b) / len(pairs)
204
+
205
+
206
+ def _upgma_newick(aln_fasta: str) -> str:
207
+ seqs = _parse_aligned_fasta(aln_fasta)
208
+ names = list(seqs.keys())
209
+ n = len(names)
210
+
211
+ if n < 2:
212
+ return f"({names[0]}:0.0);" if names else "();"
213
+
214
+ dist: dict[tuple[str, str], float] = {}
215
+ for i in range(n):
216
+ for j in range(i + 1, n):
217
+ d = _p_distance(seqs[names[i]], seqs[names[j]])
218
+ dist[(names[i], names[j])] = d
219
+ dist[(names[j], names[i])] = d
220
+
221
+ clusters: dict[str, dict] = {
222
+ nm: {"newick": nm, "height": 0.0, "size": 1} for nm in names
223
+ }
224
+
225
+ counter = 0
226
+ while len(clusters) > 1:
227
+ ckeys = list(clusters.keys())
228
+ min_d = float("inf")
229
+ best = ("", "")
230
+ for i in range(len(ckeys)):
231
+ for j in range(i + 1, len(ckeys)):
232
+ a, b = ckeys[i], ckeys[j]
233
+ d = dist.get((a, b), float("inf"))
234
+ if d < min_d:
235
+ min_d = d
236
+ best = (a, b)
237
+
238
+ a, b = best
239
+ new_h = min_d / 2.0
240
+ bl_a = max(0.0, new_h - clusters[a]["height"])
241
+ bl_b = max(0.0, new_h - clusters[b]["height"])
242
+ new_nw = f"({clusters[a]['newick']}:{bl_a:.6f},{clusters[b]['newick']}:{bl_b:.6f})"
243
+ new_sz = clusters[a]["size"] + clusters[b]["size"]
244
+
245
+ counter += 1
246
+ new_id = f"__c{counter}"
247
+
248
+ for c in ckeys:
249
+ if c in (a, b):
250
+ continue
251
+ da = dist.get((a, c), dist.get((c, a), 0.0))
252
+ db = dist.get((b, c), dist.get((c, b), 0.0))
253
+ nd = (da * clusters[a]["size"] + db * clusters[b]["size"]) / new_sz
254
+ dist[(new_id, c)] = nd
255
+ dist[(c, new_id)] = nd
256
+
257
+ del clusters[a]
258
+ del clusters[b]
259
+ clusters[new_id] = {"newick": new_nw, "height": new_h, "size": new_sz}
260
+
261
+ root = next(iter(clusters.values()))
262
+ return root["newick"] + ";"
263
+
264
+
265
+ # ─── PhyML local (subprocess) ────────────────────────────────────────────────
266
+
267
+ def fasta_to_phylip(fasta: str) -> str:
268
+ """Convert aligned FASTA to relaxed PHYLIP (names up to 100 chars)."""
269
+ seqs: dict[str, str] = {}
270
+ cur: str | None = None
271
+ for line in fasta.strip().splitlines():
272
+ t = line.strip()
273
+ if t.startswith(">"):
274
+ cur = t[1:].split()[0][:100]
275
+ seqs[cur] = ""
276
+ elif cur:
277
+ seqs[cur] += t.upper()
278
+ if not seqs:
279
+ return ""
280
+ n = len(seqs)
281
+ L = len(next(iter(seqs.values())))
282
+ lines = [f"{n} {L}"]
283
+ for name, s in seqs.items():
284
+ lines.append(f"{name:<100}{s}")
285
+ return "\n".join(lines)
286
+
287
+
288
+ async def _run_phyml_local(job_id: str, aln_fasta: str, req: PhyloRequest) -> None:
289
+ """Run PhyML as a subprocess on a temp PHYLIP file."""
290
+ _patch(job_id, phase="tree_running")
291
+
292
+ import os
293
+ import tempfile
294
+
295
+ fd, phy_path = tempfile.mkstemp(suffix=".phy")
296
+ os.close(fd)
297
+ try:
298
+ with open(phy_path, "w") as f:
299
+ f.write(fasta_to_phylip(aln_fasta))
300
+
301
+ datatype = "aa" if req.seq_type == "protein" else "nt"
302
+ model = req.model if req.model else ("LG" if req.seq_type == "protein" else "GTR")
303
+
304
+ proc = await asyncio.create_subprocess_exec(
305
+ "phyml",
306
+ "-i", phy_path,
307
+ "-d", datatype,
308
+ "-m", model,
309
+ "-b", str(req.bootstrap if req.bootstrap else 0),
310
+ "-o", "tlr",
311
+ "--no_memory_check",
312
+ stdout=asyncio.subprocess.PIPE,
313
+ stderr=asyncio.subprocess.PIPE,
314
+ )
315
+ try:
316
+ _, stderr = await asyncio.wait_for(proc.communicate(), timeout=900)
317
+ except asyncio.TimeoutError:
318
+ proc.kill()
319
+ await proc.communicate()
320
+ _patch(job_id, phase="error", error="PhyML timed out after 15 minutes")
321
+ return
322
+
323
+ if proc.returncode != 0:
324
+ err_text = stderr.decode("utf-8", errors="replace")[:500] if stderr else ""
325
+ _patch(job_id, phase="error",
326
+ error=f"PhyML failed (exit {proc.returncode}): {err_text}")
327
+ return
328
+
329
+ tree_path = phy_path + "_phyml_tree.txt"
330
+ stats_path = phy_path + "_phyml_stats.txt"
331
+
332
+ newick = None
333
+ stats = None
334
+ if os.path.exists(tree_path):
335
+ with open(tree_path) as f:
336
+ newick = f.read().strip()
337
+ if os.path.exists(stats_path):
338
+ with open(stats_path) as f:
339
+ stats = f.read().strip()
340
+
341
+ if not newick:
342
+ _patch(job_id, phase="error", error="PhyML produced no output tree")
343
+ return
344
+
345
+ _patch(job_id, phase="complete", newick=newick, stats=stats, done_at=time.time())
346
+ except Exception as e:
347
+ _patch(job_id, phase="error", error=f"PhyML error: {e}")
348
+ finally:
349
+ for suffix in ["", "_phyml_tree.txt", "_phyml_stats.txt", "_phyml_boot_trees.txt"]:
350
+ p = phy_path + suffix
351
+ if os.path.exists(p):
352
+ os.remove(p)
353
+
354
+
355
+ # ─── Main pipeline worker ─────────────────────────────────────────────────────
356
+
357
+ async def _worker(job_id: str) -> None:
358
+ job = _read(job_id)
359
+ if not job:
360
+ return
361
+
362
+ req = PhyloRequest(**job["_req"])
363
+
364
+ result = await _run_clustalo(job_id, req.sequences, req.seq_type)
365
+ if result is None:
366
+ return
367
+
368
+ aln_fasta, nj_newick = result
369
+
370
+ if req.method == "nj":
371
+ _patch(job_id, phase="complete",
372
+ newick=nj_newick.strip(), done_at=time.time())
373
+
374
+ elif req.method == "upgma":
375
+ _patch(job_id, phase="tree_running")
376
+ try:
377
+ newick = _upgma_newick(aln_fasta)
378
+ _patch(job_id, phase="complete",
379
+ newick=newick, done_at=time.time())
380
+ except Exception as e:
381
+ _patch(job_id, phase="error", error=f"UPGMA computation failed: {e}")
382
+
383
+ elif req.method == "ml":
384
+ await _run_phyml_local(job_id, aln_fasta, req)
385
+
386
+ else:
387
+ _patch(job_id, phase="error", error=f"Unknown method: {req.method}")
388
+
389
+
390
+ # ─── API endpoints ─────────────────────────────────────────────────────────────
391
+
392
+ @router.post("/run", response_model=RunResponse)
393
+ async def run_phylo(
394
+ req: PhyloRequest,
395
+ background_tasks: BackgroundTasks,
396
+ ):
397
+ if len(req.sequences) < 2:
398
+ raise HTTPException(400, detail="At least 2 sequences are required")
399
+ if len(req.sequences) > 50:
400
+ raise HTTPException(400, detail="Maximum 50 sequences per run")
401
+
402
+ valid_models = PROTEIN_MODELS if req.seq_type == "protein" else DNA_MODELS
403
+ if req.method == "ml" and req.model not in valid_models:
404
+ raise HTTPException(
405
+ 400,
406
+ detail=f"Model '{req.model}' not valid for {req.seq_type}. "
407
+ f"Choose from: {', '.join(valid_models)}"
408
+ )
409
+
410
+ job_id = str(uuid.uuid4())
411
+ _init(job_id, req)
412
+ background_tasks.add_task(_worker, job_id)
413
+ return RunResponse(job_id=job_id, status="queued")
414
+
415
+
416
+ @router.get("/status/{job_id}")
417
+ async def get_status(job_id: str):
418
+ job = _read(job_id)
419
+ if not job:
420
+ raise HTTPException(404, detail=f"Job {job_id} not found")
421
+ return {k: v for k, v in job.items() if not k.startswith("_")}
422
+
423
+
424
+ @router.get("/models")
425
+ async def list_models(seq_type: SeqType = "protein"):
426
+ return {
427
+ "seq_type": seq_type,
428
+ "models": PROTEIN_MODELS if seq_type == "protein" else DNA_MODELS,
429
+ "default": "LG" if seq_type == "protein" else "GTR",
430
+ }