Spaces:
Sleeping
Sleeping
ci: deploy afad831
Browse files
- Dockerfile +1 -1
- requirements-deploy.txt +5 -5
- src/deployment/serve.py +58 -11
Dockerfile
CHANGED
|
@@ -8,7 +8,7 @@ ENV DEBIAN_FRONTEND=noninteractive \
|
|
| 8 |
PIP_NO_CACHE_DIR=1 \
|
| 9 |
HF_HOME=/home/user/.cache/huggingface \
|
| 10 |
PORT=7860 \
|
| 11 |
-
USE_VLLM=
|
| 12 |
|
| 13 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 14 |
python3.11 python3.11-venv python3-pip git curl \
|
|
|
|
| 8 |
PIP_NO_CACHE_DIR=1 \
|
| 9 |
HF_HOME=/home/user/.cache/huggingface \
|
| 10 |
PORT=7860 \
|
| 11 |
+
USE_VLLM=1
|
| 12 |
|
| 13 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 14 |
python3.11 python3.11-venv python3-pip git curl \
|
requirements-deploy.txt
CHANGED
|
@@ -6,13 +6,13 @@ uvicorn[standard]==0.34.0
|
|
| 6 |
pydantic==2.10.4
|
| 7 |
pyyaml==6.0.2
|
| 8 |
|
| 9 |
-
# Backend d'inference
|
| 10 |
-
#
|
| 11 |
-
|
| 12 |
-
transformers>=4.51,<
|
| 13 |
accelerate>=1.2
|
| 14 |
peft>=0.14
|
| 15 |
huggingface_hub>=0.27
|
| 16 |
-
torch>=2.
|
| 17 |
safetensors>=0.4.5
|
| 18 |
sentencepiece>=0.2
|
|
|
|
| 6 |
pydantic==2.10.4
|
| 7 |
pyyaml==6.0.2
|
| 8 |
|
| 9 |
+
# Backend d'inference vLLM (Qwen3 supporte depuis vllm 0.7).
|
| 10 |
+
# vllm 0.7.x impose torch 2.5 et transformers 4.51-4.52.
|
| 11 |
+
vllm>=0.7,<0.8
|
| 12 |
+
transformers>=4.51,<4.53
|
| 13 |
accelerate>=1.2
|
| 14 |
peft>=0.14
|
| 15 |
huggingface_hub>=0.27
|
| 16 |
+
torch>=2.5,<2.6
|
| 17 |
safetensors>=0.4.5
|
| 18 |
sentencepiece>=0.2
|
src/deployment/serve.py
CHANGED
|
@@ -15,13 +15,17 @@ Variables d'environnement:
|
|
| 15 |
- USE_VLLM : "1" pour charger vLLM, "0" pour transformers/mock (default: "1")
|
| 16 |
- API_KEY : si defini, exige le header X-API-Key
|
| 17 |
- PORT : port d'ecoute (default: 7860 - requis par HF Spaces)
|
|
|
|
|
|
|
| 18 |
"""
|
| 19 |
|
| 20 |
from __future__ import annotations
|
| 21 |
|
|
|
|
| 22 |
import logging
|
| 23 |
import os
|
| 24 |
import re
|
|
|
|
| 25 |
import time
|
| 26 |
import uuid
|
| 27 |
from contextlib import asynccontextmanager
|
|
@@ -47,10 +51,52 @@ MODEL_ID = os.environ.get("MODEL_ID", "").strip()
|
|
| 47 |
ADAPTER_ID = os.environ.get("ADAPTER_ID", "").strip()
|
| 48 |
USE_VLLM = os.environ.get("USE_VLLM", "1") == "1"
|
| 49 |
API_KEY = os.environ.get("API_KEY", "").strip()
|
|
|
|
| 50 |
|
| 51 |
# Etat global du backend
|
| 52 |
_backend: dict[str, Any] = {"kind": "mock", "engine": None, "tokenizer": None}
|
| 53 |
audit_log: list[dict] = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
|
| 56 |
def _load_vllm(model_id: str) -> None:
|
|
@@ -101,6 +147,7 @@ def _load_transformers(model_id: str, adapter_id: str = "") -> None:
|
|
| 101 |
|
| 102 |
@asynccontextmanager
|
| 103 |
async def lifespan(app: FastAPI):
|
|
|
|
| 104 |
if not MODEL_ID:
|
| 105 |
logger.warning("MODEL_ID non defini - mode mock (echo) active")
|
| 106 |
else:
|
|
@@ -363,17 +410,17 @@ async def triage(req: TriageRequest):
|
|
| 363 |
backend=_backend["kind"],
|
| 364 |
)
|
| 365 |
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
)
|
| 377 |
logger.info("Triage %s -> %s (%.0f ms)", interaction_id, priority, latency_ms)
|
| 378 |
return response
|
| 379 |
|
|
|
|
| 15 |
- USE_VLLM : "1" pour charger vLLM, "0" pour transformers/mock (default: "1")
|
| 16 |
- API_KEY : si defini, exige le header X-API-Key
|
| 17 |
- PORT : port d'ecoute (default: 7860 - requis par HF Spaces)
|
| 18 |
+
- AUDIT_LOG_PATH : chemin du fichier JSONL append-only pour la tracabilite RGPD
|
| 19 |
+
(default: audit/audit.jsonl). Mettre vide pour desactiver.
|
| 20 |
"""
|
| 21 |
|
| 22 |
from __future__ import annotations
|
| 23 |
|
| 24 |
+
import json
|
| 25 |
import logging
|
| 26 |
import os
|
| 27 |
import re
|
| 28 |
+
import threading
|
| 29 |
import time
|
| 30 |
import uuid
|
| 31 |
from contextlib import asynccontextmanager
|
|
|
|
| 51 |
ADAPTER_ID = os.environ.get("ADAPTER_ID", "").strip()
|
| 52 |
USE_VLLM = os.environ.get("USE_VLLM", "1") == "1"
|
| 53 |
API_KEY = os.environ.get("API_KEY", "").strip()
|
| 54 |
+
AUDIT_LOG_PATH = os.environ.get("AUDIT_LOG_PATH", str(PROJECT_ROOT / "audit" / "audit.jsonl")).strip()
|
| 55 |
|
| 56 |
# Etat global du backend
|
| 57 |
_backend: dict[str, Any] = {"kind": "mock", "engine": None, "tokenizer": None}
|
| 58 |
audit_log: list[dict] = []
|
| 59 |
+
_audit_lock = threading.Lock()
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def _persist_audit_entry(entry: dict) -> None:
    """Append one audit entry to the append-only JSONL audit file.

    Without persistence the in-memory audit log is lost on redeploy, which is
    not acceptable for a medical (GDPR) audit trail, so each entry is also
    written as one JSON line to ``AUDIT_LOG_PATH``.

    Persistence is best-effort: serialization and I/O failures are logged as
    warnings and never raised to the caller.

    Args:
        entry: Audit record to serialize as a single JSON line.
    """
    if not AUDIT_LOG_PATH:
        # An empty path disables persistence.
        return
    try:
        # Serialize first so a bad entry never reaches the file.
        line = json.dumps(entry, ensure_ascii=False)
        path = Path(AUDIT_LOG_PATH)
        path.parent.mkdir(parents=True, exist_ok=True)
        # The lock keeps concurrent writers from interleaving their lines.
        with _audit_lock, open(path, "a", encoding="utf-8") as f:
            f.write(line + "\n")
    except (OSError, TypeError, ValueError) as exc:
        # TypeError/ValueError: entry is not JSON-serializable or cannot be
        # encoded as UTF-8; OSError: the file system refused the write.
        logger.warning("Echec persistance audit (%s): %s", AUDIT_LOG_PATH, exc)
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def _load_audit_history() -> None:
    """Reload persisted audit entries into ``audit_log`` at startup.

    Reads the JSONL file at ``AUDIT_LOG_PATH`` line by line and appends each
    decoded JSON object to the in-memory ``audit_log`` so the full history is
    available again after a restart.

    Blank lines are ignored. Lines that are not valid JSON, or that decode to
    something other than a JSON object, are skipped and counted; the count is
    logged as a warning so corruption of the audit trail does not go
    unnoticed. A missing file is not an error. Read and decoding failures are
    logged as warnings and never raised; entries read before the failure stay
    loaded.
    """
    if not AUDIT_LOG_PATH:
        # An empty path disables persistence, so there is nothing to reload.
        return
    path = Path(AUDIT_LOG_PATH)
    skipped = 0
    try:
        with open(path, encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line:
                    continue
                try:
                    record = json.loads(line)
                except json.JSONDecodeError:
                    skipped += 1
                    continue
                if not isinstance(record, dict):
                    # Valid JSON but not an audit record (e.g. a bare number).
                    skipped += 1
                    continue
                audit_log.append(record)
    except FileNotFoundError:
        # Nothing persisted yet (first start): keep the current log as is.
        return
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is not an OSError; without it a file containing
        # invalid UTF-8 would abort application startup.
        logger.warning("Echec lecture audit (%s): %s", path, exc)
        return
    if skipped:
        logger.warning("Audit log: %d lignes invalides ignorees dans %s", skipped, path)
    logger.info("Audit log charge: %d entrees depuis %s", len(audit_log), path)
|
| 100 |
|
| 101 |
|
| 102 |
def _load_vllm(model_id: str) -> None:
|
|
|
|
| 147 |
|
| 148 |
@asynccontextmanager
|
| 149 |
async def lifespan(app: FastAPI):
|
| 150 |
+
_load_audit_history()
|
| 151 |
if not MODEL_ID:
|
| 152 |
logger.warning("MODEL_ID non defini - mode mock (echo) active")
|
| 153 |
else:
|
|
|
|
| 410 |
backend=_backend["kind"],
|
| 411 |
)
|
| 412 |
|
| 413 |
+
entry = {
|
| 414 |
+
"interaction_id": interaction_id,
|
| 415 |
+
"timestamp": timestamp,
|
| 416 |
+
"patient_id": req.patient_id,
|
| 417 |
+
"symptoms": req.symptoms,
|
| 418 |
+
"priority_level": priority,
|
| 419 |
+
"latency_ms": response.latency_ms,
|
| 420 |
+
"backend": _backend["kind"],
|
| 421 |
+
}
|
| 422 |
+
audit_log.append(entry)
|
| 423 |
+
_persist_audit_entry(entry)
|
| 424 |
logger.info("Triage %s -> %s (%.0f ms)", interaction_id, priority, latency_ms)
|
| 425 |
return response
|
| 426 |
|