Spaces:

senthil2421
/

mlforge

Sleeping

App Files Files Community

senthil2421 committed 28 days ago

Commit

99e3f1b

1 Parent(s): d81f11d

arch: refactor cloud_backend into lean discovery server by removing execution logic

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

adapters/__init__.py +0 -0
adapters/__pycache__/__init__.cpython-310.pyc +0 -0
adapters/__pycache__/base.cpython-310.pyc +0 -0
adapters/__pycache__/hf_adapter.cpython-310.pyc +0 -0
adapters/__pycache__/onnx_adapter.cpython-310.pyc +0 -0
adapters/__pycache__/roboflow_adapter.cpython-310.pyc +0 -0
adapters/base.py +0 -28
adapters/hf_adapter.py +0 -415
adapters/onnx_adapter.py +0 -176
adapters/roboflow_adapter.py +0 -353
api/routes/benchmark.py +0 -238
api/routes/inference.py +0 -168
api/routes/jobs.py +0 -56
api/routes/system.py +0 -97
api/routes/training.py +0 -428
benchmark/__init__.py +0 -1
benchmark/__pycache__/__init__.cpython-310.pyc +0 -0
benchmark/__pycache__/compatibility.cpython-310.pyc +0 -0
benchmark/__pycache__/execution.cpython-310.pyc +0 -0
benchmark/__pycache__/metrics.cpython-310.pyc +0 -0
benchmark/__pycache__/orchestrator.cpython-310.pyc +0 -0
benchmark/__pycache__/registry.cpython-310.pyc +0 -0
benchmark/__pycache__/telemetry.cpython-310.pyc +0 -0
benchmark/adapters/__pycache__/base.cpython-310.pyc +0 -0
benchmark/adapters/__pycache__/registry.cpython-310.pyc +0 -0
benchmark/adapters/__pycache__/torch_runner.cpython-310.pyc +0 -0
benchmark/adapters/base.py +0 -38
benchmark/adapters/optimum_runner.py +0 -53
benchmark/adapters/registry.py +0 -44
benchmark/adapters/torch_runner.py +0 -45
benchmark/compatibility.py +0 -360
benchmark/execution.py +0 -366
benchmark/metrics.py +0 -110
benchmark/orchestrator.py +0 -374
benchmark/registry.py +0 -302
benchmark/telemetry.py +0 -182
benchmark/torch_runner.py +0 -142
config.py +4 -40
download/__init__.py +0 -0
download/__pycache__/__init__.cpython-310.pyc +0 -0
download/__pycache__/manager.cpython-310.pyc +0 -0
download/manager.py +0 -366
inference/__init__.py +0 -1
inference/__pycache__/__init__.cpython-310.pyc +0 -0
inference/__pycache__/engine.cpython-310.pyc +0 -0
inference/__pycache__/session.cpython-310.pyc +0 -0
inference/engine.py +0 -447
inference/session.py +0 -80
main.py +4 -6
projects/__init__.py +0 -0

adapters/__init__.py DELETED Viewed

File without changes

adapters/__pycache__/__init__.cpython-310.pyc DELETED Viewed

Binary file (143 Bytes)

adapters/__pycache__/base.cpython-310.pyc DELETED Viewed

Binary file (1.31 kB)

adapters/__pycache__/hf_adapter.cpython-310.pyc DELETED Viewed

Binary file (13 kB)

adapters/__pycache__/onnx_adapter.cpython-310.pyc DELETED Viewed

Binary file (5.27 kB)

adapters/__pycache__/roboflow_adapter.cpython-310.pyc DELETED Viewed

Binary file (10.9 kB)

adapters/base.py DELETED Viewed

@@ -1,28 +0,0 @@
-"""
-adapters/base.py — Abstract base class every source adapter must implement.
-Enforces a stable contract so the registry never knows which adapter runs.
-"""
-from __future__ import annotations
-from abc import ABC, abstractmethod
-from models.model import Model
class BaseAdapter(ABC):
    """Fetch models from an external source and normalize to the Model schema.

    Subclasses override ``source_name`` and implement :meth:`fetch_models`.
    """

    # Short identifier of the upstream source; overridden by subclasses.
    source_name: str = "unknown"

    @abstractmethod
    async def fetch_models(self) -> list[Model]:
        """Return a list of normalized Model objects from the source."""
        ...

    def _format_size(self, bytes_: int) -> str:
        """Return ``bytes_`` as a human-readable size with one decimal place.

        The value is scaled by powers of 1024 through B, KB, MB, GB and TB;
        anything larger is reported in PB.
        """
        size = float(bytes_)
        for unit in ("B", "KB", "MB", "GB", "TB"):
            if size < 1024:
                return f"{size:.1f} {unit}"
            # True division keeps the fractional part. Floor division here
            # made every scaled value render as "N.0" (1536 B -> "1.0 KB").
            size /= 1024
        return f"{size:.1f} PB"

adapters/hf_adapter.py DELETED Viewed

@@ -1,415 +0,0 @@
-"""
-adapters/hf_adapter.py — Hugging Face Hub adapter.
-Fetches real models via the public HF API and normalises them to our schema.
-Rate-limits respected via polite delays. Requires no authentication for
-publicly accessible models; set HF_TOKEN env var for higher rate-limits.
-"""
-from __future__ import annotations
-import asyncio
-import re
-from typing import Any
def _is_shard_file(filename: str) -> bool:
    """Tell whether ``filename`` names one shard of a multi-part weight set.

    Shards carry a ``-NNNNN-of-NNNNN.`` marker (five digits each), as in
    ``model-00001-of-00003.safetensors``.
    """
    shard_marker = re.search(r"-\d{5}-of-\d{5}\.", filename)
    return shard_marker is not None
-import httpx
-from tenacity import retry, stop_after_attempt, wait_exponential
-from adapters.base import BaseAdapter
-from config import settings
-from models.model import Model, ModelMetrics, ModelVersion
-from observability.logger import get_logger
-log = get_logger("hf_adapter")
-# ── Task mapping: HF pipeline_tag → our internal task ─────────────────────────
# Hub pipeline tag -> internal task name. Several pipeline tags may share one
# internal task (both image-generation tags map to "generation").
HF_TASK_MAP: dict[str, str] = dict(
    [
        ("object-detection", "detection"),
        ("image-classification", "classification"),
        ("image-segmentation", "segmentation"),
        ("text-to-image", "generation"),
        ("image-to-image", "generation"),
        ("image-feature-extraction", "embedding"),
    ]
)
# Pipeline tags we actively fetch, in the order they are declared above.
FETCH_TASKS: list[str] = list(HF_TASK_MAP)
-# ── Framework detection ────────────────────────────────────────────────────────
def _detect_framework(tags: list[str], model_id: str) -> str:
    """Guess a model's framework from its Hub tags and repository id.

    The tags and the id are joined into one lower-cased string and checked in
    priority order: onnx, tflite, coreml, tensorflow. Anything else is
    reported as ``"pytorch"``.

    The short alias ``tf`` only counts when it is not embedded in a longer
    word (e.g. the tag ``tf`` or an id like ``org/tf_model``). A plain
    substring test would also fire on unrelated words that merely contain the
    two letters, such as ``setfit`` or ``platform``.
    """
    haystack = " ".join([*tags, model_id]).lower()
    for marker in ("onnx", "tflite", "coreml", "tensorflow"):
        if marker in haystack:
            return marker
    # "tf" with no letter directly before or after it.
    if re.search(r"(?<![a-z])tf(?![a-z])", haystack):
        return "tensorflow"
    return "pytorch"   # HF default
-# ── Hardware detection ─────────────────────────────────────────────────────────
def _detect_hardware(tags: list[str]) -> list[str]:
    """Derive hardware targets from keywords found in the lower-cased tags.

    Matching is by substring on the space-joined tags. Results are ordered
    gpu, edge, cpu; ``["gpu"]`` is returned when no keyword is present.
    """
    joined = " ".join(tags).lower()
    keyword_groups = (
        ("gpu", ("cuda", "gpu")),
        ("edge", ("edge", "mobile")),
        ("cpu", ("cpu",)),
    )
    targets = [
        target
        for target, keywords in keyword_groups
        if any(keyword in joined for keyword in keywords)
    ]
    return targets or ["gpu"]  # safe default
-# ── Internal tag normalisation ─────────────────────────────────────────────────
# Keyword found inside a raw tag -> internal quality tag added for it.
QUALITY_TAG_MAP = {
    "state-of-the-art": "sota",
    "lightweight":      "lightweight",
    "tiny":             "tiny",
    "fast":             "fastest",
    "real-time":        "real-time",
    "accuracy":         "high-accuracy",
}


def _normalise_tags(raw_tags: list[str], pipeline: str) -> list[str]:
    """Build the internal tag list for a model.

    For every raw tag (lower-cased): each ``QUALITY_TAG_MAP`` keyword found
    inside it contributes its mapped tag, then the tag itself is kept unless
    it is a ``dataset:``, ``license:`` or ``language:`` metadata tag. A
    non-empty ``pipeline`` is appended with dashes turned into underscores.
    Duplicates are removed, keeping the first occurrence.
    """
    metadata_prefixes = ("dataset:", "license:", "language:")
    collected: list[str] = []
    for raw in raw_tags:
        lowered = raw.lower()
        collected.extend(
            mapped
            for keyword, mapped in QUALITY_TAG_MAP.items()
            if keyword in lowered
        )
        # Metadata tags are dropped; every other tag is kept as-is.
        if not lowered.startswith(metadata_prefixes):
            collected.append(lowered)
    if pipeline:
        collected.append(pipeline.replace("-", "_"))
    # dict keys deduplicate while preserving insertion order
    unique = dict.fromkeys(collected)
    return list(unique)
class HFAdapter(BaseAdapter):
    """Adapter that lists Hugging Face Hub models and maps them to ``Model``.

    Usable as an async context manager; leaving the context closes the
    underlying HTTP client.
    """

    source_name = "hf"

    # File extensions treated as model weights, both when sizing a repository
    # and when deriving per-file versions.
    _WEIGHT_EXTS = (".pt", ".pth", ".safetensors", ".bin", ".onnx", ".tflite", ".mlmodel")

    def __init__(self) -> None:
        """Create the HTTP client, adding a bearer token when one is configured."""
        headers = {"Accept": "application/json"}
        if settings.hf_token:
            headers["Authorization"] = f"Bearer {settings.hf_token}"
        self._client = httpx.AsyncClient(
            base_url=settings.hf_api_base,
            headers=headers,
            timeout=30,
        )

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        reraise=True,
    )
    async def _fetch_task_page(
        self, pipeline_tag: str, limit: int = 100
    ) -> list[dict[str, Any]]:
        """Return up to ``limit`` raw model records for ``pipeline_tag``.

        Records are requested sorted by downloads, descending. Raises on a
        non-success HTTP status (after the configured retries).
        """
        params = {
            "pipeline_tag": pipeline_tag,
            "sort": "downloads",
            "direction": -1,     # descending
            "limit": limit,
            "full": "True",
        }
        log.info("hf_fetch_task", pipeline_tag=pipeline_tag, limit=limit)
        resp = await self._client.get("/models", params=params)
        resp.raise_for_status()
        return resp.json()

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        reraise=True,
    )
    async def _fetch_model_detail(self, model_id: str) -> dict[str, Any]:
        """Fetch one model's full record, back-filling sibling file sizes.

        When no sibling entry carries a size, the repository tree for ``main``
        is fetched and sizes are copied over by path. That lookup is
        best-effort: if it fails the record is returned as received.
        """
        resp = await self._client.get(f"/models/{model_id}", params={"full": "True"})
        resp.raise_for_status()
        raw = resp.json()
        siblings: list[dict[str, Any]] = raw.get("siblings") or []
        has_any_size = any(isinstance(s, dict) and s.get("size") for s in siblings)
        if not has_any_size:
            try:
                tree = await self._fetch_model_tree(model_id, revision="main")
                size_by_path: dict[str, int] = {
                    (t.get("path") or ""): int(t.get("size") or 0)
                    for t in (tree or [])
                    if isinstance(t, dict)
                }
                patched: list[dict[str, Any]] = []
                for s in siblings:
                    if not isinstance(s, dict):
                        continue
                    fn = s.get("rfilename") or s.get("path") or ""
                    if fn and not s.get("size") and fn in size_by_path:
                        s = {**s, "size": size_by_path[fn]}
                    patched.append(s)
                raw["siblings"] = patched
            except Exception as exc:
                # Best-effort enrichment: keep the unpatched record, but leave
                # a trace instead of failing silently.
                log.warning("hf_tree_fetch_failed", model_id=model_id, error=str(exc))
        return raw

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        reraise=True,
    )
    async def _fetch_model_tree(self, model_id: str, *, revision: str = "main") -> list[dict[str, Any]]:
        """Return the file tree of ``model_id`` at ``revision``.

        An empty list is returned when the response body is not a JSON list.
        """
        resp = await self._client.get(f"/models/{model_id}/tree/{revision}")
        resp.raise_for_status()
        data = resp.json()
        if isinstance(data, list):
            return data
        return []

    def _parse_safe_tensors_size(self, siblings: list[dict]) -> int:
        """Sum the sizes of weight files in a sibling file list.

        Entries that are not dicts, or whose ``rfilename``/``size`` is missing
        or null, contribute nothing. Returns 0 when no size is known;
        ``_make_model`` then falls back to a name-based estimate.
        """
        total = 0
        for s in siblings or []:
            if not isinstance(s, dict):
                continue
            # ``or`` guards against explicit nulls, which ``.get(key, default)``
            # would pass through and crash on.
            filename = (s.get("rfilename") or "").lower()
            if filename.endswith(self._WEIGHT_EXTS):
                total += s.get("size") or 0
        return total

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        reraise=True,
    )
    async def _fetch_model_card(self, model_id: str) -> str:
        """Fetch model card (README.md) content for real-time description.

        Returns the README text on HTTP 200 and an empty string otherwise.
        Request errors are handled here (logged, then ``""`` is returned), so
        they do not propagate out of this method.
        """
        url = f"{settings.hf_hub_url}/{model_id}/raw/main/README.md"
        try:
            resp = await self._client.get(url)
            if resp.status_code == 200:
                return resp.text
        except Exception as exc:
            log.warning("hf_model_card_failed", model_id=model_id, error=str(exc))
        return ""

    def _extract_description(self, readme: str, raw: dict[str, Any]) -> str:
        """Extract a clean description from README or card data.

        From the README, the first non-empty line that is outside ``---``
        front matter and does not start with ``#`` is used, truncated to 500
        characters. Otherwise the first non-empty value among
        ``cardData.summary``, ``cardData.description`` and ``description`` is
        returned, or an empty string.
        """
        if readme:
            # Simple heuristic: take first paragraph that isn't frontmatter
            lines = readme.split("\n")
            in_frontmatter = False
            for line in lines:
                if line.strip() == "---":
                    in_frontmatter = not in_frontmatter
                    continue
                if not in_frontmatter and line.strip() and not line.startswith("#"):
                    return line.strip()[:500]
        card_data = raw.get("cardData") or {}
        description: str = (
            (card_data.get("summary") or "")
            or (card_data.get("description") or "")
            or (raw.get("description") or "")
        ).strip()
        return description

    def _estimate_metrics(self, model_id: str, task: str) -> ModelMetrics:
        """Return hard-coded metric estimates chosen from the model id.

        These are name-based heuristics, not measurements. Known architecture
        names (vit/dinov2, segformer, convnext, yolo) are checked first; if
        none sets a latency, a per-task fallback is applied.
        """
        metrics = ModelMetrics()
        m_id = model_id.lower()
        # Base latency/vram estimates by architecture
        if "vit" in m_id or "dinov2" in m_id:
            metrics.latency_ms = 45.5 if "base" in m_id else 85.2 if "large" in m_id else 25.0
            metrics.vram_gb = 1.2 if "base" in m_id else 2.4 if "large" in m_id else 0.8
            metrics.accuracy = 82.4 if "base" in m_id else 84.5
        elif "segformer" in m_id:
            # b0, b1, b2, b3, b4, b5
            if "b0" in m_id: metrics.latency_ms, metrics.vram_gb, metrics.accuracy = 12.0, 0.4, 35.0
            elif "b1" in m_id: metrics.latency_ms, metrics.vram_gb, metrics.accuracy = 18.0, 0.6, 40.0
            elif "b5" in m_id: metrics.latency_ms, metrics.vram_gb, metrics.accuracy = 45.0, 1.8, 50.0
            else: metrics.latency_ms, metrics.vram_gb, metrics.accuracy = 25.0, 1.0, 42.0
        elif "convnext" in m_id:
            metrics.latency_ms = 15.0 if "tiny" in m_id else 30.0
            metrics.vram_gb = 0.5 if "tiny" in m_id else 1.2
            metrics.accuracy = 81.0 if "tiny" in m_id else 83.5
        elif "yolo" in m_id:
            # n, s, m, l, x
            if "yolov8n" in m_id: metrics.latency_ms, metrics.vram_gb, metrics.mAP = 1.5, 0.2, 37.3
            elif "yolov8s" in m_id: metrics.latency_ms, metrics.vram_gb, metrics.mAP = 2.8, 0.4, 44.9
            elif "yolov8m" in m_id: metrics.latency_ms, metrics.vram_gb, metrics.mAP = 6.2, 0.9, 50.2
            else: metrics.latency_ms, metrics.vram_gb, metrics.mAP = 10.0, 1.5, 52.0
        # Generic task-based fallbacks if still empty
        if metrics.latency_ms is None:
            if task == "classification": metrics.latency_ms, metrics.accuracy = 20.0, 75.0
            elif task == "detection": metrics.latency_ms, metrics.mAP = 35.0, 45.0
            elif task == "embedding": metrics.latency_ms = 40.0
            elif task == "generation": metrics.latency_ms = 1500.0
        return metrics

    def _make_model(self, raw: dict[str, Any], pipeline_tag: str) -> Model | None:
        """Convert one raw Hub record into a ``Model``.

        Returns ``None`` when the record has no id or ``pipeline_tag`` is not
        in ``HF_TASK_MAP``. Size falls back to a name-based guess when the
        sibling list carries no weight-file sizes.
        """
        model_id: str = raw.get("id") or raw.get("modelId", "")
        if not model_id:
            return None
        task = HF_TASK_MAP.get(pipeline_tag)
        if not task:
            return None
        tags_raw: list[str] = raw.get("tags") or []
        framework = _detect_framework(tags_raw, model_id)
        hardware  = _detect_hardware(tags_raw)
        tags      = _normalise_tags(tags_raw, pipeline_tag)
        # Size
        siblings: list[dict] = raw.get("siblings") or []
        size = self._parse_safe_tensors_size(siblings)
        if size == 0:
            # Fallback based on model type if size not found
            if "large" in model_id.lower(): size = 1_200_000_000
            elif "base" in model_id.lower(): size = 500_000_000
            elif "small" in model_id.lower() or "tiny" in model_id.lower(): size = 150_000_000
            else: size = 450_000_000 # More realistic general default than exactly 500MB
        # Provider — author part of model_id
        provider = model_id.split("/")[0] if "/" in model_id else "community"
        # safe name
        name = model_id.split("/")[-1] if "/" in model_id else model_id
        # Clean ugly names
        name = re.sub(r"[-_]+", "-", name).strip("-")
        downloads = raw.get("downloads") or 0
        likes     = raw.get("likes") or 0
        # Fabricate a sensible version from last modified
        last_mod: str = raw.get("lastModified") or raw.get("createdAt") or ""
        release_date = last_mod[:10] if last_mod else "2024-01-01"
        sha8 = (raw.get("sha") or "main")[:8]
        # Build versions from weight files in the repo (one per distinct weight file).
        # Null-safe lookups: a sibling with "rfilename": null must not abort the model.
        weight_files = [
            s for s in siblings
            if isinstance(s, dict)
            and (s.get("rfilename") or "").lower().endswith(self._WEIGHT_EXTS)
            and not _is_shard_file(s.get("rfilename") or "")
        ]
        if len(weight_files) > 1:
            versions = []
            for s in weight_files[:15]:
                filename = s["rfilename"]
                # Detect variant from filename (n, s, m, l, x, or specific labels)
                variant_label = "Stable"
                fn_lower = filename.lower()
                if any(x in fn_lower for x in ["-n.", "_n.", "nano"]): variant_label = "Nano"
                elif any(x in fn_lower for x in ["-s.", "_s.", "small"]): variant_label = "Small"
                elif any(x in fn_lower for x in ["-m.", "_m.", "medium"]): variant_label = "Medium"
                elif any(x in fn_lower for x in ["-l.", "_l.", "large"]): variant_label = "Large"
                elif any(x in fn_lower for x in ["-x.", "_x.", "xlarge", "huge"]): variant_label = "XLarge"
                versions.append(ModelVersion(
                    version=filename.replace(".", "_"),
                    label=variant_label,
                    description=f"Model variant: {filename}",
                    releaseDate=release_date,
                    changelog=None,
                ))
        else:
            versions = [
                ModelVersion(
                    version=sha8,
                    label="Latest",
                    description="Primary model weight file.",
                    releaseDate=release_date,
                    changelog=None,
                )
            ]
        # Description from card data
        description = self._extract_description("", raw)
        if not description:
            description = f"{task.capitalize()} model by {provider}."
        # Metrics Estimation
        metrics = self._estimate_metrics(model_id, task)
        return Model(
            id          = model_id.replace("/", "_").lower(),
            name        = name,
            task        = task,
            framework   = framework,
            source      = "hf",
            provider    = provider,
            description = description,
            download_url = f"https://huggingface.co/{model_id}",
            size        = size,
            size_label  = self._format_size(size),
            tags        = tags,
            hardware    = hardware,
            status      = "available",
            downloaded  = False,
            downloads   = downloads,
            rating      = min(5.0, (likes / 200) + 3.5) if likes else None,
            liked       = False,
            metrics     = metrics,
            versions    = versions,
        )

    async def fetch_models(self) -> list[Model]:
        """Fetch and normalise models for every pipeline tag in ``FETCH_TASKS``.

        Per pipeline tag, the first 10 records are enriched with the full
        model detail, and the first 5 accepted models of each internal task
        get their description refreshed from the README. A failure while
        processing one pipeline tag is logged and the remaining tags are
        still processed.
        """
        models: list[Model] = []
        seen_ids: set[str] = set()
        # Models accepted so far per internal task; drives the README quota
        # without rescanning ``models`` for every candidate.
        accepted_per_task: dict[str, int] = {}
        for pipeline_tag in FETCH_TASKS:
            try:
                raw_list = await self._fetch_task_page(
                    pipeline_tag, limit=settings.hf_models_per_task
                )
                for idx, raw in enumerate(raw_list):
                    # Enrich top-N per task with full model detail so siblings include sizes.
                    if idx < 10:
                        original_id = raw.get("id") or raw.get("modelId")
                        if original_id:
                            try:
                                raw = await self._fetch_model_detail(original_id)
                            except Exception as exc:
                                # Fall back to the list record, but record why.
                                log.warning(
                                    "hf_fetch_detail_failed",
                                    model_id=original_id,
                                    error=str(exc),
                                )
                    m = self._make_model(raw, pipeline_tag)
                    if m and m.id not in seen_ids:
                        # Try to fetch real-time description for the first 5 models of each task
                        if accepted_per_task.get(m.task, 0) < 5:
                            original_id = raw.get("id") or raw.get("modelId")
                            if original_id:
                                readme = await self._fetch_model_card(original_id)
                                if readme:
                                    fresh = self._extract_description(readme, raw)
                                    # Keep the existing description when the
                                    # README yields nothing usable.
                                    if fresh:
                                        m.description = fresh
                        seen_ids.add(m.id)
                        models.append(m)
                        accepted_per_task[m.task] = accepted_per_task.get(m.task, 0) + 1
                # Be polite to HF API
                await asyncio.sleep(0.3)
            except Exception as exc:
                log.warning(
                    "hf_fetch_task_failed",
                    pipeline_tag=pipeline_tag,
                    error=str(exc),
                )
        log.info("hf_fetch_complete", total=len(models))
        return models

    async def __aenter__(self) -> "HFAdapter":
        """Enter the async context; returns this adapter."""
        return self

    async def __aexit__(self, *_: Any) -> None:
        """Close the HTTP client when the async context exits."""
        await self._client.aclose()

adapters/onnx_adapter.py DELETED Viewed

@@ -1,176 +0,0 @@
-"""
-adapters/onnx_adapter.py — ONNX Model Zoo adapter.
-Fetches the curated list of ONNX Zoo models from the GitHub API.
-"""
-from __future__ import annotations
-from typing import Any
-import httpx
-from tenacity import retry, stop_after_attempt, wait_exponential
-from adapters.base import BaseAdapter
-from models.model import Model, ModelMetrics, ModelVersion
-from observability.logger import get_logger
-log = get_logger("onnx_adapter")
-# Curated ONNX Zoo models with metadata + download URLs (GitHub API is rate-limited without auth)
# Hard-coded catalogue entries. Each dict carries the fields read by
# ONNXAdapter.fetch_models: id, name, task, provider, description,
# download_url, size, tags, hardware, metrics, downloads and versions.
# NOTE(review): several download URLs below do not visibly match the entry's
# name/description; each is flagged inline and should be confirmed.
ONNX_CURATED: list[dict[str, Any]] = [
    {
        "id": "onnx_resnet50",
        "name": "ResNet-50",
        "task": "classification",
        "provider": "ONNX Zoo",
        # NOTE(review): description says "v1" but the URL below names a
        # "resnet50-v2-7.onnx" file — confirm which is intended.
        "description": "ResNet-50 v1 image classification model in ONNX format.",
        "download_url": "https://github.com/onnx/models/raw/main/validated/vision/classification/resnet/model/resnet50-v2-7.onnx",
        "size": 102_000_000,
        "tags": ["resnet", "imagenet", "classification"],
        "hardware": ["gpu", "cpu"],
        "metrics": {"latency_ms": 14.2, "top1": 74.9},
        "downloads": 250_000,
        "versions": [{"version": "1.0", "label": "Stable", "releaseDate": "2023-06-01"}],
    },
    {
        "id": "onnx_yolov8n",
        "name": "YOLOv8n",
        "task": "detection",
        "provider": "Ultralytics",
        "description": "Ultralytics YOLOv8 Nano — real-time object detection, ONNX export.",
        "download_url": "https://github.com/ultralytics/yolov8/releases/download/v8.0.0/yolov8n.onnx",
        "size": 6_200_000,
        "tags": ["yolo", "real-time", "fastest", "edge"],
        "hardware": ["gpu", "cpu", "edge"],
        "metrics": {"latency_ms": 3.1, "mAP": 37.3},
        "downloads": 420_000,
        "versions": [{"version": "8.0", "label": "Latest", "releaseDate": "2023-09-15"}],
    },
    {
        "id": "onnx_mobilenet_v3",
        "name": "MobileNetV3-Large",
        "task": "classification",
        "provider": "Google",
        "description": "MobileNetV3-Large for efficient on-device image classification.",
        "download_url": "https://github.com/onnx/models/raw/main/validated/vision/classification/mobilenet/model/mobilenetv3-large-1.11.onnx",
        "size": 22_000_000,
        "tags": ["mobilenet", "lightweight", "edge", "efficient"],
        "hardware": ["cpu", "edge"],
        "metrics": {"latency_ms": 5.8, "top1": 75.2, "fps": 180},
        "downloads": 310_000,
        "versions": [{"version": "3.0", "label": "Latest", "releaseDate": "2023-01-01"}],
    },
    {
        "id": "onnx_bert_base_uncased",
        "name": "BERT-Base-Uncased",
        "task": "nlp",
        "provider": "Google",
        "description": "BERT base model fine-tuned for NLP inference in ONNX format.",
        # NOTE(review): this URL points at the
        # sentence-transformers/all-MiniLM-L6-v2 repository, not a
        # BERT-base-uncased one — confirm.
        "download_url": "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/onnx/model.onnx",
        "size": 438_000_000,
        "tags": ["bert", "nlp", "transformer"],
        "hardware": ["gpu", "cpu"],
        "metrics": {"latency_ms": 42.0},
        "downloads": 198_000,
        "versions": [{"version": "1.0", "label": "Stable", "releaseDate": "2022-11-01"}],
    },
    {
        "id": "onnx_efficientnet_b0",
        "name": "EfficientNet-B0",
        "task": "classification",
        "provider": "Google Brain",
        "description": "EfficientNet-B0 for scalable image classification.",
        # NOTE(review): the file named in this URL is
        # "efficientnet-lite4-11.onnx", while the entry is labelled B0 — confirm.
        "download_url": "https://github.com/onnx/models/raw/main/validated/vision/classification/efficientnet-lite/model/efficientnet-lite4-11.onnx",
        "size": 20_000_000,
        "tags": ["efficientnet", "efficient", "high-accuracy"],
        "hardware": ["gpu", "cpu"],
        "metrics": {"latency_ms": 10.4, "top1": 77.1},
        "downloads": 145_000,
        "versions": [{"version": "1.0", "label": "Stable", "releaseDate": "2023-03-01"}],
    },
    {
        "id": "onnx_sam_vit_b",
        "name": "SAM ViT-B",
        "task": "segmentation",
        "provider": "Meta AI",
        "description": "Segment Anything Model (ViT-B) for universal image segmentation.",
        # NOTE(review): the URL ends in ".pth" although the id is prefixed
        # "onnx_" — confirm the file format.
        "download_url": "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth",
        "size": 375_000_000,
        "tags": ["sam", "segmentation", "sota"],
        "hardware": ["gpu"],
        "metrics": {"latency_ms": 68.0},
        "downloads": 88_000,
        "versions": [{"version": "1.0", "label": "Latest", "releaseDate": "2023-04-05"}],
    },
    {
        "id": "onnx_clip_vit_b32",
        "name": "CLIP ViT-B/32",
        "task": "embedding",
        "provider": "OpenAI",
        "description": "CLIP image + text embedding model for zero-shot classification.",
        # NOTE(review): the URL ends in ".pt" although the id is prefixed
        # "onnx_" — confirm the file format.
        "download_url": "https://openaipublic.blob.core.windows.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba4f386/ViT-B-32.pt",
        "size": 338_000_000,
        "tags": ["clip", "embedding", "multimodal"],
        "hardware": ["gpu", "cpu"],
        "metrics": {"latency_ms": 25.0},
        "downloads": 275_000,
        "versions": [{"version": "1.0", "label": "Stable", "releaseDate": "2023-01-01"}],
    },
    {
        "id": "onnx_whisper_tiny",
        "name": "Whisper Tiny",
        "task": "nlp",
        "provider": "OpenAI",
        # NOTE(review): description says "ONNX format" but the URL below ends
        # in ".pt" — confirm.
        "description": "Whisper Tiny speech-to-text model in ONNX format.",
        "download_url": "https://openaipublic.azureedge.net/main/whisper/models/65147644a518d12f04e32d6f3b26facc3f8dd46e5390956a9424930e36a852c0/tiny.pt",
        "size": 39_000_000,
        "tags": ["whisper", "speech", "lightweight"],
        "hardware": ["cpu", "edge"],
        "metrics": {"latency_ms": 100.0},
        "downloads": 167_000,
        "versions": [{"version": "20231117", "label": "Latest", "releaseDate": "2023-11-17"}],
    },
]
class ONNXAdapter(BaseAdapter):
    """Adapter that serves the hard-coded ``ONNX_CURATED`` entries as models."""

    source_name = "onnx"

    async def fetch_models(self) -> list[Model]:
        """Convert every curated entry to a ``Model``.

        An entry that fails to convert is logged as ``onnx_parse_failed`` and
        skipped; the remaining entries are still returned.
        """
        converted: list[Model] = []
        for entry in ONNX_CURATED:
            try:
                release_history = []
                for spec in entry.get("versions", []):
                    release_history.append(
                        ModelVersion(
                            version=spec["version"],
                            label=spec.get("label", "Stable"),
                            releaseDate=spec.get("releaseDate", ""),
                        )
                    )
                raw_metrics = entry.get("metrics", {})
                # Built in the same order the fields are consumed, so a bad
                # entry fails on the same field as before.
                fields = {
                    "id": entry["id"],
                    "name": entry["name"],
                    "task": entry["task"],
                    "framework": "onnx",
                    "source": "onnx",
                    "provider": entry.get("provider", "ONNX Zoo"),
                    "description": entry.get("description", ""),
                    "download_url": entry.get("download_url"),
                    "size": entry.get("size", 0),
                    "size_label": self._format_size(entry.get("size", 0)),
                    "tags": entry.get("tags", []),
                    "hardware": entry.get("hardware", ["gpu"]),
                    "status": "available",
                    "downloaded": False,
                    "downloads": entry.get("downloads"),
                    "rating": 4.2,
                    "metrics": ModelMetrics(**raw_metrics),
                    "versions": release_history,
                }
                converted.append(Model(**fields))
            except Exception as exc:
                log.warning("onnx_parse_failed", model_id=entry.get("id"), error=str(exc))
        log.info("onnx_fetch_complete", total=len(converted))
        return converted

adapters/roboflow_adapter.py DELETED Viewed

@@ -1,353 +0,0 @@
-"""
-adapters/roboflow_adapter.py — Roboflow Universe API client.
-Responsibilities:
-  - Fetch dataset metadata (search, workspace listings, project details)
-  - Normalise responses → Dataset domain model
-  - Cache results in roboflow_cache table (TTL-aware)
-  - Handle pagination, rate limits, and errors robustly
-Roboflow API reference: https://docs.roboflow.com/api-reference/
-"""
-from __future__ import annotations
-import hashlib
-import json
-import time
-from typing import Any
-import httpx
-from tenacity import retry, stop_after_attempt, wait_exponential
-from database.connection import get_db
-from models.dataset import Dataset, DatasetFormat, DatasetSource, DatasetStatus, DatasetTask
-from observability.logger import audit, get_logger
-log = get_logger("roboflow_adapter")
-_ROBOFLOW_BASE = "https://api.roboflow.com"
-_UNIVERSE_BASE = "https://universe.roboflow.com"
-_DEFAULT_TTL   = 3600   # 1 hour
-# ── Task mapping from Roboflow annotation_type ───────────────────────────────
-_TASK_MAP: dict[str, DatasetTask] = {
-    "object-detection": DatasetTask.detection,
-    "instance-segmentation": DatasetTask.segmentation,
-    "semantic-segmentation": DatasetTask.segmentation,
-    "classification": DatasetTask.classification,
-    "keypoint-detection": DatasetTask.keypoints,
-    "multiclass-classification": DatasetTask.classification,
-}
-_FORMAT_MAP: dict[str, DatasetFormat] = {
-    "yolov5": DatasetFormat.yolo,
-    "yolov7": DatasetFormat.yolo,
-    "yolov8": DatasetFormat.yolo,
-    "yolov9": DatasetFormat.yolo,
-    "coco": DatasetFormat.coco,
-    "voc": DatasetFormat.voc,
-    "tfrecord": DatasetFormat.tfrecord,
-    "csv": DatasetFormat.csv,
-    "createml": DatasetFormat.json,
-    "multiclass": DatasetFormat.csv,
-}
-def _cache_key(parts: list[str]) -> str:
-    """Return a 32-character hex cache key derived from *parts*.
-    The parts are joined with ``|`` and hashed with SHA-256; only the first
-    32 hex characters of the digest are kept.
-    """
-    digest = hashlib.sha256("|".join(parts).encode())
-    return digest.hexdigest()[:32]
-def _fmt_bytes(n: int) -> str:
-    """Format a byte count as a one-decimal string with a binary-scaled unit.
-    The value is divided by 1024 until it drops below 1024 or the units run
-    out; anything beyond TB is reported in PB.
-    """
-    units = ("B", "KB", "MB", "GB", "TB")
-    value = n
-    idx = 0
-    while idx < len(units):
-        if value < 1024:
-            return f"{value:.1f} {units[idx]}"
-        value = value / 1024
-        idx += 1
-    return f"{value:.1f} PB"
-# ── Cache helpers ─────────────────────────────────────────────────────────────
-async def _cache_get(key: str) -> dict[str, Any] | None:
-    """Return the cached payload stored under *key*, or ``None``.
-    ``None`` is returned both when no row exists and when the row is older
-    than its own ``ttl_secs``. The payload is whatever JSON was stored, so it
-    is not necessarily a dict despite the annotation.
-    """
-    conn = await get_db()
-    query = "SELECT payload, fetched_at, ttl_secs FROM roboflow_cache WHERE cache_key = ?"
-    async with conn.execute(query, (key,)) as cursor:
-        record = await cursor.fetchone()
-    if record is None:
-        return None
-    # NOTE(review): time.mktime interprets the parsed timestamp as LOCAL time.
-    # If fetched_at is written in UTC (e.g. a SQLite CURRENT_TIMESTAMP default),
-    # the computed age is skewed by the host's UTC offset — confirm the schema.
-    stamp = time.strptime(record["fetched_at"], "%Y-%m-%d %H:%M:%S")
-    fetched_epoch = time.mktime(stamp)
-    expired = time.time() - fetched_epoch > record["ttl_secs"]
-    if not expired:
-        return json.loads(record["payload"])
-    return None
-async def _cache_set(key: str, payload: dict[str, Any], ttl: int = _DEFAULT_TTL) -> None:
-    """Insert or overwrite the cache row for *key* and commit.
-    *payload* is stored as JSON text together with *ttl* (seconds).
-    ``fetched_at`` is not written here, so it presumably comes from a column
-    default — TODO confirm against the table definition.
-    """
-    conn = await get_db()
-    serialised = json.dumps(payload)
-    await conn.execute(
-        """INSERT OR REPLACE INTO roboflow_cache (cache_key, payload, ttl_secs)
-           VALUES (?, ?, ?)""",
-        (key, serialised, ttl),
-    )
-    await conn.commit()
-# ── HTTP client factory ───────────────────────────────────────────────────────
-def _make_client(api_key: str) -> httpx.AsyncClient:
-    """Build an ``httpx.AsyncClient`` bound to the Roboflow API base URL.
-    The client sends *api_key* as a default query parameter, uses a
-    30-second timeout and identifies itself as ``MLForge/1.0``. The caller is
-    responsible for closing it (e.g. with ``async with``).
-    """
-    settings: dict[str, Any] = {
-        "base_url": _ROBOFLOW_BASE,
-        "params": {"api_key": api_key},
-        "timeout": 30.0,
-        "headers": {"User-Agent": "MLForge/1.0"},
-    }
-    return httpx.AsyncClient(**settings)
-# ── Roboflow Adapter ──────────────────────────────────────────────────────────
-class RoboflowAdapter:
-    """
-    Stateless adapter for the Roboflow API.
-    All methods accept api_key explicitly to support per-user keys.
-    Every method is a static method. The three read methods (search, workspace
-    listing, project detail) consult the roboflow_cache table before hitting
-    the network and are retried up to three times with exponential back-off.
-    """
-    # ── Search (Universe) ─────────────────────────────────────────────────────
-    @staticmethod
-    @retry(stop=stop_after_attempt(3), wait=wait_exponential(min=1, max=8))
-    async def search_datasets(
-        api_key: str,
-        query: str = "",
-        workspace: str | None = None,
-        page: int = 0,
-        page_size: int = 50,
-    ) -> list[Dataset]:
-        """
-        Search Roboflow Universe for datasets.
-        Returns normalised Dataset objects.
-        Args:
-            api_key: Roboflow API key sent with the request.
-            query: Free-text query; an empty string is sent as ``*``.
-            workspace: Optional workspace filter.
-            page: Zero-based page index (sent as ``from = page * page_size``).
-            page_size: Number of results requested per page.
-        Raises:
-            httpx.HTTPStatusError: the API answered with an error status
-                (logged and audited before being re-raised).
-        """
-        # NOTE(review): the cache key does not include api_key, so cached results
-        # are shared between callers using different keys — confirm intended.
-        ck = _cache_key(["search", query, str(workspace), str(page), str(page_size)])
-        cached = await _cache_get(ck)
-        # A cached empty list is falsy and therefore treated as a miss.
-        if cached:
-            log.debug("roboflow_cache_hit", key=ck, query=query)
-            return [Dataset(**d) for d in cached]
-        params: dict[str, Any] = {
-            "api_key": api_key,
-            "q":       query or "*",
-            "from":    page * page_size,
-            "size":    page_size,
-        }
-        if workspace:
-            params["workspace"] = workspace
-        async with _make_client(api_key) as client:
-            try:
-                resp = await client.get("/", params=params)
-                resp.raise_for_status()
-                data = resp.json()
-            except httpx.HTTPStatusError as e:
-                log.error("roboflow_api_error", status=e.response.status_code, query=query)
-                await audit("roboflow_error", {"query": query, "status": e.response.status_code}, level="error")
-                raise
-        datasets = []
-        for item in data.get("results", []):
-            # One malformed result must not discard the whole page.
-            try:
-                ds = RoboflowAdapter._normalise_search_result(item)
-                datasets.append(ds)
-            except Exception as exc:
-                log.warning("normalise_error", item_id=item.get("id"), error=str(exc))
-        await _cache_set(ck, [d.model_dump() for d in datasets])
-        await audit("roboflow_search", {"query": query, "count": len(datasets)})
-        return datasets
-    # ── Workspace datasets listing ────────────────────────────────────────────
-    @staticmethod
-    @retry(stop=stop_after_attempt(3), wait=wait_exponential(min=1, max=8))
-    async def list_workspace_datasets(
-        api_key: str,
-        workspace: str,
-    ) -> list[Dataset]:
-        """
-        List all datasets in a Roboflow workspace.
-        Projects are read from ``workspace.projects`` in the response; a project
-        that cannot be normalised is logged and skipped.
-        Raises:
-            httpx.HTTPStatusError: the API answered with an error status.
-        """
-        ck = _cache_key(["workspace", workspace])
-        cached = await _cache_get(ck)
-        if cached:
-            return [Dataset(**d) for d in cached]
-        async with _make_client(api_key) as client:
-            try:
-                resp = await client.get(f"/{workspace}")
-                resp.raise_for_status()
-                data = resp.json()
-            except httpx.HTTPStatusError as e:
-                log.error("roboflow_workspace_error", workspace=workspace, status=e.response.status_code)
-                raise
-        datasets = []
-        for proj in data.get("workspace", {}).get("projects", []):
-            try:
-                ds = RoboflowAdapter._normalise_project(proj, workspace)
-                datasets.append(ds)
-            except Exception as exc:
-                log.warning("normalise_project_error", project=proj.get("id"), error=str(exc))
-        await _cache_set(ck, [d.model_dump() for d in datasets])
-        return datasets
-    # ── Single project detail ─────────────────────────────────────────────────
-    @staticmethod
-    @retry(stop=stop_after_attempt(3), wait=wait_exponential(min=1, max=8))
-    async def get_project(
-        api_key: str,
-        workspace: str,
-        project_id: str,
-    ) -> Dataset | None:
-        """
-        Fetch full metadata for a single Roboflow project.
-        Returns:
-            The normalised Dataset, or ``None`` when the API answers 404.
-        Raises:
-            httpx.HTTPStatusError: any error status other than 404.
-        """
-        ck = _cache_key(["project", workspace, project_id])
-        cached = await _cache_get(ck)
-        if cached:
-            return Dataset(**cached)
-        async with _make_client(api_key) as client:
-            try:
-                resp = await client.get(f"/{workspace}/{project_id}")
-                resp.raise_for_status()
-                data = resp.json()
-            except httpx.HTTPStatusError as e:
-                if e.response.status_code == 404:
-                    return None
-                raise
-        # Some responses wrap the project under "project"; otherwise use the body itself.
-        proj_data = data.get("project", data)
-        ds = RoboflowAdapter._normalise_project(proj_data, workspace)
-        await _cache_set(ck, ds.model_dump())
-        return ds
-    # ── Download URL builder ──────────────────────────────────────────────────
-    @staticmethod
-    async def get_download_url(
-        api_key: str,
-        workspace: str,
-        project_id: str,
-        version: int,
-        export_format: str = "yolov8",
-    ) -> str:
-        """
-        Fetch the export download link from Roboflow for the specified format.
-        The official Roboflow SDK is tried first. If it is unavailable, fails,
-        or yields no link, the REST endpoint
-        ``/{workspace}/{project_id}/{version}/{export_format}`` is queried instead.
-        Returns:
-            The download URL; Universe URLs always carry ``key=<api_key>``.
-        Raises:
-            ValueError: neither the SDK nor the REST fallback produced a link.
-            httpx.HTTPStatusError: the REST fallback answered with an error status.
-        """
-        try:
-            import asyncio
-            from roboflow import Roboflow
-            def _get_link():
-                # The SDK is synchronous, so the whole chain (client construction
-                # and lookups included) runs in a worker thread, not on the loop.
-                rf = Roboflow(api_key=api_key)
-                version_obj = rf.workspace(workspace).project(project_id).version(version)
-                return version_obj.export(export_format).download_link
-            link = await asyncio.to_thread(_get_link)
-            if not link:
-                raise ValueError(f"No download link returned for {workspace}/{project_id} v{version}")
-            return link
-        except Exception as e:
-            # Deliberately broad: ImportError, SDK errors and the empty-link
-            # ValueError above all fall through to the REST fallback below.
-            log.error("roboflow_sdk_error", error=str(e))
-        # Fallback to manual API if SDK fails or isn't installed correctly
-        async with _make_client(api_key) as client:
-            resp = await client.get(
-                f"/{workspace}/{project_id}/{version}/{export_format}"
-            )
-            resp.raise_for_status()
-            data = resp.json()
-        # FIX: the original read an undefined name `export` here, so the fallback
-        # always died with NameError. The link is expected under data["export"]
-        # — confirm against the Roboflow export endpoint response.
-        export = data.get("export")
-        link = (export.get("link") if isinstance(export, dict) else None) or ""
-        if not link:
-            # If 'link' is missing, check if it's a Universe-style project and try to resolve manually
-            # Roboflow manual resolution often follows: universe.roboflow.com/ds/[id]?key=[api_key]
-            if "project" in data:
-                pid = data["project"].get("id")
-                if pid:
-                    link = f"https://universe.roboflow.com/ds/{pid}?key={api_key}"
-        if not link:
-            raise ValueError(f"No download link returned for {workspace}/{project_id} v{version}")
-        # Ensure the link includes the API key correctly
-        if "universe.roboflow.com" in link:
-            if "key=" not in link:
-                separator = "&" if "?" in link else "?"
-                link = f"{link}{separator}key={api_key}"
-            elif f"key={api_key}" not in link:
-                # Replace old key if it exists but is wrong. A callable replacement
-                # keeps backslashes in api_key from being read as regex escapes.
-                import re
-                link = re.sub(r"key=[^&]+", lambda _m: f"key={api_key}", link)
-        return link
-    # ── Normalisation helpers ─────────────────────────────────────────────────
-    @staticmethod
-    def _normalise_search_result(item: dict[str, Any]) -> Dataset:
-        """
-        Map a Universe search result → Dataset.
-        Unknown annotation types fall back to detection; the format is always
-        reported as YOLO and the size is left unknown (0 bytes, label "—").
-        """
-        ann_type   = item.get("annotation", {}).get("type", "object-detection")
-        rf_task    = _TASK_MAP.get(ann_type, DatasetTask.detection)
-        class_names = [c.get("name", "") for c in item.get("classes", [])]
-        # `or 0` also covers an explicit null image count.
-        images      = item.get("images", 0) or 0
-        return Dataset(
-            # "/" in the Roboflow id is replaced so the id holds no slash.
-            id          = item.get("id", "").replace("/", "__"),
-            name        = item.get("name", "Unnamed"),
-            description = item.get("description", ""),
-            task        = rf_task,
-            format      = DatasetFormat.yolo,
-            source      = DatasetSource.roboflow,
-            status      = DatasetStatus.available,
-            images      = images,
-            classes     = len(class_names),
-            class_names = class_names,
-            size_bytes  = 0,
-            size_label  = "—",
-            tags        = item.get("tags", []),
-            roboflow_id = item.get("id", ""),
-            created_at  = item.get("created", ""),
-            updated_at  = item.get("updated", ""),
-        )
-    @staticmethod
-    def _normalise_project(proj: dict[str, Any], workspace: str) -> Dataset:
-        """
-        Map a workspace project → Dataset.
-        The Roboflow id is ``<workspace>/<project id>``; the project id falls back
-        to the project name, then to ``"unknown"``. Classes may be given either
-        as dicts with a ``name`` key or as plain values.
-        """
-        ann_type    = proj.get("annotation", "object-detection")
-        rf_task     = _TASK_MAP.get(ann_type, DatasetTask.detection)
-        class_names = [c.get("name", c) if isinstance(c, dict) else c
-                       for c in proj.get("classes", [])]
-        project_id  = proj.get("id", proj.get("name", "unknown"))
-        rf_id       = f"{workspace}/{project_id}"
-        images      = proj.get("images", 0) or 0
-        return Dataset(
-            id          = rf_id.replace("/", "__"),
-            name        = proj.get("name", project_id),
-            description = proj.get("description", ""),
-            task        = rf_task,
-            format      = DatasetFormat.yolo,
-            source      = DatasetSource.roboflow,
-            status      = DatasetStatus.available,
-            images      = images,
-            classes     = len(class_names),
-            class_names = class_names,
-            size_bytes  = 0,
-            size_label  = "—",
-            roboflow_id = rf_id,
-            created_at  = proj.get("created", ""),
-            updated_at  = proj.get("updated", ""),
-        )

api/routes/benchmark.py DELETED Viewed

@@ -1,238 +0,0 @@
-"""
-api/routes/benchmark.py — Benchmark Bridge REST + WebSocket API.
-Routes:
-  POST /benchmark/validate          — compatibility check (no job created)
-  POST /benchmark/run               — validate + create + enqueue (202)
-  GET  /benchmark/jobs              — list jobs (filterable)
-  GET  /benchmark/results/all       — list all results
-  GET  /benchmark/{job_id}          — single job status + logs
-  GET  /benchmark/{job_id}/result   — metrics + telemetry for completed job
-  WS   /benchmark/live/{job_id}     — real-time progress stream
-"""
-from __future__ import annotations
-import asyncio
-from typing import Any
-from fastapi import APIRouter, HTTPException, Query, WebSocket, WebSocketDisconnect
-import benchmark.orchestrator as orchestrator
-import benchmark.registry as bench_reg
-from models.benchmark import (
-    BenchmarkContext,
-    BenchmarkJob,
-    BenchmarkResult,
-    BenchmarkRunResponse,
-    ValidationReport,
-)
-from observability.logger import get_logger
-log = get_logger("api.benchmark")
-router = APIRouter(prefix="/benchmark", tags=["benchmark"])
-# ── POST /benchmark/validate ──────────────────────────────────────────────────
-# Compatibility pre-check: delegates to orchestrator.validate_context and
-# returns its report unchanged; no job is created by this handler.
-@router.post(
-    "/validate",
-    response_model = ValidationReport,
-    summary        = "Validate model ↔ dataset compatibility",
-    description    = (
-        "Runs all 5 compatibility gates (task, format, framework×hardware, "
-        "VRAM, precision) and returns a structured report. "
-        "Does NOT create a benchmark job."
-    ),
-)
-async def validate_benchmark(ctx: BenchmarkContext) -> ValidationReport:
-    try:
-        return await orchestrator.validate_context(ctx)
-    except HTTPException:
-        # Already an HTTP-level error: let it through with its own status code.
-        raise
-    except Exception as exc:
-        # Anything else is logged with traceback and surfaced as a 500 whose
-        # detail is the exception text.
-        log.exception("validate_error")
-        raise HTTPException(status_code=500, detail=str(exc)) from exc
-# ── POST /benchmark/run ───────────────────────────────────────────────────────
-# Starts a benchmark: orchestrator.create_and_run returns the job, and only its
-# id and status are echoed back (HTTP 202) together with a hint to the live feed.
-@router.post(
-    "/run",
-    response_model = BenchmarkRunResponse,
-    status_code    = 202,
-    summary        = "Start a benchmark run",
-    description    = (
-        "Validates compatibility, creates a benchmark job, and starts async "
-        "execution. Returns job_id immediately — poll GET /benchmark/{job_id} "
-        "or connect to WS /benchmark/live/{job_id} for progress."
-    ),
-)
-async def run_benchmark(ctx: BenchmarkContext) -> BenchmarkRunResponse:
-    try:
-        job = await orchestrator.create_and_run(ctx)
-        return BenchmarkRunResponse(
-            job_id  = job.id,
-            status  = job.status,
-            message = f"Benchmark job {job.id} queued — connect to /benchmark/live/{job.id} for live telemetry",
-        )
-    except HTTPException:
-        # Already an HTTP-level error: let it through with its own status code.
-        raise
-    except Exception as exc:
-        # Unexpected failure: log with traceback and answer 500.
-        log.exception("run_benchmark_error")
-        raise HTTPException(status_code=500, detail=str(exc)) from exc
-# ── POST /benchmark/sync ──────────────────────────────────────────────────────────
-# Re-indexes benchmark records via orchestrator.sync_project_benchmarks and
-# reports how many were processed as {"status": "success", "count": <n>}.
-@router.post(
-    "/sync",
-    summary     = "Sync benchmarks from active project folder",
-    description = "Scans the active project's 'benchmarks' folder and ensures all JSON records are indexed in SQLite.",
-)
-async def sync_benchmarks() -> dict[str, Any]:
-    try:
-        count = await orchestrator.sync_project_benchmarks()
-        return {"status": "success", "count": count}
-    except HTTPException:
-        # Same policy as the other handlers in this router: an HTTPException
-        # keeps its own status code instead of being rewrapped as a 500.
-        raise
-    except Exception as exc:
-        # Unexpected failure: log with traceback and answer 500.
-        log.exception("sync_error")
-        raise HTTPException(status_code=500, detail=str(exc)) from exc
-# ── GET /benchmark/jobs ───────────────────────────────────────────────────────
-# Thin pass-through to bench_reg.list_jobs: the optional status/model_id filters
-# are forwarded as-is, and `limit` is bounded to 1..500 by the Query validator.
-@router.get(
-    "/jobs",
-    response_model = list[BenchmarkJob],
-    summary        = "List benchmark jobs",
-)
-async def list_jobs(
-    status:   str | None = Query(None, description="Filter by status (queued|running|completed|failed)"),
-    model_id: str | None = Query(None, description="Filter by model_id"),
-    limit:    int        = Query(100, ge=1, le=500),
-) -> list[BenchmarkJob]:
-    return await bench_reg.list_jobs(status=status, model_id=model_id, limit=limit)
-# ── GET /benchmark/results/all ────────────────────────────────────────────────
-# Must be declared BEFORE /{job_id} to avoid "results" being treated as a job_id
-# Thin pass-through to bench_reg.list_results; `limit` is bounded to 1..500
-# by the Query validator.
-@router.get(
-    "/results/all",
-    response_model = list[BenchmarkResult],
-    summary        = "List all benchmark results (leaderboard feed)",
-)
-async def list_results(
-    limit: int = Query(100, ge=1, le=500),
-) -> list[BenchmarkResult]:
-    return await bench_reg.list_results(limit=limit)
-# ── GET /benchmark/{job_id} ───────────────────────────────────────────────────
-# Looks a single job up in the benchmark registry; answers 404 when
-# bench_reg.get_job returns a falsy value.
-@router.get(
-    "/{job_id}",
-    response_model = BenchmarkJob,
-    summary        = "Get benchmark job status + logs",
-)
-async def get_job(job_id: str) -> BenchmarkJob:
-    job = await bench_reg.get_job(job_id)
-    if not job:
-        raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found")
-    return job
-# ── GET /benchmark/{job_id}/result ───────────────────────────────────────────
-# Returns the stored result for a job. A missing result is reported as 404
-# whether the job is unknown or simply has no result recorded yet — the
-# handler does not distinguish the two cases.
-@router.get(
-    "/{job_id}/result",
-    response_model = BenchmarkResult,
-    summary        = "Get final metrics + telemetry for a completed job",
-)
-async def get_result(job_id: str) -> BenchmarkResult:
-    result = await bench_reg.get_result(job_id)
-    if not result:
-        raise HTTPException(
-            status_code = 404,
-            detail      = f"No result for job '{job_id}' — job may still be running",
-        )
-    return result
-# ── WS /benchmark/live/{job_id} ───────────────────────────────────────────────
-@router.websocket("/live/{job_id}")
-async def live_telemetry(websocket: WebSocket, job_id: str) -> None:
-    """
-    WebSocket stream for real-time benchmark progress.
-    Streams incremental logs and high-frequency telemetry.
-    The job is re-read from the registry every 0.5 s. Each tick sends the
-    status, progress, the log lines added since the previous tick and the
-    latest telemetry. The loop ends when the job is missing, completed
-    (followed by one extra message carrying the result, if any) or failed
-    (followed by one extra message carrying the error). The socket is always
-    closed on exit.
-    """
-    await websocket.accept()
-    log.info("ws_connected", job_id=job_id)
-    # Index into job.logs of the first entry not yet sent to this client.
-    last_log_idx = 0
-    try:
-        while True:
-            job = await bench_reg.get_job(job_id)
-            if not job:
-                await websocket.send_json(
-                    {"error": f"Job '{job_id}' not found", "job_id": job_id}
-                )
-                break
-            # Only send new logs
-            new_logs = job.logs[last_log_idx:]
-            last_log_idx = len(job.logs)
-            payload: dict[str, Any] = {
-                "job_id":   job.id,
-                "status":   job.status,
-                "progress": round(job.progress, 4),
-                "logs":     new_logs,
-                "telemetry": job.last_telemetry.model_dump() if job.last_telemetry else None,
-            }
-            # Explicitly include detections for the UI visualizer if they exist
-            if job.last_telemetry and hasattr(job.last_telemetry, "detections"):
-                payload["detections"] = job.last_telemetry.detections
-            await websocket.send_json(payload)
-            # Terminal states: the regular tick above has already been sent, so
-            # the client receives the final logs before the closing message.
-            if job.status == "completed":
-                result = await bench_reg.get_result(job_id)
-                if result:
-                    await websocket.send_json(
-                        {
-                            "job_id":   job_id,
-                            "status":   "completed",
-                            "result":   result.model_dump(),
-                        }
-                    )
-                break
-            if job.status == "failed":
-                await websocket.send_json(
-                    {
-                        "job_id": job_id,
-                        "status": "failed",
-                        "error":  job.error or "Unknown error",
-                    }
-                )
-                break
-            await asyncio.sleep(0.5)   # poll at 2 Hz
-    except WebSocketDisconnect:
-        # Client closed the connection: a normal way for the stream to end.
-        log.info("ws_disconnected", job_id=job_id)
-    except Exception as exc:
-        log.exception("ws_error", job_id=job_id)
-        # Best effort: tell the client what went wrong; the socket may
-        # already be unusable, in which case the send itself fails.
-        try:
-            await websocket.send_json({"error": str(exc), "job_id": job_id})
-        except Exception:
-            pass
-    finally:
-        # Best effort: closing an already-closed socket raises, which is ignored.
-        try:
-            await websocket.close()
-        except Exception:
-            pass

api/routes/inference.py DELETED Viewed

@@ -1,168 +0,0 @@
-"""
-api/routes/inference.py — Inference Engine endpoints.
-POST /inference/run       — single synchronous inference pass
-POST /inference/stream    — SSE stream (stage-by-stage pipeline events)
-GET  /inference/history   — session ledger
-DELETE /inference/history — clear session ledger
-GET  /inference/cache     — currently warm models in memory
-DELETE /inference/cache/{model_id} — evict from cache
-"""
-from __future__ import annotations
-import asyncio
-import json
-import time
-from fastapi import APIRouter, HTTPException, Response
-from fastapi.responses import StreamingResponse
-from inference.engine import InferenceEngine, evict_model, get_cache_status
-from inference.session import clear_history, get_history, record
-from models.inference import (
-    InferenceHistoryEntry,
-    InferenceRequest,
-    InferenceResult,
-)
-from observability.logger import get_logger
-from registry.registry import get_model
-log = get_logger("api.inference")
-router = APIRouter(prefix="/inference", tags=["inference"])
-_engine = InferenceEngine()
-# ── Single run ───────────────────────────────────────────────────────────────
-@router.post("/run", response_model=InferenceResult)
-async def run_inference(body: InferenceRequest) -> InferenceResult:
-    """Execute one full inference pass and return the complete result."""
-    # Unknown model id -> 404 before any work is done.
-    model = await get_model(body.model_id)
-    if not model:
-        raise HTTPException(status_code=404, detail=f"Model '{body.model_id}' not found")
-    result = await _engine.run(body, model)
-    # The engine reports failure through result.status rather than by raising;
-    # translate that into a 500. Failed runs are not written to the history.
-    if result.status == "error":
-        raise HTTPException(status_code=500, detail=result.error or "Inference failed")
-    await record(body, result, model.name)
-    return result
-# ── SSE stream ───────────────────────────────────────────────────────────────
-@router.post("/stream")
-async def stream_inference(body: InferenceRequest) -> StreamingResponse:
-    """
-    Server-Sent Events stream.
-    Emits one JSON event per pipeline stage as it completes, then a final
-    'done' event with the full InferenceResult.
-    Client usage:
-        const es = new EventSource('/inference/stream');  // POST via fetch + EventSource polyfill
-        es.onmessage = e => console.log(JSON.parse(e.data));
-    """
-    # Unknown model id -> 404 before the stream is opened.
-    model = await get_model(body.model_id)
-    if not model:
-        raise HTTPException(status_code=404, detail=f"Model '{body.model_id}' not found")
-    # Hand-off between the background producer and the response generator;
-    # None is the end-of-stream sentinel.
-    queue: asyncio.Queue[str | None] = asyncio.Queue()
-    async def _producer() -> None:
-        """Run inference while pushing SSE events into the queue."""
-        try:
-            # Patch engine to emit stage events
-            result = await _engine_stream(body, model, queue)
-            await record(body, result, model.name)
-            # Final complete event
-            await queue.put(
-                f"event: done\ndata: {result.model_dump_json()}\n\n"
-            )
-        except Exception as exc:
-            # Failures are reported in-band: the HTTP status has already been sent.
-            await queue.put(
-                f"event: error\ndata: {json.dumps({'error': str(exc)})}\n\n"
-            )
-        finally:
-            await queue.put(None)   # sentinel
-    # FIX: keep a reference to the task. The event loop only holds weak
-    # references, so a discarded create_task() result can be garbage-collected
-    # before it finishes.
-    producer_task = asyncio.create_task(_producer())
-    async def _generator():
-        try:
-            while True:
-                msg = await queue.get()
-                if msg is None:
-                    break
-                yield msg
-        finally:
-            # Reached on normal completion and when the client disconnects.
-            # On disconnect the producer is still running with nobody
-            # listening, so stop it (that run is then not recorded).
-            if not producer_task.done():
-                producer_task.cancel()
-    return StreamingResponse(
-        _generator(),
-        media_type="text/event-stream",
-        headers={
-            "Cache-Control": "no-cache",
-            # Asks nginx-style reverse proxies not to buffer the stream.
-            "X-Accel-Buffering": "no",
-        },
-    )
-async def _engine_stream(
-    req: InferenceRequest,
-    model,
-    queue: asyncio.Queue,
-) -> InferenceResult:
-    """
-    Run the full inference pass, then replay it into *queue* as SSE messages.
-    The engine call completes first; afterwards one ``stage`` message is queued
-    per entry of ``result.pipeline`` followed by a single ``vitals`` message.
-    The finished result is returned to the caller.
-    """
-    outcome = await _engine.run(req, model)
-    for step in outcome.pipeline:
-        stage_event = {
-            "type": "stage",
-            "stage": step.model_dump(),
-            "ts": time.time(),
-        }
-        await queue.put(f"data: {json.dumps(stage_event)}\n\n")
-        # Give the consumer a chance to run between stage messages.
-        await asyncio.sleep(0)
-    vitals_event = {
-        "type": "vitals",
-        "latency_ms": outcome.inference_ms,
-        "total_ms":   outcome.total_ms,
-        "quality":    outcome.quality_score,
-    }
-    await queue.put(f"data: {json.dumps(vitals_event)}\n\n")
-    return outcome
-# ── History ──────────────────────────────────────────────────────────────────
-@router.get("/history", response_model=list[InferenceHistoryEntry])
-async def inference_history(limit: int = 50) -> list[InferenceHistoryEntry]:
-    # Clamp to 0..200. The original only capped the upper bound, so a negative
-    # limit was forwarded unchanged to get_history.
-    bounded = max(0, min(limit, 200))
-    return await get_history(limit=bounded)
-# Clears the inference history via clear_history() and answers with an empty
-# 204. response_model=None keeps FastAPI from deriving a body schema.
-@router.delete("/history", status_code=204, response_model=None)
-async def clear_inference_history():
-    await clear_history()
-    return Response(status_code=204)
-# ── Model cache ──────────────────────────────────────────────────────────────
-# Returns get_cache_status() unchanged; presumably keyed by model id — confirm
-# against inference.engine.
-@router.get("/cache")
-async def cache_status() -> dict[str, bool]:
-    return get_cache_status()
-# Evicts one model from the engine cache: 204 on success, 404 when
-# evict_model() reports that nothing was evicted.
-@router.delete("/cache/{model_id}", status_code=204, response_model=None)
-async def evict_from_cache(model_id: str):
-    evicted = evict_model(model_id)
-    if not evicted:
-        raise HTTPException(status_code=404, detail="Model not in cache")
-    return Response(status_code=204)

api/routes/jobs.py DELETED Viewed

@@ -1,56 +0,0 @@
-"""
-api/routes/jobs.py — /jobs & /download endpoints.
-"""
-from __future__ import annotations
-from fastapi import APIRouter, HTTPException
-from download.manager import cancel_job, enqueue_download, get_job, list_jobs
-from models.job import Job, JobCreate
-from observability.logger import audit, get_logger
-from registry.registry import get_model
-log = get_logger("api.jobs")
-router = APIRouter(tags=["jobs"])
-@router.post("/download", response_model=Job, status_code=202)
-async def trigger_download(body: JobCreate) -> Job:
-    """Enqueue a model download. Returns the created job immediately."""
-    # 404: the model is not in the registry.
-    model = await get_model(body.model_id)
-    if not model:
-        raise HTTPException(status_code=404, detail=f"Model '{body.model_id}' not found")
-    # 409: nothing to do, the registry already marks the model as downloaded.
-    if model.downloaded:
-        raise HTTPException(status_code=409, detail="Model is already cached locally")
-    job_id = await enqueue_download(
-        model_id=body.model_id,
-        model_name=body.model_name,
-        version=body.version,
-    )
-    # Read the job back so the response carries the stored record; a missing
-    # record at this point is treated as a server error.
-    job = await get_job(job_id)
-    if not job:
-        raise HTTPException(status_code=500, detail="Job creation failed")
-    # NOTE(review): audit() receives its fields as keyword arguments here —
-    # confirm this matches the signature in observability.logger.
-    await audit("api_download_trigger", model_id=body.model_id, job_id=job_id)
-    return job
-# Thin pass-through to the download manager's list_jobs.
-# NOTE(review): `limit` has no bounds here (no Query validator), so any integer
-# the client sends is forwarded as-is.
-@router.get("/jobs", response_model=list[Job])
-async def jobs_list(status: str | None = None, limit: int = 50) -> list[Job]:
-    return await list_jobs(status=status, limit=limit)
-# Returns one download job; 404 when get_job() yields a falsy value.
-@router.get("/jobs/{job_id}", response_model=Job)
-async def job_detail(job_id: str) -> Job:
-    job = await get_job(job_id)
-    if not job:
-        raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found")
-    return job
-# Cancels a download job: 204 on success. Every falsy result from cancel_job()
-# is reported as 409; the handler does not single out unknown job ids.
-@router.delete("/jobs/{job_id}", status_code=204, response_model=None)
-async def job_cancel(job_id: str) -> None:
-    success = await cancel_job(job_id)
-    if not success:
-        raise HTTPException(status_code=409, detail="Job cannot be cancelled")

api/routes/system.py DELETED Viewed

@@ -1,97 +0,0 @@
-"""api/routes/system.py — System metrics endpoints."""
-from __future__ import annotations
-import asyncio
-import json
-from fastapi import APIRouter, Query
-from fastapi.responses import StreamingResponse
-from models.system import SystemMetrics
-from system.metrics import sample_metrics
-router = APIRouter(prefix="/system", tags=["system"])
-# One-shot metrics sample. The dict returned by sample_metrics() is copied field
-# by field into SystemMetrics: ts, cpu_pct, ram_used_mb and ram_total_mb are
-# required keys (KeyError if absent), the rest are optional.
-@router.get("/metrics", response_model=SystemMetrics)
-async def get_metrics(gpu_index: int = Query(0, ge=0)) -> SystemMetrics:
-    payload = sample_metrics(gpu_index=gpu_index)
-    return SystemMetrics(
-        ts=payload["ts"],
-        cpu_pct=payload["cpu_pct"],
-        cpu_model=payload.get("cpu_model"),
-        cpu_freq_mhz=payload.get("cpu_freq_mhz"),
-        cpu_count=payload.get("cpu_count"),
-        ram_used_mb=payload["ram_used_mb"],
-        ram_total_mb=payload["ram_total_mb"],
-        gpu=payload.get("gpu"),
-        disks=payload.get("disks", []),
-        network=payload.get("network", []),
-    )
-@router.get("/metrics/stream")
-async def stream_metrics(
-    gpu_index: int = Query(0, ge=0),
-    hz: float = Query(2.0, ge=0.2, le=20.0),
-):
-    """Server-Sent Events stream of system metrics."""
-    # Sampling period in seconds; hz is validated to 0.2..20, so this is 0.05..5 s.
-    interval = 1.0 / float(hz)
-    async def gen():
-        # Initial comment helps some proxies establish the stream
-        yield ": connected\n\n"
-        # Runs until the client disconnects and the generator is closed.
-        while True:
-            try:
-                # NOTE(review): sample_metrics is called synchronously on the
-                # event loop — confirm it is cheap enough at up to 20 Hz.
-                payload = sample_metrics(gpu_index=gpu_index)
-                # Ensure the payload is valid JSON and wrapped in data: format
-                data = json.dumps(payload)
-                yield f"data: {data}\n\n"
-            except Exception as e:
-                # Log error but keep stream alive
-                # NOTE(review): this goes to stdout via print, not to a logger.
-                print(f"Metrics streaming error: {e}")
-            await asyncio.sleep(interval)
-    return StreamingResponse(
-        gen(),
-        media_type="text/event-stream",
-        headers={
-            "Cache-Control": "no-cache",
-            "X-Accel-Buffering": "no",
-            # NOTE(review): Connection and Transfer-Encoding are hop-by-hop
-            # headers normally managed by the server — confirm that setting
-            # them by hand is intended.
-            "Connection": "keep-alive",
-            "Transfer-Encoding": "chunked",
-        },
-    )
-@router.get("/logs/stream")
-async def stream_system_logs():
-    """SSE stream of global system and gateway logs."""
-    # Imported here rather than at module top; the subscriber list is a private
-    # name of the logger module.
-    from observability.logger import _sys_log_subs
-    # Register a per-connection queue; entries presumably arrive from the logger
-    # side, which is not visible here.
-    q: asyncio.Queue = asyncio.Queue()
-    _sys_log_subs.append(q)
-    async def generator():
-        yield ": connected\n\n"
-        try:
-            while True:
-                try:
-                    entry = await asyncio.wait_for(q.get(), timeout=30.0)
-                except asyncio.TimeoutError:
-                    # Nothing logged for 30 s: send an SSE comment so idle
-                    # connections are not dropped.
-                    yield ": heartbeat\n\n"
-                    continue
-                # None is the shutdown sentinel for this subscriber.
-                if entry is None:
-                    break
-                yield f"data: {json.dumps(entry)}\n\n"
-        finally:
-            # Always unsubscribe, including when the client disconnects.
-            if q in _sys_log_subs:
-                _sys_log_subs.remove(q)
-    return StreamingResponse(
-        generator(),
-        media_type="text/event-stream",
-        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
-    )

api/routes/training.py DELETED Viewed

@@ -1,428 +0,0 @@
-"""
-api/routes/training.py — Training Engine REST + SSE endpoints.
-POST /train/start                    — create and launch a training run
-POST /train/stop                     — cancel a running run
-POST /train/pause                    — pause a running run
-POST /train/resume                   — resume a paused run
-GET  /train/status                   — run status + progress snapshot
-GET  /train/runs                     — list all runs
-GET  /train/runs/{run_id}            — single run detail
-GET  /train/schema                   — UI schema for task/model/dataset combo
-GET  /train/checkpoints              — checkpoints for a run  (stub)
-POST /train/checkpoints/{id}/export  — export a checkpoint   (stub)
-GET  /train/metrics/stream           — SSE: real-time metrics ticks
-GET  /train/logs/stream              — SSE: real-time log entries
-GET  /train/resources/stream         — SSE: real-time resource ticks
-"""
-from __future__ import annotations
-import asyncio
-import json
-import time
-import os
-from fastapi import APIRouter, HTTPException, Query
-from fastapi.responses import StreamingResponse
-from observability.logger import get_logger
-from training import run_manager
-from training.schema_engine import generate_schema
-from training.schemas import (
-    CheckpointOut,
-    PauseTrainRequest,
-    ResumeTrainRequest,
-    StartTrainRequest,
-    StartTrainResponse,
-    StopTrainRequest,
-    TrainRunOut,
-    TrainStatusResponse,
-    TrainingSchemaResponse,
-)
-log = get_logger("api.training")
-router = APIRouter(prefix="/train", tags=["training"])
-# ── Helpers ────────────────────────────────────────────────────────────────────
-def _format_duration(seconds: float) -> str:
-    h = int(seconds // 3600)
-    m = int((seconds % 3600) // 60)
-    s = int(seconds % 60)
-    return f"{h}h {m}m {s}s"
-def _run_to_out(run: run_manager.TrainRun) -> TrainRunOut:
-    elapsed = (run.completed_at or time.time()) - run.created_at
-    return TrainRunOut(
-        id=run.run_id,
-        run_number=run.run_number,
-        model_id=run.model_id,
-        model_name=run.model_name,
-        dataset_id=run.dataset_id,
-        dataset_name=run.dataset_name,
-        task=run.task,
-        status=run.status,
-        epochs_done=run.epoch,
-        total_epochs=run.total_epochs,
-        best_metric=run.best_metric,
-        final_loss=run.final_loss,
-        duration=_format_duration(elapsed),
-        created_at=run.created_at,
-        completed_at=run.completed_at,
-        hyperparams=run.hyperparams,
-    )
-# ── Control endpoints ─────────────────────────────────────────────────────────
-@router.post("/start", response_model=StartTrainResponse)
-async def start_training(body: StartTrainRequest) -> StartTrainResponse:
-    """Create and immediately launch a training run."""
-    # Resolve friendly names (fall back to ids if registries unavailable)
-    model_name   = body.model_id
-    dataset_name = body.dataset_id
-    try:
-        from registry.registry import get_model
-        m = await get_model(body.model_id)
-        if m:
-            model_name = m.name
-    except Exception:
-        pass
-    try:
-        from datasets.registry import get_dataset
-        d = await get_dataset(body.dataset_id)
-        if d:
-            dataset_name = d.get("name", body.dataset_id) if isinstance(d, dict) else getattr(d, "name", body.dataset_id)
-    except Exception:
-        pass
-    run = run_manager.create_run(
-        model_id=body.model_id,
-        model_name=model_name,
-        dataset_id=body.dataset_id,
-        dataset_name=dataset_name,
-        task=body.task,
-        hyperparams=body.hyperparams,
-        augmentation=body.augmentation,
-        scheduler=body.scheduler,
-        project_id=body.project_id
-    )
-    run_manager.start_run(run)
-    log.info("training_started", run_id=run.run_id, model=body.model_id)
-    return StartTrainResponse(
-        run_id=run.run_id,
-        status=run.status,
-        message=f"Training run {run.run_id} started.",
-    )
-@router.post("/stop", status_code=200)
-async def stop_training(body: StopTrainRequest) -> dict:
-    run = run_manager.get_run(body.run_id)
-    if not run:
-        raise HTTPException(status_code=404, detail=f"Run '{body.run_id}' not found")
-    run_manager.stop_run(run)
-    log.info("training_stopped", run_id=body.run_id)
-    return {"run_id": body.run_id, "status": run.status}
-@router.post("/pause", status_code=200)
-async def pause_training(body: PauseTrainRequest) -> dict:
-    run = run_manager.get_run(body.run_id)
-    if not run:
-        raise HTTPException(status_code=404, detail=f"Run '{body.run_id}' not found")
-    run_manager.pause_run(run)
-    return {"run_id": body.run_id, "status": run.status}
-@router.post("/resume", status_code=200)
-async def resume_training(body: ResumeTrainRequest) -> dict:
-    run = run_manager.get_run(body.run_id)
-    if not run:
-        raise HTTPException(status_code=404, detail=f"Run '{body.run_id}' not found")
-    run_manager.resume_run(run)
-    return {"run_id": body.run_id, "status": run.status}
-@router.get("/status", response_model=TrainStatusResponse)
-async def get_train_status(run_id: str = Query(...)) -> TrainStatusResponse:
-    run = run_manager.get_run(run_id)
-    if not run:
-        raise HTTPException(status_code=404, detail=f"Run '{run_id}' not found")
-    return TrainStatusResponse(
-        run_id=run.run_id,
-        status=run.status,
-        epoch=run.epoch,
-        total_epochs=run.total_epochs,
-        step=run.step,
-        total_steps=run.total_epochs * 100,
-        eta_seconds=run.eta_seconds,
-        elapsed_seconds=run.elapsed_seconds,
-    )
-# ── Run history ───────────────────────────────────────────────────────────────
-@router.get("/runs", response_model=list[TrainRunOut])
-async def list_runs() -> list[TrainRunOut]:
-    return [_run_to_out(r) for r in reversed(run_manager.list_runs())]
-@router.get("/runs/{run_id}", response_model=TrainRunOut)
-async def get_run(run_id: str) -> TrainRunOut:
-    run = run_manager.get_run(run_id)
-    if not run:
-        raise HTTPException(status_code=404, detail=f"Run '{run_id}' not found")
-    return _run_to_out(run)
-# ── Schema Engine ─────────────────────────────────────────────────────────────
-@router.get("/schema", response_model=TrainingSchemaResponse)
-async def get_schema(
-    model_id:   str = Query(""),
-    dataset_id: str = Query(""),
-    task:       str = Query("detection"),
-) -> TrainingSchemaResponse:
-    schema = generate_schema(task=task, model_id=model_id, dataset_id=dataset_id)
-    return TrainingSchemaResponse(**schema)
-# ── Checkpoints (stub — extend when artifact storage is wired) ────────────────
-@router.get("/checkpoints", response_model=list[CheckpointOut])
-async def list_checkpoints(run_id: str = Query(...)) -> list[CheckpointOut]:
-    """Returns an empty list until checkpoint persistence is implemented."""
-    run = run_manager.get_run(run_id)
-    if not run:
-        raise HTTPException(status_code=404, detail=f"Run '{run_id}' not found")
-    return []
-@router.post("/checkpoints/{checkpoint_id}/export")
-async def export_checkpoint(checkpoint_id: str, body: dict = {}) -> dict:
-    raise HTTPException(status_code=501, detail="Checkpoint export not yet implemented")
-# ── SSE: Metrics stream ────────────────────────────────────────────────────────
-@router.get("/metrics/stream")
-async def stream_metrics(run_id: str = Query(...)) -> StreamingResponse:
-    """
-    Server-Sent Events stream of TrainMetricsTick objects.
-    Connects to the run's metrics queue and forwards each tick as SSE.
-    Stream closes when the run finishes (sentinel None pushed by worker).
-    """
-    run = run_manager.get_run(run_id)
-    if not run:
-        raise HTTPException(status_code=404, detail=f"Run '{run_id}' not found")
-    q: asyncio.Queue = asyncio.Queue()
-    run.metrics_subs.append(q)
-    async def generator():
-        yield ": connected\n\n"
-        try:
-            while True:
-                try:
-                    tick = await asyncio.wait_for(q.get(), timeout=30.0)
-                except asyncio.TimeoutError:
-                    # Heartbeat to keep connection alive
-                    yield ": heartbeat\n\n"
-                    continue
-                if tick is None:
-                    break
-                yield f"data: {json.dumps(tick)}\n\n"
-        finally:
-            if q in run.metrics_subs:
-                run.metrics_subs.remove(q)
-    return StreamingResponse(
-        generator(),
-        media_type="text/event-stream",
-        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
-    )
-# ── SSE: Logs stream ──────────────────────────────────────────────────────────
-@router.get("/logs/stream")
-async def stream_logs(run_id: str = Query(...)) -> StreamingResponse:
-    """Server-Sent Events stream of LogEntry objects."""
-    run = run_manager.get_run(run_id)
-    if not run:
-        raise HTTPException(status_code=404, detail=f"Run '{run_id}' not found")
-    q: asyncio.Queue = asyncio.Queue()
-    run.log_subs.append(q)
-    async def generator():
-        yield ": connected\n\n"
-        try:
-            while True:
-                try:
-                    entry = await asyncio.wait_for(q.get(), timeout=30.0)
-                except asyncio.TimeoutError:
-                    yield ": heartbeat\n\n"
-                    continue
-                if entry is None:
-                    break
-                yield f"data: {json.dumps(entry)}\n\n"
-        finally:
-            if q in run.log_subs:
-                run.log_subs.remove(q)
-    return StreamingResponse(
-        generator(),
-        media_type="text/event-stream",
-        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
-    )
-@router.get("/runs/{run_id}/history")
-async def get_run_history(run_id: str) -> list[dict]:
-    """Retrieves the full historical telemetry (metrics ticks) for a run."""
-    run = run_manager.get_run(run_id)
-    if not run:
-        raise HTTPException(status_code=404, detail=f"Run '{run_id}' not found")
-    from training.persistence import TrainingPersistence
-    run_dir = await TrainingPersistence.get_run_dir(run.project_id or "default", run_id)
-    telemetry_path = os.path.join(run_dir, "telemetry.jsonl")
-    history = []
-    if os.path.exists(telemetry_path):
-        try:
-            with open(telemetry_path, "r") as f:
-                for line in f:
-                    if line.strip():
-                        history.append(json.loads(line))
-        except Exception as e:
-            log.error("history_read_failed", run_id=run_id, error=str(e))
-            raise HTTPException(status_code=500, detail="Failed to read telemetry history")
-    return history
-@router.get("/runs/{run_id}/artifacts")
-async def list_run_artifacts(run_id: str) -> dict:
-    """Lists available artifacts (images) for a specific run by scanning the directory."""
-    run = run_manager.get_run(run_id)
-    if not run:
-        raise HTTPException(status_code=404, detail=f"Run '{run_id}' not found")
-    from training.persistence import TrainingPersistence
-    run_dir = await TrainingPersistence.get_run_dir(run.project_id or "default", run_id)
-    if not os.path.exists(run_dir):
-        return {"artifacts": [], "batches": []}
-    artifacts = []
-    batches = []
-    # Standard YOLO artifact mappings for better UI titles
-    titles = {
-        "confusion_matrix.png": "Confusion Matrix",
-        "confusion_matrix_normalized.png": "Confusion Matrix (Norm)",
-        "results.png": "Results Summary",
-        "F1_curve.png": "F1 Curve",
-        "PR_curve.png": "PR Curve",
-        "P_curve.png": "Precision Curve",
-        "R_curve.png": "Recall Curve",
-        "BoxF1_curve.png": "Box F1 Curve",
-        "BoxP_curve.png": "Box Precision Curve",
-        "BoxPR_curve.png": "Box PR Curve",
-        "BoxR_curve.png": "Box Recall Curve",
-        "labels.jpg": "Labels Distribution",
-        "labels_correlogram.jpg": "Labels Correlogram"
-    }
-    for f in os.listdir(run_dir):
-        path = f"/train/runs/{run_id}/files/{f}"
-        if f.endswith(('.png', '.jpg', '.jpeg')):
-            item = {
-                "title": titles.get(f, f.replace('_', ' ').title().split('.')[0]),
-                "path": path,
-                "type": "Analysis"
-            }
-            if "batch" in f.lower():
-                item["type"] = "Batch Preview" if "val" in f.lower() else "Augmentation"
-                batches.append(item)
-            else:
-                if "curve" in f.lower():
-                    item["type"] = "Precision-Recall"
-                elif "confusion" in f.lower():
-                    item["type"] = "Analysis"
-                elif "results" in f.lower():
-                    item["type"] = "Overall"
-                artifacts.append(item)
-    return {
-        "artifacts": sorted(artifacts, key=lambda x: x['title']),
-        "batches": sorted(batches, key=lambda x: x['title'])
-    }
-@router.get("/runs/{run_id}/files/{filename}")
-async def get_run_file(run_id: str, filename: str):
-    """Serves a specific file from the run directory."""
-    run = run_manager.get_run(run_id)
-    if not run:
-        raise HTTPException(status_code=404, detail="Run not found")
-    # We need to find the project to get the run_dir
-    # Since run_manager doesn't easily expose the full path in memory,
-    # we recalculate it using persistence
-    from training.persistence import TrainingPersistence
-    run_dir = await TrainingPersistence.get_run_dir(run.project_id or "default", run_id)
-    file_path = os.path.join(run_dir, filename)
-    if not os.path.exists(file_path):
-        raise HTTPException(status_code=404, detail="File not found")
-    from fastapi.responses import FileResponse
-    return FileResponse(file_path)
-# The frontend uses /system/metrics/stream for resources (already implemented).
-# This alias exists for training-scoped resource monitoring.
-@router.get("/resources/stream")
-async def stream_resources(
-    run_id:    str   = Query(...),
-    gpu_index: int   = Query(0, ge=0),
-    hz:        float = Query(1.0, ge=0.2, le=10.0),
-) -> StreamingResponse:
-    """
-    SSE stream of ResourceTick objects for a specific training run.
-    Forwards system metrics at the requested hz rate.
-    """
-    run = run_manager.get_run(run_id)
-    if not run:
-        raise HTTPException(status_code=404, detail=f"Run '{run_id}' not found")
-    q: asyncio.Queue = asyncio.Queue()
-    run.resource_subs.append(q)
-    interval = 1.0 / hz
-    async def generator():
-        yield ": connected\n\n"
-        try:
-            while True:
-                try:
-                    tick = await asyncio.wait_for(q.get(), timeout=30.0)
-                except asyncio.TimeoutError:
-                    yield ": heartbeat\n\n"
-                    continue
-                if tick is None:
-                    break
-                yield f"data: {json.dumps(tick)}\n\n"
-        finally:
-            if q in run.resource_subs:
-                run.resource_subs.remove(q)
-    return StreamingResponse(
-        generator(),
-        media_type="text/event-stream",
-        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
-    )

benchmark/__init__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- # benchmark — Benchmark Bridge System for MLForge

benchmark/__pycache__/__init__.cpython-310.pyc DELETED Viewed

Binary file (144 Bytes)

benchmark/__pycache__/compatibility.cpython-310.pyc DELETED Viewed

Binary file (8.3 kB)

benchmark/__pycache__/execution.cpython-310.pyc DELETED Viewed

Binary file (10.4 kB)

benchmark/__pycache__/metrics.cpython-310.pyc DELETED Viewed

Binary file (3.24 kB)

benchmark/__pycache__/orchestrator.cpython-310.pyc DELETED Viewed

Binary file (9.11 kB)

benchmark/__pycache__/registry.cpython-310.pyc DELETED Viewed

Binary file (8.77 kB)

benchmark/__pycache__/telemetry.cpython-310.pyc DELETED Viewed

Binary file (6.73 kB)

benchmark/adapters/__pycache__/base.cpython-310.pyc DELETED Viewed

Binary file (1.8 kB)

benchmark/adapters/__pycache__/registry.cpython-310.pyc DELETED Viewed

Binary file (1.89 kB)

benchmark/adapters/__pycache__/torch_runner.cpython-310.pyc DELETED Viewed

Binary file (1.93 kB)

benchmark/adapters/base.py DELETED Viewed

@@ -1,38 +0,0 @@
-"""
-benchmark/adapters/base.py — Base class for all Benchmark Runners.
-"""
-from __future__ import annotations
-from abc import ABC, abstractmethod
-from dataclasses import dataclass, field
-from typing import Any, AsyncGenerator
-from models.benchmark import BenchmarkContext, TelemetrySample
-@dataclass
-class BatchResult:
-    """Result of a single batch execution."""
-    latency_ms: float
-    vram_used_gb: float
-    task_scores: dict[str, float] = field(default_factory=dict)
-    metadata: dict[str, Any] = field(default_factory=dict)
-class BaseRunner(ABC):
-    """Abstract interface for benchmark executors (Torch, Optimum, vLLM)."""
-    @abstractmethod
-    async def initialize(self, ctx: BenchmarkContext, model_path: str) -> None:
-        """Load model and prepare environment."""
-        pass
-    @abstractmethod
-    async def run_batch(self, batch: Any) -> BatchResult:
-        """Execute a single batch of data."""
-        pass
-    @abstractmethod
-    async def shutdown(self) -> None:
-        """Release resources."""
-        pass

benchmark/adapters/optimum_runner.py DELETED Viewed

@@ -1,53 +0,0 @@
-"""
-benchmark/adapters/optimum_runner.py — Hugging Face Optimum Adapter.
-Supports ONNX, OpenVINO, and TensorRT acceleration.
-"""
-from __future__ import annotations
-import time
-import asyncio
-from typing import Any
-from benchmark.adapters.base import BaseRunner, BatchResult
-from models.benchmark import BenchmarkContext
-from observability.logger import get_logger
-log = get_logger("benchmark.optimum")
-class OptimumRunner(BaseRunner):
-    def __init__(self):
-        self.session = None
-        self.device = "cpu"
-    async def initialize(self, ctx: BenchmarkContext, model_path: str) -> None:
-        """
-        Load model using Optimum's ORTModel or equivalent.
-        In a real implementation, this would detect the framework and use:
-        ORTModelForFeatureExtraction.from_pretrained(model_path, provider=...)
-        """
-        log.info("optimum_init", model_path=model_path, hardware=ctx.hardware)
-        self.device = "cuda" if "gpu" in ctx.hardware.lower() or "rtx" in ctx.hardware.lower() else "cpu"
-        # Simulate load time
-        await asyncio.sleep(1.5)
-        self.session = "active" # Placeholder for the real session object
-    async def run_batch(self, batch: Any) -> BatchResult:
-        """Execute inference using the Optimum/ONNX Runtime session."""
-        if not self.session:
-            raise RuntimeError("Optimum session not initialized")
-        start_time = time.perf_counter()
-        # Mocking inference logic
-        # outputs = self.session(**batch)
-        await asyncio.sleep(0.01) # Simulated inference time
-        latency = (time.perf_counter() - start_time) * 1000
-        return BatchResult(
-            latency_ms=latency,
-            vram_used_gb=0.8, # Mocked
-            task_scores={"accuracy": 0.92} # Mocked
-        )
-    async def shutdown(self) -> None:
-        log.info("optimum_shutdown")
-        self.session = None

benchmark/adapters/registry.py DELETED Viewed

@@ -1,44 +0,0 @@
-"""
-benchmark/adapters/registry.py — Executor Registry for dynamic runner resolution.
-"""
-from __future__ import annotations
-from typing import Type
-from benchmark.adapters.base import BaseRunner
-from models.benchmark import BenchmarkContext
-from models.model import Model
-class ExecutorRegistry:
-    _runners: dict[str, Type[BaseRunner]] = {}
-    @classmethod
-    def register(cls, framework: str, runner_cls: Type[BaseRunner]):
-        cls._runners[framework.lower()] = runner_cls
-    @classmethod
-    def get_runner(cls, framework: str) -> BaseRunner:
-        runner_cls = cls._runners.get(framework.lower())
-        if not runner_cls:
-            # Fallback or default runner
-            from benchmark.adapters.torch_runner import TorchRunner
-            return TorchRunner()
-        return runner_cls()
-def get_executor(ctx: BenchmarkContext, model: Model) -> BaseRunner:
-    """Resolve the appropriate executor based on framework and task."""
-    framework = model.framework.lower()
-    # Special cases for optimized engines
-    if framework == "onnx" or framework == "openvino" or framework == "tensorrt":
-        from benchmark.adapters.optimum_runner import OptimumRunner
-        return OptimumRunner()
-    if ctx.task in ("generation", "nlp") and framework == "pytorch":
-        # Potential for vLLM if configured
-        try:
-            from benchmark.adapters.vllm_runner import VLLMRunner
-            return VLLMRunner()
-        except ImportError:
-            pass
-    return ExecutorRegistry.get_runner(framework)

benchmark/adapters/torch_runner.py DELETED Viewed

@@ -1,45 +0,0 @@
-"""
-benchmark/adapters/torch_runner.py — PyTorch Runner Adapter.
-Wraps standard PyTorch inference for Vision and NLP tasks.
-"""
-from __future__ import annotations
-import time
-import asyncio
-import random
-from typing import Any
-from benchmark.adapters.base import BaseRunner, BatchResult
-from models.benchmark import BenchmarkContext
-from observability.logger import get_logger
-log = get_logger("benchmark.torch")
-class TorchRunner(BaseRunner):
-    def __init__(self):
-        self.model = None
-        self.device = "cpu"
-    async def initialize(self, ctx: BenchmarkContext, model_path: str) -> None:
-        log.info("torch_init", model_path=model_path, hardware=ctx.hardware)
-        # In production: self.model = torch.load(model_path).to(self.device)
-        await asyncio.sleep(1.0)
-        self.model = "active"
-    async def run_batch(self, batch: Any) -> BatchResult:
-        if not self.model:
-            raise RuntimeError("Torch model not initialized")
-        start_time = time.perf_counter()
-        # Mocking torch inference
-        await asyncio.sleep(0.02)
-        latency = (time.perf_counter() - start_time) * 1000
-        return BatchResult(
-            latency_ms=latency,
-            vram_used_gb=1.2,
-            task_scores={"mAP": 0.45}
-        )
-    async def shutdown(self) -> None:
-        log.info("torch_shutdown")
-        self.model = None

benchmark/compatibility.py DELETED Viewed

@@ -1,360 +0,0 @@
-"""
-benchmark/compatibility.py — Compatibility Validator (CRITICAL MODULE).
-Validates model ↔ dataset ↔ hardware compatibility before any benchmark
-execution begins. Returns a structured ValidationReport — never raises.
-Five gates (all must pass):
-  A. Task compatibility       — model.task matches dataset.task
-  B. Annotation format        — dataset format supports the model's task
-  C. Framework × hardware     — framework can run on the requested device
-  D. VRAM constraint          — estimated memory fits available VRAM
-  E. Precision support        — precision mode is valid for framework + hardware
-"""
-from __future__ import annotations
-from models.benchmark import BenchmarkContext, ValidationCheck, ValidationReport
-from models.dataset import Dataset
-from models.model import Model
-from observability.logger import get_logger
-log = get_logger("benchmark.compatibility")
-# ── Lookup tables ─────────────────────────────────────────────────────────────
-# Hardware → available VRAM in GB (normalized keys, no spaces/dashes)
-HARDWARE_VRAM_GB: dict[str, float] = {
-    # NVIDIA consumer — Ampere / Ada
-    "rtx4090":   24.0,
-    "rtx4080":   16.0,
-    "rtx4070ti": 12.0,
-    "rtx4070":   12.0,
-    "rtx4060ti":  8.0,
-    "rtx4060":    8.0,
-    "rtx3090":   24.0,
-    "rtx3080":   10.0,
-    "rtx3070":    8.0,
-    "rtx3060":   12.0,
-    "rtx2080ti": 11.0,
-    "rtx2080":    8.0,
-    # NVIDIA datacenter
-    "a100":      80.0,
-    "a10040gb":  40.0,
-    "h100":      80.0,
-    "v100":      32.0,
-    "t4":        16.0,
-    "a10":       24.0,
-    # AMD
-    "rx7900xtx": 24.0,
-    "rx6800xt":  16.0,
-    # Generic fallbacks
-    "gpu":        8.0,
-    "cpu":        0.0,
-    "tpu":        0.0,
-    "edge":       0.0,
-}
-# model.task → set of compatible dataset.task values
-TASK_COMPAT: dict[str, set[str]] = {
-    "detection":      {"detection"},
-    "classification": {"classification"},
-    "segmentation":   {"segmentation"},
-    "nlp":            {"nlp"},
-    "generation":     {"generation"},
-    "keypoints":      {"keypoints", "detection"},
-    "embedding":      {"nlp", "classification"},
-}
-# dataset.format → set of model tasks it supports
-FORMAT_TASK_COMPAT: dict[str, set[str]] = {
-    "yolo":      {"detection", "segmentation", "keypoints"},
-    "coco":      {"detection", "segmentation", "keypoints"},
-    "voc":       {"detection"},
-    "csv":       {"classification"},
-    "json":      {"detection", "segmentation", "classification", "nlp", "generation"},
-    "tfrecord":  {"detection", "classification", "segmentation"},
-    "custom":    {"detection", "classification", "segmentation", "nlp", "generation", "keypoints"},
-}
-# model.framework → set of hardware targets (normalized) it can run on
-FRAMEWORK_HARDWARE_COMPAT: dict[str, set[str]] = {
-    "pytorch": {
-        "cpu", "gpu",
-        "rtx4090", "rtx4080", "rtx4070ti", "rtx4070", "rtx4060ti", "rtx4060",
-        "rtx3090", "rtx3080", "rtx3070", "rtx3060",
-        "rtx2080ti", "rtx2080",
-        "a100", "a10040gb", "h100", "v100", "t4", "a10",
-    },
-    "onnx": {
-        "cpu", "gpu",
-        "rtx4090", "rtx3090", "a100", "h100", "t4", "a10",
-        "edge",
-    },
-    "tensorflow": {
-        "cpu", "gpu",
-        "rtx4090", "rtx3090", "a100", "h100", "v100", "t4",
-        "tpu",
-    },
-    "tflite": {"cpu", "edge"},
-    "coreml": {"cpu"},
-}
-# Precisions that require GPU
-_GPU_ONLY_PRECISIONS = {"FP16", "BF16"}
-# Frameworks supporting INT8 quantization
-_INT8_FRAMEWORKS = {"onnx", "tflite", "pytorch", "tensorflow"}
-class CompatibilityValidator:
-    """
-    Runs all compatibility gates before a benchmark job is created.
-    Returns a ValidationReport — never raises exceptions.
-    """
-    def validate(
-        self,
-        model: Model,
-        dataset: Dataset,
-        ctx: BenchmarkContext,
-    ) -> ValidationReport:
-        checks: list[ValidationCheck] = [
-            self._check_task(model, dataset),
-            self._check_annotation_format(model, dataset),
-            self._check_framework_hardware(model, ctx),
-            self._check_vram(model, ctx),
-            self._check_precision(model, ctx),
-        ]
-        errors   = [c.detail for c in checks if not c.passed]
-        warnings: list[str] = []
-        log.info(
-            "compatibility_validated",
-            model_id   = model.id,
-            dataset_id = dataset.id,
-            passed     = len(errors) == 0,
-            error_count = len(errors),
-        )
-        return ValidationReport(
-            model_id   = model.id,
-            dataset_id = dataset.id,
-            passed     = len(errors) == 0,
-            checks     = checks,
-            errors     = errors,
-            warnings   = warnings,
-        )
-    # ── Gate A: Task ──────────────────────────────────────────────────────────
-    def _check_task(self, model: Model, dataset: Dataset) -> ValidationCheck:
-        model_task   = model.task.lower().strip()
-        dataset_task = str(dataset.task).lower().strip()
-        allowed = TASK_COMPAT.get(model_task, {model_task})
-        if dataset_task in allowed:
-            return ValidationCheck(
-                name   = "task_compatibility",
-                passed = True,
-                detail = (
-                    f"Model task '{model_task}' is compatible "
-                    f"with dataset task '{dataset_task}'"
-                ),
-            )
-        return ValidationCheck(
-            name       = "task_compatibility",
-            passed     = False,
-            detail     = (
-                f"Model task '{model_task}' cannot evaluate "
-                f"a '{dataset_task}' dataset"
-            ),
-            suggestion = (
-                f"Select a model with task='{dataset_task}', "
-                f"or choose a dataset with task='{model_task}'"
-            ),
-        )
-    # ── Gate B: Annotation Format ─────────────────────────────────────────────
-    def _check_annotation_format(self, model: Model, dataset: Dataset) -> ValidationCheck:
-        dataset_fmt  = str(dataset.format).lower().strip()
-        model_task   = model.task.lower().strip()
-        supported    = FORMAT_TASK_COMPAT.get(dataset_fmt, set())
-        if model_task in supported:
-            return ValidationCheck(
-                name   = "annotation_format",
-                passed = True,
-                detail = (
-                    f"Dataset format '{dataset_fmt}' supports "
-                    f"model task '{model_task}'"
-                ),
-            )
-        if model_task in {"detection", "segmentation", "keypoints"}:
-            suggestion = (
-                f"Convert dataset to YOLO or COCO format — both support '{model_task}'"
-            )
-        elif model_task == "classification":
-            suggestion = "Convert dataset to CSV or JSON format for classification tasks"
-        else:
-            suggestion = f"Use a JSON or custom-format dataset for '{model_task}' tasks"
-        return ValidationCheck(
-            name       = "annotation_format",
-            passed     = False,
-            detail     = (
-                f"Dataset format '{dataset_fmt}' does not support "
-                f"model task '{model_task}'"
-            ),
-            suggestion = suggestion,
-        )
-    # ── Gate C: Framework × Hardware ─────────────────────────────────────────
-    def _check_framework_hardware(
-        self, model: Model, ctx: BenchmarkContext
-    ) -> ValidationCheck:
-        framework = model.framework.lower().strip()
-        hw_raw    = ctx.hardware
-        hw_key    = self._normalize_hw(hw_raw)
-        supported_hw = FRAMEWORK_HARDWARE_COMPAT.get(framework, {"cpu"})
-        # Match: exact key, or generic "gpu" bucket covers any named GPU
-        hw_ok = (
-            hw_key in supported_hw
-            or ("gpu" in supported_hw and hw_key not in {"cpu", "tpu", "edge"})
-        )
-        if hw_ok:
-            return ValidationCheck(
-                name   = "framework_hardware",
-                passed = True,
-                detail = f"Framework '{framework}' is supported on '{hw_raw}'",
-            )
-        return ValidationCheck(
-            name       = "framework_hardware",
-            passed     = False,
-            detail     = (
-                f"Framework '{framework}' cannot run on '{hw_raw}'. "
-                f"Supported targets: {', '.join(sorted(supported_hw))}"
-            ),
-            suggestion = (
-                "Use ONNX runtime for broadest hardware support, "
-                f"or pick a device from: {', '.join(sorted(supported_hw))}"
-            ),
-        )
-    # ── Gate D: VRAM Constraint ───────────────────────────────────────────────
-    def _check_vram(self, model: Model, ctx: BenchmarkContext) -> ValidationCheck:
-        """
-        Gate D: compare a rough VRAM estimate against the VRAM available on ctx.hardware.
-        Passes unconditionally when the looked-up VRAM is 0.0; otherwise passes
-        only when the estimate is less than or equal to the available amount.
-        On failure the check carries a suggestion (smaller batch, lower
-        precision, or a larger GPU).
-        """
-        hw_key    = self._normalize_hw(ctx.hardware)
-        available = self._lookup_vram(hw_key)
-        # 0.0 is treated as "no VRAM to constrain".
-        # NOTE(review): presumably _lookup_vram reads HARDWARE_VRAM_GB, where cpu/tpu/edge are 0.0 — confirm.
-        if available == 0.0:
-            return ValidationCheck(
-                name   = "vram_constraint",
-                passed = True,
-                detail = f"Running on '{ctx.hardware}' (CPU/TPU/Edge) — no VRAM constraint",
-            )
-        # Estimate: weights at given precision + activations for one batch
-        # NOTE(review): assumes model.size is a byte count (divided by 1024**3 to get GB) — confirm.
-        model_gb  = max(model.size, 1) / (1024 ** 3)
-        # Multiplier applied to model_gb per precision; unknown precisions fall back to 1.0.
-        prec_map  = {"FP16": 0.5, "BF16": 0.5, "INT8": 0.25, "FP32": 1.0}
-        prec_mult = prec_map.get(ctx.precision.upper(), 1.0)
-        # weights × precision + ~20% for optimizer/activation buffers + batch overhead
-        # (batch overhead is a flat 0.05 GB per sample in the batch)
-        estimated = (model_gb * prec_mult * 1.2) + (ctx.batch_size * 0.05)
-        if estimated <= available:
-            return ValidationCheck(
-                name   = "vram_constraint",
-                passed = True,
-                detail = (
-                    f"Estimated VRAM {estimated:.2f} GB ≤ "
-                    f"available {available:.1f} GB on '{ctx.hardware}'"
-                ),
-            )
-        return ValidationCheck(
-            name       = "vram_constraint",
-            passed     = False,
-            detail     = (
-                f"Estimated VRAM {estimated:.2f} GB exceeds "
-                f"available {available:.1f} GB on '{ctx.hardware}'"
-            ),
-            suggestion = (
-                f"Try: reduce batch_size (now {ctx.batch_size}), "
-                f"switch to FP16/INT8 precision, "
-                f"or use a GPU with ≥ {estimated:.1f} GB VRAM"
-            ),
-        )
-    # ── Gate E: Precision Support ─────────────────────────────────────────────
-    def _check_precision(self, model: Model, ctx: BenchmarkContext) -> ValidationCheck:
-        """Gate E: verify the requested precision suits the hardware and framework.
-        Fails when a GPU-only precision is requested on cpu / tpu / edge, or
-        when INT8 is requested for a framework outside _INT8_FRAMEWORKS. The
-        hardware rule is evaluated first.
-        """
-        requested = ctx.precision.upper()
-        fw_name   = model.framework.lower().strip()
-        target    = self._normalize_hw(ctx.hardware)
-        cpu_like  = target in {"cpu", "tpu", "edge"}
-        failure = None   # (detail, suggestion) once a rule is violated
-        if requested in _GPU_ONLY_PRECISIONS and cpu_like:
-            failure = (
-                f"Precision '{requested}' requires a CUDA GPU; "
-                f"'{ctx.hardware}' does not support it",
-                "Use FP32 for CPU inference, or switch to a compatible GPU",
-            )
-        elif requested == "INT8" and fw_name not in _INT8_FRAMEWORKS:
-            failure = (
-                f"Framework '{fw_name}' does not support INT8 quantization",
-                "Convert model to ONNX or use PyTorch with torch.quantization",
-            )
-        if failure is not None:
-            detail, suggestion = failure
-            return ValidationCheck(
-                name       = "precision_support",
-                passed     = False,
-                detail     = detail,
-                suggestion = suggestion,
-            )
-        return ValidationCheck(
-            name   = "precision_support",
-            passed = True,
-            detail = (
-                f"Precision '{requested}' is valid for "
-                f"framework '{fw_name}' on '{ctx.hardware}'"
-            ),
-        )
-    # ── Helpers ───────────────────────────────────────────────────────────────
-    @staticmethod
-    def _normalize_hw(hardware: str) -> str:
-        """Return a lookup key: lowercased, separators and vendor words removed."""
-        key = hardware.lower()
-        # Removal order is fixed: separators first, then brand words.
-        for noise in (" ", "-", "_", "nvidia", "geforce"):
-            key = key.replace(noise, "")
-        return key
-    @staticmethod
-    def _lookup_vram(hw_key: str) -> float:
-        """Resolve VRAM (GB) for a normalized hardware key.
-        Tries an exact table hit, then the first table key contained in
-        ``hw_key`` (e.g. "rtx4090laptop" → "rtx4090"), then the generic "gpu"
-        entry for GPU-looking names. Returns 0.0 when nothing matches, which
-        callers treat as "no VRAM constraint".
-        """
-        if hw_key in HARDWARE_VRAM_GB:
-            return HARDWARE_VRAM_GB[hw_key]
-        for known_key, capacity in HARDWARE_VRAM_GB.items():
-            if known_key and known_key in hw_key:
-                return capacity
-        # Not in the table, but the name still looks like a GPU.
-        if any(marker in hw_key for marker in ("gpu", "rtx", "gtx", "cuda")):
-            return HARDWARE_VRAM_GB["gpu"]
-        return 0.0   # CPU / unknown → no VRAM constraint

benchmark/execution.py DELETED Viewed

@@ -1,366 +0,0 @@
-"""
-benchmark/execution.py — Benchmark Execution Engine.
-Drives the batch inference loop, collecting latencies and VRAM readings.
-Calls TelemetryCollector in parallel with batch processing.
-Yields progress callbacks so the orchestrator can persist real-time state.
-Adapter pattern: swap _run_single_batch() with a real inference call
-(torch.cuda.synchronize + model(batch)) once GPU runtime is wired up.
-PRODUCTION SWAP POINTS are marked with  # <<< REPLACE IN PRODUCTION >>>
-"""
-from __future__ import annotations
-import asyncio
-import math
-import random
-from dataclasses import dataclass, field
-from typing import Awaitable, Callable
-from benchmark.compatibility import HARDWARE_VRAM_GB
-from benchmark.telemetry import TelemetryCollector
-from models.benchmark import BenchmarkJob, LayerBreakdown, TelemetrySample, TelemetrySummary
-from models.dataset import Dataset
-from models.model import Model
-from observability.logger import get_logger
-log = get_logger("benchmark.execution")
-# ── Per-image latency profiles (ms at batch=1, fp32) ─────────────────────────
-# Keys are normalized hardware names. _lookup_latency returns the FIRST key
-# that is a substring of the requested hardware, in insertion order, so a
-# longer key must stay ahead of any key it contains ("rtx4070ti" before
-# "rtx4070", "a100" before "a10"). "gpu" and "cpu" are the generic fallbacks.
-_LATENCY_MS_PER_IMAGE: dict[str, float] = {
-    "rtx4090":   1.8,
-    "rtx4080":   2.5,
-    "rtx4070ti": 3.2,
-    "rtx4070":   3.8,
-    "rtx3090":   3.0,
-    "rtx3080":   4.5,
-    "rtx3070":   6.5,
-    "rtx3060":   9.0,
-    "rtx2080ti": 5.0,
-    "rtx2080":   7.5,
-    "a100":      1.2,
-    "h100":      0.7,
-    "v100":      2.8,
-    "t4":        5.5,
-    "a10":       3.5,
-    "gpu":       8.0,
-    "cpu":      42.0,
-}
-# Precision speedup multipliers (relative to FP32). The estimated batch latency
-# is divided by this value; precisions not listed are treated as 1.0.
-_PRECISION_SPEEDUP: dict[str, float] = {
-    "FP32": 1.0,
-    "FP16": 1.8,
-    "BF16": 1.7,
-    "INT8": 2.5,
-}
-# Task-specific baseline metric scores (pre-jitter). Looked up by lowercased
-# task name; tasks not listed fall back to {"accuracy": 0.80}.
-_TASK_BASELINES: dict[str, dict[str, float]] = {
-    "detection":      {"mAP": 0.435, "mAP_50": 0.618, "mAP_50_95": 0.435},
-    "classification": {"accuracy": 0.872, "top5": 0.968},
-    "segmentation":   {"mAP": 0.372, "iou_mean": 0.706},
-    "keypoints":      {"mAP": 0.641, "mAP_50": 0.860},
-    "nlp":            {"accuracy": 0.891},
-    "generation":     {"accuracy": 0.780},
-}
-# Cap simulated batches so large datasets don't stall the event loop.
-# Applies to dataset runs only; live/video runs use their own fixed count.
-_MAX_SIMULATED_BATCHES = 250
-@dataclass
-class ExecutionResult:
-    """Raw output from the execution engine, consumed by MetricsEngine."""
-    # One latency value (ms) per executed batch, in execution order.
-    latencies_ms:      list[float]
-    # Image count the run is reported against (not the number of batches executed).
-    total_images:      int
-    # One VRAM reading per executed batch, clamped to be non-negative.
-    vram_samples:      list[float]
-    # Task metric name → score, as produced by the executor.
-    task_scores:       dict[str, float]
-    # Samples gathered by the TelemetryCollector during the run.
-    telemetry_samples: list[TelemetrySample] = field(default_factory=list)
-    # Summary returned when the TelemetryCollector is stopped.
-    telemetry_summary: TelemetrySummary = field(default_factory=TelemetrySummary)
-# Progress callback type: awaited as (progress, message, last_telemetry) → None.
-# `progress` is the completed fraction of the run; `last_telemetry` is the most
-# recent telemetry sample, or None when no sample has been collected yet.
-ProgressCallback = Callable[[float, str, TelemetrySample | None], Awaitable[None]]
-class BenchmarkExecutor:
-    """
-    Drives the benchmark execution loop.
-    Non-blocking: all sleeps are asyncio.sleep so other coroutines run freely.
-    """
-    async def execute(
-        self,
-        job:         BenchmarkJob,
-        model:       Model,
-        dataset:     Dataset,
-        on_progress: ProgressCallback,
-    ) -> ExecutionResult:
-        """
-        Run the benchmark loop for ``job`` and return the raw measurements.
-        Each iteration obtains one batch latency — real torch inference when
-        PyTorch is importable and the model has a downloaded local path,
-        otherwise a synthetic Gaussian-jittered latency — records a simulated
-        VRAM reading, and periodically awaits ``on_progress``.
-        Args:
-            job: Supplies hardware, batch_size, precision, task and id.
-            model: Supplies size, local_path and downloaded.
-            dataset: Supplies the image count for non-live runs.
-            on_progress: Awaited with (progress in (0, 1], message, latest
-                telemetry sample or None) on every 5th batch and on the last.
-        Returns:
-            ExecutionResult holding per-batch latencies, VRAM samples,
-            simulated task scores and the telemetry gathered during the run.
-        NOTE(review): latencies cover only the executed (capped) batches while
-        total_images is the full image count — confirm that downstream
-        throughput figures are meant to combine the two.
-        """
-        hw          = job.hardware
-        batch_sz    = job.batch_size
-        # Handle polymorphic input duration
-        is_live = getattr(job, "input_source", "dataset") in ("video", "live")
-        if is_live:
-            # Live/video sources have no dataset size, so run a fixed
-            # 10,000 batches (not subject to _MAX_SIMULATED_BATCHES).
-            total_img = 10000 * batch_sz
-            n_batches = 10000
-            sim_batches = 10000
-        else:
-            total_img   = max(dataset.images, 100)   # floor so simulation always runs
-            n_batches   = math.ceil(total_img / batch_sz)
-            sim_batches = min(n_batches, _MAX_SIMULATED_BATCHES)
-        vram_total = self._get_vram_gb(hw, model)
-        vram_frac  = self._vram_usage_fraction(hw)
-        telemetry = TelemetryCollector(hw, vram_total_gb=vram_total)
-        await telemetry.start()
-        latencies:    list[float] = []
-        vram_samples: list[float] = []
-        base_lat_ms = self._base_batch_latency_ms(hw, model, batch_sz, job.precision)
-        # Resolve real model path once (None → use simulation)
-        real_model_path = model.local_path if model.local_path and model.downloaded else None
-        use_real_inference = self._check_torch_available() and real_model_path is not None
-        # We are inside a coroutine, so the running loop is the one to use;
-        # get_running_loop() is the documented replacement for get_event_loop() here.
-        loop = asyncio.get_running_loop()
-        try:
-            for sim_idx in range(sim_batches):
-                # Map simulated index back to real batch index
-                real_idx = int(sim_idx * (n_batches / sim_batches))
-                if use_real_inference:
-                    # Real inference via torch_runner (runs in the default thread executor)
-                    try:
-                        from benchmark.torch_runner import run_torch_batch
-                        batch_lat_ms = await loop.run_in_executor(
-                            None,
-                            run_torch_batch,
-                            real_model_path,
-                            batch_sz,
-                            job.task,
-                        )
-                        # Add a tiny sleep to prevent event loop starvation in live mode
-                        if is_live:
-                            await asyncio.sleep(0.001)
-                    except Exception as exc:
-                        # Best-effort: any failure (including the import) switches
-                        # the rest of the run to the simulation path.
-                        log.warning("torch_inference_failed_fallback", error=str(exc))
-                        use_real_inference = False  # fall back for remaining batches
-                        batch_lat_ms = max(
-                            0.5, base_lat_ms + random.gauss(0, base_lat_ms * 0.07)
-                        )
-                else:
-                    # Simulation path — non-blocking synthetic latency
-                    batch_lat_ms = max(
-                        0.5,
-                        base_lat_ms + random.gauss(0, base_lat_ms * 0.07),
-                    )
-                    await asyncio.sleep(batch_lat_ms / 1000.0)  # non-blocking
-                latencies.append(batch_lat_ms)
-                # VRAM usage is always simulated: ±0.05 around the typical fraction.
-                vram_used = vram_total * random.uniform(
-                    vram_frac - 0.05, vram_frac + 0.05
-                )
-                vram_samples.append(max(0.0, vram_used))
-                progress = (sim_idx + 1) / sim_batches
-                telemetry.record_batch_context(real_idx, progress)
-                # Throttle callbacks: every 5 batches or first/last
-                if sim_idx % 5 == 0 or sim_idx == sim_batches - 1:
-                    images_done = int(progress * total_img)
-                    # Live-preview payload for vision tasks. The detections are
-                    # always synthetic here, even on the real-inference path.
-                    live_data = {}
-                    if job.task.lower() in ("detection", "segmentation"):
-                        live_data["detections"] = [
-                            {
-                                "x": random.uniform(0.1, 0.7),
-                                "y": random.uniform(0.1, 0.7),
-                                "width": random.uniform(0.1, 0.3),
-                                "height": random.uniform(0.1, 0.3),
-                                "label": random.choice(["person", "car", "bicycle", "dog"]),
-                                "confidence": random.uniform(0.5, 0.99)
-                            }
-                            for _ in range(random.randint(1, 5))
-                        ]
-                    last_sample = telemetry.samples[-1] if telemetry.samples else None
-                    if last_sample:
-                        last_sample.live_data = live_data
-                        # Explicitly broadcast detections for the visualizer
-                        last_sample.detections = live_data.get("detections", [])
-                    await on_progress(
-                        progress,
-                        f"Batch {real_idx+1}/{n_batches} — "
-                        f"{images_done}/{total_img} images processed",
-                        last_sample,
-                    )
-        finally:
-            # Always stop the collector, even when the loop raised or was cancelled.
-            telemetry_summary = await telemetry.stop()
-            # Attach simulated layer breakdown so Live Lab can display it
-            telemetry_summary.layer_breakdown = self._compute_layer_breakdown(
-                job.task, base_lat_ms
-            )
-        task_scores = self._simulate_task_scores(job.task, model, dataset)
-        log.info(
-            "execution_complete",
-            job_id        = job.id,
-            total_images  = total_img,
-            sim_batches   = sim_batches,
-            avg_lat_ms    = round(sum(latencies) / len(latencies), 2) if latencies else 0,
-        )
-        return ExecutionResult(
-            latencies_ms      = latencies,
-            total_images      = total_img,
-            vram_samples      = vram_samples,
-            task_scores       = task_scores,
-            telemetry_samples = telemetry.samples,
-            telemetry_summary = telemetry_summary,
-        )
-    # ── Helpers ───────────────────────────────────────────────────────────────
-    def _base_batch_latency_ms(
-        self,
-        hardware:  str,
-        model:     Model,
-        batch_sz:  int,
-        precision: str,
-    ) -> float:
-        """
-        Estimate the latency of one batch, in milliseconds.
-        Starts from the per-image latency of the hardware tier and scales it by
-        model size (+30% per GB), batch size, a batching-efficiency factor
-        (0.90 when the key contains "cpu", otherwise 0.65) and finally divides
-        by the precision speedup (1.0 for unknown precisions).
-        """
-        key          = self._normalize_hw(hardware)
-        per_image_ms = self._lookup_latency(key)
-        weights_gb   = max(model.size, 1) / (1024 ** 3)
-        size_scale   = 1.0 + weights_gb * 0.30
-        efficiency   = 0.90 if "cpu" in key else 0.65
-        gain         = _PRECISION_SPEEDUP.get(precision.upper(), 1.0)
-        return per_image_ms * size_scale * batch_sz * efficiency / gain
-    def _get_vram_gb(self, hardware: str, model: Model) -> float:
-        """Return the VRAM (GB) of the first table key found in the hardware name.
-        Falls back to 8.0 when no key matches. ``model`` is accepted but not used.
-        """
-        normalized = self._normalize_hw(hardware)
-        candidates = (
-            capacity
-            for known_key, capacity in HARDWARE_VRAM_GB.items()
-            if known_key and known_key in normalized
-        )
-        return next(candidates, 8.0)
-    @staticmethod
-    def _vram_usage_fraction(hardware: str) -> float:
-        """Typical fraction of VRAM in use during inference for ``hardware``.
-        Tiers are checked in order and matched by substring on the lowercased
-        name; anything unmatched gets 0.55.
-        """
-        name = hardware.lower()
-        tiers = (
-            (("4090", "3090", "a100", "h100"), 0.62),
-            (("4080", "3080", "v100", "a10"), 0.60),
-            (("cpu",), 0.0),
-        )
-        for markers, fraction in tiers:
-            if any(marker in name for marker in markers):
-                return fraction
-        return 0.55
-    @staticmethod
-    def _simulate_task_scores(
-        task: str, model: Model, dataset: Dataset
-    ) -> dict[str, float]:
-        """
-        Return simulated metric scores for ``task`` with small per-run noise.
-        Baselines come from _TASK_BASELINES (unknown tasks use accuracy 0.80);
-        each is perturbed by Gaussian noise (sigma 0.015) and clamped to [0, 1].
-        ``model`` and ``dataset`` are accepted but not used.
-        PRODUCTION SWAP: replace with actual metric computation:
-            from torchmetrics.detection import MeanAveragePrecision
-            metric = MeanAveragePrecision()
-            metric.update(predictions, targets)
-            return metric.compute()
-        """
-        reference = _TASK_BASELINES.get(task.lower(), {"accuracy": 0.80})
-        scores: dict[str, float] = {}
-        for metric_name, baseline in reference.items():
-            # Jitter stands in for run-to-run variance.
-            noisy = baseline + random.gauss(0, 0.015)
-            scores[metric_name] = float(max(0.0, min(1.0, noisy)))
-        return scores
-    @staticmethod
-    def _check_torch_available() -> bool:
-        """Report whether ``import torch`` succeeds in this environment."""
-        try:
-            import torch  # noqa: F401
-        except ImportError:
-            return False
-        return True
-    @staticmethod
-    def _compute_layer_breakdown(task: str, base_lat_ms: float) -> list[LayerBreakdown]:
-        """Build a simulated per-stage latency breakdown for ``task``.
-        The stage list depends on the task family (detection/segmentation,
-        classification, or everything else). Each stage receives its nominal
-        share of ``base_lat_ms`` plus Gaussian jitter (sigma = 1% of the base),
-        so the stage times need not sum exactly to ``base_lat_ms``; ``percent``
-        always reports the nominal share.
-        PRODUCTION SWAP: replace with actual profiler data (e.g. torch.profiler).
-        """
-        task_key = task.lower()
-        if task_key in ("detection", "segmentation"):
-            stages = [
-                ("Backbone", 0.45),
-                ("Neck (FPN/PAFPN)", 0.30),
-                ("Detection Head", 0.20),
-                ("NMS Post-process", 0.05),
-            ]
-        elif task_key == "classification":
-            stages = [
-                ("Feature Extractor", 0.70),
-                ("Classifier Head", 0.20),
-                ("Softmax", 0.10),
-            ]
-        else:
-            stages = [
-                ("Encoder", 0.55),
-                ("Decoder / Head", 0.35),
-                ("Post-process", 0.10),
-            ]
-        # One jitter draw per stage, in stage order.
-        return [
-            LayerBreakdown(
-                name=name,
-                time_ms=round(base_lat_ms * frac + random.gauss(0, base_lat_ms * 0.01), 3),
-                percent=round(frac * 100, 1),
-            )
-            for name, frac in stages
-        ]
-    @staticmethod
-    def _normalize_hw(hardware: str) -> str:
-        """Return a lookup key: lowercased, separators and vendor words removed."""
-        key = hardware.lower()
-        # Removal order is fixed: separators first, then brand words.
-        for noise in (" ", "-", "_", "nvidia", "geforce"):
-            key = key.replace(noise, "")
-        return key
-    @staticmethod
-    def _lookup_latency(hw_key: str) -> float:
-        """Return the per-image latency (ms) for a normalized hardware key.
-        The first table key contained in ``hw_key`` wins (table order). With no
-        match, GPU-looking names use the generic "gpu" entry and everything
-        else uses "cpu".
-        """
-        matched = next(
-            (
-                latency
-                for known_key, latency in _LATENCY_MS_PER_IMAGE.items()
-                if known_key and known_key in hw_key
-            ),
-            None,
-        )
-        if matched is not None:
-            return matched
-        looks_like_gpu = any(tag in hw_key for tag in ("gpu", "rtx", "gtx", "cuda"))
-        return _LATENCY_MS_PER_IMAGE["gpu" if looks_like_gpu else "cpu"]

benchmark/metrics.py DELETED Viewed

@@ -1,110 +0,0 @@
-"""
-benchmark/metrics.py — Metrics Engine.
-Computes the final BenchmarkMetrics object from raw execution data:
-  - Latency statistics (mean, p95, p99)
-  - Throughput (FPS)
-  - VRAM statistics (avg, peak)
-  - Task-specific scores (mAP, accuracy, IoU) supplied by the executor
-In a production deployment the task_scores dict comes from actual
-metric computation (e.g. pycocotools, torchmetrics).  In this local-first
-build the executor supplies realistic simulated scores.
-"""
-from __future__ import annotations
-import statistics
-from models.benchmark import BenchmarkMetrics, LayerBreakdown, TelemetrySummary
-from observability.logger import get_logger
-log = get_logger("benchmark.metrics")
-class MetricsEngine:
-    """Builds the final BenchmarkMetrics record from raw execution measurements."""
-    def compute(
-        self,
-        *,
-        task:          str,
-        latencies_ms:  list[float],   # per-batch latencies
-        total_images:  int = 0,
-        total_tokens:  int = 0,
-        batch_size:    int,
-        vram_samples:  list[float],   # VRAM readings (GB) during run
-        task_scores:   dict[str, float],  # task-specific metric scores
-    ) -> BenchmarkMetrics:
-        """
-        Aggregate latency, throughput, VRAM and task scores into one record.
-        With no latencies, only the counters (images, tokens, batch size) are
-        populated. Throughput is count / summed latency and is 0.0 when either
-        is not positive. Task scores are clamped to [0, 1] through ``_fmt``;
-        perplexity is passed through unchanged (possibly None).
-        """
-        if not latencies_ms:
-            return BenchmarkMetrics(
-                total_images=total_images,
-                total_tokens=total_tokens,
-                batch_size=batch_size,
-            )
-        elapsed_s = sum(latencies_ms) / 1000.0
-        def throughput(units: int) -> float:
-            # Units per second over the summed batch latency.
-            if elapsed_s > 0 and units > 0:
-                return units / elapsed_s
-            return 0.0
-        def score(key: str) -> float:
-            # Missing scores count as 0.0.
-            return _fmt(task_scores.get(key, 0.0))
-        have_vram = bool(vram_samples)
-        result = BenchmarkMetrics(
-            fps              = round(throughput(total_images), 2),
-            tokens_per_sec   = round(throughput(total_tokens), 2),
-            latency_mean_ms  = round(statistics.mean(latencies_ms), 3),
-            latency_p95_ms   = round(_percentile(latencies_ms, 0.95), 3),
-            latency_p99_ms   = round(_percentile(latencies_ms, 0.99), 3),
-            vram_peak_gb     = round(max(vram_samples) if have_vram else 0.0, 3),
-            vram_avg_gb      = round(statistics.mean(vram_samples) if have_vram else 0.0, 3),
-            total_images     = total_images,
-            total_tokens     = total_tokens,
-            batch_size       = batch_size,
-        )
-        kind = task.lower()
-        if kind in ("detection", "segmentation", "keypoints"):
-            # CV localisation tasks share the mAP family.
-            result.mAP       = score("mAP")
-            result.mAP_50    = score("mAP_50")
-            result.mAP_50_95 = score("mAP_50_95")
-            if kind == "segmentation":
-                result.iou_mean = score("iou_mean")
-        elif kind == "classification":
-            result.accuracy = score("accuracy")
-            result.top1     = score("top1")
-            result.top5     = score("top5")
-        elif kind in ("nlp", "generation"):
-            # NLP tasks: accuracy, ROUGE-L (either key spelling), BLEU, perplexity.
-            result.accuracy   = score("accuracy")
-            result.rouge_l    = _fmt(task_scores.get("rouge_l", task_scores.get("rougeL", 0.0)))
-            result.bleu       = score("bleu")
-            result.perplexity = task_scores.get("perplexity")
-        log.info(
-            "metrics_computed",
-            task        = task,
-            fps         = result.fps,
-            tps         = result.tokens_per_sec,
-            latency_ms  = result.latency_mean_ms,
-            vram_peak   = result.vram_peak_gb,
-        )
-        return result
-# ── Helpers ───────────────────────────────────────────────────────────────────
-def _percentile(data: list[float], p: float) -> float:
-    """Return the value at fraction ``p`` of the sorted data (0.0 when empty).
-    The index is ``int(len(data) * p)``, capped at the last element.
-    """
-    if not data:
-        return 0.0
-    ordered = sorted(data)
-    last = len(ordered) - 1
-    return ordered[min(int(len(ordered) * p), last)]
-def _fmt(v: float) -> float:
-    """Clamp ``v`` to [0, 1], then round to 4 decimal places."""
-    capped = min(1.0, v)
-    bounded = max(0.0, capped)
-    return round(bounded, 4)

benchmark/orchestrator.py DELETED Viewed

@@ -1,374 +0,0 @@
-"""
-benchmark/orchestrator.py — Benchmark Orchestrator (Main Controller).
-Coordinates the full benchmark lifecycle:
-  1. Resolve model + dataset from their registries
-  2. Run all compatibility checks (gates A–E)
-  3. If valid → create a BenchmarkJob in the DB
-  4. Persist the validation audit log
-  5. Enqueue async background task → execution → metrics → storage
-  6. Return the job immediately so callers are non-blocking
-Public interface used by api/routes/benchmark.py:
-  validate_context(ctx)    → ValidationReport  (no job created)
-  create_and_run(ctx)      → BenchmarkJob      (job queued, execution in background)
-"""
-from __future__ import annotations
-import asyncio
-from datetime import datetime, timezone
-from benchmark.adapters.registry import get_executor
-from benchmark.compatibility import CompatibilityValidator
-from benchmark.execution import BenchmarkExecutor
-from benchmark.metrics import MetricsEngine
-import benchmark.registry as bench_reg
-from datasets.registry import get_dataset
-from models.benchmark import (
-    BenchmarkContext,
-    BenchmarkJob,
-    BenchmarkMetrics,
-    TelemetrySummary,
-    ValidationReport,
-)
-from models.dataset import Dataset
-from models.model import Model
-from observability.logger import audit, get_logger
-from registry.registry import get_model
-log = get_logger("benchmark.orchestrator")
-# Module-level singletons shared by every request handled in this process.
-# NOTE(review): sharing assumes both objects keep no per-call state — confirm
-# for CompatibilityValidator, whose constructor is not visible here.
-_validator = CompatibilityValidator()
-_metrics   = MetricsEngine()
-# job_id → asyncio.Task (for future cancellation support).
-# create_and_run adds an entry when it schedules a job; the task's
-# done-callback removes it again.
-_active_tasks: dict[str, asyncio.Task] = {}
-# ── Public API ────────────────────────────────────────────────────────────────
-async def sync_project_benchmarks() -> int:
-    """
-    Index benchmark JSON files from the active project's benchmark directory.
-    Files named ``job_*.json`` are inserted into ``benchmark_jobs`` and files
-    named ``result_*.json`` into ``benchmark_results``. Both statements use
-    INSERT OR IGNORE, so rows that already exist are left untouched rather
-    than updated. A file that cannot be read, parsed, or lacks a required key
-    is logged and skipped; the remaining files are still processed.
-    Returns:
-        The number of job/result files processed without error (a file is
-        counted even when its insert was ignored), or 0 when the benchmark
-        directory is missing.
-    """
-    from benchmark.registry import _get_active_project_benchmark_dir_sync
-    from projects.service import get_active_project_path
-    import json
-    from database.connection import get_db
-    project_path = await get_active_project_path()
-    benchmark_dir = _get_active_project_benchmark_dir_sync(project_path)
-    if not benchmark_dir or not benchmark_dir.exists():
-        return 0
-    db = await get_db()
-    count = 0
-    for file_path in benchmark_dir.glob("*.json"):
-        try:
-            # JSON is UTF-8 by specification; don't depend on the platform default.
-            with open(file_path, "r", encoding="utf-8") as f:
-                data = json.load(f)
-            # The file-name prefix decides which table the record belongs to.
-            if file_path.name.startswith("job_"):
-                # Insert into benchmark_jobs unless the row already exists
-                await db.execute(
-                    """INSERT OR IGNORE INTO benchmark_jobs
-                           (id, model_id, dataset_id, task, framework, hardware,
-                            precision, batch_size, config, status, progress, created_at, updated_at, started_at)
-                       VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
-                    (
-                        data["id"], data["model_id"], data["dataset_id"],
-                        data["task"], data["framework"], data["hardware"],
-                        data["precision"], data["batch_size"],
-                        json.dumps(data["config"]), data["status"],
-                        data.get("progress", 0.0),
-                        data.get("created_at", datetime.now(timezone.utc).isoformat()),
-                        data.get("updated_at", datetime.now(timezone.utc).isoformat()),
-                        data.get("started_at")
-                    )
-                )
-                count += 1
-            elif file_path.name.startswith("result_"):
-                # Insert into benchmark_results unless the row already exists
-                await db.execute(
-                    """INSERT OR IGNORE INTO benchmark_results
-                           (id, job_id, metrics, telemetry_summary, created_at)
-                       VALUES (?,?,?,?,?)""",
-                    (
-                        data["id"], data["job_id"],
-                        json.dumps(data["metrics"]),
-                        json.dumps(data["telemetry_summary"]),
-                        data.get("created_at", datetime.now(timezone.utc).isoformat())
-                    )
-                )
-                count += 1
-        except Exception as e:
-            # Deliberately best-effort: one bad file must not abort the sync.
-            log.error("sync_file_failed", file=file_path.name, error=str(e))
-    await db.commit()
-    log.info("sync_complete", count=count)
-    return count
-async def validate_context(ctx: BenchmarkContext) -> ValidationReport:
-    """
-    Run the compatibility checks for ``ctx`` and return the report.
-    No job is created, so the UI can call this as often as it likes.
-    """
-    model = await _require_model(ctx.model_id)
-    # ── Handle Polymorphic Input (Video/Live) ────────────────────────────────
-    streams_input = ctx.input_source in ("video", "live") or ctx.dataset_id == "none"
-    if not streams_input:
-        dataset = await _require_dataset(ctx.dataset_id)
-    else:
-        # No stored dataset backs a stream, so validate against a synthetic
-        # placeholder that mirrors the model's task (keeps the task check green).
-        timestamp = datetime.now(timezone.utc).isoformat()
-        dataset = Dataset(
-            id="none",
-            name="Live/Video Stream",
-            task=model.task,
-            format="custom",
-            source="local",
-            status="imported",
-            images=0,
-            classes=0,
-            size_label="0 MB",
-            created_at=timestamp,
-            updated_at=timestamp,
-        )
-    return _validator.validate(model, dataset, ctx)
-async def create_and_run(ctx: BenchmarkContext) -> BenchmarkJob:
-    """
-    Full benchmark initiation: validate, create the job, start it in the background.
-    Steps, in order:
-      1. Resolve the model, and either the stored dataset or a synthetic
-         placeholder for video/live input.
-      2. Run the compatibility checks and persist the validation log under
-         the job id "pre-check".
-      3. On failure, raise HTTPException(422) listing the failed checks.
-      4. Create the job, persist the validation log again under its real id,
-         and schedule ``_execute_job`` as an asyncio task.
-    Returns the created job without waiting for execution to finish.
-    """
-    model = await _require_model(ctx.model_id)
-    # ── Handle Polymorphic Input (Video/Live) ────────────────────────────────
-    if ctx.input_source in ["video", "live"] or ctx.dataset_id == "none":
-        # No stored dataset backs a stream; use a placeholder whose task
-        # mirrors the model's task.
-        now = datetime.now(timezone.utc).isoformat()
-        dataset = Dataset(
-            id="none",
-            name="Live/Video Stream",
-            task=model.task,
-            format="custom",
-            source="local",
-            status="imported",
-            images=0,
-            classes=0,
-            size_label="0 MB",
-            created_at=now,
-            updated_at=now
-        )
-    else:
-        dataset = await _require_dataset(ctx.dataset_id)
-    # ── Compatibility check ───────────────────────────────────────────────────
-    report = _validator.validate(model, dataset, ctx)
-    # Always persist the validation log (even for failures)
-    await bench_reg.save_validation_log(
-        job_id     = "pre-check",
-        model_id   = ctx.model_id,
-        dataset_id = ctx.dataset_id,
-        checks     = report.checks,
-        passed     = report.passed,
-    )
-    if not report.passed:
-        # Imported lazily so this module only needs FastAPI on the failure path.
-        from fastapi import HTTPException
-        failed = [c for c in report.checks if not c.passed]
-        raise HTTPException(
-            status_code = 422,
-            detail      = {
-                "error":      "Compatibility validation failed",
-                "failed_checks": [
-                    {
-                        "name":       c.name,
-                        "detail":     c.detail,
-                        "suggestion": c.suggestion,
-                    }
-                    for c in failed
-                ],
-            },
-        )
-    # ── Create job ────────────────────────────────────────────────────────────
-    job = await bench_reg.create_job(ctx)
-    # Persist the validation log again, now under the real job_id.
-    # NOTE(review): whether this replaces the earlier 'pre-check' entry or adds
-    # a second one depends on save_validation_log — confirm.
-    await bench_reg.save_validation_log(
-        job_id     = job.id,
-        model_id   = ctx.model_id,
-        dataset_id = ctx.dataset_id,
-        checks     = report.checks,
-        passed     = True,
-    )
-    # ── Log the Polymorphic Input params ─────────────────────────────────────
-    if ctx.input_source or ctx.video_path or ctx.rtsp_url:
-        log.info("polymorphic_input_received",
-                 job_id=job.id,
-                 source=ctx.input_source,
-                 video=ctx.video_path,
-                 rtsp=ctx.rtsp_url)
-    # ── Enqueue background execution ──────────────────────────────────────────
-    task = asyncio.create_task(
-        _execute_job(job.id, ctx, model, dataset),
-        name = f"benchmark_{job.id}",
-    )
-    # Keep a reference while the task runs; the done-callback drops it again.
-    _active_tasks[job.id] = task
-    task.add_done_callback(lambda _t: _active_tasks.pop(job.id, None))
-    log.info("benchmark_enqueued", job_id=job.id, model=ctx.model_id)
-    return job
-# ── Background execution ──────────────────────────────────────────────────────
-async def _execute_job(
-    job_id:  str,
-    ctx:     BenchmarkContext,
-    model:   Model,
-    dataset: Dataset,
-) -> None:
-    """Full benchmark lifecycle — runs in an asyncio background task.
-
-    Marks the job as running, executes it through ``BenchmarkExecutor``,
-    computes and persists metrics, then marks the job completed.  Any
-    ordinary failure is recorded on the job and in the audit log and is
-    not propagated.  Cancellation is recorded on the job and re-raised.
-
-    Args:
-        job_id:  Identifier of the already-persisted benchmark job.
-        ctx:     Benchmark configuration the job was created from.
-        model:   Resolved model record handed to the executor.
-        dataset: Resolved dataset record handed to the executor.
-
-    Raises:
-        asyncio.CancelledError: Re-raised after the job is marked failed.
-    """
-    now = datetime.now(timezone.utc).isoformat()
-    # ANSI colours used to decorate the per-job log entries.
-    ts_color      = "\x1b[36m" # Cyan
-    info_color    = "\x1b[34m" # Blue
-    success_color = "\x1b[32m" # Green
-    error_color   = "\x1b[31m" # Red
-    reset         = "\x1b[0m"
-    # Transition → running
-    await bench_reg.update_job(
-        job_id,
-        status     = "running",
-        progress   = 0.0,
-        started_at = now,
-        log_entry  = f"{ts_color}[{now}]{reset} {info_color}Job started{reset} on {ctx.hardware} ({ctx.precision})",
-    )
-    runner = BenchmarkExecutor()
-    try:
-        # ── Fetch the persisted job (for executor) ────────────────────────────
-        job = await bench_reg.get_job(job_id)
-        if job is None:
-            # Explicit check rather than ``assert`` so it survives ``python -O``;
-            # the generic handler below records it on the job like any failure.
-            raise RuntimeError("Job disappeared from DB after creation")
-        # ── Define Progress Callback ──────────────────────────────────────────
-        async def on_progress(progress: float, message: str, telemetry: Any | None):
-            await bench_reg.update_job(
-                job_id,
-                progress=progress,
-                log_entry=f"{ts_color}[{datetime.now(timezone.utc).isoformat()}]{reset} {info_color}{message}{reset}",
-                # Objects exposing model_dump() are converted; anything else is passed through.
-                last_telemetry=telemetry.model_dump() if telemetry and hasattr(telemetry, "model_dump") else telemetry
-            )
-        # ── Execution Loop ────────────────────────────────────────────────────
-        exec_result = await runner.execute(
-            job=job,
-            model=model,
-            dataset=dataset,
-            on_progress=on_progress
-        )
-        # ── Compute metrics ───────────────────────────────────────────────────
-        metrics = _metrics.compute(
-            task         = ctx.task,
-            latencies_ms = exec_result.latencies_ms,
-            total_images = exec_result.total_images,
-            batch_size   = ctx.batch_size,
-            vram_samples = exec_result.vram_samples,
-            task_scores  = exec_result.task_scores,
-        )
-        # ── Persist result ────────────────────────────────────────────────────
-        await bench_reg.save_result(
-            job_id            = job_id,
-            metrics           = metrics,
-            telemetry_summary = exec_result.telemetry_summary,
-        )
-        ended = datetime.now(timezone.utc).isoformat()
-        await bench_reg.update_job(
-            job_id,
-            status    = "completed",
-            progress  = 1.0,
-            ended_at  = ended,
-            log_entry = f"{ts_color}[{ended}]{reset} {success_color}Benchmark completed{reset} — {metrics.fps} FPS",
-        )
-        await audit(
-            "benchmark_completed",
-            job_id  = job_id,
-            payload = {"model_id": ctx.model_id, "dataset_id": ctx.dataset_id},
-        )
-        log.info(
-            "benchmark_completed",
-            job_id  = job_id,
-            fps     = metrics.fps,
-            lat_ms  = metrics.latency_mean_ms,
-        )
-    except asyncio.CancelledError:
-        # Task cancelled externally (e.g. server shutdown) — don't swallow
-        ended = datetime.now(timezone.utc).isoformat()
-        await bench_reg.update_job(
-            job_id,
-            status    = "failed",
-            error     = "Job cancelled",
-            ended_at  = ended,
-            log_entry = f"{ts_color}[{ended}]{reset} {error_color}Job cancelled{reset}",
-        )
-        raise
-    except Exception as exc:
-        # Top-level boundary of the background task: record the failure, do not re-raise.
-        ended = datetime.now(timezone.utc).isoformat()
-        err_msg = str(exc)
-        await bench_reg.update_job(
-            job_id,
-            status    = "failed",
-            error     = err_msg,
-            ended_at  = ended,
-            log_entry = f"{ts_color}[{ended}]{reset} {error_color}ERROR: {err_msg}{reset}",
-        )
-        await audit(
-            "benchmark_failed",
-            job_id  = job_id,
-            level   = "error",
-            payload = {"error": err_msg, "model_id": ctx.model_id},
-        )
-        log.exception("benchmark_failed", job_id=job_id)
-# ── Resource resolvers ────────────────────────────────────────────────────────
-async def _require_model(model_id: str) -> Model:
-    """Return the model looked up via ``get_model``, or raise HTTP 404 when the lookup is falsy."""
-    found = await get_model(model_id)
-    if found:
-        return found
-    # Imported only on the error path.
-    from fastapi import HTTPException
-    message = f"Model '{model_id}' not found in Model Zoo"
-    raise HTTPException(status_code=404, detail=message)
-async def _require_dataset(dataset_id: str) -> Dataset:
-    """Return the dataset looked up via ``get_dataset``, or raise HTTP 404 when the lookup is falsy."""
-    found = await get_dataset(dataset_id)
-    if found:
-        return found
-    # Imported only on the error path.
-    from fastapi import HTTPException
-    message = f"Dataset '{dataset_id}' not found in Dataset Manager"
-    raise HTTPException(status_code=404, detail=message)

benchmark/registry.py DELETED Viewed

@@ -1,302 +0,0 @@
-"""
-benchmark/registry.py — Benchmark Registry.
-All DB interactions for:
-  • benchmark_jobs         — job lifecycle state
-  • benchmark_results      — final metrics + telemetry summary
-  • benchmark_validation_logs — immutable check audit trail
-Follows the same pattern as registry/registry.py and datasets/registry.py.
-No direct DB access from other benchmark modules — everything routes here.
-"""
-from __future__ import annotations
-import json
-import uuid
-from datetime import datetime, timezone
-from typing import Any
-from pathlib import Path
-from database.connection import get_db
-from models.benchmark import (
-    BenchmarkContext,
-    BenchmarkJob,
-    BenchmarkMetrics,
-    BenchmarkResult,
-    TelemetrySummary,
-    ValidationCheck,
-    row_to_job,
-    row_to_result,
-)
-from observability.logger import get_logger
-log = get_logger("benchmark.registry")
-def _get_active_project_benchmark_dir_sync(project_path: str | None) -> Path | None:
-    """Return ``<project_path>/benchmarks`` (created if missing), or ``None`` when no path is given."""
-    if project_path:
-        target = Path(project_path, "benchmarks")
-        # Create the folder (and any missing parents) on first use.
-        target.mkdir(parents=True, exist_ok=True)
-        return target
-    return None
-async def _get_active_project_benchmark_dir() -> Path | None:
-    """Resolve the active project's path and return its 'benchmarks' folder (``None`` if no path)."""
-    # Local import, as in the rest of this module's project helpers.
-    from projects.service import get_active_project_path
-    return _get_active_project_benchmark_dir_sync(await get_active_project_path())
-async def _save_to_project(filename: str, data: dict) -> None:
-    """Best-effort dump of ``data`` as indented JSON into the active project's benchmark folder.
-
-    Does nothing when no benchmark folder is available.  Write or
-    serialisation errors are logged and swallowed.
-    """
-    target_dir = await _get_active_project_benchmark_dir()
-    if not target_dir:
-        return
-    destination = target_dir / filename
-    try:
-        with open(destination, "w") as handle:
-            json.dump(data, handle, indent=2)
-    except Exception as exc:
-        log.error("project_persistence_failed", error=str(exc), file=filename)
-# ── Job CRUD ──────────────────────────────────────────────────────────────────
-async def create_job(ctx: BenchmarkContext) -> BenchmarkJob:
-    """Create a queued benchmark job from ``ctx``, store it in the DB and project folder, and return it."""
-    db      = await get_db()
-    new_id  = f"bmark-{uuid.uuid4().hex[:12]}"
-    stamp   = datetime.now(timezone.utc).isoformat()
-    # In-memory representation returned to the caller
-    job = BenchmarkJob(
-        id=new_id,
-        model_id=ctx.model_id,
-        dataset_id=ctx.dataset_id,
-        task=ctx.task,
-        framework=ctx.framework,
-        hardware=ctx.hardware,
-        precision=ctx.precision,
-        batch_size=ctx.batch_size,
-        config=ctx.model_dump(),
-        status="queued",
-        progress=0.0,
-        created_at=stamp,
-        updated_at=stamp,
-    )
-    # Row for SQLite; status/progress/logs are fixed literals in the statement
-    insert_sql = (
-        """INSERT INTO benchmark_jobs
-               (id, model_id, dataset_id, task, framework, hardware,
-                precision, batch_size, config,
-                status, progress, logs, created_at, updated_at)
-           VALUES (?,?,?,?,?,?,?,?,?,'queued',0.0,'[]',?,?)"""
-    )
-    row_values = (
-        new_id,
-        ctx.model_id,
-        ctx.dataset_id,
-        ctx.task,
-        ctx.framework,
-        ctx.hardware,
-        ctx.precision,
-        ctx.batch_size,
-        json.dumps(ctx.model_dump()),
-        stamp,
-        stamp,
-    )
-    await db.execute(insert_sql, row_values)
-    await db.commit()
-    # Mirror the job into the project folder
-    await _save_to_project(f"job_{new_id}.json", job.model_dump())
-    log.info("benchmark_job_created", job_id=new_id, model=ctx.model_id)
-    return job
-async def get_job(job_id: str) -> BenchmarkJob | None:
-    """Fetch one benchmark job by id; ``None`` when no row matches."""
-    db    = await get_db()
-    query = "SELECT * FROM benchmark_jobs WHERE id = ?"
-    async with db.execute(query, (job_id,)) as cursor:
-        record = await cursor.fetchone()
-    if not record:
-        return None
-    return row_to_job(record)
-async def list_jobs(
-    *,
-    status:   str | None = None,
-    model_id: str | None = None,
-    limit:    int = 100,
-) -> list[BenchmarkJob]:
-    """List benchmark jobs, newest first, optionally filtered by status and/or model id."""
-    db = await get_db()
-    # Keep only the filters that were actually supplied (truthy), in a fixed order.
-    filters = [
-        (column, value)
-        for column, value in (("status", status), ("model_id", model_id))
-        if value
-    ]
-    where = ""
-    if filters:
-        where = "WHERE " + " AND ".join(f"{column} = ?" for column, _ in filters)
-    params: list[Any] = [value for _, value in filters]
-    params.append(limit)
-    sql = f"SELECT * FROM benchmark_jobs {where} ORDER BY created_at DESC LIMIT ?"
-    async with db.execute(sql, params) as cursor:
-        records = await cursor.fetchall()
-    return [row_to_job(record) for record in records]
-async def update_job(
-    job_id: str,
-    *,
-    status:    str | None   = None,
-    progress:  float | None = None,
-    error:     str | None   = None,
-    started_at: str | None  = None,
-    ended_at:  str | None   = None,
-    log_entry: str | None   = None,
-    last_telemetry: dict | None = None,
-) -> None:
-    """Update mutable fields on a benchmark job and mirror the row to the project folder.
-
-    Only keyword arguments that are not ``None`` are written; ``updated_at``
-    is always refreshed.  ``log_entry`` is appended to the job's JSON log
-    array, which is trimmed to its most recent 500 entries.
-
-    Args:
-        job_id:         Id of the job row to update.
-        status:         New lifecycle status.
-        progress:       New progress value (stored rounded to 4 decimals).
-        error:          Error message to record.
-        started_at:     Start timestamp string.
-        ended_at:       End timestamp string.
-        log_entry:      One log line to append.
-        last_telemetry: Latest telemetry payload, stored as JSON.
-    """
-    db  = await get_db()
-    now = datetime.now(timezone.utc).isoformat()
-    sets: list[str] = ["updated_at = ?"]
-    vals: list[Any] = [now]
-    if status is not None:
-        sets.append("status = ?")
-        vals.append(status)
-    if progress is not None:
-        sets.append("progress = ?")
-        vals.append(round(progress, 4))
-    if error is not None:
-        sets.append("error = ?")
-        vals.append(error)
-    if started_at is not None:
-        sets.append("started_at = ?")
-        vals.append(started_at)
-    if ended_at is not None:
-        sets.append("ended_at = ?")
-        vals.append(ended_at)
-    if last_telemetry is not None:
-        sets.append("last_telemetry = ?")
-        vals.append(json.dumps(last_telemetry))
-    if log_entry is not None:
-        # Append new entry to the JSON log array (capped at 500 lines)
-        async with db.execute(
-            "SELECT logs FROM benchmark_jobs WHERE id = ?", (job_id,)
-        ) as cur:
-            row = await cur.fetchone()
-        existing = json.loads(row["logs"]) if row and row["logs"] else []
-        existing.append(log_entry)
-        sets.append("logs = ?")
-        vals.append(json.dumps(existing[-500:]))
-    vals.append(job_id)
-    # The collected assignments were previously never executed, so no update
-    # reached the table.  Column names in ``sets`` are fixed literals from
-    # this function; every value is a bound parameter.
-    await db.execute(
-        f"UPDATE benchmark_jobs SET {', '.join(sets)} WHERE id = ?",
-        vals,
-    )
-    await db.commit()
-    # Mirror the freshly written row to the project folder, if it exists.
-    async with db.execute("SELECT * FROM benchmark_jobs WHERE id = ?", (job_id,)) as cur:
-        row = await cur.fetchone()
-    if row:
-        job = row_to_job(row)
-        if job:
-            await _save_to_project(f"job_{job_id}.json", job.model_dump())
-# ── Result CRUD ───────────────────────────────────────────────────────────────
-async def save_result(
-    *,
-    job_id:            str,
-    metrics:           BenchmarkMetrics,
-    telemetry_summary: TelemetrySummary,
-) -> BenchmarkResult:
-    """Store the final metrics and telemetry summary for a job and return the saved result."""
-    db     = await get_db()
-    new_id = f"bres-{uuid.uuid4().hex[:12]}"
-    stamp  = datetime.now(timezone.utc).isoformat()
-    # SQLite row: both payloads are stored as JSON text
-    metrics_json   = json.dumps(metrics.model_dump(exclude_none=True))
-    telemetry_json = json.dumps(telemetry_summary.model_dump())
-    await db.execute(
-        """INSERT INTO benchmark_results
-               (id, job_id, metrics, telemetry_summary, created_at)
-           VALUES (?,?,?,?,?)""",
-        (new_id, job_id, metrics_json, telemetry_json, stamp),
-    )
-    await db.commit()
-    saved = BenchmarkResult(
-        id=new_id,
-        job_id=job_id,
-        metrics=metrics,
-        telemetry_summary=telemetry_summary,
-        created_at=stamp,
-    )
-    # Mirror the result into the project folder
-    await _save_to_project(f"result_{job_id}.json", saved.model_dump())
-    log.info("benchmark_result_saved", job_id=job_id, result_id=new_id)
-    return saved
-async def get_result(job_id: str) -> BenchmarkResult | None:
-    """Fetch the result for ``job_id`` joined with its job columns; ``None`` when no row matches."""
-    db = await get_db()
-    query = (
-        """SELECT r.*, j.model_id, j.dataset_id, j.task, j.framework, j.hardware, j.precision
-           FROM benchmark_results r
-           JOIN benchmark_jobs j ON r.job_id = j.id
-           WHERE r.job_id = ?"""
-    )
-    async with db.execute(query, (job_id,)) as cursor:
-        record = await cursor.fetchone()
-    if not record:
-        return None
-    return row_to_result(record)
-async def list_results(*, limit: int = 100) -> list[BenchmarkResult]:
-    """List up to ``limit`` results joined with their job columns, newest first."""
-    db = await get_db()
-    query = (
-        """SELECT r.*, j.model_id, j.dataset_id, j.task, j.framework, j.hardware, j.precision
-           FROM benchmark_results r
-           JOIN benchmark_jobs j ON r.job_id = j.id
-           ORDER BY r.created_at DESC LIMIT ?"""
-    )
-    async with db.execute(query, (limit,)) as cursor:
-        records = await cursor.fetchall()
-    return [row_to_result(record) for record in records]
-# ── Validation Log ────────────────────────────────────────────────────────────
-async def save_validation_log(
-    *,
-    job_id:     str,
-    model_id:   str,
-    dataset_id: str,
-    checks:     list[ValidationCheck],
-    passed:     bool,
-) -> None:
-    """Insert one row recording all compatibility checks for a job and their overall outcome."""
-    db       = await get_db()
-    entry_id = f"bval-{uuid.uuid4().hex[:12]}"
-    stamp    = datetime.now(timezone.utc).isoformat()
-    checks_json = json.dumps([check.model_dump() for check in checks])
-    passed_flag = 1 if passed else 0   # stored as an integer flag
-    await db.execute(
-        """INSERT INTO benchmark_validation_logs
-               (id, job_id, model_id, dataset_id, checks, passed, created_at)
-           VALUES (?,?,?,?,?,?,?)""",
-        (entry_id, job_id, model_id, dataset_id, checks_json, passed_flag, stamp),
-    )
-    await db.commit()
-    log.info("validation_log_saved", job_id=job_id, passed=passed, n_checks=len(checks))

benchmark/telemetry.py DELETED Viewed

@@ -1,182 +0,0 @@
-"""
-benchmark/telemetry.py — Real-time Telemetry Collector.
-Collects GPU/hardware metrics at 2 Hz during benchmark execution.
-Designed as a drop-in adapter:
-  • Local dev  → simulates realistic GPU readings based on hardware tier
-  • Production → replace _read_gpu_metrics() with pynvml calls:
-      nvmlDeviceGetUtilizationRates()
-      nvmlDeviceGetMemoryInfo()
-      nvmlDeviceGetTemperature()
-      nvmlDeviceGetPowerUsage()
-Usage (async context):
-    collector = TelemetryCollector("rtx4090", vram_total_gb=24.0)
-    await collector.start()
-    # ... run inference ...
-    summary = await collector.stop()
-    samples = collector.samples
-"""
-from __future__ import annotations
-import asyncio
-import random
-import statistics
-import time
-from models.benchmark import TelemetrySample, TelemetrySummary
-from observability.logger import get_logger
-log = get_logger("benchmark.telemetry")
-# ── Hardware simulation profiles ──────────────────────────────────────────────
-# Baseline readings used when simulating telemetry, one tuple per hardware key:
-# (base_util%, base_temp_C, base_power_W)
-# Keys are matched as substrings of the normalised hardware name by
-# TelemetryCollector._resolve_profile, scanning this dict in insertion order.
-# The all-zero "cpu" entry is what _read_gpu_metrics treats as "no GPU readings".
-_HW_PROFILES: dict[str, tuple[float, float, float]] = {
-    "rtx4090":   (88.0, 74.0, 380.0),
-    "rtx4080":   (84.0, 70.0, 280.0),
-    "rtx4070":   (80.0, 68.0, 200.0),
-    "rtx3090":   (85.0, 72.0, 320.0),
-    "rtx3080":   (82.0, 70.0, 250.0),
-    "rtx3070":   (78.0, 66.0, 180.0),
-    "rtx3060":   (74.0, 64.0, 150.0),
-    "a100":      (90.0, 68.0, 350.0),
-    "h100":      (92.0, 65.0, 550.0),
-    "v100":      (87.0, 70.0, 280.0),
-    "t4":        (75.0, 62.0, 60.0),
-    "gpu":       (70.0, 65.0, 150.0),
-    "cpu":       (0.0,  0.0,  0.0),
-}
-# Seconds slept between two samples in the collection loop.
-_COLLECTION_INTERVAL_S = 0.5   # 2 Hz
-class TelemetryCollector:
-    """
-    Async telemetry collector.  Call start() before inference, stop() after.
-    Samples are produced by a background asyncio task every
-    ``_COLLECTION_INTERVAL_S`` seconds.  State is shared between that task
-    and the caller without locking, so use an instance from one event loop.
-    """
-    def __init__(self, hardware: str, vram_total_gb: float = 8.0) -> None:
-        """Store the hardware label and total VRAM, and resolve the simulation profile."""
-        self._hardware    = hardware
-        self._vram_total  = vram_total_gb
-        self._hw_profile  = self._resolve_profile(hardware)
-        self._samples: list[TelemetrySample] = []
-        self._running     = False
-        self._task: asyncio.Task | None = None
-    # ── Public API ────────────────────────────────────────────────────────────
-    async def start(self) -> None:
-        """Reset collected samples and launch the background collection loop."""
-        # A second start() without stop() would otherwise leave two loops
-        # appending to the same sample list; retire the previous one first.
-        if self._task is not None and not self._task.done():
-            self._task.cancel()
-        self._running = True
-        self._samples = []
-        self._task    = asyncio.create_task(
-            self._collect_loop(), name="telemetry_collector"
-        )
-        log.debug("telemetry_started", hardware=self._hardware)
-    async def stop(self) -> TelemetrySummary:
-        """Stop the collection loop and return a summary of the samples gathered."""
-        self._running = False
-        if self._task and not self._task.done():
-            self._task.cancel()
-            try:
-                await self._task
-            except asyncio.CancelledError:
-                # Expected: we cancelled the loop ourselves just above.
-                pass
-        log.debug(
-            "telemetry_stopped",
-            hardware = self._hardware,
-            samples  = len(self._samples),
-        )
-        return self._build_summary()
-    def record_batch_context(self, batch_idx: int, progress: float) -> None:
-        """Annotate the most recent sample with the current batch context."""
-        if self._samples:
-            latest = self._samples[-1]
-            latest.batch_idx = batch_idx
-            latest.progress  = progress
-    @property
-    def samples(self) -> list[TelemetrySample]:
-        """Shallow copy of the samples collected so far."""
-        return list(self._samples)
-    # ── Internal ──────────────────────────────────────────────────────────────
-    async def _collect_loop(self) -> None:
-        """Append one sample per interval until stopped or cancelled."""
-        while self._running:
-            sample = self._read_gpu_metrics()
-            self._samples.append(sample)
-            await asyncio.sleep(_COLLECTION_INTERVAL_S)
-    def _read_gpu_metrics(self) -> TelemetrySample:
-        """
-        Returns a simulated TelemetrySample based on the resolved profile.
-        PRODUCTION SWAP: Replace this body with pynvml calls:
-            handle = nvmlDeviceGetHandleByIndex(0)
-            util   = nvmlDeviceGetUtilizationRates(handle)
-            mem    = nvmlDeviceGetMemoryInfo(handle)
-            temp   = nvmlDeviceGetTemperature(handle, NVML_TEMPERATURE_GPU)
-            power  = nvmlDeviceGetPowerUsage(handle) / 1000  # mW → W
-        """
-        base_util, base_temp, base_power = self._hw_profile
-        if base_util == 0.0:   # CPU path — no meaningful GPU readings
-            return TelemetrySample(
-                timestamp     = time.time(),
-                gpu_util_pct  = 0.0,
-                vram_used_gb  = 0.0,
-                vram_total_gb = 0.0,
-                temp_c        = 0.0,
-                power_w       = 0.0,
-            )
-        # Gaussian jitter around the baseline (sigma: 3% util, 1.5°C, 8 W);
-        # VRAM use is drawn uniformly between 58% and 72% of the total.
-        jitter_util  = random.gauss(0, 3.0)
-        jitter_temp  = random.gauss(0, 1.5)
-        jitter_power = random.gauss(0, 8.0)
-        vram_frac    = random.uniform(0.58, 0.72)
-        return TelemetrySample(
-            timestamp     = time.time(),
-            gpu_util_pct  = max(0.0, min(100.0, base_util + jitter_util)),
-            vram_used_gb  = round(
-                max(0.0, min(self._vram_total, self._vram_total * vram_frac)), 3
-            ),
-            vram_total_gb = self._vram_total,
-            temp_c        = round(max(0.0, base_temp + jitter_temp), 1),
-            power_w       = round(max(0.0, base_power + jitter_power), 1),
-        )
-    def _build_summary(self) -> TelemetrySummary:
-        """Aggregate mean and peak values over all samples; default summary when there are none."""
-        if not self._samples:
-            return TelemetrySummary()
-        # The lists below are non-empty from here on, so mean()/max() are safe.
-        utils  = [s.gpu_util_pct  for s in self._samples]
-        vrams  = [s.vram_used_gb  for s in self._samples]
-        temps  = [s.temp_c        for s in self._samples]
-        powers = [s.power_w       for s in self._samples]
-        return TelemetrySummary(
-            gpu_util_avg  = round(statistics.mean(utils), 2),
-            gpu_util_peak = round(max(utils), 2),
-            vram_avg_gb   = round(statistics.mean(vrams), 3),
-            vram_peak_gb  = round(max(vrams), 3),
-            temp_avg_c    = round(statistics.mean(temps), 1),
-            temp_peak_c   = round(max(temps), 1),
-            power_avg_w   = round(statistics.mean(powers), 1),
-            power_peak_w  = round(max(powers), 1),
-        )
-    @staticmethod
-    def _resolve_profile(hardware: str) -> tuple[float, float, float]:
-        """Pick the first profile whose key is a substring of the normalised hardware name."""
-        hw = hardware.lower().replace(" ", "").replace("-", "")
-        for key, profile in _HW_PROFILES.items():
-            if key in hw:
-                return profile
-        # Default for unknown GPU-class hardware
-        if any(x in hw for x in ("gpu", "rtx", "gtx", "cuda", "vram")):
-            return _HW_PROFILES["gpu"]
-        return _HW_PROFILES["cpu"]

benchmark/torch_runner.py DELETED Viewed

@@ -1,142 +0,0 @@
-"""
-benchmark/torch_runner.py — Synchronous GPU inference runner.
-Called from BenchmarkExecutor via asyncio.run_in_executor() so it never
-blocks the event loop.  PyTorch is an optional dependency — if it is not
-installed the module raises ImportError and execution.py falls back to
-the simulation path.
-Supported weight formats (detected by file extension):
-  .pt / .pth      — torch.load (TorchScript or state-dict)
-  .safetensors    — safetensors.torch.load_file
-  .onnx           — onnxruntime InferenceSession
-PRODUCTION SWAP POINTS are marked with  # <<< REPLACE IN PRODUCTION >>>
-"""
-from __future__ import annotations
-import time
-from pathlib import Path
-from typing import Any
-# ── Model cache (keyed by absolute path) ─────────────────────────────────────
-# Filled by _load_model; nothing in this module evicts entries.
-_MODEL_CACHE: dict[str, Any] = {}
-# Standard per-image input shapes per task as (C, H, W); run_torch_batch
-# prepends the batch dimension when it builds the dummy input.
-_INPUT_SHAPES: dict[str, tuple[int, int, int]] = {
-    "detection":      (3, 640, 640),
-    "segmentation":   (3, 640, 640),
-    "classification": (3, 224, 224),
-    "generation":     (3, 512, 512),
-    "embedding":      (3, 224, 224),
-}
-# Shape used when the task has no entry in _INPUT_SHAPES.
-_DEFAULT_SHAPE = (3, 640, 640)
-def run_torch_batch(model_path: str, batch_size: int, task: str = "detection") -> float:
-    """Run one inference batch and return per-image latency in ms.
-
-    The batch is a zero-filled dummy tensor shaped from the task's entry in
-    ``_INPUT_SHAPES``.  On CUDA an untimed warm-up pass runs first and the
-    device is synchronised around the timed pass.
-
-    Args:
-        model_path: Absolute path to the weight file.
-        batch_size: Number of images in the batch; must be at least 1.
-        task:       Model task (affects dummy input shape).
-
-    Returns:
-        Latency per image in milliseconds.
-
-    Raises:
-        ValueError:  If ``batch_size`` is smaller than 1.
-        ImportError: If PyTorch is not installed.
-    """
-    # Fail fast: a zero batch would otherwise load the model and then end in
-    # a ZeroDivisionError when the latency is averaged.
-    if batch_size < 1:
-        raise ValueError(f"batch_size must be >= 1, got {batch_size}")
-    import torch  # raises ImportError if not installed
-    on_cuda = torch.cuda.is_available()
-    device = "cuda" if on_cuda else "cpu"
-    ext = Path(model_path).suffix.lower()
-    model = _load_model(model_path, ext, device)
-    c, h, w = _INPUT_SHAPES.get(task, _DEFAULT_SHAPE)
-    dummy = torch.zeros(batch_size, c, h, w, device=device)
-    with torch.no_grad():
-        # Warm-up pass (first call is slower due to CUDA kernel compilation)
-        if on_cuda:
-            _forward(model, dummy, ext, device)
-            # Drain queued GPU work so it is not billed to the timed pass.
-            torch.cuda.synchronize()
-        # Timed pass
-        t0 = time.perf_counter()
-        _forward(model, dummy, ext, device)
-        if on_cuda:
-            # Kernels run asynchronously; wait for them before stopping the clock.
-            torch.cuda.synchronize()
-        elapsed_ms = (time.perf_counter() - t0) * 1000
-    return elapsed_ms / batch_size
-def _load_model(path: str, ext: str, device: str) -> Any:
-    """Return the model cached under ``path``, loading and caching it on first use."""
-    try:
-        return _MODEL_CACHE[path]
-    except KeyError:
-        pass
-    loaded = _load_by_ext(path, ext, device)
-    _MODEL_CACHE[path] = loaded
-    return loaded
-def _load_by_ext(path: str, ext: str, device: str) -> Any:
-    """Dispatch to the loader matching the file extension; unknown extensions raise ValueError."""
-    if ext == ".onnx":
-        return _load_onnx(path)
-    if ext == ".safetensors":
-        return _load_safetensors(path, device)
-    if ext == ".pt" or ext == ".pth":
-        return _load_torch(path, device)
-    raise ValueError(f"Unsupported model format: {ext}")
-def _load_torch(path: str, device: str) -> Any:
-    """Load a ``.pt``/``.pth`` file as a module in eval mode.
-
-    Tries ``torch.jit.load`` first.  On ``RuntimeError`` it falls back to
-    ``torch.load`` and accepts the result only if it exposes ``eval``.
-
-    Raises:
-        RuntimeError: If the fallback load yields an object without ``eval``
-            (treated here as a bare state-dict).
-    """
-    import torch
-    # <<< REPLACE IN PRODUCTION >>> with proper model class instantiation
-    # TorchScript models can be loaded directly; state-dict models need
-    # the model class to be imported separately.
-    try:
-        model = torch.jit.load(path, map_location=device)
-        model.eval()
-        return model
-    except RuntimeError:
-        # Not a TorchScript model — try loading as a full checkpoint
-        # SECURITY: weights_only=False lets torch.load unpickle arbitrary
-        # objects, which can execute code from the file.  Only use this on
-        # weight files from a trusted source.
-        obj = torch.load(path, map_location=device, weights_only=False)
-        if hasattr(obj, "eval"):
-            obj.eval()
-            return obj
-        # It's a state-dict — we cannot run inference without knowing the arch
-        raise RuntimeError(
-            f"Model at {path} is a state-dict; cannot run inference without "
-            "the model class. Use a TorchScript-exported .pt file."
-        )
-def _load_safetensors(path: str, device: str) -> Any:
-    """Placeholder loader: always raises, because safetensors files hold tensors only."""
-    # <<< REPLACE IN PRODUCTION >>> a model class is still required to
-    # rebuild the network; raising a clear error beats failing silently.
-    message = (
-        "safetensors inference requires the model class to be registered. "
-        "Convert to TorchScript or ONNX for architecture-agnostic inference."
-    )
-    raise NotImplementedError(message)
-def _load_onnx(path: str) -> Any:
-    """Open an onnxruntime session for ``path``, listing the CUDA provider before the CPU one."""
-    import onnxruntime as ort  # type: ignore[import]
-    return ort.InferenceSession(
-        path,
-        providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
-    )
-def _forward(model: Any, dummy: Any, ext: str, device: str) -> Any:
-    """Run a single forward pass, dispatching by model type.
-
-    Args:
-        model:  An onnxruntime session when ``ext`` is ``.onnx``; otherwise a callable model.
-        dummy:  Input batch; converted via ``.cpu().numpy()`` on the ONNX path.
-        ext:    Lower-cased file extension of the weight file.
-        device: Unused here; kept so all call sites share one signature.
-
-    Returns:
-        Whatever the underlying model or session returns.
-    """
-    if ext == ".onnx":
-        # The session is fed under the name of its first declared input.
-        input_name = model.get_inputs()[0].name
-        return model.run(None, {input_name: dummy.cpu().numpy()})
-    # TorchScript / nn.Module
-    return model(dummy)

config.py CHANGED Viewed

@@ -21,31 +21,15 @@ class Settings(BaseSettings):
     # ── API ───────────────────────────────────────────────────────────
     host: str = "0.0.0.0"
     port: int = 7860  # Default for HF Spaces
-    cors_origins: list[str] = [
-        "http://localhost:3000",
-        "http://127.0.0.1:3000",
-        "http://localhost:5173",
-        "http://127.0.0.1:5173",
-        "http://localhost:2000",
-        "http://127.0.0.1:2000",
-    ]
     # ── Storage ───────────────────────────────────────────────────────
     base_dir: Path = Path(__file__).resolve().parents[1]
     data_dir: Path = base_dir / "data"
-    models_dir:   Path = data_dir / "models"
-    datasets_dir: Path = data_dir / "datasets"   # root for imported datasets
-    logs_dir:     Path = data_dir / "logs"
-    db_path:      Path = data_dir / "modelzoo.db"
-    # ── Download Manager ──────────────────────────────────────────────
-    max_concurrent_downloads: int = 5
-    download_chunk_size: int = 1024 * 1024  # 1 MB
-    download_max_retries: int = 3
-    download_retry_delay: float = 2.0       # seconds (base, exponential backoff)
     # ── Search ────────────────────────────────────────────────────────
-    search_max_results: int = 500
     # ── Sync ──────────────────────────────────────────────────────────
     auto_sync_on_startup: bool = True
@@ -54,30 +38,10 @@ class Settings(BaseSettings):
     hf_api_base: str = "https://huggingface.co/api"
     hf_hub_url:  str = "https://huggingface.co"
     hf_token: str | None = None             # Optional: HF_TOKEN env var
-    hf_models_per_task: int = 100           # How many to pull per task
-    # ── ONNX Zoo ──────────────────────────────────────────────────────
-    onnx_models_url: str = (
-        "https://raw.githubusercontent.com/onnx/models/main/README.md"
-    )
-    # ── Benchmark Bridge ──────────────────────────────────────────────
-    benchmark_max_concurrent: int = 3      # max parallel benchmark jobs
-    benchmark_max_log_lines:  int = 500    # log entries kept per job
-    benchmark_ws_poll_hz:     float = 2.0  # WebSocket telemetry poll rate
-    # ── Dataset Manager ───────────────────────────────────────────────
-    roboflow_api_base:        str = "https://api.roboflow.com"
-    dataset_import_workers:   int = 3          # max concurrent import jobs
-    dataset_chunk_size:       int = 1024 * 1024 * 4   # 4 MB download chunk
-    roboflow_cache_ttl_secs:  int = 3600       # 1 hour
     def ensure_dirs(self) -> None:
         self.data_dir.mkdir(parents=True, exist_ok=True)
-        self.models_dir.mkdir(parents=True, exist_ok=True)
-        self.datasets_dir.mkdir(parents=True, exist_ok=True)
-        (self.datasets_dir / "_tmp").mkdir(parents=True, exist_ok=True)
-        self.logs_dir.mkdir(parents=True, exist_ok=True)
 settings = Settings()

     # ── API ───────────────────────────────────────────────────────────
     host: str = "0.0.0.0"
     port: int = 7860  # Default for HF Spaces
+    cors_origins: list[str] = ["*"]
     # ── Storage ───────────────────────────────────────────────────────
     base_dir: Path = Path(__file__).resolve().parents[1]
     data_dir: Path = base_dir / "data"
+    db_path:  Path = data_dir / "modelzoo.db"
     # ── Search ────────────────────────────────────────────────────────
+    search_max_results: int = 1000
     # ── Sync ──────────────────────────────────────────────────────────
     auto_sync_on_startup: bool = True
     hf_api_base: str = "https://huggingface.co/api"
     hf_hub_url:  str = "https://huggingface.co"
     hf_token: str | None = None             # Optional: HF_TOKEN env var
+    hf_models_per_task: int = 200           # Discovery server pulls more per task
     def ensure_dirs(self) -> None:
         self.data_dir.mkdir(parents=True, exist_ok=True)
 settings = Settings()

download/__init__.py DELETED Viewed

File without changes

download/__pycache__/__init__.cpython-310.pyc DELETED Viewed

Binary file (143 Bytes)

download/__pycache__/manager.cpython-310.pyc DELETED Viewed

Binary file (11.1 kB)

download/manager.py DELETED Viewed

@@ -1,366 +0,0 @@
-"""
-download/manager.py — Async download manager.
-Handles queueing, concurrency limiting, retry, resume, and progress tracking.
-All state is persisted in the jobs table for crash recovery.
-"""
-from __future__ import annotations
-import asyncio
-import json
-import uuid
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Any
-import aiofiles
-import httpx
-from tenacity import retry, stop_after_attempt, wait_exponential
-from config import settings
-from database.connection import get_db
-from models.job import Job, row_to_job
-from observability.logger import audit, get_logger
-from registry.registry import get_model, update_model_status
-log = get_logger("download_manager")
-# ── Semaphore caps concurrent downloads ───────────────────────────────────────
-_download_sem: asyncio.Semaphore | None = None
-def _get_sem() -> asyncio.Semaphore:
-    global _download_sem
-    if _download_sem is None:
-        _download_sem = asyncio.Semaphore(settings.max_concurrent_downloads)
-    return _download_sem
-# ── Job CRUD ──────────────────────────────────────────────────────────────────
-async def _create_job(
-    job_type: str,
-    model_id: str,
-    model_name: str,
-    meta: dict | None = None,
-) -> str:
-    job_id = str(uuid.uuid4())
-    db = await get_db()
-    now = datetime.now(timezone.utc).isoformat()
-    await db.execute(
-        """INSERT INTO jobs (id, type, status, model_id, model_name, meta, created_at, updated_at)
-           VALUES (?,?,?,?,?,?,?,?)""",
-        (job_id, job_type, "queued", model_id, model_name,
-         json.dumps(meta or {}), now, now),
-    )
-    await db.commit()
-    log.info("job_created", job_id=job_id, type=job_type, model_id=model_id)
-    await audit("job_created", model_id=model_id, job_id=job_id,
-                payload={"type": job_type, "model_name": model_name})
-    return job_id
-def _is_shard_file(filename: str) -> bool:
-    """Return True if the file is part of a sharded model (e.g. model-00001-of-00003.safetensors)."""
-    import re
-    return bool(re.search(r"-\d{5}-of-\d{5}\.", filename))
-async def _get_active_version(model_id: str) -> str:
-    """Return the active version string for a model, defaulting to 'v1'."""
-    model = await get_model(model_id)
-    if model and model.active_version:
-        return model.active_version
-    return "v1"
-@retry(
-    stop=stop_after_attempt(3),
-    wait=wait_exponential(multiplier=1, min=1, max=6),
-    reraise=True,
-)
-async def _resolve_hf_download_url(repo_id: str) -> str:
-    """Resolve a reliable download URL for a HF repo.
-    Prefer safetensors over pytorch_model.bin; fall back to onnx if needed.
-    """
-    async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client:
-        resp = await client.get(f"{settings.hf_api_base}/models/{repo_id}")
-        resp.raise_for_status()
-        data = resp.json()
-    siblings = data.get("siblings") or []
-    filenames: list[str] = []
-    for s in siblings:
-        fn = s.get("rfilename") or s.get("filename")
-        if fn:
-            filenames.append(fn)
-    preferred_exact = [
-        "model.safetensors",
-        "pytorch_model.bin",
-        "model.onnx",
-    ]
-    for fn in preferred_exact:
-        if fn in filenames:
-            return f"https://huggingface.co/{repo_id}/resolve/main/{fn}"
-    preferred_suffix = [".safetensors", ".bin", ".onnx", ".pt", ".pth"]
-    for suffix in preferred_suffix:
-        for fn in filenames:
-            if fn.endswith(suffix) and not _is_shard_file(fn):
-                return f"https://huggingface.co/{repo_id}/resolve/main/{fn}"
-    # Accept sharded files as a fallback (first shard of safetensors)
-    for fn in filenames:
-        if _is_shard_file(fn):
-            return f"https://huggingface.co/{repo_id}/resolve/main/{fn}"
-    # Last resort: try the index file for sharded models
-    if "model.safetensors.index.json" in filenames:
-        # For sharded models without a single file, use the first shard
-        for fn in filenames:
-            if fn.startswith("model-") and fn.endswith(".safetensors"):
-                return f"https://huggingface.co/{repo_id}/resolve/main/{fn}"
-    return f"https://huggingface.co/{repo_id}/resolve/main/pytorch_model.bin"
-async def _update_job(
-    job_id: str,
-    status: str | None = None,
-    progress: float | None = None,
-    error: str | None = None,
-    started_at: str | None = None,
-    ended_at: str | None = None,
-) -> None:
-    db = await get_db()
-    now = datetime.now(timezone.utc).isoformat()
-    parts: list[str] = ["updated_at = ?"]
-    vals: list[Any] = [now]
-    if status is not None:   parts.append("status = ?");    vals.append(status)
-    if progress is not None: parts.append("progress = ?");  vals.append(progress)
-    if error is not None:    parts.append("error = ?");     vals.append(error)
-    if started_at:           parts.append("started_at = ?"); vals.append(started_at)
-    if ended_at:             parts.append("ended_at = ?");   vals.append(ended_at)
-    vals.append(job_id)
-    await db.execute(f"UPDATE jobs SET {', '.join(parts)} WHERE id = ?", vals)
-    await db.commit()
-# ── Download worker ───────────────────────────────────────────────────────────
-async def _execute_download(
-    job_id: str,
-    model_id: str,
-    model_name: str,
-    download_url: str,
-    dest_path: Path,
-) -> None:
-    now = datetime.now(timezone.utc).isoformat()
-    await _update_job(job_id, status="running", started_at=now)
-    dest_path.parent.mkdir(parents=True, exist_ok=True)
-    tmp_path = dest_path.with_suffix(".tmp")
-    # Determine resume offset
-    resume_offset = tmp_path.stat().st_size if tmp_path.exists() else 0
-    headers: dict[str, str] = {}
-    if resume_offset:
-        headers["Range"] = f"bytes={resume_offset}-"
-        log.info("download_resume", job_id=job_id, offset=resume_offset)
-    try:
-        async with httpx.AsyncClient(timeout=120, follow_redirects=True) as client:
-            async with client.stream("GET", download_url, headers=headers) as resp:
-                resp.raise_for_status()
-                total = int(resp.headers.get("content-length", 0)) + resume_offset
-                downloaded = resume_offset
-                async with aiofiles.open(tmp_path, "ab" if resume_offset else "wb") as fh:
-                    async for chunk in resp.aiter_bytes(chunk_size=settings.download_chunk_size):
-                        await fh.write(chunk)
-                        downloaded += len(chunk)
-                        progress = downloaded / total if total else 0
-                        await _update_job(job_id, progress=min(progress, 0.99))
-        # Rename tmp → final
-        tmp_path.rename(dest_path)
-        now_end = datetime.now(timezone.utc).isoformat()
-        await _update_job(job_id, status="completed", progress=1.0, ended_at=now_end)
-        await update_model_status(
-            model_id,
-            status="cached",
-            downloaded=True,
-            local_path=str(dest_path),
-        )
-        # Copy into the active project's workspace models/ folder
-        from projects.service import link_model_to_active_project
-        await link_model_to_active_project(model_id, str(dest_path))
-        log.info("download_complete", job_id=job_id, model_id=model_id, path=str(dest_path))
-        await audit("download_complete", model_id=model_id, job_id=job_id,
-                    payload={"path": str(dest_path)})
-    except Exception as exc:
-        now_end = datetime.now(timezone.utc).isoformat()
-        await _update_job(job_id, status="failed", error=str(exc), ended_at=now_end)
-        await update_model_status(model_id, status="error")
-        log.error("download_failed", job_id=job_id, error=str(exc))
-        await audit("download_failed", model_id=model_id, job_id=job_id,
-                    payload={"error": str(exc)}, level="error")
-        raise
-# ── Public API ────────────────────────────────────────────────────────────────
-async def enqueue_download(
-    model_id: str,
-    model_name: str,
-    download_url: str | None = None,
-    version: str | None = None,
-) -> str:
-    """Create a download job and dispatch resolution+download in the background.
-    This function should not perform network calls; otherwise /download can return 500
-    on transient provider errors.
-    """
-    job_id = await _create_job("download", model_id, model_name)
-    asyncio.create_task(
-        _rate_limited_download_resolving(job_id, model_id, model_name, download_url, version)
-    )
-    return job_id
-async def _rate_limited_download_resolving(
-    job_id: str,
-    model_id: str,
-    model_name: str,
-    download_url: str | None,
-    version: str | None = None,
-) -> None:
-    async with _get_sem():
-        try:
-            resolved_url = await _resolve_download_url(model_id, download_url, version)
-            # Version folder: use explicit version label, else active_version from DB
-            folder = version or await _get_active_version(model_id)
-            ext = Path(resolved_url.split("?")[0]).suffix or ".bin"
-            dest_path = settings.models_dir / model_id / folder / f"model{ext}"
-            await _execute_download(job_id, model_id, model_name, resolved_url, dest_path)
-        except Exception as exc:
-            now_end = datetime.now(timezone.utc).isoformat()
-            await _update_job(job_id, status="failed", error=str(exc), ended_at=now_end)
-            await update_model_status(model_id, status="error")
-            log.error("download_failed", job_id=job_id, error=str(exc))
-            await audit(
-                "download_failed",
-                model_id=model_id,
-                job_id=job_id,
-                payload={"error": str(exc)},
-                level="error",
-            )
-async def _resolve_download_url(
-    model_id: str,
-    download_url: str | None,
-    version: str | None = None,
-) -> str:
-    """Resolve the final download URL for a model.
-    If `version` is provided and looks like a filename (e.g. 'yolov8n_pt'),
-    it was generated by hf_adapter from a sibling rfilename. Restore the
-    original filename (replace trailing _ext with .ext) and build a direct URL.
-    """
-    repo_id: str | None = None
-    if download_url and "huggingface.co" in download_url:
-        repo_id = download_url.replace("https://huggingface.co/", "").rstrip("/")
-    elif not download_url:
-        model = await get_model(model_id)
-        if model and model.download_url:
-            url = model.download_url
-            if "huggingface.co" in url:
-                repo_id = url.replace("https://huggingface.co/", "").rstrip("/")
-            else:
-                return url
-        else:
-            repo_id = model_id.replace("_", "/", 1)
-    else:
-        return download_url
-    # If the caller specified a version that is a converted rfilename
-    # (dots replaced with underscores by hf_adapter), reconstruct the filename.
-    if version and repo_id:
-        filename = _version_to_filename(version)
-        if filename:
-            return f"https://huggingface.co/{repo_id}/resolve/main/{filename}"
-    return await _resolve_hf_download_url(repo_id)
-def _version_to_filename(version: str) -> str | None:
-    """Convert an hf_adapter version string back to a real filename.
-    hf_adapter stores version as rfilename.replace('.', '_'), e.g.:
-      'yolov8n_pt' → 'yolov8n.pt'
-      'model_safetensors' → 'model.safetensors'
-    Only converts if the result ends with a known weight extension.
-    """
-    weight_exts = (".pt", ".pth", ".safetensors", ".bin", ".onnx")
-    # Try replacing the last underscore with a dot
-    idx = version.rfind("_")
-    if idx == -1:
-        return None
-    candidate = version[:idx] + "." + version[idx + 1:]
-    if any(candidate.endswith(ext) for ext in weight_exts):
-        return candidate
-    return None
-async def _rate_limited_download(
-    job_id: str,
-    model_id: str,
-    model_name: str,
-    download_url: str,
-    dest_path: Path,
-) -> None:
-    async with _get_sem():
-        try:
-            await _execute_download(job_id, model_id, model_name, download_url, dest_path)
-        except Exception:
-            pass  # Already logged & stored in DB
-async def get_job(job_id: str) -> Job | None:
-    db = await get_db()
-    async with db.execute("SELECT * FROM jobs WHERE id = ?", (job_id,)) as cur:
-        row = await cur.fetchone()
-    return row_to_job(row) if row else None
-async def list_jobs(
-    status: str | None = None,
-    limit: int = 50,
-) -> list[Job]:
-    db = await get_db()
-    if status:
-        sql = "SELECT * FROM jobs WHERE status = ? ORDER BY created_at DESC LIMIT ?"
-        params: tuple = (status, limit)
-    else:
-        sql = "SELECT * FROM jobs ORDER BY created_at DESC LIMIT ?"
-        params = (limit,)
-    async with db.execute(sql, params) as cur:
-        rows = await cur.fetchall()
-    return [row_to_job(r) for r in rows]
-async def cancel_job(job_id: str) -> bool:
-    """Cancel a queued or running job (best-effort)."""
-    job = await get_job(job_id)
-    if not job or job.status not in ("queued", "running"):
-        return False
-    now = datetime.now(timezone.utc).isoformat()
-    await _update_job(job_id, status="cancelled", ended_at=now)
-    log.info("job_cancelled", job_id=job_id)
-    return True

inference/__init__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- # inference package

inference/__pycache__/__init__.cpython-310.pyc DELETED Viewed

Binary file (144 Bytes)

inference/__pycache__/engine.cpython-310.pyc DELETED Viewed

Binary file (12 kB)

inference/__pycache__/session.cpython-310.pyc DELETED Viewed

Binary file (2.87 kB)

inference/engine.py DELETED Viewed

@@ -1,447 +0,0 @@
-"""
-inference/engine.py — MLForge Inference Engine.
-Dispatcher that routes each InferenceRequest to the correct adapter pipeline:
-  YOLO → YOLOInferencePipeline
-  TRANSFORMERS → TransformersPipeline
-  ONNX → ONNXPipeline
-  CUSTOM → CustomPipeline
-Each pipeline implements preprocess → inference_step → postprocess.
-Simulation paths are used when real model weights are not loaded;
-every  # <<< REPLACE IN PRODUCTION >>>  comment marks the exact swap point.
-Architecture follows the spec in infra_arch.md §4 (Adapter Protocol).
-"""
-from __future__ import annotations
-import asyncio
-import base64
-import io
-import random
-import time
-import uuid
-from typing import Any
-from models.inference import (
-    AdapterType,
-    Detection,
-    InferenceRequest,
-    InferenceResult,
-    PipelineStage,
-)
-from models.model import Model
-from observability.logger import get_logger
-log = get_logger("inference.engine")
-# ── Model cache: model_id → loaded model object ──────────────────────────────
-_MODEL_CACHE: dict[str, Any] = {}
-def _now_ms() -> float:
-    return time.perf_counter() * 1000
-# ── YOLO Pipeline ─────────────────────────────────────────────────────────────
-class YOLOPipeline:
-    """
-    YOLO inference pipeline.
-    Preprocess: letterbox resize → BGR→RGB → 1/255 normalise.
-    Postprocess: NMS → [{x1,y1,x2,y2,confidence,class_id,class_name}].
-    """
-    async def run(
-        self, req: InferenceRequest, model: Model
-    ) -> tuple[list[PipelineStage], dict[str, Any]]:
-        cfg  = req.yolo_config
-        conf = cfg.confidence if cfg else 0.25
-        iou  = cfg.iou_threshold if cfg else 0.45
-        stages: list[PipelineStage] = []
-        # — Stage 1: Preprocess ————————————————————————————
-        t0 = _now_ms()
-        await asyncio.sleep(0)        # yield control
-        if req.image_base64:
-            try:
-                raw_bytes = base64.b64decode(req.image_base64)
-                # <<< REPLACE IN PRODUCTION >>>
-                # img = cv2.imdecode(np.frombuffer(raw_bytes, np.uint8), cv2.IMREAD_COLOR)
-                # tensor = letterbox(img, 640) / 255.0
-                _ = len(raw_bytes)    # validate decode worked
-            except Exception as e:
-                return [PipelineStage(name="Preprocess", status="error", detail=str(e))], {}
-        pre_ms = _now_ms() - t0 + random.uniform(0.8, 2.5)
-        stages.append(PipelineStage(name="Preprocess", status="done",
-                                    latency_ms=round(pre_ms, 2), detail="Letterbox 640×640"))
-        # — Stage 2: Engine Load ———————————————————————————
-        t1 = _now_ms()
-        loaded = model.id in _MODEL_CACHE
-        load_ms = 0.0 if loaded else random.uniform(80, 220)
-        await asyncio.sleep(load_ms / 1000.0)
-        if not loaded:
-            _MODEL_CACHE[model.id] = object()   # <<< REPLACE: load actual weights
-        stages.append(PipelineStage(name="Engine Load", status="done",
-                                    latency_ms=round(_now_ms() - t1, 2),
-                                    detail="Cache hit" if loaded else "Weights loaded"))
-        # — Stage 3: Inference ————————————————————————————
-        t2 = _now_ms()
-        size_gb   = max(model.size, 1) / (1024 ** 3)
-        base_lat  = 2.5 + size_gb * 1.5
-        infer_ms  = base_lat + random.gauss(0, base_lat * 0.07)
-        await asyncio.sleep(infer_ms / 1000.0)
-        # <<< REPLACE IN PRODUCTION >>>
-        # results = model_obj(tensor, conf=conf, iou=iou)
-        stages.append(PipelineStage(name="Inference", status="done",
-                                    latency_ms=round(infer_ms, 2),
-                                    detail=f"conf={conf} iou={iou}"))
-        # — Stage 4: Post-process (NMS) ——————————————————
-        t3 = _now_ms()
-        detections = self._simulate_detections(conf, cfg.class_filter if cfg else [])
-        post_ms = random.uniform(0.3, 1.2)
-        await asyncio.sleep(post_ms / 1000.0)
-        stages.append(PipelineStage(name="NMS Post-process", status="done",
-                                    latency_ms=round(post_ms, 2),
-                                    detail=f"{len(detections)} detections"))
-        output: dict[str, Any] = {
-            "detections": [d.model_dump() for d in detections],
-            "pre_ms":   round(pre_ms, 2),
-            "infer_ms": round(infer_ms, 2),
-            "post_ms":  round(post_ms, 2),
-        }
-        return stages, output
-    @staticmethod
-    def _simulate_detections(conf_thresh: float, class_filter: list[str]) -> list[Detection]:
-        """Simulate bounding-box detections. <<< REPLACE with real NMS output."""
-        CLASSES = ["person", "car", "truck", "bicycle", "dog", "cat",
-                   "traffic light", "stop sign", "bench", "bird"]
-        n = random.randint(0, 8)
-        dets: list[Detection] = []
-        for _ in range(n):
-            c  = random.uniform(conf_thresh, 1.0)
-            cid = random.randint(0, len(CLASSES) - 1)
-            cname = CLASSES[cid]
-            if class_filter and cname not in class_filter:
-                continue
-            x1 = random.uniform(0, 0.7)
-            y1 = random.uniform(0, 0.7)
-            dets.append(Detection(
-                x1=round(x1 * 640, 1), y1=round(y1 * 640, 1),
-                x2=round((x1 + random.uniform(0.05, 0.3)) * 640, 1),
-                y2=round((y1 + random.uniform(0.05, 0.3)) * 640, 1),
-                confidence=round(c, 4),
-                class_id=cid, class_name=cname,
-            ))
-        return dets
-# ── Transformers Pipeline ─────────────────────────────────────────────────────
-class TransformersPipeline:
-    """
-    HuggingFace Transformers pipeline.
-    Preprocess: AutoTokenizer.encode.
-    Inference: model.generate with KV-cache.
-    Postprocess: decode + strip special tokens.
-    """
-    async def run(
-        self, req: InferenceRequest, model: Model
-    ) -> tuple[list[PipelineStage], dict[str, Any]]:
-        cfg = req.transformers_config
-        stages: list[PipelineStage] = []
-        # — Tokenize ——————————————————————————————————————
-        t0  = _now_ms()
-        txt = req.text_input or "Hello, world!"
-        tok_count = len(txt.split()) * 2          # rough BPE estimate
-        await asyncio.sleep(0.002)
-        pre_ms = _now_ms() - t0 + random.uniform(1, 4)
-        stages.append(PipelineStage(name="Tokenise", status="done",
-                                    latency_ms=round(pre_ms, 2),
-                                    detail=f"{tok_count} tokens"))
-        # — Engine Load —————————————————————————————————
-        t1 = _now_ms()
-        loaded = model.id in _MODEL_CACHE
-        load_ms = 0.0 if loaded else random.uniform(150, 400)
-        await asyncio.sleep(load_ms / 1000.0)
-        if not loaded:
-            _MODEL_CACHE[model.id] = object()
-        stages.append(PipelineStage(name="Engine Load", status="done",
-                                    latency_ms=round(_now_ms() - t1, 2),
-                                    detail="Cache hit" if loaded else "Model loaded"))
-        # — Generate ——————————————————————————————————————
-        t2 = _now_ms()
-        max_tok = cfg.max_new_tokens if cfg else 256
-        # Simulate token-by-token generation at ~20 tok/s
-        infer_ms = (max_tok / 20.0) * 1000 + random.gauss(0, 50)
-        await asyncio.sleep(min(infer_ms / 1000.0, 0.5))  # cap sim delay
-        # <<< REPLACE IN PRODUCTION >>>
-        # outputs = model_obj.generate(input_ids, max_new_tokens=max_tok,
-        #   temperature=cfg.temperature, top_p=cfg.top_p, do_sample=cfg.do_sample)
-        stages.append(PipelineStage(name="Generate", status="done",
-                                    latency_ms=round(infer_ms, 2),
-                                    detail=f"~{max_tok} tokens @ fp16"))
-        # — Decode ————————————————————————————————————————
-        t3 = _now_ms()
-        text_output = self._simulate_text(txt, max_tok)
-        post_ms = random.uniform(0.5, 2.0)
-        stages.append(PipelineStage(name="Decode", status="done",
-                                    latency_ms=round(post_ms, 2),
-                                    detail="Special tokens stripped"))
-        output: dict[str, Any] = {
-            "text_output": text_output,
-            "tokens_generated": max_tok,
-            "pre_ms":   round(pre_ms, 2),
-            "infer_ms": round(infer_ms, 2),
-            "post_ms":  round(post_ms, 2),
-        }
-        return stages, output
-    @staticmethod
-    def _simulate_text(prompt: str, n_tokens: int) -> str:
-        """Placeholder generation. <<< REPLACE with model.generate."""
-        lorem = (
-            "The model processed your input and generated a response based on the "
-            "learned distribution of the training corpus. This output is a simulation "
-            "placeholder — replace with actual model.generate() in production. "
-        )
-        # Repeat to roughly match token count
-        words = (lorem * (n_tokens // 20 + 1)).split()[:n_tokens]
-        return " ".join(words)
-# ── ONNX Pipeline ─────────────────────────────────────────────────────────────
-class ONNXPipeline:
-    """
-    ONNX Runtime pipeline.
-    Acts as universal wrapper for TF / sklearn / PyTorch exported models.
-    Dynamically maps input tensor names from model metadata.
-    """
-    async def run(
-        self, req: InferenceRequest, model: Model
-    ) -> tuple[list[PipelineStage], dict[str, Any]]:
-        cfg = req.onnx_config
-        stages: list[PipelineStage] = []
-        provider = cfg.execution_provider if cfg else "CUDAExecutionProvider"
-        # — Preprocess ————————————————————————————————————
-        t0 = _now_ms()
-        pre_ms = random.uniform(1.0, 3.5)
-        await asyncio.sleep(pre_ms / 1000.0)
-        stages.append(PipelineStage(name="Preprocess", status="done",
-                                    latency_ms=round(pre_ms, 2),
-                                    detail="Normalise + reshape tensor"))
-        # — ONNX Runtime ——————————————————————————————————
-        t1 = _now_ms()
-        loaded = model.id in _MODEL_CACHE
-        load_ms = 0.0 if loaded else random.uniform(50, 150)
-        await asyncio.sleep(load_ms / 1000.0)
-        if not loaded:
-            _MODEL_CACHE[model.id] = object()
-            # <<< REPLACE IN PRODUCTION >>>
-            # import onnxruntime as ort
-            # sess_opts = ort.SessionOptions()
-            # _MODEL_CACHE[model.id] = ort.InferenceSession(
-            #     model.local_path, sess_options=sess_opts,
-            #     providers=[provider])
-        stages.append(PipelineStage(name="ONNX Runtime", status="done",
-                                    latency_ms=round(_now_ms() - t1, 2),
-                                    detail=provider.replace("ExecutionProvider", "")))
-        # — Inference ————————————————————————————————————
-        t2 = _now_ms()
-        infer_ms = random.uniform(3.0, 12.0)
-        await asyncio.sleep(infer_ms / 1000.0)
-        # <<< REPLACE IN PRODUCTION >>>
-        # ort_inputs = {sess.get_inputs()[0].name: tensor.numpy()}
-        # raw = sess.run(None, ort_inputs)
-        stages.append(PipelineStage(name="Inference", status="done",
-                                    latency_ms=round(infer_ms, 2),
-                                    detail="session.run()"))
-        # — Format Output ————————————————————————————————
-        t3 = _now_ms()
-        post_ms = random.uniform(0.2, 0.8)
-        raw_out = {"output_0": [round(random.random(), 4) for _ in range(10)]}
-        stages.append(PipelineStage(name="Format Output", status="done",
-                                    latency_ms=round(post_ms, 2),
-                                    detail="Tensor → JSON"))
-        output: dict[str, Any] = {
-            "raw_output": raw_out,
-            "pre_ms":   round(pre_ms, 2),
-            "infer_ms": round(infer_ms, 2),
-            "post_ms":  round(post_ms, 2),
-        }
-        return stages, output
-# ── Custom Python Pipeline ────────────────────────────────────────────────────
-class CustomPipeline:
-    """
-    Sandboxed custom Python pipeline.
-    Executes user-supplied pre/postprocess scripts in a restricted namespace.
-    Only numpy, the input tensor, and the model's raw output are accessible.
-    """
-    FORBIDDEN = ("import os", "import sys", "subprocess", "open(", "__import__",
-                 "eval(", "exec(", "globals(", "locals(")
-    def _validate_script(self, script: str) -> str | None:
-        for tok in self.FORBIDDEN:
-            if tok in script:
-                return f"Forbidden token in script: {tok!r}"
-        return None
-    async def run(
-        self, req: InferenceRequest, model: Model
-    ) -> tuple[list[PipelineStage], dict[str, Any]]:
-        cfg    = req.custom_config
-        stages: list[PipelineStage] = []
-        # — Validate scripts ——————————————————————————————
-        if cfg:
-            for label, script in [("preprocess", cfg.preprocess_script),
-                                   ("postprocess", cfg.postprocess_script)]:
-                if script:
-                    err = self._validate_script(script)
-                    if err:
-                        return [PipelineStage(name=label.capitalize(),
-                                              status="error", detail=err)], {}
-        # — Transform Input ———————————————————————————————
-        pre_ms = random.uniform(1.0, 5.0)
-        await asyncio.sleep(pre_ms / 1000.0)
-        stages.append(PipelineStage(name="Transform Input", status="done",
-                                    latency_ms=round(pre_ms, 2),
-                                    detail="Custom preprocess script"))
-        # — Run Inference ————————————————————————————————
-        infer_ms = random.uniform(5.0, 30.0)
-        await asyncio.sleep(infer_ms / 1000.0)
-        # <<< REPLACE IN PRODUCTION >>>
-        # namespace = {"input": tensor, "model": raw_model}
-        # exec(compile(cfg.preprocess_script, "<pre>", "exec"), namespace)
-        # tensor = namespace.get("output", tensor)
-        stages.append(PipelineStage(name="Run Inference", status="done",
-                                    latency_ms=round(infer_ms, 2),
-                                    detail="Custom runtime"))
-        # — Format Result ————————————————————————————————
-        post_ms = random.uniform(0.5, 3.0)
-        stages.append(PipelineStage(name="Format Result", status="done",
-                                    latency_ms=round(post_ms, 2),
-                                    detail="Custom postprocess script"))
-        output: dict[str, Any] = {
-            "raw_output": {"custom_result": round(random.random(), 4)},
-            "pre_ms":   round(pre_ms, 2),
-            "infer_ms": round(infer_ms, 2),
-            "post_ms":  round(post_ms, 2),
-        }
-        return stages, output
-# ── Master Dispatcher ─────────────────────────────────────────────────────────
-_PIPELINE_MAP = {
-    AdapterType.YOLO:         YOLOPipeline,
-    AdapterType.TRANSFORMERS: TransformersPipeline,
-    AdapterType.ONNX:         ONNXPipeline,
-    AdapterType.CUSTOM:       CustomPipeline,
-}
-class InferenceEngine:
-    """
-    Central inference dispatcher.
-    Resolves the correct pipeline, executes it, and wraps the result
-    into a fully-populated InferenceResult.
-    """
-    async def run(self, req: InferenceRequest, model: Model) -> InferenceResult:
-        t_start = _now_ms()
-        pipeline_cls = _PIPELINE_MAP.get(req.adapter_type)
-        if pipeline_cls is None:
-            return InferenceResult(
-                request_id=str(uuid.uuid4()),
-                model_id=req.model_id,
-                adapter_type=req.adapter_type,
-                status="error",
-                error=f"Unknown adapter type: {req.adapter_type}",
-            )
-        try:
-            stages, output = await pipeline_cls().run(req, model)
-            total_ms = _now_ms() - t_start
-            pre_ms   = output.get("pre_ms", 0.0)
-            infer_ms = output.get("infer_ms", 0.0)
-            post_ms  = output.get("post_ms", 0.0)
-            # Quality score: mean confidence of detections (0–5 scale)
-            detections = [Detection(**d) for d in output.get("detections", [])]
-            if detections:
-                mean_conf = sum(d.confidence for d in detections) / len(detections)
-                quality   = round(mean_conf * 5.0, 2)
-            else:
-                quality = round(random.uniform(3.2, 4.8), 2)
-            result = InferenceResult(
-                model_id      = req.model_id,
-                adapter_type  = req.adapter_type,
-                preprocess_ms = pre_ms,
-                inference_ms  = infer_ms,
-                postprocess_ms= post_ms,
-                total_ms      = round(total_ms, 2),
-                pipeline      = stages,
-                detections    = detections,
-                text_output   = output.get("text_output"),
-                raw_output    = output.get("raw_output"),
-                quality_score = quality,
-                status        = "ok",
-            )
-            log.info("inference_complete",
-                     model_id=req.model_id,
-                     adapter=req.adapter_type,
-                     total_ms=round(total_ms, 2))
-            return result
-        except Exception as exc:
-            log.error("inference_error", model_id=req.model_id, error=str(exc))
-            return InferenceResult(
-                model_id=req.model_id,
-                adapter_type=req.adapter_type,
-                status="error",
-                error=str(exc),
-            )
-def get_cache_status() -> dict[str, bool]:
-    """Return which model IDs are currently warm in cache."""
-    return {k: True for k in _MODEL_CACHE}
-def evict_model(model_id: str) -> bool:
-    """Evict a model from the in-process cache (free VRAM sim)."""
-    if model_id in _MODEL_CACHE:
-        del _MODEL_CACHE[model_id]
-        return True
-    return False

inference/session.py DELETED Viewed

@@ -1,80 +0,0 @@
-"""
-inference/session.py — In-memory inference session ledger.
-Keeps the last MAX_HISTORY inference results per process lifetime.
-Persisted to the SQLite `inference_history` table on each write
-(non-blocking via aiosqlite).
-"""
-from __future__ import annotations
-import asyncio
-import json
-import uuid
-from collections import deque
-from typing import Deque
-from models.inference import InferenceHistoryEntry, InferenceRequest, InferenceResult
-from observability.logger import get_logger
-log = get_logger("inference.session")
-MAX_HISTORY = 200
-_history: Deque[InferenceHistoryEntry] = deque(maxlen=MAX_HISTORY)
-_lock = asyncio.Lock()
-async def record(req: InferenceRequest, result: InferenceResult, model_name: str) -> None:
-    """Append a completed inference run to the ledger."""
-    entry = InferenceHistoryEntry(
-        model_id    = req.model_id,
-        model_name  = model_name,
-        adapter_type = req.adapter_type,
-        total_ms    = result.total_ms,
-        quality_score = result.quality_score,
-        status      = result.status,
-        request_snapshot = req.model_dump(exclude={"image_base64"}),
-    )
-    async with _lock:
-        _history.appendleft(entry)
-    # Persist to DB (fire-and-forget)
-    asyncio.create_task(_persist(entry))
-async def _persist(entry: InferenceHistoryEntry) -> None:
-    try:
-        from database.connection import get_db
-        async with get_db() as db:
-            await db.execute(
-                """
-                INSERT OR REPLACE INTO inference_history
-                  (id, model_id, model_name, adapter_type, timestamp,
-                   total_ms, quality_score, status, request_snapshot)
-                VALUES (?,?,?,?,?,?,?,?,?)
-                """,
-                (
-                    entry.id,
-                    entry.model_id,
-                    entry.model_name,
-                    entry.adapter_type.value,
-                    entry.timestamp,
-                    entry.total_ms,
-                    entry.quality_score,
-                    entry.status,
-                    json.dumps(entry.request_snapshot),
-                ),
-            )
-            await db.commit()
-    except Exception as exc:
-        log.warning("inference_persist_failed", error=str(exc))
-async def get_history(limit: int = 50) -> list[InferenceHistoryEntry]:
-    async with _lock:
-        return list(_history)[:limit]
-async def clear_history() -> None:
-    async with _lock:
-        _history.clear()

main.py CHANGED Viewed

@@ -26,7 +26,6 @@ from fastapi.responses import JSONResponse
 from api.routes import models as models_router
 from api.routes import sync as sync_router
 from api.routes import datasets as datasets_router
-from api.routes import projects as projects_router
 from config import settings
 from database.connection import close_db, get_db
 from middleware.logging_middleware import RequestLoggingMiddleware
@@ -65,9 +64,9 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]:
 # ── Application ───────────────────────────────────────────────────────────────
 app = FastAPI(
-    title=settings.app_name,
     version=settings.version,
-    description="Production ML Model Zoo backend — local-first, traceable, extensible.",
     docs_url="/docs",
     redoc_url="/redoc",
     lifespan=lifespan,
@@ -91,8 +90,7 @@ async def global_exception_handler(request: Request, exc: Exception):
 # ── Middleware ─────────────────────────────────────────────────────────────────
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=settings.cors_origins,
-    allow_origin_regex=r"^https?://(localhost|127\\.0\\.0\\.1)(:\\d+)?$",
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
@@ -103,7 +101,6 @@ app.add_middleware(RequestLoggingMiddleware)
 app.include_router(models_router.router)
 app.include_router(sync_router.router)
 app.include_router(datasets_router.router)
-app.include_router(projects_router.router)
 @app.get("/health", tags=["system"])
@@ -114,6 +111,7 @@ async def health() -> dict:
     n_datasets = await count_datasets()
     return {
         "status": "ok",
         "version": settings.version,
         "model_count": n_models,
         "dataset_count": n_datasets,

 from api.routes import models as models_router
 from api.routes import sync as sync_router
 from api.routes import datasets as datasets_router
 from config import settings
 from database.connection import close_db, get_db
 from middleware.logging_middleware import RequestLoggingMiddleware
 # ── Application ───────────────────────────────────────────────────────────────
 app = FastAPI(
+    title="MLForge Cloud Registry",
     version=settings.version,
+    description="Global Model and Dataset Discovery Service — The Brain of MLForge.",
     docs_url="/docs",
     redoc_url="/redoc",
     lifespan=lifespan,
 # ── Middleware ─────────────────────────────────────────────────────────────────
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=["*"],  # Allow all origins for the cloud registry to support SDK/CLI/UI
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 app.include_router(models_router.router)
 app.include_router(sync_router.router)
 app.include_router(datasets_router.router)
 @app.get("/health", tags=["system"])
     n_datasets = await count_datasets()
     return {
         "status": "ok",
+        "service": "cloud_registry",
         "version": settings.version,
         "model_count": n_models,
         "dataset_count": n_datasets,

projects/__init__.py DELETED Viewed

File without changes