"""Helpers for deriving analysis identifiers from a snapshot, model and ranking backend."""

from __future__ import annotations

import re
from pathlib import Path

from slop_farmer.app_config import command_defaults
from slop_farmer.data.parquet_io import read_json
from slop_farmer.data.snapshot_paths import ROOT_MANIFEST_FILENAME, resolve_snapshot_dir_from_output

# Matches every run of characters outside ``[a-z0-9]``. Inputs are lowercased
# before substitution, so uppercase letters are never stripped by accident.
MODEL_SLUG_PATTERN = re.compile(r"[^a-z0-9]+")


def model_slug(model: str) -> str:
    """Return a compact slug for *model*.

    Everything from the first ``?`` onwards is discarded, the remainder is
    stripped and lowercased, and every character outside ``[a-z0-9]`` is
    removed (not replaced). Falls back to ``"model"`` when nothing is left.
    """
    base = model.split("?", 1)[0].strip().lower()
    return MODEL_SLUG_PATTERN.sub("", base) or "model"


def build_analysis_id(
    *,
    snapshot_id: str,
    model: str,
    ranking_backend: str,
    suffix: str | None = None,
) -> str:
    """Join backend, model slug, snapshot id and optional suffix with ``-``.

    Args:
        snapshot_id: Snapshot identifier; only stripped and lowercased.
        model: Model name, reduced with :func:`model_slug`.
        ranking_backend: Backend name; characters outside ``[a-z0-9]`` are
            removed, with ``"analysis"`` used when nothing remains.
        suffix: Optional trailing component. Runs of characters outside
            ``[a-z0-9]`` become a single ``-`` and edge dashes are trimmed;
            the component is omitted when that leaves an empty string.

    Returns:
        The dash-joined analysis identifier.
    """
    parts = [
        MODEL_SLUG_PATTERN.sub("", ranking_backend.strip().lower()) or "analysis",
        model_slug(model),
        snapshot_id.strip().lower(),
    ]
    if suffix:
        normalized_suffix = MODEL_SLUG_PATTERN.sub("-", suffix.strip().lower()).strip("-")
        if normalized_suffix:
            parts.append(normalized_suffix)
    return "-".join(parts)


def analysis_id_from_snapshot(
    *,
    snapshot_dir: Path,
    model: str,
    ranking_backend: str,
    suffix: str | None = None,
) -> str:
    """Build an analysis id for the snapshot stored in *snapshot_dir*.

    The snapshot id is read from the ``snapshot_id`` key of the root manifest
    when that file exists; otherwise, or when the value is empty or
    whitespace-only, the directory name is used.

    Raises:
        ValueError: If the manifest is not a JSON object, or if neither the
            manifest nor the directory name yields a non-empty snapshot id.
    """
    manifest_path = snapshot_dir / ROOT_MANIFEST_FILENAME
    manifest = read_json(manifest_path) if manifest_path.exists() else {}
    if not isinstance(manifest, dict):
        raise ValueError(f"Snapshot manifest at {manifest_path} must contain a JSON object.")

    # Strip before testing truthiness so a whitespace-only manifest value falls
    # back to the directory name instead of producing an empty id.
    manifest_snapshot_id = str(manifest.get("snapshot_id") or "").strip()
    snapshot_id = manifest_snapshot_id or snapshot_dir.name.strip()
    if not snapshot_id:
        raise ValueError(f"Could not resolve snapshot_id from {manifest_path}")

    return build_analysis_id(
        snapshot_id=snapshot_id,
        model=model,
        ranking_backend=ranking_backend,
        suffix=suffix,
    )


def _default_or(defaults, key: str, fallback: str):
    """Return ``defaults.get(key)``, or *fallback* when it is missing or ``None``.

    ``defaults.get(key, fallback)`` only applies the fallback when the key is
    absent, so an explicit null would otherwise leak through as ``None``.
    """
    value = defaults.get(key)
    return fallback if value is None else value


def analysis_id_from_config(
    *,
    config_path: Path,
    output_dir: Path | None = None,
    snapshot_dir: Path | None = None,
    model: str | None = None,
    ranking_backend: str | None = None,
    suffix: str | None = None,
) -> str:
    """Build an analysis id using the ``analyze`` command defaults as fallbacks.

    Explicit (truthy) arguments win. Otherwise the value comes from
    ``command_defaults("analyze", config_path=config_path)`` under the keys
    ``output-dir``, ``model`` and ``ranking-backend``, and finally from the
    built-in fallbacks ``"data"``, ``"gpt-5.4-mini?service_tier=flex"`` and
    ``"hybrid"``. The snapshot directory is resolved by
    ``resolve_snapshot_dir_from_output`` from the output directory and the
    optional *snapshot_dir*.

    Raises:
        ValueError: Propagated from :func:`analysis_id_from_snapshot`.
    """
    defaults = command_defaults("analyze", config_path=config_path)
    resolved_snapshot_dir = resolve_snapshot_dir_from_output(
        Path(output_dir or _default_or(defaults, "output-dir", "data")),
        snapshot_dir,
    )
    resolved_model = str(
        model or _default_or(defaults, "model", "gpt-5.4-mini?service_tier=flex")
    )
    resolved_backend = str(
        ranking_backend or _default_or(defaults, "ranking-backend", "hybrid")
    )
    return analysis_id_from_snapshot(
        snapshot_dir=resolved_snapshot_dir,
        model=resolved_model,
        ranking_backend=resolved_backend,
        suffix=suffix,
    )