openclaw-pr-api / src /slop_farmer /app /analysis_id.py
evalstate's picture
evalstate HF Staff
Deploy OpenClaw PR API
938e923 verified
from __future__ import annotations
import re
from pathlib import Path
from slop_farmer.app_config import command_defaults
from slop_farmer.data.parquet_io import read_json
from slop_farmer.data.snapshot_paths import ROOT_MANIFEST_FILENAME, resolve_snapshot_dir_from_output
# Matches each run of characters that are not lowercase ASCII letters or digits.
MODEL_SLUG_PATTERN = re.compile(r"[^a-z0-9]+")


def model_slug(model: str) -> str:
    """Return a compact lowercase alphanumeric slug for a model spec.

    Everything from the first ``?`` onward is discarded, the remainder is
    stripped and lowercased, and every character outside ``[a-z0-9]`` is
    removed. When nothing is left, the placeholder ``"model"`` is returned.
    """
    name, _, _ = model.partition("?")
    cleaned = MODEL_SLUG_PATTERN.sub("", name.strip().lower())
    if not cleaned:
        return "model"
    return cleaned
def build_analysis_id(
    *,
    snapshot_id: str,
    model: str,
    ranking_backend: str,
    suffix: str | None = None,
) -> str:
    """Join backend, model, snapshot id and optional suffix into one id.

    The segments are joined with ``-`` in this order:

    1. ``ranking_backend`` stripped, lowercased and with every character
       outside ``[a-z0-9]`` removed (``"analysis"`` when nothing is left).
    2. ``model_slug(model)``.
    3. ``snapshot_id`` stripped and lowercased; it is not slugified further.
    4. ``suffix``, only when it is non-empty after normalization: stripped,
       lowercased, each run of characters outside ``[a-z0-9]`` collapsed to
       a single ``-``, and leading/trailing ``-`` trimmed.
    """
    backend_slug = MODEL_SLUG_PATTERN.sub("", ranking_backend.strip().lower())
    segments = [
        backend_slug if backend_slug else "analysis",
        model_slug(model),
        snapshot_id.strip().lower(),
    ]

    # Unlike the backend and model, the suffix keeps hyphen separators.
    suffix_slug = (
        MODEL_SLUG_PATTERN.sub("-", suffix.strip().lower()).strip("-") if suffix else ""
    )
    if suffix_slug:
        segments.append(suffix_slug)

    return "-".join(segments)
def analysis_id_from_snapshot(
    *,
    snapshot_dir: Path,
    model: str,
    ranking_backend: str,
    suffix: str | None = None,
) -> str:
    """Build an analysis id for the snapshot stored in ``snapshot_dir``.

    The snapshot id is taken from the ``"snapshot_id"`` entry of the root
    manifest file inside ``snapshot_dir``. When the manifest file does not
    exist, or that entry is missing or falsy, the directory name is used.

    Raises:
        ValueError: If the manifest exists but does not load as a dict, or
            if the resolved snapshot id is empty after stripping whitespace.
    """
    manifest_path = snapshot_dir / ROOT_MANIFEST_FILENAME
    if manifest_path.exists():
        manifest = read_json(manifest_path)
    else:
        # A missing manifest is tolerated; the directory name is used below.
        manifest = {}

    if not isinstance(manifest, dict):
        raise ValueError(f"Snapshot manifest at {manifest_path} must contain a JSON object.")

    declared_id = manifest.get("snapshot_id")
    raw_id = declared_id if declared_id else snapshot_dir.name
    snapshot_id = str(raw_id).strip()

    if snapshot_id:
        return build_analysis_id(
            snapshot_id=snapshot_id,
            model=model,
            ranking_backend=ranking_backend,
            suffix=suffix,
        )
    raise ValueError(f"Could not resolve snapshot_id from {manifest_path}")
def analysis_id_from_config(
    *,
    config_path: Path,
    output_dir: Path | None = None,
    snapshot_dir: Path | None = None,
    model: str | None = None,
    ranking_backend: str | None = None,
    suffix: str | None = None,
) -> str:
    """Build an analysis id, filling unset arguments from config defaults.

    Defaults are read via ``command_defaults("analyze", config_path=...)``.
    Any explicitly passed truthy argument wins over the matching default.
    Fallbacks when neither an argument nor a default is present:
    ``"data"`` for the output directory, ``"gpt-5.4-mini?service_tier=flex"``
    for the model, and ``"hybrid"`` for the ranking backend.

    The snapshot directory is resolved by
    ``resolve_snapshot_dir_from_output(output_dir, snapshot_dir)`` and the id
    itself is produced by ``analysis_id_from_snapshot``.
    """
    defaults = command_defaults("analyze", config_path=config_path)

    base_output = output_dir or defaults.get("output-dir", "data")
    resolved_snapshot_dir = resolve_snapshot_dir_from_output(Path(base_output), snapshot_dir)

    chosen_model = model or defaults.get("model", "gpt-5.4-mini?service_tier=flex")
    chosen_backend = ranking_backend or defaults.get("ranking-backend", "hybrid")

    return analysis_id_from_snapshot(
        snapshot_dir=resolved_snapshot_dir,
        model=str(chosen_model),
        ranking_backend=str(chosen_backend),
        suffix=suffix,
    )