File size: 2,573 Bytes
938e923
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from __future__ import annotations

import re
from pathlib import Path

from slop_farmer.app_config import command_defaults
from slop_farmer.data.parquet_io import read_json
from slop_farmer.data.snapshot_paths import ROOT_MANIFEST_FILENAME, resolve_snapshot_dir_from_output

# Matches each run of characters outside ``a-z`` / ``0-9``. Callers lowercase
# their input before substituting, so uppercase letters never reach it as-is.
MODEL_SLUG_PATTERN = re.compile(r"[^a-z0-9]+")


def model_slug(model: str) -> str:
    """Return a compact lowercase alphanumeric slug for a model spec.

    Everything from the first ``?`` onward is discarded, the remainder is
    stripped and lowercased, and every character outside ``a-z0-9`` is
    removed (not replaced). If nothing survives, the literal ``"model"`` is
    returned so the slug is never empty.
    """
    name, _, _ = model.partition("?")
    compact = MODEL_SLUG_PATTERN.sub("", name.strip().lower())
    if not compact:
        return "model"
    return compact


def build_analysis_id(
    *,
    snapshot_id: str,
    model: str,
    ranking_backend: str,
    suffix: str | None = None,
) -> str:
    """Compose a dash-joined analysis identifier.

    The identifier is ``<backend>-<model>-<snapshot>[-<suffix>]`` where:

    * ``<backend>`` is ``ranking_backend`` stripped, lowercased and reduced to
      ``a-z0-9`` characters, or ``"analysis"`` if that leaves nothing;
    * ``<model>`` is ``model_slug(model)``;
    * ``<snapshot>`` is ``snapshot_id`` stripped and lowercased (it is not
      otherwise sanitized);
    * ``<suffix>`` is appended only when ``suffix`` is truthy and still
      non-empty after normalization.
    """
    backend_slug = MODEL_SLUG_PATTERN.sub("", ranking_backend.strip().lower())
    if not backend_slug:
        backend_slug = "analysis"
    segments = [backend_slug, model_slug(model), snapshot_id.strip().lower()]

    # Unlike the backend/model slugs, the suffix keeps word boundaries: runs
    # of disallowed characters collapse to a single dash, and dashes at
    # either end are trimmed.
    suffix_slug = ""
    if suffix:
        suffix_slug = MODEL_SLUG_PATTERN.sub("-", suffix.strip().lower()).strip("-")
    if suffix_slug:
        segments.append(suffix_slug)

    return "-".join(segments)


def analysis_id_from_snapshot(
    *,
    snapshot_dir: Path,
    model: str,
    ranking_backend: str,
    suffix: str | None = None,
) -> str:
    """Build an analysis id for the snapshot stored in ``snapshot_dir``.

    The snapshot id is taken from the ``"snapshot_id"`` entry of the manifest
    file ``snapshot_dir / ROOT_MANIFEST_FILENAME`` when that file exists and
    the entry is truthy; otherwise the directory's own name is used.

    Raises:
        ValueError: if the manifest content is not a dict, or if the resolved
            snapshot id is empty after stripping whitespace.
    """
    manifest_path = snapshot_dir / ROOT_MANIFEST_FILENAME
    if manifest_path.exists():
        manifest = read_json(manifest_path)
    else:
        # A missing manifest is not an error; fall through to the dir name.
        manifest = {}
    if not isinstance(manifest, dict):
        raise ValueError(f"Snapshot manifest at {manifest_path} must contain a JSON object.")

    raw_id = manifest.get("snapshot_id") or snapshot_dir.name
    snapshot_id = str(raw_id).strip()
    if snapshot_id:
        return build_analysis_id(
            snapshot_id=snapshot_id,
            model=model,
            ranking_backend=ranking_backend,
            suffix=suffix,
        )
    raise ValueError(f"Could not resolve snapshot_id from {manifest_path}")


def analysis_id_from_config(
    *,
    config_path: Path,
    output_dir: Path | None = None,
    snapshot_dir: Path | None = None,
    model: str | None = None,
    ranking_backend: str | None = None,
    suffix: str | None = None,
) -> str:
    """Build an analysis id, filling unset options from the config file.

    Defaults are loaded with ``command_defaults("analyze", ...)``. For each of
    ``output_dir``, ``model`` and ``ranking_backend``, a truthy argument wins;
    otherwise the config value under ``"output-dir"``, ``"model"`` or
    ``"ranking-backend"`` is used, and failing that the built-in fallbacks
    ``"data"``, ``"gpt-5.4-mini?service_tier=flex"`` and ``"hybrid"``.

    The snapshot directory is resolved by ``resolve_snapshot_dir_from_output``
    from the output directory and the optional ``snapshot_dir``; the result is
    handed to ``analysis_id_from_snapshot``.
    """
    defaults = command_defaults("analyze", config_path=config_path)

    base_output = output_dir if output_dir else defaults.get("output-dir", "data")
    resolved_snapshot_dir = resolve_snapshot_dir_from_output(Path(base_output), snapshot_dir)

    chosen_model = model if model else defaults.get("model", "gpt-5.4-mini?service_tier=flex")
    resolved_model = str(chosen_model)
    chosen_backend = (
        ranking_backend if ranking_backend else defaults.get("ranking-backend", "hybrid")
    )
    resolved_backend = str(chosen_backend)

    return analysis_id_from_snapshot(
        snapshot_dir=resolved_snapshot_dir,
        model=resolved_model,
        ranking_backend=resolved_backend,
        suffix=suffix,
    )