File size: 2,794 Bytes
8c7975e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from pathlib import Path
import json
import re
from .io_utils import save_json
# Replace with your real LLM runner
from .run_two_phase import call_model

def slugify(name: str) -> str:
    """Turn free text into a lowercase, underscore-separated identifier.

    Surrounding whitespace is dropped, every run of characters outside
    ``[a-zA-Z0-9]`` collapses into a single underscore, underscores at either
    end are removed, and the result is lowercased. When nothing usable is
    left, the fallback ``"scenario"`` is returned.
    """
    trimmed = name.strip()
    collapsed = re.sub(r"[^a-zA-Z0-9]+", "_", trimmed)
    slug = collapsed.strip("_").lower()
    if not slug:
        return "scenario"
    return slug

def build_pack_from_scenario(root_dir: Path, scenario_text: str, pack_name_hint: str = "") -> Path:
    """
    Create a scenario pack directory from a free-text scenario description.

    1) Reads the extractor prompts from ``root_dir / "prompts"``, substitutes
       ``scenario_text`` for the ``{SCENARIO_TEXT}`` placeholder and calls the
       model; the reply must be a JSON object and is written to inputs.json.
    2) Writes default constraints.json and schema.json, plus placeholder
       rubric.json, expected.json and clarifications.json.
    3) Returns the created pack path.

    Args:
        root_dir: Project root. Must contain ``prompts/extractor_system.txt``
            and ``prompts/extractor_user_template.txt``; the pack is created
            under ``root_dir / "packs"``.
        scenario_text: Free-text scenario handed to the extractor prompt.
        pack_name_hint: Optional name for the pack. When empty, the extracted
            ``"context"`` value is used if it is a non-empty string, otherwise
            the literal ``"scenario"``.

    Returns:
        Path of the pack directory (``root_dir / "packs" / <slug>``). An
        existing directory with the same slug is reused (``exist_ok=True``).

    Raises:
        ValueError: If the model reply is not valid JSON, or is valid JSON
            but not an object.
    """
    prompts_dir = root_dir / "prompts"
    extractor_sys = (prompts_dir / "extractor_system.txt").read_text(encoding="utf-8")
    extractor_user_tmpl = (prompts_dir / "extractor_user_template.txt").read_text(encoding="utf-8")
    user_prompt = extractor_user_tmpl.replace("{SCENARIO_TEXT}", scenario_text)

    raw = call_model(extractor_sys, user_prompt)
    try:
        extracted = json.loads(raw)
    # JSONDecodeError is a ValueError; TypeError covers a non-string reply.
    except (TypeError, ValueError) as e:
        raise ValueError(f"Extractor produced non-JSON output.\nRaw:\n{raw}") from e

    # Valid JSON is not enough: the pack name lookup below needs an object,
    # so reject lists/strings/numbers here with a clear message instead of
    # failing later with an AttributeError.
    if not isinstance(extracted, dict):
        raise ValueError(
            "Extractor output must be a JSON object, got "
            f"{type(extracted).__name__}.\nRaw:\n{raw}"
        )

    # Decide pack name. Only a string "context" can be slugified; anything
    # else (missing, null, number, nested structure) falls back to the default.
    context = extracted.get("context")
    if not isinstance(context, str):
        context = ""
    hint = pack_name_hint or context or "scenario"
    pack_slug = slugify(hint)[:48]
    pack_dir = root_dir / "packs" / pack_slug
    pack_dir.mkdir(parents=True, exist_ok=True)

    # inputs.json
    save_json(pack_dir / "inputs.json", extracted)

    # constraints.json (default, can be edited later)
    default_constraints = {
        "analytics_first": True,
        "require_longitudinal": True,
        "indicator_priority": ["obesity_pct", "metabolic_syndrome_pct", "membership_2021"],
        "capacity_formula": "teams * clients_per_team_per_day * working_days",
        "cost_formula": "startup_per_client + ongoing_per_client",
        "prioritization_rule": "Sort by membership_2021 and comorbidity prevalence; tie-breaker: infrastructure"
    }
    save_json(pack_dir / "constraints.json", default_constraints)

    # schema.json (points to global analysis schema)
    save_json(pack_dir / "schema.json", {
        "archetypes": ["burden_prioritization", "capacity", "cost", "outcomes", "optimization"],
        "output_schema": "schemas/analysis_output.schema.json"
    })

    # placeholders to support pipeline (optional for new packs)
    save_json(pack_dir / "rubric.json", {
        "set_equals": [],
        "must_contain": [],
        "numeric_equals": []
    })
    save_json(pack_dir / "expected.json", {
        "note": "Optional gold output for regression tests. Populate when available."
    })

    # empty clarifications; you can fill interactively or with a UI later
    save_json(pack_dir / "clarifications.json", {
        "_note": "Populate with answers to Phase 1 clarification questions if you want batch runs."
    })

    return pack_dir