Spaces:
Running
Running
| """Export a small, public-safe set of WitGym traces (Sharing is Caring). | |
| This writes deterministic-ish JSONL traces for a fixed set of canonical prompts. | |
| It intentionally omits any raw Office transcript text from retrieved scenes. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import os | |
| import time | |
| import hashlib | |
| import sys | |
| from pathlib import Path | |
# Fixed prompts used for every export run, so traces from different runs
# can be compared input-for-input.
CANONICAL_SINGLE_TURN: list[str] = [
    "I just got promoted to manager and I have no idea what I'm doing.",
    "My coworker keeps stealing my lunch from the fridge.",
    "My boss says he trusts me, but he rewrites every message I send.",
]
def _scene_id(setup: str, response: str) -> str:
    """Return a short, stable identifier for a scene.

    The identifier is the first 12 hex characters of the SHA-256 digest of
    ``setup`` and ``response`` joined by a single newline (UTF-8 encoded),
    so a scene can be referenced without reproducing its text.
    """
    hasher = hashlib.sha256()
    hasher.update(f"{setup}\n{response}".encode("utf-8"))
    return hasher.hexdigest()[:12]
def _safe_scene(scene) -> dict:
    """Build the exportable view of a retrieved scene.

    The scene's ``setup`` and ``response`` text is never copied into the
    result; it only feeds the hashed ``scene_id``. This keeps traces
    inspectable without publishing transcript text.

    Args:
        scene: Object exposing ``setup``, ``response``, ``show`` and
            ``character`` attributes, plus ``archetype``, ``tension_type``
            and ``violation_distance`` attributes that each carry a
            ``.value`` (presumably enums; confirm against the scene type).

    Returns:
        A dict with keys ``scene_id``, ``show``, ``character``,
        ``archetype``, ``tension_type`` and ``violation_distance``.
    """
    exported = {"scene_id": _scene_id(scene.setup, scene.response)}

    # Attributes copied through unchanged.
    for attr in ("show", "character"):
        exported[attr] = getattr(scene, attr)

    # Attributes exported through their ``.value``.
    for attr in ("archetype", "tension_type", "violation_distance"):
        exported[attr] = getattr(scene, attr).value

    return exported
def _pipeline_logs(result) -> list[dict]:
    """Summarise a pipeline result as an ordered list of step log entries.

    Each entry is a dict with ``step``, ``status`` (always ``"ok"``) and a
    short human-readable ``detail``. Candidate text is reduced to a word
    count and scenes to ``character:archetype`` pairs, so no generated or
    retrieved text appears in the log.

    Args:
        result: Object exposing ``metadata``, ``retrieved_scenes``,
            ``candidates`` and ``winning_persona``.

    Returns:
        Five entries, in order: metadata, retrieval, candidate_generation,
        ranking, compression.
    """
    metadata = result.metadata
    retrieved = result.retrieved_scenes
    generated = result.candidates

    metadata_detail = f"twist={metadata.twist_potential} archetype={metadata.archetype.value}"

    scene_parts = [f"{s.character}:{s.archetype.value}" for s in retrieved]
    retrieval_detail = ", ".join(scene_parts) or "no precedent scenes"

    candidate_parts = [f"{c.persona}:{len(c.text.split())}w" for c in generated]
    candidate_detail = ", ".join(candidate_parts) or "no candidates"

    ranking_detail = result.winning_persona or "none"

    step_details = (
        ("metadata", metadata_detail),
        ("retrieval", retrieval_detail),
        ("candidate_generation", candidate_detail),
        ("ranking", ranking_detail),
        ("compression", "selected line finalized"),
    )
    return [
        {"step": step, "status": "ok", "detail": detail}
        for step, detail in step_details
    ]
def _run_single(engine, user_input: str) -> dict:
    """Run one prompt through the engine and return its exportable trace row.

    Args:
        engine: Object exposing ``respond(user_input)``; the returned result
            must provide ``route``, ``metadata``, ``retrieved_scenes``,
            ``candidates``, ``winning_persona`` and ``selected``.
        user_input: The prompt text sent to the engine.

    Returns:
        A dict describing the run: the input, route, model/backend
        identifiers, latency in seconds (rounded to 2 decimals), extracted
        metadata, public-safe scene summaries, per-candidate persona and
        word count, the winning persona, the selected line, and step logs.
    """
    # perf_counter() is monotonic, so the measured latency cannot be skewed
    # (or go negative) if the system wall clock is adjusted mid-call, which
    # is possible with time.time().
    started = time.perf_counter()
    result = engine.respond(user_input)
    elapsed = time.perf_counter() - started

    meta = result.metadata
    return {
        "kind": "single_turn",
        "input": user_input,
        "route": result.route,
        # Model and backend are read from the environment, not from the
        # engine, so they reflect the configuration this process ran under.
        "model_id": os.getenv("LLM_MODEL_ID", "Qwen/Qwen3.5-27B"),
        "llm_backend": os.getenv("LLM_BACKEND", "hf_api"),
        "latency_s": round(elapsed, 2),
        "metadata": {
            "surface": meta.surface,
            "subtext": meta.subtext,
            # Tolerate metadata objects that lack this field.
            "behavioral_observation": getattr(meta, "behavioral_observation", None),
            "archetype": meta.archetype.value,
            "archetype_confidence": meta.archetype_confidence,
            "tension_type": meta.tension_type.value,
            "power_dynamic": meta.power_dynamic,
            "speaker_strategy": meta.speaker_strategy,
            "obvious_response": meta.obvious_response,
            "violation_distance": meta.violation_distance.value,
            "twist_potential": meta.twist_potential,
            "connector": meta.connector,
        },
        # Scenes are reduced to hashed ids and labels; no transcript text.
        "retrieved_scenes": [_safe_scene(s) for s in result.retrieved_scenes],
        # Only persona and word count are exported for candidates, not text.
        "candidates": [
            {"persona": c.persona, "word_count": len(c.text.split())}
            for c in result.candidates
        ],
        "winning_persona": result.winning_persona,
        "selected": result.selected,
        "logs": _pipeline_logs(result),
    }
def main() -> int:
    """Run every canonical prompt through the engine and write the traces as JSONL.

    Sets ``LLM_BACKEND`` to ``hf_api`` unless it is already set, makes the
    repository root importable, runs each prompt in
    ``CANONICAL_SINGLE_TURN``, and writes one JSON object per line to
    ``data/public_traces.jsonl`` (relative to the current working
    directory). All rows are produced before the output file is opened.

    Returns:
        ``0`` on completion, for use as the process exit status.
    """
    # Default to the HF API backend so runs need no local model weights.
    os.environ.setdefault("LLM_BACKEND", "hf_api")

    # Make the package importable when the script is run from a checkout
    # without an editable install.
    project_root = str(Path(__file__).resolve().parents[1])
    if project_root not in sys.path:
        sys.path.insert(0, project_root)

    from witgym.engine import WitGymEngine

    out_path = Path("data/public_traces.jsonl")
    out_path.parent.mkdir(parents=True, exist_ok=True)

    engine = WitGymEngine(index_path="data/index.npz")
    rows: list[dict] = [
        _run_single(engine, prompt) for prompt in CANONICAL_SINGLE_TURN
    ]

    with out_path.open("w", encoding="utf-8") as f:
        f.writelines(
            json.dumps(row, ensure_ascii=False) + "\n" for row in rows
        )

    print(f"✓ Wrote {len(rows)} trace rows → {out_path}")
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())