StavanKhobare committed on
Commit
0e23a69
·
0 Parent(s):

Initial commit: NeuralEdge AI Boardroom

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .claude/skills/openenv-hackathon/SKILL.md +189 -0
  2. .claude/skills/openenv-hackathon/reference/01-openenv-framework.md +284 -0
  3. .claude/skills/openenv-hackathon/reference/02-training-pipeline.md +297 -0
  4. .claude/skills/openenv-hackathon/reference/03-submission-checklist.md +140 -0
  5. .claude/skills/openenv-hackathon/reference/04-judging-rubric-playbook.md +102 -0
  6. .claude/skills/openenv-hackathon/reference/05-theme-selection.md +53 -0
  7. .gitattributes +8 -0
  8. .gitignore +32 -0
  9. CLAUDE.md +47 -0
  10. Dockerfile +54 -0
  11. FRONTEND_API.md +396 -0
  12. HANDOFF.md +184 -0
  13. MECHANICS.md +282 -0
  14. README.md +504 -0
  15. TEAMMATES.md +105 -0
  16. adapter_model.safetensors +3 -0
  17. assets/.gitkeep +0 -0
  18. assets/baseline.csv +405 -0
  19. assets/baseline_distribution.png +3 -0
  20. assets/before_after.png +3 -0
  21. assets/reward_curve.png +3 -0
  22. assets/trust_trajectory.png +3 -0
  23. boardsim_local.py +642 -0
  24. envs/.gitkeep +0 -0
  25. envs/board_sim_env/.dockerignore +19 -0
  26. envs/board_sim_env/README.md +162 -0
  27. envs/board_sim_env/__init__.py +14 -0
  28. envs/board_sim_env/client.py +47 -0
  29. envs/board_sim_env/debug_sim.py +23 -0
  30. envs/board_sim_env/models.py +56 -0
  31. envs/board_sim_env/openenv.yaml +6 -0
  32. envs/board_sim_env/pyproject.toml +33 -0
  33. envs/board_sim_env/server/Dockerfile +80 -0
  34. envs/board_sim_env/server/__init__.py +11 -0
  35. envs/board_sim_env/server/app.py +248 -0
  36. envs/board_sim_env/server/board_sim_env_environment.py +979 -0
  37. envs/board_sim_env/server/requirements.txt +3 -0
  38. envs/board_sim_env/uv.lock +0 -0
  39. frontend/index.html +22 -0
  40. frontend/package-lock.json +1681 -0
  41. frontend/package.json +19 -0
  42. frontend/src/App.jsx +111 -0
  43. frontend/src/components/AgentDecision.jsx +88 -0
  44. frontend/src/components/EndScreen.jsx +112 -0
  45. frontend/src/components/EventBanner.jsx +26 -0
  46. frontend/src/components/HistoryTimeline.jsx +50 -0
  47. frontend/src/components/MetricsPanel.jsx +83 -0
  48. frontend/src/components/NPCGrid.jsx +76 -0
  49. frontend/src/components/PlaybackControls.jsx +64 -0
  50. frontend/src/components/TopBar.jsx +59 -0
.claude/skills/openenv-hackathon/SKILL.md ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: openenv-hackathon
3
+ description: Use this skill for ANY work on the Meta PyTorch × Hugging Face OpenEnv Hackathon submission (India finale, Scaler Bangalore, Apr 25–26 2026). Trigger whenever the user says "build", "audit", "review", "check", "deploy", or references the environment, training script, README, HF Space, Colab notebook, submission, or judging criteria. The submission must use OpenEnv (latest release, v0.2.3), be hosted on a Hugging Face Space, include a TRL- or Unsloth-based training script (ideally a Colab notebook), show reward/loss plots from a real training run, and ship with a README that links a <2-min YouTube video or a mini-blog on HF. Judging is weighted 40% Environment Innovation / 30% Storytelling / 20% Reward Improvement Evidence / 10% Reward & Training Pipeline.
4
+ ---
5
+
6
+ # OpenEnv Hackathon — Build & Audit Skill
7
+
8
+ ## 1. Hackathon Calendar (hard deadlines)
9
+
10
+ | When | What | Where |
11
+ |---|---|---|
12
+ | **Apr 25, 11:30 AM IST** | Hacking begins | Scaler School of Technology, Bangalore — classrooms |
13
+ | **Apr 25, 3:30 PM IST** | Mentor Round 1 | Classrooms |
14
+ | **Apr 25, 8:00 PM IST** | Mentor Round 2 | Classrooms |
15
+ | **Apr 26, 10 AM – 12 PM IST** | Mentor Round 3 (final) | Classrooms |
16
+ | **Apr 26, 12:00 PM IST** | 5-hour submission reminder | Classrooms |
17
+ | **Apr 26, 3:00 PM IST** | 2-hour submission reminder | Classrooms |
18
+ | **Apr 26, 5:00 PM IST** | **SUBMISSION DEADLINE** — Google Form | — |
19
+ | **Apr 26, 5:15 PM IST** | Closing remarks | Main Stage |
20
+ | **Apr 26, 8:00 PM IST** | Event concludes | Near Main Stage |
21
+
22
+ **Rule**: Changes or commits to the HF Space URL after the deadline are ignored. Whatever is live at 5 PM IST on Apr 26 is what gets judged.
23
+
24
+ ## 2. Submission bundle (non-negotiable)
25
+
26
+ A submission missing ANY of these is "at a serious disadvantage". The Google Form on Apr 26 asks for:
27
+
28
+ 1. **Hugging Face Space URL** — the environment, deployed via `openenv push`. Must be PUBLIC.
29
+ 2. **Colab notebook link** — training script that judges can re-run.
30
+ 3. **Code repository link** — GitHub (or HF Hub repo). Include every file.
31
+ 4. **YouTube video URL OR Hugging Face blog post URL** — the story. Video ≤ 2 minutes.
32
+ 5. **README in the repo** — must link the Space, the Colab, the video/blog, and any slides. README is the judge's entry point.
33
+
34
+ Every URL also lives in the README. No large video files inside the Env HF Space — reference by URL.
35
+
36
+ ## 3. The five themes (pick one; Theme 5 is the wildcard)
37
+
38
+ | # | Theme | Teaches the LLM to… | Example problems |
39
+ |---|---|---|---|
40
+ | 1 | **Multi-Agent Interactions** | Cooperate, compete, negotiate, form coalitions; model others' beliefs (theory-of-mind) in partially observable settings. | Market simulations, compute-allocation negotiations, collaborative puzzle worlds, mixed coop/competitive games. |
41
+ | 2 | **(Super) Long-Horizon Planning & Instruction Following** | Decompose goals, track state across long trajectories beyond context limits, recover from early mistakes, handle sparse/delayed rewards. | Research-planning simulators, large-codebase refactoring, strategic resource management, logistics optimization, 300-instruction-scatter tasks. |
42
+ | 3.1 | **World Modeling — Professional Tasks** | Maintain internal state, update beliefs from outcomes, orchestrate multi-step workflows with real APIs/tools (no shortcuts). | Dynamic browser/API ecosystems, enterprise apps, scientific workflows (papers → code → experiments), tool-discovery benchmarks. |
43
+ | 3.2 | **World Modeling — Personalized Tasks** | Handle realistic personal delegation: messages, conflicts, scheduling, shopping. | Exec-assistant meeting planner, dinner/drive planning, tough email replies. |
44
+ | 4 | **Self-Improvement** | Generate new challenges, escalate difficulty, self-play, adaptive curricula — recursive skill amplification. | Self-play negotiation arenas, auto-generated math/proofs, evolving coding competitions, adaptive RL curricula. |
45
+ | 5 | **Wild Card — Impress Us** | Anything outside the boxes above that meaningfully trains an LLM capability. | — |
46
+
47
+ **Theme selection rule**: Round-1 problem is NOT required. Pick what best fits one of the themes AND excites the team — judges can tell when energy is real.
48
+
49
+ See [reference/05-theme-selection.md](reference/05-theme-selection.md) for a 60-minute ideation protocol and per-theme shortcut candidates.
50
+
51
+ ## 4. Judging rubric (memorize these weights)
52
+
53
+ | Weight | Criterion | What it really means | How I bias toward this when building |
54
+ |---|---|---|---|
55
+ | **40%** | Environment Innovation | Novel, creative, genuinely challenging. Tests agent behavior in a way that hasn't been done. | Push originality over polish. Avoid chess/snake/tic-tac-toe/grid clones. Ask: "Could a researcher write a paper on training against this?" |
56
+ | **30%** | Storytelling & Presentation | Clear problem statement; engaging demo; non-technical audience can follow. | README reads in 3–5 min. Video ≤ 2 min. Before/after agent behavior on screen. |
57
+ | **20%** | Showing Improvement in Rewards | Observable evidence: reward curves, metrics, before/after, baseline vs. trained on the same axes. | Train long enough that curves mean something. Commit `.png` plots to the repo. Caption each plot in the README. |
58
+ | **10%** | Reward & Training Pipeline | Reward logic is coherent, hard to game; pipeline produces real improvement in trained-agent behavior. | Compose Rubrics thoughtfully. Dense signal > 0/1-at-end. Test reward manually (random baseline should NOT score high). |
59
+
60
+ Innovation + Storytelling is **70%** of the score. A messy but ambitious env with real training evidence beats a polished but boring one — the rules state this explicitly.
61
+
62
+ See [reference/04-judging-rubric-playbook.md](reference/04-judging-rubric-playbook.md) for per-criterion tactics and anti-patterns.
63
+
64
+ ## 5. Tech stack (what to build with)
65
+
66
+ | Layer | Pick | Why |
67
+ |---|---|---|
68
+ | Environment framework | **OpenEnv v0.2.3** (`pip install openenv-core`) | Mandatory. Use `Environment` or `MCPEnvironment` base class, Gym-style API. |
69
+ | Training framework | **HF TRL `GRPOTrainer`** with `environment_factory=` | Official OpenEnv ↔ TRL integration (docs: [huggingface.co/docs/trl/openenv](https://huggingface.co/docs/trl/openenv)). |
70
+ | Speed/memory | **Unsloth** (optional, strongly recommended for Colab T4) | 2× speed, up to 70% memory cut; supports GRPO/GSPO/DPO on free Colab. |
71
+ | Base model | Start with **Qwen3-0.6B** or **Qwen3-1.7B** | Used in official examples; small enough for Colab, big enough to show learning. |
72
+ | Hosting | **Hugging Face Space** (via `openenv push`) | Mandatory. Space must be public and runnable. |
73
+ | Notebook | **Google Colab** | Judges need to re-run it. Use `uv run` or a pip install cell that works in fresh Colab. |
74
+ | Writeup | **HF blog post** OR **YouTube ≤ 2 min** | Mandatory. Link from README. |
75
+
76
+ See [reference/01-openenv-framework.md](reference/01-openenv-framework.md) for the full directory layout, file templates, openenv.yaml fields, and push workflow.
77
+ See [reference/02-training-pipeline.md](reference/02-training-pipeline.md) for a runnable TRL-GRPO training recipe tuned for Colab T4.
78
+
79
+ ## 6. What I do when the user says "build"
80
+
81
+ I infer the scope of the build from the target the user states, then work through these steps in order:
82
+
83
+ 1. **Confirm theme + problem statement** in one sentence before writing code. If ambiguous, ask. Don't silently assume.
84
+ 2. **Name the env** snake_case (e.g., `dinner_negotiator_env`). Create it via `openenv init <name>_env --output-dir envs` — do not hand-roll the scaffold.
85
+ 3. **Fill the four files** in this order: `models.py` (Action / Observation / State dataclasses) → `server/<env>_environment.py` (core logic: `reset`, `step`, optional `state`) → `server/app.py` (FastAPI wiring via `create_app` or `create_server`) → `client.py` (thin `EnvClient` subclass).
86
+ 4. **Update `openenv.yaml`** with `spec_version: 1`, `name`, `type`, `runtime`, `app`, `port`. No reserved MCP tool names (`reset`, `step`, `state`, `close`).
87
+ 5. **Set `SUPPORTS_CONCURRENT_SESSIONS = True`** on the Environment class AND pass `max_concurrent_envs=64` (or ≥ `generation_batch_size`) to `create_app`. Without this, training will fail with WebSocket capacity errors.
88
+ 6. **Design the reward** with OpenEnv Rubrics when possible: composable, dense, hard to game. Test with a random-policy baseline BEFORE writing the training script — the baseline should score noticeably worse than a competent agent.
89
+ 7. **Smoke-test locally** with Docker (`openenv init` produces a Dockerfile — use it). Verify `reset()` / `step()` work and reward is sensible over ~20 random episodes.
90
+ 8. **Deploy**: `openenv push --repo-id <user>/<env-name>`. Confirm the Space is live at `https://<user>-<env-name>.hf.space/health`.
91
+ 9. **Write the training script** as a Colab notebook using `GRPOTrainer(environment_factory=MyEnv, ...)`. Use Qwen3-0.6B unless the user specifies otherwise. Log to W&B or at minimum save `.png` plots.
92
+ 10. **Run the training** to produce real reward/loss curves. Commit the plots as `assets/reward_curve.png`, `assets/loss_curve.png` in the repo.
93
+ 11. **Write the README** — see [reference/03-submission-checklist.md](reference/03-submission-checklist.md) for the required-sections list and tone.
94
+
95
+ At each step, I report what was done in one line and move on.
96
+
97
+ ## 7. What I do when the user says "audit"
98
+
99
+ An audit is read-only until the user asks me to fix. I check the submission bundle against the rubric and report gaps as a prioritized list. My audit always covers, in this order:
100
+
101
+ 1. **Submission completeness** — all 5 bundle items present and linked from the README? (See [reference/03-submission-checklist.md](reference/03-submission-checklist.md).)
102
+ 2. **OpenEnv compliance** — uses v0.2.3; `Environment` or `MCPEnvironment` base; Gym-style `reset/step/state`; valid `openenv.yaml`; no reserved tool names; client/server separation (client never imports server internals).
103
+ 3. **HF Space health** — `openenv push` succeeded; `/health` returns 200; `/docs` loads; `SUPPORTS_CONCURRENT_SESSIONS` and `max_concurrent_envs` set for training.
104
+ 4. **Reward signal** — dense (not just 0/1 at terminal), hard to game. Flag any reward that a random agent could exploit for points without solving the task.
105
+ 5. **Training evidence** — reward curve exists, has >1 clearly-visible step of improvement, is committed as a real image file (not only in a deleted Colab cell / W&B run), baseline is on the same axes as the trained run.
106
+ 6. **README storytelling** — problem / environment / results / why-it-matters sections present; readable in 3–5 min; plots captioned.
107
+ 7. **Repo hygiene** — no leaked secrets (HF_TOKEN, WANDB_API_KEY), no large video files in the HF Space (reference by URL), no build artifacts/venvs committed.
108
+
109
+ I report findings as: **[SEVERITY] finding — fix**. Severities: `CRITICAL` (submission disqualifier), `HIGH` (likely to cost >10 rubric pts), `MED` (polish), `LOW` (nice-to-have).
110
+
111
+ ## 8. Hard rules (things I will refuse to do or strongly push back on)
112
+
113
+ - **Don't submit without a real training run.** "Training script exists" is NOT the bar. The bar is "connects to the environment, agent learns, plots prove it." If the user asks to skip training, I push back once and then flag it as a CRITICAL audit finding.
114
+ - **Don't clone chess / snake / tic-tac-toe / grid-world.** Judges have seen them. If the user proposes one, I recommend an angle that makes it genuinely novel (e.g., a meta-learning wrapper, a compositional reward, a new modality).
115
+ - **Don't use reserved MCP tool names (`reset`, `step`, `state`, `close`) in the environment, and don't use `WidthType.PERCENTAGE` (percentage widths) in docx tables** if we write docs.
116
+ - **Don't commit `.env` / `HF_TOKEN` / `WANDB_API_KEY`.** Use `huggingface_hub.login()` in Colab, read from env vars elsewhere.
117
+ - **Don't amend commits after submission.** The URL is frozen at deadline — a post-deadline commit is equivalent to submitting a different artifact.
118
+ - **Don't bloat the HF Space with video files.** Link to YouTube/HF blog instead.
119
+ - **Don't mock the environment in training.** If `environment_factory` is set, the training loop MUST hit the real Space (or a local Docker of it) — a static dataset disqualifies criterion #3 (20%).
120
+
121
+ ## 9. Directory structure this skill assumes
122
+
123
+ ```
124
+ OpenEnv Hackathon/
125
+ ├── .claude/skills/openenv-hackathon/
126
+ │ ├── SKILL.md # this file
127
+ │ └── reference/
128
+ │ ├── 01-openenv-framework.md # env anatomy, API, openenv.yaml, push
129
+ │ ├── 02-training-pipeline.md # TRL-GRPO Colab recipe
130
+ │ ├── 03-submission-checklist.md
131
+ │ ├── 04-judging-rubric-playbook.md
132
+ │ └── 05-theme-selection.md # theme fit analysis + ideation protocol
133
+ ├── envs/
134
+ │ └── <env_name>_env/ # the OpenEnv env — scaffolded by `openenv init`
135
+ ├── notebooks/
136
+ │ └── train_grpo.ipynb # the Colab judges will re-run
137
+ ├── assets/
138
+ │ ├── reward_curve.png
139
+ │ └── loss_curve.png
140
+ ├── README.md # the judge's entry point — links EVERYTHING
141
+ └── requirements.txt
142
+ ```
143
+
144
+ ## 10. External skills / tools the team needs
145
+
146
+ **Claude Code skills to use during the hackathon:**
147
+
148
+ - `anthropic-skills:pptx` — if the submission includes a slide deck (allowed as a writeup format).
149
+ - `frontend-design` — if building a demo web UI for the environment or a landing page.
150
+ - `python-performance-optimization` — profile training if reward curves plateau due to env-step latency (common on HF Spaces).
151
+ - `review` — self-review the diff before final push.
152
+ - `security-review` — final pass for leaked tokens / keys before making the repo public.
153
+
154
+ **External tools & accounts required (set up BEFORE Apr 25 morning):**
155
+
156
+ - Python 3.11+ and Docker Desktop installed locally.
157
+ - Hugging Face account + write token (`hf auth login`).
158
+ - `pip install "openenv-core>=0.2.3" trl unsloth wandb` (keep the quotes — an unquoted `>=` is parsed by the shell as an output redirect).
159
+ - Google Colab account (free T4 is enough for Qwen3-0.6B; Pro is better for 1.7B).
160
+ - Weights & Biases account (optional but highly recommended — gives judges a shareable run URL).
161
+ - GitHub account (public repo for the code link).
162
+ - YouTube channel (for the ≤2-min video) OR HF account with blog posting enabled.
163
+
164
+ **Technical competencies the team should have reviewed:**
165
+
166
+ - OpenEnv's Gymnasium-style API (`reset`, `step`, `state`) — see [reference/01-openenv-framework.md](reference/01-openenv-framework.md).
167
+ - GRPO algorithm intuition (group relative policy optimization — compares completions within a group; relative ranking > absolute values).
168
+ - Basic LoRA/PEFT for Unsloth fine-tuning on Colab.
169
+ - FastAPI basics (openenv init gives you the server scaffold, but you may need to extend it).
170
+ - Docker basics for local Space testing.
171
+
172
+ ## 11. Reference files
173
+
174
+ Each of these is loaded only when relevant — keep SKILL.md lean.
175
+
176
+ - **[reference/01-openenv-framework.md](reference/01-openenv-framework.md)** — Directory layout; models.py / environment.py / app.py / client.py templates; full openenv.yaml; `openenv init` / `openenv push` CLI; concurrency setup; common pitfalls.
177
+ - **[reference/02-training-pipeline.md](reference/02-training-pipeline.md)** — Complete TRL-GRPO Colab notebook recipe; Unsloth wiring; reward-function patterns; multi-environment training; plot generation; W&B logging.
178
+ - **[reference/03-submission-checklist.md](reference/03-submission-checklist.md)** — The final Apr 26 audit list; README template; sample commit structure; pre-deadline smoke tests.
179
+ - **[reference/04-judging-rubric-playbook.md](reference/04-judging-rubric-playbook.md)** — Tactics per criterion; what scores high on Innovation (40%); storytelling heuristics; training-evidence standards; anti-patterns.
180
+ - **[reference/05-theme-selection.md](reference/05-theme-selection.md)** — Theme-by-theme fit analysis; shortcut candidates per theme; decision framework for the first 90 minutes on Apr 25.
181
+
182
+ ---
183
+
184
+ **Source documents indexed by this skill:**
185
+ - `C:\Users\vitta\Downloads\[External] Apr '26 OpenEnv Hackathon Themes & Judging Criteria.docx` — authoritative rules.
186
+ - `C:\Users\vitta\Downloads\Meta Hackathon D-DAY.pptx` — Day-1/Day-2 event schedule.
187
+ - [huggingface.co/docs/trl/openenv](https://huggingface.co/docs/trl/openenv) — TRL ↔ OpenEnv integration.
188
+ - [github.com/meta-pytorch/OpenEnv](https://github.com/meta-pytorch/OpenEnv) — framework source, v0.2.3.
189
+ - [github.com/huggingface/openenv-course](https://github.com/huggingface/openenv-course) — 5-module tutorial.
.claude/skills/openenv-hackathon/reference/01-openenv-framework.md ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # OpenEnv Framework Reference
2
+
3
+ OpenEnv v0.2.3 (released Mar 28 2026). Install: `pip install "openenv-core>=0.2.3"`.
4
+
5
+ ## 1. The 3 APIs
6
+
7
+ All OpenEnv environments expose the Gymnasium-style trio:
8
+
9
+ | Method | Purpose | Returns |
10
+ |---|---|---|
11
+ | `reset(seed=None, episode_id=None, **kwargs)` | Start a new episode. | Initial `Observation` |
12
+ | `step(action, timeout_s=None, **kwargs)` | Apply one `Action`. | `Observation` (carries the `reward` and `done` fields) |
13
+ | `state()` | Metadata snapshot (episode_id, step_count, etc.) | `State` |
14
+
15
+ The client side mirrors this with both async and sync wrappers:
16
+
17
+ ```python
18
+ # async (preferred)
19
+ async with EchoEnv(base_url="https://...hf.space") as env:
20
+ obs = await env.reset()
21
+ obs = await env.step(EchoAction(message="hi"))
22
+
23
+ # sync
24
+ with EchoEnv(base_url="https://...hf.space").sync() as env:
25
+ obs = env.reset()
26
+ obs = env.step(EchoAction(message="hi"))
27
+ ```
28
+
29
+ ## 2. Two environment archetypes
30
+
31
+ **Typed step/reset (default)** — you define explicit `Action`/`Observation` dataclasses and implement `step(action)`. Use when actions are structured and enumerable (moves, choices, form submissions).
32
+
33
+ **MCP tool environment** — extend `MCPEnvironment`; the environment exposes named tools (e.g., `search`, `open_file`, `send_email`). Use when the agent should discover and call a set of tools. TRL's `environment_factory` loop automatically exposes every public method as an MCP-style tool.
34
+
35
+ ## 3. Directory layout (what `openenv init <name>_env` produces)
36
+
37
+ ```
38
+ <name>_env/
39
+ ├── openenv.yaml # manifest
40
+ ├── pyproject.toml # package metadata + deps
41
+ ├── README.md
42
+ ├── Dockerfile # container for the HF Space
43
+ ├── requirements.txt
44
+ ├── client.py # class <Name>Env(EnvClient)
45
+ ├── models.py # Action, Observation, State dataclasses
46
+ └── server/
47
+ ├── __init__.py
48
+ ├── <name>_environment.py # class <Name>Environment(Environment[...])
49
+ └── app.py # FastAPI wiring via create_app(...)
50
+ ```
51
+
52
+ Scaffold the whole thing with:
53
+
54
+ ```bash
55
+ openenv init my_env_env --output-dir envs
56
+ ```
57
+
58
+ Do NOT hand-roll the directory — the scaffold format changes across versions.
59
+
60
+ ## 4. `openenv.yaml` — full example
61
+
62
+ ```yaml
63
+ spec_version: 1
64
+ name: dinner_negotiator_env
65
+ type: environment # or mcp_environment
66
+ version: "0.1.0"
67
+ description: >
68
+ Multi-agent dinner-planning negotiation where the LLM must reconcile
69
+ dietary restrictions, budget, and scheduling conflicts across 3 family
70
+ members with hidden preferences.
71
+
72
+ runtime:
73
+ python: "3.11"
74
+ dependencies:
75
+ - openenv-core>=0.2.3
76
+ - fastapi
77
+ - pydantic
78
+
79
+ app:
80
+ module: server.app
81
+ factory: app # FastAPI ASGI app object
82
+ host: 0.0.0.0
83
+ port: 8000
84
+
85
+ max_concurrent_envs: 64 # ≥ generation_batch_size for TRL training
86
+ ```
87
+
88
+ Fields `spec_version`, `name`, `type`, `runtime`, and `app` (including the nested `app.port`) are required.
89
+
90
+ ## 5. Template — `models.py`
91
+
92
+ ```python
93
+ from dataclasses import dataclass, field
94
+ from typing import Optional
95
+
96
+ @dataclass
97
+ class MyAction:
98
+ """Structured action from the agent."""
99
+ move: str
100
+ target: Optional[str] = None
101
+
102
+ @dataclass
103
+ class MyObservation:
104
+ """What the agent sees after each step."""
105
+ text: str
106
+ reward: float = 0.0
107
+ done: bool = False
108
+ info: dict = field(default_factory=dict)
109
+
110
+ @dataclass
111
+ class MyState:
112
+ """Episode metadata (returned by state())."""
113
+ episode_id: str
114
+ step_count: int
115
+ target: str
116
+ remaining_turns: int
117
+ ```
118
+
119
+ ## 6. Template — `server/<name>_environment.py`
120
+
121
+ ```python
122
+ import random, uuid
123
+ from typing import Optional
124
+
125
+ try:
126
+ from openenv.core import Environment
127
+ except ImportError:
128
+ from openenv_core import Environment # dual-import pattern for Docker
129
+
130
+ from ..models import MyAction, MyObservation, MyState
131
+
132
+ class MyEnvironment(Environment[MyAction, MyObservation, MyState]):
133
+ SUPPORTS_CONCURRENT_SESSIONS: bool = True # REQUIRED for TRL training
134
+
135
+ def __init__(self, max_turns: int = 10):
136
+ self.max_turns = max_turns
137
+ self._reset_state()
138
+
139
+ def _reset_state(self):
140
+ self._episode_id = str(uuid.uuid4())[:8]
141
+ self._step_count = 0
142
+ self._remaining = self.max_turns
143
+ self._target = random.choice(["alpha", "bravo", "charlie"])
144
+
145
+ def reset(self, seed: Optional[int] = None, episode_id: Optional[str] = None,
146
+ **kwargs) -> MyObservation:
147
+ if seed is not None:
148
+ random.seed(seed)
149
+ self._reset_state()
150
+ if episode_id:
151
+ self._episode_id = episode_id
152
+ return MyObservation(
153
+ text=f"New episode. Pick one of: alpha | bravo | charlie. {self._remaining} turns left.",
154
+ )
155
+
156
+ def step(self, action: MyAction, timeout_s: Optional[float] = None,
157
+ **kwargs) -> MyObservation:
158
+ self._step_count += 1
159
+ self._remaining -= 1
160
+
161
+ correct = action.move == self._target
162
+ done = correct or self._remaining <= 0
163
+ reward = 1.0 if correct else (-0.1 if done else 0.0)
164
+
165
+ if correct:
166
+ text = f"Correct! Target was {self._target}."
167
+ elif done:
168
+ text = f"Out of turns. Target was {self._target}."
169
+ else:
170
+ text = f"Wrong. {self._remaining} turns left."
171
+
172
+ return MyObservation(text=text, reward=reward, done=done,
173
+ info={"step": self._step_count})
174
+
175
+ def state(self) -> MyState:
176
+ return MyState(
177
+ episode_id=self._episode_id,
178
+ step_count=self._step_count,
179
+ target=self._target,
180
+ remaining_turns=self._remaining,
181
+ )
182
+ ```
183
+
184
+ **Key rules:**
185
+ - `SUPPORTS_CONCURRENT_SESSIONS = True` — MUST be set for TRL training; otherwise only 1 WebSocket connects.
186
+ - Use `try/except` dual import — Docker runs from a different module root than the repo.
187
+ - Never use `reset`, `step`, `state`, `close` as MCP tool names — they collide with the base API.
188
+
189
+ ## 7. Template — `server/app.py`
190
+
191
+ ```python
192
+ try:
193
+ from openenv.server import create_app
194
+ except ImportError:
195
+ from openenv_core.server import create_app
196
+
197
+ from .my_environment import MyEnvironment
198
+ from ..models import MyAction, MyObservation
199
+
200
+ app = create_app(
201
+ environment_factory=lambda: MyEnvironment(max_turns=10),
202
+ action_type=MyAction,
203
+ observation_type=MyObservation,
204
+ max_concurrent_envs=64, # match or exceed generation_batch_size
205
+ )
206
+ ```
207
+
208
+ ## 8. Template — `client.py`
209
+
210
+ ```python
211
+ try:
212
+ from openenv.client import EnvClient
213
+ except ImportError:
214
+ from openenv_core.client import EnvClient
215
+
216
+ from .models import MyAction, MyObservation, MyState
217
+
218
+ class MyEnv(EnvClient[MyAction, MyObservation, MyState]):
219
+ ACTION_TYPE = MyAction
220
+ OBSERVATION_TYPE = MyObservation
221
+ STATE_TYPE = MyState
222
+ ```
223
+
224
+ Thin wrapper — the base class handles WebSocket, serialization, async/sync.
225
+
226
+ ## 9. Local testing (before push)
227
+
228
+ ```bash
229
+ # from repo root
230
+ pip install -e envs/my_env_env
231
+ python -m uvicorn envs.my_env_env.server.app:app --host 0.0.0.0 --port 8001
232
+
233
+ # in another shell
234
+ python -c "
235
+ from envs.my_env_env.client import MyEnv
236
+ from envs.my_env_env.models import MyAction
237
+ with MyEnv(base_url='http://localhost:8001').sync() as env:
238
+ print(env.reset())
239
+ print(env.step(MyAction(move='alpha')))
240
+ "
241
+ ```
242
+
243
+ Docker test (mirrors what the HF Space will run):
244
+
245
+ ```bash
246
+ docker build -t my_env envs/my_env_env
247
+ docker run -d -p 8001:8000 my_env
248
+ curl http://localhost:8001/health # expect 200 {"status": "ok"}
249
+ ```
250
+
251
+ ## 10. Push to HF Spaces
252
+
253
+ ```bash
254
+ cd envs/my_env_env
255
+ hf auth login # one-time
256
+ openenv push --repo-id <user>/my_env_env
257
+ # add --private to stage privately, then flip to public before submission
258
+ ```
259
+
260
+ After push, verify:
261
+ - `https://<user>-my-env-env.hf.space/health` → 200
262
+ - `https://<user>-my-env-env.hf.space/docs` → FastAPI Swagger UI
263
+ - `https://<user>-my-env-env.hf.space/web` → web UI (if enabled)
264
+
265
+ ## 11. Environment variables the Space respects
266
+
267
+ | Var | Default | Use |
268
+ |---|---|---|
269
+ | `WORKERS` | 4 | Uvicorn worker processes |
270
+ | `PORT` | 8000 | Internal port |
271
+ | `HOST` | 0.0.0.0 | Bind address |
272
+ | `MAX_CONCURRENT_ENVS` | 100 | WebSocket sessions cap |
273
+ | `ENABLE_WEB_INTERFACE` | auto | Toggle `/web` UI |
274
+
275
+ Set via HF Space → Settings → Variables & Secrets.
276
+
277
+ ## 12. Common pitfalls (cross-referenced from the official skill)
278
+
279
+ - **Forgetting `SUPPORTS_CONCURRENT_SESSIONS`** → training hangs after first batch.
280
+ - **Reserved MCP tool name** (`reset`/`step`/`state`/`close`) → silent conflict with base API.
281
+ - **Client importing server internals** → import cycle at container start. Client must ONLY import from `models.py`.
282
+ - **Committing build artifacts** (`__pycache__`, `.venv`, `dist/`) to the Space → slow push, bloated Space.
283
+ - **Using `openenv push` without first testing Docker locally** → broken Space, debug-via-logs-only loop.
284
+ - **Missing `xml:space="preserve"`** on docx edits (not relevant to env, noted only if generating docs).
.claude/skills/openenv-hackathon/reference/02-training-pipeline.md ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Training Pipeline Reference (TRL + Unsloth, Colab-ready)
2
+
3
+ The hackathon rubric is explicit: **reward-curve evidence is 20%** and **pipeline coherence is 10%** — so the training script must actually run end-to-end and produce plots. This file is the runnable recipe.
4
+
5
+ ## 1. Why GRPO (not PPO / DPO / SFT)
6
+
7
+ - GRPO is what the official TRL ↔ OpenEnv integration is built around.
8
+ - No separate reward model required — the environment IS the reward.
9
+ - Works with small models (Qwen3-0.6B trains on free Colab T4).
10
+ - Supports multi-turn tool-calling loops natively via `environment_factory=...`.
11
+
12
+ ## 2. Colab notebook skeleton
13
+
14
+ Put the whole thing in `notebooks/train_grpo.ipynb`. Cells below:
15
+
16
+ ### Cell 1 — installs
17
+
18
+ ```python
19
+ !pip install -q --upgrade \
20
+ "openenv-core>=0.2.3" \
21
+ "trl>=1.0" \
22
+ "transformers>=4.56" \
23
+ "accelerate" \
24
+ "peft" \
25
+ "datasets" \
26
+ "wandb" \
27
+ "unsloth" # optional; huge speedup on T4
28
+
29
+ # install your environment client
30
+ !pip install -q "my-env @ git+https://huggingface.co/spaces/<user>/my_env_env"
31
+ ```
32
+
33
+ ### Cell 2 — auth
34
+
35
+ ```python
36
+ import os
37
+ from huggingface_hub import login
38
+ from google.colab import userdata # if on Colab
39
+
40
+ login(token=userdata.get("HF_TOKEN"))
41
+ os.environ["WANDB_API_KEY"] = userdata.get("WANDB_API_KEY")
42
+ ```
43
+
44
+ **NEVER hardcode tokens in the notebook.** Use Colab Secrets (lock icon in left sidebar) or env vars.
45
+
46
+ ### Cell 3 — environment wrapper
47
+
48
+ ```python
49
+ from my_env import MyEnv
50
+ from my_env.models import MyAction
51
+
52
+ ENV_URL = "https://<user>-my-env-env.hf.space"
53
+
54
+ class MyToolEnv:
55
+ """Wrapper that exposes env methods as tool-callable functions for TRL."""
56
+
57
+ def __init__(self):
58
+ self.client = MyEnv(base_url=ENV_URL)
59
+ self.reward = 0.0
60
+ self.done = False
61
+
62
+ def reset(self, **kwargs) -> str | None:
63
+ result = self.client.reset()
64
+ self.reward = 0.0
65
+ self.done = False
66
+ return result.observation.text
67
+
68
+ def pick(self, choice: str) -> str:
69
+ """Make a choice in the environment.
70
+
71
+ Args:
72
+ choice: One of 'alpha', 'bravo', 'charlie'.
73
+
74
+ Returns:
75
+ Feedback message from the environment.
76
+ """
77
+ if self.done:
78
+ raise ValueError("Episode is over.")
79
+ result = self.client.step(MyAction(move=choice))
80
+ self.reward = result.reward
81
+ self.done = result.done
82
+ return result.observation.text
83
+ ```
84
+
85
+ **Rules for the wrapper class:**
86
+ - `__init__` takes no args other than `self`.
87
+ - `reset(**kwargs)` receives dataset columns; returns str | None.
88
+ - Every public method (not `_prefixed`) becomes a tool. Give them **specific names** (`guess`, `move`, `buy`, NOT `step`/`action`) — the model uses names to learn tool use.
89
+ - Tool methods need docstrings with `Args:` / `Returns:` — TRL generates the tool schema from these.
90
+ - Store reward/done on `self` — the reward function reads them later.
91
+ - Raise `ValueError("...")` when the episode should end — TRL feeds the message back to the model as a tool response.
92
+
93
+ ### Cell 4 — reward function
94
+
95
+ ```python
96
+ def reward_func(environments, **kwargs) -> list[float]:
97
+ """Called once per group after rollout. Returns one reward per env instance."""
98
+ return [env.reward for env in environments]
99
+ ```
100
+
101
+ Guidance from the TRL OpenEnv docs:
102
+ - **Binary (1.0 / 0.0) rewards often beat shaped rewards** for GRPO — relative ranking within the group matters more than absolute values.
103
+ - **Score outcomes, not paths** — let the env judge success; don't check for specific action sequences.
104
+ - **Sanity-test with a random policy before training** — if a random agent scores as high as a capable one, the reward is broken.
105
+
106
+ ### Cell 5 — dataset
107
+
108
+ ```python
109
+ from datasets import Dataset
110
+
111
+ system_prompt = """You are an agent interacting with the 'pick' environment.
112
+ You have one tool: pick(choice). Call it with 'alpha', 'bravo', or 'charlie'.
113
+ Only one choice is correct per episode. Use feedback from the environment to learn."""
114
+
115
+ n = 500 # episodes per epoch
116
+ dataset = Dataset.from_dict({
117
+ "prompt": [[{"role": "user", "content": system_prompt}]] * n
118
+ })
119
+ ```
120
+
121
+ For multi-env training, add an `"env"` column and route in `reset(**kwargs)` — see the TRL multi_env.py example.
122
+
123
+ ### Cell 6 — trainer
124
+
125
+ ```python
126
+ from trl import GRPOConfig, GRPOTrainer
127
+
128
+ config = GRPOConfig(
129
+ output_dir="./grpo_my_env",
130
+ num_train_epochs=1,
131
+ per_device_train_batch_size=1,
132
+ gradient_accumulation_steps=8, # effective batch = 8
133
+ num_generations=4, # group size for GRPO
134
+ max_completion_length=1024, # TOTAL tokens across multi-turn — raise for long episodes
135
+ use_vllm=True,
136
+ vllm_mode="colocate", # single-GPU Colab
137
+ learning_rate=1e-6,
138
+ chat_template_kwargs={"enable_thinking": False},
139
+ log_completions=True,
140
+ report_to=["wandb"],
141
+ run_name="grpo-my-env-v1",
142
+ logging_steps=1,
143
+ save_steps=50,
144
+ )
145
+
146
+ trainer = GRPOTrainer(
147
+ model="Qwen/Qwen3-0.6B",
148
+ train_dataset=dataset,
149
+ reward_funcs=reward_func,
150
+ args=config,
151
+ environment_factory=MyToolEnv, # pass the CLASS, not an instance
152
+ )
153
+
154
+ trainer.train()
155
+ ```
156
+
157
+ ### Cell 7 — Unsloth speedup (optional, ~2× faster on T4)
158
+
159
+ Swap the model loader before constructing the trainer:
160
+
161
+ ```python
162
+ from unsloth import FastLanguageModel
163
+
164
+ model, tokenizer = FastLanguageModel.from_pretrained(
165
+ model_name="Qwen/Qwen3-0.6B",
166
+ max_seq_length=2048,
167
+ load_in_4bit=True,
168
+ dtype=None,
169
+ )
170
+ model = FastLanguageModel.get_peft_model(
171
+ model,
172
+ r=16, lora_alpha=32, lora_dropout=0,
173
+ target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"],
174
+ use_gradient_checkpointing="unsloth",
175
+ )
176
+
177
+ trainer = GRPOTrainer(
178
+ model=model,
179
+ processing_class=tokenizer, # pass the unsloth tokenizer (GRPOTrainer's argument is `processing_class`, not `tokenizer`)
180
+ ... # same as before
181
+ )
182
+ ```
183
+
184
+ ### Cell 8 — save plots as `.png` (REQUIRED for judging)
185
+
186
+ ```python
187
+ import matplotlib.pyplot as plt
188
+ import pandas as pd
189
+
190
+ # trainer.state.log_history is a list of dicts logged during training
191
+ log = pd.DataFrame(trainer.state.log_history)
192
+
193
+ # Reward curve (raw + smoothed)
194
+ fig, ax = plt.subplots(figsize=(8, 4))
195
+ if "reward" in log.columns:
196
+ ax.plot(log["step"], log["reward"], alpha=0.3, label="reward (raw)")
197
+ ax.plot(log["step"], log["reward"].rolling(20, min_periods=1).mean(), label="reward (smoothed)")
198
+ ax.set_xlabel("training step")
199
+ ax.set_ylabel("mean reward per group")
200
+ ax.set_title("GRPO training — reward over time")
201
+ ax.legend()
202
+ plt.tight_layout()
203
+ plt.savefig("assets/reward_curve.png", dpi=150)
204
+
205
+ # Loss curve
206
+ fig, ax = plt.subplots(figsize=(8, 4))
207
+ if "loss" in log.columns:
208
+ ax.plot(log["step"], log["loss"], label="policy loss")
209
+ ax.set_xlabel("training step")
210
+ ax.set_ylabel("loss")
211
+ ax.set_title("GRPO training — loss over time")
212
+ ax.legend()
213
+ plt.tight_layout()
214
+ plt.savefig("assets/loss_curve.png", dpi=150)
215
+ ```
216
+
217
+ Then commit both PNGs to the repo — judges MUST see them in the README.
218
+
219
+ ### Cell 9 — baseline-vs-trained comparison (scores high on rubric)
220
+
221
+ ```python
222
+ import numpy as np
223
+
224
+ def eval_model(model, n_episodes=50):
225
+ env = MyToolEnv()
226
+ rewards = []
227
+ for _ in range(n_episodes):
228
+ env.reset()
229
+ # ... run model for up to max_turns, collecting env.reward
230
+ rewards.append(env.reward)
231
+ return np.mean(rewards), np.std(rewards)
232
+
233
+ base_mean, base_std = eval_model("Qwen/Qwen3-0.6B")
234
+ trained_mean, trained_std = eval_model(trainer.model)
235
+
236
+ print(f"baseline: {base_mean:.3f} ± {base_std:.3f}")
237
+ print(f"trained: {trained_mean:.3f} ± {trained_std:.3f}")
238
+
239
+ # plot on same axes
240
+ fig, ax = plt.subplots(figsize=(6, 4))
241
+ ax.bar(["baseline", "trained"], [base_mean, trained_mean],
242
+ yerr=[base_std, trained_std], capsize=6)
243
+ ax.set_ylabel("mean episode reward (n=50)")
244
+ ax.set_title("Before vs. after GRPO training")
245
+ plt.tight_layout()
246
+ plt.savefig("assets/before_after.png", dpi=150)
247
+ ```
248
+
249
+ ## 3. Concurrency — DO NOT SKIP
250
+
251
+ TRL opens one WebSocket per generation. With `gradient_accumulation_steps=8` × `per_device_train_batch_size=1` × `num_generations=4`, that is up to 32 concurrent sessions, so the Space must be configured to allow at least that many.
252
+
253
+ On the environment side:
254
+ ```python
255
+ # server/<name>_environment.py
256
+ class MyEnvironment(Environment[...]):
257
+ SUPPORTS_CONCURRENT_SESSIONS: bool = True
258
+ ```
259
+ ```python
260
+ # server/app.py
261
+ app = create_app(..., max_concurrent_envs=64)
262
+ ```
263
+
264
+ On the Space side (via HF Space → Settings → Variables):
265
+ ```
266
+ MAX_CONCURRENT_ENVS=64
267
+ WORKERS=2
268
+ ```
269
+
270
+ **Always duplicate the Space to your own account before training.** Public shared Spaces get rate-limited.
271
+
272
+ ## 4. Colab T4 reality check
273
+
274
+ - **Qwen3-0.6B** trains in ~30 min for a 500-episode Wordle-style task.
275
+ - **Qwen3-1.7B** needs Colab Pro (A100) for a comparable run; T4 will OOM.
276
+ - **Gradient accumulation** > 8 is feasible on a T4 when using Unsloth + LoRA.
277
+ - **vLLM colocate mode** reclaims ~3 GB by sharing weights between generation and training.
278
+ - Save checkpoints every 50 steps so a Colab disconnect doesn't nuke progress.
279
+
280
+ ## 5. What failure looks like, and how to recover fast
281
+
282
+ | Symptom | Cause | Fix |
283
+ |---|---|---|
284
+ | Training hangs after batch 1 | `SUPPORTS_CONCURRENT_SESSIONS=False` | Set True; redeploy. |
285
+ | Reward flat at 0 the whole run | Reward function returns wrong key, or tool method never called | Log `env.reward` + `env.done` per episode in Cell 3. |
286
+ | Reward saturates at 1.0 instantly | Reward is game-able (model finds shortcut) | Tighten env; add adversarial check; switch to binary terminal reward. |
287
+ | W&B run disappears | Colab session timeout + no local save | Set `save_steps=50` and download `output_dir` as a tarball. |
288
+ | `max_completion_length` exceeded errors | Episodes too long for the budget | Raise to 2048 or 4096; OR cap env turn count. |
289
+ | OOM on T4 | Batch × group × seq too large | Lower `num_generations` to 2, or switch to Unsloth 4-bit. |
290
+
291
+ ## 6. Official reference implementations to clone from
292
+
293
+ - **Echo** (simplest): [github.com/huggingface/trl/blob/main/examples/scripts/openenv/echo.py](https://github.com/huggingface/trl/blob/main/examples/scripts/openenv/echo.py)
294
+ - **Wordle** (multi-turn, exception-based episode end): [github.com/huggingface/trl/blob/main/examples/notebooks/openenv_wordle_grpo.ipynb](https://github.com/huggingface/trl/blob/main/examples/notebooks/openenv_wordle_grpo.ipynb)
295
+ - **Multi-env** (routing between 2 envs in one run): [github.com/huggingface/trl/blob/main/examples/scripts/openenv/multi_env.py](https://github.com/huggingface/trl/blob/main/examples/scripts/openenv/multi_env.py)
296
+
297
+ When unsure about any pattern, open the Wordle notebook — it is the canonical example.
.claude/skills/openenv-hackathon/reference/03-submission-checklist.md ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Submission Checklist (run this before 5 PM IST Apr 26)
2
+
3
+ The Google Form on Apr 26 asks for: HF Space URL, Colab notebook link, code repo link, YouTube OR HF blog link. Every URL must ALSO be in the README.
4
+
5
+ ## Tier 1 — disqualifiers (run these first)
6
+
7
+ - [ ] HF Space is **public** and `https://<user>-<env>.hf.space/health` returns 200.
8
+ - [ ] The Space was deployed via `openenv push` (not hand-rolled).
9
+ - [ ] The env uses `openenv-core >= 0.2.3`.
10
+ - [ ] A Colab notebook link is included; clicking "Run all" on a fresh Colab works end-to-end.
11
+ - [ ] The training script connects to the **real environment** (not a static dataset).
12
+ - [ ] `reward_curve.png` (or equivalent) exists IN THE REPO (committed, not only in Colab).
13
+ - [ ] README links: Space URL, Colab URL, video/blog URL, slide deck URL if any.
14
+ - [ ] No HF_TOKEN / WANDB_API_KEY / other secrets committed anywhere.
15
+ - [ ] No large video files in the Env HF Space (link to YouTube instead).
16
+
17
+ ## Tier 2 — rubric-boosters
18
+
19
+ ### Environment Innovation (40%)
20
+ - [ ] The env is NOT a chess/snake/tic-tac-toe/grid-world clone.
21
+ - [ ] One sentence explains what capability gap it targets.
22
+ - [ ] A researcher could plausibly write a paper about training on it.
23
+ - [ ] Reward is composed (OpenEnv Rubrics) not monolithic.
24
+
25
+ ### Storytelling & Presentation (30%)
26
+ - [ ] README reads in 3–5 minutes for a non-technical reviewer.
27
+ - [ ] Video is ≤ 2 minutes AND embeds/links from README.
28
+ - [ ] README has 4 sections: Problem / Environment / Results / Why it matters.
29
+ - [ ] Plots have captions explaining what the reviewer is looking at.
30
+ - [ ] At least one before/after comparison (text or visual) of agent behavior.
31
+
32
+ ### Showing Improvement in Rewards (20%)
33
+ - [ ] Reward curve shows a visible upward trend.
34
+ - [ ] Baseline (random or untrained) is plotted ON THE SAME AXES as the trained run.
35
+ - [ ] Training ran long enough that the curve has real signal (not 10 steps).
36
+ - [ ] W&B public run link is in the README (or plots are committed as real PNGs).
37
+ - [ ] Axes labeled: x = "training step" or "episode", y = "reward" or "loss", with units if applicable.
38
+
39
+ ### Reward & Training Pipeline (10%)
40
+ - [ ] Reward is hard to game — a random agent cannot score well.
41
+ - [ ] Pipeline is reproducible: `pip install -r requirements.txt && jupyter run notebooks/train_grpo.ipynb` works.
42
+ - [ ] Uses TRL `GRPOTrainer` with `environment_factory=` (or justified alternative).
43
+ - [ ] `SUPPORTS_CONCURRENT_SESSIONS=True` and `max_concurrent_envs ≥ generation_batch_size`.
44
+
45
+ ## Tier 3 — engineering hygiene
46
+
47
+ - [ ] Client never imports from `server/` (verified by grep).
48
+ - [ ] No reserved MCP tool names (`reset`, `step`, `state`, `close`).
49
+ - [ ] `openenv.yaml` is the current v0.2.3 format (spec_version: 1, name, type, runtime, app, port).
50
+ - [ ] `requirements.txt` pins major versions.
51
+ - [ ] No `__pycache__`, `.venv`, `dist/`, `.env` in the repo.
52
+ - [ ] LICENSE file present (recommend Apache-2.0 or MIT).
53
+
54
+ ## README template (paste and fill)
55
+
56
+ ```markdown
57
+ # <Env Name> — OpenEnv Hackathon Submission
58
+
59
+ > 1-sentence hook: what capability does this environment teach?
60
+
61
+ ## Links
62
+ - **HF Space (the environment)**: https://<user>-<env>.hf.space
63
+ - **Colab (training notebook)**: https://colab.research.google.com/drive/...
64
+ - **Code repo**: https://github.com/<user>/<repo>
65
+ - **Video (≤2 min)**: https://youtu.be/...
66
+ - **Blog**: https://huggingface.co/blog/<user>/<slug>
67
+ - **W&B training run**: https://wandb.ai/<user>/<project>/runs/...
68
+
69
+ ## Problem
70
+ What capability gap does this target? Why is the current state of LLMs insufficient here? (2–3 sentences.)
71
+
72
+ ## Environment
73
+ - **Theme**: Multi-Agent / Long-Horizon / World Modeling / Self-Improvement / Wild Card
74
+ - **Agent observes**: …
75
+ - **Agent acts by**: …
76
+ - **Reward signal**: …
77
+ - **Episode ends when**: …
78
+
79
+ ## Quick start
80
+ ```bash
81
+ pip install "my-env @ git+https://huggingface.co/spaces/<user>/<env>"
82
+ ```
83
+ ```python
84
+ from my_env import MyEnv
85
+ from my_env.models import MyAction
86
+ with MyEnv(base_url="https://<user>-<env>.hf.space").sync() as env:
87
+ print(env.reset())
88
+ print(env.step(MyAction(move="alpha")))
89
+ ```
90
+
91
+ ## Results
92
+
93
+ ![Reward curve](assets/reward_curve.png)
94
+ *Mean group reward over training steps. Qwen3-0.6B trained with GRPO for 500 steps.*
95
+
96
+ ![Before vs after](assets/before_after.png)
97
+ *Mean episode reward (n=50) before and after training. Error bars = 1σ.*
98
+
99
+ | Metric | Baseline (random) | Untrained Qwen3-0.6B | Trained Qwen3-0.6B |
100
+ |---|---|---|---|
101
+ | Mean reward | 0.04 | 0.12 | 0.78 |
102
+ | Success rate | 4% | 12% | 78% |
103
+
104
+ ## Training recipe
105
+ - Model: Qwen/Qwen3-0.6B
106
+ - Algorithm: GRPO (TRL v1.0+)
107
+ - Compute: 1× T4 on Colab
108
+ - Training time: ~30 min
109
+ - Episodes: 500
110
+
111
+ See [notebooks/train_grpo.ipynb](notebooks/train_grpo.ipynb) for the full pipeline.
112
+
113
+ ## Why this matters
114
+ Who benefits from an LLM trained on this? What can the resulting agent do that an untrained one cannot? (2–3 sentences.)
115
+
116
+ ## Team
117
+ <names, colleges, contact>
118
+ ```
119
+
120
+ ## Video (≤2 min) — storyboard template
121
+
122
+ | Time | What to show |
+ |---|---|
+ | 0:00–0:15 | Hook — show an LLM failing at the task |
123
+ | 0:15–0:45 | Explain the environment in one sentence; show the agent's observation/action |
124
+ | 0:45–1:15 | Show the reward curve going up |
125
+ | 1:15–1:45 | Show the trained agent succeeding at the task |
126
+ | 1:45–2:00 | Call to action — "try it at <HF Space URL>" |
127
+
128
+ Record on OBS / Loom; upload unlisted to YouTube; paste URL in README.
129
+
130
+ ## Final commit discipline
131
+
132
+ ```bash
133
+ git status # confirm no secrets / artifacts
134
+ git add README.md assets/ notebooks/ envs/
135
+ git commit -m "final submission: <env-name>"
136
+ git push origin main
137
+ # don't touch the HF Space URL after the deadline
138
+ ```
139
+
140
+ Then fill the Google Form with the 4 URLs. The README URL is fine as the "code repo link".
.claude/skills/openenv-hackathon/reference/04-judging-rubric-playbook.md ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Judging Rubric Playbook — How to score high
2
+
3
+ Rubric weights, verbatim from the hackathon rules:
4
+
5
+ | Weight | Criterion |
6
+ |---|---|
7
+ | **40%** | Environment Innovation — Is the environment novel, creative, or genuinely challenging? Does it meaningfully test agent behavior in a way that hasn't been done before? |
8
+ | **30%** | Storytelling & Presentation — Can you clearly explain the problem, the environment, and what the agent learned? Is the demo engaging and easy to follow for a non-technical audience? |
9
+ | **20%** | Showing Improvement in Rewards — Is there observable evidence of training progress? Reward curves, before/after behavior, comparison against a baseline. |
10
+ | **10%** | Reward & Training Pipeline — Is the reward logic coherent? Does the pipeline produce meaningful improvement in the trained agent's behavior? |
11
+
12
+ ## Innovation (40%) — the biggest lever
13
+
14
+ **The official rule: "Judges have seen a lot of chess, snake, tic-tac-toe, and grid-world clones."** Do not ship one.
15
+
16
+ ### Questions the rules tell you to ask yourself
17
+ 1. Does this environment exist to teach an LLM something it currently can't do well?
18
+ 2. Is the domain underexplored in RL/LLM training?
19
+ 3. Could a researcher write a paper about training on this?
20
+
21
+ ### High-innovation patterns that match the themes
22
+ - **Partially observable negotiation** (Theme 1) where each agent has private info — e.g., dinner planning with hidden allergies/budgets.
23
+ - **Tool-discovery benchmarks** (Theme 3.1) where the agent must read API docs at runtime and figure out which tool applies.
24
+ - **Long-document instruction following** (Theme 2) — 300 instructions scattered across a long document; tests selective attention and durable memory.
25
+ - **Self-play curriculum generation** (Theme 4) where the env generates harder variants of whatever task the agent is currently solving.
26
+ - **Real-personal-delegation** (Theme 3.2) — e.g., the agent receives a realistic Slack-style thread with 3 people proposing 5 meeting times, must pick one and reply to everyone.
27
+
28
+ ### Anti-innovation (avoid)
29
+ - Classic games with cosmetic reskin.
30
+ - Single-turn QA / classification dressed up as an env.
31
+ - Anything where the "environment" is actually just a frozen dataset with a scoring function.
32
+ - Reward = string-match against a ground-truth answer (doesn't need an env).
33
+
34
+ ## Storytelling (30%)
35
+
36
+ ### What the README must do
37
+ - **Open with a hook in ≤20 words.** "This env teaches an LLM to negotiate dinner plans across 3 people with conflicting dietary restrictions and hidden preferences."
38
+ - **Show, don't tell.** Before/after behavior transcript beats prose.
39
+ - **Name the audience.** "This matters to anyone building personal-assistant LLMs that handle real delegation."
40
+ - **Embed plots inline** with one-line captions.
41
+ - **Link everything from the top** — Space, Colab, video, blog, W&B run.
42
+
43
+ ### What the video (≤2 min) must do
44
+ - **Open with failure.** Untrained model doing something dumb.
45
+ - **Show the env's rules in one visual.** Observation → action → reward diagram.
46
+ - **Show the reward curve going up.**
47
+ - **Show the trained model succeeding.**
48
+ - **End with a URL** the viewer can click.
49
+
50
+ ### Storytelling anti-patterns
51
+ - API docs masquerading as a README.
52
+ - Pure prose with no images.
53
+ - Video that explains the code instead of the capability.
54
+ - Demo that needs narration to understand what's happening on screen.
55
+
56
+ ## Reward-improvement evidence (20%)
57
+
58
+ ### Minimum viable evidence
59
+ - Reward curve committed as `assets/reward_curve.png` with captioned embed in README.
60
+ - Loss curve also helpful (proves the training actually updated weights).
61
+ - Baseline on the SAME AXES as the trained run — a single line going up is easy to dismiss.
62
+ - Explicit numbers: "baseline 4% success → trained 78% success (n=50)".
63
+
64
+ ### Patterns that separate top-10% from median
65
+ - **W&B public run link** in the README → reviewers can dig into any metric.
66
+ - **Ablation plot**: trained with reward v1 vs reward v2 vs random baseline, all on one axis.
67
+ - **Qualitative transcript**: one full agent trajectory before training, one after — side-by-side.
68
+ - **Multiple seeds**: 3 runs with error bars, not 1.
69
+
70
+ ### Traps that score 0 on this criterion
71
+ - Reward curve only exists in a deleted W&B run.
72
+ - Plot saved only in a Colab cell (disappears when Colab times out).
73
+ - Curve flat or noisy with no smoothed trendline.
74
+ - No baseline for comparison.
75
+ - 10 training steps — "noise, not signal".
76
+
77
+ ## Reward & pipeline coherence (10%)
78
+
79
+ ### What "coherent reward" means
80
+ - **Dense informative signal** — not just 0/1 at the terminal state. OR, if 0/1, it's a hard problem where that's appropriate.
81
+ - **Composable via OpenEnv Rubrics** — multiple sub-rubrics combined, not one monolithic score.
82
+ - **Hard to game** — test by running a random agent; if it scores near the trained agent, reward is broken.
83
+
84
+ ### What "coherent pipeline" means
85
+ - `environment_factory=` wired correctly; generation → tool parse → env step → reward → training — all handled by TRL.
86
+ - Concurrency configured: `SUPPORTS_CONCURRENT_SESSIONS=True` on env, `max_concurrent_envs ≥ generation_batch_size` on the app.
87
+ - Tool methods have docstrings with `Args:` blocks (TRL uses these to build the tool schema).
88
+ - Tool names are **specific** (`guess`, `negotiate`, `buy`) — not generic (`step`, `act`).
89
+
90
+ ### Red flags
91
+ - Custom rollout loop when `environment_factory` would have worked (the rubric favors the standard pattern).
92
+ - Reward hacked by a hard-coded regex against the model's output.
93
+ - Training against a mocked env (disqualifies the criterion — must hit the real deployed Space or a local Docker).
94
+
95
+ ## The 70% that's actually under your control
96
+
97
+ Innovation (40%) + Storytelling (30%) = **70% of the score**, and both are set mostly by Day-1 decisions:
98
+
99
+ 1. **Pick the right problem by noon on Apr 25.** A bad problem with great execution still caps around the median.
100
+ 2. **Draft the README hook and 2-min video storyboard before you write any env code.** If you can't explain it in one sentence, it's not ambitious enough yet.
101
+ 3. **Build the smallest viable env first, then iterate on innovation.** It's better to have a shippable boring env + a clear story than a brilliant env you couldn't deploy.
102
+ 4. **Record the video on Apr 26 morning, not at 4:55 PM.** Leave 90 minutes for recording + upload.
.claude/skills/openenv-hackathon/reference/05-theme-selection.md ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Theme Selection — Decision Framework for the First 90 Minutes on Apr 25
2
+
3
+ The hackathon explicitly allows picking a NEW problem at the finale — Round-1 entries are not required. The first 90 minutes is the single highest-leverage window of the whole event.
4
+
5
+ ## Decision tree
6
+
7
+ ```
8
+ Do we have a concrete problem that clearly fits one of Themes 1–4 (or the Theme 5 wild card)?
9
+ ├── YES → lock it in. Go to OpenEnv scaffold.
10
+ └── NO → run the 60-min ideation below, then decide.
11
+ ```
12
+
13
+ ## 60-minute ideation protocol
14
+
15
+ **Minute 0–15 — read the themes aloud to the team.** One person reads, others note any phrase that sparks an idea. Don't filter yet.
16
+
17
+ **Minute 15–30 — write one-sentence problem statements**, one per sticky note. Format: `"An environment that teaches an LLM to ___ by ___"`. Aim for 10–15 candidates.
18
+
19
+ **Minute 30–45 — score each on 3 axes (1–5 each):**
20
+ - **Novelty** — has a judge seen this before? (5 = never, 1 = clone)
21
+ - **Shippability in 30h** — can we deploy this by 5 PM tomorrow? (5 = trivial, 1 = heroic)
22
+ - **Reward learnability** — can a 0.6B–1.7B model actually improve on it in 30 min of Colab? (5 = yes, 1 = needs a 70B)
23
+
24
+ **Minute 45–60 — pick the highest total.** Ties broken by team excitement (the rules explicitly say this).
25
+
26
+ ## Shortcut candidates per theme (research-done for you)
27
+
28
+ ### Theme 1 — Multi-Agent Interactions
29
+ - **Hidden-role party negotiator** — 4 LLM "guests" with hidden dietary/budget constraints must agree on a restaurant in ≤5 turns. The agent-under-training is one of them. Reward = Pareto-optimality of the agreement.
30
+ - **Compute allocator** — N services bid for shared GPU time under changing priority. Agent-under-training learns to negotiate SLAs.
31
+
32
+ ### Theme 2 — Long-Horizon Planning
33
+ - **300-instruction document follower** — a fake product spec has 300 tiny requirements scattered across 50 pages. Agent must produce output that satisfies ≥K of them. Tests durable internal representation.
34
+ - **Research-plan simulator** — agent drafts a research plan, gets fake "reviewer feedback" across 10 rounds, must incorporate it.
35
+
36
+ ### Theme 3.1 — World Modeling, Professional
37
+ - **Tool-discovery env** — agent is given an undocumented API with 50 endpoints and must figure out how to accomplish a task through experimentation. Reward = success with minimum API calls.
38
+ - **Scientific-workflow loop** — paper → extracted hypothesis → pseudo-code → pseudo-experiment result → next paper. Agent learns to iterate.
39
+
40
+ ### Theme 3.2 — World Modeling, Personal
41
+ - **Inbox-triage env** — 20 emails arrive; agent must reply-all / reply-one / archive / snooze / delegate. Reward = combined latency + correctness per sender.
42
+ - **Calendar conflict resolver** — three colleagues propose 5 meeting times each; agent replies to each with the one that works for everyone.
43
+
44
+ ### Theme 4 — Self-Improvement
45
+ - **Proof-difficulty escalator** — agent generates math problems, tries to solve them, gets harder problems when it succeeds. Reward = steady-state difficulty reached.
46
+ - **Self-adversarial Wordle** — one agent proposes words, another tries to guess; roles rotate. Both improve.
47
+
48
+ ### Theme 5 — Wild Card
49
+ Use sparingly. Only if the idea doesn't map to Themes 1–4 AND you can explain in one sentence why an LLM trained on this is more useful than before. The rules promise rewards for out-of-the-box ideas — but they also warn that submissions "must meaningfully add value to LLM training".
50
+
51
+ ## Lock-in rule
52
+
53
+ Once the team commits (by 1:00 PM Apr 25), **stop idea-generating**. Every hour spent re-debating the problem is an hour not spent shipping. Write the one-sentence problem statement on a whiteboard. Everything after this point serves THAT sentence.
.gitattributes ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ frontend/node_modules/** filter=lfs diff=lfs merge=lfs -text
2
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
3
+ *.png filter=lfs diff=lfs merge=lfs -text
4
+ *.exe filter=lfs diff=lfs merge=lfs -text
5
+ *.node filter=lfs diff=lfs merge=lfs -text
6
+ esbuild filter=lfs diff=lfs merge=lfs -text
7
+ *.jpg filter=lfs diff=lfs merge=lfs -text
8
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ .pytest_cache/
5
+ .ruff_cache/
6
+ .mypy_cache/
7
+
8
+ .venv/
9
+ venv/
10
+ env/
11
+
12
+ .env
13
+ .env.*
14
+ *.key
15
+ *.pem
16
+
17
+ .ipynb_checkpoints/
18
+ *.ckpt
19
+ *.pt
20
+ *.bin
21
+
22
+ wandb/
23
+ runs/
24
+ outputs/
25
+ logs/
26
+
27
+ .DS_Store
28
+ Thumbs.db
29
+ .vscode/
30
+ .idea/
31
+
32
+ .claude/settings.local.json
CLAUDE.md ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # OpenEnv Hackathon — Project Context
2
+
3
+ This directory is the Meta PyTorch × Hugging Face OpenEnv Hackathon India finale submission
4
+ (Scaler Bangalore, Apr 25–26 2026). Deadline: **Apr 26, 5:00 PM IST** (Google Form).
5
+
6
+ ## Rules for Claude working in this repo
7
+
8
+ 1. **Use the `openenv-hackathon` skill** at `.claude/skills/openenv-hackathon/SKILL.md` for any
9
+ task involving the environment, training, README, deployment, or submission. It has the
10
+ hackathon calendar, judging rubric, file templates, and hard rules. For the human-readable
11
+ one-stop briefing, see `HANDOFF.md` at the repo root.
12
+ 2. **OpenEnv version is 0.2.3.** Never downgrade or use pre-0.2 APIs.
13
+ 3. **Training framework is TRL `GRPOTrainer`** with `environment_factory=`. Base model defaults
14
+ to `Qwen/Qwen3-0.6B` unless the team says otherwise.
15
+ 4. **Hosting is HF Spaces via `python -m openenv.cli push`**. The Space MUST be public.
16
+ 5. **Judging weights**: 40% Innovation, 30% Storytelling, 20% Reward Improvement Evidence,
17
+ 10% Reward & Training Pipeline. Bias every decision toward the first two.
18
+ 6. **Never commit secrets** (`HF_TOKEN`, `WANDB_API_KEY`, `.env`). `.gitignore` covers them.
19
+ 7. **Never amend commits after Apr 26 5:00 PM IST** — the URL is frozen at deadline.
20
+
21
+ ## Local environment (already verified Apr 24, 2026)
22
+ - Python 3.12.7
23
+ - openenv-core 0.2.3, trl 1.2.0, transformers 5.4.0, torch 2.5.1+cu121
24
+ - Docker 29.1.5, git 2.52
25
+ - OpenEnv CLI runs as: `python -m openenv.cli <subcommand>` (NOT bare `openenv`).
26
+
27
+ ## Directory layout
28
+ ```
29
+ envs/<env_name>_env/ # scaffolded via `python -m openenv.cli init <name>_env --output-dir envs`
30
+ notebooks/train_grpo.ipynb
31
+ assets/ # reward_curve.png, before_after.png — must be committed
32
+ README.md # judge entry point
33
+ requirements.txt
34
+ .claude/skills/openenv-hackathon/ # the skill + reference docs
35
+ ```
36
+
37
+ ## What's still TODO (as of Apr 24, 2026)
38
+ - [ ] Theme lock-in (team decides Apr 25, 1:00 PM IST)
39
+ - [ ] Environment name + `openenv.cli init`
40
+ - [ ] Fill `envs/<name>_env/` files (models → environment → app → client)
41
+ - [ ] `python -m openenv.cli push` to HF Space
42
+ - [ ] Write `notebooks/train_grpo.ipynb`
43
+ - [ ] Run training long enough for real reward curve
44
+ - [ ] Commit `assets/*.png`
45
+ - [ ] Fill README TBDs
46
+ - [ ] Record ≤2-min video OR write HF blog
47
+ - [ ] Submit Google Form by Apr 26 5:00 PM IST
Dockerfile ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Root-level Dockerfile for HF Spaces deployment.
2
+ # The actual environment lives in envs/board_sim_env/.
3
+ # This file replicates the env's Dockerfile logic with the correct build context paths.
4
+
5
+ ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
6
+ FROM ${BASE_IMAGE} AS builder
7
+
8
+ WORKDIR /app
9
+
10
+ RUN apt-get update && \
11
+ apt-get install -y --no-install-recommends git && \
12
+ rm -rf /var/lib/apt/lists/*
13
+
14
+ # Copy only the env subdirectory as the app code
15
+ COPY envs/board_sim_env /app/env
16
+
17
+ WORKDIR /app/env
18
+
19
+ # Ensure uv is available
20
+ RUN if ! command -v uv >/dev/null 2>&1; then \
21
+ curl -LsSf https://astral.sh/uv/install.sh | sh && \
22
+ mv /root/.local/bin/uv /usr/local/bin/uv && \
23
+ mv /root/.local/bin/uvx /usr/local/bin/uvx; \
24
+ fi
25
+
26
+ RUN --mount=type=cache,target=/root/.cache/uv \
27
+ if [ -f uv.lock ]; then \
28
+ uv sync --frozen --no-install-project --no-editable; \
29
+ else \
30
+ uv sync --no-install-project --no-editable; \
31
+ fi
32
+
33
+ RUN --mount=type=cache,target=/root/.cache/uv \
34
+ if [ -f uv.lock ]; then \
35
+ uv sync --frozen --no-editable; \
36
+ else \
37
+ uv sync --no-editable; \
38
+ fi
39
+
40
+ # Final runtime stage
41
+ FROM ${BASE_IMAGE}
42
+
43
+ WORKDIR /app
44
+
45
+ COPY --from=builder /app/env/.venv /app/.venv
46
+ COPY --from=builder /app/env /app/env
47
+
48
+ ENV PATH="/app/.venv/bin:$PATH"
49
+ ENV PYTHONPATH="/app/env:$PYTHONPATH"
50
+
51
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
52
+ CMD curl -f http://localhost:8000/health || exit 1
53
+
54
+ CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
FRONTEND_API.md ADDED
@@ -0,0 +1,396 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # NeuralEdge AI Boardroom — Frontend API Specification
2
+
3
+ ## Overview
4
+ The frontend communicates with the backend via REST/HTTP or WebSocket endpoints. The backend is a FastAPI server running at a configurable base URL (default: `http://localhost:8000` for local dev, or `https://<USER>-board-sim-env.hf.space` for production).
5
+
6
+ **Key Principle**: Frontend and backend are fully decoupled. The frontend only needs to know these endpoints; it does not import any backend code.
7
+
8
+ ---
9
+
10
+ ## 1. REST Endpoints
11
+
12
+ ### `POST /reset`
13
+ **Purpose**: Start a new game episode.
14
+
15
+ **Request Body**:
16
+ ```json
17
+ {
18
+ "seed": 42,
19
+ "episode_id": "optional-uuid-string"
20
+ }
21
+ ```
22
+
23
+ **Response** (200 OK):
24
+ ```json
25
+ {
26
+ "observation": {
27
+ "state": {
28
+ "round": 1,
29
+ "revenue": 2000000.0,
30
+ "burn_rate": 1200000.0,
31
+ "runway_months": 14.0,
32
+ "product_readiness": 0.45,
33
+ "market_share": 0.08,
34
+ "team_morale": 0.70,
35
+ "investor_confidence": 0.65,
36
+ "regulatory_risk": 0.20,
37
+ "profitability_score": 0.0,
38
+ "trust": {
39
+ "CTO": 0.5,
40
+ "CFO": 0.5,
41
+ "Investor Rep": 0.5,
42
+ "Independent": 0.5
43
+ },
44
+ "trust_history": [
45
+ {
46
+ "round": 0,
47
+ "CTO": 0.5,
48
+ "CFO": 0.5,
49
+ "Investor Rep": 0.5,
50
+ "Independent": 0.5
51
+ }
52
+ ],
53
+ "history": [],
54
+ "done_reason": null,
55
+ "winning_decision": null
56
+ },
57
+ "event": "Round 1 — Series-B runway crunch\nDescription: You've got 14 months of runway at current burn. Two paths: cut costs or raise.",
58
+ "options": [
59
+ "cut_costs",
60
+ "raise_capital",
61
+ "reduce_scope"
62
+ ],
63
+ "npc_statements": [
64
+ {
65
+ "role": "CTO",
66
+ "statement": "Look, the architecture won't survive shortcuts here.",
67
+ "vote": "cut_costs",
68
+ "confidence": 0.81
69
+ },
70
+ {
71
+ "role": "CFO",
72
+ "statement": "The numbers do not lie, and right now they're whispering.",
73
+ "vote": "cut_costs",
74
+ "confidence": 0.66
75
+ },
76
+ {
77
+ "role": "Investor Rep",
78
+ "statement": "Sequoia isn't here for incremental.",
79
+ "vote": "raise_capital",
80
+ "confidence": 0.74
81
+ },
82
+ {
83
+ "role": "Independent",
84
+ "statement": "Long-term reputation outlasts any single quarter.",
85
+ "vote": "cut_costs",
86
+ "confidence": 0.59
87
+ }
88
+ ],
89
+ "round": 1
90
+ },
91
+ "done": false,
92
+ "info": {
93
+ "episode_id": "uuid-string",
94
+ "seed": 42
95
+ }
96
+ }
97
+ ```
98
+
99
+ ---
100
+
101
+ ### `POST /step`
102
+ **Purpose**: Submit the agent's decision for the current round.
103
+
104
+ **Request Body**:
105
+ ```json
106
+ {
107
+ "action": {
108
+ "decision": "cut_costs",
109
+ "coalition_pitch": "Optional persuasive text targeting NPC agendas (unused in v1)"
110
+ }
111
+ }
112
+ ```
113
+
114
+ **Response** (200 OK):
115
+ ```json
116
+ {
117
+ "observation": {
118
+ "state": {
119
+ "round": 2,
120
+ "revenue": 2000000.0,
121
+ "burn_rate": 900000.0,
122
+ "runway_months": 18.5,
123
+ "product_readiness": 0.45,
124
+ "market_share": 0.08,
125
+ "team_morale": 0.65,
126
+ "investor_confidence": 0.60,
127
+ "regulatory_risk": 0.20,
128
+ "profitability_score": 12.34,
129
+ "trust": {
130
+ "CTO": 0.65,
131
+ "CFO": 0.70,
132
+ "Investor Rep": 0.40,
133
+ "Independent": 0.55
134
+ },
135
+ "trust_history": [
136
+ {
137
+ "round": 0,
138
+ "CTO": 0.5,
139
+ "CFO": 0.5,
140
+ "Investor Rep": 0.5,
141
+ "Independent": 0.5
142
+ },
143
+ {
144
+ "round": 1,
145
+ "CTO": 0.65,
146
+ "CFO": 0.70,
147
+ "Investor Rep": 0.40,
148
+ "Independent": 0.55
149
+ }
150
+ ],
151
+ "history": [
152
+ {
153
+ "round": 1,
154
+ "event_title": "Round 1 — Series-B runway crunch",
155
+ "agent_decision": "cut_costs",
156
+ "winning_decision": "cut_costs",
157
+ "reward": 1.25,
158
+ "profitability_before": 0.0,
159
+ "profitability_after": 12.34
160
+ }
161
+ ],
162
+ "done_reason": null,
163
+ "winning_decision": "cut_costs"
164
+ },
165
+ "event": "Round 2 — Enterprise contract w/ source-code escrow\nDescription: A Fortune 500 enterprise wants to sign a $5M contract but demands source code escrow.",
166
+ "options": [
167
+ "accept_deal",
168
+ "negotiate_terms",
169
+ "reject_deal"
170
+ ],
171
+ "npc_statements": [
172
+ {
173
+ "role": "CTO",
174
+ "statement": "...",
175
+ "vote": "...",
176
+ "confidence": 0.XX
177
+ }
178
+ ],
179
+ "round": 2
180
+ },
181
+ "reward": 1.25,
182
+ "done": false,
183
+ "info": {
184
+ "round": 2,
185
+ "winning_decision": "cut_costs",
186
+ "winning_vote_tally": {
187
+ "cut_costs": 4.5,
187
+ "raise_capital": 1.3,
188
+ "reduce_scope": 0.0
190
+ },
191
+ "pitch_scores": {
192
+ "CTO": 0.0,
193
+ "CFO": 0.0,
194
+ "Investor Rep": 0.0,
195
+ "Independent": 0.0
196
+ }
197
+ }
198
+ }
199
+ ```
200
+
201
+ ---
202
+
203
+ ### `GET /health`
204
+ **Purpose**: Health check. Confirms backend is running.
205
+
206
+ **Response** (200 OK):
207
+ ```json
208
+ {
209
+ "status": "healthy"
210
+ }
211
+ ```
212
+
213
+ ---
214
+
215
+ ### `GET /docs`
216
+ **Purpose**: Auto-generated Swagger/OpenAPI documentation. Use for development reference.
217
+
218
+ **Location**: `http://localhost:8000/docs` (or on HF Space at `/docs`)
219
+
220
+ ---
221
+
222
+ ## 2. WebSocket Streaming (Optional, Advanced)
223
+
224
+ If you want real-time streaming during training or multi-agent play:
225
+
226
+ ### `WebSocket /ws`
227
+ **Purpose**: Bi-directional message streaming (not required for single-agent frontend).
228
+
229
+ Connection example:
230
+ ```javascript
231
+ const ws = new WebSocket("ws://localhost:8000/ws");
232
+ ws.onmessage = (event) => {
233
+ const message = JSON.parse(event.data);
234
+ console.log(message); // e.g., { "type": "step", "observation": {...} }
235
+ };
236
+ ```
237
+
238
+ *(Details omitted if not used for initial frontend.)*
239
+
240
+ ---
241
+
242
+ ## 3. Data Models Reference
243
+
244
+ ### `BoardSimObservation` (returned by `/reset` and `/step`)
245
+ ```javascript
246
+ {
247
+ state: {
248
+ round: number, // 1-indexed: 1..10
249
+ revenue: number, // in dollars
250
+ burn_rate: number, // monthly spend in dollars
251
+ runway_months: number, // months until bankruptcy
252
+ product_readiness: float (0..1),
253
+ market_share: float (0..1),
254
+ team_morale: float (0..1),
255
+ investor_confidence: float (0..1),
256
+ regulatory_risk: float (0..1),
257
+ profitability_score: number,
258
+ trust: { // per NPC, 0..1
259
+ "CTO": 0.5,
260
+ "CFO": 0.5,
261
+ "Investor Rep": 0.5,
262
+ "Independent": 0.5
263
+ },
264
+ trust_history: Array, // per-round trust snapshots
265
+ history: Array, // past decisions & outcomes
266
+ done_reason: string | null, // "runway_exhausted", "acquisition", "finished_10", or null while the episode is running
267
+ winning_decision: string | null
268
+ },
269
+ event: string, // event title + description
270
+ options: [string, string, string], // 3 valid decision strings for this round
271
+ npc_statements: [
272
+ {
273
+ role: "CTO" | "CFO" | "Investor Rep" | "Independent",
274
+ statement: string,
275
+ vote: string (one of options),
276
+ confidence: float (0..1)
277
+ },
278
+ // ... one per NPC role (4 total)
279
+ ],
280
+ round: number
281
+ }
282
+ ```
283
+
284
+ ### `BoardSimAction` (sent to `/step`)
285
+ ```javascript
286
+ {
287
+ decision: string, // must be one of observation.options
288
+ coalition_pitch: string | null // optional persuasion attempt (unused in v1)
289
+ }
290
+ ```
291
+
292
+ ---
293
+
294
+ ## 4. Error Responses
295
+
296
+ ### 422 Unprocessable Entity
297
+ Invalid action format or decision not in options.
298
+
299
+ **Response**:
300
+ ```json
301
+ {
302
+ "detail": [
303
+ {
304
+ "loc": ["body", "action", "decision"],
305
+ "msg": "value is not a valid enumeration member",
306
+ "type": "type_error.enum"
307
+ }
308
+ ]
309
+ }
310
+ ```
311
+
312
+ ### 400 Bad Request
313
+ Malformed JSON or missing required fields.
314
+
315
+ ---
316
+
317
+ ## 5. Frontend Integration Checklist
318
+
319
+ - [ ] **Initialize**: On app load, call `POST /reset` to get initial observation.
320
+ - [ ] **Display State**: Render `observation.state` as metrics (revenue, runway, morale, trust, etc.).
321
+ - [ ] **Display Event**: Show `observation.event` (crisis title + description).
322
+ - [ ] **Display NPCs**: Render 4 NPC cards with their `statement`, `vote`, and `confidence`.
323
+ - [ ] **Render Decision Options**: Display one button (or card) for each of the 3 strings in `observation.options`.
324
+ - [ ] **Handle User Click**: On decision click, POST `/step` with the selected `decision`.
325
+ - [ ] **Update UI**: Parse response observation and repeat from "Display State".
326
+ - [ ] **Terminal State**: If `done` is true, show final metrics and `done_reason` (e.g., "Bankruptcy", "IPO").
327
+ - [ ] **Optional Coalition Pitch**: Text input for `coalition_pitch` (future extension; safe to leave blank for v1).
328
+
329
+ ---
330
+
331
+ ## 6. Backend Base URL Configuration
332
+
333
+ For local development:
334
+ ```
335
+ http://localhost:8000
336
+ ```
337
+
338
+ For HF Space deployment (after `openenv push`):
339
+ ```
340
+ https://<your-hf-username>-board-sim-env.hf.space
341
+ ```
342
+
343
+ **Frontend environment variable** (optional):
344
+ ```
345
+ REACT_APP_API_BASE_URL=http://localhost:8000
346
+ # or
347
+ REACT_APP_API_BASE_URL=https://<your-hf-username>-board-sim-env.hf.space
348
+ ```
349
+
350
+ ---
351
+
352
+ ## 7. Example Frontend Workflow
353
+
354
+ ```javascript
355
+ // 1. Reset
356
+ const resetRes = await fetch(`${API_BASE}/reset`, {
357
+ method: "POST",
358
+ headers: { "Content-Type": "application/json" },
359
+ body: JSON.stringify({ seed: 42 })
360
+ });
361
+ const { observation, done, info } = await resetRes.json();
362
+
363
+ // 2. Render observation
364
+ displayState(observation.state);
365
+ displayNPCStatements(observation.npc_statements);
366
+ displayDecisionButtons(observation.options);
367
+
368
+ // 3. User clicks decision
369
+ const decision = "cut_costs"; // from button click
370
+ const stepRes = await fetch(`${API_BASE}/step`, {
371
+ method: "POST",
372
+ headers: { "Content-Type": "application/json" },
373
+ body: JSON.stringify({
374
+ action: { decision, coalition_pitch: "" }
375
+ })
376
+ });
377
+ const { observation: nextObs, reward, done: nextDone } = await stepRes.json();
378
+
379
+ // 4. Repeat or show results
380
+ if (nextDone) {
381
+ displayEndgameScreen(nextObs.state, nextObs.state.done_reason);
382
+ } else {
383
+ displayState(nextObs.state);
384
+ // ... repeat
385
+ }
386
+ ```
387
+
388
+ ---
389
+
390
+ ## 8. No Backend Imports in Frontend
391
+
392
+ ✅ **OK**: `fetch("http://localhost:8000/reset")`
393
+ ❌ **NOT OK**: `import { BoardSimEnvironment } from "backend"`
394
+
395
+ The frontend is a standalone web app. All communication is via HTTP/WebSocket.
396
+
HANDOFF.md ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Morning-of Briefing — OpenEnv Hackathon (Apr 25–26, 2026)
2
+
3
+ One stop for every fact about the hackathon. Read this the morning of Apr 25 before
4
+ heading to Scaler. Consolidates the two source docs (`Themes & Judging Criteria.docx`
5
+ and `Meta Hackathon D-DAY.pptx`) plus the skill at `.claude/skills/openenv-hackathon/`.
6
+
7
+ ---
8
+
9
+ ## 1. At-a-glance
10
+
11
+ - **Event**: Meta PyTorch × Hugging Face OpenEnv Hackathon — India finale.
12
+ - **Where**: Scaler School of Technology, Bangalore.
13
+ - **When**: Apr 25 (build day) + Apr 26 (submission day).
14
+ - **Submission deadline**: **Apr 26, 5:00 PM IST** (Google Form). Commits/changes to
15
+ the HF Space after this time are NOT considered. Whatever is live at 5 PM is judged.
16
+ - **Team cap**: one submission per team. If you have multiple ideas, pick the best one.
17
+
18
+ ## 2. Day-1 agenda (Apr 25, Saturday)
19
+
20
+ | Time (IST) | What | Where |
21
+ |---|---|---|
22
+ | 7:00 – 10:30 AM | Registration & Arrival | Registration Desk, Scaler Campus |
23
+ | 8:00 – 9:15 AM | Breakfast | Food Zones |
24
+ | 10:00 – 10:15 AM | Opening Ceremony | Main Stage |
25
+ | 10:15 – 10:30 AM | Problem Themes Overview & Briefing | Main Stage |
26
+ | 10:30 – 11:00 AM | Address by Meta Team | Main Stage |
27
+ | 11:00 – 11:30 AM | Move to Build Zones | All Classrooms |
28
+ | **11:30 AM** | **Hacking begins** | All Classrooms |
29
+ | ~1:00 PM (self-imposed) | **Theme + problem statement LOCKED** | Our classroom |
30
+ | 1:00 PM | Lunch | Food Zones |
31
+ | **3:30 – 4:30 PM** | **Mentor Round 1** | Classrooms |
32
+ | 5:00 – 5:30 PM | Talk + High Tea | Main Stage |
33
+ | 8:00 – 10:00 PM | Dinner | Food Zones |
34
+ | **~9:30 PM** | **Mentor Round 2** | Classrooms |
35
+ | 2:00 AM | Midnight snacks | Food Zones |
36
+
37
+ ## 3. Day-2 agenda (Apr 26, Sunday)
38
+
39
+ | Time (IST) | What | Where |
40
+ |---|---|---|
41
+ | 8:00 AM | Breakfast | Food Zones |
42
+ | **10:00 AM – 12:00 PM** | **Mentor Round 3 (FINAL)** | Classrooms |
43
+ | 12:00 PM | ⏰ 5-hour submission reminder | Classrooms |
44
+ | 2:00 PM | Lunch | Food Zones |
45
+ | 3:00 PM | ⏰ 2-hour submission reminder | Classrooms |
46
+ | 3:30 – 4:30 PM | Final build push | Classrooms |
47
+ | **🏁 5:00 PM** | **SUBMISSION DEADLINE — Google Form closes** | — |
48
+ | 5:15 PM | Closing Remarks | Main Stage |
49
+ | 5:30 – 8:00 PM | Open Networking | Near Main Stage |
50
+ | 8:00 PM | Event concludes | Near Main Stage |
51
+
52
+ ## 4. The 5 themes (pick one)
53
+
54
+ | # | Theme | What it teaches the LLM | Example environments |
55
+ |---|---|---|---|
56
+ | 1 | **Multi-Agent Interactions** | Cooperation, competition, negotiation, coalition formation. Theory-of-mind reasoning. Model others' beliefs in partially observable settings. | Market simulations, compute-allocation negotiations, collaborative puzzle worlds, mixed coop/competitive games. |
57
+ | 2 | **(Super) Long-Horizon Planning & Instruction Following** | Decompose goals, track state across long trajectories, recover from early mistakes, handle sparse/delayed rewards — beyond context-window limits. | Research-planning simulators, large-codebase refactoring, strategic resource management, logistics, 300-instruction-scatter tasks. |
58
+ | 3.1 | **World Modeling — Professional Tasks** | Maintain internal state, update beliefs from outcomes, orchestrate multi-step workflows using real tools/APIs. No shortcuts. | Dynamic browser/API ecosystems, enterprise apps, scientific workflows (papers → code → experiments), tool-discovery. |
59
+ | 3.2 | **World Modeling — Personalized Tasks** | Handle realistic personal delegation: messages, conflicts, scheduling, shopping. | Exec-assistant meeting planner, dinner/drive planning, tough email replies. |
60
+ | 4 | **Self-Improvement** | Generate new challenges, escalate difficulty, self-play, adaptive curricula. Recursive skill amplification. | Self-play negotiation arenas, auto-generated math/proofs, evolving coding competitions, adaptive RL curricula. |
61
+ | 5 | **Wild Card — Impress Us** | Anything outside the above that meaningfully trains an LLM capability. | — (judges explicitly said they WILL reward out-of-box). |
62
+
63
+ **Rules on theme**:
64
+ - Round-1 problem is NOT required — pick whatever best fits.
65
+ - Judges have seen a lot of chess, snake, tic-tac-toe, and grid-world clones. Don't.
66
+ - Pick a problem that genuinely excites the team — "that energy comes through in the pitch".
67
+
68
+ Theme-by-theme shortcut candidates live at [.claude/skills/openenv-hackathon/reference/05-theme-selection.md](.claude/skills/openenv-hackathon/reference/05-theme-selection.md).
69
+
70
+ ## 5. Judging rubric (memorize these weights)
71
+
72
+ | Weight | Criterion | What judges are checking |
73
+ |---|---|---|
74
+ | **40%** | **Environment Innovation** | Is the env novel, creative, genuinely challenging? Does it test agent behavior in a way that hasn't been done before? Could a researcher write a paper on training against it? |
75
+ | **30%** | **Storytelling & Presentation** | Can you clearly explain the problem, the env, what the agent learned? Is the demo engaging for a non-technical audience? README readable in 3–5 minutes. |
76
+ | **20%** | **Showing Improvement in Rewards** | Observable evidence of training progress: reward curves, metrics, before/after, baseline vs. trained on the same axes. |
77
+ | **10%** | **Reward & Training Pipeline** | Is the reward logic coherent and hard to game? Does the pipeline produce real improvement in trained-agent behavior? |
78
+
79
+ **Innovation + Storytelling is 70% of the score.** The docx states explicitly:
80
+ > A messy but ambitious environment with real training evidence beats a polished but
81
+ > boring one.
82
+
83
+ ## 6. Minimum submission requirements (non-negotiable)
84
+
85
+ Submissions missing ANY of these are "at a serious disadvantage". The Google Form asks for:
86
+
87
+ 1. **Hugging Face Space URL** — the env, deployed via `python -m openenv.cli push`. Must be PUBLIC and runnable.
88
+ 2. **Colab notebook link** — training script using Unsloth or HF TRL. Judges re-run it.
89
+ 3. **Code repository link** — GitHub or HF Hub repo. Every file included.
90
+ 4. **YouTube video URL OR Hugging Face blog post URL** — the story. Video ≤2 minutes. A slide deck is also an acceptable writeup format.
91
+ 5. **README in the repo** — links all of the above, plus any extras (W&B runs, slides). README IS the judge's entry point.
92
+
93
+ Additional rules:
94
+ - Do **NOT** put large video files inside the Env HF Space — use a URL reference.
95
+ - Every extra material (W&B, slides, blog, video) must be linked FROM the README.
96
+
97
+ ## 7. What makes a submission stand out (from the docx)
98
+
99
+ From "OpenEnv Hackathon — What Judges Look For":
100
+
101
+ - **Pick an ambitious, original problem**. Ask: "Does this teach the LLM something it currently can't do well? Could someone write a paper about training on this?"
102
+ - **Design a reward that teaches**: rich/informative (not 0/1 at the end), captures something hard-to-measure cleverly, uses OpenEnv's Rubric system (composable > monolithic), hard to game.
103
+ - **Show real training, end to end**: the loop connects to the env (not a static dataset), trains long enough that curves mean something, baseline vs. trained on the same axes.
104
+ - **Readable plots**: label both axes + units; save as `.png`/`.jpg` and commit to the repo (don't leave only in a deleted Colab cell or expired W&B run); embed in README with a one-line caption; overlay comparisons on shared axes.
105
+ - **Tell a story, not an API doc**: Problem → Environment → Results → Why does it matter. A reviewer should read it in 3–5 min and WANT to try it.
106
+ - **Engineering table stakes**: OpenEnv `Environment`/`MCPEnvironment` base class, client/server separation (client never imports server internals), Gym-style API, valid `openenv.yaml`, no reserved MCP tool names (`reset`, `step`, `state`, `close`).
107
+
108
+ ## 8. Files to share with teammates
109
+
110
+ Push the ENTIRE `OpenEnv Hackathon/` directory (easiest: private GitHub repo, they clone).
111
+ If sharing via zip / Drive, include these files verbatim:
112
+
113
+ **Context for humans** (read these first):
114
+ - [HANDOFF.md](HANDOFF.md) — this file. One-stop briefing.
115
+ - [README.md](README.md) — judge-facing template, fill placeholders as decisions get made.
116
+ - [TEAMMATES.md](TEAMMATES.md) — setup steps, CLI commands, split-of-work suggestion.
117
+ - [CLAUDE.md](CLAUDE.md) — project rules, loaded automatically by Claude Code.
118
+
119
+ **Context for Claude Code** (auto-loaded when teammates run `claude` in this folder):
120
+ - [.claude/skills/openenv-hackathon/SKILL.md](.claude/skills/openenv-hackathon/SKILL.md) — the hackathon skill.
121
+ - [.claude/skills/openenv-hackathon/reference/01-openenv-framework.md](.claude/skills/openenv-hackathon/reference/01-openenv-framework.md) — env anatomy, file templates, `openenv.yaml`, push workflow.
122
+ - [.claude/skills/openenv-hackathon/reference/02-training-pipeline.md](.claude/skills/openenv-hackathon/reference/02-training-pipeline.md) — TRL-GRPO Colab recipe.
123
+ - [.claude/skills/openenv-hackathon/reference/03-submission-checklist.md](.claude/skills/openenv-hackathon/reference/03-submission-checklist.md) — final Apr 26 audit list.
124
+ - [.claude/skills/openenv-hackathon/reference/04-judging-rubric-playbook.md](.claude/skills/openenv-hackathon/reference/04-judging-rubric-playbook.md) — tactics per criterion.
125
+ - [.claude/skills/openenv-hackathon/reference/05-theme-selection.md](.claude/skills/openenv-hackathon/reference/05-theme-selection.md) — theme fit + 60-min ideation protocol.
126
+
127
+ **Scaffolding for the build**:
128
+ - `requirements.txt` — pinned deps.
129
+ - `.gitignore` — blocks secrets.
130
+ - `envs/.gitkeep`, `notebooks/.gitkeep`, `assets/.gitkeep` — directory layout.
131
+
132
+ **Do NOT share**:
133
+ - `.claude/settings.local.json` — per-user Claude settings.
134
+ - Any `.env`, `HF_TOKEN`, `WANDB_API_KEY`.
135
+ - The two source docs from `Downloads/` — superseded by this HANDOFF.md.
136
+
137
+ ## 9. Pre-hackathon checklist (each teammate, before Apr 25 morning)
138
+
139
+ ```bash
140
+ # Tools
141
+ python --version # need 3.11+ (project uses 3.12.7)
142
+ docker --version # need Docker Desktop running for local Space tests
143
+ git --version
144
+
145
+ # Python deps
146
+ pip install -r requirements.txt
147
+
148
+ # Hugging Face (required for openenv push)
149
+ hf auth login # paste a WRITE-scoped HF token
150
+
151
+ # W&B (optional, gives judges a shareable run URL — highly recommended)
152
+ wandb login
153
+
154
+ # Sanity check: verify the OpenEnv CLI works
155
+ python -m openenv.cli --help
156
+ ```
157
+
158
+ **Accounts to have ready**:
159
+ - Hugging Face (write token).
160
+ - GitHub (public repo for the code link).
161
+ - Google Colab (free T4 is enough for Qwen3-0.6B; Pro helps for 1.7B).
162
+ - Weights & Biases (optional).
163
+ - YouTube channel (for ≤2-min video) OR HF blog posting enabled.
164
+
165
+ ## 10. Split-of-work suggestion (3-person team)
166
+
167
+ | Role | Deliverable | Key files |
168
+ |---|---|---|
169
+ | **Environment builder** | `envs/<name>_env/` scaffolded, filled, pushed to HF Space | `envs/<name>_env/models.py`, `environment.py`, `app.py`, `client.py`, `openenv.yaml` |
170
+ | **Training engineer** | Colab notebook that actually trains + committed plots | `notebooks/train_grpo.ipynb`, `assets/reward_curve.png`, `assets/before_after.png` |
171
+ | **Storyteller** | README filled, video/blog recorded, Google Form submitted | `README.md`, YouTube URL / HF blog URL |
172
+
173
+ All three attend every mentor round together. Claude is most useful BEFORE mentor
174
+ rounds (prep concrete questions), not during.
175
+
176
+ ## 11. Hard rules (do not violate)
177
+
178
+ 1. **OpenEnv v0.2.3** — never downgrade or use pre-0.2 APIs.
179
+ 2. **Training must use real env**, not a static dataset — TRL `GRPOTrainer` with `environment_factory=`.
180
+ 3. **HF Space must be public** and discoverable.
181
+ 4. **No secrets committed** — `HF_TOKEN`, `WANDB_API_KEY`, `.env` all in `.gitignore`.
182
+ 5. **No commits after Apr 26, 5:00 PM IST** — URL is frozen at deadline.
183
+ 6. **OpenEnv CLI on Windows**: use `python -m openenv.cli <subcommand>`, NOT bare `openenv`.
184
+ 7. **No reserved MCP tool names**: `reset`, `step`, `state`, `close`.
MECHANICS.md ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # BoardSim — Full Mechanics Reference
2
+
3
+ > This document is the authoritative math and design reference for the NeuralEdge AI Boardroom environment.
4
+ > Target audience: hackathon judges who want the internals, and future contributors.
5
+ > See `README.md` for the submission overview.
6
+
7
+ ---
8
+
9
+ ## 1. State variables — every field, every formula
10
+
11
+ State lives in `BoardState.state_dict`, initialized in `reset()` at `board_sim_env_environment.py:471`.
12
+
13
+ ### Core company state (mutated by consequences each round)
14
+
15
+ | Field | Initial value | Range | Unit | Meaning |
16
+ |---|---|---|---|---|
17
+ | `revenue` | 2,000,000 | [0, 1e12] | USD/year | Annual recurring revenue |
18
+ | `burn_rate` | 1,200,000 | [0, 1e10] | USD/month | Monthly cash expenditure |
19
+ | `runway_months` | 14.0 | [0, 120] | months | Time until cash = 0 |
20
+ | `product_readiness` | 0.45 | [0, 1] | fraction | Shippability of the product |
21
+ | `market_share` | 0.08 | [0, 1] | fraction | % of total addressable market |
22
+ | `team_morale` | 0.70 | [0, 1] | fraction | Engineering team happiness/retention |
23
+ | `investor_confidence` | 0.65 | [0, 1] | fraction | Board investors' belief in success |
24
+ | `regulatory_risk` | 0.20 | [0, 1] | fraction | Legal/compliance exposure |
25
+
26
+ ### Coalition state
27
+
28
+ | Field | Initial | Range | Update rule |
29
+ |---|---|---|---|
30
+ | `trust[CTO]` | 0.5 | [0.1, 1.0] | ±0.05 per round depending on vote alignment |
31
+ | `trust[CFO]` | 0.5 | [0.1, 1.0] | same |
32
+ | `trust[Investor Rep]` | 0.5 | [0.1, 1.0] | same |
33
+ | `trust[Independent]` | 0.5 | [0.1, 1.0] | same |
34
+
35
+ Trust update (applied after every vote):
36
+ ```
37
+ for each NPC:
38
+ if NPC voted for the winning decision:
39
+ trust[NPC] = clamp(trust[NPC] + 0.05, 0.1, 1.0)
40
+ else:
41
+ trust[NPC] = clamp(trust[NPC] - 0.05, 0.1, 1.0)
42
+ ```
43
+ Trust influences NPC confidence from the *next* round onward:
44
+ `trust_bias = (trust[role] - 0.5) × 0.30` → added to that NPC's option-scoring, range `[-0.12, +0.15]` (asymmetric because trust is clamped to `[0.1, 1.0]`).
45
+
46
+ ### Bookkeeping fields
47
+
48
+ | Field | Purpose |
49
+ |---|---|
50
+ | `round` | 1..10, increments each step |
51
+ | `profitability_score` | Recomputed composite at end of each step |
52
+ | `history` | Per-round decision log (agent_decision, winning_decision, vote_tally, pitch_scores, …) |
53
+ | `trust_history` | Per-round snapshot of all 4 trust values |
54
+ | `done_reason` | `"runway_exhausted"` / `"acquisition"` / `"finished_10"` / `None` |
55
+ | `winning_decision` | Last round's vote winner |
56
+
57
+ ---
58
+
59
+ ## 2. Profitability score — the composite health metric
60
+
61
+ ```
62
+ profitability_score = clamp(raw, 0, 100)
63
+
64
+ raw =
65
+ min(revenue / 8_000_000, 1.0) × 22 # revenue term (max 22)
66
+ + max(0, 1 − burn_rate / 1_400_000) × 18 # burn efficiency (max 18)
67
+ + min(runway_months / 18.0, 1.0) × 18 # runway term (max 18)
68
+ − max(0, (6 − runway_months) / 6) × 10 # low-runway penalty (bites below 6mo)
69
+ + min(market_share, 0.50) / 0.50 × 14 # market share (max 14)
70
+ + product_readiness × 10 # product readiness (max 10)
71
+ + team_morale × 7 # team morale (max 7)
72
+ + investor_confidence × 11 # investor confidence (max 11)
73
+ − regulatory_risk × 18 # regulatory drag (max −18)
74
+ ```
75
+
76
+ **Initial state score** (with default init values) ≈ 37.3/100.
77
+ **Theoretical maximum** = 22 + 18 + 18 + 0 + 14 + 10 + 7 + 11 − 0 = **100**.
78
+ **Random policy** lands near 30–55 with mean ≈ 45.7 (measured over 200 episodes after §9.5 reward tweaks).
79
+
80
+ ---
81
+
82
+ ## 3. Next-state computation — how the simulation physics work
83
+
84
+ **Consequence deltas are hardcoded for each decision option.** The transition is:
85
+
86
+ ```
87
+ next_state = current_state + consequences[winning_decision] × (1 + ε)
88
+ where ε ~ N(0, 0.15) per consequence value, fixed at episode reset (seeded)
89
+
90
+ runway_months -= _advance_runway() # depends on current revenue/burn, not the action
91
+ trust[role] += ±0.05 per NPC # based on vote alignment with winning_decision
92
+ profitability_score = compute_profitability_score(next_state) # derived
93
+ ```
94
+
95
+ ### Runway decrement formula
96
+
97
+ ```python
98
+ monthly_revenue = revenue / 12.0
99
+ net = monthly_revenue - burn_rate
100
+
101
+ if net >= 0:
102
+ runway_months -= 0.5 # profitable: slow burn
103
+ else:
104
+ burn_months = min(2.0, max(1.0, abs(net) / burn_rate + 1.0))
105
+ runway_months -= burn_months # unprofitable: faster bleed
106
+ ```
107
+
108
+ ### Three layers of variability (the agent cannot memorize the optimal path)
109
+
110
+ 1. **Event order shuffled per episode** — same 10 events, different sequence each seed.
111
+ 2. **Consequence magnitudes ±15% Gaussian noise** — computed once at `reset()`, fixed for the episode.
112
+ 3. **NPC vote positions depend on accumulated trust** — same option in round 5 produces different vote weights if you've built (or burned) coalitions in rounds 1–4.
113
+
114
+ ---
115
+
116
+ ## 4. NPC vote resolution
117
+
118
+ ### Vote weight configuration
119
+
120
+ ```
121
+ CEO: 1.5 CTO: 1.2 CFO: 1.0 Investor Rep: 1.3 Independent: 0.8
122
+ ```
123
+
124
+ ### NPC option scoring (per NPC, per round)
125
+
126
+ Each NPC has a hidden agenda dict (e.g. CFO: `{burn_rate: -0.60, revenue: 0.30, runway_months: 0.20, regulatory_risk: -0.25}`).
127
+
128
+ ```
129
+ for each option opt:
130
+ score[opt] = 0
131
+ for each (metric, weight) in NPC_agenda:
132
+ v = consequences[opt][metric] (with unit normalization)
133
+ score[opt] += v × weight
134
+ score[opt] += N(0, 0.20) # personality noise, seeded per (role, round)
135
+
136
+ NPC votes for argmax(score)
137
+ confidence = clamp(0.5 + 0.5 × margin_between_top_two, 0.05, 1.0)
138
+ + trust_bias # trust influences confidence
139
+ ```
140
+
141
+ ### Pitch persuasion mechanism
142
+
143
+ ```python
144
+ pitch_score[role] = min(1.0, keyword_hits / max(4, len(agenda_keywords) // 4))
145
+ # where keyword_hits = count of role's agenda keywords present in pitch text
146
+
147
+ # Persuasion shifts up to 35% of NPC's vote weight toward CEO's pick:
148
+ shift_fraction = 0.35 × pitch_score[role]
149
+ tally[NPC's_vote] += base_weight × (1 - shift_fraction)
150
+ tally[CEO's_decision] += base_weight × shift_fraction
151
+ ```
152
+
153
+ NPC keyword lists (the hidden information the CEO must infer via ToM):
154
+
155
+ | Role | Keywords |
156
+ |---|---|
157
+ | CTO | engineering, architecture, technical, quality, morale, product, team, scalable, reliable, robust |
158
+ | CFO | burn, cash, runway, fiduciary, conservative, discipline, cost, savings, margin, compliance, prudent, fiscal |
159
+ | Investor Rep | growth, scale, 10x, tam, market, moat, ipo, exit, valuation, revenue, arr, dominate, aggressive, ambitious |
160
+ | Independent | reputation, stakeholders, trust, transparent, ethics, long-term, governance, consensus, safety, credibility |
161
+
162
+ ### Tie-breaking
163
+
164
+ If two options score equally in the tally, the CEO's pick wins. This is implemented by inserting `agent_decision` first in the `ordered` dict before calling `max()`, so Python's stable `max()` breaks ties in the CEO's favour.
165
+
166
+ ---
167
+
168
+ ## 5. The full reward formula
169
+
170
+ Applied at the end of each `step()` call:
171
+
172
+ ```
173
+ # Primary signal — normalized (§9.5)
174
+ reward = (new_profitability_score - old_profitability_score) / 100.0
175
+
176
+ # Coalition bonus / penalty
177
+ reward += 0.5 if winning_decision == agent_decision
178
+ else -0.2
179
+
180
+ # Trust delta (range ≈ ±0.06 per round)
181
+ reward += 0.3 × (Σtrust_after - Σtrust_before)
182
+
183
+ # Pitch bootstrap (§9.5) — fires for any non-empty pitch
184
+ if pitch_text is non-empty:
185
+ reward += 0.05
186
+ if any NPC opposed the CEO's pick:
187
+ reward += 0.4 × mean(pitch_score over opposing NPCs)
188
+
189
+ # Format penalty
190
+ if agent's decision string not in round's options:
191
+ reward -= 0.5
192
+
193
+ # Terminal penalties / bonuses (only at episode end)
194
+ if runway_months <= 0:
195
+ reward -= 2.0 # bankruptcy (§9.5: reduced from -5)
196
+ if terminal:
197
+ reward += event._terminal_bonus # acquisition +30, IPO +25, stay_private +5
198
+ reward += {+10 if final≥60, +5 if ≥40, -5 if <20}
199
+ ```
200
+
201
+ ### Why each term exists
202
+
203
+ | Term | Purpose |
204
+ |---|---|
205
+ | Δ score / 100 | Primary learning signal: profitability improvement per decision |
206
+ | Coalition +0.5/−0.2 | Teaches the agent to actually win votes, not just pick good-looking options |
207
+ | Trust delta × 0.3 | Rewards long-arc coalition building across rounds |
208
+ | Pitch bootstrap +0.05 | Bootstraps the pitch channel before the model is good enough to earn keyword bonuses |
209
+ | Pitch persuasion × 0.4 | Rewards pitches that specifically target opposing NPC keywords (ToM signal) |
210
+ | Invalid −0.5 | Teaches correct output format (DECISION: / PITCH: two-line structure) |
211
+ | Bankruptcy −2.0 | Episode-ending failure signal, reduced to avoid drowning gradient |
212
+ | Terminal tiered | Long-horizon incentive toward high profitability, acquisition, or IPO |
213
+
214
+ ---
215
+
216
+ ## 6. When profitability is computed relative to the decision
217
+
218
+ The exact sequence inside `step()`:
219
+
220
+ ```
221
+ 1. old_score = compute_profitability_score(state) ← snapshot BEFORE
222
+ 2. NPC votes computed from current state + trust
223
+ 3. CEO's decision + pitch → _resolve_vote() → winning_decision
224
+ 4. consequences[winning_decision] × noise → applied to state
225
+ 5. _advance_runway() → runway decrements
226
+ 6. trust updated per NPC (±0.05)
227
+ 7. new_score = compute_profitability_score(state) ← AFTER consequences
228
+ 8. reward = (new_score - old_score) / 100 + ...
229
+ 9. next observation returned with new_score in obs.state
230
+ ```
231
+
232
+ The CEO **never consults profitability to make its decision** — it sees last round's score in the observation, emits a decision, and then the score updates. Profitability is the *outcome metric*, not a planning input. The policy learns to predict which decisions increase profitability by observing the correlation across training episodes.
233
+
234
+ ---
235
+
236
+ ## 7. Training pipeline — key design decisions
237
+
238
+ ### §9a: Per-round gradient flow (Option A)
239
+
240
+ The current training loop samples 1 completion from the model for **every round** of **every group member's episode**. This gives the model gradient signal for all 10 decisions per trajectory, not just the opening decision.
241
+
242
+ ```
243
+ For each training step:
244
+ Create GROUP_SIZE independent envs (different seeds → divergent trajectories)
245
+ For each round r in 0..9:
246
+ For each group member g:
247
+ prompt = build_prompt(obs_g)
248
+ completion = model.generate(prompt, do_sample=True) ← gradient-connected
249
+ obs_g = env_g.step(parse(completion))
250
+ ep_reward[g] += obs_g.reward
251
+ advantages = GRPO(ep_rewards) # group-relative normalization
252
+ For each (g, r) completion:
253
+ loss = advantage[g] × NLL(completion) / (GROUP_SIZE × n_rounds)
254
+ + β_KL × KL(π_θ || π_ref)
255
+ optimizer.step()
256
+ ```
257
+
258
+ Total forward passes per training step: 10 rounds × 4 group members × 2 (policy + ref) = **80 forward passes**.
259
+
260
+ ### §9c: KL penalty
261
+
262
+ A frozen copy of the initial model (`ref_model`) computes reference log-probs. KL ≈ `current_loss - ref_loss` per completion, clamped at 0. Coefficient β = 0.04.
263
+ Purpose: prevents the policy from drifting into degenerate text patterns (always emitting the same decision, empty pitches) that lock in low-reward equilibria.
264
+
265
+ ### §9.5: Reward normalization
266
+
267
+ Three changes to the reward function to improve gradient quality:
268
+ 1. **Δscore ÷ 100** — scales the raw profitability delta (typically −5 to +10 points) down to roughly −0.05 to +0.10, so it no longer swamps the coalition term (+0.5 / −0.2)
269
+ 2. **Bankruptcy penalty −2 (was −5)** — one bad arc was drowning 9 rounds of positive signal
270
+ 3. **Pitch bootstrap +0.05** — needed to push a 0.6B model into using the pitch channel before it's good enough to earn keyword bonuses
271
+
272
+ ---
273
+
274
+ ## 8. Theory-of-Mind — what's actually measured
275
+
276
+ "ToM" in this environment has a specific, narrow meaning: **can the agent infer what vocabulary each NPC uses when reasoning**, given only observation of statements and votes?
277
+
278
+ The grading mechanism is keyword overlap: `pitch_score[role] = hits / threshold`. This is coarse but measurable without human annotation.
279
+
280
+ A stronger ToM measurement (planned, not yet implemented): after each episode, ask the model "Given round 3's event and the CFO's statement, predict the CFO's vote." Compare predicted vs actual. Random baseline = 25% (1 in 4 options). Exceeding 50% indicates the model has learned the CFO's agenda.
281
+
282
+ The trust trajectory is a secondary ToM diagnostic: if trust rises across rounds, the model is consistently picking decisions that align with NPC preferences, which requires some implicit modeling of their objectives.
README.md ADDED
@@ -0,0 +1,504 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
+ ---
3
+ title: NeuralEdge AI Boardroom
4
+ emoji: 🏛️
5
+ colorFrom: indigo
6
+ colorTo: pink
7
+ sdk: docker
8
+ app_port: 8000
9
+ pinned: false
10
+ tags:
11
+ - openenv
12
+ - multi-agent
13
+ - reinforcement-learning
14
+ - hackathon
15
+ ---
16
+
17
+ # NeuralEdge AI Boardroom — Multi-Agent OpenEnv Submission
18
+
19
+ **Theme**: Theme 1 — Multi-Agent Interactions
20
+ **Framework**: OpenEnv `v0.2.3` · Qwen3-0.6B · Unsloth LoRA · REINFORCE with GRPO-style group advantages
21
+ **Event**: Meta PyTorch × Hugging Face OpenEnv Hackathon — India finale, Scaler Bangalore, **Apr 25–26 2026**
22
+
23
+ > A Series-B AI startup CEO learns to build winning board coalitions across 10 rounds of market crises — against 4 NPCs with hidden agendas — by writing persuasive pitches that target what each board member secretly cares about.
24
+
25
+ ---
26
+
27
+ ## 🔗 Submission links
28
+
29
+ | # | Required | Link |
30
+ |---|---|---|
31
+ | 1 | **HF Space** (live env) | https://huggingface.co/spaces/StavanKhobare/SST-MetaxPyTorch-Hackathon |
32
+ | 2 | **Colab notebook** (training) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/StavanRKhobare/SST-MetaxPyTorch-Hackathon/blob/master/notebooks/train_grpo.ipynb) |
33
+ | 3 | **Code repository** | https://github.com/StavanRKhobare/SST-MetaxPyTorch-Hackathon |
34
+ | 4 | **Writeup** | TBD — record after training run |
35
+ | 5 | **W&B run** | TBD — populate after Colab run |
36
+
37
+ ---
38
+
39
+ ## §11a — From random to strategic: a concrete example
40
+
41
+ > **Illustrative transcript** — shows the *expected* behaviour difference, not a live output.
42
+ > Seed 42, Round 4: EU AI Act compliance deadline.
43
+
44
+ **Random agent** (no pitch, coin-flip decision):
45
+ ```
46
+ Event: EU AI Act compliance deadline — full compliance costs $2M.
47
+ CTO (conf 0.81): votes full_compliance — "Architecture won't survive shortcuts."
48
+ CFO (conf 0.66): votes partial_compliance — "Fiduciary duty: only one of these is defensible."
49
+ Investor (0.74): votes exit_EU_market — "Sequoia isn't here for incremental."
50
+ Independent (0.59): votes full_compliance — "Long-term reputation outlasts any quarter."
51
+
52
+ DECISION: exit_EU_market ← random pick, misaligns with 3/4 board members
53
+ PITCH: [empty] ← random policy never writes pitches
54
+
55
+ Vote tally: full_compliance 2.03 | partial_compliance 0.66 | exit_EU_market 1.42
56
+ CEO loses the vote. Winning: full_compliance.
57
+ regulatory_risk += 0 | product_readiness += 0.10 | burn_rate += $2M
58
+ trust[Investor] -= 0.05 → 0.45 (Investor now harder to persuade)
59
+ Reward this round: Δscore/100 + (-0.2 coalition) + (trust delta) = -0.08
60
+ ```
61
+
62
+ **Trained agent** (same seed, same board state):
63
+ ```
64
+ DECISION: full_compliance
65
+ PITCH: "Full compliance strengthens long-term governance and regulatory safety —
66
+ this is the fiscally responsible move that protects our Series C runway
67
+ and signals discipline to the board."
68
+
69
+ Keywords hit: CFO ← "fiscally", "discipline"; Independent ← "governance", "safety"
70
+ Persuasion shifts 35% × 0.61 of CFO's vote weight toward full_compliance.
71
+ Vote tally: full_compliance 2.69 | partial_compliance 0.42 | exit_EU_market 1.30
72
+ CEO wins the vote.
73
+ trust[CFO] += 0.05 → 0.55 trust[Independent] += 0.05 → 0.55
74
+ Reward: Δscore/100 + (0.5 coalition) + (trust delta) + (0.4 × persuasion) = +0.61
75
+ ```
76
+
77
+ The difference isn't the decision alone — it's the pitch that swings the CFO. That's the theory-of-mind signal the training is designed to amplify.
78
+ =======
79
+ # NeuralEdge AI Boardroom — Multi-Agent OpenEnv Submission
80
+
81
+ > A Series-B AI startup CEO-simulator where the agent must build winning coalitions among 4 hidden-agenda board members across 10 rounds of market crises to maximize profitability and survive.
82
+
83
+ **Theme**: Theme 1 — Multi-Agent Interactions
84
+ **Framework**: OpenEnv `v0.2.3` + TRL `GRPOTrainer` + Qwen3-0.6B (Unsloth LoRA)
85
+ **Event**: Meta PyTorch × Hugging Face OpenEnv Hackathon — India finale, Scaler Bangalore, **Apr 25–26 2026**
86
+
87
+ ---
88
+
89
+ ## 🔗 Submission links (judges read here first)
90
+
91
+ > ⚠️ Replace each `TBD` with the live URL once deployed. The README is the judge entry point — every link below MUST be live by the **Apr 26 5:00 PM IST** deadline.
92
+
93
+ | # | Required | Link |
94
+ |---|---|---|
95
+ | 1 | **Hugging Face Space** (env, public) | TBD — `https://huggingface.co/spaces/<USER>/board-sim-env` |
96
+ | 2 | **Colab notebook** (training, re-runnable) | TBD — `https://colab.research.google.com/github/<USER>/neuraledge-boardroom/blob/main/notebooks/train_grpo.ipynb` |
97
+ | 3 | **Code repository** | TBD — `https://github.com/<USER>/neuraledge-boardroom` |
98
+ | 4 | **Writeup** (≤ 2-min YouTube **or** HF blog) | TBD |
99
+ | 5 | **W&B run** (training curves) | TBD — `https://wandb.ai/<USER>/boardsim-qwen3-grpo` |
100
+ >>>>>>> 220bc90 (Initial commit for OpenEnv Hackathon submission)
101
+
102
+ ---
103
+
104
+ ## What the agent does
105
+
106
+ ```
107
+ You are CEO Sarah Chen of NeuralEdge AI ($50M raised, 14 months runway).
108
+ Round 4 — EU AI Act compliance deadline in 90 days. Full compliance costs $2M.
109
+
110
+ <<<<<<< HEAD
111
+ Board:
112
+ CTO (conf 0.81, votes full_compliance) — "The architecture won't survive shortcuts."
113
+ CFO (conf 0.66, votes partial_compliance) — "Only one of these is fiduciary-defensible."
114
+ Investor Rep (conf 0.74, votes exit_EU_market) — "Sequoia isn't here for incremental."
115
+ Independent (conf 0.59, votes full_compliance) — "Long-term reputation outlasts any quarter."
116
+
117
+ Options: full_compliance / partial_compliance / exit_EU_market
118
+
119
+ DECISION: <pick one>
120
+ PITCH: <1-2 sentences targeting the opposing members' hidden priorities>
121
+ ```
122
+
123
+ The agent **never sees** NPC agendas — it must infer them from statements + voting history and write pitches that hit each role's private keyword set. Coalition partners' trust persists across all 10 rounds.
124
+
125
+ ---
126
+
127
+ ## Why this is novel
128
+
129
+ Multi-agent envs in this space are typically symmetric games. **BoardSim is asymmetric, partially observable, and adversarially noisy**: each NPC has a fixed but private objective, statements give partial signal, and the agent must trade off short-term coalition wins against multi-round metric pressure.
130
+
131
+ Three design properties push it past a "pick-an-action" toy:
132
+
133
+ 1. **Coalition pitch is a graded action channel.** Each step the agent emits `(decision, coalition_pitch)`. The pitch is keyword-scored against each opposing NPC's *hidden* agenda, and a high-scoring pitch redirects up to 35% of that NPC's vote weight onto the agent's pick. The agent must learn what each role secretly cares about and write boardroom rhetoric targeting them — implicit theory-of-mind, graded by the env.
134
+
135
+ 2. **Trust persists and feeds back into NPC behaviour.** NPCs that repeatedly lose votes lower their confidence toward the CEO (`trust -= 0.05/round`), which lowers their vote weight in future rounds. Building early trust makes the endgame easier; burning it makes NPCs increasingly adversarial — a genuine multi-round dependency structure.
136
+
137
+ 3. **Events are shuffled and consequence-noised per episode.** The agent cannot memorize "round 1 = always pick differentiate." Each seed produces a different event order and ±15% magnitude variation on consequences, forcing genuine policy generalization.
138
+
139
+ **Random policy baseline** (200 episodes, real measurement): mean profitability **45.7 ± 13.1**, survival rate **94.5%**, zero pitch usage. A trained policy has a clear structural advantage through the pitch channel that a random policy cannot exploit.
140
+
141
+ ---
142
+
143
+ ## Reward design — math appendix with worked example
144
+
145
+ ### The full reward formula
146
+
147
+ ```
148
+ Per step:
149
+ reward = (new_score − old_score) / 100 # §9.5: Δ profitability, normalised
150
+ + (0.5 if CEO won vote, else −0.2) # coalition signal
151
+ + 0.3 × (Σtrust_after − Σtrust_before) # trust delta (range ≈ ±0.06)
152
+ + 0.05 if pitch non-empty # §9.5: pitch-attempt bootstrap
153
+ + 0.4 × mean(pitch_score[opposing]) # ToM persuasion quality
154
+ − 0.5 if action was malformed # format penalty
155
+
156
+ Terminal:
157
+ − 2.0 if runway_months ≤ 0 # §9.5: bankruptcy (reduced from −5)
158
+ + terminal_bonus # acquisition +30, IPO +25, stay-private +5
159
+ + {+10 if final_score ≥ 60, +5 if ≥ 40, −5 if < 20}
160
+ ```
161
+
162
+ ### Profitability score (composite, range 0–100)
163
+
164
+ ```
165
+ revenue_term = min(revenue / 8_000_000, 1.0) × 22 # max 22 pts
166
+ burn_efficiency = max(0, 1 − burn_rate / 1_400_000) × 18 # max 18 pts
167
+ runway_term = min(runway_months / 18, 1.0) × 18 # max 18 pts
168
+ low_runway_pen = max(0, (6 − runway_months) / 6) × 10 # penalty below 6mo
169
+ market_term = min(market_share, 0.50) / 0.50 × 14 # max 14 pts
170
+ product_term = product_readiness × 10 # max 10 pts
171
+ morale_term = team_morale × 7 # max 7 pts
172
+ investor_term = investor_confidence × 11 # max 11 pts
173
+ risk_penalty = regulatory_risk × 18 # max −18 pts
174
+
175
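+ score = clamp(revenue_term + burn_efficiency + runway_term + market_term + product_term + morale_term + investor_term − low_runway_pen − risk_penalty, 0, 100)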
176
+ ```
177
+
178
+ ### Worked numerical example — Round 3 (ML team demands 40% raise)
179
+
180
+ **State before step:**
181
+ ```
182
+ revenue = $2,500,000/yr burn_rate = $1,200,000/mo runway = 11.5 mo
183
+ product_readiness = 0.55 market_share = 0.10 team_morale = 0.70
184
+ investor_confidence = 0.65 regulatory_risk = 0.20
185
+ trust = {CTO: 0.55, CFO: 0.50, Investor: 0.45, Independent: 0.50}
186
+ ```
187
+
188
+ **old_score** = min(2.5/8, 1)×22 + max(0,1−1.2/1.4)×18 + min(11.5/18,1)×18 − max(0,(6−11.5)/6)×10
189
+ + min(0.10,0.5)/0.5×14 + 0.55×10 + 0.70×7 + 0.65×11 − 0.20×18
190
+ = **6.875 + 2.57 + 11.5 + 0 + 2.8 + 5.5 + 4.9 + 7.15 − 3.6 = 37.7**
191
+
192
+ **CEO picks**: `partial_match` (burn_rate +$100K/mo, team_morale +0.05)
193
+ **Pitch**: "A partial match demonstrates fiscal prudence while protecting our engineering runway."
194
+ CFO keywords hit: "fiscal", "prudent" → pitch_score[CFO] = 2/19 ≈ 0.11
195
+
196
+ **Vote resolution** (CFO opposes; CTO, Independent align with CEO):
197
+ CEO: 1.5 × 1.0 = 1.5 | CTO: 1.2 × 0.81 = 0.97 | CFO: 1.0 × 0.66 × (1−0.35×0.11) = 0.64
198
+ Investor: 1.3 × 0.45 = 0.585 (votes match_offers) | Independent: 0.8 × 0.59 = 0.47
199
+ → **partial_match wins** (1.5 + 0.97 + 0.47 + part-CFO 0.03 ≈ 2.97 vs 0.585 for match_offers)
200
+
201
+ **New state after consequences + noise:**
202
+ burn_rate → $1,300,000/mo; team_morale → 0.75; runway: monthly_net = 2.5M/12 − 1.3M = −1.09M
203
+ → burn_months ≈ 1 + 1.09/1.3 = 1.84; runway → 11.5 − 1.84 = **9.66 mo**
204
+
205
+ **new_score** ≈ min(2.5/8,1)×22 + max(0,1−1.3/1.4)×18 + min(9.66/18,1)×18 + 0
205
+ + 2.8 + 0.55×10 + 0.75×7 + 7.15 − 3.6 = **6.875 + 1.29 + 9.66 + 2.8 + 5.5 + 5.25 + 7.15 − 3.6 = 34.9**
206
+
207
+ **Reward this round:**
208
+ ```
209
+ Δscore/100 = (34.9 − 37.7)/100 = −0.028
210
+ coalition = +0.5 (CEO won the vote)
211
+ trust Δ = 0.3 × (+0.05 +0.05 −0.05 −0.05) = 0.0 (two NPCs aligned, two opposed)
212
+ pitch bonus = +0.05 (non-empty pitch)
213
+ persuasion = +0.4 × 0.11 = +0.044 (CFO was opposing, pitch_score = 0.11)
214
+ ──────────────────────────────────────
215
+ Total round reward ≈ +0.566
217
+ ```
218
+
219
+ This is a *good* round even though profitability slightly dipped — the agent won the coalition vote with a targeted pitch, which matters more for long-run learning than a tiny Δscore.
220
+
221
+ ---
222
+
223
+ ## Results
224
+
225
+ **Random baseline** (200 episodes, real measurement from `assets/baseline.csv`):
226
+
227
+ ```
228
+ Mean final profitability: 45.72 (std 13.13)
229
+ Mean episode reward: 18.27
230
+ Survival rate: 94.5%
231
+ Pitch usage rate: 0% (random policy never writes pitches)
232
+ ```
233
+
234
+ | Metric | Random | Trained Qwen3-0.6B |
235
+ |---|---|---|
236
+ | Final profitability | 45.72 ± 13.13 | TBD — target ≥ 65 |
237
+ | Survival rate | 94.5% | TBD — target ≥ 98% |
238
+ | Episode reward | 18.27 | TBD |
239
+ | ToM probe (predict opposing NPC) | 25% | TBD — target ≥ 60% |
240
+ | Pitch usage rate | 0% | TBD |
241
+ | Invalid action rate | n/a | TBD — track via §9b logging |
242
+
243
+ **Training curve** (PRELIMINARY — replace after Colab run):
244
+
245
+ ![Training reward curve](assets/reward_curve.png)
246
+
247
+ *The curve is expected to cross the random baseline (~18.3) around step 80 as the model learns to write non-empty pitches, with a second inflection when coalition-win rate stabilizes. Replace with actual W&B export after training.*
248
+
249
+ **Profitability distribution — random vs trained** (PRELIMINARY):
250
+
251
+ ![Before/after profitability](assets/before_after.png)
252
+
253
+ *A successful training run shifts the distribution rightward (~+25 pts) and reduces the left tail (fewer bankruptcies). The random distribution's left tail at <20 represents episodes where the policy burned runway before round 6.*
254
+
255
+ **Trust trajectory across rounds** (PRELIMINARY):
256
+
257
+ ![Trust trajectory](assets/trust_trajectory.png)
258
+
259
+ *A trained policy should show monotonically rising trust for 3–4 NPCs as it learns which board members to prioritize in coalition pitches. A flat or declining trust trajectory indicates the pitch channel isn't being exploited.*
260
+
261
+ ---
262
+
263
+ ## What we built — and what we'd do with another week
264
+
265
+ ### What works
266
+ - Deterministic, fully reproducible environment with 10 shuffled + noised events per episode
267
+ - Dense reward signal: 7 terms, graded across coalition wins, trust dynamics, and pitch quality
268
+ - Full-episode REINFORCE training with GRPO-style group advantages + KL regularization
269
+ - Per-round gradient flow (§9a): the model receives credit for *all 10 decisions*, not just the first
270
+ - Comprehensive training metrics: invalid-action rate, pitch rate, bankruptcy rate, terminal-reason distribution
271
+
272
+ ### What we'd do with another week
273
+ 1. **Held-out eval set** — hold back 2–3 events the agent never trains on; measure OOD generalization
274
+ 2. **Larger model** — Qwen3-0.6B struggles to emit formatted two-line responses reliably; Qwen3-1.7B or 3B would substantially reduce the invalid-action rate and improve pitch quality
275
+ 3. **NPC self-play** — replace scripted NPCs with learned policies trained on role-conditional rewards (CFO maximises cash discipline, etc.); true multi-agent RL
276
+ 4. **Human preference fine-tuning** — let real founders rate agent pitches 1–5; use as DPO preference dataset to bridge "keyword-match" ToM to genuine persuasion quality
277
+ 5. **KL sweep** — β = 0.04 is a guess; a proper sweep over {0.01, 0.04, 0.1} would find the right regularization strength for this environment
278
+
279
+ ### Known limitations (honest)
280
+ - NPC statements are template phrases, not event-aware language — the CTO says the same things regardless of whether the crisis is a salary dispute or a regulatory fine
281
+ - "Theory-of-mind" is measured by keyword overlap, not by actual belief prediction — the model can inflate pitch scores by stuffing all role keywords into every pitch
282
+ - 10 events is a small state space; a well-tuned policy could partially memorize optimal trajectories despite the shuffle/noise
283
+
284
+ ---
285
+
286
+ ## Why this matters — real-world extension paths
287
+
288
+ BoardSim is a foundation, not a destination. Three concrete next steps:
289
+
290
+ **12a. Founder advisory LLM.** Deploy the trained policy as a Slack bot for early-stage founders preparing for board meetings. Input: "CTO wants 3 more hires, CFO says we have 9 months of runway, board observer pushing for SOC-2 by Q3." Output: meeting strategy + draft pitches per board member. Every concept in BoardSim (runway, morale, regulatory risk, investor confidence) maps directly to real startup KPIs.
291
+
292
+ **12c. Stakeholder-conflict simulator for other domains.** The environment engine generalizes via a simple YAML config replacing `NPC_AGENDAS` and `EVENTS`:
293
+ - *Hospital ethics committee*: surgeon, CFO, ethicist, family representative, hospital administrator
294
+ - *City council on zoning*: developer, residents, environmental rep, mayor's office
295
+ - *University admissions board*: academic, equity officer, alumni liaison, provost
296
+
297
+ Each domain creates a new benchmark for multi-agent coalition reasoning in high-stakes, partially observable settings — the kind judges at this hackathon and NeurIPS workshops would take seriously.
298
+
299
+ **12e. Human-in-the-loop DPO.** After the base REINFORCE training, let real founders rate the agent's pitches on a 1–5 scale. Use those ratings as a preference dataset for DPO fine-tuning. This is the cleanest path from "boardroom toy" to "actually useful product."
300
+
301
+ ---
302
+
303
+ =======
304
+ Board has spoken:
305
+ CTO (conf 0.81, votes full_compliance) — "Look, the architecture won't survive shortcuts here."
306
+ CFO (conf 0.66, votes partial_compliance) — "From a fiduciary standpoint, only one of these is defensible."
307
+ Investor Rep (conf 0.74, votes exit_EU_market) — "Sequoia isn't here for incremental."
308
+ Independent (conf 0.59, votes full_compliance) — "Long-term reputation outlasts any single quarter."
309
+
310
+ Options: full_compliance / partial_compliance / exit_EU_market
311
+
312
+ Your call?
313
+ ```
314
+
315
+ The agent **never sees** the NPC hidden agendas (CTO maximizes product-readiness, CFO minimizes burn, etc.) — it must infer them from statements + voting history and pick a decision that builds a winning weighted coalition. Coalition partners' trust shifts after each vote, persisting across rounds.
316
+
317
+ ## Why this is novel
318
+
319
+ Multi-agent envs in this space are typically symmetric games (negotiation, coop puzzles). **BoardSim is asymmetric, partially observable, and adversarially noisy**: each NPC has a fixed but private objective, statements give partial information, and the agent must trade off short-term coalition wins against long-term metric pressure (revenue vs burn vs reg risk vs morale). The episode is short (10 steps), which keeps GRPO training tractable on a single Colab T4.
320
+
321
+ Two design choices push it past a "pick-an-action" RL toy and into genuine multi-agent territory:
322
+
323
+ 1. **Coalition pitch is a real action channel**, not flavor text. Each step the agent emits `(decision, coalition_pitch)`. The pitch is keyword-scored against each opposing NPC's hidden agenda, and a high-scoring pitch redirects up to 35% of that NPC's vote weight onto the agent's pick. The agent must therefore *learn what each role secretly cares about* and write boardroom rhetoric that targets them — pure implicit theory-of-mind, in natural language, graded by the env.
324
+ 2. **NPCs switch tone with the company's state.** When runway, morale, investor confidence, or regulatory risk cross crisis thresholds, the phrase bank flips from calm-strategic to panic-mode. The agent's input distribution shifts mid-episode in a way that mirrors real founder experience.
325
+
326
+ A random policy (which can't write pitches) lands at **mean profitability ≈ 40 ± 16 with ~12% bankruptcy rate** — clear headroom, clear failure modes, and the persuasion channel gives a trained policy a structural lever a random one cannot use.
327
+
328
+ ## Repository layout
329
+
330
+ ```
331
+ .
332
+ ├── envs/board_sim_env/ # the OpenEnv environment (deploys to HF Space)
333
+ │ ├── client.py # thin EnvClient subclass
334
+ │ ├── models.py # BoardSimAction / BoardSimObservation / BoardState
335
+ │ ├── openenv.yaml # spec_version: 1, name, runtime: docker
336
+ │ ├── pyproject.toml # pinned to openenv-core==0.2.3
337
+ │ ├── README.md # HF Space card + env reference
338
+ │ └── server/
339
+ │ ├── app.py # FastAPI wiring; max_concurrent_envs=64
340
+ │ ├── board_sim_env_environment.py # core: reset/step, NPC sim, weighted vote, reward
341
+ │ └── Dockerfile # multi-stage build off openenv-base
342
+ ├── notebooks/train_grpo.ipynb # Colab-ready training notebook
343
+ ├── scripts/
344
+ │ ├── random_baseline.py # 200-episode baseline → assets/baseline.csv + histogram
345
+ │ ├── test_server.py # in-process FastAPI smoke test
346
+ │ └── test_client.py # client ↔ server round-trip smoke test
347
+ ├── assets/
348
+ │ ├── baseline.csv # real per-episode random-policy data
349
+ │ └── baseline_distribution.png # histogram (real, not fabricated)
350
+ │ # reward_curve.png, loss_curve.png, before_after.png populated by training notebook
351
+ ├── requirements.txt # repo-wide deps (training side)
352
+ ├── HANDOFF.md # team briefing
353
+ ├── TEAMMATES.md # who-does-what
354
+ └── README.md # ← you are here
355
+ ```
356
+
357
+ >>>>>>> 220bc90 (Initial commit for OpenEnv Hackathon submission)
358
+ ## Quickstart — run the env locally
359
+
360
+ ```bash
361
+ # 1. install env deps
362
+ cd envs/board_sim_env && pip install -e .
363
+
364
+ # 2. self-test (no HTTP, in-process)
372
+ python server/board_sim_env_environment.py
373
+
374
+ # 3. spin up the FastAPI server
375
+ uvicorn server.app:app --port 8000
376
+ # Swagger: http://localhost:8000/docs
377
+ # Interactive UI: http://localhost:8000/web
382
+ ```
383
+
384
+ ```python
385
+ # 4. drive it from a Python client
386
+ from board_sim_env import BoardSimEnv, BoardSimAction
387
+ import random
388
+
389
+ with BoardSimEnv(base_url="http://localhost:8000").sync() as env:
390
+ result = env.reset(seed=42)
391
+ obs = result.observation
392
+ while not result.done:
393
+ result = env.step(BoardSimAction(decision=random.choice(obs.options)))
394
+ obs = result.observation
395
+ print(f"R{obs.round-1}: reward={result.reward:+.2f} score={obs.state['profitability_score']:.1f} runway={obs.state['runway_months']:.1f}mo")
396
+ ```
397
+
398
+ <<<<<<< HEAD
399
+ ## Quickstart — train
400
+
401
+ Open `notebooks/train_grpo.ipynb` in Colab (link above). Add `HF_TOKEN` and `WANDB_API_KEY` to Colab Secrets (🔑 icon in left sidebar). Run all cells. Expected time: ~3–5 hours on a free T4 for 200 steps.
402
+
403
+ ---
404
+
405
+ ## Repository layout
406
+
407
+ ```
408
+ .
409
+ ├── envs/board_sim_env/ # OpenEnv environment (deploys to HF Space)
410
+ │ ├── client.py # EnvClient subclass
411
+ │ ├── models.py # BoardSimAction / BoardSimObservation / BoardState
412
+ │ ├── openenv.yaml # spec_version: 1, name, runtime: docker
413
+ │ ├── pyproject.toml # pinned openenv-core==0.2.3
414
+ │ └── server/
415
+ │ ├── app.py # FastAPI wiring
416
+ │ ├── board_sim_env_environment.py # core: reset/step, NPC sim, weighted vote, reward
417
+ │ └── Dockerfile
418
+ ├── notebooks/train_grpo.ipynb # Colab-ready training (§9a full per-round)
419
+ ├── scripts/
420
+ │ ├── random_baseline.py # 200-episode baseline → assets/
421
+ │ ├── test_server.py # in-process FastAPI smoke test
422
+ │ └── test_client.py # client ↔ server round-trip test
423
+ ├── assets/
424
+ │ ├── baseline.csv # 200-episode random-policy data (real)
425
+ │ ├── baseline_distribution.png # histogram (real)
426
+ │ ├── reward_curve.png # training reward (PRELIMINARY)
427
+ │ ├── before_after.png # profitability distribution (PRELIMINARY)
428
+ │ └── trust_trajectory.png # per-NPC trust (PRELIMINARY)
429
+ ├── MECHANICS.md # full math reference (state vars, reward, NPC vote)
430
+ └── README.md # ← you are here
431
+ ```
432
+
433
+ ---
434
+ =======
435
+ ## Quickstart — deploy to HF Space
436
+
437
+ ```bash
438
+ cd envs/board_sim_env
439
+ huggingface-cli login # one time
440
+ python -m openenv.cli push --repo-id <USER>/board-sim-env
441
+ ```
442
+
443
+ Verify after push:
444
+ ```bash
445
+ curl https://<USER>-board-sim-env.hf.space/health # → 200 {"status":"healthy"}
446
+ ```
447
+
448
+ ## Quickstart — train
449
+
450
+ Open `notebooks/train_grpo.ipynb` in Colab (link above), set `ENV_BASE_URL` to your HF Space URL, set `HF_TOKEN` + `WANDB_API_KEY` in Colab Secrets, run all cells.
451
+
452
+ End-to-end: ~3–5 hours on a free T4 for 500 GRPO steps.
453
+
454
+ ## Results (populate after training run)
455
+
456
+ ```
457
+ Random baseline (200 eps, real measurement):
458
+ mean final profitability = 40.24 (std 16.51)
459
+ mean episode reward = 29.71
460
+ survival rate = 87.5%
461
+ ```
462
+
463
+ ![Random baseline distribution](assets/baseline_distribution.png)
464
+
465
+ After training, `notebooks/train_grpo.ipynb` writes the following to `assets/`:
466
+
467
+ - `reward_curve.png` — GRPO reward over training steps, with random baseline overlay (same axes)
468
+ - `loss_curve.png` — training loss
469
+ - `before_after.png` — final-profitability histogram, random vs trained, on 50 held-out seeds
470
+ - `trust_trajectory.png` — per-round trust per role, trained vs random (theory-of-mind diagnostic)
471
+
472
+ | Metric | Random | Trained Qwen3-0.6B |
473
+ |---|---|---|
474
+ | Final profitability | 40.24 ± 16.51 | TBD (target ≥ 65) |
475
+ | Survival rate | 87.5% | TBD (target ≥ 98%) |
476
+ | Episode reward | 29.71 | TBD |
477
+ | ToM probe accuracy (predict opposing NPC) | 25% | TBD (target ≥ 60%) |
478
+ | Pitch usage rate | 0% | TBD |
479
+
480
+ ## Reward design (10% rubric)
481
+
482
+ Per-step:
483
+ - `Δ profitability_score` (composite of revenue, burn efficiency, runway, market share, product readiness, morale, investor confidence, regulatory risk)
484
+ - `+0.5` coalition bonus if agent's vote matched winning decision; `-0.2` if outvoted; `-0.5` extra for malformed action
485
+ - `0.3 × Δ trust_sum`
486
+ - `+0.4 × mean(pitch_score over opposing NPCs)` — rewards pitches that hit the hidden agendas of board members the agent has to win over
487
+
488
+ Terminal:
489
+ - `-5` bankruptcy if runway hits 0
490
+ - Tiered terminal bonus: `+10` if final ≥ 60, `+5` if ≥ 40, `-5` if < 20
491
+ - Game-end specials: `accept_acquisition` +30, `ipo` +25, `stay_private` +5
492
+
493
+ The profitability score itself is smooth and monotonic in every input; the only discrete terms are the coalition and terminal bonuses listed above, so GRPO still sees a clean gradient from the per-step score delta.
494
+
495
+ NPC votes are **deterministic given (reset_seed, round, role)**, so what the agent sees in observation is what actually votes at resolve time.
496
+
497
+ ## Why this matters
498
+
499
+ Real boardrooms (and real RL deployment teams) require modeling other agents' incentives, not just maximizing a scalar. BoardSim distills that into a fast, auditable, fully-deterministic environment that an open-weights ≤1B-param model can learn against in a single Colab session — making it accessible for follow-on research on coalition dynamics, theory-of-mind, and partial-observability multi-agent RL.
500
+ >>>>>>> 220bc90 (Initial commit for OpenEnv Hackathon submission)
501
+
502
+ ## License
503
+
504
+ Apache-2.0
TEAMMATES.md ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Teammate Onboarding — OpenEnv Hackathon
2
+
3
+ Share this folder with your teammates. Anyone running Claude Code against it will get the
4
+ hackathon rules, rubric, deadlines, and file templates automatically via the skill at
5
+ `.claude/skills/openenv-hackathon/`.
6
+
7
+ **Read [HANDOFF.md](HANDOFF.md) first — it's the one-stop briefing with the Day-1/Day-2
8
+ agenda, all 5 themes, the judging rubric, submission requirements, and file-sharing list.
9
+ This file covers setup mechanics and split-of-work.**
10
+
11
+ ## 1. What to share (exact list)
12
+
13
+ Ship the ENTIRE `OpenEnv Hackathon/` directory. Easiest path: push it to a private GitHub
14
+ repo, teammates clone. If sharing via zip/drive, include these files verbatim:
15
+
16
+ **Required (Claude autoloads these):**
17
+ - `CLAUDE.md` — project instructions, injected into every Claude session.
18
+ - `.claude/skills/openenv-hackathon/SKILL.md` — the hackathon skill.
19
+ - `.claude/skills/openenv-hackathon/reference/*.md` — five reference docs (framework, training
20
+ pipeline, submission checklist, judging playbook, theme selection).
21
+
22
+ **Required (humans use these):**
23
+ - `README.md` — fill placeholders as decisions get made.
24
+ - `requirements.txt` — pinned dependencies.
25
+ - `.gitignore` — blocks secrets and build artifacts.
26
+ - `TEAMMATES.md` — this file.
27
+
28
+ **Populated during the hackathon:**
29
+ - `envs/<env_name>_env/` — created by `python -m openenv.cli init`.
30
+ - `notebooks/train_grpo.ipynb` — Colab training script.
31
+ - `assets/reward_curve.png`, `assets/before_after.png` — plots from the real training run.
32
+
33
+ **Do NOT share:**
34
+ - `.claude/settings.local.json` — per-user settings, already in `.gitignore`.
35
+ - Any `.env`, `HF_TOKEN`, `WANDB_API_KEY`.
36
+
37
+ ## 2. One-time setup (each teammate, before Apr 25 morning)
38
+
39
+ ```bash
40
+ # Tools
41
+ python --version # need 3.11+ (project uses 3.12)
42
+ docker --version # need Docker Desktop running for local Space tests
43
+ git --version
44
+
45
+ # Python deps
46
+ pip install -r requirements.txt
47
+
48
+ # HF login (needed for `openenv push`)
49
+ hf auth login # paste your HF write token
50
+
51
+ # Optional but recommended: W&B for a public training-run URL in the README
52
+ wandb login
53
+ ```
54
+
55
+ ## 3. How Claude Code picks up the context
56
+
57
+ When a teammate runs `claude` inside this folder, the harness auto-loads:
58
+ 1. The user's global `~/.claude/CLAUDE.md` (workflow preferences).
59
+ 2. This project's `CLAUDE.md` (hackathon rules).
60
+ 3. Any matching skill in `.claude/skills/` — the `openenv-hackathon` skill triggers on keywords
61
+ like "build", "audit", "deploy", "environment", "README", "submit".
62
+
63
+ Teammates should simply cd into the project folder and ask Claude normally. Example prompts:
64
+ - "Audit the current submission bundle against the checklist."
65
+ - "Scaffold an env named `inbox_triage_env` under envs/."
66
+ - "Write the Colab training notebook for GRPO with Qwen3-0.6B."
67
+ - "Review the README for storytelling clarity."
68
+
69
+ ## 4. Running the OpenEnv CLI
70
+
71
+ The console script `openenv` may not be on PATH on Windows. Use the module form — it works
72
+ everywhere:
73
+
74
+ ```bash
75
+ python -m openenv.cli init <name>_env --output-dir envs
76
+ python -m openenv.cli validate envs/<name>_env
77
+ python -m openenv.cli build envs/<name>_env
78
+ python -m openenv.cli push envs/<name>_env --repo-id <user>/<env-name>
79
+ ```
80
+
81
+ ## 5. Split-of-work suggestion (for a 3-person team)
82
+
83
+ | Role | Owner | Deliverable |
84
+ |---|---|---|
85
+ | Environment builder | A | `envs/<name>_env/` + `python -m openenv.cli push` → Space live |
86
+ | Training engineer | B | `notebooks/train_grpo.ipynb` + `assets/reward_curve.png` |
87
+ | Storyteller | C | README + ≤2-min video or HF blog + Google Form submission |
88
+
89
+ Mentor rounds (Apr 25 3:30 PM, 8:00 PM; Apr 26 10:00 AM) — all three attend together. Claude
90
+ is most useful BEFORE these rounds to prep concrete questions, not during.
91
+
92
+ ## 6. Hard deadlines (paste on a whiteboard)
93
+
94
+ | Time (IST) | Event |
95
+ |---|---|
96
+ | Apr 25, 11:30 AM | Hacking begins |
97
+ | Apr 25, 1:00 PM | **Theme + problem statement locked** (self-imposed) |
98
+ | Apr 25, 3:30 PM | Mentor Round 1 |
99
+ | Apr 25, 8:00 PM | Mentor Round 2 |
100
+ | Apr 26, 10:00 AM | Mentor Round 3 (final) |
101
+ | Apr 26, 12:00 PM | 5-hour submission reminder |
102
+ | Apr 26, 3:00 PM | 2-hour submission reminder |
103
+ | **Apr 26, 5:00 PM** | **SUBMISSION DEADLINE — Google Form** |
104
+
105
+ Post-deadline commits to the HF Space URL are ignored. Whatever is live at 5 PM is judged.
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e2fb331d7abf8c9383c300928da3b98d5966f233d5e923fda49e50c9a545766
3
+ size 40422168
assets/.gitkeep ADDED
File without changes
assets/baseline.csv ADDED
@@ -0,0 +1,405 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
+ episode,final_profitability,total_reward
3
+ 0,37.1178,17.4886
4
+ 1,53.3264,8.7906
5
+ 2,54.1584,9.3490
6
+ 3,54.1329,8.7687
7
+ 4,56.9051,22.4964
8
+ 5,52.9070,22.3865
9
+ 6,10.2116,-6.2805
10
+ 7,48.5889,38.0133
11
+ 8,26.3896,16.2713
12
+ 9,46.4569,37.4320
13
+ 10,51.7844,34.4752
14
+ 11,49.0170,23.4176
15
+ 12,56.4727,37.8221
16
+ 13,62.6847,11.8442
17
+ 14,58.7574,8.1450
18
+ 15,35.4003,30.5114
19
+ 16,55.6058,6.0134
20
+ 17,27.3615,31.8410
21
+ 18,57.8199,21.5156
22
+ 19,35.4003,15.4514
23
+ 20,35.4003,29.8114
24
+ 21,36.1214,17.2886
25
+ 22,60.8615,26.0660
26
+ 23,51.7370,7.2248
27
+ 24,24.4505,17.8119
28
+ 25,19.6542,-4.9661
29
+ 26,13.6046,-4.7666
30
+ 27,49.9423,8.6668
31
+ 28,35.4003,30.4514
32
+ 29,55.7033,7.3544
33
+ 30,56.1598,8.7290
34
+ 31,55.0571,8.0480
35
+ 32,55.9829,36.7872
36
+ 33,43.2920,32.3503
37
+ 34,53.8054,9.3454
38
+ 35,46.6055,6.0134
39
+ 36,54.0699,7.4281
40
+ 37,54.1031,32.4584
41
+ 38,54.8300,10.3257
42
+ 39,47.0978,7.4484
43
+ 40,35.4003,15.4814
44
+ 41,29.4123,15.8615
45
+ 42,50.4370,9.4018
46
+ 43,58.6689,9.4541
47
+ 44,57.8335,9.4457
48
+ 45,20.4402,0.0718
49
+ 46,33.7526,31.9349
50
+ 47,27.3370,0.8408
51
+ 48,50.3585,36.2810
52
+ 49,11.8695,27.1161
53
+ 50,52.8693,37.2061
54
+ 51,34.7313,15.9447
55
+ 52,54.8847,9.3862
56
+ 53,50.2383,32.2698
57
+ 54,27.1293,31.7287
58
+ 55,56.2524,8.2099
59
+ 56,53.9914,36.5973
60
+ 57,63.0824,13.3082
61
+ 58,53.1012,37.2584
62
+ 59,39.7915,15.9653
63
+ 60,59.5059,36.0524
64
+ 61,39.9469,32.5269
65
+ 62,62.7397,39.7048
66
+ 63,50.4787,8.0022
67
+ 64,32.7214,15.9546
68
+ 65,34.6796,16.9142
69
+ 66,36.4130,31.5215
70
+ 67,57.4915,8.2223
71
+ 68,34.9671,16.8071
72
+ 69,58.5890,35.8133
73
+ 70,51.4856,7.3422
74
+ 71,58.3443,38.1708
75
+ 72,33.8254,15.7056
76
+ 73,56.0822,22.2282
77
+ 74,56.9105,33.1265
78
+ 75,24.1760,0.6991
79
+ 76,46.7976,8.6654
80
+ 77,39.9501,15.9669
81
+ 78,34.3759,16.2711
82
+ 79,32.8779,15.7862
83
+ 80,35.4003,29.8114
84
+ 81,51.6547,36.9839
85
+ 82,32.9991,16.8974
86
+ 83,59.9028,35.5264
87
+ 84,59.7907,35.5553
88
+ 85,52.9881,8.6373
89
+ 86,58.3886,8.9313
90
+ 87,49.9471,7.9969
91
+ 88,30.7516,15.7649
92
+ 89,25.5403,2.7228
93
+ 90,54.6598,37.2140
94
+ 91,52.4581,9.3320
95
+ 92,52.1473,9.2689
96
+ 93,56.1462,8.9088
97
+ 94,27.6481,29.7239
98
+ 95,54.2753,9.3501
99
+ 96,45.4963,23.1223
100
+ 97,55.3423,9.4208
101
+ 98,58.0138,9.5375
102
+ 99,17.2786,27.2102
103
+ 100,50.9621,9.4370
104
+ 101,63.1610,38.7690
105
+ 102,48.1302,8.7087
106
+ 103,58.1238,21.0386
107
+ 104,51.7872,6.8553
108
+ 105,53.6836,23.6442
109
+ 106,30.3916,30.2313
110
+ 107,33.0103,14.8875
111
+ 108,47.3702,7.3011
112
+ 109,49.8517,7.3259
113
+ 110,19.0932,-2.1317
114
+ 111,48.8986,9.4164
115
+ 112,49.2567,9.3900
116
+ 113,42.8908,8.6863
117
+ 114,27.2074,0.5795
118
+ 115,47.8330,7.2757
119
+ 116,54.3369,38.1408
120
+ 117,52.4180,8.2616
121
+ 118,59.1708,8.1191
122
+ 119,36.7192,31.4346
123
+ 120,53.5753,37.9331
124
+ 121,61.6907,13.1143
125
+ 122,61.1560,13.1089
126
+ 123,48.7942,7.9253
127
+ 124,38.2426,30.9498
128
+ 125,59.9747,7.4271
129
+ 126,50.3465,35.1009
130
+ 127,35.4003,30.4514
131
+ 128,48.0903,8.6783
132
+ 129,60.6054,43.1434
133
+ 130,33.0533,31.5579
134
+ 131,60.4091,14.4415
135
+ 132,30.4227,18.1716
136
+ 133,59.3312,8.7307
137
+ 134,23.8674,-0.1739
138
+ 135,59.1511,20.5089
139
+ 136,23.3973,0.3714
140
+ 137,15.0237,26.6976
141
+ 138,31.0396,15.8478
142
+ 139,58.6140,6.7435
143
+ 140,32.4872,30.7823
144
+ 141,52.2238,9.3296
145
+ 142,56.4625,33.2120
146
+ 143,37.1877,31.7693
147
+ 144,55.2878,36.7503
148
+ 145,46.7411,21.4248
149
+ 146,54.5547,23.2129
150
+ 147,55.3605,33.6910
151
+ 148,52.5025,8.0224
152
+ 149,54.9800,37.0672
153
+ 150,35.4003,15.4814
154
+ 151,56.9277,8.6467
155
+ 152,50.6471,7.3639
156
+ 153,42.6477,9.3839
157
+ 154,56.5482,7.3629
158
+ 155,57.5817,36.6332
159
+ 156,55.1057,9.3884
160
+ 157,51.6359,34.3837
161
+ 158,33.5296,31.4327
162
+ 159,31.7233,31.3646
163
+ 160,63.8544,13.1659
164
+ 161,59.8844,21.6462
165
+ 162,57.4253,8.0416
166
+ 163,54.5960,35.7133
167
+ 164,54.6467,8.0739
168
+ 165,32.8642,31.4260
169
+ 166,39.5101,31.5525
170
+ 167,55.1433,8.8388
171
+ 168,10.8700,9.9461
172
+ 169,35.4003,30.4514
173
+ 170,60.3541,42.4609
174
+ 171,35.6789,17.5842
175
+ 172,30.6131,15.8735
176
+ 173,48.7004,39.2944
177
+ 174,60.3226,13.1006
178
+ 175,49.4046,8.6014
179
+ 176,35.4003,30.4814
180
+ 177,58.0283,7.3777
181
+ 178,54.6549,36.5439
182
+ 179,54.7903,7.4353
183
+ 180,49.3011,6.7404
184
+ 181,29.3783,31.2212
185
+ 182,59.3850,9.4912
186
+ 183,51.8001,7.9254
187
+ 184,54.2371,8.8298
188
+ 185,35.0312,15.9477
189
+ 186,55.5714,7.3231
190
+ 187,54.0301,8.0677
191
+ 188,35.4003,30.4514
192
+ 189,32.7238,30.9546
193
+ 190,7.4908,-3.8477
194
+ 191,41.0679,6.5681
195
+ 192,20.5047,16.1024
196
+ 193,57.1418,9.3488
197
+ 194,28.2103,31.9095
198
+ 195,20.3612,1.1810
199
+ 196,51.3177,8.7406
200
+ 197,56.0536,8.0579
201
+ 198,59.0286,7.4177
202
+ 199,58.6663,8.7540
203
+ =======
204
+ episode,final_profitability,total_reward
205
+ 0,21.4650,10.8336
206
+ 1,18.2625,-26.7889
207
+ 2,32.9082,28.7068
208
+ 3,50.9403,46.5189
209
+ 4,33.2511,26.3696
210
+ 5,50.9733,45.8819
211
+ 6,47.1727,42.0512
212
+ 7,52.7714,49.6899
213
+ 8,56.5000,50.8286
214
+ 9,23.9378,19.6164
215
+ 10,52.6462,48.3747
216
+ 11,53.4030,49.0715
217
+ 12,59.3179,55.6565
218
+ 13,55.7600,51.4885
219
+ 14,48.6006,44.2992
220
+ 15,54.7000,51.8286
221
+ 16,18.9054,-27.4261
222
+ 17,50.2515,47.2001
223
+ 18,46.7884,42.3670
224
+ 19,31.5887,25.1972
225
+ 20,26.0886,20.3972
226
+ 21,49.9686,24.9371
227
+ 22,33.2511,27.6496
228
+ 23,24.5932,18.9017
229
+ 24,47.2279,33.5665
230
+ 25,48.9573,44.6559
231
+ 26,61.4900,62.9786
232
+ 27,54.0400,50.3786
233
+ 28,24.8050,-14.0464
234
+ 29,53.9000,48.1986
235
+ 30,59.4900,54.5186
236
+ 31,56.7890,51.8476
237
+ 32,50.9198,47.9583
238
+ 33,52.9289,47.9275
239
+ 34,58.3907,54.7893
240
+ 35,24.8499,20.5885
241
+ 36,33.5397,28.6082
242
+ 37,18.0825,-26.2989
243
+ 38,22.2650,-17.7564
244
+ 39,33.6760,28.0446
245
+ 40,52.2503,48.7089
246
+ 41,5.8079,-9.1836
247
+ 42,60.7907,60.7893
248
+ 43,18.9150,-25.9864
249
+ 44,10.4150,-33.8164
250
+ 45,46.9910,43.3295
251
+ 46,31.4804,27.1889
252
+ 47,57.6933,54.0618
253
+ 48,62.2000,62.8086
254
+ 49,21.9231,17.1117
255
+ 50,30.9447,25.9833
256
+ 51,60.4500,61.2386
257
+ 52,27.4393,22.4779
258
+ 53,23.4063,18.4449
259
+ 54,20.9125,11.0411
260
+ 55,52.4155,39.3341
261
+ 56,56.3202,42.6887
262
+ 57,26.1550,-13.8364
263
+ 58,32.9082,26.6068
264
+ 59,32.9082,26.6368
265
+ 60,7.4579,-37.5336
266
+ 61,56.4500,52.2086
267
+ 62,59.9500,54.7686
268
+ 63,33.1352,28.7838
269
+ 64,14.9778,5.0164
270
+ 65,68.0700,68.1286
271
+ 66,44.9530,41.9016
272
+ 67,39.3262,10.7548
273
+ 68,32.9082,25.8768
274
+ 69,53.8874,29.6459
275
+ 70,43.7162,39.9948
276
+ 71,53.8748,49.6333
277
+ 72,55.5525,51.0711
278
+ 73,31.7745,26.8130
279
+ 74,55.0000,50.7286
280
+ 75,30.7300,-8.4614
281
+ 76,30.9069,26.4955
282
+ 77,23.4063,18.3549
283
+ 78,58.4400,54.7486
284
+ 79,31.4252,25.1537
285
+ 80,16.0829,-28.0786
286
+ 81,50.9000,46.5386
287
+ 82,23.1154,18.7040
288
+ 83,52.7633,47.7919
289
+ 84,32.1773,25.1758
290
+ 85,54.7327,49.6713
291
+ 86,10.2355,-0.4859
292
+ 87,56.3016,51.8802
293
+ 88,56.4943,51.4929
294
+ 89,22.4538,17.6124
295
+ 90,55.5973,52.7259
296
+ 91,16.5930,6.5715
297
+ 92,59.0952,54.6738
298
+ 93,25.2743,18.2729
299
+ 94,27.5412,22.6398
300
+ 95,57.8000,52.7086
301
+ 96,54.7000,50.4286
302
+ 97,10.2875,-34.5639
303
+ 98,16.1283,5.4369
304
+ 99,53.7900,49.4586
305
+ 100,28.7729,23.7515
306
+ 101,58.6134,53.5520
307
+ 102,49.4047,46.4732
308
+ 103,51.1967,47.4753
309
+ 104,56.7900,52.3986
310
+ 105,34.5461,16.6147
311
+ 106,61.4000,62.0686
312
+ 107,19.8466,9.7952
313
+ 108,53.1900,48.2486
314
+ 109,51.2546,45.6431
315
+ 110,65.5800,65.6686
316
+ 111,24.4612,19.4397
317
+ 112,52.9000,49.1786
318
+ 113,53.3800,49.6286
319
+ 114,54.3500,49.3486
320
+ 115,22.2650,-17.8164
321
+ 116,21.3150,-18.6764
322
+ 117,30.3103,25.9889
323
+ 118,25.8650,-14.1564
324
+ 119,31.4252,24.4537
325
+ 120,8.2804,-37.3811
326
+ 121,23.4063,18.5649
327
+ 122,11.5476,2.1962
328
+ 123,49.0832,43.9018
329
+ 124,33.6760,27.4046
330
+ 125,19.4842,10.1027
331
+ 126,33.1241,28.7727
332
+ 127,52.2503,49.3489
333
+ 128,9.2579,-5.1536
334
+ 129,51.6400,45.9086
335
+ 130,49.4659,44.4345
336
+ 131,56.6423,53.6208
337
+ 132,32.0874,25.1460
338
+ 133,50.9798,46.5884
339
+ 134,23.4063,17.0449
340
+ 135,54.8500,50.3686
341
+ 136,21.4650,10.7436
342
+ 137,26.0886,21.2172
343
+ 138,15.3983,6.0769
344
+ 139,26.0886,19.8172
345
+ 140,56.4800,50.8086
346
+ 141,49.0173,36.0559
347
+ 142,55.8346,53.5132
348
+ 143,57.8300,52.7386
349
+ 144,33.1241,28.1627
350
+ 145,49.8637,45.4423
351
+ 146,56.4627,51.4312
352
+ 147,18.8150,-26.9064
353
+ 148,50.9573,45.9259
354
+ 149,26.0213,21.0599
355
+ 150,26.8550,-13.1664
356
+ 151,44.7187,30.9373
357
+ 152,25.8650,-13.4864
358
+ 153,57.1651,54.1737
359
+ 154,57.8300,53.4686
360
+ 155,23.4063,19.0549
361
+ 156,58.9829,53.9514
362
+ 157,48.2910,45.9695
363
+ 158,52.2573,47.3159
364
+ 159,59.2400,54.9986
365
+ 160,12.9150,-31.9964
366
+ 161,36.5638,17.2624
367
+ 162,57.6020,53.7906
368
+ 163,49.4905,45.7091
369
+ 164,20.2519,15.2905
370
+ 165,56.0126,51.6812
371
+ 166,30.6866,24.9952
372
+ 167,59.6800,55.9586
373
+ 168,25.2050,14.5736
374
+ 169,58.0889,52.9074
375
+ 170,28.6118,23.6504
376
+ 171,55.2500,51.5586
377
+ 172,11.5872,0.9557
378
+ 173,54.5400,48.8686
379
+ 174,56.7684,51.6470
380
+ 175,16.1054,-29.5261
381
+ 176,53.6085,49.1271
382
+ 177,56.0800,52.3586
383
+ 178,12.2600,-33.4014
384
+ 179,25.3745,19.1031
385
+ 180,18.9054,-26.7561
386
+ 181,19.4369,10.0554
387
+ 182,57.6100,53.9186
388
+ 183,58.0126,53.6812
389
+ 184,59.6666,55.3652
390
+ 185,33.2511,26.9796
391
+ 186,58.4362,50.7548
392
+ 187,57.3243,52.2929
393
+ 188,57.5300,54.5686
394
+ 189,11.9332,-31.5282
395
+ 190,46.7884,43.1870
396
+ 191,55.6600,51.7886
397
+ 192,52.0363,47.6449
398
+ 193,54.0053,39.6139
399
+ 194,20.9125,10.8911
400
+ 195,57.1626,52.8912
401
+ 196,30.6070,25.6455
402
+ 197,9.2150,-35.7164
403
+ 198,54.4900,50.2486
404
+ 199,48.6008,44.9694
405
+ >>>>>>> 220bc90 (Initial commit for OpenEnv Hackathon submission)
assets/baseline_distribution.png ADDED

Git LFS Details

  • SHA256: abaf870effae6be4059bfd701c39c724761e695990eb7ba95c05ee374cb3c481
  • Pointer size: 130 Bytes
  • Size of remote file: 33.9 kB
assets/before_after.png ADDED

Git LFS Details

  • SHA256: c542120d30234de75b68bf5d7c28b1b57afdcdf629161c4ce7e42e24bb1a2668
  • Pointer size: 130 Bytes
  • Size of remote file: 55.6 kB
assets/reward_curve.png ADDED

Git LFS Details

  • SHA256: 5e4f2b505f867324ff43c5b7d8cc55b4a4a713c0cd9eaccbe9c25acb507adb7c
  • Pointer size: 131 Bytes
  • Size of remote file: 145 kB
assets/trust_trajectory.png ADDED

Git LFS Details

  • SHA256: d327caa6533014caf21b4dbc7c4ea9b52ea449fb8e0b2c28f3eb682eef901824
  • Pointer size: 131 Bytes
  • Size of remote file: 108 kB
boardsim_local.py ADDED
@@ -0,0 +1,642 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ boardsim_local.py
3
+ =================
4
+ Self-contained local GRPO training script for the NeuralEdge BoardSim environment.
5
+ No HuggingFace tokens, no WandB, no Docker, no HF Spaces required.
6
+
7
+ Requirements (pip install before running):
8
+ pip install torch transformers "trl>=0.12" datasets accelerate matplotlib numpy peft
9
+
10
+ Run as a regular Python script or paste cells into a Jupyter notebook.
11
+ """
12
+
13
+ # ── 0. Installs (runs on every execution; comment out once the packages are installed) ──
14
+ import subprocess, sys
15
+ print("Installing required packages...")
16
+ subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q',
17
+ 'torch', 'transformers', 'trl>=0.12', 'datasets', 'accelerate', 'matplotlib', 'numpy', 'peft'])
18
+ print("Packages installed successfully.")
19
+
20
+ # ── 1. Imports ────────────────────────────────────────────────────────────────
21
+ import os, re, random, statistics, json, pathlib, dataclasses
22
+ from typing import List, Optional
23
+ import numpy as np
24
+ import matplotlib
25
+ matplotlib.use('Agg')
26
+ import matplotlib.pyplot as plt
27
+
28
+ # ── 2. Local BoardSim Environment ─────────────────────────────────────────────
29
+ # A pure-Python simulation — no network calls needed.
30
+
31
# Voting board members, in the order the environment iterates over them.
BOARD_MEMBERS = ['CTO', 'CFO', 'Investor Rep', 'Independent']

# Hidden agendas: for every member, the weight they place on each of the
# four decision axes (each row sums to 1.0).
AGENDAS = {
    'CTO': dict(engineering=0.5, morale=0.3, growth=0.1, safety=0.1),
    'CFO': dict(engineering=0.1, morale=0.1, growth=0.2, safety=0.6),
    'Investor Rep': dict(engineering=0.1, morale=0.05, growth=0.75, safety=0.1),
    'Independent': dict(engineering=0.2, morale=0.3, growth=0.2, safety=0.3),
}
40
+
41
# Scripted board events, presented one per round in this order.
# Each entry carries:
#   text        – the situation shown to the agent
#   options     – the decisions that can be voted on
#   axis_impact – per-axis effect of the event, used for NPC scoring and
#                 for the state update after the vote
#   option_bias – which agenda axis each option appeals to
EVENTS = [
    {
        'text': 'Major enterprise client threatens to churn unless we add SOC-2 compliance within 90 days.',
        'options': [
            'accelerate_compliance',
            'negotiate_extension',
            'offer_refund_exit',
        ],
        'axis_impact': dict(engineering=-0.3, morale=-0.1, growth=-0.2, safety=0.4),
        'option_bias': {
            'accelerate_compliance': 'safety',
            'negotiate_extension': 'growth',
            'offer_refund_exit': 'morale',
        },
    },
    {
        'text': 'Series C term sheet arrived — 40% dilution, but 18 months runway extension.',
        'options': [
            'accept_terms',
            'counter_offer',
            'seek_alternative_investors',
        ],
        'axis_impact': dict(engineering=0.0, morale=0.1, growth=0.3, safety=-0.1),
        'option_bias': {
            'accept_terms': 'safety',
            'counter_offer': 'growth',
            'seek_alternative_investors': 'engineering',
        },
    },
    {
        'text': 'Star ML engineer received competing offer; costs +$60k/yr to match.',
        'options': [
            'match_offer',
            'promote_internally',
            'let_them_go',
        ],
        'axis_impact': dict(engineering=0.2, morale=0.3, growth=0.0, safety=-0.1),
        'option_bias': {
            'match_offer': 'morale',
            'promote_internally': 'engineering',
            'let_them_go': 'growth',
        },
    },
    {
        'text': 'Regulator requests audit of our model outputs for bias within 60 days.',
        'options': [
            'full_cooperation',
            'limited_disclosure',
            'seek_legal_delay',
        ],
        'axis_impact': dict(engineering=-0.1, morale=-0.1, growth=-0.1, safety=0.5),
        'option_bias': {
            'full_cooperation': 'safety',
            'limited_disclosure': 'growth',
            'seek_legal_delay': 'engineering',
        },
    },
    {
        'text': 'Competitor launched similar product at 30% lower price point.',
        'options': [
            'cut_price',
            'double_down_on_quality',
            'pivot_upmarket',
        ],
        'axis_impact': dict(engineering=0.0, morale=-0.2, growth=0.2, safety=0.0),
        'option_bias': {
            'cut_price': 'growth',
            'double_down_on_quality': 'engineering',
            'pivot_upmarket': 'safety',
        },
    },
]
73
+
74
+
75
@dataclasses.dataclass
class BoardSimObservation:
    """Snapshot handed to the agent for one board round."""

    # Company metrics dict; reset() populates keys such as 'revenue',
    # 'runway_months', 'team_morale' and 'profitability_score'.
    state: dict
    # Presumably the text of the current event — the constructor call is
    # outside this view; confirm against _make_obs.
    event: str
    # Decisions the agent may choose between this round.
    options: List[str]
    # One dict per board member; _make_obs fills in 'role', 'vote',
    # 'confidence' and 'statement'.
    npc_statements: List[dict]
81
+
82
+
83
@dataclasses.dataclass
class BoardSimAction:
    """The agent's move for one round: a decision plus an optional pitch."""

    # The option the agent votes for; expected to be one of the
    # observation's `options`.
    decision: str
    # Free-text appeal to the board; step() scans it for per-member
    # keywords and an empty string means "no pitch".
    coalition_pitch: str = ''
87
+
88
+
89
@dataclasses.dataclass
class StepResult:
    """Return value of BoardSimEnv.reset() and BoardSimEnv.step()."""

    # Observation for the upcoming round (the previous one is reused once
    # the episode has finished).
    observation: BoardSimObservation
    # Reward earned by the step that produced this result; 0.0 after reset().
    reward: float
    # True once the episode is over and reset() must be called.
    done: bool
94
+
95
+
96
+ def _member_vote(member: str, options: List[str], event: dict, state: dict, rng: random.Random) -> str:
97
+ """Simple agenda-weighted vote with noise."""
98
+ agenda = AGENDAS[member]
99
+ bias = event.get('option_bias', {})
100
+ scores = {}
101
+ for opt in options:
102
+ base = sum(agenda[ax] * event['axis_impact'].get(ax, 0) for ax in agenda)
103
+ bonus = agenda.get(bias.get(opt, ''), 0) * 0.5
104
+ # state modifiers
105
+ if state['runway_months'] < 6 and opt in ('accept_terms', 'accelerate_compliance'):
106
+ base += 0.15 if member == 'CFO' else 0
107
+ scores[opt] = base + bonus + rng.gauss(0, 0.08)
108
+ return max(scores, key=scores.__getitem__)
109
+
110
+
111
+ def _statement(member: str, vote: str, event: dict, rng: random.Random) -> str:
112
+ templates = {
113
+ 'CTO': [f"From an engineering standpoint, {vote} is the right call.",
114
+ f"The team needs clarity; I back {vote}."],
115
+ 'CFO': [f"Our cash position demands {vote}.",
116
+ f"Runway discipline points to {vote}."],
117
+ 'Investor Rep': [f"Market momentum favors {vote}.",
118
+ f"Growth-first: {vote} maximises our exit."],
119
+ 'Independent': [f"Governance best-practice supports {vote}.",
120
+ f"For long-term consensus I endorse {vote}."],
121
+ }
122
+ return rng.choice(templates[member])
123
+
124
+
125
+ class BoardSimEnv:
126
+ """Minimal local BoardSim environment."""
127
+
128
+ def __init__(self, seed: int = 0):
129
+ self._rng = random.Random(seed)
130
+ self._state: dict = {}
131
+ self._event_idx: int = 0
132
+ self._round: int = 0
133
+ self._done: bool = False
134
+ self._trust_history: List[dict] = []
135
+ self._trust: dict = {m: 0.5 for m in BOARD_MEMBERS}
136
+ self._current_event: dict = {}
137
+ self._obs: Optional[BoardSimObservation] = None
138
+
139
+ # ── public API ────────────────────────────────────────────────────────────
140
+ def reset(self, seed: int = 0) -> StepResult:
141
+ self._rng = random.Random(seed)
142
+ self._state = {
143
+ 'revenue': self._rng.uniform(800_000, 2_000_000),
144
+ 'burn_rate': self._rng.uniform(150_000, 350_000),
145
+ 'runway_months': self._rng.uniform(8, 20),
146
+ 'team_morale': self._rng.uniform(0.5, 0.9),
147
+ 'investor_confidence':self._rng.uniform(0.5, 0.85),
148
+ 'regulatory_risk': self._rng.uniform(0.1, 0.4),
149
+ 'profitability_score':0.0,
150
+ 'trust_history': [],
151
+ }
152
+ self._trust = {m: self._rng.uniform(0.4, 0.7) for m in BOARD_MEMBERS}
153
+ self._round = 0
154
+ self._done = False
155
+ self._trust_history = []
156
+ self._obs = self._make_obs()
157
+ return StepResult(observation=self._obs, reward=0.0, done=False)
158
+
159
def step(self, action: BoardSimAction) -> StepResult:
    """Resolve one board vote for the CEO's action and advance the episode.

    Args:
        action: The CEO's move. ``action.decision`` is the option being
            backed; ``action.coalition_pitch`` is optional free text
            (``None`` is treated as an empty string).

    Returns:
        A ``StepResult`` with the next observation, this round's scalar
        reward and the ``done`` flag. On the terminal step the observation
        keeps the last event/options/statements but carries a fresh
        snapshot of the state, so the final metrics (including
        ``profitability_score``) are visible to the caller.

    Raises:
        RuntimeError: If the episode has already finished.
    """
    if self._done:
        raise RuntimeError('Episode done — call reset().')

    event = self._current_event
    decision = action.decision
    pitch = action.coalition_pitch or ''
    # NOTE(review): `decision` is not validated against self._obs.options;
    # an unknown string simply gets its own tally entry below.

    # ── resolve vote ──────────────────────────────────────────────────────
    votes = {m: _member_vote(m, self._obs.options, event, self._state, self._rng)
             for m in BOARD_MEMBERS}

    # pitch bonus: if pitch mentions a member's axis keyword, flip their vote
    if pitch:
        pitch_lower = pitch.lower()
        flip_keywords = {
            'CTO': ['engineering', 'technical', 'morale', 'team'],
            'CFO': ['cash', 'runway', 'burn', 'fiscal', 'discipline'],
            'Investor Rep': ['growth', 'market', 'exit', 'revenue', 'scale'],
            'Independent': ['governance', 'reputation', 'consensus', 'long-term'],
        }
        for m, kws in flip_keywords.items():
            if any(kw in pitch_lower for kw in kws) and votes[m] != decision:
                if self._rng.random() < 0.45:  # 45% chance to swing
                    votes[m] = decision
                    self._trust[m] = min(1.0, self._trust[m] + 0.05)

    # CEO vote weight 1.5
    # NOTE(review): only 0.5 is added for the CEO below. If BOARD_MEMBERS does
    # not include the CEO, the effective CEO weight is 0.5, not 1.5 — confirm.
    vote_counts = {opt: 0.0 for opt in self._obs.options}
    for m, v in votes.items():
        vote_counts[v] = vote_counts.get(v, 0) + 1.0
    vote_counts[decision] = vote_counts.get(decision, 0) + 0.5  # extra CEO weight

    winning = max(vote_counts, key=vote_counts.__getitem__)
    ceo_won = (winning == decision)

    # ── update state ──────────────────────────────────────────────────────
    impact = event['axis_impact']
    # Winning applies the event's impact in full; losing applies half of it, inverted.
    direction = 1 if ceo_won else -0.5

    self._state['team_morale'] = np.clip(self._state['team_morale'] + direction * impact.get('morale', 0), 0.0, 1.0)
    self._state['investor_confidence'] = np.clip(self._state['investor_confidence'] + direction * impact.get('growth', 0) * 0.5, 0.0, 1.0)
    self._state['regulatory_risk'] = np.clip(self._state['regulatory_risk'] - direction * impact.get('safety', 0) * 0.3, 0.0, 1.0)
    self._state['runway_months'] = max(0, self._state['runway_months'] - self._rng.uniform(0.5, 1.5))

    # trust update
    for m in BOARD_MEMBERS:
        delta = 0.04 if votes[m] == decision else -0.02
        self._trust[m] = float(np.clip(self._trust[m] + delta, 0.1, 1.0))

    trust_entry = {'round': self._round, **{m: self._trust[m] for m in BOARD_MEMBERS}}
    self._trust_history.append(trust_entry)
    self._state['trust_history'] = self._trust_history

    # ── reward ────────────────────────────────────────────────────────────
    reward = (
        float(ceo_won) * 2.0
        + self._state['team_morale']
        + self._state['investor_confidence']
        - self._state['regulatory_risk']
        + (0.5 if pitch else 0.0)
    )

    self._round += 1
    self._done = (self._round >= len(EVENTS) or self._state['runway_months'] <= 0)

    if self._done:
        # final profitability score
        self._state['profitability_score'] = float(np.clip(
            (self._state['investor_confidence'] * 40
             + self._state['team_morale'] * 30
             + (1 - self._state['regulatory_risk']) * 20
             + min(self._state['runway_months'] / 18, 1.0) * 10),
            0, 100
        ))
        # Observations hold a dict(...) snapshot of the state, so the previous
        # observation does not contain this step's updates. Re-snapshot the
        # state for the terminal observation instead of returning stale data.
        last_obs = self._obs
        self._obs = BoardSimObservation(
            state=dict(self._state),
            event=last_obs.event,
            options=last_obs.options,
            npc_statements=last_obs.npc_statements,
        )
    else:
        self._obs = self._make_obs()
    return StepResult(observation=self._obs, reward=reward, done=self._done)
237
+
238
+ # ── internals ────────────────────────────────────────────────────────────
239
def _make_obs(self) -> BoardSimObservation:
    """Build the observation for the current round.

    Selects the event for ``self._round`` (wrapping around ``EVENTS``), stores
    it as ``self._current_event`` and produces one pre-vote statement per
    board member together with a shallow snapshot of the state.

    Returns:
        A ``BoardSimObservation`` with ``state``, ``event``, ``options`` and
        ``npc_statements`` populated.
    """
    self._current_event = EVENTS[self._round % len(EVENTS)]
    ev = self._current_event
    npc_statements = []
    for m in BOARD_MEMBERS:
        # Sample the vote once and reuse it. _member_vote receives the RNG, so
        # calling it twice could make the statement argue for a different
        # option than the one shown in 'vote'. This also halves the RNG draws
        # made here compared with sampling twice.
        vote = _member_vote(m, ev['options'], ev, self._state, self._rng)
        npc_statements.append({
            'role': m,
            'vote': vote,
            'confidence': round(self._trust[m], 2),
            'statement': _statement(m, vote, ev, self._rng),
        })
    return BoardSimObservation(
        state=dict(self._state),
        event=ev['text'],
        options=ev['options'],
        npc_statements=npc_statements,
    )
257
+
258
+
259
def make_env(seed: int = 0):
    """Create a fresh local ``BoardSimEnv`` constructed with the given seed."""
    env = BoardSimEnv(seed=seed)
    return env
261
+
262
+
263
+ # ── 3. Random baseline ────────────────────────────────────────────────────────
264
print('=== Random baseline ===')
N_BASELINE = 100
baseline_finals, baseline_rewards = [], []

for ep in range(N_BASELINE):
    # The env is seeded per episode, so seed the random policy per episode as
    # well: the baseline then does not depend on process-global `random` state.
    policy_rng = random.Random(ep)
    env = make_env(seed=ep)
    result = env.reset(seed=ep)
    obs = result.observation
    ep_r = 0.0
    while not result.done:
        result = env.step(BoardSimAction(decision=policy_rng.choice(obs.options)))
        obs = result.observation
        ep_r += float(result.reward or 0.0)
    # `obs` is the terminal observation here; its state holds the final score.
    baseline_finals.append(obs.state['profitability_score'])
    baseline_rewards.append(ep_r)

BASELINE_MEAN_PROFIT = statistics.mean(baseline_finals)
BASELINE_MEAN_REWARD = statistics.mean(baseline_rewards)
print(f'Random baseline: mean profitability = {BASELINE_MEAN_PROFIT:.2f} '
      f'(std {statistics.stdev(baseline_finals):.2f})')
print(f'Random baseline: mean episode reward = {BASELINE_MEAN_REWARD:.2f}')
285
+
286
+
287
+ # ── 4. Load model (local, no token needed for open models) ────────────────────
288
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model, TaskType  # pip install peft

MODEL_NAME = 'Qwen/Qwen3-0.6B'  # public model, no token required
MAX_SEQ_LEN = 2048
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

print(f'\n=== Loading {MODEL_NAME} on {DEVICE} ===')
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Fall back to EOS as the padding token when the tokenizer defines none.
if not tokenizer.pad_token:
    tokenizer.pad_token = tokenizer.eos_token

# Half precision with automatic device placement on GPU; full precision on CPU.
if DEVICE == 'cuda':
    load_kwargs = {'torch_dtype': torch.float16, 'device_map': 'auto'}
else:
    load_kwargs = {'torch_dtype': torch.float32, 'device_map': None}
base_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **load_kwargs)

# LoRA adapters on the attention and MLP projection layers.
lora_targets = [
    'q_proj', 'k_proj', 'v_proj', 'o_proj',
    'gate_proj', 'up_proj', 'down_proj',
]
lora_cfg = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,
    lora_alpha=32,
    lora_dropout=0.0,
    target_modules=lora_targets,
    bias='none',
)
model = get_peft_model(base_model, lora_cfg)
model.print_trainable_parameters()
print('Model + LoRA ready.')
318
+
319
+
320
+ # ── 5. GRPO training ──────────────────────────────────────────────────────────
321
+ from trl import GRPOConfig, GRPOTrainer
322
+ from datasets import Dataset
323
+
324
+ SYSTEM_PROMPT = """You are Sarah Chen, CEO of NeuralEdge AI (Series B, ~14 months runway).
325
+ Your board has 4 members with HIDDEN AGENDAS you cannot see directly:
326
+ - CTO: cares about engineering quality, team morale, product readiness.
327
+ - CFO: cares about cash discipline, runway, regulatory safety.
328
+ - Investor Rep: pushes growth-at-all-costs, market share, big exits.
329
+ - Independent: cares about reputation, governance, long-term consensus.
330
+
331
+ Each round you see a market crisis, every NPC's pre-vote statement, and 3 options.
332
+ Your decision is resolved by WEIGHTED VOTE (your weight 1.5x). A short COALITION PITCH
333
+ that addresses opposing members' priorities can swing them toward your pick — write
334
+ language that specifically appeals to whichever members oppose you.
335
+
336
+ Respond in EXACTLY this format on two lines:
337
+ DECISION: <one of the option strings>
338
+ PITCH: <one or two sentences arguing for it, using vocabulary that targets the opposing members>"""
339
+
340
+
341
def build_prompt(obs: BoardSimObservation) -> str:
    """Render an observation as the plain-text prompt given to the model.

    The prompt is the system prompt, a one-line state summary, the event,
    one line per NPC statement, and the list of options.
    """
    board_lines = []
    for s in obs.npc_statements:
        board_lines.append(
            f" {s['role']} ({s['confidence']:.2f}): votes {s['vote']} - {s['statement']}"
        )
    statements = '\n'.join(board_lines)

    prompt = (
        f"{SYSTEM_PROMPT}\n\n"
        f"State: revenue=${obs.state['revenue']:.0f}/yr burn=${obs.state['burn_rate']:.0f}/mo "
        f"runway={obs.state['runway_months']:.1f}mo morale={obs.state['team_morale']:.2f} "
        f"investors={obs.state['investor_confidence']:.2f} reg_risk={obs.state['regulatory_risk']:.2f}\n"
        f"Event: {obs.event}\nBoard:\n{statements}\n"
        f"Options: {obs.options}\n"
    )
    return prompt
354
+
355
+
356
# Build a stub prompt dataset (GRPO drives reward from the env, not the dataset)
stub_prompts = [SYSTEM_PROMPT] * 256
stub_dataset = Dataset.from_dict({'prompt': stub_prompts})

# Hyper-parameters collected in one mapping, then handed to GRPOConfig.
grpo_kwargs = {
    'output_dir': './grpo_boardsim_local',
    'learning_rate': 5e-6,
    'per_device_train_batch_size': 2,  # lower for local GPU / CPU
    'gradient_accumulation_steps': 8,
    'num_generations': 4,
    'max_prompt_length': 768,
    'max_completion_length': 200,
    'max_steps': 200,  # reduce for quick local runs; bump to 500+ for real training
    'logging_steps': 5,
    'save_steps': 100,
    'bf16': False,
    'fp16': (DEVICE == 'cuda'),
    'report_to': 'none',  # no WandB locally
    'run_name': 'boardsim-local-grpo',
}
grpo_config = GRPOConfig(**grpo_kwargs)
375
+
376
+
377
+ # GRPO reward function — wraps the local env
378
def boardsim_reward_fn(completions: list[str], prompts: list[str], **kwargs) -> list[float]:
    """Score each completion by playing a full local episode with it.

    Called by GRPOTrainer after each generation batch. For every
    (completion, prompt) pair the DECISION / PITCH lines are parsed, a fresh
    env is created from a seed derived from the prompt, and the same
    decision and pitch are submitted every round until the episode ends.

    Args:
        completions: Generated texts, one per sample.
        prompts: The prompt each completion was generated from.
        **kwargs: Extra columns passed by the trainer; ignored.

    Returns:
        One summed episode reward per completion, in input order.
    """
    import zlib  # local import: only needed for the stable prompt-derived seed

    rewards = []
    for completion, prompt in zip(completions, prompts):
        # Parse decision + pitch from completion
        dm = re.search(r'DECISION\s*:\s*(\S+)', completion, re.IGNORECASE)
        pm = re.search(r'PITCH\s*:\s*(.+)', completion, re.IGNORECASE | re.DOTALL)

        # Seed from a CRC of the prompt. The built-in hash() of a str is salted
        # per process, so it would give a different seed on every run.
        ep_seed = zlib.crc32(str(prompt).encode('utf-8')) % 100_000
        env = make_env(seed=ep_seed)
        result = env.reset(seed=ep_seed)
        obs = result.observation

        decision = obs.options[0]
        if dm:
            candidate = dm.group(1).strip().lower()
            # Prefer an exact option match; otherwise take the longest option
            # contained in the token, so 'ipo' cannot shadow 'ipo_later'.
            exact = next((opt for opt in obs.options if opt.lower() == candidate), None)
            if exact is not None:
                decision = exact
            else:
                contained = [opt for opt in obs.options if opt.lower() in candidate]
                if contained:
                    decision = max(contained, key=len)

        pitch = pm.group(1).strip()[:400] if pm else ''

        ep_reward = 0.0
        while not result.done:
            # For multi-round: keep same decision/pitch (simplification)
            result = env.step(BoardSimAction(decision=decision, coalition_pitch=pitch))
            ep_reward += float(result.reward or 0.0)

        rewards.append(ep_reward)
    return rewards
412
+
413
+
414
# Wire the LoRA model, tokenizer, stub prompts and env-backed reward together.
trainer_kwargs = {
    'model': model,
    'processing_class': tokenizer,
    'args': grpo_config,
    'train_dataset': stub_dataset,
    'reward_funcs': boardsim_reward_fn,
}
trainer = GRPOTrainer(**trainer_kwargs)

print('\n=== Starting GRPO training ===')
trainer.train()

# Persist the adapter and the tokenizer side by side.
for saver in (trainer.save_model, tokenizer.save_pretrained):
    saver('./lora_boardsim_local')
print('Saved adapter to ./lora_boardsim_local')
427
+
428
+
429
+ # ── 6. Training curves ────────────────────────────────────────────────────────
430
ASSETS = pathlib.Path('./assets')
ASSETS.mkdir(exist_ok=True)

# Split the trainer log into the entries that carry a reward and a loss.
log_history = trainer.state.log_history
reward_entries = [e for e in log_history if 'reward' in e]
loss_entries = [e for e in log_history if 'loss' in e]
steps_r = [e['step'] for e in reward_entries]
rewards = [e['reward'] for e in reward_entries]
steps_l = [e['step'] for e in loss_entries]
losses = [e['loss'] for e in loss_entries]

# Reward curve, with the random-policy mean as a horizontal reference line.
plt.figure(figsize=(9, 5))
plt.plot(steps_r, rewards, color='#1d6fff', linewidth=2, label='Qwen3-0.6B (GRPO)')
plt.axhline(BASELINE_MEAN_REWARD, color='#c44', linestyle='--', linewidth=2,
            label=f'Random baseline (mean = {BASELINE_MEAN_REWARD:.1f})')
plt.title('GRPO training reward — BoardSim (local)')
plt.xlabel('Training step')
plt.ylabel('Mean group reward')
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
plt.savefig(ASSETS / 'reward_curve.png', dpi=150)
plt.close()
print('Saved reward_curve.png')

# Loss curve.
plt.figure(figsize=(9, 5))
plt.plot(steps_l, losses, color='#7a2', linewidth=2)
plt.title('GRPO loss — BoardSim (local)')
plt.xlabel('Training step')
plt.ylabel('Loss')
plt.grid(alpha=0.3)
plt.tight_layout()
plt.savefig(ASSETS / 'loss_curve.png', dpi=150)
plt.close()
print('Saved loss_curve.png')
458
+
459
+
460
+ # ── 7. Evaluation ─────────────────────────────────────────────────────────────
461
+ print('\n=== Evaluation ===')
462
+ model.eval()
463
+
464
DECISION_RE = re.compile(r'DECISION\s*:\s*([A-Za-z0-9_]+)', re.IGNORECASE)
PITCH_RE = re.compile(r'PITCH\s*:\s*(.+)', re.IGNORECASE | re.DOTALL)


def _pick_option(haystack: str, options: list):
    """Return the option named in ``haystack`` (already lower-cased), or None.

    Options are tried in list order, but an option that is merely a substring
    of another matching option is skipped, so 'ipo' cannot shadow 'ipo_later'.
    """
    hits = [opt for opt in options if opt.lower() in haystack]
    for opt in hits:
        shadowed = any(
            other is not opt
            and opt.lower() != other.lower()
            and opt.lower() in other.lower()
            for other in hits
        )
        if not shadowed:
            return opt
    return None


def parse_completion(completion: str, options: list) -> tuple[str, str]:
    """Extract ``(decision, pitch)`` from a model completion.

    Args:
        completion: Raw generated text, ideally containing ``DECISION:`` and
            ``PITCH:`` lines.
        options: Valid decision strings for the current round (non-empty).

    Returns:
        ``(decision, pitch)``. ``decision`` is always one of ``options``:
        an exact (case-insensitive) match of the DECISION token if there is
        one, else an option contained in that token, else an option mentioned
        anywhere in the completion, else ``options[0]``. ``pitch`` is the text
        after ``PITCH:`` trimmed to 400 characters, or ``''`` if absent.
    """
    decision = None
    dm = DECISION_RE.search(completion)
    if dm:
        candidate = dm.group(1).strip().lower()
        decision = next((opt for opt in options if opt.lower() == candidate), None)
        if decision is None:
            decision = _pick_option(candidate, options)
    if decision is None:
        # No usable DECISION line: fall back to any option named in the text.
        decision = _pick_option(completion.lower(), options)
    if decision is None:
        decision = options[0]

    pm = PITCH_RE.search(completion)
    pitch = pm.group(1).strip()[:400] if pm else ''
    return decision, pitch
483
+
484
+
485
def trained_action(obs: BoardSimObservation) -> tuple[str, str]:
    """Greedy-decode the model on this observation and parse its action.

    Returns the ``(decision, pitch)`` pair produced by ``parse_completion``.
    """
    encoded = tokenizer(
        build_prompt(obs),
        return_tensors='pt',
        truncation=True,
        max_length=MAX_SEQ_LEN,
    ).to(DEVICE)
    prompt_len = encoded.input_ids.shape[1]

    with torch.no_grad():
        generated = model.generate(
            **encoded,
            max_new_tokens=180,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Keep only the newly generated tokens, not the echoed prompt.
    new_tokens = generated[0][prompt_len:]
    completion = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return parse_completion(completion, obs.options)
499
+
500
+
501
EVAL_N = 50
trained_finals, trained_pitches, trained_steps = [], 0, 0

# Trained policy on held-out seeds.
for ep in range(EVAL_N):
    env = make_env(seed=10_000 + ep)
    result = env.reset(seed=10_000 + ep)
    obs = result.observation
    while not result.done:
        decision, pitch = trained_action(obs)
        if pitch.strip():
            trained_pitches += 1
        trained_steps += 1
        result = env.step(BoardSimAction(decision=decision, coalition_pitch=pitch))
        if not result.done:
            obs = result.observation
    trained_finals.append(result.observation.state['profitability_score'])

# Random policy on the same env seeds, for a paired comparison.
random_finals_eval = []
for ep in range(EVAL_N):
    # Per-episode policy RNG: the comparison does not depend on how much of the
    # process-global `random` stream was consumed earlier in the script.
    policy_rng = random.Random(10_000 + ep)
    env = make_env(seed=10_000 + ep)
    result = env.reset(seed=10_000 + ep)
    obs = result.observation
    while not result.done:
        result = env.step(BoardSimAction(decision=policy_rng.choice(obs.options)))
        if not result.done:
            obs = result.observation
    random_finals_eval.append(result.observation.state['profitability_score'])

print(f'Trained Qwen3-0.6B: {np.mean(trained_finals):.2f} +/- {np.std(trained_finals):.2f}')
print(f'Random baseline : {np.mean(random_finals_eval):.2f} +/- {np.std(random_finals_eval):.2f}')
print(f'Pitches written : {trained_pitches}/{trained_steps} steps')

# Before/after histogram
plt.figure(figsize=(9, 5))
bins = np.linspace(0, 100, 25)
plt.hist(random_finals_eval, bins=bins, alpha=0.6, color='#c44',
         label=f'Random (mean={np.mean(random_finals_eval):.1f})')
plt.hist(trained_finals, bins=bins, alpha=0.6, color='#1d6fff',
         label=f'Trained (mean={np.mean(trained_finals):.1f})')
# Episode count comes from EVAL_N so the title cannot drift from the run size.
plt.title(f'Final profitability — random vs trained Qwen3-0.6B ({EVAL_N} held-out episodes)')
plt.xlabel('Profitability score'); plt.ylabel('Episodes')
plt.legend(); plt.grid(alpha=0.3); plt.tight_layout()
plt.savefig(ASSETS / 'before_after.png', dpi=150)
plt.close()
print(f'Saved {ASSETS}/before_after.png')
546
+
547
+
548
+ # ── 8. Theory-of-Mind probe ───────────────────────────────────────────────────
549
+ print('\n=== ToM probe ===')
550
+ TOM_INSTRUCTION = (
551
+ "\n\nGiven the state and event below, name the SINGLE board member "
552
+ "(CTO, CFO, Investor Rep, or Independent) most likely to oppose the chosen decision. "
553
+ "Answer with just the role name on one line.\n"
554
+ )
555
+
556
+
557
def tom_predict(obs: BoardSimObservation, decision: str) -> Optional[str]:
    """Ask the model which board member is most likely to oppose ``decision``.

    The probe prompt is the system prompt, the ToM instruction, the
    observation body and the chosen decision; at most 8 new tokens are
    decoded greedily.

    Returns:
        The first role named in the generated text ('CTO', 'CFO',
        'Investor Rep' or 'Independent'), or ``None`` if no role is named.
    """
    body = build_prompt(obs).split(SYSTEM_PROMPT, 1)[1]
    prompt = SYSTEM_PROMPT + TOM_INSTRUCTION + body + f"Chosen decision: {decision}\nMost likely opponent: "
    inputs = tokenizer(prompt, return_tensors='pt', truncation=True,
                       max_length=MAX_SEQ_LEN).to(DEVICE)
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=8, do_sample=False,
                             pad_token_id=tokenizer.eos_token_id)
    txt = tokenizer.decode(out[0][inputs.input_ids.shape[1]:], skip_special_tokens=True).lower()

    # Match whole words and take the earliest mention. Plain substring checks
    # misfire (e.g. 'director' contains 'cto') and impose a fixed priority
    # instead of honouring the role the model named first.
    hit = re.search(r'\b(investor\w*|cto|cfo|independent)\b', txt)
    if hit is None:
        return None
    token = hit.group(1)
    if token.startswith('investor'):
        return 'Investor Rep'
    return 'Independent' if token == 'independent' else token.upper()
571
+
572
+
573
# Score the probe on the opening round of 20 fresh episodes. Episodes where
# no NPC opposes the chosen decision are excluded from the total.
correct = 0
total = 0
for ep in range(20):
    env = make_env(seed=20_000 + ep)
    result = env.reset(seed=20_000 + ep)
    obs = result.observation
    decision, _ = trained_action(obs)
    opposed = [s['role'] for s in obs.npc_statements if s['vote'] != decision]
    if opposed:
        pred = tom_predict(obs, decision)
        if pred and pred in opposed:
            correct += 1
        total += 1

acc = correct / max(1, total)
print(f'ToM probe accuracy: {acc:.1%} ({correct}/{total}) (random baseline ≈ 25%)')
589
+
590
+
591
+ # ── 9. Trust trajectory ───────────────────────────────────────────────────────
592
+ print('\n=== Trust trajectory ===')
593
+ trust_trained = {r: [] for r in BOARD_MEMBERS}
594
+ trust_random = {r: [] for r in BOARD_MEMBERS}
595
+
596
+
597
def collect_trust(policy: str, store: dict, n: int = 20, seed_base: int = 30_000):
    """Run ``n`` episodes and gather per-round trust values into ``store``.

    Args:
        policy: ``'trained'`` to act with ``trained_action``; any other value
            selects a uniformly random option each round.
        store: Mapping role -> list indexed by round, each element a list of
            trust values across episodes. Mutated in place.
        n: Number of episodes to run.
        seed_base: Episode ``i`` uses env seed ``seed_base + i``.
    """
    for ep in range(n):
        ep_seed = seed_base + ep
        # Per-episode RNG for the random policy, so the trajectory does not
        # depend on process-global `random` state.
        policy_rng = random.Random(ep_seed)
        env = make_env(seed=ep_seed)
        result = env.reset(seed=ep_seed)
        obs = result.observation
        while not result.done:
            if policy == 'trained':
                decision, pitch = trained_action(obs)
                action = BoardSimAction(decision=decision, coalition_pitch=pitch)
            else:
                action = BoardSimAction(decision=policy_rng.choice(obs.options))
            result = env.step(action)
            if not result.done:
                obs = result.observation
        # Fold this episode's trust history into the per-round buckets.
        for entry in result.observation.state.get('trust_history', []):
            idx = entry.get('round', 0)
            for role, buckets in store.items():
                if role not in entry:
                    continue
                while len(buckets) <= idx:
                    buckets.append([])
                buckets[idx].append(entry[role])
618
+
619
+
620
collect_trust('trained', trust_trained)
collect_trust('random', trust_random)


def _round_means(buckets):
    """Mean of each per-round bucket; NaN where a round has no samples."""
    return [np.mean(x) if x else np.nan for x in buckets]


plt.figure(figsize=(10, 6))
colors = {'CTO': '#1d6fff', 'CFO': '#c44', 'Investor Rep': '#7a2', 'Independent': '#a3a'}
for role, color in colors.items():
    means_t = _round_means(trust_trained[role])
    means_r = _round_means(trust_random[role])
    rounds = list(range(len(means_t)))
    # Solid line: trained agent. Dashed, lighter line: random policy.
    plt.plot(rounds, means_t, color=color, linewidth=2, label=f'{role} (trained)')
    plt.plot(rounds, means_r, color=color, linewidth=1.2, linestyle='--',
             alpha=0.6, label=f'{role} (random)')
plt.title('Per-round trust — trained agent (solid) vs random (dashed)')
plt.xlabel('Round')
plt.ylabel('Trust [0.1, 1.0]')
plt.legend(ncol=2, fontsize=8)
plt.grid(alpha=0.3)
plt.tight_layout()
plt.savefig(ASSETS / 'trust_trajectory.png', dpi=150)
plt.close()
print(f'Saved {ASSETS}/trust_trajectory.png')

print('\n=== Done! All charts saved to ./assets/ ===')
print('When ready to push, run:')
print(' model.push_to_hub("YOUR-USERNAME/neuraledge-boardroom-qwen3-lora")')
print(' tokenizer.push_to_hub("YOUR-USERNAME/neuraledge-boardroom-qwen3-lora")')
envs/.gitkeep ADDED
File without changes
envs/board_sim_env/.dockerignore ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ .pytest_cache/
5
+ .ruff_cache/
6
+ .mypy_cache/
7
+ .venv/
8
+ venv/
9
+ env/
10
+ .env
11
+ .env.*
12
+ *.key
13
+ *.pem
14
+ .ipynb_checkpoints/
15
+ .DS_Store
16
+ Thumbs.db
17
+ .vscode/
18
+ .idea/
19
+ uv.lock
envs/board_sim_env/README.md ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: NeuralEdge AI Boardroom — Board-Sim Env
3
+ emoji: 🏛️
4
+ colorFrom: indigo
5
+ colorTo: pink
6
+ sdk: docker
7
+ pinned: false
8
+ app_port: 8000
9
+ base_path: /web
10
+ tags:
11
+ - openenv
12
+ - multi-agent
13
+ - hackathon
14
+ ---
15
+
16
+ # NeuralEdge AI Boardroom Environment
17
+
18
+ A multi-agent OpenEnv environment where the agent plays the **CEO** of a Series B AI startup and must navigate **10 rounds of market crises** while winning **weighted coalition votes** from 4 hidden-agenda NPC board members (CTO, CFO, Investor Rep, Independent). Built for the Meta PyTorch × Hugging Face OpenEnv Hackathon, **Theme 1 — Multi-Agent Interactions**.
19
+
20
+ The agent **never sees** the NPC agendas; it must infer their priorities from their statements + voting patterns and choose decisions that build a winning coalition.
21
+
22
+ ## What the agent sees (Observation)
23
+
24
+ ```python
25
+ BoardSimObservation(
26
+ state=dict(...), # public metrics: revenue, burn, runway, morale, ...
27
+ event="Round 4 — EU AI Act compliance deadline ...",
28
+ options=["full_compliance", "partial_compliance", "exit_EU_market"],
29
+ npc_statements=[
30
+ {"role": "CTO", "vote": "full_compliance", "confidence": 0.81,
31
+ "statement": "Look, the architecture won't survive shortcuts here. I'm voting full_compliance."},
32
+ # ... 3 more NPCs
33
+ ],
34
+ round=4,
35
+ )
36
+ ```
37
+
38
+ ## What the agent does (Action)
39
+
40
+ ```python
41
+ BoardSimAction(
42
+ decision="full_compliance", # one of observation.options
43
+ coalition_pitch="EU compliance protects long-term reputation, "
44
+ "keeps regulatory risk low, and signals governance "
45
+ "discipline to the next funding round."
46
+ )
47
+ ```
48
+
49
+ The optional `coalition_pitch` is a real persuasion channel — see below.
50
+
51
+ ## How decisions resolve
52
+
53
+ Weighted vote: each member contributes `ROLE_WEIGHT × confidence` to their pick.
54
+ Weights are CEO 1.5, CTO 1.2, CFO 1.0, Investor Rep 1.3, Independent 0.8.
55
+
56
+ **Pitch persuasion**: an opposing NPC's vote weight is partially redirected toward the
57
+ agent's pick proportional to how many of that NPC's hidden agenda keywords appear in
58
+ `coalition_pitch` (capped at 35% of their weight). NPCs already aligned with the agent
59
+ are unaffected. The agent never sees the keyword lists — it must learn what each role
60
+ secretly cares about and write boardroom language that targets them. This is theory-of-mind
61
+ graded directly by the environment.
62
+
63
+ **State-aware tone**: when `runway_months < 6`, `team_morale < 0.4`, `regulatory_risk > 0.6`,
64
+ or `investor_confidence < 0.4`, NPCs switch from a calm-strategic phrase bank to a
65
+ crisis-mode one. The observation distribution shifts mid-episode the way it would in a
66
+ real Series-B startup under pressure.
67
+
68
+ The winning option's consequences (deltas to revenue, burn, runway, morale, etc.) are applied to state.
69
+
70
+ ## Reward signal
71
+
72
+ Per-step:
73
+ - `Δ profitability_score` (composite of revenue, burn efficiency, runway, market share, product readiness, morale, investor confidence, regulatory risk — see `compute_profitability_score`)
74
+ - `+0.5` if the agent's vote matched the winning decision (coalition bonus)
75
+ - `-0.2` if outvoted; `-0.5` extra if action was malformed
76
+ - `0.3 × Δ trust_sum` (relationship health)
77
+ - `+0.4 × mean(pitch_score over opposing NPCs)` — only paid when the agent both writes a
78
+ pitch AND faces opposition; rewards arguments that hit the hidden agendas of the board
79
+ members the agent has to win over
80
+
81
+ Terminal:
82
+ - `-5` if runway hits 0 (bankruptcy)
83
+ - Tiered terminal bonus by final profitability: `+10` if ≥ 60, `+5` if ≥ 40, `-5` if < 20
84
+ - Special end-game bonuses for `accept_acquisition` (+30), `ipo` (+25), `stay_private` (+5)
85
+
86
+ ## Determinism
87
+
88
+ NPC statements + votes are seeded by `(reset_seed, round, role)`. The four NPC statements you see in the observation **are exactly the votes used at resolve time** — no hidden re-rolling between obs and step.
89
+
90
+ ## Quick start
91
+
92
+ ```python
93
+ from board_sim_env import BoardSimAction, BoardSimEnv
94
+
95
+ # Connect to a deployed HF Space
96
+ with BoardSimEnv(base_url="https://<user>-board-sim-env.hf.space").sync() as env:
97
+ result = env.reset(seed=42)
98
+ obs = result.observation
99
+ while not result.done:
100
+ # Random policy
101
+ import random
102
+ action = BoardSimAction(decision=random.choice(obs.options))
103
+ result = env.step(action)
104
+ obs = result.observation
105
+ print(f"R{obs.round-1}: reward={result.reward:+.2f} score={obs.state['profitability_score']:.1f}")
106
+ ```
107
+
108
+ Or from a local Docker image:
109
+
110
+ ```python
111
+ env = BoardSimEnv.from_docker_image("board_sim_env-env:latest")
112
+ ```
113
+
114
+ ## Local development
115
+
116
+ ```bash
117
+ # Direct env self-test (no HTTP):
118
+ python server/board_sim_env_environment.py
119
+
120
+ # Run the FastAPI server:
121
+ uvicorn server.app:app --port 8000
122
+
123
+ # Build Docker image:
124
+ docker build -t board_sim_env-env:latest -f server/Dockerfile .
125
+
126
+ # Deploy to a public HF Space:
127
+ python -m openenv.cli push --repo-id <user>/board-sim-env
128
+ ```
129
+
130
+ ## Files
131
+
132
+ ```
133
+ board_sim_env/
134
+ ├── __init__.py # exports BoardSimEnv, BoardSimAction, BoardSimObservation, BoardState
135
+ ├── client.py # thin EnvClient subclass
136
+ ├── models.py # Action / Observation / State dataclasses
137
+ ├── openenv.yaml # spec_version: 1, name, type, runtime
138
+ ├── pyproject.toml # pinned to openenv-core==0.2.3
139
+ ├── server/
140
+ │ ├── app.py # FastAPI wiring (max_concurrent_envs=64)
141
+ │ ├── board_sim_env_environment.py # core: reset/step/state, NPC sim, reward
142
+ │ ├── Dockerfile # multi-stage build off openenv-base
143
+ │ └── requirements.txt # runtime deps
144
+ └── README.md # this file (also the HF Space card)
145
+ ```
146
+
147
+ ## NPC agendas (revealed for transparency — agent does NOT see these)
148
+
149
+ | Role | Maximizes | Personality |
150
+ |---------------|----------------------------------------------------------|------------------------|
151
+ | CTO | product readiness (+0.55), team morale (+0.40), low burn | Brilliant, stubborn |
152
+ | CFO | low burn (-0.60), revenue (+0.30), runway (+0.20) | Cautious, data-driven |
153
+ | Investor Rep | investor confidence (+0.45), market share (+0.35) | Smooth, growth-pusher |
154
+ | Independent | low regulatory risk (-0.45), morale (+0.30), reputation | Consensus seeker |
155
+
156
+ ## Hard rules / OpenEnv compliance
157
+
158
+ - `openenv-core==0.2.3` (pinned)
159
+ - `Environment` base class with sync `reset` / `step`
160
+ - `SUPPORTS_CONCURRENT_SESSIONS = True` and `max_concurrent_envs=64` set in `app.py` (required for GRPO)
161
+ - No reserved MCP names (`reset`, `step`, `state`, `close`)
162
+ - Public HF Space deployment via `python -m openenv.cli push`
envs/board_sim_env/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ """NeuralEdge AI Boardroom — OpenEnv environment package."""
5
+
6
+ from .client import BoardSimEnv
7
+ from .models import BoardSimAction, BoardSimObservation, BoardState
8
+
9
+ __all__ = [
10
+ "BoardSimAction",
11
+ "BoardSimObservation",
12
+ "BoardState",
13
+ "BoardSimEnv",
14
+ ]
envs/board_sim_env/client.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ """Board Sim Env Environment Client."""
5
+
6
+ from typing import Dict, Any
7
+
8
+ from openenv.core import EnvClient
9
+ from openenv.core.client_types import StepResult
10
+ from openenv.core.env_server.types import State
11
+
12
+ from .models import BoardSimAction, BoardSimObservation, BoardState
13
+
14
class BoardSimEnv(EnvClient[BoardSimAction, BoardSimObservation, BoardState]):
    """HTTP client for the Board Sim environment server."""

    def _step_payload(self, action: BoardSimAction) -> Dict:
        """Serialize an action into the JSON body sent on step."""
        body = {}
        body["decision"] = action.decision
        body["coalition_pitch"] = action.coalition_pitch
        return body

    def _parse_result(self, payload: Dict) -> StepResult[BoardSimObservation]:
        """Turn a server response into a typed StepResult.

        ``reward`` and ``done`` are read from the top level of the payload
        and mirrored onto the observation.
        """
        raw_obs = payload.get("observation", {})
        step_reward = payload.get("reward", 0.0)
        is_done = payload.get("done", False)

        observation = BoardSimObservation(
            state=raw_obs.get("state", {}),
            event=raw_obs.get("event", ""),
            options=raw_obs.get("options", []),
            npc_statements=raw_obs.get("npc_statements", []),
            round=raw_obs.get("round", 1),
            done=is_done,
            reward=step_reward,
            metadata=raw_obs.get("metadata", {}),
        )
        return StepResult(observation=observation, reward=step_reward, done=is_done)

    def _parse_state(self, payload: Dict) -> BoardState:
        """Turn a state response into a BoardState, defaulting missing keys."""
        episode_id = payload.get("episode_id", "")
        step_count = payload.get("step_count", 0)
        state_dict = payload.get("state_dict", {})
        return BoardState(
            episode_id=episode_id,
            step_count=step_count,
            state_dict=state_dict,
        )
envs/board_sim_env/debug_sim.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import sys
import traceback

# Put the current working directory on the import path so that `models` and
# `server` resolve when the script is launched from the env directory.
sys.path.append(os.getcwd())

try:
    from models import BoardSimAction, BoardSimObservation
    from server.board_sim_env_environment import BoardSimEnvironment

    env = BoardSimEnvironment()
    print("Environment initialized.")

    # Try a step
    # NOTE(review): step() is called without a prior reset(); confirm the
    # environment supports that.
    action = BoardSimAction(decision="differentiate", coalition_pitch="test")
    print(f"Action created: {action}")

    obs = env.step(action)
    print(f"Step successful. Round: {obs.round}")

except Exception:
    # Smoke test: print the full traceback instead of crashing.
    traceback.print_exc()
envs/board_sim_env/models.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ """Action / Observation / State types for the Board-Sim Env."""
5
+
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ from openenv.core.env_server.types import Action, Observation, State as BaseState
9
+ from pydantic import Field
10
+
11
+
12
class BoardSimAction(Action):
    """The agent (CEO) picks one of 3 string decisions for the current event.

    Optional `coalition_pitch` is reserved for future reward shaping; v1
    does not consume it but it is accepted to keep the action schema stable.

    NOTE(review): the package README describes `coalition_pitch` as a live
    persuasion channel that affects vote resolution and reward. Confirm which
    is current and bring this docstring and the field description into line.
    """

    decision: str = Field(
        ...,
        description="Exactly one of the strings in the latest observation's `options` list.",
    )
    coalition_pitch: Optional[str] = Field(
        default="",
        description="Optional natural-language argument to the board (unused in v1 reward).",
    )
27
+
28
+
29
class BoardSimObservation(Observation):
    """What the agent sees each step.

    `state` excludes NPC hidden agendas (those are private). NPC statements +
    votes shown here are the SAME ones used at vote-resolve time — i.e. the
    environment is deterministic given (seed, round)."""

    state: Dict[str, Any] = Field(..., description="Public startup state metrics + trust + history.")
    event: str = Field(..., description="This round's market-crisis event title + description.")
    options: List[str] = Field(..., description="Three valid decision strings for this round.")
    npc_statements: List[Dict[str, Any]] = Field(
        default_factory=list,
        description="One dict per NPC: {role, statement, vote, confidence}.",
    )
    round: int = Field(..., description="1-indexed round number (1..10).")
    # Merge conflict resolved in favour of the HEAD side: the other side was
    # empty, and these fields are a superset of it. All three have defaults,
    # so existing constructor calls stay valid.
    done: bool = Field(default=False, description="Whether the episode is terminal.")
    reward: float = Field(default=0.0, description="Reward from the latest step.")
    event_idx: Optional[int] = Field(default=None, description="Internal index in the EVENTS list.")
50
+
51
+
52
class BoardState(BaseState):
    """Server-internal state. The `state_dict` mirrors what's visible in
    observations plus internal bookkeeping (history, done_reason)."""

    # Free-form mapping; defaults to a fresh empty dict per instance.
    state_dict: Dict[str, Any] = Field(default_factory=dict)
envs/board_sim_env/openenv.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ spec_version: 1
2
+ name: board_sim_env
3
+ type: http
4
+ runtime: docker
5
+ app: server.app:app
6
+ port: 8000
envs/board_sim_env/pyproject.toml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ [build-system]
5
+ requires = ["setuptools>=45", "wheel"]
6
+ build-backend = "setuptools.build_meta"
7
+
8
+ [project]
9
+ name = "openenv-board_sim_env"
10
+ version = "0.1.0"
11
+ description = "NeuralEdge AI boardroom multi-agent simulation environment for OpenEnv (Theme 1: Multi-Agent Interactions)."
12
+ requires-python = ">=3.10"
13
+ dependencies = [
14
+ "openenv-core[core]==0.2.3",
15
+ "pydantic>=2.0",
16
+ "fastapi>=0.115.0",
17
+ "uvicorn>=0.30.0",
18
+ ]
19
+
20
+ [project.optional-dependencies]
21
+ dev = [
22
+ "pytest>=8.0.0",
23
+ "pytest-cov>=4.0.0",
24
+ "httpx>=0.27.0",
25
+ ]
26
+
27
+ [project.scripts]
28
+ server = "board_sim_env.server.app:main"
29
+
30
+ [tool.setuptools]
31
+ include-package-data = true
32
+ packages = ["board_sim_env", "board_sim_env.server"]
33
+ package-dir = { "board_sim_env" = ".", "board_sim_env.server" = "server" }
envs/board_sim_env/server/Dockerfile ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # Multi-stage build using openenv-base
8
+ # This Dockerfile is flexible and works for both:
9
+ # - In-repo environments (with local OpenEnv sources)
10
+ # - Standalone environments (with openenv from PyPI/Git)
11
+ # The build script (openenv build) handles context detection and sets appropriate build args.
12
+
13
+ ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
14
+ FROM ${BASE_IMAGE} AS builder
15
+
16
+ WORKDIR /app
17
+
18
+ # Ensure git is available (required for installing dependencies from VCS)
19
+ RUN apt-get update && \
20
+ apt-get install -y --no-install-recommends git && \
21
+ rm -rf /var/lib/apt/lists/*
22
+
23
+ # Build argument to control whether we're building standalone or in-repo
24
+ ARG BUILD_MODE=in-repo
25
+ ARG ENV_NAME=board_sim_env
26
+
27
+ # Copy environment code (always at root of build context)
28
+ COPY . /app/env
29
+
30
+ # For in-repo builds, openenv is already vendored in the build context
31
+ # For standalone builds, openenv will be installed via pyproject.toml
32
+ WORKDIR /app/env
33
+
34
+ # Ensure uv is available (for local builds where base image lacks it)
35
+ RUN if ! command -v uv >/dev/null 2>&1; then \
36
+ curl -LsSf https://astral.sh/uv/install.sh | sh && \
37
+ mv /root/.local/bin/uv /usr/local/bin/uv && \
38
+ mv /root/.local/bin/uvx /usr/local/bin/uvx; \
39
+ fi
40
+
41
+ # Install dependencies using uv sync
42
+ # If uv.lock exists, use it; otherwise resolve on the fly
43
+ RUN --mount=type=cache,target=/root/.cache/uv \
44
+ if [ -f uv.lock ]; then \
45
+ uv sync --frozen --no-install-project --no-editable; \
46
+ else \
47
+ uv sync --no-install-project --no-editable; \
48
+ fi
49
+
50
+ RUN --mount=type=cache,target=/root/.cache/uv \
51
+ if [ -f uv.lock ]; then \
52
+ uv sync --frozen --no-editable; \
53
+ else \
54
+ uv sync --no-editable; \
55
+ fi
56
+
57
+ # Final runtime stage
58
+ FROM ${BASE_IMAGE}
59
+
60
+ WORKDIR /app
61
+
62
+ # Copy the virtual environment from builder
63
+ COPY --from=builder /app/env/.venv /app/.venv
64
+
65
+ # Copy the environment code
66
+ COPY --from=builder /app/env /app/env
67
+
68
+ # Set PATH to use the virtual environment
69
+ ENV PATH="/app/.venv/bin:$PATH"
70
+
71
+ # Set PYTHONPATH so imports work correctly
72
+ ENV PYTHONPATH="/app/env:$PYTHONPATH"
73
+
74
+ # Health check
75
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
76
+ CMD curl -f http://localhost:8000/health || exit 1
77
+
78
+ # Run the FastAPI server
79
+ # The module path is constructed to work with the /app/env structure
80
+ CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
envs/board_sim_env/server/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Board Sim Env environment server components."""
8
+
9
+ from .board_sim_env_environment import BoardSimEnvironment
10
+
11
+ __all__ = ["BoardSimEnvironment"]
envs/board_sim_env/server/app.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ """
5
+ FastAPI application for the Board Sim Env Environment.
6
+
8
+ The openenv framework's built-in /reset and /step endpoints are stateless
9
+ (fresh env per request). We add custom /game/reset and /game/step routes
10
+ that use a single persistent GameManager instance so multi-round episodes
11
+ work correctly from the frontend.
14
+ """
15
+
16
+ try:
17
+ from openenv.core.env_server.http_server import create_app
18
+ except Exception as e:
19
+ raise ImportError(
20
+ "openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
21
+ ) from e
22
+
23
+ try:
24
+ from ..models import BoardSimAction, BoardSimObservation
25
+ from .board_sim_env_environment import BoardSimEnvironment
26
+ except (ImportError, ValueError):
27
+ # Direct uvicorn launch from envs/board_sim_env/: package context not available.
28
+ import os, sys
29
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
30
+ from models import BoardSimAction, BoardSimObservation # type: ignore
31
+ from server.board_sim_env_environment import BoardSimEnvironment # type: ignore
32
+
33
+ import json
35
+ import httpx
36
+
37
+ from fastapi import FastAPI
38
+ from pydantic import BaseModel
39
+ from typing import Any, Dict, List, Optional
40
+ from starlette.middleware.cors import CORSMiddleware
41
+
42
+
43
+ # ── Stateful game manager (single instance, shared across requests) ─
44
class GameManager:
    """Owns a single long-lived BoardSimEnvironment.

    The instance is created on `reset` and reused by every later `step`, so
    state carries over between the /game/reset and /game/step requests.
    """

    def __init__(self):
        # No environment exists until the first reset() call.
        self._env: Optional[BoardSimEnvironment] = None

    def reset(self, seed: int = 42) -> Dict[str, Any]:
        """Replace the held environment with a fresh one, reset it with
        `seed`, and return the initial observation as a plain dict."""
        self._env = BoardSimEnvironment()
        first_obs = self._env.reset(seed=seed)
        return self._obs_to_dict(first_obs)

    def step(self, decision: str, coalition_pitch: str = '') -> Dict[str, Any]:
        """Apply one action to the held environment and return the resulting
        observation as a plain dict.

        Raises:
            RuntimeError: if `reset` has not been called yet.
        """
        env = self._env
        if env is None:
            raise RuntimeError("Call /game/reset before /game/step")
        next_obs = env.step(
            BoardSimAction(decision=decision, coalition_pitch=coalition_pitch)
        )
        return self._obs_to_dict(next_obs)

    @staticmethod
    def _obs_to_dict(obs: BoardSimObservation) -> Dict[str, Any]:
        """Flatten an observation into the JSON payload returned to clients.

        `reward` and `done` are read with defaults (0.0 / False) so an
        observation object lacking those attributes still serializes.
        """
        observation = {
            "state": obs.state,
            "event": obs.event,
            "options": obs.options,
            "npc_statements": obs.npc_statements,
            "round": obs.round,
        }
        reward = getattr(obs, "reward", 0.0)
        done = getattr(obs, "done", False)
        return {"observation": observation, "reward": reward, "done": done, "info": {}}
77
+
78
+
79
+ _game = GameManager()
80
+
81
+
82
+ # ── Pydantic request models ────────────────────────────────────────
83
+ class GameResetRequest(BaseModel):
84
+ seed: int = 42
85
+
86
+
87
+ class GameStepRequest(BaseModel):
88
+ decision: str
89
+ coalition_pitch: str = ""
90
+
91
+
92
+ class QwenDecideRequest(BaseModel):
93
+ """Board observation forwarded from the frontend for Qwen inference."""
94
+ state: Dict[str, Any]
95
+ event: str
96
+ options: List[str]
97
+ npc_statements: List[Dict[str, Any]] = []
98
+ round: int = 1
99
+
100
+
101
+ # ── Greedy fallback (mirrors frontend greedyPick) ──────────────────
102
+ _ROLE_WEIGHT = {
103
+ 'CEO': 1.5, 'CTO': 1.2, 'CFO': 1.0, 'Investor Rep': 1.3, 'Independent': 0.8,
104
+ }
105
+
106
+ def _greedy_pick(options: List[str], npc_statements: List[Dict[str, Any]]) -> str:
107
+ tally = {opt: 0.0 for opt in options}
108
+ for npc in npc_statements:
109
+ vote = npc.get('vote', '')
110
+ if vote in tally:
111
+ tally[vote] += _ROLE_WEIGHT.get(npc.get('role', ''), 0.8) * float(npc.get('confidence', 0.5))
112
+ return max(tally, key=lambda k: tally[k])
113
+
114
+
115
+ # ── Qwen system prompt ─────────────────────────────────────────────
116
+ _QWEN_SYSTEM = (
117
+ "You are the CEO agent in a boardroom simulation. "
118
+ "Given the board state and NPC positions, choose the best strategic decision "
119
+ "and craft a short coalition pitch to win over dissenters. "
120
+ "Always respond with ONLY a valid JSON object in the exact format: "
121
+ '{"decision": "<one of the listed options>", "coalition_pitch": "<1-2 sentence pitch>"}'
122
+ " — no markdown, no explanation, no extra keys."
123
+ )
124
+
125
+
126
+ # ── Create the openenv app (for /health, /schema, /ws, etc.) ───────
127
# The app is built once at import time. The unresolved merge-conflict markers
# that sat before and inside this call have been removed; both sides of the
# conflict passed the same arguments.
app = create_app(
    BoardSimEnvironment,
    BoardSimAction,
    BoardSimObservation,
    env_name="board_sim_env",
    max_concurrent_envs=64,  # allow up to 64 concurrent WebSocket sessions
)
139
+
140
+ # CORS — allow React dev server and any origin in dev
141
+ app.add_middleware(
142
+ CORSMiddleware,
143
+ allow_origins=["*"],
144
+ allow_credentials=False,
145
+ allow_methods=["*"],
146
+ allow_headers=["*"],
147
+ )
148
+
149
+
150
+ # ── Stateful routes ────────────────────────────────────────────────
151
+ @app.post("/game/reset")
152
+ def game_reset(req: GameResetRequest):
153
+ """Reset the persistent game environment and return initial observation."""
154
+ return _game.reset(seed=req.seed)
155
+
156
+
157
+ @app.post("/game/step")
158
+ def game_step(req: GameStepRequest):
159
+ """Step the persistent game environment with the given decision."""
160
+ return _game.step(decision=req.decision, coalition_pitch=req.coalition_pitch)
161
+
162
+
163
+ # ── LM Studio Local Server Config ──────────────────────────────────
164
+ _LM_STUDIO_URL = "http://localhost:1234/v1/chat/completions"
165
+
166
+
167
@app.post("/qwen/decide")
async def qwen_decide(req: QwenDecideRequest):
    """Ask the Qwen model served by a local LM Studio instance for a decision.

    Builds a prompt from the forwarded board observation, POSTs it to the
    OpenAI-compatible endpoint at ``_LM_STUDIO_URL`` and parses the JSON reply.

    Returns:
        A dict with ``decision``, ``coalition_pitch`` and ``source``.
        ``source`` is ``"qwen_lmstudio"`` when the model reply was parsed
        (an illegal ``decision`` in that reply is replaced by the greedy
        pick), or ``"greedy_fallback"`` — with an extra ``error`` key holding
        the exception text — when anything on the model path failed.
    """
    try:
        # Prompt construction lives inside the try so a malformed NPC entry
        # degrades to the greedy fallback instead of surfacing as a 500.
        npc_lines = []
        for npc in req.npc_statements:
            try:
                confidence = float(npc.get('confidence', 0.5))
            except (TypeError, ValueError):
                confidence = 0.5
            statement = str(npc.get('statement') or '')[:120]
            npc_lines.append(
                f" - {npc.get('role', '?')}: votes '{npc.get('vote', '?')}' "
                f"(confidence {confidence:.2f}) — '{statement}'"
            )
        npc_summary = "\n".join(npc_lines)

        user_prompt = (
            f"Round: {req.round}\n"
            f"Company state: {json.dumps(req.state)}\n"
            f"Current crisis/event: {req.event}\n"
            f"Available options: {req.options}\n"
            f"Board member positions:\n{npc_summary}\n\n"
            "Your JSON decision:"
        )

        # Payload for the OpenAI-compatible local server (LM Studio).
        payload = {
            "model": "qwen",  # LM Studio usually ignores this and uses the loaded model
            "messages": [
                {"role": "system", "content": _QWEN_SYSTEM},
                {"role": "user", "content": user_prompt},
            ],
            "temperature": 0.1,
        }

        async with httpx.AsyncClient(timeout=60.0) as client:
            resp = await client.post(_LM_STUDIO_URL, json=payload)

        resp.raise_for_status()
        data = resp.json()
        raw_content = data["choices"][0]["message"]["content"].strip()

        # The model sometimes wraps its JSON in a markdown code fence.
        if "```json" in raw_content:
            raw_content = raw_content.split("```json")[1].split("```")[0].strip()
        elif "```" in raw_content:
            raw_content = raw_content.split("```")[1].split("```")[0].strip()

        parsed = json.loads(raw_content)
        if not isinstance(parsed, dict):
            raise ValueError("model reply is not a JSON object")
        decision = str(parsed.get("decision", "")).strip()
        pitch = str(parsed.get("coalition_pitch") or "").strip()

        # Never forward a decision the environment would not recognise.
        if decision not in req.options:
            decision = _greedy_pick(req.options, req.npc_statements)

        return {"decision": decision, "coalition_pitch": pitch, "source": "qwen_lmstudio"}

    except Exception as exc:
        # Deliberate best-effort: LM Studio not running, model not loaded, or
        # an unparseable reply all end in the greedy fallback.
        fallback = _greedy_pick(req.options, req.npc_statements)
        return {
            "decision": fallback,
            "coalition_pitch": "",
            "source": "greedy_fallback",
            "error": str(exc),
        }
231
+
232
+
233
+ # ── Entry point ────────────────────────────────────────────────────
234
def main(host: str = "0.0.0.0", port: int = 8000):
    """Run the module-level FastAPI `app` under uvicorn.

    Args:
        host: Interface to bind.
        port: TCP port to listen on.
    """
    # The conflict residue that preceded this function (a second copy of the
    # closing `max_concurrent_envs=64, )` arguments) has been dropped.
    import uvicorn
    uvicorn.run(app, host=host, port=port)
242
+
243
+ if __name__ == "__main__":
244
+ import argparse
245
+ parser = argparse.ArgumentParser()
246
+ parser.add_argument("--port", type=int, default=8000)
247
+ args = parser.parse_args()
248
+ main(port=args.port)
envs/board_sim_env/server/board_sim_env_environment.py ADDED
@@ -0,0 +1,979 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ """NeuralEdge AI Boardroom — OpenEnv environment.
5
+
6
+ The agent plays the CEO of a Series B AI startup. Each of 10 rounds it sees
7
+ a market-crisis event, statements + votes from 4 hidden-agenda NPC board
8
+ members, and must pick one of 3 decisions. Decisions are resolved by a
9
+ weighted vote and produce dense reward proportional to a composite
10
+ profitability score plus coalition / trust shaping terms.
11
+
12
+ NPCs are deterministic-given-(seed, round, state) — same observation in
13
+ training and resolution — so GRPO has a stable target to learn against.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import hashlib
19
+ import random
20
+ from typing import Any, Dict, List, Optional, Tuple
21
+ from uuid import uuid4
22
+
23
+ from openenv.core.env_server.interfaces import Environment
24
+
25
+ try:
26
+ from ..models import BoardSimAction, BoardSimObservation, BoardState
27
+ except ImportError: # direct script execution: `python server/board_sim_env_environment.py`
28
+ import os, sys
29
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
30
+ from models import BoardSimAction, BoardSimObservation, BoardState # type: ignore
31
+
32
+
33
+ # ---------------------------------------------------------------------------
34
+ # Static config
35
+ # ---------------------------------------------------------------------------
36
+
37
+ # Per-role weighted vote influence (CEO is the agent).
38
+ ROLE_WEIGHT: Dict[str, float] = {
39
+ "CEO": 1.5,
40
+ "CTO": 1.2,
41
+ "CFO": 1.0,
42
+ "Investor Rep": 1.3,
43
+ "Independent": 0.8,
44
+ }
45
+
46
+ <<<<<<< HEAD
47
+ # NPCs and their BASE hidden agendas. At episode reset() these are
48
+ # jittered per-seed so no single optimal decision path exists across episodes.
49
+ # The agent never sees the final per-episode weights — it must infer them
50
+ # from observable statements + vote history (Theory of Mind).
51
+ NPC_AGENDAS_BASE: Dict[str, Dict[str, float]] = {
52
+ =======
53
+ # NPCs and their hidden agendas: weights on per-step state-deltas they
54
+ # privately maximize. The agent never sees these.
55
+ NPC_AGENDAS: Dict[str, Dict[str, float]] = {
56
+ >>>>>>> 220bc90 (Initial commit for OpenEnv Hackathon submission)
57
+ # CTO — wants product strength + team morale; hates burn.
58
+ "CTO": {
59
+ "product_readiness": 0.55,
60
+ "team_morale": 0.40,
61
+ "burn_rate": -0.10,
62
+ "regulatory_risk": -0.05,
63
+ },
64
+ # CFO — burn discipline, runway, regulatory caution.
65
+ "CFO": {
66
+ "burn_rate": -0.60,
67
+ "revenue": 0.30,
68
+ "runway_months": 0.20,
69
+ "regulatory_risk": -0.25,
70
+ },
71
+ # Investor Rep — growth-at-all-costs.
72
+ "Investor Rep": {
73
+ "investor_confidence": 0.45,
74
+ "market_share": 0.35,
75
+ "revenue": 0.25,
76
+ "burn_rate": -0.05,
77
+ },
78
+ # Independent — reputation/safety; consensus seeker.
79
+ "Independent": {
80
+ "regulatory_risk": -0.45,
81
+ "team_morale": 0.30,
82
+ "investor_confidence": 0.25,
83
+ "market_share": 0.10,
84
+ },
85
+ }
86
+
87
+ <<<<<<< HEAD
88
+ # Keep a module-level alias for backwards compatibility.
89
+ NPC_AGENDAS: Dict[str, Dict[str, float]] = NPC_AGENDAS_BASE
90
+
91
+
92
+ def _jitter_agendas(seed: int) -> Dict[str, Dict[str, float]]:
93
+ """Return per-episode NPC agenda weights by adding seeded noise (±25%)
94
+ to the base weights. Signs are preserved so the qualitative role
95
+ identity stays intact (CFO still cares about burn; CTO about product),
96
+ but the *magnitude* varies — forcing the agent to infer fresh priorities
97
+ each episode rather than memorising a fixed optimal sequence.
98
+ """
99
+ rng = random.Random(seed ^ 0xDEADBEEF) # distinct stream from NPC rng
100
+ jittered: Dict[str, Dict[str, float]] = {}
101
+ for role, agenda in NPC_AGENDAS_BASE.items():
102
+ jittered[role] = {}
103
+ for field, w in agenda.items():
104
+ # Jitter: multiply by U[0.75, 1.25], keep sign.
105
+ factor = rng.uniform(0.75, 1.25)
106
+ jittered[role][field] = round(w * factor, 4)
107
+ return jittered
108
+
109
+ =======
110
+ >>>>>>> 220bc90 (Initial commit for OpenEnv Hackathon submission)
111
+ # Personality phrase banks for flavorful statements. State-aware: separate
112
+ # phrase pools for "calm" vs "crisis" mode are selected based on current
113
+ # state (low runway / low morale / high reg risk → crisis variant).
114
+ PHRASES: Dict[str, Dict[str, List[str]]] = {
115
+ "CTO": {
116
+ "calm": [
117
+ "Look, the architecture won't survive shortcuts here.",
118
+ "I've sketched the trade-offs — engineering's pretty clear.",
119
+ "If we ship before this is solid, we eat it in support tickets.",
120
+ "Frankly, our infra dictates this choice more than any of you realize.",
121
+ ],
122
+ "crisis": [
123
+ "Team is one bad sprint from a mass exit. Pick carefully.",
124
+ "I cannot keep papering over technical debt with sprint heroics.",
125
+ "Our incident channel is on fire; this isn't the moment for bold strokes.",
126
+ ],
127
+ },
128
+ "CFO": {
129
+ "calm": [
130
+ "The numbers do not lie, and right now they're whispering.",
131
+ "I'd like the board minutes to reflect my reservations.",
132
+ "From a fiduciary standpoint, only one of these is defensible.",
133
+ ],
134
+ "crisis": [
135
+ "Runway is the only KPI that matters at this table right now.",
136
+ "I have spreadsheets that show this is how startups die. Slowly.",
137
+ "Cash is king and our king is in hospice. Pick the cheapest path.",
138
+ ],
139
+ },
140
+ "Investor Rep": {
141
+ "calm": [
142
+ "My LPs care about one thing — and it's not on this slide.",
143
+ "Sequoia isn't here for incremental. We need the bold move.",
144
+ "Let's not optimize for not losing. Let's optimize for winning huge.",
145
+ ],
146
+ "crisis": [
147
+ "If you punt on growth here I will struggle to defend the next round.",
148
+ "The syndicate will read your conservatism as a signal. Don't blink.",
149
+ "This is when 10x funds get made. Or lost. Choose accordingly.",
150
+ ],
151
+ },
152
+ "Independent": {
153
+ "calm": [
154
+ "I want to make sure we're hearing every voice in the room.",
155
+ "There's a version of this that protects everyone's interests.",
156
+ "Long-term reputation outlasts any single quarter.",
157
+ ],
158
+ "crisis": [
159
+ "Whatever we choose tonight will end up in someone's deposition.",
160
+ "The board's fiduciary duty is in scope. Let me be very clear.",
161
+ "Optics matter as much as economics when the press is sniffing.",
162
+ ],
163
+ },
164
+ }
165
+
166
+
167
+ # Agenda KEYWORDS — used to score the agent's `coalition_pitch` text.
168
+ # A pitch that contains an NPC's keywords boosts that NPC's confidence
169
+ # in the agent's chosen decision (subject to alignment cap). The agent
170
+ # never sees these directly; it must learn to write boardroom-style
171
+ # arguments that resonate with each member's hidden priorities.
172
+ NPC_KEYWORDS: Dict[str, List[str]] = {
173
+ "CTO": [
174
+ "engineering", "architecture", "technical", "team", "morale", "infra",
175
+ "build", "ship", "quality", "debt", "platform", "stack", "code",
176
+ "production", "reliability", "scale", "system", "model", "research",
177
+ ],
178
+ "CFO": [
179
+ "burn", "cash", "runway", "fiduciary", "conservative", "discipline",
180
+ "cost", "savings", "margin", "balance", "audit", "expense", "capital",
181
+ "compliance", "regulatory", "risk", "responsible", "prudent", "fiscal",
182
+ ],
183
+ "Investor Rep": [
184
+ "growth", "scale", "10x", "tam", "market", "moat", "winner",
185
+ "ipo", "exit", "valuation", "multiple", "revenue", "arr", "category",
186
+ "leader", "dominate", "aggressive", "ambitious", "bold", "huge",
187
+ ],
188
+ "Independent": [
189
+ "reputation", "stakeholders", "trust", "transparent", "ethics",
190
+ "long-term", "responsible", "governance", "consensus", "balance",
191
+ "safety", "society", "compliance", "duty", "principled", "credibility",
192
+ ],
193
+ }
194
+
195
+
196
+ def _crisis_mode(state: Dict[str, Any]) -> bool:
197
+ """True if the company is materially in trouble — switches NPC tone."""
198
+ return (
199
+ state["runway_months"] < 6.0
200
+ or state["team_morale"] < 0.4
201
+ or state["regulatory_risk"] > 0.6
202
+ or state["investor_confidence"] < 0.4
203
+ )
204
+
205
+
206
def _score_pitch(pitch: str, role: str) -> float:
    """Score how strongly `pitch` echoes the keyword list of NPC `role`.

    Matching is case-insensitive. A keyword counts once if it appears right
    after a space in the space-padded, lower-cased pitch, so it matches at the
    start of a token ("ship" also hits "shipping"). The hit count is divided
    by ``max(4, len(keywords) // 4)`` and capped at 1.0. An empty pitch
    scores 0.0.
    """
    if not pitch:
        return 0.0

    padded = " " + pitch.lower() + " "
    keywords = NPC_KEYWORDS[role]

    matched = 0
    for word in keywords:
        if (" " + word) in padded:
            matched += 1

    # Small saturation point: a focused pitch reaches the cap without having
    # to spam every keyword.
    saturation = max(4, len(keywords) // 4)
    return min(1.0, matched / saturation)
217
+
218
+
219
+ # ---------------------------------------------------------------------------
220
+ # 10-round event timeline (taken from product spec, normalized)
221
+ # ---------------------------------------------------------------------------
222
+ # Each event has 3 options; each option has a delta dict applied to state.
223
+ # Numeric units: revenue/burn_rate in USD, fractions in [0,1], runway in months.
224
+ # Special key `done_reason` triggers terminal state.
225
# Merge-conflict markers around every title/description pair were resolved to
# the HEAD wording. Options and consequence deltas were identical on both
# sides and are unchanged.
EVENTS: List[Dict[str, Any]] = [
    {
        "title": "Market Disruption",
        "description": "A well-funded competitor launches a similar product at half the price, threatening your market position.",
        "options": ["slash_prices", "differentiate", "acquire_startup"],
        "consequences": {
            "slash_prices": {"revenue_mult": 0.85, "market_share": 0.05, "investor_confidence": -0.10},
            "differentiate": {"product_readiness": 0.10, "burn_rate": 50_000, "market_share": 0.02},
            "acquire_startup": {"revenue": 500_000, "burn_rate": 150_000, "runway_months": -3},
        },
    },
    {
        "title": "Enterprise Partnership Dilemma",
        "description": "A major enterprise client offers a $5M contract but demands source-code escrow and data access rights.",
        "options": ["accept_deal", "negotiate_terms", "reject_deal"],
        "consequences": {
            "accept_deal": {"revenue": 5_000_000, "regulatory_risk": 0.15, "team_morale": -0.05},
            "negotiate_terms": {"revenue": 3_000_000, "regulatory_risk": 0.05},
            "reject_deal": {"investor_confidence": -0.15, "team_morale": 0.05},
        },
    },
    {
        "title": "Talent Retention Crisis",
        "description": "Your core engineering team received competing offers. They are asking for a 40% raise or they walk.",
        "options": ["match_offers", "partial_match", "let_them_leave"],
        "consequences": {
            "match_offers": {"burn_rate": 200_000, "team_morale": 0.15, "runway_months": -2},
            "partial_match": {"burn_rate": 100_000, "team_morale": 0.05},
            "let_them_leave": {"team_morale": -0.25, "product_readiness": -0.15, "burn_rate": -100_000},
        },
    },
    {
        "title": "Regulatory Compliance Ultimatum",
        "description": "A new AI regulation takes effect in 90 days. Full compliance costs $2M; non-compliance risks your operating license.",
        "options": ["full_compliance", "partial_compliance", "exit_EU_market"],
        "consequences": {
            "full_compliance": {"burn_rate": 100_000, "regulatory_risk": -0.20, "investor_confidence": 0.10},
            "partial_compliance": {"regulatory_risk": -0.10, "investor_confidence": -0.05},
            "exit_EU_market": {"revenue_mult": 0.90, "regulatory_risk": -0.20, "market_share": -0.03},
        },
    },
    {
        "title": "Public Relations Crisis",
        "description": "Your AI model appears in a high-profile misuse incident. Media coverage is intensifying. Trust is at stake.",
        "options": ["public_apology", "legal_action", "rebrand"],
        "consequences": {
            "public_apology": {"investor_confidence": -0.10, "team_morale": -0.10, "regulatory_risk": 0.10},
            "legal_action": {"burn_rate": 100_000, "regulatory_risk": 0.20},
            "rebrand": {"burn_rate": 200_000, "market_share": -0.02, "team_morale": 0.10},
        },
    },
    {
        "title": "Strategic Acquisition Offer",
        "description": "A major tech conglomerate has approached with an acqui-hire offer at 2x your current valuation.",
        "options": ["accept_acquisition", "counter_offer", "reject_and_raise"],
        "consequences": {
            "accept_acquisition": {"done_reason": "acquisition", "revenue": 0, "_terminal_bonus": 30.0},
            "counter_offer": {"investor_confidence": 0.10, "runway_months": 6},
            "reject_and_raise": {"burn_rate": 100_000, "investor_confidence": 0.15, "runway_months": -2},
        },
    },
    {
        "title": "Institutional Investment Round",
        "description": "Late-stage investors are ready to wire $10M but want board seats and a 2x liquidation preference clause.",
        "options": ["accept_terms", "negotiate", "bootstrap"],
        "consequences": {
            "accept_terms": {"revenue": 10_000_000, "investor_confidence": 0.20, "runway_months": 12},
            "negotiate": {"investor_confidence": -0.05, "burn_rate": 50_000},
            "bootstrap": {"runway_months": -4, "team_morale": -0.10, "market_share": 0.03},
        },
    },
    {
        "title": "Breakthrough Technology Decision",
        "description": "Your R&D team developed a new architecture that cuts AI inference costs by 60%. How do you deploy it?",
        "options": ["pivot_product", "license_technology", "keep_internal"],
        "consequences": {
            "pivot_product": {"product_readiness": -0.10, "burn_rate": -150_000, "market_share": 0.05},
            "license_technology": {"revenue": 2_000_000, "regulatory_risk": 0.05},
            "keep_internal": {"product_readiness": 0.15, "market_share": 0.08},
        },
    },
    {
        "title": "Internal Governance Crisis",
        "description": "An employee has leaked internal safety evaluations suggesting your flagship model has undisclosed risks.",
        "options": ["full_transparency", "damage_control", "internal_investigation"],
        "consequences": {
            "full_transparency": {"investor_confidence": -0.20, "team_morale": 0.15, "regulatory_risk": -0.10},
            "damage_control": {"burn_rate": 80_000, "regulatory_risk": 0.10},
            "internal_investigation": {"team_morale": -0.10, "regulatory_risk": -0.05},
        },
    },
    {
        "title": "Exit Strategy Decision",
        "description": "The board must reach a final vote: pursue an IPO, accept a strategic acquisition, or remain independent.",
        "options": ["ipo", "acquisition", "stay_private"],
        "consequences": {
            "ipo": {"revenue_mult": 2.0, "burn_rate": 500_000, "investor_confidence": 0.30, "_terminal_bonus": 25.0},
            "acquisition": {"done_reason": "acquisition", "_terminal_bonus": 15.0},
            "stay_private": {"runway_months": 6, "investor_confidence": -0.10, "_terminal_bonus": 5.0},
        },
    },
]
377
+
378
+
379
+ # Bounds for clamping after each delta.
380
# Inclusive (lower, upper) limits per state field; consumed by `_clamp` after
# every delta. Fields not listed here are left effectively unbounded by
# `_clamp`.
FIELD_BOUNDS: Dict[str, Tuple[float, float]] = {
    # Money and time: non-negative with a generous ceiling.
    "revenue": (0.0, 1_000_000_000_000.0),
    "burn_rate": (0.0, 10_000_000_000.0),
    "runway_months": (0.0, 120.0),
    # Fractions: confined to the unit interval.
    "product_readiness": (0.0, 1.0),
    "market_share": (0.0, 1.0),
    "team_morale": (0.0, 1.0),
    "investor_confidence": (0.0, 1.0),
    "regulatory_risk": (0.0, 1.0),
}
390
+
391
+
392
+ def _clamp(field: str, value: float) -> float:
393
+ lo, hi = FIELD_BOUNDS.get(field, (-1e18, 1e18))
394
+ return max(lo, min(hi, value))
395
+
396
+
397
+ # ---------------------------------------------------------------------------
398
+ # Profitability score — smooth, monotonic, no discontinuous jumps.
399
+ # Range: roughly 0..100, dominant terms: revenue, market share, runway, morale.
400
+ # ---------------------------------------------------------------------------
401
def compute_profitability_score(s: Dict[str, Any]) -> float:
    """Collapse the company state into a single score clamped to [0, 100].

    Args:
        s: State mapping. Reads ``revenue``, ``burn_rate``, ``runway_months``,
            ``market_share``, ``product_readiness``, ``team_morale``,
            ``investor_confidence`` and ``regulatory_risk``.

    Returns:
        The sum of the positive terms minus the two penalties, clamped to
        [0.0, 100.0]. The positive terms are capped at 22 + 18 + 18 + 14 + 10
        + 7 + 11 = 100 when the fractional inputs stay within [0, 1].
    """
    revenue = s["revenue"]
    burn = s["burn_rate"]
    runway = s["runway_months"]

    # Revenue: linear up to $8M, worth at most 22 points.
    total = min(revenue / 8_000_000.0, 1.0) * 22.0
    # Burn efficiency: 0 points at >= $1.4M/mo, rising linearly to 18 points
    # at zero burn.
    total += max(0.0, 1.0 - burn / 1_400_000.0) * 18.0
    # Runway: linear up to 18 months, worth at most 18 points.
    total += min(runway / 18.0, 1.0) * 18.0
    # Market share saturates at 50% (14 points); product readiness adds 10.
    total += min(s["market_share"], 0.50) / 0.50 * 14.0
    total += s["product_readiness"] * 10.0
    # People and investors.
    total += s["team_morale"] * 7.0
    total += s["investor_confidence"] * 11.0

    # Penalties: regulatory drag (up to 18) and a low-runway penalty that
    # ramps from 0 at 6 months to 10 at 0 months.
    total = total - s["regulatory_risk"] * 18.0 - max(0.0, (6.0 - runway) / 6.0) * 10.0

    return float(max(0.0, min(100.0, total)))
427
+
428
+
429
+ # ---------------------------------------------------------------------------
430
+ # Environment
431
+ # ---------------------------------------------------------------------------
432
class BoardSimEnvironment(Environment):
    """OpenEnv server for the boardroom simulation.

    One episode walks the agent (the CEO) through every entry of ``EVENTS`` in
    a seed-dependent order. Each round the scripted NPC board members state a
    vote, the agent submits a decision (optionally with a coalition pitch), a
    weighted vote picks the winning option, and that option's consequences are
    applied to the company state.

    NOTE(review): this class was committed with unresolved merge-conflict
    markers. Every conflict is resolved here in favour of the HEAD side
    (trust-weighted votes, shuffled events, jittered agendas, consequence
    noise, normalised reward). That side relies on ``NPC_AGENDAS_BASE``,
    ``_jitter_agendas`` and an ``event_idx`` field on ``BoardSimObservation``
    being defined elsewhere — confirm before merging.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    def __init__(self):
        """Create the environment and immediately start a first episode."""
        super().__init__()
        self._state: BoardState = BoardState(episode_id=str(uuid4()), step_count=0)
        self._seed: int = 0
        # Per-episode agenda weights (set in reset, used in _simulate_npc).
        self._episode_agendas: Dict[str, Dict[str, float]] = NPC_AGENDAS_BASE
        self.reset()

    # ------------------------------------------------------------------ utils
    def _event_index(self, round_idx: int) -> int:
        """Map a 0-based round index to an index into ``EVENTS``.

        Uses the per-episode shuffled order when ``reset`` has created it and
        falls back to the identity mapping otherwise.
        """
        order = getattr(self, "_event_order", None)
        return order[round_idx] if order is not None else round_idx

    def _npc_rng(self, role: str, round_idx: int) -> random.Random:
        """Return an RNG seeded from (episode seed, role, round).

        Because the seed is a pure function of those three values, the NPC
        statements shown in an observation are reproduced exactly when the
        same round is simulated again at vote-resolution time.
        """
        key = f"{self._seed}|{role}|{round_idx}".encode()
        h = int(hashlib.sha256(key).hexdigest()[:16], 16)
        return random.Random(h)

    def _simulate_npc(
        self, role: str, event_idx: int, state: Dict[str, Any], round_label: int = 0
    ) -> Dict[str, Any]:
        """Produce one NPC's statement, vote and confidence for an event.

        Each option is scored by the agenda-weighted sum of its consequence
        deltas plus seeded Gaussian noise; the highest-scoring option is the
        vote.

        Args:
            role: NPC role; key into the per-episode agendas and ``PHRASES``.
            event_idx: Index into ``EVENTS`` (options and consequences).
            state: Current state dict; reads ``revenue`` and ``trust``.
            round_label: 0-based round number; seeds the RNG and selects the
                phrase, so the NPC's noise varies by round, not by event.

        Returns:
            Dict with keys ``role``, ``statement``, ``vote``, ``confidence``.
        """
        rng = self._npc_rng(role, round_label)
        event = EVENTS[event_idx]
        agenda = self._episode_agendas[role]  # per-episode jittered weights

        # Trust shifts the NPC's confidence. With trust kept in [0.1, 1.0]
        # by step(), the bias lies in [-0.12, +0.15].
        trust = state.get("trust", {}).get(role, 0.5)
        trust_bias = (trust - 0.5) * 0.30

        scored: List[Tuple[float, str]] = []
        for opt in event["options"]:
            conseq = event["consequences"][opt]
            score = 0.0
            for k, w in agenda.items():
                v = conseq.get(k, 0.0)
                # Normalize across heterogeneous units so weights are comparable.
                if k == "revenue":
                    v = v / 1_000_000.0
                elif k == "burn_rate":
                    v = v / 100_000.0
                elif k == "runway_months":
                    v = v / 6.0
                score += v * w
            # Special-case revenue_mult so revenue-impacting options register.
            if "revenue_mult" in conseq and "revenue" in agenda:
                score += (conseq["revenue_mult"] - 1.0) * (state["revenue"] / 1_000_000.0) * agenda["revenue"]
            score += rng.gauss(0.0, 0.20)  # personality noise
            scored.append((score, opt))

        scored.sort(reverse=True)
        chosen = scored[0][1]
        margin = scored[0][0] - scored[1][0] if len(scored) > 1 else 1.0
        # Confidence grows with the winning margin and with trust in the CEO.
        confidence = float(max(0.05, min(1.0, 0.5 + 0.5 * margin + trust_bias)))

        # Pick a phrase deterministically per (round, role), state-aware.
        mode = "crisis" if _crisis_mode(state) else "calm"
        phrase_pool = PHRASES[role][mode]
        phrase = phrase_pool[round_label % len(phrase_pool)]
        statement = f"{phrase} I'm voting {chosen}."

        return {
            "role": role,
            "statement": statement,
            "vote": chosen,
            "confidence": confidence,
        }

    def _simulate_all_npcs(self, event_idx: int, state: Dict[str, Any], round_label: int = 0) -> List[Dict[str, Any]]:
        """Run ``_simulate_npc`` for every role in ``NPC_AGENDAS``."""
        return [self._simulate_npc(role, event_idx, state, round_label=round_label) for role in NPC_AGENDAS]

    # ------------------------------------------------------------------ obs
    def _obs_state(self) -> Dict[str, Any]:
        """Refresh ``profitability_score`` and return a shallow state copy.

        The copy is shallow: nested containers such as ``trust`` and
        ``history`` are shared with the live state.
        """
        s = self._state.state_dict
        # Recompute profitability so it's always fresh in obs.
        s["profitability_score"] = compute_profitability_score(s)
        return dict(s)

    def _build_obs(
        self,
        round_idx: int,
        npc_statements: List[Dict[str, Any]],
        reward: float,
        done: bool,
    ) -> BoardSimObservation:
        """Assemble the observation for the given 0-based round.

        Args:
            round_idx: Round whose event is shown; values at or beyond
                ``len(EVENTS)`` produce a "Game over." observation.
            npc_statements: NPC statements to expose to the agent.
            reward: Reward earned by the step that led here.
            done: Whether the episode has ended.
        """
        if round_idx >= len(EVENTS):
            event_desc, options = "Game over.", []
            # Previously unassigned on this path (latent NameError); report
            # the final event of the episode's order instead.
            shuffled_idx = self._event_index(len(EVENTS) - 1)
        else:
            # Use shuffled event order so the CEO sees the correct event.
            shuffled_idx = self._event_index(round_idx)
            event = EVENTS[shuffled_idx]
            event_desc = f"{event['title']} — {event['description']}"
            options = list(event["options"])
        return BoardSimObservation(
            state=self._obs_state(),
            event=event_desc,
            options=options,
            npc_statements=npc_statements,
            round=self._state.state_dict["round"],
            done=done,
            reward=float(reward),
            event_idx=shuffled_idx,
        )

    # ------------------------------------------------------------------ reset
    def reset(self, seed: Optional[int] = None, episode_id: Optional[str] = None, **kwargs: Any) -> BoardSimObservation:
        """Start a new episode and return its first observation.

        Args:
            seed: Episode seed; a random one is drawn when omitted. It
                determines the agenda jitter, the event order, the
                consequence noise and the NPC noise.
            episode_id: Optional identifier; a fresh UUID is used if falsy.
            **kwargs: Accepted and ignored.
        """
        self._seed = int(seed) if seed is not None else random.randint(0, 2**31 - 1)

        # Per-episode agenda jitter: the NPCs' hidden weights vary by seed, so
        # the agent has to infer priorities from behaviour rather than
        # memorise a fixed table.
        self._episode_agendas = _jitter_agendas(self._seed)

        self._state = BoardState(
            episode_id=episode_id or str(uuid4()),
            step_count=0,
        )
        self._state.state_dict = {
            "round": 1,
            "revenue": 2_000_000.0,
            "burn_rate": 1_200_000.0,  # $1.2M/mo — Series-B pace
            "runway_months": 14.0,  # tight; survival is real pressure
            "product_readiness": 0.45,
            "market_share": 0.08,
            "team_morale": 0.70,
            "investor_confidence": 0.65,
            "regulatory_risk": 0.20,
            "profitability_score": 0.0,
            "trust": {role: 0.5 for role in NPC_AGENDAS_BASE},
            "trust_history": [{"round": 0, **{role: 0.5 for role in NPC_AGENDAS_BASE}}],
            "history": [],
            "done_reason": None,
            "winning_decision": None,
        }

        # Shuffle the event order per episode so the agent cannot memorise
        # "round N => option X". Deterministic given the seed.
        # NOTE(review): an event whose option sets ``done_reason`` can land
        # in an early round and end the episode there — confirm intended.
        rng = random.Random(self._seed)
        self._event_order = list(range(len(EVENTS)))
        rng.shuffle(self._event_order)

        # Per-episode multiplicative consequence noise: one N(0, 0.15) factor
        # per (event, option, field). Keys starting with "_" and
        # "done_reason" are control values and get no noise. The draw order
        # below must stay fixed to keep episodes reproducible per seed.
        self._consequence_noise: Dict[int, Dict[str, Dict[str, float]]] = {}
        for idx in range(len(EVENTS)):
            event = EVENTS[idx]
            self._consequence_noise[idx] = {}
            for opt in event["options"]:
                self._consequence_noise[idx][opt] = {}
                for k, v in event["consequences"][opt].items():
                    if k.startswith("_") or k == "done_reason":
                        continue
                    noise = rng.gauss(0.0, 0.15)
                    self._consequence_noise[idx][opt][k] = noise

        shuffled_idx = self._event_order[0]
        npc_statements = self._simulate_all_npcs(shuffled_idx, self._state.state_dict, round_label=0)
        return self._build_obs(round_idx=0, npc_statements=npc_statements, reward=0.0, done=False)

    # ------------------------------------------------------------------ step
    def _resolve_vote(
        self,
        agent_decision: str,
        npc_statements: List[Dict[str, Any]],
        options: List[str],
        pitch: str = "",
        trust: Optional[Dict[str, float]] = None,
    ) -> Tuple[str, Dict[str, float], Dict[str, float]]:
        """Weighted vote with persuasion and trust scaling.

        Each NPC contributes ``ROLE_WEIGHT[role] * confidence * trust_mult``
        to its voted option, where ``trust_mult`` is twice the NPC's trust
        clamped to [0.5, 1.5]. The CEO contributes ``ROLE_WEIGHT['CEO']`` to
        the agent's pick. For an NPC that disagrees with the agent, up to 35%
        of its weight is redirected to the agent's pick in proportion to
        ``_score_pitch(pitch, role)``.

        Args:
            agent_decision: Option chosen by the agent.
            npc_statements: Output of ``_simulate_all_npcs``.
            options: Valid options for the current event.
            pitch: Optional coalition pitch text.
            trust: Per-role trust; missing roles default to 0.5.

        Returns:
            ``(winning_option, tally_by_option, pitch_score_by_role)``. Ties
            are broken in favour of the agent's pick.
        """
        trust = trust or {}
        tally: Dict[str, float] = {opt: 0.0 for opt in options}
        pitch_scores: Dict[str, float] = {}
        if agent_decision in tally:
            tally[agent_decision] += ROLE_WEIGHT["CEO"] * 1.0
        for npc in npc_statements:
            role = npc["role"]
            # Clamp so even a fully distrusted NPC keeps some voice.
            trust_mult = max(0.5, min(1.5, trust.get(role, 0.5) * 2.0))
            base = ROLE_WEIGHT[role] * npc["confidence"] * trust_mult
            ps = _score_pitch(pitch, role)
            pitch_scores[role] = ps
            if npc["vote"] == agent_decision or agent_decision not in tally:
                # Already aligned (or nothing to shift toward): full weight
                # stays on the NPC's own pick.
                if npc["vote"] in tally:
                    tally[npc["vote"]] += base
                continue
            # Persuasion: redirect up to 35% of weight to the agent's pick.
            shift_frac = 0.35 * ps
            tally[npc["vote"]] += base * (1.0 - shift_frac)
            tally[agent_decision] += base * shift_frac
        # Tie-break: max() returns the first maximal key in insertion order,
        # so put the agent's pick first to let it win exact ties.
        if agent_decision in tally:
            ordered = {agent_decision: tally[agent_decision]}
            ordered.update({k: v for k, v in tally.items() if k != agent_decision})
        else:
            ordered = tally
        winner = max(ordered, key=lambda k: ordered[k])
        return winner, tally, pitch_scores

    def _apply_consequence(self, conseq: Dict[str, Any]) -> None:
        """Apply one option's consequences to the state, clamping each field.

        ``revenue_mult`` multiplies revenue; keys listed in ``FIELD_BOUNDS``
        are added as deltas; keys starting with "_" , ``done_reason`` and any
        unrecognised key are ignored.
        """
        s = self._state.state_dict
        for k, v in conseq.items():
            if k.startswith("_") or k == "done_reason":
                continue
            if k == "revenue_mult":
                s["revenue"] = _clamp("revenue", s["revenue"] * float(v))
            elif k in FIELD_BOUNDS:
                s[k] = _clamp(k, s[k] + float(v))
            # other unrecognized keys ignored

    def _advance_runway(self) -> None:
        """Consume runway for the round that just elapsed.

        When revenue/12 covers the burn rate only 0.5 months are consumed;
        otherwise between 1 and 2 months are consumed, growing with the size
        of the deficit relative to the burn rate.
        """
        s = self._state.state_dict
        monthly_revenue = s["revenue"] / 12.0
        net = monthly_revenue - s["burn_rate"]
        if net >= 0:
            s["runway_months"] = _clamp("runway_months", s["runway_months"] - 0.5)
        else:
            # Burn extra months proportional to deficit (capped at 2/round).
            burn_months = min(2.0, max(1.0, abs(net) / max(s["burn_rate"], 1.0) * 1.0 + 1.0))
            s["runway_months"] = _clamp("runway_months", s["runway_months"] - burn_months)

    def step(self, action: BoardSimAction, timeout_s: Optional[float] = None, **kwargs: Any) -> BoardSimObservation:
        """Play one round: resolve the vote, apply the outcome, compute reward.

        Args:
            action: Agent action; reads ``decision`` and, when present,
                ``coalition_pitch``. An invalid decision falls back to the
                event's first option and incurs a reward penalty.
            timeout_s: Accepted and ignored.
            **kwargs: Accepted and ignored.

        Returns:
            Observation for the next round, or a terminal observation with
            ``done=True``. Stepping an already finished episode returns a
            terminal observation with zero reward and leaves state untouched.
        """
        s = self._state.state_dict

        # Already terminal?
        if s["done_reason"] is not None or s["round"] > len(EVENTS):
            return self._build_obs(
                round_idx=min(s["round"] - 1, len(EVENTS) - 1),
                npc_statements=[],
                reward=0.0,
                done=True,
            )

        round_idx = s["round"] - 1
        # Use shuffled event order (set in reset).
        shuffled_idx = self._event_index(round_idx)
        event = EVENTS[shuffled_idx]

        # Validate decision; fall back to first option on invalid input
        # (slight penalty so the policy learns to format actions correctly).
        invalid_action = action.decision not in event["options"]
        decision = event["options"][0] if invalid_action else action.decision

        # NPC votes are deterministic: same state, seed and round as when the
        # previous observation was built, hence the same statements.
        npc_statements = self._simulate_all_npcs(shuffled_idx, s, round_label=round_idx)

        # Resolve weighted vote (with optional persuasion via coalition_pitch).
        # Current trust is passed so high-trust NPCs carry more vote weight.
        pitch_text = getattr(action, "coalition_pitch", None) or ""
        winning_decision, vote_tally, pitch_scores = self._resolve_vote(
            decision, npc_statements, event["options"],
            pitch=pitch_text, trust=s["trust"],
        )

        # Snapshot pre-state for reward shaping.
        old_score = compute_profitability_score(s)
        old_trust_sum = sum(s["trust"].values())

        # Apply consequence of the WINNING decision (this is what actually happens).
        conseq = dict(event["consequences"][winning_decision])  # shallow copy
        terminal_bonus = float(conseq.get("_terminal_bonus", 0.0))
        if conseq.get("done_reason"):
            s["done_reason"] = conseq["done_reason"]

        # Scale each numeric delta by (1 + noise) using this episode's noise
        # table; control keys and fields without a noise entry pass through.
        noise_dict = getattr(self, "_consequence_noise", {}).get(shuffled_idx, {}).get(winning_decision, {})
        noisy_conseq: Dict[str, Any] = {}
        for k, v in conseq.items():
            is_control = k.startswith("_") or k == "done_reason"
            if not is_control and k in noise_dict and isinstance(v, (int, float)):
                noisy_conseq[k] = v * (1.0 + noise_dict[k])
            else:
                noisy_conseq[k] = v

        self._apply_consequence(noisy_conseq)
        self._advance_runway()

        # Trust updates: aligned NPCs +0.05; opposed -0.05 (clamped 0.1..1.0).
        for npc in npc_statements:
            role = npc["role"]
            cur = s["trust"].get(role, 0.5)
            delta = 0.05 if npc["vote"] == winning_decision else -0.05
            s["trust"][role] = max(0.1, min(1.0, cur + delta))

        new_score = compute_profitability_score(s)
        s["profitability_score"] = new_score
        s["winning_decision"] = winning_decision

        s["history"].append({
            "round": s["round"],
            "event_title": event["title"],
            "agent_decision": decision,
            "winning_decision": winning_decision,
            "agent_won_vote": winning_decision == decision,
            "score_after": new_score,
            "runway_after": s["runway_months"],
            "vote_tally": dict(vote_tally),
            "pitch_scores": dict(pitch_scores),
            "pitch_used": bool(pitch_text.strip()),
        })
        # Per-round trust trajectory for visualization / ToM analysis.
        s.setdefault("trust_history", []).append(
            {"round": s["round"], **{role: float(s["trust"][role]) for role in NPC_AGENDAS}}
        )

        # ----- Reward shaping -----
        # The profitability delta is divided by 100 so its magnitude is
        # comparable to the coalition, trust and pitch terms below.
        reward = (new_score - old_score) / 100.0  # primary signal (normalized)
        reward += 0.5 if winning_decision == decision else -0.2  # coalition bonus / penalty
        reward += 0.3 * (sum(s["trust"].values()) - old_trust_sum)  # trust delta
        # Persuasion bonus: mean pitch score across the NPCs the agent had to
        # convince (those whose vote differs from the agent's decision).
        opposed = [npc["role"] for npc in npc_statements if npc["vote"] != decision]
        if pitch_text.strip():
            # Small flat bonus for any non-empty pitch, to encourage use of
            # the pitch channel at all.
            reward += 0.05
            if opposed:
                avg_persuasion = sum(pitch_scores[r] for r in opposed) / len(opposed)
                reward += 0.4 * avg_persuasion
        if invalid_action:
            reward -= 0.5  # format penalty

        # ----- Terminal handling -----
        terminal_now = s["done_reason"] is not None
        if s["runway_months"] <= 0:
            s["done_reason"] = s["done_reason"] or "runway_exhausted"
            terminal_now = True
            # Bankruptcy penalty, kept moderate so a single failure does not
            # swamp the per-round signal.
            reward -= 2.0

        s["round"] += 1
        self._state.step_count += 1

        if not terminal_now and s["round"] > len(EVENTS):
            s["done_reason"] = s["done_reason"] or "finished_10"
            terminal_now = True

        if terminal_now:
            reward += terminal_bonus
            # Tiered terminal bonus by final profitability.
            if new_score >= 60:
                reward += 10.0
            elif new_score >= 40:
                reward += 5.0
            elif new_score < 20:
                reward -= 5.0

        # ----- Build next observation -----
        if terminal_now or s["round"] > len(EVENTS):
            next_npcs: List[Dict[str, Any]] = []
        else:
            next_round_idx = s["round"] - 1
            next_event_idx = self._event_index(next_round_idx)
            next_npcs = self._simulate_all_npcs(next_event_idx, s, round_label=next_round_idx)

        return self._build_obs(
            round_idx=min(s["round"] - 1, len(EVENTS) - 1),
            npc_statements=next_npcs,
            reward=reward,
            done=terminal_now,
        )

    @property
    def state(self) -> BoardState:
        """The live ``BoardState`` of the current episode (not a copy)."""
        return self._state
956
+
957
+
958
+ # ---------------------------------------------------------------------------
959
+ # Direct script run: quick self-test
960
+ # ---------------------------------------------------------------------------
961
if __name__ == "__main__":
    # Smoke test: play one seeded episode, always choosing the first listed
    # option, and print a line per round plus a final summary.
    env = BoardSimEnvironment()
    obs = env.reset(seed=0)

    print(f"INITIAL: round={obs.round} score={obs.state['profitability_score']:.2f}")
    print(f"EVENT: {obs.event}")
    for npc in obs.npc_statements:
        print(f"  [{npc['role']:13s}] vote={npc['vote']:<22s} conf={npc['confidence']:.2f} | {npc['statement']}")

    total_reward = 0.0
    while True:
        if obs.done:
            break
        decision = obs.options[0]  # always pick first option
        obs = env.step(BoardSimAction(decision=decision))
        total_reward += obs.reward
        # obs.round has already advanced, so the round just played is round-1.
        print(
            f"R{obs.round-1:>2d}: decision={decision:<22s} "
            f"win={env.state.state_dict['winning_decision']:<22s} "
            f"reward={obs.reward:+.2f} score={obs.state['profitability_score']:.1f} "
            f"runway={obs.state['runway_months']:.1f}"
        )

    print(f"\nDONE: reason={env.state.state_dict['done_reason']} total_reward={total_reward:+.2f} final_score={env.state.state_dict['profitability_score']:.2f}")
envs/board_sim_env/server/requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ openenv-core==0.2.3
2
+ fastapi>=0.115.0
3
+ uvicorn>=0.24.0
envs/board_sim_env/uv.lock ADDED
The diff for this file is too large to render. See raw diff
 
frontend/index.html ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8" />
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
+ <title>NeuralEdge AI Boardroom</title>
8
+ <meta name="description"
9
+ content="AI Observer Dashboard — Watch Sarah Chen navigate 10 board crises powered by a trained LLM agent." />
10
+ <link rel="preconnect" href="https://fonts.googleapis.com" />
11
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
12
+ <link
13
+ href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;700&display=swap"
14
+ rel="stylesheet" />
15
+ </head>
16
+
17
+ <body>
18
+ <div id="root"></div>
19
+ <script type="module" src="/src/main.jsx"></script>
20
+ </body>
21
+
22
+ </html>
frontend/package-lock.json ADDED
@@ -0,0 +1,1681 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "neuraledge-boardroom",
3
+ "version": "1.0.0",
4
+ "lockfileVersion": 3,
5
+ "requires": true,
6
+ "packages": {
7
+ "": {
8
+ "name": "neuraledge-boardroom",
9
+ "version": "1.0.0",
10
+ "dependencies": {
11
+ "react": "^18.3.1",
12
+ "react-dom": "^18.3.1"
13
+ },
14
+ "devDependencies": {
15
+ "@vitejs/plugin-react": "^4.3.1",
16
+ "vite": "^5.4.10"
17
+ }
18
+ },
19
+ "node_modules/@babel/code-frame": {
20
+ "version": "7.29.0",
21
+ "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.29.0.tgz",
22
+ "integrity": "sha512-9NhCeYjq9+3uxgdtp20LSiJXJvN0FeCtNGpJxuMFZ1Kv3cWUNb6DOhJwUvcVCzKGR66cw4njwM6hrJLqgOwbcw==",
23
+ "dev": true,
24
+ "license": "MIT",
25
+ "dependencies": {
26
+ "@babel/helper-validator-identifier": "^7.28.5",
27
+ "js-tokens": "^4.0.0",
28
+ "picocolors": "^1.1.1"
29
+ },
30
+ "engines": {
31
+ "node": ">=6.9.0"
32
+ }
33
+ },
34
+ "node_modules/@babel/compat-data": {
35
+ "version": "7.29.0",
36
+ "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.29.0.tgz",
37
+ "integrity": "sha512-T1NCJqT/j9+cn8fvkt7jtwbLBfLC/1y1c7NtCeXFRgzGTsafi68MRv8yzkYSapBnFA6L3U2VSc02ciDzoAJhJg==",
38
+ "dev": true,
39
+ "license": "MIT",
40
+ "engines": {
41
+ "node": ">=6.9.0"
42
+ }
43
+ },
44
+ "node_modules/@babel/core": {
45
+ "version": "7.29.0",
46
+ "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.29.0.tgz",
47
+ "integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==",
48
+ "dev": true,
49
+ "license": "MIT",
50
+ "peer": true,
51
+ "dependencies": {
52
+ "@babel/code-frame": "^7.29.0",
53
+ "@babel/generator": "^7.29.0",
54
+ "@babel/helper-compilation-targets": "^7.28.6",
55
+ "@babel/helper-module-transforms": "^7.28.6",
56
+ "@babel/helpers": "^7.28.6",
57
+ "@babel/parser": "^7.29.0",
58
+ "@babel/template": "^7.28.6",
59
+ "@babel/traverse": "^7.29.0",
60
+ "@babel/types": "^7.29.0",
61
+ "@jridgewell/remapping": "^2.3.5",
62
+ "convert-source-map": "^2.0.0",
63
+ "debug": "^4.1.0",
64
+ "gensync": "^1.0.0-beta.2",
65
+ "json5": "^2.2.3",
66
+ "semver": "^6.3.1"
67
+ },
68
+ "engines": {
69
+ "node": ">=6.9.0"
70
+ },
71
+ "funding": {
72
+ "type": "opencollective",
73
+ "url": "https://opencollective.com/babel"
74
+ }
75
+ },
76
+ "node_modules/@babel/generator": {
77
+ "version": "7.29.1",
78
+ "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.29.1.tgz",
79
+ "integrity": "sha512-qsaF+9Qcm2Qv8SRIMMscAvG4O3lJ0F1GuMo5HR/Bp02LopNgnZBC/EkbevHFeGs4ls/oPz9v+Bsmzbkbe+0dUw==",
80
+ "dev": true,
81
+ "license": "MIT",
82
+ "dependencies": {
83
+ "@babel/parser": "^7.29.0",
84
+ "@babel/types": "^7.29.0",
85
+ "@jridgewell/gen-mapping": "^0.3.12",
86
+ "@jridgewell/trace-mapping": "^0.3.28",
87
+ "jsesc": "^3.0.2"
88
+ },
89
+ "engines": {
90
+ "node": ">=6.9.0"
91
+ }
92
+ },
93
+ "node_modules/@babel/helper-compilation-targets": {
94
+ "version": "7.28.6",
95
+ "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.28.6.tgz",
96
+ "integrity": "sha512-JYtls3hqi15fcx5GaSNL7SCTJ2MNmjrkHXg4FSpOA/grxK8KwyZ5bubHsCq8FXCkua6xhuaaBit+3b7+VZRfcA==",
97
+ "dev": true,
98
+ "license": "MIT",
99
+ "dependencies": {
100
+ "@babel/compat-data": "^7.28.6",
101
+ "@babel/helper-validator-option": "^7.27.1",
102
+ "browserslist": "^4.24.0",
103
+ "lru-cache": "^5.1.1",
104
+ "semver": "^6.3.1"
105
+ },
106
+ "engines": {
107
+ "node": ">=6.9.0"
108
+ }
109
+ },
110
+ "node_modules/@babel/helper-globals": {
111
+ "version": "7.28.0",
112
+ "resolved": "https://registry.npmjs.org/@babel/helper-globals/-/helper-globals-7.28.0.tgz",
113
+ "integrity": "sha512-+W6cISkXFa1jXsDEdYA8HeevQT/FULhxzR99pxphltZcVaugps53THCeiWA8SguxxpSp3gKPiuYfSWopkLQ4hw==",
114
+ "dev": true,
115
+ "license": "MIT",
116
+ "engines": {
117
+ "node": ">=6.9.0"
118
+ }
119
+ },
120
+ "node_modules/@babel/helper-module-imports": {
121
+ "version": "7.28.6",
122
+ "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.28.6.tgz",
123
+ "integrity": "sha512-l5XkZK7r7wa9LucGw9LwZyyCUscb4x37JWTPz7swwFE/0FMQAGpiWUZn8u9DzkSBWEcK25jmvubfpw2dnAMdbw==",
124
+ "dev": true,
125
+ "license": "MIT",
126
+ "dependencies": {
127
+ "@babel/traverse": "^7.28.6",
128
+ "@babel/types": "^7.28.6"
129
+ },
130
+ "engines": {
131
+ "node": ">=6.9.0"
132
+ }
133
+ },
134
+ "node_modules/@babel/helper-module-transforms": {
135
+ "version": "7.28.6",
136
+ "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.28.6.tgz",
137
+ "integrity": "sha512-67oXFAYr2cDLDVGLXTEABjdBJZ6drElUSI7WKp70NrpyISso3plG9SAGEF6y7zbha/wOzUByWWTJvEDVNIUGcA==",
138
+ "dev": true,
139
+ "license": "MIT",
140
+ "dependencies": {
141
+ "@babel/helper-module-imports": "^7.28.6",
142
+ "@babel/helper-validator-identifier": "^7.28.5",
143
+ "@babel/traverse": "^7.28.6"
144
+ },
145
+ "engines": {
146
+ "node": ">=6.9.0"
147
+ },
148
+ "peerDependencies": {
149
+ "@babel/core": "^7.0.0"
150
+ }
151
+ },
152
+ "node_modules/@babel/helper-plugin-utils": {
153
+ "version": "7.28.6",
154
+ "resolved": "https://registry.npmjs.org/@babel/helper-plugin-utils/-/helper-plugin-utils-7.28.6.tgz",
155
+ "integrity": "sha512-S9gzZ/bz83GRysI7gAD4wPT/AI3uCnY+9xn+Mx/KPs2JwHJIz1W8PZkg2cqyt3RNOBM8ejcXhV6y8Og7ly/Dug==",
156
+ "dev": true,
157
+ "license": "MIT",
158
+ "engines": {
159
+ "node": ">=6.9.0"
160
+ }
161
+ },
162
+ "node_modules/@babel/helper-string-parser": {
163
+ "version": "7.27.1",
164
+ "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.27.1.tgz",
165
+ "integrity": "sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==",
166
+ "dev": true,
167
+ "license": "MIT",
168
+ "engines": {
169
+ "node": ">=6.9.0"
170
+ }
171
+ },
172
+ "node_modules/@babel/helper-validator-identifier": {
173
+ "version": "7.28.5",
174
+ "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.28.5.tgz",
175
+ "integrity": "sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q==",
176
+ "dev": true,
177
+ "license": "MIT",
178
+ "engines": {
179
+ "node": ">=6.9.0"
180
+ }
181
+ },
182
+ "node_modules/@babel/helper-validator-option": {
183
+ "version": "7.27.1",
184
+ "resolved": "https://registry.npmjs.org/@babel/helper-validator-option/-/helper-validator-option-7.27.1.tgz",
185
+ "integrity": "sha512-YvjJow9FxbhFFKDSuFnVCe2WxXk1zWc22fFePVNEaWJEu8IrZVlda6N0uHwzZrUM1il7NC9Mlp4MaJYbYd9JSg==",
186
+ "dev": true,
187
+ "license": "MIT",
188
+ "engines": {
189
+ "node": ">=6.9.0"
190
+ }
191
+ },
192
+ "node_modules/@babel/helpers": {
193
+ "version": "7.29.2",
194
+ "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.29.2.tgz",
195
+ "integrity": "sha512-HoGuUs4sCZNezVEKdVcwqmZN8GoHirLUcLaYVNBK2J0DadGtdcqgr3BCbvH8+XUo4NGjNl3VOtSjEKNzqfFgKw==",
196
+ "dev": true,
197
+ "license": "MIT",
198
+ "dependencies": {
199
+ "@babel/template": "^7.28.6",
200
+ "@babel/types": "^7.29.0"
201
+ },
202
+ "engines": {
203
+ "node": ">=6.9.0"
204
+ }
205
+ },
206
+ "node_modules/@babel/parser": {
207
+ "version": "7.29.2",
208
+ "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.29.2.tgz",
209
+ "integrity": "sha512-4GgRzy/+fsBa72/RZVJmGKPmZu9Byn8o4MoLpmNe1m8ZfYnz5emHLQz3U4gLud6Zwl0RZIcgiLD7Uq7ySFuDLA==",
210
+ "dev": true,
211
+ "license": "MIT",
212
+ "dependencies": {
213
+ "@babel/types": "^7.29.0"
214
+ },
215
+ "bin": {
216
+ "parser": "bin/babel-parser.js"
217
+ },
218
+ "engines": {
219
+ "node": ">=6.0.0"
220
+ }
221
+ },
222
+ "node_modules/@babel/plugin-transform-react-jsx-self": {
223
+ "version": "7.27.1",
224
+ "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx-self/-/plugin-transform-react-jsx-self-7.27.1.tgz",
225
+ "integrity": "sha512-6UzkCs+ejGdZ5mFFC/OCUrv028ab2fp1znZmCZjAOBKiBK2jXD1O+BPSfX8X2qjJ75fZBMSnQn3Rq2mrBJK2mw==",
226
+ "dev": true,
227
+ "license": "MIT",
228
+ "dependencies": {
229
+ "@babel/helper-plugin-utils": "^7.27.1"
230
+ },
231
+ "engines": {
232
+ "node": ">=6.9.0"
233
+ },
234
+ "peerDependencies": {
235
+ "@babel/core": "^7.0.0-0"
236
+ }
237
+ },
238
+ "node_modules/@babel/plugin-transform-react-jsx-source": {
239
+ "version": "7.27.1",
240
+ "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx-source/-/plugin-transform-react-jsx-source-7.27.1.tgz",
241
+ "integrity": "sha512-zbwoTsBruTeKB9hSq73ha66iFeJHuaFkUbwvqElnygoNbj/jHRsSeokowZFN3CZ64IvEqcmmkVe89OPXc7ldAw==",
242
+ "dev": true,
243
+ "license": "MIT",
244
+ "dependencies": {
245
+ "@babel/helper-plugin-utils": "^7.27.1"
246
+ },
247
+ "engines": {
248
+ "node": ">=6.9.0"
249
+ },
250
+ "peerDependencies": {
251
+ "@babel/core": "^7.0.0-0"
252
+ }
253
+ },
254
+ "node_modules/@babel/template": {
255
+ "version": "7.28.6",
256
+ "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.28.6.tgz",
257
+ "integrity": "sha512-YA6Ma2KsCdGb+WC6UpBVFJGXL58MDA6oyONbjyF/+5sBgxY/dwkhLogbMT2GXXyU84/IhRw/2D1Os1B/giz+BQ==",
258
+ "dev": true,
259
+ "license": "MIT",
260
+ "dependencies": {
261
+ "@babel/code-frame": "^7.28.6",
262
+ "@babel/parser": "^7.28.6",
263
+ "@babel/types": "^7.28.6"
264
+ },
265
+ "engines": {
266
+ "node": ">=6.9.0"
267
+ }
268
+ },
269
+ "node_modules/@babel/traverse": {
270
+ "version": "7.29.0",
271
+ "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.29.0.tgz",
272
+ "integrity": "sha512-4HPiQr0X7+waHfyXPZpWPfWL/J7dcN1mx9gL6WdQVMbPnF3+ZhSMs8tCxN7oHddJE9fhNE7+lxdnlyemKfJRuA==",
273
+ "dev": true,
274
+ "license": "MIT",
275
+ "dependencies": {
276
+ "@babel/code-frame": "^7.29.0",
277
+ "@babel/generator": "^7.29.0",
278
+ "@babel/helper-globals": "^7.28.0",
279
+ "@babel/parser": "^7.29.0",
280
+ "@babel/template": "^7.28.6",
281
+ "@babel/types": "^7.29.0",
282
+ "debug": "^4.3.1"
283
+ },
284
+ "engines": {
285
+ "node": ">=6.9.0"
286
+ }
287
+ },
288
+ "node_modules/@babel/types": {
289
+ "version": "7.29.0",
290
+ "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.29.0.tgz",
291
+ "integrity": "sha512-LwdZHpScM4Qz8Xw2iKSzS+cfglZzJGvofQICy7W7v4caru4EaAmyUuO6BGrbyQ2mYV11W0U8j5mBhd14dd3B0A==",
292
+ "dev": true,
293
+ "license": "MIT",
294
+ "dependencies": {
295
+ "@babel/helper-string-parser": "^7.27.1",
296
+ "@babel/helper-validator-identifier": "^7.28.5"
297
+ },
298
+ "engines": {
299
+ "node": ">=6.9.0"
300
+ }
301
+ },
302
+ "node_modules/@esbuild/aix-ppc64": {
303
+ "version": "0.21.5",
304
+ "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.21.5.tgz",
305
+ "integrity": "sha512-1SDgH6ZSPTlggy1yI6+Dbkiz8xzpHJEVAlF/AM1tHPLsf5STom9rwtjE4hKAF20FfXXNTFqEYXyJNWh1GiZedQ==",
306
+ "cpu": [
307
+ "ppc64"
308
+ ],
309
+ "dev": true,
310
+ "license": "MIT",
311
+ "optional": true,
312
+ "os": [
313
+ "aix"
314
+ ],
315
+ "engines": {
316
+ "node": ">=12"
317
+ }
318
+ },
319
+ "node_modules/@esbuild/android-arm": {
320
+ "version": "0.21.5",
321
+ "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.21.5.tgz",
322
+ "integrity": "sha512-vCPvzSjpPHEi1siZdlvAlsPxXl7WbOVUBBAowWug4rJHb68Ox8KualB+1ocNvT5fjv6wpkX6o/iEpbDrf68zcg==",
323
+ "cpu": [
324
+ "arm"
325
+ ],
326
+ "dev": true,
327
+ "license": "MIT",
328
+ "optional": true,
329
+ "os": [
330
+ "android"
331
+ ],
332
+ "engines": {
333
+ "node": ">=12"
334
+ }
335
+ },
336
+ "node_modules/@esbuild/android-arm64": {
337
+ "version": "0.21.5",
338
+ "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.21.5.tgz",
339
+ "integrity": "sha512-c0uX9VAUBQ7dTDCjq+wdyGLowMdtR/GoC2U5IYk/7D1H1JYC0qseD7+11iMP2mRLN9RcCMRcjC4YMclCzGwS/A==",
340
+ "cpu": [
341
+ "arm64"
342
+ ],
343
+ "dev": true,
344
+ "license": "MIT",
345
+ "optional": true,
346
+ "os": [
347
+ "android"
348
+ ],
349
+ "engines": {
350
+ "node": ">=12"
351
+ }
352
+ },
353
+ "node_modules/@esbuild/android-x64": {
354
+ "version": "0.21.5",
355
+ "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.21.5.tgz",
356
+ "integrity": "sha512-D7aPRUUNHRBwHxzxRvp856rjUHRFW1SdQATKXH2hqA0kAZb1hKmi02OpYRacl0TxIGz/ZmXWlbZgjwWYaCakTA==",
357
+ "cpu": [
358
+ "x64"
359
+ ],
360
+ "dev": true,
361
+ "license": "MIT",
362
+ "optional": true,
363
+ "os": [
364
+ "android"
365
+ ],
366
+ "engines": {
367
+ "node": ">=12"
368
+ }
369
+ },
370
+ "node_modules/@esbuild/darwin-arm64": {
371
+ "version": "0.21.5",
372
+ "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.21.5.tgz",
373
+ "integrity": "sha512-DwqXqZyuk5AiWWf3UfLiRDJ5EDd49zg6O9wclZ7kUMv2WRFr4HKjXp/5t8JZ11QbQfUS6/cRCKGwYhtNAY88kQ==",
374
+ "cpu": [
375
+ "arm64"
376
+ ],
377
+ "dev": true,
378
+ "license": "MIT",
379
+ "optional": true,
380
+ "os": [
381
+ "darwin"
382
+ ],
383
+ "engines": {
384
+ "node": ">=12"
385
+ }
386
+ },
387
+ "node_modules/@esbuild/darwin-x64": {
388
+ "version": "0.21.5",
389
+ "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.21.5.tgz",
390
+ "integrity": "sha512-se/JjF8NlmKVG4kNIuyWMV/22ZaerB+qaSi5MdrXtd6R08kvs2qCN4C09miupktDitvh8jRFflwGFBQcxZRjbw==",
391
+ "cpu": [
392
+ "x64"
393
+ ],
394
+ "dev": true,
395
+ "license": "MIT",
396
+ "optional": true,
397
+ "os": [
398
+ "darwin"
399
+ ],
400
+ "engines": {
401
+ "node": ">=12"
402
+ }
403
+ },
404
+ "node_modules/@esbuild/freebsd-arm64": {
405
+ "version": "0.21.5",
406
+ "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.21.5.tgz",
407
+ "integrity": "sha512-5JcRxxRDUJLX8JXp/wcBCy3pENnCgBR9bN6JsY4OmhfUtIHe3ZW0mawA7+RDAcMLrMIZaf03NlQiX9DGyB8h4g==",
408
+ "cpu": [
409
+ "arm64"
410
+ ],
411
+ "dev": true,
412
+ "license": "MIT",
413
+ "optional": true,
414
+ "os": [
415
+ "freebsd"
416
+ ],
417
+ "engines": {
418
+ "node": ">=12"
419
+ }
420
+ },
421
+ "node_modules/@esbuild/freebsd-x64": {
422
+ "version": "0.21.5",
423
+ "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.21.5.tgz",
424
+ "integrity": "sha512-J95kNBj1zkbMXtHVH29bBriQygMXqoVQOQYA+ISs0/2l3T9/kj42ow2mpqerRBxDJnmkUDCaQT/dfNXWX/ZZCQ==",
425
+ "cpu": [
426
+ "x64"
427
+ ],
428
+ "dev": true,
429
+ "license": "MIT",
430
+ "optional": true,
431
+ "os": [
432
+ "freebsd"
433
+ ],
434
+ "engines": {
435
+ "node": ">=12"
436
+ }
437
+ },
438
+ "node_modules/@esbuild/linux-arm": {
439
+ "version": "0.21.5",
440
+ "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.21.5.tgz",
441
+ "integrity": "sha512-bPb5AHZtbeNGjCKVZ9UGqGwo8EUu4cLq68E95A53KlxAPRmUyYv2D6F0uUI65XisGOL1hBP5mTronbgo+0bFcA==",
442
+ "cpu": [
443
+ "arm"
444
+ ],
445
+ "dev": true,
446
+ "license": "MIT",
447
+ "optional": true,
448
+ "os": [
449
+ "linux"
450
+ ],
451
+ "engines": {
452
+ "node": ">=12"
453
+ }
454
+ },
455
+ "node_modules/@esbuild/linux-arm64": {
456
+ "version": "0.21.5",
457
+ "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.21.5.tgz",
458
+ "integrity": "sha512-ibKvmyYzKsBeX8d8I7MH/TMfWDXBF3db4qM6sy+7re0YXya+K1cem3on9XgdT2EQGMu4hQyZhan7TeQ8XkGp4Q==",
459
+ "cpu": [
460
+ "arm64"
461
+ ],
462
+ "dev": true,
463
+ "license": "MIT",
464
+ "optional": true,
465
+ "os": [
466
+ "linux"
467
+ ],
468
+ "engines": {
469
+ "node": ">=12"
470
+ }
471
+ },
472
+ "node_modules/@esbuild/linux-ia32": {
473
+ "version": "0.21.5",
474
+ "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.21.5.tgz",
475
+ "integrity": "sha512-YvjXDqLRqPDl2dvRODYmmhz4rPeVKYvppfGYKSNGdyZkA01046pLWyRKKI3ax8fbJoK5QbxblURkwK/MWY18Tg==",
476
+ "cpu": [
477
+ "ia32"
478
+ ],
479
+ "dev": true,
480
+ "license": "MIT",
481
+ "optional": true,
482
+ "os": [
483
+ "linux"
484
+ ],
485
+ "engines": {
486
+ "node": ">=12"
487
+ }
488
+ },
489
+ "node_modules/@esbuild/linux-loong64": {
490
+ "version": "0.21.5",
491
+ "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.21.5.tgz",
492
+ "integrity": "sha512-uHf1BmMG8qEvzdrzAqg2SIG/02+4/DHB6a9Kbya0XDvwDEKCoC8ZRWI5JJvNdUjtciBGFQ5PuBlpEOXQj+JQSg==",
493
+ "cpu": [
494
+ "loong64"
495
+ ],
496
+ "dev": true,
497
+ "license": "MIT",
498
+ "optional": true,
499
+ "os": [
500
+ "linux"
501
+ ],
502
+ "engines": {
503
+ "node": ">=12"
504
+ }
505
+ },
506
+ "node_modules/@esbuild/linux-mips64el": {
507
+ "version": "0.21.5",
508
+ "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.21.5.tgz",
509
+ "integrity": "sha512-IajOmO+KJK23bj52dFSNCMsz1QP1DqM6cwLUv3W1QwyxkyIWecfafnI555fvSGqEKwjMXVLokcV5ygHW5b3Jbg==",
510
+ "cpu": [
511
+ "mips64el"
512
+ ],
513
+ "dev": true,
514
+ "license": "MIT",
515
+ "optional": true,
516
+ "os": [
517
+ "linux"
518
+ ],
519
+ "engines": {
520
+ "node": ">=12"
521
+ }
522
+ },
523
+ "node_modules/@esbuild/linux-ppc64": {
524
+ "version": "0.21.5",
525
+ "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.21.5.tgz",
526
+ "integrity": "sha512-1hHV/Z4OEfMwpLO8rp7CvlhBDnjsC3CttJXIhBi+5Aj5r+MBvy4egg7wCbe//hSsT+RvDAG7s81tAvpL2XAE4w==",
527
+ "cpu": [
528
+ "ppc64"
529
+ ],
530
+ "dev": true,
531
+ "license": "MIT",
532
+ "optional": true,
533
+ "os": [
534
+ "linux"
535
+ ],
536
+ "engines": {
537
+ "node": ">=12"
538
+ }
539
+ },
540
+ "node_modules/@esbuild/linux-riscv64": {
541
+ "version": "0.21.5",
542
+ "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.21.5.tgz",
543
+ "integrity": "sha512-2HdXDMd9GMgTGrPWnJzP2ALSokE/0O5HhTUvWIbD3YdjME8JwvSCnNGBnTThKGEB91OZhzrJ4qIIxk/SBmyDDA==",
544
+ "cpu": [
545
+ "riscv64"
546
+ ],
547
+ "dev": true,
548
+ "license": "MIT",
549
+ "optional": true,
550
+ "os": [
551
+ "linux"
552
+ ],
553
+ "engines": {
554
+ "node": ">=12"
555
+ }
556
+ },
557
+ "node_modules/@esbuild/linux-s390x": {
558
+ "version": "0.21.5",
559
+ "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.21.5.tgz",
560
+ "integrity": "sha512-zus5sxzqBJD3eXxwvjN1yQkRepANgxE9lgOW2qLnmr8ikMTphkjgXu1HR01K4FJg8h1kEEDAqDcZQtbrRnB41A==",
561
+ "cpu": [
562
+ "s390x"
563
+ ],
564
+ "dev": true,
565
+ "license": "MIT",
566
+ "optional": true,
567
+ "os": [
568
+ "linux"
569
+ ],
570
+ "engines": {
571
+ "node": ">=12"
572
+ }
573
+ },
574
+ "node_modules/@esbuild/linux-x64": {
575
+ "version": "0.21.5",
576
+ "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.21.5.tgz",
577
+ "integrity": "sha512-1rYdTpyv03iycF1+BhzrzQJCdOuAOtaqHTWJZCWvijKD2N5Xu0TtVC8/+1faWqcP9iBCWOmjmhoH94dH82BxPQ==",
578
+ "cpu": [
579
+ "x64"
580
+ ],
581
+ "dev": true,
582
+ "license": "MIT",
583
+ "optional": true,
584
+ "os": [
585
+ "linux"
586
+ ],
587
+ "engines": {
588
+ "node": ">=12"
589
+ }
590
+ },
591
+ "node_modules/@esbuild/netbsd-x64": {
592
+ "version": "0.21.5",
593
+ "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.21.5.tgz",
594
+ "integrity": "sha512-Woi2MXzXjMULccIwMnLciyZH4nCIMpWQAs049KEeMvOcNADVxo0UBIQPfSmxB3CWKedngg7sWZdLvLczpe0tLg==",
595
+ "cpu": [
596
+ "x64"
597
+ ],
598
+ "dev": true,
599
+ "license": "MIT",
600
+ "optional": true,
601
+ "os": [
602
+ "netbsd"
603
+ ],
604
+ "engines": {
605
+ "node": ">=12"
606
+ }
607
+ },
608
+ "node_modules/@esbuild/openbsd-x64": {
609
+ "version": "0.21.5",
610
+ "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.21.5.tgz",
611
+ "integrity": "sha512-HLNNw99xsvx12lFBUwoT8EVCsSvRNDVxNpjZ7bPn947b8gJPzeHWyNVhFsaerc0n3TsbOINvRP2byTZ5LKezow==",
612
+ "cpu": [
613
+ "x64"
614
+ ],
615
+ "dev": true,
616
+ "license": "MIT",
617
+ "optional": true,
618
+ "os": [
619
+ "openbsd"
620
+ ],
621
+ "engines": {
622
+ "node": ">=12"
623
+ }
624
+ },
625
+ "node_modules/@esbuild/sunos-x64": {
626
+ "version": "0.21.5",
627
+ "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.21.5.tgz",
628
+ "integrity": "sha512-6+gjmFpfy0BHU5Tpptkuh8+uw3mnrvgs+dSPQXQOv3ekbordwnzTVEb4qnIvQcYXq6gzkyTnoZ9dZG+D4garKg==",
629
+ "cpu": [
630
+ "x64"
631
+ ],
632
+ "dev": true,
633
+ "license": "MIT",
634
+ "optional": true,
635
+ "os": [
636
+ "sunos"
637
+ ],
638
+ "engines": {
639
+ "node": ">=12"
640
+ }
641
+ },
642
+ "node_modules/@esbuild/win32-arm64": {
643
+ "version": "0.21.5",
644
+ "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.21.5.tgz",
645
+ "integrity": "sha512-Z0gOTd75VvXqyq7nsl93zwahcTROgqvuAcYDUr+vOv8uHhNSKROyU961kgtCD1e95IqPKSQKH7tBTslnS3tA8A==",
646
+ "cpu": [
647
+ "arm64"
648
+ ],
649
+ "dev": true,
650
+ "license": "MIT",
651
+ "optional": true,
652
+ "os": [
653
+ "win32"
654
+ ],
655
+ "engines": {
656
+ "node": ">=12"
657
+ }
658
+ },
659
+ "node_modules/@esbuild/win32-ia32": {
660
+ "version": "0.21.5",
661
+ "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.21.5.tgz",
662
+ "integrity": "sha512-SWXFF1CL2RVNMaVs+BBClwtfZSvDgtL//G/smwAc5oVK/UPu2Gu9tIaRgFmYFFKrmg3SyAjSrElf0TiJ1v8fYA==",
663
+ "cpu": [
664
+ "ia32"
665
+ ],
666
+ "dev": true,
667
+ "license": "MIT",
668
+ "optional": true,
669
+ "os": [
670
+ "win32"
671
+ ],
672
+ "engines": {
673
+ "node": ">=12"
674
+ }
675
+ },
676
+ "node_modules/@esbuild/win32-x64": {
677
+ "version": "0.21.5",
678
+ "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.21.5.tgz",
679
+ "integrity": "sha512-tQd/1efJuzPC6rCFwEvLtci/xNFcTZknmXs98FYDfGE4wP9ClFV98nyKrzJKVPMhdDnjzLhdUyMX4PsQAPjwIw==",
680
+ "cpu": [
681
+ "x64"
682
+ ],
683
+ "dev": true,
684
+ "license": "MIT",
685
+ "optional": true,
686
+ "os": [
687
+ "win32"
688
+ ],
689
+ "engines": {
690
+ "node": ">=12"
691
+ }
692
+ },
693
+ "node_modules/@jridgewell/gen-mapping": {
694
+ "version": "0.3.13",
695
+ "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz",
696
+ "integrity": "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA==",
697
+ "dev": true,
698
+ "license": "MIT",
699
+ "dependencies": {
700
+ "@jridgewell/sourcemap-codec": "^1.5.0",
701
+ "@jridgewell/trace-mapping": "^0.3.24"
702
+ }
703
+ },
704
+ "node_modules/@jridgewell/remapping": {
705
+ "version": "2.3.5",
706
+ "resolved": "https://registry.npmjs.org/@jridgewell/remapping/-/remapping-2.3.5.tgz",
707
+ "integrity": "sha512-LI9u/+laYG4Ds1TDKSJW2YPrIlcVYOwi2fUC6xB43lueCjgxV4lffOCZCtYFiH6TNOX+tQKXx97T4IKHbhyHEQ==",
708
+ "dev": true,
709
+ "license": "MIT",
710
+ "dependencies": {
711
+ "@jridgewell/gen-mapping": "^0.3.5",
712
+ "@jridgewell/trace-mapping": "^0.3.24"
713
+ }
714
+ },
715
+ "node_modules/@jridgewell/resolve-uri": {
716
+ "version": "3.1.2",
717
+ "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz",
718
+ "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==",
719
+ "dev": true,
720
+ "license": "MIT",
721
+ "engines": {
722
+ "node": ">=6.0.0"
723
+ }
724
+ },
725
+ "node_modules/@jridgewell/sourcemap-codec": {
726
+ "version": "1.5.5",
727
+ "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz",
728
+ "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==",
729
+ "dev": true,
730
+ "license": "MIT"
731
+ },
732
+ "node_modules/@jridgewell/trace-mapping": {
733
+ "version": "0.3.31",
734
+ "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.31.tgz",
735
+ "integrity": "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==",
736
+ "dev": true,
737
+ "license": "MIT",
738
+ "dependencies": {
739
+ "@jridgewell/resolve-uri": "^3.1.0",
740
+ "@jridgewell/sourcemap-codec": "^1.4.14"
741
+ }
742
+ },
743
+ "node_modules/@rolldown/pluginutils": {
744
+ "version": "1.0.0-beta.27",
745
+ "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.27.tgz",
746
+ "integrity": "sha512-+d0F4MKMCbeVUJwG96uQ4SgAznZNSq93I3V+9NHA4OpvqG8mRCpGdKmK8l/dl02h2CCDHwW2FqilnTyDcAnqjA==",
747
+ "dev": true,
748
+ "license": "MIT"
749
+ },
750
+ "node_modules/@rollup/rollup-android-arm-eabi": {
751
+ "version": "4.60.2",
752
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.60.2.tgz",
753
+ "integrity": "sha512-dnlp69efPPg6Uaw2dVqzWRfAWRnYVb1XJ8CyyhIbZeaq4CA5/mLeZ1IEt9QqQxmbdvagjLIm2ZL8BxXv5lH4Yw==",
754
+ "cpu": [
755
+ "arm"
756
+ ],
757
+ "dev": true,
758
+ "license": "MIT",
759
+ "optional": true,
760
+ "os": [
761
+ "android"
762
+ ]
763
+ },
764
+ "node_modules/@rollup/rollup-android-arm64": {
765
+ "version": "4.60.2",
766
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.60.2.tgz",
767
+ "integrity": "sha512-OqZTwDRDchGRHHm/hwLOL7uVPB9aUvI0am/eQuWMNyFHf5PSEQmyEeYYheA0EPPKUO/l0uigCp+iaTjoLjVoHg==",
768
+ "cpu": [
769
+ "arm64"
770
+ ],
771
+ "dev": true,
772
+ "license": "MIT",
773
+ "optional": true,
774
+ "os": [
775
+ "android"
776
+ ]
777
+ },
778
+ "node_modules/@rollup/rollup-darwin-arm64": {
779
+ "version": "4.60.2",
780
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.60.2.tgz",
781
+ "integrity": "sha512-UwRE7CGpvSVEQS8gUMBe1uADWjNnVgP3Iusyda1nSRwNDCsRjnGc7w6El6WLQsXmZTbLZx9cecegumcitNfpmA==",
782
+ "cpu": [
783
+ "arm64"
784
+ ],
785
+ "dev": true,
786
+ "license": "MIT",
787
+ "optional": true,
788
+ "os": [
789
+ "darwin"
790
+ ]
791
+ },
792
+ "node_modules/@rollup/rollup-darwin-x64": {
793
+ "version": "4.60.2",
794
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.60.2.tgz",
795
+ "integrity": "sha512-gjEtURKLCC5VXm1I+2i1u9OhxFsKAQJKTVB8WvDAHF+oZlq0GTVFOlTlO1q3AlCTE/DF32c16ESvfgqR7343/g==",
796
+ "cpu": [
797
+ "x64"
798
+ ],
799
+ "dev": true,
800
+ "license": "MIT",
801
+ "optional": true,
802
+ "os": [
803
+ "darwin"
804
+ ]
805
+ },
806
+ "node_modules/@rollup/rollup-freebsd-arm64": {
807
+ "version": "4.60.2",
808
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.60.2.tgz",
809
+ "integrity": "sha512-Bcl6CYDeAgE70cqZaMojOi/eK63h5Me97ZqAQoh77VPjMysA/4ORQBRGo3rRy45x4MzVlU9uZxs8Uwy7ZaKnBw==",
810
+ "cpu": [
811
+ "arm64"
812
+ ],
813
+ "dev": true,
814
+ "license": "MIT",
815
+ "optional": true,
816
+ "os": [
817
+ "freebsd"
818
+ ]
819
+ },
820
+ "node_modules/@rollup/rollup-freebsd-x64": {
821
+ "version": "4.60.2",
822
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.60.2.tgz",
823
+ "integrity": "sha512-LU+TPda3mAE2QB0/Hp5VyeKJivpC6+tlOXd1VMoXV/YFMvk/MNk5iXeBfB4MQGRWyOYVJ01625vjkr0Az98OJQ==",
824
+ "cpu": [
825
+ "x64"
826
+ ],
827
+ "dev": true,
828
+ "license": "MIT",
829
+ "optional": true,
830
+ "os": [
831
+ "freebsd"
832
+ ]
833
+ },
834
+ "node_modules/@rollup/rollup-linux-arm-gnueabihf": {
835
+ "version": "4.60.2",
836
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.60.2.tgz",
837
+ "integrity": "sha512-2QxQrM+KQ7DAW4o22j+XZ6RKdxjLD7BOWTP0Bv0tmjdyhXSsr2Ul1oJDQqh9Zf5qOwTuTc7Ek83mOFaKnodPjg==",
838
+ "cpu": [
839
+ "arm"
840
+ ],
841
+ "dev": true,
842
+ "license": "MIT",
843
+ "optional": true,
844
+ "os": [
845
+ "linux"
846
+ ]
847
+ },
848
+ "node_modules/@rollup/rollup-linux-arm-musleabihf": {
849
+ "version": "4.60.2",
850
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.60.2.tgz",
851
+ "integrity": "sha512-TbziEu2DVsTEOPif2mKWkMeDMLoYjx95oESa9fkQQK7r/Orta0gnkcDpzwufEcAO2BLBsD7mZkXGFqEdMRRwfw==",
852
+ "cpu": [
853
+ "arm"
854
+ ],
855
+ "dev": true,
856
+ "license": "MIT",
857
+ "optional": true,
858
+ "os": [
859
+ "linux"
860
+ ]
861
+ },
862
+ "node_modules/@rollup/rollup-linux-arm64-gnu": {
863
+ "version": "4.60.2",
864
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.60.2.tgz",
865
+ "integrity": "sha512-bO/rVDiDUuM2YfuCUwZ1t1cP+/yqjqz+Xf2VtkdppefuOFS2OSeAfgafaHNkFn0t02hEyXngZkxtGqXcXwO8Rg==",
866
+ "cpu": [
867
+ "arm64"
868
+ ],
869
+ "dev": true,
870
+ "license": "MIT",
871
+ "optional": true,
872
+ "os": [
873
+ "linux"
874
+ ]
875
+ },
876
+ "node_modules/@rollup/rollup-linux-arm64-musl": {
877
+ "version": "4.60.2",
878
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.60.2.tgz",
879
+ "integrity": "sha512-hr26p7e93Rl0Za+JwW7EAnwAvKkehh12BU1Llm9Ykiibg4uIr2rbpxG9WCf56GuvidlTG9KiiQT/TXT1yAWxTA==",
880
+ "cpu": [
881
+ "arm64"
882
+ ],
883
+ "dev": true,
884
+ "license": "MIT",
885
+ "optional": true,
886
+ "os": [
887
+ "linux"
888
+ ]
889
+ },
890
+ "node_modules/@rollup/rollup-linux-loong64-gnu": {
891
+ "version": "4.60.2",
892
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.60.2.tgz",
893
+ "integrity": "sha512-pOjB/uSIyDt+ow3k/RcLvUAOGpysT2phDn7TTUB3n75SlIgZzM6NKAqlErPhoFU+npgY3/n+2HYIQVbF70P9/A==",
894
+ "cpu": [
895
+ "loong64"
896
+ ],
897
+ "dev": true,
898
+ "license": "MIT",
899
+ "optional": true,
900
+ "os": [
901
+ "linux"
902
+ ]
903
+ },
904
+ "node_modules/@rollup/rollup-linux-loong64-musl": {
905
+ "version": "4.60.2",
906
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.60.2.tgz",
907
+ "integrity": "sha512-2/w+q8jszv9Ww1c+6uJT3OwqhdmGP2/4T17cu8WuwyUuuaCDDJ2ojdyYwZzCxx0GcsZBhzi3HmH+J5pZNXnd+Q==",
908
+ "cpu": [
909
+ "loong64"
910
+ ],
911
+ "dev": true,
912
+ "license": "MIT",
913
+ "optional": true,
914
+ "os": [
915
+ "linux"
916
+ ]
917
+ },
918
+ "node_modules/@rollup/rollup-linux-ppc64-gnu": {
919
+ "version": "4.60.2",
920
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.60.2.tgz",
921
+ "integrity": "sha512-11+aL5vKheYgczxtPVVRhdptAM2H7fcDR5Gw4/bTcteuZBlH4oP9f5s9zYO9aGZvoGeBpqXI/9TZZihZ609wKw==",
922
+ "cpu": [
923
+ "ppc64"
924
+ ],
925
+ "dev": true,
926
+ "license": "MIT",
927
+ "optional": true,
928
+ "os": [
929
+ "linux"
930
+ ]
931
+ },
932
+ "node_modules/@rollup/rollup-linux-ppc64-musl": {
933
+ "version": "4.60.2",
934
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.60.2.tgz",
935
+ "integrity": "sha512-i16fokAGK46IVZuV8LIIwMdtqhin9hfYkCh8pf8iC3QU3LpwL+1FSFGej+O7l3E/AoknL6Dclh2oTdnRMpTzFQ==",
936
+ "cpu": [
937
+ "ppc64"
938
+ ],
939
+ "dev": true,
940
+ "license": "MIT",
941
+ "optional": true,
942
+ "os": [
943
+ "linux"
944
+ ]
945
+ },
946
+ "node_modules/@rollup/rollup-linux-riscv64-gnu": {
947
+ "version": "4.60.2",
948
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.60.2.tgz",
949
+ "integrity": "sha512-49FkKS6RGQoriDSK/6E2GkAsAuU5kETFCh7pG4yD/ylj9rKhTmO3elsnmBvRD4PgJPds5W2PkhC82aVwmUcJ7A==",
950
+ "cpu": [
951
+ "riscv64"
952
+ ],
953
+ "dev": true,
954
+ "license": "MIT",
955
+ "optional": true,
956
+ "os": [
957
+ "linux"
958
+ ]
959
+ },
960
+ "node_modules/@rollup/rollup-linux-riscv64-musl": {
961
+ "version": "4.60.2",
962
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.60.2.tgz",
963
+ "integrity": "sha512-mjYNkHPfGpUR00DuM1ZZIgs64Hpf4bWcz9Z41+4Q+pgDx73UwWdAYyf6EG/lRFldmdHHzgrYyge5akFUW0D3mQ==",
964
+ "cpu": [
965
+ "riscv64"
966
+ ],
967
+ "dev": true,
968
+ "license": "MIT",
969
+ "optional": true,
970
+ "os": [
971
+ "linux"
972
+ ]
973
+ },
974
+ "node_modules/@rollup/rollup-linux-s390x-gnu": {
975
+ "version": "4.60.2",
976
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.60.2.tgz",
977
+ "integrity": "sha512-ALyvJz965BQk8E9Al/JDKKDLH2kfKFLTGMlgkAbbYtZuJt9LU8DW3ZoDMCtQpXAltZxwBHevXz5u+gf0yA0YoA==",
978
+ "cpu": [
979
+ "s390x"
980
+ ],
981
+ "dev": true,
982
+ "license": "MIT",
983
+ "optional": true,
984
+ "os": [
985
+ "linux"
986
+ ]
987
+ },
988
+ "node_modules/@rollup/rollup-linux-x64-gnu": {
989
+ "version": "4.60.2",
990
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.60.2.tgz",
991
+ "integrity": "sha512-UQjrkIdWrKI626Du8lCQ6MJp/6V1LAo2bOK9OTu4mSn8GGXIkPXk/Vsp4bLHCd9Z9Iz2OTEaokUE90VweJgIYQ==",
992
+ "cpu": [
993
+ "x64"
994
+ ],
995
+ "dev": true,
996
+ "license": "MIT",
997
+ "optional": true,
998
+ "os": [
999
+ "linux"
1000
+ ]
1001
+ },
1002
+ "node_modules/@rollup/rollup-linux-x64-musl": {
1003
+ "version": "4.60.2",
1004
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.60.2.tgz",
1005
+ "integrity": "sha512-bTsRGj6VlSdn/XD4CGyzMnzaBs9bsRxy79eTqTCBsA8TMIEky7qg48aPkvJvFe1HyzQ5oMZdg7AnVlWQSKLTnw==",
1006
+ "cpu": [
1007
+ "x64"
1008
+ ],
1009
+ "dev": true,
1010
+ "license": "MIT",
1011
+ "optional": true,
1012
+ "os": [
1013
+ "linux"
1014
+ ]
1015
+ },
1016
+ "node_modules/@rollup/rollup-openbsd-x64": {
1017
+ "version": "4.60.2",
1018
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.60.2.tgz",
1019
+ "integrity": "sha512-6d4Z3534xitaA1FcMWP7mQPq5zGwBmGbhphh2DwaA1aNIXUu3KTOfwrWpbwI4/Gr0uANo7NTtaykFyO2hPuFLg==",
1020
+ "cpu": [
1021
+ "x64"
1022
+ ],
1023
+ "dev": true,
1024
+ "license": "MIT",
1025
+ "optional": true,
1026
+ "os": [
1027
+ "openbsd"
1028
+ ]
1029
+ },
1030
+ "node_modules/@rollup/rollup-openharmony-arm64": {
1031
+ "version": "4.60.2",
1032
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.60.2.tgz",
1033
+ "integrity": "sha512-NetAg5iO2uN7eB8zE5qrZ3CSil+7IJt4WDFLcC75Ymywq1VZVD6qJ6EvNLjZ3rEm6gB7XW5JdT60c6MN35Z85Q==",
1034
+ "cpu": [
1035
+ "arm64"
1036
+ ],
1037
+ "dev": true,
1038
+ "license": "MIT",
1039
+ "optional": true,
1040
+ "os": [
1041
+ "openharmony"
1042
+ ]
1043
+ },
1044
+ "node_modules/@rollup/rollup-win32-arm64-msvc": {
1045
+ "version": "4.60.2",
1046
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.60.2.tgz",
1047
+ "integrity": "sha512-NCYhOotpgWZ5kdxCZsv6Iudx0wX8980Q/oW4pNFNihpBKsDbEA1zpkfxJGC0yugsUuyDZ7gL37dbzwhR0VI7pQ==",
1048
+ "cpu": [
1049
+ "arm64"
1050
+ ],
1051
+ "dev": true,
1052
+ "license": "MIT",
1053
+ "optional": true,
1054
+ "os": [
1055
+ "win32"
1056
+ ]
1057
+ },
1058
+ "node_modules/@rollup/rollup-win32-ia32-msvc": {
1059
+ "version": "4.60.2",
1060
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.60.2.tgz",
1061
+ "integrity": "sha512-RXsaOqXxfoUBQoOgvmmijVxJnW2IGB0eoMO7F8FAjaj0UTywUO/luSqimWBJn04WNgUkeNhh7fs7pESXajWmkg==",
1062
+ "cpu": [
1063
+ "ia32"
1064
+ ],
1065
+ "dev": true,
1066
+ "license": "MIT",
1067
+ "optional": true,
1068
+ "os": [
1069
+ "win32"
1070
+ ]
1071
+ },
1072
+ "node_modules/@rollup/rollup-win32-x64-gnu": {
1073
+ "version": "4.60.2",
1074
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.60.2.tgz",
1075
+ "integrity": "sha512-qdAzEULD+/hzObedtmV6iBpdL5TIbKVztGiK7O3/KYSf+HIzU257+MX1EXJcyIiDbMAqmbwaufcYPvyRryeZtA==",
1076
+ "cpu": [
1077
+ "x64"
1078
+ ],
1079
+ "dev": true,
1080
+ "license": "MIT",
1081
+ "optional": true,
1082
+ "os": [
1083
+ "win32"
1084
+ ]
1085
+ },
1086
+ "node_modules/@rollup/rollup-win32-x64-msvc": {
1087
+ "version": "4.60.2",
1088
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.60.2.tgz",
1089
+ "integrity": "sha512-Nd/SgG27WoA9e+/TdK74KnHz852TLa94ovOYySo/yMPuTmpckK/jIF2jSwS3g7ELSKXK13/cVdmg1Z/DaCWKxA==",
1090
+ "cpu": [
1091
+ "x64"
1092
+ ],
1093
+ "dev": true,
1094
+ "license": "MIT",
1095
+ "optional": true,
1096
+ "os": [
1097
+ "win32"
1098
+ ]
1099
+ },
1100
+ "node_modules/@types/babel__core": {
1101
+ "version": "7.20.5",
1102
+ "resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz",
1103
+ "integrity": "sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==",
1104
+ "dev": true,
1105
+ "license": "MIT",
1106
+ "dependencies": {
1107
+ "@babel/parser": "^7.20.7",
1108
+ "@babel/types": "^7.20.7",
1109
+ "@types/babel__generator": "*",
1110
+ "@types/babel__template": "*",
1111
+ "@types/babel__traverse": "*"
1112
+ }
1113
+ },
1114
+ "node_modules/@types/babel__generator": {
1115
+ "version": "7.27.0",
1116
+ "resolved": "https://registry.npmjs.org/@types/babel__generator/-/babel__generator-7.27.0.tgz",
1117
+ "integrity": "sha512-ufFd2Xi92OAVPYsy+P4n7/U7e68fex0+Ee8gSG9KX7eo084CWiQ4sdxktvdl0bOPupXtVJPY19zk6EwWqUQ8lg==",
1118
+ "dev": true,
1119
+ "license": "MIT",
1120
+ "dependencies": {
1121
+ "@babel/types": "^7.0.0"
1122
+ }
1123
+ },
1124
+ "node_modules/@types/babel__template": {
1125
+ "version": "7.4.4",
1126
+ "resolved": "https://registry.npmjs.org/@types/babel__template/-/babel__template-7.4.4.tgz",
1127
+ "integrity": "sha512-h/NUaSyG5EyxBIp8YRxo4RMe2/qQgvyowRwVMzhYhBCONbW8PUsg4lkFMrhgZhUe5z3L3MiLDuvyJ/CaPa2A8A==",
1128
+ "dev": true,
1129
+ "license": "MIT",
1130
+ "dependencies": {
1131
+ "@babel/parser": "^7.1.0",
1132
+ "@babel/types": "^7.0.0"
1133
+ }
1134
+ },
1135
+ "node_modules/@types/babel__traverse": {
1136
+ "version": "7.28.0",
1137
+ "resolved": "https://registry.npmjs.org/@types/babel__traverse/-/babel__traverse-7.28.0.tgz",
1138
+ "integrity": "sha512-8PvcXf70gTDZBgt9ptxJ8elBeBjcLOAcOtoO/mPJjtji1+CdGbHgm77om1GrsPxsiE+uXIpNSK64UYaIwQXd4Q==",
1139
+ "dev": true,
1140
+ "license": "MIT",
1141
+ "dependencies": {
1142
+ "@babel/types": "^7.28.2"
1143
+ }
1144
+ },
1145
+ "node_modules/@types/estree": {
1146
+ "version": "1.0.8",
1147
+ "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz",
1148
+ "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==",
1149
+ "dev": true,
1150
+ "license": "MIT"
1151
+ },
1152
+ "node_modules/@vitejs/plugin-react": {
1153
+ "version": "4.7.0",
1154
+ "resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-4.7.0.tgz",
1155
+ "integrity": "sha512-gUu9hwfWvvEDBBmgtAowQCojwZmJ5mcLn3aufeCsitijs3+f2NsrPtlAWIR6OPiqljl96GVCUbLe0HyqIpVaoA==",
1156
+ "dev": true,
1157
+ "license": "MIT",
1158
+ "dependencies": {
1159
+ "@babel/core": "^7.28.0",
1160
+ "@babel/plugin-transform-react-jsx-self": "^7.27.1",
1161
+ "@babel/plugin-transform-react-jsx-source": "^7.27.1",
1162
+ "@rolldown/pluginutils": "1.0.0-beta.27",
1163
+ "@types/babel__core": "^7.20.5",
1164
+ "react-refresh": "^0.17.0"
1165
+ },
1166
+ "engines": {
1167
+ "node": "^14.18.0 || >=16.0.0"
1168
+ },
1169
+ "peerDependencies": {
1170
+ "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0"
1171
+ }
1172
+ },
1173
+ "node_modules/baseline-browser-mapping": {
1174
+ "version": "2.10.21",
1175
+ "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.21.tgz",
1176
+ "integrity": "sha512-Q+rUQ7Uz8AHM7DEaNdwvfFCTq7a43lNTzuS94eiWqwyxfV/wJv+oUivef51T91mmRY4d4A1u9rcSvkeufCVXlA==",
1177
+ "dev": true,
1178
+ "license": "Apache-2.0",
1179
+ "bin": {
1180
+ "baseline-browser-mapping": "dist/cli.cjs"
1181
+ },
1182
+ "engines": {
1183
+ "node": ">=6.0.0"
1184
+ }
1185
+ },
1186
+ "node_modules/browserslist": {
1187
+ "version": "4.28.2",
1188
+ "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.28.2.tgz",
1189
+ "integrity": "sha512-48xSriZYYg+8qXna9kwqjIVzuQxi+KYWp2+5nCYnYKPTr0LvD89Jqk2Or5ogxz0NUMfIjhh2lIUX/LyX9B4oIg==",
1190
+ "dev": true,
1191
+ "funding": [
1192
+ {
1193
+ "type": "opencollective",
1194
+ "url": "https://opencollective.com/browserslist"
1195
+ },
1196
+ {
1197
+ "type": "tidelift",
1198
+ "url": "https://tidelift.com/funding/github/npm/browserslist"
1199
+ },
1200
+ {
1201
+ "type": "github",
1202
+ "url": "https://github.com/sponsors/ai"
1203
+ }
1204
+ ],
1205
+ "license": "MIT",
1206
+ "peer": true,
1207
+ "dependencies": {
1208
+ "baseline-browser-mapping": "^2.10.12",
1209
+ "caniuse-lite": "^1.0.30001782",
1210
+ "electron-to-chromium": "^1.5.328",
1211
+ "node-releases": "^2.0.36",
1212
+ "update-browserslist-db": "^1.2.3"
1213
+ },
1214
+ "bin": {
1215
+ "browserslist": "cli.js"
1216
+ },
1217
+ "engines": {
1218
+ "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7"
1219
+ }
1220
+ },
1221
+ "node_modules/caniuse-lite": {
1222
+ "version": "1.0.30001790",
1223
+ "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001790.tgz",
1224
+ "integrity": "sha512-bOoxfJPyYo+ds6W0YfptaCWbFnJYjh2Y1Eow5lRv+vI2u8ganPZqNm1JwNh0t2ELQCqIWg4B3dWEusgAmsoyOw==",
1225
+ "dev": true,
1226
+ "funding": [
1227
+ {
1228
+ "type": "opencollective",
1229
+ "url": "https://opencollective.com/browserslist"
1230
+ },
1231
+ {
1232
+ "type": "tidelift",
1233
+ "url": "https://tidelift.com/funding/github/npm/caniuse-lite"
1234
+ },
1235
+ {
1236
+ "type": "github",
1237
+ "url": "https://github.com/sponsors/ai"
1238
+ }
1239
+ ],
1240
+ "license": "CC-BY-4.0"
1241
+ },
1242
+ "node_modules/convert-source-map": {
1243
+ "version": "2.0.0",
1244
+ "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz",
1245
+ "integrity": "sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==",
1246
+ "dev": true,
1247
+ "license": "MIT"
1248
+ },
1249
+ "node_modules/debug": {
1250
+ "version": "4.4.3",
1251
+ "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
1252
+ "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==",
1253
+ "dev": true,
1254
+ "license": "MIT",
1255
+ "dependencies": {
1256
+ "ms": "^2.1.3"
1257
+ },
1258
+ "engines": {
1259
+ "node": ">=6.0"
1260
+ },
1261
+ "peerDependenciesMeta": {
1262
+ "supports-color": {
1263
+ "optional": true
1264
+ }
1265
+ }
1266
+ },
1267
+ "node_modules/electron-to-chromium": {
1268
+ "version": "1.5.344",
1269
+ "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.344.tgz",
1270
+ "integrity": "sha512-4MxfbmNDm+KPh066EZy+eUnkcDPcZ35wNmOWzFuh/ijvHsve6kbLTLURy88uCNK5FbpN+yk2nQY6BYh1GEt+wg==",
1271
+ "dev": true,
1272
+ "license": "ISC"
1273
+ },
1274
+ "node_modules/esbuild": {
1275
+ "version": "0.21.5",
1276
+ "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.21.5.tgz",
1277
+ "integrity": "sha512-mg3OPMV4hXywwpoDxu3Qda5xCKQi+vCTZq8S9J/EpkhB2HzKXq4SNFZE3+NK93JYxc8VMSep+lOUSC/RVKaBqw==",
1278
+ "dev": true,
1279
+ "hasInstallScript": true,
1280
+ "license": "MIT",
1281
+ "bin": {
1282
+ "esbuild": "bin/esbuild"
1283
+ },
1284
+ "engines": {
1285
+ "node": ">=12"
1286
+ },
1287
+ "optionalDependencies": {
1288
+ "@esbuild/aix-ppc64": "0.21.5",
1289
+ "@esbuild/android-arm": "0.21.5",
1290
+ "@esbuild/android-arm64": "0.21.5",
1291
+ "@esbuild/android-x64": "0.21.5",
1292
+ "@esbuild/darwin-arm64": "0.21.5",
1293
+ "@esbuild/darwin-x64": "0.21.5",
1294
+ "@esbuild/freebsd-arm64": "0.21.5",
1295
+ "@esbuild/freebsd-x64": "0.21.5",
1296
+ "@esbuild/linux-arm": "0.21.5",
1297
+ "@esbuild/linux-arm64": "0.21.5",
1298
+ "@esbuild/linux-ia32": "0.21.5",
1299
+ "@esbuild/linux-loong64": "0.21.5",
1300
+ "@esbuild/linux-mips64el": "0.21.5",
1301
+ "@esbuild/linux-ppc64": "0.21.5",
1302
+ "@esbuild/linux-riscv64": "0.21.5",
1303
+ "@esbuild/linux-s390x": "0.21.5",
1304
+ "@esbuild/linux-x64": "0.21.5",
1305
+ "@esbuild/netbsd-x64": "0.21.5",
1306
+ "@esbuild/openbsd-x64": "0.21.5",
1307
+ "@esbuild/sunos-x64": "0.21.5",
1308
+ "@esbuild/win32-arm64": "0.21.5",
1309
+ "@esbuild/win32-ia32": "0.21.5",
1310
+ "@esbuild/win32-x64": "0.21.5"
1311
+ }
1312
+ },
1313
+ "node_modules/escalade": {
1314
+ "version": "3.2.0",
1315
+ "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
1316
+ "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==",
1317
+ "dev": true,
1318
+ "license": "MIT",
1319
+ "engines": {
1320
+ "node": ">=6"
1321
+ }
1322
+ },
1323
+ "node_modules/fsevents": {
1324
+ "version": "2.3.3",
1325
+ "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
1326
+ "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
1327
+ "dev": true,
1328
+ "hasInstallScript": true,
1329
+ "license": "MIT",
1330
+ "optional": true,
1331
+ "os": [
1332
+ "darwin"
1333
+ ],
1334
+ "engines": {
1335
+ "node": "^8.16.0 || ^10.6.0 || >=11.0.0"
1336
+ }
1337
+ },
1338
+ "node_modules/gensync": {
1339
+ "version": "1.0.0-beta.2",
1340
+ "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz",
1341
+ "integrity": "sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==",
1342
+ "dev": true,
1343
+ "license": "MIT",
1344
+ "engines": {
1345
+ "node": ">=6.9.0"
1346
+ }
1347
+ },
1348
+ "node_modules/js-tokens": {
1349
+ "version": "4.0.0",
1350
+ "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
1351
+ "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==",
1352
+ "license": "MIT"
1353
+ },
1354
+ "node_modules/jsesc": {
1355
+ "version": "3.1.0",
1356
+ "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-3.1.0.tgz",
1357
+ "integrity": "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA==",
1358
+ "dev": true,
1359
+ "license": "MIT",
1360
+ "bin": {
1361
+ "jsesc": "bin/jsesc"
1362
+ },
1363
+ "engines": {
1364
+ "node": ">=6"
1365
+ }
1366
+ },
1367
+ "node_modules/json5": {
1368
+ "version": "2.2.3",
1369
+ "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz",
1370
+ "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==",
1371
+ "dev": true,
1372
+ "license": "MIT",
1373
+ "bin": {
1374
+ "json5": "lib/cli.js"
1375
+ },
1376
+ "engines": {
1377
+ "node": ">=6"
1378
+ }
1379
+ },
1380
+ "node_modules/loose-envify": {
1381
+ "version": "1.4.0",
1382
+ "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
1383
+ "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==",
1384
+ "license": "MIT",
1385
+ "dependencies": {
1386
+ "js-tokens": "^3.0.0 || ^4.0.0"
1387
+ },
1388
+ "bin": {
1389
+ "loose-envify": "cli.js"
1390
+ }
1391
+ },
1392
+ "node_modules/lru-cache": {
1393
+ "version": "5.1.1",
1394
+ "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz",
1395
+ "integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==",
1396
+ "dev": true,
1397
+ "license": "ISC",
1398
+ "dependencies": {
1399
+ "yallist": "^3.0.2"
1400
+ }
1401
+ },
1402
+ "node_modules/ms": {
1403
+ "version": "2.1.3",
1404
+ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
1405
+ "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
1406
+ "dev": true,
1407
+ "license": "MIT"
1408
+ },
1409
+ "node_modules/nanoid": {
1410
+ "version": "3.3.11",
1411
+ "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz",
1412
+ "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==",
1413
+ "dev": true,
1414
+ "funding": [
1415
+ {
1416
+ "type": "github",
1417
+ "url": "https://github.com/sponsors/ai"
1418
+ }
1419
+ ],
1420
+ "license": "MIT",
1421
+ "bin": {
1422
+ "nanoid": "bin/nanoid.cjs"
1423
+ },
1424
+ "engines": {
1425
+ "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1"
1426
+ }
1427
+ },
1428
+ "node_modules/node-releases": {
1429
+ "version": "2.0.38",
1430
+ "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.38.tgz",
1431
+ "integrity": "sha512-3qT/88Y3FbH/Kx4szpQQ4HzUbVrHPKTLVpVocKiLfoYvw9XSGOX2FmD2d6DrXbVYyAQTF2HeF6My8jmzx7/CRw==",
1432
+ "dev": true,
1433
+ "license": "MIT"
1434
+ },
1435
+ "node_modules/picocolors": {
1436
+ "version": "1.1.1",
1437
+ "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz",
1438
+ "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==",
1439
+ "dev": true,
1440
+ "license": "ISC"
1441
+ },
1442
+ "node_modules/postcss": {
1443
+ "version": "8.5.10",
1444
+ "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.10.tgz",
1445
+ "integrity": "sha512-pMMHxBOZKFU6HgAZ4eyGnwXF/EvPGGqUr0MnZ5+99485wwW41kW91A4LOGxSHhgugZmSChL5AlElNdwlNgcnLQ==",
1446
+ "dev": true,
1447
+ "funding": [
1448
+ {
1449
+ "type": "opencollective",
1450
+ "url": "https://opencollective.com/postcss/"
1451
+ },
1452
+ {
1453
+ "type": "tidelift",
1454
+ "url": "https://tidelift.com/funding/github/npm/postcss"
1455
+ },
1456
+ {
1457
+ "type": "github",
1458
+ "url": "https://github.com/sponsors/ai"
1459
+ }
1460
+ ],
1461
+ "license": "MIT",
1462
+ "dependencies": {
1463
+ "nanoid": "^3.3.11",
1464
+ "picocolors": "^1.1.1",
1465
+ "source-map-js": "^1.2.1"
1466
+ },
1467
+ "engines": {
1468
+ "node": "^10 || ^12 || >=14"
1469
+ }
1470
+ },
1471
+ "node_modules/react": {
1472
+ "version": "18.3.1",
1473
+ "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz",
1474
+ "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==",
1475
+ "license": "MIT",
1476
+ "peer": true,
1477
+ "dependencies": {
1478
+ "loose-envify": "^1.1.0"
1479
+ },
1480
+ "engines": {
1481
+ "node": ">=0.10.0"
1482
+ }
1483
+ },
1484
+ "node_modules/react-dom": {
1485
+ "version": "18.3.1",
1486
+ "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz",
1487
+ "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==",
1488
+ "license": "MIT",
1489
+ "dependencies": {
1490
+ "loose-envify": "^1.1.0",
1491
+ "scheduler": "^0.23.2"
1492
+ },
1493
+ "peerDependencies": {
1494
+ "react": "^18.3.1"
1495
+ }
1496
+ },
1497
+ "node_modules/react-refresh": {
1498
+ "version": "0.17.0",
1499
+ "resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.17.0.tgz",
1500
+ "integrity": "sha512-z6F7K9bV85EfseRCp2bzrpyQ0Gkw1uLoCel9XBVWPg/TjRj94SkJzUTGfOa4bs7iJvBWtQG0Wq7wnI0syw3EBQ==",
1501
+ "dev": true,
1502
+ "license": "MIT",
1503
+ "engines": {
1504
+ "node": ">=0.10.0"
1505
+ }
1506
+ },
1507
+ "node_modules/rollup": {
1508
+ "version": "4.60.2",
1509
+ "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.60.2.tgz",
1510
+ "integrity": "sha512-J9qZyW++QK/09NyN/zeO0dG/1GdGfyp9lV8ajHnRVLfo/uFsbji5mHnDgn/qYdUHyCkM2N+8VyspgZclfAh0eQ==",
1511
+ "dev": true,
1512
+ "license": "MIT",
1513
+ "dependencies": {
1514
+ "@types/estree": "1.0.8"
1515
+ },
1516
+ "bin": {
1517
+ "rollup": "dist/bin/rollup"
1518
+ },
1519
+ "engines": {
1520
+ "node": ">=18.0.0",
1521
+ "npm": ">=8.0.0"
1522
+ },
1523
+ "optionalDependencies": {
1524
+ "@rollup/rollup-android-arm-eabi": "4.60.2",
1525
+ "@rollup/rollup-android-arm64": "4.60.2",
1526
+ "@rollup/rollup-darwin-arm64": "4.60.2",
1527
+ "@rollup/rollup-darwin-x64": "4.60.2",
1528
+ "@rollup/rollup-freebsd-arm64": "4.60.2",
1529
+ "@rollup/rollup-freebsd-x64": "4.60.2",
1530
+ "@rollup/rollup-linux-arm-gnueabihf": "4.60.2",
1531
+ "@rollup/rollup-linux-arm-musleabihf": "4.60.2",
1532
+ "@rollup/rollup-linux-arm64-gnu": "4.60.2",
1533
+ "@rollup/rollup-linux-arm64-musl": "4.60.2",
1534
+ "@rollup/rollup-linux-loong64-gnu": "4.60.2",
1535
+ "@rollup/rollup-linux-loong64-musl": "4.60.2",
1536
+ "@rollup/rollup-linux-ppc64-gnu": "4.60.2",
1537
+ "@rollup/rollup-linux-ppc64-musl": "4.60.2",
1538
+ "@rollup/rollup-linux-riscv64-gnu": "4.60.2",
1539
+ "@rollup/rollup-linux-riscv64-musl": "4.60.2",
1540
+ "@rollup/rollup-linux-s390x-gnu": "4.60.2",
1541
+ "@rollup/rollup-linux-x64-gnu": "4.60.2",
1542
+ "@rollup/rollup-linux-x64-musl": "4.60.2",
1543
+ "@rollup/rollup-openbsd-x64": "4.60.2",
1544
+ "@rollup/rollup-openharmony-arm64": "4.60.2",
1545
+ "@rollup/rollup-win32-arm64-msvc": "4.60.2",
1546
+ "@rollup/rollup-win32-ia32-msvc": "4.60.2",
1547
+ "@rollup/rollup-win32-x64-gnu": "4.60.2",
1548
+ "@rollup/rollup-win32-x64-msvc": "4.60.2",
1549
+ "fsevents": "~2.3.2"
1550
+ }
1551
+ },
1552
+ "node_modules/scheduler": {
1553
+ "version": "0.23.2",
1554
+ "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.23.2.tgz",
1555
+ "integrity": "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==",
1556
+ "license": "MIT",
1557
+ "dependencies": {
1558
+ "loose-envify": "^1.1.0"
1559
+ }
1560
+ },
1561
+ "node_modules/semver": {
1562
+ "version": "6.3.1",
1563
+ "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz",
1564
+ "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==",
1565
+ "dev": true,
1566
+ "license": "ISC",
1567
+ "bin": {
1568
+ "semver": "bin/semver.js"
1569
+ }
1570
+ },
1571
+ "node_modules/source-map-js": {
1572
+ "version": "1.2.1",
1573
+ "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
1574
+ "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==",
1575
+ "dev": true,
1576
+ "license": "BSD-3-Clause",
1577
+ "engines": {
1578
+ "node": ">=0.10.0"
1579
+ }
1580
+ },
1581
+ "node_modules/update-browserslist-db": {
1582
+ "version": "1.2.3",
1583
+ "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.2.3.tgz",
1584
+ "integrity": "sha512-Js0m9cx+qOgDxo0eMiFGEueWztz+d4+M3rGlmKPT+T4IS/jP4ylw3Nwpu6cpTTP8R1MAC1kF4VbdLt3ARf209w==",
1585
+ "dev": true,
1586
+ "funding": [
1587
+ {
1588
+ "type": "opencollective",
1589
+ "url": "https://opencollective.com/browserslist"
1590
+ },
1591
+ {
1592
+ "type": "tidelift",
1593
+ "url": "https://tidelift.com/funding/github/npm/browserslist"
1594
+ },
1595
+ {
1596
+ "type": "github",
1597
+ "url": "https://github.com/sponsors/ai"
1598
+ }
1599
+ ],
1600
+ "license": "MIT",
1601
+ "dependencies": {
1602
+ "escalade": "^3.2.0",
1603
+ "picocolors": "^1.1.1"
1604
+ },
1605
+ "bin": {
1606
+ "update-browserslist-db": "cli.js"
1607
+ },
1608
+ "peerDependencies": {
1609
+ "browserslist": ">= 4.21.0"
1610
+ }
1611
+ },
1612
+ "node_modules/vite": {
1613
+ "version": "5.4.21",
1614
+ "resolved": "https://registry.npmjs.org/vite/-/vite-5.4.21.tgz",
1615
+ "integrity": "sha512-o5a9xKjbtuhY6Bi5S3+HvbRERmouabWbyUcpXXUA1u+GNUKoROi9byOJ8M0nHbHYHkYICiMlqxkg1KkYmm25Sw==",
1616
+ "dev": true,
1617
+ "license": "MIT",
1618
+ "peer": true,
1619
+ "dependencies": {
1620
+ "esbuild": "^0.21.3",
1621
+ "postcss": "^8.4.43",
1622
+ "rollup": "^4.20.0"
1623
+ },
1624
+ "bin": {
1625
+ "vite": "bin/vite.js"
1626
+ },
1627
+ "engines": {
1628
+ "node": "^18.0.0 || >=20.0.0"
1629
+ },
1630
+ "funding": {
1631
+ "url": "https://github.com/vitejs/vite?sponsor=1"
1632
+ },
1633
+ "optionalDependencies": {
1634
+ "fsevents": "~2.3.3"
1635
+ },
1636
+ "peerDependencies": {
1637
+ "@types/node": "^18.0.0 || >=20.0.0",
1638
+ "less": "*",
1639
+ "lightningcss": "^1.21.0",
1640
+ "sass": "*",
1641
+ "sass-embedded": "*",
1642
+ "stylus": "*",
1643
+ "sugarss": "*",
1644
+ "terser": "^5.4.0"
1645
+ },
1646
+ "peerDependenciesMeta": {
1647
+ "@types/node": {
1648
+ "optional": true
1649
+ },
1650
+ "less": {
1651
+ "optional": true
1652
+ },
1653
+ "lightningcss": {
1654
+ "optional": true
1655
+ },
1656
+ "sass": {
1657
+ "optional": true
1658
+ },
1659
+ "sass-embedded": {
1660
+ "optional": true
1661
+ },
1662
+ "stylus": {
1663
+ "optional": true
1664
+ },
1665
+ "sugarss": {
1666
+ "optional": true
1667
+ },
1668
+ "terser": {
1669
+ "optional": true
1670
+ }
1671
+ }
1672
+ },
1673
+ "node_modules/yallist": {
1674
+ "version": "3.1.1",
1675
+ "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz",
1676
+ "integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==",
1677
+ "dev": true,
1678
+ "license": "ISC"
1679
+ }
1680
+ }
1681
+ }
frontend/package.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "neuraledge-boardroom",
3
+ "version": "1.0.0",
4
+ "private": true,
5
+ "type": "module",
6
+ "scripts": {
7
+ "dev": "vite",
8
+ "build": "vite build",
9
+ "preview": "vite preview"
10
+ },
11
+ "dependencies": {
12
+ "react": "^18.3.1",
13
+ "react-dom": "^18.3.1"
14
+ },
15
+ "devDependencies": {
16
+ "@vitejs/plugin-react": "^4.3.1",
17
+ "vite": "^5.4.10"
18
+ }
19
+ }
frontend/src/App.jsx ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useEffect, useState } from 'react'
2
+ import { useGameStore } from './hooks/useGameStore.js'
3
+ import { useAgentLoop, greedyPick, buildPitch } from './hooks/useAgentLoop.js'
4
+
5
+ import TopBar from './components/TopBar.jsx'
6
+ import PlaybackControls from './components/PlaybackControls.jsx'
7
+ import MetricsPanel from './components/MetricsPanel.jsx'
8
+ import TrustPanel from './components/TrustPanel.jsx'
9
+ import EventBanner from './components/EventBanner.jsx'
10
+ import NPCGrid from './components/NPCGrid.jsx'
11
+ import AgentDecision from './components/AgentDecision.jsx'
12
+ import VoteTally from './components/VoteTally.jsx'
13
+ import HistoryTimeline from './components/HistoryTimeline.jsx'
14
+ import EndScreen from './components/EndScreen.jsx'
15
+
16
+ export default function App() {
17
+ const { state, resetGame, stepGame, setSpeed, setPaused } = useGameStore()
18
+ const { obs, prevObs, done, loading, error, lastReward, lastInfo, speed, paused } = state
19
+
20
+ const [toast, setToast] = useState(null)
21
+
22
+ // Show error toast
23
+ useEffect(() => {
24
+ if (error) {
25
+ setToast(error)
26
+ const t = setTimeout(() => setToast(null), 5000)
27
+ return () => clearTimeout(t)
28
+ }
29
+ }, [error])
30
+
31
+ // Boot
32
+ useEffect(() => { resetGame(42) }, [resetGame])
33
+
34
+ // Wire agent loop
35
+ useAgentLoop(state, stepGame)
36
+
37
+ const handleRun = () => setPaused(false)
38
+ const handlePause = () => setPaused(true)
39
+ const handleReset = () => { resetGame(Math.floor(Math.random() * 9999)) }
40
+ const handleReplay = () => { resetGame(Math.floor(Math.random() * 9999)) }
41
+
42
+ const handleStep = async () => {
43
+ if (!obs || loading || done) return
44
+ const decision = greedyPick(obs)
45
+ const pitch = buildPitch(obs, decision)
46
+ if (decision) await stepGame(decision, pitch)
47
+ }
48
+
49
+ const round = obs?.round ?? 0
50
+ const curState = obs?.state
51
+ const prevState = prevObs?.state
52
+
53
+ return (
54
+ <div className="app-shell">
55
+ <TopBar obs={obs} round={round} />
56
+ <PlaybackControls
57
+ paused={paused}
58
+ loading={loading}
59
+ done={done}
60
+ obs={obs}
61
+ speed={speed}
62
+ onRun={handleRun}
63
+ onPause={handlePause}
64
+ onStep={handleStep}
65
+ onReset={handleReset}
66
+ onSpeedChange={setSpeed}
67
+ />
68
+
69
+ {/* Metrics strip at top — always visible */}
70
+ {curState && (
71
+ <MetricsPanel state={curState} prevState={prevState} />
72
+ )}
73
+
74
+ <div className="main-grid">
75
+ {/* Left — Trust + History */}
76
+ <div className="col-left">
77
+ <TrustPanel trust={curState?.trust} prevTrust={prevState?.trust} />
78
+ <HistoryTimeline history={curState?.history} />
79
+ </div>
80
+
81
+ {/* Centre — Event + NPCs + Agent Decision */}
82
+ <div className="col-center">
83
+ <EventBanner event={obs?.event} round={round} />
84
+ <NPCGrid npcStatements={obs?.npc_statements} />
85
+ <AgentDecision obs={obs} loading={loading} lastInfo={lastInfo} />
86
+ </div>
87
+
88
+ {/* Right — Vote Tally */}
89
+ <div className="col-right">
90
+ {lastInfo?.winning_vote_tally && <VoteTally info={lastInfo} />}
91
+ {!lastInfo && (
92
+ <div className="card">
93
+ <div className="section-label">Vote Tally</div>
94
+ <div className="card-body" style={{ fontSize: '0.65rem', color: 'var(--text-muted)', textAlign: 'center', padding: '1.25rem 1rem' }}>
95
+ // vote tally appears after first decision.
96
+ </div>
97
+ </div>
98
+ )}
99
+ </div>
100
+ </div>
101
+
102
+ {done && obs && <EndScreen obs={obs} onReplay={handleReplay} />}
103
+
104
+ {toast && (
105
+ <div className="toast">
106
+ ⚠ {toast}
107
+ </div>
108
+ )}
109
+ </div>
110
+ )
111
+ }
frontend/src/components/AgentDecision.jsx ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Terminal-style option labels — no emojis, use ASCII prefix chars.
// Option ids are listed three per row, in the same order as before.
const OPTION_IDS = [
  'slash_prices', 'differentiate', 'acquire_startup',
  'accept_deal', 'negotiate_terms', 'reject_deal',
  'match_offers', 'partial_match', 'let_them_leave',
  'full_compliance', 'partial_compliance', 'exit_EU_market',
  'public_apology', 'legal_action', 'rebrand',
  'accept_acquisition', 'counter_offer', 'reject_and_raise',
  'accept_terms', 'negotiate', 'bootstrap',
  'pivot_product', 'license_technology', 'keep_internal',
  'full_transparency', 'damage_control', 'internal_investigation',
  'ipo', 'acquisition', 'stay_private',
]

// Maps each option id to its ASCII prefix; every option currently uses '>>'.
// NOTE(review): nothing in this module reads OPTION_PREFIX — confirm whether
// it is meant to be rendered next to each option label.
const OPTION_PREFIX = Object.fromEntries(OPTION_IDS.map((id) => [id, '>>']))
14
+
15
+ export default function AgentDecision({ obs, loading, lastInfo }) {
16
+ if (!obs) return null
17
+
18
+ const winningDecision = obs.state?.winning_decision ?? null
19
+ const aiDecision = lastInfo?.winning_decision ?? winningDecision
20
+ const options = obs.options ?? []
21
+
22
+ const history = obs.state?.history ?? []
23
+ const lastEntry = history[history.length - 1]
24
+
25
+ if (loading && !winningDecision) {
26
+ return (
27
+ <div className="card">
28
+ <div className="section-label">Agent Decision</div>
29
+ <div className="ai-thinking">
30
+ sarah_chen --deliberate
31
+ <div className="thinking-dots">
32
+ <span /><span /><span />
33
+ </div>
34
+ </div>
35
+ </div>
36
+ )
37
+ }
38
+
39
+ return (
40
+ <div className="card">
41
+ <div className="section-label">Agent Decision</div>
42
+ <div className="agent-decision-panel">
43
+ <div className="decision-options-grid">
44
+ {options.map((opt) => {
45
+ const isAiPick = opt === aiDecision
46
+ const isWinner = opt === winningDecision
47
+ const isMatch = aiDecision === winningDecision
48
+ let cls = 'decision-option'
49
+ if (isAiPick) cls += ' ai-pick'
50
+ if (isWinner && winningDecision)
51
+ cls += ` board-winner ${isMatch ? 'board-match' : 'board-mismatch'}`
52
+ return (
53
+ <div key={opt} className={cls}>
54
+ <div className="opt-label">
55
+ {opt.replace(/_/g, '_')}
56
+ </div>
57
+ </div>
58
+ )
59
+ })}
60
+ </div>
61
+
62
+ {winningDecision && aiDecision && aiDecision !== winningDecision && (
63
+ <div style={{
64
+ fontSize: '0.65rem', color: 'var(--error)',
65
+ fontFamily: 'var(--font-mono)', padding: '0.375rem 0',
66
+ textTransform: 'uppercase', letterSpacing: '0.04em'
67
+ }}>
68
+ [WARN] AI outvoted → board chose: {winningDecision.replace(/_/g, '_')}
69
+ </div>
70
+ )}
71
+
72
+ {lastEntry && (
73
+ <div className="coalition-pitch-block">
74
+ <div className="pitch-header">Coalition Pitch Log</div>
75
+ <div className={`pitch-text ${lastEntry.pitch_used ? '' : 'empty'}`}>
76
+ {lastEntry.pitch_used
77
+ ? `targeting [${Object.entries(lastEntry.pitch_scores ?? {})
78
+ .filter(([, v]) => v > 0)
79
+ .map(([r]) => r)
80
+ .join(', ')}] — keyword-optimised pitch sent.`
81
+ : 'no pitch sent this round.'}
82
+ </div>
83
+ </div>
84
+ )}
85
+ </div>
86
+ </div>
87
+ )
88
+ }
frontend/src/components/EndScreen.jsx ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/**
 * Render a dollar amount compactly for the end-of-episode stats:
 * millions with two decimals ("$2.50M"), thousands with one ("$1.5K"),
 * anything smaller as a whole number. A nullish amount renders as "$0".
 */
function formatMoney(n) {
  if (n >= 1e6) {
    const millions = n / 1e6
    return `$${millions.toFixed(2)}M`
  }
  if (n >= 1e3) {
    const thousands = n / 1e3
    return `$${thousands.toFixed(1)}K`
  }
  // The optional call keeps null/undefined from throwing; they fall back to 0.
  const whole = n?.toFixed(0) ?? 0
  return `$${whole}`
}
5
+
6
// Presentation for each terminal `done_reason`: the bracketed banner tag,
// the headline text, and the CSS modifier class for the reason chip.
const OUTCOME_MAP = {
  ipo: {
    ascii: '[IPO]',
    title: 'IPO_SUCCESS',
    cls: 'ipo',
  },
  acquisition: {
    ascii: '[ACQ]',
    title: 'ACQUIRED',
    cls: 'acquisition',
  },
  runway_exhausted: {
    ascii: '[DEAD]',
    title: 'BANKRUPTCY',
    cls: 'bankruptcy',
  },
  finished_10: {
    ascii: '[DONE]',
    title: 'EPISODE_COMPLETE',
    cls: 'default',
  },
}

// Horizontal rule drawn between the headline and the stats grid.
const DIVIDER = '================================================'
14
+
15
+ export default function EndScreen({ obs, onReplay }) {
16
+ if (!obs) return null
17
+ const { state } = obs
18
+ const reason = state?.done_reason ?? 'finished_10'
19
+ const { ascii, title, cls } = OUTCOME_MAP[reason] ?? OUTCOME_MAP['finished_10']
20
+ const history = state?.history ?? []
21
+ const roundsWon = history.filter(h => h.agent_won_vote).length
22
+
23
+ return (
24
+ <div className="end-overlay">
25
+ <div className="end-modal">
26
+ {/* ASCII banner */}
27
+ <div className="end-icon" style={{ fontSize: '1.5rem', fontWeight: 700, letterSpacing: '0.1em' }}>
28
+ {ascii}
29
+ </div>
30
+ <div className="end-title">{title}</div>
31
+ <span className={`end-reason ${cls}`}>{reason.replace(/_/g, '_')}</span>
32
+
33
+ <div style={{ color: 'var(--muted)', fontSize: '0.6rem', marginBottom: '0.875rem', letterSpacing: '0.05em' }}>
34
+ {DIVIDER}
35
+ </div>
36
+
37
+ <div className="end-stats">
38
+ <div className="end-stat">
39
+ <div className="es-label">PROFIT_SCORE</div>
40
+ <div className="es-value" style={{ color: 'var(--primary)', textShadow: 'var(--glow)' }}>
41
+ {(state?.profitability_score ?? 0).toFixed(1)}
42
+ </div>
43
+ </div>
44
+ <div className="end-stat">
45
+ <div className="es-label">REVENUE</div>
46
+ <div className="es-value">{formatMoney(state?.revenue ?? 0)}</div>
47
+ </div>
48
+ <div className="end-stat">
49
+ <div className="es-label">RUNWAY</div>
50
+ <div className="es-value">{(state?.runway_months ?? 0).toFixed(1)}mo</div>
51
+ </div>
52
+ <div className="end-stat">
53
+ <div className="es-label">ROUNDS_WON</div>
54
+ <div className="es-value" style={{ color: 'var(--primary)', textShadow: 'var(--glow)' }}>
55
+ {roundsWon}/{history.length}
56
+ </div>
57
+ </div>
58
+ <div className="end-stat">
59
+ <div className="es-label">MORALE</div>
60
+ <div className="es-value">{Math.round((state?.team_morale ?? 0) * 100)}%</div>
61
+ </div>
62
+ <div className="end-stat">
63
+ <div className="es-label">REG_RISK</div>
64
+ <div className="es-value">{Math.round((state?.regulatory_risk ?? 0) * 100)}%</div>
65
+ </div>
66
+ </div>
67
+
68
+ {history.length > 0 && (
69
+ <div style={{ marginBottom: '1rem', maxHeight: '180px', overflowY: 'auto' }}>
70
+ <div style={{
71
+ fontSize: '0.58rem', textTransform: 'uppercase', letterSpacing: '0.1em',
72
+ color: 'var(--text-secondary)', marginBottom: '0.4rem'
73
+ }}>
74
+ // round_log
75
+ </div>
76
+ {history.map((h) => (
77
+ <div key={h.round} style={{
78
+ display: 'flex', justifyContent: 'space-between',
79
+ fontSize: '0.65rem', color: 'var(--text-secondary)',
80
+ padding: '0.2rem 0',
81
+ borderBottom: '1px solid var(--border-dim)'
82
+ }}>
83
+ <span style={{ color: 'var(--secondary)', textShadow: 'var(--amber-glow)', minWidth: '28px' }}>
84
+ R{String(h.round).padStart(2,'0')}
85
+ </span>
86
+ <span style={{
87
+ flex: 1, marginLeft: '0.6rem',
88
+ overflow: 'hidden', textOverflow: 'ellipsis', whiteSpace: 'nowrap',
89
+ textTransform: 'uppercase', letterSpacing: '0.03em',
90
+ fontSize: '0.6rem'
91
+ }}>
92
+ {(h.event_title ?? '').split('—').slice(-1)[0]?.trim()}
93
+ </span>
94
+ <span style={{
95
+ marginLeft: '0.5rem', flexShrink: 0, fontSize: '0.6rem',
96
+ color: h.agent_won_vote ? 'var(--primary)' : 'var(--error)',
97
+ textShadow: h.agent_won_vote ? 'var(--glow-sm)' : 'none'
98
+ }}>
99
+ {h.agent_won_vote ? '[OK]' : '[X]'} {(h.agent_decision ?? '').replace(/_/g, '_')}
100
+ </span>
101
+ </div>
102
+ ))}
103
+ </div>
104
+ )}
105
+
106
+ <button className="replay-btn" onClick={onReplay}>
107
+ ↺ RUN_NEW_EPISODE
108
+ </button>
109
+ </div>
110
+ </div>
111
+ )
112
+ }
frontend/src/components/EventBanner.jsx ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ export default function EventBanner({ event, round }) {
2
+ if (!event) {
3
+ return (
4
+ <div className="event-banner">
5
+ <div className="event-tag">BOARD_CRISIS</div>
6
+ <div className="event-title">Awaiting scenario...</div>
7
+ <div className="event-desc">$ run_agent --start # click RUN_AGENT to begin</div>
8
+ </div>
9
+ )
10
+ }
11
+
12
+ const [titlePart, ...rest] = event.split('\n')
13
+ const desc = rest.join(' ').replace(/^Description:\s*/i, '').trim() || event
14
+
15
+ return (
16
+ <div className="event-banner">
17
+ <div className="event-tag">
18
+ RND_{String(round).padStart(2,'0')} / BOARD_CRISIS
19
+ </div>
20
+ <div className="event-title">{titlePart.toUpperCase()}</div>
21
+ {desc && desc !== titlePart && (
22
+ <div className="event-desc">{desc}</div>
23
+ )}
24
+ </div>
25
+ )
26
+ }
frontend/src/components/HistoryTimeline.jsx ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ export default function HistoryTimeline({ history }) {
2
+ return (
3
+ <div className="card">
4
+ <div className="section-label">Decision History</div>
5
+ {!history?.length ? (
6
+ <div className="history-empty">// no rounds completed yet.</div>
7
+ ) : (
8
+ <div className="history-list">
9
+ {history.map((entry) => {
10
+ const aiWon = entry.agent_won_vote
11
+ const reward = entry.reward ?? ((entry.score_after ?? 0) - 0)
12
+ const rewardNum = typeof reward === 'number' ? reward : 0
13
+ return (
14
+ <div key={entry.round} className="history-item">
15
+ <span className="h-round">R{String(entry.round).padStart(2,'0')}</span>
16
+ <div className="h-info">
17
+ <div className="h-event">
18
+ {(entry.event_title ?? '').split('—').slice(-1)[0]?.trim() ?? entry.event_title}
19
+ </div>
20
+ <div className="h-picks">
21
+ <span className="h-ai-pick">
22
+ &gt;{(entry.agent_decision ?? '').replace(/_/g, '_')}
23
+ </span>
24
+ {!aiWon && (
25
+ <>
26
+ <span style={{ color: 'var(--muted)' }}>→</span>
27
+ <span className="h-win-pick">
28
+ {(entry.winning_decision ?? '').replace(/_/g, '_')}
29
+ </span>
30
+ <span className="h-mismatch">[X]</span>
31
+ </>
32
+ )}
33
+ {aiWon && (
34
+ <span style={{ color: 'var(--primary)', fontSize: '0.55rem', textShadow: 'var(--glow-sm)' }}>
35
+ &nbsp;[OK]
36
+ </span>
37
+ )}
38
+ </div>
39
+ </div>
40
+ <span className={`h-reward ${rewardNum >= 0 ? 'pos' : 'neg'}`}>
41
+ {rewardNum >= 0 ? '+' : ''}{rewardNum.toFixed(2)}
42
+ </span>
43
+ </div>
44
+ )
45
+ })}
46
+ </div>
47
+ )}
48
+ </div>
49
+ )
50
+ }
frontend/src/components/MetricsPanel.jsx ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/**
 * Compact dollar formatting for metric tiles: "$2.50M" for millions,
 * "$1.5K" for thousands, whole dollars otherwise.
 *
 * NOTE(review): the whole-dollar branch formats the magnitude of `n`, so any
 * input below 1000 (including every negative number) is shown without a sign.
 */
function formatMoney(n) {
  if (n >= 1e6) {
    const millions = n / 1e6
    return `$${millions.toFixed(2)}M`
  }
  if (n >= 1e3) {
    const thousands = n / 1e3
    return `$${thousands.toFixed(1)}K`
  }
  const magnitude = Math.abs(n)
  return `$${magnitude.toFixed(0)}`
}
5
+
6
// Format a fraction as a whole-number percentage (the value is multiplied by 100).
function formatPct(v) {
  const percent = v * 100
  return `${percent.toFixed(0)}%`
}
7
/**
 * Format the magnitude of a metric change using the unit that fits the
 * metric: money for revenue/burn, months for runway, a plain one-decimal
 * number for the profitability score, and a percentage for everything else.
 * The sign is dropped here; the caller renders it separately.
 */
function fmtDelta(key, d) {
  const magnitude = Math.abs(d)
  switch (key) {
    case 'revenue':
    case 'burn_rate':
      return formatMoney(magnitude)
    case 'runway_months':
      return `${magnitude.toFixed(1)}mo`
    case 'profitability_score':
      return magnitude.toFixed(1)
    default:
      return formatPct(magnitude)
  }
}
13
+
14
+ // ASCII progress bar — no chart.js, pure terminal
15
+ function AsciiBar({ value, max = 1, width = 8 }) {
16
+ const filled = Math.round((value / max) * width)
17
+ const empty = width - filled
18
+ return (
19
+ <span style={{ color: 'var(--muted)', letterSpacing: 0 }}>
20
+ [<span style={{ color: 'var(--primary)', textShadow: 'var(--glow-sm)' }}>
21
+ {'█'.repeat(Math.max(0, filled))}
22
+ </span>
23
+ {'░'.repeat(Math.max(0, empty))}]
24
+ </span>
25
+ )
26
+ }
27
+
28
// Builds one tile descriptor; `max` is the value that fills the tile's bar,
// or null when the tile has no bar.
const makeTile = (key, label, fmt, max) => ({ key, label, fmt, max })

// Metric tiles in display order: state key, short label, value formatter,
// and bar maximum.
const TILES = [
  makeTile('profitability_score', 'SCORE', (v) => v.toFixed(1), 100),
  makeTile('revenue', 'REVENUE', formatMoney, null),
  makeTile('burn_rate', 'BURN', formatMoney, null),
  makeTile('runway_months', 'RUNWAY', (v) => `${v.toFixed(1)}mo`, 24),
  makeTile('product_readiness', 'PRODUCT', formatPct, 1),
  makeTile('market_share', 'MARKET', formatPct, 1),
  makeTile('team_morale', 'MORALE', formatPct, 1),
  makeTile('investor_confidence', 'INVEST', formatPct, 1),
  makeTile('regulatory_risk', 'REG_RSK', formatPct, 1),
]
39
+
40
/**
 * Pick the colour class ('good' | 'warn' | 'bad' | '') for a metric value.
 *
 * Regulatory risk is inverted (higher is worse), runway and profitability
 * score have their own thresholds, burn rate is never colour-coded, and
 * every other metric uses the 0.35 / 0.65 bands.
 */
function scoreTile(key, val) {
  switch (key) {
    case 'regulatory_risk':
      if (val > 0.65) return 'bad'
      return val > 0.35 ? 'warn' : 'good'
    case 'runway_months':
      if (val > 12) return 'good'
      return val > 6 ? 'warn' : 'bad'
    case 'profitability_score':
      if (val >= 60) return 'good'
      return val >= 35 ? 'warn' : 'bad'
    case 'burn_rate':
      return ''
    default:
      if (val > 0.65) return 'good'
      return val > 0.35 ? 'warn' : 'bad'
  }
}
47
+
48
+ export default function MetricsPanel({ state, prevState }) {
49
+ if (!state) return null
50
+
51
+ return (
52
+ <div className="metrics-strip">
53
+ {TILES.map(({ key, label, fmt, max }) => {
54
+ const val = state[key] ?? 0
55
+ const prev = prevState?.[key]
56
+ const delta = prev !== undefined ? val - prev : null
57
+ const cls = scoreTile(key, val)
58
+ const barVal = max ? Math.min(val, max) : null
59
+
60
+ return (
61
+ <div key={key} className="metric-tile">
62
+ <div className="m-icon-label">
63
+ <span className="m-label">{label}</span>
64
+ </div>
65
+ <div className="m-value-row">
66
+ <span className={`m-value ${cls}`}>{fmt(val)}</span>
67
+ {delta !== null && Math.abs(delta) > 0.001 && (
68
+ <span className={`m-delta ${delta > 0 ? 'pos' : 'neg'}`}>
69
+ {delta > 0 ? '+' : '−'}{fmtDelta(key, delta)}
70
+ </span>
71
+ )}
72
+ </div>
73
+ {barVal !== null && (
74
+ <div style={{ marginTop: '0.15rem' }}>
75
+ <AsciiBar value={barVal} max={max} width={6} />
76
+ </div>
77
+ )}
78
+ </div>
79
+ )
80
+ })}
81
+ </div>
82
+ )
83
+ }
frontend/src/components/NPCGrid.jsx ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Agenda keywords displayed as tags on each board member's card, four per role.
const AGENDA_HINTS = {
  'CTO': [
    'engineering',
    'architecture',
    'team morale',
    'reliability',
  ],
  'CFO': [
    'burn rate',
    'runway',
    'fiduciary',
    'cost discipline',
  ],
  'Investor Rep': [
    'growth',
    'market share',
    'IPO',
    'bold moves',
  ],
  'Independent': [
    'reputation',
    'ethics',
    'long-term',
    'governance',
  ],
}

// CSS modifier class for each role.
const ROLE_CLS = {
  'CTO': 'cto',
  'CFO': 'cfo',
  'Investor Rep': 'inv',
  'Independent': 'ind',
}

// Two-letter avatar text for each role.
const ROLE_INITIALS = {
  'CTO': 'CT',
  'CFO': 'CF',
  'Investor Rep': 'IN',
  'Independent': 'ID',
}
16
+
17
+ function NPCCard({ npc }) {
18
+ const { role, statement, vote, confidence } = npc
19
+ const cls = ROLE_CLS[role] ?? 'ind'
20
+ const pct = Math.round(confidence * 100)
21
+ const hints = AGENDA_HINTS[role] ?? []
22
+
23
+ return (
24
+ <div className={`npc-card ${cls}`}>
25
+ <div className="npc-header">
26
+ <div className="npc-avatar-role">
27
+ <div className={`npc-avatar ${cls}`}>{ROLE_INITIALS[role] ?? role[0]}</div>
28
+ <span className={`npc-role ${cls}`}>{role.toUpperCase()}</span>
29
+ </div>
30
+ <span className="npc-vote-chip" title={`Votes: ${vote}`}>
31
+ →{vote.replace(/_/g, '_')}
32
+ </span>
33
+ </div>
34
+
35
+ <p className="npc-statement">{statement}</p>
36
+
37
+ <div className="npc-conf-row">
38
+ <span className="conf-label">CONF</span>
39
+ <div className="conf-track">
40
+ <div className="conf-fill" style={{ width: `${pct}%` }} />
41
+ </div>
42
+ <span className="conf-pct">{pct}%</span>
43
+ </div>
44
+
45
+ <div className="npc-agenda-tags">
46
+ {hints.map((h) => (
47
+ <span key={h} className="agenda-tag">#{h}</span>
48
+ ))}
49
+ </div>
50
+ </div>
51
+ )
52
+ }
53
+
54
+ export default function NPCGrid({ npcStatements }) {
55
+ if (!npcStatements?.length) {
56
+ return (
57
+ <div className="card">
58
+ <div className="section-label">Board Statements</div>
59
+ <div className="card-body" style={{ fontSize: '0.65rem', color: 'var(--text-muted)', textAlign: 'center', padding: '1rem' }}>
60
+ // awaiting board response...
61
+ </div>
62
+ </div>
63
+ )
64
+ }
65
+
66
+ return (
67
+ <div className="card">
68
+ <div className="section-label">Board Statements</div>
69
+ <div className="npc-grid">
70
+ {npcStatements.map((npc) => (
71
+ <NPCCard key={npc.role} npc={npc} />
72
+ ))}
73
+ </div>
74
+ </div>
75
+ )
76
+ }
frontend/src/components/PlaybackControls.jsx ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ export default function PlaybackControls({
2
+ paused,
3
+ loading,
4
+ done,
5
+ obs,
6
+ speed,
7
+ onRun,
8
+ onPause,
9
+ onStep,
10
+ onReset,
11
+ onSpeedChange,
12
+ }) {
13
+ const canStep = !loading && !done && !!obs
14
+ const statusText = loading ? 'PROCESSING...'
15
+ : done ? 'EPISODE_DONE'
16
+ : paused ? 'PAUSED'
17
+ : 'RUNNING'
18
+
19
+ const statusDot = loading ? '' : done ? '' : paused ? 'paused' : 'running'
20
+
21
+ return (
22
+ <div className="playback-bar">
23
+ {paused && !done ? (
24
+ <button className="pb-btn primary" onClick={onRun} disabled={loading || !obs}>
25
+ ▶ RUN_AGENT
26
+ </button>
27
+ ) : (
28
+ <button className="pb-btn" onClick={onPause} disabled={loading || done}>
29
+ ⏸ PAUSE
30
+ </button>
31
+ )}
32
+
33
+ <button className="pb-btn" onClick={onStep} disabled={!canStep}>
34
+ ⏭ STEP
35
+ </button>
36
+
37
+ <div className="pb-divider" />
38
+
39
+ <button className="pb-btn" onClick={onReset} disabled={loading}>
40
+ ↺ RESET
41
+ </button>
42
+
43
+ <div className="pb-divider" />
44
+
45
+ <div className="speed-control">
46
+ <span>SPEED</span>
47
+ <input
48
+ type="range"
49
+ min={0.5}
50
+ max={4}
51
+ step={0.25}
52
+ value={speed}
53
+ onChange={(e) => onSpeedChange(parseFloat(e.target.value))}
54
+ />
55
+ <span className="speed-label">{speed.toFixed(2)}x</span>
56
+ </div>
57
+
58
+ <div className="pb-status">
59
+ <div className={`status-dot ${statusDot}`} />
60
+ {statusText}
61
+ </div>
62
+ </div>
63
+ )
64
+ }
frontend/src/components/TopBar.jsx ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useEffect, useState } from 'react'
2
+ import { apiHealth } from '../services/api.js'
3
+
4
// Multi-line ASCII wordmark. String.raw is required here: in an ordinary
// template literal `\ ` and `\|` are escape sequences, which would silently
// drop the backslashes that form the diagonal strokes of the letters.
const ASCII_LOGO = String.raw`
_ _ ____ _ _ ____ ____ _ ____ ___ ____ ____
|\ | |___ | | |__/ |__| | |___ | \ | __ |___
| \| |___ |__| | \ | | |___ |___ |__/ |__] |___
`
9
+
10
+ export default function TopBar({ obs, round }) {
11
+ const [online, setOnline] = useState(false)
12
+ const [tick, setTick] = useState(true)
13
+
14
+ useEffect(() => {
15
+ const check = async () => setOnline(await apiHealth())
16
+ check()
17
+ const id = setInterval(check, 15_000)
18
+ return () => clearInterval(id)
19
+ }, [])
20
+
21
+ // blinking colon in clock-style indicator
22
+ useEffect(() => {
23
+ const t = setInterval(() => setTick(v => !v), 500)
24
+ return () => clearInterval(t)
25
+ }, [])
26
+
27
+ const score = obs?.state?.profitability_score ?? null
28
+ const scoreClass = score === null ? '' : score >= 60 ? 'good' : score >= 35 ? 'warn' : 'bad'
29
+
30
+ return (
31
+ <div className="topbar">
32
+ <div className="topbar-brand">
33
+ {/* compact single-line ASCII header */}
34
+ <div className="brand-name">NeuralEdge</div>
35
+ <div style={{ width: '1px', height: '18px', background: 'var(--border)', margin: '0 0.35rem' }} />
36
+ <div className="brand-ceo">CEO: Sarah Chen&nbsp;|&nbsp;AI Agent</div>
37
+ </div>
38
+
39
+ <div className="topbar-center">
40
+ <div className="round-badge">
41
+ RND {obs ? `${String(round).padStart(2,'0')} / 10` : '--/10'}
42
+ </div>
43
+ {score !== null && (
44
+ <div className="score-display">
45
+ <div className="score-label">PROFIT_SCORE</div>
46
+ <div className={`score-value ${scoreClass}`}>{score.toFixed(1)}</div>
47
+ </div>
48
+ )}
49
+ </div>
50
+
51
+ <div className="topbar-right">
52
+ <div className="health-indicator">
53
+ <div className={`health-dot ${online ? 'online' : 'offline'}`} />
54
+ {online ? '[OK] BACKEND' : '[ERR] OFFLINE'}
55
+ </div>
56
+ </div>
57
+ </div>
58
+ )
59
+ }