Codex committed on
Commit
1794757
·
0 Parent(s):

sync main snapshot for HF Space

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +6 -0
  2. .env.example +4 -0
  3. .gitignore +16 -0
  4. .idea/.gitignore +10 -0
  5. .idea/misc.xml +9 -0
  6. .idea/modules.xml +8 -0
  7. .idea/swift-toolchain.xml +6 -0
  8. .idea/trenches.iml +9 -0
  9. .idea/vcs.xml +6 -0
  10. BACKEND_SUMMARY.md +524 -0
  11. DATA.md +226 -0
  12. Dockerfile +41 -0
  13. Dockerfile.frontend +14 -0
  14. ENTITIES.md +128 -0
  15. FLOW.md +163 -0
  16. HANDOFF.md +17 -0
  17. IMPROVEMENTS.md +84 -0
  18. PLAN.md +118 -0
  19. README.md +84 -0
  20. RL.md +354 -0
  21. TODO.md +49 -0
  22. TOOLS.md +137 -0
  23. TRAINING_PLAN.md +209 -0
  24. app/api/health/route.ts +10 -0
  25. app/api/source-registry/route.ts +12 -0
  26. app/globals.css +159 -0
  27. app/layout.tsx +24 -0
  28. app/page.tsx +5 -0
  29. backend/Dockerfile +19 -0
  30. backend/HOW_POST_TRAINING_WORKS.md +127 -0
  31. backend/POST_TRAINING_PLAN.md +121 -0
  32. backend/README.md +86 -0
  33. backend/TRAINING_FLOW.md +156 -0
  34. backend/TRAINING_RUNBOOK.md +441 -0
  35. backend/examples/trl_openenv_colab_minimal.py +5 -0
  36. backend/pyproject.toml +49 -0
  37. backend/src/trenches_env/__init__.py +25 -0
  38. backend/src/trenches_env/agents.py +90 -0
  39. backend/src/trenches_env/benchmark_runner.py +175 -0
  40. backend/src/trenches_env/entity_knowledge.py +50 -0
  41. backend/src/trenches_env/env.py +0 -0
  42. backend/src/trenches_env/historical_collection.py +461 -0
  43. backend/src/trenches_env/historical_collection_cli.py +163 -0
  44. backend/src/trenches_env/historical_replay.py +76 -0
  45. backend/src/trenches_env/historical_replays/gulf_2025_events.json +0 -0
  46. backend/src/trenches_env/historical_replays/hezbollah_2025_events.json +1993 -0
  47. backend/src/trenches_env/historical_replays/iran_2025_events.json +0 -0
  48. backend/src/trenches_env/historical_replays/israel_2025_events.json +0 -0
  49. backend/src/trenches_env/historical_replays/oversight_2025_events.json +0 -0
  50. backend/src/trenches_env/historical_replays/us_2025_events.json +0 -0
.dockerignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ node_modules
2
+ dist
3
+ .git
4
+ .idea
5
+ backend/.venv
6
+ backend/__pycache__
.env.example ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ NEXT_PUBLIC_API_BASE_URL=http://localhost:8000
2
+ NEXT_PUBLIC_VERCEL_API_BASE=/api
3
+ NEXT_PUBLIC_ENABLE_SOURCE_LOGIC=false
4
+ NEXT_PUBLIC_MAPBOX_TOKEN=
.gitignore ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ node_modules
2
+ dist
3
+ .next
4
+ tsconfig.tsbuildinfo
5
+ .env
6
+ .env.local
7
+ .env.*.local
8
+ __pycache__
9
+ .pytest_cache
10
+ .venv
11
+ backend/.venv
12
+
13
+ # Training checkpoints (binary files)
14
+ backend/tmp-training-run/
15
+ backend/tmp-*/
16
+ /tmp/smoke-test*
.idea/.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Ignored default folder with query files
5
+ /queries/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
9
+ # Editor-based HTTP Client requests
10
+ /httpRequests/
.idea/misc.xml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Black">
4
+ <option name="sdkName" value="uv" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="openjdk-25" project-jdk-type="JavaSDK">
7
+ <output url="file://$PROJECT_DIR$/out" />
8
+ </component>
9
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/trenches.iml" filepath="$PROJECT_DIR$/.idea/trenches.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/swift-toolchain.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="SwiftToolchain">
4
+ <option name="toolchain" value="system-/usr/bin/swift" />
5
+ </component>
6
+ </project>
.idea/trenches.iml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="JAVA_MODULE" version="4">
3
+ <component name="NewModuleRootManager" inherit-compiler-output="true">
4
+ <exclude-output />
5
+ <content url="file://$MODULE_DIR$" />
6
+ <orderEntry type="jdk" jdkName="uv" jdkType="Python SDK" />
7
+ <orderEntry type="sourceFolder" forTests="false" />
8
+ </component>
9
+ </module>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
BACKEND_SUMMARY.md ADDED
@@ -0,0 +1,524 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Backend Summary
2
+
3
+ This is the backend handoff for the frontend team.
4
+
5
+ ## Plain-English State Model
6
+
7
+ There are five different layers of state:
8
+
9
+ 1. `world.latent_state`
10
+ Backend truth. Rewards and simulation logic use this.
11
+
12
+ 2. `world.latent_events`
13
+ Canonical hidden event chain. News, actions, asset damage, and oversight now create or update these events.
14
+
15
+ 3. `world.actor_state`
16
+ Lagged/public summary of the world.
17
+
18
+ 4. `observations[agent_id]`
19
+ What each entity actually sees. This can be partial, delayed, contradictory, and low-confidence.
20
+
21
+ 5. `belief_state[agent_id]`
22
+ What each entity currently believes across turns. This is persistent memory, not just the current observation. It now uses doctrine-specific priors, slow false-belief decay, and contradiction-driven revision.
23
+
24
+ The frontend should not treat those layers as interchangeable.
25
+
26
+ ## Real Model Behavior
27
+
28
+ Each entity has a `model_bindings[agent_id]` object.
29
+
30
+ That tells you:
31
+
32
+ - which provider is configured
33
+ - which model is configured
34
+ - whether the binding is ready for inference
35
+ - which tools/actions the entity is allowed to use
36
+ - whether the entity is currently on real provider execution or heuristic fallback
37
+
38
+ Current behavior:
39
+
40
+ - if a provider binding is ready, the backend tries real provider inference first
41
+ - if that fails or returns an invalid action, the backend falls back explicitly to heuristic policy
42
+ - action metadata records whether the action came from `provider_inference` or `heuristic_fallback`
43
+
44
+ Supported provider names now include:
45
+
46
+ - `openai`
47
+ - `anthropic`
48
+ - `openrouter`
49
+ - `huggingface`
50
+ - `ollama`
51
+ - `vllm`
52
+ - `custom`
53
+
54
+ Hugging Face notes:
55
+
56
+ - `huggingface` uses the HF router chat-completions endpoint
57
+ - if `api_key_env` is not set, the backend defaults to `HF_TOKEN`
58
+ - if `TRENCHES_HF_ROUTING_POLICY` is set to `fastest`, `cheapest`, or `preferred`, the backend appends that routing suffix to HF model names that do not already include one
59
+ - the recommended deployment pattern is to store `HF_TOKEN` as a secret, not in repo files
60
+
61
+ ## Main Endpoints
62
+
63
+ Server file:
64
+
65
+ - [backend/src/trenches_env/server.py](backend/src/trenches_env/server.py)
66
+
67
+ ### Health And Capabilities
68
+
69
+ - `GET /healthz`
70
+ Returns `{ "status": "ok" }`
71
+
72
+ - `GET /capabilities`
73
+ Returns:
74
+ - session/OpenEnv capability flags
75
+ - CORS settings
76
+ - per-entity `model_bindings`
77
+
78
+ Use this once at app startup.
79
+
80
+ ### Session Lifecycle
81
+
82
+ - `POST /sessions`
83
+ Creates a session.
84
+
85
+ - `POST /sessions/{session_id}/reset`
86
+ Resets an existing session.
87
+
88
+ - `GET /sessions/{session_id}`
89
+ Returns the latest `SessionState`.
90
+
91
+ - `POST /sessions/{session_id}/step`
92
+ Advances one turn.
93
+
94
+ Request body:
95
+ - `actions: Record<agentId, AgentAction>`
96
+ - `external_signals: ExternalSignal[]`
97
+
98
+ Response:
99
+ - `StepSessionResponse`
100
+ - `session`
101
+ - `oversight`
102
+ - `done`
103
+
104
+ ### Live News And Reaction Timeline
105
+
106
+ - `POST /sessions/{session_id}/news`
107
+ Injects public/news signals, lets the backend resolve entity reactions, steps the world, and returns the structured reaction entry for that news event.
108
+
109
+ Request body:
110
+ - `signals: ExternalSignal[]`
111
+ - `agent_ids?: string[]`
112
+
113
+ Notes:
114
+ - if `agent_ids` is omitted, all entities react
115
+ - if `agent_ids` is provided, only those entities are auto-resolved for that news event
116
+ - this still goes through the same env step path, so it stays aligned with OpenEnv behavior
117
+
118
+ Response:
119
+ - `IngestNewsResponse`
120
+ - `session`
121
+ - `oversight`
122
+ - `reaction`
123
+ - `done`
124
+
125
+ - `GET /sessions/{session_id}/reactions`
126
+ Returns the rolling `reaction_log`.
127
+
128
+ Use these two endpoints for:
129
+
130
+ - incoming-news timeline
131
+ - “who reacted to what” UI
132
+ - live world-monitoring panels
133
+
134
+ ### Provider Diagnostics
135
+
136
+ - `GET /sessions/{session_id}/providers/diagnostics`
137
+ Returns per-entity provider runtime health and recent inference telemetry.
138
+
139
+ Important fields per entity:
140
+
141
+ - `status`
142
+ - `request_count`
143
+ - `success_count`
144
+ - `error_count`
145
+ - `consecutive_failures`
146
+ - `last_latency_ms`
147
+ - `avg_latency_ms`
148
+ - `last_success_at`
149
+ - `last_error_at`
150
+ - `last_error`
151
+
152
+ Use this for:
153
+
154
+ - provider health badges
155
+ - fallback warnings
156
+ - “model is unhealthy” operator panels
157
+ - debugging why an entity is on heuristic fallback
158
+
159
+ ### Live Source Controls
160
+
161
+ - `POST /sessions/{session_id}/live`
162
+ Enables or disables live mode.
163
+
164
+ - `POST /sessions/{session_id}/sources/refresh`
165
+ Forces source refresh and rebuilds observations.
166
+
167
+ - `GET /sessions/{session_id}/sources/monitor`
168
+ Returns source-health and delivery status per entity.
169
+
170
+ ### Scenarios And Benchmarks
171
+
172
+ - `GET /scenarios`
173
+ Returns seeded scenarios.
174
+
175
+ - `POST /benchmarks/run`
176
+ Runs scenario benchmarks and returns scorecards.
177
+
178
+ ### OpenEnv
179
+
180
+ Legacy tuple-style endpoints:
181
+
182
+ - `POST /reset`
183
+ - `POST /step`
184
+ - `GET /state`
185
+
186
+ If `openenv-core` is installed, native OpenEnv is mounted at:
187
+
188
+ - `/openenv`
189
+
190
+ OpenEnv file:
191
+
192
+ - [backend/src/trenches_env/openenv_adapter.py](backend/src/trenches_env/openenv_adapter.py)
193
+
194
+ ## Main Schemas
195
+
196
+ Schema file:
197
+
198
+ - [backend/src/trenches_env/models.py](backend/src/trenches_env/models.py)
199
+
200
+ ### SessionState
201
+
202
+ Main top-level object for the frontend.
203
+
204
+ Important fields:
205
+
206
+ - `session_id`
207
+ - `world`
208
+ - `observations`
209
+ - `belief_state`
210
+ - `rewards`
211
+ - `model_bindings`
212
+ - `recent_traces`
213
+ - `action_log`
214
+ - `reaction_log`
215
+ - `live`
216
+ - `episode`
217
+
218
+ ### WorldState
219
+
220
+ Important fields:
221
+
222
+ - `latent_state`
223
+ - `latent_events`
224
+ - `actor_state`
225
+ - `active_events`
226
+ - `asset_state`
227
+ - `coalition_graph`
228
+ - `risk_scores`
229
+ - `last_actions`
230
+
231
+ Important distinction:
232
+
233
+ - `latent_events` are canonical hidden events
234
+ - `active_events` are the public-facing projection of those latent events
235
+
236
+ ### AgentObservation
237
+
238
+ Main entity-facing view.
239
+
240
+ Important fields:
241
+
242
+ - `decision_prompt`
243
+ - `belief_brief`
244
+ - `belief_topics`
245
+ - `available_actions`
246
+ - `available_data_sources`
247
+ - `strategic_state`
248
+ - `strategic_assets`
249
+ - `asset_alerts`
250
+ - `source_packets`
251
+ - `training_source_packets`
252
+ - `live_source_packets`
253
+ - `projection`
254
+
255
+ ### ObservationProjection
256
+
257
+ This explains how messy the entity’s current view is.
258
+
259
+ Important fields:
260
+
261
+ - `mode`
262
+ - `worldview_reliability`
263
+ - `delayed_source_count`
264
+ - `contested_source_count`
265
+ - `contradiction_packet_count`
266
+ - `contradiction_topics`
267
+ - `obscured_metric_count`
268
+ - `notes`
269
+
270
+ Frontend rule:
271
+
272
+ Show this clearly. Do not present entity observations as perfect truth.
273
+
274
+ ### EntityModelBinding
275
+
276
+ Per-entity provider/runtime config.
277
+
278
+ Important fields:
279
+
280
+ - `provider`
281
+ - `model_name`
282
+ - `configured`
283
+ - `ready_for_inference`
284
+ - `decision_mode`
285
+ - `supports_tool_calls`
286
+ - `supports_structured_output`
287
+ - `action_tools`
288
+ - `observation_tools`
289
+ - `notes`
290
+
291
+ ### ProviderAgentDiagnostics
292
+
293
+ Per-entity runtime telemetry for provider-backed execution.
294
+
295
+ Important fields:
296
+
297
+ - `agent_id`
298
+ - `provider`
299
+ - `model_name`
300
+ - `configured`
301
+ - `ready_for_inference`
302
+ - `decision_mode`
303
+ - `status`
304
+ - `request_count`
305
+ - `success_count`
306
+ - `error_count`
307
+ - `consecutive_failures`
308
+ - `last_latency_ms`
309
+ - `avg_latency_ms`
310
+ - `last_success_at`
311
+ - `last_error_at`
312
+ - `last_error`
313
+
314
+ ### ActionLogEntry
315
+
316
+ Per-action activity feed row.
317
+
318
+ Important fields:
319
+
320
+ - `turn`
321
+ - `actor`
322
+ - `action_type`
323
+ - `summary`
324
+ - `target`
325
+ - `reward_total`
326
+ - `metadata`
327
+
328
+ Use this for the entity activity log.
329
+
330
+ ### ReactionLogEntry
331
+
332
+ Structured “public release -> entity reaction” object.
333
+
334
+ Important fields:
335
+
336
+ - `event_id`
337
+ - `turn`
338
+ - `source`
339
+ - `latent_event_ids`
340
+ - `signals`
341
+ - `actor_outcomes`
342
+ - `oversight_triggered`
343
+ - `tension_before`
344
+ - `tension_after`
345
+ - `market_stress_after`
346
+ - `oil_pressure_after`
347
+
348
+ This is the easiest object for a live news feed.
349
+
350
+ ### AgentBeliefState
351
+
352
+ Persistent per-entity memory.
353
+
354
+ Important fields:
355
+
356
+ - `agent_id`
357
+ - `dominant_topics`
358
+ - `beliefs`
359
+ - `last_revision_turn`
360
+
361
+ ### AgentBeliefEntry
362
+
363
+ One remembered belief/hypothesis for an entity.
364
+
365
+ Important fields:
366
+
367
+ - `belief_id`
368
+ - `topic`
369
+ - `summary`
370
+ - `confidence`
371
+ - `status`
372
+ - `source`
373
+ - `suspected_agents`
374
+ - `related_event_ids`
375
+ - `confirmation_count`
376
+ - `contradiction_count`
377
+ - `last_confirmed_turn`
378
+ - `last_updated_turn`
379
+
380
+ Belief behavior:
381
+
382
+ - entities do not weight all topics equally
383
+ - beliefs decay gradually when no new confirmation arrives
384
+ - contradictory evidence usually downgrades a belief first before fully disconfirming it
385
+ - two entities can see the same event and end up with different confidence because doctrine priors differ
386
+
387
+ ### Latent Events
388
+
389
+ The backend now treats event flow as first-class, not just metric movement.
390
+
391
+ Main schema:
392
+
393
+ - `LatentEvent`
394
+
395
+ Key fields:
396
+
397
+ - `event_id`
398
+ - `topic`
399
+ - `status`
400
+ - `severity`
401
+ - `visibility`
402
+ - `reliability`
403
+ - `origin`
404
+ - `affected_agents`
405
+ - `affected_assets`
406
+ - `started_at_turn`
407
+ - `last_updated_turn`
408
+ - `decay_rate`
409
+ - `linked_event_ids`
410
+ - `narratives`
411
+
412
+ What this means:
413
+
414
+ - scenarios can seed hidden events
415
+ - incoming news creates or updates hidden events
416
+ - entity actions create hidden events
417
+ - linked spillover events can be spawned
418
+ - public event feeds are projected from latent events
419
+ - source contradictions now key off latent events, not only metric heuristics
420
+
421
+ ### ReactionActorOutcome
422
+
423
+ One entity’s response to one news event.
424
+
425
+ Important fields:
426
+
427
+ - `agent_id`
428
+ - `action`
429
+ - `reward_total`
430
+ - `decision_mode`
431
+
432
+ ## What Is Good To Go
433
+
434
+ Backend pieces that are ready for frontend integration:
435
+
436
+ - session lifecycle
437
+ - live source monitoring
438
+ - latent truth vs public state split
439
+ - latent event engine and event-driven public projection
440
+ - persistent belief state per entity
441
+ - doctrine-specific belief revision and false-belief persistence
442
+ - contradiction-aware observation projection
443
+ - per-entity rewards
444
+ - per-entity action logging
445
+ - structured reaction logging for public/news events
446
+ - seeded scenarios
447
+ - benchmark runs
448
+ - provider bindings
449
+ - real provider execution with explicit fallback
450
+ - provider runtime diagnostics
451
+ - OpenEnv-compatible environment flow
452
+
453
+ ## What Is Still Left
454
+
455
+ ### Backend
456
+
457
+ 1. Persist replay history.
458
+ `recent_traces`, `action_log`, `reaction_log`, and latent event evolution are still rolling in-memory state, not durable history.
459
+
460
+ 2. Deepen the latent event graph.
461
+ The event engine now exists, but it can still be improved with stronger causal chains, event merging, event resolution rules, and richer cross-front propagation.
462
+
463
+ 3. Add event-delta summaries.
464
+ A compact backend-generated turn delta would make replay/debug views much easier to build.
465
+
466
+ 4. Keep hardening provider execution.
467
+ Retries and diagnostics now exist. The next step is richer classification for rate limits, timeout classes, and provider-specific retry traces.
468
+
469
+ 5. Add a durable event archive or export path.
470
+ There is still no persistent event timeline outside in-memory session state.
471
+
472
+ ### Frontend
473
+
474
+ 1. Build the app shell around:
475
+ - `/capabilities`
476
+ - `/scenarios`
477
+ - `/sessions`
478
+ - `/sessions/{id}`
479
+ - `/sessions/{id}/step`
480
+ - `/sessions/{id}/news`
481
+ - `/sessions/{id}/reactions`
482
+ - `/sessions/{id}/providers/diagnostics`
483
+ - `/sessions/{id}/live`
484
+ - `/sessions/{id}/sources/monitor`
485
+
486
+ 2. Add entity cards that show:
487
+ - projected state
488
+ - persistent belief topics / belief memory
489
+ - reward total
490
+ - provider readiness
491
+ - provider health/latency
492
+ - latest action
493
+ - uncertainty/projection info
494
+
495
+ 3. Add a live news/reaction timeline.
496
+ Use `/sessions/{id}/news` for ingestion and `reaction_log` or `/sessions/{id}/reactions` for history.
497
+
498
+ 4. Add latent event visibility to operator surfaces.
499
+ Show:
500
+ - key latent event topics
501
+ - event severity
502
+ - event visibility
503
+ - linked spillovers
504
+
505
+ 5. Add a source-health panel.
506
+ Use `/sessions/{id}/sources/monitor`.
507
+
508
+ 6. Add replay panels.
509
+ Use `recent_traces`, `action_log`, `reaction_log`, and `world.latent_events`.
510
+
511
+ 7. Make uncertainty visible.
512
+ Show reliability, contradiction topics, delayed sources, and contested-source counts.
513
+
514
+ ## Rule Of Thumb For Frontend
515
+
516
+ If the UI means:
517
+
518
+ - “what the entity currently sees (partial, delayed, possibly contradictory)” -> use `session.observations[agent_id]`
519
+ - “what the entity currently remembers/believes across turns” -> use `session.belief_state[agent_id]`
520
+ - “what the operator/debugger sees” -> use `session.world`
521
+ - “what hidden developments are driving the sim” -> use `session.world.latent_events`
522
+ - “what the backend can execute” -> use `session.model_bindings`
523
+ - “what just happened on a turn” -> use `session.action_log` and `session.recent_traces`
524
+ - “what public news triggered reactions” -> use `session.reaction_log`
DATA.md ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Data Handoff
2
+
3
+ ## Chosen Base Model
4
+
5
+ Use:
6
+
7
+ - `Qwen/Qwen3-8B`
8
+
9
+ Why this is the best default for the `2025-01 -> 2026-01` post-training window:
10
+
11
+ - it was released inside the required time frame
12
+ - it is available on Hugging Face
13
+ - it is strong enough for structured action + prediction output
14
+ - it is still realistic to run six separate entity post-training jobs on it
15
+
16
+ This is the recommended first real base model for all six entities.
17
+
18
+ ## What I Added For Data
19
+
20
+ The repo already had:
21
+
22
+ - synthetic seed replay JSON files under [backend/src/trenches_env/historical_replays](backend/src/trenches_env/historical_replays)
23
+ - an OpenEnv replay training path
24
+ - a training CLI that consumes replay JSON with the `HistoricalReplayDefinition -> HistoricalEvent` schema
25
+
26
+ What I added is the first path from real historical sources into that same replay schema.
27
+
28
+ ### New Files
29
+
30
+ - [backend/src/trenches_env/historical_collection.py](backend/src/trenches_env/historical_collection.py)
31
+ - builds historical source profiles from the existing source manifest
32
+ - derives historical domains from allowlisted agent sources
33
+ - defines the `2025` and `2026` collection windows
34
+ - dedupes collected articles
35
+ - converts collected articles into the exact replay event schema used by training
36
+
37
+ - [backend/src/trenches_env/historical_collection_cli.py](backend/src/trenches_env/historical_collection_cli.py)
38
+ - CLI collector
39
+ - queries the GDELT DOC API month by month
40
+ - writes raw article audit files
41
+ - writes replay JSON files in the same schema as the existing synthetic seeds
42
+
43
+ - [backend/tests/test_historical_collection.py](backend/tests/test_historical_collection.py)
44
+ - validates source-profile extraction
45
+ - validates article -> replay-event conversion
46
+ - validates replay JSON compatibility with the existing historical replay loader
47
+
48
+ ## What Source Data It Uses
49
+
50
+ The collector starts from the existing [backend/src/trenches_env/source_manifest.json](backend/src/trenches_env/source_manifest.json).
51
+
52
+ That means it does not invent a separate source universe. It reuses the current project’s aligned sources, then extracts historical domains from them. In practice this means it leans on the project’s existing training-core sources such as:
53
+
54
+ - Reuters and wire-style reporting
55
+ - official government / ministry sources
56
+ - regional English-language outlets already assigned to the entities
57
+ - market / shipping / sanctions / diplomacy sources already present in the manifest
58
+
59
+ For historical collection, it converts those sources into domain-filtered GDELT queries and collects article candidates month by month.
60
+
61
+ ## Output Files
62
+
63
+ The collector writes two outputs per run.
64
+
65
+ ### 1. Replay JSON
66
+
67
+ Path example:
68
+
69
+ - `backend/src/trenches_env/historical_replays/us_historical_2025.json`
70
+
71
+ This matches the same structure as the existing synthetic seed files:
72
+
73
+ - `replay_id`
74
+ - `name`
75
+ - `description`
76
+ - `training_agent`
77
+ - `events[]`
78
+
79
+ Each event matches the current training schema:
80
+
81
+ - `event_id`
82
+ - `timestamp`
83
+ - `topic`
84
+ - `region`
85
+ - `actors`
86
+ - `targets`
87
+ - `severity`
88
+ - `summary`
89
+ - `public_summary`
90
+ - `source_type`
91
+ - `confirmed`
92
+ - `tags`
93
+ - `impact`
94
+
95
+ ### 2. Raw Audit JSONL
96
+
97
+ Path example:
98
+
99
+ - `backend/tmp-historical-raw/us_historical_2025.articles.jsonl`
100
+
101
+ Each line contains:
102
+
103
+ - `article_id`
104
+ - `agent_id`
105
+ - `source_id`
106
+ - `source_name`
107
+ - `title`
108
+ - `url`
109
+ - `domain`
110
+ - `timestamp`
111
+ - `query`
112
+ - `window_id`
113
+
114
+ This is the provenance trail for curator review.
115
+
116
+ ## Date Windows
117
+
118
+ The collector currently supports:
119
+
120
+ - `2025` -> `2025-01-01` through `2026-01-01`
121
+ - `2026` -> `2026-01-01` through the current day at collection time
122
+
123
+ Important note:
124
+
125
+ As of March 7, 2026, `2026` cannot honestly mean `2026-01-01 -> 2027-01-01` yet. The collector clamps future end dates to the current day so it does not pretend future historical data exists.
126
+
127
+ ## What Is Real vs Heuristic
128
+
129
+ Real:
130
+
131
+ - source alignment from the project’s own source manifest
132
+ - historical article collection via GDELT
133
+ - raw audit/provenance files
134
+ - replay JSON output in the exact schema the training system already consumes
135
+
136
+ Heuristic:
137
+
138
+ - topic classification from article titles
139
+ - severity classification from article titles
140
+ - dedupe logic
141
+ - actor/target inference
142
+ - event `impact` generation
143
+
144
+ That heuristic layer is intentional. It gives you a bootstrap pipeline from real historical articles into replay training data, but the resulting replay should still be curator-reviewed before production post-training.
145
+
146
+ ## Commands
147
+
148
+ From repo root:
149
+
150
+ ```bash
151
+ backend/.venv/bin/python -m trenches_env.historical_collection_cli \
152
+ --training-agent us \
153
+ --window 2025 \
154
+ --window 2026 \
155
+ --max-records-per-query 50 \
156
+ --max-events 128 \
157
+ --output-dir backend/src/trenches_env/historical_replays \
158
+ --raw-dir backend/tmp-historical-raw
159
+ ```
160
+
161
+ All entities:
162
+
163
+ ```bash
164
+ backend/.venv/bin/python -m trenches_env.historical_collection_cli \
165
+ --training-agent all \
166
+ --window 2025 \
167
+ --window 2026 \
168
+ --max-records-per-query 50 \
169
+ --max-events 128 \
170
+ --output-dir backend/src/trenches_env/historical_replays \
171
+ --raw-dir backend/tmp-historical-raw
172
+ ```
173
+
174
+ ## Docs Updated
175
+
176
+ I also updated:
177
+
178
+ - [backend/TRAINING_RUNBOOK.md](backend/TRAINING_RUNBOOK.md)
179
+ - [backend/TRAINING_FLOW.md](backend/TRAINING_FLOW.md)
180
+ - [backend/POST_TRAINING_PLAN.md](backend/POST_TRAINING_PLAN.md)
181
+ - [backend/pyproject.toml](backend/pyproject.toml)
182
+
183
+ So the collection path is now documented and exposed as a real CLI entry point.
184
+
185
+ ## Verification
186
+
187
+ The added data-collection path was verified locally with:
188
+
189
+ ```bash
190
+ PYTHONPYCACHEPREFIX=/tmp/trenches-pyc python -m py_compile \
191
+ backend/src/trenches_env/historical_collection.py \
192
+ backend/src/trenches_env/historical_collection_cli.py
193
+ ```
194
+
195
+ ```bash
196
+ cd backend
197
+ uv run --extra dev python -m pytest \
198
+ tests/test_historical_collection.py \
199
+ tests/test_openenv_adapter.py \
200
+ tests/test_server.py -q
201
+ ```
202
+
203
+ Result:
204
+
205
+ - `20 passed in 8.78s`
206
+
207
+ ## Handoff
208
+
209
+ What is ready now:
210
+
211
+ - a chosen base model: `Qwen/Qwen3-8B`
212
+ - a collector path from real historical sources into the existing replay schema
213
+ - raw provenance output
214
+ - replay JSON output compatible with the current OpenEnv training flow
215
+
216
+ What still needs to happen next:
217
+
218
+ 1. Run the collector for each entity.
219
+ 2. Curator-review the raw article audit files and the generated replay JSON.
220
+ 3. Replace the current synthetic seed replays with reviewed historical replays.
221
+ 4. Update the actual training runs to use `Qwen/Qwen3-8B` as the base model.
222
+ 5. Keep the old synthetic seeds only for smoke tests.
223
+
224
+ One important truth:
225
+
226
+ The collector is the first real data path, but it does not magically make the replay production-grade by itself. The training-ready replay still needs human review because event impact shaping is currently heuristic.
Dockerfile ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim
2
+
3
+ ENV PYTHONDONTWRITEBYTECODE=1 \
4
+ PYTHONUNBUFFERED=1 \
5
+ NEXT_TELEMETRY_DISABLED=1 \
6
+ TRENCHES_ENTITIES_ROOT=/app/entities \
7
+ PORT=7860 \
8
+ BACKEND_PROXY_TARGET=http://127.0.0.1:8000 \
9
+ NEXT_PUBLIC_API_BASE_URL=/backend-api \
10
+ NEXT_PUBLIC_VERCEL_API_BASE=/api \
11
+ NEXT_PUBLIC_ENABLE_SOURCE_LOGIC=true
12
+
13
+ WORKDIR /app
14
+
15
+ RUN apt-get update \
16
+ && apt-get install -y --no-install-recommends curl ca-certificates unzip \
17
+ && rm -rf /var/lib/apt/lists/*
18
+
19
+ RUN curl -fsSL https://bun.sh/install | bash
20
+ ENV PATH="/root/.bun/bin:${PATH}"
21
+
22
+ COPY package.json bun.lock* ./
23
+ RUN bun install --frozen-lockfile
24
+
25
+ COPY next.config.ts postcss.config.mjs tsconfig.json next-env.d.ts ./
26
+ COPY app ./app
27
+ COPY src ./src
28
+
29
+ COPY backend/pyproject.toml backend/README.md ./backend/
30
+ COPY backend/src ./backend/src
31
+ COPY entities ./entities
32
+
33
+ RUN pip install --no-cache-dir ./backend
34
+ RUN bun run build
35
+
36
+ COPY scripts/start-space.sh ./scripts/start-space.sh
37
+ RUN chmod +x ./scripts/start-space.sh
38
+
39
+ EXPOSE 7860
40
+
41
+ CMD ["./scripts/start-space.sh"]
Dockerfile.frontend ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM oven/bun:1.2.22-alpine
2
+
3
+ WORKDIR /app
4
+
5
+ COPY package.json bun.lock* ./
6
+ RUN bun install --frozen-lockfile
7
+
8
+ COPY next.config.ts postcss.config.mjs tsconfig.json next-env.d.ts ./
9
+ COPY app ./app
10
+ COPY src ./src
11
+
12
+ EXPOSE 3000
13
+
14
+ CMD ["bun", "run", "dev"]
ENTITIES.md ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ENTITIES.md: Detailed Breakdown of Agents in Fog of War Diplomacy Simulator
2
+
3
+ This document provides a comprehensive breakdown of the 6 agents in the Fog of War Diplomacy Simulator, an OpenEnv-based multi-agent RL environment simulating the 2026 US-Israel-Iran geopolitical crisis. Each agent represents a key entity with a unique "identity" (embedded via LLM system prompts), personalized data feeds (filtered from World Monitor's 435+ RSS sources and other integrations), models, tools, observation spaces, and reward considerations. The goal is to foster emergent behaviors like coalition formation, deception, and de-escalation under partial observability.
4
+
5
+ Agents receive consistent, role-specific information feeds through periodic queries to World Monitor APIs (e.g., every 5-10 turns or on-demand via tool calls). This ensures "fog of war"—no agent sees the full picture, but data is reliable and live-updated. Rewards are computed with a shared multi-component formula whose weights are tuned per agent to align with each agent's adversarial "defeat enemies while staying strong" mindset.
6
+
7
+ ## General Setup Guidance
8
+
9
+ ### How to Use OpenEnv
10
+
11
+ OpenEnv is a Gymnasium-compatible RL library for agentic environments. Extend `openenv.Env` to create your simulator:
12
+
13
+ - **Core Class**: Define `FogOfWarDiplomacy` with `reset()` (initialize crisis state, e.g., tension at 50%), `step(actions)` (process text actions from LLMs, update world probabilistically), and per-agent observations/rewards as dicts.
14
+ - **Multi-Agent Handling**: Use dict-based spaces (e.g., `observations = {"US": obs_us, ...}`) for partial observability.
15
+ - **Training**: Wrap with RL libraries like TRL (Hugging Face) or RLlib. Loop: `env.reset()` → LLM agents generate actions via prompts → `env.step(actions)` → Update policies with PPO/GRPO on rewards.
16
+ - **Deployment**: Dockerize as FastAPI server (expose `/reset`, `/step`). Client: `openenv.client` for remote training.
17
+ - **Integration Tips**: Add World Monitor queries in `step()` for live data; use oversight as a wrapper class.
18
+
19
+ ### Setting Up Rewards
20
+
21
+ Rewards are sparse/delayed for long-horizon planning, calculated per agent in `step()`:
22
+ \[ r_t = w_1 \cdot C_t + w_2 \cdot E_t + w_3 \cdot M_t + w_4 \cdot B_t \]
23
+
24
+ - \( C_t \): Coalition Stability (\( \frac{\# \text{allied} - \# \text{betrayals}}{\# \text{agents}} \)).
25
+ - \( E_t \): Escalation Penalty (\( - \sigma(2 \cdot \Delta \text{tension}_t) \)).
26
+ - \( M_t \): Market Gain (\( \frac{\Delta \text{oil} + \Delta \text{sanctions}}{2} \)).
27
+ - \( B_t \): Belief Alignment (\( 1 - |I_{\text{inferred}} - I_{\text{true}}| \)).
28
+ - Weights (\( w \)): Customized per agent (e.g., US emphasizes \( M_t \)); oversight scales by 0.5 on high risk.
29
+ - Implementation: NumPy in env code; normalize to [-1,1]. Train via RL to amplify entity-specific goals (e.g., penalize weakness).
30
+
31
+ ### Representing Entities
32
+
33
+ - **Identity Embedding**: Use system prompts in LLM pipelines (e.g., Hugging Face Transformers). Prepend to every inference: "You are [entity]. Prioritize [goals]. Forget unrelated knowledge—focus on defeating enemies while building strength."
34
+ - **Consistency**: Fine-tune with RLHF on entity-aligned trajectories (reward persona adherence). Agents "forget" via prompt engineering and training masks.
35
+
36
+ ### Consistent Feed of Information
37
+
38
+ - **Mechanism**: In `step()`, env queries World Monitor APIs (deployed on Vercel/Railway) for filtered data. Agents access via tool calls in prompts (e.g., "Query RSS for polls").
39
+ - **Consistency**: Poll every 5 turns or on events; cache in env state (Redis). Partial observability: each agent gets only 20-50% of the relevant snippets, injected into its obs dict.
40
+ - **Tools for Agents**: Text-based function calling (e.g., "query_intel(keywords)"); oversight has meta-tools.
41
+ - **Fallback**: Procedural mocks for offline.
42
+
43
+ ## Agent Breakdowns
44
+
45
+ ### 1. US (Trump Admin / CENTCOM)
46
+
47
+ - **Role/Identity**: Hawkish strategist leading military strikes, sanctions, and alliances. Prompt: "You are the US President in 2026 Iran war. Prioritize alliances and oil stability. Think aggressively: Defeat enemies via superior force, avoid domestic backlash, model incentives to exploit weaknesses."
48
+ - **Model**: Qwen3-8B (shared base across all entities, post-trained per entity via GRPO).
49
+ - **Personalized RSS/Data Feeds** (Filtered via World Monitor APIs, e.g., `/api/geopolitics/v1/filter?agent=US&keywords=polls+markets`):
50
+ - US domestic: Polymarket prediction markets (polls/approval ratings), GDELT US events.
51
+ - Economic: Bloomberg US feeds, commodity dashboard (oil prices).
52
+ - Alliances: AIS vessel tracking (Gulf bases), Sky News Middle East (ally updates).
53
+ - Query Frequency: High on domestic (every turn for polls); stochastic injection for events like "Dow drop".
54
+ - **Tools/Actions**: "impose_sanctions", "propose_alliance", "query_polls", "cyber_command".
55
+ - **Observation Space**: Dict with public news, private intel (allies, polls), market impacts; partial (hides Iran internals).
56
+ - **Rewards Tuning**: High weight on \( M_t \) (markets) and \( C_t \) (alliances); bonus for bluff detection (\( B_t \)).
57
+ - **Training Notes**: RL emphasizes domestic strength; fine-tune on trajectories avoiding "forever war" fatigue.
58
+
59
+ ### 2. Israel (Netanyahu / IDF)
60
+
61
+ - **Role/Identity**: Defensive aggressor focused on regime change and border security. Prompt: "You are Israel's PM/IDF in 2026 crisis. Eliminate threats decisively. Reason multi-step: Defeat Iran proxies, form unbreakable coalitions, infer hidden aggressions."
62
+ - **Model**: Qwen3-8B (shared base across all entities, post-trained per entity via GRPO).
63
+ - **Personalized RSS/Data Feeds** (e.g., `/api/geopolitics/v1/filter?agent=Israel&keywords=threats+lebanon`):
64
+ - Regional threats: OREF rocket alerts, ACLED conflict data (Lebanon/Syria).
65
+ - Defense: Sky News Middle East, Al Jazeera regional (proxy movements).
66
+ - Borders: MTV Lebanon streams/webcams, NASA FIRMS (strike fires).
67
+ - Query Frequency: Event-triggered (e.g., on "clash" headlines); consistent northern front updates.
68
+ - **Tools/Actions**: "launch_strike", "border_defense", "query_alerts", "coalition_propose".
69
+ - **Observation Space**: Public escalations, private troop intel; hides Gulf economics.
70
+ - **Rewards Tuning**: Emphasize \( E_t \) (penalize escalations if not decisive) and \( B_t \) (belief on proxies).
71
+ - **Training Notes**: Optimize for high-pressure recovery; RL on decapitation scenarios.
72
+
73
+ ### 3. Iran (IRGC / Interim Leadership)
74
+
75
+ - **Role/Identity**: Resilient defender using proxies and asymmetry. Prompt: "You are Iran's IRGC post-Khamenei. Defend sovereignty via deception. Survive escalations: Weaken foes indirectly, defeat through attrition while maintaining internal strength."
76
+ - **Model**: Qwen3-8B (shared base across all entities, post-trained per entity via GRPO).
77
+ - **Personalized RSS/Data Feeds** (e.g., `/api/geopolitics/v1/filter?agent=Iran&keywords=proxies+oil`):
78
+ - Proxies: Telegram OSINT channels (militias), GDELT Iran events.
79
+ - Internal: NASA FIRMS (strike impacts), commodity dashboard (Hormuz oil).
80
+ - Retaliation: ACLED global conflicts (proxy actions).
81
+ - Query Frequency: Real-time on proxies (WebSockets); consistent for losses.
82
+ - **Tools/Actions**: "activate_proxy", "missile_launch", "query_osint", "deception_campaign".
83
+ - **Observation Space**: Private morale/funding, public strikes; hides US polls.
84
+ - **Rewards Tuning**: High on \( E_t \) (survive escalations) and \( M_t \) (oil resilience).
85
+ - **Training Notes**: RL for deception emergence; fine-tune on asymmetric wins.
86
+
87
+ ### 4. Hezbollah (Proxy Swarm Leader)
88
+
89
+ - **Role/Identity**: Opportunistic insurgent in asymmetric warfare. Prompt: "You are Hezbollah's leader. Swarm enemies with minimal resources. Infer weaknesses: Defeat via guerrilla tactics, align with Iran while exploiting gaps for strength."
90
+ - **Model**: Qwen3-8B (shared base across all entities, post-trained per entity via GRPO).
91
+ - **Personalized RSS/Data Feeds** (e.g., `/api/geopolitics/v1/filter?agent=Hezbollah&keywords=border+swarms`):
92
+ - Warfare: Telegram OSINT, ACLED Lebanon clashes.
93
+ - Morale: Al Jazeera proxies, border webcams/videos.
94
+ - Funding: Filtered RSS (Iran ties).
95
+ - Query Frequency: High on borders (streams); event-based for swarms.
96
+ - **Tools/Actions**: "drone_swarm", "asymmetric_strike", "query_border", "morale_boost".
97
+ - **Observation Space**: Proxy reports, limited global; hides market data.
98
+ - **Rewards Tuning**: Bonus on \( C_t \) (Iran alignment) and \( B_t \) (infer Israel bluffs).
99
+ - **Training Notes**: Train for sub-agent spawning; RL on opportunistic plays.
100
+
101
+ ### 5. Gulf Coalition (Saudi/UAE/Qatar)
102
+
103
+ - **Role/Identity**: Pragmatic hedger balancing neutrality and security. Prompt: "You are the Gulf Coalition. Protect markets selectively. Hedge alliances: Defeat disruptions economically, stay strong via resource leverage without full commitment."
104
+ - **Model**: Qwen3-8B (shared base across all entities, post-trained per entity via GRPO).
105
+ - **Personalized RSS/Data Feeds** (e.g., `/api/market/v1/filter?agent=Gulf&keywords=oil+security`):
106
+ - Energy: Commodity dashboard (oil shocks), Bloomberg Gulf feeds.
107
+ - Security: AIS Hormuz vessels, finance variant (market data).
108
+ - Neutrality: Climate/anomaly APIs (disruptions).
109
+ - Query Frequency: Consistent markets (every turn); triggered on blockades.
110
+ - **Tools/Actions**: "hedge_neutrality", "resource_allocate", "query_markets", "evade_blockade".
111
+ - **Observation Space**: Economic ripples, partial alliances; hides proxy internals.
112
+ - **Rewards Tuning**: Heavy on \( M_t \) (markets) and \( C_t \) (hedging).
113
+ - **Training Notes**: RL for balanced neutrality; fine-tune on ripple effects.
114
+
115
+ ### 6. Oversight Agent (Fleet AI Meta-Layer)
116
+
117
+ - **Role/Identity**: Impartial auditor for scalable monitoring. Prompt: "You are an AI overseer. Analyze drifts probabilistically. Explain/intervene neutrally: Ensure alignment without bias, focusing on crisis de-escalation."
118
+ - **Model**: Qwen3-8B (shared base across all entities, post-trained per entity via GRPO).
119
+ - **Personalized RSS/Data Feeds** (e.g., `/api/geopolitics/v1/synthesized?scope=global`):
120
+ - Meta: Full AI-briefs, Country Instability Index, hotspot scores.
121
+ - Aggregated: RAG headline memory (cross-agent).
122
+ - Query Frequency: Every step for traces; real-time escalations.
123
+ - **Tools/Actions**: "analyze_drift", "generate_explanation", "intervene_realign", "query_global".
124
+ - **Observation Space**: Aggregated traces, beliefs; no direct actions.
125
+ - **Rewards Tuning**: Tied to primaries (e.g., bonus if reduces \( E_t \)); self-reward on accuracy.
126
+ - **Training Notes**: Meta-RL; fine-tune on intervention efficacy.
127
+
128
+ This setup ensures agents are fully representative, with consistent live feeds driving adaptive, entity-aligned behaviors in OpenEnv. For code examples, see the main repo.
FLOW.md ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Trenches — System Flow
2
+
3
+ ## High-Level Architecture
4
+
5
+ ```mermaid
6
+ graph TB
7
+ subgraph Frontend ["Frontend (Next.js · port 3000)"]
8
+ Globe["🌍 Mapbox Globe"]
9
+ TopBar["Top Bar (Trenches + Stats)"]
10
+ News["📰 News Feed Panel"]
11
+ Activity["📋 Activity Log Panel"]
12
+ Chat["💬 Chat Panel"]
13
+ Controls["🎮 Map Controls"]
14
+ Timeline["⏱️ Timeline Scrubber (planned)"]
15
+ end
16
+
17
+ subgraph API ["Next.js API Routes (/api)"]
18
+ Bootstrap["GET /api/bootstrap"]
19
+ SessionAPI["POST /api/session"]
20
+ StepAPI["POST /api/step"]
21
+ ChatAPI["POST /api/chat"]
22
+ end
23
+
24
+ subgraph Backend ["Backend (FastAPI · port 8000)"]
25
+ Server["FastAPI Server"]
26
+ Env["FogOfWarDiplomacyEnv"]
27
+ SessionMgr["Session Manager"]
28
+ RL["RL / Rewards Engine"]
29
+ Oversight["Oversight Agent"]
30
+ Scenarios["Scenario Engine"]
31
+ SourceHarvester["Source Harvester"]
32
+ ProviderRuntime["Provider Runtime (LLM)"]
33
+ end
34
+
35
+ subgraph Data ["Data Layer"]
36
+ Entities["📁 Entity Packs (6 agents)"]
37
+ SourceManifest["📋 Source Manifest (RSS/OSINT)"]
38
+ LiveFeeds["🔴 Live Feeds (RSS/Telegram/API)"]
39
+ end
40
+
41
+ Globe --- TopBar
42
+ Globe --- News
43
+ Globe --- Activity
44
+ Globe --- Chat
45
+ Globe --- Controls
46
+
47
+ Frontend -->|HTTP| API
48
+ API -->|proxy| Backend
49
+
50
+ Server --> SessionMgr
51
+ SessionMgr --> Env
52
+ Env --> RL
53
+ Env --> Oversight
54
+ Env --> Scenarios
55
+ Env --> SourceHarvester
56
+ Env --> ProviderRuntime
57
+
58
+ SourceHarvester --> LiveFeeds
59
+ SourceHarvester --> SourceManifest
60
+ Env --> Entities
61
+ ```
62
+
63
+ ## Simulation Loop (per turn)
64
+
65
+ ```mermaid
66
+ sequenceDiagram
67
+ participant User as User / Chat
68
+ participant FE as Frontend
69
+ participant API as API Layer
70
+ participant Env as FogOfWarDiplomacyEnv
71
+ participant Sources as Source Harvester
72
+ participant Agents as 6 LLM Agents
73
+ participant OA as Oversight Agent
74
+ participant RL as Rewards Engine
75
+
76
+ User->>FE: Injects event via Chat (fake) or auto-step (real)
77
+ FE->>API: POST /step {actions, external_signals}
78
+ API->>Env: step_session(session, request)
79
+
80
+ Note over Env: Turn increments
81
+
82
+ Env->>Sources: refresh_due_batch()
83
+ Sources-->>Env: Latest RSS/OSINT packets
84
+
85
+ Env->>Env: inject_external_signals (real or fake)
86
+
87
+ Env->>Agents: resolve_policy_actions()
88
+ Note over Agents: Each agent picks action based on<br/>partial observations + signals
89
+
90
+ Env->>OA: compute_oversight(world, actions)
91
+ OA-->>Env: Risk score + interventions
92
+ Note over OA: If risk > 0.5, scale rewards<br/>or force re-action
93
+
94
+ Env->>Env: apply_actions → update world state
95
+ Env->>Env: update tension, market, oil
96
+
97
+ Env->>RL: compute_rewards(world, episode)
98
+ Note over RL: r = 0.3·Coalition + 0.4·Escalation<br/>+ 0.2·Market + 0.1·Belief
99
+ RL-->>Env: Per-agent reward breakdowns
100
+
101
+ Env->>Env: build_observations (fog of war)
102
+ Env-->>API: StepSessionResponse {session, oversight, done}
103
+ API-->>FE: Updated state
104
+ FE->>FE: Re-render globe, panels, stats
105
+ ```
106
+
107
+ ## Event Types and Reward Impact
108
+
109
+ ```mermaid
110
+ flowchart LR
111
+ subgraph Real ["Real Events"]
112
+ RSS["RSS/OSINT Feed"]
113
+ Scenario["Scenario Engine"]
114
+ end
115
+
116
+ subgraph Fake ["Fake Events"]
117
+ ChatInput["Chat Injection"]
118
+ end
119
+
120
+ RSS -->|"source: live"| Env["Environment"]
121
+ Scenario -->|"source: env"| Env
122
+ ChatInput -->|"source: manual"| Env
123
+
124
+ Env --> AgentBehavior["Agent Behavior<br/>(all events affect actions)"]
125
+
126
+ Env --> RewardCalc{"Reward Calculation"}
127
+
128
+ RewardCalc -->|"✅ Real events only"| RLSignal["RL Training Signal"]
129
+ RewardCalc -->|"❌ Fake events filtered"| NoReward["No reward impact"]
130
+ ```
131
+
132
+ ## Agent Decision Flow
133
+
134
+ ```mermaid
135
+ flowchart TD
136
+ Obs["Partial Observation<br/>(fog of war filtered)"] --> Agent["Agent (LLM)"]
137
+
138
+ subgraph Context ["Agent Context"]
139
+ Identity["Identity / System Prompt"]
140
+ Intel["Private Intel Briefs"]
141
+ Beliefs["Belief State"]
142
+ Tools["Available Tools"]
143
+ end
144
+
145
+ Context --> Agent
146
+
147
+ Agent --> Action["Choose Action"]
148
+ Action --> Strike["⚔️ Strike"]
149
+ Action --> Negotiate["🤝 Negotiate"]
150
+ Action --> Sanction["💰 Sanction"]
151
+ Action --> Defend["🛡️ Defend"]
152
+ Action --> Intel2["🔍 Intel Query"]
153
+ Action --> Mobilize["🚀 Mobilize"]
154
+ Action --> Deceive["🎭 Deceive"]
155
+
156
+ Strike & Negotiate & Sanction & Defend & Intel2 & Mobilize & Deceive --> Oversight{"Oversight Check"}
157
+
158
+ Oversight -->|"Risk ≤ 0.5"| Execute["Execute Action"]
159
+ Oversight -->|"Risk > 0.5"| Intervene["Intervene / Modify"]
160
+
161
+ Execute --> WorldUpdate["Update World State"]
162
+ Intervene --> WorldUpdate
163
+ ```
HANDOFF.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ The frontend now presents the RL environment as a black intelligence-style operator console instead of a generic dashboard. The main map in src/components/CommandMap.tsx:44 was restyled toward the
2
+ WorldMonitor look: darker basemap treatment, suppressed civilian labels, stronger fog/space treatment, and scanline/grid atmosphere. The app shell in src/App.tsx:161 now builds a monitoring snapshot per
3
+ agent from live session state and renders the monitoring deck beside the map, so the user can watch reward pressure, source health, recent actions, and model posture in one place. IMPROVEMENTS.md is fully
4
+ written at IMPROVEMENTS.md:1.
5
+
6
+ On the backend, reward shaping is no longer mostly shared. backend/src/trenches_env/rl.py:203 now defines doctrine-specific strategic state baselines and per-actor action effects, and backend/src/
7
+ trenches_env/env.py:222 now carries persistent actor_state, applies signal pressure and action pressure into that state, exposes it in observations, flattens geolocated assets for model/viewer use, and
8
+ computes unique reward functions for each entity at backend/src/trenches_env/env.py:892. Type surfaces were aligned in src/lib/types.ts:45, reward coverage was extended in backend/tests/
9
+ test_reward_differentiation.py:5, and the source manifest was regenerated in backend/src/trenches_env/source_manifest.json so the Israel/Hezbollah source specialization matches runtime.
10
+
11
+ What still needs to be done: the monitoring deck supports tool inventory, but entity tools.json packs are not wired into observations yet, so that part of the UI will stay empty until the tool layer is
12
+ integrated. The model labels in the monitoring view are still product-style placeholders, not final checkpoint selections. The map requires VITE_MAPBOX_TOKEN for full rendering, and the frontend still has a
13
+ large-bundle warning on build, so code-splitting is still worth doing before the UI grows further. The bigger roadmap items in IMPROVEMENTS.md:5 are still future work: hidden-world engine, benchmark/
14
+ curriculum harness, and deeper replay/comparison observability.
15
+
16
+ Verification: PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 uv run --extra dev python -m pytest passed with 14 passed, 1 skipped. npm run typecheck passed. npm run build passed, with the existing Vite chunk-size warning
17
+ only.
IMPROVEMENTS.md ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Improvements
2
+
3
+ This document captures the three highest-leverage improvements for the simulator after the MVP. Each one is large enough to materially change training quality, realism, and operator usefulness.
4
+
5
+ ## 1. Hidden-World Engine
6
+
7
+ ### Objective
8
+ Replace the current mostly surface-level world update loop with a canonical latent world state that each entity only perceives through partial, noisy, delayed, and bias-shaped observations.
9
+
10
+ ### Why This Matters
11
+ Right now the simulator has actor-specific rewards and source bundles, but the underlying world can still be too direct and too legible. That makes the task easier than the real problem and increases the risk of reward gaming. A hidden-world engine forces the policies to reason under uncertainty instead of reacting to a clean omniscient state.
12
+
13
+ ### What To Build
14
+ - A canonical state graph for logistics, infrastructure integrity, domestic resilience, proxy health, coalition confidence, chokepoint access, and military readiness.
15
+ - Observation projection layers that transform latent state into actor-specific intel packets with source lag, missingness, confidence, and bias.
16
+ - Source reliability and deception mechanics so models must reason about contradictory or manipulated evidence.
17
+ - Damage persistence so strikes and mobilization affect later turns instead of only the immediate step.
18
+ - Cross-front coupling so a Gulf shock can change Israeli decision quality, US domestic resilience, or Iranian leverage.
19
+
20
+ ### Implementation Shape
21
+ - Keep one hidden state store per session.
22
+ - Each step applies actions to the hidden state first.
23
+ - Each source/tool reads from that hidden state through a projection function.
24
+ - Each actor receives only its projection, not the full state.
25
+ - The viewer dashboard can still render a privileged map and replay, but that view remains unavailable to the models.
26
+
27
+ ### Success Criteria
28
+ - Agents can no longer trivially infer the full world from public state alone.
29
+ - Different source bundles produce meaningfully different beliefs for the same event.
30
+ - Training runs become less brittle and less prone to one-step exploitation.
31
+
32
+ ## 2. Evaluation Harness And Curriculum
33
+
34
+ ### Objective
35
+ Turn training from open-ended sandbox play into measurable policy development with regression protection, seeded scenarios, and staged learning.
36
+
37
+ ### Why This Matters
38
+ The project will improve much faster once performance is measured against doctrine-specific benchmarks rather than by whether the simulator runs. Without evaluation, reward shaping tends to drift and policies often learn artifacts instead of strategy.
39
+
40
+ ### What To Build
41
+ - Seeded scenario packs for shipping crises, border flare-ups, corridor interdiction, domestic unrest, and coalition fracture.
42
+ - Policy scorecards per entity with doctrine-aligned metrics.
43
+ - Reward-gaming checks that detect obviously degenerate policies.
44
+ - Self-play and adversarial evaluation between versions.
45
+ - Curriculum stages that move from narrow tactical cases to full multi-front regional crises.
46
+ - Unsloth-based per-entity post-training loops so each actor can be adapted efficiently without retraining the full stack.
47
+
48
+ ### Implementation Shape
49
+ - Add fixed seeds and replayable scenario fixtures.
50
+ - Run benchmark suites after policy changes.
51
+ - Store reward decomposition and trace outputs for each benchmark run.
52
+ - Train smaller doctrine-specific adapters first, then graduate them into the full environment.
53
+
54
+ ### Success Criteria
55
+ - Every entity has a stable benchmark suite.
56
+ - Policy regressions are visible in CI or scheduled evaluation runs.
57
+ - New reward changes can be justified with measurable gains, not intuition alone.
58
+
59
+ ## 3. Command Dashboard And Replay Observability
60
+
61
+ ### Objective
62
+ Promote the frontend from a session viewer to a real command-and-control observability layer for simulation, training, and debugging.
63
+
64
+ ### Why This Matters
65
+ If a run collapses, you need to know why immediately. A polished dashboard is not just presentation; it is the main debugging surface for understanding model behavior, source health, intervention timing, and reward dynamics.
66
+
67
+ ### What To Build
68
+ - A unified operational map showing entities, geolocated assets, fronts, chokepoints, and coalition links.
69
+ - A per-agent monitoring deck for model status, source health, reward decomposition, and recent actions.
70
+ - Step-by-step replay with diff views between timesteps.
71
+ - Source-ingestion health views so failed feeds are visible.
72
+ - Oversight visibility showing when intervention risk crossed thresholds and what triggered it.
73
+ - Run comparison views so two policies or seeds can be compared side by side.
74
+
75
+ ### Implementation Shape
76
+ - Keep the map viewer privileged for the human operator only.
77
+ - Feed the dashboard from structured session snapshots, not hand-built UI-only state.
78
+ - Surface both raw metrics and human-readable summaries.
79
+ - Preserve replay history so failures can be audited after the run ends.
80
+
81
+ ### Success Criteria
82
+ - A user can explain a bad decision by tracing source inputs, action choice, and reward terms.
83
+ - Replay is fast enough to inspect long runs without digging through logs.
84
+ - The dashboard is useful for both live demos and offline training analysis.
PLAN.md ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Fog of War Diplomacy Simulator
2
+
3
+ ## Overview
4
+
5
+ The Fog of War Diplomacy Simulator is an innovative OpenEnv-based multi-agent reinforcement learning (RL) environment designed for the OpenEnv Hackathon under **Statement 1: Multi-Agent Interactions**, with a focus on the **Fleet AI Sub-Theme: Scalable Oversight**. This project simulates the volatile 2026 US-Israel-Iran geopolitical crisis—drawing from real-time events like airstrikes on Tehran, retaliatory missile barrages across the Gulf, threats to the Strait of Hormuz, leadership assassinations (e.g., Khamenei's fallout), US naval engagements (e.g., sub sinkings), domestic political upheavals (e.g., Trump's mid-war oustings of figures like Noem and Mullin), market crashes (Dow drops of 800+ points), and public opinion shifts (e.g., 59% disapproval polls)—to train LLM agents in emergent strategic behaviors, theory-of-mind reasoning, and de-escalation tactics.
6
+
7
+ At its core, the simulator creates a partially observable "fog-of-war" world where agents negotiate coalitions, manage deceptions, and respond to stochastic "black swan" events. A dedicated oversight agent monitors and intervenes scalably, preventing cascading failures. By integrating live global data feeds (via a forked World Monitor integration), the environment pushes the boundaries of LLM training in adversarial, long-horizon multi-agent settings—addressing the unsolved challenge of preparing AI for real-world crises where misjudgment could exacerbate global instability.
8
+
9
+ This project is difficult and untried due to its scale: combining infinite-horizon partial observability, emergent deception in high-dimensional state spaces, live-trending stochasticity, and recursive oversight mechanisms that risk computational explosion or reward hacking. The expected outcome is an environment that trains LLMs for scalable oversight in complex multi-actor crises, fostering resilient, explainable AI behaviors amid 2026-style volatility.
10
+
11
+ ## Key Features
12
+
13
+ - **Multi-Agent Dynamics**: 6 LLM agents representing key geopolitical entities, engaging in cooperation, competition, negotiation, and coalition formation.
14
+ - **Partial Observability and Fog of War**: Agents receive personalized, incomplete views of the world state, forcing inference of hidden incentives and beliefs.
15
+ - **Live Data Integration**: Real-time ingestion from 435+ RSS feeds, Telegram OSINT, video streams, and structured data sources (via World Monitor fork) for dynamic event injection.
16
+ - **Scalable Oversight**: A meta-agent analyzes behaviors, calculates risks, and intervenes using probabilistic formulas, aligning with Fleet AI's emphasis on monitoring complex settings.
17
+ - **RL Training Loop**: Agents undergo post-training via methods like PPO/GRPO, with sparse rewards encouraging de-escalation while maintaining adversarial "defeat enemies" mindsets.
18
+ - **Centralized Dashboard**: Military-themed UI for monitoring all agents from a single command center, with per-agent tabs showing personalized intel and actions.
19
+ - **Dockerized Deployment**: Fully containerized for reproducibility, scalability, and hackathon judging (e.g., upload to Hugging Face Hub).
20
+ - **Hackathon Alignment**: Builds a realistic multi-actor environment for task discovery and achievement, with bonus potential for Fleet AI prizes.
21
+
22
+ ## Architecture
23
+
24
+ The simulator is built as a Dockerized OpenEnv environment, extending `openenv.Env` for Gymnasium-compatible RL interfaces. It runs as a FastAPI server in a container, exposing endpoints like `/reset`, `/step`, and `/state` for agent interactions. The architecture emphasizes modularity:
25
+
26
+ - **Core Environment Class (`FogOfWarDiplomacy`)**: Manages the global state, including tension levels, coalitions, and stochastic events. Uses NumPy for probabilistic simulations (e.g., event triggers based on real-time data).
27
+ - **Multi-Agent Setup**: Agents operate in parallel, submitting text-based actions (e.g., "Propose ceasefire with sanctions relief") via LLM prompts. Observations are returned as a dict keyed by agent ID, enforcing partial observability.
28
+ - **Oversight Wrapper (`OversightAgent`)**: A meta-layer that queries primary traces without direct interference, generating explanations and interventions.
29
+ - **Data Ingestion Sidecar**: A forked World Monitor service (cloned from https://github.com/koala73/worldmonitor) runs in a separate Docker container via Docker Compose. It aggregates data from RSS feeds (e.g., Bloomberg, Al Jazeera), Telegram channels, video streams (HLS), webcams, and structured APIs (e.g., ACLED conflicts, Polymarket polls, GDELT events). The env queries this via HTTP/protobuf for filtered, per-agent intel.
30
+ - **Dashboard**: Built with Streamlit or Gradio, themed as a tactical command center (dark greens/blacks, radar overlays, red alerts). Connects to OpenEnv's API for live visualization; features a global map (using Plotly.js), intel streams, and tabbed per-agent views.
31
+ - **Training Integration**: Compatible with RL libraries like TRL (Hugging Face) or TorchForge. Agents train in loops: Reset env → Step with prompts → Update policies via rewards.
32
+
33
+ Processing of data (e.g., RSS feeds) occurs in the World Monitor sidecar:
34
+
35
+ - **Ingestion**: World Monitor polls feeds in real-time (e.g., every 5-10 minutes via cron-like jobs) and stores in a lightweight DB (e.g., SQLite or Redis cache).
36
+ - **Filtering and Distribution**: On env `step()` or `reset()`, the OpenEnv server requests agent-specific subsets (e.g., via `/api/geopolitics/v1/filter?agent=US&keywords=polls`). No agent processes the full dataset—each gets tailored snippets (e.g., US: Polymarket polls; Iran: Telegram proxy reports), maintaining fog of war.
37
+ - **Event Injection**: Parsed data triggers stochastic events (e.g., if "strike" in headline, increase tension by 20% with probability 0.7).
38
+
39
+ Agents do not have individual dashboards; instead, they access data/tools via personalized APIs in their prompts (e.g., "Query RSS for US polls"). The centralized dashboard monitors all, allowing human oversight during training/demos.
40
+
41
+ ## Agents and Models
42
+
43
+ Exactly 6 agents are defined to capture the crisis's core dynamics without combinatorial overload:
44
+
45
+ 1. **US (Trump Admin / CENTCOM)**: Focuses on alliances, sanctions, and domestic stability. Identity: "Hawkish strategist prioritizing oil and polls; defeat enemies via superior force while avoiding backlash."
46
+ - Model: Qwen3-8B (post-trained per entity via GRPO).
47
+ - Personalized Data/Tools: Polymarket polls, Bloomberg US feeds, sanctions imposition API.
48
+
49
+ 2. **Israel (Netanyahu / IDF)**: Emphasizes regime change and border security. Identity: "Defensive aggressor; eliminate threats decisively, model allies' incentives to form unbreakable coalitions."
50
+ - Model: Qwen3-8B (post-trained per entity via GRPO).
51
+ - Personalized Data/Tools: OREF alerts, ACLED Lebanon data, strike simulation tools.
52
+
53
+ 3. **Iran (IRGC / Interim Leadership)**: Coordinates retaliation and proxies. Identity: "Resilient defender; use asymmetry and deception to weaken foes, survive escalations at all costs."
54
+ - Model: Qwen3-8B (post-trained per entity via GRPO).
55
+ - Personalized Data/Tools: Telegram OSINT, GDELT Iran events, missile launch tools.
56
+
57
+ 4. **Hezbollah (Proxy Swarm Leader)**: Handles asymmetric attacks. Identity: "Opportunistic insurgent; swarm enemies with minimal resources, infer hidden weaknesses for strikes."
58
+ - Model: Qwen3-8B (post-trained per entity via GRPO).
59
+ - Personalized Data/Tools: Border webcams, ACLED clashes, drone activation tools.
60
+
61
+ 5. **Gulf Coalition (Saudi/UAE/Qatar)**: Balances energy security and neutrality. Identity: "Pragmatic hedger; protect markets by allying selectively, defeat disruptions through economic leverage."
62
+ - Model: Qwen3-8B (post-trained per entity via GRPO).
63
+ - Personalized Data/Tools: Commodity dashboard, AIS vessel tracking, blockade evasion tools.
64
+
65
+ 6. **Oversight Agent (Fleet AI Meta-Layer)**: Monitors without negotiating. Identity: "Impartial auditor; explain drifts probabilistically, intervene to align without bias."
66
+ - Model: Qwen3-8B (post-trained per entity via GRPO).
67
+ - Personalized Data/Tools: Full synthesized briefs, hotspot scores, intervention APIs.
68
+
69
+ Each agent's "identity" is embedded via system prompts in LLM inference, ensuring focus on "defeating enemies" (adversarial goals) while building strength (e.g., coalitions). During training, agents "forget" irrelevant knowledge, optimizing solely for crisis survival via RL.
70
+
71
+ ## Data Integration with World Monitor
72
+
73
+ Forked from https://github.com/koala73/worldmonitor (AGPL-3.0), this service provides the backbone for live intel:
74
+
75
+ - **Sources**: 435+ RSS (media outlets like Reuters, Sky News), 26 Telegram OSINT channels, 30+ HLS video streams, 22 webcams (e.g., Gulf hotspots), structured feeds (ACLED conflicts, Polymarket markets, GDELT events, NASA FIRMS fires).
76
+ - **Processing**: Self-hosted in Docker; uses Vite/Tauri for frontend (optional), but we leverage its 22 proto-first APIs (e.g., `/api/geopolitics/v1/list-hotspots`) and WebSockets for real-time pushes.
77
+ - **Per-Agent Filtering**: Agents query tailored endpoints (e.g., US: `?filter=polls+us`; Iran: `?filter=proxies+iran`). Agents never request the full dataset—each decides what to pull based on its prompt (e.g., "If tension >50%, query RSS for enemy movements").
78
+ - **Fallbacks**: Cached data for offline demos; procedural mocks if live feeds fail.
79
+
80
+ This setup makes agents adaptive: Live info streams in during training, influencing decisions (e.g., a fresh Polymarket poll shifts US rewards).
81
+
82
+ ## RL Training and Rewards
83
+
84
+ Agents train in an RL loop using OpenEnv's interfaces, with each having independent policy updates (via TRL) while sharing the env. Live data injects during episodes, allowing decisions/actions in real-time simulations. Method: GRPO for multi-agent cooperation.
85
+
86
+ **Rewards**: Sparse and delayed to encourage long-horizon planning. Formula per agent at timestep \( t \):
87
+
88
+ \[ r_t = 0.3 \cdot C_t + 0.4 \cdot E_t + 0.2 \cdot M_t + 0.1 \cdot B_t \]
89
+
90
+ - \( C_t \): Coalition Stability (\( \frac{\# \text{allied} - \# \text{betrayals}}{\# \text{agents}} \)).
91
+ - \( E_t \): Escalation Penalty (\( -\sigma(2 \cdot \Delta \text{tension}_t) \)).
92
+ - \( M_t \): Market Gain (\( \frac{\Delta \text{oil} + \Delta \text{sanctions}}{2} \)).
93
+ - \( B_t \): Belief Alignment (\( 1 - |I_{\text{inferred}} - I_{\text{true}}| \)).
94
+
95
+ Oversight scales rewards by 0.5 on high risk. Rewards are normalized to [-1, 1] and aggregated over episodes of 1000+ turns.
96
+
97
+ ## Oversight Analytical Method
98
+
99
+ Oversight uses belief propagation for risk:
100
+
101
+ 1. Belief Update: \( B'(s') = \eta \sum_s P(o_t | a_t, s') T(s, a_t, s') B(s) \).
102
+
103
+ 2. Risk Score: \( R(a_t) = \sum_{s'} B'(s') \cdot U(s', a_t) \cdot \sigma(2 \cdot (I_{\text{self}} - I_{\text{other}})) \).
104
+
105
+ Intervene if \( R > 0.5 \). Implemented in NumPy for efficiency.
106
+
107
+ ## Installation and Setup
108
+
109
+ 1. **Clone Repo**: `git clone https://github.com/shlawgathon/trenches.git`
110
+ 2. **Frontend**: `bun install && bun run dev` → `http://localhost:3000`
111
+ 3. **Backend**: `cd backend && uv sync && source .venv/bin/activate && uvicorn trenches_env.server:app --reload --port 8000`
112
+ 4. **Training**: `python -m trenches_env.training_cli --model-id Qwen/Qwen3-8B --training-agent us`
113
+ 5. **HF Space**: Live at [AlazarM/trenches](https://huggingface.co/spaces/AlazarM/trenches)
114
+ 6. **Dependencies**: Python 3.12, OpenEnv, FastAPI, NumPy, Hugging Face TRL.
115
+
116
+ ## Contributing and License
117
+
118
+ Open-source under MIT. Contributions welcome for expanding agents or data sources. Built for the OpenEnv Hackathon (March 7-8, 2026).
README.md ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Trenches
3
+ emoji: 🌍
4
+ colorFrom: red
5
+ colorTo: gray
6
+ sdk: docker
7
+ app_port: 7860
8
+ ---
9
+
10
+ <img width="512" height="512" alt="Image Background Remover (1)" src="https://github.com/user-attachments/assets/a1ab0df2-435f-444b-b8a1-36b1a64b55e8" />
11
+
12
+ # Trenches
13
+
14
+ A multi-agent crisis simulator built on [OpenEnv](https://github.com/openenv-ai/openenv). LLM agents navigate a fog-of-war geopolitical scenario — negotiating coalitions, managing deception, and responding to live global events — while a dedicated oversight agent monitors for dangerous escalation.
15
+
16
+ ## Overview
17
+
18
+ Trenches drops six LLM-powered actors into a volatile 2026 Middle East crisis. Each agent operates under partial observability with role-specific intelligence, tools, and incentives. A scalable oversight mechanism intervenes when escalation risk crosses critical thresholds.
19
+
20
+ | Agent | Role | Model |
21
+ | ----------------- | ----------------------------------------------- | -------- |
22
+ | 🇺🇸 United States | Hawkish superpower balancing polls & projection | Qwen3-8B |
23
+ | 🇮🇱 Israel | Regional actor with strike autonomy | Qwen3-8B |
24
+ | 🇮🇷 Iran | Adversary leveraging proxies & deception | Qwen3-8B |
25
+ | 🪖 Hezbollah | Non-state militia with asymmetric tactics | Qwen3-8B |
26
+ | 🛢️ Gulf Coalition | Economic bloc protecting oil & stability | Qwen3-8B |
27
+ | 🔍 Oversight | Monitors all actors, intervenes on escalation | Qwen3-8B |
28
+
29
+ ## Key Features
30
+
31
+ - **Fog of War** — agents see only their role-filtered intel, never the full world state
32
+ - **Live News Injection** — real-time RSS/OSINT feeds drive stochastic in-sim events
33
+ - **Scalable Oversight** — Bayesian risk scoring triggers interventions before runaway escalation
34
+ - **Tool Use** — agents call `query_intel`, `propose_negotiation`, `impose_sanctions`, etc.
35
+ - **Multi-component Rewards** — coalition stability, escalation penalty, market impact, belief alignment
36
+
37
+ ## Stack
38
+
39
+ | Layer | Tech |
40
+ | -------- | ---------------------------------------------------- |
41
+ | Frontend | Next.js 16 · Tailwind v4 · Mapbox GL · Framer Motion |
42
+ | Backend | FastAPI · OpenEnv Core · NumPy |
43
+ | Infra | Docker · Bun · uv |
44
+
45
+ ## Quick Start
46
+
47
+ ```bash
48
+ # Frontend
49
+ bun install
50
+ bun run dev # → http://localhost:3000
51
+
52
+ # Backend
53
+ cd backend
54
+ uv sync
55
+ source .venv/bin/activate
56
+ uvicorn trenches_env.server:app --reload --port 8000
57
+ ```
58
+
59
+ Set your environment variables in `.env.local`:
60
+
61
+ ```
62
+ NEXT_PUBLIC_MAPBOX_TOKEN=...
63
+ NEXT_PUBLIC_API_URL=http://localhost:8000
64
+ ```
65
+
66
+ ## Project Structure
67
+
68
+ ```
69
+ trenches/
70
+ ├── app/ # Next.js app router + API routes
71
+ ├── src/
72
+ │ ├── components/ # Globe, NewsFeed, ActivityLog, ChatPanel
73
+ │ ├── hooks/ # React hooks
74
+ │ └── lib/ # Types, utils, bootstrap
75
+ ├── backend/
76
+ │ ├── src/ # FastAPI server, OpenEnv environment
77
+ │ └── tests/ # Backend test suite
78
+ ├── entities/ # Agent identity configs (US, Israel, Iran, etc.)
79
+ └── scripts/ # Utility scripts
80
+ ```
81
+
82
+ ## License
83
+
84
+ MIT
RL.md ADDED
@@ -0,0 +1,354 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # RL.md: Reinforcement Learning Mechanics in Fog of War Diplomacy Simulator
2
+
3
+ This document describes the RL design for the Fog of War Diplomacy Simulator and, critically, the boundary between what OpenEnv supports directly and what this project must implement on top of OpenEnv.
4
+
5
+ The short version:
6
+
7
+ - OpenEnv is the environment packaging and execution layer.
8
+ - The crisis simulator, reward model, six-agent orchestration, and oversight logic are project code.
9
+ - CTDE, MAPPO, PPO, GRPO, TRL, and RLlib are training-stack choices outside OpenEnv itself.
10
+
11
+ The current repository now includes a native OpenEnv-facing adapter layer around the simulator, while still retaining the older session-oriented FastAPI API used by the dashboard.
12
+
13
+ ---
14
+
15
+ ## OpenEnv Support Boundary
16
+
17
+ ### What OpenEnv directly supports
18
+
19
+ OpenEnv gives us the environment contract and runtime surface:
20
+
21
+ - An async environment interface built around `reset()` and `step(action)`.
22
+ - Environment-side `Action`, `Observation`, and `State` models.
23
+ - Client-side `StepResult` objects that expose:
24
+ - `observation`
25
+ - scalar `reward`
26
+ - `done`
27
+ - Client access through `EnvClient`, typically from a Docker image or a deployed URL.
28
+ - Containerized environment packaging.
29
+ - Optional custom UI support.
30
+
31
+ In other words, OpenEnv is well-suited to hosting the simulator and stepping it remotely, but it is not a multi-agent RL trainer and it does not natively provide Gymnasium-style multi-agent dict rewards or observations.
32
+
33
+ ### What this project layers on top
34
+
35
+ The following are project-level features, not native OpenEnv guarantees:
36
+
37
+ - Six-agent world state with partial observability.
38
+ - Joint action parsing and per-agent observation projection.
39
+ - Per-agent reward vectors and reward breakdowns.
40
+ - Oversight risk scoring and corrective interventions.
41
+ - Curriculum design, CTDE, MAPPO, PPO, GRPO, TRL, and RLlib integration.
42
+ - Live data ingestion plans, RSS/Telegram/video source routing, and demo-only live mode.
43
+ - The current FastAPI session API in this repo.
44
+
45
+ ### Important design consequence
46
+
47
+ OpenEnv expects one action in and one scalar reward out per `step()`. For this simulator, that means one of two patterns:
48
+
49
+ 1. Encode the full six-agent joint move as a single structured OpenEnv action, then expose per-agent details through custom observation fields and `state()`.
50
+ 2. Build a higher-level coordinator outside OpenEnv that manages multiple policies against one shared world state.
51
+
52
+ For the MVP and the current backend shape, pattern `1` is the cleaner fit.
53
+
54
+ ---
55
+
56
+ ## OpenEnv-Aligned Environment Contract
57
+
58
+ The OpenEnv adapter for this project should look conceptually like this:
59
+
60
+ ```python
61
+ from openenv.core import Action, Environment, Observation
62
+ from openenv.core.env_server.types import State
63
+
64
+
65
+ class JointAction(Action):
66
+ actions: dict[str, dict]
67
+
68
+
69
+ class DiplomacyObservation(Observation):
70
+ training_agent: str
71
+ reward_breakdown: dict
72
+
73
+
74
+ class DiplomacyState(State):
75
+ training_agent: str
76
+ world: dict
77
+
78
+
79
+ class DiplomacyEnvironment(Environment):
80
+ @property
81
+ def state(self) -> DiplomacyState:
82
+ return DiplomacyState(
83
+ episode_id=self.session_id,
84
+ step_count=self.turn,
85
+ training_agent=self.training_agent,
86
+ world=self._serialize_world(self.world),
87
+ )
88
+
89
+ async def reset(self):
90
+ self.world = self._initial_world()
91
+ return self._build_observation(self.world)
92
+
93
+ async def step(self, action):
94
+ # `action` is a structured joint action for all six agents.
95
+ joint_actions = self._decode_joint_action(action)
96
+
97
+ self._apply_actions(self.world, joint_actions)
98
+ oversight = self._compute_oversight(self.world, joint_actions)
99
+ self._apply_oversight(self.world, oversight)
100
+
101
+ per_agent_rewards = self._compute_rewards(self.world, joint_actions)
102
+
103
+ # OpenEnv needs a scalar reward. The trainer/coordinator chooses
104
+ # which policy is being optimized for this rollout.
105
+ scalar_reward = per_agent_rewards[self.training_agent]
106
+
107
+ return DiplomacyObservation(
108
+ reward=scalar_reward,
109
+ done=self._is_done(self.world),
110
+ training_agent=self.training_agent,
111
+ reward_breakdown=per_agent_rewards[self.training_agent],
112
+ )
113
+ ```
114
+
115
+ This is the key correction relative to earlier drafts: the simulator may compute rich multi-agent state internally, but the OpenEnv-facing `step()` still emits a single `Observation`, and the client sees a scalar-reward `StepResult`.
116
+
117
+ ---
118
+
119
+ ## Relationship to the Current Repo
120
+
121
+ The core backend simulator is not itself an OpenEnv environment. It is a custom simulator with a FastAPI session layer (now accompanied by the OpenEnv-facing adapter noted in the introduction) that exposes:
122
+
123
+ - session creation/reset
124
+ - live-mode toggles
125
+ - turn stepping
126
+ - structured per-agent observations and reward breakdowns
127
+
128
+ That is still useful. It means the simulator logic is already mostly in place, and the OpenEnv integration work becomes an adapter task rather than a full rewrite.
129
+
130
+ The practical implication is:
131
+
132
+ - `backend/src/trenches_env/env.py` is the world simulator.
133
+ - OpenEnv should wrap or call into that simulator.
134
+ - `RL.md` must not describe current behavior as if the repo is already using OpenEnv natively.
135
+
136
+ ---
137
+
138
+ ## Reward System: Project Logic on Top of OpenEnv
139
+
140
+ Rewards remain a project design choice. OpenEnv does not impose the reward formula; it only transports the scalar training reward through `Observation.reward` and exposes richer environment state through `state()`.
141
+
142
+ ### Design constraints
143
+
144
+ - Reward computation happens after action processing and state updates.
145
+ - Each component should be normalized before weighting.
146
+ - Oversight should modify environment state, not rescale rewards.
147
+ - Hidden incentives should not be used as a direct reward signal.
148
+ - Full per-agent reward breakdowns can live in simulator state or custom observation fields even though the outward training reward is scalar.
149
+
150
+ ### Core Reward Formula
151
+
152
+ Per timestep $t$, for each agent $i$:
153
+
154
+ $$r_t^i = w_1 \cdot \hat{C}_t^i + w_2 \cdot \hat{E}_t^i + w_3 \cdot \hat{M}_t^i + w_4 \cdot \hat{B}_t^i$$
155
+
156
+ where each component is normalized independently to `[-1, 1]` before aggregation.
157
+
158
+ ### Components
159
+
160
+ - $C_t$: Coalition stability.
161
+ - $E_t$: Escalation penalty using an EMA of tension level.
162
+ - $M_t$: Market/economic gain from observable stress reduction.
163
+ - $B_t$: Behavioral consistency from observable action and rationale traces rather than hidden oracle state.
164
+
165
+ ### OpenEnv-facing reward rule
166
+
167
+ Internally we may compute:
168
+
169
+ ```python
170
+ per_agent_rewards = {
171
+ "us": ...,
172
+ "israel": ...,
173
+ "iran": ...,
174
+ "hezbollah": ...,
175
+ "gulf": ...,
176
+ "oversight": ...,
177
+ }
178
+ ```
179
+
180
+ But the OpenEnv adapter should emit:
181
+
182
+ ```python
183
+ DiplomacyObservation(
184
+ reward=per_agent_rewards[active_training_agent],
185
+ done=done,
186
+ reward_breakdown=per_agent_rewards[active_training_agent],
187
+ )
188
+ ```
189
+
190
+ That keeps the simulator expressive without claiming unsupported native multi-agent reward output.
191
+
192
+ ---
193
+
194
+ ## Oversight: OpenEnv-Compatible Placement
195
+
196
+ The previous Gymnasium-wrapper framing was too specific. OpenEnv does not give us a native `gym.Wrapper` abstraction, so oversight should be implemented as part of the simulator transition or as a thin project-side interceptor around the environment.
197
+
198
+ An OpenEnv-compatible pattern is:
199
+
200
+ ```python
201
+ async def step(self, action):
202
+ joint_actions = self._decode_joint_action(action)
203
+ self._apply_actions(self.world, joint_actions)
204
+
205
+ oversight = self._compute_oversight(self.world, joint_actions)
206
+ if oversight["triggered"]:
207
+ self._apply_oversight(self.world, oversight)
208
+
209
+ per_agent_rewards = self._compute_rewards(self.world, joint_actions)
210
+
211
+ return DiplomacyObservation(
212
+ reward=per_agent_rewards[self.training_agent],
213
+ done=self._is_done(self.world),
214
+ reward_breakdown=per_agent_rewards[self.training_agent],
215
+ )
216
+ ```
217
+
218
+ This preserves the intended semantics:
219
+
220
+ - oversight changes the transition
221
+ - reward is not rescaled
222
+ - intervention details remain inspectable through `state()` or custom observation fields
223
+
224
+ ---
225
+
226
+ ## Multi-Agent Training Architecture
227
+
228
+ ### What OpenEnv does not do for us
229
+
230
+ OpenEnv is not a built-in multi-agent trainer. It does not natively provide:
231
+
232
+ - CTDE
233
+ - MAPPO
234
+ - GRPO
235
+ - centralized critics
236
+ - per-agent replay buffers
237
+ - RLlib or TRL integration
238
+
239
+ Those belong in the training harness.
240
+
241
+ ### Recommended architecture
242
+
243
+ Use OpenEnv as the rollout environment, then place the multi-agent trainer above it:
244
+
245
+ 1. OpenEnv hosts one simulator instance.
246
+ 2. Each rollout step carries a structured joint action for all six agents.
247
+ 3. The simulator computes the full per-agent reward vector.
248
+ 4. The OpenEnv adapter returns the scalar reward for the currently optimized policy and exposes richer diagnostics through `state()` and custom observation fields.
249
+ 5. The trainer reconstructs per-agent trajectories from state snapshots and session traces.
250
+
251
+ This works for:
252
+
253
+ - independent PPO baselines
254
+ - CTDE with a centralized critic
255
+ - MAPPO-style actor-critic training
256
+ - GRPO-style grouped rollout training
257
+
258
+ But again: these are external training choices, not native OpenEnv features.
259
+
260
+ ---
261
+
262
+ ## Algorithm Choice
263
+
264
+ ### CTDE
265
+
266
+ CTDE remains a sound design choice for this simulator because agents interact in a shared partially observable world. The centralized critic is trainer-side logic and does not require native OpenEnv support.
267
+
268
+ ### GRPO vs PPO
269
+
270
+ GRPO is still a plausible fit for sparse long-horizon signals, but the doc should treat it as an external training-stack choice. OpenEnv will not provide `GRPOTrainer`; it only supplies environment rollouts.
271
+
272
+ PPO remains a valid baseline, especially for shorter or denser curriculum stages.
273
+
274
+ ### Practical recommendation
275
+
276
+ Phrase the implementation plan as:
277
+
278
+ - OpenEnv for rollout generation
279
+ - custom trainer or external framework for policy updates
280
+ - state snapshots and session traces for reconstructing per-agent returns
281
+
282
+ not as:
283
+
284
+ - OpenEnv natively handles GRPO or multi-agent PPO
285
+
286
+ ---
287
+
288
+ ## Training Flow
289
+
290
+ 1. Package the simulator as an OpenEnv environment or connect to it through `EnvClient.from_url(...)` once deployed.
291
+ 2. Reset the environment to get the initial joint observation.
292
+ 3. Query all six policies to produce one joint action.
293
+ 4. Step the environment and capture:
294
+ - next observation
295
+ - scalar reward for the active policy
296
+ - done flag
297
+ - environment state with per-agent rewards, oversight data, and world trace
298
+ 5. Reconstruct trainer-side trajectories for CTDE, MAPPO, PPO, or GRPO.
299
+ 6. Keep all training episodic and reproducible.
300
+
301
+ ### Live mode rule
302
+
303
+ Live mode is inference/demo only.
304
+
305
+ Training should use:
306
+
307
+ - episodic rollouts
308
+ - fixed seeds where needed
309
+ - replayed or sampled event bundles
310
+
311
+ Training should not depend on live RSS/Telegram/video streams if reproducibility matters.
312
+
313
+ That means the earlier idea of "Stage 3 training with live RSS injection" should be replaced by "Stage 3 training with replayed or sampled event bundles and oversight enabled."
314
+
315
+ ---
316
+
317
+ ## Evaluation
318
+
319
+ These evaluation targets are still reasonable project metrics:
320
+
321
+ | Metric | Definition | Target |
322
+ |---|---|---|
323
+ | Avg reward/episode | Mean $\sum_t r_t$ over recent episodes | Upward trend |
324
+ | De-escalation rate | % of episodes ending with tension < 30 | > 60% |
325
+ | Oversight intervention rate | Interventions per episode | Decreasing over training |
326
+ | Behavioral consistency | Mean $B_t$ across agents | > 0.7 |
327
+ | Coalition durability | Avg turns before first betrayal | > 200 |
328
+
329
+ But the trainer must compute them from rollout traces. OpenEnv will not provide these metrics automatically.
330
+
331
+ ---
332
+
333
+ ## Known Challenges
334
+
335
+ - Multi-agent credit assignment is trainer complexity, not environment complexity.
336
+ - If training data comes from changing live sources, reproducibility degrades fast.
337
+ - Reward hacking remains a real risk.
338
+ - OpenEnv scalar reward output means the adapter boundary must be explicit and carefully documented.
339
+
340
+ ---
341
+
342
+ ## Summary of Corrections
343
+
344
+ | Earlier claim | Corrected statement |
345
+ |---|---|
346
+ | OpenEnv extends Gymnasium with dict observations/actions and done-truncated-info tuples | OpenEnv uses its own async `reset()` / `step()` contract with typed `Action`/`Observation`/`State`; the client exposes `StepResult` with scalar reward |
347
+ | OpenEnv natively handles six-agent dict rewards and observations | Multi-agent orchestration is project logic layered on top of OpenEnv |
348
+ | OpenEnv envs are raw FastAPI `/reset` and `/step` servers | This repo uses FastAPI today, but OpenEnv itself exposes an environment contract plus `EnvClient` transport |
349
+ | OpenEnv directly supports CTDE, MAPPO, GRPO, TRL, or RLlib | Those are trainer-side integrations outside OpenEnv |
350
+ | Oversight should be a Gym wrapper | In this project it should be implemented inside the simulator transition or a thin project-side interceptor |
351
+ | Stage 3 training can use live RSS injection | Live mode is demo-only; training should remain episodic and reproducible |
352
+ | The current repo is already a native OpenEnv environment | The current repo now includes a native OpenEnv-facing adapter while still keeping the session-oriented dashboard API |
353
+
354
+ This version is the correct mental model: OpenEnv is the execution shell for the simulator, while nearly all of the interesting multi-agent RL behavior is our own design sitting above that shell.
TODO.md ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Trenches — TODO
2
+
3
+ ## Reward System
4
+
5
+ - [ ] **Event-prediction RL rewards** — when a real-world event occurs and an agent's prior prediction/action aligns with it, grant a positive reward signal. This closes the loop between live data ingestion and agent learning.
6
+ - Track agent predictions per turn (e.g., "Iran will retaliate within 2 turns")
7
+ - Compare predictions against actual events that fire from RSS/OSINT feeds
8
+ - Reward = f(prediction accuracy, lead time, specificity)
9
+ - Only **real events** (from live feeds or env-generated stochastic events) impact the reward signal
10
+
11
+ - [ ] **Chat-injected fake events** — allow manual event injection via the chat panel that influences agent behavior but does **not** affect reward calculations.
12
+ - Tag chat-injected events with `source: "manual"` vs real events with `source: "live"` or `source: "env"`
13
+ - Agents still react to fake events (observe and act), but the reward function filters them out
14
+ - Useful for demos, testing edge cases, and probing agent behavior without polluting the training signal
15
+
16
+ ## UI / Frontend
17
+
18
+ - [ ] **Event timeline with time control** — scrubber bar (like a video editor) for navigating, rewinding, and branching the simulation
19
+ - **Scrubber bar** at the bottom: drag to jump to any turn/timestamp, play/pause, rewind, fast-forward
20
+ - Two event types on the timeline: **predictions** (agent forecasts) and **actuals** (confirmed real events)
21
+ - Predictions that matched actual outcomes are visually linked; incorrect ones shown faded
22
+ - **Branching**: when a fake scenario is injected via chat, the timeline forks — you can scrub back to before the injection and see the "what if" branch vs the real timeline
23
+ - Playback controls: step-by-step (turn by turn), continuous playback at adjustable speed
24
+ - Markers on the scrubber for key events (escalations, interventions, injected scenarios)
25
+ - Filterable by agent, event type, and time range
26
+ - Feeds into the reward system — correct predictions on the timeline = positive RL signal
27
+
28
+ - [x] Merge tension/stats pills into top bar
29
+ - [x] Disable text selection on floating panels
30
+ - [x] Remove Mapbox logo
31
+ - [x] Clean up README
32
+
33
+ ## Infrastructure
34
+
35
+ - [x] Push to HF Space (`AlazarM/trenches`)
36
+ - [ ] Add `NEXT_PUBLIC_MAPBOX_TOKEN` as HF Space secret
37
+
38
+ ## Post-Training
39
+
40
+ - [x] 6 synthetic seed replay datasets (in `synthetic_historical_replays/`)
41
+ - [x] Training CLI with GRPO, hyperparameter args, checkpointing
42
+ - [x] Local smoke test (tiny-gpt2, US + Israel)
43
+ - [x] HF GPU smoke test on T4 ([trenches-training-smoke](https://huggingface.co/spaces/AlazarM/trenches-training-smoke))
44
+ - [x] All 6 entity models → `Qwen/Qwen3-8B` (no quantization)
45
+ - [x] Historical data collection pipeline (GDELT → replay JSON)
46
+ - [ ] Run historical collector for all 6 entities
47
+ - [ ] Curator-review collected replay data
48
+ - [ ] Spin up 6 HF A100 Spaces for production training
49
+ - [ ] Evaluation/baseline reporting
TOOLS.md ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # TOOLS.md: Agent Tools and Function-Calling Interface in Fog of War Diplomacy Simulator
2
+
3
+ This document details the **tools** available to the 6 agents in the Fog of War Diplomacy Simulator. These tools enable agents to interact with the environment, query personalized intelligence feeds (from World Monitor integration), perform actions that affect the world state, and gather information consistent with their partial-observability constraints.
4
+
5
+ Tools are implemented as **text-based function-calling** within each agent's LLM inference loop (using Hugging Face Transformers or similar). When an agent needs information or wants to act, it outputs a structured function call in its response (e.g., JSON or XML-like format). The OpenEnv environment parses these calls, executes them, and injects results back into the next observation.
6
+
7
+ This design:
8
+ - Reinforces **theory-of-mind** (agents must infer when others might use tools)
9
+ - Supports **tool-use fine-tuning** during RL post-training
10
+ - Maintains **partial observability** (tools return only agent-specific data)
11
+ - Aligns with **entity identity** (some tools are role-exclusive)
12
+
13
+ ## General Tool Usage Rules
14
+
15
+ - **Invocation Format**: Agents output function calls in a parseable format, e.g.:
16
+ ```json
17
+ {
18
+ "tool": "query_intel",
19
+ "parameters": {
20
+ "keywords": "US_polls Iran_strike",
21
+ "source": "polymarket"
22
+ }
23
+ }
24
+ ```
25
+ or XML-style if preferred by the prompt.
26
+
27
+ - **Execution**: In `step(actions)`, the env:
28
+ 1. Parses tool calls from agent text output
29
+ 2. Validates agent permissions (e.g., Iran cannot use "impose_sanctions")
30
+ 3. Executes (queries World Monitor API, simulates action outcome)
31
+ 4. Returns result in next obs dict (e.g., `obs["US"]["tool_result"]`)
32
+
33
+ - **Cost/Cooldown**: Most tools have simulated "cost" (e.g., -0.1 reward for heavy queries) or cooldown (e.g., query every 3 turns) to prevent spam.
34
+
35
+ - **Consistency**: Tools pull from World Monitor APIs (deployed on Vercel/Railway) → filtered JSON snippets → injected into prompt history for persistent context.
36
+
37
+ ## Common Tools (Available to All Agents)
38
+
39
+ 1. **query_intel**
40
+ - Description: Request filtered intelligence from World Monitor feeds.
41
+ - Parameters:
42
+ - `keywords`: string (space-separated search terms, e.g., "oil Hormuz strike")
43
+ - `source`: optional string (e.g., "polymarket", "acled", "telegram_osint", "commodity_dashboard")
44
+ - `time_range`: optional string ("last_hour", "last_day")
45
+ - Returns: Dict of snippets/headlines (e.g., {"headline": "...", "sentiment": 0.6, "source": "..."})
46
+ - Usage: Core tool for maintaining situational awareness; agents decide what to query based on current tension.
47
+
48
+ 2. **analyze_belief**
49
+ - Description: Infer hidden incentives/beliefs of another agent (theory-of-mind).
50
+ - Parameters:
51
+ - `target_agent`: string (e.g., "Iran")
52
+ - `evidence`: string (short summary of observed actions)
53
+ - Returns: Dict {"inferred_incentive": "...", "confidence": 0.72}
54
+ - Usage: Used to improve \( B_t \) reward component.
55
+
56
+ 3. **propose_negotiation**
57
+ - Description: Send a diplomatic proposal to one or more agents.
58
+ - Parameters:
59
+ - `recipients`: list[string] (e.g., ["US", "Gulf Coalition"])
60
+ - `proposal_text`: string (e.g., "Ceasefire in exchange for sanctions relief")
61
+ - Returns: Dict {"sent": true, "acknowledged_by": [...], "immediate_response": "..."}
62
+ - Usage: Forms coalitions; can be deceptive.
63
+
64
+ ## Agent-Specific Tools
65
+
66
+ ### 1. US (Trump Admin / CENTCOM)
67
+ - **impose_sanctions**
68
+ - Parameters: `target`: string, `severity`: float (0-1)
69
+ - Effect: Increases tension for target, boosts US \( M_t \), risks backlash if overused.
70
+ - **deploy_assets**
71
+ - Parameters: `location`: string (e.g., "Gulf"), `type`: string ("carrier", "cyber")
72
+ - Effect: Deters escalation, visible to allies.
73
+ - **query_polls**
74
+ - Shortcut: `query_intel(keywords="US approval rating Polymarket")`
75
+
76
+ ### 2. Israel (Netanyahu / IDF)
77
+ - **launch_precise_strike**
78
+ - Parameters: `target`: string (e.g., "IRGC facility"), `risk_level`: float
79
+ - Effect: High escalation potential, strong \( E_t \) penalty if civilian risk high.
80
+ - **activate_iron_dome**
81
+ - Parameters: `region`: string
82
+ - Effect: Reduces damage from incoming rockets (reduces \( E_t \) penalty).
83
+ - **query_border_alerts**
84
+ - Shortcut: `query_intel(keywords="OREF Lebanon rocket")`
85
+
86
+ ### 3. Iran (IRGC / Interim Leadership)
87
+ - **activate_proxy**
88
+ - Parameters: `proxy`: string (e.g., "Hezbollah"), `action_type`: string ("drone", "cyber")
89
+ - Effect: Asymmetric retaliation, lower escalation visibility.
90
+ - **threaten_hormuz**
91
+ - Parameters: `severity`: float
92
+ - Effect: Spikes global oil price, strong \( M_t \) impact for Gulf.
93
+ - **query_proxy_status**
94
+ - Shortcut: `query_intel(keywords="Hezbollah militia Telegram")`
95
+
96
+ ### 4. Hezbollah (Proxy Swarm Leader)
97
+ - **launch_drone_swarm**
98
+ - Parameters: `target`: string, `scale`: int (1-10)
99
+ - Effect: High asymmetric damage, visible to Israel.
100
+ - **evade_detection**
101
+ - Parameters: None
102
+ - Effect: Reduces probability of being traced back to Iran.
103
+ - **query_border_streams**
104
+ - Shortcut: `query_intel(keywords="MTV Lebanon webcam Hezbollah")`
105
+
106
+ ### 5. Gulf Coalition (Saudi/UAE/Qatar)
107
+ - **adjust_oil_output**
108
+ - Parameters: `delta`: float (-1 to +1)
109
+ - Effect: Stabilizes or shocks markets, directly affects \( M_t \).
110
+ - **host_base_access**
111
+ - Parameters: `ally`: string (e.g., "US")
112
+ - Effect: Strengthens coalition, visible to Iran.
113
+ - **query_market_impact**
114
+ - Shortcut: `query_intel(keywords="oil price Hormuz Bloomberg")`
115
+
116
+ ### 6. Oversight Agent (Fleet AI Meta-Layer)
117
+ - **generate_explanation**
118
+ - Parameters: `target_action`: string, `agent`: string
119
+ - Returns: Natural language explanation + risk score.
120
+ - **calculate_risk**
121
+ - Parameters: `action`: dict
122
+ - Returns: Float risk score (0-1) using belief propagation formula.
123
+ - **intervene**
124
+ - Parameters: `target_agent`: string, `action`: string ("force_rethink", "audit_beliefs")
125
+ - Effect: Scales reward by 0.5 or forces re-action.
126
+ - **query_global_synthesis**
127
+ - Shortcut: `query_intel(scope="global", keywords="hotspot escalation")`
128
+
129
+ ## Implementation Notes
130
+
131
+ - **Tool Parsing**: Use structured output prompting (e.g., "Always respond with JSON tool call if using a tool") + regex/JSON parser in env.
132
+ - **Tool Result Injection**: Results appended to prompt history (e.g., "Tool result: [JSON]") for context window management.
133
+ - **RL Fine-Tuning**: Reward tool usage that leads to high entity-aligned outcomes (e.g., US sanctions → coalition strength).
134
+ - **Debugging**: Log all tool calls/results in dashboard for judging.
135
+ - **Security**: Validate parameters server-side to prevent invalid actions.
136
+
137
+ This toolset empowers agents to act intelligently within their roles while maintaining the simulator's core challenge: operating under incomplete, live-fed information in a high-stakes multi-agent crisis.
TRAINING_PLAN.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Trenches Training Plan
2
+
3
+ This document is the working plan for the historical prediction training setup.
4
+
5
+ ## Goal
6
+
7
+ Train six separate entity models in the same OpenEnv-backed simulator so they do two things at each turn:
8
+
9
+ 1. choose an action
10
+ 2. predict what will happen next
11
+
12
+ The core idea is:
13
+
14
+ - the environment replays a real historical event window
15
+ - each model only sees information available up to that point in time
16
+ - each model generates a predicted future timeline
17
+ - the environment later reveals what actually happened
18
+ - reward is based partly on whether the model predicted correctly
19
+
20
+ Target training window:
21
+
22
+ - 2025
23
+ - 2026
24
+
25
+ ## Intended Training Shape
26
+
27
+ Two timelines exist at once:
28
+
29
+ 1. `ground_truth_timeline`
30
+ The real historical sequence of events.
31
+
32
+ 2. `predicted_timeline`
33
+ What the entity believed would happen next, based only on available information at that turn.
34
+
35
+ The environment reward should compare the second timeline against the first.
36
+
37
+ ## Why OpenEnv Is The Right Boundary
38
+
39
+ OpenEnv is the environment interface, not the trainer itself.
40
+
41
+ That is exactly what we need:
42
+
43
+ - `reset()` starts a historical replay episode at a chosen point
44
+ - `step()` accepts an entity output
45
+ - the env advances time
46
+ - the env computes reward from action quality and prediction quality
47
+
48
+ Training should happen outside the backend with something like Hugging Face TRL.
49
+
50
+ ## What Exists Already
51
+
52
+ The current backend already has:
53
+
54
+ - an OpenEnv environment boundary
55
+ - session and step logic
56
+ - per-entity observations
57
+ - per-entity rewards
58
+ - latent state
59
+ - latent events
60
+ - belief state
61
+ - source projection
62
+ - scenario and benchmark support
63
+ - a structured `Prediction` schema
64
+ - prediction storage and scoring in session state
65
+ - replay mode driven by historical event timestamps
66
+ - a bundled set of 6 synthetic seed replay datasets (in `synthetic_historical_replays/`)
67
+ - a replay-aware TRL/OpenEnv CLI training loop
68
+ - a historical data collection pipeline (GDELT → replay JSON)
69
+
70
+ ## What Is Missing
71
+
72
+ The backend does not yet have:
73
+
74
+ - a larger curated truth dataset beyond the bundled synthetic seed replays
75
+ - a proper evaluation report for prediction quality
76
+ - baselines and train/eval split reporting
77
+
78
+ ## Planned Implementation Order
79
+
80
+ ### Phase 1: Historical Replay Foundation
81
+
82
+ 1. Define a normalized historical event schema.
83
+ 2. Build a replay dataset for selected 2025-2026 events.
84
+ 3. Add historical replay mode to the backend environment.
85
+ 4. Ensure agents only see information available before each replay timestamp.
86
+
87
+ ### Phase 2: Prediction Contract
88
+
89
+ 1. Add a structured `Prediction` object for each agent.
90
+ 2. Extend agent outputs so a turn can include:
91
+ - `action`
92
+ - `prediction`
93
+ 3. Store prediction history in session state.
94
+
95
+ ### Phase 3: Reward Logic
96
+
97
+ 1. Add reward terms for:
98
+ - correct topic
99
+ - correct actor
100
+ - correct target
101
+ - correct timing window
102
+ - correct severity band
103
+ - confidence calibration
104
+ 2. Penalize:
105
+ - confident false predictions
106
+ - vague predictions
107
+ - repeated contradiction with real history
108
+ 3. Exclude fake/manual events from training reward.
109
+
110
+ ### Phase 4: Training Loop
111
+
112
+ 1. Train one entity first.
113
+ 2. Use OpenEnv + HF TRL.
114
+ 3. Prove a working historical replay training loop.
115
+ 4. Scale to six entity-specific models.
116
+
117
+ ### Phase 5: Evaluation
118
+
119
+ 1. Build evaluation metrics for forecast quality.
120
+ 2. Compare against simple baselines.
121
+ 3. Separate train and eval windows.
122
+ 4. Report before/after performance.
123
+
124
+ ## Recommended Minimal Event Schema
125
+
126
+ Each historical event should have:
127
+
128
+ - `event_id`
129
+ - `timestamp`
130
+ - `topic`
131
+ - `region`
132
+ - `actors`
133
+ - `targets`
134
+ - `severity`
135
+ - `summary`
136
+ - `source_type`
137
+ - `confirmed`
138
+ - `tags`
139
+
140
+ ## Recommended Prediction Schema
141
+
142
+ Each prediction should have:
143
+
144
+ - `prediction_id`
145
+ - `agent_id`
146
+ - `turn`
147
+ - `timestamp`
148
+ - `topic`
149
+ - `predicted_actor`
150
+ - `predicted_target`
151
+ - `time_horizon_turns`
152
+ - `expected_severity`
153
+ - `confidence`
154
+ - `summary`
155
+ - `rationale`
156
+
157
+ ## Critical Design Rules
158
+
159
+ 1. No leakage.
160
+ The model must never see future information.
161
+
162
+ 2. Real events and fake events must be separated.
163
+ Manual events can drive behavior but must not drive training reward.
164
+
165
+ 3. Action and prediction should remain separate outputs.
166
+ Mixing them into one blob will make both training and debugging worse.
167
+
168
+ 4. Train one entity first before scaling to six.
169
+ Prove the loop on one actor before multiplying complexity.
170
+
171
+ 5. Evaluate against baselines.
172
+ Otherwise there is no evidence the training helped.
173
+
174
+ ## Suggested First Entity
175
+
176
+ Start with:
177
+
178
+ - `us`
179
+
180
+ Why:
181
+
182
+ - broad observation surface
183
+ - strong strategic tradeoffs
184
+ - likely easiest to benchmark against known 2025-2026 developments
185
+
186
+ ## Known Future Work
187
+
188
+ After the first working replay-training loop:
189
+
190
+ - train all six entities
191
+ - compare model families
192
+ - add branch evaluation for counterfactual timelines
193
+ - add replay UI for predicted vs actual timeline alignment
194
+
195
+ ## Working Status
196
+
197
+ Current status:
198
+
199
+ - all 6 synthetic seed replay datasets created and bundled (in `synthetic_historical_replays/`)
200
+ - base model: `Qwen/Qwen3-8B` (shared across all entities, no quantization)
201
+ - OpenEnv step accepts separate `action` and `prediction`
202
+ - forecast reward is blended into entity reward on replay steps
203
+ - TRL CLI training path is implemented and smoke-tested end to end
204
+ - local smoke tests pass for US + Israel entities (tiny-gpt2)
205
+ - HF GPU smoke test passed on T4 ([trenches-training-smoke](https://huggingface.co/spaces/AlazarM/trenches-training-smoke))
206
+ - historical data collection pipeline implemented (GDELT → replay JSON)
207
+ - multi-entity scaling to A100 and evaluation still pending
208
+
209
+ This file should be updated as the forecasting/replay training system is built.
app/api/health/route.ts ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import { NextResponse } from "next/server";
2
+
3
+ export const runtime = "edge";
4
+
5
+ export function GET() {
6
+ return NextResponse.json({
7
+ status: "ok",
8
+ service: "trenches-vercel-api",
9
+ });
10
+ }
app/api/source-registry/route.ts ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { NextResponse } from "next/server";
2
+
3
+ import { getAllSources, validateSourceRegistry } from "../../../src/lib/data-sources";
4
+
5
+ export const runtime = "edge";
6
+
7
+ export function GET() {
8
+ return NextResponse.json({
9
+ sources: getAllSources(),
10
+ validation: validateSourceRegistry(),
11
+ });
12
+ }
app/globals.css ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @import url("https://fonts.googleapis.com/css2?family=Oxanium:wght@200..800&family=Source+Code+Pro:wght@200..900&display=swap");
2
+
3
+ @import "tailwindcss";
4
+
5
+ :root {
6
+ --card: #b0b0b0;
7
+ --ring: #b71c1c;
8
+ --input: #505050;
9
+ --muted: #b8b8b8;
10
+ --accent: #4682b4;
11
+ --border: #505050;
12
+ --radius: 0px;
13
+ --chart-1: #b71c1c;
14
+ --chart-2: #556b2f;
15
+ --chart-3: #4682b4;
16
+ --chart-4: #ff6f00;
17
+ --chart-5: #8d6e63;
18
+ --popover: #b0b0b0;
19
+ --primary: #b71c1c;
20
+ --sidebar: #b0b0b0;
21
+ --spacing: 0.25rem;
22
+ --font-mono: "Source Code Pro", monospace;
23
+ --font-sans: "Oxanium", sans-serif;
24
+ --secondary: #556b2f;
25
+ --background: #cccccc;
26
+ --font-serif: ui-serif, Georgia, Cambria, "Times New Roman", Times, serif;
27
+ --foreground: #1f1f1f;
28
+ --destructive: #ff6f00;
29
+ --shadow-blur: 4px;
30
+ --shadow-color: hsl(0 0% 0%);
31
+ --sidebar-ring: #b71c1c;
32
+ --shadow-spread: 0px;
33
+ --letter-spacing: 0em;
34
+ --shadow-opacity: 0.4;
35
+ --sidebar-accent: #4682b4;
36
+ --sidebar-border: #505050;
37
+ --card-foreground: #1f1f1f;
38
+ --shadow-offset-x: 0px;
39
+ --shadow-offset-y: 2px;
40
+ --sidebar-primary: #b71c1c;
41
+ --muted-foreground: #4a4a4a;
42
+ --accent-foreground: #ffffff;
43
+ --popover-foreground: #1f1f1f;
44
+ --primary-foreground: #ffffff;
45
+ --sidebar-foreground: #1f1f1f;
46
+ --secondary-foreground: #ffffff;
47
+ --destructive-foreground: #000000;
48
+ --sidebar-accent-foreground: #ffffff;
49
+ --sidebar-primary-foreground: #ffffff;
50
+ }
51
+
52
+ .dark {
53
+ --card: #2a2a2a;
54
+ --ring: #e53935;
55
+ --input: #4a4a4a;
56
+ --muted: #252525;
57
+ --accent: #64b5f6;
58
+ --border: #4a4a4a;
59
+ --radius: 0px;
60
+ --chart-1: #e53935;
61
+ --chart-2: #689f38;
62
+ --chart-3: #64b5f6;
63
+ --chart-4: #ffa000;
64
+ --chart-5: #a1887f;
65
+ --popover: #2a2a2a;
66
+ --primary: #e53935;
67
+ --sidebar: #141414;
68
+ --spacing: 0.25rem;
69
+ --font-mono: "Source Code Pro", monospace;
70
+ --font-sans: "Oxanium", sans-serif;
71
+ --secondary: #689f38;
72
+ --background: #1a1a1a;
73
+ --font-serif: ui-serif, Georgia, Cambria, "Times New Roman", Times, serif;
74
+ --foreground: #e0e0e0;
75
+ --destructive: #ffa000;
76
+ --shadow-blur: 5px;
77
+ --shadow-color: hsl(0 0% 0%);
78
+ --sidebar-ring: #e53935;
79
+ --shadow-spread: 0px;
80
+ --letter-spacing: 0em;
81
+ --shadow-opacity: 0.6;
82
+ --sidebar-accent: #64b5f6;
83
+ --sidebar-border: #4a4a4a;
84
+ --card-foreground: #e0e0e0;
85
+ --shadow-offset-x: 0px;
86
+ --shadow-offset-y: 2px;
87
+ --sidebar-primary: #e53935;
88
+ --muted-foreground: #a0a0a0;
89
+ --accent-foreground: #000000;
90
+ --popover-foreground: #e0e0e0;
91
+ --primary-foreground: #ffffff;
92
+ --sidebar-foreground: #e0e0e0;
93
+ --secondary-foreground: #000000;
94
+ --destructive-foreground: #000000;
95
+ --sidebar-accent-foreground: #000000;
96
+ --sidebar-primary-foreground: #ffffff;
97
+ }
98
+
99
+ @theme inline {
100
+ --color-card: var(--card);
101
+ --color-ring: var(--ring);
102
+ --color-input: var(--input);
103
+ --color-muted: var(--muted);
104
+ --color-accent: var(--accent);
105
+ --color-border: var(--border);
106
+ --color-chart-1: var(--chart-1);
107
+ --color-chart-2: var(--chart-2);
108
+ --color-chart-3: var(--chart-3);
109
+ --color-chart-4: var(--chart-4);
110
+ --color-chart-5: var(--chart-5);
111
+ --color-popover: var(--popover);
112
+ --color-primary: var(--primary);
113
+ --color-sidebar: var(--sidebar);
114
+ --color-secondary: var(--secondary);
115
+ --color-background: var(--background);
116
+ --color-foreground: var(--foreground);
117
+ --color-destructive: var(--destructive);
118
+ --color-sidebar-ring: var(--sidebar-ring);
119
+ --color-sidebar-accent: var(--sidebar-accent);
120
+ --color-sidebar-border: var(--sidebar-border);
121
+ --color-card-foreground: var(--card-foreground);
122
+ --color-sidebar-primary: var(--sidebar-primary);
123
+ --color-muted-foreground: var(--muted-foreground);
124
+ --color-accent-foreground: var(--accent-foreground);
125
+ --color-popover-foreground: var(--popover-foreground);
126
+ --color-primary-foreground: var(--primary-foreground);
127
+ --color-sidebar-foreground: var(--sidebar-foreground);
128
+ --color-secondary-foreground: var(--secondary-foreground);
129
+ --color-destructive-foreground: var(--destructive-foreground);
130
+ --color-sidebar-accent-foreground: var(--sidebar-accent-foreground);
131
+ --color-sidebar-primary-foreground: var(--sidebar-primary-foreground);
132
+ --radius-sm: calc(var(--radius) - 4px);
133
+ --radius-md: calc(var(--radius) - 2px);
134
+ --radius-lg: var(--radius);
135
+ --radius-xl: calc(var(--radius) + 4px);
136
+ --font-sans: var(--font-sans);
137
+ --font-mono: var(--font-mono);
138
+ --font-serif: var(--font-serif);
139
+ --spacing: var(--spacing);
140
+ }
141
+
142
+ * {
143
+ box-sizing: border-box;
144
+ border-color: var(--border);
145
+ }
146
+
147
+ body {
148
+ font-family: var(--font-sans);
149
+ background: var(--background);
150
+ color: var(--foreground);
151
+ -webkit-font-smoothing: antialiased;
152
+ -moz-osx-font-smoothing: grayscale;
153
+ }
154
+
155
+ /* Hide Mapbox branding */
156
+ .mapboxgl-ctrl-logo,
157
+ .mapboxgl-ctrl-attrib {
158
+ display: none !important;
159
+ }
app/layout.tsx ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { Metadata } from "next";
2
+
3
+ import "./globals.css";
4
+ import "mapbox-gl/dist/mapbox-gl.css";
5
+
6
+ export const metadata: Metadata = {
7
+ title: "Trenches — Fog of War Diplomacy Simulator",
8
+ description:
9
+ "Multi-agent geopolitical crisis simulator with live intelligence feeds and Mapbox globe visualization.",
10
+ };
11
+
12
+ export default function RootLayout({
13
+ children,
14
+ }: Readonly<{
15
+ children: React.ReactNode;
16
+ }>) {
17
+ return (
18
+ <html lang="en" className="dark">
19
+ <body className="min-h-screen overflow-hidden antialiased">
20
+ {children}
21
+ </body>
22
+ </html>
23
+ );
24
+ }
app/page.tsx ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import GlobePage from "@/src/components/GlobePage";
2
+
3
+ export default function HomePage() {
4
+ return <GlobePage />;
5
+ }
backend/Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim
2
+
3
+ ENV PYTHONDONTWRITEBYTECODE=1
4
+ ENV PYTHONUNBUFFERED=1
5
+ ENV TRENCHES_ENTITIES_ROOT=/app/entities
6
+
7
+ WORKDIR /app
8
+
9
+ COPY backend/pyproject.toml backend/README.md ./backend/
10
+ COPY backend/src ./backend/src
11
+ COPY entities ./entities
12
+
13
+ WORKDIR /app/backend
14
+
15
+ RUN pip install --no-cache-dir .
16
+
17
+ EXPOSE 8000
18
+
19
+ CMD ["python", "-m", "trenches_env.server"]
backend/HOW_POST_TRAINING_WORKS.md ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # How Post-Training Works: Steps, Data, and Rewards
2
+
3
+ ## The Loop in One Sentence
4
+
5
+ Each GRPO step resets the environment at a random replay position, generates 16 completions, scores them against the real timeline, and updates the model to favor better responses.
6
+
7
+ ## Steps vs Data
8
+
9
+ You have **10 replay events** and **100 GRPO steps**. They don't map 1:1.
10
+
11
+ ```
12
+ Step 1: reset() → random position in 10-event timeline
13
+ → generate 16 completions
14
+ → score all 16 against the next revealed event
15
+ → GRPO update (reinforce good, suppress bad)
16
+
17
+ Step 2: reset() → different random position
18
+ → generate 16 completions → score → update
19
+
20
+ ...
21
+
22
+ Step 100: same process
23
+ ```
24
+
25
+ Across 100 steps × 16 generations = **1,600 rollouts** through those 10 events.
26
+ Each event is seen ~160 times from different angles.
27
+
28
+ ## What The Model Sees (Input)
29
+
30
+ Built by `_render_observation_prompt()` from the replay timeline:
31
+
32
+ ```
33
+ You are training the us policy in the Trenches OpenEnv historical replay
34
+ environment. Return strict JSON only.
35
+
36
+ Training agent: us
37
+ Turn: 3
38
+
39
+ Historical brief:
40
+ - Commercial shipping insurers flag elevated Gulf transit risk near Hormuz.
41
+ - Washington reinforces maritime protection with Gulf partners.
42
+ - A renewed cross-border volley drives northern-front alerting.
43
+
44
+ Public brief:
45
+ - Gulf transit risk elevated near Hormuz.
46
+ - Coalition deconfliction messaging underway.
47
+
48
+ Private brief:
49
+ - Domestic approval is sensitive to prolonged escalation.
50
+ - Forward naval posture can deter but also spike market stress.
51
+
52
+ Strategic state:
53
+ - regional_access: 74.5
54
+ - shipping_security: 72.0
55
+ - domestic_support: 63.9
56
+ - force_posture: 76.0
57
+
58
+ Allowed actions: hold, negotiate, sanction, strike, defend, intel_query, mobilize, deceive
59
+ ```
60
+
61
+ ## What The Model Returns (Output)
62
+
63
+ ```json
64
+ {
65
+ "action": {
66
+ "type": "sanction",
67
+ "target": "iran",
68
+ "summary": "Target proxy logistics channels to degrade corridor sustainment."
69
+ },
70
+ "prediction": {
71
+ "topic": "domestic",
72
+ "predicted_actor": "us",
73
+ "predicted_target": "iran",
74
+ "time_horizon_turns": 1,
75
+ "expected_severity": "medium",
76
+ "confidence": 0.7,
77
+ "summary": "Washington will announce a sanctions package aimed at proxy sustainment.",
78
+ "rationale": "Escalating Hormuz pressure creates political pressure for economic action."
79
+ }
80
+ }
81
+ ```
82
+
83
+ ## Ground Truth (Revealed Event)
84
+
85
+ The environment reveals the next event from `us_synthetic_seed_2025_2026.json`:
86
+
87
+ ```json
88
+ {
89
+ "event_id": "evt-2025-04-us-sanctions-package",
90
+ "timestamp": "2025-04-22T12:00:00Z",
91
+ "topic": "domestic",
92
+ "actors": ["us"],
93
+ "targets": ["iran"],
94
+ "severity": "medium",
95
+ "summary": "Washington rolls out a coordinated sanctions package aimed at procurement and logistics channels linked to proxy sustainment."
96
+ }
97
+ ```
98
+
99
+ ## Scoring
100
+
101
+ ```
102
+ action_reward: +0.55 (sanction aligns with us policy at 0.55 per rl.py)
103
+ forecast_reward: +0.82 (topic ✅ actor ✅ target ✅ severity ✅ confidence ✅)
104
+ ─────────────────────────
105
+ total_reward: +1.37 → fed back to GRPO
106
+ ```
107
+
108
+ ## Where Each Piece Comes From
109
+
110
+ | Data | Source File | What It Provides |
111
+ | ----------------- | ------------------------------------- | ------------------------------------------------------------------------- |
112
+ | Replay events | `synthetic_historical_replays/*.json` | 10 synthetic events (timestamp, topic, actors, severity, impact) |
113
+ | Intel briefings | `source_manifest.json` | Public + private brief items |
114
+ | Agent identity | `agents.py` | Role, intel focus, private intel baseline |
115
+ | Reward config | `rl.py` | Allowed actions, action alignment scores, state baselines, metric targets |
116
+ | Environment logic | `env.py` | Builds observation, applies actions, scores predictions, computes rewards |
117
+ | Training loop | `training_cli.py` | Connects model ↔ environment via GRPO rollouts |
118
+ | OpenEnv boundary | `openenv_adapter.py` | reset/step interface between TRL and the simulator |
119
+
120
+ ## Key Numbers
121
+
122
+ | Metric | Value | Formula |
123
+ | ----------------------------- | ----- | -------------------------------------- |
124
+ | Total rollouts per entity | 1,600 | 100 steps × 16 generations |
125
+ | Times each event is seen | ~160 | 1,600 ÷ 10 events |
126
+ | Effective batch size | 8 | batch_size(1) × grad_accum(8) |
127
+ | Completions compared per step | 16 | GRPO ranks them relative to each other |
backend/POST_TRAINING_PLAN.md ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Post-Training Plan: 6 Entities × 1 Hour Parallel
2
+
3
+ ## Overview
4
+
5
+ 6 HF A100 Spaces running in parallel. Total wall time: **1 hour**. Total cost: **$15**. Base model: **Qwen/Qwen3-8B** (no quantization).
6
+
7
+ GRPO post-training on OpenEnv. Qwen3-8B already knows how to reason — we're aligning it to each entity's policy behavior through the environment reward signal.
8
+
9
+ ## Cost
10
+
11
+ | Item | Rate | Quantity | Cost |
12
+ | --------- | -------- | ------------- | ------- |
13
+ | A100 80GB | $2.50/hr | 6 Spaces × 1h | **$15** |
14
+
15
+ ## Optimal Hyperparameters
16
+
17
+ Researched from TRL docs, DeepSeek-R1 paper, Open-R1 recipe, and TRL OpenEnv examples.
18
+
19
+ ```yaml
20
+ # Model
21
+ model_id: Qwen/Qwen3-8B
22
+ # No quantization — full precision on A100 80GB.
23
+ # (QeRL reports that quantization noise can aid exploration; we still run full precision here to keep checkpointing simple.)
24
+
25
+ # GRPO Core (from DeepSeek-R1 + Open-R1 recipes)
26
+ algorithm: GRPO
27
+ loss_type: grpo
28
+ beta: 0.001 # KL coefficient (DeepSeek-R1 uses 0.001)
29
+ num_generations:
30
+ 16 # DeepSeek-R1: "sample 16 outputs per prompt"
31
+ # More generations = better group-relative advantage signal
32
+ max_steps: 100 # 1 hour on A100 with these settings
33
+ warmup_steps: 10 # Stabilize early training
34
+
35
+ # Learning Rate
36
+ learning_rate:
37
+ 5e-6 # Open-R1 + OpenEnv Sudoku example both use 5e-6
38
+ # Higher than our earlier 5e-7; research shows
39
+ # post-training converges faster with this range
40
+
41
+ # Batching
42
+ per_device_train_batch_size: 1 # Memory-safe for the 8B model at full precision
43
+ gradient_accumulation_steps: 8 # Effective batch = 8 (from TRL Sudoku OpenEnv example)
44
+
45
+ # Context
46
+ max_prompt_length: 1536
47
+ max_completion_length: 256
48
+
49
+ # Generation Sampling (from TRL OpenEnv Sudoku)
50
+ temperature: 0.8 # Balanced exploration vs exploitation
51
+ top_k: 10 # Focused sampling
52
+
53
+ # Saving
54
+ save_strategy: steps
55
+ save_steps: 25 # Checkpoint every 25 steps (4 saves per run)
56
+
57
+ # Inference
58
+ generation_backend: transformers # vllm if CUDA available
59
+ # If vllm: use_vllm=True, vllm_mode="colocate", vllm_gpu_memory_utilization=0.3
60
+
61
+ # Preview
62
+ preview_samples: 3
63
+ training_stage: stage_1_dense
64
+ ```
65
+
66
+ ### Why These Settings
67
+
68
+ | Setting | Source | Reasoning |
69
+ | -------------------------- | ----------------------------------------------- | ---------------------------------------------------------------------------------------------- |
70
+ | `num_generations: 16` | DeepSeek-R1 | More rollouts = better advantage estimation. 16 is the standard for GRPO |
71
+ | `beta: 0.001` | DeepSeek-R1 | Low KL penalty allows the model to explore further from base policy |
72
+ | `learning_rate: 5e-6` | Open-R1 + TRL examples | 10x higher than our earlier setting; post-training on instruct models converges with higher LR |
73
+ | `gradient_accumulation: 8` | TRL OpenEnv Sudoku | Effective batch of 8 stabilizes updates without excessive VRAM |
74
+ | `temperature: 0.8` | TRL OpenEnv Sudoku | Encourages diverse completions during rollout |
75
+ | `No quantization` | A100 80GB has enough VRAM for 8B full precision | Full precision avoids quantization noise and simplifies checkpointing |
76
+
77
+ ## Per-Space Command
78
+
79
+ Replace `ENTITY` with: `us`, `israel`, `iran`, `hezbollah`, `gulf`, `oversight`
80
+
81
+ ```bash
82
+ python -m trenches_env.training_cli \
83
+ --model-id Qwen/Qwen3-8B \
84
+ --training-agent ENTITY \
85
+ --replay-id ENTITY_synthetic_seed_2025_2026 \
86
+ --output-dir checkpoints/ENTITY-qwen3-8b \
87
+ --generation-backend transformers \
88
+ --training-stage stage_1_dense \
89
+ --max-steps 100 \
90
+ --train-size 256 \
91
+ --num-generations 16 \
92
+ --per-device-train-batch-size 1 \
93
+ --gradient-accumulation-steps 8 \
94
+ --learning-rate 5e-6 \
95
+ --max-prompt-length 1536 \
96
+ --max-completion-length 256 \
97
+ --preview-samples 3
98
+ ```
99
+
100
+ ## HuggingFace Hub Output
101
+
102
+ ```
103
+ shlawgathon/trenches-us-qwen3-8b
104
+ shlawgathon/trenches-israel-qwen3-8b
105
+ shlawgathon/trenches-iran-qwen3-8b
106
+ shlawgathon/trenches-hezbollah-qwen3-8b
107
+ shlawgathon/trenches-gulf-qwen3-8b
108
+ shlawgathon/trenches-oversight-qwen3-8b
109
+ ```
110
+
111
+ Each checkpoint contains: `config.json`, `model.safetensors`, `tokenizer.json`, `generation_config.json`, `training_args.bin`
112
+
113
+ ## Build Steps
114
+
115
+ 1. ~~Create 5 replay datasets (israel, iran, hezbollah, gulf, oversight)~~ ✅ done (synthetic seed data in `synthetic_historical_replays/`)
116
+ 2. ~~Add `--quantize-4bit` to `training_cli.py` (NF4 via bitsandbytes)~~ ✅ done
117
+ 3. ~~Add `beta`, `warmup_steps`, `temperature`, `top_k`, `save_strategy` CLI args~~ ✅ done
118
+ 4. ~~Add `bitsandbytes>=0.43.0` to `pyproject.toml`~~ ✅ done
119
+ 5. ~~Smoke test locally with tiny-gpt2~~ ✅ done (US + Israel pass)
120
+ 6. ~~Smoke test on HF T4 GPU~~ ✅ done ([trenches-training-smoke](https://huggingface.co/spaces/AlazarM/trenches-training-smoke))
121
+ 7. Spin up 6 HF A100 Spaces → 1 hour → done
backend/README.md ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Trenches Backend
2
+
3
+ This directory contains the Python backend for the Trenches simulator.
4
+
5
+ It now exposes two layers:
6
+
7
+ - the existing session-oriented FastAPI API used by the React dashboard
8
+ - a native OpenEnv-compatible environment mounted under `/openenv` when `openenv-core` is installed
9
+
10
+ The backend does not serve frontend assets and is intended to stay frontend-stack agnostic. Any web client
11
+ (Next.js, Vite, Bun, mobile, or a thin dashboard proxy) should be able to consume the same HTTP contract.
12
+
13
+ CORS is configurable so frontend migrations do not require backend code changes:
14
+
15
+ - `TRENCHES_CORS_ALLOW_ORIGINS=https://app.example.com,https://ops.example.com`
16
+ - `TRENCHES_CORS_ALLOW_ORIGIN_REGEX=https://.*\.example\.com`
17
+ - `TRENCHES_CORS_ALLOW_CREDENTIALS=true|false`
18
+
19
+ If no CORS env vars are set, the backend allows local development origins on `localhost` / `127.0.0.1` for any port.
20
+
21
+ Entity-model provider bindings are also configurable per agent. The backend does not fake provider readiness:
22
+ if a provider/model is not configured, the runtime reports `heuristic_fallback` explicitly in session state and
23
+ `/capabilities`.
24
+
25
+ Supported env patterns:
26
+
27
+ - `TRENCHES_MODEL_PROVIDER=openai|anthropic|openrouter|ollama|vllm|custom`
28
+ - `TRENCHES_MODEL_NAME=<provider model id>`
29
+ - `TRENCHES_MODEL_BASE_URL=<custom base url>`
30
+ - `TRENCHES_MODEL_API_KEY_ENV=<name of env var holding the secret>`
31
+ - `TRENCHES_MODEL_SUPPORTS_TOOL_CALLS=true|false`
32
+ - `TRENCHES_MODEL_SUPPORTS_STRUCTURED_OUTPUT=true|false`
33
+
34
+ Per-entity overrides use the uppercase agent suffix, for example:
35
+
36
+ - `TRENCHES_MODEL_PROVIDER_US=openai`
37
+ - `TRENCHES_MODEL_NAME_US=gpt-4.1`
38
+ - `TRENCHES_MODEL_API_KEY_ENV_US=OPENAI_API_KEY`
39
+
40
+ Relevant OpenEnv pieces in this package:
41
+
42
+ - `trenches_env.openenv_adapter.TrenchesOpenEnvEnvironment`
43
+ - `trenches_env.openenv_adapter.TrenchesOpenEnvAction`
44
+ - `trenches_env.openenv_adapter.TrenchesOpenEnvObservation`
45
+ - `trenches_env.openenv_adapter.TrenchesOpenEnvState`
46
+ - `trenches_env.openenv_client.TrenchesEnvClient`
47
+
48
+ Historical replay training pieces:
49
+
50
+ - `trenches_env.models.Prediction`
51
+ - `trenches_env.models.HistoricalEvent`
52
+ - `trenches_env.models.HistoricalReplayState`
53
+ - `trenches_env.training_cli`
54
+
55
+ The backend now supports replay-aware forecast training:
56
+
57
+ - `reset(..., replay_id=...)` starts from a visible historical context event
58
+ - `step(...)` accepts separate `action` and `prediction`
59
+ - the next ground-truth event is revealed on the same OpenEnv step
60
+ - reward blends the entity action reward with forecast scoring terms
61
+
62
+ Bundled bootstrap replays, one per entity (⚠️ **all replays are synthetic seed data** — replace with curated truth sets for production); for example:
63
+
64
+ - `us_synthetic_seed_2025_2026`
65
+
66
+ CLI training entrypoint:
67
+
68
+ ```bash
69
+ trenches-train \
70
+ --training-agent us \
71
+ --replay-id us_synthetic_seed_2025_2026 \
72
+ --generation-backend transformers
73
+ ```
74
+
75
+ The CLI supports two rollout backends:
76
+
77
+ - `transformers` for portable local smoke runs
78
+ - `vllm` for the documented colocated OpenEnv + TRL path on a GPU box
79
+
80
+ Planned responsibilities:
81
+
82
+ - Hold in-memory crisis sessions.
83
+ - Expose `create`, `reset`, `step`, and `state` HTTP endpoints.
84
+ - Model the fog-of-war world state and per-agent observations.
85
+ - Provide a native OpenEnv boundary with scalar rewards for one active training agent while retaining full per-agent state internally.
86
+ - Provide extension points for World Monitor ingestion and RL training hooks.
backend/TRAINING_FLOW.md ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Trenches OpenEnv Training Flow
2
+
3
+ ## End-to-End Training Pipeline
4
+
5
+ ```mermaid
6
+ flowchart TD
7
+ subgraph CLI["training_cli.py — CLI Entry Point"]
8
+ A["python -m trenches_env.training_cli<br/>--model-id · --training-agent · --replay-id<br/>--output-dir · --generation-backend"]
9
+ end
10
+
11
+ A -->|"Loads base model<br/>from HuggingFace Hub"| B["🤗 HuggingFace Model<br/>(e.g. Qwen/Qwen3-8B<br/>or sshleifer/tiny-gpt2)"]
12
+ A -->|"Starts in-process"| C["FastAPI Backend<br/>server.py → uvicorn<br/>localhost:8000"]
13
+
14
+ B --> D["GRPOTrainer<br/>(HF TRL)"]
15
+
16
+ subgraph GRPO["GRPO Training Loop (per step)"]
17
+ D -->|"1. Build prompts<br/>from base_prompt × train_size"| E["Prompt Dataset"]
18
+ E -->|"2. rollout_func()"| F["OpenEnv Client<br/>POST /openenv/reset"]
19
+ F -->|"Returns observation"| G["Render Grounded Prompt<br/>agent obs + historical brief<br/>+ strategic state + allowed actions"]
20
+ G -->|"3. Generate completions"| H{Generation Backend?}
21
+ H -->|transformers| I["transformers .generate()<br/>(CPU / Apple Silicon)"]
22
+ H -->|vllm| J["vLLM inference<br/>(Linux CUDA GPU)"]
23
+ I --> K["Parse JSON Output<br/>→ action + prediction"]
24
+ J --> K
25
+ K -->|"4. POST /openenv/step"| L["OpenEnv Environment<br/>openenv_adapter.py"]
26
+ end
27
+
28
+ subgraph ENV["OpenEnv Environment Boundary"]
29
+ L --> M["FogOfWarDiplomacyEnv<br/>env.py"]
30
+ M -->|"Load replay"| N["Replay Data<br/>synthetic_historical_replays/<br/>us_synthetic_seed_2025_2026.json"]
31
+ M -->|"Apply action in sim"| O["Advance World State"]
32
+ M -->|"Reveal next event"| P["Compare prediction<br/>vs actual event"]
33
+ P --> Q["Compute Blended Reward<br/>action_reward + forecast_reward"]
34
+ end
35
+
36
+ Q -->|"5. Return env_reward<br/>+ forecast_reward"| D
37
+ D -->|"6. GRPO policy update<br/>(gradient step)"| D
38
+
39
+ D -->|"After max_steps"| R["trainer.save_model()"]
40
+ R -->|"Writes checkpoint"| S["📁 output-dir/<br/>(--output-dir flag)"]
41
+
42
+ R -->|"Optional"| T["Preview Rollouts<br/>--preview-samples N"]
43
+
44
+ style CLI fill:#1a1a2e,stroke:#e94560,color:#fff
45
+ style GRPO fill:#16213e,stroke:#0f3460,color:#fff
46
+ style ENV fill:#0f3460,stroke:#533483,color:#fff
47
+ style S fill:#e94560,stroke:#fff,color:#fff
48
+ ```
49
+
50
+ ## Model Storage Locations
51
+
52
+ | What | Where | Notes |
53
+ | --------------------------------- | -------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
54
+ | **Base model (source)** | HuggingFace Hub or a local checkpoint directory | Loaded at training start via `AutoTokenizer.from_pretrained(model_id)` + `GRPOTrainer(model=model_id)` |
55
+ | **HF cache (downloaded weights)** | `~/.cache/huggingface/hub/` | Automatic HF cache, reused across runs |
56
+ | **Trained checkpoint (output)** | `--output-dir` flag | Default: `trl-openenv-historical-replay/`. Examples: `backend/tmp-training-run/`, `backend/us-qwen-replay-run/`, `backend/us-vllm-replay-run/` |
57
+ | **Replay dataset** | `backend/src/trenches_env/synthetic_historical_replays/` | Bundled JSON files (e.g. `us_synthetic_seed_2025_2026.json`). ⚠️ **All 6 replays are currently synthetic seed data** — replace with curated truth sets for production. |
58
+
59
+ ## Per-Entity Model Pattern
60
+
61
+ ```mermaid
62
+ flowchart LR
63
+ subgraph Entities["6 Entity Models (1 per agent)"]
64
+ US["us model<br/>📁 backend/us-run/"]
65
+ ISR["israel model<br/>📁 backend/israel-run/"]
66
+ IRN["iran model<br/>📁 backend/iran-run/"]
67
+ HEZ["hezbollah model<br/>📁 backend/hezbollah-run/"]
68
+ GULF["gulf model<br/>📁 backend/gulf-run/"]
69
+ OVR["oversight model<br/>📁 backend/oversight-run/"]
70
+ end
71
+
72
+ subgraph Replays["Replay Datasets"]
73
+ R1["us_synthetic_seed_2025_2026.json ✅"]
74
+ R2["israel_synthetic_seed_2025_2026.json ✅"]
75
+ R3["iran_synthetic_seed_2025_2026.json ✅"]
76
+ R4["hezbollah_synthetic_seed_2025_2026.json ✅"]
77
+ R5["gulf_synthetic_seed_2025_2026.json ✅"]
78
+ R6["oversight_synthetic_seed_2025_2026.json ✅"]
79
+ end
80
+
81
+ R1 --> US
82
+ R2 --> ISR
83
+ R3 --> IRN
84
+ R4 --> HEZ
85
+ R5 --> GULF
86
+ R6 --> OVR
87
+
88
+ BASE["🤗 Base Model<br/>(shared starting point)"] --> US
89
+ BASE --> ISR
90
+ BASE --> IRN
91
+ BASE --> HEZ
92
+ BASE --> GULF
93
+ BASE --> OVR
94
+
95
+ style Entities fill:#16213e,stroke:#e94560,color:#fff
96
+ style Replays fill:#0f3460,stroke:#533483,color:#fff
97
+ style BASE fill:#e94560,stroke:#fff,color:#fff
98
+ ```
99
+
100
+ > ✅ = implemented (all 6 replays are **synthetic seed data** for smoke-testing — replace with curated truth sets for production)
101
+
102
+ The first collection step for replacing those seeds is now:
103
+
104
+ ```bash
105
+ python -m trenches_env.historical_collection_cli --training-agent us --window 2025 --window 2026
106
+ ```
107
+
108
+ That collector writes two outputs: replay JSON in the same schema as the bundled seed files, and raw article audit JSONL for curator review.
109
+
110
+ Saved output directories are reusable as future `--model-id` inputs and can be served with standard Hugging Face-compatible deployment tooling.
111
+
112
+ ## Data Sources During Post-Training
113
+
114
+ All data is bundled in the repo. No external API calls during post-training.
115
+
116
+ ```mermaid
117
+ flowchart LR
118
+ subgraph Bundled["All in backend/src/trenches_env/"]
119
+ REPLAY["synthetic_historical_replays/*.json<br/>10 synthetic events per entity<br/>(timestamps, topics, actors, severity, impacts)"]
120
+ MANIFEST["source_manifest.json<br/>63KB intel briefings<br/>(public + private)"]
121
+ AGENTS["agents.py<br/>6 agent profiles<br/>(role, intel focus, private intel)"]
122
+ RL["rl.py<br/>Reward configs, allowed actions,<br/>strategic state baselines"]
123
+ end
124
+
125
+ REPLAY -->|"Replay timeline"| ENV["env.py builds<br/>observation"]
126
+ MANIFEST -->|"Intel briefings"| ENV
127
+ AGENTS -->|"Agent identity"| ENV
128
+ RL -->|"Reward + actions"| ENV
129
+
130
+ ENV --> PROMPT["Model sees:<br/>• decision prompt<br/>• historical brief<br/>• public/private brief<br/>• strategic state<br/>• allowed actions"]
131
+
132
+ PROMPT --> MODEL["Model outputs JSON<br/>{action, prediction}"]
133
+ MODEL --> SCORE["env.py scores:<br/>action reward + forecast reward<br/>→ GRPO update"]
134
+
135
+ style Bundled fill:#0f3460,stroke:#533483,color:#fff
136
+ style SCORE fill:#e94560,stroke:#fff,color:#fff
137
+ ```
138
+
139
+ ## Dual-Output Per Step
140
+
141
+ Each training step requires the model to produce **two outputs**:
142
+
143
+ ```mermaid
144
+ flowchart LR
145
+ MODEL["Entity Model"] --> ACTION["action<br/>{type, target, summary}"]
146
+ MODEL --> PRED["prediction<br/>{topic, actor, target,<br/>severity, confidence,<br/>time_horizon, summary}"]
147
+
148
+ ACTION -->|"Applied in simulator"| SIM["World State Update"]
149
+ PRED -->|"Compared against<br/>revealed event"| SCORE["Forecast Reward"]
150
+
151
+ SIM --> BLEND["Blended Reward<br/>= action_reward + forecast_reward"]
152
+ SCORE --> BLEND
153
+
154
+ style MODEL fill:#e94560,stroke:#fff,color:#fff
155
+ style BLEND fill:#533483,stroke:#fff,color:#fff
156
+ ```
backend/TRAINING_RUNBOOK.md ADDED
@@ -0,0 +1,441 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Trenches OpenEnv Training Runbook
2
+
3
+ This runbook shows how to run the current CLI training loop for the Trenches entity models.
4
+
5
+ The important architecture rule is simple:
6
+
7
+ - each entity is its own model
8
+ - each run trains one entity to become a better version of itself
9
+ - training happens through the native OpenEnv environment boundary
10
+ - the environment scores both action quality and forecast quality
11
+
12
+ The first implemented proof path is the `us` entity.
13
+
14
+ ## Historical Data Collection Before Post-Training
15
+
16
+ The bundled replay JSON files under `backend/src/trenches_env/synthetic_historical_replays/` are synthetic seed data for smoke tests; collected real replays are written to `backend/src/trenches_env/historical_replays/`.
17
+
18
+ To move toward real post-training data, collect historical article candidates first and then write them back into the same replay JSON schema that the trainer already consumes.
19
+
20
+ The new collector CLI does exactly that:
21
+
22
+ ```bash
23
+ cd /Users/xiao/trenches
24
+ backend/.venv/bin/python -m trenches_env.historical_collection_cli \
25
+ --training-agent us \
26
+ --window 2025 \
27
+ --window 2026 \
28
+ --max-records-per-query 50 \
29
+ --max-events 128 \
30
+ --output-dir backend/src/trenches_env/historical_replays \
31
+ --raw-dir backend/tmp-historical-raw
32
+ ```
33
+
34
+ What it writes:
35
+
36
+ - replay JSON matching the existing seed schema used by `training_cli.py`
37
+ - raw article JSONL audit files for provenance and curator review
38
+
39
+ Important date note:
40
+
41
+ - `2025` maps to `2025-01-01` through `2026-01-01`
42
+ - `2026` maps to `2026-01-01` through the current date at collection time
43
+
44
+ As of March 7, 2026, a full January 1, 2026 to January 1, 2027 window does not exist yet, so the collector clamps the `2026` window to the current day.
45
+
46
+ Collection path:
47
+
48
+ 1. start from existing agent-aligned sources in `source_manifest.json`
49
+ 2. derive historical source domains from those allowlisted feeds
50
+ 3. query the GDELT DOC API month by month
51
+ 4. write raw article audit data
52
+ 5. transform those articles into replay JSON with the same `HistoricalEvent` schema as the synthetic seeds
53
+ 6. curator-review the resulting replay before production post-training
54
+
55
+ Replay file shape:
56
+
57
+ ```json
58
+ {
59
+ "replay_id": "us_historical_2025",
60
+ "name": "US historical replay 2025-01-01 to 2026-01-01",
61
+ "description": "Historically collected replay built from allowlisted source domains via the GDELT DOC API.",
62
+ "training_agent": "us",
63
+ "events": [
64
+ {
65
+ "event_id": "us-20250112090000-abcd1234",
66
+ "timestamp": "2025-01-12T09:00:00Z",
67
+ "topic": "shipping",
68
+ "region": "us",
69
+ "actors": ["iran", "gulf"],
70
+ "targets": ["shipping_lanes"],
71
+ "severity": "medium",
72
+ "summary": "Commercial shipping risk rises near Hormuz after new tanker threat warning.",
73
+ "public_summary": "Commercial shipping risk rises near Hormuz after new tanker threat warning.",
74
+ "source_type": "gdelt_historical_collection",
75
+ "confirmed": true,
76
+ "tags": ["shipping", "wire", "reuters.com"],
77
+ "impact": {
78
+ "tension_delta": 3.5,
79
+ "market_stress_delta": 4.2,
80
+ "oil_pressure_delta": 5.25,
81
+ "actor_metric_deltas": {
82
+ "us": { "shipping_security": -4.2, "regional_access": -4.2 }
83
+ }
84
+ }
85
+ }
86
+ ]
87
+ }
88
+ ```
89
+
90
+ Raw audit file shape:
91
+
92
+ ```json
93
+ {
94
+ "article_id": "7d8b1f5dcb87d4f2",
95
+ "agent_id": "us",
96
+ "source_id": "us-reuters-us",
97
+ "source_name": "Reuters US",
98
+ "title": "Commercial shipping risk rises near Hormuz after new tanker threat warning.",
99
+ "url": "https://www.reuters.com/world/middle-east/example",
100
+ "domain": "reuters.com",
101
+ "timestamp": "2025-01-12T09:00:00Z",
102
+ "query": "(domainis:reuters.com) AND (\"Hormuz\" OR \"shipping\")",
103
+ "window_id": "2025"
104
+ }
105
+ ```
106
+
107
+ ## What This Training Loop Does
108
+
109
+ On each replay step the model must return two separate outputs:
110
+
111
+ 1. an `action`
112
+ 2. a `prediction`
113
+
114
+ The backend then:
115
+
116
+ 1. applies the action in the simulator
117
+ 2. reveals the next historical event in the replay timeline
118
+ 3. scores the prediction against that revealed event
119
+ 4. blends forecast reward into the entity reward
120
+
121
+ This means the `us` model is not learning to be a generic strategist. It is learning to be a better `us` policy inside this simulator.
122
+
123
+ ## Current Scope
124
+
125
+ Implemented now:
126
+
127
+ - native OpenEnv replay-aware training loop
128
+ - 6 **synthetic** seed replay datasets (us, israel, iran, hezbollah, gulf, oversight) — replace with curated truth sets for production
129
+ - CLI trainer using Hugging Face TRL
130
+ - portable local generation path with `transformers`
131
+ - GPU-oriented generation path with `vllm`
132
+
133
+ Not implemented yet:
134
+
135
+ - evaluation/baseline reporting across all entities
136
+ - UI training controls
137
+ - production (curator-reviewed, non-synthetic) replay datasets — collected replays such as `us_2025_events` have only been smoke-tested so far
138
+
139
+ ## Requirements
140
+
141
+ Use Python `3.12`.
142
+
143
+ From the repo root:
144
+
145
+ ```bash
146
+ cd /Users/xiao/trenches
147
+ ```
148
+
149
+ Create a virtualenv:
150
+
151
+ ```bash
152
+ uv venv backend/.venv --python 3.12
153
+ ```
154
+
155
+ Install the backend plus training dependencies:
156
+
157
+ ```bash
158
+ uv pip install --python backend/.venv/bin/python -e 'backend[train]' 'openenv-core[core]>=0.2.1,<0.3.0' 'torch>=2.10.0'
159
+ ```
160
+
161
+ ## Tokens And Env Vars
162
+
163
+ No `.env` file is required for the default public smoke test.
164
+
165
+ You only need a token if you use a gated or private Hugging Face model.
166
+
167
+ If needed:
168
+
169
+ ```bash
170
+ export HF_TOKEN=your_huggingface_token
171
+ ```
172
+
173
+ You do not need OpenAI, Anthropic, or other provider keys for the local replay smoke run.
174
+
175
+ Optional noise reduction:
176
+
177
+ ```bash
178
+ export TRL_EXPERIMENTAL_SILENCE=1
179
+ ```
180
+
181
+ ## Local Smoke Run
182
+
183
+ This is the fastest way to prove the loop works on a laptop or Mac.
184
+
185
+ It uses:
186
+
187
+ - `sshleifer/tiny-gpt2`
188
+ - `transformers` generation backend
189
+ - `us` replay
190
+ - one tiny GRPO run
191
+
192
+ Run:
193
+
194
+ ```bash
195
+ backend/.venv/bin/python -m trenches_env.training_cli \
196
+ --model-id sshleifer/tiny-gpt2 \
197
+ --generation-backend transformers \
198
+ --training-agent us \
199
+ --training-stage stage_1_dense \
200
+ --replay-id us_synthetic_seed_2025_2026 \
201
+ --train-size 4 \
202
+ --max-steps 1 \
203
+ --num-generations 2 \
204
+ --max-prompt-length 512 \
205
+ --max-completion-length 48 \
206
+ --per-device-train-batch-size 1 \
207
+ --gradient-accumulation-steps 1 \
208
+ --output-dir backend/tmp-training-run \
209
+ --preview-samples 1
210
+ ```
211
+
212
+ What to expect:
213
+
214
+ - the trainer starts a local backend
215
+ - the trainer talks to `/openenv`
216
+ - one short GRPO pass runs
217
+ - model artifacts are written to `backend/tmp-training-run`
218
+ - the preview step prints a rollout sample after training
219
+
220
+ This exact path has already been smoke-tested in this repo.
221
+
222
+ ## Real Replay Smoke Run
223
+
224
+ Once you have collected real replay data under `backend/src/trenches_env/historical_replays/`,
225
+ you can run the same tiny smoke pass against a real replay id.
226
+
227
+ Example:
228
+
229
+ ```bash
230
+ backend/.venv/bin/python -m trenches_env.training_cli \
231
+ --model-id sshleifer/tiny-gpt2 \
232
+ --generation-backend transformers \
233
+ --training-agent us \
234
+ --training-stage stage_1_dense \
235
+ --replay-id us_2025_events \
236
+ --train-size 4 \
237
+ --max-steps 1 \
238
+ --num-generations 2 \
239
+ --max-prompt-length 512 \
240
+ --max-completion-length 48 \
241
+ --per-device-train-batch-size 1 \
242
+ --gradient-accumulation-steps 1 \
243
+ --output-dir backend/tmp-real-smoke-us \
244
+ --preview-samples 1
245
+ ```
246
+
247
+ This repo has now been smoke-tested successfully on the real `us_2025_events` replay.
248
+
249
+ ## Better Local Run
250
+
251
+ Once the smoke test works, switch to a stronger public instruct model.
252
+
253
+ Example:
254
+
255
+ ```bash
256
+ backend/.venv/bin/python -m trenches_env.training_cli \
257
+ --model-id Qwen/Qwen3-8B \
258
+ --generation-backend transformers \
259
+ --training-agent us \
260
+ --training-stage stage_1_dense \
261
+ --replay-id us_synthetic_seed_2025_2026 \
262
+ --train-size 32 \
263
+ --max-steps 8 \
264
+ --num-generations 4 \
265
+ --max-prompt-length 1024 \
266
+ --max-completion-length 220 \
267
+ --per-device-train-batch-size 1 \
268
+ --gradient-accumulation-steps 1 \
269
+ --output-dir backend/us-qwen-replay-run \
270
+ --preview-samples 3
271
+ ```
272
+
273
+ On CPU or Apple Silicon this will still be slow. That is expected.
274
+
275
+ ## GPU Run With vLLM
276
+
277
+ Use this on a Linux CUDA machine when you want the documented OpenEnv + TRL path.
278
+
279
+ First install `vllm` in the same environment.
280
+
281
+ Then run:
282
+
283
+ ```bash
284
+ backend/.venv/bin/python -m trenches_env.training_cli \
285
+ --model-id Qwen/Qwen3-8B \
286
+ --generation-backend vllm \
287
+ --training-agent us \
288
+ --training-stage stage_1_dense \
289
+ --replay-id us_synthetic_seed_2025_2026 \
290
+ --train-size 64 \
291
+ --max-steps 16 \
292
+ --num-generations 4 \
293
+ --max-prompt-length 1024 \
294
+ --max-completion-length 220 \
295
+ --per-device-train-batch-size 1 \
296
+ --gradient-accumulation-steps 1 \
297
+ --output-dir backend/us-vllm-replay-run \
298
+ --preview-samples 3
299
+ ```
300
+
301
+ Notes:
302
+
303
+ - `vllm` is not the default because many local machines do not support it cleanly
304
+ - the CLI auto-detects a usable backend when `--generation-backend auto` is used
305
+ - `transformers` is the safer fallback for local proof runs
306
+
307
+ ## Running Another Entity Later
308
+
309
+ The trainer already supports `--training-agent`, and replay ids are loaded from both:
310
+
311
+ - `backend/src/trenches_env/historical_replays/` for curated real data
312
+ - `backend/src/trenches_env/synthetic_historical_replays/` for synthetic seed data
313
+
314
+ The future pattern for the other five entities is:
315
+
316
+ 1. create a replay file for that entity
317
+ 2. point the trainer at that replay id
318
+ 3. write the checkpoint to a separate output directory
319
+
320
+ Example shape:
321
+
322
+ ```bash
323
+ backend/.venv/bin/python -m trenches_env.training_cli \
324
+ --training-agent israel \
325
+ --replay-id israel_2025_events \
326
+ --output-dir backend/israel-run
327
+ ```
328
+
329
+ If you want the synthetic smoke path instead, switch the replay id back to
330
+ `israel_synthetic_seed_2025_2026`.
331
+
332
+ ## Reusing Or Deploying A Saved Checkpoint
333
+
334
+ Each completed run writes a standard Hugging Face checkpoint layout to `--output-dir`,
335
+ including at minimum:
336
+
337
+ - `config.json`
338
+ - `model.safetensors`
339
+ - `tokenizer.json`
340
+ - `tokenizer_config.json`
341
+ - `generation_config.json`
342
+
343
+ Two verified reuse paths:
344
+
345
+ 1. Continue training from the saved directory by passing it back as `--model-id`
346
+ 2. Load it directly with `transformers.AutoModelForCausalLM.from_pretrained(...)`
347
+
348
+ Example continue-training command:
349
+
350
+ ```bash
351
+ backend/.venv/bin/python -m trenches_env.training_cli \
352
+ --model-id /Users/xiao/trenches/backend/tmp-real-smoke-us \
353
+ --generation-backend transformers \
354
+ --training-agent us \
355
+ --training-stage stage_1_dense \
356
+ --replay-id us_2025_events \
357
+ --train-size 2 \
358
+ --max-steps 1 \
359
+ --num-generations 2 \
360
+ --output-dir backend/tmp-real-smoke-us-reuse \
361
+ --no-preview
362
+ ```
363
+
364
+ Because the output is a standard HF checkpoint, it is also compatible with normal
365
+ deployment packaging flows such as `transformers` inference or a vLLM/Hugging Face-serving setup
366
+ that accepts a local model directory.
367
+
368
+ ## How To Verify The Environment Signal
369
+
370
+ Run the focused tests:
371
+
372
+ ```bash
373
+ cd /Users/xiao/trenches/backend
374
+ pytest -q tests/test_openenv_adapter.py tests/test_server.py
375
+ ```
376
+
377
+ These tests cover:
378
+
379
+ - replay reset/step behavior
380
+ - prediction storage
381
+ - forecast reward scoring
382
+ - OpenEnv adapter behavior
383
+ - server wiring
384
+
385
+ ## What Files Matter
386
+
387
+ Core training files:
388
+
389
+ - `backend/src/trenches_env/training_cli.py`
390
+ - `backend/src/trenches_env/openenv_adapter.py`
391
+ - `backend/src/trenches_env/env.py`
392
+ - `backend/src/trenches_env/models.py`
393
+ - `backend/src/trenches_env/historical_replay.py`
394
+ - `backend/src/trenches_env/synthetic_historical_replays/us_synthetic_seed_2025_2026.json`
395
+
396
+ ## Troubleshooting
397
+
398
+ If you see `No module named 'trl'` or `No module named 'openenv'`:
399
+
400
+ - reinstall into `backend/.venv`
401
+ - make sure you are using `backend/.venv/bin/python`
402
+
403
+ If TRL complains that `generation_batch_size` is not divisible by `num_generations`:
404
+
405
+ - keep `--num-generations` small
406
+ - use the current CLI defaults
407
+
408
+ If `vllm` fails locally:
409
+
410
+ - switch to `--generation-backend transformers`
411
+
412
+ If a model is gated:
413
+
414
+ - export `HF_TOKEN`
415
+
416
+ If the run finishes with flat rewards on a tiny smoke model:
417
+
418
+ - that does not mean the environment is broken
419
+ - it usually means the toy model generated poor outputs
420
+ - use a better instruct model and a longer run
421
+
422
+ ## Short Version
423
+
424
+ If you only want the shortest possible proof:
425
+
426
+ ```bash
427
+ cd /Users/xiao/trenches
428
+ uv venv backend/.venv --python 3.12
429
+ uv pip install --python backend/.venv/bin/python -e 'backend[train]' 'openenv-core[core]>=0.2.1,<0.3.0' 'torch>=2.10.0'
430
+ backend/.venv/bin/python -m trenches_env.training_cli \
431
+ --model-id sshleifer/tiny-gpt2 \
432
+ --generation-backend transformers \
433
+ --training-agent us \
434
+ --replay-id us_synthetic_seed_2025_2026 \
435
+ --train-size 4 \
436
+ --max-steps 1 \
437
+ --num-generations 2 \
438
+ --output-dir backend/tmp-training-run
439
+ ```
440
+
441
+ That is the current hackathon-safe path.
backend/examples/trl_openenv_colab_minimal.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from trenches_env.training_cli import main
2
+
3
+
4
+ if __name__ == "__main__":
5
+ main()
backend/pyproject.toml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "trenches-openenv"
7
+ version = "0.1.0"
8
+ description = "Python scaffolding for the Trenches OpenEnv crisis simulator"
9
+ readme = "README.md"
10
+ requires-python = ">=3.12"
11
+ dependencies = [
12
+ "fastapi>=0.115.0,<1.0.0",
13
+ "httpx>=0.27.0,<1.0.0",
14
+ "numpy>=1.26.0,<3.0.0",
15
+ "openenv-core[core]>=0.2.1,<0.3.0",
16
+ "pydantic>=2.8.0,<3.0.0",
17
+ "uvicorn[standard]>=0.30.0,<1.0.0",
18
+ ]
19
+
20
+ [project.optional-dependencies]
21
+ dev = [
22
+ "pytest>=8.3.0,<9.0.0",
23
+ ]
24
+ train = [
25
+ "accelerate>=1.0.0,<2.0.0",
26
+ "bitsandbytes>=0.43.0",
27
+ "datasets>=3.0.0,<4.0.0",
28
+ "peft>=0.12.0,<1.0.0",
29
+ "transformers>=4.55.0,<5.0.0",
30
+ "trl>=0.25.0,<0.26.0",
31
+ ]
32
+
33
+ [project.scripts]
34
+ trenches-api = "trenches_env.server:run"
35
+ trenches-train = "trenches_env.training_cli:main"
36
+ trenches-build-historical-replay = "trenches_env.historical_collection_cli:main"
37
+
38
+ [tool.setuptools]
39
+ package-dir = {"" = "src"}
40
+
41
+ [tool.setuptools.packages.find]
42
+ where = ["src"]
43
+
44
+ [tool.setuptools.package-data]
45
+ trenches_env = ["source_manifest.json", "historical_replays/*.json"]
46
+
47
+ [tool.pytest.ini_options]
48
+ pythonpath = ["src"]
49
+ testpaths = ["tests"]
backend/src/trenches_env/__init__.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from trenches_env.env import FogOfWarDiplomacyEnv
2
+ from trenches_env.openenv_client import TrenchesEnvClient
3
+ from trenches_env.openenv_adapter import (
4
+ OPENENV_CORE_AVAILABLE,
5
+ OpenEnvAdapter,
6
+ TrenchesOpenEnvAction,
7
+ TrenchesOpenEnvEnvironment,
8
+ TrenchesOpenEnvObservation,
9
+ TrenchesOpenEnvState,
10
+ create_openenv_fastapi_app,
11
+ )
12
+ from trenches_env.session_manager import SessionManager
13
+
14
+ __all__ = [
15
+ "OPENENV_CORE_AVAILABLE",
16
+ "FogOfWarDiplomacyEnv",
17
+ "OpenEnvAdapter",
18
+ "SessionManager",
19
+ "TrenchesEnvClient",
20
+ "TrenchesOpenEnvAction",
21
+ "TrenchesOpenEnvEnvironment",
22
+ "TrenchesOpenEnvObservation",
23
+ "TrenchesOpenEnvState",
24
+ "create_openenv_fastapi_app",
25
+ ]
backend/src/trenches_env/agents.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Literal
5
+
6
+ AgentId = Literal["us", "israel", "iran", "hezbollah", "gulf", "oversight"]
7
+ ModelSize = Literal["large", "medium-large", "medium"]
8
+
9
+
10
+ @dataclass(frozen=True)
11
+ class AgentProfile:
12
+ display_name: str
13
+ role: str
14
+ model_size: ModelSize
15
+ intelligence_focus: tuple[str, ...]
16
+ baseline_private_intel: tuple[str, ...]
17
+
18
+
19
+ AGENT_IDS: tuple[AgentId, ...] = (
20
+ "us",
21
+ "israel",
22
+ "iran",
23
+ "hezbollah",
24
+ "gulf",
25
+ "oversight",
26
+ )
27
+
28
+
29
+ AGENT_PROFILES: dict[AgentId, AgentProfile] = {
30
+ "us": AgentProfile(
31
+ display_name="US / CENTCOM",
32
+ role="Alliance management, sanctions, domestic stability",
33
+ model_size="large",
34
+ intelligence_focus=("polls", "markets", "alliances", "shipping"),
35
+ baseline_private_intel=(
36
+ "Domestic approval is sensitive to prolonged escalation.",
37
+ "Forward naval posture can deter but also spike market stress.",
38
+ ),
39
+ ),
40
+ "israel": AgentProfile(
41
+ display_name="Israel / IDF",
42
+ role="Border defense, strike planning, proxy disruption",
43
+ model_size="medium-large",
44
+ intelligence_focus=("northern front", "sirens", "proxy movement", "air defense"),
45
+ baseline_private_intel=(
46
+ "Border warning posture remains elevated in the north.",
47
+ "Fast retaliation can secure deterrence but raises coalition risk.",
48
+ ),
49
+ ),
50
+ "iran": AgentProfile(
51
+ display_name="Iran / IRGC",
52
+ role="Asymmetric retaliation, proxy coordination, survival",
53
+ model_size="medium-large",
54
+ intelligence_focus=("proxy network", "oil chokepoints", "internal losses", "deception"),
55
+ baseline_private_intel=(
56
+ "Proxy coordination is most effective when attribution stays ambiguous.",
57
+ "Energy chokepoints remain the strongest leverage point.",
58
+ ),
59
+ ),
60
+ "hezbollah": AgentProfile(
61
+ display_name="Hezbollah",
62
+ role="Asymmetric swarming, opportunistic escalation",
63
+ model_size="medium",
64
+ intelligence_focus=("border gaps", "morale", "small-unit pressure", "drone windows"),
65
+ baseline_private_intel=(
66
+ "Small, frequent attacks are harder to pre-empt than large waves.",
67
+ "Alignment with Tehran matters more than independent visibility.",
68
+ ),
69
+ ),
70
+ "gulf": AgentProfile(
71
+ display_name="Gulf Coalition",
72
+ role="Market hedging, shipping security, selective alignment",
73
+ model_size="medium",
74
+ intelligence_focus=("oil", "shipping", "capital flows", "neutrality"),
75
+ baseline_private_intel=(
76
+ "Energy shock containment matters more than direct battlefield gains.",
77
+ "Neutral positioning creates leverage only while trade routes remain open.",
78
+ ),
79
+ ),
80
+ "oversight": AgentProfile(
81
+ display_name="Fleet Oversight",
82
+ role="Risk scoring, intervention, trace auditing",
83
+ model_size="medium-large",
84
+ intelligence_focus=("global risk", "misalignment", "cascades", "de-escalation"),
85
+ baseline_private_intel=(
86
+ "Misread incentives are the strongest predictor of runaway escalation.",
87
+ "Interventions should reduce risk without collapsing agent autonomy.",
88
+ ),
89
+ ),
90
+ }
backend/src/trenches_env/benchmark_runner.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from collections import Counter, defaultdict
4
+ from typing import Callable
5
+
6
+ from trenches_env.agents import AGENT_IDS
7
+ from trenches_env.env import FogOfWarDiplomacyEnv
8
+ from trenches_env.models import (
9
+ BenchmarkEntityScorecard,
10
+ BenchmarkRunRequest,
11
+ BenchmarkRunResponse,
12
+ BenchmarkScenarioResult,
13
+ StepSessionRequest,
14
+ )
15
+ from trenches_env.scenarios import benchmark_scenario_ids, get_scenario_definition, scenario_signals_for_turn
16
+ from trenches_env.source_ingestion import SourceHarvester
17
+
18
+
19
+ def _default_env_factory() -> FogOfWarDiplomacyEnv:
20
+ return FogOfWarDiplomacyEnv(source_harvester=SourceHarvester(auto_start=False))
21
+
22
+
23
+ class ScenarioBenchmarkRunner:
24
+ def __init__(self, env_factory: Callable[[], FogOfWarDiplomacyEnv] | None = None) -> None:
25
+ self._env_factory = env_factory or _default_env_factory
26
+
27
+ def run(self, request: BenchmarkRunRequest) -> BenchmarkRunResponse:
28
+ scenario_ids = request.scenario_ids or benchmark_scenario_ids()
29
+ results: list[BenchmarkScenarioResult] = []
30
+ aggregate_reward_totals: dict[str, float] = {agent_id: 0.0 for agent_id in AGENT_IDS}
31
+
32
+ for index, scenario_id in enumerate(scenario_ids):
33
+ scenario = get_scenario_definition(scenario_id)
34
+ scenario_seed = None if request.seed is None else request.seed + index
35
+ turn_limit = request.steps_per_scenario or scenario.benchmark_turns
36
+ env = self._env_factory()
37
+
38
+ try:
39
+ session = env.create_session(
40
+ seed=scenario_seed,
41
+ training_stage=request.training_stage,
42
+ max_turns=turn_limit,
43
+ scenario_id=scenario.id,
44
+ )
45
+ reward_totals: dict[str, float] = {agent_id: 0.0 for agent_id in AGENT_IDS}
46
+ goal_term_totals: dict[str, dict[str, float]] = {
47
+ agent_id: defaultdict(float) for agent_id in AGENT_IDS
48
+ }
49
+ action_counters: dict[str, Counter[str]] = {agent_id: Counter() for agent_id in AGENT_IDS}
50
+ oversight_trigger_count = 0
51
+ done = False
52
+ done_reason: str | None = None
53
+
54
+ for turn in range(1, turn_limit + 1):
55
+ signals = scenario_signals_for_turn(scenario.id, turn)
56
+ actions = env.resolve_policy_actions(session, signals)
57
+ result = env.step_session(
58
+ session,
59
+ StepSessionRequest(actions=actions, external_signals=signals),
60
+ )
61
+ session = result.session
62
+ trace = session.recent_traces[-1]
63
+
64
+ if result.oversight.triggered:
65
+ oversight_trigger_count += 1
66
+
67
+ for agent_id, action in trace.actions.items():
68
+ action_counters[agent_id][action.type] += 1
69
+
70
+ for agent_id, reward in trace.rewards.items():
71
+ reward_totals[agent_id] += reward.total
72
+ for name, value in reward.goal_terms.items():
73
+ goal_term_totals[agent_id][name] += value
74
+
75
+ if result.done:
76
+ done = True
77
+ if session.world.tension_level >= 95.0:
78
+ done_reason = "tension_threshold"
79
+ else:
80
+ done_reason = "max_turns"
81
+ break
82
+
83
+ scorecards: dict[str, BenchmarkEntityScorecard] = {}
84
+ for agent_id in AGENT_IDS:
85
+ final_reward = session.rewards[agent_id]
86
+ aggregate_reward_totals[agent_id] += reward_totals[agent_id]
87
+ action_counts = dict(action_counters[agent_id])
88
+ dominant_action = (
89
+ max(action_counts, key=action_counts.get)
90
+ if action_counts
91
+ else None
92
+ )
93
+ damaged_asset_count = sum(
94
+ 1
95
+ for asset in session.world.asset_state.get(agent_id, {}).values()
96
+ if asset.status != "operational"
97
+ )
98
+ asset_pressure = round(env._asset_pressure(session.world, agent_id), 3)
99
+ warnings: list[str] = []
100
+ if dominant_action is not None:
101
+ dominant_share = action_counts[dominant_action] / max(sum(action_counts.values()), 1)
102
+ if dominant_share >= 0.75:
103
+ warnings.append(f"action_monoculture:{dominant_action}")
104
+ if asset_pressure >= 0.45 and dominant_action == "hold":
105
+ warnings.append("passive_under_asset_pressure")
106
+ if final_reward.total <= -0.35 and dominant_action in {"strike", "mobilize", "deceive", "sanction"}:
107
+ warnings.append("negative_escalation_bias")
108
+
109
+ scorecards[agent_id] = BenchmarkEntityScorecard(
110
+ agent_id=agent_id,
111
+ total_reward=round(reward_totals[agent_id], 3),
112
+ mean_reward=round(reward_totals[agent_id] / max(session.world.turn, 1), 3),
113
+ final_reward=final_reward.total,
114
+ final_goal_terms=final_reward.goal_terms,
115
+ aggregated_goal_terms={
116
+ name: round(value, 3)
117
+ for name, value in goal_term_totals[agent_id].items()
118
+ },
119
+ final_state=session.world.latent_state.get(agent_id, {}).copy(),
120
+ damaged_asset_count=damaged_asset_count,
121
+ asset_pressure=asset_pressure,
122
+ action_counts=action_counts,
123
+ dominant_action=dominant_action,
124
+ warnings=warnings,
125
+ )
126
+
127
+ scenario_warnings: list[str] = []
128
+ if oversight_trigger_count >= max(2, turn_limit // 2):
129
+ scenario_warnings.append("frequent_oversight")
130
+ if session.world.tension_level >= 90.0:
131
+ scenario_warnings.append("runaway_escalation")
132
+ if all(
133
+ scorecards[agent_id].dominant_action == "hold"
134
+ for agent_id in ("us", "israel", "iran", "hezbollah", "gulf")
135
+ ):
136
+ scenario_warnings.append("global_passivity")
137
+
138
+ summary = (
139
+ f"{scenario.name}: {session.world.turn} turns, tension {session.world.tension_level:.1f}, "
140
+ f"oversight triggers {oversight_trigger_count}."
141
+ )
142
+ results.append(
143
+ BenchmarkScenarioResult(
144
+ scenario_id=scenario.id,
145
+ scenario_name=scenario.name,
146
+ seed=scenario_seed,
147
+ training_stage=request.training_stage,
148
+ turns_executed=session.world.turn,
149
+ done=done,
150
+ done_reason=done_reason,
151
+ oversight_trigger_count=oversight_trigger_count,
152
+ final_tension=session.world.tension_level,
153
+ final_market_stress=session.world.market_stress,
154
+ final_oil_pressure=session.world.oil_pressure,
155
+ summary=summary,
156
+ warnings=scenario_warnings,
157
+ scorecards=scorecards,
158
+ )
159
+ )
160
+ finally:
161
+ env.shutdown()
162
+
163
+ scenario_count = max(len(results), 1)
164
+ aggregate_mean_total_rewards = {
165
+ agent_id: round(total / scenario_count, 3)
166
+ for agent_id, total in aggregate_reward_totals.items()
167
+ }
168
+ return BenchmarkRunResponse(
169
+ seed=request.seed,
170
+ training_stage=request.training_stage,
171
+ scenario_ids=[result.scenario_id for result in results],
172
+ scenario_count=len(results),
173
+ results=results,
174
+ aggregate_mean_total_rewards=aggregate_mean_total_rewards,
175
+ )
backend/src/trenches_env/entity_knowledge.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ from functools import lru_cache
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ DEFAULT_ENTITIES_ROOT = Path(__file__).resolve().parents[3] / "entities"
10
+
11
+
12
@lru_cache(maxsize=1)
def resolve_entities_root() -> Path:
    """Locate the directory that holds the per-agent entity folders.

    Resolution order:
      1. ``TRENCHES_ENTITIES_ROOT`` (user-expanded and resolved), if it exists.
      2. ``DEFAULT_ENTITIES_ROOT``, ``<cwd>/entities``, ``<cwd>/../entities`` --
         the first one that exists.
      3. ``DEFAULT_ENTITIES_ROOT`` even when it does not exist.

    The result is memoised, so the environment and filesystem are only
    inspected on the first call.
    """
    env_value = os.getenv("TRENCHES_ENTITIES_ROOT")
    if env_value:
        override = Path(env_value).expanduser().resolve()
        if override.exists():
            return override

    search_order = (
        DEFAULT_ENTITIES_ROOT,
        Path.cwd() / "entities",
        Path.cwd().parent / "entities",
    )
    # Fall back to the packaged default when none of the candidates exist.
    return next((path for path in search_order if path.exists()), DEFAULT_ENTITIES_ROOT)
30
+
31
+
32
@lru_cache(maxsize=None)
def load_entity_pack(agent_id: str) -> dict[str, Any]:
    """Read ``profile.json`` and ``assets.json`` for one agent.

    Returns a dict with the keys ``"profile"`` and ``"assets"``. When either
    file is missing, both values are empty dicts. Results are memoised per
    ``agent_id``; repeated calls hand back the same dict object.
    """
    pack_dir = resolve_entities_root() / agent_id
    sources = {
        "profile": pack_dir / "profile.json",
        "assets": pack_dir / "assets.json",
    }

    # Both files are required; a partial pack is treated as no pack at all.
    if not all(path.exists() for path in sources.values()):
        return {"profile": {}, "assets": {}}

    pack: dict[str, Any] = {}
    for key, path in sources.items():
        with path.open("r", encoding="utf-8") as stream:
            pack[key] = json.load(stream)
    return pack
backend/src/trenches_env/env.py ADDED
The diff for this file is too large to render. See raw diff
 
backend/src/trenches_env/historical_collection.py ADDED
@@ -0,0 +1,461 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import json
5
+ import re
6
+ from datetime import UTC, date, datetime, time, timedelta
7
+ from pathlib import Path
8
+ from urllib.parse import parse_qs, urlparse
9
+
10
+ from pydantic import BaseModel, Field
11
+
12
+ from trenches_env.agents import AGENT_IDS
13
+ from trenches_env.historical_replay import HistoricalReplayDefinition
14
+ from trenches_env.models import EventSeverity, HistoricalEvent, HistoricalEventImpact
15
+ from trenches_env.source_catalog import get_sources_for_agent
16
+ from trenches_env.source_catalog import SourceSpec, UrlEndpoint
17
+
18
+ _SITE_PATTERN = re.compile(r"site:([A-Za-z0-9.-]+)")
19
+ _NON_WORD_PATTERN = re.compile(r"[^a-z0-9]+")
20
+
21
+ TOPIC_KEYWORDS: dict[str, tuple[str, ...]] = {
22
+ "shipping": ("shipping", "tanker", "hormuz", "maritime", "vessel", "escort", "transit", "port"),
23
+ "commodities": ("gold", "silver", "copper", "lithium", "lng", "commodity", "mineral", "rare earth"),
24
+ "border": ("border", "rocket", "missile", "drone", "swarm", "launch", "incursion", "front"),
25
+ "corridor": ("corridor", "logistics", "syria", "bekaa", "interdiction", "proxy", "sustainment"),
26
+ "domestic": ("sanction", "protest", "unrest", "inflation", "reserve", "political", "domestic"),
27
+ "cyber": ("cyber", "outage", "malware", "network", "infrastructure", "blackout"),
28
+ "market": ("market", "investor", "bond", "stocks", "premium", "insurance", "trade"),
29
+ "humanitarian": ("humanitarian", "aid", "displacement", "civilian", "refugee", "shelter"),
30
+ "diplomacy": ("ceasefire", "talks", "summit", "mediat", "backchannel", "framework", "deconfliction"),
31
+ }
32
+
33
+ NEGATIVE_MARKERS = (
34
+ "attack",
35
+ "strike",
36
+ "threat",
37
+ "harassment",
38
+ "swarm",
39
+ "sanction",
40
+ "disruption",
41
+ "outage",
42
+ "volley",
43
+ "incursion",
44
+ "retaliat",
45
+ "unrest",
46
+ )
47
+
48
+ POSITIVE_MARKERS = (
49
+ "ceasefire",
50
+ "stabil",
51
+ "assurance",
52
+ "resupply",
53
+ "escort",
54
+ "framework",
55
+ "deconfliction",
56
+ "reopen",
57
+ "relief",
58
+ "backchannel",
59
+ "reprieve",
60
+ )
61
+
62
+ AGENT_QUERY_TERMS: dict[str, tuple[str, ...]] = {
63
+ "us": ("Hormuz", "shipping", "CENTCOM", "sanctions", "Gulf", "Iran", "Israel", "Hezbollah"),
64
+ "israel": ("Israel", "IDF", "Hezbollah", "Lebanon", "Iran", "Syria", "rocket", "drone"),
65
+ "iran": ("Iran", "IRGC", "proxy", "Hormuz", "sanctions", "Israel", "United States"),
66
+ "hezbollah": ("Hezbollah", "Lebanon", "Israel", "rocket", "drone", "border", "south Lebanon"),
67
+ "gulf": ("Gulf", "Hormuz", "shipping", "energy", "LNG", "oil", "Saudi", "UAE", "Qatar"),
68
+ "oversight": ("regional escalation", "cyber", "shipping", "humanitarian", "ceasefire", "attribution"),
69
+ }
70
+
71
+ TOPIC_IMPACT_FACTORS: dict[str, tuple[float, float, float]] = {
72
+ "shipping": (1.0, 1.2, 1.5),
73
+ "commodities": (0.5, 1.2, 1.0),
74
+ "border": (1.25, 0.4, 0.1),
75
+ "corridor": (1.0, 0.3, 0.2),
76
+ "domestic": (0.7, 0.5, 0.2),
77
+ "cyber": (0.8, 0.9, 0.4),
78
+ "market": (0.4, 1.1, 0.5),
79
+ "humanitarian": (0.6, 0.2, 0.1),
80
+ "diplomacy": (-0.9, -0.8, -0.6),
81
+ }
82
+
83
+ AGENT_TOPIC_METRICS: dict[str, dict[str, tuple[str, ...]]] = {
84
+ "us": {
85
+ "shipping": ("shipping_security", "regional_access"),
86
+ "diplomacy": ("regional_access", "shipping_security"),
87
+ "domestic": ("domestic_support",),
88
+ "market": ("domestic_support", "force_posture"),
89
+ },
90
+ "israel": {
91
+ "border": ("homeland_security", "northern_deterrence", "reserve_endurance"),
92
+ "corridor": ("northern_deterrence",),
93
+ "diplomacy": ("us_resupply_confidence", "reserve_endurance"),
94
+ "domestic": ("reserve_endurance", "us_resupply_confidence"),
95
+ },
96
+ "iran": {
97
+ "shipping": ("hormuz_leverage",),
98
+ "corridor": ("proxy_corridor", "deterrence_credibility"),
99
+ "domestic": ("regime_stability",),
100
+ "diplomacy": ("deterrence_credibility",),
101
+ },
102
+ "hezbollah": {
103
+ "border": ("resistance_credibility", "launch_survivability"),
104
+ "corridor": ("logistics_depth",),
105
+ "domestic": ("political_cover",),
106
+ "diplomacy": ("political_cover",),
107
+ },
108
+ "gulf": {
109
+ "shipping": ("shipping_continuity", "investor_confidence"),
110
+ "commodities": ("investor_confidence", "diplomatic_flexibility"),
111
+ "cyber": ("infrastructure_security", "investor_confidence"),
112
+ "diplomacy": ("diplomatic_flexibility", "shipping_continuity"),
113
+ "market": ("investor_confidence",),
114
+ },
115
+ "oversight": {
116
+ "cyber": ("trace_clarity",),
117
+ "shipping": ("trace_clarity", "autonomy_balance"),
118
+ "humanitarian": ("intervention_legitimacy",),
119
+ "diplomacy": ("intervention_legitimacy", "autonomy_balance"),
120
+ },
121
+ }
122
+
123
+ SEVERITY_BASE: dict[EventSeverity, float] = {
124
+ "low": 1.5,
125
+ "medium": 3.5,
126
+ "high": 6.0,
127
+ "critical": 8.5,
128
+ }
129
+
130
+ WINDOW_PRESETS: dict[str, tuple[date, date]] = {
131
+ "2025": (date(2025, 1, 1), date(2026, 1, 1)),
132
+ "2026": (date(2026, 1, 1), date(2027, 1, 1)),
133
+ }
134
+
135
+
136
# A named date range used when collecting historical articles.
# iter_month_windows() in this module iterates while `current < end_date`,
# i.e. it treats end_date as exclusive.
class HistoricalCollectionWindow(BaseModel):
    # Identifier of the window, e.g. a WINDOW_PRESETS key or "<key>-YYYY-MM".
    window_id: str
    # First day covered by the window.
    start_date: date
    # Day on which the window stops (see the exclusivity note above).
    end_date: date
140
+
141
+
142
# Query profile derived from one catalog source for one agent.
# Instances are created by build_source_profiles_for_agent().
class HistoricalSourceProfile(BaseModel):
    agent_id: str
    # Copied from the catalog source (source.id / source.name / source.rationale).
    source_id: str
    source_name: str
    rationale: str
    # Host names extracted from the source endpoint URL.
    domains: list[str] = Field(default_factory=list)
    tags: list[str] = Field(default_factory=list)
    # Search terms; filled from AGENT_QUERY_TERMS for the agent.
    query_terms: list[str] = Field(default_factory=list)
    # Ranking score; higher values sort first in build_source_profiles_for_agent().
    priority: int = 0
151
+
152
+
153
# One collected article record, as written to the raw JSONL dump and fed into
# dedupe_articles() / article_to_historical_event().
class CollectedHistoricalArticle(BaseModel):
    article_id: str
    # Agent and source profile the article was collected for.
    agent_id: str
    source_id: str
    source_name: str
    # Headline; the topic/severity/polarity heuristics in this module read it.
    title: str
    url: str
    domain: str
    timestamp: datetime
    # Query string that produced this article.
    query: str
    window_id: str
    tags: list[str] = Field(default_factory=list)
    # Optional metadata; None when not provided.
    language: str | None = None
    source_country: str | None = None
167
+
168
+
169
def resolve_window(window_id: str, *, now: datetime | None = None) -> HistoricalCollectionWindow:
    """Turn a preset window id into a concrete collection window.

    Args:
        window_id: Key of ``WINDOW_PRESETS``.
        now: Reference instant; defaults to the current UTC time.

    Returns:
        The preset window, with its end date capped at the day after
        ``now``'s date so the window never reaches further into the future.

    Raises:
        ValueError: If ``window_id`` is not a known preset.
    """
    preset = WINDOW_PRESETS.get(window_id)
    if preset is None:
        known = ", ".join(sorted(WINDOW_PRESETS))
        raise ValueError(f"Unknown collection window {window_id}. Known windows: {known}")

    window_start, window_end = preset
    today = (now or datetime.now(UTC)).date()
    latest_allowed_end = today + timedelta(days=1)
    return HistoricalCollectionWindow(
        window_id=window_id,
        start_date=window_start,
        end_date=min(window_end, latest_allowed_end),
    )
178
+
179
+
180
def iter_month_windows(window: HistoricalCollectionWindow) -> list[HistoricalCollectionWindow]:
    """Split a window into consecutive per-month sub-windows.

    Each slice starts where the previous one ended and stops at the first day
    of the following month or at ``window.end_date``, whichever comes first.
    Slice ids have the form ``"<window_id>-YYYY-MM"``. An empty list is
    returned when ``start_date`` is not before ``end_date``.
    """
    slices: list[HistoricalCollectionWindow] = []
    cursor = window.start_date
    while cursor < window.end_date:
        # First day of the month after `cursor`, rolling the year over in December.
        if cursor.month == 12:
            following = date(cursor.year + 1, 1, 1)
        else:
            following = date(cursor.year, cursor.month + 1, 1)
        month_slice = HistoricalCollectionWindow(
            window_id=f"{window.window_id}-{cursor.strftime('%Y-%m')}",
            start_date=cursor,
            end_date=min(following, window.end_date),
        )
        slices.append(month_slice)
        cursor = following
    return slices
194
+
195
+
196
+ def _priority_for_source(source: SourceSpec) -> int:
197
+ score = 0
198
+ tags = set(source.tags)
199
+ if "official" in tags:
200
+ score += 3
201
+ if "wire" in tags:
202
+ score += 2
203
+ if source.allowlistStatus == "allowed":
204
+ score += 1
205
+ return score
206
+
207
+
208
def _extract_domains_from_source(source: SourceSpec) -> list[str]:
    """Collect the host names a source points at.

    Only ``UrlEndpoint`` endpoints are inspected. The endpoint's own host is
    included unless it is ``news.google.com``; every ``site:<host>`` operator
    found in the URL's ``q`` query parameter is included as well. A leading
    ``www.`` is stripped and the result is sorted and de-duplicated.
    """
    endpoint = source.endpoint
    if not isinstance(endpoint, UrlEndpoint):
        return []

    url_parts = urlparse(endpoint.url)
    found: set[str] = set()

    host = url_parts.hostname or ""
    if host not in ("", "news.google.com"):
        found.add(host.removeprefix("www."))

    for q_value in parse_qs(url_parts.query).get("q", []):
        found.update(site.removeprefix("www.") for site in _SITE_PATTERN.findall(q_value))

    return sorted(found)
222
+
223
+
224
def build_source_profiles_for_agent(agent_id: str) -> list[HistoricalSourceProfile]:
    """Build query profiles from the agent's ``training_core`` sources.

    Sources whose kind is not ``rss``/``api``/``scrape`` and sources that yield
    no domains are skipped. Profiles are ordered by descending priority, then
    by source name.
    """
    eligible = (
        (source, _extract_domains_from_source(source))
        for source in get_sources_for_agent(agent_id, delivery="training_core")
        if source.kind in {"rss", "api", "scrape"}
    )
    built = [
        HistoricalSourceProfile(
            agent_id=agent_id,
            source_id=source.id,
            source_name=source.name,
            rationale=source.rationale,
            domains=domains,
            tags=list(source.tags),
            query_terms=list(AGENT_QUERY_TERMS.get(agent_id, ())),
            priority=_priority_for_source(source),
        )
        for source, domains in eligible
        if domains
    ]
    return sorted(built, key=lambda profile: (-profile.priority, profile.source_name))
246
+
247
+
248
def build_gdelt_query(profile: HistoricalSourceProfile) -> str:
    """Build the GDELT DOC API ``query`` string for a source profile.

    At most the first four domains (as ``domainis:<domain>``) and the first
    eight query terms (double-quoted) are used. Each group is OR-joined and
    the two groups are combined with ``AND``.

    Parentheses are emitted only around groups that contain an actual OR list.
    Previously a lone domain or term was wrapped in parentheses, and a
    terms-only or domains-only query returned a bare, unparenthesised OR list.
    The GDELT DOC API is understood to accept parentheses only around OR'd
    statements and to require them there.

    Returns:
        The query string, or ``""`` when the profile has neither domains nor
        query terms.
    """

    def _or_group(parts: list[str]) -> str:
        # Parenthesise only real OR lists; a single item stands on its own.
        if len(parts) > 1:
            return "(" + " OR ".join(parts) + ")"
        return parts[0] if parts else ""

    domain_clause = _or_group([f"domainis:{domain}" for domain in profile.domains[:4]])
    term_clause = _or_group([json.dumps(term) for term in profile.query_terms[:8]])
    if domain_clause and term_clause:
        # NOTE(review): confirm against the GDELT DOC 2.0 documentation that the
        # explicit AND keyword is accepted between clauses.
        return f"{domain_clause} AND {term_clause}"
    return term_clause or domain_clause
256
+
257
+
258
+ def parse_gdelt_datetime(value: str) -> datetime:
259
+ value = value.strip()
260
+ if value.endswith("Z") and "T" in value:
261
+ return datetime.fromisoformat(value.replace("Z", "+00:00")).astimezone(UTC)
262
+ if len(value) == 14 and value.isdigit():
263
+ return datetime.strptime(value, "%Y%m%d%H%M%S").replace(tzinfo=UTC)
264
+ if len(value) == 15 and value.endswith("Z") and value[:-1].isdigit():
265
+ return datetime.strptime(value, "%Y%m%d%H%M%SZ").replace(tzinfo=UTC)
266
+ return datetime.fromisoformat(value.replace("Z", "+00:00")).astimezone(UTC)
267
+
268
+
269
def build_article_id(url: str, timestamp: datetime) -> str:
    """Return a 16-hex-character id: the SHA-1 of ``"<url>|<iso timestamp>"``, truncated."""
    fingerprint_source = "|".join((url, timestamp.isoformat()))
    hasher = hashlib.sha1()
    hasher.update(fingerprint_source.encode("utf-8"))
    return hasher.hexdigest()[:16]
272
+
273
+
274
def dedupe_articles(articles: list[CollectedHistoricalArticle]) -> list[CollectedHistoricalArticle]:
    """Drop repeated articles, keeping the earliest occurrence of each.

    Articles are processed in timestamp order. One is dropped when its URL
    (ignoring trailing slashes) was already seen, or when its normalised
    title was already seen on the same calendar day.
    """
    url_keys: set[str] = set()
    day_title_keys: set[str] = set()
    unique: list[CollectedHistoricalArticle] = []

    for candidate in sorted(articles, key=lambda item: item.timestamp):
        url_key = candidate.url.rstrip("/")
        # Collapse every run of non-alphanumerics so punctuation variants compare equal.
        collapsed_title = _NON_WORD_PATTERN.sub(" ", candidate.title.lower()).strip()
        day_title_key = f"{candidate.timestamp.date().isoformat()}::{collapsed_title}"

        is_new = url_key not in url_keys and day_title_key not in day_title_keys
        if is_new:
            url_keys.add(url_key)
            day_title_keys.add(day_title_key)
            unique.append(candidate)

    return unique
288
+
289
+
290
def infer_topic(title: str) -> str:
    """Pick the topic whose keywords occur most often in the title.

    Each ``TOPIC_KEYWORDS`` entry scores one point per keyword found as a
    substring of the lower-cased title. The highest score wins; ties go to the
    topic name that sorts last. ``"diplomacy"`` is returned when nothing matches.
    """
    haystack = title.lower()
    ranked: list[tuple[int, str]] = []
    for topic, keywords in TOPIC_KEYWORDS.items():
        hits = len([keyword for keyword in keywords if keyword in haystack])
        if hits > 0:
            ranked.append((hits, topic))
    if len(ranked) == 0:
        return "diplomacy"
    # max() over (score, topic) tuples equals the first item of a descending sort.
    _, best_topic = max(ranked)
    return best_topic
301
+
302
+
303
def infer_severity(title: str, topic: str) -> EventSeverity:
    """Estimate event severity from headline keywords, falling back on the topic.

    Keyword tiers are checked from most to least severe against the
    lower-cased title. Without a keyword hit, a fixed set of topics maps to
    ``"medium"`` and everything else to ``"low"``.
    """
    text = title.lower()
    keyword_tiers = (
        ("critical", ("critical", "massive", "major", "swarm", "ground operation")),
        ("high", ("strike", "attack", "retaliat", "incursion", "disruption", "outage")),
    )
    for level, markers in keyword_tiers:
        for marker in markers:
            if marker in text:
                return level

    medium_topics = {"shipping", "cyber", "commodities", "domestic", "corridor"}
    return "medium" if topic in medium_topics else "low"
312
+
313
+
314
def infer_polarity(title: str, topic: str) -> int:
    """Classify a headline as positive (1), negative (-1) or neutral (0).

    ``POSITIVE_MARKERS`` are checked before ``NEGATIVE_MARKERS``, so a title
    containing both counts as positive. Without a marker hit the topic
    decides: diplomacy is positive, a fixed set of topics is negative, and
    anything else is neutral.
    """
    text = title.lower()
    for markers, verdict in ((POSITIVE_MARKERS, 1), (NEGATIVE_MARKERS, -1)):
        if any(marker in text for marker in markers):
            return verdict

    topic_defaults = {
        "diplomacy": 1,
        "shipping": -1,
        "border": -1,
        "corridor": -1,
        "cyber": -1,
        "humanitarian": -1,
        "commodities": -1,
    }
    return topic_defaults.get(topic, 0)
325
+
326
+
327
def infer_actors_and_targets(title: str, agent_id: str) -> tuple[list[str], list[str]]:
    """Guess which agents a headline mentions and what it targets.

    Actors: every id in ``AGENT_IDS`` whose name tokens occur in the title,
    returned sorted; ``[agent_id]`` when none match. Ids without an explicit
    token list use the oversight tokens.

    Targets: a single label derived from ``infer_topic(title)``; topics
    without a dedicated label fall back to ``agent_id``.

    Two ambiguous tokens are matched as whole words instead of substrings:
      * "US" is matched case-sensitively with word boundaries. The previous
        substring ``"us "`` also fired on words such as "versus ", "virus "
        or "focus ".
      * "oman"/"omani"/"omanis" need word boundaries. The previous substring
        ``"oman"`` also fired on "woman" and "roman".
    """
    lowered = title.lower()
    substring_tokens: dict[str, tuple[str, ...]] = {
        "us": ("united states", "u.s.", "washington", "centcom", "pentagon"),
        "israel": ("israel", "idf"),
        "iran": ("iran", "irgc", "tehran"),
        "hezbollah": ("hezbollah",),
        "gulf": ("gulf", "saudi", "uae", "qatar", "bahrain"),
    }
    fallback_tokens = ("oversight", "monitor", "trace")

    actors: list[str] = []
    for candidate in AGENT_IDS:
        tokens = substring_tokens.get(candidate, fallback_tokens)
        matched = any(token in lowered for token in tokens)
        if not matched and candidate == "us":
            # Case-sensitive on the original title so the pronoun "us" is not counted.
            matched = re.search(r"\bUS\b", title) is not None
        elif not matched and candidate == "gulf":
            matched = re.search(r"\boman(?:i|is)?\b", lowered) is not None
        if matched:
            actors.append(candidate)
    if not actors:
        actors = [agent_id]

    topic = infer_topic(title)
    if topic == "border":
        targets = ["northern_front" if agent_id in {"israel", "hezbollah"} else "border_zone"]
    else:
        fixed_targets = {
            "shipping": "shipping_lanes",
            "corridor": "proxy_corridor",
            "cyber": "energy_networks",
            "commodities": "commodity_markets",
        }
        targets = [fixed_targets.get(topic, agent_id)]
    return sorted(set(actors)), targets
362
+
363
+
364
def infer_impact(agent_id: str, topic: str, severity: EventSeverity, polarity: int) -> HistoricalEventImpact:
    """Derive heuristic world deltas and per-agent metric deltas for one event.

    Args:
        agent_id: Agent the event is being built for; its own metrics get
            special direction handling below.
        topic: Topic label; selects the factors in ``TOPIC_IMPACT_FACTORS`` and
            the affected metrics in ``AGENT_TOPIC_METRICS``.
        severity: Key into ``SEVERITY_BASE``; sets the magnitude.
        polarity: Positive, zero or negative value, e.g. from ``infer_polarity``.

    Returns:
        A ``HistoricalEventImpact`` with tension/market/oil deltas rounded to
        two decimals and one metric-delta dict per affected agent. Agents with
        no metrics registered for ``topic`` are omitted.

    Raises:
        KeyError: If ``severity`` is not a key of ``SEVERITY_BASE``.
    """
    base = SEVERITY_BASE[severity]
    # Unknown topics fall back to a mild default factor triple.
    tension_factor, market_factor, oil_factor = TOPIC_IMPACT_FACTORS.get(topic, (0.5, 0.3, 0.2))
    # NOTE(review): a negative polarity (which infer_polarity returns for
    # NEGATIVE_MARKERS such as "attack") gives sign == -1, and therefore
    # negative tension/market/oil deltas for topics with positive factors.
    # Confirm this sign convention is the intended one.
    sign = 1 if polarity >= 0 else -1
    if polarity == 0:
        # Neutral events: -1 for diplomacy, +1 for every other topic.
        sign = 1 if topic not in {"diplomacy"} else -1

    tension_delta = round(base * tension_factor * sign, 2)
    market_delta = round(base * market_factor * sign, 2)
    oil_delta = round(base * oil_factor * sign, 2)

    # Per-metric magnitude; never below 1.5 regardless of severity.
    metric_scale = max(1.5, base * 0.7)
    actor_metric_deltas: dict[str, dict[str, float]] = {}
    for target_agent, metric_map in AGENT_TOPIC_METRICS.items():
        metrics = metric_map.get(topic, ())
        if not metrics:
            continue
        # Default: metrics move with `sign`; the branches below override that.
        direction = sign
        if target_agent == agent_id and topic == "diplomacy":
            direction = 1
        elif target_agent == agent_id and topic in {"shipping", "border", "corridor", "cyber", "humanitarian", "commodities"}:
            # The agent's own metrics move opposite to `sign` for these topics.
            direction = -1 if sign > 0 else 1
        elif target_agent in {"iran", "hezbollah"} and topic in {"shipping", "border", "corridor"} and sign > 0:
            direction = 1
        elif topic == "diplomacy":
            # Diplomacy always moves the listed metrics upward for every agent.
            direction = 1
        actor_metric_deltas[target_agent] = {
            metric: round(metric_scale * direction, 2) for metric in metrics
        }

    return HistoricalEventImpact(
        tension_delta=tension_delta,
        market_stress_delta=market_delta,
        oil_pressure_delta=oil_delta,
        actor_metric_deltas=actor_metric_deltas,
    )
400
+
401
+
402
def article_to_historical_event(article: CollectedHistoricalArticle, *, training_agent: str) -> HistoricalEvent:
    """Convert a collected article into a ``HistoricalEvent`` for one agent.

    Topic, severity, polarity, actors, targets and impact are all inferred
    from the article title by this module's heuristics. The title is used as
    both the summary and the public summary, and the event is marked confirmed.
    """
    headline = article.title
    topic = infer_topic(headline)
    severity = infer_severity(headline, topic)
    polarity = infer_polarity(headline, topic)
    actors, targets = infer_actors_and_targets(headline, training_agent)

    stamp = article.timestamp.strftime("%Y%m%d%H%M%S")
    # The oversight agent has no region of its own.
    region = "global" if training_agent == "oversight" else training_agent
    tag_pool = {*article.tags, topic, article.domain}
    impact = infer_impact(training_agent, topic, severity, polarity)

    return HistoricalEvent(
        event_id=f"{training_agent}-{stamp}-{article.article_id[:8]}",
        timestamp=article.timestamp,
        topic=topic,
        region=region,
        actors=actors,
        targets=targets,
        severity=severity,
        summary=headline,
        public_summary=headline,
        source_type="gdelt_historical_collection",
        confirmed=True,
        tags=sorted(tag_pool),
        impact=impact,
    )
422
+
423
+
424
def build_replay_definition(
    *,
    training_agent: str,
    window: HistoricalCollectionWindow,
    articles: list[CollectedHistoricalArticle],
    max_events: int = 128,
) -> HistoricalReplayDefinition:
    """Assemble a replay definition from collected articles.

    Articles are de-duplicated, converted to events and ordered by timestamp;
    only the first ``max_events`` (i.e. the earliest) events are kept.
    """
    unique_articles = dedupe_articles(articles)
    chronological = sorted(
        (article_to_historical_event(item, training_agent=training_agent) for item in unique_articles),
        key=lambda event: event.timestamp,
    )
    selected = chronological[:max_events]

    span = f"{window.start_date.isoformat()} to {window.end_date.isoformat()}"
    description = (
        "Historically collected replay built from allowlisted source domains via the GDELT DOC API. "
        "Titles and impacts are heuristic and should be curator-reviewed before production post-training."
    )
    return HistoricalReplayDefinition(
        replay_id=f"{training_agent}_historical_{window.window_id}",
        name=f"{training_agent.upper()} historical replay {span}",
        description=description,
        training_agent=training_agent,
        events=selected,
    )
444
+
445
+
446
def dump_raw_articles(path: Path, articles: list[CollectedHistoricalArticle]) -> None:
    """Write the articles to ``path`` as JSON Lines, ordered by timestamp.

    Missing parent directories are created; an existing file is overwritten.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as sink:
        ordered = sorted(articles, key=lambda item: item.timestamp)
        sink.writelines(f"{record.model_dump_json()}\n" for record in ordered)
452
+
453
+
454
def dump_replay_definition(path: Path, replay: HistoricalReplayDefinition) -> None:
    """Write the replay to ``path`` as 2-space-indented JSON, creating parent directories."""
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    serialized = replay.model_dump_json(indent=2)
    path.write_text(serialized, encoding="utf-8")
457
+
458
+
459
+ def format_gdelt_datetime(day: date, *, end_of_day: bool = False) -> str:
460
+ dt = datetime.combine(day, time.max if end_of_day else time.min, tzinfo=UTC)
461
+ return dt.strftime("%Y%m%d%H%M%S")
backend/src/trenches_env/historical_collection_cli.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ from datetime import UTC, datetime, timedelta
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ import httpx
9
+
10
+ from trenches_env.agents import AGENT_IDS
11
+ from trenches_env.historical_collection import (
12
+ CollectedHistoricalArticle,
13
+ HistoricalCollectionWindow,
14
+ build_gdelt_query,
15
+ build_replay_definition,
16
+ build_source_profiles_for_agent,
17
+ build_article_id,
18
+ dump_raw_articles,
19
+ dump_replay_definition,
20
+ format_gdelt_datetime,
21
+ iter_month_windows,
22
+ parse_gdelt_datetime,
23
+ resolve_window,
24
+ )
25
+
26
+ GDELT_DOC_API = "https://api.gdeltproject.org/api/v2/doc/doc"
27
+
28
+
29
def _parse_args() -> argparse.Namespace:
    """Parse the collector's command-line arguments.

    ``--window`` may be given several times. Its default is applied after
    parsing rather than through ``default=["2025"]``: with ``action="append"``
    argparse appends user values to a non-empty default list, so
    ``--window 2026`` used to collect both 2025 and 2026. Repeated values are
    collapsed while keeping their first-seen order.
    """
    parser = argparse.ArgumentParser(description="Collect historical replay candidates into Trenches replay JSON format.")
    parser.add_argument("--training-agent", choices=[*AGENT_IDS, "all"], default="us")
    parser.add_argument(
        "--window",
        action="append",
        choices=["2025", "2026"],
        default=None,
        help="Collection window; may be repeated. Defaults to 2025.",
    )
    parser.add_argument(
        "--output-dir",
        default="backend/src/trenches_env/historical_replays",
        help="Directory for replay JSON files.",
    )
    parser.add_argument(
        "--raw-dir",
        default="backend/tmp-historical-raw",
        help="Directory for raw collected article JSONL files.",
    )
    parser.add_argument("--max-records-per-query", type=int, default=50)
    parser.add_argument("--max-events", type=int, default=128)
    parser.add_argument("--timeout-seconds", type=float, default=30.0)
    args = parser.parse_args()
    # dict.fromkeys() removes duplicates while preserving order.
    args.window = list(dict.fromkeys(args.window or ["2025"]))
    return args
47
+
48
+
49
def _fetch_gdelt_articles(
    client: httpx.Client,
    *,
    agent_id: str,
    window: HistoricalCollectionWindow,
    max_records_per_query: int,
) -> list[CollectedHistoricalArticle]:
    """Query the GDELT DOC API month by month for every source profile of an agent.

    Args:
        client: HTTP client used for the requests.
        agent_id: Agent whose source profiles drive the queries.
        window: Overall collection window; articles dated outside
            ``[start_date, end_date)`` are discarded.
        max_records_per_query: Value sent as ``maxrecords`` with each request.

    Returns:
        All accepted articles, in request order. Not de-duplicated.

    Raises:
        httpx.HTTPStatusError: If a request returns an error status.

    Robustness changes over the previous version:
      * a response body that is not a JSON object is reported with a warning
        and skipped instead of aborting the whole run;
      * articles with an unparseable ``seendate`` are skipped;
      * the domain fallback uses ``urlparse`` instead of
        ``url.split("/")[2]``, which raised ``IndexError`` on URLs without a
        scheme.
    """
    import warnings
    from urllib.parse import urlparse

    # The month split does not depend on the profile; compute it once.
    month_windows = iter_month_windows(window)

    articles: list[CollectedHistoricalArticle] = []
    for profile in build_source_profiles_for_agent(agent_id):
        query = build_gdelt_query(profile)
        if not query:
            continue
        profile_tags = sorted(set([*profile.tags, *profile.query_terms[:3]]))
        for month_window in month_windows:
            params = {
                "query": query,
                "mode": "artlist",
                "format": "json",
                "maxrecords": max_records_per_query,
                "startdatetime": format_gdelt_datetime(month_window.start_date),
                # end_date is exclusive, so the last included day is the one before it.
                "enddatetime": format_gdelt_datetime(month_window.end_date - timedelta(days=1), end_of_day=True),
                "sort": "datedesc",
            }
            response = client.get(GDELT_DOC_API, params=params)
            response.raise_for_status()
            try:
                payload = response.json()
            except ValueError:
                # json.JSONDecodeError is a ValueError subclass.
                warnings.warn(
                    f"Non-JSON GDELT response for {profile.source_id} ({month_window.window_id}); skipping.",
                    stacklevel=2,
                )
                continue
            if not isinstance(payload, dict):
                warnings.warn(
                    f"Unexpected GDELT payload for {profile.source_id} ({month_window.window_id}); skipping.",
                    stacklevel=2,
                )
                continue
            for item in payload.get("articles") or []:
                if not isinstance(item, dict):
                    continue
                url = str(item.get("url") or "").strip()
                title = str(item.get("title") or "").strip()
                seendate = str(item.get("seendate") or "").strip()
                domain = str(item.get("domain") or "").strip()
                if not url or not title or not seendate:
                    continue
                try:
                    timestamp = parse_gdelt_datetime(seendate)
                except ValueError:
                    continue
                if timestamp.date() < window.start_date or timestamp.date() >= window.end_date:
                    continue
                articles.append(
                    CollectedHistoricalArticle(
                        article_id=build_article_id(url, timestamp),
                        agent_id=agent_id,
                        source_id=profile.source_id,
                        source_name=profile.source_name,
                        title=title,
                        url=url,
                        domain=domain or urlparse(url).netloc,
                        timestamp=timestamp,
                        query=query,
                        window_id=window.window_id,
                        tags=list(profile_tags),
                        language=item.get("language"),
                        source_country=item.get("sourcecountry"),
                    )
                )
    return articles
102
+
103
+
104
def _collect_for_agent(
    client: httpx.Client,
    *,
    agent_id: str,
    windows: list[str],
    output_dir: Path,
    raw_dir: Path,
    max_records_per_query: int,
    max_events: int,
) -> list[Path]:
    """Collect, build and persist one replay per requested window for an agent.

    For every window id the articles are fetched, turned into a replay
    definition, and written out: the replay as ``<replay_id>.json`` under
    ``output_dir`` and the raw articles as ``<replay_id>.articles.jsonl``
    under ``raw_dir``.

    Returns:
        The replay JSON paths, in window order.
    """
    replay_paths: list[Path] = []
    for window_id in windows:
        window = resolve_window(window_id, now=datetime.now(UTC))
        collected = _fetch_gdelt_articles(
            client,
            agent_id=agent_id,
            window=window,
            max_records_per_query=max_records_per_query,
        )
        replay = build_replay_definition(
            training_agent=agent_id,
            window=window,
            articles=collected,
            max_events=max_events,
        )
        stem = replay.replay_id
        replay_file = output_dir / f"{stem}.json"
        raw_file = raw_dir / f"{stem}.articles.jsonl"
        dump_replay_definition(replay_file, replay)
        dump_raw_articles(raw_file, collected)
        replay_paths.append(replay_file)
    return replay_paths
135
+
136
+
137
def main() -> None:
    """CLI entry point: collect replays for the selected agent(s) and print the written paths."""
    args = _parse_args()
    replay_dir, raw_dir = Path(args.output_dir), Path(args.raw_dir)

    if args.training_agent == "all":
        targets = list(AGENT_IDS)
    else:
        targets = [args.training_agent]

    request_headers = {"User-Agent": "trenches-historical-collector/0.1"}
    with httpx.Client(timeout=args.timeout_seconds, headers=request_headers) as client:
        produced: list[Path] = []
        for agent_id in targets:
            produced += _collect_for_agent(
                client,
                agent_id=agent_id,
                windows=args.window,
                output_dir=replay_dir,
                raw_dir=raw_dir,
                max_records_per_query=args.max_records_per_query,
                max_events=args.max_events,
            )

    for replay_path in produced:
        print(replay_path)
160
+
161
+
162
+ if __name__ == "__main__":
163
+ main()
backend/src/trenches_env/historical_replay.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from functools import lru_cache
5
+ from importlib.resources import files
6
+
7
+ from pydantic import BaseModel, Field
8
+
9
+ from trenches_env.models import EventSeverity, HistoricalEvent
10
+
11
+
12
# A named, ordered collection of historical events used as a replay for one agent.
class HistoricalReplayDefinition(BaseModel):
    # Unique key; _load_replays() indexes replays by this value.
    replay_id: str
    name: str
    description: str
    # Agent the replay is meant for; compared in default_replay_id_for_agent().
    training_agent: str = "us"
    events: list[HistoricalEvent] = Field(default_factory=list)
18
+
19
+
20
+ SEVERITY_SCORES: dict[EventSeverity, float] = {
21
+ "low": 0.25,
22
+ "medium": 0.5,
23
+ "high": 0.75,
24
+ "critical": 1.0,
25
+ }
26
+
27
+ SEVERITY_ORDER: tuple[EventSeverity, ...] = ("low", "medium", "high", "critical")
28
+
29
+
30
@lru_cache(maxsize=1)
def _load_replays() -> dict[str, HistoricalReplayDefinition]:
    """Load every packaged replay JSON file, keyed by ``replay_id``.

    Two package directories are scanned in order: ``historical_replays/``
    (curated real data) and ``synthetic_historical_replays/`` (synthetic seed
    data for smoke-testing). A directory that is missing is skipped. When two
    files declare the same ``replay_id`` the one loaded later wins, so a
    synthetic replay overrides a curated one with the same id.

    Files within a directory are processed in name order. ``iterdir()`` gives
    no ordering guarantee, so without the sort both the insertion order of the
    result (which ``default_replay_id_for_agent`` depends on) and the winner of
    an id collision would vary between filesystems.

    The result is memoised; callers must copy entries before mutating them.
    """
    package_root = files("trenches_env")
    replay_dirs = [
        package_root.joinpath("historical_replays"),
        package_root.joinpath("synthetic_historical_replays"),
    ]
    replays: dict[str, HistoricalReplayDefinition] = {}
    for replay_dir in replay_dirs:
        try:
            children = sorted(replay_dir.iterdir(), key=lambda entry: entry.name)
        except (FileNotFoundError, TypeError):
            continue
        for child in children:
            # Skip non-JSON entries and anything that is not a regular file.
            if not child.name.endswith(".json") or not child.is_file():
                continue
            payload = json.loads(child.read_text(encoding="utf-8"))
            replay = HistoricalReplayDefinition.model_validate(payload)
            replays[replay.replay_id] = replay
    return replays
51
+
52
+
53
def list_historical_replays() -> list[HistoricalReplayDefinition]:
    """Return deep copies of all loaded replays, so callers cannot alter the cache."""
    copies: list[HistoricalReplayDefinition] = []
    for cached in _load_replays().values():
        copies.append(cached.model_copy(deep=True))
    return copies
55
+
56
+
57
def get_historical_replay(replay_id: str) -> HistoricalReplayDefinition:
    """Return a deep copy of the replay with the given id.

    Raises:
        KeyError: If no replay with ``replay_id`` has been loaded.
    """
    registry = _load_replays()
    if replay_id not in registry:
        raise KeyError(replay_id)
    return registry[replay_id].model_copy(deep=True)
62
+
63
+
64
def default_replay_id_for_agent(agent_id: str) -> str | None:
    """Return the id of the first loaded replay whose ``training_agent`` matches, else ``None``."""
    matching_ids = (
        replay.replay_id
        for replay in _load_replays().values()
        if replay.training_agent == agent_id
    )
    return next(matching_ids, None)
69
+
70
+
71
def severity_score(severity: EventSeverity) -> float:
    """Return the numeric score registered for ``severity`` in ``SEVERITY_SCORES``.

    Raises:
        KeyError: If ``severity`` is not a key of ``SEVERITY_SCORES``.
    """
    return SEVERITY_SCORES[severity]
73
+
74
+
75
def severity_distance(expected: EventSeverity, actual: EventSeverity) -> int:
    """Return how many steps apart two severities are in ``SEVERITY_ORDER``.

    Raises:
        ValueError: If either value does not appear in ``SEVERITY_ORDER``.
    """
    expected_rank = SEVERITY_ORDER.index(expected)
    actual_rank = SEVERITY_ORDER.index(actual)
    return abs(expected_rank - actual_rank)
backend/src/trenches_env/historical_replays/gulf_2025_events.json ADDED
The diff for this file is too large to render. See raw diff
 
backend/src/trenches_env/historical_replays/hezbollah_2025_events.json ADDED
@@ -0,0 +1,1993 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "replay_id": "hezbollah_2025_events",
3
+ "name": "HEZBOLLAH Real Events 2025",
4
+ "description": "Real historical events from 2025 relevant to the hezbollah entity. Source: Reubencf/2025_events HuggingFace dataset (68 events). Impact values are heuristic \u2014 curator review recommended before production training.",
5
+ "training_agent": "hezbollah",
6
+ "events": [
7
+ {
8
+ "event_id": "evt-2025-jan-07-d7a7aa3e",
9
+ "timestamp": "2025-01-07T12:00:00Z",
10
+ "topic": "security",
11
+ "region": "levant",
12
+ "actors": [
13
+ "hezbollah",
14
+ "gulf"
15
+ ],
16
+ "targets": [
17
+ "general"
18
+ ],
19
+ "severity": "medium",
20
+ "summary": "Lebanon approves the extradition of Abdul Rahman Yusuf, the son of late Egyptian ulama Yusuf al-Qaradawi, to the United Arab Emirates following his detention on December 28 after returning from Syria.",
21
+ "public_summary": "Lebanon approves the extradition of Abdul Rahman Yusuf, the son of late Egyptian ulama Yusuf al-Qaradawi, to the United Arab Emirates following his detention on December 28 after returning from Syria.",
22
+ "source_type": "hf_2025_events",
23
+ "confirmed": true,
24
+ "tags": [
25
+ "security",
26
+ "law_and_crime",
27
+ "levant"
28
+ ],
29
+ "impact": {
30
+ "tension_delta": 2.0,
31
+ "market_stress_delta": 1.0,
32
+ "oil_pressure_delta": 0.5,
33
+ "actor_metric_deltas": {}
34
+ }
35
+ },
36
+ {
37
+ "event_id": "evt-2025-jan-09-03df61dc",
38
+ "timestamp": "2025-01-09T12:00:00Z",
39
+ "topic": "domestic",
40
+ "region": "levant",
41
+ "actors": [
42
+ "hezbollah",
43
+ "oversight"
44
+ ],
45
+ "targets": [
46
+ "civilians"
47
+ ],
48
+ "severity": "medium",
49
+ "summary": "2022\u20132025 Lebanese presidential election Military commander Joseph Aoun is elected as President of Lebanon, ending the power vacuum that began in October 2022.",
50
+ "public_summary": "2022\u20132025 Lebanese presidential election Military commander Joseph Aoun is elected as President of Lebanon, ending the power vacuum that began in October 2022.",
51
+ "source_type": "hf_2025_events",
52
+ "confirmed": true,
53
+ "tags": [
54
+ "domestic",
55
+ "politics_and_elections",
56
+ "levant"
57
+ ],
58
+ "impact": {
59
+ "tension_delta": 1.0,
60
+ "market_stress_delta": 1.5,
61
+ "oil_pressure_delta": 0.5,
62
+ "actor_metric_deltas": {}
63
+ }
64
+ },
65
+ {
66
+ "event_id": "evt-2025-jan-11-6823bb50",
67
+ "timestamp": "2025-01-11T12:00:00Z",
68
+ "topic": "diplomacy",
69
+ "region": "levant",
70
+ "actors": [
71
+ "hezbollah"
72
+ ],
73
+ "targets": [
74
+ "general"
75
+ ],
76
+ "severity": "medium",
77
+ "summary": "Lebanese prime minister Najib Mikati and Syrian de facto leader Ahmed al-Sharaa hold a meeting in Damascus, Syria, the first time that the two countries have met since the Syrian civil war began in 2011. (24 Digital) (MTV)",
78
+ "public_summary": "Lebanese prime minister Najib Mikati and Syrian de facto leader Ahmed al-Sharaa hold a meeting in Damascus, Syria, the first time that the two countries have met since the Syrian civil war began in 20",
79
+ "source_type": "hf_2025_events",
80
+ "confirmed": true,
81
+ "tags": [
82
+ "diplomacy",
83
+ "international_relations",
84
+ "levant"
85
+ ],
86
+ "impact": {
87
+ "tension_delta": -3.0,
88
+ "market_stress_delta": -2.0,
89
+ "oil_pressure_delta": -1.0,
90
+ "actor_metric_deltas": {}
91
+ }
92
+ },
93
+ {
94
+ "event_id": "evt-2025-jan-13-cc2bf47a",
95
+ "timestamp": "2025-01-13T12:00:00Z",
96
+ "topic": "diplomacy",
97
+ "region": "levant",
98
+ "actors": [
99
+ "israel",
100
+ "hezbollah",
101
+ "oversight"
102
+ ],
103
+ "targets": [
104
+ "general"
105
+ ],
106
+ "severity": "high",
107
+ "summary": "2024 Israel\u2013Lebanon ceasefire agreement The Israel Defense Forces strike several Hezbollah sites in Lebanon, alleging that the targets violated the terms of the ceasefire agreement. (The Times of Israel)",
108
+ "public_summary": "2024 Israel\u2013Lebanon ceasefire agreement The Israel Defense Forces strike several Hezbollah sites in Lebanon, alleging that the targets violated the terms of the ceasefire agreement. (The Times of Isra",
109
+ "source_type": "hf_2025_events",
110
+ "confirmed": true,
111
+ "tags": [
112
+ "diplomacy",
113
+ "armed_conflicts_and_attacks",
114
+ "levant"
115
+ ],
116
+ "impact": {
117
+ "tension_delta": -4.5,
118
+ "market_stress_delta": -3.0,
119
+ "oil_pressure_delta": -1.5,
120
+ "actor_metric_deltas": {}
121
+ }
122
+ },
123
+ {
124
+ "event_id": "evt-2025-jan-13-d53bb5ad",
125
+ "timestamp": "2025-01-13T12:00:00Z",
126
+ "topic": "domestic",
127
+ "region": "levant",
128
+ "actors": [
129
+ "hezbollah",
130
+ "oversight"
131
+ ],
132
+ "targets": [
133
+ "civilians",
134
+ "government"
135
+ ],
136
+ "severity": "medium",
137
+ "summary": "International Court of Justice President Nawaf Salam becomes the prime minister-designate of Lebanon after winning an absolute majority of votes from parliament, while stepping down from the ICJ. (TVN24)",
138
+ "public_summary": "International Court of Justice President Nawaf Salam becomes the prime minister-designate of Lebanon after winning an absolute majority of votes from parliament, while stepping down from the ICJ. (TVN",
139
+ "source_type": "hf_2025_events",
140
+ "confirmed": true,
141
+ "tags": [
142
+ "domestic",
143
+ "politics_and_elections",
144
+ "levant"
145
+ ],
146
+ "impact": {
147
+ "tension_delta": 1.0,
148
+ "market_stress_delta": 1.5,
149
+ "oil_pressure_delta": 0.5,
150
+ "actor_metric_deltas": {}
151
+ }
152
+ },
153
+ {
154
+ "event_id": "evt-2025-jan-26-36fdad2c",
155
+ "timestamp": "2025-01-26T12:00:00Z",
156
+ "topic": "diplomacy",
157
+ "region": "levant",
158
+ "actors": [
159
+ "israel",
160
+ "hezbollah",
161
+ "oversight"
162
+ ],
163
+ "targets": [
164
+ "civilians"
165
+ ],
166
+ "severity": "high",
167
+ "summary": "2024 Israel\u2013Lebanon ceasefire agreement January 2025 southern Lebanon attack Israeli soldiers open fire against the Lebanese Army and civilians in two villages in southern Lebanon, killing 22 people and wounding at least 124 more. (The Times of Israel)",
168
+ "public_summary": "2024 Israel\u2013Lebanon ceasefire agreement January 2025 southern Lebanon attack Israeli soldiers open fire against the Lebanese Army and civilians in two villages in southern Lebanon, killing 22 people a",
169
+ "source_type": "hf_2025_events",
170
+ "confirmed": true,
171
+ "tags": [
172
+ "diplomacy",
173
+ "armed_conflicts_and_attacks",
174
+ "levant"
175
+ ],
176
+ "impact": {
177
+ "tension_delta": -4.5,
178
+ "market_stress_delta": -3.0,
179
+ "oil_pressure_delta": -1.5,
180
+ "actor_metric_deltas": {}
181
+ }
182
+ },
183
+ {
184
+ "event_id": "evt-2025-jan-26-294ead53",
185
+ "timestamp": "2025-01-26T12:00:00Z",
186
+ "topic": "diplomacy",
187
+ "region": "levant",
188
+ "actors": [
189
+ "us",
190
+ "israel",
191
+ "hezbollah",
192
+ "oversight"
193
+ ],
194
+ "targets": [
195
+ "general"
196
+ ],
197
+ "severity": "high",
198
+ "summary": "2024 Israel\u2013Lebanon ceasefire agreement The United States announces that the ceasefire between Israel and Lebanon will be extended to February 18. (Axios)",
199
+ "public_summary": "2024 Israel\u2013Lebanon ceasefire agreement The United States announces that the ceasefire between Israel and Lebanon will be extended to February 18. (Axios)",
200
+ "source_type": "hf_2025_events",
201
+ "confirmed": true,
202
+ "tags": [
203
+ "diplomacy",
204
+ "armed_conflicts_and_attacks",
205
+ "levant"
206
+ ],
207
+ "impact": {
208
+ "tension_delta": -4.5,
209
+ "market_stress_delta": -3.0,
210
+ "oil_pressure_delta": -1.5,
211
+ "actor_metric_deltas": {}
212
+ }
213
+ },
214
+ {
215
+ "event_id": "evt-2025-jan-31-5d3ec117",
216
+ "timestamp": "2025-01-31T12:00:00Z",
217
+ "topic": "diplomacy",
218
+ "region": "levant",
219
+ "actors": [
220
+ "israel",
221
+ "hezbollah",
222
+ "oversight"
223
+ ],
224
+ "targets": [
225
+ "general"
226
+ ],
227
+ "severity": "high",
228
+ "summary": "2024 Israel\u2013Lebanon ceasefire agreement Two people are killed and ten others are injured during Israeli airstrikes in the Beqaa Valley, Lebanon.",
229
+ "public_summary": "2024 Israel\u2013Lebanon ceasefire agreement Two people are killed and ten others are injured during Israeli airstrikes in the Beqaa Valley, Lebanon.",
230
+ "source_type": "hf_2025_events",
231
+ "confirmed": true,
232
+ "tags": [
233
+ "diplomacy",
234
+ "armed_conflicts_and_attacks",
235
+ "levant"
236
+ ],
237
+ "impact": {
238
+ "tension_delta": -4.5,
239
+ "market_stress_delta": -3.0,
240
+ "oil_pressure_delta": -1.5,
241
+ "actor_metric_deltas": {}
242
+ }
243
+ },
244
+ {
245
+ "event_id": "evt-2025-feb-08-42563620",
246
+ "timestamp": "2025-02-08T12:00:00Z",
247
+ "topic": "border",
248
+ "region": "levant",
249
+ "actors": [
250
+ "hezbollah"
251
+ ],
252
+ "targets": [
253
+ "northern_front"
254
+ ],
255
+ "severity": "high",
256
+ "summary": "Four people are killed in border clashes between Syrian Hay'at Tahrir al-Sham and Lebanese clans. (Al-Monitor)",
257
+ "public_summary": "Four people are killed in border clashes between Syrian Hay'at Tahrir al-Sham and Lebanese clans. (Al-Monitor)",
258
+ "source_type": "hf_2025_events",
259
+ "confirmed": true,
260
+ "tags": [
261
+ "border",
262
+ "armed_conflicts_and_attacks",
263
+ "levant"
264
+ ],
265
+ "impact": {
266
+ "tension_delta": 7.5,
267
+ "market_stress_delta": 1.5,
268
+ "oil_pressure_delta": 0.8,
269
+ "actor_metric_deltas": {}
270
+ }
271
+ },
272
+ {
273
+ "event_id": "evt-2025-feb-08-221a1303",
274
+ "timestamp": "2025-02-08T12:00:00Z",
275
+ "topic": "domestic",
276
+ "region": "levant",
277
+ "actors": [
278
+ "hezbollah"
279
+ ],
280
+ "targets": [
281
+ "civilians",
282
+ "government"
283
+ ],
284
+ "severity": "medium",
285
+ "summary": "A new government is formed in Lebanon, with former International Court of Justice president Nawaf Salam as the new prime minister, following two years of the country under a caretaker government.",
286
+ "public_summary": "A new government is formed in Lebanon, with former International Court of Justice president Nawaf Salam as the new prime minister, following two years of the country under a caretaker government.",
287
+ "source_type": "hf_2025_events",
288
+ "confirmed": true,
289
+ "tags": [
290
+ "domestic",
291
+ "politics_and_elections",
292
+ "levant"
293
+ ],
294
+ "impact": {
295
+ "tension_delta": 1.0,
296
+ "market_stress_delta": 1.5,
297
+ "oil_pressure_delta": 0.5,
298
+ "actor_metric_deltas": {}
299
+ }
300
+ },
301
+ {
302
+ "event_id": "evt-2025-feb-09-10b945ba",
303
+ "timestamp": "2025-02-09T12:00:00Z",
304
+ "topic": "military",
305
+ "region": "levant",
306
+ "actors": [
307
+ "israel",
308
+ "hezbollah"
309
+ ],
310
+ "targets": [
311
+ "general"
312
+ ],
313
+ "severity": "high",
314
+ "summary": "Six people are killed and two others are wounded in Israeli airstrikes near Jannata, South Governorate, Lebanon.",
315
+ "public_summary": "Six people are killed and two others are wounded in Israeli airstrikes near Jannata, South Governorate, Lebanon.",
316
+ "source_type": "hf_2025_events",
317
+ "confirmed": true,
318
+ "tags": [
319
+ "military",
320
+ "armed_conflicts_and_attacks",
321
+ "levant"
322
+ ],
323
+ "impact": {
324
+ "tension_delta": 9.0,
325
+ "market_stress_delta": 3.8,
326
+ "oil_pressure_delta": 2.2,
327
+ "actor_metric_deltas": {}
328
+ }
329
+ },
330
+ {
331
+ "event_id": "evt-2025-feb-15-d2808a36",
332
+ "timestamp": "2025-02-15T12:00:00Z",
333
+ "topic": "diplomacy",
334
+ "region": "levant",
335
+ "actors": [
336
+ "hezbollah",
337
+ "oversight"
338
+ ],
339
+ "targets": [
340
+ "infrastructure"
341
+ ],
342
+ "severity": "medium",
343
+ "summary": "2025 Beirut attack on UN convoy The Lebanese Armed Forces arrest over 25 people on suspicion of attacking a UNIFIL convoy and United Nations peacekeepers near Rafic Hariri International Airport in Beirut.",
344
+ "public_summary": "2025 Beirut attack on UN convoy The Lebanese Armed Forces arrest over 25 people on suspicion of attacking a UNIFIL convoy and United Nations peacekeepers near Rafic Hariri International Airport in Bei",
345
+ "source_type": "hf_2025_events",
346
+ "confirmed": true,
347
+ "tags": [
348
+ "diplomacy",
349
+ "law_and_crime",
350
+ "levant"
351
+ ],
352
+ "impact": {
353
+ "tension_delta": -3.0,
354
+ "market_stress_delta": -2.0,
355
+ "oil_pressure_delta": -1.0,
356
+ "actor_metric_deltas": {}
357
+ }
358
+ },
359
+ {
360
+ "event_id": "evt-2025-feb-17-0eae2220",
361
+ "timestamp": "2025-02-17T12:00:00Z",
362
+ "topic": "diplomacy",
363
+ "region": "levant",
364
+ "actors": [
365
+ "israel",
366
+ "hezbollah",
367
+ "oversight"
368
+ ],
369
+ "targets": [
370
+ "general"
371
+ ],
372
+ "severity": "high",
373
+ "summary": "2024 Israel\u2013Lebanon ceasefire agreement Mohammad Shahin, a senior Hamas commander, is assassinated in an Israeli airstrike on a vehicle in Sidon, Lebanon. (Anadolu Ajans\u0131)",
374
+ "public_summary": "2024 Israel\u2013Lebanon ceasefire agreement Mohammad Shahin, a senior Hamas commander, is assassinated in an Israeli airstrike on a vehicle in Sidon, Lebanon. (Anadolu Ajans\u0131)",
375
+ "source_type": "hf_2025_events",
376
+ "confirmed": true,
377
+ "tags": [
378
+ "diplomacy",
379
+ "armed_conflicts_and_attacks",
380
+ "levant"
381
+ ],
382
+ "impact": {
383
+ "tension_delta": -4.5,
384
+ "market_stress_delta": -3.0,
385
+ "oil_pressure_delta": -1.5,
386
+ "actor_metric_deltas": {}
387
+ }
388
+ },
389
+ {
390
+ "event_id": "evt-2025-feb-18-5cf1ac14",
391
+ "timestamp": "2025-02-18T12:00:00Z",
392
+ "topic": "conflict",
393
+ "region": "levant",
394
+ "actors": [
395
+ "israel",
396
+ "hezbollah"
397
+ ],
398
+ "targets": [
399
+ "general"
400
+ ],
401
+ "severity": "high",
402
+ "summary": "Israel partially withdraws troops from Lebanon as the deadline for their withdrawal expires.",
403
+ "public_summary": "Israel partially withdraws troops from Lebanon as the deadline for their withdrawal expires.",
404
+ "source_type": "hf_2025_events",
405
+ "confirmed": true,
406
+ "tags": [
407
+ "conflict",
408
+ "armed_conflicts_and_attacks",
409
+ "levant"
410
+ ],
411
+ "impact": {
412
+ "tension_delta": 7.5,
413
+ "market_stress_delta": 3.0,
414
+ "oil_pressure_delta": 1.5,
415
+ "actor_metric_deltas": {}
416
+ }
417
+ },
418
+ {
419
+ "event_id": "evt-2025-feb-23-996c089e",
420
+ "timestamp": "2025-02-23T12:00:00Z",
421
+ "topic": "military",
422
+ "region": "levant",
423
+ "actors": [
424
+ "israel",
425
+ "hezbollah"
426
+ ],
427
+ "targets": [
428
+ "general"
429
+ ],
430
+ "severity": "high",
431
+ "summary": "Israel launches airstrikes in the Baalbek area in northeastern Lebanon and other areas in the south of the country. (Times of Israel)",
432
+ "public_summary": "Israel launches airstrikes in the Baalbek area in northeastern Lebanon and other areas in the south of the country. (Times of Israel)",
433
+ "source_type": "hf_2025_events",
434
+ "confirmed": true,
435
+ "tags": [
436
+ "military",
437
+ "armed_conflicts_and_attacks",
438
+ "levant"
439
+ ],
440
+ "impact": {
441
+ "tension_delta": 9.0,
442
+ "market_stress_delta": 3.8,
443
+ "oil_pressure_delta": 2.2,
444
+ "actor_metric_deltas": {}
445
+ }
446
+ },
447
+ {
448
+ "event_id": "evt-2025-feb-23-32a876cf",
449
+ "timestamp": "2025-02-23T12:00:00Z",
450
+ "topic": "domestic",
451
+ "region": "levant",
452
+ "actors": [
453
+ "hezbollah",
454
+ "oversight"
455
+ ],
456
+ "targets": [
457
+ "infrastructure"
458
+ ],
459
+ "severity": "medium",
460
+ "summary": "Funeral of Hassan Nasrallah and Hashem Safieddine The joint funeral for Hezbollah leaders Hassan Nasrallah and Hashem Safieddine is held at the Camille Chamoun Sports City Stadium in Beirut, Lebanon.",
461
+ "public_summary": "Funeral of Hassan Nasrallah and Hashem Safieddine The joint funeral for Hezbollah leaders Hassan Nasrallah and Hashem Safieddine is held at the Camille Chamoun Sports City Stadium in Beirut, Lebanon.",
462
+ "source_type": "hf_2025_events",
463
+ "confirmed": true,
464
+ "tags": [
465
+ "domestic",
466
+ "politics_and_elections",
467
+ "levant"
468
+ ],
469
+ "impact": {
470
+ "tension_delta": 1.0,
471
+ "market_stress_delta": 1.5,
472
+ "oil_pressure_delta": 0.5,
473
+ "actor_metric_deltas": {}
474
+ }
475
+ },
476
+ {
477
+ "event_id": "evt-2025-mar-04-bf37eb45",
478
+ "timestamp": "2025-03-04T12:00:00Z",
479
+ "topic": "military",
480
+ "region": "levant",
481
+ "actors": [
482
+ "israel",
483
+ "hezbollah"
484
+ ],
485
+ "targets": [
486
+ "general"
487
+ ],
488
+ "severity": "high",
489
+ "summary": "An Israeli drone strike in Tyre, Lebanon, kills Haidar Hashem, the head of naval forces in Hezbollah's Radwan Force. (Times of Israel)",
490
+ "public_summary": "An Israeli drone strike in Tyre, Lebanon, kills Haidar Hashem, the head of naval forces in Hezbollah's Radwan Force. (Times of Israel)",
491
+ "source_type": "hf_2025_events",
492
+ "confirmed": true,
493
+ "tags": [
494
+ "military",
495
+ "armed_conflicts_and_attacks",
496
+ "levant"
497
+ ],
498
+ "impact": {
499
+ "tension_delta": 9.0,
500
+ "market_stress_delta": 3.8,
501
+ "oil_pressure_delta": 2.2,
502
+ "actor_metric_deltas": {}
503
+ }
504
+ },
505
+ {
506
+ "event_id": "evt-2025-mar-17-c455ec67",
507
+ "timestamp": "2025-03-17T12:00:00Z",
508
+ "topic": "border",
509
+ "region": "levant",
510
+ "actors": [
511
+ "hezbollah"
512
+ ],
513
+ "targets": [
514
+ "government",
515
+ "northern_front"
516
+ ],
517
+ "severity": "high",
518
+ "summary": "Hezbollah involvement in the Syrian civil war, Hezbollah\u2013Syria relations Hezbollah\u2013Syria clashes, Lebanon\u2013Syria border clashes According to Syria's state media, clashes occur at the Lebanon\u2013Syria border, after the Syrian transitional government accused Hezbollah militants of kidnapping three soldier",
519
+ "public_summary": "Hezbollah involvement in the Syrian civil war, Hezbollah\u2013Syria relations Hezbollah\u2013Syria clashes, Lebanon\u2013Syria border clashes According to Syria's state media, clashes occur at the Lebanon\u2013Syria bord",
520
+ "source_type": "hf_2025_events",
521
+ "confirmed": true,
522
+ "tags": [
523
+ "border",
524
+ "armed_conflicts_and_attacks",
525
+ "levant"
526
+ ],
527
+ "impact": {
528
+ "tension_delta": 7.5,
529
+ "market_stress_delta": 1.5,
530
+ "oil_pressure_delta": 0.8,
531
+ "actor_metric_deltas": {}
532
+ }
533
+ },
534
+ {
535
+ "event_id": "evt-2025-mar-22-8beb634e",
536
+ "timestamp": "2025-03-22T12:00:00Z",
537
+ "topic": "diplomacy",
538
+ "region": "levant",
539
+ "actors": [
540
+ "israel",
541
+ "hezbollah",
542
+ "oversight"
543
+ ],
544
+ "targets": [
545
+ "general"
546
+ ],
547
+ "severity": "high",
548
+ "summary": "Six people, including a child, are killed by Israeli airstrikes in Lebanon in the heaviest exchange of fire since the ceasefire with the Lebanese militant group Hezbollah almost four months ago. The strikes were carried out in retaliation for rockets from Lebanon being fired into Israel. (CTV News)",
549
+ "public_summary": "Six people, including a child, are killed by Israeli airstrikes in Lebanon in the heaviest exchange of fire since the ceasefire with the Lebanese militant group Hezbollah almost four months ago. The s",
550
+ "source_type": "hf_2025_events",
551
+ "confirmed": true,
552
+ "tags": [
553
+ "diplomacy",
554
+ "armed_conflicts_and_attacks",
555
+ "levant"
556
+ ],
557
+ "impact": {
558
+ "tension_delta": -4.5,
559
+ "market_stress_delta": -3.0,
560
+ "oil_pressure_delta": -1.5,
561
+ "actor_metric_deltas": {}
562
+ }
563
+ },
564
+ {
565
+ "event_id": "evt-2025-mar-28-784c03ba",
566
+ "timestamp": "2025-03-28T12:00:00Z",
567
+ "topic": "diplomacy",
568
+ "region": "levant",
569
+ "actors": [
570
+ "israel",
571
+ "hezbollah",
572
+ "oversight"
573
+ ],
574
+ "targets": [
575
+ "general"
576
+ ],
577
+ "severity": "high",
578
+ "summary": "2024 Israel\u2013Lebanon ceasefire agreement The Israel Defense Forces strikes southern Beirut, Lebanon, for the first time since November 2024, violating the ceasefire agreement.",
579
+ "public_summary": "2024 Israel\u2013Lebanon ceasefire agreement The Israel Defense Forces strikes southern Beirut, Lebanon, for the first time since November 2024, violating the ceasefire agreement.",
580
+ "source_type": "hf_2025_events",
581
+ "confirmed": true,
582
+ "tags": [
583
+ "diplomacy",
584
+ "armed_conflicts_and_attacks",
585
+ "levant"
586
+ ],
587
+ "impact": {
588
+ "tension_delta": -4.5,
589
+ "market_stress_delta": -3.0,
590
+ "oil_pressure_delta": -1.5,
591
+ "actor_metric_deltas": {}
592
+ }
593
+ },
594
+ {
595
+ "event_id": "evt-2025-apr-01-abc80e69",
596
+ "timestamp": "2025-04-01T12:00:00Z",
597
+ "topic": "diplomacy",
598
+ "region": "levant",
599
+ "actors": [
600
+ "us",
601
+ "israel",
602
+ "hezbollah",
603
+ "oversight"
604
+ ],
605
+ "targets": [
606
+ "general"
607
+ ],
608
+ "severity": "high",
609
+ "summary": "2024 Israel\u2013Lebanon ceasefire agreement Four people are killed, including Hassan Bdeir, a high-ranking Hezbollah official, and seven others are injured by an Israeli airstrike on a building in Dahieh, Beirut, Lebanon.",
610
+ "public_summary": "2024 Israel\u2013Lebanon ceasefire agreement Four people are killed, including Hassan Bdeir, a high-ranking Hezbollah official, and seven others are injured by an Israeli airstrike on a building in Dahieh,",
611
+ "source_type": "hf_2025_events",
612
+ "confirmed": true,
613
+ "tags": [
614
+ "diplomacy",
615
+ "armed_conflicts_and_attacks",
616
+ "levant"
617
+ ],
618
+ "impact": {
619
+ "tension_delta": -4.5,
620
+ "market_stress_delta": -3.0,
621
+ "oil_pressure_delta": -1.5,
622
+ "actor_metric_deltas": {}
623
+ }
624
+ },
625
+ {
626
+ "event_id": "evt-2025-apr-16-a1545048",
627
+ "timestamp": "2025-04-16T12:00:00Z",
628
+ "topic": "security",
629
+ "region": "levant",
630
+ "actors": [
631
+ "israel",
632
+ "hezbollah"
633
+ ],
634
+ "targets": [
635
+ "general"
636
+ ],
637
+ "severity": "high",
638
+ "summary": "The Lebanese military detains a group of people, including several Palestinians, for firing rockets towards Israel in two separate attacks. Hezbollah denies their involvement in the rocket attacks.",
639
+ "public_summary": "The Lebanese military detains a group of people, including several Palestinians, for firing rockets towards Israel in two separate attacks. Hezbollah denies their involvement in the rocket attacks.",
640
+ "source_type": "hf_2025_events",
641
+ "confirmed": true,
642
+ "tags": [
643
+ "security",
644
+ "law_and_crime",
645
+ "levant"
646
+ ],
647
+ "impact": {
648
+ "tension_delta": 3.0,
649
+ "market_stress_delta": 1.5,
650
+ "oil_pressure_delta": 0.8,
651
+ "actor_metric_deltas": {}
652
+ }
653
+ },
654
+ {
655
+ "event_id": "evt-2025-apr-27-6e271ef9",
656
+ "timestamp": "2025-04-27T12:00:00Z",
657
+ "topic": "diplomacy",
658
+ "region": "levant",
659
+ "actors": [
660
+ "israel",
661
+ "hezbollah",
662
+ "oversight"
663
+ ],
664
+ "targets": [
665
+ "infrastructure"
666
+ ],
667
+ "severity": "high",
668
+ "summary": "Israel carries out airstrikes on several suburbs of Beirut, Lebanon, despite a ceasefire agreement following the 2024 invasion. The Lebanese Civil Defense reports no casualties.",
669
+ "public_summary": "Israel carries out airstrikes on several suburbs of Beirut, Lebanon, despite a ceasefire agreement following the 2024 invasion. The Lebanese Civil Defense reports no casualties.",
670
+ "source_type": "hf_2025_events",
671
+ "confirmed": true,
672
+ "tags": [
673
+ "diplomacy",
674
+ "armed_conflicts_and_attacks",
675
+ "levant"
676
+ ],
677
+ "impact": {
678
+ "tension_delta": -4.5,
679
+ "market_stress_delta": -3.0,
680
+ "oil_pressure_delta": -1.5,
681
+ "actor_metric_deltas": {}
682
+ }
683
+ },
684
+ {
685
+ "event_id": "evt-2025-may-04-4ef8e5be",
686
+ "timestamp": "2025-05-04T12:00:00Z",
687
+ "topic": "domestic",
688
+ "region": "levant",
689
+ "actors": [
690
+ "hezbollah"
691
+ ],
692
+ "targets": [
693
+ "general"
694
+ ],
695
+ "severity": "medium",
696
+ "summary": "2025 Lebanese municipal elections First round of voting in the Lebanese municipal elections begin in the Mount Lebanon Governorate and Keserwan-Jbeil Governorate. (NBC News)",
697
+ "public_summary": "2025 Lebanese municipal elections First round of voting in the Lebanese municipal elections begin in the Mount Lebanon Governorate and Keserwan-Jbeil Governorate. (NBC News)",
698
+ "source_type": "hf_2025_events",
699
+ "confirmed": true,
700
+ "tags": [
701
+ "domestic",
702
+ "politics_and_elections",
703
+ "levant"
704
+ ],
705
+ "impact": {
706
+ "tension_delta": 1.0,
707
+ "market_stress_delta": 1.5,
708
+ "oil_pressure_delta": 0.5,
709
+ "actor_metric_deltas": {}
710
+ }
711
+ },
712
+ {
713
+ "event_id": "evt-2025-may-11-453debd8",
714
+ "timestamp": "2025-05-11T12:00:00Z",
715
+ "topic": "conflict",
716
+ "region": "levant",
717
+ "actors": [
718
+ "israel",
719
+ "hezbollah"
720
+ ],
721
+ "targets": [
722
+ "general"
723
+ ],
724
+ "severity": "medium",
725
+ "summary": "The body of Israel Defense Forces soldier Zvi Feldman, missing since the 1982 Lebanon War, is repatriated to Israel from \"deep inside Syria\" in a joint Mossad\u2013IDF operation. (The Times of Israel)",
726
+ "public_summary": "The body of Israel Defense Forces soldier Zvi Feldman, missing since the 1982 Lebanon War, is repatriated to Israel from \"deep inside Syria\" in a joint Mossad\u2013IDF operation. (The Times of Israel)",
727
+ "source_type": "hf_2025_events",
728
+ "confirmed": true,
729
+ "tags": [
730
+ "conflict",
731
+ "armed_conflicts_and_attacks",
732
+ "levant"
733
+ ],
734
+ "impact": {
735
+ "tension_delta": 5.0,
736
+ "market_stress_delta": 2.0,
737
+ "oil_pressure_delta": 1.0,
738
+ "actor_metric_deltas": {}
739
+ }
740
+ },
741
+ {
742
+ "event_id": "evt-2025-may-14-3fd3730d",
743
+ "timestamp": "2025-05-14T12:00:00Z",
744
+ "topic": "military",
745
+ "region": "levant",
746
+ "actors": [
747
+ "israel",
748
+ "hezbollah"
749
+ ],
750
+ "targets": [
751
+ "general"
752
+ ],
753
+ "severity": "high",
754
+ "summary": "The Israel Defense Forces claim that a drone strike in Qaaqaait al-Jisr, Nabatieh Governorate, Lebanon, killed a Hezbollah commander. (The Times of Israel)",
755
+ "public_summary": "The Israel Defense Forces claim that a drone strike in Qaaqaait al-Jisr, Nabatieh Governorate, Lebanon, killed a Hezbollah commander. (The Times of Israel)",
756
+ "source_type": "hf_2025_events",
757
+ "confirmed": true,
758
+ "tags": [
759
+ "military",
760
+ "armed_conflicts_and_attacks",
761
+ "levant"
762
+ ],
763
+ "impact": {
764
+ "tension_delta": 9.0,
765
+ "market_stress_delta": 3.8,
766
+ "oil_pressure_delta": 2.2,
767
+ "actor_metric_deltas": {}
768
+ }
769
+ },
770
+ {
771
+ "event_id": "evt-2025-jun-03-8ce77e7e",
772
+ "timestamp": "2025-06-03T12:00:00Z",
773
+ "topic": "security",
774
+ "region": "levant",
775
+ "actors": [
776
+ "hezbollah",
777
+ "oversight"
778
+ ],
779
+ "targets": [
780
+ "general"
781
+ ],
782
+ "severity": "high",
783
+ "summary": "A court in Stuttgart, Germany, sentences a Syrian man to life in prison under universal jurisdiction for leading a Hezbollah-backed group and committing alleged war crimes against Sunni Muslims in Busra al-Sham, Syria, during the Syrian civil war. (DW)",
784
+ "public_summary": "A court in Stuttgart, Germany, sentences a Syrian man to life in prison under universal jurisdiction for leading a Hezbollah-backed group and committing alleged war crimes against Sunni Muslims in Bus",
785
+ "source_type": "hf_2025_events",
786
+ "confirmed": true,
787
+ "tags": [
788
+ "security",
789
+ "law_and_crime",
790
+ "levant"
791
+ ],
792
+ "impact": {
793
+ "tension_delta": 3.0,
794
+ "market_stress_delta": 1.5,
795
+ "oil_pressure_delta": 0.8,
796
+ "actor_metric_deltas": {}
797
+ }
798
+ },
799
+ {
800
+ "event_id": "evt-2025-jun-05-284cb00e",
801
+ "timestamp": "2025-06-05T12:00:00Z",
802
+ "topic": "military",
803
+ "region": "levant",
804
+ "actors": [
805
+ "us",
806
+ "israel",
807
+ "hezbollah"
808
+ ],
809
+ "targets": [
810
+ "general"
811
+ ],
812
+ "severity": "high",
813
+ "summary": "Israeli Air Force jets launch airstrikes on southern Beirut, Lebanon, targeting alleged underground Hezbollah drone manufacturing facilities. (T\u00fcrkiye Today) (Al-Monitor)",
814
+ "public_summary": "Israeli Air Force jets launch airstrikes on southern Beirut, Lebanon, targeting alleged underground Hezbollah drone manufacturing facilities. (T\u00fcrkiye Today) (Al-Monitor)",
815
+ "source_type": "hf_2025_events",
816
+ "confirmed": true,
817
+ "tags": [
818
+ "military",
819
+ "armed_conflicts_and_attacks",
820
+ "levant"
821
+ ],
822
+ "impact": {
823
+ "tension_delta": 9.0,
824
+ "market_stress_delta": 3.8,
825
+ "oil_pressure_delta": 2.2,
826
+ "actor_metric_deltas": {}
827
+ }
828
+ },
829
+ {
830
+ "event_id": "evt-2025-jun-28-1c0ebb59",
831
+ "timestamp": "2025-06-28T12:00:00Z",
832
+ "topic": "conflict",
833
+ "region": "levant",
834
+ "actors": [
835
+ "israel",
836
+ "hezbollah"
837
+ ],
838
+ "targets": [
839
+ "general"
840
+ ],
841
+ "severity": "high",
842
+ "summary": "Three people are killed by Israeli attacks on multiple vehicles in southern Lebanon.",
843
+ "public_summary": "Three people are killed by Israeli attacks on multiple vehicles in southern Lebanon.",
844
+ "source_type": "hf_2025_events",
845
+ "confirmed": true,
846
+ "tags": [
847
+ "conflict",
848
+ "armed_conflicts_and_attacks",
849
+ "levant"
850
+ ],
851
+ "impact": {
852
+ "tension_delta": 7.5,
853
+ "market_stress_delta": 3.0,
854
+ "oil_pressure_delta": 1.5,
855
+ "actor_metric_deltas": {}
856
+ }
857
+ },
858
+ {
859
+ "event_id": "evt-2025-jul-06-599bfcd6",
860
+ "timestamp": "2025-07-06T12:00:00Z",
861
+ "topic": "military",
862
+ "region": "levant",
863
+ "actors": [
864
+ "israel",
865
+ "hezbollah"
866
+ ],
867
+ "targets": [
868
+ "general"
869
+ ],
870
+ "severity": "high",
871
+ "summary": "Israel conducts four drone strikes in Bint Jbeil, Shebaa, and Shaqra, in southern Lebanon over the weekend, killing one person and injuring several others.",
872
+ "public_summary": "Israel conducts four drone strikes in Bint Jbeil, Shebaa, and Shaqra, in southern Lebanon over the weekend, killing one person and injuring several others.",
873
+ "source_type": "hf_2025_events",
874
+ "confirmed": true,
875
+ "tags": [
876
+ "military",
877
+ "armed_conflicts_and_attacks",
878
+ "levant"
879
+ ],
880
+ "impact": {
881
+ "tension_delta": 9.0,
882
+ "market_stress_delta": 3.8,
883
+ "oil_pressure_delta": 2.2,
884
+ "actor_metric_deltas": {}
885
+ }
886
+ },
887
+ {
888
+ "event_id": "evt-2025-jul-08-ef9f612a",
889
+ "timestamp": "2025-07-08T12:00:00Z",
890
+ "topic": "military",
891
+ "region": "levant",
892
+ "actors": [
893
+ "israel",
894
+ "hezbollah"
895
+ ],
896
+ "targets": [
897
+ "general"
898
+ ],
899
+ "severity": "high",
900
+ "summary": "An Israeli airstrike kills at least three people and injures 13 others in an attack on Tripoli, Lebanon.",
901
+ "public_summary": "An Israeli airstrike kills at least three people and injures 13 others in an attack on Tripoli, Lebanon.",
902
+ "source_type": "hf_2025_events",
903
+ "confirmed": true,
904
+ "tags": [
905
+ "military",
906
+ "armed_conflicts_and_attacks",
907
+ "levant"
908
+ ],
909
+ "impact": {
910
+ "tension_delta": 9.0,
911
+ "market_stress_delta": 3.8,
912
+ "oil_pressure_delta": 2.2,
913
+ "actor_metric_deltas": {}
914
+ }
915
+ },
916
+ {
917
+ "event_id": "evt-2025-jul-15-373cb31b",
918
+ "timestamp": "2025-07-15T12:00:00Z",
919
+ "topic": "diplomacy",
920
+ "region": "levant",
921
+ "actors": [
922
+ "israel",
923
+ "hezbollah",
924
+ "oversight"
925
+ ],
926
+ "targets": [
927
+ "general"
928
+ ],
929
+ "severity": "high",
930
+ "summary": "At least 12 people are killed in Israeli air strikes in Lebanon's eastern Bekaa Valley. The missiles struck a number of military compounds belonging to the armed group Hezbollah, including training camps affiliated to its elite Radwan Force. These were the deadliest strikes since a ceasefire ended a",
931
+ "public_summary": "At least 12 people are killed in Israeli air strikes in Lebanon's eastern Bekaa Valley. The missiles struck a number of military compounds belonging to the armed group Hezbollah, including training ca",
932
+ "source_type": "hf_2025_events",
933
+ "confirmed": true,
934
+ "tags": [
935
+ "diplomacy",
936
+ "armed_conflicts_and_attacks",
937
+ "levant"
938
+ ],
939
+ "impact": {
940
+ "tension_delta": -4.5,
941
+ "market_stress_delta": -3.0,
942
+ "oil_pressure_delta": -1.5,
943
+ "actor_metric_deltas": {}
944
+ }
945
+ },
946
+ {
947
+ "event_id": "evt-2025-jul-24-911b9aa0",
948
+ "timestamp": "2025-07-24T12:00:00Z",
949
+ "topic": "security",
950
+ "region": "levant",
951
+ "actors": [
952
+ "israel",
953
+ "hezbollah"
954
+ ],
955
+ "targets": [
956
+ "infrastructure"
957
+ ],
958
+ "severity": "medium",
959
+ "summary": "Hungary\u2013Israel relations, Hungary\u2013United Kingdom relations Hungary bans Irish rap trio Kneecap for three years ahead of a music festival, citing a national security threat over their alleged support for Hamas and Hezbollah. (The Times of Israel)",
960
+ "public_summary": "Hungary\u2013Israel relations, Hungary\u2013United Kingdom relations Hungary bans Irish rap trio Kneecap for three years ahead of a music festival, citing a national security threat over their alleged support f",
961
+ "source_type": "hf_2025_events",
962
+ "confirmed": true,
963
+ "tags": [
964
+ "security",
965
+ "law_and_crime",
966
+ "levant"
967
+ ],
968
+ "impact": {
969
+ "tension_delta": 2.0,
970
+ "market_stress_delta": 1.0,
971
+ "oil_pressure_delta": 0.5,
972
+ "actor_metric_deltas": {}
973
+ }
974
+ },
975
+ {
976
+ "event_id": "evt-2025-jul-29-3e354f7d",
977
+ "timestamp": "2025-07-29T12:00:00Z",
978
+ "topic": "diplomacy",
979
+ "region": "levant",
980
+ "actors": [
981
+ "hezbollah",
982
+ "oversight"
983
+ ],
984
+ "targets": [
985
+ "general"
986
+ ],
987
+ "severity": "high",
988
+ "summary": "Lebanon convicts and sentences six people who are accused of killing a United Nations peacekeeper in Beirut in 2023.",
989
+ "public_summary": "Lebanon convicts and sentences six people who are accused of killing a United Nations peacekeeper in Beirut in 2023.",
990
+ "source_type": "hf_2025_events",
991
+ "confirmed": true,
992
+ "tags": [
993
+ "diplomacy",
994
+ "law_and_crime",
995
+ "levant"
996
+ ],
997
+ "impact": {
998
+ "tension_delta": -4.5,
999
+ "market_stress_delta": -3.0,
1000
+ "oil_pressure_delta": -1.5,
1001
+ "actor_metric_deltas": {}
1002
+ }
1003
+ },
1004
+ {
1005
+ "event_id": "evt-2025-aug-07-dfc4fcd9",
1006
+ "timestamp": "2025-08-07T12:00:00Z",
1007
+ "topic": "domestic",
1008
+ "region": "levant",
1009
+ "actors": [
1010
+ "us",
1011
+ "israel",
1012
+ "hezbollah"
1013
+ ],
1014
+ "targets": [
1015
+ "general"
1016
+ ],
1017
+ "severity": "medium",
1018
+ "summary": "The cabinet of Lebanon approves a U.S. proposal for disarming Hezbollah by the end of the year, along with ending Israel's occupation in the country, despite Hezbollah's rejection of this demand.",
1019
+ "public_summary": "The cabinet of Lebanon approves a U.S. proposal for disarming Hezbollah by the end of the year, along with ending Israel's occupation in the country, despite Hezbollah's rejection of this demand.",
1020
+ "source_type": "hf_2025_events",
1021
+ "confirmed": true,
1022
+ "tags": [
1023
+ "domestic",
1024
+ "politics_and_elections",
1025
+ "levant"
1026
+ ],
1027
+ "impact": {
1028
+ "tension_delta": 1.0,
1029
+ "market_stress_delta": 1.5,
1030
+ "oil_pressure_delta": 0.5,
1031
+ "actor_metric_deltas": {}
1032
+ }
1033
+ },
1034
+ {
1035
+ "event_id": "evt-2025-aug-09-ecc2fce9",
1036
+ "timestamp": "2025-08-09T12:00:00Z",
1037
+ "topic": "border",
1038
+ "region": "levant",
1039
+ "actors": [
1040
+ "israel",
1041
+ "hezbollah"
1042
+ ],
1043
+ "targets": [
1044
+ "northern_front"
1045
+ ],
1046
+ "severity": "high",
1047
+ "summary": "At least six Lebanese soldiers are killed in an explosion while removing munitions from a Hezbollah facility near the Israeli border. (Al Arabiya)",
1048
+ "public_summary": "At least six Lebanese soldiers are killed in an explosion while removing munitions from a Hezbollah facility near the Israeli border. (Al Arabiya)",
1049
+ "source_type": "hf_2025_events",
1050
+ "confirmed": true,
1051
+ "tags": [
1052
+ "border",
1053
+ "disasters_and_accidents",
1054
+ "levant"
1055
+ ],
1056
+ "impact": {
1057
+ "tension_delta": 7.5,
1058
+ "market_stress_delta": 1.5,
1059
+ "oil_pressure_delta": 0.8,
1060
+ "actor_metric_deltas": {}
1061
+ }
1062
+ },
1063
+ {
1064
+ "event_id": "evt-2025-aug-28-53dcd36c",
1065
+ "timestamp": "2025-08-28T12:00:00Z",
1066
+ "topic": "military",
1067
+ "region": "levant",
1068
+ "actors": [
1069
+ "israel",
1070
+ "hezbollah"
1071
+ ],
1072
+ "targets": [
1073
+ "infrastructure"
1074
+ ],
1075
+ "severity": "high",
1076
+ "summary": "The Lebanese army reports that two soldiers were killed and two others were injured after an Israeli drone crashed and exploded in An-Naqoura, southern Lebanon, while they were inspecting it.",
1077
+ "public_summary": "The Lebanese army reports that two soldiers were killed and two others were injured after an Israeli drone crashed and exploded in An-Naqoura, southern Lebanon, while they were inspecting it.",
1078
+ "source_type": "hf_2025_events",
1079
+ "confirmed": true,
1080
+ "tags": [
1081
+ "military",
1082
+ "armed_conflicts_and_attacks",
1083
+ "levant"
1084
+ ],
1085
+ "impact": {
1086
+ "tension_delta": 9.0,
1087
+ "market_stress_delta": 3.8,
1088
+ "oil_pressure_delta": 2.2,
1089
+ "actor_metric_deltas": {}
1090
+ }
1091
+ },
1092
+ {
1093
+ "event_id": "evt-2025-aug-28-901efa72",
1094
+ "timestamp": "2025-08-28T12:00:00Z",
1095
+ "topic": "diplomacy",
1096
+ "region": "levant",
1097
+ "actors": [
1098
+ "israel",
1099
+ "hezbollah",
1100
+ "oversight"
1101
+ ],
1102
+ "targets": [
1103
+ "general"
1104
+ ],
1105
+ "severity": "high",
1106
+ "summary": "The United Nations Security Council votes unanimously to withdraw its peacekeeping mission in Lebanon on December 31, 2026.",
1107
+ "public_summary": "The United Nations Security Council votes unanimously to withdraw its peacekeeping mission in Lebanon on December 31, 2026.",
1108
+ "source_type": "hf_2025_events",
1109
+ "confirmed": true,
1110
+ "tags": [
1111
+ "diplomacy",
1112
+ "armed_conflicts_and_attacks",
1113
+ "levant"
1114
+ ],
1115
+ "impact": {
1116
+ "tension_delta": -4.5,
1117
+ "market_stress_delta": -3.0,
1118
+ "oil_pressure_delta": -1.5,
1119
+ "actor_metric_deltas": {}
1120
+ }
1121
+ },
1122
+ {
1123
+ "event_id": "evt-2025-sep-08-2deaa7a0",
1124
+ "timestamp": "2025-09-08T12:00:00Z",
1125
+ "topic": "military",
1126
+ "region": "levant",
1127
+ "actors": [
1128
+ "israel",
1129
+ "hezbollah"
1130
+ ],
1131
+ "targets": [
1132
+ "general"
1133
+ ],
1134
+ "severity": "high",
1135
+ "summary": "Five people are killed and five others injured during at least eight Israeli airstrikes between Beqaa Valley and Hermel, in Lebanon.",
1136
+ "public_summary": "Five people are killed and five others injured during at least eight Israeli airstrikes between Beqaa Valley and Hermel, in Lebanon.",
1137
+ "source_type": "hf_2025_events",
1138
+ "confirmed": true,
1139
+ "tags": [
1140
+ "military",
1141
+ "armed_conflicts_and_attacks",
1142
+ "levant"
1143
+ ],
1144
+ "impact": {
1145
+ "tension_delta": 9.0,
1146
+ "market_stress_delta": 3.8,
1147
+ "oil_pressure_delta": 2.2,
1148
+ "actor_metric_deltas": {}
1149
+ }
1150
+ },
1151
+ {
1152
+ "event_id": "evt-2025-sep-09-6ee67073",
1153
+ "timestamp": "2025-09-09T12:00:00Z",
1154
+ "topic": "diplomacy",
1155
+ "region": "levant",
1156
+ "actors": [
1157
+ "us",
1158
+ "israel",
1159
+ "hezbollah"
1160
+ ],
1161
+ "targets": [
1162
+ "proxy_corridor"
1163
+ ],
1164
+ "severity": "medium",
1165
+ "summary": "Iraq\u2013Israel relations, Iraq\u2013United States relations Iraqi Shia militia Kata'ib Hezbollah releases Russian\u2013Israeli researcher Elizabeth Tsurkov who was abducted in 2023 during a research trip to Iraq following negotiations involving Israel and the United States. (CNBC)",
1166
+ "public_summary": "Iraq\u2013Israel relations, Iraq\u2013United States relations Iraqi Shia militia Kata'ib Hezbollah releases Russian\u2013Israeli researcher Elizabeth Tsurkov who was abducted in 2023 during a research trip to Iraq f",
1167
+ "source_type": "hf_2025_events",
1168
+ "confirmed": true,
1169
+ "tags": [
1170
+ "diplomacy",
1171
+ "international_relations",
1172
+ "levant"
1173
+ ],
1174
+ "impact": {
1175
+ "tension_delta": -3.0,
1176
+ "market_stress_delta": -2.0,
1177
+ "oil_pressure_delta": -1.0,
1178
+ "actor_metric_deltas": {}
1179
+ }
1180
+ },
1181
+ {
1182
+ "event_id": "evt-2025-sep-16-1203ea8d",
1183
+ "timestamp": "2025-09-16T12:00:00Z",
1184
+ "topic": "security",
1185
+ "region": "levant",
1186
+ "actors": [
1187
+ "hezbollah"
1188
+ ],
1189
+ "targets": [
1190
+ "general"
1191
+ ],
1192
+ "severity": "high",
1193
+ "summary": "Bulgarian police arrest the owner of the ship that brought explosive material to Beirut, Lebanon, which caused an explosion in August 2020 that killed more than 200 people.",
1194
+ "public_summary": "Bulgarian police arrest the owner of the ship that brought explosive material to Beirut, Lebanon, which caused an explosion in August 2020 that killed more than 200 people.",
1195
+ "source_type": "hf_2025_events",
1196
+ "confirmed": true,
1197
+ "tags": [
1198
+ "security",
1199
+ "law_and_crime",
1200
+ "levant"
1201
+ ],
1202
+ "impact": {
1203
+ "tension_delta": 3.0,
1204
+ "market_stress_delta": 1.5,
1205
+ "oil_pressure_delta": 0.8,
1206
+ "actor_metric_deltas": {}
1207
+ }
1208
+ },
1209
+ {
1210
+ "event_id": "evt-2025-sep-21-eaa27044",
1211
+ "timestamp": "2025-09-21T12:00:00Z",
1212
+ "topic": "military",
1213
+ "region": "levant",
1214
+ "actors": [
1215
+ "israel",
1216
+ "hezbollah"
1217
+ ],
1218
+ "targets": [
1219
+ "general"
1220
+ ],
1221
+ "severity": "high",
1222
+ "summary": "Five people are killed and two others are injured in an Israeli drone strike on a vehicle in Bint Jbeil, Lebanon.",
1223
+ "public_summary": "Five people are killed and two others are injured in an Israeli drone strike on a vehicle in Bint Jbeil, Lebanon.",
1224
+ "source_type": "hf_2025_events",
1225
+ "confirmed": true,
1226
+ "tags": [
1227
+ "military",
1228
+ "armed_conflicts_and_attacks",
1229
+ "levant"
1230
+ ],
1231
+ "impact": {
1232
+ "tension_delta": 9.0,
1233
+ "market_stress_delta": 3.8,
1234
+ "oil_pressure_delta": 2.2,
1235
+ "actor_metric_deltas": {}
1236
+ }
1237
+ },
1238
+ {
1239
+ "event_id": "evt-2025-sep-26-46695b30",
1240
+ "timestamp": "2025-09-26T12:00:00Z",
1241
+ "topic": "security",
1242
+ "region": "levant",
1243
+ "actors": [
1244
+ "us",
1245
+ "hezbollah"
1246
+ ],
1247
+ "targets": [
1248
+ "general"
1249
+ ],
1250
+ "severity": "medium",
1251
+ "summary": "Lebanese authorities release former central bank governor Riad Salameh after he posts bail of US$14 million and LL 5 billion ($55,866) while facing ongoing charges of alleged financial crimes.",
1252
+ "public_summary": "Lebanese authorities release former central bank governor Riad Salameh after he posts bail of US$14 million and LL 5 billion ($55,866) while facing ongoing charges of alleged financial crimes.",
1253
+ "source_type": "hf_2025_events",
1254
+ "confirmed": true,
1255
+ "tags": [
1256
+ "security",
1257
+ "law_and_crime",
1258
+ "levant"
1259
+ ],
1260
+ "impact": {
1261
+ "tension_delta": 2.0,
1262
+ "market_stress_delta": 1.0,
1263
+ "oil_pressure_delta": 0.5,
1264
+ "actor_metric_deltas": {}
1265
+ }
1266
+ },
1267
+ {
1268
+ "event_id": "evt-2025-sep-28-97d06874",
1269
+ "timestamp": "2025-09-28T12:00:00Z",
1270
+ "topic": "disaster",
1271
+ "region": "levant",
1272
+ "actors": [
1273
+ "us",
1274
+ "hezbollah"
1275
+ ],
1276
+ "targets": [
1277
+ "general"
1278
+ ],
1279
+ "severity": "high",
1280
+ "summary": "Four people are killed and another six people affected after a fire in a building in Lebanon, Pennsylvania, United States. (MSN)",
1281
+ "public_summary": "Four people are killed and another six people affected after a fire in a building in Lebanon, Pennsylvania, United States. (MSN)",
1282
+ "source_type": "hf_2025_events",
1283
+ "confirmed": true,
1284
+ "tags": [
1285
+ "disaster",
1286
+ "disasters_and_accidents",
1287
+ "levant"
1288
+ ],
1289
+ "impact": {
1290
+ "tension_delta": 1.5,
1291
+ "market_stress_delta": 3.0,
1292
+ "oil_pressure_delta": 1.5,
1293
+ "actor_metric_deltas": {}
1294
+ }
1295
+ },
1296
+ {
1297
+ "event_id": "evt-2025-oct-07-9974fc64",
1298
+ "timestamp": "2025-10-07T12:00:00Z",
1299
+ "topic": "diplomacy",
1300
+ "region": "levant",
1301
+ "actors": [
1302
+ "hezbollah"
1303
+ ],
1304
+ "targets": [
1305
+ "infrastructure"
1306
+ ],
1307
+ "severity": "medium",
1308
+ "summary": "Foreign relations of the Holy See Visit by Pope Leo XIV to Turkey and Lebanon The Holy See Press Office announces that Pope Leo XIV will visit \u0130znik, Turkey, the location of the ancient city Nicaea, and Lebanon in November and December as his first papal visits abroad. (The Catholic Herald) (Anadolu",
1309
+ "public_summary": "Foreign relations of the Holy See Visit by Pope Leo XIV to Turkey and Lebanon The Holy See Press Office announces that Pope Leo XIV will visit \u0130znik, Turkey, the location of the ancient city Nicaea, a",
1310
+ "source_type": "hf_2025_events",
1311
+ "confirmed": true,
1312
+ "tags": [
1313
+ "diplomacy",
1314
+ "international_relations",
1315
+ "levant"
1316
+ ],
1317
+ "impact": {
1318
+ "tension_delta": -3.0,
1319
+ "market_stress_delta": -2.0,
1320
+ "oil_pressure_delta": -1.0,
1321
+ "actor_metric_deltas": {}
1322
+ }
1323
+ },
1324
+ {
1325
+ "event_id": "evt-2025-oct-10-6494d5d9",
1326
+ "timestamp": "2025-10-10T12:00:00Z",
1327
+ "topic": "conflict",
1328
+ "region": "levant",
1329
+ "actors": [
1330
+ "israel",
1331
+ "hezbollah"
1332
+ ],
1333
+ "targets": [
1334
+ "government"
1335
+ ],
1336
+ "severity": "high",
1337
+ "summary": "Israel\u2013Hezbollah conflict (2023\u2013present) The Lebanese government announces the arrest of a network affiliated to Israel which attempted to carry out assassinations and bombing attacks, mainly during a commemoration ceremony for the assassinated Hezbollah leader Hassan Nasrallah.",
1338
+ "public_summary": "Israel\u2013Hezbollah conflict (2023\u2013present) The Lebanese government announces the arrest of a network affiliated to Israel which attempted to carry out assassinations and bombing attacks, mainly during a",
1339
+ "source_type": "hf_2025_events",
1340
+ "confirmed": true,
1341
+ "tags": [
1342
+ "conflict",
1343
+ "armed_conflicts_and_attacks",
1344
+ "levant"
1345
+ ],
1346
+ "impact": {
1347
+ "tension_delta": 7.5,
1348
+ "market_stress_delta": 3.0,
1349
+ "oil_pressure_delta": 1.5,
1350
+ "actor_metric_deltas": {}
1351
+ }
1352
+ },
1353
+ {
1354
+ "event_id": "evt-2025-oct-11-f7370a76",
1355
+ "timestamp": "2025-10-11T12:00:00Z",
1356
+ "topic": "conflict",
1357
+ "region": "levant",
1358
+ "actors": [
1359
+ "israel",
1360
+ "hezbollah"
1361
+ ],
1362
+ "targets": [
1363
+ "general"
1364
+ ],
1365
+ "severity": "high",
1366
+ "summary": "One person is killed, seven others are injured, dozens of bulldozers are destroyed, and a key route connecting Beirut to Lebanon's south is severed in Israeli attacks in Msayleh, An-Najjariyah, Lebanon. (Naharnet)",
1367
+ "public_summary": "One person is killed, seven others are injured, dozens of bulldozers are destroyed, and a key route connecting Beirut to Lebanon's south is severed in Israeli attacks in Msayleh, An-Najjariyah, Lebano",
1368
+ "source_type": "hf_2025_events",
1369
+ "confirmed": true,
1370
+ "tags": [
1371
+ "conflict",
1372
+ "armed_conflicts_and_attacks",
1373
+ "levant"
1374
+ ],
1375
+ "impact": {
1376
+ "tension_delta": 7.5,
1377
+ "market_stress_delta": 3.0,
1378
+ "oil_pressure_delta": 1.5,
1379
+ "actor_metric_deltas": {}
1380
+ }
1381
+ },
1382
+ {
1383
+ "event_id": "evt-2025-oct-16-f53c3e02",
1384
+ "timestamp": "2025-10-16T12:00:00Z",
1385
+ "topic": "conflict",
1386
+ "region": "levant",
1387
+ "actors": [
1388
+ "hezbollah"
1389
+ ],
1390
+ "targets": [
1391
+ "general"
1392
+ ],
1393
+ "severity": "high",
1394
+ "summary": "Heavy armed clashes break out in Zawiya, Tripolitania, Libya, after the attempted assassination of Reserve Force commander Suleiman Al-Fitouri. (Xinhua News Agency)",
1395
+ "public_summary": "Heavy armed clashes break out in Zawiya, Tripolitania, Libya, after the attempted assassination of Reserve Force commander Suleiman Al-Fitouri. (Xinhua News Agency)",
1396
+ "source_type": "hf_2025_events",
1397
+ "confirmed": true,
1398
+ "tags": [
1399
+ "conflict",
1400
+ "armed_conflicts_and_attacks",
1401
+ "levant"
1402
+ ],
1403
+ "impact": {
1404
+ "tension_delta": 7.5,
1405
+ "market_stress_delta": 3.0,
1406
+ "oil_pressure_delta": 1.5,
1407
+ "actor_metric_deltas": {}
1408
+ }
1409
+ },
1410
+ {
1411
+ "event_id": "evt-2025-oct-23-99d61a03",
1412
+ "timestamp": "2025-10-23T12:00:00Z",
1413
+ "topic": "military",
1414
+ "region": "gulf",
1415
+ "actors": [
1416
+ "israel",
1417
+ "hezbollah",
1418
+ "gulf"
1419
+ ],
1420
+ "targets": [
1421
+ "general"
1422
+ ],
1423
+ "severity": "high",
1424
+ "summary": "Four people, including an elderly woman, are killed in a series of Israeli airstrikes in eastern and southern Lebanon.",
1425
+ "public_summary": "Four people, including an elderly woman, are killed in a series of Israeli airstrikes in eastern and southern Lebanon.",
1426
+ "source_type": "hf_2025_events",
1427
+ "confirmed": true,
1428
+ "tags": [
1429
+ "military",
1430
+ "armed_conflicts_and_attacks",
1431
+ "gulf"
1432
+ ],
1433
+ "impact": {
1434
+ "tension_delta": 9.0,
1435
+ "market_stress_delta": 3.8,
1436
+ "oil_pressure_delta": 2.2,
1437
+ "actor_metric_deltas": {}
1438
+ }
1439
+ },
1440
+ {
1441
+ "event_id": "evt-2025-oct-24-f15a2436",
1442
+ "timestamp": "2025-10-24T12:00:00Z",
1443
+ "topic": "military",
1444
+ "region": "levant",
1445
+ "actors": [
1446
+ "israel",
1447
+ "hezbollah"
1448
+ ],
1449
+ "targets": [
1450
+ "general"
1451
+ ],
1452
+ "severity": "high",
1453
+ "summary": "Three people are killed and two others injured during two Israeli airstrikes against two vehicles in Toul and Nabatieh, Lebanon. Senior Hezbollah commander Abbas Karki is among the fatalities. (AA)",
1454
+ "public_summary": "Three people are killed and two others injured during two Israeli airstrikes against two vehicles in Toul and Nabatieh, Lebanon. Senior Hezbollah commander Abbas Karki is among the fatalities. (AA)",
1455
+ "source_type": "hf_2025_events",
1456
+ "confirmed": true,
1457
+ "tags": [
1458
+ "military",
1459
+ "armed_conflicts_and_attacks",
1460
+ "levant"
1461
+ ],
1462
+ "impact": {
1463
+ "tension_delta": 9.0,
1464
+ "market_stress_delta": 3.8,
1465
+ "oil_pressure_delta": 2.2,
1466
+ "actor_metric_deltas": {}
1467
+ }
1468
+ },
1469
+ {
1470
+ "event_id": "evt-2025-oct-25-4adb2abc",
1471
+ "timestamp": "2025-10-25T12:00:00Z",
1472
+ "topic": "military",
1473
+ "region": "levant",
1474
+ "actors": [
1475
+ "israel",
1476
+ "hezbollah"
1477
+ ],
1478
+ "targets": [
1479
+ "general"
1480
+ ],
1481
+ "severity": "high",
1482
+ "summary": "Two people are killed during two Israeli airstrikes against a car and a motorcycle in Harouf and Qlayleh, Lebanon, bringing the death toll from the attacks since the past 48 hours to nine.",
1483
+ "public_summary": "Two people are killed during two Israeli airstrikes against a car and a motorcycle in Harouf and Qlayleh, Lebanon, bringing the death toll from the attacks since the past 48 hours to nine.",
1484
+ "source_type": "hf_2025_events",
1485
+ "confirmed": true,
1486
+ "tags": [
1487
+ "military",
1488
+ "armed_conflicts_and_attacks",
1489
+ "levant"
1490
+ ],
1491
+ "impact": {
1492
+ "tension_delta": 9.0,
1493
+ "market_stress_delta": 3.8,
1494
+ "oil_pressure_delta": 2.2,
1495
+ "actor_metric_deltas": {}
1496
+ }
1497
+ },
1498
+ {
1499
+ "event_id": "evt-2025-oct-26-6e2fdfe0",
1500
+ "timestamp": "2025-10-26T12:00:00Z",
1501
+ "topic": "military",
1502
+ "region": "levant",
1503
+ "actors": [
1504
+ "israel",
1505
+ "hezbollah"
1506
+ ],
1507
+ "targets": [
1508
+ "general"
1509
+ ],
1510
+ "severity": "high",
1511
+ "summary": "Three people are killed in two Israeli airstrikes in An-Naqoura and Baalbek, Lebanon. Separately, an Israeli drone drops a grenade near UNIFIL forces in Kfar Kila, without casualties.",
1512
+ "public_summary": "Three people are killed in two Israeli airstrikes in An-Naqoura and Baalbek, Lebanon. Separately, an Israeli drone drops a grenade near UNIFIL forces in Kfar Kila, without casualties.",
1513
+ "source_type": "hf_2025_events",
1514
+ "confirmed": true,
1515
+ "tags": [
1516
+ "military",
1517
+ "armed_conflicts_and_attacks",
1518
+ "levant"
1519
+ ],
1520
+ "impact": {
1521
+ "tension_delta": 9.0,
1522
+ "market_stress_delta": 3.8,
1523
+ "oil_pressure_delta": 2.2,
1524
+ "actor_metric_deltas": {}
1525
+ }
1526
+ },
1527
+ {
1528
+ "event_id": "evt-2025-oct-30-d623fb35",
1529
+ "timestamp": "2025-10-30T12:00:00Z",
1530
+ "topic": "border",
1531
+ "region": "israel",
1532
+ "actors": [
1533
+ "israel",
1534
+ "hezbollah",
1535
+ "oversight"
1536
+ ],
1537
+ "targets": [
1538
+ "civilians"
1539
+ ],
1540
+ "severity": "high",
1541
+ "summary": "Israeli troops enter the southern Lebanese city of Blida and storm the municipal hall, killing an employee while sleeping. In response, Lebanese president Joseph Aoun orders the military to confront any Israeli incursions.",
1542
+ "public_summary": "Israeli troops enter the southern Lebanese city of Blida and storm the municipal hall, killing an employee while sleeping. In response, Lebanese president Joseph Aoun orders the military to confront a",
1543
+ "source_type": "hf_2025_events",
1544
+ "confirmed": true,
1545
+ "tags": [
1546
+ "border",
1547
+ "armed_conflicts_and_attacks",
1548
+ "israel"
1549
+ ],
1550
+ "impact": {
1551
+ "tension_delta": 7.5,
1552
+ "market_stress_delta": 1.5,
1553
+ "oil_pressure_delta": 0.8,
1554
+ "actor_metric_deltas": {}
1555
+ }
1556
+ },
1557
+ {
1558
+ "event_id": "evt-2025-nov-01-99cb3eee",
1559
+ "timestamp": "2025-11-01T12:00:00Z",
1560
+ "topic": "military",
1561
+ "region": "levant",
1562
+ "actors": [
1563
+ "israel",
1564
+ "hezbollah"
1565
+ ],
1566
+ "targets": [
1567
+ "general"
1568
+ ],
1569
+ "severity": "high",
1570
+ "summary": "Four people are killed and three injured in an Israeli airstrike on a vehicle in Kfar Reman, Lebanon.",
1571
+ "public_summary": "Four people are killed and three injured in an Israeli airstrike on a vehicle in Kfar Reman, Lebanon.",
1572
+ "source_type": "hf_2025_events",
1573
+ "confirmed": true,
1574
+ "tags": [
1575
+ "military",
1576
+ "armed_conflicts_and_attacks",
1577
+ "levant"
1578
+ ],
1579
+ "impact": {
1580
+ "tension_delta": 9.0,
1581
+ "market_stress_delta": 3.8,
1582
+ "oil_pressure_delta": 2.2,
1583
+ "actor_metric_deltas": {}
1584
+ }
1585
+ },
1586
+ {
1587
+ "event_id": "evt-2025-nov-10-eb28755c",
1588
+ "timestamp": "2025-11-10T12:00:00Z",
1589
+ "topic": "security",
1590
+ "region": "levant",
1591
+ "actors": [
1592
+ "hezbollah"
1593
+ ],
1594
+ "targets": [
1595
+ "general"
1596
+ ],
1597
+ "severity": "medium",
1598
+ "summary": "Lebanon grants a US$900,000 bail to Hannibal Gaddafi, the son of former Libyan leader Muammar Gaddafi, ending his nearly 10-year detention in a case involving the 1978 disappearance of Shia Muslim cleric Musa al-Sadr, for which Gaddafi was accused of withholding information but never tried.",
1599
+ "public_summary": "Lebanon grants a US$900,000 bail to Hannibal Gaddafi, the son of former Libyan leader Muammar Gaddafi, ending his nearly 10-year detention in a case involving the 1978 disappearance of Shia Muslim cle",
1600
+ "source_type": "hf_2025_events",
1601
+ "confirmed": true,
1602
+ "tags": [
1603
+ "security",
1604
+ "law_and_crime",
1605
+ "levant"
1606
+ ],
1607
+ "impact": {
1608
+ "tension_delta": 2.0,
1609
+ "market_stress_delta": 1.0,
1610
+ "oil_pressure_delta": 0.5,
1611
+ "actor_metric_deltas": {}
1612
+ }
1613
+ },
1614
+ {
1615
+ "event_id": "evt-2025-nov-18-cdfe2fae",
1616
+ "timestamp": "2025-11-18T12:00:00Z",
1617
+ "topic": "military",
1618
+ "region": "levant",
1619
+ "actors": [
1620
+ "israel",
1621
+ "hezbollah"
1622
+ ],
1623
+ "targets": [
1624
+ "civilians"
1625
+ ],
1626
+ "severity": "high",
1627
+ "summary": "Thirteen people are killed and several others are wounded in an Israeli airstrike in the Ain al-Hilweh Palestinian refugee camp in Sidon District, Lebanon. (MENA via The New Arab)",
1628
+ "public_summary": "Thirteen people are killed and several others are wounded in an Israeli airstrike in the Ain al-Hilweh Palestinian refugee camp in Sidon District, Lebanon. (MENA via The New Arab)",
1629
+ "source_type": "hf_2025_events",
1630
+ "confirmed": true,
1631
+ "tags": [
1632
+ "military",
1633
+ "armed_conflicts_and_attacks",
1634
+ "levant"
1635
+ ],
1636
+ "impact": {
1637
+ "tension_delta": 9.0,
1638
+ "market_stress_delta": 3.8,
1639
+ "oil_pressure_delta": 2.2,
1640
+ "actor_metric_deltas": {}
1641
+ }
1642
+ },
1643
+ {
1644
+ "event_id": "evt-2025-nov-18-ec6565fe",
1645
+ "timestamp": "2025-11-18T12:00:00Z",
1646
+ "topic": "military",
1647
+ "region": "levant",
1648
+ "actors": [
1649
+ "israel",
1650
+ "hezbollah"
1651
+ ],
1652
+ "targets": [
1653
+ "general"
1654
+ ],
1655
+ "severity": "high",
1656
+ "summary": "Two people are killed in two separate Israeli airstrikes on vehicles in Bint Jbeil and Marjayoun, Lebanon. (L'Orient Today)",
1657
+ "public_summary": "Two people are killed in two separate Israeli airstrikes on vehicles in Bint Jbeil and Marjayoun, Lebanon. (L'Orient Today)",
1658
+ "source_type": "hf_2025_events",
1659
+ "confirmed": true,
1660
+ "tags": [
1661
+ "military",
1662
+ "armed_conflicts_and_attacks",
1663
+ "levant"
1664
+ ],
1665
+ "impact": {
1666
+ "tension_delta": 9.0,
1667
+ "market_stress_delta": 3.8,
1668
+ "oil_pressure_delta": 2.2,
1669
+ "actor_metric_deltas": {}
1670
+ }
1671
+ },
1672
+ {
1673
+ "event_id": "evt-2025-nov-19-e4cdfd62",
1674
+ "timestamp": "2025-11-19T12:00:00Z",
1675
+ "topic": "military",
1676
+ "region": "levant",
1677
+ "actors": [
1678
+ "us",
1679
+ "israel",
1680
+ "hezbollah"
1681
+ ],
1682
+ "targets": [
1683
+ "general"
1684
+ ],
1685
+ "severity": "high",
1686
+ "summary": "One person is killed in an Israeli airstrike attack against a vehicle in Bint Jbeil, Lebanon. Several students are also wounded on a bus passing in the area.",
1687
+ "public_summary": "One person is killed in an Israeli airstrike attack against a vehicle in Bint Jbeil, Lebanon. Several students are also wounded on a bus passing in the area.",
1688
+ "source_type": "hf_2025_events",
1689
+ "confirmed": true,
1690
+ "tags": [
1691
+ "military",
1692
+ "armed_conflicts_and_attacks",
1693
+ "levant"
1694
+ ],
1695
+ "impact": {
1696
+ "tension_delta": 9.0,
1697
+ "market_stress_delta": 3.8,
1698
+ "oil_pressure_delta": 2.2,
1699
+ "actor_metric_deltas": {}
1700
+ }
1701
+ },
1702
+ {
1703
+ "event_id": "evt-2025-nov-22-1df578e2",
1704
+ "timestamp": "2025-11-22T12:00:00Z",
1705
+ "topic": "military",
1706
+ "region": "levant",
1707
+ "actors": [
1708
+ "israel",
1709
+ "hezbollah"
1710
+ ],
1711
+ "targets": [
1712
+ "general"
1713
+ ],
1714
+ "severity": "high",
1715
+ "summary": "A person is killed in an Israeli airstrike on a car in Zawtar al-Sharqiyah, Lebanon, while several more attacks are carried out across Kafr Rumman.",
1716
+ "public_summary": "A person is killed in an Israeli airstrike on a car in Zawtar al-Sharqiyah, Lebanon, while several more attacks are carried out across Kafr Rumman.",
1717
+ "source_type": "hf_2025_events",
1718
+ "confirmed": true,
1719
+ "tags": [
1720
+ "military",
1721
+ "armed_conflicts_and_attacks",
1722
+ "levant"
1723
+ ],
1724
+ "impact": {
1725
+ "tension_delta": 9.0,
1726
+ "market_stress_delta": 3.8,
1727
+ "oil_pressure_delta": 2.2,
1728
+ "actor_metric_deltas": {}
1729
+ }
1730
+ },
1731
+ {
1732
+ "event_id": "evt-2025-nov-23-3f1e2172",
1733
+ "timestamp": "2025-11-23T12:00:00Z",
1734
+ "topic": "military",
1735
+ "region": "levant",
1736
+ "actors": [
1737
+ "israel",
1738
+ "hezbollah"
1739
+ ],
1740
+ "targets": [
1741
+ "general"
1742
+ ],
1743
+ "severity": "high",
1744
+ "summary": "November 2025 Israeli attack in Beirut Israel carries out an airstrike on a building in Beirut, Lebanon, killing five Hezbollah militants and injuring 28 others. Hezbollah military chief Haytham Ali Tabatabai is confirmed among the fatalities.",
1745
+ "public_summary": "November 2025 Israeli attack in Beirut Israel carries out an airstrike on a building in Beirut, Lebanon, killing five Hezbollah militants and injuring 28 others. Hezbollah military chief Haytham Ali T",
1746
+ "source_type": "hf_2025_events",
1747
+ "confirmed": true,
1748
+ "tags": [
1749
+ "military",
1750
+ "armed_conflicts_and_attacks",
1751
+ "levant"
1752
+ ],
1753
+ "impact": {
1754
+ "tension_delta": 9.0,
1755
+ "market_stress_delta": 3.8,
1756
+ "oil_pressure_delta": 2.2,
1757
+ "actor_metric_deltas": {}
1758
+ }
1759
+ },
1760
+ {
1761
+ "event_id": "evt-2025-nov-26-4e3538d9",
1762
+ "timestamp": "2025-11-26T12:00:00Z",
1763
+ "topic": "culture",
1764
+ "region": "levant",
1765
+ "actors": [
1766
+ "hezbollah"
1767
+ ],
1768
+ "targets": [
1769
+ "infrastructure"
1770
+ ],
1771
+ "severity": "high",
1772
+ "summary": "Holy See\u2013Lebanon relations, Holy See\u2013Turkey relations Visit by Pope Leo XIV to Turkey and Lebanon Pope Leo XIV begins his first papal trip, first to \u0130znik and Nicaea in Turkey, then to Bkerk\u00e9, the Monastery of Saint Maron, and the Port of Beirut in Lebanon. The trip will include an ecumenical commem",
1773
+ "public_summary": "Holy See\u2013Lebanon relations, Holy See\u2013Turkey relations Visit by Pope Leo XIV to Turkey and Lebanon Pope Leo XIV begins his first papal trip, first to \u0130znik and Nicaea in Turkey, then to Bkerk\u00e9, the Mon",
1774
+ "source_type": "hf_2025_events",
1775
+ "confirmed": true,
1776
+ "tags": [
1777
+ "culture",
1778
+ "arts_and_culture",
1779
+ "levant"
1780
+ ],
1781
+ "impact": {
1782
+ "tension_delta": 0.0,
1783
+ "market_stress_delta": 0.0,
1784
+ "oil_pressure_delta": 0.0,
1785
+ "actor_metric_deltas": {}
1786
+ }
1787
+ },
1788
+ {
1789
+ "event_id": "evt-2025-nov-26-814de5ef",
1790
+ "timestamp": "2025-11-26T12:00:00Z",
1791
+ "topic": "shipping",
1792
+ "region": "levant",
1793
+ "actors": [
1794
+ "us",
1795
+ "hezbollah"
1796
+ ],
1797
+ "targets": [
1798
+ "shipping_lanes"
1799
+ ],
1800
+ "severity": "medium",
1801
+ "summary": "Cyprus and Lebanon sign a long-delayed maritime boundary agreement.",
1802
+ "public_summary": "Cyprus and Lebanon sign a long-delayed maritime boundary agreement.",
1803
+ "source_type": "hf_2025_events",
1804
+ "confirmed": true,
1805
+ "tags": [
1806
+ "shipping",
1807
+ "international_relations",
1808
+ "levant"
1809
+ ],
1810
+ "impact": {
1811
+ "tension_delta": 3.0,
1812
+ "market_stress_delta": 4.0,
1813
+ "oil_pressure_delta": 6.0,
1814
+ "actor_metric_deltas": {}
1815
+ }
1816
+ },
1817
+ {
1818
+ "event_id": "evt-2025-dec-08-614e2d74",
1819
+ "timestamp": "2025-12-08T12:00:00Z",
1820
+ "topic": "military",
1821
+ "region": "levant",
1822
+ "actors": [
1823
+ "israel",
1824
+ "hezbollah"
1825
+ ],
1826
+ "targets": [
1827
+ "general"
1828
+ ],
1829
+ "severity": "high",
1830
+ "summary": "Israel carries out a wave of airstrikes in southern Lebanon, allegedly targeting Hezbollah's site.",
1831
+ "public_summary": "Israel carries out a wave of airstrikes in southern Lebanon, allegedly targeting Hezbollah's site.",
1832
+ "source_type": "hf_2025_events",
1833
+ "confirmed": true,
1834
+ "tags": [
1835
+ "military",
1836
+ "armed_conflicts_and_attacks",
1837
+ "levant"
1838
+ ],
1839
+ "impact": {
1840
+ "tension_delta": 9.0,
1841
+ "market_stress_delta": 3.8,
1842
+ "oil_pressure_delta": 2.2,
1843
+ "actor_metric_deltas": {}
1844
+ }
1845
+ },
1846
+ {
1847
+ "event_id": "evt-2025-dec-13-6ca8c26a",
1848
+ "timestamp": "2025-12-13T12:00:00Z",
1849
+ "topic": "military",
1850
+ "region": "levant",
1851
+ "actors": [
1852
+ "israel",
1853
+ "hezbollah"
1854
+ ],
1855
+ "targets": [
1856
+ "general"
1857
+ ],
1858
+ "severity": "high",
1859
+ "summary": "The Israeli military issues an evacuation order in southern Lebanese villages in preparation for planned airstrikes on Hezbollah targets. (The Jerusalem Post)",
1860
+ "public_summary": "The Israeli military issues an evacuation order in southern Lebanese villages in preparation for planned airstrikes on Hezbollah targets. (The Jerusalem Post)",
1861
+ "source_type": "hf_2025_events",
1862
+ "confirmed": true,
1863
+ "tags": [
1864
+ "military",
1865
+ "armed_conflicts_and_attacks",
1866
+ "levant"
1867
+ ],
1868
+ "impact": {
1869
+ "tension_delta": 9.0,
1870
+ "market_stress_delta": 3.8,
1871
+ "oil_pressure_delta": 2.2,
1872
+ "actor_metric_deltas": {}
1873
+ }
1874
+ },
1875
+ {
1876
+ "event_id": "evt-2025-dec-14-5d24227b",
1877
+ "timestamp": "2025-12-14T12:00:00Z",
1878
+ "topic": "military",
1879
+ "region": "levant",
1880
+ "actors": [
1881
+ "israel",
1882
+ "hezbollah"
1883
+ ],
1884
+ "targets": [
1885
+ "general"
1886
+ ],
1887
+ "severity": "high",
1888
+ "summary": "Three people are killed in three separated Israeli airstrikes against vehicles in southern Lebanon. (L'Orient Today)",
1889
+ "public_summary": "Three people are killed in three separated Israeli airstrikes against vehicles in southern Lebanon. (L'Orient Today)",
1890
+ "source_type": "hf_2025_events",
1891
+ "confirmed": true,
1892
+ "tags": [
1893
+ "military",
1894
+ "armed_conflicts_and_attacks",
1895
+ "levant"
1896
+ ],
1897
+ "impact": {
1898
+ "tension_delta": 9.0,
1899
+ "market_stress_delta": 3.8,
1900
+ "oil_pressure_delta": 2.2,
1901
+ "actor_metric_deltas": {}
1902
+ }
1903
+ },
1904
+ {
1905
+ "event_id": "evt-2025-dec-22-48c66d32",
1906
+ "timestamp": "2025-12-22T12:00:00Z",
1907
+ "topic": "military",
1908
+ "region": "levant",
1909
+ "actors": [
1910
+ "israel",
1911
+ "hezbollah"
1912
+ ],
1913
+ "targets": [
1914
+ "general"
1915
+ ],
1916
+ "severity": "high",
1917
+ "summary": "Three people are killed in an Israeli airstrike against a vehicle near Sidon, Lebanon.",
1918
+ "public_summary": "Three people are killed in an Israeli airstrike against a vehicle near Sidon, Lebanon.",
1919
+ "source_type": "hf_2025_events",
1920
+ "confirmed": true,
1921
+ "tags": [
1922
+ "military",
1923
+ "armed_conflicts_and_attacks",
1924
+ "levant"
1925
+ ],
1926
+ "impact": {
1927
+ "tension_delta": 9.0,
1928
+ "market_stress_delta": 3.8,
1929
+ "oil_pressure_delta": 2.2,
1930
+ "actor_metric_deltas": {}
1931
+ }
1932
+ },
1933
+ {
1934
+ "event_id": "evt-2025-dec-24-30bada7a",
1935
+ "timestamp": "2025-12-24T12:00:00Z",
1936
+ "topic": "military",
1937
+ "region": "levant",
1938
+ "actors": [
1939
+ "israel",
1940
+ "hezbollah"
1941
+ ],
1942
+ "targets": [
1943
+ "general"
1944
+ ],
1945
+ "severity": "high",
1946
+ "summary": "An alleged Hezbollah member is killed in an airstrike by the Israel Defense Forces on a vehicle near Jannata, Tyre District, South Governorate, Lebanon. (MSN)",
1947
+ "public_summary": "An alleged Hezbollah member is killed in an airstrike by the Israel Defense Forces on a vehicle near Jannata, Tyre District, South Governorate, Lebanon. (MSN)",
1948
+ "source_type": "hf_2025_events",
1949
+ "confirmed": true,
1950
+ "tags": [
1951
+ "military",
1952
+ "armed_conflicts_and_attacks",
1953
+ "levant"
1954
+ ],
1955
+ "impact": {
1956
+ "tension_delta": 9.0,
1957
+ "market_stress_delta": 3.8,
1958
+ "oil_pressure_delta": 2.2,
1959
+ "actor_metric_deltas": {}
1960
+ }
1961
+ },
1962
+ {
1963
+ "event_id": "evt-2025-dec-25-7c508b87",
1964
+ "timestamp": "2025-12-25T12:00:00Z",
1965
+ "topic": "military",
1966
+ "region": "levant",
1967
+ "actors": [
1968
+ "israel",
1969
+ "iran",
1970
+ "hezbollah"
1971
+ ],
1972
+ "targets": [
1973
+ "general"
1974
+ ],
1975
+ "severity": "high",
1976
+ "summary": "Israel\u2013Hezbollah conflict, Iran\u2013Israel relations Three people are killed in two separate Israeli airstrikes targeting vehicles in southern Lebanon, including a member of the Iranian Quds Force.",
1977
+ "public_summary": "Israel\u2013Hezbollah conflict, Iran\u2013Israel relations Three people are killed in two separate Israeli airstrikes targeting vehicles in southern Lebanon, including a member of the Iranian Quds Force.",
1978
+ "source_type": "hf_2025_events",
1979
+ "confirmed": true,
1980
+ "tags": [
1981
+ "military",
1982
+ "armed_conflicts_and_attacks",
1983
+ "levant"
1984
+ ],
1985
+ "impact": {
1986
+ "tension_delta": 9.0,
1987
+ "market_stress_delta": 3.8,
1988
+ "oil_pressure_delta": 2.2,
1989
+ "actor_metric_deltas": {}
1990
+ }
1991
+ }
1992
+ ]
1993
+ }
backend/src/trenches_env/historical_replays/iran_2025_events.json ADDED
The diff for this file is too large to render. See raw diff
 
backend/src/trenches_env/historical_replays/israel_2025_events.json ADDED
The diff for this file is too large to render. See raw diff
 
backend/src/trenches_env/historical_replays/oversight_2025_events.json ADDED
The diff for this file is too large to render. See raw diff
 
backend/src/trenches_env/historical_replays/us_2025_events.json ADDED
The diff for this file is too large to render. See raw diff