diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..50ccb79bee1d6ba92d7946ce77e7ef29db56d0db 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,29 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +assets/audio/courtroom.ogg filter=lfs diff=lfs merge=lfs -text +assets/audio/crowd_shouting.ogg filter=lfs diff=lfs merge=lfs -text +assets/audio/Judgement.ogg filter=lfs diff=lfs merge=lfs -text +assets/audio/steps_in_wood_floor.wav filter=lfs diff=lfs merge=lfs -text +assets/background/CourtRoom.png filter=lfs diff=lfs merge=lfs -text +assets/book/docket-book-closed-keyed.png filter=lfs diff=lfs merge=lfs -text +assets/book/docket-book-closed.png filter=lfs diff=lfs merge=lfs -text +assets/book/docket-book-open-keyed.png filter=lfs diff=lfs merge=lfs -text +assets/book/docket-book-open.png filter=lfs diff=lfs merge=lfs -text +assets/characters/cleopatra-vii.png filter=lfs diff=lfs merge=lfs -text +assets/characters/confucius.png filter=lfs diff=lfs merge=lfs -text +assets/characters/jensen-huang.png filter=lfs diff=lfs merge=lfs -text +assets/characters/john-stuart-mill.png filter=lfs diff=lfs merge=lfs -text +assets/characters/karl-marx.png filter=lfs diff=lfs merge=lfs -text +assets/characters/marcus-aurelius.png filter=lfs diff=lfs merge=lfs -text +assets/characters/niccolo-machiavelli.png filter=lfs diff=lfs merge=lfs -text +assets/characters/sources/cleopatra-vii-chroma.png filter=lfs diff=lfs merge=lfs -text +assets/characters/sources/confucius-chroma.png filter=lfs diff=lfs merge=lfs -text +assets/characters/sources/jensen-huang-chroma.png filter=lfs diff=lfs merge=lfs -text +assets/characters/sources/john-stuart-mill-chroma.png filter=lfs diff=lfs merge=lfs -text +assets/characters/sources/karl-marx-chroma.png filter=lfs diff=lfs merge=lfs -text 
+assets/characters/sources/marcus-aurelius-chroma.png filter=lfs diff=lfs merge=lfs -text +assets/characters/sources/niccolo-machiavelli-chroma.png filter=lfs diff=lfs merge=lfs -text +assets/courtroom-dickinson.jpg filter=lfs diff=lfs merge=lfs -text +assets/foreground/foregroundFence.png filter=lfs diff=lfs merge=lfs -text +assets/foreground/JudgeTable.png filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..8d897e7177c0abe5ba70cdbf1ff3376f0cb26858 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +.env +.env.* +!.env.example +__pycache__/ +*.py[cod] +.venv/ +venv/ +.modal.toml +.cache/ +artifacts/ diff --git a/README.md b/README.md index ba9c9af7249dfff1d20475322e64d16527feaa7f..e1c3c9b74ce1df0e65006407afd3ebe4ef5568fd 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,105 @@ --- -title: JudgeGPT -emoji: 🏢 +title: Judge-GPT +emoji: ⚖️ colorFrom: yellow colorTo: red sdk: gradio -sdk_version: 6.18.0 -python_version: '3.13' +sdk_version: 6.17.3 app_file: app.py pinned: false +license: mit short_description: AI-native miniature trials under 32B. --- -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +# Judge-GPT + +Judge-GPT is a cinematic Gradio Space for the Build Small Hackathon's Thousand Token Wood track. It runs two-minute AI-native miniature trials where small-model agents act as advocates, judge, jurors, clerk, and evidence auditor. + +The app is built to stay under the 32B named-model budget: + +- `openai/gpt-oss-20b` for primary legal reasoning. +- `openbmb/AgentCPM-Explore` for clerk/stage/verdict style. +- `nvidia/Nemotron-Orchestrator-8B` for juror and evidence-auditor review. + +Total named budget: 32B parameters. + +## What the app can do + +- Run cached trials for the Socrates and Barnaby demo cases without network search. 
+- Run the Live Search Tribunal path, which builds a search packet from a user query and stops if live material is too weak to support a trial. +- Add a hypothetical sidebar to shift the framing of a trial without editing cached case files. +- Switch trial pacing between swift, measured, and ceremonial speeds. +- Stage the courtroom with phase-specific visuals, agent puppets, evidence props, captions, and browser audio cues. +- Show the Mind Layer as a compact JSON trace of agent turns and phase metadata. +- Call a Modal streaming endpoint when `MODAL_TRIAL_URL` is configured. Endpoint or model failures stop the trial instead of substituting cached dialogue. +- Retain decree and agent-trace export helpers in `sovereign_bench/export.py` for future UI restoration. + +## Limitations + +- Judge-GPT is not legal advice and should not be used for real legal decisions. +- Live search snippets are not independently verified by the app. +- Output quality depends on Modal GPU availability, token limits, and the configured Hugging Face models. +- Model, Modal, or live retrieval failures stop the current trial rather than returning substitute courtroom dialogue. +- Trial results are not persisted across sessions. +- Export generation remains in the codebase, but the visible download UI is currently hidden. + +## Run locally + +```powershell +python -m pip install -r requirements.txt +python app.py +``` + +## Modal backend + +The Gradio app works locally without Modal. If `MODAL_TRIAL_URL` is set, the Space calls the Modal streaming endpoint and stops the trial if the endpoint is unavailable. + +The deployed Modal endpoint runs each role prompt through a GPU-backed vLLM class on H100 by default. Traces mark successful GPU calls with `runtime: modal-gpu-vllm`, `provider: modal-gpu-vllm`, and `gpu: H100`. If a GPU/model load fails, the trial stops; the app does not substitute provider or cached dialogue. 
+ +```powershell +python -m modal deploy modal_app.py +``` + +Keep the deployed endpoint URL as a Hugging Face Space variable named `MODAL_TRIAL_URL`. + +## Project targets + +Workspace connected to: + +- GitHub: `https://github.com/aliiqbal24/BuildSmallfinal.git` +- Modal profile: `ali-j-iqbal24` +- Hugging Face user: `AliIqbal05` + +## Secrets + +Credentials are not committed to this repo. + +- Local Hugging Face CLI auth is stored in the Hugging Face cache. +- Modal auth is stored in the local Modal profile. +- Modal has a secret named `huggingface` with `HF_TOKEN`. + +Use the Modal secret in functions like this: + +```python +@app.function(secrets=[modal.Secret.from_name("huggingface")]) +def run_model(): + token = os.getenv("HF_TOKEN") +``` + +## Developer guide + +- `app.py`: Gradio UI, CSS, JavaScript audio hooks, HTML renderers, and Modal/local streaming switch. +- `sovereign_bench/engine.py`: trial phases, agent orchestration, verdict assembly, and trace construction. +- `sovereign_bench/llm.py`: Hugging Face calls, strict model error handling, and prompt building. +- `sovereign_bench/retrieval.py`: live search packet construction. +- `sovereign_bench/models.py`: Pydantic schemas for cases, evidence, events, turns, votes, and verdicts. +- `sovereign_bench/cases.py`: cached demo case packets. +- `sovereign_bench/export.py`: dormant decree and trace writers. +- `modal_app.py`: Modal deployment and GPU-backed streaming endpoint. +- `tests/`: engine, case, and rendering regression coverage. 
def _load_env_file(path: str = ".env") -> None:
    """Copy ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

    Blank lines, ``#`` comment lines, and lines without ``=`` are ignored.
    Each remaining line is split on its first ``=``; the key is trimmed, an
    optional leading ``export`` keyword is dropped, and the value is trimmed
    of surrounding whitespace and quote characters.  Variables that are
    already present in the environment are never overwritten.

    Args:
        path: Location of the env file.  Defaults to ``.env`` resolved
            against the current working directory.

    Returns:
        None.  A missing file (or a path that is a directory) is treated as
        "nothing to load" rather than as an error.
    """
    try:
        handle = open(path, encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError, IsADirectoryError):
        # The env file is optional: absence must not stop the app from starting.
        return

    with handle:
        for line in handle:
            stripped = line.strip()
            if not stripped or stripped.startswith("#") or "=" not in stripped:
                continue

            key, value = stripped.split("=", 1)
            key = key.strip()

            # Accept shell-style ``export KEY=value`` lines by dropping the keyword.
            words = key.split(None, 1)
            if len(words) == 2 and words[0] == "export":
                key = words[1].strip()

            # An empty name (e.g. the line ``=value``) is rejected by the OS
            # and would otherwise raise while the module is being imported.
            if not key:
                continue

            # setdefault keeps real environment variables ahead of file values.
            os.environ.setdefault(key, value.strip().strip('"').strip("'"))
# Two-digit code for every trial phase. Each code is simply the phase's
# zero-based position in the sequence below, so it is derived from the order
# instead of being written out by hand.
PHASE_GLYPHS = {
    phase: f"{position:02d}"
    for position, phase in enumerate(
        (
            "pretrial",
            "intake",
            "claims",
            "opening",
            "evidence",
            "questions",
            "deliberation",
            "verdict",
            "appeal",
        )
    )
}

# URL path of each sound cue, keyed by cue name. The mapping is serialized
# into the page script, which hands the chosen path to the browser's Audio().
AUDIO_PATHS = dict(
    score="/gradio_api/file=assets/audio/courtroom.ogg",
    judgement="/gradio_api/file=assets/audio/Judgement.ogg",
    crowd="/gradio_api/file=assets/audio/crowd_shouting.ogg",
    gavel="/gradio_api/file=assets/audio/wood_hammer_01.ogg",
    wood="/gradio_api/file=assets/audio/wood_hit_03.ogg",
    steps="/gradio_api/file=assets/audio/steps_in_wood_floor.wav",
    paper="/gradio_api/file=assets/audio/paper_sound_1.mp3",
    paper_long="/gradio_api/file=assets/audio/paper_sound_4.mp3",
    select="/gradio_api/file=assets/audio/select_001.ogg",
)
!important; + box-shadow: none !important; + color: #321d10; + transition: opacity .32s ease, transform .65s ease; +} + +body.trial-has-started .docket-book-controls { + opacity: 0; + pointer-events: none; + transform: translateX(-50%) rotateX(56deg) rotate(-1deg) scale(.45); +} + +.docket-book-controls::before { + content: none; +} + +.docket-book-controls, +.docket-book-controls > *, +.docket-book-controls .form, +.docket-book-controls .block, +.docket-book-controls .gap, +.docket-book-controls .wrap { + background: transparent !important; + border: 0 !important; + box-shadow: none !important; +} + +.docket-book-controls .docket-book-controls { + position: static !important; + left: auto !important; + top: auto !important; + width: 100% !important; + transform: none !important; + opacity: 1; + pointer-events: auto; +} + +body.trial-has-started .docket-book-controls .docket-book-controls { + pointer-events: none; +} + +.book-control-heading { + margin: 0 0 6px; + color: #694019; + font: 900 12px/1 ui-monospace, SFMono-Regular, Consolas, monospace; + letter-spacing: .08em; + text-transform: uppercase; +} + +.docket-book-controls label, +.docket-book-controls span, +.docket-book-controls .prose { + color: #321d10 !important; +} + +.docket-book-controls label { + font-size: 11px !important; + font-weight: 800 !important; +} + +.docket-book-controls input, +.docket-book-controls textarea, +.docket-book-controls [role="combobox"], +.docket-book-controls .wrap-inner { + border-color: rgba(90, 50, 20, .24) !important; + border-radius: 4px !important; + background: rgba(255, 243, 207, .58) !important; + color: #241509 !important; + box-shadow: inset 0 1px 0 rgba(255,255,255,.24) !important; +} + +.docket-book-controls textarea { + min-height: 42px !important; +} + +.docket-book-controls button.primary { + min-height: 42px; + border: 1px solid rgba(44, 21, 10, .42) !important; + border-radius: 5px !important; + background: #1c130d !important; + color: #fff3d2 !important; + 
box-shadow: inset 0 1px 0 rgba(255,255,255,.12), 0 8px 18px rgba(40, 18, 9, .28); +} + +.docket-book-controls .book-status p { + margin: 0 !important; + color: #5a3519 !important; + font-size: 12px; + line-height: 1.25; +} + +.trial-options { + max-width: 1120px; + margin: 0 auto 14px; + border: 1px solid rgba(255, 226, 154, .18); + border-radius: 6px; + background: rgba(18, 9, 5, .78); + color: #f5dfb5; +} + +.trial-options label, +.trial-options span, +.trial-options .prose { + color: #f5dfb5 !important; +} + +.court-episode-stage { + --spot-x: 50%; + --spot-y: 36%; + position: relative; + min-height: min(880px, calc(100vh - 112px)); + height: min(880px, calc(100vh - 112px)); + margin: 0; + width: 100%; + max-width: none; + overflow: hidden; + isolation: auto; + color: #fff0d2; + border: 0; + border-radius: 0; + background: transparent; + box-shadow: none; +} + +.court-episode-stage::before { + content: ""; + display: none; +} + +.court-episode-stage::after { + content: ""; + display: none; +} + +.court-episode-stage > * { + position: relative; + z-index: 4; +} + +.episode-room { + position: absolute; + inset: 0; + z-index: 3; + background: + url('/gradio_api/file=assets/background/CourtRoom.png') center center / 100% 100% no-repeat, + #26120b; + filter: none; + transform: none; +} + +.trial-started .episode-room, +.phase-intake .episode-room, +.phase-claims .episode-room, +.phase-opening .episode-room, +.phase-evidence .episode-room, +.phase-questions .episode-room, +.phase-deliberation .episode-room, +.phase-verdict .episode-room, +.phase-appeal .episode-room { + filter: none; + transform: none; +} + +.phase-intake, +.phase-appeal { --spot-x: 50%; --spot-y: 30%; } +.phase-claims, +.phase-opening { --spot-x: 43%; --spot-y: 66%; } +.phase-evidence { --spot-x: 70%; --spot-y: 56%; } +.phase-questions, +.phase-verdict { --spot-x: 50%; --spot-y: 34%; } +.phase-deliberation { --spot-x: 79%; --spot-y: 60%; } + +.episode-title { + position: absolute; + left: 26px; + 
top: 22px; + z-index: 9; + max-width: min(780px, calc(100% - 330px)); + text-shadow: 0 3px 18px rgba(0, 0, 0, .75); +} + +.episode-kicker, +.prop-label, +.caption-phase, +.tooltip-meta, +.drawer-kicker { + color: #f4d58f; + font: 800 11px/1.2 ui-monospace, SFMono-Regular, Consolas, monospace; + letter-spacing: .06em; + text-transform: uppercase; +} + +.episode-title h1 { + margin: 4px 0 6px; + max-width: 780px; + color: #fff4d7; + font-size: clamp(28px, 4.2vw, 58px); + line-height: .98; + letter-spacing: 0; +} + +.episode-title p { + margin: 0; + max-width: 720px; + color: #f8dcaa; + font-size: 15px; + line-height: 1.38; +} + +.audio-deck { + display: none; +} + +.sound-toggle { + position: fixed; + left: 18px; + bottom: 18px; + z-index: 80; + width: 46px; + height: 46px; + border: 1px solid rgba(255, 226, 154, .48); + border-radius: 50%; + background: rgba(22, 11, 7, .82); + box-shadow: 0 12px 28px rgba(0, 0, 0, .42), inset 0 1px 0 rgba(255, 255, 255, .12); + cursor: pointer; +} + +.sound-toggle:hover, +.sound-toggle:focus-visible { + outline: none; + border-color: rgba(255, 226, 154, .82); + background: rgba(41, 20, 12, .92); +} + +.sound-toggle .sound-icon { + position: absolute; + left: 13px; + top: 15px; + width: 10px; + height: 16px; + border-radius: 2px 0 0 2px; + background: #ffe5a6; +} + +.sound-toggle .sound-icon::before { + content: ""; + position: absolute; + left: 7px; + top: -3px; + width: 14px; + height: 22px; + border: 3px solid #ffe5a6; + border-left: 0; + border-radius: 0 18px 18px 0; +} + +.sound-toggle .sound-icon::after { + content: ""; + position: absolute; + left: 20px; + top: -9px; + width: 3px; + height: 34px; + border-radius: 4px; + background: #d64d45; + opacity: 0; + transform: rotate(42deg); + transform-origin: center; +} + +.sound-toggle.muted .sound-icon::after { + opacity: 1; +} + +.episode-book { + position: absolute; + left: 50%; + top: 12%; + z-index: 12; + width: min(760px, calc(100% - 32px)); + aspect-ratio: 3 / 2; + transform: 
translateX(-50%) rotateX(0) rotateZ(-1deg); + transform-origin: center bottom; + color: #2b1b10; + filter: drop-shadow(0 34px 36px rgba(0, 0, 0, .48)); + pointer-events: none; + transition: top .85s ease, width .85s ease, transform .85s ease, filter .85s ease, opacity .85s ease; +} + +.book-art { + position: absolute; + inset: 0; + width: 100%; + height: 100%; + object-fit: contain; + pointer-events: none; + user-select: none; + transition: opacity .36s ease; +} + +.book-art.closed-art { + opacity: 0; +} + +.episode-book.closed { + top: 36%; + width: min(245px, 30vw); + transform: translateX(-50%) rotateX(56deg) rotateZ(1deg); + opacity: .92; + filter: drop-shadow(0 18px 18px rgba(0, 0, 0, .45)); +} + +.episode-book.closed .open-art { + opacity: 0; +} + +.episode-book.closed .closed-art { + opacity: 1; +} + +.episode-book.closed .book-open-content { + opacity: 0; + pointer-events: none; +} + +.book-open-content { + position: absolute; + inset: 17% 10% 13%; + z-index: 2; + display: grid; + grid-template-columns: 1fr 1fr; + gap: 72px; + padding: 0 28px; + transition: opacity .35s ease; +} + +.book-open-content h2 { + margin: 0 0 10px; + color: #4c2a12; + font-size: 30px; + letter-spacing: 0; +} + +.book-open-content p, +.book-entry { + color: #3c2615; + font-size: 15px; + line-height: 1.34; +} + +.book-entry { + margin: 11px 0; + padding-left: 12px; + border-left: 3px solid rgba(111, 61, 23, .36); +} + +.judge-dais { + position: absolute; + left: 50%; + top: 27%; + z-index: 6; + width: min(360px, 32vw); + min-width: 230px; + transform: translateX(-50%); + text-align: center; +} + +.bench-front { + display: none; +} + +.gavel { + display: none; +} + +.gavel::before { + content: ""; + display: none; +} + +.phase-verdict .gavel { + animation: gavel-hit .55s ease-out both; +} + +.counsel-table { + position: absolute; + bottom: 19%; + z-index: 6; + width: min(255px, 22vw); + height: 84px; + border: 0; + background: transparent; + box-shadow: none; +} + 
+.counsel-table.left { left: 17%; } +.counsel-table.right { right: 17%; } + +.trial-floor-mark { + display: none; +} + +.witness-area { + position: absolute; + right: 8.5%; + bottom: 28%; + z-index: 6; + width: min(190px, 18vw); + height: 98px; + border: 0; + background: transparent; + box-shadow: none; +} + +.jury-benches { + position: absolute; + top: 43%; + z-index: 7; + width: min(220px, 16vw); + min-width: 150px; + display: grid; + gap: 6px; +} + +.jury-benches.left { + left: 4.5%; +} + +.jury-benches.right { + right: 4.5%; +} + +.jury-benches.left .jury-row { + transform: rotate(-7deg) skewY(-3deg); +} + +.jury-benches.right .jury-row { + transform: rotate(7deg) skewY(3deg); +} + +.jury-rail { + display: none; +} + +.jury-row { + display: grid; + grid-template-columns: repeat(3, 1fr); + gap: 8px; + align-items: end; +} + +.gallery-benches { + display: none; +} + +.gallery-benches div { + display: none; +} + +.prop-label { + display: none; +} + +.foreground-props { + position: absolute; + inset: 0; + z-index: 13; + pointer-events: none; +} + +.foreground-fence, +.judge-table-foreground { + position: absolute; + display: block; + max-width: none; + height: auto; + filter: none; + opacity: 1; + pointer-events: none; + user-select: none; +} + +.foreground-fence { + bottom: -1.5%; + width: 47%; +} + +.foreground-fence.fence-left { + left: 0; + transform: translateX(-2%); +} + +.foreground-fence.fence-right { + right: 0; + transform: translateX(2%); +} + +.judge-table-foreground { + left: 50%; + top: 35%; + z-index: 1; + width: 46%; + transform: translateX(-50%); +} + +.puppet { + --skin: #c99257; + --robe: #282128; + --accent: #8a2f2f; + --portrait-width: 74px; + --portrait-top: -14px; + position: absolute; + z-index: 8; + width: 72px; + height: 128px; + transform: translate(-50%, -100%); + transform-origin: center bottom; + filter: none; + color: inherit; + text-decoration: none; +} + +.puppet.small { + width: 50px; + height: 94px; + --portrait-width: 54px; + 
--portrait-top: -8px; +} + +.puppet.active { + animation: puppet-breathe 1.45s ease-in-out infinite; +} + +.puppet.walking { + animation: lawyer-walk 1.9s ease-in-out infinite; +} + +.puppet.judge { + left: 50%; + top: 31%; + --skin: #c38a55; + --robe: #1b1b20; + --accent: #79242a; + --portrait-width: 96px; + --portrait-top: -28px; +} + +.puppet.clerk { + left: 43%; + top: 41%; + --skin: #b77b52; + --robe: #365548; + --accent: #2f6f5e; +} + +.puppet.auric { + left: 24%; + top: 62%; + --skin: #c9975d; + --robe: #5b2719; + --accent: #a45c25; +} + +.speaker-auric .puppet.auric { + left: 43%; + top: 66%; +} + +.puppet.sable { + left: 75%; + top: 62%; + --skin: #a86d4a; + --robe: #1d3045; + --accent: #254f7a; +} + +.speaker-sable .puppet.sable { + left: 57%; + top: 66%; +} + +.puppet.auditor { + left: 71%; + top: 55%; + --skin: #c6a65b; + --robe: #4b3d1b; + --accent: #8d6b1f; +} + +.puppet-portrait { + position: absolute; + left: 50%; + top: var(--portrait-top); + z-index: 3; + width: var(--portrait-width); + height: auto; + max-height: 118px; + transform: translateX(-50%); + object-fit: contain; + pointer-events: none; +} + +.phase-evidence .puppet.auditor { + animation: evidence-focus 1.35s ease-in-out infinite; +} + +.puppet::before { + content: ""; + position: absolute; + left: 50%; + top: 0; + width: 44px; + height: 44px; + transform: translateX(-50%); + border: 2px solid rgba(255, 232, 174, .58); + border-radius: 50%; + background: + radial-gradient(circle at 34% 32%, rgba(255,255,255,.38), transparent 22%), + radial-gradient(circle at 36% 42%, #1b120c 0 2px, transparent 2.5px), + radial-gradient(circle at 62% 42%, #1b120c 0 2px, transparent 2.5px), + linear-gradient(180deg, var(--skin), #8b5638); +} + +.puppet::after { + content: ""; + position: absolute; + left: 50%; + top: 48px; + width: 58px; + height: 70px; + transform: translateX(-50%); + border: 1px solid rgba(255, 232, 174, .22); + border-radius: 24px 24px 8px 8px; + background: + linear-gradient(90deg, 
transparent 46%, rgba(255, 226, 154, .14) 49%, transparent 52%), + linear-gradient(180deg, var(--accent), var(--robe) 52%, #130a07); +} + +.puppet .mouth { + position: absolute; + left: 50%; + top: 27px; + z-index: 2; + width: 15px; + height: 7px; + transform: translateX(-50%); + border-bottom: 2px solid #28150c; + border-radius: 0 0 18px 18px; +} + +.puppet.active .mouth, +.puppet.walking .mouth { + animation: speak-mouth .5s ease-in-out infinite; +} + +.speech-bubble { + position: absolute; + left: 50%; + bottom: calc(100% + 12px); + z-index: 18; + width: 260px; + max-width: min(320px, calc(100vw - 32px)); + transform: translateX(-50%); + padding: 10px 12px; + border: 1px solid rgba(255, 226, 154, .48); + border-radius: 6px; + background: rgba(255, 244, 215, .94); + color: #2d1b0d; + box-shadow: 0 14px 30px rgba(0, 0, 0, .34); + font-size: 12px; + font-weight: 700; + line-height: 1.3; + pointer-events: none; +} + +.speech-bubble::after { + content: ""; + position: absolute; + left: 50%; + bottom: -8px; + width: 14px; + height: 14px; + transform: translateX(-50%) rotate(45deg); + border-right: 1px solid rgba(255, 226, 154, .48); + border-bottom: 1px solid rgba(255, 226, 154, .48); + background: rgba(255, 244, 215, .94); +} + +.tooltip { + position: absolute; + left: 50%; + bottom: calc(100% + 10px); + z-index: 20; + width: 320px; + max-width: min(360px, calc(100vw - 32px)); + transform: translateX(-50%) translateY(6px); + opacity: 0; + pointer-events: none; + padding: 8px 10px; + border: 1px solid rgba(255, 226, 154, .34); + border-radius: 5px; + background: rgba(17, 9, 5, .88); + color: #fff0d2; + box-shadow: 0 12px 24px rgba(0,0,0,.36); + transition: opacity .18s ease, transform .18s ease; +} + +.puppet:hover .tooltip, +.puppet:focus-within .tooltip, +.juror:hover .tooltip, +.juror:focus-within .tooltip { + opacity: 1; + transform: translateX(-50%) translateY(0); +} + +.tooltip strong { + display: block; + color: #fff6df; + font-size: 13px; +} + +.tooltip p { + 
margin: 6px 0 0; + color: #f5dfb5; + font-size: 11px; + line-height: 1.28; + white-space: normal; +} + +.tooltip-meta { + margin-top: 3px; + color: #f4d58f; + font-size: 10px; +} + +.tooltip-io-label, +.thread-label { + display: block; + margin-top: 7px; + color: #f4d58f; + font: 800 10px/1.2 ui-monospace, SFMono-Regular, Consolas, monospace; + text-transform: uppercase; +} + +.ai-thread-modal { + display: none; + position: fixed; + inset: max(18px, 4vh) max(18px, 5vw); + z-index: 120; + overflow: auto; + padding: 20px; + border: 1px solid rgba(255, 226, 154, .42); + border-radius: 8px; + background: rgba(18, 9, 5, .96); + color: #fff0d2; + box-shadow: 0 24px 70px rgba(0, 0, 0, .58); +} + +.ai-thread-modal:target { + display: block; +} + +.thread-close { + position: sticky; + top: 0; + float: right; + padding: 7px 10px; + border: 1px solid rgba(255, 226, 154, .38); + border-radius: 4px; + background: rgba(255, 226, 154, .12); + color: #fff0d2; + text-decoration: none; + font: 800 11px/1 ui-monospace, SFMono-Regular, Consolas, monospace; +} + +.thread-title { + margin: 0 0 4px; + color: #fff6df; + font-size: 22px; +} + +.thread-subtitle { + margin: 0 0 16px; + color: #f4d58f; + font: 800 12px/1.3 ui-monospace, SFMono-Regular, Consolas, monospace; + text-transform: uppercase; +} + +.thread-turn { + margin: 0 0 18px; + padding-bottom: 16px; + border-bottom: 1px solid rgba(255, 226, 154, .16); +} + +.thread-turn:last-child { + border-bottom: 0; +} + +.thread-block { + margin: 7px 0 0; + white-space: pre-wrap; + color: #f8dfaa; + font: 12px/1.42 ui-monospace, SFMono-Regular, Consolas, monospace; +} + +.juror { + --face: #c89259; + --juror-image: none; + position: relative; + height: 72px; + transform-origin: center bottom; + filter: none; + color: inherit; + text-decoration: none; +} + +.juror.active { + animation: juror-react .82s ease-in-out infinite alternate; +} + +.juror .speech-bubble { + bottom: calc(100% + 6px); + width: 230px; +} + +.juror-face { + position: 
absolute; + left: 50%; + top: 0; + width: 30px; + height: 30px; + transform: translateX(-50%); + border: 2px solid rgba(255, 232, 174, .5); + border-radius: 50%; + background: + radial-gradient(circle at 35% 40%, #1d1109 0 2px, transparent 2.5px), + radial-gradient(circle at 64% 40%, #1d1109 0 2px, transparent 2.5px), + linear-gradient(180deg, var(--face), #835235); +} + +.juror-face::after { + content: ""; + position: absolute; + left: 10px; + bottom: 9px; + width: 14px; + height: 7px; + border-bottom: 2px solid #25140c; + border-radius: 0 0 18px 18px; +} + +.juror-portrait { + position: absolute; + left: 50%; + top: -19px; + z-index: 3; + width: 58px; + height: 74px; + transform: translateX(-50%); + object-fit: contain; + pointer-events: none; +} + +.juror-body { + position: absolute; + left: 50%; + top: 32px; + width: 36px; + height: 36px; + transform: translateX(-50%); + border-radius: 20px 20px 7px 7px; + border: 1px solid rgba(255, 232, 174, .18); + background: linear-gradient(180deg, #5b496f, #211726); +} + +.phase-deliberation .juror:nth-child(odd) { + animation-delay: .18s; +} + +.evidence-props { + position: absolute; + left: 55%; + right: 11%; + bottom: 36%; + z-index: 9; + display: flex; + flex-wrap: wrap; + gap: 8px; + justify-content: center; + pointer-events: auto; +} + +.evidence-sheet { + width: 96px; + min-height: 72px; + padding: 8px; + transform: rotate(var(--tilt)); + border: 1px solid rgba(56, 32, 15, .22); + border-radius: 3px; + background: + linear-gradient(135deg, transparent 0 84%, rgba(64, 38, 20, .18) 85%), + #fff6df; + color: #372212; + box-shadow: 0 10px 20px rgba(0,0,0,.28); + opacity: .18; + transition: transform .25s ease, opacity .25s ease; +} + +.phase-evidence .evidence-sheet, +.phase-questions .evidence-sheet, +.phase-deliberation .evidence-sheet, +.phase-verdict .evidence-sheet, +.phase-appeal .evidence-sheet { + opacity: .96; + animation: paper-land .55s ease-out both; +} + +.evidence-sheet:hover { + transform: rotate(0) 
translateY(-8px) scale(1.08); + z-index: 15; +} + +.evidence-sheet strong { + display: block; + margin-bottom: 4px; + color: #254f7a; + font: 800 12px/1 ui-monospace, SFMono-Regular, Consolas, monospace; +} + +.evidence-sheet span { + display: block; + font-size: 11px; + line-height: 1.2; +} + +.trial-caption { + position: absolute; + left: 50%; + bottom: 108px; + z-index: 14; + width: min(870px, calc(100% - 44px)); + transform: translateX(-50%); + padding: 12px 16px 13px; + border: 1px solid rgba(255, 226, 154, .34); + border-radius: 6px; + background: rgba(13, 7, 4, .78); + backdrop-filter: blur(12px); + box-shadow: 0 18px 36px rgba(0,0,0,.38); +} + +.caption-title { + margin-top: 3px; + color: #fff3d7; + font-size: 20px; + font-weight: 800; +} + +.caption-body { + margin-top: 5px; + color: #f8dfaa; + font-size: 14px; + line-height: 1.36; + white-space: pre-wrap; +} + +.decree-ribbon { + position: absolute; + right: 26px; + top: 22px; + z-index: 10; + max-width: 230px; + padding: 9px 11px; + border: 1px solid rgba(255, 226, 154, .26); + border-radius: 5px; + background: rgba(18, 9, 5, .68); + color: #ffe6ae; + font: 800 11px/1.35 ui-monospace, SFMono-Regular, Consolas, monospace; + text-transform: uppercase; +} + +.phase-verdict .judge-dais, +.phase-questions .judge-dais { + animation: bench-lean .9s ease-in-out infinite alternate; +} + +.phase-deliberation .jury-benches { + animation: jury-murmur .7s ease-in-out infinite alternate; +} + +.stage-prop-link { + cursor: help; +} + +.drawer-shell { + max-width: 1500px; + margin: 12px auto 0; +} + +.drawer-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(240px, 1fr)); + gap: 18px 28px; +} + +.drawer-text-stack { + color: var(--ink); + line-height: 1.42; +} + +.drawer-text-block { + color: var(--ink); +} + +.drawer-text-block h4 { + margin: 5px 0 7px; +} + +.drawer-text-block p, +.drawer-empty { + margin: 0 0 8px; + line-height: 1.38; + white-space: pre-wrap; +} + +.vote-liable { color: 
var(--red); font-weight: 800; } +.vote-not_liable { color: var(--green); font-weight: 800; } +.vote-uncertain { color: var(--blue); font-weight: 800; } + +.mind-text { + max-height: 340px; + overflow: auto; + color: var(--ink); + font: 12px/1.42 ui-monospace, SFMono-Regular, Consolas, monospace; + white-space: pre-wrap; +} + +@keyframes puppet-breathe { + 0%, 100% { transform: translate(-50%, -100%) translateY(0); } + 50% { transform: translate(-50%, -100%) translateY(-4px); } +} + +@keyframes lawyer-walk { + 0%, 100% { transform: translate(-50%, -100%) translateY(0) rotate(-1deg); } + 25% { transform: translate(-50%, -100%) translateY(-7px) rotate(2deg); } + 50% { transform: translate(-50%, -100%) translateY(0) rotate(1deg); } + 75% { transform: translate(-50%, -100%) translateY(-5px) rotate(-2deg); } +} + +@keyframes speak-mouth { + 0%, 100% { height: 5px; border-radius: 0 0 18px 18px; } + 50% { height: 10px; border-radius: 50%; border: 2px solid #28150c; } +} + +@keyframes juror-react { + from { transform: translateY(0) rotate(-1deg); } + to { transform: translateY(-5px) rotate(2deg); } +} + +@keyframes evidence-focus { + 0%, 100% { transform: translate(-50%, -100%) translateY(0) scale(1); } + 50% { transform: translate(-50%, -100%) translateY(-6px) scale(1.035); } +} + +@keyframes paper-land { + from { transform: rotate(var(--tilt)) translateY(-18px); opacity: 0; } + to { transform: rotate(var(--tilt)) translateY(0); opacity: .96; } +} + +@keyframes bench-lean { + from { transform: translateX(-50%) translateY(0); } + to { transform: translateX(-50%) translateY(-5px); } +} + +@keyframes jury-murmur { + from { transform: translateX(0); } + to { transform: translateX(-3px); } +} + +@keyframes gavel-hit { + 0% { transform: rotate(-18deg) translateY(0); } + 45% { transform: rotate(21deg) translateY(18px); } + 100% { transform: rotate(-18deg) translateY(0); } +} + +@media (max-width: 820px) { + .docket-book-controls { + position: fixed; + top: 262px; + width: 
calc(100vw - 52px); + transform: translateX(-50%) rotate(-1deg); + } + + .court-episode-stage { + height: 1280px; + min-height: 1280px; + } + + .episode-room { + background-position: center top; + } + + .episode-title { + left: 16px; + right: 16px; + max-width: none; + } + + .decree-ribbon { + top: 164px; + left: 16px; + right: auto; + max-width: calc(100% - 32px); + } + + .episode-book { + top: 220px; + width: min(680px, calc(100% - 20px)); + } + + .episode-book.closed { + top: 430px; + width: 210px; + } + + .book-open-content { + grid-template-columns: 1fr; + gap: 10px; + inset: 17% 12% 14%; + padding: 0 18px; + } + + .book-open-content h2 { + font-size: 22px; + margin-bottom: 5px; + } + + .book-open-content p, + .book-entry { + font-size: 12px; + line-height: 1.22; + } + + .book-entry { + margin: 5px 0; + } + + .judge-dais { + top: 390px; + width: 280px; + } + + .counsel-table.left { + left: 7%; + bottom: 470px; + } + + .counsel-table.right { + right: 7%; + bottom: 470px; + } + + .counsel-table { + width: 154px; + } + + .puppet.auric { + left: 20%; + top: 650px; + } + + .puppet.sable { + left: 80%; + top: 650px; + } + + .speaker-auric .puppet.auric { + left: 42%; + top: 730px; + } + + .speaker-sable .puppet.sable { + left: 58%; + top: 730px; + } + + .puppet.clerk { + left: 35%; + top: 560px; + } + + .puppet.auditor { + left: 78%; + top: 540px; + } + + .witness-area { + right: 5%; + bottom: 580px; + width: 138px; + } + + .jury-benches { + top: 520px; + width: 126px; + min-width: 126px; + } + + .jury-benches.left { + left: 5%; + } + + .jury-benches.right { + right: 5%; + } + + .foreground-fence { + bottom: -2px; + width: 64%; + } + + .foreground-fence.fence-left { + left: -17%; + } + + .foreground-fence.fence-right { + right: -17%; + } + + .judge-table-foreground { + top: 405px; + width: 760px; + } + + .evidence-props { + left: 8%; + right: 8%; + bottom: 410px; + } + + .trial-caption { + bottom: 105px; + } + + .gallery-benches { + bottom: 42px; + 
grid-template-columns: repeat(3, 1fr); + } +} +""" + +APP_JS = f""" +() => {{ + const paths = {json.dumps(AUDIO_PATHS)}; + const SCORE_BASE_VOLUME = 0.16; + const SCORE_QUIET_VOLUME = 0.035; + const SCORE_BREATH_INTERVAL_MS = 20000; + const SCORE_BREATH_DURATION_MS = 5000; + const make = (name, volume = 1, loop = false) => {{ + const audio = new Audio(paths[name]); + audio.preload = 'auto'; + audio.volume = volume; + audio.loop = loop; + return audio; + }}; + + if (!window.SovereignCourtAudio) {{ + const controller = {{ + unlocked: false, + lastPhase: null, + muted: false, + scoreVolume: SCORE_BASE_VOLUME, + crowdVolume: 0.0, + fadeFrame: null, + breathTimer: null, + score: make('score', SCORE_BASE_VOLUME, true), + crowd: make('crowd', 0.0, true), + begin() {{ + this.unlocked = true; + this.ensureLooping(); + this.startBreathing(); + this.play('select', 0.26); + window.setTimeout(() => this.play('paper_long', 0.45), 120); + window.setTimeout(() => this.play('gavel', 0.72), 520); + this.observePhase(); + this.updateToggle(); + }}, + ensureLooping() {{ + if (!this.unlocked || this.muted) return; + this.applyLoopVolumes(); + this.score.play().catch(() => {{}}); + this.crowd.play().catch(() => {{}}); + }}, + applyLoopVolumes() {{ + this.score.volume = this.muted ? 0 : this.scoreVolume; + this.crowd.volume = this.muted ? 
0 : this.crowdVolume; + }}, + play(name, volume = 1) {{ + if (!this.unlocked || this.muted) return; + const cue = make(name, volume, false); + cue.play().catch(() => {{}}); + }}, + setCrowd(volume) {{ + this.crowdVolume = volume; + this.applyLoopVolumes(); + }}, + fadeScore(toVolume, duration, onComplete) {{ + if (this.fadeFrame) window.cancelAnimationFrame(this.fadeFrame); + const fromVolume = this.scoreVolume; + const started = window.performance.now(); + const step = (now) => {{ + const progress = Math.min(1, (now - started) / duration); + this.scoreVolume = fromVolume + ((toVolume - fromVolume) * progress); + this.applyLoopVolumes(); + if (progress < 1) {{ + this.fadeFrame = window.requestAnimationFrame(step); + }} else {{ + this.fadeFrame = null; + if (onComplete) onComplete(); + }} + }}; + this.fadeFrame = window.requestAnimationFrame(step); + }}, + breatheScore() {{ + if (!this.unlocked) return; + const halfDuration = SCORE_BREATH_DURATION_MS / 2; + this.fadeScore(SCORE_QUIET_VOLUME, halfDuration, () => {{ + this.fadeScore(SCORE_BASE_VOLUME, halfDuration); + }}); + }}, + startBreathing() {{ + if (this.breathTimer) return; + this.breathTimer = window.setInterval(() => this.breatheScore(), SCORE_BREATH_INTERVAL_MS); + }}, + toggleMuted() {{ + this.muted = !this.muted; + if (this.muted) {{ + this.applyLoopVolumes(); + this.score.pause(); + this.crowd.pause(); + }} else {{ + this.ensureLooping(); + }} + this.updateToggle(); + }}, + updateToggle() {{ + document.querySelectorAll('.sound-toggle').forEach((button) => {{ + button.classList.toggle('muted', this.muted); + button.setAttribute('aria-pressed', String(this.muted)); + button.setAttribute('title', this.muted ? 
'Sound off' : 'Sound on'); + }}); + }}, + cuePhase(phase) {{ + if (!this.unlocked || !phase || phase === this.lastPhase) return; + this.lastPhase = phase; + if (phase === 'intake') {{ + this.setCrowd(0.08); + this.play('paper', 0.45); + this.play('wood', 0.42); + }} else if (phase === 'claims' || phase === 'opening') {{ + this.setCrowd(0.045); + this.play('steps', 0.33); + }} else if (phase === 'evidence') {{ + this.setCrowd(0.035); + this.play('paper_long', 0.52); + }} else if (phase === 'questions') {{ + this.setCrowd(0.02); + this.play('wood', 0.34); + }} else if (phase === 'deliberation') {{ + this.setCrowd(0.18); + }} else if (phase === 'verdict') {{ + this.setCrowd(0.0); + this.play('judgement', 0.66); + window.setTimeout(() => this.play('gavel', 0.9), 650); + }} else if (phase === 'appeal') {{ + this.setCrowd(0.035); + this.play('paper_long', 0.5); + }} + }}, + observePhase() {{ + const stage = document.querySelector('.court-episode-stage'); + if (stage) this.cuePhase(stage.dataset.phase); + this.updateToggle(); + }} + }}; + + window.SovereignCourtAudio = controller; + + const observer = new MutationObserver(() => controller.observePhase()); + observer.observe(document.body, {{ childList: true, subtree: true, attributes: true, attributeFilter: ['data-phase'] }}); + + document.addEventListener('click', (event) => {{ + const toggle = event.target.closest('.sound-toggle'); + if (toggle) {{ + event.preventDefault(); + controller.toggleMuted(); + return; + }} + if (event.target.closest('.docket-book-controls')) {{ + controller.play('select', 0.22); + }} + }}, true); + }} +}} +""" + +APP_HEAD = f""" + +""" + +START_JS = """ +(case_label, search_query, hypothetical, speed, mind_layer) => { + document.body.classList.add('trial-has-started'); + if (window.SovereignCourtAudio) { + window.SovereignCourtAudio.begin(); + } + return [case_label, search_query, hypothetical, speed, mind_layer]; +} +""" + +CHARACTERS = { + JUDGE_NAME: { + "class": "judge", + "name": 
JUDGE_NAME, + "role": "Stoic presiding judge", + "model": "gpt-oss-20b", + "image": "/gradio_api/file=assets/characters/marcus-aurelius.png", + }, + "Clerk Meridian": { + "class": "clerk", + "name": "Clerk Meridian", + "role": "Court clerk", + "model": "AgentCPM-Explore", + }, + "Advocate Auric": { + "class": "auric", + "name": "Advocate Auric", + "role": "Claimant advocate", + "model": "gpt-oss-20b", + }, + "Counsel Sable": { + "class": "sable", + "name": "Counsel Sable", + "role": "Respondent advocate", + "model": "gpt-oss-20b", + }, + "Auditor Prism": { + "class": "auditor", + "name": "Auditor Prism", + "role": "Evidence auditor", + "model": "Nemotron-Orchestrator-8B", + }, + "Nemotron Jury": { + "class": "jury", + "name": "Nemotron Jury", + "role": "Jury panel", + "model": "Nemotron-Orchestrator-8B", + }, +} + +JUROR_FACES = { + "Karl Marx": "#d0b79c", + "John Stuart Mill": "#c99b72", + "Confucius": "#c49a64", + "Cleopatra VII": "#b98755", + "Niccolo Machiavelli": "#b88963", + "Jensen Huang": "#b37758", +} + +JUROR_IMAGES = { + "Karl Marx": "/gradio_api/file=assets/characters/karl-marx.png", + "John Stuart Mill": "/gradio_api/file=assets/characters/john-stuart-mill.png", + "Confucius": "/gradio_api/file=assets/characters/confucius.png", + "Cleopatra VII": "/gradio_api/file=assets/characters/cleopatra-vii.png", + "Niccolo Machiavelli": "/gradio_api/file=assets/characters/niccolo-machiavelli.png", + "Jensen Huang": "/gradio_api/file=assets/characters/jensen-huang.png", +} + +PHASE_AGENTS = { + "pretrial": ["Clerk Meridian"], +} + + +def _remote_events(request: TrialRequest) -> Iterable[TrialEvent] | None: + endpoint = os.getenv("MODAL_TRIAL_URL", "").strip() + if not endpoint: + return None + + def iterator() -> Iterable[TrialEvent]: + with httpx.stream("POST", endpoint, json=request.model_dump(), timeout=900.0) as response: + response.raise_for_status() + for line in response.iter_lines(): + if line: + yield TrialEvent.model_validate_json(line) + + return 
iterator() + + +def get_events(request: TrialRequest) -> Iterable[TrialEvent]: + remote = _remote_events(request) + if remote is not None: + yield from remote + return + delay = {"swift": 1.4, "measured": 2.4, "ceremonial": 3.4}[request.speed] + yield from stream_trial(request, delay=delay) + + +def _escape(value: str) -> str: + return ( + value.replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace('"', """) + ) + + +def _latest_packet_title(events: list[TrialEvent]) -> tuple[str, str]: + if not events: + return ( + "Judge-GPT", + "The gallery doors open on an AI-native courtroom. Choose a case from the docket book and begin the proceeding.", + ) + lines = events[0].body.splitlines() + title = lines[0] if lines else "Judge-GPT" + subtitle = lines[1] if len(lines) > 1 else events[0].title + return title, subtitle + + +def _active_agents_for(event: TrialEvent | None) -> set[str]: + if event is None: + return set(PHASE_AGENTS["pretrial"]) + if not event.turns: + return set() + return {event.turns[0].agent} + + +def _active_speaker_for(event: TrialEvent | None) -> str: + if event is None: + return "Clerk Meridian" + if not event.turns: + return "" + return event.turns[0].agent + + +def _speaker_class_for(speaker: str) -> str: + if not speaker: + return "" + if speaker in CHARACTERS: + return f" speaker-{CHARACTERS[speaker]['class']}" + return " speaker-" + "".join(ch.lower() if ch.isalnum() else "-" for ch in speaker).strip("-") + + +def _latest_turn_text(event: TrialEvent | None, agent: str) -> str: + if event is None: + return "" + turn = next((turn for turn in event.turns if turn.agent == agent), None) + if turn is None: + return "" + return _short_text(turn.content, 210) + + +def _thread_id(name: str) -> str: + return "ai-thread-" + "".join(ch.lower() if ch.isalnum() else "-" for ch in name).strip("-") + + +def _turns_for_agent(events: list[TrialEvent], agent: str) -> list[dict[str, str]]: + turns = [] + for event in events: + for turn in 
event.turns: + if turn.agent == agent: + turns.append( + { + "phase": event.phase, + "title": event.title, + "role": turn.role, + "model": turn.model, + "confidence": f"{turn.confidence:.2f}", + "input": turn.input or "Prompt unavailable for this turn.", + "output": turn.content or "No output captured yet.", + } + ) + return turns + + +def _thread_for_character(events: list[TrialEvent], agent: str) -> list[dict[str, str]]: + if agent in JUROR_FACES: + turns = _turns_for_agent(events, agent) + vote = next((vote for event in reversed(events) for vote in event.votes if vote.juror == agent), None) + if not turns: + turns = _turns_for_agent(events, "Nemotron Jury") + if vote and turns: + turns = [ + dict( + turn, + output=f"{turn['output']}\n\n{agent} persona: {vote.persona}\n{agent} vote: {vote.vote}\nReason: {vote.reason}", + ) + for turn in turns + ] + return turns + return _turns_for_agent(events, agent) + + +def _short_text(value: str, limit: int = 170) -> str: + squashed = " ".join(value.split()) + return squashed if len(squashed) <= limit else squashed[: limit - 1].rstrip() + "..." + + +def _tooltip(name: str, role: str, model: str, turns: list[dict[str, str]]) -> str: + latest = turns[-1] if turns else None + input_preview = _short_text(latest["input"] if latest else "Waiting for this model to receive its first prompt.") + output_preview = _short_text(latest["output"] if latest else "No output has been emitted yet.") + return ( + "" + f"{_escape(name)}" + f"{_escape(role)}" + f"{_escape(model)}" + "Input" + f"

{_escape(input_preview)}

" + "Output" + f"

{_escape(output_preview)}

" + "Click to open full thread" + "
" + ) + + +def _thread_modal(name: str, role: str, model: str, turns: list[dict[str, str]]) -> str: + body = ( + "".join( + "
" + f"
{_escape(turn['phase'])} / {_escape(turn['role'])} / confidence {turn['confidence']}
" + "Input" + f"
{_escape(turn['input'])}
" + "Output" + f"
{_escape(turn['output'])}
" + "
" + for turn in turns + ) + or "
Waiting for this model thread to appear.
" + ) + return ( + f"" + ) + + +def _puppet(agent: str, active_agents: set[str], phase: str, events: list[TrialEvent], latest: TrialEvent | None) -> str: + meta = CHARACTERS[agent] + active = " active" if agent in active_agents else "" + walking = " walking" if agent in {"Advocate Auric", "Counsel Sable"} and agent in active_agents else "" + small = " small" if agent in {"Clerk Meridian", "Auditor Prism"} else "" + turns = _thread_for_character(events, agent) + bubble = "" + if agent in active_agents: + speech = _latest_turn_text(latest, agent) + if speech: + bubble = f"{_escape(speech)}" + portrait = "" + if meta.get("image"): + portrait = ( + f"" + ) + return ( + f"" + f"{portrait}" + "" + f"{bubble}" + f"{_tooltip(meta['name'], meta['role'], meta['model'], turns)}" + "" + ) + + +def _juror(name: str, active: bool, events: list[TrialEvent] | None = None, latest: TrialEvent | None = None) -> str: + face = JUROR_FACES.get(name, "#c89259") + image = JUROR_IMAGES.get(name, "") + active_cls = " active" if active else "" + turns = _thread_for_character(events or [], name) + bubble = "" + if active: + vote = next((vote for vote in (latest.votes if latest else []) if vote.juror == name), None) + speech = _latest_turn_text(latest, name) + if vote: + speech = f"{vote.vote.replace('_', ' ').title()}. {vote.reason}" + if speech: + bubble = f"{_escape(_short_text(speech, 190))}" + portrait = ( + f"{_escape(name)} bust" + if image + else "" + ) + return ( + f"" + f"{portrait}" + "" + f"{bubble}" + f"{_tooltip(name, 'HF-style juror', 'Nemotron panel', turns)}" + "" + ) + + +def _book(open_book: bool) -> str: + closed = "" if open_book else " closed" + return ( + f"
" + "Open docket book" + "Closed docket book" + "
" + ) + + +def _caption(event: TrialEvent | None, phase: str) -> tuple[str, str, str]: + if event is None: + return ( + "PRETRIAL", + "The Courtroom Is Ready", + "The docket is open, the room is dimmed, and the clerk is waiting for the first call.", + ) + body = event.body.splitlines()[0] if event.body.splitlines() else event.body + return (f"{PHASE_GLYPHS[phase]} / {phase.upper()}", event.title, body) + + +def _evidence_props(events: list[TrialEvent]) -> str: + evidence = next((event.evidence for event in reversed(events) if event.evidence), []) + if not evidence: + return "" + tilts = ["-5deg", "3deg", "-2deg", "5deg"] + sheets = [] + for idx, item in enumerate(evidence[:4]): + sheets.append( + f"" + ) + return f"
{''.join(sheets)}
" + + +def _foreground_props() -> str: + fence = "/gradio_api/file=assets/foreground/foregroundFence.png" + judge_table = "/gradio_api/file=assets/foreground/JudgeTable.png" + return ( + "" + ) + + +def _courtroom_juror_names(votes: list) -> list[str]: + names = list(JUROR_FACES) + names.extend(vote.juror for vote in votes if vote.juror not in names) + return names[:6] + + +def _latest_votes(events: list[TrialEvent]) -> list: + by_juror = {} + for event in events: + for vote in event.votes: + by_juror[vote.juror] = vote + ordered = [by_juror[name] for name in JUROR_FACES if name in by_juror] + ordered.extend(vote for juror, vote in by_juror.items() if juror not in JUROR_FACES) + return ordered + + +def render_court(events: list[TrialEvent], started: bool = False) -> str: + latest = events[-1] if events else None + phase = latest.phase if latest else "pretrial" + title, subtitle = _latest_packet_title(events) + active_agents = _active_agents_for(latest) + active_speaker = _active_speaker_for(latest) + speaker_cls = _speaker_class_for(active_speaker) + caption_phase, caption_title, caption_body = _caption(latest, phase) + latest_votes = _latest_votes(events) + juror_names = _courtroom_juror_names(latest_votes) + started_cls = " trial-started" if started or events else "" + book_open = not started and not events + puppets = "".join( + _puppet(agent, active_agents, phase, events, latest) + for agent in [JUDGE_NAME, "Clerk Meridian", "Advocate Auric", "Counsel Sable", "Auditor Prism"] + ) + left_jurors = "".join(_juror(name, name == active_speaker, events, latest) for name in juror_names[:3]) + right_jurors = "".join(_juror(name, name == active_speaker, events, latest) for name in juror_names[3:6]) + evidence_props = _evidence_props(events) + thread_modals = "".join( + _thread_modal(meta["name"], meta["role"], meta["model"], _thread_for_character(events, agent)) + for agent, meta in CHARACTERS.items() + ) + "".join( + _thread_modal(name, "HF-style juror", "Nemotron 
panel", _thread_for_character(events, name)) + for name in juror_names + ) + return ( + f"
" + "
" + "" + "" + "
" + "
Judge-GPT Virtual Courtroom
" + f"

{_escape(title)}

" + f"

{_escape(subtitle)}

" + f"
Step {len(events) if events else 0}: {caption_title}
Hover characters for agent and model details
" + f"{_book(book_open)}" + f"
{_escape(JUDGE_NAME)}
" + "
Claimant Table
" + "
Respondent Table
" + "
" + "
Evidence Stand
" + "
Jury Box
" + f"{left_jurors}
" + "
Jury Box
" + f"{right_jurors}
" + f"{puppets}" + f"{evidence_props}" + f"{_foreground_props()}" + "" + "
" + f"
Live Trial Feed / {_escape(caption_phase)}
" + f"
{_escape(caption_title)}
" + f"
{_escape(caption_body)}
" + "
" + f"{thread_modals}
" + ) + + +def render_evidence(events: list[TrialEvent]) -> str: + evidence = next((event.evidence for event in reversed(events) if event.evidence), []) + if not evidence: + return "
The exhibit drawer is closed until the clerk opens the docket.
" + return ( + "
" + + "".join( + "
" + f"
{_escape(item.id)} / {item.reliability:.2f}
" + f"

{_escape(item.title)}

" + f"

{_escape(item.excerpt)}

" + f"

Direction: {_escape(item.supports)}

" + f"

{_escape(item.note)}

" + for item in evidence + ) + + "
" + ) + + +def render_jurors(events: list[TrialEvent]) -> str: + votes = _latest_votes(events) + if not votes: + sleepers = "".join(_juror(name, False) for name in JUROR_FACES) + return ( + "
Jury Box
" + f"
{sleepers}
" + "

The jurors are seated and silent.

" + ) + return ( + "
" + + "".join( + "
" + f"
{_escape(vote.juror)}
" + f"

Persona: {_escape(vote.persona)}

" + f"

{_escape(vote.vote.replace('_', ' '))}

" + f"

{_escape(vote.reason)}

" + f"

Evidence: {_escape(', '.join(vote.evidence_ids))}

" + for vote in votes + ) + + "
" + ) + + +def render_mind(events: list[TrialEvent], enabled: bool) -> str: + if not enabled: + return "
Mind Layer hidden.
" + if not events: + return "
Awaiting trace.
" + compact = [ + { + "phase": event.phase, + "title": event.title, + "turns": [turn.model_dump() for turn in event.turns], + "trace": event.trace, + } + for event in events + ] + return f"
{_escape(json.dumps(compact, indent=2))}
" + + +def run_ui(case_label: str, search_query: str, hypothetical: str, speed: str, mind_layer: bool): + request = TrialRequest( + case_id=CASE_OPTIONS.get(case_label, "socrates"), + search_query=search_query or "", + hypothetical=hypothetical or "", + speed=speed or "swift", + mind_layer=bool(mind_layer), + ) + events: list[TrialEvent] = [] + yield ( + render_court(events, started=True), + render_evidence(events), + render_jurors(events), + render_mind(events, mind_layer), + "The docket closes and the bailiff calls the room to order.", + ) + try: + for event in get_events(request): + events.append(event) + status = f"Step {len(events)}: {event.title}" + yield ( + render_court(events, started=True), + render_evidence(events), + render_jurors(events), + render_mind(events, mind_layer), + status, + ) + except Exception as exc: + yield ( + render_court(events, started=True), + render_evidence(events), + render_jurors(events), + render_mind(events, mind_layer), + f"Model response required. Trial stopped: {exc}", + ) + return + yield ( + render_court(events, started=True), + render_evidence(events), + render_jurors(events), + render_mind(events, mind_layer), + "Verdict sealed.", + ) + + +def build_app() -> gr.Blocks: + with gr.Blocks(title="Judge-GPT") as demo: + with gr.Group(elem_classes=["docket-book-controls"]): + gr.HTML("
DATA TRIAL:
") + with gr.Row(): + case = gr.Dropdown( + label="Case entry", + choices=list(CASE_OPTIONS.keys()), + value="Trial of Socrates", + scale=2, + ) + start = gr.Button("Begin Trial", variant="primary", scale=1) + status = gr.Markdown("Ready.", elem_classes=["book-status"]) + courtroom = gr.HTML(render_court([]), label="Live courtroom") + search = gr.State("") + speed = gr.State("swift") + mind = gr.State(True) + with gr.Accordion("Advanced trial options", open=False, elem_classes=["trial-options"]): + with gr.Row(): + hypo = gr.Textbox(label="Hypothetical sidebar", lines=1) + with gr.Row(elem_classes=["drawer-shell"]): + with gr.Column(scale=1): + with gr.Tab("Evidence Drawer"): + evidence = gr.HTML(render_evidence([])) + with gr.Tab("Juror Panel"): + jurors = gr.HTML(render_jurors([])) + mind_html = gr.HTML(render_mind([], True), visible=False) + start.click( + run_ui, + inputs=[case, search, hypo, speed, mind], + outputs=[courtroom, evidence, jurors, mind_html, status], + js=START_JS, + ) + return demo + + +demo = build_app() + +if __name__ == "__main__": + demo.queue().launch( + show_error=True, + allowed_paths=["assets"], + css=CSS, + head=APP_HEAD, + theme=gr.themes.Soft(), + ) diff --git a/assets/ATTRIBUTION.md b/assets/ATTRIBUTION.md new file mode 100644 index 0000000000000000000000000000000000000000..db50a44b814adc1e0cfe30d67c3cfaad37e94d34 --- /dev/null +++ b/assets/ATTRIBUTION.md @@ -0,0 +1,10 @@ +# Asset Attribution + +## `courtroom-dickinson.jpg` + +- Source: https://commons.wikimedia.org/wiki/File:Dickinson_Law_Courtroom.jpg +- Description: Penn State University, Dickinson School of Law courtroom +- Author: Jeremy Hess Photography +- License: Creative Commons CC0 1.0 Universal Public Domain Dedication +- Local use: cinematic courtroom background for Sovereign Bench + diff --git a/assets/audio/ATTRIBUTION.md b/assets/audio/ATTRIBUTION.md new file mode 100644 index 0000000000000000000000000000000000000000..9b83e5fd5f73c11f5dfa505ac2452cd21158e2b6 --- 
/dev/null +++ b/assets/audio/ATTRIBUTION.md @@ -0,0 +1,51 @@ +# Audio Attribution + +All selected audio is stored locally in `assets/audio/` for the animated courtroom episode. + +## Courtroom score and judgement sting + +- Files: `courtroom.ogg`, `Judgement.ogg` +- Source: OpenGameArt, "Courtroom and Judgement" +- Author: Spring Spring +- License: CC0 +- URL: https://opengameart.org/content/courtroom-and-judgement + +## Courtroom chatter and crowd reaction + +- File: `crowd_shouting.ogg` +- Source: OpenGameArt, "Crowd Shouting/Speaking Ambience" +- Author: StarNinjas +- License: CC0 +- URL: https://opengameart.org/content/crowd-shoutingspeaking-ambience + +## Gavel and wood hits + +- Files: `wood_hammer_01.ogg`, `wood_hit_03.ogg` +- Source: OpenGameArt, "100 CC0 metal and wood SFX" +- Author: rubberduck +- License: CC0 +- URL: https://opengameart.org/content/100-cc0-metal-and-wood-sfx + +## Lawyer footsteps + +- File: `steps_in_wood_floor.wav` +- Source: OpenGameArt, "Steps in wood floor" +- Author: mikeask +- License: CC0 +- URL: https://opengameart.org/content/steps-in-wood-floor + +## Book and paper movement + +- Files: `paper_sound_1.mp3`, `paper_sound_4.mp3` +- Source: OpenGameArt, "Various Paper Sound Effects" +- Author: Luckius +- License: CC0 +- URL: https://opengameart.org/content/various-paper-sound-effects + +## Docket selection UI cue + +- File: `select_001.ogg` +- Source: OpenGameArt, "Interface Sounds" +- Author: Kenney +- License: CC0 +- URL: https://opengameart.org/content/interface-sounds diff --git a/assets/audio/Judgement.ogg b/assets/audio/Judgement.ogg new file mode 100644 index 0000000000000000000000000000000000000000..00724264a7afa013dd7168f3b64158b28cc94697 --- /dev/null +++ b/assets/audio/Judgement.ogg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0fb8647947708bcde1771a96cd6272653e90dd1d2823bae9581e28707fad35d +size 3164976 diff --git a/assets/audio/courtroom.ogg b/assets/audio/courtroom.ogg new file mode 100644 
index 0000000000000000000000000000000000000000..19511dc7b270424403c322f8fbd2ed276f7c6a89 --- /dev/null +++ b/assets/audio/courtroom.ogg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ffd37d0a907c80324a51419544b5cdcb69781cc4642d9f79cff3e2ffee4a556 +size 2666730 diff --git a/assets/audio/crowd_shouting.ogg b/assets/audio/crowd_shouting.ogg new file mode 100644 index 0000000000000000000000000000000000000000..f71ac04ec0d453f480a25ba5a0d3f088dd5205fa --- /dev/null +++ b/assets/audio/crowd_shouting.ogg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2c23a64c127c77717cbe4abb21f138bac9d9aa6cb3cc89d62bab5d0d96dd7ca +size 335526 diff --git a/assets/audio/paper_sound_1.mp3 b/assets/audio/paper_sound_1.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..3434b227b6c962bbe02d48a442d763008d858dcd Binary files /dev/null and b/assets/audio/paper_sound_1.mp3 differ diff --git a/assets/audio/paper_sound_4.mp3 b/assets/audio/paper_sound_4.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..89a45f2cdaf8042cb0ff6c897e57e649dc807256 Binary files /dev/null and b/assets/audio/paper_sound_4.mp3 differ diff --git a/assets/audio/select_001.ogg b/assets/audio/select_001.ogg new file mode 100644 index 0000000000000000000000000000000000000000..0f81a639ca63328d5c065b27d4000cbabc22065d Binary files /dev/null and b/assets/audio/select_001.ogg differ diff --git a/assets/audio/steps_in_wood_floor.wav b/assets/audio/steps_in_wood_floor.wav new file mode 100644 index 0000000000000000000000000000000000000000..8e7adb096bd995a6f9dea4b559639932dc352fc2 --- /dev/null +++ b/assets/audio/steps_in_wood_floor.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aae50329e6b702c23a2e6e21f0eb673a0bca5aa9386fb9eb4ea6e9acdcb05536 +size 613852 diff --git a/assets/audio/wood_hammer_01.ogg b/assets/audio/wood_hammer_01.ogg new file mode 100644 index 
0000000000000000000000000000000000000000..a2cdd58a420e107f706d4328e8e02466100b3b3a Binary files /dev/null and b/assets/audio/wood_hammer_01.ogg differ diff --git a/assets/audio/wood_hit_03.ogg b/assets/audio/wood_hit_03.ogg new file mode 100644 index 0000000000000000000000000000000000000000..f60bfcde4cc0cbef6198a82df3e4f2f32a5db2bb Binary files /dev/null and b/assets/audio/wood_hit_03.ogg differ diff --git a/assets/background/CourtRoom.png b/assets/background/CourtRoom.png new file mode 100644 index 0000000000000000000000000000000000000000..42c3665ae34de84aca71b39ff5066c72f77974d4 --- /dev/null +++ b/assets/background/CourtRoom.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ba5b3dc8f5a36d5a3c10b38e86ec7e28938acf577add34246fb47af1b0e31d6 +size 2727354 diff --git a/assets/book/README.md b/assets/book/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0a4ba3f858de00a30161f78f4f778c46b591bad1 --- /dev/null +++ b/assets/book/README.md @@ -0,0 +1,14 @@ +# Docket Book Assets + +These project-bound UI prop assets were generated with the built-in Codex image generation tool, then processed locally from a chroma-key background to transparent PNGs. + +- `docket-book-open.png`: open docket book used before the trial starts. +- `docket-book-closed.png`: closed docket book used after the trial begins. +- `docket-book-open-keyed.png`: preserved chroma-key source. +- `docket-book-closed-keyed.png`: preserved chroma-key source. + +Generation prompt summary: + +- Antique legal docket book, warm parchment or dark leather, gold corner protectors, polished painterly game UI prop, centered with generous padding. +- No text, no logos, no watermark, no hands, no pen. +- Generated on a flat `#00ff00` background for local alpha extraction. 
diff --git a/assets/book/docket-book-closed-keyed.png b/assets/book/docket-book-closed-keyed.png new file mode 100644 index 0000000000000000000000000000000000000000..6aa1581240b59f86c7f104ef0b8cf14dca65f900 --- /dev/null +++ b/assets/book/docket-book-closed-keyed.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe37e1711a12955aa6a1fce806160a33f225d6eaeeca7c03621ad33ecd3fa0a8 +size 2273553 diff --git a/assets/book/docket-book-closed.png b/assets/book/docket-book-closed.png new file mode 100644 index 0000000000000000000000000000000000000000..b83a69532db077b4a4172a63309319c6ebfeeea5 --- /dev/null +++ b/assets/book/docket-book-closed.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8262488326fae3161bf2981cb808d6c894c2ef51852d90cce150475fa3130f7 +size 1677113 diff --git a/assets/book/docket-book-open-keyed.png b/assets/book/docket-book-open-keyed.png new file mode 100644 index 0000000000000000000000000000000000000000..1d8affe8150a481abc23c368874547344b199e03 --- /dev/null +++ b/assets/book/docket-book-open-keyed.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d5718ab59300553bf95bddc24e381f386b1ee23aea48d1b8d9159291e387aff +size 2434037 diff --git a/assets/book/docket-book-open.png b/assets/book/docket-book-open.png new file mode 100644 index 0000000000000000000000000000000000000000..c9bfe8a94dad79080702de4447a9cdd795cfa8db --- /dev/null +++ b/assets/book/docket-book-open.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d90aa3e29db9b58dd9928de7629481bea0a6ab994477f9de72971e1de496547 +size 2107390 diff --git a/assets/characters/cleopatra-vii.png b/assets/characters/cleopatra-vii.png new file mode 100644 index 0000000000000000000000000000000000000000..7a72eb8a80a3702871ff24227936730d4f6e2fb2 --- /dev/null +++ b/assets/characters/cleopatra-vii.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1df2fbeb8bcfed5c9c58f8d2514ba914521f1f23d49d3a356a5b9d2013a778b2 +size 1436707 diff --git a/assets/characters/confucius.png b/assets/characters/confucius.png new file mode 100644 index 0000000000000000000000000000000000000000..0e744442c47d710838cac8e8a6591abfe43f841d --- /dev/null +++ b/assets/characters/confucius.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e236ffdc96aca7a2049a8b500aff163496aba73effd5c9b97e8f56ead9e9ebdf +size 1435092 diff --git a/assets/characters/jensen-huang.png b/assets/characters/jensen-huang.png new file mode 100644 index 0000000000000000000000000000000000000000..c8e468f8d4712882cffe963aea418156bb80e811 --- /dev/null +++ b/assets/characters/jensen-huang.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:295ec10b5eb6ea569283c8ba43259b83490cf0e80dd7011495e797a7dfa19ca3 +size 1350094 diff --git a/assets/characters/john-stuart-mill.png b/assets/characters/john-stuart-mill.png new file mode 100644 index 0000000000000000000000000000000000000000..0b1384fe471afffdd28c2647de2a6b83a90b3d87 --- /dev/null +++ b/assets/characters/john-stuart-mill.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a56b20ed6931d387e3aa7228fd52daefac2010540d1c55dcabaca45771761f50 +size 1176065 diff --git a/assets/characters/karl-marx.png b/assets/characters/karl-marx.png new file mode 100644 index 0000000000000000000000000000000000000000..a2109b65be8e637227ff309edc8ee5ad2c93df1e --- /dev/null +++ b/assets/characters/karl-marx.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce73bb67347d37b86a730ca7587b0c858803dbacbcdeec7d428d2680db130f28 +size 1311219 diff --git a/assets/characters/marcus-aurelius.png b/assets/characters/marcus-aurelius.png new file mode 100644 index 0000000000000000000000000000000000000000..c4faba8b954bce406fd49eb9ea079bd9af797d31 --- /dev/null +++ b/assets/characters/marcus-aurelius.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:d87605fff884d942918b9a16c2c79456d132bb115026a080312fae90659a812e +size 1266848 diff --git a/assets/characters/niccolo-machiavelli.png b/assets/characters/niccolo-machiavelli.png new file mode 100644 index 0000000000000000000000000000000000000000..01cabef1d5c07caed76b7e50f067c9f2bee8b635 --- /dev/null +++ b/assets/characters/niccolo-machiavelli.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:734bb0679951cf04028384b3e1f560bfb6965f8284bd1b4ace5829642a870706 +size 1290378 diff --git a/assets/characters/sources/cleopatra-vii-chroma.png b/assets/characters/sources/cleopatra-vii-chroma.png new file mode 100644 index 0000000000000000000000000000000000000000..cfaedd75bfd7180fe5dae0f82a1eaa4095612003 --- /dev/null +++ b/assets/characters/sources/cleopatra-vii-chroma.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b98d96379e71b0e50b102fe8122523f588e91ef957867a9030b0e213b7dd4ca +size 1867773 diff --git a/assets/characters/sources/confucius-chroma.png b/assets/characters/sources/confucius-chroma.png new file mode 100644 index 0000000000000000000000000000000000000000..9a154c985626fd015fafcfc0699aea6755240aa8 --- /dev/null +++ b/assets/characters/sources/confucius-chroma.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cbdb9b4e6b6f24f9ed18accf7d959e5723aa2cbd4f58448322dcfd27fdfada6 +size 1874694 diff --git a/assets/characters/sources/jensen-huang-chroma.png b/assets/characters/sources/jensen-huang-chroma.png new file mode 100644 index 0000000000000000000000000000000000000000..e3531e3a34f91ef8fb5b91b2520233511cb2e1fb --- /dev/null +++ b/assets/characters/sources/jensen-huang-chroma.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:657fe8e6e72ee067e2ae52f340af407b963b823a2a21d8aa9661482ff1eb9664 +size 1805006 diff --git a/assets/characters/sources/john-stuart-mill-chroma.png 
b/assets/characters/sources/john-stuart-mill-chroma.png new file mode 100644 index 0000000000000000000000000000000000000000..a6cae6670d6321ee444f30c59b2b3414ac3ef71b --- /dev/null +++ b/assets/characters/sources/john-stuart-mill-chroma.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb38a0cdb066776a528400a3bdd4b90302e73680c5708a4a5de2ba0cde14fb5d +size 1681782 diff --git a/assets/characters/sources/karl-marx-chroma.png b/assets/characters/sources/karl-marx-chroma.png new file mode 100644 index 0000000000000000000000000000000000000000..271a5892d219277e2191345dab79b50c5dc2ad46 --- /dev/null +++ b/assets/characters/sources/karl-marx-chroma.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f569950d6edbe2175ca8db4caebf90fdcb399b06c96616d18f003dfb8cbcbbb8 +size 1757540 diff --git a/assets/characters/sources/marcus-aurelius-chroma.png b/assets/characters/sources/marcus-aurelius-chroma.png new file mode 100644 index 0000000000000000000000000000000000000000..6cd8ece98b566c1049cb1fbee450c90953e5fe93 --- /dev/null +++ b/assets/characters/sources/marcus-aurelius-chroma.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cbbc1bfbf769394bad536e4d57f5c0a24b06f6a2aebc30839bb04b0ed446375 +size 1748222 diff --git a/assets/characters/sources/niccolo-machiavelli-chroma.png b/assets/characters/sources/niccolo-machiavelli-chroma.png new file mode 100644 index 0000000000000000000000000000000000000000..1aa13df577bbf9af35a8dba3dd4ff80af088eb4c --- /dev/null +++ b/assets/characters/sources/niccolo-machiavelli-chroma.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76ef5b62217064ab9eab67d6729e61c94ac870ee447863f6b6f30612de6e63e8 +size 1711518 diff --git a/assets/courtroom-dickinson.jpg b/assets/courtroom-dickinson.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e0ff7c1deb236886cc74bce30b1ecb9b85fe6146 --- /dev/null +++ b/assets/courtroom-dickinson.jpg @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9783920c38641d8a7a3c900f258cb22e844c0694a0578b186d411daed4cb109e +size 5667725 diff --git a/assets/foreground/JudgeTable.png b/assets/foreground/JudgeTable.png new file mode 100644 index 0000000000000000000000000000000000000000..d9a182c0bc80a7b975febb551e444b7f73815c35 --- /dev/null +++ b/assets/foreground/JudgeTable.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:699184ad0574c3da0846536964c6ed703f76e78409da1539c2d6bc4a048cee6b +size 1322887 diff --git a/assets/foreground/foregroundFence.png b/assets/foreground/foregroundFence.png new file mode 100644 index 0000000000000000000000000000000000000000..5d21340e3a6e2e333f3a2c5f984c640366744cd8 --- /dev/null +++ b/assets/foreground/foregroundFence.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2c7553f88852c89f504973da6094ba09e1e5d322daacbdea283dafe1bffefa1 +size 1405986 diff --git a/data/README.md b/data/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0170a68b4ed00ea3d05e427b2591ea8c9a8b2932 --- /dev/null +++ b/data/README.md @@ -0,0 +1,5 @@ +# Sovereign Bench Agent Trace Sample + +This sample contains compact phase-level trace rows from the cached Barnaby Buttons trial. Runtime traces exported by the Gradio app include the full structured `TrialEvent` objects. + +The trace is synthetic and intended for hackathon demonstration, reproducibility, and UI testing. diff --git a/data/agent_trace_sample.json b/data/agent_trace_sample.json new file mode 100644 index 0000000000000000000000000000000000000000..469329e044cea829332b4ced8c6647a4d7e5d2d9 --- /dev/null +++ b/data/agent_trace_sample.json @@ -0,0 +1,23 @@ +[ + { + "phase": "intake", + "case_id": "barnaby", + "agent": "Clerk Meridian", + "model": "openbmb/AgentCPM-Explore", + "summary": "Opened The People v. Barnaby Buttons and recorded source provenance for cached demo reliability." 
@app.cls(
    image=vllm_image,
    gpu=GPU_NAME,
    secrets=[modal.Secret.from_name("huggingface")],
    volumes={HF_CACHE_DIR: model_cache},
    timeout=GPU_TIMEOUT_SECONDS,
    scaledown_window=10 * 60,
    max_containers=3,
)
class VllmModel:
    """Modal class that loads one vLLM engine per ``model_id`` and serves chat completions."""

    # Hugging Face model identifier, supplied per instance through Modal's parameter mechanism.
    model_id: str = modal.parameter()

    @modal.enter()
    def load(self) -> None:
        """Create the vLLM engine for ``self.model_id`` and keep ``SamplingParams`` for later calls."""
        # Imported here because vllm is only installed in the GPU image, not where this module is defined.
        from vllm import LLM, SamplingParams

        self.SamplingParams = SamplingParams
        self.llm = LLM(
            model=self.model_id,
            trust_remote_code=True,
            max_model_len=4096,
            gpu_memory_utilization=0.9,
        )

    @modal.method()
    def generate(self, payload: dict) -> dict:
        """Run one chat completion, retrying once with a stricter prompt if no usable text comes back.

        Args:
            payload: Must contain ``"messages"`` (a chat message list). May contain
                ``"max_tokens"`` (default 120) and ``"temperature"`` (default 0.45).

        Returns:
            ``{"text": <cleaned reply>, "latency_ms": <elapsed milliseconds>}``.

        Raises:
            ModelCallError: The last cleaning error, when neither attempt produced text.
        """
        from sovereign_bench.llm import ModelCallError, clean_model_text

        started = time.perf_counter()
        messages = payload["messages"]
        max_tokens = int(payload.get("max_tokens") or 120)
        # Compare against None rather than truthiness so an explicit temperature
        # of 0 (greedy decoding) is honoured instead of being replaced by 0.45.
        requested_temperature = payload.get("temperature")
        temperature = 0.45 if requested_temperature is None else float(requested_temperature)
        sampling_params = self.SamplingParams(
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=0.9,
        )
        retry_messages = messages + [
            {
                "role": "user",
                "content": (
                    "Your previous response did not include visible courtroom dialogue. "
                    "Return only the final spoken dialogue now. Do not include , analysis, reasoning, markdown, or notes. /no_think"
                ),
            }
        ]
        last_error: Exception | None = None
        text = ""
        for attempt_messages in (messages, retry_messages):
            outputs = self.llm.chat(
                [attempt_messages],
                sampling_params=sampling_params,
                use_tqdm=False,
                chat_template_kwargs={"enable_thinking": False},
            )
            raw_text = outputs[0].outputs[0].text.strip()
            try:
                text = clean_model_text(raw_text)
            except ModelCallError as exc:
                last_error = exc
                continue
            # An empty cleaned reply is as unusable as a cleaning error, so it
            # also falls through to the retry prompt instead of ending the loop.
            if text:
                break
        if not text and last_error:
            raise last_error
        return {
            "text": text,
            "latency_ms": int((time.perf_counter() - started) * 1000),
        }
@app.function(
    image=image,
    secrets=[modal.Secret.from_name("huggingface")],
    min_containers=1,
    timeout=GPU_TIMEOUT_SECONDS,
)
@modal.fastapi_endpoint(method="POST", label="trial-stream")
def trial_stream(payload: dict):
    """Validate a trial request and stream its events back as newline-delimited JSON.

    The per-event delay is chosen from the request's ``speed`` value; the
    events themselves are produced by ``stream_trial_jsonl`` using the Modal
    GPU runner.
    """
    from fastapi.responses import StreamingResponse

    request = TrialRequest.model_validate(payload)

    # Seconds to pause before each event, keyed by the requested pacing.
    pace_seconds = {"swift": 0.02, "measured": 0.12, "ceremonial": 0.25}
    delay = pace_seconds[request.speed]

    event_lines = stream_trial_jsonl(request, delay=delay, model_runner=modal_gpu_runner)
    return StreamingResponse(event_lines, media_type="application/x-ndjson")
Socrates", + subtitle="A miniature retrial of impiety, civic anxiety, and troublesome questions.", + claimant="The Athenian Polis", + respondent="Socrates", + charge="Corrupting the youth and refusing the sanctioned gods of the city.", + setting="Athens, 399 BCE, reassembled inside a pocket tribunal.", + claimant_claim=( + "The city argues that Socrates trained young citizens to mock public authority " + "and placed private daimonion guidance above civic religion." + ), + respondent_claim=( + "Socrates answers that cross-examination was a public service, not corruption, " + "and that unpopular inquiry should not be confused with civic sabotage." + ), + source_note=( + "Cached public-domain style packet derived from Plato's Apology and Crito, " + "Xenophon's Apology, and common historical summaries. It is not a live scholarly edition." + ), + evidence=[ + EvidenceItem( + id="SOC-E1", + title="The Oracle Burden", + source="Plato, Apology tradition", + excerpt=( + "Socrates describes testing reputedly wise citizens after a Delphic oracle " + "report, creating public embarrassment but framing the act as duty." + ), + supports="mixed", + reliability=0.78, + note="Shows both civic irritation and a claimed religious motivation.", + ), + EvidenceItem( + id="SOC-E2", + title="Youthful Imitators", + source="Plato, Apology tradition", + excerpt=( + "Young men with leisure reportedly followed Socrates and copied his questioning, " + "which angered the questioned citizens." + ), + supports="claimant", + reliability=0.68, + note="Supports social effect, but does not prove intentional corruption.", + ), + EvidenceItem( + id="SOC-E3", + title="No Fee, No School", + source="Ancient defense tradition", + excerpt=( + "Socrates distinguishes himself from paid teachers and denies promising technical " + "instruction or private doctrine." 
+ ), + supports="respondent", + reliability=0.72, + note="Weakens the claim that he operated a formal corrupting academy.", + ), + EvidenceItem( + id="SOC-E4", + title="The Daimonion", + source="Ancient biographical tradition", + excerpt=( + "Socrates reports a private divine sign that restrains him from certain actions, " + "which the court may read as piety or heterodoxy." + ), + supports="mixed", + reliability=0.64, + note="Central ambiguity: private religious experience versus civic irreverence.", + ), + ], +) + + +BARNABY = CasePacket( + id="barnaby", + title="The People v. Barnaby Buttons", + subtitle="The last office mooncake, a tampered snack ledger, and crumbs shaped like intent.", + claimant="The Breakroom Commonwealth", + respondent="Barnaby Buttons", + charge="Theft of the final mooncake and alteration of the communal snack ledger.", + setting="A fluorescent office kitchen at 4:47 p.m., under the humming republic of the fridge.", + claimant_claim=( + "Barnaby removed the final mooncake, changed the snack ledger from '1 mooncake' " + "to '0 mooncakes', and left the team dessertless." + ), + respondent_claim=( + "Barnaby says the mooncake was already abandoned, the ledger pen skipped naturally, " + "and the crumbs came from an unrelated biscuit." 
def get_case(case_id: str) -> CasePacket:
    """Return the cached case packet for ``case_id``.

    Unknown identifiers do not raise: the Socrates packet is returned instead.
    """
    packet = CASES.get(case_id)
    if packet is None:
        return SOCRATES
    return packet
def _turn(agent: str, role: str, result: ModelResult, model: str, confidence: float) -> AgentTurn:
    """Package one model result as an ``AgentTurn`` for a trial event."""
    fields = {
        "agent": agent,
        "role": role,
        "content": result.text,
        "model": model,
        "confidence": confidence,
        # Results that carry no ``input_text`` attribute are recorded with an empty prompt.
        "input": getattr(result, "input_text", ""),
    }
    return AgentTurn(**fields)
def _evidence_summary(packet: CasePacket) -> str:
    """Render every exhibit in ``packet.evidence`` as one line of prompt text.

    Each line carries the exhibit id, title, direction, reliability (two
    decimals) and note; lines are joined with newlines. An empty evidence
    list yields an empty string.
    """
    rows = []
    for exhibit in packet.evidence:
        rows.append(
            f"{exhibit.id}: {exhibit.title}; direction={exhibit.supports}; "
            f"reliability={exhibit.reliability:.2f}; note={exhibit.note}"
        )
    return "\n".join(rows)
def _extract_json(text: str) -> object:
    """Parse the first JSON object or array found in a model reply.

    The reply is tried as-is first, after removing a surrounding Markdown code
    fence. If that fails, every ``{`` or ``[`` is tried in order as the start
    of a JSON value and the first one that decodes is returned. Scanning this
    way tolerates chatter before and after the payload, including stray braces
    or brackets in the trailing text.

    Args:
        text: Raw model output.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If no position in the text starts a valid JSON
            object or array (the error from the whole-text attempt is re-raised).
    """
    stripped = text.strip()
    if stripped.startswith("```"):
        stripped = re.sub(r"^```(?:json)?\s*", "", stripped, flags=re.I)
        stripped = re.sub(r"\s*```$", "", stripped)
    try:
        return json.loads(stripped)
    except json.JSONDecodeError:
        decoder = json.JSONDecoder()
        for opener in re.finditer(r"[{\[]", stripped):
            try:
                # raw_decode stops at the end of the value, so whatever follows it is ignored.
                value, _end = decoder.raw_decode(stripped, opener.start())
            except json.JSONDecodeError:
                continue
            return value
        raise
def _majority_finding(votes: list[JurorVote]) -> str:
    """Reduce the juror votes to a single finding label.

    Returns ``"uncertain"`` when there are no votes, ``"mixed"`` when two or
    more vote values share the highest count, and otherwise the vote value
    with the highest count.
    """
    tally = Counter(ballot.vote for ballot in votes)
    if not tally:
        return "uncertain"
    best = max(tally.values())
    leaders = [label for label, count in tally.items() if count == best]
    if len(leaders) > 1:
        return "mixed"
    return leaders[0]
def _jury_task() -> str:
    """Build the instruction asking the jury model for one JSON vote per juror.

    The text names the jurors in ``JUROR_NAMES`` order, lists the allowed vote
    values and required keys, and ends with one ``- name: persona`` line per
    entry of ``JUROR_PERSONAS``.
    """
    roster = ", ".join(JUROR_NAMES)
    profile_lines = [f"- {name}: {persona}" for name, persona in JUROR_PERSONAS.items()]
    segments = [
        "Return JSON only with a top-level 'votes' array. Create exactly one vote for each juror, in this order: ",
        f"{roster}. Valid vote values are liable, not_liable, uncertain. Each item must contain ",
        "juror, persona, vote, reason, and evidence_ids. The persona value must exactly match the profile below. ",
        "Each reason should be one concise sentence and each evidence_ids list must cite evidence IDs from the record. ",
        "Vote through the named public-history worldview, not a generic juror role.\n",
        "\n".join(profile_lines),
    ]
    return "".join(segments)
turns=[_turn("Clerk Meridian", "clerk", clerk, OPENBMB_MODEL, 0.88)], + evidence=packet.evidence, + ), + delay, + ) + + judge_open = _required_role( + model_runner, + model_calls, + agent=JUDGE_NAME, + role="judge", + model=GPT_OSS_MODEL, + case_summary=case_summary, + evidence_summary=evidence_summary, + task=( + f"As {JUDGE_NAME}, a Stoic courtroom judge guided by {JUDGE_PERSONA}, explain the proceeding " + "and the burden of proof in one or two disciplined sentences." + ), + provider=OPENAI_PROVIDER, + max_tokens=110, + ) + yield _emit( + packet, + source_trace, + model_calls, + TrialEvent( + phase="intake", + title="The Burden Is Set", + body="The bench defines how the miniature court will weigh the record.", + turns=[_turn(JUDGE_NAME, "judge", judge_open, GPT_OSS_MODEL, 0.88)], + evidence=packet.evidence, + ), + delay, + ) + + claimant_opening = _required_role( + model_runner, + model_calls, + agent="Advocate Auric", + role="claimant advocate", + model=GPT_OSS_MODEL, + case_summary=case_summary, + evidence_summary=evidence_summary, + task="Make the claimant's opening statement alone. Cite the strongest claimant-side exhibit.", + provider=OPENAI_PROVIDER, + max_tokens=130, + ) + yield _emit( + packet, + source_trace, + model_calls, + TrialEvent( + phase="claims", + title="Claimant Opening", + body=packet.claimant_claim, + turns=[_turn("Advocate Auric", "claimant advocate", claimant_opening, GPT_OSS_MODEL, 0.88)], + evidence=packet.evidence, + ), + delay, + ) + + respondent_opening = _required_role( + model_runner, + model_calls, + agent="Counsel Sable", + role="respondent advocate", + model=GPT_OSS_MODEL, + case_summary=case_summary, + evidence_summary=evidence_summary, + task="Make the respondent's opening statement alone. 
Emphasize uncertainty and cite a helpful exhibit.", + provider=OPENAI_PROVIDER, + max_tokens=130, + ) + yield _emit( + packet, + source_trace, + model_calls, + TrialEvent( + phase="opening", + title="Respondent Opening", + body=packet.respondent_claim, + turns=[_turn("Counsel Sable", "respondent advocate", respondent_opening, GPT_OSS_MODEL, 0.88)], + evidence=packet.evidence, + ), + delay, + ) + + auditor = _required_role( + model_runner, + model_calls, + agent="Auditor Prism", + role="evidence auditor", + model=NEMOTRON_MODEL, + case_summary=case_summary, + evidence_summary=evidence_summary, + task="Present the evidence record. Identify the strongest exhibit and the weakest inference.", + provider=NEMOTRON_PROVIDER, + max_tokens=150, + ) + yield _emit( + packet, + source_trace, + model_calls, + TrialEvent( + phase="evidence", + title="The Record Is Audited", + body="\n".join(f"{item.id}: {item.title} | reliability {item.reliability:.2f} | {item.note}" for item in packet.evidence), + turns=[_turn("Auditor Prism", "evidence auditor", auditor, NEMOTRON_MODEL, 0.86)], + evidence=packet.evidence, + ), + delay, + ) + + judge_question = _required_role( + model_runner, + model_calls, + agent=JUDGE_NAME, + role="judge", + model=GPT_OSS_MODEL, + case_summary=case_summary, + evidence_summary=evidence_summary, + task=( + f"As {JUDGE_NAME}, ask one sharp hinge question that would change the outcome if answered. " + "Use Stoic restraint and public reason." 
+ ), + provider=OPENAI_PROVIDER, + max_tokens=100, + ) + yield _emit( + packet, + source_trace, + model_calls, + TrialEvent( + phase="questions", + title="The Hinge Question", + body="The bench asks the single question that could turn the record.", + turns=[_turn(JUDGE_NAME, "judge", judge_question, GPT_OSS_MODEL, 0.88)], + evidence=packet.evidence, + ), + delay, + ) + + claimant_answer = _required_role( + model_runner, + model_calls, + agent="Advocate Auric", + role="claimant advocate", + model=GPT_OSS_MODEL, + case_summary=case_summary, + evidence_summary=evidence_summary, + task=f"Answer {JUDGE_NAME}'s hinge question for the claimant: {judge_question.text}", + provider=OPENAI_PROVIDER, + max_tokens=130, + ) + yield _emit( + packet, + source_trace, + model_calls, + TrialEvent( + phase="questions", + title="Claimant Answers the Bench", + body="The claimant answers the hinge question.", + turns=[_turn("Advocate Auric", "claimant advocate", claimant_answer, GPT_OSS_MODEL, 0.88)], + evidence=packet.evidence, + ), + delay, + ) + + respondent_answer = _required_role( + model_runner, + model_calls, + agent="Counsel Sable", + role="respondent advocate", + model=GPT_OSS_MODEL, + case_summary=case_summary, + evidence_summary=evidence_summary, + task=f"Answer {JUDGE_NAME}'s hinge question for the respondent: {judge_question.text}", + provider=OPENAI_PROVIDER, + max_tokens=130, + ) + yield _emit( + packet, + source_trace, + model_calls, + TrialEvent( + phase="questions", + title="Respondent Answers the Bench", + body="The respondent answers the hinge question.", + turns=[_turn("Counsel Sable", "respondent advocate", respondent_answer, GPT_OSS_MODEL, 0.88)], + evidence=packet.evidence, + ), + delay, + ) + + jury_panel = _required_role( + model_runner, + model_calls, + agent="Nemotron Jury", + role="juror panel", + model=NEMOTRON_MODEL, + case_summary=case_summary, + evidence_summary=evidence_summary, + task="Announce that the six named jurors retire to vote. 
Do not reveal the votes yet.", + provider=NEMOTRON_PROVIDER, + max_tokens=100, + ) + yield _emit( + packet, + source_trace, + model_calls, + TrialEvent( + phase="deliberation", + title="The Jury Retires", + body="Six fixed-perspective jurors leave the public floor to vote from the record.", + turns=[_turn("Nemotron Jury", "juror panel", jury_panel, NEMOTRON_MODEL, 0.86)], + evidence=packet.evidence, + ), + delay, + ) + + jury_votes_result = _required_role( + model_runner, + model_calls, + agent="Nemotron Jury", + role="juror vote generator", + model=NEMOTRON_MODEL, + case_summary=case_summary, + evidence_summary=evidence_summary, + task=_jury_task(), + provider=NEMOTRON_PROVIDER, + max_tokens=650, + ) + votes = _parse_jury_votes(jury_votes_result, packet) + for vote in votes: + juror_result = ModelResult( + text=f"{vote.vote.replace('_', ' ').title()}. {vote.reason}", + call=jury_votes_result.call, + input_text=jury_votes_result.input_text, + ) + yield _emit( + packet, + source_trace, + model_calls, + TrialEvent( + phase="deliberation", + title=f"Juror {vote.juror} Votes", + body=f"{vote.persona}. Evidence: {', '.join(vote.evidence_ids)}.", + turns=[_turn(vote.juror, "juror", juror_result, NEMOTRON_MODEL, 0.86)], + votes=[vote], + evidence=packet.evidence, + ), + delay, + ) + + verdict = _verdict_from_votes(votes) + verdict_voice = _required_role( + model_runner, + model_calls, + agent=JUDGE_NAME, + role="verdict writer", + model=GPT_OSS_MODEL, + case_summary=case_summary, + evidence_summary=evidence_summary, + task=( + f"As {JUDGE_NAME}, announce the final legal finding after the jury vote with Stoic restraint. " + f"Finding: {verdict.finding}. " + f"Jury rationale: {verdict.rationale} Remedy: {verdict.remedy}. Include uncertainty without disclaiming the role." 
+ ), + provider=OPENAI_PROVIDER, + max_tokens=160, + ) + yield _emit( + packet, + source_trace, + model_calls, + TrialEvent( + phase="verdict", + title="The Court Announces Judgment", + body=f"{verdict_voice.text}\n\n{verdict.rationale}\n\nRemedy: {verdict.remedy}", + verdict=verdict, + votes=votes, + evidence=packet.evidence, + turns=[_turn(JUDGE_NAME, "verdict writer", verdict_voice, GPT_OSS_MODEL, 0.88)], + ), + delay, + ) + + +def stream_trial_jsonl( + request: TrialRequest, + delay: float = 0.0, + model_runner: ModelRunner | None = None, +) -> Iterable[str]: + for event in stream_trial(request, delay, model_runner=model_runner): + yield json.dumps(event.model_dump(), ensure_ascii=True) + "\n" diff --git a/sovereign_bench/export.py b/sovereign_bench/export.py new file mode 100644 index 0000000000000000000000000000000000000000..56f3285febe3957ae8c1ecfc29185544c0cf5025 --- /dev/null +++ b/sovereign_bench/export.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +import json +import tempfile +from pathlib import Path + +from .models import TrialEvent + + +def write_trace(events: list[TrialEvent]) -> str: + path = Path(tempfile.gettempdir()) / "sovereign_bench_trace.json" + path.write_text( + json.dumps([event.model_dump() for event in events], indent=2, ensure_ascii=True), + encoding="utf-8", + ) + return str(path) + + +def write_decree(events: list[TrialEvent]) -> str: + verdict_event = next((event for event in events if event.verdict), events[-1]) + verdict = verdict_event.verdict + path = Path(tempfile.gettempdir()) / "sovereign_bench_decree.md" + if verdict is None: + text = "# Sovereign Bench Decree\n\nNo verdict was recorded." 
+ else: + text = ( + "# Sovereign Bench Decree\n\n" + f"## Finding\n{verdict.finding}\n\n" + f"## Decree\n{verdict.decree}\n\n" + f"## Rationale\n{verdict.rationale}\n\n" + f"## Remedy\n{verdict.remedy}\n\n" + f"## Uncertainty\n{verdict.uncertainty}\n" + ) + path.write_text(text, encoding="utf-8") + return str(path) diff --git a/sovereign_bench/llm.py b/sovereign_bench/llm.py new file mode 100644 index 0000000000000000000000000000000000000000..6d92e00a72f520c0952c02e1c9fe8e18c33c07a0 --- /dev/null +++ b/sovereign_bench/llm.py @@ -0,0 +1,209 @@ +from __future__ import annotations + +import os +import re +import time +from dataclasses import dataclass +from hashlib import sha256 + + +@dataclass +class ModelCall: + model: str + provider: str + ok: bool + latency_ms: int + prompt_hash: str + error: str | None = None + requested_model: str | None = None + runtime: str | None = None + gpu: str | None = None + + +@dataclass +class ModelResult: + text: str + call: ModelCall + input_text: str = "" + + +class ModelCallError(RuntimeError): + pass + + +def _short_error(exc: Exception) -> str: + message = str(exc).replace("\n", " ").strip() + return f"{exc.__class__.__name__}: {message[:220]}" + + +def messages_hash(messages: list[dict[str, str]]) -> str: + joined = "\n".join(f"{item.get('role', '')}:{item.get('content', '')}" for item in messages) + return sha256(joined.encode("utf-8")).hexdigest()[:16] + + +def _prompt_from_messages(messages: list[dict[str, str]]) -> str: + return "\n\n".join(f"{item.get('role', 'user').upper()}:\n{item.get('content', '')}" for item in messages) + "\n\nASSISTANT:\n" + + +def _response_text(response: object) -> str: + choice = response.choices[0] # type: ignore[attr-defined] + message = choice.message + for attr in ("content", "reasoning_content", "reasoning"): + value = getattr(message, attr, None) + if isinstance(value, str) and value.strip(): + return value.strip() + if isinstance(value, list): + pieces = [] + for item in value: + text = 
getattr(item, "text", None) or (item.get("text") if isinstance(item, dict) else None) + if text: + pieces.append(str(text)) + if pieces: + return " ".join(pieces).strip() + if hasattr(message, "model_dump"): + data = message.model_dump() + for key in ("content", "reasoning_content", "reasoning"): + value = data.get(key) + if isinstance(value, str) and value.strip(): + return value.strip() + return "" + + +def clean_model_text(text: str) -> str: + cleaned = re.sub(r"(?is).*?", "", text).strip() + if re.search(r"(?i)", cleaned): + raise ModelCallError("model returned unclosed hidden reasoning") + cleaned = re.sub(r"(?is).*?", "", cleaned).strip() + cleaned = re.sub(r"(?is).*?", "", cleaned).strip() + cleaned = cleaned.replace("", "").strip() + if not cleaned: + raise ModelCallError("model returned no visible output") + return cleaned + + +def model_enabled() -> bool: + return os.getenv("SOVEREIGN_DISABLE_LIVE_MODELS", "").lower() not in {"1", "true", "yes"} + + +def call_hf_chat_model( + *, + model: str, + messages: list[dict[str, str]], + provider: str = "auto", + max_tokens: int = 140, + temperature: float = 0.45, +) -> ModelResult: + prompt_hash = messages_hash(messages) + started = time.perf_counter() + token = os.getenv("HF_TOKEN") + if not token or not model_enabled(): + raise ModelCallError("HF_TOKEN missing or live models disabled") + + try: + from huggingface_hub import InferenceClient + + client = InferenceClient(model=model, provider=provider, token=token, timeout=45.0) + retry_messages = messages + [ + { + "role": "user", + "content": ( + "Your previous response did not include visible courtroom dialogue. " + "Return only the final spoken dialogue now. Do not include , analysis, reasoning, markdown, or notes. 
/no_think" + ), + } + ] + last_error: Exception | None = None + text = "" + for attempt_messages in (messages, retry_messages): + try: + response = client.chat_completion( + messages=attempt_messages, + max_tokens=max_tokens, + temperature=temperature, + top_p=0.9, + ) + raw_text = _response_text(response) + except Exception as chat_exc: + prompt = _prompt_from_messages(attempt_messages) + generated = client.text_generation( + prompt, + max_new_tokens=max_tokens, + temperature=temperature, + top_p=0.9, + return_full_text=False, + ) + raw_text = str(generated).strip() + if not raw_text: + raise chat_exc + try: + text = clean_model_text(raw_text) + break + except ModelCallError as exc: + last_error = exc + if not text: + raise last_error or RuntimeError("empty model response") + return ModelResult( + text=text, + call=ModelCall( + model=model, + provider=provider, + ok=True, + latency_ms=int((time.perf_counter() - started) * 1000), + prompt_hash=prompt_hash, + ), + ) + except Exception as exc: + raise ModelCallError( + f"{model} via {provider} failed after {int((time.perf_counter() - started) * 1000)}ms: {_short_error(exc)}" + ) from exc + + +def call_small_model( + *, + agent: str, + role: str, + model: str, + case_summary: str, + task: str, + evidence_summary: str, + provider: str = "auto", + max_tokens: int = 120, +) -> ModelResult: + messages = build_role_messages( + agent=agent, + role=role, + case_summary=case_summary, + task=task, + evidence_summary=evidence_summary, + ) + result = call_hf_chat_model( + model=model, + provider=provider, + messages=messages, + max_tokens=max_tokens, + ) + result.input_text = _prompt_from_messages(messages) + return result + + +def build_role_messages( + *, + agent: str, + role: str, + case_summary: str, + task: str, + evidence_summary: str, +) -> list[dict[str, str]]: + system = ( + "You are one AI character in Sovereign Bench, a miniature virtual courtroom. " + "Write concise courtroom dialogue only. 
Cite evidence IDs when relevant. " + "Do not claim certainty beyond the record. Do not add markdown. " + "Return final spoken dialogue only; never reveal hidden reasoning, analysis, or text. " + "Do not use thinking mode." + ) + user = ( + f"Agent: {agent}\nRole: {role}\nCase:\n{case_summary}\n\n" + f"Evidence:\n{evidence_summary}\n\nTask: {task}\n" + "Answer in 1-3 sentences, theatrical but clear.\n/no_think" + ) + return [{"role": "system", "content": system}, {"role": "user", "content": user}] diff --git a/sovereign_bench/models.py b/sovereign_bench/models.py new file mode 100644 index 0000000000000000000000000000000000000000..160a182fcd11ddb3446a40a1d43013565418d3de --- /dev/null +++ b/sovereign_bench/models.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +from typing import Literal + +from pydantic import BaseModel, Field + + +TrialPhase = Literal[ + "intake", + "claims", + "opening", + "evidence", + "questions", + "deliberation", + "verdict", + "appeal", +] + + +class EvidenceItem(BaseModel): + id: str + title: str + source: str + excerpt: str + supports: Literal["claimant", "respondent", "mixed", "context"] + reliability: float = Field(ge=0.0, le=1.0) + note: str + + +class CasePacket(BaseModel): + id: str + title: str + subtitle: str + claimant: str + respondent: str + charge: str + setting: str + claimant_claim: str + respondent_claim: str + source_note: str + evidence: list[EvidenceItem] + + +class TrialRequest(BaseModel): + case_id: str = "socrates" + search_query: str = "" + hypothetical: str = "" + speed: Literal["swift", "measured", "ceremonial"] = "swift" + mind_layer: bool = True + + +class AgentTurn(BaseModel): + agent: str + role: str + content: str + model: str + confidence: float = Field(ge=0.0, le=1.0) + input: str = "" + + +class JurorVote(BaseModel): + juror: str + persona: str = "" + vote: Literal["liable", "not_liable", "uncertain"] + reason: str + evidence_ids: list[str] + + +class Verdict(BaseModel): + finding: Literal["liable", 
"not_liable", "mixed", "uncertain"] + decree: str + rationale: str + evidence_ids: list[str] + uncertainty: str + remedy: str + + +class TrialEvent(BaseModel): + phase: TrialPhase + title: str + body: str + turns: list[AgentTurn] = Field(default_factory=list) + evidence: list[EvidenceItem] = Field(default_factory=list) + votes: list[JurorVote] = Field(default_factory=list) + verdict: Verdict | None = None + trace: dict = Field(default_factory=dict) diff --git a/sovereign_bench/retrieval.py b/sovereign_bench/retrieval.py new file mode 100644 index 0000000000000000000000000000000000000000..2d8771d21ec723b8b1073455218f41660ee6d853 --- /dev/null +++ b/sovereign_bench/retrieval.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +import re +from urllib.parse import quote_plus + +import httpx + +from .models import CasePacket, EvidenceItem + + +def _plain_text(html: str) -> str: + html = re.sub(r"(?is)|", " ", html) + html = re.sub(r"(?s)<[^>]+>", " ", html) + html = re.sub(r"\s+", " ", html) + return html.strip() + + +def build_live_case(query: str, hypothetical: str = "") -> CasePacket | None: + clean_query = " ".join(query.split()) + if len(clean_query) < 8: + return None + + try: + url = f"https://r.jina.ai/http://r.jina.ai/http://duckduckgo.com/html/?q={quote_plus(clean_query)}" + response = httpx.get(url, timeout=8.0, follow_redirects=True) + text = _plain_text(response.text) + except Exception: + return None + + snippets = [ + segment.strip() + for segment in re.split(r"(?<=[.!?])\s+", text) + if 80 <= len(segment.strip()) <= 320 and "http" not in segment[:20].lower() + ] + unique: list[str] = [] + for snippet in snippets: + if snippet.lower() not in {item.lower() for item in unique}: + unique.append(snippet) + if len(unique) == 4: + break + + if len(unique) < 2: + return None + + evidence = [ + EvidenceItem( + id=f"WEB-E{i}", + title=f"Retrieved fragment {i}", + source=f"Web retrieval for: {clean_query}", + excerpt=snippet, + supports="context" if i == 1 
else "mixed", + reliability=max(0.45, 0.72 - (i * 0.06)), + note="Live retrieval fragment; the court treats it as context until corroborated.", + ) + for i, snippet in enumerate(unique, start=1) + ] + framing = hypothetical.strip() or "the parties dispute how the retrieved facts should be interpreted" + return CasePacket( + id="live", + title=f"Live Search Tribunal: {clean_query[:58]}", + subtitle="A search-fed miniature proceeding with uncertainty kept visible.", + claimant="The Search Record", + respondent="The Counter-Interpretation", + charge=f"Whether {framing}.", + setting="A temporary court assembled from retrieved public web fragments.", + claimant_claim="The retrieved record supports a coherent claim that should be credited.", + respondent_claim="The retrieved record is incomplete, ambiguous, or overread by the claimant.", + source_note="Live web retrieval via public search snippets. Treat as unverified context, not ground truth.", + evidence=evidence, + ) diff --git a/tests/test_cases.py b/tests/test_cases.py new file mode 100644 index 0000000000000000000000000000000000000000..77d0cb035742c1291b5f64eacdc2fb76ffb82908 --- /dev/null +++ b/tests/test_cases.py @@ -0,0 +1,8 @@ +from sovereign_bench.cases import CASES + + +def test_cached_cases_have_evidence(): + assert {"socrates", "barnaby"} <= set(CASES) + for case in CASES.values(): + assert len(case.evidence) >= 4 + assert all(item.id and item.excerpt for item in case.evidence) diff --git a/tests/test_engine.py b/tests/test_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..ca5704ba5b647504ee2c8f03d3eba69289cada07 --- /dev/null +++ b/tests/test_engine.py @@ -0,0 +1,149 @@ +import json +import re + +import pytest + +from sovereign_bench.engine import JUDGE_NAME, JUROR_PERSONAS, RequiredModelError, run_trial +from sovereign_bench.llm import ModelCall, ModelResult +from sovereign_bench.models import TrialRequest + + +def _jury_json(evidence_summary: str, vote: str = "liable") -> 
str: + evidence_ids = re.findall(r"^([A-Z]+-E\d+):", evidence_summary, flags=re.M) + evidence_ids = (evidence_ids or ["SOC-E1"]) * 6 + return json.dumps( + { + "votes": [ + { + "juror": name, + "persona": persona, + "vote": vote if idx < 4 else "not_liable", + "reason": f"{name} applies a {persona} lens to exhibit {evidence_ids[idx]}.", + "evidence_ids": [evidence_ids[idx]], + } + for idx, (name, persona) in enumerate(JUROR_PERSONAS.items()) + ] + } + ) + + +def fake_model_runner(**kwargs): + text = ( + _jury_json(kwargs["evidence_summary"]) + if kwargs["role"] == "juror vote generator" + else f"{kwargs['agent']} responds to: {kwargs['task']}" + ) + prompt = ( + f"SYSTEM:\nFake live model for tests.\n\nUSER:\n" + f"Agent: {kwargs['agent']}\nRole: {kwargs['role']}\nTask: {kwargs['task']}\n\nASSISTANT:\n" + ) + return ModelResult( + text=text, + input_text=prompt, + call=ModelCall( + model=kwargs["model"], + provider=kwargs.get("provider", "test"), + ok=True, + latency_ms=1, + prompt_hash="test-prompt", + ), + ) + + +def test_cached_cases_emit_sequential_speaker_order(): + expected_speakers = [ + "Clerk Meridian", + JUDGE_NAME, + "Advocate Auric", + "Counsel Sable", + "Auditor Prism", + JUDGE_NAME, + "Advocate Auric", + "Counsel Sable", + "Nemotron Jury", + *list(JUROR_PERSONAS), + JUDGE_NAME, + ] + for case_id in ["socrates", "barnaby"]: + events = run_trial(TrialRequest(case_id=case_id), model_runner=fake_model_runner) + + assert [event.turns[0].agent for event in events] == expected_speakers + assert [event.phase for event in events].count("deliberation") == 7 + assert events[0].turns[0].input + assert "SYSTEM:" in events[0].turns[0].input + assert events[-1].verdict is not None + assert events[-1].votes and len(events[-1].votes) == 6 + assert "uncertainty" in events[-1].verdict.uncertainty.lower() + + +def test_no_event_contains_both_lawyers_speaking_together(): + events = run_trial(TrialRequest(case_id="socrates"), model_runner=fake_model_runner) + + for event 
in events: + agents = {turn.agent for turn in event.turns} + assert not {"Advocate Auric", "Counsel Sable"}.issubset(agents) + + +def test_juror_vote_events_have_fixed_personas_and_evidence(): + events = run_trial(TrialRequest(case_id="socrates"), model_runner=fake_model_runner) + juror_events = [event for event in events if event.turns[0].agent in JUROR_PERSONAS] + + assert len(juror_events) == 6 + for event in juror_events: + vote = event.votes[0] + assert vote.juror == event.turns[0].agent + assert vote.persona == JUROR_PERSONAS[vote.juror] + assert vote.vote in {"liable", "not_liable", "uncertain"} + assert vote.reason + assert vote.evidence_ids + + final = events[-1] + assert final.phase == "verdict" + assert [vote.juror for vote in final.votes] == list(JUROR_PERSONAS) + + +def test_jury_contract_uses_public_history_personas(): + assert JUDGE_NAME == "Marcus Aurelius" + assert JUROR_PERSONAS == { + "Karl Marx": "class power, material conditions, exploitation, institutional incentives", + "John Stuart Mill": "liberty, harm principle, utility, individual rights", + "Confucius": "social harmony, role duty, ritual order, moral cultivation", + "Cleopatra VII": "sovereign pragmatism, diplomacy, survival, legitimacy under pressure", + "Niccolo Machiavelli": "political realism, stability, power, consequences over ideals", + "Jensen Huang": "technological optimism, operator mindset, systems thinking, innovation tradeoffs", + } + + +def test_required_model_failure_stops_trial_without_canned_dialogue(): + def failing_runner(**kwargs): + return ModelResult( + text="", + input_text="SYSTEM:\nfailed", + call=ModelCall( + model=kwargs["model"], + provider=kwargs.get("provider", "test"), + ok=False, + latency_ms=1, + prompt_hash="test-prompt", + error="offline", + ), + ) + + with pytest.raises(RequiredModelError, match="unavailable"): + run_trial(TrialRequest(case_id="socrates"), model_runner=failing_runner) + + +def 
test_invalid_jury_output_stops_trial_without_fallback_votes(): + def invalid_jury_runner(**kwargs): + result = fake_model_runner(**kwargs) + if kwargs["role"] == "juror vote generator": + result.text = "the jury refuses structured output" + return result + + with pytest.raises(RequiredModelError, match="invalid JSON"): + run_trial(TrialRequest(case_id="socrates"), model_runner=invalid_jury_runner) + + +def test_live_search_stops_when_query_is_weak(): + with pytest.raises(RuntimeError, match="no fallback case"): + run_trial(TrialRequest(case_id="live", search_query="x"), model_runner=fake_model_runner) diff --git a/tests/test_ui_rendering.py b/tests/test_ui_rendering.py new file mode 100644 index 0000000000000000000000000000000000000000..c495377a1254e90ad4d785dadd44a146766f4f32 --- /dev/null +++ b/tests/test_ui_rendering.py @@ -0,0 +1,252 @@ +import inspect + +import app +from sovereign_bench.models import AgentTurn, EvidenceItem, JurorVote, TrialEvent + + +OLD_CARD_CLASSES = [ + "paper-panel", + "juror-panel", + "mind-panel", + "empty-state", + "trial-downloads", +] + + +def _event_with_lower_tab_data() -> TrialEvent: + evidence = EvidenceItem( + id="E1", + title="Ledger entry", + source="Archive", + excerpt="A short exhibit excerpt.", + supports="claimant", + reliability=0.82, + note="Useful but incomplete.", + ) + vote = JurorVote( + juror="Karl Marx", + persona=app.JUROR_PERSONAS["Karl Marx"], + vote="liable", + reason="The exhibit supports the claim.", + evidence_ids=["E1"], + ) + return TrialEvent( + phase="deliberation", + title="Jury weighs the record", + body="The jury reviews the record.", + turns=[ + AgentTurn( + agent="Nemotron Jury", + role="juror panel", + content="The jurors compare E1 and state their votes.", + model="nvidia/Nemotron-Orchestrator-8B", + confidence=0.84, + input="SYSTEM:\nYou are the jury.\n\nUSER:\nWeigh E1 and explain the vote.", + ) + ], + evidence=[evidence], + votes=[vote], + trace={"mode": "test"}, + ) + + +def 
_speaker_event(agent: str, phase: str = "questions") -> TrialEvent: + return TrialEvent( + phase=phase, + title=f"{agent} speaks", + body="A single speaker takes the floor.", + turns=[ + AgentTurn( + agent=agent, + role="test speaker", + content=f"{agent} has the visible floor.", + model="test-model", + confidence=0.9, + input="SYSTEM:\nTest prompt.", + ) + ], + ) + + +def test_lower_tab_renderers_emit_plain_text_classes(): + event = _event_with_lower_tab_data() + html = "\n".join( + [ + app.render_evidence([]), + app.render_evidence([event]), + app.render_jurors([]), + app.render_jurors([event]), + app.render_mind([], True), + app.render_mind([event], True), + app.render_mind([event], False), + ] + ) + + for class_name in OLD_CARD_CLASSES: + assert class_name not in html + + assert "drawer-text-block" in html + assert "drawer-empty" in html + assert "mind-text" in html + + +def test_download_controls_are_not_wired_into_app(): + source = inspect.getsource(app.build_app) + + assert "DownloadButton" not in source + assert "Download decree" not in source + assert "Download agent trace" not in source + + +def test_courtroom_splits_six_jurors_between_side_benches(): + html = app.render_court([_event_with_lower_tab_data()], started=True) + + assert "jury-benches left" in html + assert "jury-benches right" in html + assert html.count("Input" in html + assert "tooltip-io-label'>Output" in html + assert "Click to open full thread" in html + assert "class='ai-thread-modal'" in html + assert "thread-block'>SYSTEM:" in html + assert "The jurors compare E1 and state their votes." 
in html + assert "href='#ai-thread-karl-marx'" in html + + +def test_courtroom_renders_historical_judge_and_juror_assets(): + html = app.render_court([_event_with_lower_tab_data()], started=True) + + assert "Marcus Aurelius" in html + assert "assets/characters/marcus-aurelius.png" in html + for name, image in app.JUROR_IMAGES.items(): + assert name in html + assert image in html + assert html.count("class='juror-portrait'") == 6 + + +def test_courtroom_renders_foreground_fences_and_judge_table_above_characters(): + html = app.render_court([_event_with_lower_tab_data()], started=True) + + assert html.count("assets/foreground/foregroundFence.png") == 2 + assert "assets/foreground/JudgeTable.png" in html + assert html.find("class='puppet judge") < html.find("class='foreground-props'") + assert ".foreground-props {\n position: absolute;\n inset: 0;\n z-index: 13;" in app.CSS + assert ".puppet {\n --skin: #c99257;" in app.CSS + assert "z-index: 8;" in app.CSS + + +def test_latest_speaker_sets_stage_class_and_speech_bubble(): + html = app.render_court([_speaker_event("Advocate Auric", phase="claims")], started=True) + + assert "speaker-auric" in html + assert "class='speech-bubble'" in html + assert "Advocate Auric has the visible floor." in html + assert "puppet auric active walking" in html + assert "puppet sable active" not in html + + +def test_individual_juror_can_be_active_speaker(): + event = TrialEvent( + phase="deliberation", + title="Juror Karl Marx Votes", + body=app.JUROR_PERSONAS["Karl Marx"], + turns=[ + AgentTurn( + agent="Karl Marx", + role="juror", + content="Liable. 
E1 carries the record.", + model="nvidia/Nemotron-Orchestrator-8B", + confidence=0.86, + input="SYSTEM:\nJury JSON prompt.", + ) + ], + votes=[ + JurorVote( + juror="Karl Marx", + persona=app.JUROR_PERSONAS["Karl Marx"], + vote="liable", + reason="E1 carries the record.", + evidence_ids=["E1"], + ) + ], + ) + html = app.render_court([event], started=True) + + assert "speaker-karl-marx" in html + assert "