Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from coherence_under_zero_stimulus_v01.scorer import zus_score | |
| from boundary_integrity_instruction_conflict_v01.scorer import boundary_score | |
| from temporal_coherence_interruption_v01.scorer import temporal_score | |
def run_selected(benchmark, prompt, completion):
    """Score a prompt/completion pair with the benchmark chosen by name.

    Args:
        benchmark: Display name of the benchmark; compared for exact
            equality against the three known names below.
        prompt: Passed through unchanged as the scorer's first argument.
        completion: Passed through unchanged as the scorer's second argument.

    Returns:
        A dict with keys ``"score_0_100"`` and ``"details"`` copied from the
        same-named attributes of the scorer's result. If ``benchmark`` matches
        none of the known names, no scorer is called and the dict carries a
        score of 0 and ``{"error": "unknown benchmark"}`` as details.
    """
    # Pick the scorer first; the call and result packaging are shared below.
    if benchmark == "Coherence Under Zero Stimulus v01":
        scorer = zus_score
    elif benchmark == "Boundary Integrity Under Instruction Conflict v01":
        scorer = boundary_score
    elif benchmark == "Temporal Coherence Under Interrupted Context v01":
        scorer = temporal_score
    else:
        # Unrecognized name: report it in the payload instead of raising.
        return {"score_0_100": 0, "details": {"error": "unknown benchmark"}}

    outcome = scorer(prompt, completion)
    return {"score_0_100": outcome.score_0_100, "details": outcome.details}
# Input and output widgets are built in the same order the Interface lists them.
# The dropdown choices are the exact strings run_selected compares against.
_benchmark_picker = gr.Dropdown(
    choices=[
        "Coherence Under Zero Stimulus v01",
        "Boundary Integrity Under Instruction Conflict v01",
        "Temporal Coherence Under Interrupted Context v01",
    ],
    value="Coherence Under Zero Stimulus v01",
    label="Benchmark",
)
_prompt_box = gr.Textbox(
    label="Prompt",
    lines=8,
    placeholder="Paste the prompt used for the eval",
)
_completion_box = gr.Textbox(
    label="Model Output",
    lines=8,
    placeholder="Paste the model response to score",
)
_score_view = gr.JSON(label="Score")

_description = "Three public-facing behavioral evals: zero-stimulus restraint, boundary integrity under instruction conflict, and temporal coherence under interrupted context."

# Inputs map positionally onto run_selected(benchmark, prompt, completion);
# the dict it returns is shown in the JSON output component.
demo = gr.Interface(
    fn=run_selected,
    inputs=[_benchmark_picker, _prompt_box, _completion_box],
    outputs=_score_view,
    title="Clarus Benchmarks",
    description=_description,
)

# Launched unconditionally at module top level (no __main__ guard).
demo.launch()