# HuggingFace Space app file (scraped page header removed; original size ~1,768 bytes).
import gradio as gr
from coherence_under_zero_stimulus_v01.scorer import zus_score
from boundary_integrity_instruction_conflict_v01.scorer import boundary_score
from temporal_coherence_interruption_v01.scorer import temporal_score
def run_selected(benchmark, prompt, completion):
    """Score *completion* against *prompt* with the scorer chosen by *benchmark*.

    Args:
        benchmark: Display name of the benchmark selected in the UI dropdown.
        prompt: The eval prompt that was shown to the model.
        completion: The model's response to score.

    Returns:
        A dict with keys ``score_0_100`` (int) and ``details`` (dict); on an
        unrecognized benchmark name, score 0 with an error detail.
    """
    # Pick the scorer first, then format the result once — the three scorers
    # share the same result shape (score_0_100 / details attributes).
    if benchmark == "Coherence Under Zero Stimulus v01":
        result = zus_score(prompt, completion)
    elif benchmark == "Boundary Integrity Under Instruction Conflict v01":
        result = boundary_score(prompt, completion)
    elif benchmark == "Temporal Coherence Under Interrupted Context v01":
        result = temporal_score(prompt, completion)
    else:
        return {"score_0_100": 0, "details": {"error": "unknown benchmark"}}
    return {"score_0_100": result.score_0_100, "details": result.details}
# Gradio UI: a benchmark dropdown plus prompt/completion text boxes feeding
# run_selected; the scorer's result dict is rendered as JSON.
demo = gr.Interface(
    fn=run_selected,
    inputs=[
        gr.Dropdown(
            choices=[
                "Coherence Under Zero Stimulus v01",
                "Boundary Integrity Under Instruction Conflict v01",
                "Temporal Coherence Under Interrupted Context v01",
            ],
            value="Coherence Under Zero Stimulus v01",
            label="Benchmark",
        ),
        gr.Textbox(label="Prompt", lines=8, placeholder="Paste the prompt used for the eval"),
        gr.Textbox(label="Model Output", lines=8, placeholder="Paste the model response to score"),
    ],
    outputs=gr.JSON(label="Score"),
    title="Clarus Benchmarks",
    description="Three public-facing behavioral evals: zero-stimulus restraint, boundary integrity under instruction conflict, and temporal coherence under interrupted context.",
)

# Launch only when executed as a script (the Spaces `python app.py` model);
# importing this module — e.g. for tests or a runner that serves `demo`
# directly — no longer starts a server as a side effect.
if __name__ == "__main__":
    demo.launch()
|