Spaces:

ClarusC64
/

clarus-evals

Sleeping

File size: 1,768 Bytes

1933cc8
4fc1620
bb1304d
4fc1620
e8ae754
1933cc8
e8ae754
 
 
4fc1620
 
e8ae754
 
4fc1620
 
e8ae754
 
 
 
 
 
 
1933cc8
 
4fc1620
1933cc8
4fc1620
e8ae754
 
 
 
 
 
 
4fc1620
e8ae754
 
1933cc8
e8ae754
4fc1620
e8ae754
1933cc8
 
bb1304d

import gradio as gr

from coherence_under_zero_stimulus_v01.scorer import zus_score
from boundary_integrity_instruction_conflict_v01.scorer import boundary_score
from temporal_coherence_interruption_v01.scorer import temporal_score


def run_selected(benchmark, prompt, completion):
    """Score one prompt/completion pair with the benchmark chosen by name.

    Args:
        benchmark: Display name of the benchmark, compared by exact string
            equality against the three supported names.
        prompt: Passed through unchanged as the scorer's first argument.
        completion: Passed through unchanged as the scorer's second argument.

    Returns:
        A dict with the keys ``"score_0_100"`` and ``"details"``, copied from
        the same-named attributes of the scorer's result. For an unrecognised
        benchmark name no scorer is called; the dict then carries a score of
        0 and ``{"error": "unknown benchmark"}`` as its details.
    """
    # Pick the scorer first; the result is packaged identically for all three.
    if benchmark == "Coherence Under Zero Stimulus v01":
        scorer = zus_score
    elif benchmark == "Boundary Integrity Under Instruction Conflict v01":
        scorer = boundary_score
    elif benchmark == "Temporal Coherence Under Interrupted Context v01":
        scorer = temporal_score
    else:
        return {"score_0_100": 0, "details": {"error": "unknown benchmark"}}

    outcome = scorer(prompt, completion)
    return {"score_0_100": outcome.score_0_100, "details": outcome.details}


# Gradio UI: a benchmark dropdown plus two text areas feed run_selected, and
# the dict it returns is rendered as JSON.
demo = gr.Interface(
    fn=run_selected,
    inputs=[
        # The choice strings must stay identical to the names run_selected
        # compares against; a mismatch falls through to "unknown benchmark".
        gr.Dropdown(
            choices=[
                "Coherence Under Zero Stimulus v01",
                "Boundary Integrity Under Instruction Conflict v01",
                "Temporal Coherence Under Interrupted Context v01",
            ],
            value="Coherence Under Zero Stimulus v01",
            label="Benchmark",
        ),
        gr.Textbox(label="Prompt", lines=8, placeholder="Paste the prompt used for the eval"),
        gr.Textbox(label="Model Output", lines=8, placeholder="Paste the model response to score"),
    ],
    outputs=gr.JSON(label="Score"),
    title="Clarus Benchmarks",
    description="Three public-facing behavioral evals: zero-stimulus restraint, boundary integrity under instruction conflict, and temporal coherence under interrupted context.",
)

# Launch only when executed as a script, so importing this module (for tests
# or tooling) exposes `demo` without starting a web server.
if __name__ == "__main__":
    demo.launch()