Spaces:

ClarusC64
/

clarus-evals

Sleeping

ClarusC64 committed on Jan 5

Commit

bb1304d

verified ·

1 Parent(s): e9c90bb

Update app.py

Use stable scorer import

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,49 +1,19 @@
-import sys
-from pathlib import Path
 import gradio as gr
-# --- make sure the benchmark module is importable ---
-ROOT = Path(__file__).resolve().parent
-BENCH = ROOT / "coherence_under_zero_stimulus_v01"
-if str(ROOT) not in sys.path:
-    sys.path.insert(0, str(ROOT))
-if str(BENCH) not in sys.path:
-    sys.path.insert(0, str(BENCH))
-try:
-    from scorer import zus_score
-except Exception as e:
-    def zus_score(prompt, completion):
-        return type(
-            "ScoreResult",
-            (),
-            {
-                "score_0_100": 0.0,
-                "details": {
-                    "error": "scorer import failed",
-                    "exception": str(e),
-                },
-            },
-        )()
-# --- app logic ---
 def score_response(prompt, completion):
     r = zus_score(prompt, completion)
-    return {
-        "score_0_100": r.score_0_100,
-        "details": r.details,
-    }
 demo = gr.Interface(
     fn=score_response,
     inputs=[
-        gr.Textbox(label="Prompt", lines=3, placeholder="Leave empty or minimal"),
-        gr.Textbox(label="Model Output", lines=6, placeholder="Paste model response"),
     ],
     outputs=gr.JSON(label="Clarus Score"),
     title="Clarus Benchmark – Coherence Under Zero Stimulus",
     description="Tests restraint when task signal is missing. High scores reflect minimal, clarifying responses.",
 )
-if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+from coherence_under_zero_stimulus_v01.scorer import zus_score
 def score_response(prompt, completion):
     r = zus_score(prompt, completion)
+    return {"score_0_100": r.score_0_100, "details": r.details}
 demo = gr.Interface(
     fn=score_response,
     inputs=[
+        gr.Textbox(label="Prompt", lines=3),
+        gr.Textbox(label="Model Output", lines=6),
     ],
     outputs=gr.JSON(label="Clarus Score"),
     title="Clarus Benchmark – Coherence Under Zero Stimulus",
     description="Tests restraint when task signal is missing. High scores reflect minimal, clarifying responses.",
 )
+demo.launch()