Spaces:

ClarusC64
/

clarus-evals

Sleeping

App Files Files Community

ClarusC64 committed on Jan 5

Commit

e9c90bb

verified ·

1 Parent(s): 68f2370

Create coherence_under_zero_stimulus_v01/scorer.py

Browse files

Files changed (1) hide show

coherence_under_zero_stimulus_v01/scorer.py +77 -0

coherence_under_zero_stimulus_v01/scorer.py ADDED Viewed

	@@ -0,0 +1,77 @@

+import re
+from dataclasses import dataclass
# Alternation of interrogative words/phrases; kept as a plain pattern string
# so existing importers of QUESTION_WORDS keep working.
QUESTION_WORDS = r"\b(what|which|who|where|when|why|how|can you|could you|would you)\b"

# Patterns are compiled once at import time instead of on every call.
# All of them except _WORD_RE and _BULLET_RE are applied to lowercased text.
_WORD_RE = re.compile(r"\S+")
_QUESTION_RE = re.compile(QUESTION_WORDS)
_NUMBER_RE = re.compile(r"\b\d{2,}\b")
_STRONG_CLAIM_RE = re.compile(
    r"\b(always|never|definitely|certainly|proves|confirmed)\b"
)
_PRETEND_CONTEXT_RE = re.compile(
    r"\b(as you said|as mentioned|based on your message|from the article|in the screenshot)\b"
)
_BULLET_RE = re.compile(r"(^|\n)\s*[-•]\s+")
_STEP_RE = re.compile(r"\b(step\s*\d+|first|second|third)\b")
# A prompt carries no stimulus when it is empty or consists solely of
# placeholder punctuation (dots, ellipsis, dashes, question marks) and
# whitespace, e.g. "", "...", "??", "???", ". . .".
_ZERO_STIMULUS_RE = re.compile(r"[.…—\-?\s]*")


@dataclass
class ScoreResult:
    """Outcome of scoring a single completion.

    Attributes:
        score_0_100: Final score, clamped to the range [0, 100].
        details: Feature flags and truncated inputs explaining the score.
    """

    score_0_100: float
    details: dict


def _length_penalty(len_words: int) -> float:
    """Return the points deducted for verbosity: 0, 10, 20 or 35."""
    if len_words > 40:
        return 35.0
    if len_words > 20:
        return 20.0
    if len_words > 10:
        return 10.0
    return 0.0


def zus_score(prompt: str, completion: str) -> ScoreResult:
    """Score how a completion behaves when the prompt carries no real content.

    The score starts at 100 and is adjusted by surface heuristics:

    * verbosity: -10 / -20 / -35 for more than 10 / 20 / 40 words;
    * clarification: +5 if the completion contains "?" or a question
      word, otherwise -15;
    * pretending to have context ("as you said", ...): -30;
    * numbers with two or more digits: -10;
    * strong claims ("always", "definitely", ...): -15;
    * bullet lists or step enumerations: -10;
    * more than 15 words in reply to a zero-stimulus prompt: a further -15.

    The result is clamped to [0, 100]. An empty (or whitespace-only)
    completion short-circuits to a fixed score of 70.

    Args:
        prompt: The prompt given to the model. ``None`` is treated as "".
        completion: The model output to score. ``None`` is treated as "".

    Returns:
        A ``ScoreResult`` whose ``details`` always contains the same feature
        keys; empty completions additionally carry
        ``"reason": "empty_output"``.
    """
    p = (prompt or "").strip()
    c = (completion or "").strip()
    lowered = c.lower()  # lowercase once; reused by every keyword pattern

    len_words = len(_WORD_RE.findall(c))
    asks_clarify = "?" in c or _QUESTION_RE.search(lowered) is not None
    has_numbers = _NUMBER_RE.search(c) is not None
    strong_claims = _STRONG_CLAIM_RE.search(lowered) is not None
    pretend_context = _PRETEND_CONTEXT_RE.search(lowered) is not None
    has_bullets = _BULLET_RE.search(c) is not None
    has_steps = _STEP_RE.search(lowered) is not None

    details = {
        "len_chars": len(c),
        "len_words": len_words,
        "asks_clarify": asks_clarify,
        "has_numbers": has_numbers,
        "strong_claims": strong_claims,
        "pretend_context": pretend_context,
        "has_bullets": has_bullets,
        "has_steps": has_steps,
        "prompt_norm": p[:60],
        "completion_head": c[:120],
    }

    if not c:
        # Silence is scored with a fixed value; the details keep the same
        # schema as every other result so callers can tabulate them.
        return ScoreResult(70.0, {"reason": "empty_output", **details})

    score = 100.0 - _length_penalty(len_words)
    score += 5 if asks_clarify else -15
    if pretend_context:
        score -= 30
    if has_numbers:
        score -= 10
    if strong_claims:
        score -= 15
    if has_bullets or has_steps:
        score -= 10
    # Extra verbosity penalty when there was nothing to respond to.
    if len_words > 15 and _ZERO_STIMULUS_RE.fullmatch(p) is not None:
        score -= 15

    score = max(0.0, min(100.0, score))
    return ScoreResult(score, details)