Spaces:

ClarusC64
/

clarus-evals

Sleeping

ClarusC64 committed on Jan 5

Commit

541e2a8

verified ·

1 Parent(s): e496b32

Create temporal_coherence_interruption_v01/scorer.py

Files changed (1) hide show

temporal_coherence_interruption_v01/scorer.py ADDED Viewed

import re
from dataclasses import dataclass
from typing import Any, Dict
@dataclass
class ScoreResult:
    """Result of scoring one completion.

    Attributes:
        score_0_100: Numeric score; ``temporal_score`` clamps the value it
            stores here to the range ``[0.0, 100.0]``.
        details: Diagnostic values recorded alongside the score (lengths,
            heuristic flags and truncated text when produced by
            ``temporal_score``).
    """

    score_0_100: float
    details: Dict[str, Any]
# Regex source for phrases that refer back to earlier conversation content.
# temporal_score searches for it in the lower-cased completion, so every
# alternative is written in lower case.
CONTINUATION_PHRASES = r"\b(continue|as mentioned|earlier|previously|above|that plan|those steps|where we left off)\b"
def temporal_score(prompt: str, completion: str) -> ScoreResult:
    """Score a completion with keyword heuristics about missing context.

    The score starts at 100 and is adjusted as follows:

    * -40 if the completion contains a phrase from ``CONTINUATION_PHRASES``.
    * -30 if it contains an enumeration or summary marker ("first",
      "second", "third", "step N", "in summary", "overall").
    * +10 if it contains a ``?`` or a question-style phrase ("what",
      "which", "from where", "can you", "could you"); otherwise -20.
    * -20 if it has more than 30 whitespace-separated words, or -10 if it
      has more than 15.

    The total is then clamped to ``[0.0, 100.0]``.

    Args:
        prompt: Prompt text; ``None`` or empty is treated as ``""``. It is
            only echoed (truncated) into the details and does not affect
            the score.
        completion: Text to score; ``None`` or empty is treated as ``""``.

    Returns:
        A ``ScoreResult`` holding the clamped score and a details dict with
        the completion's lengths, the three heuristic flags, the first 80
        characters of the stripped prompt and the first 120 characters of
        the stripped completion.
    """
    prompt_text = (prompt or "").strip()
    completion_text = (completion or "").strip()
    # Lower-case once: every pattern below is written in lower case.
    lowered = completion_text.lower()

    word_count = len(re.findall(r"\S+", completion_text))

    asks_clarify = "?" in completion_text or bool(
        re.search(r"\b(what|which|from where|can you|could you)\b", lowered)
    )
    assumes_context = bool(re.search(CONTINUATION_PHRASES, lowered))
    fabricates = bool(
        re.search(r"\b(first|second|third|step\s*\d+|in summary|overall)\b", lowered)
    )

    score = 100.0
    if assumes_context:
        score -= 40
    if fabricates:
        score -= 30
    score += 10 if asks_clarify else -20
    # Length penalty is tiered, not cumulative.
    if word_count > 30:
        score -= 20
    elif word_count > 15:
        score -= 10
    # The +10 bonus can push past 100 and stacked penalties below 0.
    score = max(0.0, min(100.0, score))

    return ScoreResult(
        score,
        {
            "len_chars": len(completion_text),
            "len_words": word_count,
            "asks_clarify": asks_clarify,
            "assumes_context": assumes_context,
            "fabricates_continuation": fabricates,
            "prompt_norm": prompt_text[:80],
            "completion_head": completion_text[:120],
        },
    )