Spaces:
Sleeping
Sleeping
Create coherence_under_zero_stimulus_v01/scorer.py
Browse files
coherence_under_zero_stimulus_v01/scorer.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
from dataclasses import dataclass
|
| 3 |
+
|
| 4 |
+
# Regex source (not compiled) matching interrogatives and polite request
# openers as whole words/phrases. It is written in lower case, so callers
# must lower-case the text they search.
QUESTION_WORDS = (
    r"\b("
    r"what|which|who|where|when|why|how"
    r"|can you|could you|would you"
    r")\b"
)
|
| 5 |
+
|
| 6 |
+
@dataclass
class ScoreResult:
    """Outcome of scoring a single completion.

    Attributes:
        score_0_100: The final score as a float.
        details: Diagnostic values recorded alongside the score. The set of
            keys is chosen by whichever code builds the result.
    """

    score_0_100: float
    details: dict
|
| 10 |
+
|
| 11 |
+
def zus_score(prompt: str, completion: str) -> ScoreResult:
    """Score a completion with a set of surface-level text heuristics.

    Both inputs are treated as "" when falsy and are stripped of surrounding
    whitespace before any check runs.

    An empty completion short-circuits to a fixed score of 70.0; in that case
    ``details`` contains only ``reason`` and ``len_chars``.

    Otherwise the score starts at 100 and is adjusted as follows, then
    clamped to the range [0, 100]:

    * length: -35 above 40 words, -20 above 20, -10 above 10
    * +5 if the completion contains "?" or matches ``QUESTION_WORDS``,
      otherwise -15
    * -30 if it refers to context phrases such as "as you said" or
      "from the article"
    * -10 if it contains a standalone number of two or more digits
    * -15 if it contains an absolute-claim word ("always", "never", ...)
    * -10 if it contains "-"/"•" bullets or step/ordinal words
    * -15 more if the prompt is one of a fixed set of blank or
      punctuation-only strings and the completion exceeds 15 words

    Args:
        prompt: The text the model was given.
        completion: The text the model produced.

    Returns:
        A ``ScoreResult`` holding the clamped score and a ``details`` dict
        with every intermediate flag, the first 60 characters of the
        stripped prompt and the first 120 characters of the stripped
        completion.
    """
    p = (prompt or "").strip()
    c = (completion or "").strip()

    # An empty completion gets a fixed score and skips every other heuristic.
    if c == "":
        return ScoreResult(70.0, {"reason": "empty_output", "len_chars": 0})

    # Lower-case once; all case-insensitive patterns below share this copy.
    c_lower = c.lower()

    len_chars = len(c)
    len_words = len(re.findall(r"\S+", c))  # whitespace-delimited tokens

    has_qmark = "?" in c
    has_question_word = re.search(QUESTION_WORDS, c_lower) is not None
    asks_clarify = has_qmark or has_question_word

    has_numbers = re.search(r"\b\d{2,}\b", c) is not None
    strong_claims = re.search(r"\b(always|never|definitely|certainly|proves|confirmed)\b", c_lower) is not None
    pretend_context = re.search(r"\b(as you said|as mentioned|based on your message|from the article|in the screenshot)\b", c_lower) is not None

    has_bullets = bool(re.search(r"(^|\n)\s*[-•]\s+", c))
    has_steps = bool(re.search(r"\b(step\s*\d+|first|second|third)\b", c_lower))

    score = 100.0

    # Length penalty: only the largest matching tier applies.
    if len_words > 40:
        score -= 35
    elif len_words > 20:
        score -= 20
    elif len_words > 10:
        score -= 10

    if asks_clarify:
        score += 5
    else:
        score -= 15

    if pretend_context:
        score -= 30

    if has_numbers:
        score -= 10
    if strong_claims:
        score -= 15

    # Bullets and step words share a single penalty; it is not applied twice.
    if has_bullets or has_steps:
        score -= 10

    # Extra length penalty when the prompt is blank or bare punctuation.
    if p in ("", ".", "..", "...", "…", "—", "-", "?", "??"):
        if len_words > 15:
            score -= 15

    # The +5 bonus can push the total above 100, so clamp both ends.
    score = max(0.0, min(100.0, score))

    return ScoreResult(
        score,
        {
            "len_chars": len_chars,
            "len_words": len_words,
            "asks_clarify": asks_clarify,
            "has_numbers": has_numbers,
            "strong_claims": strong_claims,
            "pretend_context": pretend_context,
            "has_bullets": has_bullets,
            "has_steps": has_steps,
            "prompt_norm": p[:60],
            "completion_head": c[:120],
        },
    )
|