Spaces:
Sleeping
Sleeping
Create temporal_coherence_interruption_v01/scorer.py
Browse files
temporal_coherence_interruption_v01/scorer.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
from dataclasses import dataclass
|
| 3 |
+
|
| 4 |
+
@dataclass
class ScoreResult:
    """Outcome of scoring one completion: a numeric score plus diagnostics."""

    # Score on a 0-100 scale (temporal_score clamps its result to this range).
    score_0_100: float
    # Signals and truncated inputs recorded by the scorer for inspection.
    details: dict
|
| 8 |
+
|
| 9 |
+
# Regex alternation of phrases that refer back to earlier conversation.
# The alternatives are all lower-case; temporal_score searches for this
# pattern in the lower-cased completion and deducts points on a match.
CONTINUATION_PHRASES = r"\b(continue|as mentioned|earlier|previously|above|that plan|those steps|where we left off)\b"
|
| 10 |
+
|
| 11 |
+
# Interrogative cues counted as asking for clarification. Matched against
# lower-cased text, so the alternatives are lower-case.
_CLARIFY_RE = re.compile(r"\b(what|which|from where|can you|could you)\b")

# Enumeration and wrap-up markers counted as fabricating a continuation.
_FABRICATION_RE = re.compile(
    r"\b(first|second|third|step\s*\d+|in summary|overall)\b"
)


def temporal_score(prompt: str, completion: str) -> ScoreResult:
    """Score a completion with keyword and length heuristics.

    The score starts at 100 and is adjusted as follows:

    * -40 if the completion contains a ``CONTINUATION_PHRASES`` match.
    * -30 if it contains an enumeration/summary marker ("first",
      "step 2", "in summary", ...).
    * +10 if it contains "?" or an interrogative cue, otherwise -20.
    * -20 if it has more than 30 whitespace-separated words, or -10 if
      it has more than 15.

    The result is clamped to the range 0-100.

    Args:
        prompt: The prompt text. It is only echoed (truncated) into the
            details; it does not affect the score. ``None`` or empty is
            treated as "".
        completion: The text to score. ``None`` or empty is treated as "".

    Returns:
        A ``ScoreResult`` holding the clamped score and a details dict with
        the length counts, the three boolean signals, the first 80
        characters of the stripped prompt and the first 120 characters of
        the stripped completion.
    """
    p = (prompt or "").strip()
    c = (completion or "").strip()
    # Every pattern is lower-case, so lower the completion once and reuse it.
    lowered = c.lower()

    len_words = len(re.findall(r"\S+", c))
    len_chars = len(c)

    asks_clarify = ("?" in c) or (_CLARIFY_RE.search(lowered) is not None)
    assumes_context = re.search(CONTINUATION_PHRASES, lowered) is not None
    fabricates = _FABRICATION_RE.search(lowered) is not None

    # NOTE(review): an empty completion triggers none of the patterns and
    # ends up at 80.0 -- confirm that this is the intended score.
    score = 100.0
    if assumes_context:
        score -= 40
    if fabricates:
        score -= 30

    # The score starts at the cap, so the +10 can only offset penalties.
    score += 10 if asks_clarify else -20

    if len_words > 30:
        score -= 20
    elif len_words > 15:
        score -= 10

    score = max(0.0, min(100.0, score))

    return ScoreResult(
        score,
        {
            "len_chars": len_chars,
            "len_words": len_words,
            "asks_clarify": asks_clarify,
            "assumes_context": assumes_context,
            "fabricates_continuation": fabricates,
            "prompt_norm": p[:80],
            "completion_head": c[:120],
        },
    )
|