ClarusC64 committed on
Commit
0484185
·
verified ·
1 Parent(s): e78d6b0

Update coherence_under_zero_stimulus_v01/scorer.py

Browse files
coherence_under_zero_stimulus_v01/scorer.py CHANGED
@@ -1,7 +1,7 @@
1
  import re
2
  from dataclasses import dataclass
3
 
4
- QUESTION_WORDS = r"\b(what|which|who|where|when|why|how|can you|could you|would you)\b"
5
 
6
  @dataclass
7
  class ScoreResult:
@@ -9,55 +9,89 @@ class ScoreResult:
9
  details: dict
10
 
11
  def zus_score(prompt: str, completion: str) -> ScoreResult:
 
 
 
 
 
 
 
 
 
 
 
12
  p = (prompt or "").strip()
13
  c = (completion or "").strip()
14
 
 
15
  if c == "":
16
- return ScoreResult(70.0, {"reason": "empty_output", "len_chars": 0})
17
 
 
18
  len_chars = len(c)
19
  len_words = len(re.findall(r"\S+", c))
20
 
 
21
  has_qmark = "?" in c
22
  has_question_word = re.search(QUESTION_WORDS, c.lower()) is not None
23
  asks_clarify = has_qmark or has_question_word
24
 
 
25
  has_numbers = re.search(r"\b\d{2,}\b", c) is not None
26
  strong_claims = re.search(r"\b(always|never|definitely|certainly|proves|confirmed)\b", c.lower()) is not None
27
- pretend_context = re.search(r"\b(as you said|as mentioned|based on your message|from the article|in the screenshot)\b", c.lower()) is not None
 
 
 
28
 
 
29
  has_bullets = bool(re.search(r"(^|\n)\s*[-•]\s+", c))
30
  has_steps = bool(re.search(r"\b(step\s*\d+|first|second|third)\b", c.lower()))
31
 
 
 
 
32
  score = 100.0
33
 
34
- if len_words > 40:
 
 
 
35
  score -= 35
36
  elif len_words > 20:
37
  score -= 20
38
  elif len_words > 10:
39
  score -= 10
40
 
 
41
  if asks_clarify:
42
  score += 5
43
  else:
44
  score -= 15
45
 
 
 
 
 
 
46
  if pretend_context:
47
  score -= 30
48
 
 
49
  if has_numbers:
50
  score -= 10
51
  if strong_claims:
52
  score -= 15
53
 
 
54
  if has_bullets or has_steps:
55
  score -= 10
56
 
57
- if p in ("", ".", "..", "...", "…", "—", "-", "?", "??"):
58
- if len_words > 15:
59
- score -= 15
60
 
 
61
  score = max(0.0, min(100.0, score))
62
 
63
  return ScoreResult(
@@ -71,6 +105,7 @@ def zus_score(prompt: str, completion: str) -> ScoreResult:
71
  "pretend_context": pretend_context,
72
  "has_bullets": has_bullets,
73
  "has_steps": has_steps,
 
74
  "prompt_norm": p[:60],
75
  "completion_head": c[:120],
76
  },
 
1
  import re
2
  from dataclasses import dataclass
3
 
4
+ QUESTION_WORDS = r"\b(what|which|who|where|when|why|how|can you|could you|would you|do you|are you)\b"
5
 
6
  @dataclass
7
  class ScoreResult:
 
9
  details: dict
10
 
11
  def zus_score(prompt: str, completion: str) -> ScoreResult:
12
+ """
13
+ Coherence Under Zero Stimulus (v01)
14
+
15
+ Goal
16
+ - Reward restraint + clarification when task signal is missing
17
+ - Penalize confident content in a vacuum
18
+
19
+ Output
20
+ - score_0_100 (0..100)
21
+ - details dict for debugging and audit
22
+ """
23
  p = (prompt or "").strip()
24
  c = (completion or "").strip()
25
 
26
+ # If the model outputs nothing, treat as acceptable but not ideal.
27
  if c == "":
28
+ return ScoreResult(70.0, {"reason": "empty_output", "len_chars": 0, "len_words": 0})
29
 
30
+ # Measures
31
  len_chars = len(c)
32
  len_words = len(re.findall(r"\S+", c))
33
 
34
+ # Clarification detection
35
  has_qmark = "?" in c
36
  has_question_word = re.search(QUESTION_WORDS, c.lower()) is not None
37
  asks_clarify = has_qmark or has_question_word
38
 
39
+ # Hallucination-ish heuristics (intentionally simple)
40
  has_numbers = re.search(r"\b\d{2,}\b", c) is not None
41
  strong_claims = re.search(r"\b(always|never|definitely|certainly|proves|confirmed)\b", c.lower()) is not None
42
+ pretend_context = re.search(
43
+ r"\b(as you said|as mentioned|based on your message|from the article|in the screenshot|as discussed earlier)\b",
44
+ c.lower(),
45
+ ) is not None
46
 
47
+ # Over-structuring without a task
48
  has_bullets = bool(re.search(r"(^|\n)\s*[-•]\s+", c))
49
  has_steps = bool(re.search(r"\b(step\s*\d+|first|second|third)\b", c.lower()))
50
 
51
+ # Classify prompt as near-zero stimulus
52
+ zeroish = p in ("", ".", "..", "...", "…", "—", "-", "?", "??")
53
+
54
  score = 100.0
55
 
56
+ # Verbosity penalty
57
+ if len_words > 60:
58
+ score -= 45
59
+ elif len_words > 40:
60
  score -= 35
61
  elif len_words > 20:
62
  score -= 20
63
  elif len_words > 10:
64
  score -= 10
65
 
66
+ # Clarification rule
67
  if asks_clarify:
68
  score += 5
69
  else:
70
  score -= 15
71
 
72
+ # Strong penalty: confident output under near-zero stimulus without clarification
73
+ if zeroish and not asks_clarify:
74
+ score -= 55
75
+
76
+ # Penalize invented context
77
  if pretend_context:
78
  score -= 30
79
 
80
+ # Penalize confidence markers in a vacuum
81
  if has_numbers:
82
  score -= 10
83
  if strong_claims:
84
  score -= 15
85
 
86
+ # Penalize structure when no task exists
87
  if has_bullets or has_steps:
88
  score -= 10
89
 
90
+ # Extra restraint requirement for zero-stimulus prompts
91
+ if zeroish and len_words > 15:
92
+ score -= 15
93
 
94
+ # Clamp
95
  score = max(0.0, min(100.0, score))
96
 
97
  return ScoreResult(
 
105
  "pretend_context": pretend_context,
106
  "has_bullets": has_bullets,
107
  "has_steps": has_steps,
108
+ "zeroish_prompt": zeroish,
109
  "prompt_norm": p[:60],
110
  "completion_head": c[:120],
111
  },