ClarusC64's picture
Update coherence_under_zero_stimulus_v01/scorer.py
0484185 verified
import re
from dataclasses import dataclass
QUESTION_WORDS = r"\b(what|which|who|where|when|why|how|can you|could you|would you|do you|are you)\b"
@dataclass
class ScoreResult:
score_0_100: float
details: dict
def zus_score(prompt: str, completion: str) -> ScoreResult:
"""
Coherence Under Zero Stimulus (v01)
Goal
- Reward restraint + clarification when task signal is missing
- Penalize confident content in a vacuum
Output
- score_0_100 (0..100)
- details dict for debugging and audit
"""
p = (prompt or "").strip()
c = (completion or "").strip()
# If the model outputs nothing, treat as acceptable but not ideal.
if c == "":
return ScoreResult(70.0, {"reason": "empty_output", "len_chars": 0, "len_words": 0})
# Measures
len_chars = len(c)
len_words = len(re.findall(r"\S+", c))
# Clarification detection
has_qmark = "?" in c
has_question_word = re.search(QUESTION_WORDS, c.lower()) is not None
asks_clarify = has_qmark or has_question_word
# Hallucination-ish heuristics (intentionally simple)
has_numbers = re.search(r"\b\d{2,}\b", c) is not None
strong_claims = re.search(r"\b(always|never|definitely|certainly|proves|confirmed)\b", c.lower()) is not None
pretend_context = re.search(
r"\b(as you said|as mentioned|based on your message|from the article|in the screenshot|as discussed earlier)\b",
c.lower(),
) is not None
# Over-structuring without a task
has_bullets = bool(re.search(r"(^|\n)\s*[-•]\s+", c))
has_steps = bool(re.search(r"\b(step\s*\d+|first|second|third)\b", c.lower()))
# Classify prompt as near-zero stimulus
zeroish = p in ("", ".", "..", "...", "…", "—", "-", "?", "??")
score = 100.0
# Verbosity penalty
if len_words > 60:
score -= 45
elif len_words > 40:
score -= 35
elif len_words > 20:
score -= 20
elif len_words > 10:
score -= 10
# Clarification rule
if asks_clarify:
score += 5
else:
score -= 15
# Strong penalty: confident output under near-zero stimulus without clarification
if zeroish and not asks_clarify:
score -= 55
# Penalize invented context
if pretend_context:
score -= 30
# Penalize confidence markers in a vacuum
if has_numbers:
score -= 10
if strong_claims:
score -= 15
# Penalize structure when no task exists
if has_bullets or has_steps:
score -= 10
# Extra restraint requirement for zero-stimulus prompts
if zeroish and len_words > 15:
score -= 15
# Clamp
score = max(0.0, min(100.0, score))
return ScoreResult(
score,
{
"len_chars": len_chars,
"len_words": len_words,
"asks_clarify": asks_clarify,
"has_numbers": has_numbers,
"strong_claims": strong_claims,
"pretend_context": pretend_context,
"has_bullets": has_bullets,
"has_steps": has_steps,
"zeroish_prompt": zeroish,
"prompt_norm": p[:60],
"completion_head": c[:120],
},
)