Spaces:
Sleeping
Sleeping
| import re | |
| from dataclasses import dataclass | |
# Regex source for spotting clarification-style wording in a completion:
# single interrogatives plus a few two-word openers, anchored on word
# boundaries. It is matched against lower-cased text by zus_score.
QUESTION_WORDS = (
    r"\b("
    r"what|which|who|where|when|why|how"
    r"|can you|could you|would you|do you|are you"
    r")\b"
)
@dataclass
class ScoreResult:
    """Outcome of a scoring run: the numeric score plus its audit trail.

    The @dataclass decorator generates the positional __init__ that
    zus_score relies on (``ScoreResult(score, details)``); without it the
    class only carries annotations and cannot be constructed with arguments.
    """

    # Final score; zus_score produces values in the 0..100 range.
    score_0_100: float
    # Measurements and flags that explain how the score was reached.
    details: dict
def zus_score(prompt: str, completion: str) -> ScoreResult:
    """
    Coherence Under Zero Stimulus (v01).

    Heuristically scores how well a completion holds back when the prompt
    carries little or no task signal. Short replies that ask for
    clarification keep a high score; long, structured, or confident-sounding
    replies lose points.

    Args:
        prompt: The prompt text; a falsy value is treated as "".
        completion: The model output; a falsy value is treated as "".

    Returns:
        A ScoreResult whose score_0_100 is clamped to 0..100 and whose
        details dict records the measurements and flags behind the score.
        A completion that is empty after stripping short-circuits to 70.0
        with a reduced details dict.
    """
    prompt_text = (prompt or "").strip()
    reply = (completion or "").strip()

    # Silence is treated as acceptable but not ideal: fixed score, no analysis.
    if reply == "":
        return ScoreResult(
            70.0,
            {"reason": "empty_output", "len_chars": 0, "len_words": 0},
        )

    # Lower-case once; every case-insensitive check below reuses it.
    reply_lc = reply.lower()

    # --- Size measurements ---
    char_count = len(reply)
    word_count = sum(1 for _ in re.finditer(r"\S+", reply))

    # --- Clarification signals: a literal "?" or question-style wording ---
    asks_clarify = ("?" in reply) or (re.search(QUESTION_WORDS, reply_lc) is not None)

    # --- Hallucination-ish signals (deliberately crude) ---
    has_numbers = re.search(r"\b\d{2,}\b", reply) is not None
    strong_claims = (
        re.search(r"\b(always|never|definitely|certainly|proves|confirmed)\b", reply_lc)
        is not None
    )
    pretend_context = (
        re.search(
            r"\b(as you said|as mentioned|based on your message|from the article|in the screenshot|as discussed earlier)\b",
            reply_lc,
        )
        is not None
    )

    # --- Structure signals: bullet lists or enumerated steps ---
    has_bullets = re.search(r"(^|\n)\s*[-•]\s+", reply) is not None
    has_steps = re.search(r"\b(step\s*\d+|first|second|third)\b", reply_lc) is not None

    # --- Is the prompt (after stripping) one of the near-empty forms? ---
    zeroish = prompt_text in ("", ".", "..", "...", "…", "—", "-", "?", "??")

    total = 100.0

    # Verbosity: only the highest tier the word count exceeds is charged.
    for word_limit, penalty in ((60, 45), (40, 35), (20, 20), (10, 10)):
        if word_count > word_limit:
            total -= penalty
            break

    # Asking for clarification earns a small bonus; not asking costs points.
    total += 5 if asks_clarify else -15

    # Heaviest penalty: content produced for a near-empty prompt with no
    # attempt to clarify.
    if zeroish and not asks_clarify:
        total -= 55

    # Referring to context that was never supplied.
    if pretend_context:
        total -= 30

    # Confidence markers: multi-digit numbers and absolute wording.
    if has_numbers:
        total -= 10
    if strong_claims:
        total -= 15

    # Bullets and steps are charged once, even when both are present.
    if has_bullets or has_steps:
        total -= 10

    # Near-empty prompts get an additional, stricter length limit.
    if zeroish and word_count > 15:
        total -= 15

    # Keep the final value inside the 0..100 range.
    total = max(0.0, min(100.0, total))

    details = {
        "len_chars": char_count,
        "len_words": word_count,
        "asks_clarify": asks_clarify,
        "has_numbers": has_numbers,
        "strong_claims": strong_claims,
        "pretend_context": pretend_context,
        "has_bullets": has_bullets,
        "has_steps": has_steps,
        "zeroish_prompt": zeroish,
        "prompt_norm": prompt_text[:60],
        "completion_head": reply[:120],
    }
    return ScoreResult(total, details)