Spaces:
Running on Zero
Running on Zero
| """Prompt construction and structured-output parsing for TemperCheck. | |
| The model is asked to return a single JSON object. Small VLMs drift from | |
| requested formats, so `parse_verdict` is deliberately defensive: it extracts the | |
| first JSON object it can find and clamps/falls back on every field. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import re | |
| from dataclasses import dataclass, field | |
| from typing import Any | |
# The whole app hangs off this contract. If you change the shape, update
# parse_verdict, the Gradio output rendering, and tests/test_parsing.py together.
#
# System prompt sent as the first chat turn (see build_messages). It defines the
# scoring rubric, the two content flags (NSFW / crypto) and the exact JSON shape
# the model must answer with.
# NOTE(review): paragraph breaks and indentation inside this prompt may have
# been flattened; confirm the layout against the canonical copy of this file.
SYSTEM_PROMPT = """\
You are TemperCheck, a playful but sharp-eyed party-game judge. Given a photo or
screenshot of a social-media profile, you estimate — for entertainment only — how
short-tempered / "cranky to deal with" the person seems. This is a whimsical
novelty read, NOT a real personality assessment or a factual claim about anyone.
Judge mainly by what the person CHOSE TO PUT in their profile (bio text, display
name, handle, captions, stated attitude); treat facial expression and overall
vibe as a lighter, secondary signal. Read the profile text carefully and look for:
1. HOSTILITY & SARCASM — a biting, contemptuous, passive-aggressive, or sarcastic
tone; insults; mocking others; "I don't care what you think" energy.
2. PRIDE IN CONFLICT — does the profile brag about upsetting people, starting
fights, blocking/roasting/"destroying" others, or being "brutally honest"?
Treating confrontation as a personality is a strong temper signal.
3. EXPLICIT WARNING SIGNS stated right in the profile — slurs, hateful or
demeaning language, open anger, ALL-CAPS ranting, relentless negativity or
doom, or harsh criticism aimed at other people.
The more of these appear, and the more prominent they are, the higher the score.
A warm, friendly, or neutral profile with none of them scores low. If there is
little readable text, lean on overall vibe, say so, and keep the score mid-range.
Score guide (0 = delightful, 100 = run):
0-20 warm/friendly, no red flags
21-40 mostly pleasant, minor edge
41-60 mixed signals, or too little to tell
61-80 clearly prickly: sarcasm, negativity, or pride-in-conflict present
81-100 multiple strong warning signs (e.g. hostility + slurs/anger + conflict-pride)
Two extra content flags, independent of temper — ALWAYS add the tag when present:
- NSFW — inspect the ACTUAL IMAGE as carefully as the text, and flag adult
content from EITHER source. Add the signal "NSFW" if the picture shows nudity,
sexual or fetish content, underwear/lingerie as the focus, or other explicit or
adult visual material, OR if the text is sexually suggestive ("18+", "spicy
content", adult-site links). This visual check OVERRIDES the "text-first"
guidance above: flag NSFW based on the image even when the bio text is perfectly
innocuous. In your rationale, say whether the NSFW signal came from the image,
the text, or both.
- If the profile mentions crypto (Bitcoin, Ethereum, NFTs, tokens, "$TICKER",
"to the moon", web3, "diamond hands", trading shills), add the signal "crypto".
Put any NSFW/crypto tags FIRST in the signals list so they are never dropped.
Respond with ONLY a single JSON object, no prose before or after, of the form:
{
"score": <integer 0-100, higher = shorter-tempered / crankier>,
"verdict": "<3-6 word punchy label>",
"rationale": "<1-2 sentences naming the specific signals you actually saw>",
"signals": ["<short flag>", "<short flag>", "<short flag>"]
}
Each signal must be a concrete thing you observed, e.g. "sarcastic bio", "brags
about blocking people", "ALL-CAPS anger", "slur in bio", or "warm smile, kind bio".
Stay good-natured: you are reading the profile's stated content and vibe, never
mocking how someone looks."""
# Text part of the user turn; build_messages places it after the image.
# The dashes are real em dashes (the mis-encoded "β" placeholders were repaired).
USER_INSTRUCTION = (
    "Read this profile — its bio/handle text first, then the overall vibe — and "
    "return the temper JSON. Be playful, not mean."
)
# 0-100 score bucket -> emoji label used by the UI.
# Each entry is (ceiling, label) in ascending ceiling order. TemperVerdict.band
# returns the label of the first entry whose ceiling is strictly greater than
# the score, which is why the last ceiling is 101 rather than 100.
# NOTE(review): the "π" / "π€" label prefixes look like mis-decoded emoji (the
# line above promises emoji labels). Restore the intended characters from the
# canonical source; they cannot be recovered reliably from this text.
SCORE_BANDS: list[tuple[int, str]] = [
    (20, "π Sunshine"),
    (40, "π Easygoing"),
    (60, "π Depends on the day"),
    (80, "π€ Bit prickly"),
    (101, "π Approach with coffee"),
]
@dataclass
class TemperVerdict:
    """Structured result of one TemperCheck read, as produced by parse_verdict.

    The class must be a dataclass: it is constructed with keyword arguments and
    relies on ``field(default_factory=list)`` so that every instance gets its
    own ``signals`` list.
    """

    score: int  # temper score; parse_verdict clamps it to 0-100
    verdict: str  # short label for the read
    rationale: str  # brief explanation of the score
    signals: list[str] = field(default_factory=list)  # observed flags
    raw: str = ""  # original model text, for debugging / agent traces

    def band(self) -> str:
        """Return the SCORE_BANDS label for this verdict's score.

        Picks the first band whose ceiling is strictly greater than the score;
        if none qualifies (score above every ceiling) the last band is used.
        """
        # NOTE(review): ceilings are exclusive, so a score of exactly 20 lands
        # in the 40-band, while the score guide in SYSTEM_PROMPT lists 0-20 as
        # the first range. Confirm which boundary is intended before changing.
        for ceiling, label in SCORE_BANDS:
            if self.score < ceiling:
                return label
        return SCORE_BANDS[-1][1]
def build_messages(image_ref: Any) -> list[dict]:
    """Assemble the two-turn chat payload for one profile image.

    The result follows the Gemma 4 multimodal chat layout: a system turn
    carrying SYSTEM_PROMPT, then a user turn whose content lists the image
    part before the USER_INSTRUCTION text part.

    Args:
        image_ref: Image reference placed verbatim under the ``"image"`` key.

    Returns:
        A list ``[system_message, user_message]`` of role/content dicts.
    """
    system_turn = {
        "role": "system",
        "content": [{"type": "text", "text": SYSTEM_PROMPT}],
    }
    # Order matters: the image part comes before the instruction text.
    user_parts = [
        {"type": "image", "image": image_ref},
        {"type": "text", "text": USER_INSTRUCTION},
    ]
    user_turn = {"role": "user", "content": user_parts}
    return [system_turn, user_turn]
def _first_json_object(text: str) -> dict | None:
    """Return the first JSON object embedded in *text*, tolerating junk around it.

    Every ``{`` in *text* is tried, left to right, as the start of a JSON
    document. Parsing is delegated to ``json.JSONDecoder.raw_decode``, which
    stops at the end of the first complete value and ignores trailing text.
    Because the real JSON parser finds the end of the object, braces that
    appear inside string values (e.g. ``{"verdict": "oh } no"}``) no longer
    break extraction the way plain brace counting did.

    Args:
        text: Raw model output, possibly with prose before/after the JSON.

    Returns:
        The decoded object as a dict, or ``None`` when no ``{`` position
        yields a valid JSON object.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _end = decoder.raw_decode(text[start:])
        except json.JSONDecodeError:
            candidate = None
        # A document starting with "{" decodes to a dict; the check is a guard.
        if isinstance(candidate, dict):
            return candidate
        # Not parseable from here; retry from the next opening brace.
        start = text.find("{", start + 1)
    return None
def _text_or_default(value: object, default: str) -> str:
    """Return *value* as stripped text, or *default* when it is None or blank."""
    if value is None:
        # JSON null must not surface as the literal string "None".
        return default
    return str(value).strip() or default


def parse_verdict(text: str) -> TemperVerdict:
    """Parse model output into a TemperVerdict, never raising on bad output.

    Every field is read from the first JSON object found in *text* and falls
    back to a default when missing, null, blank or malformed:

    * ``score``: int/float/str accepted and clamped to 0-100; 50 on failure.
    * ``verdict``: defaults to ``"Inscrutable"``.
    * ``rationale``: defaults to ``"The model kept its cards close."``.
    * ``signals``: coerced to a list of non-empty strings, capped at 6 entries.

    Args:
        text: Raw model output. ``None`` or other non-str values are tolerated.

    Returns:
        A TemperVerdict whose ``raw`` holds the (stringified) model output.
    """
    if not isinstance(text, str):
        # A failed generation may hand us None; treat it as empty output.
        text = "" if text is None else str(text)

    data = _first_json_object(text) or {}

    # score: accept int/float/str, clamp to 0-100, default mid on failure.
    raw_score = data.get("score", 50)
    try:
        score = int(round(float(raw_score)))
    except OverflowError:
        # float() produced +/-inf (JSON `Infinity`, "1e999", ...): unusable.
        score = 50
    except (TypeError, ValueError):
        # Not a plain number (e.g. "85/100", None, NaN): take the first integer
        # found in the text form, if any.
        m = re.search(r"-?\d+", str(raw_score))
        score = int(m.group()) if m else 50
    score = max(0, min(100, score))

    signals = data.get("signals")
    if signals is None:
        signals = []
    elif not isinstance(signals, list):
        signals = [signals]
    # Drop null entries, stringify the rest, discard blanks, keep the first 6.
    cleaned = [str(s).strip() for s in signals if s is not None]
    signals = [s for s in cleaned if s][:6]

    return TemperVerdict(
        score=score,
        verdict=_text_or_default(data.get("verdict"), "Inscrutable"),
        rationale=_text_or_default(
            data.get("rationale"), "The model kept its cards close."
        ),
        signals=signals,
        raw=text,
    )