gridsense-gemma4 / brain /validator.py
Nestroy2003's picture
Sync from GitHub via hub-sync
91bf1ce verified
import re
from typing import Dict
# Words and phrases that point at the grid itself (equipment, outages,
# load-shedding stages). Any hit classifies a report as "infrastructure".
HIGH_SIGNAL_KEYWORDS = {
    "flicker", "flickering", "hum", "humming", "transformer", "crew", "van",
    "smell", "burning", "sparks", "spark", "surge", "dim", "dimming",
    "brownout", "schedule", "maintenance", "load shed", "load shedding",
    "stage 4", "stage 6", "stage 3", "dark", "gone", "off", "no power",
    "no light", "generator", "kicked in", "inverter", "substation",
    "pole", "line", "wire", "cable", "breaker", "blown", "trip", "tripped",
}

# Hearsay / habitual wording. Each hit is worth half a high-signal hit.
MEDIUM_KEYWORDS = {
    "rumor", "neighbor", "said", "heard", "sometimes", "usually",
    "every evening", "often", "weekly", "always goes",
}

# Phrases suggesting the problem is limited to the reporter's own home.
HOUSEHOLD_KEYWORDS = {
    "my socket", "my breaker", "my fuse", "my bulb", "my appliance",
    "my room", "my house only", "only my", "just my", "my wiring",
    "my cord", "only in my", "my fridge stopped", "my phone charger",
    "my outlet", "my switch",
}

# Markers of a report about something that already happened ...
PAST_TENSE = {
    "yesterday", "last week", "last month", "earlier today",
    "this morning", "last time", "the other day",
}

# ... and markers that override them because the event is still ongoing.
PRESENT_TENSE = {"right now", "just", "currently", "happening", "just now"}

# Exact (lower-cased, stripped) inputs treated as someone poking the system.
# Entries shorter than four characters are already caught by the length check.
TEST_INPUTS = {"test", "testing", "hello", "hi", "asdf", "qwerty", "lol", "idk", ""}

# Wording used to derive "confidence_tone"; hedging wins over certainty.
_HEDGING_PHRASES = {"maybe", "i think", "possibly", "not sure", "might", "could be", "seems"}
_CERTAIN_PHRASES = {"definitely", "100%", "for sure", "absolutely", "clearly", "right now"}

# Six or more identical letters/digits in a row ("aaaaaaa"). Punctuation and
# whitespace are deliberately excluded so "!!!!!!" or "......" in an otherwise
# sensible report does not get it thrown away as gibberish.
_REPEATED_CHAR_RE = re.compile(r"([^\W_])\1{5,}")
_NON_LETTER_RE = re.compile(r"[^a-z]")

# phrase -> compiled whole-word pattern, filled lazily by _phrase_pattern().
_PHRASE_PATTERNS = {}


def _phrase_pattern(phrase):
    """Return a compiled regex matching *phrase* as a whole word or phrase.

    The match must start and end on a word boundary, so "off" no longer fires
    inside "coffee" and "just" no longer fires inside "adjusted". Simple
    inflections are still accepted (plural, -d/-ed, -ing, and a doubled final
    consonant as in "tripping") so "wires" or "sparking" keep matching.
    """
    pattern = _PHRASE_PATTERNS.get(phrase)
    if pattern is None:
        stem = re.escape(phrase)
        doubled = re.escape(phrase[-1:])
        pattern = re.compile(
            r"(?<!\w)" + stem
            + r"(?:s|es|d|ed|ing|" + doubled + r"ed|" + doubled + r"ing)?"
            + r"(?!\w)"
        )
        _PHRASE_PATTERNS[phrase] = pattern
    return pattern


def _mentions_any(text, phrases):
    """Return True if any phrase in *phrases* occurs in *text* as a word."""
    return any(_phrase_pattern(phrase).search(text) for phrase in phrases)


def _count_mentions(text, phrases):
    """Count the distinct phrases from *phrases* that occur in *text*.

    Two table entries that match the very same stretch of text (for example
    "flicker" and "flickering" both matching the word "flickering") are
    counted once, so listing several forms of a word does not inflate the
    score.
    """
    spans = set()
    for phrase in phrases:
        match = _phrase_pattern(phrase).search(text)
        if match is not None:
            spans.add(match.span())
    return len(spans)


def _rejection(reason, guidance):
    """Build the result dict for an input that is not a usable report."""
    return {
        "valid": False,
        "signal_type": "none",
        "confidence_tone": "unknown",
        "keyword_matches": 0,
        "rejection_reason": reason,
        "guidance": guidance,
    }


def validate_input(text: str) -> Dict:
    """Screen a free-text report and classify the kind of signal it carries.

    Checks run in this order and the first one that applies decides the
    result:

    1. too short (empty, or fewer than 4 characters after stripping),
    2. a known test string (see ``TEST_INPUTS``),
    3. incoherent (a letter/digit repeated 6+ times, or fewer than three
       ASCII letters in total),
    4. about the past with no present-tense marker,
    5. household-only wording,
    6. otherwise: keyword scoring.

    Args:
        text: The raw report as typed by the user. Falsy values (including
            ``None``) are treated as too short.

    Returns:
        A dict with these keys:

        * ``valid`` -- ``False`` for rejections 1-4, ``True`` otherwise.
        * ``signal_type`` -- ``"household"``, ``"infrastructure"`` (at least
          one high-signal keyword), ``"ambiguous"`` (medium keywords only) or
          ``"none"``.
        * ``confidence_tone`` -- ``"uncertain"``, ``"confident"`` or
          ``"unknown"``; hedging wording takes precedence over certainty.
        * ``keyword_matches`` -- high-signal hits plus half of the medium
          hits, truncated to an int (always 0 for rejections and household
          reports).
        * ``rejection_reason`` -- ``"too_short"``, ``"test_input"``,
          ``"incoherent"``, ``"not_current"`` or ``None``.
        * ``guidance`` -- a user-facing hint, or ``None`` when there is
          nothing to add.
    """
    if not text or len(text.strip()) < 4:
        return _rejection(
            "too_short",
            "Tell GridSense a little more about what you are noticing.",
        )

    text_lower = text.lower().strip()

    if text_lower in TEST_INPUTS:
        return _rejection(
            "test_input",
            "Looks like a test. When you are ready, describe what you are actually noticing.",
        )

    letter_count = len(_NON_LETTER_RE.sub("", text_lower))
    if _REPEATED_CHAR_RE.search(text_lower) or letter_count < 3:
        return _rejection(
            "incoherent",
            "That did not look like a real report. Try describing what you see or hear.",
        )

    # A past-tense marker only disqualifies the report when nothing says the
    # event is still going on ("since this morning and still off right now").
    has_present = _mentions_any(text_lower, PRESENT_TENSE)
    has_past = _mentions_any(text_lower, PAST_TENSE)
    if has_past and not has_present:
        return _rejection(
            "not_current",
            "GridSense needs real-time signals. Describe what is happening right now.",
        )

    # Household wording short-circuits scoring: the report is accepted but
    # flagged as probably local to the reporter's home.
    if _mentions_any(text_lower, HOUSEHOLD_KEYWORDS):
        return {
            "valid": True,
            "signal_type": "household",
            "confidence_tone": "confident",
            "keyword_matches": 0,
            "rejection_reason": None,
            "guidance": (
                "That sounds like it might only be affecting your home. "
                "GridSense will watch the neighborhood, but check your own "
                "breaker and wiring first."
            ),
        }

    high_matches = _count_mentions(text_lower, HIGH_SIGNAL_KEYWORDS)
    medium_matches = _count_mentions(text_lower, MEDIUM_KEYWORDS)
    total_matches = high_matches + (medium_matches * 0.5)

    if _mentions_any(text_lower, _HEDGING_PHRASES):
        tone = "uncertain"
    elif _mentions_any(text_lower, _CERTAIN_PHRASES):
        tone = "confident"
    else:
        tone = "unknown"

    if high_matches > 0:
        signal_type = "infrastructure"
    elif medium_matches > 0:
        signal_type = "ambiguous"
    else:
        signal_type = "none"

    guidance = None
    if total_matches == 0:
        guidance = (
            "No clear grid signals detected. Try mentioning what you see, "
            "hear, or what neighbors are saying."
        )

    return {
        "valid": True,
        "signal_type": signal_type,
        "confidence_tone": tone,
        # int() truncates, so a single medium hit (0.5) is reported as 0.
        "keyword_matches": int(total_matches),
        "rejection_reason": None,
        "guidance": guidance,
    }