from __future__ import annotations
import os
import re
import numpy as np
# Groq is optional. The app works fully without it.
# Set GROQ_API_KEY in .env to enable richer explanations.
_GROQ_KEY = os.getenv("GROQ_API_KEY", "").strip()
# Groq-backed code paths are taken only when a non-blank key is present.
_USE_GROQ = bool(_GROQ_KEY)
_GROQ_MODEL = "llama-3.3-70b-versatile"

# Jargon that must not reach the end user. The list is spelled out in the
# system prompt and is also used to reject Groq replies that contain it.
_FORBIDDEN = [
    "model", "algorithm", "cusum", "percentile", "recalibration",
    "aci", "chronos", "neural", "transformer", "inference",
    "statistical", "parameter", "coefficient", "conformal",
]

# System prompt sent with every explanation request. The em dash is written
# as an escape so the text survives files being re-encoded.
_SYSTEM_PROMPT = (
    "You explain forecasting results to non-technical users in 2-3 plain sentences. "
    "Your user may be a farmer, baker, or shop owner with no data background. "
    "Never use technical words. Speak like a helpful friend, not a data scientist. "
    "Always mention: what the trend is, how confident to be, and one thing to do. "
    "Forbidden words \u2014 never use: "
    + ", ".join(_FORBIDDEN)
)
# ─── Explanation ──────────────────────────────────────────────────────────────
def explain(
    trend_pct: float,
    confidence: int,
    alert: str,  # "HIGH" | "LOW" | "NONE"
    horizon_weeks: int,
    series_name: str = "your data",
) -> tuple[str, str]:
    """
    Describe a forecast in plain language.

    Returns (explanation_text, source) where source is "groq" when the
    Groq-backed explanation produced text and "template" otherwise.
    The template is the fallback whenever Groq is disabled or returns
    nothing usable.
    """
    details = {
        "trend_pct": trend_pct,
        "confidence": confidence,
        "alert": alert,
        "horizon_weeks": horizon_weeks,
        "series_name": series_name,
    }

    # Only contact Groq when a key is configured; any falsy reply falls through.
    groq_text = _groq_explain(details) if _USE_GROQ else None
    if groq_text:
        return groq_text, "groq"

    return _template_explain(details), "template"
def _groq_explain(context: dict) -> str | None:
    """
    Ask Groq for a short plain-language explanation of the forecast.

    ``context`` must provide the keys ``trend_pct``, ``confidence``,
    ``alert``, ``horizon_weeks`` and ``series_name``.

    Returns the reply text, or None when the request fails for any reason,
    the reply is empty, or the reply contains a forbidden word.
    """
    try:
        from groq import Groq

        client = Groq(api_key=_GROQ_KEY)
        # A trend of exactly zero is reported to the model as "down 0.0%".
        direction = "up" if context["trend_pct"] > 0 else "down"
        user_msg = (
            f"Series: {context['series_name']}. "
            f"Trend over next {context['horizon_weeks']} weeks: "
            f"{direction} {abs(context['trend_pct']):.1f}%. "
            f"Confidence score: {context['confidence']}/100. "
            f"Anomaly alert: {context['alert']}. "
            f"Write the 2-3 sentence explanation now."
        )
        resp = client.chat.completions.create(
            model=_GROQ_MODEL,
            messages=[
                {"role": "system", "content": _SYSTEM_PROMPT},
                {"role": "user", "content": user_msg},
            ],
            max_tokens=120,
            temperature=0.4,
        )
        text = (resp.choices[0].message.content or "").strip()
        if not text:
            return None

        # Reject if a forbidden word slipped through. Matching is anchored at
        # the start of a word so derivatives ("models", "statistically") are
        # still caught, while ordinary words that merely contain a forbidden
        # fragment ("capacity", "facing" contain "aci") no longer cause a
        # needless rejection.
        forbidden = re.compile(
            r"\b(?:" + "|".join(re.escape(w) for w in _FORBIDDEN) + r")",
            re.IGNORECASE,
        )
        if forbidden.search(text):
            return None
        return text
    except Exception:
        # Deliberate best-effort: a missing package, network error or
        # unexpected response shape must never break the explanation; the
        # caller falls back to the template text when it sees None.
        return None
def _template_explain(ctx: dict) -> str:
    """
    Build the offline (no network) explanation from fixed sentence templates.

    ``ctx`` must provide ``trend_pct``, ``confidence``, ``alert``,
    ``horizon_weeks`` and ``series_name``.

    Template choice, in priority order:
      1. ``alert`` is "HIGH" or "LOW": recent surprise, advise waiting a week.
      2. confidence is low (<= 40): advise small, reversible decisions.
      3. otherwise: state the expected move and the confidence level.
    """
    pct = ctx["trend_pct"]
    conf = ctx["confidence"]
    alert = ctx["alert"]
    weeks = ctx["horizon_weeks"]

    # Upper-case only the first character; str.capitalize() would lower-case
    # the rest and mangle names such as "NYC sales".
    raw_name = str(ctx["series_name"]).strip() or "your data"
    name = raw_name[:1].upper() + raw_name[1:]

    pct_str = f"{abs(pct):.1f}%"
    span = f"the next {weeks} {'week' if weeks == 1 else 'weeks'}"
    conf_word = "high" if conf > 60 else "moderate" if conf > 40 else "low"

    if pct_str == "0.0%":
        # A move that rounds to 0.0% is neither a rise nor a fall.
        trend = "little change"
        sized_trend = trend
        outlook = "stay about the same"
    else:
        trend = "an upward trend" if pct > 0 else "a downward trend"
        sized_trend = f"{trend} of about {pct_str}"
        outlook = f"{'rise' if pct > 0 else 'fall'} by about {pct_str}"

    if alert in ("HIGH", "LOW"):
        surprise = "jumped higher" if alert == "HIGH" else "dropped lower"
        return (
            f"{name} {surprise} than expected recently. "
            f"The forecast now shows {sized_trend} over {span}. "
            f"Wait one more week to confirm before making large decisions."
        )

    # Use the same boundary as conf_word so a score described as "low" is
    # never paired with the reassuring default template.
    if conf_word == "low":
        return (
            f"{name} shows {trend} over {span}, "
            f"but uncertainty is high right now. "
            f"Make smaller, reversible decisions until the picture clears."
        )

    return (
        f"{name} is expected to {outlook} over {span}, "
        f"with {conf_word} confidence. "
        f"The range shown gives you a safe window to plan within."
    )
# ─── NL Input Parser ──────────────────────────────────────────────────────────
def parse_nl_input(raw_text: str, last_actual: float | None = None) -> dict:
    """
    Turn free-form user text into a structured value dict.

    Returns:
        {
            "value": float | None,
            "is_approximate": bool,
            "is_relative": bool,
            "zero": bool,
            "error": str | None,   # set if we cannot parse
        }

    When a Groq key is configured the Groq parser is tried first, and its
    result is used only if it carries a non-None value. In every other case
    (Groq disabled, failed, or produced no value) the regex parser's result
    is returned.
    """
    cleaned = raw_text.strip()

    groq_result = _groq_parse(cleaned, last_actual) if _USE_GROQ else None
    if groq_result and groq_result.get("value") is not None:
        return groq_result

    return _regex_parse(cleaned, last_actual)
def _regex_parse(text: str, last_actual: float | None) -> dict:
    """
    Rule-based parser for what the user typed (English / Hinglish).

    Rules are tried in this order; the first that applies wins:
      1. zero / closed words, or a standalone zero
      2. "don't know" style phrases            -> error
      3. same as last value                    (needs ``last_actual``)
      4. double / half of last value           (needs ``last_actual``)
      5. percentage change                     (relative when ``last_actual``
         is known, otherwise absolute unless direction words are present)
      6. went up / down by an absolute amount  (needs ``last_actual``)
      7. plain number

    Returns a dict built by ``_result`` on success or ``_error`` on failure.
    """
    t = text.lower().strip()

    def mentions(*phrases: str) -> bool:
        """True if any phrase occurs in ``t`` as a whole word or phrase."""
        alternatives = "|".join(re.escape(p) for p in phrases)
        return re.search(rf"(?<!\w)(?:{alternatives})(?!\w)", t) is not None

    # ── Zero / closed ────────────────────────────────────────────────────
    # A zero only counts when it is a number on its own: not a digit of
    # "2300", not part of a decimal such as "0.5" or "2.0", and not "0%".
    standalone_zero = re.search(
        r"(?<!\d)(?<!\d[.,])0+(?:\.0+)?(?![.,]?\d)(?!\s*%)", t
    )
    if standalone_zero or mentions(
        "nothing", "bandh", "closed", "shut", "nil", "nill", "shunya"
    ):
        return _result(0.0, zero=True)

    # ── Rejection phrases ────────────────────────────────────────────────
    if mentions(
        "don't know", "dont know", "pata nahi", "pata nahin", "not sure", "no idea"
    ):
        return _error("Please enter the value when you know it.")

    is_approximate = mentions(
        "around", "about", "roughly", "approximately", "lagbhag", "almost"
    )

    # ── Relative: same as last week ──────────────────────────────────────
    # Whole-word matching matters here: "usi" must not fire on "business".
    if mentions("same", "equal", "usi", "wahi"):
        if last_actual is not None:
            return _result(last_actual, relative=True, approximate=is_approximate)
        return _error(
            "We don't have a previous value to compare. "
            "Please enter the number directly."
        )

    # ── Relative: double ─────────────────────────────────────────────────
    if mentions("double", "doubled", "do guna", "dugna"):
        if last_actual is not None:
            return _result(last_actual * 2, relative=True, approximate=is_approximate)
        return _error("Please enter the actual number.")

    # ── Relative: half ───────────────────────────────────────────────────
    if mentions("half", "aadha"):
        if last_actual is not None:
            return _result(last_actual * 0.5, relative=True, approximate=is_approximate)
        return _error("Please enter the actual number.")

    # ── Relative: percentage change ──────────────────────────────────────
    # The number pattern requires digits, so float() cannot be handed ".".
    pct_match = re.search(r"(\d+(?:\.\d+)?)\s*%", t)
    if pct_match:
        pct = float(pct_match.group(1))
        if last_actual is not None:
            down = mentions(
                "down", "kam", "less", "decrease", "decreased",
                "drop", "dropped", "fell", "girak", "gira",
            )
            factor = (1 - pct / 100) if down else (1 + pct / 100)
            return _result(
                last_actual * factor, relative=True, approximate=is_approximate
            )
        # Has relative words (jyada, kam, more, less) - needs a previous value
        if mentions(
            "jyada", "zyada", "more", "kam", "less", "up", "down",
            "increase", "increased", "decrease", "decreased",
        ):
            return _error(
                "We need a previous value to calculate the percentage. "
                "Please enter the number directly."
            )
        # No relative words - treat as absolute value
        if pct < 10000:
            return _result(pct, approximate=is_approximate)

    # ── Relative: went up/down by absolute amount ────────────────────────
    if last_actual is not None:
        amount = r"\s+(?:by\s+)?([\d,. ]+(?:lakh|crore)?)"
        directions = (
            (r"went up|increased?|badhke?|upar|zyada|jyada", 1.0),
            (r"went down|decreased?|drop(?:ped)?|girak?|kam|niche", -1.0),
        )
        for verbs, sign in directions:
            move = re.search(rf"\b(?:{verbs}){amount}", t)
            if move:
                delta = _parse_number_str(move.group(1))
                if delta is not None:
                    return _result(last_actual + sign * delta, relative=True)

    # ── Absolute value ───────────────────────────────────────────────────
    value = _parse_number_str(t)
    if value is not None:
        return _result(value, approximate=is_approximate)

    return _error(
        "We couldn't understand that value. "
        "Try typing just the number, like: 2300"
    )
def _parse_number_str(text: str) -> float | None:
    """
    Extract the first number from a string, or return None if there is none.

    Handles:
      - currency symbols: rupee sign, $ and pound sign
      - Indian lakh / crore shorthand ("2.5 lakh" -> 250000.0)
      - Indian and Western comma grouping (1,23,456 / 123,456)
      - prefix approximate words (around, about, roughly...)
      - trailing unit words (kg, rupee, units...)
      - abbreviations ending in a dot ("Rs. 2300" -> 2300.0, not 0.23)
      - plain floats and integers
    """
    # A number needs at least one digit. A leading-dot decimal (".5") is only
    # accepted when the dot does not follow a letter, so the dot of an
    # abbreviation such as "rs." is never read as a decimal point.
    number = r"(?:\d+(?:\.\d+)?|(?<![a-z])\.\d+)"

    t = text.lower().strip()
    # Currency symbols are written as escapes (U+20B9 rupee, U+00A3 pound) so
    # the pattern survives the file being re-encoded. All whitespace is
    # removed too, so "2 lakh" becomes "2lakh" and "up to" becomes "upto".
    t = re.sub(r"[\u20b9$\u00a3\s]", "", t)
    # Strip commas before looking for lakh/crore so "1,234.5 lakh" is read as
    # 1234.5 lakh rather than 234.5 lakh.
    t = t.replace(",", "")

    for unit, scale in (("crore", 1e7), ("lakh", 1e5)):
        scaled = re.search(rf"({number}){unit}", t)
        if scaled:
            return float(scaled.group(1)) * scale

    # Strip prefix approximate/directional words
    t = re.sub(r"^(around|about|roughly|approximately|lagbhag|almost|upto)", "", t)
    # Strip trailing unit words and directional tokens
    t = re.sub(
        r"(rupee|rupees|rs|inr|kg|units?|pieces?|up|down|more|less|zyada|jyada|kam)s?$",
        "",
        t,
    )

    # Extract the first clean number from what remains
    num_match = re.search(number, t)
    return float(num_match.group()) if num_match else None
def _groq_parse(text: str, last_actual: float | None) -> dict | None:
    """
    Ask Groq to extract a numeric value from what the user typed.

    Returns a dict with the keys ``value``, ``is_approximate``,
    ``is_relative``, ``zero`` and ``error``, or None when the request fails,
    the reply is not the expected JSON object, or the reported value is not
    a usable finite number.
    """
    try:
        import json
        import math

        from groq import Groq

        client = Groq(api_key=_GROQ_KEY)
        context = (
            f"Last known value: {last_actual}"
            if last_actual is not None
            else "No previous value."
        )
        prompt = (
            f"{context}\n"
            f"User typed: \"{text}\"\n\n"
            "Extract the numeric value. Respond with ONLY a JSON object, no markdown:\n"
            '{"value": <number or null>, "is_approximate": <bool>, '
            '"is_relative": <bool>, "zero": <bool>, "error": <string or null>}'
        )
        resp = client.chat.completions.create(
            model=_GROQ_MODEL,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=80,
            temperature=0.0,
        )
        raw = resp.choices[0].message.content.strip()
        # Drop markdown code fences in case the reply is wrapped in them.
        raw = re.sub(r"```[a-z]*", "", raw).strip("`").strip()
        parsed = json.loads(raw)

        value = parsed.get("value")
        if value is not None:
            # json.loads accepts NaN / Infinity and JSON booleans; none of
            # these is a real reading, so treat the reply as unusable.
            if isinstance(value, bool):
                return None
            value = float(value)
            if not math.isfinite(value):
                return None
            # Same precision as values produced by the regex parser.
            value = round(value, 4)

        return {
            "value": value,
            "is_approximate": bool(parsed.get("is_approximate", False)),
            "is_relative": bool(parsed.get("is_relative", False)),
            "zero": bool(parsed.get("zero", False)),
            "error": parsed.get("error"),
        }
    except Exception:
        # Deliberate best-effort: any failure (missing package, network,
        # malformed JSON, unexpected shape) hands control back to the caller,
        # which falls back to the regex parser.
        return None
# ─── Helpers ──────────────────────────────────────────────────────────────────
def _result(
    value: float,
    zero: bool = False,
    relative: bool = False,
    approximate: bool = False,
) -> dict:
    """Build a successful parse result; the value is rounded to 4 decimals."""
    rounded = round(float(value), 4)
    return dict(
        value=rounded,
        is_approximate=approximate,
        is_relative=relative,
        zero=zero,
        error=None,
    )
def _error(message: str) -> dict:
    """Build a failed parse result: no value, all flags off, ``message`` as the error."""
    blank = {
        "value": None,
        "is_approximate": False,
        "is_relative": False,
        "zero": False,
    }
    return {**blank, "error": message}