Spaces:

r-bansal
/

pulseai-api

Sleeping

App Files Files Community

pulseai-api / explainer.py

r-bansal

feat: add React frontend, rolling forecast, AI chat, CUSUM tuning, CSV download

ba65d00 about 1 month ago

raw

history blame contribute delete

13.6 kB

	from __future__ import annotations

	import os
	import re

	import numpy as np

	# Groq is optional. The app works fully without it.
	# Set GROQ_API_KEY in .env to enable richer explanations.
	_GROQ_KEY = os.getenv("GROQ_API_KEY", "").strip()
	_USE_GROQ = bool(_GROQ_KEY)
	_GROQ_MODEL = "llama-3.3-70b-versatile"

	_FORBIDDEN = [
	"model", "algorithm", "cusum", "percentile", "recalibration",
	"aci", "chronos", "neural", "transformer", "inference",
	"statistical", "parameter", "coefficient", "conformal",
	]

	_SYSTEM_PROMPT = (
	"You explain forecasting results to non-technical users in 2-3 plain sentences. "
	"Your user may be a farmer, baker, or shop owner with no data background. "
	"Never use technical words. Speak like a helpful friend, not a data scientist. "
	"Always mention: what the trend is, how confident to be, and one thing to do. "
	"Forbidden words — never use: "
	+ ", ".join(_FORBIDDEN)
	)


	# ─── Explanation ──────────────────────────────────────────────────────────────

	def explain(
	    trend_pct: float,
	    confidence: int,
	    alert: str,  # "HIGH" | "LOW" | "NONE"
	    horizon_weeks: int,
	    series_name: str = "your data",
	) -> tuple[str, str]:
	    """
	    Build a plain-language explanation of a forecast.

	    Returns an (explanation_text, source) pair. ``source`` is "groq" when
	    Groq is enabled and produced non-empty text, and "template" otherwise;
	    the template path is the fallback for every other case.
	    """
	    context = {
	        "trend_pct": trend_pct,
	        "confidence": confidence,
	        "alert": alert,
	        "horizon_weeks": horizon_weeks,
	        "series_name": series_name,
	    }

	    # Only attempt the remote call when a key is configured.
	    groq_text = _groq_explain(context) if _USE_GROQ else None
	    if groq_text:
	        return groq_text, "groq"

	    return _template_explain(context), "template"


	def _groq_explain(context: dict) -> str | None:
	    """
	    Ask Groq for a 2-3 sentence explanation of the forecast.

	    ``context`` must provide the keys ``trend_pct``, ``confidence``,
	    ``alert``, ``horizon_weeks`` and ``series_name``.

	    Returns the reply text, or None when anything goes wrong (import
	    failure, request error, malformed response) or when the reply uses a
	    word from ``_FORBIDDEN``. The caller treats None as "use the template".
	    """
	    try:
	        from groq import Groq

	        client = Groq(api_key=_GROQ_KEY)
	        direction = "up" if context["trend_pct"] > 0 else "down"
	        user_msg = (
	            f"Series: {context['series_name']}. "
	            f"Trend over next {context['horizon_weeks']} weeks: "
	            f"{direction} {abs(context['trend_pct']):.1f}%. "
	            f"Confidence score: {context['confidence']}/100. "
	            f"Anomaly alert: {context['alert']}. "
	            "Write the 2-3 sentence explanation now."
	        )
	        resp = client.chat.completions.create(
	            model=_GROQ_MODEL,
	            messages=[
	                {"role": "system", "content": _SYSTEM_PROMPT},
	                {"role": "user", "content": user_msg},
	            ],
	            max_tokens=120,
	            temperature=0.4,
	        )
	        text = resp.choices[0].message.content.strip()

	        # Reject if a forbidden word slipped through. Match only at the
	        # start of a word: inflected forms ("models", "statistically") are
	        # still caught, but ordinary words that merely contain the letters
	        # (e.g. "capacity" or "spacing" contain "aci") no longer discard a
	        # perfectly good reply.
	        lowered = text.lower()
	        if any(re.search(r"\b" + re.escape(w), lowered) for w in _FORBIDDEN):
	            return None

	        return text
	    except Exception:
	        # Deliberate best-effort: any failure falls back to the template.
	        return None


	def _template_explain(ctx: dict) -> str:
	pct = ctx["trend_pct"]
	conf = ctx["confidence"]
	alert = ctx["alert"]
	weeks = ctx["horizon_weeks"]
	name = ctx["series_name"].capitalize()

	direction = "rise" if pct > 0 else "fall"
	dir_word = "upward" if pct > 0 else "downward"
	conf_word = "high" if conf > 60 else "moderate" if conf > 40 else "low"
	pct_str = f"{abs(pct):.1f}%"

	if alert == "HIGH":
	return (
	f"{name} jumped higher than expected recently. "
	f"The forecast now shows a {dir_word} trend of about {pct_str} "
	f"over the next {weeks} weeks. "
	f"Wait one more week to confirm before making large decisions."
	)
	if alert == "LOW":
	return (
	f"{name} dropped lower than expected recently. "
	f"The forecast now shows a {dir_word} trend of about {pct_str} "
	f"over the next {weeks} weeks. "
	f"Wait one more week to confirm before making large decisions."
	)
	if conf < 40:
	return (
	f"{name} shows a {dir_word} trend over the next {weeks} weeks, "
	f"but uncertainty is {conf_word} right now. "
	f"Make smaller, reversible decisions until the picture clears."
	)
	return (
	f"{name} is expected to {direction} by about {pct_str} "
	f"over the next {weeks} weeks, with {conf_word} confidence. "
	f"The range shown gives you a safe window to plan within."
	)


	# ─── NL Input Parser ──────────────────────────────────────────────────────────

	def parse_nl_input(raw_text: str, last_actual: float | None = None) -> dict:
	    """
	    Converts what the user typed into a structured value dict.

	    Returns:
	        {
	            "value": float | None,
	            "is_approximate": bool,
	            "is_relative": bool,
	            "zero": bool,
	            "error": str | None,   # set if we cannot parse
	        }

	    Order of attempts:
	      1. Groq, only when a key is configured and the input is non-empty.
	         Its answer is used only if it carries a non-null "value".
	      2. Regex parser, which is the fallback for every other case
	         (Groq disabled, Groq failed, Groq returned no value, empty input).

	    ``last_actual`` is the previous reading, used to resolve relative
	    phrases; pass None when there is none.
	    """
	    # Tolerate None/empty input instead of raising on .strip().
	    text = (raw_text or "").strip()

	    # With nothing typed there is nothing for Groq to interpret; skip the
	    # network call and let the regex path return its standard error.
	    if text and _USE_GROQ:
	        result = _groq_parse(text, last_actual)
	        if result and result.get("value") is not None:
	            return result

	    return _regex_parse(text, last_actual)


	def _regex_parse(text: str, last_actual: float | None) -> dict:
	    """
	    Rule-based parser for a typed reading. Never touches the network.

	    Rules are tried in this order; the first one that applies wins:
	      1. zero / closed words, or a standalone 0
	      2. "don't know"-style phrases        -> error
	      3. "same as before" words            -> last_actual
	      4. double / half                     -> last_actual * 2 or * 0.5
	      5. a percentage                      -> last_actual scaled up/down,
	         or the bare number when there is no previous value and no
	         relative wording
	      6. "went up/down by N"               -> last_actual +/- N
	      7. any plain number in the text
	    Returns a dict built by ``_result`` on success or ``_error`` otherwise.
	    """
	    t = text.lower().strip()

	    def has_word(words):
	        # Whole-word match, so "nil" does not fire on "vanilla" and
	        # "usi" does not fire on "business".
	        return any(re.search(rf"\b{re.escape(w)}\b", t) for w in words)

	    # ── Zero / closed ──────────────────────────────────────────────────────
	    # The number must be a standalone zero: not part of a larger number
	    # ("2300", "10.0", "1,0") and not the integer part of a decimal ("0.5").
	    zero_words = ["nothing", "bandh", "closed", "shut", "nil", "nill", "shunya"]
	    zero_number = re.search(r"(?<![\d,])(?<!\d\.)0+(?:\.0+)?(?![\d,]|\.\d)", t)
	    if has_word(zero_words) or zero_number:
	        return _result(0.0, zero=True)

	    # ── Rejection phrases ──────────────────────────────────────────────────
	    reject_words = ["don't know", "dont know", "pata nahi", "pata nahin", "not sure", "no idea"]
	    if any(w in t for w in reject_words):
	        return _error("Please enter the value when you know it.")

	    is_approximate = any(
	        w in t for w in ["around", "about", "roughly", "approximately", "lagbhag", "almost"]
	    )

	    # ── Relative: same as last week ────────────────────────────────────────
	    if has_word(["same", "equal", "usi", "wahi"]):
	        if last_actual is not None:
	            return _result(last_actual, relative=True, approximate=is_approximate)
	        return _error("We don't have a previous value to compare. Please enter the number directly.")

	    # ── Relative: double ───────────────────────────────────────────────────
	    if "double" in t or "do guna" in t or "dugna" in t:
	        if last_actual is not None:
	            return _result(last_actual * 2, relative=True)
	        return _error("Please enter the actual number.")

	    # ── Relative: half ─────────────────────────────────────────────────────
	    if "half" in t or "aadha" in t:
	        if last_actual is not None:
	            return _result(last_actual * 0.5, relative=True)
	        return _error("Please enter the actual number.")

	    # ── Relative: percentage change ────────────────────────────────────────
	    # The capture is always a valid float literal, so float() cannot raise
	    # on inputs such as "..%".
	    pct_match = re.search(r"(\d*\.?\d+)\s*%", t)
	    if pct_match:
	        pct = float(pct_match.group(1))
	        down = any(w in t for w in ["down", "kam", "less", "decrease", "drop", "fell", "girak", "gira"])
	        if last_actual is not None:
	            factor = (1 - pct / 100) if down else (1 + pct / 100)
	            return _result(last_actual * factor, relative=True, approximate=is_approximate)
	        # Has relative words (jyada, kam, more, less) — needs a previous value
	        relative_words = ["jyada", "zyada", "more", "kam", "less", "up", "down", "increase", "decrease"]
	        if any(w in t for w in relative_words):
	            return _error("We need a previous value to calculate the percentage. Please enter the number directly.")
	        # No relative words — treat as absolute value
	        if pct < 10000:
	            return _result(pct, approximate=is_approximate)

	    # ── Relative: went up/down by absolute amount ──────────────────────────
	    up_match = re.search(
	        r"(?:went up|increased?|badhke?|upar|zyada|jyada)\s+(?:by\s+)?([\d,. ]+(?:lakh|crore)?)", t
	    )
	    down_match = re.search(
	        r"(?:went down|decreased?|dropped?|girak?|kam|niche)\s+(?:by\s+)?([\d,. ]+(?:lakh|crore)?)", t
	    )
	    if up_match and last_actual is not None:
	        delta = _parse_number_str(up_match.group(1))
	        if delta is not None:
	            return _result(last_actual + delta, relative=True)
	    if down_match and last_actual is not None:
	        delta = _parse_number_str(down_match.group(1))
	        if delta is not None:
	            return _result(last_actual - delta, relative=True)

	    # ── Absolute value ─────────────────────────────────────────────────────
	    value = _parse_number_str(t)
	    if value is not None:
	        return _result(value, approximate=is_approximate)

	    return _error(
	        "We couldn't understand that value. "
	        "Try typing just the number, like: 2300"
	    )


	def _parse_number_str(text: str) -> float \| None:
	"""
	Extracts a number from a string, handling:
	- ₹ $ £ symbols
	- Indian lakh / crore shorthand
	- Indian comma notation (1,23,456)
	- Prefix approximate words (around, about, roughly...)
	- Trailing unit words (kg, rupee, units...)
	- Plain floats and integers
	"""
	t = text.lower().strip()
	t = re.sub(r"[₹$£\s]", "", t)

	crore = re.search(r"([\d.]+)\s*crore", t)
	lakh = re.search(r"([\d.]+)\s*lakh", t)
	if crore:
	return float(crore.group(1)) * 1e7
	if lakh:
	return float(lakh.group(1)) * 1e5

	# Strip commas (handles both Indian and Western notation)
	t = re.sub(r",", "", t)
	# Strip prefix approximate/directional words
	t = re.sub(r"^(around\|about\|roughly\|approximately\|lagbhag\|almost\|upto\|up to)\s*", "", t)
	# Strip trailing unit words and directional tokens
	t = re.sub(r"\s(rupee\|rupees\|rs\|inr\|kg\|units?\|pieces?\|up\|down\|more\|less\|zyada\|jyada\|kam)s?\s$", "", t)
	t = t.strip()

	# Extract first clean number from remaining string
	num_match = re.search(r"[\d.]+", t)
	if num_match:
	try:
	return float(num_match.group())
	except ValueError:
	return None
	return None


	def _groq_parse(text: str, last_actual: float | None) -> dict | None:
	    """
	    Ask Groq to turn the typed text into the structured value dict.

	    Returns a dict with the keys "value", "is_approximate", "is_relative",
	    "zero" and "error", or None when the call fails, the reply is not
	    valid JSON of the expected shape, or the extracted value is not a
	    usable finite number. The caller treats None as "use the regex parser".
	    """
	    try:
	        import json
	        import math

	        from groq import Groq

	        client = Groq(api_key=_GROQ_KEY)
	        context = f"Last known value: {last_actual}" if last_actual is not None else "No previous value."
	        prompt = (
	            f"{context}\n"
	            f"User typed: \"{text}\"\n\n"
	            "Extract the numeric value. Respond with ONLY a JSON object, no markdown:\n"
	            '{"value": <number or null>, "is_approximate": <bool>, '
	            '"is_relative": <bool>, "zero": <bool>, "error": <string or null>}'
	        )
	        resp = client.chat.completions.create(
	            model=_GROQ_MODEL,
	            messages=[{"role": "user", "content": prompt}],
	            max_tokens=80,
	            temperature=0.0,
	        )
	        raw = resp.choices[0].message.content.strip()
	        # Drop any markdown code fence the reply was wrapped in.
	        raw = re.sub(r"```[a-z]*", "", raw).strip("`").strip()

	        parsed = json.loads(raw)
	        value = parsed.get("value")

	        if value is not None:
	            # bool is an int subclass: float(True) would silently become 1.0.
	            if isinstance(value, bool):
	                return None
	            value = float(value)
	            # json.loads accepts NaN/Infinity; neither is a usable reading.
	            if not math.isfinite(value):
	                return None

	        return {
	            "value": value,
	            "is_approximate": bool(parsed.get("is_approximate", False)),
	            "is_relative": bool(parsed.get("is_relative", False)),
	            "zero": bool(parsed.get("zero", False)),
	            "error": parsed.get("error"),
	        }
	    except Exception:
	        # Deliberate best-effort: any failure falls back to the regex parser.
	        return None


	# ─── Helpers ──────────────────────────────────────────────────────────────────

	def _result(
	value: float,
	zero: bool = False,
	relative: bool = False,
	approximate: bool = False,
	) -> dict:
	return {
	"value": round(float(value), 4),
	"is_approximate": approximate,
	"is_relative": relative,
	"zero": zero,
	"error": None,
	}


	def _error(message: str) -> dict:
	return {
	"value": None,
	"is_approximate": False,
	"is_relative": False,
	"zero": False,
	"error": message,
	}