# Source: Hugging Face Space "r-bansal/pulseai-api" (status: Sleeping; file size: 13,575 bytes).

from __future__ import annotations

import os
import re

import numpy as np

# Groq is an optional enhancement; the app works fully without it.
# Put GROQ_API_KEY in .env to switch on the richer explanations.
# A missing or blank key leaves _USE_GROQ False.
_GROQ_KEY = os.environ.get("GROQ_API_KEY", "").strip()
_USE_GROQ = len(_GROQ_KEY) > 0
_GROQ_MODEL = "llama-3.3-70b-versatile"

# Technical vocabulary that must not appear in user-facing explanations.
# The list is also appended to the system prompt below.
_FORBIDDEN = [
    "model",
    "algorithm",
    "cusum",
    "percentile",
    "recalibration",
    "aci",
    "chronos",
    "neural",
    "transformer",
    "inference",
    "statistical",
    "parameter",
    "coefficient",
    "conformal",
]

# Pieces are joined with single spaces; the last piece is the comma-separated
# forbidden-word list.
_SYSTEM_PROMPT = " ".join(
    (
        "You explain forecasting results to non-technical users in 2-3 plain sentences.",
        "Your user may be a farmer, baker, or shop owner with no data background.",
        "Never use technical words. Speak like a helpful friend, not a data scientist.",
        "Always mention: what the trend is, how confident to be, and one thing to do.",
        "Forbidden words — never use:",
        ", ".join(_FORBIDDEN),
    )
)


# ─── Explanation ──────────────────────────────────────────────────────────────

def explain(
    trend_pct: float,
    confidence: int,
    alert: str,  # "HIGH" | "LOW" | "NONE"
    horizon_weeks: int,
    series_name: str = "your data",
) -> tuple[str, str]:
    """
    Produce a short explanation of a forecast plus the name of its source.

    The arguments are bundled into one context dict. When Groq is enabled
    it is asked first, and any non-empty answer is returned with the source
    tag "groq". In every other case the text comes from the built-in
    template and is tagged "template".
    """
    context = {
        "trend_pct": trend_pct,
        "confidence": confidence,
        "alert": alert,
        "horizon_weeks": horizon_weeks,
        "series_name": series_name,
    }

    groq_text = _groq_explain(context) if _USE_GROQ else None
    if groq_text:
        return groq_text, "groq"

    return _template_explain(context), "template"


def _groq_explain(context: dict) -> str | None:
    """
    Ask Groq for a 2-3 sentence plain-language explanation of a forecast.

    Args:
        context: dict with the keys "trend_pct", "confidence", "alert",
            "horizon_weeks" and "series_name".

    Returns:
        The stripped reply text, or None when the groq package cannot be
        imported, the request fails, the reply is empty, or the reply
        contains one of the words in _FORBIDDEN.
    """
    try:
        from groq import Groq

        client = Groq(api_key=_GROQ_KEY)
        direction = "up" if context["trend_pct"] > 0 else "down"
        user_msg = (
            f"Series: {context['series_name']}. "
            f"Trend over next {context['horizon_weeks']} weeks: "
            f"{direction} {abs(context['trend_pct']):.1f}%. "
            f"Confidence score: {context['confidence']}/100. "
            f"Anomaly alert: {context['alert']}. "
            f"Write the 2-3 sentence explanation now."
        )
        resp = client.chat.completions.create(
            model=_GROQ_MODEL,
            messages=[
                {"role": "system", "content": _SYSTEM_PROMPT},
                {"role": "user",   "content": user_msg},
            ],
            max_tokens=120,
            temperature=0.4,
        )
        # The message content may be missing; treat that like an empty reply.
        text = (resp.choices[0].message.content or "").strip()
        if not text:
            return None

        # Reject if a forbidden word slipped through.
        # Every entry must begin at a word boundary, so "model" no longer
        # fires inside "remodel". Longer entries still match as prefixes
        # ("models", "statistically"), while very short entries such as
        # "aci" must be whole words so they do not fire inside "acid" or
        # "capacity".
        forbidden_pattern = r"\b(?:" + "|".join(
            re.escape(word) + (r"\b" if len(word) <= 3 else "")
            for word in _FORBIDDEN
        ) + ")"
        if re.search(forbidden_pattern, text.lower()):
            return None

        return text
    except Exception:
        # Deliberate best-effort: any failure (missing package, network,
        # unexpected response shape) makes the caller use the template.
        return None


def _template_explain(ctx: dict) -> str:
    """
    Build the explanation text locally from fixed sentence templates.

    Args:
        ctx: dict with the keys "trend_pct", "confidence", "alert",
            "horizon_weeks" and "series_name".

    Returns:
        A three-sentence (alert cases) or two/three-sentence explanation.
        Branch priority: a "HIGH"/"LOW" alert first, then low confidence
        (below 40), then the regular forecast sentence.
    """
    pct   = ctx["trend_pct"]
    conf  = ctx["confidence"]
    alert = ctx["alert"]
    weeks = ctx["horizon_weeks"]

    # Upper-case only the first character. str.capitalize() would also
    # lower-case the rest and mangle names like "Wheat Sales" or "USD rate".
    raw_name = ctx["series_name"]
    name = raw_name[:1].upper() + raw_name[1:]

    rising    = pct > 0
    direction = "rise" if rising else "fall"
    # Article is chosen with the adjective so we never print "a upward".
    trend     = "an upward" if rising else "a downward"
    conf_word = "high" if conf > 60 else "moderate" if conf > 40 else "low"
    pct_str   = f"{abs(pct):.1f}%"

    # Both alert levels share the same wording apart from the opening verb.
    surprise = {"HIGH": "jumped higher", "LOW": "dropped lower"}.get(alert)
    if surprise:
        return (
            f"{name} {surprise} than expected recently. "
            f"The forecast now shows {trend} trend of about {pct_str} "
            f"over the next {weeks} weeks. "
            f"Wait one more week to confirm before making large decisions."
        )
    if conf < 40:
        # Low confidence means HIGH uncertainty. The confidence label
        # ("low") must not be reused here or the sentence says the opposite.
        return (
            f"{name} shows {trend} trend over the next {weeks} weeks, "
            f"but uncertainty is high right now. "
            f"Make smaller, reversible decisions until the picture clears."
        )
    return (
        f"{name} is expected to {direction} by about {pct_str} "
        f"over the next {weeks} weeks, with {conf_word} confidence. "
        f"The range shown gives you a safe window to plan within."
    )


# ─── NL Input Parser ──────────────────────────────────────────────────────────

def parse_nl_input(raw_text: str, last_actual: float | None = None) -> dict:
    """
    Turn free-form user text into a structured value dict.

    Result shape:
        {
            "value":          float | None,
            "is_approximate": bool,
            "is_relative":    bool,
            "zero":           bool,
            "error":          str | None,   # set if we cannot parse
        }

    Order of attempts:
      1. Groq, only when enabled. Its answer is accepted only if it is
         non-empty and carries a non-None "value".
      2. The regex parser, for every other case, including any Groq
         failure.
    """
    cleaned = raw_text.strip()

    groq_result = _groq_parse(cleaned, last_actual) if _USE_GROQ else None
    if groq_result and groq_result.get("value") is not None:
        return groq_result

    return _regex_parse(cleaned, last_actual)


def _regex_parse(text: str, last_actual: float | None) -> dict:
    """
    Rule-based parser for what the user typed (English / Hindi-English mix).

    Rules are tried in this order; the first that applies wins:
      1. zero / closed words, or a standalone numeric zero
      2. "don't know"-style phrases           -> error
      3. "same as before"                     -> last_actual
      4. double / half                        -> last_actual * 2 or * 0.5
      5. percentage                           -> relative to last_actual, or
                                                 an absolute value when no
                                                 previous value exists and no
                                                 relative wording is present
      6. "went up/down by N"                  -> last_actual +/- N
      7. plain number                         -> absolute value

    Args:
        text: raw user input.
        last_actual: previous value used by the relative rules, or None.

    Returns:
        A dict built by _result(...) on success or _error(...) on failure.
    """
    t = text.lower().strip()

    def has_word(words: list) -> bool:
        # Whole-word match, so short tokens do not fire inside longer words
        # (e.g. "usi" inside "business", "nil" inside "vanilla").
        alternatives = "|".join(re.escape(w) for w in words)
        return re.search(rf"(?<!\w)(?:{alternatives})(?!\w)", t) is not None

    # ── Zero / closed ──────────────────────────────────────────────────────
    # The numeric zero must be a standalone number: "0", "0.0", "sold 0.".
    # A plain \b0\b also fires on the 0 in "0.5" or "2300.0", so digits and
    # decimal/thousands separators on either side are excluded explicitly.
    zero_words = ["nothing", "bandh", "closed", "shut", "nil", "nill", "shunya"]
    standalone_zero = re.search(r"(?<![\w.,])0(?:\.0+)?(?![.,]?\d|\w)", t)
    if has_word(zero_words) or standalone_zero:
        return _result(0.0, zero=True)

    # ── Rejection phrases ──────────────────────────────────────────────────
    reject_words = ["don't know", "dont know", "pata nahi", "pata nahin", "not sure", "no idea"]
    if any(w in t for w in reject_words):
        return _error("Please enter the value when you know it.")

    is_approximate = any(w in t for w in ["around", "about", "roughly", "approximately", "lagbhag", "almost"])

    # ── Relative: same as last week ────────────────────────────────────────
    if has_word(["same", "equal", "usi", "wahi"]):
        if last_actual is not None:
            return _result(last_actual, relative=True, approximate=is_approximate)
        return _error("We don't have a previous value to compare. Please enter the number directly.")

    # ── Relative: double ───────────────────────────────────────────────────
    if "double" in t or "do guna" in t or "dugna" in t:
        if last_actual is not None:
            # Keep the approximate flag ("almost double"), as the other
            # relative rules do.
            return _result(last_actual * 2, relative=True, approximate=is_approximate)
        return _error("Please enter the actual number.")

    # ── Relative: half ────────────────────────────────────────────────────
    if "half" in t or "aadha" in t:
        if last_actual is not None:
            return _result(last_actual * 0.5, relative=True, approximate=is_approximate)
        return _error("Please enter the actual number.")

    # ── Relative: percentage change ────────────────────────────────────────
    pct = None
    pct_match = re.search(r"([\d.]+)\s*%", t)
    if pct_match:
        try:
            pct = float(pct_match.group(1))
        except ValueError:
            # e.g. "1.2.3%" or "..%": not a usable percentage, so fall
            # through to the remaining rules instead of crashing.
            pct = None
    if pct is not None:
        down = any(w in t for w in ["down", "kam", "less", "decrease", "drop", "fell", "girak", "gira"])
        if last_actual is not None:
            factor = (1 - pct / 100) if down else (1 + pct / 100)
            return _result(last_actual * factor, relative=True, approximate=is_approximate)
        # Has relative words (jyada, kam, more, less) — needs a previous value
        relative_words = ["jyada", "zyada", "more", "kam", "less", "up", "down", "increase", "decrease"]
        if any(w in t for w in relative_words):
            return _error("We need a previous value to calculate the percentage. Please enter the number directly.")
        # No relative words — treat as absolute value
        if pct < 10000:
            return _result(pct, approximate=is_approximate)

    # ── Relative: went up/down by absolute amount ──────────────────────────
    up_match = re.search(
        r"(?:went up|increased?|badhke?|upar|zyada|jyada)\s+(?:by\s+)?([\d,. ]+(?:lakh|crore)?)", t
    )
    down_match = re.search(
        r"(?:went down|decreased?|dropped?|girak?|kam|niche)\s+(?:by\s+)?([\d,. ]+(?:lakh|crore)?)", t
    )
    if up_match and last_actual is not None:
        delta = _parse_number_str(up_match.group(1))
        if delta is not None:
            return _result(last_actual + delta, relative=True)
    if down_match and last_actual is not None:
        delta = _parse_number_str(down_match.group(1))
        if delta is not None:
            return _result(last_actual - delta, relative=True)

    # ── Absolute value ─────────────────────────────────────────────────────
    value = _parse_number_str(t)
    if value is not None:
        return _result(value, approximate=is_approximate)

    return _error(
        "We couldn't understand that value. "
        "Try typing just the number, like: 2300"
    )


def _parse_number_str(text: str) -> float | None:
    """
    Extracts a number from a string, handling:
      - ₹ $ £ symbols
      - Indian lakh / crore shorthand (including "1,000 crore")
      - Indian and Western comma notation (1,23,456 / 123,456)
      - Abbreviation dots glued to the number ("Rs. 500" -> 500, not 0.5)
      - Prefix approximate words (around, about, roughly...)
      - Trailing unit words (kg, rupee, units...)
      - Plain floats and integers

    Returns:
        The parsed value as a float, or None when no usable number is
        found (including malformed tokens such as "1.2.3").
    """
    t = text.lower().strip()
    t = re.sub(r"[₹$£\s]", "", t)
    # Strip commas up front (handles both Indian and Western notation) so
    # the lakh/crore patterns see "1000crore" rather than "1,000crore";
    # otherwise only the digits after the last comma would be captured.
    t = t.replace(",", "")

    def to_float(match: re.Match, group: int = 0) -> float | None:
        # Convert a matched digit/dot token to float, or None if malformed.
        token = match.group(group)
        start = match.start(group)
        # A leading dot directly after a letter is abbreviation punctuation
        # ("rs.500", "approx.2300"), not a decimal point.
        if token.startswith(".") and start > 0 and match.string[start - 1].isalpha():
            token = token.lstrip(".")
        try:
            return float(token)
        except ValueError:
            return None

    crore = re.search(r"([\d.]+)\s*crore", t)
    lakh  = re.search(r"([\d.]+)\s*lakh",  t)
    if crore:
        base = to_float(crore, 1)
        return None if base is None else base * 1e7
    if lakh:
        base = to_float(lakh, 1)
        return None if base is None else base * 1e5

    # Strip prefix approximate/directional words
    t = re.sub(r"^(around|about|roughly|approximately|lagbhag|almost|upto|up to)\s*", "", t)
    # Strip trailing unit words and directional tokens
    t = re.sub(r"\s*(rupee|rupees|rs|inr|kg|units?|pieces?|up|down|more|less|zyada|jyada|kam)s?\s*$", "", t)
    t = t.strip()

    # Use the first token that actually contains a digit; tokens made only
    # of dots are stray punctuation and are skipped.
    for num_match in re.finditer(r"[\d.]+", t):
        if re.search(r"\d", num_match.group()):
            return to_float(num_match)
    return None


def _groq_parse(text: str, last_actual: float | None) -> dict | None:
    """
    Ask Groq to extract the numeric value from the user's text.

    Args:
        text: what the user typed.
        last_actual: previous value given to the model as context, or None.

    Returns:
        A dict with the keys "value", "is_approximate", "is_relative",
        "zero" and "error", or None when the groq package is unavailable,
        the request fails, the reply is not a JSON object, or the value is
        not a finite number.
    """
    try:
        import json
        import math

        from groq import Groq

        client  = Groq(api_key=_GROQ_KEY)
        context = f"Last known value: {last_actual}" if last_actual is not None else "No previous value."
        # json.dumps quotes the text exactly like the plain "..." form for
        # ordinary input, but escapes embedded quotes and newlines so the
        # user's text cannot break out of the quoted span.
        quoted  = json.dumps(text, ensure_ascii=False)
        prompt  = (
            f"{context}\n"
            f"User typed: {quoted}\n\n"
            "Extract the numeric value. Respond with ONLY a JSON object, no markdown:\n"
            '{"value": <number or null>, "is_approximate": <bool>, '
            '"is_relative": <bool>, "zero": <bool>, "error": <string or null>}'
        )
        resp = client.chat.completions.create(
            model=_GROQ_MODEL,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=80,
            temperature=0.0,
        )
        raw = (resp.choices[0].message.content or "").strip()
        # Drop markdown code fences in case the model adds them anyway.
        raw = re.sub(r"```[a-z]*", "", raw).strip("`").strip()

        parsed = json.loads(raw)
        if not isinstance(parsed, dict):
            return None

        value = parsed.get("value")
        if isinstance(value, bool):
            # true/false would silently become 1.0/0.0 — not a real number.
            return None
        if value is not None:
            value = float(value)
            # json.loads accepts NaN / Infinity; never hand those to callers.
            if not math.isfinite(value):
                return None
            # Same precision as values produced by the regex parser.
            value = round(value, 4)

        error = parsed.get("error")
        return {
            "value":          value,
            "is_approximate": bool(parsed.get("is_approximate", False)),
            "is_relative":    bool(parsed.get("is_relative", False)),
            "zero":           bool(parsed.get("zero", False)),
            # A successful parse carries no error, matching the regex path.
            "error":          None if value is not None else (str(error) if error else None),
        }
    except Exception:
        # Deliberate best-effort: any failure means "no Groq answer" and the
        # caller falls back to the regex parser.
        return None


# ─── Helpers ──────────────────────────────────────────────────────────────────

def _result(
    value: float,
    zero: bool = False,
    relative: bool = False,
    approximate: bool = False,
) -> dict:
    """
    Build the success payload returned by the parsers.

    The value is coerced to float and rounded to 4 decimal places; the
    three flags are passed through unchanged and "error" is always None.
    """
    rounded = round(float(value), 4)
    return dict(
        value=rounded,
        is_approximate=approximate,
        is_relative=relative,
        zero=zero,
        error=None,
    )


def _error(message: str) -> dict:
    """
    Build the failure payload returned by the parsers.

    "value" is None, every flag is False and "error" carries the message.
    """
    flags = dict.fromkeys(("is_approximate", "is_relative", "zero"), False)
    return {"value": None, **flags, "error": message}