"""Text normalization, tokenization, and payload text-extraction helpers."""

from __future__ import annotations

import ast
import json
import math
import re
from typing import Any, Iterable, List

from models import ChatRequest

# Compiled once at import time; both patterns are used on every call.
_WHITESPACE_RE = re.compile(r"\s+")
_TOKEN_RE = re.compile(r"[a-z0-9]+")

# Single-pass replacement table for typographic math characters.
_MATH_CHAR_TABLE = str.maketrans(
    {
        "×": "*",  # multiplication sign
        "÷": "/",  # division sign
        "–": "-",  # en dash
        "—": "-",  # em dash
        "−": "-",  # minus sign
        "\u00a0": " ",  # non-breaking space
    }
)

# Keys probed, in priority order, when pulling text out of a dict payload.
_PAYLOAD_TEXT_KEYS = (
    "message",
    "prompt",
    "query",
    "text",
    "user_message",
    "input",
    "data",
    "payload",
    "body",
    "content",
)

# Attributes probed, in priority order, on the request object.
_REQUEST_TEXT_FIELDS = ("message", "prompt", "query", "text", "user_message")


def clamp01(x: Any, default: float = 0.5) -> float:
    """Convert *x* to a float clamped to the closed interval [0.0, 1.0].

    Args:
        x: Any value accepted by ``float()``.
        default: Value returned when *x* cannot be converted or is NaN.

    Returns:
        The clamped float, or *default* on conversion failure / NaN.
    """
    try:
        value = float(x)
    except Exception:
        # Deliberately broad: any failure inside ``float()`` (including a
        # misbehaving ``__float__``) falls back to the default.
        return default
    # NaN compares false against everything, so min/max would silently
    # turn it into 1.0; treat it as "not a usable number" instead.
    if math.isnan(value):
        return default
    return max(0.0, min(1.0, value))


def normalize_spaces(text: str) -> str:
    """Collapse every whitespace run in *text* to one space and trim the ends.

    Falsy input (``None``, ``""``, ``0``) yields ``""``.
    """
    return _WHITESPACE_RE.sub(" ", str(text or "")).strip()


def clean_math_text(text: str) -> str:
    """Replace typographic math symbols and dashes with ASCII equivalents.

    ``×`` becomes ``*``, ``÷`` becomes ``/``, en/em dashes and the minus sign
    become ``-``, and non-breaking spaces become regular spaces. Falsy input
    yields ``""``.
    """
    return str(text or "").translate(_MATH_CHAR_TABLE)


def tokenize(text: str) -> List[str]:
    """Return the lowercase ``[a-z0-9]+`` runs found in the cleaned *text*."""
    return _TOKEN_RE.findall(clean_math_text(text).lower())


def score_token_overlap(query: str, text: str) -> float:
    """Return the fraction of distinct query tokens that also occur in *text*.

    Returns:
        A value in [0.0, 1.0]; ``0.0`` when either side has no tokens.
    """
    query_tokens = set(tokenize(query))
    text_tokens = set(tokenize(text))
    if not query_tokens or not text_tokens:
        return 0.0
    return len(query_tokens & text_tokens) / len(query_tokens)


def _join_nonempty(parts: Iterable[str]) -> str:
    """Join the non-empty strings in *parts* with newlines and trim the result."""
    return "\n".join(p for p in parts if p).strip()


def extract_text_from_any_payload(payload: Any) -> str:
    """Best-effort extraction of a text string from an arbitrary payload.

    Handling by type:

    * ``None`` -> ``""``.
    * ``bytes`` / ``bytearray`` -> decoded as UTF-8 (undecodable bytes are
      replaced) and then handled as a string.
    * ``str`` -> stripped; if it looks like a JSON / Python dict or list
      literal it is decoded and processed recursively, otherwise returned.
    * ``dict`` -> the first non-empty result among a fixed priority list of
      keys; failing that, all values joined by newlines.
    * ``list`` -> all elements joined by newlines.
    * anything else -> ``str(payload).strip()``.

    Returns:
        The extracted text, stripped; ``""`` when nothing was found.
    """
    if payload is None:
        return ""

    if isinstance(payload, (bytes, bytearray)):
        # Without this, ``str(payload)`` would return the "b'...'" repr.
        return extract_text_from_any_payload(
            bytes(payload).decode("utf-8", errors="replace")
        )

    if isinstance(payload, str):
        stripped = payload.strip()
        if not stripped:
            return ""
        looks_structured = (
            stripped.startswith("{") and stripped.endswith("}")
        ) or (stripped.startswith("[") and stripped.endswith("]"))
        if looks_structured:
            # Best effort: any failure while decoding or recursing falls
            # through to the next strategy, and finally to the raw string.
            try:
                return extract_text_from_any_payload(json.loads(stripped))
            except Exception:
                pass
            try:
                # literal_eval only evaluates literals (no arbitrary code);
                # it covers Python-repr payloads such as single-quoted dicts.
                decoded = ast.literal_eval(stripped)
                if isinstance(decoded, (dict, list)):
                    return extract_text_from_any_payload(decoded)
            except Exception:
                pass
        return stripped

    if isinstance(payload, dict):
        for key in _PAYLOAD_TEXT_KEYS:
            if key in payload:
                candidate = extract_text_from_any_payload(payload[key])
                if candidate:
                    return candidate
        # No preferred key produced text: fall back to every value.
        return _join_nonempty(
            extract_text_from_any_payload(value) for value in payload.values()
        )

    if isinstance(payload, list):
        return _join_nonempty(
            extract_text_from_any_payload(element) for element in payload
        )

    return str(payload).strip()


def get_user_text(req: ChatRequest, raw_body: Any = None) -> str:
    """Return the user's text from *req*, falling back to *raw_body*.

    The first attribute among ``message``, ``prompt``, ``query``, ``text``
    and ``user_message`` that is a non-blank string wins. If none qualifies,
    the text is extracted from *raw_body* instead.

    Args:
        req: Request object whose attributes are probed via ``getattr``.
        raw_body: Optional raw payload used when *req* carries no text.

    Returns:
        The stripped user text, or ``""`` when nothing was found.
    """
    for field in _REQUEST_TEXT_FIELDS:
        value = getattr(req, field, None)
        if isinstance(value, str) and value.strip():
            return value.strip()
    return extract_text_from_any_payload(raw_body).strip()


def short_lines(items: Iterable[str], limit: int) -> List[str]:
    """Return up to *limit* non-empty, whitespace-normalized items.

    Args:
        items: Strings to normalize; items that normalize to ``""`` are
            skipped and do not count toward the limit.
        limit: Maximum number of lines to return. Zero or negative yields
            an empty list.

    Returns:
        The first *limit* non-empty normalized items, in input order.
    """
    if limit <= 0:
        # The loop below only checks the limit after appending, so it would
        # otherwise let one item through.
        return []
    out: List[str] = []
    for item in items:
        cleaned = normalize_spaces(item)
        if cleaned:
            out.append(cleaned)
            if len(out) >= limit:
                break
    return out