| from __future__ import annotations |
|
|
| import ast |
| import json |
| import math |
| import re |
| from typing import Any, Iterable, List |
|
|
| from models import ChatRequest |
|
|
|
|
def clamp01(x: Any, default: float = 0.5) -> float:
    """Coerce *x* to a float and clamp it to the closed interval [0.0, 1.0].

    Args:
        x: Any value accepted by ``float()`` (numbers, numeric strings, ...).
        default: Value returned unchanged (it is not clamped) when *x*
            cannot be converted or converts to NaN.

    Returns:
        The clamped value, or *default* when no usable number is available.
    """
    try:
        value = float(x)
    except Exception:
        # Best-effort conversion: any failure (wrong type, unparsable string,
        # overflow, a misbehaving __float__) falls back to the default.
        return default
    # NaN compares false with everything, so max/min would silently turn it
    # into 1.0; treat it as "no usable value" instead.
    if math.isnan(value):
        return default
    return max(0.0, min(1.0, value))
|
|
|
|
def normalize_spaces(text: str) -> str:
    """Collapse each whitespace run in *text* to one space and trim the ends.

    Falsy input (``None``, ``""``, ``0``) is treated as the empty string;
    any other value is converted with ``str()`` first.
    """
    raw = str(text or "")
    collapsed = re.sub(r"\s+", " ", raw)
    return collapsed.strip()
|
|
|
|
def clean_math_text(text: str) -> str:
    """Replace typographic math symbols in *text* with ASCII equivalents.

    The multiplication and division signs become ``*`` and ``/``; the en dash,
    em dash and Unicode minus become ``-``; the non-breaking space becomes a
    regular space. Falsy input is treated as the empty string.
    """
    # Every substitution is a single character, so one translate() pass is
    # equivalent to applying the replacements one after another.
    substitutions = str.maketrans(
        {
            "×": "*",
            "÷": "/",
            "–": "-",
            "—": "-",
            "−": "-",
            "\u00a0": " ",
        }
    )
    return str(text or "").translate(substitutions)
|
|
|
|
def tokenize(text: str) -> List[str]:
    """Split *text* into lowercase ASCII alphanumeric tokens.

    The text is first passed through ``clean_math_text`` and lowercased; every
    maximal run of ``a-z`` / ``0-9`` characters becomes one token, in order of
    appearance. Everything else acts as a separator.
    """
    lowered = clean_math_text(text).lower()
    return re.findall(r"[a-z0-9]+", lowered)
|
|
|
|
def score_token_overlap(query: str, text: str) -> float:
    """Return the fraction of distinct *query* tokens that also occur in *text*.

    Both strings are tokenized with ``tokenize`` and compared as sets, so
    repeated tokens count once. The result is 0.0 when either side yields no
    tokens; otherwise it is ``shared / len(query tokens)``.
    """
    query_tokens = set(tokenize(query))
    text_tokens = set(tokenize(text))
    if query_tokens and text_tokens:
        shared = query_tokens.intersection(text_tokens)
        return len(shared) / max(1, len(query_tokens))
    return 0.0
|
|
|
|
def extract_text_from_any_payload(payload: Any) -> str:
    """Best-effort extraction of user-facing text from an arbitrary payload.

    Handling by type:

    * ``None`` -> ``""``.
    * ``bytes`` / ``bytearray`` -> decoded as UTF-8 (undecodable bytes are
      replaced) and then handled as a string.
    * ``str`` -> stripped. If it looks like a JSON/Python object or array
      (``{...}`` or ``[...]``) it is parsed and the result is processed
      recursively; when parsing fails the stripped string is returned as is.
    * ``dict`` -> the first well-known key (``message``, ``prompt``, ``query``,
      ...) that yields non-empty text wins; otherwise the text of all values
      is joined with newlines.
    * ``list`` -> the non-empty text of every element, joined with newlines.
    * anything else -> ``str(payload).strip()``.

    Args:
        payload: Any value, typically a raw or already-decoded request body.

    Returns:
        The extracted text, or ``""`` when nothing usable is found.
    """
    if payload is None:
        return ""

    if isinstance(payload, (bytes, bytearray)):
        # str(b"...") would return the repr ("b'...'"), not the content, so
        # decode first and reuse the string handling below.
        return extract_text_from_any_payload(payload.decode("utf-8", errors="replace"))

    if isinstance(payload, str):
        s = payload.strip()
        if not s:
            return ""
        if (s.startswith("{") and s.endswith("}")) or (s.startswith("[") and s.endswith("]")):
            # Parsing is deliberately best-effort: any failure (including a
            # RecursionError on deeply nested input) falls through to the
            # next strategy and finally to the raw string.
            try:
                return extract_text_from_any_payload(json.loads(s))
            except Exception:
                pass
            # Fallback for Python-literal syntax (single quotes, None, True).
            # NOTE(review): ast.literal_eval only evaluates literals, but very
            # large or deeply nested input can still exhaust memory or the
            # stack; consider a size cap if callers pass untrusted text here.
            try:
                decoded = ast.literal_eval(s)
                if isinstance(decoded, (dict, list)):
                    return extract_text_from_any_payload(decoded)
            except Exception:
                pass
        return s

    if isinstance(payload, dict):
        # Well-known keys are tried in priority order before falling back to
        # concatenating every value.
        preferred_keys = (
            "message", "prompt", "query", "text", "user_message",
            "input", "data", "payload", "body", "content",
        )
        for key in preferred_keys:
            if key in payload:
                maybe = extract_text_from_any_payload(payload[key])
                if maybe:
                    return maybe
        parts = [extract_text_from_any_payload(v) for v in payload.values()]
        return "\n".join(p for p in parts if p).strip()

    if isinstance(payload, list):
        parts = [extract_text_from_any_payload(x) for x in payload]
        return "\n".join(p for p in parts if p).strip()

    return str(payload).strip()
|
|
|
|
def get_user_text(req: ChatRequest, raw_body: Any = None) -> str:
    """Return the user's text from *req*, falling back to *raw_body*.

    The attributes ``message``, ``prompt``, ``query``, ``text`` and
    ``user_message`` are read from *req* in that order; the first one that is
    a non-blank string is returned stripped. If none qualifies, the text is
    extracted from *raw_body* with ``extract_text_from_any_payload``.
    """
    candidate_fields = ("message", "prompt", "query", "text", "user_message")
    for name in candidate_fields:
        candidate = getattr(req, name, None)
        if not isinstance(candidate, str):
            continue
        stripped = candidate.strip()
        if stripped:
            return stripped
    return extract_text_from_any_payload(raw_body).strip()
|
|
|
|
def short_lines(items: Iterable[str], limit: int) -> List[str]:
    """Return at most *limit* non-empty, whitespace-normalized lines.

    Each item is passed through ``normalize_spaces``; items that normalize to
    the empty string are skipped. Iteration stops as soon as *limit* lines
    have been collected, so the rest of *items* is not consumed.

    Args:
        items: Candidate lines, in priority order.
        limit: Maximum number of lines to return. Zero or a negative value
            yields an empty list.

    Returns:
        The collected lines, in their original order.
    """
    # Without this guard the limit is only checked after the first append, so
    # a non-positive limit would still let one line through.
    if limit <= 0:
        return []

    out: List[str] = []
    for item in items:
        line = normalize_spaces(item)
        if not line:
            continue
        out.append(line)
        if len(out) >= limit:
            break
    return out
|
|