File size: 2,932 Bytes
e9462cd
 
fa70564
 
e9462cd
 
fa70564
e9462cd
fa70564
e9462cd
 
fa70564
e9462cd
fa70564
e9462cd
 
 
 
 
 
fa70564
e9462cd
 
 
fa70564
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e9462cd
 
 
fa70564
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e9462cd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
from __future__ import annotations

import ast
import json
import math
import re
from typing import Any, Iterable, List

from models import ChatRequest


def clamp01(x: Any, default: float = 0.5) -> float:
    """Coerce *x* to a float and clamp it into the closed interval [0.0, 1.0].

    Args:
        x: Any value accepted by ``float()`` (number, numeric string, ...).
        default: Value returned when *x* cannot be converted to a float or
            converts to NaN. It is returned as given, without clamping.

    Returns:
        The clamped float, or *default* when no usable number is available.
    """
    try:
        v = float(x)
    except (TypeError, ValueError, OverflowError):
        # Unconvertible input: None, non-numeric text, an int too large for
        # a float, and similar.
        return default
    # NaN compares false against everything, so min()/max() would silently
    # turn it into 1.0; treat it as "no value" instead.
    if math.isnan(v):
        return default
    return max(0.0, min(1.0, v))


def normalize_spaces(text: str) -> str:
    """Collapse each run of whitespace in *text* to one space and trim the ends.

    Falsy input (``None``, ``""``, ``0``, ...) is treated as the empty string;
    any other value is converted with ``str()`` first.
    """
    raw = str(text) if text else ""
    collapsed = re.sub(r"\s+", " ", raw)
    return collapsed.strip()


def clean_math_text(text: str) -> str:
    """Replace typographic math symbols in *text* with ASCII equivalents.

    The multiplication and division signs become ``*`` and ``/``; the en dash,
    em dash and minus sign become ``-``; a non-breaking space becomes a plain
    space. Falsy input is treated as the empty string, and any other value is
    converted with ``str()`` first.
    """
    substitutions = {
        "×": "*",
        "÷": "/",
        "–": "-",
        "—": "-",
        "−": "-",
        "\u00a0": " ",
    }
    source = str(text) if text else ""
    # Every key is a single character, so one translate() pass is equivalent
    # to applying the replacements one after another.
    return source.translate(str.maketrans(substitutions))


def tokenize(text: str) -> List[str]:
    """Split *text* into lowercase ASCII alphanumeric tokens.

    The text is first passed through ``clean_math_text`` and lowercased; each
    maximal run of ``a-z`` / ``0-9`` characters then becomes one token, in
    order of appearance. All other characters act as separators.
    """
    lowered = clean_math_text(text).lower()
    token_pattern = re.compile(r"[a-z0-9]+")
    return token_pattern.findall(lowered)


def score_token_overlap(query: str, text: str) -> float:
    """Return the fraction of distinct query tokens that also occur in *text*.

    Both inputs are tokenized with ``tokenize`` and de-duplicated. The result
    is ``0.0`` when either side has no tokens; otherwise it is the number of
    shared tokens divided by the number of distinct query tokens.
    """
    query_tokens = set(tokenize(query))
    text_tokens = set(tokenize(text))
    if not (query_tokens and text_tokens):
        return 0.0
    shared = query_tokens.intersection(text_tokens)
    return len(shared) / max(1, len(query_tokens))


def extract_text_from_any_payload(payload: Any) -> str:
    """Best-effort extraction of human-readable text from an arbitrary payload.

    Handling by payload type:

    * ``None`` -> ``""``.
    * ``bytes`` / ``bytearray`` -> decoded as UTF-8 (undecodable bytes are
      replaced) and then handled as a string.
    * ``str`` -> stripped; if it parses as a JSON object/array, or as a Python
      literal dict/list, the parsed value is processed recursively; otherwise
      the stripped string itself is returned.
    * ``dict`` -> the first well-known key (``message``, ``prompt``, ...)
      that yields non-empty text wins; otherwise the non-empty text of all
      values is joined with newlines.
    * ``list`` -> the non-empty text of all items, joined with newlines.
    * anything else -> ``str(payload).strip()``.

    Args:
        payload: The value to extract text from.

    Returns:
        The extracted text, or ``""`` when nothing usable is found.
    """
    if payload is None:
        return ""

    if isinstance(payload, (bytes, bytearray)):
        # str() on a bytes object yields its "b'...'" repr rather than the
        # text it carries, so decode first and reuse the string handling.
        return extract_text_from_any_payload(
            payload.decode("utf-8", errors="replace")
        )

    if isinstance(payload, str):
        s = payload.strip()
        if not s:
            return ""
        if (s.startswith("{") and s.endswith("}")) or (s.startswith("[") and s.endswith("]")):
            try:
                decoded = json.loads(s)
                return extract_text_from_any_payload(decoded)
            except Exception:
                # Deliberate best-effort: not valid JSON (or not extractable),
                # so fall through to the Python-literal attempt below.
                pass
        try:
            # literal_eval only evaluates literals (no code execution). It
            # covers dict/list reprs that use single quotes, which JSON rejects.
            # NOTE(review): this runs on every non-empty string, including
            # plain user text; confirm that cost is acceptable.
            decoded = ast.literal_eval(s)
            if isinstance(decoded, (dict, list)):
                return extract_text_from_any_payload(decoded)
        except Exception:
            # Deliberate best-effort: ordinary text is not a Python literal.
            pass
        return s

    if isinstance(payload, dict):
        # Prefer the keys that conventionally carry the user's text, in
        # priority order; the first one producing non-empty text wins.
        for key in [
            "message", "prompt", "query", "text", "user_message",
            "input", "data", "payload", "body", "content",
        ]:
            if key in payload:
                maybe = extract_text_from_any_payload(payload[key])
                if maybe:
                    return maybe
        # No preferred key produced text: gather text from every value.
        parts = [extract_text_from_any_payload(v) for v in payload.values()]
        return "\n".join([p for p in parts if p]).strip()

    if isinstance(payload, list):
        parts = [extract_text_from_any_payload(x) for x in payload]
        return "\n".join([p for p in parts if p]).strip()

    return str(payload).strip()


def get_user_text(req: ChatRequest, raw_body: Any = None) -> str:
    """Return the user's text from *req*, falling back to *raw_body*.

    The attributes ``message``, ``prompt``, ``query``, ``text`` and
    ``user_message`` of *req* are checked in that order; the first one that is
    a string with non-whitespace content is returned stripped. If none
    qualifies, the text extracted from *raw_body* by
    ``extract_text_from_any_payload`` is returned instead.
    """
    candidate_fields = ("message", "prompt", "query", "text", "user_message")
    for name in candidate_fields:
        candidate = getattr(req, name, None)
        if not isinstance(candidate, str):
            continue
        stripped = candidate.strip()
        if stripped:
            return stripped
    return extract_text_from_any_payload(raw_body).strip()


def short_lines(items: Iterable[str], limit: int) -> List[str]:
    """Return up to *limit* non-empty, whitespace-normalized entries of *items*.

    Each item is passed through ``normalize_spaces``; entries that come out
    empty are skipped. Iteration stops as soon as *limit* entries have been
    collected, so *items* is not consumed further than needed.

    Args:
        items: The candidate lines, in order.
        limit: Maximum number of entries to return. A value of zero or less
            yields an empty list.

    Returns:
        The collected entries, in their original order.
    """
    out: List[str] = []
    if limit <= 0:
        # Checking the cap only after an append would let a non-positive
        # limit still return one entry.
        return out
    for item in items:
        item = normalize_spaces(item)
        if not item:
            continue
        out.append(item)
        if len(out) >= limit:
            break
    return out