crazycrazypete committed on
Commit
0b3dd30
·
verified ·
1 Parent(s): e6b3281

Upload folder using huggingface_hub

Browse files
Updates/app_hf_fixed_v7.py ADDED
@@ -0,0 +1,1173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import json
4
+ import math
5
+ import hashlib
6
+ import tempfile
7
+ from dataclasses import dataclass
8
+ from datetime import datetime, date
9
+ from typing import Any, Dict, List, Optional, Tuple
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+
14
+ import fitz # PyMuPDF
15
+ import faiss
16
+ from sentence_transformers import SentenceTransformer
17
+ from rapidfuzz import fuzz, process
18
+
19
+ import gradio as gr
20
+ from openai import OpenAI
21
+
22
+
23
# ============================
# Settings
# ============================
# Fixed reference date that lifecycle dates are compared against.
TODAY = date(2026, 1, 18)

# Model name and reasoning options sent with every OpenAI request.
OPENAI_MODEL = "gpt-5.2"
OPENAI_REASONING = {"effort": "high"}

# Score threshold for accepting a fuzzy match (presumably 0-100 -- confirm).
MATCH_OK = 80

# Embedding model, and how many characters of page text to keep before/after
# each "Standard SKU:" hit when cutting Parsec catalogue cards.
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
PARSEC_CONTEXT_BEFORE = 900
PARSEC_CONTEXT_AFTER = 1600


# ============================
# OpenAI client (HF Space secret: OPENAI_API_KEY)
# ============================
API_KEY = os.getenv("OPENAI_API_KEY", "").strip()
# ``client`` stays None when no key is configured.
client = None
if API_KEY:
    client = OpenAI(api_key=API_KEY)
41
+
42
+ # ----------------------------
43
+ # Gradio state helpers
44
+ # Keep state as a JSON STRING to avoid schema issues on Hugging Face.
45
+ # ----------------------------
46
def state_load(st_json: str) -> Dict[str, Any]:
    """Decode the JSON-string Gradio state into a dict.

    Args:
        st_json: State serialised as a JSON string.

    Returns:
        The decoded object when it is a JSON object; ``{}`` for empty input,
        non-string input, malformed JSON, or JSON that decodes to anything
        other than a dict (list, number, string, null).
    """
    if not isinstance(st_json, str) or not st_json:
        return {}
    try:
        decoded = json.loads(st_json)
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError subclass.
        return {}
    # Callers use dict methods on the state, so never hand back a list/scalar.
    return decoded if isinstance(decoded, dict) else {}
53
+
54
def state_dump(st: Dict[str, Any]) -> str:
    """Encode the state dict as a JSON string (non-ASCII kept as-is).

    A falsy state is encoded as ``{}``; if encoding fails for any reason the
    literal string ``"{}"`` is returned instead.
    """
    try:
        payload = st if st else {}
        encoded = json.dumps(payload, ensure_ascii=False)
    except Exception:
        encoded = "{}"
    return encoded
59
+
60
+
61
+
62
+ # ============================
63
+ # Helpers
64
+ # ============================
65
def norm_text(s: Any) -> str:
    """Normalise *s* for matching.

    ``None`` and NaN-like scalars become ``""``. Everything else is
    stringified, lower-cased, stripped of every character other than
    ``a-z``, ``0-9``, whitespace, ``-`` and ``/`` (each replaced by a space),
    and has runs of whitespace collapsed to one space.
    """
    try:
        missing = (
            s is None
            or (isinstance(s, float) and math.isnan(s))
            or bool(pd.isna(s))
        )
    except Exception:
        # e.g. list-likes, where pd.isna() has no single truth value
        missing = False
    if missing:
        return ""

    lowered = str(s).strip().lower()
    spaced = re.sub(r"[^a-z0-9\s\-\/]", " ", lowered)
    return re.sub(r"\s+", " ", spaced).strip()
75
+
76
def safe_str(v: Any) -> str:
    """Return ``str(v).strip()``, or ``""`` for ``None`` / NaN-like scalars.

    ``pd.isna`` returns an array for list-like input, and testing that array
    for truth raises ``ValueError``; such values are treated as present and
    stringified instead of crashing the caller.
    """
    if v is None:
        return ""
    try:
        if bool(pd.isna(v)):
            return ""
    except (TypeError, ValueError):
        # list-like / array input: no single "is missing" answer
        pass
    return str(v).strip()
80
+
81
def is_5g(modem_type: Any) -> bool:
    """Report whether a modem-type description mentions 5G or NR.

    "5g" is matched anywhere in the normalised text. "nr" must not be flanked
    by letters, so that words which merely contain those two letters
    (e.g. "unrestricted") are not classified as 5G.
    """
    s = norm_text(modem_type)
    if "5g" in s:
        return True
    return re.search(r"(?<![a-z])nr(?![a-z])", s) is not None
84
+
85
def json_load_safe(s: str) -> Dict[str, Any]:
    """Parse *s* as a JSON object, returning ``{}`` when that is not possible.

    The whole text is tried first. If that fails, or yields something other
    than an object, the span from the first ``{`` to the last ``}`` is tried,
    which recovers replies wrapped in Markdown fences or surrounding prose.

    Args:
        s: JSON text (bytes are decoded as UTF-8).

    Returns:
        The decoded dict, or ``{}`` for empty, invalid or non-object input.
    """
    if isinstance(s, (bytes, bytearray)):
        s = s.decode("utf-8", errors="replace")
    if not isinstance(s, str):
        return {}
    text = s.strip()
    if not text:
        return {}

    attempts = [text]
    first, last = text.find("{"), text.rfind("}")
    if 0 <= first < last:
        attempts.append(text[first:last + 1])

    for chunk in attempts:
        try:
            parsed = json.loads(chunk)
        except (ValueError, RecursionError):
            continue
        if isinstance(parsed, dict):
            return parsed
    return {}
90
+
91
def gpt_json(system: str, payload: Dict[str, Any], max_tokens: int = 600) -> Dict[str, Any]:
    """Send *payload* (JSON-encoded) to the model and parse the reply as JSON.

    Args:
        system: System prompt.
        payload: JSON-serialisable request body sent as the user message.
        max_tokens: Passed as ``max_output_tokens``.

    Returns:
        ``{}`` when no OpenAI client is configured; otherwise whatever
        ``json_load_safe`` makes of the response's ``output_text``.
    """
    if client is None:
        return {}

    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": json.dumps(payload)},
    ]
    response = client.responses.create(
        model=OPENAI_MODEL,
        reasoning=OPENAI_REASONING,
        input=messages,
        max_output_tokens=max_tokens,
    )
    raw_text = getattr(response, "output_text", "") or ""
    return json_load_safe(raw_text)
101
+
102
+
103
+ # ============================
104
+ # Load data
105
+ # ============================
106
EOS_PATH = "routers_eos_eol_by_sku.csv"
DEC_PATH = "dec2025routers.csv"
PARSEC_PDF = "ParsecCatalog.pdf"

# Fail fast at import time if any bundled data file is absent
# (checked in the order lifecycle CSV, router CSV, Parsec PDF).
for _required_path in (EOS_PATH, DEC_PATH, PARSEC_PDF):
    if not os.path.exists(_required_path):
        raise FileNotFoundError(f"Missing {_required_path} in repo.")

df_eos = pd.read_csv(EOS_PATH).copy()
df_dec = pd.read_csv(DEC_PATH).copy()
119
+
120
+
121
+ def _canonize_eos_columns(df: pd.DataFrame) -> pd.DataFrame:
122
+ """Normalize lifecycle CSV column names (case-insensitive) and create expected columns."""
123
+ # Map various header spellings to canonical names used by the app
124
+ mapping = {}
125
+ for c in df.columns:
126
+ k = str(c).strip().lower().replace(" ", "_")
127
+ if k in {"sku", "model", "device", "device_sku"}:
128
+ mapping[c] = "sku"
129
+ elif k in {"manufacturer", "make", "vendor"}:
130
+ mapping[c] = "manufacturer"
131
+ elif k in {"device_type", "type"}:
132
+ mapping[c] = "device_type"
133
+ elif k in {"end_of_sale", "eos", "end_sale", "end_of_sales"}:
134
+ mapping[c] = "end_of_sale"
135
+ elif k in {"end_of_life", "eol", "end_life"}:
136
+ mapping[c] = "end_of_life"
137
+ elif k in {"suggested_replacement", "replacement_4g", "lte_replacement", "replacement_lte", "replacement"}:
138
+ mapping[c] = "suggested_replacement"
139
+ elif k in {"advanced_5g_option", "replacement_5g", "fiveg_replacement", "5g_replacement", "upgrade_5g"}:
140
+ mapping[c] = "advanced_5g_option"
141
+ elif k in {"region", "market"}:
142
+ mapping[c] = "region"
143
+ elif k in {"notes", "note"}:
144
+ mapping[c] = "notes"
145
+ elif k in {"description", "device_description", "name"}:
146
+ mapping[c] = "description"
147
+
148
+ df = df.rename(columns=mapping).copy()
149
+
150
+ # Create expected columns if missing
151
+ if "sku" not in df.columns:
152
+ # Try the common capitalized header as a fallback
153
+ if "SKU" in df.columns:
154
+ df["sku"] = df["SKU"].astype(str)
155
+ else:
156
+ df["sku"] = ""
157
+
158
+ if "manufacturer" not in df.columns:
159
+ df["manufacturer"] = ""
160
+
161
+ if "device_type" not in df.columns:
162
+ df["device_type"] = ""
163
+
164
+ if "description" not in df.columns:
165
+ # If the simplified file removed description, use SKU as description (still searchable)
166
+ df["description"] = df["sku"].astype(str)
167
+
168
+ if "notes" not in df.columns:
169
+ df["notes"] = ""
170
+
171
+ if "region" not in df.columns:
172
+ df["region"] = ""
173
+
174
+ if "suggested_replacement" not in df.columns:
175
+ df["suggested_replacement"] = ""
176
+
177
+ if "advanced_5g_option" not in df.columns:
178
+ df["advanced_5g_option"] = ""
179
+
180
+ if "end_of_sale" not in df.columns:
181
+ df["end_of_sale"] = ""
182
+
183
+ if "end_of_life" not in df.columns:
184
+ df["end_of_life"] = ""
185
+
186
+ return df
187
+
188
# Bring the lifecycle table onto the canonical column names used below.
df_eos = _canonize_eos_columns(df_eos)
189
+
190
+
191
def region_ok(x: Any) -> bool:
    """Decide whether a lifecycle row's region is acceptable.

    Accepts a blank or missing region, "not specified", "north america",
    and the USA / United States / US / U.S. spellings (case-insensitive).

    Missing values read from CSV arrive as float NaN. NaN is truthy, so
    without the explicit check it would be stringified to "nan" and the row
    would be rejected even though its region is simply blank.
    """
    if x is None or (isinstance(x, float) and math.isnan(x)):
        return True
    s = str(x or "").strip().lower()
    # "nan" is what a stringified missing cell looks like.
    if not s or s == "nan":
        return True
    if "not specified" in s or "north america" in s:
        return True
    accepted = (r"\busa\b", r"\bunited\s+states\b", r"\bu\.?s\.?\b")
    return any(re.search(pattern, s) for pattern in accepted)
206
+
207
# Keep only the rows whose region passes region_ok, then renumber the index.
if "region" in df_eos.columns:
    _region_mask = df_eos["region"].apply(region_ok)
    df_eos = df_eos[_region_mask].reset_index(drop=True)

# Maker mapping (includes Teltonika)
# Entry order matters to lookups that stop at the first hit:
# CISCO_MERAKI is listed before the broader CISCO.
CANON_MAKER = {
    "CRADLEPOINT": {
        "cradlepoint",
        "ericsson",
        "ericsson enterprise wireless",
    },
    "SIERRA": {
        "sierra",
        "sierra wireless",
        "semtech",
        "airlink",
    },
    "FEENEY": {
        "feeney",
        "feeney wireless",
        "inseego",
    },
    "DIGI": {
        "digi",
        "accelerated",
        "accelerated concepts",
    },
    "CISCO_MERAKI": {
        "meraki",
        "cisco meraki",
    },
    "CISCO": {
        "cisco",
    },
    "TELTONIKA": {
        "teltonika",
    },
}
220
+
221
def canon_maker_from_text(s: Any) -> str:
    """Map free-text maker names onto a ``CANON_MAKER`` key.

    Returns the first key (in ``CANON_MAKER`` order) that has a term
    occurring as a substring of the normalised text, or ``"UNKNOWN"``.
    """
    haystack = norm_text(s)
    matching_keys = (
        canon
        for canon, terms in CANON_MAKER.items()
        if any(term in haystack for term in terms)
    )
    return next(matching_keys, "UNKNOWN")
228
+
229
# Precompute normalised helper columns. Each row of the table below is
# (frame, new column, source column, transform, value used when the source
# column is absent).
for _frame, _target, _source, _transform, _default in (
    (df_eos, "_canon_make", "manufacturer", canon_maker_from_text, "UNKNOWN"),
    (df_eos, "_norm_sku", "sku", norm_text, ""),
    (df_eos, "_norm_desc", "description", norm_text, ""),
    (df_eos, "_norm_notes", "notes", norm_text, ""),
    (df_dec, "_canon_make", "Make", canon_maker_from_text, "UNKNOWN"),
    (df_dec, "_norm_model", "Model", norm_text, ""),
    (df_dec, "_is5g", "Modem Type", is_5g, False),
):
    if _source in _frame.columns:
        _frame[_target] = _frame[_source].apply(_transform)
    else:
        _frame[_target] = _default
237
+
238
+
239
+ # ============================
240
+ # Date helpers
241
+ # ============================
242
@dataclass
class ParsedDate:
    """Outcome of parsing one lifecycle date cell."""

    # The cell text as given (stripped); "" when the cell was empty.
    raw: str
    # Parse outcome tag: "missing", "full", "year", "year_month" or "bad".
    kind: str
    # The resolved calendar date, or None when nothing could be parsed.
    value: Optional[date]
247
+
248
def parse_date_field(x: Any) -> ParsedDate:
    """Parse a lifecycle date cell into a ``ParsedDate``.

    Recognised forms:
      * ``M/D/YY`` and ``M/D/YYYY`` (e.g. 6/24/24, 9/30/2021) -> kind "full"
      * ``YYYY`` -> kind "year": 1 January when the year is the current
        (``TODAY``) year or earlier, 31 December when it is in the future
      * ``YYYY-MM`` -> kind "year_month", first day of that month
      * ``YYYY-MM-DD`` -> kind "full"

    Empty cells, ``None`` and NaN (how pandas represents a blank CSV cell)
    give kind "missing". Anything else gives kind "bad" with the raw text
    kept for display.
    """
    missing = ParsedDate(raw="", kind="missing", value=None)
    if x is None or (isinstance(x, float) and math.isnan(x)):
        return missing
    raw = str(x or "").strip()
    # "nan" is a stringified missing cell, not a date someone typed.
    if not raw or raw.lower() == "nan":
        return missing

    bad = ParsedDate(raw=raw, kind="bad", value=None)

    # Common US formats. strptime's %m and %d already accept unpadded numbers,
    # so no separate "no leading zero" formats are needed.
    for fmt in ("%m/%d/%y", "%m/%d/%Y"):
        try:
            return ParsedDate(raw=raw, kind="full", value=datetime.strptime(raw, fmt).date())
        except ValueError:
            continue

    # YYYY
    if re.fullmatch(r"\d{4}", raw):
        year = int(raw)
        if year < date.min.year:
            # e.g. "0000" cannot be represented as a date
            return bad
        month, day = (1, 1) if year <= TODAY.year else (12, 31)
        return ParsedDate(raw=raw, kind="year", value=date(year, month, day))

    # YYYY-MM
    if re.fullmatch(r"\d{4}-\d{2}", raw):
        year_text, month_text = raw.split("-")
        try:
            return ParsedDate(raw=raw, kind="year_month", value=date(int(year_text), int(month_text), 1))
        except ValueError:
            return bad

    # YYYY-MM-DD
    if re.fullmatch(r"\d{4}-\d{2}-\d{2}", raw):
        try:
            return ParsedDate(raw=raw, kind="full", value=datetime.strptime(raw, "%Y-%m-%d").date())
        except ValueError:
            return bad

    # Last resort: leave as raw (unparsed)
    return bad
297
+
298
def display_date(pd_: "ParsedDate") -> str:
    """Text to show for a parsed date.

    "Not listed" for a missing date, or for an unparseable one with no raw
    text; in every other case the raw cell text.
    """
    unparsed_and_blank = pd_.kind == "bad" and not pd_.raw
    if pd_.kind == "missing" or unparsed_and_blank:
        return "Not listed"
    return pd_.raw
304
+
305
def status_from_eos_eol(eos: ParsedDate, eol: ParsedDate) -> str:
    """Classify a device from its end-of-sale / end-of-life dates.

    Returns "Unknown" when neither date resolved, "End of Life" when the EOL
    date is on or before ``TODAY``, otherwise "End of Sale" when the EOS date
    is on or before ``TODAY``, otherwise "Active".
    """
    def _reached(parsed: ParsedDate) -> bool:
        return parsed.value is not None and parsed.value <= TODAY

    if eos.value is None and eol.value is None:
        return "Unknown"
    if _reached(eol):
        return "End of Life"
    return "End of Sale" if _reached(eos) else "Active"
313
+
314
def row_to_dates_and_status(row: pd.Series) -> Tuple[str, str, str]:
    """Return (EOS display text, EOL display text, status) for a lifecycle row."""
    parsed_eos = parse_date_field(row.get("end_of_sale"))
    parsed_eol = parse_date_field(row.get("end_of_life"))
    eos_text = display_date(parsed_eos)
    eol_text = display_date(parsed_eol)
    return eos_text, eol_text, status_from_eos_eol(parsed_eos, parsed_eol)
318
+
319
+
320
+ # ============================
321
+ # Embeddings + Parsec index
322
+ # ============================
323
# Sentence-embedding model used for the Parsec catalogue index and for queries.
embedder = SentenceTransformer(EMBED_MODEL_NAME)
324
+
325
def extract_pdf_text_pages(path: str) -> List[str]:
    """Return the plain text of every page of the PDF at *path*, in page order.

    The document is opened as a context manager so the file handle is
    released once the text has been read.
    """
    with fitz.open(path) as doc:
        return [page.get_text("text") for page in doc]
328
+
329
def build_parsec_cards(pages: List[str]) -> List[str]:
    """Cut one text "card" around every "Standard SKU:" hit on each page.

    A card spans ``PARSEC_CONTEXT_BEFORE`` characters before the hit to
    ``PARSEC_CONTEXT_AFTER`` characters after its start, clipped to the page
    and stripped. Cards shorter than 200 characters are dropped, and exact
    duplicates (by SHA-1 of the UTF-8 text) are kept only once, in first-seen
    order.
    """
    unique_cards: List[str] = []
    digests = set()
    for page_text in pages:
        for hit in re.finditer(r"Standard\s+SKU:", page_text):
            lo = max(0, hit.start() - PARSEC_CONTEXT_BEFORE)
            hi = min(len(page_text), hit.start() + PARSEC_CONTEXT_AFTER)
            snippet = page_text[lo:hi].strip()
            if len(snippet) < 200:
                continue
            digest = hashlib.sha1(snippet.encode("utf-8")).hexdigest()
            if digest in digests:
                continue
            digests.add(digest)
            unique_cards.append(snippet)
    return unique_cards
344
+
345
parsec_cards = build_parsec_cards(extract_pdf_text_pages(PARSEC_PDF))
if parsec_cards:
    parsec_emb = np.asarray(
        embedder.encode(parsec_cards, batch_size=64, show_progress_bar=False, normalize_embeddings=True),
        dtype=np.float32,
    )
else:
    # No cards were extracted: encoding an empty list yields an array with no
    # second dimension, so build an explicit (0, dim) matrix instead of
    # failing on shape[1] below.
    parsec_emb = np.zeros((0, int(embedder.get_sentence_embedding_dimension())), dtype=np.float32)

# Inner-product index over L2-normalised vectors.
parsec_index = faiss.IndexFlatIP(parsec_emb.shape[1])
if len(parsec_emb):
    parsec_index.add(parsec_emb)
350
+
351
+
352
+ # ============================
353
+ # Device resolution
354
+ # ============================
355
def label_for_row(i: int) -> str:
    """Human-readable label "sku — manufacturer — description" for lifecycle
    row *i* (positional), truncated to 220 characters."""
    record = df_eos.iloc[i]
    fields = (
        record.get('sku', ''),
        record.get('manufacturer', ''),
        record.get('description', ''),
    )
    return " — ".join(f"{value}" for value in fields)[:220]
358
+
359
# One display label and one searchable text blob per lifecycle row,
# both in positional row order.
EOS_LABELS = [label_for_row(pos) for pos in range(len(df_eos))]
EOS_CORPUS = [
    " ".join([
        r.get("_norm_sku", ""),
        r.get("_canon_make", ""),
        r.get("_norm_desc", ""),
        r.get("_norm_notes", ""),
    ])
    for _, r in df_eos.iterrows()
]
363
+
364
def local_candidates(query: str, top_k: int = 6) -> List[Tuple[int, int, str]]:
    """Fuzzy-search ``EOS_CORPUS`` for *query*.

    Returns up to *top_k* tuples ``(row position, score, label)`` in the
    order rapidfuzz reports them.
    """
    matches = process.extract(norm_text(query), EOS_CORPUS, scorer=fuzz.WRatio, limit=top_k)
    ranked: List[Tuple[int, int, str]] = []
    for _choice, score, idx in matches:
        pos = int(idx)
        ranked.append((pos, int(score), EOS_LABELS[pos]))
    return ranked
368
+
369
def gpt_choose_device(user_text: str, candidates: List[Tuple[int,int,str]]) -> Dict[str, Any]:
    """Ask the model which candidate row the user meant.

    Returns ``{}`` when no OpenAI client is configured; otherwise the parsed
    JSON reply from ``gpt_json``.
    """
    if client is None:
        return {}

    system_prompt = "Pick which router the user meant. Never invent. Return strict JSON only."
    candidate_rows = [
        {"row_idx": row_idx, "score": score, "label": label}
        for row_idx, score, label in candidates
    ]
    request = {
        "user_input": user_text,
        "candidates": candidate_rows,
        "rules": [
            "If one is clearly correct, return mode='ok' with row_idx.",
            "If two are plausible, return mode='pick' with top 2 options.",
        ],
        "output_schema": {
            "mode": "ok|pick",
            "row_idx": "int",
            "options": [{"row_idx": "int", "label": "string"}],
        },
    }
    return gpt_json(system_prompt, request, max_tokens=280)
383
+
384
def resolve_device(user_text: str) -> Dict[str, Any]:
    """Resolve free-text input to a lifecycle row.

    Resolution order: exact normalised-SKU match, then a confident fuzzy
    match (score >= 95 and at least 8 points clear of the runner-up), then
    the model's choice, then the top two fuzzy candidates as a pick list.

    Model output is never trusted blindly: a returned ``row_idx`` is accepted
    only if it is one of the candidates that were offered, and pick-list
    labels are taken from the local label table rather than from the reply.

    Returns:
        ``{"mode": "ok", "row_idx": int}``,
        ``{"mode": "pick", "options": [{"row_idx", "label"}, ...]}`` or
        ``{"mode": "not_found"}``.
    """
    q = norm_text(user_text)
    if not q:
        # Blank input must not "exactly match" rows whose SKU is blank.
        return {"mode": "not_found"}

    exact = df_eos.index[df_eos["_norm_sku"] == q].tolist()
    if len(exact) == 1:
        return {"mode": "ok", "row_idx": int(exact[0])}
    if len(exact) > 1:
        opts = [{"row_idx": int(i), "label": EOS_LABELS[int(i)]} for i in exact[:2]]
        return {"mode": "pick", "options": opts}

    cands = local_candidates(user_text, top_k=6)
    if not cands:
        return {"mode": "not_found"}

    top = cands[0]
    if top[1] >= 95 and (len(cands) == 1 or (top[1] - cands[1][1]) >= 8):
        return {"mode": "ok", "row_idx": top[0]}

    label_by_idx = {idx: label for idx, _score, label in cands}

    g = gpt_choose_device(user_text, cands)
    if not isinstance(g, dict):
        g = {}
    mode = g.get("mode")
    chosen = g.get("row_idx")

    if (
        mode == "ok"
        and isinstance(chosen, int)
        and not isinstance(chosen, bool)
        and chosen in label_by_idx
    ):
        return {"mode": "ok", "row_idx": int(chosen)}

    if mode == "pick":
        picked: List[Dict[str, Any]] = []
        for opt in g.get("options") or []:
            if not isinstance(opt, dict):
                continue
            try:
                idx = int(opt.get("row_idx"))
            except (TypeError, ValueError):
                continue
            if idx in label_by_idx and all(idx != p["row_idx"] for p in picked):
                picked.append({"row_idx": idx, "label": label_by_idx[idx]})
            if len(picked) == 2:
                break
        if picked:
            return {"mode": "pick", "options": picked}

    # No usable model answer: offer the best one or two local candidates.
    fallback = [{"row_idx": idx, "label": label} for idx, _score, label in cands[:2]]
    return {"mode": "pick", "options": fallback}
413
+
414
+
415
+ # ============================
416
+ # Replacements — lifecycle CSV source of truth
417
+ # ============================
418
def extract_model_token(text: str) -> str:
    """Pull a router model token out of a replacement cell.

    The cell is split on "|" and the segments are examined last to first.
    For each segment these patterns are tried in order: Teltonika-style
    ``RUT…``, ``IX##``, ``R/E/S`` followed by 3-4 digits, then a generic
    letters-then-digits token. The first hit is returned upper-cased. With no
    hit at all, the first 60 characters of the first examined segment are
    returned; an empty cell gives "".
    """
    cleaned = safe_str(text)
    if not cleaned:
        return ""

    segments = [seg.strip() for seg in cleaned.split("|") if seg.strip()]
    ordered = list(reversed(segments)) if segments else [cleaned]

    # (pattern, search the upper-cased segment?, regex flags)
    probes = (
        (r"\bRUT[A-Z]?\d{2,4}\b", True, 0),
        (r"\bIX\d{2}\b", False, re.IGNORECASE),
        (r"\b(R\d{3,4}|E\d{3,4}|S\d{3,4})\b", False, re.IGNORECASE),
        (r"\b[A-Z]{1,6}\d{2,4}[A-Z]?\b", True, 0),
    )
    for segment in ordered:
        for pattern, search_upper, flags in probes:
            subject = segment.upper() if search_upper else segment
            hit = re.search(pattern, subject, flags=flags)
            if hit:
                return hit.group(0).upper()
    return ordered[0][:60]
438
+
439
def device_is_4g(row: pd.Series) -> bool:
    """Guess whether a lifecycle row describes a 4G/LTE-only device.

    The normalised description, notes and SKU are searched together:
      * any mention of 5G or NR -> not 4G-only (False);
      * "4g", or "lte" / "volte" at the start of a token -> True;
      * an LTE category token ("cat 4", "cat6", "cat-m1", bare "cat") -> True.

    Tokens are anchored so that ordinary words containing the same letters
    ("alternative", "filter", "location", "application", "unrestricted") do
    not trigger a match.
    """
    t = " ".join(
        norm_text(row.get(field, "")) for field in ("description", "notes", "sku")
    )

    # If it explicitly says 5G/NR, treat as not 4G-only
    if "5g" in t or re.search(r"(?<![a-z])nr(?![a-z])", t):
        return False

    # Classic signals
    if "4g" in t or re.search(r"(?<![a-z])(?:vo)?lte", t):
        return True

    # LTE-M categories, including the run-together spelling "catm1"
    if re.search(r"\bcat\s*-?\s*m[12]\b", t):
        return True

    # Any other "cat" token (cat 4, cat6, cat-12, bare "cat") is treated as
    # LTE-family; "cat" followed by a letter is just part of a longer word.
    return re.search(r"\bcat(?![a-z])", t) is not None
496
+
497
+
498
def _lifecycle_candidates(manufacturer: str, column: str) -> List[str]:
    """Collect distinct model tokens from *column* for one manufacturer.

    Both the requested manufacturer and the table's manufacturer cells are
    passed through ``norm_text`` before comparison, so punctuation and
    spacing differences do not empty the pool. Tokens are returned in
    first-seen order; empty tokens and the literal "nan" are skipped.
    """
    wanted = norm_text(manufacturer)
    pool = df_eos
    if "manufacturer" in df_eos.columns:
        pool = df_eos[df_eos["manufacturer"].apply(norm_text) == wanted]
    if column not in pool.columns:
        return []

    found: List[str] = []
    seen = set()
    for value in pool[column].tolist():
        token = extract_model_token(value)
        if token and token.lower() != "nan" and token not in seen:
            seen.add(token)
            found.append(token)
    return found


def candidate_5g_models_from_lifecycle(manufacturer: str) -> List[str]:
    """Distinct ``advanced_5g_option`` model tokens listed for *manufacturer*."""
    return _lifecycle_candidates(manufacturer, "advanced_5g_option")


def candidate_4g_models_from_lifecycle(manufacturer: str) -> List[str]:
    """Distinct ``suggested_replacement`` model tokens listed for *manufacturer*."""
    return _lifecycle_candidates(manufacturer, "suggested_replacement")
519
+
520
def gpt_pick_from_candidates(old_row: pd.Series, candidates: List[str], need: str) -> str:
    """Ask the model to choose one replacement model from *candidates*.

    At most the first 40 candidates are sent. Returns the model's choice only
    if it is literally one of *candidates*; otherwise "" (also when no client
    is configured or the list is empty).
    """
    if client is None or not candidates:
        return ""

    system_prompt = "Pick the best replacement model. Choose only from candidates. Return strict JSON only."
    old_device = {
        key: str(old_row.get(key, ""))
        for key in ("sku", "manufacturer", "description")
    }
    old_device["need"] = need
    request = {
        "old_device": old_device,
        "candidates": candidates[:40],
        "output_schema": {"choice": "string"},
    }

    reply = gpt_json(system_prompt, request, max_tokens=240) or {}
    picked = str(reply.get("choice", "") or "").strip()
    if picked in candidates:
        return picked
    return ""
537
+
538
def fallback_5g_from_dec(canon_make: str) -> str:
    """First 5G model in ``df_dec`` for the given canonical maker, or ""."""
    same_maker = df_dec["_canon_make"] == canon_make
    flagged_5g = df_dec["_is5g"] == True
    matches = df_dec[same_maker & flagged_5g]
    if matches.empty:
        return ""
    return str(matches.iloc[0]["Model"]).strip()
541
+
542
def pick_replacements_lifecycle(row: pd.Series, status: str, use_gpt: bool = True) -> Dict[str, Any]:
    """Choose 4G and 5G replacement models for a lifecycle row.

    The row's own ``suggested_replacement`` / ``advanced_5g_option`` cells
    win. Otherwise a model is chosen from the same manufacturer's lifecycle
    candidates (by GPT when enabled, else the first candidate). For 5G the
    last resort is the first 5G model of the same maker family in ``df_dec``.

    Args:
        row: Lifecycle row.
        status: Status string; "End of Sale" / "End of Life" force a 5G
            recommendation.
        use_gpt: Allow GPT to pick among candidates (needs a configured client).

    Returns:
        ``{"repl_4g": str, "repl_5g": str, "sources": [...]}``. ``sources``
        contains "gpt" only when a GPT pick was actually used.
    """
    canon = str(row.get("_canon_make", "UNKNOWN"))
    manufacturer = str(row.get("manufacturer", "") or "")

    sug_raw = safe_str(row.get("suggested_replacement", ""))
    adv_raw = safe_str(row.get("advanced_5g_option", ""))

    gpt_enabled = bool(use_gpt and client)
    gpt_used = False

    def _choose(candidates: List[str], need: str) -> str:
        """GPT pick when enabled and valid, else the first candidate, else ""."""
        nonlocal gpt_used
        if gpt_enabled:
            picked = gpt_pick_from_candidates(row, candidates, need)
            if picked:
                gpt_used = True
                return picked
        return candidates[0] if candidates else ""

    has_4g_alt = bool(sug_raw)
    has_5g_alt = bool(adv_raw)

    # Treat as 4G if lifecycle provides a 4G suggested replacement OR the description indicates LTE
    is_4g = has_4g_alt or device_is_4g(row)

    # Provide 5G option if the unit is 4G, EOS/EOL, or lifecycle explicitly provides advanced_5g_option
    want_5g = is_4g or (status in {"End of Sale", "End of Life"}) or has_5g_alt

    repl_4g = "Not applicable"
    if is_4g:
        repl_4g = extract_model_token(sug_raw) or _choose(
            candidate_4g_models_from_lifecycle(manufacturer), "4G alternative"
        )
        # Same "nan" guard as the 5G path below.
        if not repl_4g or repl_4g.lower() == "nan":
            repl_4g = "Not applicable"

    # 5G replacement: prefer lifecycle advanced_5g_option whenever present
    repl_5g = "Not listed"
    if want_5g:
        repl_5g = (
            extract_model_token(adv_raw)
            or _choose(candidate_5g_models_from_lifecycle(manufacturer), "5G replacement/upgrade")
            or fallback_5g_from_dec(canon)
            or "Not listed"
        )

    if repl_5g.lower() == "nan":
        repl_5g = "Not listed"

    sources = ["lifecycle_csv"] + (["gpt"] if gpt_used else [])
    return {"repl_4g": repl_4g, "repl_5g": repl_5g, "sources": sources}
582
+
583
+
584
+ # ============================
585
+ # Antennas (Parsec-only)
586
+ # ============================
587
# Words recognised as Parsec antenna family names.
PARSEC_FAMILY_WORDS = {
    "australian",
    "beagle",
    "belgian",
    "bloodhound",
    "boxer",
    "chinook",
    "collie",
    "husky",
    "labrador",
    "mastiff",
    "pyrenees",
    "shepherd",
    "terrier",
}

# Lower-case fragments that mark a catalogue line as a heading or field
# label rather than a product name.
BAD_NAME_MARKERS = {
    "accessories",
    "benefits",
    "connectors",
    "customization",
    "description:",
    "electrical",
    "features",
    "mechanical",
    "mounting",
    "specifications",
    "standard connectors",
    "standard sku",
}
589
+
590
def clean_line(s: str) -> str:
    """Strip *s* and collapse whitespace runs to single spaces.

    A line that consists only of a hyphen followed by letters/digits
    (e.g. "-ab12") is returned as "".
    """
    text = re.sub(r"\s+", " ", str(s or "").strip())
    is_suffix_code = re.fullmatch(r"-[a-z0-9]+", text.lower()) is not None
    return "" if is_suffix_code else text
595
+
596
def is_bad_name_line(line: str) -> bool:
    """True when *line* should not be used as an antenna name.

    That is the case when it contains any ``BAD_NAME_MARKERS`` fragment, or
    when it is at most 25 characters long and contains a "-xxxx" style
    suffix code (1-4 letters/digits after a hyphen).
    """
    lowered = line.lower()
    has_marker = any(marker in lowered for marker in BAD_NAME_MARKERS)
    looks_like_suffix = (
        len(lowered) <= 25
        and re.search(r"\b-[a-z0-9]{1,4}\b", lowered) is not None
    )
    return has_marker or looks_like_suffix
603
+
604
def family_from_line(line: str) -> str:
    """Return the capitalised Parsec family word found in *line*, or "".

    When several family words occur (e.g. "Belgian Shepherd"), the one that
    appears first in the line is returned. ``PARSEC_FAMILY_WORDS`` is a set,
    so taking whichever word iteration reaches first would make the result
    vary from run to run.
    """
    low = line.lower()
    hits = [(low.find(fam), fam) for fam in PARSEC_FAMILY_WORDS if fam in low]
    if not hits:
        return ""
    # Tuples sort by position first, so min() is the earliest family word.
    return min(hits)[1].capitalize()
610
+
611
def parsec_connectors_from_card(t: str) -> str:
    """Text following "Standard Connectors:" on a card (case-insensitive),
    whitespace-collapsed and cut to 80 characters; "" when absent."""
    found = re.search(r"Standard\s+Connectors:\s*(.+)", t, flags=re.IGNORECASE)
    if found is None:
        return ""
    value = found.group(1).strip()
    return re.sub(r"\s+", " ", value)[:80]
616
+
617
def parsec_mounts_from_card(t: str) -> List[str]:
    """Mount types listed on a card.

    Every "Mount: a, b, …" line (case-insensitive) is split on commas; the
    lower-cased, stripped entries are returned de-duplicated in first-seen
    order.
    """
    ordered_unique: List[str] = []
    for found in re.finditer(r"Mount:\s*(.+)", t, flags=re.IGNORECASE):
        value = re.sub(r"\s+", " ", found.group(1).strip())
        for piece in value.split(","):
            if not piece.strip():
                continue
            mount = piece.strip().lower()
            if mount not in ordered_unique:
                ordered_unique.append(mount)
    return ordered_unique
629
+
630
def parsec_name_from_card(card_text: str) -> str:
    """Best-effort product name for a catalogue card.

    1. The first acceptable line that contains a family word gives that
       family name.
    2. Otherwise, looking backwards through up to 12 lines before the first
       "standard sku" line, the first acceptable line of 3-40 characters
       containing a letter gives its first word, capitalised.
    3. Otherwise "Parsec antenna".
    """
    usable = [ln for ln in map(clean_line, str(card_text or "").splitlines()) if ln]

    for ln in usable:
        if not is_bad_name_line(ln):
            family = family_from_line(ln)
            if family:
                return family

    sku_pos = next(
        (pos for pos, ln in enumerate(usable) if "standard sku" in ln.lower()),
        None,
    )
    if sku_pos is not None:
        preceding = usable[max(0, sku_pos - 12):sku_pos]
        for ln in reversed(preceding):
            if is_bad_name_line(ln):
                continue
            if 3 <= len(ln) <= 40 and re.search(r"[A-Za-z]", ln):
                return ln.split()[0].capitalize()

    return "Parsec antenna"
655
+
656
def parsec_part_from_card(t: str) -> str:
    """Upper-case alphanumeric run following "Standard SKU:" (case-sensitive),
    or "" when the card has none."""
    found = re.search(r"Standard\s+SKU:\s*([A-Z0-9]+)", t)
    if found is None:
        return ""
    return found.group(1).strip()
659
+
660
def parsec_desc_from_card(t: str) -> str:
    """First line of text following "Description:" (case-insensitive),
    whitespace-collapsed and cut to 220 characters; "" when absent."""
    found = re.search(r"Description:\s*(.+?)(?:\n|$)", t, flags=re.IGNORECASE)
    if found is None:
        return ""
    return re.sub(r"\s+", " ", found.group(1).strip())[:220]
663
+
664
def parsec_retrieve(query: str, top_k: int = 12) -> List[Dict[str, Any]]:
    """Embed *query* and return up to *top_k* matching catalogue cards.

    Each result holds the similarity ``score``, the fields parsed from the
    card (``name``, ``part_number``, ``description``, ``connectors``,
    ``mounts``) and the lower-cased card text under ``_card``. Index ids that
    fall outside ``parsec_cards`` are skipped.
    """
    query_vec = np.asarray(
        embedder.encode([query], normalize_embeddings=True), dtype=np.float32
    )
    scores, ids = parsec_index.search(query_vec, top_k)

    results: List[Dict[str, Any]] = []
    for raw_score, raw_id in zip(scores[0].tolist(), ids[0].tolist()):
        pos = int(raw_id)
        if not 0 <= pos < len(parsec_cards):
            continue
        card = parsec_cards[pos]
        results.append({
            "score": float(raw_score),
            "name": parsec_name_from_card(card),
            "part_number": parsec_part_from_card(card),
            "description": parsec_desc_from_card(card),
            "connectors": parsec_connectors_from_card(card),
            "mounts": parsec_mounts_from_card(card),
            "_card": card.lower(),
        })
    return results
682
+
683
def choose_best_parsec(cands: List[Dict[str, Any]], mode: str) -> Dict[str, Any]:
    """Pick the best antenna candidate for ``mode`` ("vehicle" / "stationary").

    Each candidate's retrieval score is adjusted: omni cards are boosted and
    directional ones penalised; vehicle mode favours magnetic and through
    mounts and penalises wall/pole mounts and fixed-only cards; stationary
    mode favours wall and pole mounts. The first candidate with the highest
    adjusted score wins and is returned as a copy without its ``_card`` text.
    With no usable candidate a blank placeholder dict is returned.
    """
    def _adjusted(cand: Dict[str, Any]) -> float:
        card = cand.get("_card", "")
        mounts = cand.get("mounts", []) or []

        def _has(kind: str) -> bool:
            return any(kind in mount for mount in mounts)

        total = float(cand.get("score", 0.0))
        if "omni" in card:
            total += 0.6
        if "directional" in card:
            total -= 1.5

        if mode == "vehicle":
            if _has("magnetic"):
                total += 3.0
            if _has("through"):
                total += 2.0
            if _has("wall") or _has("pole"):
                total -= 1.2
            if "app: fixed" in card and "mobile" not in card:
                total -= 2.0

        if mode == "stationary":
            if _has("wall"):
                total += 2.0
            if _has("pole"):
                total += 1.8
        return total

    winner = None
    winner_score = -1e9
    for cand in cands:
        cand_score = _adjusted(cand)
        if cand_score > winner_score:
            winner_score = cand_score
            winner = cand

    if not winner:
        return {"name":"Parsec antenna","part_number":"","description":"","connectors":"","mounts":[]}

    result = dict(winner)
    result.pop("_card", None)
    return result
723
+
724
+
725
def infer_mimo_for_5g(repl_5g_model: str) -> str:
    """Rule: every 5G router uses a 4x4 antenna.

    Args:
        repl_5g_model: Model name of the 5G router (does not affect the result).

    Returns:
        Always "4x4".
    """
    return "4x4"
755
+
756
def antenna_options_for(router_model: str, tech: str, mimo: str) -> Dict[str, Any]:
    """Recommend one stationary and one vehicle omni antenna from the Parsec index.

    Two retrieval queries are built from the router model, technology and
    MIMO text; the best candidate of each is tagged with ``mimo`` and a short
    ``why`` note.
    """
    prefix = f"{router_model} {tech} {mimo} omni"
    stationary_query = f"{prefix} stationary pole wall fixed site Parsec"
    vehicle_query = f"{prefix} vehicle mobile magnetic through-bolt Parsec"

    stationary_hits = parsec_retrieve(stationary_query, top_k=12)
    vehicle_hits = parsec_retrieve(vehicle_query, top_k=12)

    stationary_pick = choose_best_parsec(stationary_hits, mode="stationary")
    vehicle_pick = choose_best_parsec(vehicle_hits, mode="vehicle")

    stationary_pick["mimo"] = mimo
    stationary_pick["why"] = "Stationary omni best match."
    vehicle_pick["mimo"] = mimo
    vehicle_pick["why"] = "Vehicle omni best match."

    return {
        "stationary_omni": stationary_pick,
        "vehicle_omni": vehicle_pick,
        "sources": ["parsec_rag"],
    }
770
+
771
+
772
+ # ============================
773
+ # Install-ready checklist
774
+ # ============================
775
def install_ready_checklist(current_sku: str, repl: Dict[str,Any], ant: Dict[str,Any]) -> str:
    """Build the install-ready checklist as Markdown.

    When an OpenAI client is configured the checklist is generated by the
    model. If that request fails or comes back empty, or if no client is
    configured, a fixed template filled from *repl* and *ant* is returned, so
    the caller always gets a usable checklist.

    Args:
        current_sku: SKU of the device being replaced.
        repl: Replacement info; ``repl_5g`` and ``repl_4g`` are read.
        ant: Antenna info; ``stationary_omni`` and ``vehicle_omni`` are read.
    """
    st = ant.get("stationary_omni", {})
    vh = ant.get("vehicle_omni", {})

    if client is not None:
        sys = "Create a short, install-ready checklist for a Verizon rep. Return markdown only."
        payload = {"current_device": current_sku, "replacements": repl, "antennas": {"stationary": st, "vehicle": vh}}
        try:
            resp = client.responses.create(
                model=OPENAI_MODEL,
                reasoning=OPENAI_REASONING,
                input=[{"role":"system","content":sys},{"role":"user","content":json.dumps(payload)}],
                max_output_tokens=520,
            )
            generated = (getattr(resp, "output_text", "") or "").strip()
        except Exception:
            # Service boundary: any API or serialisation failure falls through
            # to the template below instead of failing the whole request.
            # NOTE(review): no logger is visible in this file; consider
            # logging the exception here.
            generated = ""
        if generated:
            return generated

    return "\n".join([
        "### Install-ready checklist",
        f"- Current device: {current_sku}",
        f"- 5G replacement: {repl.get('repl_5g','')}",
        f"- 4G alternative: {repl.get('repl_4g','Not applicable')}",
        f"- Stationary omni antenna: {st.get('name','')} (PN {st.get('part_number','')})",
        f"- Vehicle omni antenna: {vh.get('name','')} (PN {vh.get('part_number','')})",
        "- Next steps: confirm mounting + cable lengths + power; place order; schedule install.",
    ])
797
+
798
+
799
+ # ============================
800
+ # Batch mode (NO GPT)
801
+ # ============================
802
def parse_batch_inputs(text_blob: str, file_obj: Any) -> List[str]:
    """Collect device identifiers from an uploaded CSV and/or pasted text.

    CSV entries (first column only) come first, followed by the non-empty
    lines of ``text_blob``. Entries are de-duplicated on their ``norm_text``
    form while keeping the first spelling seen and the original order.

    Args:
        text_blob: Pasted text, one device per line (may be empty/None).
        file_obj: Uploaded file object with a ``name`` path attribute, a path
            string, or None.

    Returns:
        Ordered list of unique, stripped device strings.
    """
    items: List[str] = []

    if file_obj is not None:
        path = file_obj.name if hasattr(file_obj, "name") else str(file_obj)
        try:
            df = pd.read_csv(path)
        except (OSError, ValueError):
            # Unreadable/empty/malformed upload (pandas parser errors derive
            # from ValueError): ignore the file and keep any pasted text.
            df = None
        if df is not None and len(df.columns) > 0:
            # dropna() keeps blank cells from turning into the string "nan".
            for cell in df.iloc[:, 0].dropna().tolist():
                value = str(cell).strip()
                if value:
                    items.append(value)

    if text_blob:
        for ln in str(text_blob).splitlines():
            ln = ln.strip()
            if ln:
                items.append(ln)

    seen = set()
    out: List[str] = []
    for x in items:
        k = norm_text(x)
        if k and k not in seen:
            seen.add(k)
            out.append(x)
    return out
824
+
825
def run_batch(text_blob: str, file_obj: Any, include_antennas: bool):
    """Resolve every batch input against the lifecycle table (no GPT).

    Args:
        text_blob: Pasted devices, one per line.
        file_obj: Optional uploaded CSV (first column is read).
        include_antennas: Accepted for the UI checkbox; not used by this
            function at present.

    Returns:
        Tuple ``(summary, dataframe, csv_path, rollup)``. When there are no
        inputs the tuple is ``("", None, None, "")``.
    """
    inputs = parse_batch_inputs(text_blob, file_obj)
    if not inputs:
        return "", None, None, ""

    rows = []
    for item in inputs:
        res = resolve_device(item)
        if res.get("mode") != "ok":
            rows.append({"Input": item, "Matched":"", "Status":"Needs review", "EOS":"", "EOL":"", "4G alternative":"", "5G replacement":"", "Notes":"Not found/ambiguous"})
            continue

        life_row = df_eos.iloc[int(res["row_idx"])]
        eos, eol, status = row_to_dates_and_status(life_row)
        repl = pick_replacements_lifecycle(life_row, status, use_gpt=False)

        rows.append({
            "Input": item,
            "Matched": str(life_row.get("sku","")),
            "Status": status,
            "EOS": eos,
            "EOL": eol,
            "4G alternative": repl.get("repl_4g",""),
            "5G replacement": repl.get("repl_5g",""),
            "Notes": "",
        })

    out_df = pd.DataFrame(rows)
    counts = out_df["Status"].value_counts(dropna=False).to_dict()
    summary = f"Rows: {len(out_df)} | " + " | ".join([f"{k}: {v}" for k,v in counts.items()])

    # Drop blank names BEFORE taking the top five so unresolved rows (which
    # carry an empty 5G value) cannot crowd real recommendations out.
    counts_5g = out_df["5G replacement"].value_counts(dropna=False)
    top_5g = [(k, v) for k, v in counts_5g.items() if str(k).strip()][:5]
    rollup = "Top 5G recommendations:\n" + "\n".join([f"- {k}: {v}" for k, v in top_5g])

    # Reserve a path, close the handle, then write: the original left the
    # file object open for the life of the process.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
        csv_path = tmp.name
    out_df.to_csv(csv_path, index=False)

    return summary, out_df, csv_path, rollup
862
+
863
+
864
+ # ============================
865
+ # Replacement feature table + manufacturer link (5G device)
866
+ # ============================
867
+
868
+ FEATURE_COLS = ["Device", "Modem technology", "WiFi", "Ports", "Antennas", "Ruggedness", "Use case"]
869
+
870
+ # Manufacturer domains used for best-effort link resolution (no non-maker domains).
871
+ MAKER_DOMAINS = {
872
+ "CRADLEPOINT": ["cradlepoint.com", "ericsson.com"],
873
+ "SIERRA": ["semtech.com", "airlink.com"],
874
+ "FEENEY": ["inseego.com"],
875
+ "DIGI": ["digi.com"],
876
+ "CISCO_MERAKI": ["meraki.cisco.com", "cisco.com"],
877
+ "CISCO": ["cisco.com"],
878
+ "TELTONIKA": ["teltonika-networks.com"],
879
+ "UNKNOWN": [],
880
+ }
881
+
882
+ HTTP_HEADERS = {
883
+ "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
884
+ "(KHTML, like Gecko) Chrome/120.0 Safari/537.36"
885
+ }
886
+ HTTP_TIMEOUT = 12
887
+
888
+ def _best_effort_manufacturer_url(model: str, canon_make: str) -> str:
889
+ \"\"\"Try to find a manufacturer page or datasheet link using simple on-domain searches.
890
+ If we can't confirm a page, return the manufacturer homepage for the maker family.
891
+ \"\"\"
892
+ model = str(model or "").strip()
893
+ if not model or model in {"Not listed", "Not applicable"}:
894
+ return ""
895
+
896
+ domains = MAKER_DOMAINS.get(canon_make, []) or []
897
+ if not domains:
898
+ return ""
899
+
900
+ # Candidate on-domain search URLs (common patterns across sites).
901
+ # We keep these on the manufacturer domain (no Google/Bing).
902
+ q = re.sub(r"\s+", "+", model)
903
+ url_candidates = []
904
+ for d in domains:
905
+ url_candidates += [
906
+ f"https://{d}/search?q={q}",
907
+ f"https://{d}/search?query={q}",
908
+ f"https://{d}/?s={q}",
909
+ f"https://www.{d}/search?q={q}",
910
+ f"https://www.{d}/search?query={q}",
911
+ f"https://www.{d}/?s={q}",
912
+ ]
913
+
914
+ # Also try a few direct product patterns for known makers (best effort).
915
+ if canon_make == "TELTONIKA":
916
+ slug = model.lower()
917
+ url_candidates += [
918
+ f"https://teltonika-networks.com/products/routers/{slug}",
919
+ f"https://teltonika-networks.com/product/{slug}",
920
+ "https://teltonika-networks.com/products/routers/",
921
+ ]
922
+ if canon_make == "DIGI":
923
+ url_candidates += [
924
+ "https://www.digi.com/products/networking/cellular-routers",
925
+ f"https://www.digi.com/search?q={q}",
926
+ ]
927
+ if canon_make == "CRADLEPOINT":
928
+ url_candidates += [
929
+ "https://cradlepoint.com/products/",
930
+ f"https://cradlepoint.com/?s={q}",
931
+ ]
932
+ if canon_make in {"CISCO", "CISCO_MERAKI"}:
933
+ url_candidates += [
934
+ f"https://www.cisco.com/c/en/us/search.html?q={q}",
935
+ ]
936
+
937
+ # Try to confirm a working page (HTTP 200 and model string somewhere in HTML).
938
+ for u in url_candidates[:18]:
939
+ try:
940
+ import requests
941
+ r = requests.get(u, headers=HTTP_HEADERS, timeout=HTTP_TIMEOUT, allow_redirects=True)
942
+ if r.status_code != 200:
943
+ continue
944
+ html = (r.text or "").lower()
945
+ if model.lower() in html or "datasheet" in html or "data sheet" in html:
946
+ return r.url
947
+ except Exception:
948
+ continue
949
+
950
+ # Fallback: maker homepage
951
+ d0 = domains[0]
952
+ return f"https://{d0}"
953
+
954
def _features_from_dec(model: str, canon_make: str) -> Dict[str, str]:
    """Look up a router model in the ``df_dec`` table and return its key features.

    The search is limited to rows of the same canonical maker and uses a fuzzy
    match on the normalized model name; matches scoring below ``MATCH_OK`` are
    rejected.

    Returns:
        Dict keyed by ``FEATURE_COLS[1:]``. Every value is "Not listed" when
        the model is blank/placeholder, the maker has no rows, or no match is
        good enough; individual blank/NaN cells also become "Not listed".
    """
    def _not_listed() -> Dict[str, str]:
        # Fresh dict on every call: callers may fill the result in place.
        return {k: "Not listed" for k in FEATURE_COLS[1:]}

    if not model or model in {"Not listed", "Not applicable"}:
        return _not_listed()

    pool = df_dec[df_dec["_canon_make"] == canon_make].copy()
    if pool.empty:
        return _not_listed()

    hit = process.extractOne(norm_text(model), pool["_norm_model"].tolist(), scorer=fuzz.WRatio)
    if not hit or hit[1] < MATCH_OK:
        return _not_listed()

    # hit[2] is the position in the list passed above, i.e. positional in pool.
    r = pool.iloc[int(hit[2])]

    def _cell(col: str) -> str:
        """Return the stripped cell text, or '' for a missing/NaN cell."""
        v = r.get(col, "")
        if v is None:
            return ""
        try:
            if pd.isna(v):
                return ""
        except (TypeError, ValueError):
            pass
        return str(v).strip()

    wan = _cell("WAN ports and speed")
    lan = _cell("LAN ports and speed")
    if wan or lan:
        ports = f"WAN: {wan or 'Not listed'} | LAN: {lan or 'Not listed'}"
    else:
        ports = "Not listed"

    return {
        "Modem technology": _cell("Modem Type") or "Not listed",
        "WiFi": _cell("WiFi type") or "Not listed",
        "Ports": ports,
        "Antennas": _cell("Antennas (internal/external/both)") or "Not listed",
        "Ruggedness": _cell("Ruggedization") or "Not listed",
        "Use case": _cell("Primary use case") or "Not listed",
    }
977
+
978
def _gpt_fill_feature_row(device_label: str, model: str, canon_make: str, row: Dict[str, str]) -> Dict[str, str]:
    """Ask GPT to fill feature fields the local table could not supply.

    A field counts as missing when it is empty, "Not listed" or "nan"
    (case-insensitive). Only those fields are requested and overwritten.
    ``row`` is updated in place and also returned. With no client configured,
    nothing missing, or a failed/invalid GPT reply, ``row`` comes back as is.

    Args:
        device_label: Display label such as "5G replacement".
        model: Router model name sent to the model for context.
        canon_make: Canonical maker family key.
        row: Feature dict to complete.
    """
    if client is None:
        return row

    missing = [k for k, v in row.items() if (not v) or str(v).strip().lower() in {"not listed", "nan", ""}]
    if not missing:
        return row

    system_prompt = "Fill missing router feature fields for a Verizon rep. Return strict JSON only."
    payload = {
        "device_label": device_label,
        "model": model,
        "maker_family": canon_make,
        "known": row,
        "fill_only": missing,
        "rules": [
            "Fill only the requested fields.",
            "Best guess if needed. Short phrases only.",
            "Return JSON only."
        ],
        "output_schema": {k: "string" for k in missing}
    }
    try:
        out = gpt_json(system_prompt, payload, max_tokens=260) or {}
    except Exception:
        # Enrichment is optional; never let an API failure break the table.
        out = {}
    if not isinstance(out, dict):
        out = {}

    for k in missing:
        val = str(out.get(k, "") or "").strip()
        if val:
            row[k] = val
    return row
1007
+
1008
def build_replacement_features_table(repl_4g: str, repl_5g: str, canon_make: str) -> pd.DataFrame:
    """Build the two-row feature comparison table (4G alternative, 5G replacement).

    Each row is looked up locally first and then passed through the GPT
    gap-filler. Columns follow ``FEATURE_COLS``.
    """
    table_rows = []
    for label, model in (("4G alternative", repl_4g), ("5G replacement", repl_5g)):
        features = _features_from_dec(model, canon_make)
        features = _gpt_fill_feature_row(label, model, canon_make, features)
        table_rows.append({"Device": label, **features})

    return pd.DataFrame(table_rows, columns=FEATURE_COLS)
1023
+
1024
+ # ============================
1025
+ # Output
1026
+ # ============================
1027
def assemble_output(life_row: pd.Series, status: str, eos: str, eol: str, repl: Dict[str,Any], ant: Dict[str,Any]) -> str:
    """Render the numbered markdown summary shown for a single lookup.

    Reads ``sku``/``description`` from ``life_row``, ``repl_4g``/``repl_5g``/
    ``sources`` from ``repl`` and the two omni antenna dicts from ``ant``.
    Returns the lines joined with newlines.
    """
    def antenna_line(kind: str, info: Dict[str, Any]) -> str:
        # The connector suffix appears only when the antenna lists connectors.
        conn = f" | Conn: {info.get('connectors','')}" if info.get("connectors") else ""
        return (
            f" - {kind} (Omni): **{info.get('name','')}** (Part #: {info.get('part_number','')})"
            f" — {info.get('description','')} — MIMO: {info.get('mimo','')}{conn}"
        )

    current_name = f"{life_row.get('sku','')} — {life_row.get('description','')}".strip(" —")
    stationary = ant.get("stationary_omni", {})
    vehicle = ant.get("vehicle_omni", {})

    # Only a real list of sources is echoed in the debug section.
    listed_sources = repl.get("sources")
    if not isinstance(listed_sources, list):
        listed_sources = []

    body = [
        f"1. Current device: **{current_name}**",
        f"2. Status: **{status}**",
        f"3. End of Sale date: **{eos}**",
        f"4. End of Life date: **{eol}**",
        f"5. 4G alternative (lifecycle): **{repl.get('repl_4g','Not applicable')}**",
        f"6. 5G replacement (lifecycle): **{repl.get('repl_5g','Not listed')}**",
        "7. Antenna options (Parsec-only):",
        antenna_line("Stationary", stationary),
        antenna_line("Vehicle", vehicle),
        "\nSources (debug):",
    ]
    body.extend(f"- {src}" for src in listed_sources)
    body.append("- ParsecCatalog.pdf (local RAG)")
    body.append("- routers_eos_eol_by_sku.csv (replacements)")
    return "\n".join(body)
1051
+
1052
+
1053
+ # ============================
1054
+ # Gradio callbacks
1055
+ # IMPORTANT: no dict state and ALL events have api_name=False (prevents api_info schema generation)
1056
+ # ============================
1057
def _render_lookup(row_idx: int, raw_text: str):
    """Build the full 7-value UI response for one resolved lifecycle row.

    Shared by ``run_lookup`` and ``use_selection`` so both paths stay in sync.
    Returns ``(output_md, link_md, features_df, dropdown_update,
    button_update, state_json, install_md)``.
    """
    life_row = df_eos.iloc[int(row_idx)]
    eos, eol, status = row_to_dates_and_status(life_row)

    repl = pick_replacements_lifecycle(life_row, status, use_gpt=True)
    canon_make = str(life_row.get("_canon_make","UNKNOWN"))

    # Placeholder strings are truthy; treat them as "no 5G model" so the
    # antenna search is driven by the current SKU instead of "Not listed".
    repl_5g = str(repl.get("repl_5g") or "").strip()
    has_5g = bool(repl_5g) and repl_5g not in {"Not listed", "Not applicable"}

    mimo = infer_mimo_for_5g(repl.get("repl_5g",""))
    tech = "5G" if has_5g else ("4G" if device_is_4g(life_row) else "Unknown")
    antenna_model = repl_5g if has_5g else str(life_row.get("sku",""))
    ant = antenna_options_for(antenna_model, tech, mimo)

    output = assemble_output(life_row, status, eos, eol, repl, ant)
    st_out = {"row_idx": int(row_idx), "repl": repl, "ant": ant, "raw": raw_text}
    url5 = _best_effort_manufacturer_url(repl.get('repl_5g',''), canon_make)
    link = f"**5G manufacturer page (best effort):** {url5}" if url5 else ""
    feat_df = build_replacement_features_table(repl.get('repl_4g',''), repl.get('repl_5g',''), canon_make)
    return output, link, feat_df, gr.update(visible=False), gr.update(visible=False), state_dump(st_out), ""


def run_lookup(user_text: str, st_json: str):
    """Handle the "Check" button: resolve the typed device and render it.

    Args:
        user_text: SKU/model typed by the user.
        st_json: Current state JSON (wired as an input by the UI; a fresh
            state is always produced, so its content is not read).

    Returns:
        The 7-value tuple expected by the click handler's outputs. When the
        input is ambiguous the dropdown and "Use selection" button are shown
        and the candidate options are stored in the state.
    """
    user_text = str(user_text or "").strip()
    if not user_text:
        return "Enter a router SKU/model.", "", None, gr.update(visible=False), gr.update(visible=False), "{}", ""

    res = resolve_device(user_text)

    if res.get("mode") == "pick":
        opts = res.get("options", [])
        choices = [o["label"] for o in opts]
        st2 = {"mode":"pick","options": opts, "raw": user_text}
        return "Did you mean A or B? Pick one, then click Use selection.", "", None, gr.update(choices=choices, value=None, visible=True), gr.update(visible=True), state_dump(st2), ""

    if res.get("mode") != "ok":
        return "Not found.", "", None, gr.update(visible=False), gr.update(visible=False), "{}", ""

    return _render_lookup(int(res["row_idx"]), user_text)


def use_selection(selected_label: str, st_json: str):
    """Handle "Use selection": render the option picked from the dropdown.

    Args:
        selected_label: Label chosen in the dropdown.
        st_json: State JSON written by ``run_lookup`` in "pick" mode.

    Returns:
        The same 7-value tuple as ``run_lookup``. If the state is not in pick
        mode, or the label is missing/unknown, a prompt message is returned
        and (for the latter two cases) the picker stays visible.
    """
    st = state_load(st_json)
    if not st or st.get("mode") != "pick":
        return "Run a search first.", "", None, gr.update(visible=False), gr.update(visible=False), "{}", ""

    if not selected_label:
        return "Pick A or B first.", "", None, gr.update(visible=True), gr.update(visible=True), st_json, ""

    chosen_row = None
    for o in st.get("options", []):
        if o.get("label") == selected_label:
            chosen_row = int(o["row_idx"])
            break
    if chosen_row is None:
        return "Pick a valid option.", "", None, gr.update(visible=True), gr.update(visible=True), st_json, ""

    return _render_lookup(chosen_row, st.get("raw",""))
1120
+
1121
def make_install_ready(st_json: str):
    """Produce the install checklist for the device stored in the UI state.

    Returns a prompt message when the state holds no completed lookup.
    """
    state = state_load(st_json)
    if state and "row_idx" in state:
        row = df_eos.iloc[int(state["row_idx"])]
        sku = str(row.get("sku","") or "")
        replacements = state.get("repl", {}) or {}
        antennas = state.get("ant", {}) or {}
        return install_ready_checklist(sku, replacements, antennas)
    return "Run a lookup first."
1128
+
1129
+
1130
+ # ============================
1131
+ # UI
1132
+ # ============================
1133
with gr.Blocks(title="Only-Routers") as demo:
    gr.Markdown("## Only-Routers\nSingle lookup + Batch upload for Verizon reps.")

    with gr.Tabs():
        # ---- Single-device lookup tab ----
        with gr.Tab("Single"):
            user_text = gr.Textbox(
                label="Router SKU or model",
                placeholder="Examples: IBR650B, AER1600, ES450, WR21, RUT240",
                lines=1,
            )
            st = gr.State("{}")  # JSON string

            check_btn = gr.Button("Check", variant="primary")
            pick_dd = gr.Dropdown(label="Pick A or B", choices=[], visible=False)
            use_btn = gr.Button("Use selection", visible=False)

            output_md = gr.Markdown()

            link_md = gr.Markdown()
            features_df = gr.Dataframe(headers=FEATURE_COLS, interactive=False, wrap=True)

            install_btn = gr.Button("Make install-ready checklist")
            install_md = gr.Markdown()

            # Both lookup callbacks return the same 7 values in this order.
            lookup_outputs = [output_md, link_md, features_df, pick_dd, use_btn, st, install_md]

            check_btn.click(fn=run_lookup, inputs=[user_text, st], outputs=lookup_outputs, api_name=False)
            use_btn.click(fn=use_selection, inputs=[pick_dd, st], outputs=lookup_outputs, api_name=False)
            install_btn.click(fn=make_install_ready, inputs=[st], outputs=[install_md], api_name=False)

        # ---- Batch tab ----
        with gr.Tab("Batch"):
            gr.Markdown("Paste one per line or upload a CSV (first column). Batch runs fast (no GPT).")
            batch_text = gr.Textbox(
                label="Paste devices (one per line)",
                lines=8,
                placeholder="WR21\nRUT240\nIBR650B",
            )
            batch_file = gr.File(label="Upload CSV", file_types=[".csv"])
            include_ant = gr.Checkbox(label="Include antenna picks (slower)", value=False)
            run_btn = gr.Button("Run batch", variant="primary")

            summary_md = gr.Markdown()
            rollup_md = gr.Markdown()
            table = gr.Dataframe(interactive=False, wrap=True)
            dl = gr.File(label="Download results CSV")

            batch_inputs = [batch_text, batch_file, include_ant]
            batch_outputs = [summary_md, table, dl, rollup_md]
            run_btn.click(fn=run_batch, inputs=batch_inputs, outputs=batch_outputs, api_name=False)

# IMPORTANT: On Spaces, demo.launch() is correct; do NOT use share=True.
demo.launch(show_api=False)
Updates/app_working.py ADDED
@@ -0,0 +1,1005 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import json
4
+ import math
5
+ import hashlib
6
+ import tempfile
7
+ from dataclasses import dataclass
8
+ from datetime import datetime, date
9
+ from typing import Any, Dict, List, Optional, Tuple
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+
14
+ import fitz # PyMuPDF
15
+ import faiss
16
+ from sentence_transformers import SentenceTransformer
17
+ from rapidfuzz import fuzz, process
18
+
19
+ import gradio as gr
20
+ from openai import OpenAI
21
+
22
+
23
+ # ============================
24
+ # Settings
25
+ # ============================
26
+ TODAY = date(2026, 1, 18)
27
+ OPENAI_MODEL = "gpt-5.2"
28
+ OPENAI_REASONING = {"effort": "high"}
29
+ MATCH_OK = 80
30
+
31
+ EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
32
+ PARSEC_CONTEXT_BEFORE = 900
33
+ PARSEC_CONTEXT_AFTER = 1600
34
+
35
+
36
+ # ============================
37
+ # OpenAI client (HF Space secret: OPENAI_API_KEY)
38
+ # ============================
39
+ API_KEY = os.getenv("OPENAI_API_KEY", "").strip()
40
+ client = OpenAI(api_key=API_KEY) if API_KEY else None
41
+
42
+ # ----------------------------
43
+ # Gradio state helpers
44
+ # Keep state as a JSON STRING to avoid schema issues on Hugging Face.
45
+ # ----------------------------
46
def state_load(st_json: str) -> Dict[str, Any]:
    """Decode the UI state JSON string into a dict.

    Returns ``{}`` for empty input, non-string input, invalid JSON, or JSON
    whose top-level value is not an object, so callers can always use
    ``.get`` on the result.
    """
    if not st_json or not isinstance(st_json, str):
        return {}
    try:
        loaded = json.loads(st_json)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass.
        return {}
    return loaded if isinstance(loaded, dict) else {}
53
+
54
def state_dump(st: Dict[str, Any]) -> str:
    """Encode the UI state as a JSON string (non-ASCII kept as-is).

    A falsy state is written as ``{}``; anything that cannot be serialized
    also yields the string "{}".
    """
    try:
        payload = st if st else {}
        encoded = json.dumps(payload, ensure_ascii=False)
    except Exception:
        encoded = "{}"
    return encoded
59
+
60
+
61
+
62
+ # ============================
63
+ # Helpers
64
+ # ============================
65
def norm_text(s: Any) -> str:
    """Normalize a value for matching.

    Missing values (None/NaN) become "". Otherwise the value is stringified,
    lower-cased, every character other than a-z, 0-9, whitespace, '-' and '/'
    is replaced by a space, and whitespace runs are collapsed.
    """
    try:
        is_missing = s is None or (isinstance(s, float) and math.isnan(s)) or pd.isna(s)
        if is_missing:
            return ""
    except Exception:
        # e.g. list-likes make the check ambiguous; fall through and stringify.
        pass

    lowered = str(s).strip().lower()
    only_allowed = re.sub(r"[^a-z0-9\s\-\/]", " ", lowered)
    return re.sub(r"\s+", " ", only_allowed).strip()
75
+
76
def safe_str(v: Any) -> str:
    """Return ``v`` as a stripped string, or "" for None/NaN-like values.

    List-like values, for which a scalar missing-value check is ambiguous,
    are stringified rather than raising.
    """
    if v is None:
        return ""
    try:
        if pd.isna(v):
            return ""
    except (TypeError, ValueError):
        # pd.isna returns an array for list-likes; its truth value is
        # ambiguous, so treat the value as present.
        pass
    return str(v).strip()
80
+
81
def is_5g(modem_type: Any) -> bool:
    """Return True when the modem description mentions 5G or NR.

    "NR" must appear as a standalone token so that words merely containing
    the letters "nr" are not classified as 5G.
    """
    s = norm_text(modem_type)
    return ("5g" in s) or bool(re.search(r"\bnr\b", s))
84
+
85
def json_load_safe(s: str) -> Dict[str, Any]:
    """Parse a JSON object from ``s``; never raise, always return a dict.

    If ``s`` is not valid JSON as a whole (for example it is wrapped in a
    markdown code fence or surrounded by prose), the text between the first
    "{" and the last "}" is tried once. Anything that does not decode to a
    JSON object yields ``{}``.
    """
    def _parse(text: Any) -> Any:
        try:
            return json.loads(text)
        except (TypeError, ValueError):
            return None

    parsed = _parse(s)
    if parsed is None and isinstance(s, str):
        start, end = s.find("{"), s.rfind("}")
        if 0 <= start < end:
            parsed = _parse(s[start:end + 1])
    return parsed if isinstance(parsed, dict) else {}
90
+
91
def gpt_json(system: str, payload: Dict[str, Any], max_tokens: int = 600) -> Dict[str, Any]:
    """Send ``payload`` (as JSON) to the model and parse a JSON object reply.

    Args:
        system: System prompt.
        payload: JSON-serializable request body sent as the user message.
        max_tokens: Cap passed as ``max_output_tokens``.

    Returns:
        The decoded reply dict, or ``{}`` when no client is configured, the
        request fails, or the reply is not a JSON object. Callers treat ``{}``
        as "no answer" and fall back to local logic.
    """
    if client is None:
        return {}
    try:
        resp = client.responses.create(
            model=OPENAI_MODEL,
            reasoning=OPENAI_REASONING,
            input=[{"role":"system","content":system},{"role":"user","content":json.dumps(payload)}],
            max_output_tokens=max_tokens,
        )
    except Exception:
        # Network/API/serialization failure: degrade to "no answer".
        return {}
    parsed = json_load_safe(getattr(resp, "output_text", "") or "")
    return parsed if isinstance(parsed, dict) else {}
101
+
102
+
103
+ # ============================
104
+ # Load data
105
+ # ============================
106
+ EOS_PATH = "routers_eos_eol_by_sku.csv"
107
+ DEC_PATH = "dec2025routers.csv"
108
+ PARSEC_PDF = "ParsecCatalog.pdf"
109
+
110
+ if not os.path.exists(EOS_PATH):
111
+ raise FileNotFoundError(f"Missing {EOS_PATH} in repo.")
112
+ if not os.path.exists(DEC_PATH):
113
+ raise FileNotFoundError(f"Missing {DEC_PATH} in repo.")
114
+ if not os.path.exists(PARSEC_PDF):
115
+ raise FileNotFoundError(f"Missing {PARSEC_PDF} in repo.")
116
+
117
+ df_eos = pd.read_csv(EOS_PATH).copy()
118
+ df_dec = pd.read_csv(DEC_PATH).copy()
119
+
120
+
121
+ def _canonize_eos_columns(df: pd.DataFrame) -> pd.DataFrame:
122
+ """Normalize lifecycle CSV column names (case-insensitive) and create expected columns."""
123
+ # Map various header spellings to canonical names used by the app
124
+ mapping = {}
125
+ for c in df.columns:
126
+ k = str(c).strip().lower().replace(" ", "_")
127
+ if k in {"sku", "model", "device", "device_sku"}:
128
+ mapping[c] = "sku"
129
+ elif k in {"manufacturer", "make", "vendor"}:
130
+ mapping[c] = "manufacturer"
131
+ elif k in {"device_type", "type"}:
132
+ mapping[c] = "device_type"
133
+ elif k in {"end_of_sale", "eos", "end_sale", "end_of_sales"}:
134
+ mapping[c] = "end_of_sale"
135
+ elif k in {"end_of_life", "eol", "end_life"}:
136
+ mapping[c] = "end_of_life"
137
+ elif k in {"suggested_replacement", "replacement_4g", "lte_replacement", "replacement_lte", "replacement"}:
138
+ mapping[c] = "suggested_replacement"
139
+ elif k in {"advanced_5g_option", "replacement_5g", "fiveg_replacement", "5g_replacement", "upgrade_5g"}:
140
+ mapping[c] = "advanced_5g_option"
141
+ elif k in {"region", "market"}:
142
+ mapping[c] = "region"
143
+ elif k in {"notes", "note"}:
144
+ mapping[c] = "notes"
145
+ elif k in {"description", "device_description", "name"}:
146
+ mapping[c] = "description"
147
+
148
+ df = df.rename(columns=mapping).copy()
149
+
150
+ # Create expected columns if missing
151
+ if "sku" not in df.columns:
152
+ # Try the common capitalized header as a fallback
153
+ if "SKU" in df.columns:
154
+ df["sku"] = df["SKU"].astype(str)
155
+ else:
156
+ df["sku"] = ""
157
+
158
+ if "manufacturer" not in df.columns:
159
+ df["manufacturer"] = ""
160
+
161
+ if "device_type" not in df.columns:
162
+ df["device_type"] = ""
163
+
164
+ if "description" not in df.columns:
165
+ # If the simplified file removed description, use SKU as description (still searchable)
166
+ df["description"] = df["sku"].astype(str)
167
+
168
+ if "notes" not in df.columns:
169
+ df["notes"] = ""
170
+
171
+ if "region" not in df.columns:
172
+ df["region"] = ""
173
+
174
+ if "suggested_replacement" not in df.columns:
175
+ df["suggested_replacement"] = ""
176
+
177
+ if "advanced_5g_option" not in df.columns:
178
+ df["advanced_5g_option"] = ""
179
+
180
+ if "end_of_sale" not in df.columns:
181
+ df["end_of_sale"] = ""
182
+
183
+ if "end_of_life" not in df.columns:
184
+ df["end_of_life"] = ""
185
+
186
+ return df
187
+
188
+ df_eos = _canonize_eos_columns(df_eos)
189
+
190
+
191
def region_ok(x: Any) -> bool:
    """Return True when a region value is blank or denotes the US/North America.

    Blank covers None, NaN (what pandas yields for an empty CSV cell) and
    empty strings, so rows without a region are kept. "not specified" is
    also accepted.
    """
    if x is None or (isinstance(x, float) and math.isnan(x)):
        return True
    s = str(x or "").strip().lower()
    if not s:
        return True
    if "not specified" in s or "north america" in s:
        return True
    us_patterns = (r"\busa\b", r"\bunited\s+states\b", r"\bu\.?s\.?\b")
    return any(re.search(p, s) for p in us_patterns)
206
+
207
+ if "region" in df_eos.columns:
208
+ df_eos = df_eos[df_eos["region"].apply(region_ok)].reset_index(drop=True)
209
+
210
+ # Maker mapping (includes Teltonika)
211
CANON_MAKER = {
    "CRADLEPOINT": {"cradlepoint", "ericsson", "ericsson enterprise wireless"},
    "SIERRA": {"sierra", "sierra wireless", "semtech", "airlink"},
    "FEENEY": {"feeney", "feeney wireless", "inseego"},
    "DIGI": {"digi", "accelerated", "accelerated concepts"},
    "CISCO_MERAKI": {"meraki", "cisco meraki"},
    "CISCO": {"cisco"},
    "TELTONIKA": {"teltonika"},
}


def canon_maker_from_text(s: Any) -> str:
    """Map free-text maker names to a canonical family key.

    Families are tried in ``CANON_MAKER`` order and the first one with any
    alias occurring as a substring of the normalized text wins; "UNKNOWN" is
    returned when none match.
    """
    haystack = norm_text(s)
    return next(
        (family for family, names in CANON_MAKER.items() if any(name in haystack for name in names)),
        "UNKNOWN",
    )
228
+
229
+ df_eos["_canon_make"] = df_eos["manufacturer"].apply(canon_maker_from_text) if "manufacturer" in df_eos.columns else "UNKNOWN"
230
+ df_eos["_norm_sku"] = df_eos["sku"].apply(norm_text) if "sku" in df_eos.columns else ""
231
+ df_eos["_norm_desc"] = df_eos["description"].apply(norm_text) if "description" in df_eos.columns else ""
232
+ df_eos["_norm_notes"] = df_eos["notes"].apply(norm_text) if "notes" in df_eos.columns else ""
233
+
234
+ df_dec["_canon_make"] = df_dec["Make"].apply(canon_maker_from_text) if "Make" in df_dec.columns else "UNKNOWN"
235
+ df_dec["_norm_model"] = df_dec["Model"].apply(norm_text) if "Model" in df_dec.columns else ""
236
+ df_dec["_is5g"] = df_dec["Modem Type"].apply(is_5g) if "Modem Type" in df_dec.columns else False
237
+
238
+
239
+ # ============================
240
+ # Date helpers
241
+ # ============================
242
@dataclass
class ParsedDate:
    """Result of parsing one lifecycle date cell."""
    raw: str                # original cell text, stripped ("" when missing)
    kind: str               # "missing", "full", "year", "year_month" or "bad"
    value: Optional[date]   # comparable date, or None when missing/unparseable


def parse_date_field(x: Any) -> ParsedDate:
    """Parse a lifecycle date cell into a ``ParsedDate``.

    Accepted forms: ``M/D/YY``, ``M/D/YYYY``, ``YYYY``, ``YYYY-MM`` and
    ``YYYY-MM-DD``. None, NaN and blank cells are "missing"; anything else
    that cannot be parsed is "bad" with the raw text preserved.

    A bare year up to and including ``TODAY``'s year maps to Jan 1 of that
    year; a later year maps to Dec 31. ``YYYY-MM`` maps to the first of the
    month.
    """
    if x is None or (isinstance(x, float) and math.isnan(x)):
        return ParsedDate(raw="", kind="missing", value=None)
    raw = str(x or "").strip()
    if not raw:
        return ParsedDate(raw="", kind="missing", value=None)

    bad = ParsedDate(raw=raw, kind="bad", value=None)

    # Common US formats: M/D/YY or M/D/YYYY (e.g., 6/24/24, 9/30/21).
    # strptime already accepts non-zero-padded month/day for %m and %d.
    for fmt in ("%m/%d/%y", "%m/%d/%Y"):
        try:
            return ParsedDate(raw=raw, kind="full", value=datetime.strptime(raw, fmt).date())
        except ValueError:
            pass

    # YYYY
    if re.fullmatch(r"\d{4}", raw):
        y = int(raw)
        try:
            value = date(y, 1, 1) if y <= TODAY.year else date(y, 12, 31)
        except ValueError:
            # e.g. "0000" is outside the supported year range
            return bad
        return ParsedDate(raw=raw, kind="year", value=value)

    # YYYY-MM
    if re.fullmatch(r"\d{4}-\d{2}", raw):
        try:
            y, m = raw.split("-")
            return ParsedDate(raw=raw, kind="year_month", value=date(int(y), int(m), 1))
        except ValueError:
            return bad

    # YYYY-MM-DD
    if re.fullmatch(r"\d{4}-\d{2}-\d{2}", raw):
        try:
            return ParsedDate(raw=raw, kind="full", value=datetime.strptime(raw, "%Y-%m-%d").date())
        except ValueError:
            return bad

    # Last resort: leave as raw (unparsed)
    return bad
297
+
298
def display_date(pd_: ParsedDate) -> str:
    """Return the text to show for a parsed date.

    Missing dates, and unparseable ones with no raw text, show as
    "Not listed"; everything else shows the raw cell text.
    """
    kind = pd_.kind
    if kind == "missing" or (kind == "bad" and not pd_.raw):
        return "Not listed"
    return pd_.raw
304
+
305
def status_from_eos_eol(eos: ParsedDate, eol: ParsedDate) -> str:
    """Derive the lifecycle status from the end-of-sale / end-of-life dates.

    "Unknown" when neither date has a value; otherwise End of Life takes
    precedence over End of Sale, and "Active" means neither date has been
    reached as of ``TODAY``.
    """
    sale_day, life_day = eos.value, eol.value

    def reached(day) -> bool:
        # A date counts once it is on or before TODAY.
        return day is not None and day <= TODAY

    if sale_day is None and life_day is None:
        return "Unknown"
    if reached(life_day):
        return "End of Life"
    return "End of Sale" if reached(sale_day) else "Active"
313
+
314
def row_to_dates_and_status(row: pd.Series) -> Tuple[str, str, str]:
    """Return ``(eos_display, eol_display, status)`` for one lifecycle row."""
    sale = parse_date_field(row.get("end_of_sale"))
    life = parse_date_field(row.get("end_of_life"))
    status = status_from_eos_eol(sale, life)
    return display_date(sale), display_date(life), status
318
+
319
+
320
+ # ============================
321
+ # Embeddings + Parsec index
322
+ # ============================
323
+ embedder = SentenceTransformer(EMBED_MODEL_NAME)
324
+
325
def extract_pdf_text_pages(path: str) -> List[str]:
    """Return the plain text of every page of the PDF at ``path``, in order.

    The document is opened in a ``with`` block so the file handle is released
    as soon as the text has been extracted.
    """
    with fitz.open(path) as doc:
        return [page.get_text("text") for page in doc]
328
+
329
def build_parsec_cards(pages: List[str]) -> List[str]:
    """Cut catalog pages into text "cards" around each "Standard SKU:" marker.

    For every marker a window of ``PARSEC_CONTEXT_BEFORE`` characters before
    and ``PARSEC_CONTEXT_AFTER`` characters after the marker start is taken
    from the same page. Windows shorter than 200 characters after stripping
    are dropped, and exact duplicates are removed keeping first occurrences.
    """
    unique_cards: List[str] = []
    seen_digests = set()

    for page_text in pages:
        for marker in re.finditer(r"Standard\s+SKU:", page_text):
            anchor = marker.start()
            lo = max(0, anchor - PARSEC_CONTEXT_BEFORE)
            hi = min(len(page_text), anchor + PARSEC_CONTEXT_AFTER)
            card = page_text[lo:hi].strip()
            if len(card) < 200:
                continue
            digest = hashlib.sha1(card.encode("utf-8")).hexdigest()
            if digest in seen_digests:
                continue
            seen_digests.add(digest)
            unique_cards.append(card)

    return unique_cards
344
+
345
+ parsec_cards = build_parsec_cards(extract_pdf_text_pages(PARSEC_PDF))
346
+ parsec_emb = embedder.encode(parsec_cards, batch_size=64, show_progress_bar=False, normalize_embeddings=True)
347
+ parsec_emb = np.asarray(parsec_emb, dtype=np.float32)
348
+ parsec_index = faiss.IndexFlatIP(parsec_emb.shape[1])
349
+ parsec_index.add(parsec_emb)
350
+
351
+
352
+ # ============================
353
+ # Device resolution
354
+ # ============================
355
def label_for_row(i: int) -> str:
    """Return the "sku — manufacturer — description" label for row ``i`` of ``df_eos``, capped at 220 characters."""
    row = df_eos.iloc[i]
    fields = (row.get('sku',''), row.get('manufacturer',''), row.get('description',''))
    label = " — ".join(f"{field}" for field in fields)
    return label[:220]
358
+
359
+ EOS_LABELS = [label_for_row(i) for i in range(len(df_eos))]
360
+ EOS_CORPUS = []
361
+ for _, r in df_eos.iterrows():
362
+ EOS_CORPUS.append(" ".join([r.get("_norm_sku",""), r.get("_canon_make",""), r.get("_norm_desc",""), r.get("_norm_notes","")]))
363
+
364
def local_candidates(query: str, top_k: int = 6) -> List[Tuple[int, int, str]]:
    """Fuzzy-match ``query`` against ``EOS_CORPUS``.

    Returns up to ``top_k`` tuples ``(row_idx, score, label)`` in the order
    produced by the matcher.
    """
    matches = process.extract(norm_text(query), EOS_CORPUS, scorer=fuzz.WRatio, limit=top_k)
    ranked: List[Tuple[int, int, str]] = []
    for _text, score, idx in matches:
        position = int(idx)
        ranked.append((position, int(score), EOS_LABELS[position]))
    return ranked
368
+
369
def gpt_choose_device(user_text: str, candidates: List[Tuple[int,int,str]]) -> Dict[str, Any]:
    """Ask GPT which candidate row the user meant.

    Returns the model's decoded JSON reply (requested shape: ``mode`` of
    "ok" or "pick", plus ``row_idx`` / ``options``), or ``{}`` when no client
    is configured.
    """
    if client is None:
        return {}

    candidate_rows = []
    for row_idx, score, label in candidates:
        candidate_rows.append({"row_idx": row_idx, "score": score, "label": label})

    instructions = "Pick which router the user meant. Never invent. Return strict JSON only."
    request = {
        "user_input": user_text,
        "candidates": candidate_rows,
        "rules": [
            "If one is clearly correct, return mode='ok' with row_idx.",
            "If two are plausible, return mode='pick' with top 2 options."
        ],
        "output_schema": {"mode":"ok|pick","row_idx":"int","options":[{"row_idx":"int","label":"string"}]}
    }
    return gpt_json(instructions, request, max_tokens=280)
383
+
384
def resolve_device(user_text: str) -> Dict[str, Any]:
    """Resolve free-form user input to a lifecycle row.

    Returns one of:
      * ``{"mode": "ok", "row_idx": int}`` for a confident single match,
      * ``{"mode": "pick", "options": [{"row_idx", "label"}, ...]}`` when the
        user must choose,
      * ``{"mode": "not_found"}`` when fuzzy matching returns nothing.

    Resolution order: exact normalized-SKU match, then a high-confidence
    fuzzy match, then the GPT chooser, then the top local candidates.
    """
    q = norm_text(user_text)
    exact = df_eos.index[df_eos["_norm_sku"] == q].tolist()
    if len(exact) == 1:
        return {"mode": "ok", "row_idx": int(exact[0])}
    if len(exact) > 1:
        opts = [{"row_idx": int(i), "label": EOS_LABELS[int(i)]} for i in exact[:2]]
        return {"mode": "pick", "options": opts}

    cands = local_candidates(user_text, top_k=6)
    if not cands:
        return {"mode": "not_found"}

    # Accept the top fuzzy hit outright only if it is strong and clearly ahead.
    if cands[0][1] >= 95 and (len(cands) == 1 or (cands[0][1] - cands[1][1]) >= 8):
        return {"mode": "ok", "row_idx": cands[0][0]}

    # Rows the model is allowed to choose from. Anything else it returns is
    # treated as invented and ignored, so a bad index never reaches df_eos.iloc.
    label_by_row = {row_idx: label for row_idx, _, label in cands}

    g = gpt_choose_device(user_text, cands)
    if not isinstance(g, dict):
        g = {}

    if g.get("mode") == "ok":
        chosen = g.get("row_idx")
        if isinstance(chosen, int) and not isinstance(chosen, bool) and chosen in label_by_row:
            return {"mode": "ok", "row_idx": int(chosen)}

    if g.get("mode") == "pick":
        raw_opts = g.get("options") or []
        opts2: List[Dict[str, Any]] = []
        for o in raw_opts if isinstance(raw_opts, list) else []:
            if not isinstance(o, dict):
                continue
            try:
                row_idx = int(o["row_idx"])
            except (KeyError, TypeError, ValueError):
                continue
            if row_idx not in label_by_row or any(x["row_idx"] == row_idx for x in opts2):
                continue
            # Use the local label so the dropdown text always maps back to a row.
            opts2.append({"row_idx": row_idx, "label": label_by_row[row_idx]})
            if len(opts2) == 2:
                break
        if opts2:
            return {"mode": "pick", "options": opts2}

    # Fallback: offer the best one or two local candidates.
    top = cands[:2]
    return {"mode": "pick", "options": [{"row_idx": c[0], "label": c[2]} for c in top]}
413
+
414
+
415
+ # ============================
416
+ # Replacements — lifecycle CSV source of truth
417
+ # ============================
418
def extract_model_token(text: str) -> str:
    """Pull a router model token out of a lifecycle replacement cell.

    The cell may hold several "|"-separated segments; segments are scanned
    from last to first. Within a segment the patterns are tried in order
    (RUT-series, IXnn, R/E/S + digits, then a generic letters+digits token).
    The first hit is returned upper-cased. If nothing matches, the first
    scanned segment is returned, truncated to 60 characters.
    """
    cleaned = safe_str(text)
    if not cleaned:
        return ""

    segments = [seg.strip() for seg in cleaned.split("|") if seg.strip()]
    ordered = list(reversed(segments)) if segments else [cleaned]

    finders = (
        lambda c: re.search(r"\bRUT[A-Z]?\d{2,4}\b", c.upper()),
        lambda c: re.search(r"\bIX\d{2}\b", c, flags=re.IGNORECASE),
        lambda c: re.search(r"\b(R\d{3,4}|E\d{3,4}|S\d{3,4})\b", c, flags=re.IGNORECASE),
        lambda c: re.search(r"\b[A-Z]{1,6}\d{2,4}[A-Z]?\b", c.upper()),
    )

    for segment in ordered:
        for find in finders:
            hit = find(segment)
            if hit:
                return hit.group(0).upper()

    return ordered[0][:60]
438
+
439
def device_is_4g(row: pd.Series) -> bool:
    """Return True when the row's text indicates a 4G/LTE-only device.

    The normalized description, notes and SKU are inspected. Any 5G/NR
    mention wins and yields False. Otherwise "lte", "4g" or an LTE category
    token ("Cat 4", "Cat6", "Cat-M1", ...) yields True.

    "nr" and "cat" are matched as standalone tokens, not raw substrings, so
    words such as "application", "catalyst" or "inrouter" no longer trigger
    a match.
    """
    t = " ".join(
        norm_text(row.get(col, "")) for col in ("description", "notes", "sku")
    )

    # If it explicitly says 5G/NR, treat as not 4G-only.
    if "5g" in t or re.search(r"\bnr\b", t):
        return False

    # Classic signals.
    if "lte" in t or "4g" in t:
        return True

    # LTE-M categories, including the glued spelling "catm1".
    if re.search(r"\bcat\s*-?\s*m[12]\b", t):
        return True

    # Any other standalone "cat" token (Cat 4, Cat6, Cat-12, ...) is treated as
    # LTE-family; "cat" embedded in a longer word is not.
    return re.search(r"\bcat(?![a-z])", t) is not None
496
+
497
+
498
def candidate_5g_models_from_lifecycle(manufacturer: str) -> List[str]:
    """List distinct 5G model tokens the lifecycle CSV offers for a manufacturer.

    Tokens come from the ``advanced_5g_option`` column of rows whose
    manufacturer matches, in first-seen order. Both sides of the manufacturer
    comparison go through ``norm_text`` so padding or punctuation in the CSV
    cannot prevent a match. If the CSV has no ``manufacturer`` column, all
    rows are used.
    """
    mfr = norm_text(manufacturer)
    if "manufacturer" in df_eos.columns:
        if mfr:
            pool = df_eos[df_eos["manufacturer"].map(norm_text).eq(mfr)]
        else:
            # An empty manufacturer never matched any row before; keep that.
            pool = df_eos.iloc[0:0]
    else:
        pool = df_eos

    if "advanced_5g_option" not in pool.columns:
        return []

    out: List[str] = []
    seen = set()
    for v in pool["advanced_5g_option"].tolist():
        tok = extract_model_token(v)
        if tok and tok.lower() != "nan" and tok not in seen:
            seen.add(tok)
            out.append(tok)
    return out
508
+
509
def candidate_4g_models_from_lifecycle(manufacturer: str) -> List[str]:
    """List distinct 4G model tokens the lifecycle CSV offers for a manufacturer.

    Tokens come from the ``suggested_replacement`` column of rows whose
    manufacturer matches, in first-seen order. Both sides of the manufacturer
    comparison go through ``norm_text`` so padding or punctuation in the CSV
    cannot prevent a match. If the CSV has no ``manufacturer`` column, all
    rows are used.
    """
    mfr = norm_text(manufacturer)
    if "manufacturer" in df_eos.columns:
        if mfr:
            pool = df_eos[df_eos["manufacturer"].map(norm_text).eq(mfr)]
        else:
            # An empty manufacturer never matched any row before; keep that.
            pool = df_eos.iloc[0:0]
    else:
        pool = df_eos

    if "suggested_replacement" not in pool.columns:
        return []

    out: List[str] = []
    seen = set()
    for v in pool["suggested_replacement"].tolist():
        tok = extract_model_token(v)
        if tok and tok.lower() != "nan" and tok not in seen:
            seen.add(tok)
            out.append(tok)
    return out
519
+
520
def gpt_pick_from_candidates(old_row: pd.Series, candidates: List[str], need: str) -> str:
    """Ask the model to choose a replacement from ``candidates``.

    At most the first 40 candidates are sent. Returns the chosen model only
    if it is literally one of ``candidates``; otherwise, or when no client or
    no candidates are available, returns "".
    """
    if client is None or not candidates:
        return ""

    system_prompt = "Pick the best replacement model. Choose only from candidates. Return strict JSON only."
    old_device = {
        "sku": str(old_row.get("sku", "")),
        "manufacturer": str(old_row.get("manufacturer", "")),
        "description": str(old_row.get("description", "")),
        "need": need,
    }
    request = {
        "old_device": old_device,
        "candidates": candidates[:40],
        "output_schema": {"choice": "string"},
    }

    reply = gpt_json(system_prompt, request, max_tokens=240) or {}
    picked = str(reply.get("choice", "") or "").strip()
    if picked in candidates:
        return picked
    return ""
537
+
538
def fallback_5g_from_dec(canon_make: str) -> str:
    """Return the first 5G model listed in ``df_dec`` for ``canon_make``, or ""."""
    same_make = df_dec["_canon_make"] == canon_make
    flagged_5g = df_dec["_is5g"] == True  # noqa: E712 - element-wise comparison
    matches = df_dec[same_make & flagged_5g]
    if matches.empty:
        return ""
    return str(matches.iloc[0]["Model"]).strip()
541
+
542
def pick_replacements_lifecycle(row: pd.Series, status: str, use_gpt: bool = True) -> Dict[str, Any]:
    """Choose the 4G alternative and 5G replacement for a lifecycle row.

    The row's own ``suggested_replacement`` / ``advanced_5g_option`` cells are
    preferred. When a cell is empty, a model is taken from the same
    manufacturer's other rows (picked by GPT when ``use_gpt`` is set and a
    client exists, else the first candidate). The 5G side additionally falls
    back to ``df_dec``.

    Returns ``{"repl_4g", "repl_5g", "sources"}``. ``sources`` lists "gpt"
    only when a GPT pick was actually used for one of the two values.
    """
    canon = str(row.get("_canon_make", "UNKNOWN"))
    manufacturer = str(row.get("manufacturer", "") or "")

    sug_raw = safe_str(row.get("suggested_replacement", ""))
    adv_raw = safe_str(row.get("advanced_5g_option", ""))

    has_4g_alt = bool(sug_raw.strip())
    has_5g_alt = bool(adv_raw.strip())

    # Treat as 4G if the text indicates LTE OR lifecycle provides a 4G replacement.
    is_4g = device_is_4g(row) or has_4g_alt

    # Offer 5G if the unit is 4G, is EOS/EOL, or lifecycle provides a 5G option.
    want_5g = is_4g or (status in {"End of Sale", "End of Life"}) or has_5g_alt

    gpt_enabled = bool(use_gpt and client)
    gpt_used = False

    def _from_lifecycle(raw: str, pool_fn, need: str) -> str:
        """Token from the row's own cell, else a pick from the manufacturer pool."""
        nonlocal gpt_used
        model = extract_model_token(raw)
        if model:
            return model
        pool = pool_fn(manufacturer)
        if gpt_enabled:
            picked = gpt_pick_from_candidates(row, pool, need)
            if picked:
                gpt_used = True
                return picked
        return pool[0] if pool else ""

    repl_4g = "Not applicable"
    if is_4g:
        repl_4g = (
            _from_lifecycle(sug_raw, candidate_4g_models_from_lifecycle, "4G alternative")
            or "Not applicable"
        )

    repl_5g = "Not listed"
    if want_5g:
        repl_5g = (
            _from_lifecycle(adv_raw, candidate_5g_models_from_lifecycle, "5G replacement/upgrade")
            or fallback_5g_from_dec(canon)
            or "Not listed"
        )

    # A stringified missing value must not be shown as a model name.
    if repl_5g.lower() == "nan":
        repl_5g = "Not listed"

    sources = ["lifecycle_csv"] + (["gpt"] if gpt_used else [])
    return {"repl_4g": repl_4g, "repl_5g": repl_5g, "sources": sources}
582
+
583
+
584
+ # ============================
585
+ # Antennas (Parsec-only)
586
+ # ============================
587
# Product-family names looked for (lower-case) when naming a catalog card.
PARSEC_FAMILY_WORDS = {
    "chinook",
    "labrador",
    "boxer",
    "bloodhound",
    "husky",
    "beagle",
    "mastiff",
    "collie",
    "shepherd",
    "belgian",
    "australian",
    "terrier",
    "pyrenees",
}
# Lower-case fragments that mark a line as a section heading, not a name.
BAD_NAME_MARKERS = {
    "customization",
    "standard connectors",
    "connectors",
    "features",
    "benefits",
    "specifications",
    "mechanical",
    "electrical",
    "mounting",
    "accessories",
    "description:",
    "standard sku",
}
589
+
590
def clean_line(s: str) -> str:
    """Collapse whitespace in a catalog line; drop bare "-suffix" fragments.

    Returns "" for falsy input and for lines that consist only of a hyphen
    followed by letters/digits (case-insensitive).
    """
    collapsed = re.sub(r"\s+", " ", str(s or "").strip())
    is_suffix_only = re.fullmatch(r"-[a-z0-9]+", collapsed.lower()) is not None
    return "" if is_suffix_only else collapsed
595
+
596
def is_bad_name_line(line: str) -> bool:
    """Return True when ``line`` should not be used as an antenna name.

    A line is rejected if it contains any heading marker, or if it is short
    (25 characters or fewer) and contains a "-xxxx" style suffix fragment.
    """
    lowered = line.lower()
    for marker in BAD_NAME_MARKERS:
        if marker in lowered:
            return True
    has_suffix = re.search(r"\b-[a-z0-9]{1,4}\b", lowered) is not None
    return has_suffix and len(lowered) <= 25
603
+
604
def family_from_line(line: str) -> str:
    """Return the capitalized Parsec family word found in ``line``, or "".

    When several family words occur, the one that appears earliest in the
    line wins (alphabetical order breaks ties). This keeps the result stable
    between runs; returning the first hit while iterating the set would
    depend on string hash ordering.
    """
    low = line.lower()
    hits = [(low.find(fam), fam) for fam in PARSEC_FAMILY_WORDS if fam in low]
    if not hits:
        return ""
    return min(hits)[1].capitalize()
610
+
611
def parsec_connectors_from_card(t: str) -> str:
    """Extract the "Standard Connectors:" value from a card.

    The value is whitespace-collapsed and cut to 80 characters. Returns ""
    when the label is absent.
    """
    found = re.search(r"Standard\s+Connectors:\s*(.+)", t, flags=re.IGNORECASE)
    if found is None:
        return ""
    value = found.group(1).strip()
    return re.sub(r"\s+", " ", value)[:80]
616
+
617
def parsec_mounts_from_card(t: str) -> List[str]:
    """Collect the mount types named on "Mount:" lines of a card.

    Each value is split on commas and lower-cased. Duplicates are dropped
    while keeping first-seen order.
    """
    collected: List[str] = []
    for found in re.finditer(r"Mount:\s*(.+)", t, flags=re.IGNORECASE):
        value = re.sub(r"\s+", " ", found.group(1).strip())
        for piece in value.split(","):
            piece = piece.strip()
            if piece:
                collected.append(piece.lower())
    # dict preserves insertion order, giving an order-stable de-duplication.
    return list(dict.fromkeys(collected))
629
+
630
def parsec_name_from_card(card_text: str) -> str:
    """Derive a display name for a catalog card.

    First choice: the family word from the first non-heading line that
    contains one. Second choice: scanning upward from the first
    "standard sku" line (up to 12 lines back), the first word of a short
    line (3-40 characters) that contains a letter. Otherwise returns
    "Parsec antenna".
    """
    cleaned = (clean_line(raw) for raw in str(card_text or "").splitlines())
    lines = [ln for ln in cleaned if ln]

    for ln in lines:
        if not is_bad_name_line(ln):
            family = family_from_line(ln)
            if family:
                return family

    sku_pos = next(
        (pos for pos, ln in enumerate(lines) if "standard sku" in ln.lower()),
        None,
    )
    if sku_pos is None:
        return "Parsec antenna"

    preceding = lines[max(0, sku_pos - 12):sku_pos]
    for ln in preceding[::-1]:
        if is_bad_name_line(ln):
            continue
        if 3 <= len(ln) <= 40 and re.search(r"[A-Za-z]", ln):
            return ln.split()[0].capitalize()

    return "Parsec antenna"
655
+
656
def parsec_part_from_card(t: str) -> str:
    """Return the token after "Standard SKU:" (upper-case letters/digits), or ""."""
    found = re.search(r"Standard\s+SKU:\s*([A-Z0-9]+)", t)
    if found is None:
        return ""
    return found.group(1).strip()
659
+
660
def parsec_desc_from_card(t: str) -> str:
    """Return the rest of the line after "Description:", or "".

    The value is whitespace-collapsed and cut to 220 characters.
    """
    found = re.search(r"Description:\s*(.+?)(?:\n|$)", t, flags=re.IGNORECASE)
    if found is None:
        return ""
    return re.sub(r"\s+", " ", found.group(1).strip())[:220]
663
+
664
def parsec_retrieve(query: str, top_k: int = 12) -> List[Dict[str, Any]]:
    """Retrieve the ``top_k`` catalog cards closest to ``query``.

    Each result carries the similarity score, the fields parsed from the
    card, and the lower-cased card text under ``_card`` for later scoring.
    """
    query_vec = np.asarray(
        embedder.encode([query], normalize_embeddings=True),
        dtype=np.float32,
    )
    scores, ids = parsec_index.search(query_vec, top_k)

    results: List[Dict[str, Any]] = []
    card_count = len(parsec_cards)
    for score, raw_id in zip(scores[0].tolist(), ids[0].tolist()):
        idx = int(raw_id)
        # Skip ids that do not address a card (e.g. negative placeholder ids).
        if idx < 0 or idx >= card_count:
            continue
        text = parsec_cards[idx]
        results.append(
            {
                "score": float(score),
                "name": parsec_name_from_card(text),
                "part_number": parsec_part_from_card(text),
                "description": parsec_desc_from_card(text),
                "connectors": parsec_connectors_from_card(text),
                "mounts": parsec_mounts_from_card(text),
                "_card": text.lower(),
            }
        )
    return results
682
+
683
def choose_best_parsec(cands: List[Dict[str, Any]], mode: str) -> Dict[str, Any]:
    """Pick the best antenna candidate for ``mode`` ("vehicle" or "stationary").

    The retrieval score is adjusted with fixed bonuses/penalties based on the
    card text and mount types. The highest adjusted score wins, and the
    earliest candidate wins ties. The result is a copy of the winner without
    its ``_card`` field, or a blank "Parsec antenna" record if there is none.
    """

    def adjusted_score(cand: Dict[str, Any]) -> float:
        text = cand.get("_card", "")
        mount_list = cand.get("mounts", []) or []

        def mounted(word: str) -> bool:
            return any(word in mount for mount in mount_list)

        total = float(cand.get("score", 0.0))
        if "omni" in text:
            total += 0.6
        if "directional" in text:
            total -= 1.5

        if mode == "vehicle":
            if mounted("magnetic"):
                total += 3.0
            if mounted("through"):
                total += 2.0
            if mounted("wall") or mounted("pole"):
                total -= 1.2
            if "app: fixed" in text and "mobile" not in text:
                total -= 2.0

        if mode == "stationary":
            if mounted("wall"):
                total += 2.0
            if mounted("pole"):
                total += 1.8
        return total

    winner = None
    top_score = -1e9
    for cand in cands:
        current = adjusted_score(cand)
        if current > top_score:
            top_score, winner = current, cand

    if not winner:
        return {"name": "Parsec antenna", "part_number": "", "description": "", "connectors": "", "mounts": []}

    # Copy so the caller's candidate keeps its internal "_card" text.
    return {key: value for key, value in dict(winner).items() if key != "_card"}
723
+
724
+
725
def infer_mimo_for_5g(model: str, canon_make: str) -> str:
    """Best-effort MIMO guess ("2x2" or "4x4") used for antenna selection.

    Unknown models default to "2x2". Otherwise a default is derived from the
    model name and then refined from the matching ``df_dec`` row when one is
    found for the same maker. Any error during that lookup returns the
    name-based default.
    """
    if not model or model in {"Not applicable", "Not listed"}:
        return "2x2"

    # Name-based default: a "5g" hint or one of these prefixes leans 4x4.
    name_hints_5g = "5g" in model.lower() or model.upper().startswith(("R", "E", "S", "IX", "RUTM"))
    default = "4x4" if name_hints_5g else "2x2"

    try:
        pool = df_dec[df_dec["_canon_make"] == canon_make].copy()
        if pool.empty:
            return default

        hit = process.extractOne(norm_text(model), pool["_norm_model"].tolist(), scorer=fuzz.WRatio)
        if not hit or hit[1] < MATCH_OK:
            return default

        row = pool.iloc[int(hit[2])]
        columns = ("Antennas (internal/external/both)", "Modem Type", "Special notes")
        txt2 = " ".join(str(row.get(col, "")) for col in columns).lower()

        # Checked in order: explicit 4x4, explicit 2x2, then a 5G/NR mention.
        rules = (
            (("4x4", "4 x 4", "4x 4"), "4x4"),
            (("2x2", "2 x 2"), "2x2"),
            (("5g", "nr"), "4x4"),
        )
        for markers, verdict in rules:
            if any(marker in txt2 for marker in markers):
                return verdict
        return default
    except Exception:
        return default
757
+
758
def antenna_options_for(router_model: str, tech: str, mimo: str) -> Dict[str, Any]:
    """Pick one stationary and one vehicle omni antenna for a router.

    Runs one catalog retrieval per use case, scores the hits for that use
    case, and tags each winner with the MIMO value and a short reason.
    """
    prefix = f"{router_model} {tech} {mimo} omni"
    queries = {
        "stationary": f"{prefix} stationary pole wall fixed site Parsec",
        "vehicle": f"{prefix} vehicle mobile magnetic through-bolt Parsec",
    }

    stationary_hits = parsec_retrieve(queries["stationary"], top_k=12)
    vehicle_hits = parsec_retrieve(queries["vehicle"], top_k=12)

    stationary_pick = choose_best_parsec(stationary_hits, mode="stationary")
    vehicle_pick = choose_best_parsec(vehicle_hits, mode="vehicle")

    stationary_pick.update({"mimo": mimo, "why": "Stationary omni best match."})
    vehicle_pick.update({"mimo": mimo, "why": "Vehicle omni best match."})

    return {
        "stationary_omni": stationary_pick,
        "vehicle_omni": vehicle_pick,
        "sources": ["parsec_rag"],
    }
772
+
773
+
774
+ # ============================
775
+ # Install-ready checklist
776
+ # ============================
777
def install_ready_checklist(current_sku: str, repl: Dict[str,Any], ant: Dict[str,Any]) -> str:
    """Produce a markdown install checklist for the looked-up device.

    Without an OpenAI client a fixed template is filled in locally; with one,
    the model writes the checklist and its stripped text output is returned.
    """
    stationary = ant.get("stationary_omni", {})
    vehicle = ant.get("vehicle_omni", {})

    if client is None:
        template_lines = [
            "### Install-ready checklist",
            f"- Current device: {current_sku}",
            f"- 5G replacement: {repl.get('repl_5g','')}",
            f"- 4G alternative: {repl.get('repl_4g','Not applicable')}",
            f"- Stationary omni antenna: {stationary.get('name','')} (PN {stationary.get('part_number','')})",
            f"- Vehicle omni antenna: {vehicle.get('name','')} (PN {vehicle.get('part_number','')})",
            "- Next steps: confirm mounting + cable lengths + power; place order; schedule install.",
        ]
        return "\n".join(template_lines)

    system_prompt = "Create a short, install-ready checklist for a Verizon rep. Return markdown only."
    request = {
        "current_device": current_sku,
        "replacements": repl,
        "antennas": {"stationary": stationary, "vehicle": vehicle},
    }
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": json.dumps(request)},
    ]
    resp = client.responses.create(
        model=OPENAI_MODEL,
        reasoning=OPENAI_REASONING,
        input=messages,
        max_output_tokens=520,
    )
    text = getattr(resp, "output_text", "") or ""
    return text.strip()
799
+
800
+
801
+ # ============================
802
+ # Batch mode (NO GPT)
803
+ # ============================
804
def parse_batch_inputs(text_blob: str, file_obj: Any) -> List[str]:
    """Collect the batch device list from an uploaded CSV and/or pasted text.

    CSV entries (first column) come first, then pasted lines. Blank entries
    and blank CSV cells are skipped. Duplicates are removed by normalized
    text while keeping first-seen order and original spelling.

    An unreadable or empty CSV is ignored (best effort) so pasted text is
    still processed.
    """
    items: List[str] = []

    if file_obj is not None:
        path = file_obj.name if hasattr(file_obj, "name") else str(file_obj)
        try:
            first_col = pd.read_csv(path).iloc[:, 0]
        except (OSError, ValueError, IndexError):
            # Missing file, unparsable/empty CSV, or a CSV with no columns.
            first_col = None
        if first_col is not None:
            # dropna: a blank cell must not turn into the literal device "nan".
            for cell in first_col.dropna().tolist():
                text = str(cell).strip()
                if text:
                    items.append(text)

    if text_blob:
        for ln in str(text_blob).splitlines():
            ln = ln.strip()
            if ln:
                items.append(ln)

    seen = set()
    out: List[str] = []
    for x in items:
        k = norm_text(x)
        if k and k not in seen:
            seen.add(k)
            out.append(x)
    return out
826
+
827
def run_batch(text_blob: str, file_obj: Any, include_antennas: bool):
    """Resolve a list of devices and build the batch report.

    Returns ``(summary, table, csv_path, rollup)``: a one-line status
    summary, the results DataFrame, the path of a CSV export, and a markdown
    roll-up of the most frequent 5G recommendations. With no inputs it
    returns ``("", None, None, "")``.

    When ``include_antennas`` is true, two extra columns carry the
    stationary and vehicle antenna picks for each matched device.
    """
    inputs = parse_batch_inputs(text_blob, file_obj)
    if not inputs:
        return "", None, None, ""

    antenna_cols = ("Stationary antenna", "Vehicle antenna") if include_antennas else ()

    def _antenna_cells(life_row, repl) -> Dict[str, str]:
        """Antenna picks for one matched row, formatted as "name (PN part)"."""
        repl_5g = str(repl.get("repl_5g", "") or "")
        has_5g = bool(repl_5g) and repl_5g != "Not listed"
        tech = "5G" if has_5g else ("4G" if device_is_4g(life_row) else "Unknown")
        mimo = infer_mimo_for_5g(repl_5g, str(life_row.get("_canon_make", "UNKNOWN")))
        ant = antenna_options_for(repl_5g if has_5g else str(life_row.get("sku", "")), tech, mimo)
        cells = {}
        for col, key in zip(antenna_cols, ("stationary_omni", "vehicle_omni")):
            pick = ant.get(key, {}) or {}
            cells[col] = f"{pick.get('name','')} (PN {pick.get('part_number','')})"
        return cells

    rows = []
    for item in inputs:
        res = resolve_device(item)
        if res.get("mode") != "ok":
            row = {
                "Input": item,
                "Matched": "",
                "Status": "Needs review",
                "EOS": "",
                "EOL": "",
                "4G alternative": "",
                "5G replacement": "",
                "Notes": "Not found/ambiguous",
            }
            row.update({col: "" for col in antenna_cols})
            rows.append(row)
            continue

        life_row = df_eos.iloc[int(res["row_idx"])]
        eos, eol, status = row_to_dates_and_status(life_row)
        repl = pick_replacements_lifecycle(life_row, status, use_gpt=False)

        row = {
            "Input": item,
            "Matched": str(life_row.get("sku", "")),
            "Status": status,
            "EOS": eos,
            "EOL": eol,
            "4G alternative": repl.get("repl_4g", ""),
            "5G replacement": repl.get("repl_5g", ""),
            "Notes": "",
        }
        if include_antennas:
            row.update(_antenna_cells(life_row, repl))
        rows.append(row)

    out_df = pd.DataFrame(rows)
    counts = out_df["Status"].value_counts(dropna=False).to_dict()
    top_5g = out_df["5G replacement"].value_counts(dropna=False).head(5).to_dict()
    summary = f"Rows: {len(out_df)} | " + " | ".join([f"{k}: {v}" for k, v in counts.items()])
    rollup = "Top 5G recommendations:\n" + "\n".join(
        [f"- {k}: {v}" for k, v in top_5g.items() if str(k).strip()]
    )

    # Reserve a path, then close the handle before pandas writes to it, so the
    # descriptor is not leaked and the file is not held open during the write.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
        csv_path = tmp.name
    out_df.to_csv(csv_path, index=False)

    return summary, out_df, csv_path, rollup
864
+
865
+
866
+ # ============================
867
+ # Output
868
+ # ============================
869
+ def assemble_output(life_row: pd.Series, status: str, eos: str, eol: str, repl: Dict[str,Any], ant: Dict[str,Any]) -> str:
870
+ current_name = f"{life_row.get('sku','')} — {life_row.get('description','')}".strip(" —")
871
+ st = ant.get("stationary_omni", {})
872
+ vh = ant.get("vehicle_omni", {})
873
+
874
+ lines = []
875
+ lines.append(f"1. Current device: **{current_name}**")
876
+ lines.append(f"2. Status: **{status}**")
877
+ lines.append(f"3. End of Sale date: **{eos}**")
878
+ lines.append(f"4. End of Life date: **{eol}**")
879
+ lines.append(f"5. 4G alternative (lifecycle): **{repl.get('repl_4g','Not applicable')}**")
880
+ lines.append(f"6. 5G replacement (lifecycle): **{repl.get('repl_5g','Not listed')}**")
881
+ lines.append("7. Antenna options (Parsec-only):")
882
+ conn_s = f" | Conn: {st.get('connectors','')}" if st.get("connectors") else ""
883
+ conn_v = f" | Conn: {vh.get('connectors','')}" if vh.get("connectors") else ""
884
+ lines.append(f" - Stationary (Omni): **{st.get('name','')}** (Part #: {st.get('part_number','')}) — {st.get('description','')} — MIMO: {st.get('mimo','')}{conn_s}")
885
+ lines.append(f" - Vehicle (Omni): **{vh.get('name','')}** (Part #: {vh.get('part_number','')}) — {vh.get('description','')} — MIMO: {vh.get('mimo','')}{conn_v}")
886
+
887
+ lines.append("\nSources (debug):")
888
+ for s in repl.get("sources", []) if isinstance(repl.get("sources"), list) else []:
889
+ lines.append(f"- {s}")
890
+ lines.append("- ParsecCatalog.pdf (local RAG)")
891
+ lines.append("- routers_eos_eol_by_sku.csv (replacements)")
892
+ return "\n".join(lines)
893
+
894
+
895
+ # ============================
896
+ # Gradio callbacks
897
+ # IMPORTANT: no dict state and ALL events have api_name=False (prevents api_info schema generation)
898
+ # ============================
899
def _render_lookup(row_idx: int, raw_text: str):
    """Build the five callback outputs for a resolved lifecycle row.

    Shared by ``run_lookup`` and ``use_selection``. Returns
    ``(markdown, dropdown_update, button_update, state_json, install_md)``.
    """
    life_row = df_eos.iloc[int(row_idx)]
    eos, eol, status = row_to_dates_and_status(life_row)

    repl = pick_replacements_lifecycle(life_row, status, use_gpt=True)
    repl_5g = repl.get("repl_5g") or ""
    # "Not listed" is a placeholder, not a model; it must not be used as one.
    has_5g = repl_5g != "" and repl_5g != "Not listed"

    canon_make = str(life_row.get("_canon_make", "UNKNOWN"))
    mimo = infer_mimo_for_5g(repl_5g, canon_make)
    tech = "5G" if has_5g else ("4G" if device_is_4g(life_row) else "Unknown")
    # Query antennas for the 5G replacement when there is one, else for the
    # current SKU.
    query_model = repl_5g if has_5g else str(life_row.get("sku", ""))
    ant = antenna_options_for(query_model, tech, mimo)

    output = assemble_output(life_row, status, eos, eol, repl, ant)
    st_out = {"row_idx": int(row_idx), "repl": repl, "ant": ant, "raw": raw_text}
    return output, gr.update(visible=False), gr.update(visible=False), state_dump(st_out), ""


def run_lookup(user_text: str, st_json: str):
    """"Check" button callback: resolve the typed device and render the result.

    Returns ``(markdown, dropdown_update, button_update, state_json,
    install_md)``. When the input is ambiguous the picker is shown and the
    options are stored in the state for ``use_selection``.
    """
    user_text = str(user_text or "").strip()
    if not user_text:
        return "Enter a router SKU/model.", gr.update(visible=False), gr.update(visible=False), "{}", ""

    res = resolve_device(user_text)
    mode = res.get("mode")

    if mode == "pick":
        opts = res.get("options", [])
        choices = [o["label"] for o in opts]
        st2 = {"mode": "pick", "options": opts, "raw": user_text}
        return (
            "Did you mean A or B? Pick one, then click Use selection.",
            gr.update(choices=choices, value=None, visible=True),
            gr.update(visible=True),
            state_dump(st2),
            "",
        )

    if mode != "ok":
        return "Not found.", gr.update(visible=False), gr.update(visible=False), "{}", ""

    return _render_lookup(int(res["row_idx"]), user_text)


def use_selection(selected_label: str, st_json: str):
    """"Use selection" button callback: render the option picked in the dropdown.

    Requires a state produced by an ambiguous ``run_lookup``. The selected
    label is mapped back to its row through the stored options.
    """
    st = state_load(st_json)
    if not st or st.get("mode") != "pick":
        return "Run a search first.", gr.update(visible=False), gr.update(visible=False), "{}", ""

    if not selected_label:
        return "Pick A or B first.", gr.update(visible=True), gr.update(visible=True), st_json, ""

    chosen_row = None
    for o in st.get("options", []):
        if o.get("label") == selected_label:
            chosen_row = int(o["row_idx"])
            break
    if chosen_row is None:
        return "Pick a valid option.", gr.update(visible=True), gr.update(visible=True), st_json, ""

    return _render_lookup(chosen_row, st.get("raw", ""))
956
+
957
def make_install_ready(st_json: str):
    """Checklist button callback: build the install checklist from saved state.

    Needs a state that contains ``row_idx`` (written after a successful
    lookup); otherwise asks the user to run a lookup first.
    """
    state = state_load(st_json)
    if not state or "row_idx" not in state:
        return "Run a lookup first."

    record = df_eos.iloc[int(state["row_idx"])]
    sku = str(record.get("sku", "") or "")
    replacements = state.get("repl", {}) or {}
    antennas = state.get("ant", {}) or {}
    return install_ready_checklist(sku, replacements, antennas)
964
+
965
+
966
+ # ============================
967
+ # UI
968
+ # ============================
969
with gr.Blocks(title="Only-Routers") as demo:
    gr.Markdown("## Only-Routers\nSingle lookup + Batch upload for Verizon reps.")

    with gr.Tabs():
        with gr.Tab("Single"):
            user_text = gr.Textbox(
                label="Router SKU or model",
                placeholder="Examples: IBR650B, AER1600, ES450, WR21, RUT240",
                lines=1,
            )
            st = gr.State("{}")  # JSON string

            check_btn = gr.Button("Check", variant="primary")
            pick_dd = gr.Dropdown(label="Pick A or B", choices=[], visible=False)
            use_btn = gr.Button("Use selection", visible=False)

            output_md = gr.Markdown()

            install_btn = gr.Button("Make install-ready checklist")
            install_md = gr.Markdown()

            # Both lookup callbacks write the same five components.
            check_btn.click(
                fn=run_lookup,
                inputs=[user_text, st],
                outputs=[output_md, pick_dd, use_btn, st, install_md],
                api_name=False,
            )
            use_btn.click(
                fn=use_selection,
                inputs=[pick_dd, st],
                outputs=[output_md, pick_dd, use_btn, st, install_md],
                api_name=False,
            )
            install_btn.click(
                fn=make_install_ready,
                inputs=[st],
                outputs=[install_md],
                api_name=False,
            )

        with gr.Tab("Batch"):
            gr.Markdown("Paste one per line or upload a CSV (first column). Batch runs fast (no GPT).")
            batch_text = gr.Textbox(
                label="Paste devices (one per line)",
                lines=8,
                placeholder="WR21\nRUT240\nIBR650B",
            )
            batch_file = gr.File(label="Upload CSV", file_types=[".csv"])
            include_ant = gr.Checkbox(label="Include antenna picks (slower)", value=False)
            run_btn = gr.Button("Run batch", variant="primary")

            summary_md = gr.Markdown()
            rollup_md = gr.Markdown()
            table = gr.Dataframe(interactive=False, wrap=True)
            dl = gr.File(label="Download results CSV")

            run_btn.click(
                fn=run_batch,
                inputs=[batch_text, batch_file, include_ant],
                outputs=[summary_md, table, dl, rollup_md],
                api_name=False,
            )

# IMPORTANT: On Spaces, demo.launch() is correct; do NOT use share=True.
demo.launch(show_api=False)
Updates/only-routers_ai_poc_hf_fixed_v7.ipynb ADDED
@@ -0,0 +1,1207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "b10cd58a",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Only-Routers (HF fixed v7)\n",
9
+ "\n",
10
+ "Adds replacement feature table + manufacturer link; enforces 5G->4x4 antenna.\n"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": null,
16
+ "id": "0a7e8886",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "import os\n",
21
+ "import re\n",
22
+ "import json\n",
23
+ "import math\n",
24
+ "import hashlib\n",
25
+ "import tempfile\n",
26
+ "from dataclasses import dataclass\n",
27
+ "from datetime import datetime, date\n",
28
+ "from typing import Any, Dict, List, Optional, Tuple\n",
29
+ "\n",
30
+ "import numpy as np\n",
31
+ "import pandas as pd\n",
32
+ "\n",
33
+ "import fitz # PyMuPDF\n",
34
+ "import faiss\n",
35
+ "from sentence_transformers import SentenceTransformer\n",
36
+ "from rapidfuzz import fuzz, process\n",
37
+ "\n",
38
+ "import gradio as gr\n",
39
+ "from openai import OpenAI\n",
40
+ "\n",
41
+ "\n",
42
+ "# ============================\n",
43
+ "# Settings\n",
44
+ "# ============================\n",
45
+ "TODAY = date(2026, 1, 18)\n",
46
+ "OPENAI_MODEL = \"gpt-5.2\"\n",
47
+ "OPENAI_REASONING = {\"effort\": \"high\"}\n",
48
+ "MATCH_OK = 80\n",
49
+ "\n",
50
+ "EMBED_MODEL_NAME = \"sentence-transformers/all-MiniLM-L6-v2\"\n",
51
+ "PARSEC_CONTEXT_BEFORE = 900\n",
52
+ "PARSEC_CONTEXT_AFTER = 1600\n",
53
+ "\n",
54
+ "\n",
55
+ "# ============================\n",
56
+ "# OpenAI client (HF Space secret: OPENAI_API_KEY)\n",
57
+ "# ============================\n",
58
+ "API_KEY = os.getenv(\"OPENAI_API_KEY\", \"\").strip()\n",
59
+ "client = OpenAI(api_key=API_KEY) if API_KEY else None\n",
60
+ "\n",
61
+ "# ----------------------------\n",
62
+ "# Gradio state helpers\n",
63
+ "# Keep state as a JSON STRING to avoid schema issues on Hugging Face.\n",
64
+ "# ----------------------------\n",
65
+ "def state_load(st_json: str) -> Dict[str, Any]:\n",
66
+ " try:\n",
67
+ " if not st_json:\n",
68
+ " return {}\n",
69
+ " return json.loads(st_json) if isinstance(st_json, str) else {}\n",
70
+ " except Exception:\n",
71
+ " return {}\n",
72
+ "\n",
73
+ "def state_dump(st: Dict[str, Any]) -> str:\n",
74
+ " try:\n",
75
+ " return json.dumps(st or {}, ensure_ascii=False)\n",
76
+ " except Exception:\n",
77
+ " return \"{}\"\n",
78
+ "\n",
79
+ "\n",
80
+ "\n",
81
+ "# ============================\n",
82
+ "# Helpers\n",
83
+ "# ============================\n",
84
+ "def norm_text(s: Any) -> str:\n",
85
+ " try:\n",
86
+ " if s is None or (isinstance(s, float) and math.isnan(s)) or pd.isna(s):\n",
87
+ " return \"\"\n",
88
+ " except Exception:\n",
89
+ " pass\n",
90
+ " s = str(s).strip().lower()\n",
91
+ " s = re.sub(r\"[^a-z0-9\\s\\-\\/]\", \" \", s)\n",
92
+ " s = re.sub(r\"\\s+\", \" \", s).strip()\n",
93
+ " return s\n",
94
+ "\n",
95
+ "def safe_str(v: Any) -> str:\n",
96
+ " if v is None or (isinstance(v, float) and pd.isna(v)) or pd.isna(v):\n",
97
+ " return \"\"\n",
98
+ " return str(v).strip()\n",
99
+ "\n",
100
+ "def is_5g(modem_type: Any) -> bool:\n",
101
+ " s = norm_text(modem_type)\n",
102
+ " return (\"5g\" in s) or (\"nr\" in s)\n",
103
+ "\n",
104
+ "def json_load_safe(s: str) -> Dict[str, Any]:\n",
105
+ " try:\n",
106
+ " return json.loads(s)\n",
107
+ " except Exception:\n",
108
+ " return {}\n",
109
+ "\n",
110
def gpt_json(system: str, payload: Dict[str, Any], max_tokens: int = 600) -> Dict[str, Any]:
    """Send *payload* (JSON-encoded) with a system prompt and parse the reply.

    Returns {} when no client is configured or when the reply text cannot be
    parsed by json_load_safe.
    """
    if client is None:
        return {}
    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": json.dumps(payload)},
    ]
    resp = client.responses.create(
        model=OPENAI_MODEL,
        reasoning=OPENAI_REASONING,
        input=messages,
        max_output_tokens=max_tokens,
    )
    reply_text = getattr(resp, "output_text", "") or ""
    return json_load_safe(reply_text)
120
+ "\n",
121
+ "\n",
122
+ "# ============================\n",
123
+ "# Load data\n",
124
+ "# ============================\n",
125
EOS_PATH = "routers_eos_eol_by_sku.csv"
DEC_PATH = "dec2025routers.csv"
PARSEC_PDF = "ParsecCatalog.pdf"

# Fail fast at import time if any bundled data file is absent.
for _required_path in (EOS_PATH, DEC_PATH, PARSEC_PDF):
    if not os.path.exists(_required_path):
        raise FileNotFoundError(f"Missing {_required_path} in repo.")

df_eos = pd.read_csv(EOS_PATH).copy()
df_dec = pd.read_csv(DEC_PATH).copy()
138
+ "\n",
139
+ "\n",
140
+ "def _canonize_eos_columns(df: pd.DataFrame) -> pd.DataFrame:\n",
141
+ " \"\"\"Normalize lifecycle CSV column names (case-insensitive) and create expected columns.\"\"\"\n",
142
+ " # Map various header spellings to canonical names used by the app\n",
143
+ " mapping = {}\n",
144
+ " for c in df.columns:\n",
145
+ " k = str(c).strip().lower().replace(\" \", \"_\")\n",
146
+ " if k in {\"sku\", \"model\", \"device\", \"device_sku\"}:\n",
147
+ " mapping[c] = \"sku\"\n",
148
+ " elif k in {\"manufacturer\", \"make\", \"vendor\"}:\n",
149
+ " mapping[c] = \"manufacturer\"\n",
150
+ " elif k in {\"device_type\", \"type\"}:\n",
151
+ " mapping[c] = \"device_type\"\n",
152
+ " elif k in {\"end_of_sale\", \"eos\", \"end_sale\", \"end_of_sales\"}:\n",
153
+ " mapping[c] = \"end_of_sale\"\n",
154
+ " elif k in {\"end_of_life\", \"eol\", \"end_life\"}:\n",
155
+ " mapping[c] = \"end_of_life\"\n",
156
+ " elif k in {\"suggested_replacement\", \"replacement_4g\", \"lte_replacement\", \"replacement_lte\", \"replacement\"}:\n",
157
+ " mapping[c] = \"suggested_replacement\"\n",
158
+ " elif k in {\"advanced_5g_option\", \"replacement_5g\", \"fiveg_replacement\", \"5g_replacement\", \"upgrade_5g\"}:\n",
159
+ " mapping[c] = \"advanced_5g_option\"\n",
160
+ " elif k in {\"region\", \"market\"}:\n",
161
+ " mapping[c] = \"region\"\n",
162
+ " elif k in {\"notes\", \"note\"}:\n",
163
+ " mapping[c] = \"notes\"\n",
164
+ " elif k in {\"description\", \"device_description\", \"name\"}:\n",
165
+ " mapping[c] = \"description\"\n",
166
+ "\n",
167
+ " df = df.rename(columns=mapping).copy()\n",
168
+ "\n",
169
+ " # Create expected columns if missing\n",
170
+ " if \"sku\" not in df.columns:\n",
171
+ " # Try the common capitalized header as a fallback\n",
172
+ " if \"SKU\" in df.columns:\n",
173
+ " df[\"sku\"] = df[\"SKU\"].astype(str)\n",
174
+ " else:\n",
175
+ " df[\"sku\"] = \"\"\n",
176
+ "\n",
177
+ " if \"manufacturer\" not in df.columns:\n",
178
+ " df[\"manufacturer\"] = \"\"\n",
179
+ "\n",
180
+ " if \"device_type\" not in df.columns:\n",
181
+ " df[\"device_type\"] = \"\"\n",
182
+ "\n",
183
+ " if \"description\" not in df.columns:\n",
184
+ " # If the simplified file removed description, use SKU as description (still searchable)\n",
185
+ " df[\"description\"] = df[\"sku\"].astype(str)\n",
186
+ "\n",
187
+ " if \"notes\" not in df.columns:\n",
188
+ " df[\"notes\"] = \"\"\n",
189
+ "\n",
190
+ " if \"region\" not in df.columns:\n",
191
+ " df[\"region\"] = \"\"\n",
192
+ "\n",
193
+ " if \"suggested_replacement\" not in df.columns:\n",
194
+ " df[\"suggested_replacement\"] = \"\"\n",
195
+ "\n",
196
+ " if \"advanced_5g_option\" not in df.columns:\n",
197
+ " df[\"advanced_5g_option\"] = \"\"\n",
198
+ "\n",
199
+ " if \"end_of_sale\" not in df.columns:\n",
200
+ " df[\"end_of_sale\"] = \"\"\n",
201
+ "\n",
202
+ " if \"end_of_life\" not in df.columns:\n",
203
+ " df[\"end_of_life\"] = \"\"\n",
204
+ "\n",
205
+ " return df\n",
206
+ "\n",
207
+ "df_eos = _canonize_eos_columns(df_eos)\n",
208
+ "\n",
209
+ "\n",
210
def region_ok(x: Any) -> bool:
    """Return True when a region cell is blank/unspecified or names the US / North America.

    Blank cells read from a CSV arrive as NaN rather than ""; they are
    treated like an empty region so those rows are not filtered out.
    """
    if x is None or (isinstance(x, float) and x != x):  # x != x is True only for NaN
        return True
    s = str(x or "").strip().lower()
    if not s:
        return True
    if "not specified" in s or "north america" in s:
        return True
    us_patterns = (r"\busa\b", r"\bunited\s+states\b", r"\bu\.?s\.?\b")
    return any(re.search(pattern, s) for pattern in us_patterns)
225
+ "\n",
226
# Keep only lifecycle rows whose region passes region_ok.
if "region" in df_eos.columns:
    _region_mask = df_eos["region"].apply(region_ok)
    df_eos = df_eos[_region_mask].reset_index(drop=True)
228
+ "\n",
229
+ "# Maker mapping (includes Teltonika)\n",
230
CANON_MAKER = {
    "CRADLEPOINT": {"cradlepoint", "ericsson", "ericsson enterprise wireless"},
    "SIERRA": {"sierra", "sierra wireless", "semtech", "airlink"},
    "FEENEY": {"feeney", "feeney wireless", "inseego"},
    "DIGI": {"digi", "accelerated", "accelerated concepts"},
    "CISCO_MERAKI": {"meraki", "cisco meraki"},
    "CISCO": {"cisco"},
    "TELTONIKA": {"teltonika"},
}


def canon_maker_from_text(s: Any) -> str:
    """Map free-form maker text to a CANON_MAKER key, or "UNKNOWN".

    Families are tried in the dict's declaration order and the first one with
    any term occurring as a substring of the normalized text wins (so
    CISCO_MERAKI is checked before CISCO).
    """
    text = norm_text(s)
    return next(
        (
            canon
            for canon, terms in CANON_MAKER.items()
            if any(term in text for term in terms)
        ),
        "UNKNOWN",
    )
247
+ "\n",
248
# Precompute helper columns used for matching. Each entry is
# (frame, new column, source column, transform, value used if source is absent).
for _frame, _target, _source, _transform, _default in (
    (df_eos, "_canon_make", "manufacturer", canon_maker_from_text, "UNKNOWN"),
    (df_eos, "_norm_sku", "sku", norm_text, ""),
    (df_eos, "_norm_desc", "description", norm_text, ""),
    (df_eos, "_norm_notes", "notes", norm_text, ""),
    (df_dec, "_canon_make", "Make", canon_maker_from_text, "UNKNOWN"),
    (df_dec, "_norm_model", "Model", norm_text, ""),
    (df_dec, "_is5g", "Modem Type", is_5g, False),
):
    if _source in _frame.columns:
        _frame[_target] = _frame[_source].apply(_transform)
    else:
        _frame[_target] = _default
256
+ "\n",
257
+ "\n",
258
+ "# ============================\n",
259
+ "# Date helpers\n",
260
+ "# ============================\n",
261
@dataclass
class ParsedDate:
    """Result of parsing one lifecycle date cell."""

    raw: str  # cell text after stripping ("" when the cell was empty)
    kind: str  # "missing", "bad", "full", "year" or "year_month"
    value: Optional[date]  # comparable date; None for "missing" and "bad"


def parse_date_field(x: Any) -> ParsedDate:
    """Parse a lifecycle date cell into a ParsedDate.

    Accepted shapes: M/D/YY, M/D/YYYY, YYYY, YYYY-MM and YYYY-MM-DD.
    Empty cells, including the NaN/NaT that pandas produces for blanks, give
    kind "missing". Text that fits no shape, or fits one but is not a real
    calendar date, gives kind "bad" with the raw text preserved.

    A bare year resolves to January 1 when it is the current year or earlier,
    and to December 31 when it lies in the future (relative to TODAY).
    """
    if x is None or (pd.api.types.is_scalar(x) and pd.isna(x)):
        return ParsedDate(raw="", kind="missing", value=None)
    raw = str(x or "").strip()
    if not raw:
        return ParsedDate(raw="", kind="missing", value=None)

    bad = ParsedDate(raw=raw, kind="bad", value=None)

    # Common US formats (e.g., 6/24/24, 9/30/21). strptime's %m and %d
    # already accept values without a leading zero.
    for fmt in ("%m/%d/%y", "%m/%d/%Y"):
        try:
            return ParsedDate(raw=raw, kind="full", value=datetime.strptime(raw, fmt).date())
        except ValueError:
            continue

    # YYYY
    if re.fullmatch(r"\d{4}", raw):
        year = int(raw)
        try:
            value = date(year, 1, 1) if year <= TODAY.year else date(year, 12, 31)
        except ValueError:  # e.g. "0000" is outside the supported year range
            return bad
        return ParsedDate(raw=raw, kind="year", value=value)

    # YYYY-MM
    if re.fullmatch(r"\d{4}-\d{2}", raw):
        year_text, month_text = raw.split("-")
        try:
            return ParsedDate(raw=raw, kind="year_month", value=date(int(year_text), int(month_text), 1))
        except ValueError:
            return bad

    # YYYY-MM-DD
    if re.fullmatch(r"\d{4}-\d{2}-\d{2}", raw):
        try:
            return ParsedDate(raw=raw, kind="full", value=datetime.strptime(raw, "%Y-%m-%d").date())
        except ValueError:
            return bad

    # Last resort: leave as raw (unparsed)
    return bad
316
+ "\n",
317
def display_date(pd_: ParsedDate) -> str:
    """Return the text to show for a parsed date.

    Missing dates, and unparseable ones with no raw text, show as
    "Not listed"; everything else shows the raw cell text unchanged.
    """
    if pd_.kind == "missing" or (pd_.kind == "bad" and not pd_.raw):
        return "Not listed"
    return pd_.raw
323
+ "\n",
324
def status_from_eos_eol(eos: ParsedDate, eol: ParsedDate) -> str:
    """Classify a device as Unknown / End of Life / End of Sale / Active.

    End of Life takes precedence over End of Sale; a date counts once it is
    on or before TODAY. "Unknown" is returned only when neither date parsed.
    """
    sale_date, life_date = eos.value, eol.value
    if sale_date is None and life_date is None:
        return "Unknown"
    if life_date is not None and life_date <= TODAY:
        return "End of Life"
    if sale_date is not None and sale_date <= TODAY:
        return "End of Sale"
    return "Active"
332
+ "\n",
333
def row_to_dates_and_status(row: pd.Series) -> Tuple[str, str, str]:
    """Return (end-of-sale text, end-of-life text, status) for a lifecycle row."""
    parsed_sale = parse_date_field(row.get("end_of_sale"))
    parsed_life = parse_date_field(row.get("end_of_life"))
    status = status_from_eos_eol(parsed_sale, parsed_life)
    return display_date(parsed_sale), display_date(parsed_life), status
337
+ "\n",
338
+ "\n",
339
+ "# ============================\n",
340
+ "# Embeddings + Parsec index\n",
341
+ "# ============================\n",
342
+ "embedder = SentenceTransformer(EMBED_MODEL_NAME)\n",
343
+ "\n",
344
def extract_pdf_text_pages(path: str) -> List[str]:
    """Return the plain text of every page of the PDF at *path*, in page order.

    The document is opened as a context manager so the file handle is
    released once the text has been extracted.
    """
    with fitz.open(path) as doc:
        return [page.get_text("text") for page in doc]
347
+ "\n",
348
def build_parsec_cards(pages: List[str]) -> List[str]:
    """Cut a text "card" around every "Standard SKU:" marker on each page.

    Each card spans PARSEC_CONTEXT_BEFORE characters before the marker to
    PARSEC_CONTEXT_AFTER characters after its start, clipped to the page.
    Cards shorter than 200 characters after stripping are dropped, and exact
    duplicates (by SHA-1 of the text) keep only their first occurrence.
    """
    marker = re.compile(r"Standard\s+SKU:")
    by_digest = {}
    for page_text in pages:
        for hit in marker.finditer(page_text):
            lo = max(0, hit.start() - PARSEC_CONTEXT_BEFORE)
            hi = min(len(page_text), hit.start() + PARSEC_CONTEXT_AFTER)
            card = page_text[lo:hi].strip()
            if len(card) < 200:
                continue
            digest = hashlib.sha1(card.encode("utf-8")).hexdigest()
            # setdefault keeps the first card seen for a digest; dicts
            # preserve insertion order, so output order matches discovery.
            by_digest.setdefault(digest, card)
    return list(by_digest.values())
363
+ "\n",
364
# Build the card corpus, embed it (unit-normalized vectors) and load it into
# an inner-product FAISS index.
parsec_cards = build_parsec_cards(extract_pdf_text_pages(PARSEC_PDF))
parsec_emb = np.asarray(
    embedder.encode(
        parsec_cards,
        batch_size=64,
        show_progress_bar=False,
        normalize_embeddings=True,
    ),
    dtype=np.float32,
)
parsec_index = faiss.IndexFlatIP(parsec_emb.shape[1])
parsec_index.add(parsec_emb)
369
+ "\n",
370
+ "\n",
371
+ "# ============================\n",
372
+ "# Device resolution\n",
373
+ "# ============================\n",
374
def label_for_row(i: int) -> str:
    """Build the display label "sku — manufacturer — description" for row *i* (max 220 chars)."""
    record = df_eos.iloc[i]
    fields = (
        record.get("sku", ""),
        record.get("manufacturer", ""),
        record.get("description", ""),
    )
    return " — ".join(f"{value}" for value in fields)[:220]
377
+ "\n",
378
# Display labels and the fuzzy-search corpus, both indexed by row position.
EOS_LABELS = list(map(label_for_row, range(len(df_eos))))
EOS_CORPUS = [
    " ".join(parts)
    for parts in zip(
        df_eos["_norm_sku"],
        df_eos["_canon_make"],
        df_eos["_norm_desc"],
        df_eos["_norm_notes"],
    )
]
382
+ "\n",
383
def local_candidates(query: str, top_k: int = 6) -> List[Tuple[int, int, str]]:
    """Fuzzy-match *query* against EOS_CORPUS.

    Returns up to *top_k* tuples of (row position, integer score, label) in
    the order produced by the matcher.
    """
    matches = process.extract(norm_text(query), EOS_CORPUS, scorer=fuzz.WRatio, limit=top_k)
    ranked = []
    for _text, score, idx in matches:
        position = int(idx)
        ranked.append((position, int(score), EOS_LABELS[position]))
    return ranked
387
+ "\n",
388
def gpt_choose_device(user_text: str, candidates: List[Tuple[int,int,str]]) -> Dict[str, Any]:
    """Ask the model which candidate row the user meant.

    Returns the parsed JSON reply (expected to follow the "output_schema"
    sent in the payload), or {} when no client is configured.
    """
    if client is None:
        return {}
    system_prompt = "Pick which router the user meant. Never invent. Return strict JSON only."
    candidate_entries = [
        {"row_idx": row_idx, "score": score, "label": label}
        for row_idx, score, label in candidates
    ]
    request = {
        "user_input": user_text,
        "candidates": candidate_entries,
        "rules": [
            "If one is clearly correct, return mode='ok' with row_idx.",
            "If two are plausible, return mode='pick' with top 2 options."
        ],
        "output_schema": {"mode":"ok|pick","row_idx":"int","options":[{"row_idx":"int","label":"string"}]}
    }
    return gpt_json(system_prompt, request, max_tokens=280)
402
+ "\n",
403
def resolve_device(user_text: str) -> Dict[str, Any]:
    """Resolve free-form user text to a lifecycle row.

    Returns one of:
      {"mode": "ok", "row_idx": int}        - a single confident match
      {"mode": "pick", "options": [...]}    - up to two {"row_idx", "label"} choices
      {"mode": "not_found"}                 - empty query or no candidates

    Resolution order: exact normalized-SKU match, then a confident fuzzy match
    (score >= 95 and at least 8 points ahead of the runner-up), then the GPT
    chooser, then the top local candidates. Anything the model returns is
    only accepted if it refers to one of the candidates it was shown, and
    labels always come from the local table.
    """
    q = norm_text(user_text)
    if not q:
        # Nothing to match on; avoid matching rows with blank SKUs.
        return {"mode": "not_found"}

    exact = df_eos.index[df_eos["_norm_sku"] == q].tolist()
    if len(exact) == 1:
        return {"mode": "ok", "row_idx": int(exact[0])}
    if len(exact) > 1:
        opts = [{"row_idx": int(i), "label": EOS_LABELS[int(i)]} for i in exact[:2]]
        return {"mode": "pick", "options": opts}

    cands = local_candidates(user_text, top_k=6)
    if not cands:
        return {"mode": "not_found"}

    if cands[0][1] >= 95 and (len(cands) == 1 or (cands[0][1] - cands[1][1]) >= 8):
        return {"mode": "ok", "row_idx": cands[0][0]}

    labels = {idx: label for idx, _score, label in cands}

    def _valid_idx(value: Any) -> Optional[int]:
        """Return value as a candidate row index, or None if it is not one."""
        if isinstance(value, str) and value.strip().isdigit():
            value = int(value)
        if isinstance(value, int) and not isinstance(value, bool) and value in labels:
            return value
        return None

    g = gpt_choose_device(user_text, cands)
    if not isinstance(g, dict):
        g = {}

    if g.get("mode") == "ok":
        chosen = _valid_idx(g.get("row_idx"))
        if chosen is not None:
            return {"mode": "ok", "row_idx": chosen}

    if g.get("mode") == "pick":
        raw_opts = g.get("options")
        picked: List[Dict[str, Any]] = []
        for opt in (raw_opts if isinstance(raw_opts, list) else []):
            idx = _valid_idx(opt.get("row_idx")) if isinstance(opt, dict) else None
            if idx is not None and all(p["row_idx"] != idx for p in picked):
                picked.append({"row_idx": idx, "label": labels[idx]})
            if len(picked) == 2:
                break
        if picked:
            return {"mode": "pick", "options": picked}

    # Fall back to the best one or two local candidates.
    return {
        "mode": "pick",
        "options": [{"row_idx": idx, "label": label} for idx, _score, label in cands[:2]],
    }
432
+ "\n",
433
+ "\n",
434
+ "# ============================\n",
435
+ "# Replacements — lifecycle CSV source of truth\n",
436
+ "# ============================\n",
437
def extract_model_token(text: str) -> str:
    """Pull a short model token out of a replacement cell.

    The cell is split on "|" and segments are examined last-to-first. Within
    a segment the patterns are tried in order (RUT-style, IXnn, R/E/S + 3-4
    digits, then generic letters+digits) and the first hit is returned
    upper-cased. If nothing matches, the first 60 characters of the first
    examined segment are returned. Empty input yields "".
    """
    s = safe_str(text)
    if not s:
        return ""
    segments = [seg.strip() for seg in s.split("|") if seg.strip()]
    ordered = list(reversed(segments)) if segments else [s]
    for seg in ordered:
        upper = seg.upper()
        # (pattern, text to search, flags) in priority order
        probes = (
            (r"\bRUT[A-Z]?\d{2,4}\b", upper, 0),
            (r"\bIX\d{2}\b", seg, re.IGNORECASE),
            (r"\b(R\d{3,4}|E\d{3,4}|S\d{3,4})\b", seg, re.IGNORECASE),
            (r"\b[A-Z]{1,6}\d{2,4}[A-Z]?\b", upper, 0),
        )
        for pattern, subject, flags in probes:
            found = re.search(pattern, subject, flags=flags)
            if found:
                return found.group(0).upper()
    return ordered[0][:60]
457
+ "\n",
458
def device_is_4g(row: pd.Series) -> bool:
    """Heuristically decide whether a lifecycle row describes a 4G/LTE-only device.

    Looks at the normalized description, notes and SKU. Detects LTE/4G even
    when the description only says "Cat 4 / Cat6 / Cat 12" without "LTE".

    Unit suffixes are excluded so Wi-Fi bands and memory sizes do not count as
    cellular generations ("5GHz", "2.4GHz", "64GB"), and "nr", "lte" and "cat"
    must start or form a token so words such as "filter" or "application"
    do not trigger a match.
    """
    t = " ".join(norm_text(row.get(field, "")) for field in ("description", "notes", "sku"))

    # If it explicitly says 5G/NR, treat as not 4G-only
    if re.search(r"5g(?!hz|b\b)|\bnr\b", t):
        return False

    # Classic signals
    if re.search(r"\blte|4g(?!hz|b\b)", t):
        return True

    # LTE category signals (Cat M1/M2, or a numbered category 1..20)
    if re.search(r"\bcat\s*[-]?\s*(m1|m2)\b", t):
        return True

    m = re.search(r"\bcat\s*[-]?\s*(\d{1,2})\b", t)
    if m and 0 < int(m.group(1)) <= 20:
        return True

    # A standalone "cat" / "cat-m" token is almost always LTE-family
    return re.search(r"\bcat(?:\s*-?\s*m)?\b", t) is not None
515
+ "\n",
516
+ "\n",
517
def _lifecycle_candidate_models(manufacturer: str, column: str) -> List[str]:
    """Collect distinct model tokens from *column* for rows of one manufacturer.

    Both the requested manufacturer and the table's manufacturer cells are
    passed through norm_text before comparison, so differences in case,
    punctuation or surrounding whitespace do not empty the pool. Tokens keep
    first-seen order; empty tokens and the literal "nan" are skipped.
    """
    if column not in df_eos.columns:
        return []
    pool = df_eos
    if "manufacturer" in df_eos.columns:
        wanted = norm_text(manufacturer)
        pool = df_eos[df_eos["manufacturer"].apply(norm_text) == wanted]
    tokens = (extract_model_token(value) for value in pool[column].tolist())
    return list(dict.fromkeys(tok for tok in tokens if tok and tok.lower() != "nan"))


def candidate_5g_models_from_lifecycle(manufacturer: str) -> List[str]:
    """Return distinct 5G option tokens listed for *manufacturer* in the lifecycle table."""
    return _lifecycle_candidate_models(manufacturer, "advanced_5g_option")


def candidate_4g_models_from_lifecycle(manufacturer: str) -> List[str]:
    """Return distinct 4G replacement tokens listed for *manufacturer* in the lifecycle table."""
    return _lifecycle_candidate_models(manufacturer, "suggested_replacement")
538
+ "\n",
539
def gpt_pick_from_candidates(old_row: pd.Series, candidates: List[str], need: str) -> str:
    """Ask the model to choose one replacement from *candidates*.

    At most the first 40 candidates are sent. The reply is accepted only if
    it is exactly one of the candidates; otherwise "" is returned. Also
    returns "" when no client is configured or the list is empty.
    """
    if client is None or not candidates:
        return ""
    system_prompt = "Pick the best replacement model. Choose only from candidates. Return strict JSON only."
    old_device = {
        "sku": str(old_row.get("sku","")),
        "manufacturer": str(old_row.get("manufacturer","")),
        "description": str(old_row.get("description","")),
        "need": need,
    }
    request = {
        "old_device": old_device,
        "candidates": candidates[:40],
        "output_schema": {"choice":"string"}
    }
    reply = gpt_json(system_prompt, request, max_tokens=240) or {}
    choice = str(reply.get("choice","") or "").strip()
    if choice in candidates:
        return choice
    return ""
556
+ "\n",
557
def fallback_5g_from_dec(canon_make: str) -> str:
    """Return the first 5G model listed in df_dec for *canon_make*, or "" if none."""
    same_make = df_dec["_canon_make"] == canon_make
    flagged_5g = df_dec["_is5g"] == True  # noqa: E712 - element-wise comparison
    matches = df_dec[same_make & flagged_5g]
    if matches.empty:
        return ""
    return str(matches.iloc[0]["Model"]).strip()
560
+ "\n",
561
def pick_replacements_lifecycle(row: pd.Series, status: str, use_gpt: bool = True) -> Dict[str, Any]:
    """Choose the 4G alternative and 5G replacement for a lifecycle row.

    Each slot is filled from the first source that yields a value:
      1. the row's own lifecycle cell (suggested_replacement / advanced_5g_option),
      2. a GPT pick among the manufacturer's lifecycle candidates (only when
         use_gpt is set and a client exists), else the first such candidate,
      3. for 5G only, the first 5G model of the same maker in df_dec.
    Unfilled slots read "Not applicable" (4G) or "Not listed" (5G).
    """
    canon = str(row.get("_canon_make","UNKNOWN"))
    manufacturer = str(row.get("manufacturer","") or "")

    sug_raw = safe_str(row.get("suggested_replacement",""))
    adv_raw = safe_str(row.get("advanced_5g_option",""))
    has_4g_alt = bool(sug_raw.strip())
    has_5g_alt = bool(adv_raw.strip())

    # Treat as 4G if the description indicates LTE OR lifecycle provides a 4G suggested replacement
    is_4g = device_is_4g(row) or has_4g_alt
    # Provide 5G option if the unit is 4G, EOS/EOL, or lifecycle explicitly provides advanced_5g_option
    want_5g = is_4g or (status in {"End of Sale","End of Life"}) or has_5g_alt

    gpt_enabled = bool(use_gpt and client)

    def _from_pool(pool: List[str], need: str) -> str:
        """GPT pick when enabled, otherwise (or on no pick) the first pooled model."""
        chosen = gpt_pick_from_candidates(row, pool, need) if gpt_enabled else ""
        return chosen or (pool[0] if pool else "")

    repl_4g = "Not applicable"
    if is_4g:
        repl_4g = (
            extract_model_token(sug_raw)
            or _from_pool(candidate_4g_models_from_lifecycle(manufacturer), "4G alternative")
            or "Not applicable"
        )

    # 5G replacement: prefer lifecycle advanced_5g_option whenever present
    repl_5g = "Not listed"
    if want_5g:
        repl_5g = (
            extract_model_token(adv_raw)
            or _from_pool(candidate_5g_models_from_lifecycle(manufacturer), "5G replacement/upgrade")
            or fallback_5g_from_dec(canon)
            or "Not listed"
        )

    if repl_5g.lower() == "nan":
        repl_5g = "Not listed"

    sources = ["lifecycle_csv"] + (["gpt"] if gpt_enabled else [])
    return {"repl_4g": repl_4g, "repl_5g": repl_5g, "sources": sources}
601
+ "\n",
602
+ "\n",
603
+ "# ============================\n",
604
+ "# Antennas (Parsec-only)\n",
605
+ "# ============================\n",
606
PARSEC_FAMILY_WORDS = {"chinook","labrador","boxer","bloodhound","husky","beagle","mastiff","collie","shepherd","belgian","australian","terrier","pyrenees"}
BAD_NAME_MARKERS = {"customization","standard connectors","connectors","features","benefits","specifications","mechanical","electrical","mounting","accessories","description:","standard sku"}


def clean_line(s: str) -> str:
    """Collapse whitespace in a catalog line; drop lines that are only a "-suffix" token."""
    s = re.sub(r"\s+", " ", str(s or "").strip())
    if re.fullmatch(r"-[a-z0-9]+", s.lower()):
        return ""
    return s


def is_bad_name_line(line: str) -> bool:
    """Return True for lines that should not be used as a product name.

    That is any line containing a BAD_NAME_MARKERS phrase, or a short line
    (<= 25 chars) containing a hyphenated 1-4 character suffix.
    """
    low = line.lower()
    if any(marker in low for marker in BAD_NAME_MARKERS):
        return True
    return bool(re.search(r"\b-[a-z0-9]{1,4}\b", low)) and len(low) <= 25


def family_from_line(line: str) -> str:
    """Return the capitalized Parsec family word found in *line*, or "".

    When several family words occur, the one appearing earliest in the line
    is returned (longest first on a tie). Choosing by position rather than by
    set iteration order keeps the result stable between runs.
    """
    low = line.lower()
    hits = [(low.find(fam), -len(fam), fam) for fam in PARSEC_FAMILY_WORDS if fam in low]
    if not hits:
        return ""
    return min(hits)[2].capitalize()
629
+ "\n",
630
def parsec_connectors_from_card(t: str) -> str:
    """Return the text following "Standard Connectors:" (single-spaced, max 80 chars), or ""."""
    found = re.search(r"Standard\s+Connectors:\s*(.+)", t, flags=re.IGNORECASE)
    if found is None:
        return ""
    # split()/join strips the ends and collapses internal whitespace runs.
    return " ".join(found.group(1).split())[:80]
635
+ "\n",
636
def parsec_mounts_from_card(t: str) -> List[str]:
    """List the distinct mount types named on "Mount:" lines of a card.

    Values are comma-split, lower-cased and returned in first-seen order.
    """
    ordered = {}
    for hit in re.finditer(r"Mount:\s*(.+)", t, flags=re.IGNORECASE):
        value = re.sub(r"\s+", " ", hit.group(1).strip())
        for piece in value.split(","):
            mount = piece.strip().lower()
            if mount:
                ordered.setdefault(mount, None)
    return list(ordered)
648
+ "\n",
649
def parsec_name_from_card(card_text: str) -> str:
    """Derive a short product name for a catalog card.

    First choice is a Parsec family word on any acceptable line. Failing
    that, the nearest acceptable line (3-40 chars, containing a letter)
    within the 12 lines above the first "standard sku" line supplies its
    first word, capitalized. The final fallback is "Parsec antenna".
    """
    cleaned = (clean_line(raw) for raw in str(card_text or "").splitlines())
    lines = [ln for ln in cleaned if ln]

    for ln in lines:
        if is_bad_name_line(ln):
            continue
        family = family_from_line(ln)
        if family:
            return family

    sku_at = next(
        (pos for pos, ln in enumerate(lines) if "standard sku" in ln.lower()),
        None,
    )
    if sku_at is not None:
        for ln in reversed(lines[max(0, sku_at - 12):sku_at]):
            if is_bad_name_line(ln):
                continue
            if 3 <= len(ln) <= 40 and re.search(r"[A-Za-z]", ln):
                return ln.split()[0].capitalize()

    return "Parsec antenna"
674
+ "\n",
675
def parsec_part_from_card(t: str) -> str:
    """Return the run of uppercase letters/digits following "Standard SKU:", or ""."""
    found = re.search(r"Standard\s+SKU:\s*([A-Z0-9]+)", t)
    if found is None:
        return ""
    return found.group(1).strip()
678
+ "\n",
679
def parsec_desc_from_card(t: str) -> str:
    """Return the rest of the first "Description:" line (single-spaced, max 220 chars), or ""."""
    found = re.search(r"Description:\s*(.+?)(?:\n|$)", t, flags=re.IGNORECASE)
    if found is None:
        return ""
    return re.sub(r"\s+", " ", found.group(1).strip())[:220]
682
+ "\n",
683
def parsec_retrieve(query: str, top_k: int = 12) -> List[Dict[str, Any]]:
    """Return up to *top_k* catalog cards most similar to *query*.

    Each hit carries the similarity score, the fields parsed from the card,
    and the lower-cased card text under "_card". Ids outside the card list
    are skipped.
    """
    query_vec = np.asarray(
        embedder.encode([query], normalize_embeddings=True),
        dtype=np.float32,
    )
    scores, ids = parsec_index.search(query_vec, top_k)
    hits: List[Dict[str, Any]] = []
    for similarity, raw_id in zip(scores[0].tolist(), ids[0].tolist()):
        card_id = int(raw_id)
        if not 0 <= card_id < len(parsec_cards):
            continue
        card = parsec_cards[card_id]
        hits.append({
            "score": float(similarity),
            "name": parsec_name_from_card(card),
            "part_number": parsec_part_from_card(card),
            "description": parsec_desc_from_card(card),
            "connectors": parsec_connectors_from_card(card),
            "mounts": parsec_mounts_from_card(card),
            "_card": card.lower(),
        })
    return hits
701
+ "\n",
702
def choose_best_parsec(cands: List[Dict[str, Any]], mode: str) -> Dict[str, Any]:
    """Pick the best antenna candidate for *mode* ("stationary" or "vehicle").

    Each candidate starts from its retrieval score and is adjusted:
      * +0.6 if the card text mentions "omni";
      * -1.5 if it mentions "directional", not counting "omnidirectional",
        "omni-directional" or "omni directional", which describe an omni;
      * vehicle: +3.0 magnetic mount, +2.0 through mount, -1.2 wall/pole
        mount, -2.0 if the card says "app: fixed" without "mobile";
      * stationary: +2.0 wall mount, +1.8 pole mount.
    The first candidate with the highest adjusted score wins. The result is a
    copy without the internal "_card" key; with no usable candidate a
    placeholder "Parsec antenna" entry is returned.
    """
    def has_mount(mounts: List[str], word: str) -> bool:
        return any(word in mount for mount in mounts)

    best = None
    best_score = -1e9

    for c in cands:
        card = c.get("_card", "")
        mounts = c.get("mounts", []) or []
        score = float(c.get("score", 0.0))

        if "omni" in card:
            score += 0.6
        if re.search(r"(?<!omni)(?<!omni[\s-])directional", card):
            score -= 1.5

        if mode == "vehicle":
            if has_mount(mounts, "magnetic"):
                score += 3.0
            if has_mount(mounts, "through"):
                score += 2.0
            if has_mount(mounts, "wall") or has_mount(mounts, "pole"):
                score -= 1.2
            if "app: fixed" in card and "mobile" not in card:
                score -= 2.0

        if mode == "stationary":
            if has_mount(mounts, "wall"):
                score += 2.0
            if has_mount(mounts, "pole"):
                score += 1.8

        if score > best_score:
            best_score = score
            best = c

    if not best:
        return {"name":"Parsec antenna","part_number":"","description":"","connectors":"","mounts":[]}

    best = dict(best)
    best.pop("_card", None)
    return best
742
+ "\n",
743
+ "\n",
744
def infer_mimo_for_5g(repl_5g_model: str) -> str:
    """Rule: every 5G router uses a 4x4 antenna.

    The model name is accepted for interface compatibility but does not
    influence the result.
    """
    return "4x4"
774
+ "\n",
775
def antenna_options_for(router_model: str, tech: str, mimo: str) -> Dict[str, Any]:
    """Pick one stationary and one vehicle omni antenna for a router.

    Runs one retrieval per deployment type (12 candidates each), scores the
    candidates with choose_best_parsec and annotates each winner with the
    requested MIMO and a short reason.
    """
    prefix = f"{router_model} {tech} {mimo} omni"
    stationary_query = f"{prefix} stationary pole wall fixed site Parsec"
    vehicle_query = f"{prefix} vehicle mobile magnetic through-bolt Parsec"

    stationary_hits = parsec_retrieve(stationary_query, top_k=12)
    vehicle_hits = parsec_retrieve(vehicle_query, top_k=12)

    stationary = choose_best_parsec(stationary_hits, mode="stationary")
    vehicle = choose_best_parsec(vehicle_hits, mode="vehicle")

    stationary.update({"mimo": mimo, "why": "Stationary omni best match."})
    vehicle.update({"mimo": mimo, "why": "Vehicle omni best match."})

    return {
        "stationary_omni": stationary,
        "vehicle_omni": vehicle,
        "sources": ["parsec_rag"],
    }
789
+ "\n",
790
+ "\n",
791
+ "# ============================\n",
792
+ "# Install-ready checklist\n",
793
+ "# ============================\n",
794
def install_ready_checklist(current_sku: str, repl: Dict[str,Any], ant: Dict[str,Any]) -> str:
    """Return a markdown install checklist for the chosen replacement and antennas.

    When a GPT client is configured the checklist is generated by the model.
    If that call fails or comes back empty, the static checklist below is
    returned instead, so the caller always receives usable text.
    """
    st = ant.get("stationary_omni") or {}
    vh = ant.get("vehicle_omni") or {}

    if client is not None:
        sys = "Create a short, install-ready checklist for a Verizon rep. Return markdown only."
        payload = {"current_device": current_sku, "replacements": repl, "antennas": {"stationary": st, "vehicle": vh}}
        try:
            resp = client.responses.create(
                model=OPENAI_MODEL,
                reasoning=OPENAI_REASONING,
                input=[{"role":"system","content":sys},{"role":"user","content":json.dumps(payload)}],
                max_output_tokens=520,
            )
            generated = (getattr(resp, "output_text", "") or "").strip()
        except Exception:
            # Best effort: the static checklist is always available.
            import logging
            logging.getLogger(__name__).exception("GPT checklist generation failed; using static checklist")
            generated = ""
        if generated:
            return generated

    return "\n".join([
        "### Install-ready checklist",
        f"- Current device: {current_sku}",
        f"- 5G replacement: {repl.get('repl_5g','')}",
        f"- 4G alternative: {repl.get('repl_4g','Not applicable')}",
        f"- Stationary omni antenna: {st.get('name','')} (PN {st.get('part_number','')})",
        f"- Vehicle omni antenna: {vh.get('name','')} (PN {vh.get('part_number','')})",
        "- Next steps: confirm mounting + cable lengths + power; place order; schedule install.",
    ])
816
+ "\n",
817
+ "\n",
818
+ "# ============================\n",
819
+ "# Batch mode (NO GPT)\n",
820
+ "# ============================\n",
821
+ "def parse_batch_inputs(text_blob: str, file_obj: Any) -> List[str]:\n",
822
+ " items: List[str] = []\n",
823
+ " if file_obj is not None:\n",
824
+ " try:\n",
825
+ " path = file_obj.name if hasattr(file_obj, \"name\") else str(file_obj)\n",
826
+ " df = pd.read_csv(path)\n",
827
+ " col = df.columns[0]\n",
828
+ " items.extend([str(x).strip() for x in df[col].tolist() if str(x).strip()])\n",
829
+ " except Exception:\n",
830
+ " pass\n",
831
+ " if text_blob:\n",
832
+ " for ln in str(text_blob).splitlines():\n",
833
+ " ln = ln.strip()\n",
834
+ " if ln:\n",
835
+ " items.append(ln)\n",
836
+ " seen=set()\n",
837
+ " out=[]\n",
838
+ " for x in items:\n",
839
+ " k=norm_text(x)\n",
840
+ " if k and k not in seen:\n",
841
+ " seen.add(k); out.append(x)\n",
842
+ " return out\n",
843
+ "\n",
844
+ "def run_batch(text_blob: str, file_obj: Any, include_antennas: bool):\n",
845
+ " inputs = parse_batch_inputs(text_blob, file_obj)\n",
846
+ " if not inputs:\n",
847
+ " return \"\", None, None, \"\"\n",
848
+ "\n",
849
+ " rows=[]\n",
850
+ " for item in inputs:\n",
851
+ " res = resolve_device(item)\n",
852
+ " if res.get(\"mode\") != \"ok\":\n",
853
+ " rows.append({\"Input\": item, \"Matched\":\"\", \"Status\":\"Needs review\", \"EOS\":\"\", \"EOL\":\"\", \"4G alternative\":\"\", \"5G replacement\":\"\", \"Notes\":\"Not found/ambiguous\"})\n",
854
+ " continue\n",
855
+ "\n",
856
+ " life_row = df_eos.iloc[int(res[\"row_idx\"])]\n",
857
+ " eos, eol, status = row_to_dates_and_status(life_row)\n",
858
+ " repl = pick_replacements_lifecycle(life_row, status, use_gpt=False)\n",
859
+ "\n",
860
+ " rows.append({\n",
861
+ " \"Input\": item,\n",
862
+ " \"Matched\": str(life_row.get(\"sku\",\"\")),\n",
863
+ " \"Status\": status,\n",
864
+ " \"EOS\": eos,\n",
865
+ " \"EOL\": eol,\n",
866
+ " \"4G alternative\": repl.get(\"repl_4g\",\"\"),\n",
867
+ " \"5G replacement\": repl.get(\"repl_5g\",\"\"),\n",
868
+ " \"Notes\": \"\",\n",
869
+ " })\n",
870
+ "\n",
871
+ " out_df = pd.DataFrame(rows)\n",
872
+ " counts = out_df[\"Status\"].value_counts(dropna=False).to_dict()\n",
873
+ " top_5g = out_df[\"5G replacement\"].value_counts(dropna=False).head(5).to_dict()\n",
874
+ " summary = f\"Rows: {len(out_df)} | \" + \" | \".join([f\"{k}: {v}\" for k,v in counts.items()])\n",
875
+ " rollup = \"Top 5G recommendations:\\n\" + \"\\n\".join([f\"- {k}: {v}\" for k,v in top_5g.items() if str(k).strip()])\n",
876
+ "\n",
877
+ " tmp = tempfile.NamedTemporaryFile(delete=False, suffix=\".csv\")\n",
878
+ " out_df.to_csv(tmp.name, index=False)\n",
879
+ "\n",
880
+ " return summary, out_df, tmp.name, rollup\n",
881
+ "\n",
882
+ "\n",
883
+ "# ============================\n",
884
+ "# Replacement feature table + manufacturer link (5G device)\n",
885
+ "# ============================\n",
886
+ "\n",
887
+ "FEATURE_COLS = [\"Device\", \"Modem technology\", \"WiFi\", \"Ports\", \"Antennas\", \"Ruggedness\", \"Use case\"]\n",
888
+ "\n",
889
+ "# Manufacturer domains used for best-effort link resolution (no non-maker domains).\n",
890
+ "MAKER_DOMAINS = {\n",
891
+ " \"CRADLEPOINT\": [\"cradlepoint.com\", \"ericsson.com\"],\n",
892
+ " \"SIERRA\": [\"semtech.com\", \"airlink.com\"],\n",
893
+ " \"FEENEY\": [\"inseego.com\"],\n",
894
+ " \"DIGI\": [\"digi.com\"],\n",
895
+ " \"CISCO_MERAKI\": [\"meraki.cisco.com\", \"cisco.com\"],\n",
896
+ " \"CISCO\": [\"cisco.com\"],\n",
897
+ " \"TELTONIKA\": [\"teltonika-networks.com\"],\n",
898
+ " \"UNKNOWN\": [],\n",
899
+ "}\n",
900
+ "\n",
901
+ "HTTP_HEADERS = {\n",
902
+ " \"User-Agent\": \"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 \"\n",
903
+ " \"(KHTML, like Gecko) Chrome/120.0 Safari/537.36\"\n",
904
+ "}\n",
905
+ "HTTP_TIMEOUT = 12\n",
906
+ "\n",
907
+ "def _best_effort_manufacturer_url(model: str, canon_make: str) -> str:\n",
908
+ " \\\"\\\"\\\"Try to find a manufacturer page or datasheet link using simple on-domain searches.\n",
909
+ " If we can't confirm a page, return the manufacturer homepage for the maker family.\n",
910
+ " \\\"\\\"\\\"\n",
911
+ " model = str(model or \"\").strip()\n",
912
+ " if not model or model in {\"Not listed\", \"Not applicable\"}:\n",
913
+ " return \"\"\n",
914
+ "\n",
915
+ " domains = MAKER_DOMAINS.get(canon_make, []) or []\n",
916
+ " if not domains:\n",
917
+ " return \"\"\n",
918
+ "\n",
919
+ " # Candidate on-domain search URLs (common patterns across sites).\n",
920
+ " # We keep these on the manufacturer domain (no Google/Bing).\n",
921
+ " q = re.sub(r\"\\s+\", \"+\", model)\n",
922
+ " url_candidates = []\n",
923
+ " for d in domains:\n",
924
+ " url_candidates += [\n",
925
+ " f\"https://{d}/search?q={q}\",\n",
926
+ " f\"https://{d}/search?query={q}\",\n",
927
+ " f\"https://{d}/?s={q}\",\n",
928
+ " f\"https://www.{d}/search?q={q}\",\n",
929
+ " f\"https://www.{d}/search?query={q}\",\n",
930
+ " f\"https://www.{d}/?s={q}\",\n",
931
+ " ]\n",
932
+ "\n",
933
+ " # Also try a few direct product patterns for known makers (best effort).\n",
934
+ " if canon_make == \"TELTONIKA\":\n",
935
+ " slug = model.lower()\n",
936
+ " url_candidates += [\n",
937
+ " f\"https://teltonika-networks.com/products/routers/{slug}\",\n",
938
+ " f\"https://teltonika-networks.com/product/{slug}\",\n",
939
+ " \"https://teltonika-networks.com/products/routers/\",\n",
940
+ " ]\n",
941
+ " if canon_make == \"DIGI\":\n",
942
+ " url_candidates += [\n",
943
+ " \"https://www.digi.com/products/networking/cellular-routers\",\n",
944
+ " f\"https://www.digi.com/search?q={q}\",\n",
945
+ " ]\n",
946
+ " if canon_make == \"CRADLEPOINT\":\n",
947
+ " url_candidates += [\n",
948
+ " \"https://cradlepoint.com/products/\",\n",
949
+ " f\"https://cradlepoint.com/?s={q}\",\n",
950
+ " ]\n",
951
+ " if canon_make in {\"CISCO\", \"CISCO_MERAKI\"}:\n",
952
+ " url_candidates += [\n",
953
+ " f\"https://www.cisco.com/c/en/us/search.html?q={q}\",\n",
954
+ " ]\n",
955
+ "\n",
956
+ " # Try to confirm a working page (HTTP 200 and either the model string or a datasheet mention somewhere in the HTML).\n",
957
+ " for u in url_candidates[:18]:\n",
958
+ " try:\n",
959
+ " import requests\n",
960
+ " r = requests.get(u, headers=HTTP_HEADERS, timeout=HTTP_TIMEOUT, allow_redirects=True)\n",
961
+ " if r.status_code != 200:\n",
962
+ " continue\n",
963
+ " html = (r.text or \"\").lower()\n",
964
+ " if model.lower() in html or \"datasheet\" in html or \"data sheet\" in html:\n",
965
+ " return r.url\n",
966
+ " except Exception:\n",
967
+ " continue\n",
968
+ "\n",
969
+ " # Fallback: maker homepage\n",
970
+ " d0 = domains[0]\n",
971
+ " return f\"https://{d0}\"\n",
972
+ "\n",
973
+ "def _features_from_dec(model: str, canon_make: str) -> Dict[str, str]:\n",
974
+ " \\\"\\\"\\\"Lookup a router model in dec2025routers.csv and return the key feature fields.\\\"\\\"\\\"\n",
975
+ " if not model or model in {\"Not listed\", \"Not applicable\"}:\n",
976
+ " return {k: \"Not listed\" for k in FEATURE_COLS[1:]}\n",
977
+ "\n",
978
+ " pool = df_dec[df_dec[\"_canon_make\"] == canon_make].copy()\n",
979
+ " if pool.empty:\n",
980
+ " return {k: \"Not listed\" for k in FEATURE_COLS[1:]}\n",
981
+ "\n",
982
+ " hit = process.extractOne(norm_text(model), pool[\"_norm_model\"].tolist(), scorer=fuzz.WRatio)\n",
983
+ " if not hit or hit[1] < MATCH_OK:\n",
984
+ " return {k: \"Not listed\" for k in FEATURE_COLS[1:]}\n",
985
+ "\n",
986
+ " r = pool.iloc[int(hit[2])]\n",
987
+ " ports = f\"WAN: {r.get('WAN ports and speed','')} | LAN: {r.get('LAN ports and speed','')}\"\n",
988
+ " return {\n",
989
+ " \"Modem technology\": str(r.get(\"Modem Type\",\"\")) or \"Not listed\",\n",
990
+ " \"WiFi\": str(r.get(\"WiFi type\",\"\")) or \"Not listed\",\n",
991
+ " \"Ports\": ports.strip() if ports.strip() else \"Not listed\",\n",
992
+ " \"Antennas\": str(r.get(\"Antennas (internal/external/both)\",\"\")) or \"Not listed\",\n",
993
+ " \"Ruggedness\": str(r.get(\"Ruggedization\",\"\")) or \"Not listed\",\n",
994
+ " \"Use case\": str(r.get(\"Primary use case\",\"\")) or \"Not listed\",\n",
995
+ " }\n",
996
+ "\n",
997
+ "def _gpt_fill_feature_row(device_label: str, model: str, canon_make: str, row: Dict[str, str]) -> Dict[str, str]:\n",
998
+ " \\\"\\\"\\\"If dec can't supply values, ask GPT to fill missing ones (best guess).\\\"\\\"\\\"\n",
999
+ " if client is None:\n",
1000
+ " return row\n",
1001
+ "\n",
1002
+ " missing = [k for k,v in row.items() if (not v) or str(v).strip().lower() in {\"not listed\",\"nan\",\"\"}]\n",
1003
+ " if not missing:\n",
1004
+ " return row\n",
1005
+ "\n",
1006
+ " sys = \"Fill missing router feature fields for a Verizon rep. Return strict JSON only.\"\n",
1007
+ " payload = {\n",
1008
+ " \"device_label\": device_label,\n",
1009
+ " \"model\": model,\n",
1010
+ " \"maker_family\": canon_make,\n",
1011
+ " \"known\": row,\n",
1012
+ " \"fill_only\": missing,\n",
1013
+ " \"rules\": [\n",
1014
+ " \"Fill only the requested fields.\",\n",
1015
+ " \"Best guess if needed. Short phrases only.\",\n",
1016
+ " \"Return JSON only.\"\n",
1017
+ " ],\n",
1018
+ " \"output_schema\": {k: \"string\" for k in missing}\n",
1019
+ " }\n",
1020
+ " out = gpt_json(sys, payload, max_tokens=260) or {}\n",
1021
+ " for k in missing:\n",
1022
+ " val = str(out.get(k, \"\") or \"\").strip()\n",
1023
+ " if val:\n",
1024
+ " row[k] = val\n",
1025
+ " return row\n",
1026
+ "\n",
1027
+ "def build_replacement_features_table(repl_4g: str, repl_5g: str, canon_make: str) -> pd.DataFrame:\n",
1028
+ " rows = []\n",
1029
+ "\n",
1030
+ " # 4G\n",
1031
+ " row4 = _features_from_dec(repl_4g, canon_make)\n",
1032
+ " row4 = _gpt_fill_feature_row(\"4G alternative\", repl_4g, canon_make, row4)\n",
1033
+ " rows.append({\"Device\": \"4G alternative\", **row4})\n",
1034
+ "\n",
1035
+ " # 5G\n",
1036
+ " row5 = _features_from_dec(repl_5g, canon_make)\n",
1037
+ " row5 = _gpt_fill_feature_row(\"5G replacement\", repl_5g, canon_make, row5)\n",
1038
+ " rows.append({\"Device\": \"5G replacement\", **row5})\n",
1039
+ "\n",
1040
+ " df = pd.DataFrame(rows, columns=FEATURE_COLS)\n",
1041
+ " return df\n",
1042
+ "\n",
1043
+ "# ============================\n",
1044
+ "# Output\n",
1045
+ "# ============================\n",
1046
+ "def assemble_output(life_row: pd.Series, status: str, eos: str, eol: str, repl: Dict[str,Any], ant: Dict[str,Any]) -> str:\n",
1047
+ " current_name = f\"{life_row.get('sku','')} — {life_row.get('description','')}\".strip(\" —\")\n",
1048
+ " st = ant.get(\"stationary_omni\", {})\n",
1049
+ " vh = ant.get(\"vehicle_omni\", {})\n",
1050
+ "\n",
1051
+ " lines = []\n",
1052
+ " lines.append(f\"1. Current device: **{current_name}**\")\n",
1053
+ " lines.append(f\"2. Status: **{status}**\")\n",
1054
+ " lines.append(f\"3. End of Sale date: **{eos}**\")\n",
1055
+ " lines.append(f\"4. End of Life date: **{eol}**\")\n",
1056
+ " lines.append(f\"5. 4G alternative (lifecycle): **{repl.get('repl_4g','Not applicable')}**\")\n",
1057
+ " lines.append(f\"6. 5G replacement (lifecycle): **{repl.get('repl_5g','Not listed')}**\")\n",
1058
+ " lines.append(\"7. Antenna options (Parsec-only):\")\n",
1059
+ " conn_s = f\" | Conn: {st.get('connectors','')}\" if st.get(\"connectors\") else \"\"\n",
1060
+ " conn_v = f\" | Conn: {vh.get('connectors','')}\" if vh.get(\"connectors\") else \"\"\n",
1061
+ " lines.append(f\" - Stationary (Omni): **{st.get('name','')}** (Part #: {st.get('part_number','')}) — {st.get('description','')} — MIMO: {st.get('mimo','')}{conn_s}\")\n",
1062
+ " lines.append(f\" - Vehicle (Omni): **{vh.get('name','')}** (Part #: {vh.get('part_number','')}) — {vh.get('description','')} — MIMO: {vh.get('mimo','')}{conn_v}\")\n",
1063
+ "\n",
1064
+ " lines.append(\"\\nSources (debug):\")\n",
1065
+ " for s in repl.get(\"sources\", []) if isinstance(repl.get(\"sources\"), list) else []:\n",
1066
+ " lines.append(f\"- {s}\")\n",
1067
+ " lines.append(\"- ParsecCatalog.pdf (local RAG)\")\n",
1068
+ " lines.append(\"- routers_eos_eol_by_sku.csv (replacements)\")\n",
1069
+ " return \"\\n\".join(lines)\n",
1070
+ "\n",
1071
+ "\n",
1072
+ "# ============================\n",
1073
+ "# Gradio callbacks\n",
1074
+ "# IMPORTANT: no dict state and ALL events have api_name=False (prevents api_info schema generation)\n",
1075
+ "# ============================\n",
1076
+ "def run_lookup(user_text: str, st_json: str):\n",
1077
+ " user_text = str(user_text or \"\").strip()\n",
1078
+ " if not user_text:\n",
1079
+ " return \"Enter a router SKU/model.\", \"\", None, gr.update(visible=False), gr.update(visible=False), \"{}\", \"\"\n",
1080
+ "\n",
1081
+ " res = resolve_device(user_text)\n",
1082
+ "\n",
1083
+ " if res.get(\"mode\") == \"pick\":\n",
1084
+ " opts = res.get(\"options\", [])\n",
1085
+ " choices = [o[\"label\"] for o in opts]\n",
1086
+ " st2 = {\"mode\":\"pick\",\"options\": opts, \"raw\": user_text}\n",
1087
+ " return \"Did you mean A or B? Pick one, then click Use selection.\", \"\", None, gr.update(choices=choices, value=None, visible=True), gr.update(visible=True), state_dump(st2), \"\"\n",
1088
+ "\n",
1089
+ " if res.get(\"mode\") != \"ok\":\n",
1090
+ " return \"Not found.\", \"\", None, gr.update(visible=False), gr.update(visible=False), \"{}\", \"\"\n",
1091
+ "\n",
1092
+ " life_row = df_eos.iloc[int(res[\"row_idx\"])]\n",
1093
+ " eos, eol, status = row_to_dates_and_status(life_row)\n",
1094
+ "\n",
1095
+ " repl = pick_replacements_lifecycle(life_row, status, use_gpt=True)\n",
1096
+ " canon_make = str(life_row.get(\"_canon_make\",\"UNKNOWN\"))\n",
1097
+ " mimo = infer_mimo_for_5g(repl.get(\"repl_5g\",\"\"))\n",
1098
+ " tech = \"5G\" if repl.get(\"repl_5g\") and repl.get(\"repl_5g\") != \"Not listed\" else (\"4G\" if device_is_4g(life_row) else \"Unknown\")\n",
1099
+ " ant = antenna_options_for(repl.get(\"repl_5g\") or str(life_row.get(\"sku\",\"\")), tech, mimo)\n",
1100
+ "\n",
1101
+ " output = assemble_output(life_row, status, eos, eol, repl, ant)\n",
1102
+ " st_out = {\"row_idx\": int(res[\"row_idx\"]), \"repl\": repl, \"ant\": ant, \"raw\": user_text}\n",
1103
+ " url5 = _best_effort_manufacturer_url(repl.get('repl_5g',''), canon_make)\n",
1104
+ " link = f\"**5G manufacturer page (best effort):** {url5}\" if url5 else \"\"\n",
1105
+ " feat_df = build_replacement_features_table(repl.get('repl_4g',''), repl.get('repl_5g',''), canon_make)\n",
1106
+ " return output, link, feat_df, gr.update(visible=False), gr.update(visible=False), state_dump(st_out), \"\"\n",
1107
+ "\n",
1108
+ "def use_selection(selected_label: str, st_json: str):\n",
1109
+ " st = state_load(st_json)\n",
1110
+ " if not st or st.get(\"mode\") != \"pick\":\n",
1111
+ " return \"Run a search first.\", \"\", None, gr.update(visible=False), gr.update(visible=False), \"{}\", \"\"\n",
1112
+ "\n",
1113
+ " if not selected_label:\n",
1114
+ " return \"Pick A or B first.\", \"\", None, gr.update(visible=True), gr.update(visible=True), st_json, \"\"\n",
1115
+ "\n",
1116
+ " chosen_row = None\n",
1117
+ " for o in st.get(\"options\", []):\n",
1118
+ " if o.get(\"label\") == selected_label:\n",
1119
+ " chosen_row = int(o[\"row_idx\"])\n",
1120
+ " break\n",
1121
+ " if chosen_row is None:\n",
1122
+ " return \"Pick a valid option.\", \"\", None, gr.update(visible=True), gr.update(visible=True), st_json, \"\"\n",
1123
+ "\n",
1124
+ " life_row = df_eos.iloc[int(chosen_row)]\n",
1125
+ " eos, eol, status = row_to_dates_and_status(life_row)\n",
1126
+ "\n",
1127
+ " repl = pick_replacements_lifecycle(life_row, status, use_gpt=True)\n",
1128
+ " canon_make = str(life_row.get(\"_canon_make\",\"UNKNOWN\"))\n",
1129
+ " mimo = infer_mimo_for_5g(repl.get(\"repl_5g\",\"\"))\n",
1130
+ " tech = \"5G\" if repl.get(\"repl_5g\") and repl.get(\"repl_5g\") != \"Not listed\" else (\"4G\" if device_is_4g(life_row) else \"Unknown\")\n",
1131
+ " ant = antenna_options_for(repl.get(\"repl_5g\") or str(life_row.get(\"sku\",\"\")), tech, mimo)\n",
1132
+ "\n",
1133
+ " output = assemble_output(life_row, status, eos, eol, repl, ant)\n",
1134
+ " st_out = {\"row_idx\": int(chosen_row), \"repl\": repl, \"ant\": ant, \"raw\": st.get(\"raw\",\"\")}\n",
1135
+ " url5 = _best_effort_manufacturer_url(repl.get('repl_5g',''), canon_make)\n",
1136
+ " link = f\"**5G manufacturer page (best effort):** {url5}\" if url5 else \"\"\n",
1137
+ " feat_df = build_replacement_features_table(repl.get('repl_4g',''), repl.get('repl_5g',''), canon_make)\n",
1138
+ " return output, link, feat_df, gr.update(visible=False), gr.update(visible=False), state_dump(st_out), \"\"\n",
1139
+ "\n",
1140
+ "def make_install_ready(st_json: str):\n",
1141
+ " st = state_load(st_json)\n",
1142
+ " if not st or \"row_idx\" not in st:\n",
1143
+ " return \"Run a lookup first.\"\n",
1144
+ " life_row = df_eos.iloc[int(st[\"row_idx\"])]\n",
1145
+ " current_sku = str(life_row.get(\"sku\",\"\") or \"\")\n",
1146
+ " return install_ready_checklist(current_sku, st.get(\"repl\", {}) or {}, st.get(\"ant\", {}) or {})\n",
1147
+ "\n",
1148
+ "\n",
1149
+ "# ============================\n",
1150
+ "# UI\n",
1151
+ "# ============================\n",
1152
+ "with gr.Blocks(title=\"Only-Routers\") as demo:\n",
1153
+ " gr.Markdown(\"## Only-Routers\\nSingle lookup + Batch upload for Verizon reps.\")\n",
1154
+ "\n",
1155
+ " with gr.Tabs():\n",
1156
+ " with gr.Tab(\"Single\"):\n",
1157
+ " user_text = gr.Textbox(label=\"Router SKU or model\", placeholder=\"Examples: IBR650B, AER1600, ES450, WR21, RUT240\", lines=1)\n",
1158
+ " st = gr.State(\"{}\") # JSON string\n",
1159
+ "\n",
1160
+ " check_btn = gr.Button(\"Check\", variant=\"primary\")\n",
1161
+ " pick_dd = gr.Dropdown(label=\"Pick A or B\", choices=[], visible=False)\n",
1162
+ " use_btn = gr.Button(\"Use selection\", visible=False)\n",
1163
+ "\n",
1164
+ " output_md = gr.Markdown()\n",
1165
+ "\n",
1166
+ " link_md = gr.Markdown()\n",
1167
+ " features_df = gr.Dataframe(headers=FEATURE_COLS, interactive=False, wrap=True)\n",
1168
+ "\n",
1169
+ "\n",
1170
+ " install_btn = gr.Button(\"Make install-ready checklist\")\n",
1171
+ " install_md = gr.Markdown()\n",
1172
+ "\n",
1173
+ " check_btn.click(fn=run_lookup, inputs=[user_text, st], outputs=[output_md, link_md, features_df, pick_dd, use_btn, st, install_md], api_name=False)\n",
1174
+ " use_btn.click(fn=use_selection, inputs=[pick_dd, st], outputs=[output_md, link_md, features_df, pick_dd, use_btn, st, install_md], api_name=False)\n",
1175
+ " install_btn.click(fn=make_install_ready, inputs=[st], outputs=[install_md], api_name=False)\n",
1176
+ "\n",
1177
+ " with gr.Tab(\"Batch\"):\n",
1178
+ " gr.Markdown(\"Paste one per line or upload a CSV (first column). Batch runs fast (no GPT).\")\n",
1179
+ " batch_text = gr.Textbox(label=\"Paste devices (one per line)\", lines=8, placeholder=\"WR21\\nRUT240\\nIBR650B\")\n",
1180
+ " batch_file = gr.File(label=\"Upload CSV\", file_types=[\".csv\"])\n",
1181
+ " include_ant = gr.Checkbox(label=\"Include antenna picks (slower)\", value=False)\n",
1182
+ " run_btn = gr.Button(\"Run batch\", variant=\"primary\")\n",
1183
+ "\n",
1184
+ " summary_md = gr.Markdown()\n",
1185
+ " rollup_md = gr.Markdown()\n",
1186
+ " table = gr.Dataframe(interactive=False, wrap=True)\n",
1187
+ " dl = gr.File(label=\"Download results CSV\")\n",
1188
+ "\n",
1189
+ " run_btn.click(fn=run_batch, inputs=[batch_text, batch_file, include_ant], outputs=[summary_md, table, dl, rollup_md], api_name=False)\n",
1190
+ "\n",
1191
+ "# IMPORTANT: On Spaces, demo.launch() is correct; do NOT use share=True.\n",
1192
+ "demo.launch(show_api=False)\n"
1193
+ ]
1194
+ }
1195
+ ],
1196
+ "metadata": {
1197
+ "kernelspec": {
1198
+ "display_name": "Python 3",
1199
+ "name": "python3"
1200
+ },
1201
+ "language_info": {
1202
+ "name": "python"
1203
+ }
1204
+ },
1205
+ "nbformat": 4,
1206
+ "nbformat_minor": 5
1207
+ }
Updates/only-routers_ai_poc_hf_fixed_v8.ipynb ADDED
@@ -0,0 +1,1288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "d439c9b3",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Only-Routers (HF fixed v8)\n",
9
+ "\n",
10
+ "Adds a post-recommendation Q&A box powered by GPT.\n"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": null,
16
+ "id": "c68bc169",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "import os\n",
21
+ "import re\n",
22
+ "import json\n",
23
+ "import math\n",
24
+ "import hashlib\n",
25
+ "import tempfile\n",
26
+ "from dataclasses import dataclass\n",
27
+ "from datetime import datetime, date\n",
28
+ "from typing import Any, Dict, List, Optional, Tuple\n",
29
+ "\n",
30
+ "import numpy as np\n",
31
+ "import pandas as pd\n",
32
+ "\n",
33
+ "import fitz # PyMuPDF\n",
34
+ "import faiss\n",
35
+ "from sentence_transformers import SentenceTransformer\n",
36
+ "from rapidfuzz import fuzz, process\n",
37
+ "\n",
38
+ "import gradio as gr\n",
39
+ "from openai import OpenAI\n",
40
+ "\n",
41
+ "\n",
42
+ "# ============================\n",
43
+ "# Settings\n",
44
+ "# ============================\n",
45
+ "TODAY = date(2026, 1, 18)\n",
46
+ "OPENAI_MODEL = \"gpt-5.2\"\n",
47
+ "OPENAI_REASONING = {\"effort\": \"high\"}\n",
48
+ "MATCH_OK = 80\n",
49
+ "\n",
50
+ "EMBED_MODEL_NAME = \"sentence-transformers/all-MiniLM-L6-v2\"\n",
51
+ "PARSEC_CONTEXT_BEFORE = 900\n",
52
+ "PARSEC_CONTEXT_AFTER = 1600\n",
53
+ "\n",
54
+ "\n",
55
+ "# ============================\n",
56
+ "# OpenAI client (HF Space secret: OPENAI_API_KEY)\n",
57
+ "# ============================\n",
58
+ "API_KEY = os.getenv(\"OPENAI_API_KEY\", \"\").strip()\n",
59
+ "client = OpenAI(api_key=API_KEY) if API_KEY else None\n",
60
+ "\n",
61
+ "# ----------------------------\n",
62
+ "# Gradio state helpers\n",
63
+ "# Keep state as a JSON STRING to avoid schema issues on Hugging Face.\n",
64
+ "# ----------------------------\n",
65
+ "def state_load(st_json: str) -> Dict[str, Any]:\n",
66
+ " try:\n",
67
+ " if not st_json:\n",
68
+ " return {}\n",
69
+ " return json.loads(st_json) if isinstance(st_json, str) else {}\n",
70
+ " except Exception:\n",
71
+ " return {}\n",
72
+ "\n",
73
+ "def state_dump(st: Dict[str, Any]) -> str:\n",
74
+ " try:\n",
75
+ " return json.dumps(st or {}, ensure_ascii=False)\n",
76
+ " except Exception:\n",
77
+ " return \"{}\"\n",
78
+ "\n",
79
+ "\n",
80
+ "\n",
81
+ "# ============================\n",
82
+ "# Helpers\n",
83
+ "# ============================\n",
84
+ "def norm_text(s: Any) -> str:\n",
85
+ " try:\n",
86
+ " if s is None or (isinstance(s, float) and math.isnan(s)) or pd.isna(s):\n",
87
+ " return \"\"\n",
88
+ " except Exception:\n",
89
+ " pass\n",
90
+ " s = str(s).strip().lower()\n",
91
+ " s = re.sub(r\"[^a-z0-9\\s\\-\\/]\", \" \", s)\n",
92
+ " s = re.sub(r\"\\s+\", \" \", s).strip()\n",
93
+ " return s\n",
94
+ "\n",
95
+ "def safe_str(v: Any) -> str:\n",
96
+ " if v is None or (isinstance(v, float) and pd.isna(v)) or pd.isna(v):\n",
97
+ " return \"\"\n",
98
+ " return str(v).strip()\n",
99
+ "\n",
100
+ "def is_5g(modem_type: Any) -> bool:\n",
101
+ " s = norm_text(modem_type)\n",
102
+ " return (\"5g\" in s) or (\"nr\" in s)\n",
103
+ "\n",
104
+ "def json_load_safe(s: str) -> Dict[str, Any]:\n",
105
+ " try:\n",
106
+ " return json.loads(s)\n",
107
+ " except Exception:\n",
108
+ " return {}\n",
109
+ "\n",
110
+ "def gpt_json(system: str, payload: Dict[str, Any], max_tokens: int = 600) -> Dict[str, Any]:\n",
111
+ " if client is None:\n",
112
+ " return {}\n",
113
+ " resp = client.responses.create(\n",
114
+ " model=OPENAI_MODEL,\n",
115
+ " reasoning=OPENAI_REASONING,\n",
116
+ " input=[{\"role\":\"system\",\"content\":system},{\"role\":\"user\",\"content\":json.dumps(payload)}],\n",
117
+ " max_output_tokens=max_tokens,\n",
118
+ " )\n",
119
+ " return json_load_safe(getattr(resp, \"output_text\", \"\") or \"\")\n",
120
+ "\n",
121
+ "\n",
122
+ "def gpt_answer_md(system: str, user: str, max_tokens: int = 650) -> str:\n",
123
+ " \"\"\"Return a rep-friendly markdown answer.\"\"\"\n",
124
+ " if client is None:\n",
125
+ " return \"No API key is configured, so I can't answer detailed questions right now.\"\n",
126
+ " resp = client.responses.create(\n",
127
+ " model=OPENAI_MODEL,\n",
128
+ " reasoning=OPENAI_REASONING,\n",
129
+ " input=[\n",
130
+ " {\"role\": \"system\", \"content\": system},\n",
131
+ " {\"role\": \"user\", \"content\": user},\n",
132
+ " ],\n",
133
+ " max_output_tokens=max_tokens,\n",
134
+ " )\n",
135
+ " return (getattr(resp, \"output_text\", \"\") or \"\").strip()\n",
136
+ "\n",
137
+ "\n",
138
+ "# ============================\n",
139
+ "# Load data\n",
140
+ "# ============================\n",
141
+ "EOS_PATH = \"routers_eos_eol_by_sku.csv\"\n",
142
+ "DEC_PATH = \"dec2025routers.csv\"\n",
143
+ "PARSEC_PDF = \"ParsecCatalog.pdf\"\n",
144
+ "\n",
145
+ "if not os.path.exists(EOS_PATH):\n",
146
+ " raise FileNotFoundError(f\"Missing {EOS_PATH} in repo.\")\n",
147
+ "if not os.path.exists(DEC_PATH):\n",
148
+ " raise FileNotFoundError(f\"Missing {DEC_PATH} in repo.\")\n",
149
+ "if not os.path.exists(PARSEC_PDF):\n",
150
+ " raise FileNotFoundError(f\"Missing {PARSEC_PDF} in repo.\")\n",
151
+ "\n",
152
+ "df_eos = pd.read_csv(EOS_PATH).copy()\n",
153
+ "df_dec = pd.read_csv(DEC_PATH).copy()\n",
154
+ "\n",
155
+ "\n",
156
+ "def _canonize_eos_columns(df: pd.DataFrame) -> pd.DataFrame:\n",
157
+ " \"\"\"Normalize lifecycle CSV column names (case-insensitive) and create expected columns.\"\"\"\n",
158
+ " # Map various header spellings to canonical names used by the app\n",
159
+ " mapping = {}\n",
160
+ " for c in df.columns:\n",
161
+ " k = str(c).strip().lower().replace(\" \", \"_\")\n",
162
+ " if k in {\"sku\", \"model\", \"device\", \"device_sku\"}:\n",
163
+ " mapping[c] = \"sku\"\n",
164
+ " elif k in {\"manufacturer\", \"make\", \"vendor\"}:\n",
165
+ " mapping[c] = \"manufacturer\"\n",
166
+ " elif k in {\"device_type\", \"type\"}:\n",
167
+ " mapping[c] = \"device_type\"\n",
168
+ " elif k in {\"end_of_sale\", \"eos\", \"end_sale\", \"end_of_sales\"}:\n",
169
+ " mapping[c] = \"end_of_sale\"\n",
170
+ " elif k in {\"end_of_life\", \"eol\", \"end_life\"}:\n",
171
+ " mapping[c] = \"end_of_life\"\n",
172
+ " elif k in {\"suggested_replacement\", \"replacement_4g\", \"lte_replacement\", \"replacement_lte\", \"replacement\"}:\n",
173
+ " mapping[c] = \"suggested_replacement\"\n",
174
+ " elif k in {\"advanced_5g_option\", \"replacement_5g\", \"fiveg_replacement\", \"5g_replacement\", \"upgrade_5g\"}:\n",
175
+ " mapping[c] = \"advanced_5g_option\"\n",
176
+ " elif k in {\"region\", \"market\"}:\n",
177
+ " mapping[c] = \"region\"\n",
178
+ " elif k in {\"notes\", \"note\"}:\n",
179
+ " mapping[c] = \"notes\"\n",
180
+ " elif k in {\"description\", \"device_description\", \"name\"}:\n",
181
+ " mapping[c] = \"description\"\n",
182
+ "\n",
183
+ " df = df.rename(columns=mapping).copy()\n",
184
+ "\n",
185
+ " # Create expected columns if missing\n",
186
+ " if \"sku\" not in df.columns:\n",
187
+ " # Try the common capitalized header as a fallback\n",
188
+ " if \"SKU\" in df.columns:\n",
189
+ " df[\"sku\"] = df[\"SKU\"].astype(str)\n",
190
+ " else:\n",
191
+ " df[\"sku\"] = \"\"\n",
192
+ "\n",
193
+ " if \"manufacturer\" not in df.columns:\n",
194
+ " df[\"manufacturer\"] = \"\"\n",
195
+ "\n",
196
+ " if \"device_type\" not in df.columns:\n",
197
+ " df[\"device_type\"] = \"\"\n",
198
+ "\n",
199
+ " if \"description\" not in df.columns:\n",
200
+ " # If the simplified file removed description, use SKU as description (still searchable)\n",
201
+ " df[\"description\"] = df[\"sku\"].astype(str)\n",
202
+ "\n",
203
+ " if \"notes\" not in df.columns:\n",
204
+ " df[\"notes\"] = \"\"\n",
205
+ "\n",
206
+ " if \"region\" not in df.columns:\n",
207
+ " df[\"region\"] = \"\"\n",
208
+ "\n",
209
+ " if \"suggested_replacement\" not in df.columns:\n",
210
+ " df[\"suggested_replacement\"] = \"\"\n",
211
+ "\n",
212
+ " if \"advanced_5g_option\" not in df.columns:\n",
213
+ " df[\"advanced_5g_option\"] = \"\"\n",
214
+ "\n",
215
+ " if \"end_of_sale\" not in df.columns:\n",
216
+ " df[\"end_of_sale\"] = \"\"\n",
217
+ "\n",
218
+ " if \"end_of_life\" not in df.columns:\n",
219
+ " df[\"end_of_life\"] = \"\"\n",
220
+ "\n",
221
+ " return df\n",
222
+ "\n",
223
+ "df_eos = _canonize_eos_columns(df_eos)\n",
224
+ "\n",
225
+ "\n",
226
+ "def region_ok(x: Any) -> bool:\n",
227
+ " s = str(x or \"\").strip().lower()\n",
228
+ " if not s:\n",
229
+ " return True\n",
230
+ " if \"not specified\" in s:\n",
231
+ " return True\n",
232
+ " if \"north america\" in s:\n",
233
+ " return True\n",
234
+ " if re.search(r\"\\busa\\b\", s):\n",
235
+ " return True\n",
236
+ " if re.search(r\"\\bunited\\s+states\\b\", s):\n",
237
+ " return True\n",
238
+ " if re.search(r\"\\bu\\.?s\\.?\\b\", s):\n",
239
+ " return True\n",
240
+ " return False\n",
241
+ "\n",
242
+ "if \"region\" in df_eos.columns:\n",
243
+ " df_eos = df_eos[df_eos[\"region\"].apply(region_ok)].reset_index(drop=True)\n",
244
+ "\n",
245
+ "# Maker mapping (includes Teltonika)\n",
246
+ "CANON_MAKER = {\n",
247
+ " \"CRADLEPOINT\": {\"cradlepoint\", \"ericsson\", \"ericsson enterprise wireless\"},\n",
248
+ " \"SIERRA\": {\"sierra\", \"sierra wireless\", \"semtech\", \"airlink\"},\n",
249
+ " \"FEENEY\": {\"feeney\", \"feeney wireless\", \"inseego\"},\n",
250
+ " \"DIGI\": {\"digi\", \"accelerated\", \"accelerated concepts\"},\n",
251
+ " \"CISCO_MERAKI\": {\"meraki\", \"cisco meraki\"},\n",
252
+ " \"CISCO\": {\"cisco\"},\n",
253
+ " \"TELTONIKA\": {\"teltonika\"},\n",
254
+ "}\n",
255
+ "\n",
256
def canon_maker_from_text(s: Any) -> str:
    """Map free-form manufacturer text to a canonical maker-family key.

    The text is normalized with norm_text and then checked, family by family
    in CANON_MAKER order, for any alias occurring as a substring.

    Args:
        s: Raw manufacturer value.

    Returns:
        The first CANON_MAKER key with a matching alias, or "UNKNOWN".
    """
    normalized = norm_text(s)
    return next(
        (
            family
            for family, aliases in CANON_MAKER.items()
            if any(alias in normalized for alias in aliases)
        ),
        "UNKNOWN",
    )
263
+ "\n",
264
+ "df_eos[\"_canon_make\"] = df_eos[\"manufacturer\"].apply(canon_maker_from_text) if \"manufacturer\" in df_eos.columns else \"UNKNOWN\"\n",
265
+ "df_eos[\"_norm_sku\"] = df_eos[\"sku\"].apply(norm_text) if \"sku\" in df_eos.columns else \"\"\n",
266
+ "df_eos[\"_norm_desc\"] = df_eos[\"description\"].apply(norm_text) if \"description\" in df_eos.columns else \"\"\n",
267
+ "df_eos[\"_norm_notes\"] = df_eos[\"notes\"].apply(norm_text) if \"notes\" in df_eos.columns else \"\"\n",
268
+ "\n",
269
+ "df_dec[\"_canon_make\"] = df_dec[\"Make\"].apply(canon_maker_from_text) if \"Make\" in df_dec.columns else \"UNKNOWN\"\n",
270
+ "df_dec[\"_norm_model\"] = df_dec[\"Model\"].apply(norm_text) if \"Model\" in df_dec.columns else \"\"\n",
271
+ "df_dec[\"_is5g\"] = df_dec[\"Modem Type\"].apply(is_5g) if \"Modem Type\" in df_dec.columns else False\n",
272
+ "\n",
273
+ "\n",
274
+ "# ============================\n",
275
+ "# Date helpers\n",
276
+ "# ============================\n",
277
@dataclass
class ParsedDate:
    """Outcome of parsing one lifecycle date cell."""

    # Stripped original text ("" when the cell was empty/missing).
    raw: str
    # One of "missing", "full", "year", "year_month", "bad".
    kind: str
    # Concrete date used for comparisons, or None when nothing usable was parsed.
    value: Optional[date]


def parse_date_field(x: Any) -> ParsedDate:
    """Parse a lifecycle date cell into a ParsedDate.

    Accepted forms:
      * US style ``M/D/YY`` or ``M/D/YYYY`` (e.g. ``6/24/24``, ``9/30/2021``)
      * ``YYYY``       -> Jan 1 for the current or a past year, Dec 31 for a future year
      * ``YYYY-MM``    -> first day of that month
      * ``YYYY-MM-DD`` -> that day

    Args:
        x: Raw cell value. Falsy values and pandas-style "nan"/"NaT" strings
            count as missing.

    Returns:
        ParsedDate with kind "missing" (empty cell), "full", "year",
        "year_month", or "bad" (text present but not a valid date).
    """
    raw = str(x or "").strip()
    # A float NaN is truthy, so it survives `x or ""` and stringifies to "nan".
    if not raw or raw.lower() in {"nan", "nat"}:
        return ParsedDate(raw="", kind="missing", value=None)

    bad = ParsedDate(raw=raw, kind="bad", value=None)

    # strptime's %m / %d already accept single-digit months and days; the
    # glibc-only "%-m" / "%-d" spellings are not valid strptime directives.
    for fmt in ("%m/%d/%y", "%m/%d/%Y"):
        try:
            return ParsedDate(raw=raw, kind="full", value=datetime.strptime(raw, fmt).date())
        except ValueError:
            continue

    # Year only.
    if re.fullmatch(r"\d{4}", raw):
        year = int(raw)
        try:
            year_start = date(year, 1, 1)
        except ValueError:
            # "0000" is four digits but below datetime.MINYEAR.
            return bad
        value = year_start if year <= TODAY.year else date(year, 12, 31)
        return ParsedDate(raw=raw, kind="year", value=value)

    # YYYY-MM
    if re.fullmatch(r"\d{4}-\d{2}", raw):
        year_text, month_text = raw.split("-")
        try:
            return ParsedDate(raw=raw, kind="year_month", value=date(int(year_text), int(month_text), 1))
        except ValueError:
            return bad

    # YYYY-MM-DD
    if re.fullmatch(r"\d{4}-\d{2}-\d{2}", raw):
        try:
            return ParsedDate(raw=raw, kind="full", value=datetime.strptime(raw, "%Y-%m-%d").date())
        except ValueError:
            return bad

    # Anything else is kept as raw text and flagged unparsed.
    return bad
332
+ "\n",
333
def display_date(pd_: ParsedDate) -> str:
    """Render a ParsedDate for display.

    Missing dates show as "Not listed". Unparseable ("bad") dates show their
    raw text, or "Not listed" when that text is empty. Every other kind shows
    the raw text exactly as it was entered.
    """
    kind = pd_.kind
    if kind == "missing":
        return "Not listed"
    return (pd_.raw or "Not listed") if kind == "bad" else pd_.raw
339
+ "\n",
340
def status_from_eos_eol(eos: ParsedDate, eol: ParsedDate) -> str:
    """Derive a lifecycle status label from end-of-sale and end-of-life dates.

    Returns:
        "Unknown" when neither date has a value; "End of Life" when the EOL
        date is on or before TODAY; otherwise "End of Sale" when the EOS date
        is on or before TODAY; otherwise "Active".
    """
    sale_date, life_date = eos.value, eol.value
    if sale_date is None and life_date is None:
        return "Unknown"

    # End of Life outranks End of Sale, so it is checked first.
    milestones = (("End of Life", life_date), ("End of Sale", sale_date))
    return next(
        (label for label, when in milestones if when is not None and when <= TODAY),
        "Active",
    )
348
+ "\n",
349
def row_to_dates_and_status(row: pd.Series) -> Tuple[str, str, str]:
    """Return (EOS display text, EOL display text, status label) for a lifecycle row.

    Reads the row's "end_of_sale" and "end_of_life" fields, parses each with
    parse_date_field, and derives the status with status_from_eos_eol.
    """
    sale = parse_date_field(row.get("end_of_sale"))
    life = parse_date_field(row.get("end_of_life"))
    status = status_from_eos_eol(sale, life)
    return display_date(sale), display_date(life), status
353
+ "\n",
354
+ "\n",
355
+ "# ============================\n",
356
+ "# Embeddings + Parsec index\n",
357
+ "# ============================\n",
358
+ "embedder = SentenceTransformer(EMBED_MODEL_NAME)\n",
359
+ "\n",
360
def extract_pdf_text_pages(path: str) -> List[str]:
    """Return the plain text of every page of the PDF at *path*, in page order.

    The document is opened in a ``with`` block so its file handle is released
    as soon as the text has been extracted (the previous version never closed it).

    Args:
        path: Filesystem path of the PDF.

    Returns:
        One string per page, as produced by ``page.get_text("text")``.
    """
    with fitz.open(path) as doc:
        return [page.get_text("text") for page in doc]
363
+ "\n",
364
def build_parsec_cards(pages: List[str]) -> List[str]:
    """Cut product "cards" out of catalogue page text.

    Every "Standard SKU:" marker on a page yields a window that starts
    PARSEC_CONTEXT_BEFORE characters before the marker and ends
    PARSEC_CONTEXT_AFTER characters after the marker's start, clamped to the
    page. Windows shorter than 200 characters after stripping are discarded.
    Identical windows (by SHA-1 of their UTF-8 bytes) are kept once, first
    occurrence first.
    """
    snippets = []
    for page_text in pages:
        page_len = len(page_text)
        for hit in re.finditer(r"Standard\s+SKU:", page_text):
            lo = max(0, hit.start() - PARSEC_CONTEXT_BEFORE)
            hi = min(page_len, hit.start() + PARSEC_CONTEXT_AFTER)
            snippet = page_text[lo:hi].strip()
            if len(snippet) >= 200:
                snippets.append(snippet)

    # dict keeps insertion order, so setdefault retains the first card per digest.
    unique_by_digest = {}
    for snippet in snippets:
        digest = hashlib.sha1(snippet.encode("utf-8")).hexdigest()
        unique_by_digest.setdefault(digest, snippet)
    return list(unique_by_digest.values())
379
+ "\n",
380
+ "parsec_cards = build_parsec_cards(extract_pdf_text_pages(PARSEC_PDF))\n",
381
+ "parsec_emb = embedder.encode(parsec_cards, batch_size=64, show_progress_bar=False, normalize_embeddings=True)\n",
382
+ "parsec_emb = np.asarray(parsec_emb, dtype=np.float32)\n",
383
+ "parsec_index = faiss.IndexFlatIP(parsec_emb.shape[1])\n",
384
+ "parsec_index.add(parsec_emb)\n",
385
+ "\n",
386
+ "\n",
387
+ "# ============================\n",
388
+ "# Device resolution\n",
389
+ "# ============================\n",
390
def label_for_row(i: int) -> str:
    """Build the human-readable label for the lifecycle row at position *i*.

    The label is "<sku> — <manufacturer> — <description>", truncated to 220
    characters.
    """
    record = df_eos.iloc[i]
    sku, maker, description = (record.get(key, "") for key in ("sku", "manufacturer", "description"))
    return f"{sku} — {maker} — {description}"[:220]
393
+ "\n",
394
+ "EOS_LABELS = [label_for_row(i) for i in range(len(df_eos))]\n",
395
+ "EOS_CORPUS = []\n",
396
+ "for _, r in df_eos.iterrows():\n",
397
+ " EOS_CORPUS.append(\" \".join([r.get(\"_norm_sku\",\"\"), r.get(\"_canon_make\",\"\"), r.get(\"_norm_desc\",\"\"), r.get(\"_norm_notes\",\"\")]))\n",
398
+ "\n",
399
def local_candidates(query: str, top_k: int = 6) -> List[Tuple[int, int, str]]:
    """Fuzzy-match *query* against EOS_CORPUS and return the best rows.

    Args:
        query: Free-form device text; normalized with norm_text before matching.
        top_k: Maximum number of matches to return.

    Returns:
        (row position, integer score, display label) tuples in the order the
        matcher reports them.
    """
    needle = norm_text(query)
    matches = process.extract(needle, EOS_CORPUS, scorer=fuzz.WRatio, limit=top_k)

    results = []
    for _matched_text, score, idx in matches:
        position = int(idx)
        results.append((position, int(score), EOS_LABELS[position]))
    return results
403
+ "\n",
404
def gpt_choose_device(user_text: str, candidates: List[Tuple[int,int,str]]) -> Dict[str, Any]:
    """Ask the language model which candidate row the user meant.

    Args:
        user_text: The user's original device text.
        candidates: (row_idx, score, label) tuples to choose between.

    Returns:
        Whatever gpt_json returns for the request, or an empty dict when no
        client is configured.
    """
    if client is None:
        return {}

    system_prompt = "Pick which router the user meant. Never invent. Return strict JSON only."

    candidate_records = []
    for row_idx, score, label in candidates:
        candidate_records.append({"row_idx": row_idx, "score": score, "label": label})

    request = {
        "user_input": user_text,
        "candidates": candidate_records,
        "rules": [
            "If one is clearly correct, return mode='ok' with row_idx.",
            "If two are plausible, return mode='pick' with top 2 options.",
        ],
        "output_schema": {
            "mode": "ok|pick",
            "row_idx": "int",
            "options": [{"row_idx": "int", "label": "string"}],
        },
    }
    return gpt_json(system_prompt, request, max_tokens=280)
418
+ "\n",
419
def resolve_device(user_text: str) -> Dict[str, Any]:
    """Resolve free-form device text to a lifecycle row.

    Resolution order:
      1. exact match on the normalized SKU,
      2. a confident fuzzy match (score >= 95 and at least 8 points clear of
         the runner-up),
      3. the language model's choice among the fuzzy candidates,
      4. the top two fuzzy candidates for the user to pick from.

    Args:
        user_text: Device text as typed by the user.

    Returns:
        {"mode": "ok", "row_idx": int}, or
        {"mode": "pick", "options": [{"row_idx": int, "label": str}, ...]}, or
        {"mode": "not_found"}.
    """
    q = norm_text(user_text)
    if not q:
        # An empty query would otherwise "exactly" match rows whose SKU is blank.
        return {"mode": "not_found"}

    exact = df_eos.index[df_eos["_norm_sku"] == q].tolist()
    if len(exact) == 1:
        return {"mode": "ok", "row_idx": int(exact[0])}
    if len(exact) > 1:
        opts = [{"row_idx": int(i), "label": EOS_LABELS[int(i)]} for i in exact[:2]]
        return {"mode": "pick", "options": opts}

    cands = local_candidates(user_text, top_k=6)
    if not cands:
        return {"mode": "not_found"}

    if cands[0][1] >= 95 and (len(cands) == 1 or (cands[0][1] - cands[1][1]) >= 8):
        return {"mode": "ok", "row_idx": cands[0][0]}

    label_by_idx = {idx: label for idx, _score, label in cands}

    def _candidate_idx(value: Any) -> Optional[int]:
        """Return *value* as a row index only if it names one of the offered candidates."""
        if isinstance(value, str) and value.strip().isdigit():
            value = int(value.strip())
        if isinstance(value, int) and not isinstance(value, bool) and value in label_by_idx:
            return value
        return None

    # The model's answer is untrusted: only indices we actually offered are accepted,
    # and labels always come from our own table, never from the model.
    g = gpt_choose_device(user_text, cands)
    if not isinstance(g, dict):
        g = {}

    if g.get("mode") == "ok":
        chosen = _candidate_idx(g.get("row_idx"))
        if chosen is not None:
            return {"mode": "ok", "row_idx": chosen}

    if g.get("mode") == "pick":
        raw_options = g.get("options")
        if not isinstance(raw_options, list):
            raw_options = []
        opts2 = []
        for option in raw_options[:2]:
            idx = _candidate_idx(option.get("row_idx")) if isinstance(option, dict) else None
            if idx is not None and all(idx != kept["row_idx"] for kept in opts2):
                opts2.append({"row_idx": idx, "label": label_by_idx[idx]})
        if opts2:
            return {"mode": "pick", "options": opts2}

    # No usable model answer: offer the best one or two fuzzy matches.
    fallback = [{"row_idx": idx, "label": label} for idx, _score, label in cands[:2]]
    return {"mode": "pick", "options": fallback}
448
+ "\n",
449
+ "\n",
450
+ "# ============================\n",
451
+ "# Replacements — lifecycle CSV source of truth\n",
452
+ "# ============================\n",
453
def extract_model_token(text: str) -> str:
    """Pull a short model identifier out of a replacement cell.

    The cell (after safe_str) is split on "|" and the segments are examined
    last to first. For each segment a series of model-number patterns is
    tried in order; the first hit is returned upper-cased. If nothing
    matches, the first examined segment is returned, cut to 60 characters.

    Returns:
        The model token, or "" when the cell is empty.
    """
    cleaned = safe_str(text)
    if not cleaned:
        return ""

    segments = [seg.strip() for seg in cleaned.split("|") if seg.strip()]
    ordered = segments[::-1] if segments else [cleaned]

    for segment in ordered:
        shouted = segment.upper()
        # (pattern, text to search, regex flags) — tried in this order.
        attempts = (
            (r"\bRUT[A-Z]?\d{2,4}\b", shouted, 0),
            (r"\bIX\d{2}\b", segment, re.IGNORECASE),
            (r"\b(R\d{3,4}|E\d{3,4}|S\d{3,4})\b", segment, re.IGNORECASE),
            (r"\b[A-Z]{1,6}\d{2,4}[A-Z]?\b", shouted, 0),
        )
        for pattern, haystack, flags in attempts:
            found = re.search(pattern, haystack, flags=flags)
            if found:
                return found.group(0).upper()

    return ordered[0][:60]
473
+ "\n",
474
def device_is_4g(row: pd.Series) -> bool:
    """Guess whether a lifecycle row describes a 4G/LTE-only device.

    Detects LTE/4G even when the description only names an LTE category
    ("Cat 4", "Cat6", "Cat-M1") without saying "LTE". The decision is made on
    the normalized description, notes and SKU joined by spaces.

    Args:
        row: Lifecycle row providing "description", "notes" and "sku".

    Returns:
        False when the text mentions 5G/NR; True when it mentions LTE, 4G or
        any "cat" marker; False otherwise.
    """
    t = " ".join(norm_text(row.get(field, "")) for field in ("description", "notes", "sku"))

    # If it explicitly says 5G/NR, treat as not 4G-only.
    # NOTE(review): these are plain substring tests, so "nr" also matches inside
    # longer words — confirm that is intended.
    if "5g" in t or "nr" in t:
        return False

    # Classic signals.
    if "lte" in t or "4g" in t:
        return True

    # LTE category signals (Cat 1..20 are LTE categories; Cat M1/M2 are LTE-M).
    if re.search(r"\bcat\s*[-]?\s*(m1|m2)\b", t):
        return True
    category = re.search(r"\bcat\s*[-]?\s*(\d{1,2})\b", t)
    if category and 0 < int(category.group(1)) <= 20:
        return True

    # If "cat" appears at all, it is almost always LTE-family.
    return "cat" in t
531
+ "\n",
532
+ "\n",
533
def _lifecycle_model_candidates(manufacturer: str, column: str) -> List[str]:
    """Collect distinct model tokens from *column* for one manufacturer's lifecycle rows.

    Both the query and the table's manufacturer column are normalized with
    norm_text before comparison, so the two sides always go through the same
    normalization. When the table has no "manufacturer" column, every row is
    used.

    Returns:
        Model tokens in first-seen order, without empty or "nan" entries.
    """
    mfr = norm_text(manufacturer)
    if "manufacturer" in df_eos.columns:
        pool = df_eos[df_eos["manufacturer"].map(norm_text).eq(mfr)]
    else:
        pool = df_eos
    if column not in pool.columns:
        return []

    out, seen = [], set()
    for cell in pool[column].tolist():
        tok = extract_model_token(cell)
        if tok and tok.lower() != "nan" and tok not in seen:
            seen.add(tok)
            out.append(tok)
    return out


def candidate_5g_models_from_lifecycle(manufacturer: str) -> List[str]:
    """Return the distinct 5G upgrade models ("advanced_5g_option") listed for *manufacturer*."""
    return _lifecycle_model_candidates(manufacturer, "advanced_5g_option")


def candidate_4g_models_from_lifecycle(manufacturer: str) -> List[str]:
    """Return the distinct 4G replacement models ("suggested_replacement") listed for *manufacturer*."""
    return _lifecycle_model_candidates(manufacturer, "suggested_replacement")
554
+ "\n",
555
def gpt_pick_from_candidates(old_row: pd.Series, candidates: List[str], need: str) -> str:
    """Ask the language model to pick one replacement model from *candidates*.

    Args:
        old_row: Lifecycle row of the device being replaced.
        candidates: Allowed model names; at most the first 40 are sent.
        need: Short description of what kind of replacement is wanted.

    Returns:
        The chosen model when it is one of *candidates*; "" when there is no
        client, no candidates, or the answer is not in the list.
    """
    if client is None or not candidates:
        return ""

    system_prompt = "Pick the best replacement model. Choose only from candidates. Return strict JSON only."
    old_device = {
        "sku": str(old_row.get("sku", "")),
        "manufacturer": str(old_row.get("manufacturer", "")),
        "description": str(old_row.get("description", "")),
        "need": need,
    }
    request = {
        "old_device": old_device,
        "candidates": candidates[:40],
        "output_schema": {"choice": "string"},
    }

    answer = gpt_json(system_prompt, request, max_tokens=240) or {}
    picked = str(answer.get("choice", "") or "").strip()
    if picked in candidates:
        return picked
    return ""
572
+ "\n",
573
def fallback_5g_from_dec(canon_make: str) -> str:
    """Return the first 5G model for *canon_make* in the router catalogue, or ""."""
    same_maker = df_dec["_canon_make"] == canon_make
    flagged_5g = df_dec["_is5g"] == True  # noqa: E712 - element-wise comparison on a Series
    matches = df_dec[same_maker & flagged_5g]
    if matches.empty:
        return ""
    return str(matches.iloc[0]["Model"]).strip()
576
+ "\n",
577
def pick_replacements_lifecycle(row: pd.Series, status: str, use_gpt: bool = True) -> Dict[str, Any]:
    """Choose the 4G alternative and 5G replacement for a lifecycle row.

    The lifecycle CSV is the source of truth: the row's own
    "suggested_replacement" / "advanced_5g_option" cells win. When a cell
    yields no model, the manufacturer's other rows supply candidates (picked
    by the language model when enabled, else the first one). For 5G the
    router catalogue is the last resort.

    Args:
        row: Lifecycle row of the current device.
        status: Lifecycle status label of the device.
        use_gpt: Whether the language model may be consulted.

    Returns:
        {"repl_4g": str, "repl_5g": str, "sources": list[str]} where
        "Not applicable" / "Not listed" stand for "no 4G / 5G answer".
    """
    canon = str(row.get("_canon_make", "UNKNOWN"))
    manufacturer = str(row.get("manufacturer", "") or "")
    gpt_enabled = bool(use_gpt and client)

    sug_raw = safe_str(row.get("suggested_replacement", ""))
    adv_raw = safe_str(row.get("advanced_5g_option", ""))

    has_4g_alt = bool(sug_raw.strip())
    has_5g_alt = bool(adv_raw.strip())

    # Treat as 4G if the description indicates LTE OR the lifecycle row provides a
    # 4G suggested replacement.
    is_4g = device_is_4g(row) or has_4g_alt

    # Provide a 5G option if the unit is 4G, EOS/EOL, or the lifecycle row explicitly
    # provides advanced_5g_option.
    want_5g = is_4g or (status in {"End of Sale", "End of Life"}) or has_5g_alt

    # 4G alternative: shown whenever the device counts as 4G (which already
    # includes "lifecycle provides a suggestion").
    repl_4g = "Not applicable"
    if is_4g:
        repl_4g = extract_model_token(sug_raw)
        if not repl_4g:
            cand4 = candidate_4g_models_from_lifecycle(manufacturer)
            picked = gpt_pick_from_candidates(row, cand4, "4G alternative") if gpt_enabled else ""
            repl_4g = picked or (cand4[0] if cand4 else "")
        if not repl_4g:
            repl_4g = "Not applicable"

    # 5G replacement: prefer the lifecycle advanced_5g_option whenever present.
    repl_5g = "Not listed"
    if want_5g:
        repl_5g = extract_model_token(adv_raw)
        if not repl_5g:
            cand5 = candidate_5g_models_from_lifecycle(manufacturer)
            picked = gpt_pick_from_candidates(row, cand5, "5G replacement/upgrade") if gpt_enabled else ""
            repl_5g = picked or (cand5[0] if cand5 else "")
        if not repl_5g:
            repl_5g = fallback_5g_from_dec(canon) or "Not listed"

    # A stringified NaN cell must never be shown as a model name; the same guard
    # now applies to both answers.
    if repl_4g.lower() == "nan":
        repl_4g = "Not applicable"
    if repl_5g.lower() == "nan":
        repl_5g = "Not listed"

    return {"repl_4g": repl_4g, "repl_5g": repl_5g, "sources": ["lifecycle_csv"] + (["gpt"] if gpt_enabled else [])}
617
+ "\n",
618
+ "\n",
619
+ "# ============================\n",
620
+ "# Antennas (Parsec-only)\n",
621
+ "# ============================\n",
622
+ "PARSEC_FAMILY_WORDS = {\"chinook\",\"labrador\",\"boxer\",\"bloodhound\",\"husky\",\"beagle\",\"mastiff\",\"collie\",\"shepherd\",\"belgian\",\"australian\",\"terrier\",\"pyrenees\"}\n",
623
+ "BAD_NAME_MARKERS = {\"customization\",\"standard connectors\",\"connectors\",\"features\",\"benefits\",\"specifications\",\"mechanical\",\"electrical\",\"mounting\",\"accessories\",\"description:\",\"standard sku\"}\n",
624
+ "\n",
625
def clean_line(s: str) -> str:
    """Collapse whitespace in a card line and blank out bare suffix fragments.

    Runs of whitespace become one space and the ends are trimmed. A line that
    consists solely of a hyphen followed by letters/digits (for example "-NM")
    is returned as "".
    """
    collapsed = re.sub(r"\s+", " ", str(s or "").strip())
    is_suffix_fragment = re.fullmatch(r"-[a-z0-9]+", collapsed.lower()) is not None
    return "" if is_suffix_fragment else collapsed
630
+ "\n",
631
def is_bad_name_line(line: str) -> bool:
    """Tell whether a card line is unsuitable as a product name.

    A line is rejected when it contains any BAD_NAME_MARKERS entry, or when it
    is at most 25 characters long and contains a short "-xxx" suffix (a hyphen
    followed by 1-4 letters/digits directly after a word character).
    """
    lowered = line.lower()
    for marker in BAD_NAME_MARKERS:
        if marker in lowered:
            return True

    has_short_suffix = re.search(r"\b-[a-z0-9]{1,4}\b", lowered) is not None
    return len(lowered) <= 25 and has_short_suffix
638
+ "\n",
639
def family_from_line(line: str) -> str:
    """Return the capitalized antenna family named in *line*, or "".

    PARSEC_FAMILY_WORDS is a set, whose iteration order for strings varies
    between interpreter runs. Returning "the first hit" therefore gave
    run-dependent answers for lines naming two family words (for example
    "Belgian Shepherd"). The result is now deterministic: the family word that
    appears earliest in the line wins.
    """
    low = line.lower()
    hits = [(low.find(fam), fam) for fam in PARSEC_FAMILY_WORDS if fam in low]
    if not hits:
        return ""
    # Tuples sort by position first; the word itself only breaks (impossible) ties.
    return min(hits)[1].capitalize()
645
+ "\n",
646
def parsec_connectors_from_card(t: str) -> str:
    """Extract the "Standard Connectors:" value from a card.

    Returns:
        The rest of that line with whitespace collapsed, cut to 80
        characters, or "" when the card has no such field.
    """
    found = re.search(r"Standard\s+Connectors:\s*(.+)", t, flags=re.IGNORECASE)
    if found is None:
        return ""
    value = found.group(1).strip()
    return re.sub(r"\s+", " ", value)[:80]
651
+ "\n",
652
def parsec_mounts_from_card(t: str) -> List[str]:
    """List the mount types named in a card's "Mount:" fields.

    Every "Mount:" line is split on commas; entries are trimmed, lower-cased
    and de-duplicated while keeping first-seen order.
    """
    collected = []
    for found in re.finditer(r"Mount:\s*(.+)", t, flags=re.IGNORECASE):
        value = re.sub(r"\s+", " ", found.group(1).strip())
        for piece in value.split(","):
            piece = piece.strip()
            if piece:
                collected.append(piece.lower())

    # dict.fromkeys drops repeats and preserves insertion order.
    return list(dict.fromkeys(collected))
664
+ "\n",
665
def parsec_name_from_card(card_text: str) -> str:
    """Work out a display name for a Parsec card.

    Strategy:
      1. Return the family name from the first acceptable line that names one.
      2. Otherwise look at up to 12 lines above the first "standard sku" line,
         nearest first, and return the capitalized first word of the first
         acceptable line that is 3-40 characters long and contains a letter.
      3. Otherwise return "Parsec antenna".
    """
    usable = [ln for ln in map(clean_line, str(card_text or "").splitlines()) if ln]

    # Pass 1: a known family word anywhere in an acceptable line.
    for ln in usable:
        family = "" if is_bad_name_line(ln) else family_from_line(ln)
        if family:
            return family

    # Pass 2: the heading closest above the SKU line.
    sku_at = next((i for i, ln in enumerate(usable) if "standard sku" in ln.lower()), None)
    if sku_at is not None:
        above = usable[max(0, sku_at - 12):sku_at]
        for ln in above[::-1]:
            if is_bad_name_line(ln):
                continue
            if 3 <= len(ln) <= 40 and re.search(r"[A-Za-z]", ln):
                return ln.split()[0].capitalize()

    return "Parsec antenna"
690
+ "\n",
691
def parsec_part_from_card(t: str) -> str:
    """Return the part number that follows "Standard SKU:" in a card, or "".

    Only the leading run of upper-case letters and digits is taken; the match
    is case-sensitive.
    """
    found = re.search(r"Standard\s+SKU:\s*([A-Z0-9]+)", t)
    if found is None:
        return ""
    return found.group(1).strip()
694
+ "\n",
695
def parsec_desc_from_card(t: str) -> str:
    """Return the one-line "Description:" value from a card, or "".

    Whitespace is collapsed and the result is cut to 220 characters.
    """
    found = re.search(r"Description:\s*(.+?)(?:\n|$)", t, flags=re.IGNORECASE)
    if not found:
        return ""
    one_line = re.sub(r"\s+", " ", found.group(1).strip())
    return one_line[:220]
698
+ "\n",
699
def parsec_retrieve(query: str, top_k: int = 12) -> List[Dict[str, Any]]:
    """Search the Parsec card index for *query*.

    Args:
        query: Free-text search string.
        top_k: Number of neighbours requested from the index.

    Returns:
        One dict per valid hit, in index order, with "score", "name",
        "part_number", "description", "connectors", "mounts" and "_card"
        (the lower-cased card text). Ids outside the card list are skipped.
    """
    query_vec = np.asarray(embedder.encode([query], normalize_embeddings=True), dtype=np.float32)
    scores, ids = parsec_index.search(query_vec, top_k)

    results: List[Dict[str, Any]] = []
    for raw_score, raw_id in zip(scores[0].tolist(), ids[0].tolist()):
        card_id = int(raw_id)
        if not (0 <= card_id < len(parsec_cards)):
            continue
        card = parsec_cards[card_id]
        results.append(
            {
                "score": float(raw_score),
                "name": parsec_name_from_card(card),
                "part_number": parsec_part_from_card(card),
                "description": parsec_desc_from_card(card),
                "connectors": parsec_connectors_from_card(card),
                "mounts": parsec_mounts_from_card(card),
                "_card": card.lower(),
            }
        )
    return results
717
+ "\n",
718
def choose_best_parsec(cands: List[Dict[str, Any]], mode: str) -> Dict[str, Any]:
    """Pick the best antenna candidate for an installation mode.

    Each candidate's retrieval score is adjusted by simple rules: omni cards
    are favoured and directional ones penalized; in "vehicle" mode magnetic
    and through-hole mounts are rewarded and wall/pole or fixed-only cards
    penalized; in "stationary" mode wall and pole mounts are rewarded. The
    earliest candidate with the highest adjusted score wins.

    Args:
        cands: Candidate dicts as produced by the retrieval step.
        mode: "vehicle" or "stationary"; other values apply only the generic rules.

    Returns:
        A copy of the winning candidate without its "_card" key, or a
        placeholder "Parsec antenna" dict when nothing qualifies.
    """

    def _mount_mentions(mounts, word):
        return any(word in mount for mount in mounts)

    def _adjusted_score(cand):
        card_text = cand.get("_card", "")
        mounts = cand.get("mounts", []) or []
        total = float(cand.get("score", 0.0))

        if "omni" in card_text:
            total += 0.6
        if "directional" in card_text:
            total -= 1.5

        if mode == "vehicle":
            if _mount_mentions(mounts, "magnetic"):
                total += 3.0
            if _mount_mentions(mounts, "through"):
                total += 2.0
            if _mount_mentions(mounts, "wall") or _mount_mentions(mounts, "pole"):
                total -= 1.2
            if "app: fixed" in card_text and "mobile" not in card_text:
                total -= 2.0

        if mode == "stationary":
            if _mount_mentions(mounts, "wall"):
                total += 2.0
            if _mount_mentions(mounts, "pole"):
                total += 1.8

        return total

    winner = None
    top_score = -1e9
    for cand in cands:
        adjusted = _adjusted_score(cand)
        if adjusted > top_score:
            winner, top_score = cand, adjusted

    if not winner:
        return {"name": "Parsec antenna", "part_number": "", "description": "", "connectors": "", "mounts": []}

    # Copy before removing the internal key so the caller's dict is left untouched.
    result = dict(winner)
    result.pop("_card", None)
    return result
758
+ "\n",
759
+ "\n",
760
def infer_mimo_for_5g(repl_5g_model: str) -> str:
    """Return the antenna MIMO configuration for a 5G replacement router.

    Rule: every 5G router uses a 4x4 antenna, so the model name does not
    influence the answer. An earlier catalogue-based heuristic sat after the
    return statement where it could never run (and referred to names that do
    not exist in this function); it has been removed.

    Args:
        repl_5g_model: Model name of the 5G replacement (currently unused).

    Returns:
        Always "4x4".
    """
    return "4x4"
790
+ "\n",
791
def antenna_options_for(router_model: str, tech: str, mimo: str) -> Dict[str, Any]:
    """Recommend one stationary and one vehicle omni antenna for a router.

    A retrieval query is built per installation mode, the Parsec index is
    searched (top 12), and the best candidate per mode is chosen. Each pick is
    annotated with the requested MIMO configuration and a short reason.

    Returns:
        {"stationary_omni": dict, "vehicle_omni": dict, "sources": ["parsec_rag"]}
    """
    queries = {
        "stationary": f"{router_model} {tech} {mimo} omni stationary pole wall fixed site Parsec",
        "vehicle": f"{router_model} {tech} {mimo} omni vehicle mobile magnetic through-bolt Parsec",
    }
    retrieved = {mode: parsec_retrieve(text, top_k=12) for mode, text in queries.items()}
    picks = {mode: choose_best_parsec(found, mode=mode) for mode, found in retrieved.items()}

    picks["stationary"].update({"mimo": mimo, "why": "Stationary omni best match."})
    picks["vehicle"].update({"mimo": mimo, "why": "Vehicle omni best match."})

    return {
        "stationary_omni": picks["stationary"],
        "vehicle_omni": picks["vehicle"],
        "sources": ["parsec_rag"],
    }
805
+ "\n",
806
+ "\n",
807
+ "# ============================\n",
808
+ "# Install-ready checklist\n",
809
+ "# ============================\n",
810
def install_ready_checklist(current_sku: str, repl: Dict[str,Any], ant: Dict[str,Any]) -> str:
    """Produce a short markdown install checklist for the recommended hardware.

    When a language-model client is configured it writes the checklist. If
    that request fails or returns no text, the deterministic template below is
    used instead, so the caller always receives a usable checklist.

    Args:
        current_sku: SKU of the device being replaced.
        repl: Replacement picks ("repl_5g", "repl_4g").
        ant: Antenna picks ("stationary_omni", "vehicle_omni").

    Returns:
        Markdown text.
    """
    st = ant.get("stationary_omni", {})
    vh = ant.get("vehicle_omni", {})

    if client is not None:
        system_prompt = "Create a short, install-ready checklist for a Verizon rep. Return markdown only."
        payload = {"current_device": current_sku, "replacements": repl, "antennas": {"stationary": st, "vehicle": vh}}
        try:
            resp = client.responses.create(
                model=OPENAI_MODEL,
                reasoning=OPENAI_REASONING,
                input=[{"role": "system", "content": system_prompt}, {"role": "user", "content": json.dumps(payload)}],
                max_output_tokens=520,
            )
            generated = (getattr(resp, "output_text", "") or "").strip()
        except Exception:
            # Best effort by design: network/API/serialization problems must not
            # block the rep; fall through to the static checklist.
            generated = ""
        if generated:
            return generated

    return "\n".join([
        "### Install-ready checklist",
        f"- Current device: {current_sku}",
        f"- 5G replacement: {repl.get('repl_5g','')}",
        f"- 4G alternative: {repl.get('repl_4g','Not applicable')}",
        f"- Stationary omni antenna: {st.get('name','')} (PN {st.get('part_number','')})",
        f"- Vehicle omni antenna: {vh.get('name','')} (PN {vh.get('part_number','')})",
        "- Next steps: confirm mounting + cable lengths + power; place order; schedule install.",
    ])
832
+ "\n",
833
+ "\n",
834
+ "# ============================\n",
835
+ "# Batch mode (NO GPT)\n",
836
+ "# ============================\n",
837
def parse_batch_inputs(text_blob: str, file_obj: Any) -> List[str]:
    """Collect the device strings for a batch run.

    Items come from the first column of an uploaded CSV (if any) followed by
    the non-empty lines of *text_blob*. Duplicates, compared after norm_text,
    are dropped, keeping the first spelling seen.

    Args:
        text_blob: Pasted text, one device per line.
        file_obj: Uploaded file object (with a ``name`` path attribute), a
            path-like value, or None.

    Returns:
        De-duplicated device strings in input order.
    """
    items: List[str] = []

    if file_obj is not None:
        try:
            path = file_obj.name if hasattr(file_obj, "name") else str(file_obj)
            frame = pd.read_csv(path)
            if len(frame.columns):
                # Empty cells load as NaN; drop them so they do not become "nan" queries.
                first_column = frame[frame.columns[0]].dropna()
                items.extend(text for text in (str(cell).strip() for cell in first_column.tolist()) if text)
        except Exception:
            # Best effort by design: an unreadable upload must not block pasted
            # text, but leave a trace instead of failing silently.
            import logging

            logging.getLogger(__name__).warning("Could not read batch CSV upload", exc_info=True)

    if text_blob:
        items.extend(line.strip() for line in str(text_blob).splitlines() if line.strip())

    seen = set()
    out = []
    for item in items:
        key = norm_text(item)
        if key and key not in seen:
            seen.add(key)
            out.append(item)
    return out
859
+ "\n",
860
def run_batch(text_blob: str, file_obj: Any, include_antennas: bool):
    """Resolve a batch of devices without using the language model for replacements.

    Args:
        text_blob: Pasted device text, one per line.
        file_obj: Optional uploaded CSV (first column is read).
        include_antennas: Accepted for interface compatibility; not used here.

    Returns:
        (summary line, results DataFrame, path of the exported CSV, roll-up
        text of the most frequent 5G recommendations). With no inputs the
        result is ("", None, None, "").
    """
    inputs = parse_batch_inputs(text_blob, file_obj)
    if not inputs:
        return "", None, None, ""

    rows = []
    for item in inputs:
        res = resolve_device(item)
        if res.get("mode") != "ok":
            # Ambiguous or unknown devices are listed for manual follow-up.
            rows.append({"Input": item, "Matched": "", "Status": "Needs review", "EOS": "", "EOL": "", "4G alternative": "", "5G replacement": "", "Notes": "Not found/ambiguous"})
            continue

        life_row = df_eos.iloc[int(res["row_idx"])]
        eos, eol, status = row_to_dates_and_status(life_row)
        repl = pick_replacements_lifecycle(life_row, status, use_gpt=False)

        rows.append({
            "Input": item,
            "Matched": str(life_row.get("sku", "")),
            "Status": status,
            "EOS": eos,
            "EOL": eol,
            "4G alternative": repl.get("repl_4g", ""),
            "5G replacement": repl.get("repl_5g", ""),
            "Notes": "",
        })

    out_df = pd.DataFrame(rows)
    counts = out_df["Status"].value_counts(dropna=False).to_dict()
    top_5g = out_df["5G replacement"].value_counts(dropna=False).head(5).to_dict()
    summary = f"Rows: {len(out_df)} | " + " | ".join([f"{k}: {v}" for k, v in counts.items()])
    rollup = "Top 5G recommendations:\n" + "\n".join([f"- {k}: {v}" for k, v in top_5g.items() if str(k).strip()])

    # Reserve a unique path, close the handle, then write by name: the handle is
    # not leaked and the file is not written while still open elsewhere.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
        csv_path = tmp.name
    out_df.to_csv(csv_path, index=False)

    return summary, out_df, csv_path, rollup
897
+ "\n",
898
+ "\n",
899
+ "# ============================\n",
900
+ "# Replacement feature table + manufacturer link (5G device)\n",
901
+ "# ============================\n",
902
+ "\n",
903
+ "FEATURE_COLS = [\"Device\", \"Modem technology\", \"WiFi\", \"Ports\", \"Antennas\", \"Ruggedness\", \"Use case\"]\n",
904
+ "\n",
905
+ "# Manufacturer domains used for best-effort link resolution (no non-maker domains).\n",
906
+ "MAKER_DOMAINS = {\n",
907
+ " \"CRADLEPOINT\": [\"cradlepoint.com\", \"ericsson.com\"],\n",
908
+ " \"SIERRA\": [\"semtech.com\", \"airlink.com\"],\n",
909
+ " \"FEENEY\": [\"inseego.com\"],\n",
910
+ " \"DIGI\": [\"digi.com\"],\n",
911
+ " \"CISCO_MERAKI\": [\"meraki.cisco.com\", \"cisco.com\"],\n",
912
+ " \"CISCO\": [\"cisco.com\"],\n",
913
+ " \"TELTONIKA\": [\"teltonika-networks.com\"],\n",
914
+ " \"UNKNOWN\": [],\n",
915
+ "}\n",
916
+ "\n",
917
+ "HTTP_HEADERS = {\n",
918
+ " \"User-Agent\": \"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 \"\n",
919
+ " \"(KHTML, like Gecko) Chrome/120.0 Safari/537.36\"\n",
920
+ "}\n",
921
+ "HTTP_TIMEOUT = 12\n",
922
+ "\n",
923
+ "def _best_effort_manufacturer_url(model: str, canon_make: str) -> str:\n",
924
+ " \\\"\\\"\\\"Try to find a manufacturer page or datasheet link using simple on-domain searches.\n",
925
+ " If we can't confirm a page, return the manufacturer homepage for the maker family.\n",
926
+ " \\\"\\\"\\\"\n",
927
+ " model = str(model or \"\").strip()\n",
928
+ " if not model or model in {\"Not listed\", \"Not applicable\"}:\n",
929
+ " return \"\"\n",
930
+ "\n",
931
+ " domains = MAKER_DOMAINS.get(canon_make, []) or []\n",
932
+ " if not domains:\n",
933
+ " return \"\"\n",
934
+ "\n",
935
+ " # Candidate on-domain search URLs (common patterns across sites).\n",
936
+ " # We keep these on the manufacturer domain (no Google/Bing).\n",
937
+ " q = re.sub(r\"\\s+\", \"+\", model)\n",
938
+ " url_candidates = []\n",
939
+ " for d in domains:\n",
940
+ " url_candidates += [\n",
941
+ " f\"https://{d}/search?q={q}\",\n",
942
+ " f\"https://{d}/search?query={q}\",\n",
943
+ " f\"https://{d}/?s={q}\",\n",
944
+ " f\"https://www.{d}/search?q={q}\",\n",
945
+ " f\"https://www.{d}/search?query={q}\",\n",
946
+ " f\"https://www.{d}/?s={q}\",\n",
947
+ " ]\n",
948
+ "\n",
949
+ " # Also try a few direct product patterns for known makers (best effort).\n",
950
+ " if canon_make == \"TELTONIKA\":\n",
951
+ " slug = model.lower()\n",
952
+ " url_candidates += [\n",
953
+ " f\"https://teltonika-networks.com/products/routers/{slug}\",\n",
954
+ " f\"https://teltonika-networks.com/product/{slug}\",\n",
955
+ " \"https://teltonika-networks.com/products/routers/\",\n",
956
+ " ]\n",
957
+ " if canon_make == \"DIGI\":\n",
958
+ " url_candidates += [\n",
959
+ " \"https://www.digi.com/products/networking/cellular-routers\",\n",
960
+ " f\"https://www.digi.com/search?q={q}\",\n",
961
+ " ]\n",
962
+ " if canon_make == \"CRADLEPOINT\":\n",
963
+ " url_candidates += [\n",
964
+ " \"https://cradlepoint.com/products/\",\n",
965
+ " f\"https://cradlepoint.com/?s={q}\",\n",
966
+ " ]\n",
967
+ " if canon_make in {\"CISCO\", \"CISCO_MERAKI\"}:\n",
968
+ " url_candidates += [\n",
969
+ " f\"https://www.cisco.com/c/en/us/search.html?q={q}\",\n",
970
+ " ]\n",
971
+ "\n",
972
+ " # Try to confirm a working page (HTTP 200 and model string somewhere in HTML).\n",
973
+ " for u in url_candidates[:18]:\n",
974
+ " try:\n",
975
+ " import requests\n",
976
+ " r = requests.get(u, headers=HTTP_HEADERS, timeout=HTTP_TIMEOUT, allow_redirects=True)\n",
977
+ " if r.status_code != 200:\n",
978
+ " continue\n",
979
+ " html = (r.text or \"\").lower()\n",
980
+ " if model.lower() in html or \"datasheet\" in html or \"data sheet\" in html:\n",
981
+ " return r.url\n",
982
+ " except Exception:\n",
983
+ " continue\n",
984
+ "\n",
985
+ " # Fallback: maker homepage\n",
986
+ " d0 = domains[0]\n",
987
+ " return f\"https://{d0}\"\n",
988
+ "\n",
989
def _features_from_dec(model: str, canon_make: str) -> Dict[str, str]:
    """Look up a router model in ``df_dec`` and return its key feature fields.

    The search is limited to rows of ``df_dec`` whose ``_canon_make`` equals
    ``canon_make``; the best fuzzy match on ``_norm_model`` is accepted only
    when its score reaches ``MATCH_OK``.

    Args:
        model: Router model name; blank or placeholder values
            ("Not listed", "Not applicable") are not looked up.
        canon_make: Maker-family value to filter ``df_dec`` on.

    Returns:
        A dict with one entry per feature column (``FEATURE_COLS`` without
        "Device"). Fields that cannot be determined are "Not listed".
    """
    not_listed = {k: "Not listed" for k in FEATURE_COLS[1:]}

    model = str(model or "").strip()
    if not model or model in {"Not listed", "Not applicable"}:
        return not_listed

    pool = df_dec[df_dec["_canon_make"] == canon_make]
    if pool.empty:
        return not_listed

    hit = process.extractOne(norm_text(model), pool["_norm_model"].tolist(), scorer=fuzz.WRatio)
    if not hit or hit[1] < MATCH_OK:
        return not_listed

    # hit is consumed as (choice, score, index); the index is positional in
    # the list built from ``pool``, hence ``iloc``.
    # NOTE(review): assumes extractOne returns a 3-tuple — confirm the fuzzy library.
    r = pool.iloc[int(hit[2])]

    def _cell(column: str) -> str:
        """Return the cell as stripped text, with empty/NaN cells as ""."""
        raw = r.get(column, "")
        text = "" if raw is None else str(raw).strip()
        # str(NaN) is the truthy string "nan"; treat it as missing.
        return "" if text.lower() == "nan" else text

    wan = _cell("WAN ports and speed")
    lan = _cell("LAN ports and speed")
    if wan or lan:
        ports = f"WAN: {wan or 'Not listed'} | LAN: {lan or 'Not listed'}"
    else:
        ports = "Not listed"

    return {
        "Modem technology": _cell("Modem Type") or "Not listed",
        "WiFi": _cell("WiFi type") or "Not listed",
        "Ports": ports,
        "Antennas": _cell("Antennas (internal/external/both)") or "Not listed",
        "Ruggedness": _cell("Ruggedization") or "Not listed",
        "Use case": _cell("Primary use case") or "Not listed",
    }
1012
+ "\n",
1013
+ "def _gpt_fill_feature_row(device_label: str, model: str, canon_make: str, row: Dict[str, str]) -> Dict[str, str]:\n",
1014
+ " \\\"\\\"\\\"If dec can't supply values, ask GPT to fill missing ones (best guess).\\\"\\\"\\\"\n",
1015
+ " if client is None:\n",
1016
+ " return row\n",
1017
+ "\n",
1018
+ " missing = [k for k,v in row.items() if (not v) or str(v).strip().lower() in {\"not listed\",\"nan\",\"\"}]\n",
1019
+ " if not missing:\n",
1020
+ " return row\n",
1021
+ "\n",
1022
+ " sys = \"Fill missing router feature fields for a Verizon rep. Return strict JSON only.\"\n",
1023
+ " payload = {\n",
1024
+ " \"device_label\": device_label,\n",
1025
+ " \"model\": model,\n",
1026
+ " \"maker_family\": canon_make,\n",
1027
+ " \"known\": row,\n",
1028
+ " \"fill_only\": missing,\n",
1029
+ " \"rules\": [\n",
1030
+ " \"Fill only the requested fields.\",\n",
1031
+ " \"Best guess if needed. Short phrases only.\",\n",
1032
+ " \"Return JSON only.\"\n",
1033
+ " ],\n",
1034
+ " \"output_schema\": {k: \"string\" for k in missing}\n",
1035
+ " }\n",
1036
+ " out = gpt_json(sys, payload, max_tokens=260) or {}\n",
1037
+ " for k in missing:\n",
1038
+ " val = str(out.get(k, \"\") or \"\").strip()\n",
1039
+ " if val:\n",
1040
+ " row[k] = val\n",
1041
+ " return row\n",
1042
+ "\n",
1043
def build_replacement_features_table(repl_4g: str, repl_5g: str, canon_make: str) -> pd.DataFrame:
    """Build the two-row feature table (4G alternative first, then 5G replacement).

    Each row starts from the table lookup and is then passed through the GPT
    gap-filler; columns follow ``FEATURE_COLS``.
    """
    labelled_models = (
        ("4G alternative", repl_4g),
        ("5G replacement", repl_5g),
    )

    records = []
    for label, model in labelled_models:
        features = _features_from_dec(model, canon_make)
        features = _gpt_fill_feature_row(label, model, canon_make, features)
        records.append({"Device": label, **features})

    return pd.DataFrame(records, columns=FEATURE_COLS)
1058
+ "\n",
1059
+ "# ============================\n",
1060
+ "# Output\n",
1061
+ "# ============================\n",
1062
def assemble_output(life_row: pd.Series, status: str, eos: str, eol: str, repl: Dict[str,Any], ant: Dict[str,Any]) -> str:
    """Render the numbered markdown summary shown after a lookup.

    Args:
        life_row: Lifecycle row; read through ``.get`` for "sku" and "description".
        status: Lifecycle status text.
        eos: End-of-sale date text.
        eol: End-of-life date text.
        repl: Replacement info; reads "repl_4g", "repl_5g" and, when it is a
            list, "sources".
        ant: Antenna picks; reads "stationary_omni" and "vehicle_omni".

    Returns:
        The summary as a single newline-joined string.
    """
    def antenna_line(kind: str, info: Dict[str, Any]) -> str:
        # The connector suffix is only shown when a connector value exists.
        connectors = f" | Conn: {info.get('connectors','')}" if info.get("connectors") else ""
        return (
            f" - {kind} (Omni): **{info.get('name','')}** (Part #: {info.get('part_number','')})"
            f" — {info.get('description','')} — MIMO: {info.get('mimo','')}{connectors}"
        )

    title = f"{life_row.get('sku','')} — {life_row.get('description','')}"
    current_name = title.strip(" —")
    stationary = ant.get("stationary_omni", {})
    vehicle = ant.get("vehicle_omni", {})

    listed = repl.get("sources")
    sources = listed if isinstance(listed, list) else []

    parts = [
        f"1. Current device: **{current_name}**",
        f"2. Status: **{status}**",
        f"3. End of Sale date: **{eos}**",
        f"4. End of Life date: **{eol}**",
        f"5. 4G alternative (lifecycle): **{repl.get('repl_4g','Not applicable')}**",
        f"6. 5G replacement (lifecycle): **{repl.get('repl_5g','Not listed')}**",
        "7. Antenna options (Parsec-only):",
        antenna_line("Stationary", stationary),
        antenna_line("Vehicle", vehicle),
        "\nSources (debug):",
    ]
    parts.extend(f"- {src}" for src in sources)
    parts.append("- ParsecCatalog.pdf (local RAG)")
    parts.append("- routers_eos_eol_by_sku.csv (replacements)")
    return "\n".join(parts)
1086
+ "\n",
1087
+ "\n",
1088
+ "# ============================\n",
1089
+ "# Gradio callbacks\n",
1090
+ "# IMPORTANT: no dict state and ALL events have api_name=False (prevents api_info schema generation)\n",
1091
+ "# ============================\n",
1092
def run_lookup(user_text: str, st_json: str):
    """Gradio callback for the "Check" button: resolve a device and build the result.

    Args:
        user_text: SKU or model typed by the user.
        st_json: Current state JSON string (not read here; a fresh state is
            returned instead).

    Returns:
        An 8-tuple: summary markdown, manufacturer-link markdown, feature
        table (or None), Q&A markdown reset, update for the pick dropdown,
        update for the "Use selection" button, new state JSON string, and
        install-checklist markdown reset.
    """
    user_text = str(user_text or "").strip()
    if not user_text:
        return "Enter a router SKU/model.", "", None, "", gr.update(visible=False), gr.update(visible=False), "{}", ""

    res = resolve_device(user_text)
    mode = res.get("mode")

    if mode == "pick":
        # Ambiguous input: show the candidates and wait for use_selection().
        opts = res.get("options", [])
        choices = [o["label"] for o in opts]
        st2 = {"mode": "pick", "options": opts, "raw": user_text}
        return "Did you mean A or B? Pick one, then click Use selection.", "", None, "", gr.update(choices=choices, value=None, visible=True), gr.update(visible=True), state_dump(st2), ""

    if mode != "ok":
        return "Not found.", "", None, "", gr.update(visible=False), gr.update(visible=False), "{}", ""

    row_idx = int(res["row_idx"])
    life_row = df_eos.iloc[row_idx]
    eos, eol, status = row_to_dates_and_status(life_row)

    repl = pick_replacements_lifecycle(life_row, status, use_gpt=True)
    canon_make = str(life_row.get("_canon_make", "UNKNOWN"))

    repl_5g = repl.get("repl_5g", "")
    # "Not listed" is a placeholder, not a device: it must neither mark the
    # result as 5G nor be used as the device name for the antenna lookup.
    has_5g = bool(repl_5g) and repl_5g != "Not listed"
    mimo = infer_mimo_for_5g(repl_5g)
    tech = "5G" if has_5g else ("4G" if device_is_4g(life_row) else "Unknown")
    ant_device = repl_5g if has_5g else str(life_row.get("sku", ""))
    ant = antenna_options_for(ant_device, tech, mimo)

    output = assemble_output(life_row, status, eos, eol, repl, ant)
    st_out = {"row_idx": row_idx, "repl": repl, "ant": ant, "raw": user_text}
    url5 = _best_effort_manufacturer_url(repl.get("repl_5g", ""), canon_make)
    link = f"**5G manufacturer page (best effort):** {url5}" if url5 else ""
    feat_df = build_replacement_features_table(repl.get("repl_4g", ""), repl.get("repl_5g", ""), canon_make)
    return output, link, feat_df, "", gr.update(visible=False), gr.update(visible=False), state_dump(st_out), ""
1123
+ "\n",
1124
def use_selection(selected_label: str, st_json: str):
    """Gradio callback for "Use selection": finish a lookup after a pick.

    Args:
        selected_label: Label chosen in the pick dropdown.
        st_json: State JSON string; must hold a "pick" state whose "options"
            entries carry "label" and "row_idx".

    Returns:
        An 8-tuple: summary markdown, manufacturer-link markdown, feature
        table (or None), Q&A markdown reset, update for the pick dropdown,
        update for the "Use selection" button, new state JSON string, and
        install-checklist markdown reset.
    """
    st = state_load(st_json)
    if not st or st.get("mode") != "pick":
        return "Run a search first.", "", None, "", gr.update(visible=False), gr.update(visible=False), "{}", ""

    if not selected_label:
        return "Pick A or B first.", "", None, "", gr.update(visible=True), gr.update(visible=True), st_json, ""

    chosen_row = None
    for o in st.get("options", []):
        if o.get("label") == selected_label:
            chosen_row = int(o["row_idx"])
            break
    if chosen_row is None:
        return "Pick a valid option.", "", None, "", gr.update(visible=True), gr.update(visible=True), st_json, ""

    life_row = df_eos.iloc[chosen_row]
    eos, eol, status = row_to_dates_and_status(life_row)

    repl = pick_replacements_lifecycle(life_row, status, use_gpt=True)
    canon_make = str(life_row.get("_canon_make", "UNKNOWN"))

    repl_5g = repl.get("repl_5g", "")
    # "Not listed" is a placeholder, not a device: it must neither mark the
    # result as 5G nor be used as the device name for the antenna lookup.
    has_5g = bool(repl_5g) and repl_5g != "Not listed"
    mimo = infer_mimo_for_5g(repl_5g)
    tech = "5G" if has_5g else ("4G" if device_is_4g(life_row) else "Unknown")
    ant_device = repl_5g if has_5g else str(life_row.get("sku", ""))
    ant = antenna_options_for(ant_device, tech, mimo)

    output = assemble_output(life_row, status, eos, eol, repl, ant)
    st_out = {"row_idx": chosen_row, "repl": repl, "ant": ant, "raw": st.get("raw", "")}
    url5 = _best_effort_manufacturer_url(repl.get("repl_5g", ""), canon_make)
    link = f"**5G manufacturer page (best effort):** {url5}" if url5 else ""
    feat_df = build_replacement_features_table(repl.get("repl_4g", ""), repl.get("repl_5g", ""), canon_make)
    return output, link, feat_df, "", gr.update(visible=False), gr.update(visible=False), state_dump(st_out), ""
1155
+ "\n",
1156
def make_install_ready(st_json: str):
    """Gradio callback: build the install-ready checklist from the stored lookup.

    Returns the checklist produced by ``install_ready_checklist`` for the
    stored row's SKU, or a prompt to run a lookup when the state holds no
    "row_idx".
    """
    state = state_load(st_json)
    if state and "row_idx" in state:
        device_row = df_eos.iloc[int(state["row_idx"])]
        current_sku = str(device_row.get("sku", "") or "")
        replacements = state.get("repl", {}) or {}
        antennas = state.get("ant", {}) or {}
        return install_ready_checklist(current_sku, replacements, antennas)
    return "Run a lookup first."
1163
+ "\n",
1164
+ "\n",
1165
+ "\n",
1166
+ "# ============================\n",
1167
+ "# Q&A about the suggested device (post-recommendation)\n",
1168
+ "# ============================\n",
1169
def answer_question(question: str, st_json: str) -> str:
    """Answer a free-text question about the device suggested by the last lookup.

    Args:
        question: The rep's question; blank input yields an empty answer.
        st_json: State JSON string written by a previous lookup.

    Returns:
        The markdown answer from ``gpt_answer_md``, a prompt to run a lookup
        when the state holds no "repl", or a fallback message when no answer
        text came back.
    """
    q = str(question or "").strip()
    if not q:
        return ""

    state = state_load(st_json)
    if not state or "repl" not in state:
        return "Run a lookup first, then ask your question."

    repl = state.get("repl", {}) or {}
    ant = state.get("ant", {}) or {}
    repl5 = str(repl.get("repl_5g", "") or "").strip()
    repl4 = str(repl.get("repl_4g", "") or "").strip()

    # Maker family comes from the stored lifecycle row, when there is one.
    canon_make = ""
    try:
        if "row_idx" in state:
            stored_row = df_eos.iloc[int(state["row_idx"])]
            canon_make = str(stored_row.get("_canon_make", "UNKNOWN"))
    except Exception:
        canon_make = ""

    url5 = ""
    feat5 = {}
    if repl5:
        # Manufacturer link (best effort)
        url5 = _best_effort_manufacturer_url(repl5, canon_make)
        # Known 5G features give the LLM spec context without web scraping.
        try:
            feat5 = _features_from_dec(repl5, canon_make)
        except Exception:
            feat5 = {}

    system_prompt = (
        "You are a Verizon field rep assistant. Answer questions about the suggested router in a fast, practical way. "
        "Use the provided context; do not mention internal tools, prompts, embeddings, or databases. "
        "If the question is about specs and the value is unknown, say 'Not listed' and suggest checking the manufacturer page. "
        "Keep it concise and scannable."
    )

    # A "not applicable" 4G alternative is sent as an empty string.
    if repl4 and repl4.lower() != "not applicable":
        recommended_4g = repl4
    else:
        recommended_4g = ""

    context = {
        "recommended_5g": repl5,
        "recommended_4g": recommended_4g,
        "manufacturer_link_5g": url5,
        "known_5g_features": feat5,
        "antenna_stationary": ant.get("stationary_omni", {}),
        "antenna_vehicle": ant.get("vehicle_omni", {}),
    }

    user_prompt = "Context:\n" + json.dumps(context, ensure_ascii=False) + "\n\nQuestion:\n" + q

    answer = gpt_answer_md(system_prompt, user_prompt, max_tokens=650)
    if answer:
        return answer
    # Small safety fallback
    return "I couldn't generate an answer right now. Try again."
1222
+ "\n",
1223
+ "# ============================\n",
1224
+ "# UI\n",
1225
+ "# ============================\n",
1226
# Two-tab Gradio UI: "Single" (one lookup plus follow-up Q&A and install
# checklist) and "Batch" (pasted list or CSV upload).
with gr.Blocks(title="Only-Routers") as demo:
    gr.Markdown("## Only-Routers\nSingle lookup + Batch upload for Verizon reps.")

    with gr.Tabs():
        with gr.Tab("Single"):
            user_text = gr.Textbox(label="Router SKU or model", placeholder="Examples: IBR650B, AER1600, ES450, WR21, RUT240", lines=1)
            st = gr.State("{}")  # JSON string

            check_btn = gr.Button("Check", variant="primary")
            pick_dd = gr.Dropdown(label="Pick A or B", choices=[], visible=False)
            use_btn = gr.Button("Use selection", visible=False)

            output_md = gr.Markdown()

            link_md = gr.Markdown()
            features_df = gr.Dataframe(headers=FEATURE_COLS, interactive=False, wrap=True)

            # The Q&A widgets must be created inside the tab: at column 0 they
            # end the enclosing `with` blocks and break the indented lines
            # that follow.
            gr.Markdown("### Questions about the suggested device?")
            question_box = gr.Textbox(label="Ask a question (optional)", placeholder="Example: Does the 5G device support dual-SIM? What antenna ports does it have?", lines=2)
            ask_btn = gr.Button("Ask", variant="secondary")
            qa_md = gr.Markdown()

            install_btn = gr.Button("Make install-ready checklist")
            install_md = gr.Markdown()

            # Output order must match the 8-tuples returned by run_lookup / use_selection.
            check_btn.click(fn=run_lookup, inputs=[user_text, st], outputs=[output_md, link_md, features_df, qa_md, pick_dd, use_btn, st, install_md], api_name=False)
            use_btn.click(fn=use_selection, inputs=[pick_dd, st], outputs=[output_md, link_md, features_df, qa_md, pick_dd, use_btn, st, install_md], api_name=False)
            install_btn.click(fn=make_install_ready, inputs=[st], outputs=[install_md], api_name=False)
            ask_btn.click(fn=answer_question, inputs=[question_box, st], outputs=[qa_md], api_name=False)

        with gr.Tab("Batch"):
            gr.Markdown("Paste one per line or upload a CSV (first column). Batch runs fast (no GPT).")
            batch_text = gr.Textbox(label="Paste devices (one per line)", lines=8, placeholder="WR21\nRUT240\nIBR650B")
            batch_file = gr.File(label="Upload CSV", file_types=[".csv"])
            include_ant = gr.Checkbox(label="Include antenna picks (slower)", value=False)
            run_btn = gr.Button("Run batch", variant="primary")

            summary_md = gr.Markdown()
            rollup_md = gr.Markdown()
            table = gr.Dataframe(interactive=False, wrap=True)
            dl = gr.File(label="Download results CSV")

            run_btn.click(fn=run_batch, inputs=[batch_text, batch_file, include_ant], outputs=[summary_md, table, dl, rollup_md], api_name=False)

# IMPORTANT: On Spaces, demo.launch() is correct; do NOT use share=True.
demo.launch(show_api=False)
1274
+ ]
1275
+ }
1276
+ ],
1277
+ "metadata": {
1278
+ "kernelspec": {
1279
+ "display_name": "Python 3",
1280
+ "name": "python3"
1281
+ },
1282
+ "language_info": {
1283
+ "name": "python"
1284
+ }
1285
+ },
1286
+ "nbformat": 4,
1287
+ "nbformat_minor": 5
1288
+ }
app.py CHANGED
@@ -100,6 +100,22 @@ def gpt_json(system: str, payload: Dict[str, Any], max_tokens: int = 600) -> Dic
100
  return json_load_safe(getattr(resp, "output_text", "") or "")
101
 
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  # ============================
104
  # Load data
105
  # ============================
@@ -722,11 +738,9 @@ def choose_best_parsec(cands: List[Dict[str, Any]], mode: str) -> Dict[str, Any]
722
  return best
723
 
724
 
725
- def infer_mimo_for_5g(model: str, canon_make: str) -> str:
726
- """Best-effort MIMO guess for antenna selection (2x2 vs 4x4)."""
727
- # If model is unknown, default to 2x2 (safer ordering)
728
- if not model or model in {"Not applicable", "Not listed"}:
729
- return "2x2"
730
 
731
  # If the model name hints 5G, lean 4x4
732
  if "5g" in model.lower() or model.upper().startswith(("R", "E", "S", "IX", "RUTM")):
@@ -863,6 +877,166 @@ def run_batch(text_blob: str, file_obj: Any, include_antennas: bool):
863
  return summary, out_df, tmp.name, rollup
864
 
865
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
866
  # ============================
867
  # Output
868
  # ============================
@@ -899,7 +1073,7 @@ def assemble_output(life_row: pd.Series, status: str, eos: str, eol: str, repl:
899
  def run_lookup(user_text: str, st_json: str):
900
  user_text = str(user_text or "").strip()
901
  if not user_text:
902
- return "Enter a router SKU/model.", gr.update(visible=False), gr.update(visible=False), "{}", ""
903
 
904
  res = resolve_device(user_text)
905
 
@@ -907,31 +1081,34 @@ def run_lookup(user_text: str, st_json: str):
907
  opts = res.get("options", [])
908
  choices = [o["label"] for o in opts]
909
  st2 = {"mode":"pick","options": opts, "raw": user_text}
910
- return "Did you mean A or B? Pick one, then click Use selection.", gr.update(choices=choices, value=None, visible=True), gr.update(visible=True), state_dump(st2), ""
911
 
912
  if res.get("mode") != "ok":
913
- return "Not found.", gr.update(visible=False), gr.update(visible=False), "{}", ""
914
 
915
  life_row = df_eos.iloc[int(res["row_idx"])]
916
  eos, eol, status = row_to_dates_and_status(life_row)
917
 
918
  repl = pick_replacements_lifecycle(life_row, status, use_gpt=True)
919
  canon_make = str(life_row.get("_canon_make","UNKNOWN"))
920
- mimo = infer_mimo_for_5g(repl.get("repl_5g",""), canon_make)
921
  tech = "5G" if repl.get("repl_5g") and repl.get("repl_5g") != "Not listed" else ("4G" if device_is_4g(life_row) else "Unknown")
922
  ant = antenna_options_for(repl.get("repl_5g") or str(life_row.get("sku","")), tech, mimo)
923
 
924
  output = assemble_output(life_row, status, eos, eol, repl, ant)
925
  st_out = {"row_idx": int(res["row_idx"]), "repl": repl, "ant": ant, "raw": user_text}
926
- return output, gr.update(visible=False), gr.update(visible=False), state_dump(st_out), ""
 
 
 
927
 
928
  def use_selection(selected_label: str, st_json: str):
929
  st = state_load(st_json)
930
  if not st or st.get("mode") != "pick":
931
- return "Run a search first.", gr.update(visible=False), gr.update(visible=False), "{}", ""
932
 
933
  if not selected_label:
934
- return "Pick A or B first.", gr.update(visible=True), gr.update(visible=True), st_json, ""
935
 
936
  chosen_row = None
937
  for o in st.get("options", []):
@@ -939,20 +1116,23 @@ def use_selection(selected_label: str, st_json: str):
939
  chosen_row = int(o["row_idx"])
940
  break
941
  if chosen_row is None:
942
- return "Pick a valid option.", gr.update(visible=True), gr.update(visible=True), st_json, ""
943
 
944
  life_row = df_eos.iloc[int(chosen_row)]
945
  eos, eol, status = row_to_dates_and_status(life_row)
946
 
947
  repl = pick_replacements_lifecycle(life_row, status, use_gpt=True)
948
  canon_make = str(life_row.get("_canon_make","UNKNOWN"))
949
- mimo = infer_mimo_for_5g(repl.get("repl_5g",""), canon_make)
950
  tech = "5G" if repl.get("repl_5g") and repl.get("repl_5g") != "Not listed" else ("4G" if device_is_4g(life_row) else "Unknown")
951
  ant = antenna_options_for(repl.get("repl_5g") or str(life_row.get("sku","")), tech, mimo)
952
 
953
  output = assemble_output(life_row, status, eos, eol, repl, ant)
954
  st_out = {"row_idx": int(chosen_row), "repl": repl, "ant": ant, "raw": st.get("raw","")}
955
- return output, gr.update(visible=False), gr.update(visible=False), state_dump(st_out), ""
 
 
 
956
 
957
  def make_install_ready(st_json: str):
958
  st = state_load(st_json)
@@ -963,6 +1143,64 @@ def make_install_ready(st_json: str):
963
  return install_ready_checklist(current_sku, st.get("repl", {}) or {}, st.get("ant", {}) or {})
964
 
965
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
966
  # ============================
967
  # UI
968
  # ============================
@@ -980,12 +1218,23 @@ with gr.Blocks(title="Only-Routers") as demo:
980
 
981
  output_md = gr.Markdown()
982
 
 
 
 
 
 
 
 
 
 
 
983
  install_btn = gr.Button("Make install-ready checklist")
984
  install_md = gr.Markdown()
985
 
986
- check_btn.click(fn=run_lookup, inputs=[user_text, st], outputs=[output_md, pick_dd, use_btn, st, install_md], api_name=False)
987
- use_btn.click(fn=use_selection, inputs=[pick_dd, st], outputs=[output_md, pick_dd, use_btn, st, install_md], api_name=False)
988
  install_btn.click(fn=make_install_ready, inputs=[st], outputs=[install_md], api_name=False)
 
989
 
990
  with gr.Tab("Batch"):
991
  gr.Markdown("Paste one per line or upload a CSV (first column). Batch runs fast (no GPT).")
 
100
  return json_load_safe(getattr(resp, "output_text", "") or "")
101
 
102
 
103
def gpt_answer_md(system: str, user: str, max_tokens: int = 650) -> str:
    """Send a system + user message pair to the model and return its text answer.

    Returns a fixed notice when no client is configured; otherwise the
    response's ``output_text`` with surrounding whitespace removed (an empty
    string when the response carries none).
    """
    if client is None:
        return "No API key is configured, so I can't answer detailed questions right now."

    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
    ]
    resp = client.responses.create(
        model=OPENAI_MODEL,
        reasoning=OPENAI_REASONING,
        input=messages,
        max_output_tokens=max_tokens,
    )
    answer = getattr(resp, "output_text", "") or ""
    return answer.strip()
117
+
118
+
119
  # ============================
120
  # Load data
121
  # ============================
 
738
  return best
739
 
740
 
741
+ def infer_mimo_for_5g(repl_5g_model: str) -> str:
742
+ """Rule: every 5G router uses a 4x4 antenna."""
743
+ return "4x4"
 
 
744
 
745
  # If the model name hints 5G, lean 4x4
746
  if "5g" in model.lower() or model.upper().startswith(("R", "E", "S", "IX", "RUTM")):
 
877
  return summary, out_df, tmp.name, rollup
878
 
879
 
880
+ # ============================
881
+ # Replacement feature table + manufacturer link (5G device)
882
+ # ============================
883
+
884
# Column order of the replacement feature table. "Device" is the row label;
# every later entry is one feature field reported per device.
FEATURE_COLS = [
    "Device",
    "Modem technology",
    "WiFi",
    "Ports",
    "Antennas",
    "Ruggedness",
    "Use case",
]

# Manufacturer domains used for best-effort link resolution (no non-maker domains).
# Keys are maker-family identifiers; an empty list means "no link can be built".
MAKER_DOMAINS = dict(
    CRADLEPOINT=["cradlepoint.com", "ericsson.com"],
    SIERRA=["semtech.com", "airlink.com"],
    FEENEY=["inseego.com"],
    DIGI=["digi.com"],
    CISCO_MERAKI=["meraki.cisco.com", "cisco.com"],
    CISCO=["cisco.com"],
    TELTONIKA=["teltonika-networks.com"],
    UNKNOWN=[],
)

# Browser-like User-Agent sent with every manufacturer-site request.
HTTP_HEADERS = {
    "User-Agent": " ".join(
        (
            "Mozilla/5.0 (X11; Linux x86_64)",
            "AppleWebKit/537.36",
            "(KHTML, like Gecko)",
            "Chrome/120.0",
            "Safari/537.36",
        )
    ),
}
# Per-request timeout, in seconds, passed to requests.get().
HTTP_TIMEOUT = 12
903
+
904
+ def _best_effort_manufacturer_url(model: str, canon_make: str) -> str:
905
+ \"\"\"Try to find a manufacturer page or datasheet link using simple on-domain searches.
906
+ If we can't confirm a page, return the manufacturer homepage for the maker family.
907
+ \"\"\"
908
+ model = str(model or "").strip()
909
+ if not model or model in {"Not listed", "Not applicable"}:
910
+ return ""
911
+
912
+ domains = MAKER_DOMAINS.get(canon_make, []) or []
913
+ if not domains:
914
+ return ""
915
+
916
+ # Candidate on-domain search URLs (common patterns across sites).
917
+ # We keep these on the manufacturer domain (no Google/Bing).
918
+ q = re.sub(r"\s+", "+", model)
919
+ url_candidates = []
920
+ for d in domains:
921
+ url_candidates += [
922
+ f"https://{d}/search?q={q}",
923
+ f"https://{d}/search?query={q}",
924
+ f"https://{d}/?s={q}",
925
+ f"https://www.{d}/search?q={q}",
926
+ f"https://www.{d}/search?query={q}",
927
+ f"https://www.{d}/?s={q}",
928
+ ]
929
+
930
+ # Also try a few direct product patterns for known makers (best effort).
931
+ if canon_make == "TELTONIKA":
932
+ slug = model.lower()
933
+ url_candidates += [
934
+ f"https://teltonika-networks.com/products/routers/{slug}",
935
+ f"https://teltonika-networks.com/product/{slug}",
936
+ "https://teltonika-networks.com/products/routers/",
937
+ ]
938
+ if canon_make == "DIGI":
939
+ url_candidates += [
940
+ "https://www.digi.com/products/networking/cellular-routers",
941
+ f"https://www.digi.com/search?q={q}",
942
+ ]
943
+ if canon_make == "CRADLEPOINT":
944
+ url_candidates += [
945
+ "https://cradlepoint.com/products/",
946
+ f"https://cradlepoint.com/?s={q}",
947
+ ]
948
+ if canon_make in {"CISCO", "CISCO_MERAKI"}:
949
+ url_candidates += [
950
+ f"https://www.cisco.com/c/en/us/search.html?q={q}",
951
+ ]
952
+
953
+ # Try to confirm a working page (HTTP 200 and model string somewhere in HTML).
954
+ for u in url_candidates[:18]:
955
+ try:
956
+ import requests
957
+ r = requests.get(u, headers=HTTP_HEADERS, timeout=HTTP_TIMEOUT, allow_redirects=True)
958
+ if r.status_code != 200:
959
+ continue
960
+ html = (r.text or "").lower()
961
+ if model.lower() in html or "datasheet" in html or "data sheet" in html:
962
+ return r.url
963
+ except Exception:
964
+ continue
965
+
966
+ # Fallback: maker homepage
967
+ d0 = domains[0]
968
+ return f"https://{d0}"
969
+
970
def _features_from_dec(model: str, canon_make: str) -> Dict[str, str]:
    """Look up a router model in ``df_dec`` and return its key feature fields.

    The search is limited to rows of ``df_dec`` whose ``_canon_make`` equals
    ``canon_make``; the best fuzzy match on ``_norm_model`` is accepted only
    when its score reaches ``MATCH_OK``.

    Args:
        model: Router model name; blank or placeholder values
            ("Not listed", "Not applicable") are not looked up.
        canon_make: Maker-family value to filter ``df_dec`` on.

    Returns:
        A dict with one entry per feature column (``FEATURE_COLS`` without
        "Device"). Fields that cannot be determined are "Not listed".
    """
    not_listed = {k: "Not listed" for k in FEATURE_COLS[1:]}

    model = str(model or "").strip()
    if not model or model in {"Not listed", "Not applicable"}:
        return not_listed

    pool = df_dec[df_dec["_canon_make"] == canon_make]
    if pool.empty:
        return not_listed

    hit = process.extractOne(norm_text(model), pool["_norm_model"].tolist(), scorer=fuzz.WRatio)
    if not hit or hit[1] < MATCH_OK:
        return not_listed

    # hit is consumed as (choice, score, index); the index is positional in
    # the list built from ``pool``, hence ``iloc``.
    # NOTE(review): assumes extractOne returns a 3-tuple — confirm the fuzzy library.
    r = pool.iloc[int(hit[2])]

    def _cell(column: str) -> str:
        """Return the cell as stripped text, with empty/NaN cells as ""."""
        raw = r.get(column, "")
        text = "" if raw is None else str(raw).strip()
        # str(NaN) is the truthy string "nan"; treat it as missing.
        return "" if text.lower() == "nan" else text

    wan = _cell("WAN ports and speed")
    lan = _cell("LAN ports and speed")
    if wan or lan:
        ports = f"WAN: {wan or 'Not listed'} | LAN: {lan or 'Not listed'}"
    else:
        ports = "Not listed"

    return {
        "Modem technology": _cell("Modem Type") or "Not listed",
        "WiFi": _cell("WiFi type") or "Not listed",
        "Ports": ports,
        "Antennas": _cell("Antennas (internal/external/both)") or "Not listed",
        "Ruggedness": _cell("Ruggedization") or "Not listed",
        "Use case": _cell("Primary use case") or "Not listed",
    }
993
+
994
+ def _gpt_fill_feature_row(device_label: str, model: str, canon_make: str, row: Dict[str, str]) -> Dict[str, str]:
995
+ \"\"\"If dec can't supply values, ask GPT to fill missing ones (best guess).\"\"\"
996
+ if client is None:
997
+ return row
998
+
999
+ missing = [k for k,v in row.items() if (not v) or str(v).strip().lower() in {"not listed","nan",""}]
1000
+ if not missing:
1001
+ return row
1002
+
1003
+ sys = "Fill missing router feature fields for a Verizon rep. Return strict JSON only."
1004
+ payload = {
1005
+ "device_label": device_label,
1006
+ "model": model,
1007
+ "maker_family": canon_make,
1008
+ "known": row,
1009
+ "fill_only": missing,
1010
+ "rules": [
1011
+ "Fill only the requested fields.",
1012
+ "Best guess if needed. Short phrases only.",
1013
+ "Return JSON only."
1014
+ ],
1015
+ "output_schema": {k: "string" for k in missing}
1016
+ }
1017
+ out = gpt_json(sys, payload, max_tokens=260) or {}
1018
+ for k in missing:
1019
+ val = str(out.get(k, "") or "").strip()
1020
+ if val:
1021
+ row[k] = val
1022
+ return row
1023
+
1024
def build_replacement_features_table(repl_4g: str, repl_5g: str, canon_make: str) -> pd.DataFrame:
    """Build the two-row feature table (4G alternative first, then 5G replacement).

    Each row starts from the table lookup and is then passed through the GPT
    gap-filler; columns follow ``FEATURE_COLS``.
    """
    labelled_models = (
        ("4G alternative", repl_4g),
        ("5G replacement", repl_5g),
    )

    records = []
    for label, model in labelled_models:
        features = _features_from_dec(model, canon_make)
        features = _gpt_fill_feature_row(label, model, canon_make, features)
        records.append({"Device": label, **features})

    return pd.DataFrame(records, columns=FEATURE_COLS)
1039
+
1040
  # ============================
1041
  # Output
1042
  # ============================
 
1073
  def run_lookup(user_text: str, st_json: str):
1074
  user_text = str(user_text or "").strip()
1075
  if not user_text:
1076
+ return "Enter a router SKU/model.", "", None, "", gr.update(visible=False), gr.update(visible=False), "{}", ""
1077
 
1078
  res = resolve_device(user_text)
1079
 
 
1081
  opts = res.get("options", [])
1082
  choices = [o["label"] for o in opts]
1083
  st2 = {"mode":"pick","options": opts, "raw": user_text}
1084
+ return "Did you mean A or B? Pick one, then click Use selection.", "", None, "", gr.update(choices=choices, value=None, visible=True), gr.update(visible=True), state_dump(st2), ""
1085
 
1086
  if res.get("mode") != "ok":
1087
+ return "Not found.", "", None, "", gr.update(visible=False), gr.update(visible=False), "{}", ""
1088
 
1089
  life_row = df_eos.iloc[int(res["row_idx"])]
1090
  eos, eol, status = row_to_dates_and_status(life_row)
1091
 
1092
  repl = pick_replacements_lifecycle(life_row, status, use_gpt=True)
1093
  canon_make = str(life_row.get("_canon_make","UNKNOWN"))
1094
+ mimo = infer_mimo_for_5g(repl.get("repl_5g",""))
1095
  tech = "5G" if repl.get("repl_5g") and repl.get("repl_5g") != "Not listed" else ("4G" if device_is_4g(life_row) else "Unknown")
1096
  ant = antenna_options_for(repl.get("repl_5g") or str(life_row.get("sku","")), tech, mimo)
1097
 
1098
  output = assemble_output(life_row, status, eos, eol, repl, ant)
1099
  st_out = {"row_idx": int(res["row_idx"]), "repl": repl, "ant": ant, "raw": user_text}
1100
+ url5 = _best_effort_manufacturer_url(repl.get('repl_5g',''), canon_make)
1101
+ link = f"**5G manufacturer page (best effort):** {url5}" if url5 else ""
1102
+ feat_df = build_replacement_features_table(repl.get('repl_4g',''), repl.get('repl_5g',''), canon_make)
1103
+ return output, link, feat_df, "", gr.update(visible=False), gr.update(visible=False), state_dump(st_out), ""
1104
 
1105
  def use_selection(selected_label: str, st_json: str):
1106
  st = state_load(st_json)
1107
  if not st or st.get("mode") != "pick":
1108
+ return "Run a search first.", "", None, "", gr.update(visible=False), gr.update(visible=False), "{}", ""
1109
 
1110
  if not selected_label:
1111
+ return "Pick A or B first.", "", None, "", gr.update(visible=True), gr.update(visible=True), st_json, ""
1112
 
1113
  chosen_row = None
1114
  for o in st.get("options", []):
 
1116
  chosen_row = int(o["row_idx"])
1117
  break
1118
  if chosen_row is None:
1119
+ return "Pick a valid option.", "", None, "", gr.update(visible=True), gr.update(visible=True), st_json, ""
1120
 
1121
  life_row = df_eos.iloc[int(chosen_row)]
1122
  eos, eol, status = row_to_dates_and_status(life_row)
1123
 
1124
  repl = pick_replacements_lifecycle(life_row, status, use_gpt=True)
1125
  canon_make = str(life_row.get("_canon_make","UNKNOWN"))
1126
+ mimo = infer_mimo_for_5g(repl.get("repl_5g",""))
1127
  tech = "5G" if repl.get("repl_5g") and repl.get("repl_5g") != "Not listed" else ("4G" if device_is_4g(life_row) else "Unknown")
1128
  ant = antenna_options_for(repl.get("repl_5g") or str(life_row.get("sku","")), tech, mimo)
1129
 
1130
  output = assemble_output(life_row, status, eos, eol, repl, ant)
1131
  st_out = {"row_idx": int(chosen_row), "repl": repl, "ant": ant, "raw": st.get("raw","")}
1132
+ url5 = _best_effort_manufacturer_url(repl.get('repl_5g',''), canon_make)
1133
+ link = f"**5G manufacturer page (best effort):** {url5}" if url5 else ""
1134
+ feat_df = build_replacement_features_table(repl.get('repl_4g',''), repl.get('repl_5g',''), canon_make)
1135
+ return output, link, feat_df, "", gr.update(visible=False), gr.update(visible=False), state_dump(st_out), ""
1136
 
1137
  def make_install_ready(st_json: str):
1138
  st = state_load(st_json)
 
1143
  return install_ready_checklist(current_sku, st.get("repl", {}) or {}, st.get("ant", {}) or {})
1144
 
1145
 
1146
+
1147
+ # ============================
1148
+ # Q&A about the suggested device (post-recommendation)
1149
+ # ============================
1150
def answer_question(question: str, st_json: str) -> str:
    """Answer a follow-up question about the devices stored in the lookup state.

    Args:
        question: Free-text question typed by the user.
        st_json: Serialized state, decoded with ``state_load``; it is expected
            to carry a ``"repl"`` entry and may carry ``"ant"`` and
            ``"row_idx"``.

    Returns:
        ``""`` when the question is blank; a "run a lookup first" prompt when
        the state is empty or has no ``"repl"`` entry; otherwise the text
        returned by ``gpt_answer_md``, or a fixed fallback message when that
        result is falsy.
    """
    question_text = str(question or "").strip()
    if not question_text:
        return ""

    state = state_load(st_json)
    if not state or "repl" not in state:
        return "Run a lookup first, then ask your question."

    replacements = state.get("repl", {}) or {}
    antennas = state.get("ant", {}) or {}
    model_5g = str(replacements.get("repl_5g", "") or "").strip()
    model_4g = str(replacements.get("repl_4g", "") or "").strip()

    # Maker family comes from the lifecycle row recorded in the state.
    # Best effort: any failure while reading it leaves the value empty.
    canon_make = ""
    try:
        if "row_idx" in state:
            lifecycle_row = df_eos.iloc[int(state["row_idx"])]
            canon_make = str(lifecycle_row.get("_canon_make", "UNKNOWN"))
    except Exception:
        canon_make = ""

    # Manufacturer link and feature row are only looked up when a 5G model is
    # present. The feature row helps the LLM answer spec questions without
    # web scraping; a failure there degrades to an empty mapping.
    manufacturer_url = ""
    known_features = {}
    if model_5g:
        manufacturer_url = _best_effort_manufacturer_url(model_5g, canon_make)
        try:
            known_features = _features_from_dec(model_5g, canon_make)
        except Exception:
            known_features = {}

    # A "not applicable" placeholder is hidden from the context.
    if model_4g.lower() == "not applicable":
        shown_4g = ""
    else:
        shown_4g = model_4g

    system_prompt = (
        "You are a Verizon field rep assistant. Answer questions about the suggested router in a fast, practical way. "
        "Use the provided context; do not mention internal tools, prompts, embeddings, or databases. "
        "If the question is about specs and the value is unknown, say 'Not listed' and suggest checking the manufacturer page. "
        "Keep it concise and scannable."
    )

    context = {
        "recommended_5g": model_5g,
        "recommended_4g": shown_4g,
        "manufacturer_link_5g": manufacturer_url,
        "known_5g_features": known_features,
        "antenna_stationary": antennas.get("stationary_omni", {}),
        "antenna_vehicle": antennas.get("vehicle_omni", {}),
    }

    context_json = json.dumps(context, ensure_ascii=False)
    user_message = "".join(
        ("Context:\n", context_json, "\n\nQuestion:\n", question_text)
    )

    reply = gpt_answer_md(system_prompt, user_message, max_tokens=650)
    # Small safety fallback when no answer text came back.
    return reply or "I couldn't generate an answer right now. Try again."
1203
+
1204
  # ============================
1205
  # UI
1206
  # ============================
 
1218
 
1219
  output_md = gr.Markdown()
1220
 
1221
+ link_md = gr.Markdown()
1222
+ features_df = gr.Dataframe(headers=FEATURE_COLS, interactive=False, wrap=True)
1223
+
1224
+
1225
+ gr.Markdown("### Questions about the suggested device?")
1226
+ question_box = gr.Textbox(label="Ask a question (optional)", placeholder="Example: Does the 5G device support dual-SIM? What antenna ports does it have?", lines=2)
1227
+ ask_btn = gr.Button("Ask", variant="secondary")
1228
+ qa_md = gr.Markdown()
1229
+
1230
+
1231
  install_btn = gr.Button("Make install-ready checklist")
1232
  install_md = gr.Markdown()
1233
 
1234
+ check_btn.click(fn=run_lookup, inputs=[user_text, st], outputs=[output_md, link_md, features_df, qa_md, pick_dd, use_btn, st, install_md], api_name=False)
1235
+ use_btn.click(fn=use_selection, inputs=[pick_dd, st], outputs=[output_md, link_md, features_df, qa_md, pick_dd, use_btn, st, install_md], api_name=False)
1236
  install_btn.click(fn=make_install_ready, inputs=[st], outputs=[install_md], api_name=False)
1237
+ ask_btn.click(fn=answer_question, inputs=[question_box, st], outputs=[qa_md], api_name=False)
1238
 
1239
  with gr.Tab("Batch"):
1240
  gr.Markdown("Paste one per line or upload a CSV (first column). Batch runs fast (no GPT).")