crazycrazypete committed on
Commit
8b8a60e
·
verified ·
1 Parent(s): 79adb97

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +372 -277
  2. only-routers_ai_poc_v4_5.ipynb +831 -0
app.py CHANGED
@@ -1,14 +1,12 @@
1
- \
2
  import os
3
  import re
4
  import json
5
  import math
 
6
  import hashlib
7
  from dataclasses import dataclass
8
  from datetime import datetime, date
9
- from functools import lru_cache
10
  from typing import Dict, List, Optional, Tuple, Any
11
- from urllib.parse import quote_plus, urlparse
12
 
13
  import numpy as np
14
  import pandas as pd
@@ -18,49 +16,37 @@ import faiss
18
  from sentence_transformers import SentenceTransformer
19
  from rapidfuzz import fuzz, process
20
 
21
- import requests
22
- from bs4 import BeautifulSoup
23
-
24
  import gradio as gr
25
  from openai import OpenAI
26
 
27
 
28
- # ----------------------------
29
  # Settings
30
- # ----------------------------
31
  TODAY = date(2026, 1, 18)
32
  OPENAI_MODEL = "gpt-5.2"
33
  OPENAI_REASONING = {"effort": "high"}
34
 
 
35
  EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
 
36
  PARSEC_CONTEXT_BEFORE = 900
37
  PARSEC_CONTEXT_AFTER = 1600
38
 
39
- HTTP_TIMEOUT = 25
40
- HTTP_HEADERS = {
41
- "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
42
- "(KHTML, like Gecko) Chrome/120.0 Safari/537.36"
43
- }
44
- RETRY_COUNT = 2
45
- MATCH_OK = 80
46
-
47
  CACHE_DIR = os.path.join(os.getcwd(), ".onlyrouters_cache")
48
- SPECS_CACHE_DIR = os.path.join(CACHE_DIR, "specs")
49
- PDF_CACHE_DIR = os.path.join(CACHE_DIR, "pdf")
50
- os.makedirs(SPECS_CACHE_DIR, exist_ok=True)
51
- os.makedirs(PDF_CACHE_DIR, exist_ok=True)
52
 
53
 
54
- # ----------------------------
55
- # OpenAI client (Space secret: OPENAI_API_KEY)
56
- # ----------------------------
57
  API_KEY = os.getenv("OPENAI_API_KEY", "").strip()
58
  client = OpenAI(api_key=API_KEY) if API_KEY else None
59
 
60
 
61
- # ----------------------------
62
- # Helpers
63
- # ----------------------------
64
  def norm_text(s: Any) -> str:
65
  try:
66
  if s is None or (isinstance(s, float) and math.isnan(s)) or pd.isna(s):
@@ -72,22 +58,15 @@ def norm_text(s: Any) -> str:
72
  s = re.sub(r"\s+", " ", s).strip()
73
  return s
74
 
 
 
 
 
 
75
  def _is_5g(modem_type: Any) -> bool:
76
  s = norm_text(modem_type)
77
  return ("5g" in s) or ("nr" in s)
78
 
79
- def _req_get(url: str) -> requests.Response:
80
- last = None
81
- for _ in range(RETRY_COUNT + 1):
82
- try:
83
- r = requests.get(url, headers=HTTP_HEADERS, timeout=HTTP_TIMEOUT, allow_redirects=True)
84
- r.raise_for_status()
85
- return r
86
- except Exception as e:
87
- last = e
88
- continue
89
- raise last
90
-
91
  def _json_load_safe(s: str) -> Dict[str, Any]:
92
  try:
93
  return json.loads(s)
@@ -109,9 +88,9 @@ def gpt_json(system: str, payload: Dict[str, Any], max_tokens: int = 700) -> Dic
109
  return _json_load_safe(getattr(resp, "output_text", "") or "")
110
 
111
 
112
- # ----------------------------
113
- # Load data files
114
- # ----------------------------
115
  EOS_PATH = "routers_eos_eol_by_sku.csv"
116
  DEC_PATH = "dec2025routers.csv"
117
  PARSEC_PDF = "ParsecCatalog.pdf"
@@ -126,6 +105,7 @@ if not os.path.exists(PARSEC_PDF):
126
  df_eos = pd.read_csv(EOS_PATH).copy()
127
  df_dec = pd.read_csv(DEC_PATH).copy()
128
 
 
129
  def _region_ok(x: Any) -> bool:
130
  s = str(x or "").strip().lower()
131
  if not s:
@@ -145,12 +125,14 @@ def _region_ok(x: Any) -> bool:
145
  if "region" in df_eos.columns:
146
  df_eos = df_eos[df_eos["region"].apply(_region_ok)].reset_index(drop=True)
147
 
 
148
  device_type_col = None
149
  for c in df_eos.columns:
150
  if norm_text(c) == "device type":
151
  device_type_col = c
152
  break
153
 
 
154
  CANON_MAKER = {
155
  "CRADLEPOINT": {"cradlepoint", "ericsson", "ericsson enterprise wireless"},
156
  "SIERRA": {"sierra", "sierra wireless", "semtech", "airlink"},
@@ -177,27 +159,27 @@ def canon_maker_from_text(s: Any) -> str:
177
  return canon
178
  return "UNKNOWN"
179
 
180
- def display_maker_for_row(life_row: pd.Series) -> str:
181
- canon = str(life_row.get("_canon_make", "UNKNOWN"))
182
- if canon != "DIGI":
183
- return DISPLAY_MAKER.get(canon, "Unknown")
184
- desc = norm_text(life_row.get("description", ""))
185
- notes = norm_text(life_row.get("notes", ""))
186
- if "accelerated" in desc or "accelerated" in notes:
187
- return "Accelerated Concepts (now Digi)"
188
- return "Digi"
189
-
190
  df_eos["_canon_make"] = df_eos["manufacturer"].apply(canon_maker_from_text) if "manufacturer" in df_eos.columns else "UNKNOWN"
191
  df_eos["_norm_sku"] = df_eos["sku"].apply(norm_text) if "sku" in df_eos.columns else ""
 
 
192
 
193
  df_dec["_canon_make"] = df_dec["Make"].apply(canon_maker_from_text) if "Make" in df_dec.columns else "UNKNOWN"
194
  df_dec["_norm_model"] = df_dec["Model"].apply(norm_text) if "Model" in df_dec.columns else ""
195
  df_dec["_is5g"] = df_dec["Modem Type"].apply(_is_5g) if "Modem Type" in df_dec.columns else False
196
 
 
 
 
 
 
 
 
 
197
 
198
- # ----------------------------
199
  # Date helpers
200
- # ----------------------------
201
  @dataclass
202
  class ParsedDate:
203
  raw: str
@@ -255,44 +237,12 @@ def row_to_dates_and_status(life_row: pd.Series) -> Tuple[str, str, str]:
255
  return display_date(eos), display_date(eol), status_from_eos_eol(eos, eol)
256
 
257
 
258
- # ----------------------------
259
- # Build embeddings + FAISS indices
260
- # ----------------------------
261
  embedder = SentenceTransformer(EMBED_MODEL_NAME)
262
 
263
- # Replacement RAG buckets
264
- def _router_card(row: pd.Series) -> str:
265
- parts = [
266
- f"Make: {row.get('Make','')}",
267
- f"Model: {row.get('Model','')}",
268
- f"Modem Type: {row.get('Modem Type','')}",
269
- f"WiFi: {row.get('WiFi type','')}",
270
- f"WAN: {row.get('WAN ports and speed','')}",
271
- f"LAN: {row.get('LAN ports and speed','')}",
272
- f"Antennas: {row.get('Antennas (internal/external/both)','')}",
273
- f"Ruggedization: {row.get('Ruggedization','')}",
274
- f"Use case: {row.get('Primary use case','')}",
275
- ]
276
- return "\n".join(parts)[:1400]
277
-
278
- df_dec["_card"] = df_dec.apply(_router_card, axis=1)
279
-
280
- RAG_REPL: Dict[Tuple[str, bool], Dict[str, Any]] = {}
281
- dim = None
282
- for canon in sorted(df_dec["_canon_make"].unique().tolist()):
283
- for is5g in [True, False]:
284
- subset = df_dec[(df_dec["_canon_make"] == canon) & (df_dec["_is5g"] == is5g)].copy()
285
- if subset.empty:
286
- continue
287
- cards = subset["_card"].tolist()
288
- embs = embedder.encode(cards, batch_size=64, show_progress_bar=False, normalize_embeddings=True)
289
- embs = np.asarray(embs, dtype=np.float32)
290
- dim = embs.shape[1] if dim is None else dim
291
- idx = faiss.IndexFlatIP(dim)
292
- idx.add(embs)
293
- RAG_REPL[(canon, is5g)] = {"index": idx, "rows": subset.index.tolist(), "cards": cards}
294
-
295
- # Parsec index
296
  def extract_pdf_text_pages(path: str) -> List[str]:
297
  doc = fitz.open(path)
298
  return [doc[i].get_text("text") for i in range(len(doc))]
@@ -320,9 +270,9 @@ parsec_index = faiss.IndexFlatIP(parsec_emb.shape[1])
320
  parsec_index.add(parsec_emb)
321
 
322
 
323
- # ----------------------------
324
- # Device resolution
325
- # ----------------------------
326
  def _label_for_row(i: int) -> str:
327
  r = df_eos.iloc[i]
328
  return f"{r.get('sku','')} — {r.get('manufacturer','')} — {r.get('description','')}"[:220]
@@ -330,7 +280,12 @@ def _label_for_row(i: int) -> str:
330
  EOS_LABELS = [_label_for_row(i) for i in range(len(df_eos))]
331
  EOS_CORPUS = []
332
  for _, r in df_eos.iterrows():
333
- EOS_CORPUS.append(" ".join([norm_text(r.get("sku","")), norm_text(r.get("manufacturer","")), norm_text(r.get("description","")), norm_text(r.get("notes",""))]))
 
 
 
 
 
334
 
335
  def local_candidates(query: str, top_k: int = 6) -> List[Tuple[int,int,str]]:
336
  q = norm_text(query)
@@ -344,12 +299,17 @@ def gpt_choose_device(user_text: str, candidates: List[Tuple[int,int,str]]) -> D
344
  payload = {
345
  "user_input": user_text,
346
  "candidates": [{"row_idx": i, "score": s, "label": lbl} for (i,s,lbl) in candidates],
 
 
 
 
347
  "output_schema": {"mode":"ok|pick","row_idx":"int","options":[{"row_idx":"int","label":"string"}]}
348
  }
349
- return gpt_json(sys, payload, max_tokens=260)
350
 
351
  def resolve_device(user_text: str) -> Dict[str, Any]:
352
  q = norm_text(user_text)
 
353
  exact_idxs = df_eos.index[df_eos["_norm_sku"] == q].tolist()
354
  if len(exact_idxs) == 1:
355
  return {"mode":"ok","row_idx": int(exact_idxs[0])}
@@ -360,137 +320,194 @@ def resolve_device(user_text: str) -> Dict[str, Any]:
360
  cands = local_candidates(user_text, top_k=6)
361
  if not cands:
362
  return {"mode":"not_found"}
 
 
 
 
363
  g = gpt_choose_device(user_text, cands)
364
  if g.get("mode") == "ok" and isinstance(g.get("row_idx"), int):
365
  return {"mode":"ok","row_idx": int(g["row_idx"])}
 
366
  if g.get("mode") == "pick":
367
  opts = g.get("options", []) or []
368
  opts2 = [{"row_idx": int(o["row_idx"]), "label": str(o["label"])} for o in opts[:2] if "row_idx" in o]
369
  if opts2:
370
  return {"mode":"pick","options": opts2}
371
- return {"mode":"pick","options":[{"row_idx":cands[0][0],"label":cands[0][2]},{"row_idx":cands[1][0],"label":cands[1][2]}] if len(cands)>1 else [{"row_idx":cands[0][0],"label":cands[0][2]}]}
 
 
 
 
372
 
373
 
374
- # ----------------------------
375
- # Replacement selection
376
- # ----------------------------
377
- def pick_best_5g_fallback(canon_make: str) -> str:
378
- pool5 = df_dec[(df_dec["_canon_make"] == canon_make) & (df_dec["_is5g"] == True)].copy()
379
- if pool5.empty:
380
  return ""
381
- pool5["_score"] = (
382
- pool5["WiFi type"].astype(str).str.len().fillna(0)
383
- + pool5["WAN ports and speed"].astype(str).str.len().fillna(0)
384
- + pool5["LAN ports and speed"].astype(str).str.len().fillna(0)
385
- )
386
- pool5 = pool5.sort_values(by="_score", ascending=False)
387
- return str(pool5.iloc[0]["Model"]).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
 
389
- def likely_4g_only_row(life_row: pd.Series) -> bool:
390
- t = norm_text(life_row.get("description","")) + " " + norm_text(life_row.get("notes",""))
391
- return ("lte" in t and "5g" not in t) or ("4g" in t and "5g" not in t)
 
 
 
 
 
 
392
 
393
- def likely_5g_row(life_row: pd.Series) -> bool:
394
- t = norm_text(life_row.get("description","")) + " " + norm_text(life_row.get("notes",""))
395
- return ("5g" in t) or ("nr" in t)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
396
 
397
- def rag_candidates_for_make(canon_make: str, want_5g: bool, query_text: str, k: int = 8) -> List[int]:
398
- bucket = RAG_REPL.get((canon_make, want_5g))
399
- if not bucket:
400
- return []
401
- qv = embedder.encode([query_text], normalize_embeddings=True)
402
- qv = np.asarray(qv, dtype=np.float32)
403
- _, ids = bucket["index"].search(qv, min(k, len(bucket["rows"])))
404
- return [bucket["rows"][int(i)] for i in ids[0] if 0 <= int(i) < len(bucket["rows"])]
405
 
406
- def pick_replacements_gpt(life_row: pd.Series, status: str) -> Dict[str, Any]:
 
 
 
 
407
  canon = str(life_row.get("_canon_make","UNKNOWN"))
408
  if canon == "UNKNOWN":
409
- return {"repl_4g":"Not applicable","repl_5g":"","why":"","sources":[]}
410
-
411
- adv_val = life_row.get("advanced_5g_option", "")
412
- sug_val = life_row.get("suggested_replacement", "")
413
- adv_raw = "" if (adv_val is None or pd.isna(adv_val)) else str(adv_val).strip()
414
- sug_raw = "" if (sug_val is None or pd.isna(sug_val)) else str(sug_val).strip()
415
 
416
- is_4g_device = likely_4g_only_row(life_row) and (not likely_5g_row(life_row))
417
  needs_4g_repl = is_4g_device and (status in {"End of Sale","End of Life"})
418
  want_5g = is_4g_device or (status in {"End of Sale","End of Life"})
419
 
420
- if not want_5g:
421
- return {"repl_4g":"Not applicable","repl_5g":"Not applicable","why":"","sources":[]}
422
-
423
- # forced 5G
424
- forced_5g = ""
425
- if adv_raw:
426
- pool5 = df_dec[(df_dec["_canon_make"]==canon) & (df_dec["_is5g"]==True)]
427
- if not pool5.empty:
428
- hit = process.extractOne(norm_text(adv_raw), pool5["_norm_model"].tolist(), scorer=fuzz.WRatio)
429
- forced_5g = str(pool5.iloc[int(hit[2])]["Model"]).strip() if hit and hit[1] >= MATCH_OK else adv_raw
430
- else:
431
- forced_5g = adv_raw
432
-
433
- forced_4g = ""
434
- if needs_4g_repl and sug_raw:
435
- pool4 = df_dec[(df_dec["_canon_make"]==canon) & (df_dec["_is5g"]==False)]
436
- if not pool4.empty:
437
- hit = process.extractOne(norm_text(sug_raw), pool4["_norm_model"].tolist(), scorer=fuzz.WRatio)
438
- forced_4g = str(pool4.iloc[int(hit[2])]["Model"]).strip() if hit and hit[1] >= MATCH_OK else ""
439
-
440
- repl_4g = forced_4g if needs_4g_repl else "Not applicable"
441
-
442
- if forced_5g:
443
- if forced_5g.lower() == "nan" or not forced_5g:
444
- forced_5g = pick_best_5g_fallback(canon)
445
- return {"repl_4g": repl_4g, "repl_5g": forced_5g, "why":"From lifecycle replacement mapping.", "sources":["lifecycle_csv"]}
446
-
447
- # rag + gpt 5g
448
- query_text = "\n".join([
449
- f"Old device: {life_row.get('sku','')} {life_row.get('manufacturer','')}",
450
- str(life_row.get('description','') or ''),
451
- str(life_row.get('notes','') or ''),
452
- f"Need 5G: {want_5g}",
453
- f"Need 4G: {needs_4g_repl}",
454
- ])[:2000]
455
-
456
- idx_5g = rag_candidates_for_make(canon, True, query_text, k=8)
457
- idx_4g = rag_candidates_for_make(canon, False, query_text, k=8) if needs_4g_repl else []
458
-
459
- def pack(idx: int) -> Dict[str, Any]:
460
- r = df_dec.loc[idx]
461
- return {"Model": str(r.get("Model","")), "Modem Type": str(r.get("Modem Type","")), "WiFi": str(r.get("WiFi type",""))}
462
-
463
- cand5 = [pack(i) for i in idx_5g[:8] if i in df_dec.index]
464
- cand4 = [pack(i) for i in idx_4g[:8] if i in df_dec.index]
465
-
466
- det5 = cand5[0]["Model"] if cand5 else ""
467
- if not det5:
468
- det5 = pick_best_5g_fallback(canon)
469
- det4 = (cand4[0]["Model"] if cand4 else "") if needs_4g_repl else "Not applicable"
470
-
471
- if client is None or not cand5:
472
- return {"repl_4g": det4, "repl_5g": det5, "why":"", "sources":["dec_catalog_rag"]}
473
-
474
- sys = "Pick replacements. Choose only from candidates. Return strict JSON only."
475
- payload = {
476
- "need_4g_replacement": needs_4g_repl,
477
- "candidates_5g": cand5,
478
- "candidates_4g": cand4,
479
- "output_schema": {"repl_4g":"string","repl_5g":"string","why":"string"}
480
  }
481
- out = gpt_json(sys, payload, max_tokens=350)
482
- repl5 = str(out.get("repl_5g","") or det5).strip()
483
- if (not repl5) or (repl5.lower() in {"nan","not applicable"}):
484
- repl5 = det5 or pick_best_5g_fallback(canon)
485
- repl4 = str(out.get("repl_4g","") or det4).strip()
486
- if not needs_4g_repl:
487
- repl4 = "Not applicable"
488
- return {"repl_4g": repl4, "repl_5g": repl5, "why": str(out.get("why","") or "").strip(), "sources":["dec_catalog_rag","gpt"]}
489
-
490
-
491
- # ----------------------------
492
- # Antenna picks
493
- # ----------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
494
  def _parsec_part_from_card(t: str) -> str:
495
  m = re.search(r"Standard\s+SKU:\s*([A-Z0-9]+)", t)
496
  return m.group(1).strip() if m else ""
@@ -499,27 +516,6 @@ def _parsec_desc_from_card(t: str) -> str:
499
  m = re.search(r"Description:\s*(.+?)(?:\n|$)", t, flags=re.IGNORECASE)
500
  return re.sub(r"\s+"," ",m.group(1).strip())[:220] if m else ""
501
 
502
- def _parsec_name_from_card(t: str) -> str:
503
- lines = [ln.strip() for ln in t.splitlines() if ln.strip()]
504
- sku_i = None
505
- for i, ln in enumerate(lines):
506
- if "Standard SKU" in ln:
507
- sku_i = i; break
508
- window = lines[max(0,(sku_i or 0)-12):(sku_i or 0)]
509
- for ln in reversed(window):
510
- low = ln.lower()
511
- if "description:" in low or "sku" in low or "customization" in low:
512
- continue
513
- if 3 <= len(ln) <= 90 and re.search(r"[A-Za-z]", ln):
514
- return ln
515
- for ln in lines[:30]:
516
- low = ln.lower()
517
- if "description:" in low or "standard sku" in low or "customization" in low:
518
- continue
519
- if 3 <= len(ln) <= 90 and re.search(r"[A-Za-z]", ln):
520
- return ln
521
- return "Parsec antenna"
522
-
523
  def parsec_retrieve(query: str, top_k: int = 10) -> List[Dict[str, Any]]:
524
  qv = embedder.encode([query], normalize_embeddings=True)
525
  qv = np.asarray(qv, dtype=np.float32)
@@ -528,7 +524,13 @@ def parsec_retrieve(query: str, top_k: int = 10) -> List[Dict[str, Any]]:
528
  for sc, i in zip(scores[0].tolist(), ids[0].tolist()):
529
  if 0 <= int(i) < len(parsec_cards):
530
  card = parsec_cards[int(i)]
531
- out.append({"name": _parsec_name_from_card(card), "part_number": _parsec_part_from_card(card), "description": _parsec_desc_from_card(card)})
 
 
 
 
 
 
532
  return out
533
 
534
  def antenna_options_for(router_model: str, tech: str, mimo: str) -> Dict[str, Any]:
@@ -537,52 +539,119 @@ def antenna_options_for(router_model: str, tech: str, mimo: str) -> Dict[str, An
537
  cand_stationary = parsec_retrieve(q_stationary, top_k=10)
538
  cand_vehicle = parsec_retrieve(q_vehicle, top_k=10)
539
 
540
- s = cand_stationary[0] if cand_stationary else {"name":"Parsec antenna","part_number":"","description":""}
541
- v = cand_vehicle[0] if cand_vehicle else {"name":"Parsec antenna","part_number":"","description":""}
542
- s.update({"mimo": mimo, "why": "Stationary omni best match."})
543
- v.update({"mimo": mimo, "why": "Vehicle omni best match."})
544
- return {"stationary_omni": s, "vehicle_omni": v, "sources":["parsec_rag"]}
545
-
546
 
547
- # ----------------------------
548
- # Feature table + output
549
- # ----------------------------
550
- FEATURE_ROWS = ["Name","Modem technology","WiFi","Ports","Antennas","Ruggedness","Use case"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
551
 
552
  def dec_features_by_model(model: str, canon_make: str) -> Dict[str, str]:
553
  if not model or model == "Not applicable":
554
- return {k:"Not listed" for k in FEATURE_ROWS}
555
  pool = df_dec[df_dec["_canon_make"] == canon_make].copy()
556
  if pool.empty:
557
- return {k:"Not listed" for k in FEATURE_ROWS}
558
  hit = process.extractOne(norm_text(model), pool["_norm_model"].tolist(), scorer=fuzz.WRatio)
559
  if not hit or hit[1] < MATCH_OK:
560
- return {k:"Not listed" for k in FEATURE_ROWS}
561
  r = pool.iloc[int(hit[2])]
562
  ports = f"WAN: {r.get('WAN ports and speed','')} | LAN: {r.get('LAN ports and speed','')}"
563
  return {
564
  "Name": str(r.get("Model","")),
565
  "Modem technology": str(r.get("Modem Type","")),
566
  "WiFi": str(r.get("WiFi type","")),
567
- "Ports": ports.strip(),
568
  "Antennas": str(r.get("Antennas (internal/external/both)","")),
569
  "Ruggedness": str(r.get("Ruggedization","")),
570
  "Use case": str(r.get("Primary use case","")),
571
  }
572
 
573
- def current_features_basic(life_row: pd.Series) -> Dict[str, str]:
574
- name = str(life_row.get("sku","") or "").strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
575
  desc = str(life_row.get("description","") or "").strip()
576
- txt = (desc + " " + str(life_row.get("notes","") or "")).lower()
577
- modem = "5G" if ("5g" in txt or "nr" in txt) else ("4G" if "lte" in txt or "4g" in txt else "Not listed")
578
- rugged = "Rugged" if "rugged" in txt else "Not listed"
579
- return {"Name": name, "Modem technology": modem, "WiFi": "Not listed", "Ports": "Not listed",
580
- "Antennas": "Not listed", "Ruggedness": rugged, "Use case": "Not listed"}
 
 
 
 
 
 
581
 
582
- def build_features_table(current_feats: Dict[str,str], r4_feats: Dict[str,str], r5_feats: Dict[str,str]) -> str:
583
  cols = ["Device", "Modem technology", "WiFi", "Ports", "Antennas", "Ruggedness", "Use case"]
584
  header = "| " + " | ".join(cols) + " |"
585
  sep = "| " + " | ".join(["---"]*len(cols)) + " |"
 
586
  def row(name: str, feats: Dict[str,str]) -> str:
587
  return "| " + " | ".join([
588
  name,
@@ -593,43 +662,65 @@ def build_features_table(current_feats: Dict[str,str], r4_feats: Dict[str,str],
593
  feats.get("Ruggedness","Not listed"),
594
  feats.get("Use case","Not listed"),
595
  ]) + " |"
596
- return "\n".join([header, sep, row("Current", current_feats), row("4G replacement", r4_feats), row("5G replacement", r5_feats)])
 
 
 
 
 
 
 
 
 
 
 
597
 
598
  def assemble_output(life_row: pd.Series, status: str, eos: str, eol: str, repl: Dict[str,Any], ant: Dict[str,Any]) -> str:
599
  canon_make = str(life_row.get("_canon_make","UNKNOWN"))
600
  current_name = f"{life_row.get('sku','')} — {life_row.get('description','')}".strip(" —")
601
- stationary = ant.get("stationary_omni", {})
602
- vehicle = ant.get("vehicle_omni", {})
603
 
604
- cur_feats = current_features_basic(life_row)
 
 
 
 
 
605
  r4_feats = dec_features_by_model(repl.get("repl_4g",""), canon_make)
606
  r5_feats = dec_features_by_model(repl.get("repl_5g",""), canon_make)
 
 
 
607
  table_md = build_features_table(cur_feats, r4_feats, r5_feats)
608
 
609
  lines = []
610
- lines.append(f"1) Current device: **{current_name}**")
611
- lines.append(f"2) Status: **{status}**")
612
- lines.append(f"3) End of Sale date: **{eos}**")
613
- lines.append(f"4) End of Life date: **{eol}**")
614
- lines.append(f"5) 4G recommended replacement: **{repl.get('repl_4g','Not applicable')}**")
615
- lines.append(f"6) 5G recommended replacement: **{repl.get('repl_5g','')}**")
616
- lines.append("7) Antenna options (Parsec-only):")
617
- lines.append(f" - Stationary (Omni): **{stationary.get('name','')}** (Part #: {stationary.get('part_number','')}) {stationary.get('description','')} — MIMO: {stationary.get('mimo','')} — {stationary.get('why','')}")
618
- lines.append(f" - Vehicle (Omni): **{vehicle.get('name','')}** (Part #: {vehicle.get('part_number','')}) — {vehicle.get('description','')} — MIMO: {vehicle.get('mimo','')} — {vehicle.get('why','')}")
619
- lines.append("8) Recommended features table:")
 
 
 
 
 
 
 
 
 
620
  lines.append(table_md)
621
- lines.append("\n**Sources (debug):**")
622
- srcs = repl.get("sources", [])
623
- if isinstance(srcs, list):
624
- for s in srcs:
625
- lines.append(f"- {s}")
626
  lines.append("- ParsecCatalog.pdf (local RAG)")
 
627
  return "\n".join(lines)
628
 
629
-
630
- # ----------------------------
631
- # Gradio callbacks
632
- # ----------------------------
633
  def run_lookup(user_text: str, st: Dict[str,Any]):
634
  user_text = str(user_text or "").strip()
635
  if not user_text:
@@ -640,17 +731,20 @@ def run_lookup(user_text: str, st: Dict[str,Any]):
640
  opts = res.get("options", [])
641
  choices = [o["label"] for o in opts]
642
  st2 = {"mode":"pick","options": opts}
643
- return "Did you mean A or B? Pick one, then click **Use selection**.", gr.update(choices=choices, value=None, visible=True), gr.update(visible=True), st2
644
 
645
  if res.get("mode") != "ok":
646
  return "Not found.", gr.update(visible=False), gr.update(visible=False), {}
647
 
648
  life_row = df_eos.iloc[int(res["row_idx"])]
649
  eos, eol, status = row_to_dates_and_status(life_row)
650
- repl = pick_replacements_gpt(life_row, status)
651
- tech = "5G" if repl.get("repl_5g") and repl.get("repl_5g") != "Not applicable" else ("4G" if likely_4g_only_row(life_row) else "Unknown")
 
 
652
  mimo_guess = "4x4" if tech == "5G" else "2x2"
653
  ant = antenna_options_for(router_model=repl.get("repl_5g") or str(life_row.get("sku","")), tech=tech, mimo=mimo_guess)
 
654
  return assemble_output(life_row, status, eos, eol, repl, ant), gr.update(visible=False), gr.update(visible=False), {}
655
 
656
  def use_selection(selected_label: str, st: Dict[str,Any]):
@@ -669,12 +763,13 @@ def use_selection(selected_label: str, st: Dict[str,Any]):
669
 
670
  life_row = df_eos.iloc[int(chosen_row)]
671
  eos, eol, status = row_to_dates_and_status(life_row)
672
- repl = pick_replacements_gpt(life_row, status)
673
- tech = "5G" if repl.get("repl_5g") and repl.get("repl_5g") != "Not applicable" else ("4G" if likely_4g_only_row(life_row) else "Unknown")
 
674
  mimo_guess = "4x4" if tech == "5G" else "2x2"
675
  ant = antenna_options_for(router_model=repl.get("repl_5g") or str(life_row.get("sku","")), tech=tech, mimo=mimo_guess)
676
- return assemble_output(life_row, status, eos, eol, repl, ant), gr.update(visible=False), gr.update(visible=False), {}
677
 
 
678
 
679
  with gr.Blocks(title="Only-Routers") as demo:
680
  gr.Markdown("## Only-Routers\nEnter a router SKU/model. If ambiguous, you’ll get A/B choices.")
@@ -690,4 +785,4 @@ with gr.Blocks(title="Only-Routers") as demo:
690
  check_btn.click(fn=run_lookup, inputs=[user_text, st], outputs=[output_md, pick_dd, use_btn, st])
691
  use_btn.click(fn=use_selection, inputs=[pick_dd, st], outputs=[output_md, pick_dd, use_btn, st])
692
 
693
- demo.launch(share=True, debug=True)
 
 
1
  import os
2
  import re
3
  import json
4
  import math
5
+ import glob
6
  import hashlib
7
  from dataclasses import dataclass
8
  from datetime import datetime, date
 
9
  from typing import Dict, List, Optional, Tuple, Any
 
10
 
11
  import numpy as np
12
  import pandas as pd
 
16
  from sentence_transformers import SentenceTransformer
17
  from rapidfuzz import fuzz, process
18
 
 
 
 
19
  import gradio as gr
20
  from openai import OpenAI
21
 
22
 
23
+ # ============================
24
  # Settings
25
+ # ============================
26
  TODAY = date(2026, 1, 18)
27
  OPENAI_MODEL = "gpt-5.2"
28
  OPENAI_REASONING = {"effort": "high"}
29
 
30
+ MATCH_OK = 80
31
  EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
32
+
33
  PARSEC_CONTEXT_BEFORE = 900
34
  PARSEC_CONTEXT_AFTER = 1600
35
 
 
 
 
 
 
 
 
 
36
  CACHE_DIR = os.path.join(os.getcwd(), ".onlyrouters_cache")
37
+ os.makedirs(CACHE_DIR, exist_ok=True)
 
 
 
38
 
39
 
40
+ # ============================
41
+ # OpenAI client (HF Space secret: OPENAI_API_KEY)
42
+ # ============================
43
  API_KEY = os.getenv("OPENAI_API_KEY", "").strip()
44
  client = OpenAI(api_key=API_KEY) if API_KEY else None
45
 
46
 
47
+ # ============================
48
+ # Small utilities
49
+ # ============================
50
  def norm_text(s: Any) -> str:
51
  try:
52
  if s is None or (isinstance(s, float) and math.isnan(s)) or pd.isna(s):
 
58
  s = re.sub(r"\s+", " ", s).strip()
59
  return s
60
 
61
+ def _safe_str(v: Any) -> str:
62
+ if v is None or (isinstance(v, float) and pd.isna(v)) or pd.isna(v):
63
+ return ""
64
+ return str(v).strip()
65
+
66
def _is_5g(modem_type: Any) -> bool:
    """Report whether a modem-type value looks like a 5G modem.

    The value is passed through ``norm_text`` and then checked for the
    substrings "5g" or "nr".
    """
    normalized = norm_text(modem_type)
    return any(marker in normalized for marker in ("5g", "nr"))
69
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  def _json_load_safe(s: str) -> Dict[str, Any]:
71
  try:
72
  return json.loads(s)
 
88
  return _json_load_safe(getattr(resp, "output_text", "") or "")
89
 
90
 
91
+ # ============================
92
+ # Load data files (must exist in repo)
93
+ # ============================
94
  EOS_PATH = "routers_eos_eol_by_sku.csv"
95
  DEC_PATH = "dec2025routers.csv"
96
  PARSEC_PDF = "ParsecCatalog.pdf"
 
105
  df_eos = pd.read_csv(EOS_PATH).copy()
106
  df_dec = pd.read_csv(DEC_PATH).copy()
107
 
108
+ # Region filter: keep USA / North America / blank / not specified
109
  def _region_ok(x: Any) -> bool:
110
  s = str(x or "").strip().lower()
111
  if not s:
 
125
  if "region" in df_eos.columns:
126
  df_eos = df_eos[df_eos["region"].apply(_region_ok)].reset_index(drop=True)
127
 
128
+ # Optional "Device Type" column
129
# First lifecycle column whose normalised name is "device type"; None if absent.
device_type_col = next(
    (col for col in df_eos.columns if norm_text(col) == "device type"),
    None,
)
134
 
135
+ # Maker mapping
136
  CANON_MAKER = {
137
  "CRADLEPOINT": {"cradlepoint", "ericsson", "ericsson enterprise wireless"},
138
  "SIERRA": {"sierra", "sierra wireless", "semtech", "airlink"},
 
159
  return canon
160
  return "UNKNOWN"
161
 
 
 
 
 
 
 
 
 
 
 
162
  df_eos["_canon_make"] = df_eos["manufacturer"].apply(canon_maker_from_text) if "manufacturer" in df_eos.columns else "UNKNOWN"
163
  df_eos["_norm_sku"] = df_eos["sku"].apply(norm_text) if "sku" in df_eos.columns else ""
164
+ df_eos["_norm_desc"] = df_eos["description"].apply(norm_text) if "description" in df_eos.columns else ""
165
+ df_eos["_norm_notes"] = df_eos["notes"].apply(norm_text) if "notes" in df_eos.columns else ""
166
 
167
  df_dec["_canon_make"] = df_dec["Make"].apply(canon_maker_from_text) if "Make" in df_dec.columns else "UNKNOWN"
168
  df_dec["_norm_model"] = df_dec["Model"].apply(norm_text) if "Model" in df_dec.columns else ""
169
  df_dec["_is5g"] = df_dec["Modem Type"].apply(_is_5g) if "Modem Type" in df_dec.columns else False
170
 
171
def display_maker_for_row(life_row: pd.Series) -> str:
    """Return the manufacturer name to display for a lifecycle row.

    Non-DIGI rows are looked up in ``DISPLAY_MAKER`` (falling back to
    "Unknown"). DIGI rows whose description or notes mention "accelerated"
    are shown as "Accelerated Concepts (now Digi)", the rest as "Digi".
    """
    canon = str(life_row.get("_canon_make","UNKNOWN"))
    if canon == "DIGI":
        searchable = (
            norm_text(life_row.get("description","")),
            norm_text(life_row.get("notes","")),
        )
        if any("accelerated" in field for field in searchable):
            return "Accelerated Concepts (now Digi)"
        return "Digi"
    return DISPLAY_MAKER.get(canon, "Unknown")
178
+
179
 
180
+ # ============================
181
  # Date helpers
182
+ # ============================
183
  @dataclass
184
  class ParsedDate:
185
  raw: str
 
237
  return display_date(eos), display_date(eol), status_from_eos_eol(eos, eol)
238
 
239
 
240
+ # ============================
241
+ # Embeddings + indices
242
+ # ============================
243
  embedder = SentenceTransformer(EMBED_MODEL_NAME)
244
 
245
+ # Parsec cards around "Standard SKU"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
def extract_pdf_text_pages(path: str) -> List[str]:
    """Return the plain text of every page of the PDF at ``path``, in page order.

    The document is opened with ``fitz`` and closed again before returning,
    so the file handle is not left open after the text has been extracted.
    """
    with fitz.open(path) as doc:
        return [doc[i].get_text("text") for i in range(len(doc))]
 
270
  parsec_index.add(parsec_emb)
271
 
272
 
273
+ # ============================
274
+ # Device resolution (exact SKU -> GPT A/B)
275
+ # ============================
276
def _label_for_row(i: int) -> str:
    """Build a short display label for the lifecycle row at position ``i``.

    The label is "sku — manufacturer — description", truncated to 220
    characters.
    """
    row = df_eos.iloc[i]
    pieces = (row.get('sku',''), row.get('manufacturer',''), row.get('description',''))
    label = " — ".join(str(piece) for piece in pieces)
    return label[:220]
 
280
  EOS_LABELS = [_label_for_row(i) for i in range(len(df_eos))]
281
# One space-joined text blob per lifecycle row, in df_eos row order.
EOS_CORPUS = [
    " ".join([
        row.get(field, "")
        for field in ("_norm_sku", "_canon_make", "_norm_desc", "_norm_notes")
    ])
    for _, row in df_eos.iterrows()
]
289
 
290
  def local_candidates(query: str, top_k: int = 6) -> List[Tuple[int,int,str]]:
291
  q = norm_text(query)
 
299
  payload = {
300
  "user_input": user_text,
301
  "candidates": [{"row_idx": i, "score": s, "label": lbl} for (i,s,lbl) in candidates],
302
+ "rules": [
303
+ "If one candidate is clearly correct, return mode='ok' with row_idx.",
304
+ "If two are plausible, return mode='pick' with top 2 options."
305
+ ],
306
  "output_schema": {"mode":"ok|pick","row_idx":"int","options":[{"row_idx":"int","label":"string"}]}
307
  }
308
+ return gpt_json(sys, payload, max_tokens=300)
309
 
310
  def resolve_device(user_text: str) -> Dict[str, Any]:
311
  q = norm_text(user_text)
312
+
313
  exact_idxs = df_eos.index[df_eos["_norm_sku"] == q].tolist()
314
  if len(exact_idxs) == 1:
315
  return {"mode":"ok","row_idx": int(exact_idxs[0])}
 
320
  cands = local_candidates(user_text, top_k=6)
321
  if not cands:
322
  return {"mode":"not_found"}
323
+
324
+ if cands[0][1] >= 95 and (len(cands) == 1 or (cands[0][1] - cands[1][1]) >= 8):
325
+ return {"mode":"ok","row_idx": cands[0][0]}
326
+
327
  g = gpt_choose_device(user_text, cands)
328
  if g.get("mode") == "ok" and isinstance(g.get("row_idx"), int):
329
  return {"mode":"ok","row_idx": int(g["row_idx"])}
330
+
331
  if g.get("mode") == "pick":
332
  opts = g.get("options", []) or []
333
  opts2 = [{"row_idx": int(o["row_idx"]), "label": str(o["label"])} for o in opts[:2] if "row_idx" in o]
334
  if opts2:
335
  return {"mode":"pick","options": opts2}
336
+
337
+ # fallback
338
+ if len(cands) > 1:
339
+ return {"mode":"pick","options":[{"row_idx":cands[0][0],"label":cands[0][2]},{"row_idx":cands[1][0],"label":cands[1][2]}]}
340
+ return {"mode":"pick","options":[{"row_idx":cands[0][0],"label":cands[0][2]}]}
341
 
342
 
343
+ # ============================
344
+ # Replacements — source of truth is lifecycle CSV (with GPT fallback)
345
+ # ============================
346
def _extract_model_token(text: str) -> str:
    """Pull a short model identifier out of a free-text replacement cell.

    The cell may hold several "|"-separated segments; they are examined from
    last to first. Within a segment the patterns are tried in priority
    order: ``IXnn``, then ``R``/``E``/``S`` followed by 3-4 digits, then a
    generic "1-5 letters + 2-4 digits + optional letter" token. The first
    match is returned upper-cased. When nothing matches, the first examined
    segment is returned, truncated to 60 characters. Empty input gives "".
    """
    raw = _safe_str(text)
    if not raw:
        return ""

    segments = [seg.strip() for seg in raw.split("|") if seg.strip()]
    ordered = list(reversed(segments)) if segments else [raw]

    for segment in ordered:
        # (pattern, flags, haystack): the generic pattern is case-sensitive
        # and therefore runs against the upper-cased segment.
        probes = (
            (r"\bIX\d{2}\b", re.IGNORECASE, segment),
            (r"\b(R\d{3,4}|E\d{3,4}|S\d{3,4})\b", re.IGNORECASE, segment),
            (r"\b[A-Z]{1,5}\d{2,4}[A-Z]?\b", 0, segment.upper()),
        )
        for pattern, flags, haystack in probes:
            found = re.search(pattern, haystack, flags=flags)
            if found:
                return found.group(0).upper()

    return ordered[0][:60]
365
+
366
def _candidate_models_from_lifecycle(canon_make: str, column: str) -> List[str]:
    """Collect the distinct model tokens found in ``column`` for one maker.

    Rows of ``df_eos`` whose ``_canon_make`` equals ``canon_make`` are
    scanned in order; each cell is reduced with ``_extract_model_token``.
    Empty tokens and the literal "nan" are dropped. First-seen order is
    preserved. Returns an empty list when the column does not exist.
    """
    if column not in df_eos.columns:
        return []
    cells = df_eos.loc[df_eos["_canon_make"] == canon_make, column]
    ordered: Dict[str, None] = {}  # dict keeps insertion order and de-duplicates
    for cell in cells:
        token = _extract_model_token(cell)
        if token and token.lower() != "nan":
            ordered.setdefault(token, None)
    return list(ordered)


def _candidate_5g_models_from_lifecycle(canon_make: str) -> List[str]:
    """Return distinct 5G upgrade models listed in the lifecycle CSV for a maker."""
    return _candidate_models_from_lifecycle(canon_make, "advanced_5g_option")


def _candidate_4g_models_from_lifecycle(canon_make: str) -> List[str]:
    """Return distinct 4G replacement models listed in the lifecycle CSV for a maker."""
    return _candidate_models_from_lifecycle(canon_make, "suggested_replacement")
385
 
386
def _gpt_pick_from_lifecycle_models(old_row: pd.Series, candidates: List[str], need: str) -> str:
    """Ask GPT to choose one replacement model from ``candidates``.

    Args:
        old_row: lifecycle row of the device being replaced (its ``sku``,
            ``description`` and ``manufacturer`` are sent to the model).
        candidates: allowed model names; only the first 30 are sent.
        need: short description of what is wanted (e.g. "4G replacement").

    Returns:
        The chosen candidate exactly as spelled in ``candidates``, or ""
        when no client is configured, there are no candidates, the response
        is not a JSON object, or the answer is not one of the candidates.
    """
    if client is None or not candidates:
        return ""
    system_prompt = "Pick the best replacement model. Choose only from candidates. Return strict JSON only."
    payload = {
        "old_device": {
            "sku": str(old_row.get("sku", "")),
            "description": str(old_row.get("description", "")),
            "manufacturer": str(old_row.get("manufacturer", "")),
            "need": need,
        },
        "candidates": candidates[:30],
        "output_schema": {"choice": "string"},
    }
    out = gpt_json(system_prompt, payload, max_tokens=220)
    # The parsed response may be any JSON value (list, string, ...); only an
    # object can carry the "choice" key.
    if not isinstance(out, dict):
        return ""
    choice = str(out.get("choice", "") or "").strip()
    if not choice:
        return ""
    if choice in candidates:
        return choice
    # Tolerate a case-only difference ("r1900" vs "R1900") and map it back to
    # the canonical spelling; the earliest candidate wins on collisions.
    folded = choice.casefold()
    for cand in candidates:
        if str(cand).casefold() == folded:
            return cand
    return ""
403
 
404
def _fallback_5g_from_dec(canon_make: str) -> str:
    """Return the first usable 5G model name for a maker from the router catalog.

    Looks at ``df_dec`` rows whose ``_canon_make`` matches and whose
    ``_is5g`` flag is true, and returns the first non-blank ``Model`` value.
    Returns "" when there is no such row, when the ``Model`` column is
    missing, or when every matching row has a blank model.
    """
    if "Model" not in df_dec.columns:
        return ""
    is_5g_row = df_dec["_is5g"].eq(True)
    pool5 = df_dec[(df_dec["_canon_make"] == canon_make) & is_5g_row]
    for model in pool5["Model"]:
        name = _safe_str(model)
        # Skip blank cells instead of returning the text "nan".
        if name and name.lower() != "nan":
            return name
    return ""
 
 
 
 
 
407
 
408
def _device_is_4g(life_row: pd.Series) -> bool:
    """Return True when the row's text marks the device as 4G/LTE and not 5G.

    The normalized ``description`` and ``notes`` are searched for the whole
    tokens "lte"/"4g" and "5g"/"nr". Whole-token matching is deliberate:
    plain substring tests also fire on unrelated text such as "filter"
    (contains "lte"), "2.4GHz"/"5GHz" (contain "4g"/"5g") or "unrestricted"
    (contains "nr").
    """
    text = " ".join((
        norm_text(life_row.get("description", "")),
        norm_text(life_row.get("notes", "")),
    ))
    mentions_4g = re.search(r"\b(?:lte|4g)\b", text) is not None
    mentions_5g = re.search(r"\b(?:5g|nr)\b", text) is not None
    return mentions_4g and not mentions_5g
411
+
412
def pick_replacements_lifecycle(life_row: pd.Series, status: str) -> Dict[str, Any]:
    """Choose the 4G and 5G replacement models for a lifecycle row.

    Resolution order for each replacement: the row's own lifecycle column,
    then a GPT pick from the maker's lifecycle pool, then the first pool
    entry. For 5G only, the router catalog is the last resort.

    Args:
        life_row: row of the lifecycle table for the current device.
        status: lifecycle status string; "End of Sale" and "End of Life"
            count as retired.

    Returns:
        Dict with ``repl_4g`` (model or "Not applicable"), ``repl_5g``
        (model, "" when nothing was found, or "Not applicable" when no 5G
        suggestion is wanted), ``why`` and ``sources``. ``sources`` lists
        only the providers that actually supplied a value.
    """
    canon = str(life_row.get("_canon_make", "UNKNOWN"))
    if canon == "UNKNOWN":
        return {"repl_4g": "Not applicable", "repl_5g": "", "why": "", "sources": []}

    retired = status in {"End of Sale", "End of Life"}
    is_4g_device = _device_is_4g(life_row)
    needs_4g_repl = is_4g_device and retired
    want_5g = is_4g_device or retired

    sources: List[str] = ["lifecycle_csv"]

    def _resolve(column: str, pool: List[str], need: str) -> str:
        """Return the row's own token, else a GPT/first pick from ``pool``, else ""."""
        token = _extract_model_token(_safe_str(life_row.get(column, "")))
        if token and token.lower() != "nan":
            return token
        if not pool:
            return ""
        picked = _gpt_pick_from_lifecycle_models(life_row, pool, need)
        if picked:
            if "gpt" not in sources:
                sources.append("gpt")
            return picked
        return pool[0]

    repl_4g = "Not applicable"
    if needs_4g_repl:
        own_4g = _extract_model_token(_safe_str(life_row.get("suggested_replacement", "")))
        # Only build the maker-wide pool when the row has no usable value.
        pool_4g = [] if (own_4g and own_4g.lower() != "nan") else _candidate_4g_models_from_lifecycle(canon)
        repl_4g = _resolve("suggested_replacement", pool_4g, "4G replacement") or "Not applicable"

    repl_5g = "Not applicable"
    if want_5g:
        own_5g = _extract_model_token(_safe_str(life_row.get("advanced_5g_option", "")))
        pool_5g = [] if (own_5g and own_5g.lower() != "nan") else _candidate_5g_models_from_lifecycle(canon)
        repl_5g = _resolve("advanced_5g_option", pool_5g, "5G replacement/upgrade")
        if not repl_5g:
            # last resort: dec catalog
            repl_5g = _fallback_5g_from_dec(canon)
            if repl_5g.lower() == "nan":
                repl_5g = ""
            if repl_5g:
                sources.append("dec_fallback")

    return {
        "repl_4g": repl_4g,
        "repl_5g": repl_5g,
        "why": "Lifecycle replacements (GPT fallback when missing).",
        "sources": sources,
    }
449
+
450
+
451
+ # ============================
452
+ # Antennas (Parsec-only; family name extraction)
453
+ # ============================
454
# Lower-case family words recognised in Parsec catalog text; used to pull a
# short family name out of a catalog card.
PARSEC_FAMILY_WORDS = {
    "chinook",
    "labrador",
    "boxer",
    "bloodhound",
    "husky",
    "beagle",
    "mastiff",
    "collie",
    "shepherd",
    "belgian",
    "australian",
    "terrier",
    "pyrenees",
}

# Lower-case substrings that mark a catalog line as a section heading or
# field label rather than a product name.
BAD_NAME_MARKERS = {
    "customization",
    "standard connectors",
    "connectors",
    "features",
    "benefits",
    "specifications",
    "mechanical",
    "electrical",
    "mounting",
    "accessories",
    "description:",
    "standard sku",
}
463
+
464
+ def _clean_line(s: str) -> str:
465
+ s = re.sub(r"\s+", " ", str(s or "").strip())
466
+ if re.fullmatch(r"-[a-z0-9]+", s.lower()):
467
+ return ""
468
+ return s
469
+
470
def _is_bad_name_line(line: str) -> bool:
    """Return True when a catalog line should not be used as a product name.

    A line is rejected when it contains any ``BAD_NAME_MARKERS`` substring
    (case-insensitive), or when it is at most 25 characters long and
    contains a hyphenated 1-4 character code.
    """
    lowered = line.lower()
    for marker in BAD_NAME_MARKERS:
        if marker in lowered:
            return True
    is_short = len(lowered) <= 25
    has_option_code = re.search(r"\b-[a-z0-9]{1,4}\b", lowered) is not None
    return has_option_code and is_short
477
+
478
def _family_from_line(line: str) -> str:
    """Return the capitalized Parsec family word found in ``line``, or "".

    When several family words occur (e.g. "Australian Shepherd"), the one
    that starts earliest in the line wins, with alphabetical order as the
    tie-break. Selecting by position keeps the result independent of the
    iteration order of the ``PARSEC_FAMILY_WORDS`` set, which for strings
    can differ from one interpreter run to the next.
    """
    low = str(line or "").lower()
    hits = [(low.find(fam), fam) for fam in PARSEC_FAMILY_WORDS if fam in low]
    if not hits:
        return ""
    return min(hits)[1].capitalize()
484
+
485
def _parsec_name_from_card(card_text: str) -> str:
    """Derive a short display name for a Parsec catalog card.

    Pass 1: the first acceptable line containing a known family word yields
    that family name. Pass 2: otherwise, walk backwards through up to 12
    lines preceding the first "standard sku" line and use the first word of
    the nearest acceptable line that is 3-40 characters long and contains a
    letter. Falls back to "Parsec antenna".
    """
    cleaned = [ln for ln in map(_clean_line, str(card_text or "").splitlines()) if ln]

    # Pass 1: known family word anywhere in the card.
    for candidate in cleaned:
        if not _is_bad_name_line(candidate):
            family = _family_from_line(candidate)
            if family:
                return family

    # Pass 2: nearest plausible heading above the SKU line.
    anchor = next(
        (pos for pos, text in enumerate(cleaned) if "standard sku" in text.lower()),
        None,
    )
    if anchor is None:
        return "Parsec antenna"

    first = max(0, anchor - 12)
    for pos in range(anchor - 1, first - 1, -1):
        text = cleaned[pos]
        if _is_bad_name_line(text):
            continue
        if 3 <= len(text) <= 40 and re.search(r"[A-Za-z]", text):
            return text.split()[0].capitalize()

    return "Parsec antenna"
510
+
511
  def _parsec_part_from_card(t: str) -> str:
512
  m = re.search(r"Standard\s+SKU:\s*([A-Z0-9]+)", t)
513
  return m.group(1).strip() if m else ""
 
516
  m = re.search(r"Description:\s*(.+?)(?:\n|$)", t, flags=re.IGNORECASE)
517
  return re.sub(r"\s+"," ",m.group(1).strip())[:220] if m else ""
518
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
519
  def parsec_retrieve(query: str, top_k: int = 10) -> List[Dict[str, Any]]:
520
  qv = embedder.encode([query], normalize_embeddings=True)
521
  qv = np.asarray(qv, dtype=np.float32)
 
524
  for sc, i in zip(scores[0].tolist(), ids[0].tolist()):
525
  if 0 <= int(i) < len(parsec_cards):
526
  card = parsec_cards[int(i)]
527
+ out.append({
528
+ "score": float(sc),
529
+ "name": _parsec_name_from_card(card),
530
+ "part_number": _parsec_part_from_card(card),
531
+ "description": _parsec_desc_from_card(card),
532
+ "card": card[:1100],
533
+ })
534
  return out
535
 
536
  def antenna_options_for(router_model: str, tech: str, mimo: str) -> Dict[str, Any]:
 
539
  cand_stationary = parsec_retrieve(q_stationary, top_k=10)
540
  cand_vehicle = parsec_retrieve(q_vehicle, top_k=10)
541
 
542
+ if client is None:
543
+ s = cand_stationary[0] if cand_stationary else {"name":"Parsec antenna","part_number":"","description":""}
544
+ v = cand_vehicle[0] if cand_vehicle else {"name":"Parsec antenna","part_number":"","description":""}
545
+ s.update({"mimo": mimo, "why": "Stationary omni best match."})
546
+ v.update({"mimo": mimo, "why": "Vehicle omni best match."})
547
+ return {"stationary_omni": s, "vehicle_omni": v, "sources":["parsec_rag"]}
548
 
549
+ sys = "Select Parsec antennas. Choose only from candidates. Return strict JSON only."
550
+ payload = {
551
+ "router_model": router_model,
552
+ "tech": tech,
553
+ "mimo": mimo,
554
+ "stationary_candidates": cand_stationary,
555
+ "vehicle_candidates": cand_vehicle,
556
+ "rules": [
557
+ "Return two options: stationary_omni and vehicle_omni.",
558
+ "Use only candidates. Prefer family names like Labrador/Chinook/Boxer.",
559
+ "Include name, part_number, description, mimo, why.",
560
+ "Return JSON only."
561
+ ],
562
+ "output_schema": {
563
+ "stationary_omni": {"name":"string","part_number":"string","description":"string","mimo":"2x2|4x4","why":"string"},
564
+ "vehicle_omni": {"name":"string","part_number":"string","description":"string","mimo":"2x2|4x4","why":"string"}
565
+ }
566
+ }
567
+ out = gpt_json(sys, payload, max_tokens=650) or {}
568
+
569
+ def _fix(x: Dict[str, Any], default_why: str) -> Dict[str, str]:
570
+ return {
571
+ "name": str(x.get("name","Parsec antenna") or "Parsec antenna")[:110],
572
+ "part_number": str(x.get("part_number","") or "")[:40],
573
+ "description": str(x.get("description","") or "")[:220],
574
+ "mimo": str(x.get("mimo", mimo) or mimo),
575
+ "why": str(x.get("why", default_why) or default_why)[:160],
576
+ }
577
+
578
+ s = _fix(out.get("stationary_omni", {}) if isinstance(out, dict) else {}, "Stationary omni best match.")
579
+ v = _fix(out.get("vehicle_omni", {}) if isinstance(out, dict) else {}, "Vehicle omni best match.")
580
+ if not s.get("part_number") and cand_stationary:
581
+ top = cand_stationary[0]
582
+ s = {"name": top.get("name","Parsec antenna"), "part_number": top.get("part_number",""), "description": top.get("description",""), "mimo": mimo, "why":"Stationary omni best match."}
583
+ if not v.get("part_number") and cand_vehicle:
584
+ top = cand_vehicle[0]
585
+ v = {"name": top.get("name","Parsec antenna"), "part_number": top.get("part_number",""), "description": top.get("description",""), "mimo": mimo, "why":"Vehicle omni best match."}
586
+ return {"stationary_omni": s, "vehicle_omni": v, "sources":["parsec_rag","gpt"]}
587
+
588
+
589
+ # ============================
590
+ # Feature table + GPT fill for missing fields (no more ****; fill missing via GPT)
591
+ # ============================
592
+ FEATURE_COLS = ["Name","Modem technology","WiFi","Ports","Antennas","Ruggedness","Use case"]
593
 
def dec_features_by_model(model: str, canon_make: str) -> Dict[str, str]:
    """Look up feature-table values for ``model`` in the router catalog.

    The model is fuzzy-matched against the normalized model names of the
    given maker in ``df_dec``; a match scoring below ``MATCH_OK`` is treated
    as no match.

    Returns:
        A dict keyed by the feature columns. Every value is "Not listed"
        when the model is empty/"Not applicable", the maker has no catalog
        rows, or no sufficiently close match exists. Blank catalog cells
        are also reported as "Not listed".
    """
    not_listed = {k: "Not listed" for k in FEATURE_COLS}
    if not model or model == "Not applicable":
        return not_listed
    pool = df_dec[df_dec["_canon_make"] == canon_make]
    if pool.empty:
        return not_listed
    hit = process.extractOne(norm_text(model), pool["_norm_model"].tolist(), scorer=fuzz.WRatio)
    if not hit or hit[1] < MATCH_OK:
        return not_listed
    r = pool.iloc[int(hit[2])]  # hit[2] is the position within the choices list

    def _cell(column: str) -> str:
        """Return a table-safe cell value, or "" when the cell is blank."""
        value = _safe_str(r.get(column, ""))
        if not value or value.lower() == "nan":
            return ""
        # A raw "|" would be read as a column separator in the Markdown table.
        return value.replace("|", "/")

    wan = _cell("WAN ports and speed")
    lan = _cell("LAN ports and speed")
    if wan or lan:
        # Joined with ";" rather than "|" so the value stays inside one table cell.
        ports = f"WAN: {wan or 'Not listed'}; LAN: {lan or 'Not listed'}"
    else:
        ports = "Not listed"

    return {
        "Name": _cell("Model") or "Not listed",
        "Modem technology": _cell("Modem Type") or "Not listed",
        "WiFi": _cell("WiFi type") or "Not listed",
        "Ports": ports,
        "Antennas": _cell("Antennas (internal/external/both)") or "Not listed",
        "Ruggedness": _cell("Ruggedization") or "Not listed",
        "Use case": _cell("Primary use case") or "Not listed",
    }
614
 
615
def gpt_fill_features(device_label: str, feats: Dict[str,str], context: str) -> Dict[str,str]:
    """Ask GPT to fill feature fields that are blank, "Not listed" or "nan".

    Args:
        device_label: short label describing the device to the model.
        feats: feature dict; it is updated in place and also returned.
        context: free text to help the model; only the first 2000
            characters are sent.

    Returns:
        ``feats``. It is returned unchanged when no client is configured,
        nothing is missing, or the response is not a JSON object.
    """
    placeholders = {"", "not listed", "nan"}
    # str(...) keeps non-string values (e.g. a float NaN cell) from crashing .strip().
    missing = [k for k, v in feats.items() if str(v or "").strip().lower() in placeholders]
    if client is None or not missing:
        return feats
    system_prompt = "Fill missing router feature fields. Return strict JSON only."
    payload = {
        "device": device_label,
        "known": feats,
        "context": str(context or "")[:2000],
        "fill_only": missing,
        "rules": ["Fill only requested fields. Best guess if needed. Return JSON only."],
        "output_schema": {k: "string" for k in missing},
    }
    out = gpt_json(system_prompt, payload, max_tokens=350)
    if not isinstance(out, dict):
        return feats
    for k in missing:
        v = str(out.get(k, "") or "").strip()
        if v:
            feats[k] = v
    return feats
634
+
635
def current_features_guess(life_row: pd.Series) -> Dict[str,str]:
    """Build the feature row for the current device from its lifecycle row.

    Only the name (SKU) and modem technology are derived locally; the other
    fields start as "Not listed" and are passed to ``gpt_fill_features``
    together with the description and notes as context.

    Modem technology is "4G" when ``_device_is_4g`` says so, otherwise "5G"
    when the text holds "5g" or "nr" as a whole token, otherwise
    "Not listed". Whole-token matching avoids false hits such as "5GHz".
    """
    sku = _safe_str(life_row.get("sku", ""))
    desc = _safe_str(life_row.get("description", ""))
    notes = _safe_str(life_row.get("notes", ""))

    if _device_is_4g(life_row):
        modem = "4G"
    elif re.search(r"\b(?:5g|nr)\b", f"{desc} {notes}".lower()):
        modem = "5G"
    else:
        modem = "Not listed"

    base = {
        "Name": sku,
        "Modem technology": modem,
        "WiFi": "Not listed",
        "Ports": "Not listed",
        "Antennas": "Not listed",
        "Ruggedness": "Not listed",
        "Use case": "Not listed",
    }
    return gpt_fill_features("Current device", base, f"{desc}\n{notes}")
649
 
650
+ def build_features_table(cur: Dict[str,str], r4: Dict[str,str], r5: Dict[str,str]) -> str:
651
  cols = ["Device", "Modem technology", "WiFi", "Ports", "Antennas", "Ruggedness", "Use case"]
652
  header = "| " + " | ".join(cols) + " |"
653
  sep = "| " + " | ".join(["---"]*len(cols)) + " |"
654
+
655
  def row(name: str, feats: Dict[str,str]) -> str:
656
  return "| " + " | ".join([
657
  name,
 
662
  feats.get("Ruggedness","Not listed"),
663
  feats.get("Use case","Not listed"),
664
  ]) + " |"
665
+
666
+ return "\n".join([header, sep, row("Current", cur), row("4G replacement", r4), row("5G replacement", r5)])
667
+
668
+
669
+ # ============================
670
+ # Output + Gradio UI
671
+ # ============================
672
def fmt(v: Any, fallback: str = "Not listed") -> str:
    """Return ``v`` as display text, or ``fallback`` when it is blank or "nan"."""
    text = _safe_str(v)
    is_blank = (not text) or text.lower() == "nan"
    return fallback if is_blank else text
677
 
def assemble_output(life_row: pd.Series, status: str, eos: str, eol: str, repl: Dict[str,Any], ant: Dict[str,Any]) -> str:
    """Render the Markdown report for one resolved device.

    Args:
        life_row: lifecycle row of the current device.
        status: lifecycle status text.
        eos: display string for the End of Sale date.
        eol: display string for the End of Life date.
        repl: replacement dict with ``repl_4g``, ``repl_5g`` and ``sources``.
        ant: antenna dict with ``stationary_omni`` and ``vehicle_omni``.

    Returns:
        A Markdown string: numbered summary, antenna options, feature table
        and a debug list of sources.
    """
    canon_make = str(life_row.get("_canon_make", "UNKNOWN"))
    current_name = f"{life_row.get('sku','')} — {life_row.get('description','')}".strip(" —")

    # Antenna
    st = ant.get("stationary_omni", {}) or {}
    vh = ant.get("vehicle_omni", {}) or {}

    # Resolve the 5G replacement before building the feature table, so the
    # table row describes the same model that item 6 reports.
    # If 5G is empty, force GPT to pick from lifecycle pool
    repl5 = fmt(repl.get("repl_5g"), "")
    if (not repl5) and client is not None:
        cand5 = _candidate_5g_models_from_lifecycle(canon_make)
        repl5 = _gpt_pick_from_lifecycle_models(life_row, cand5, "5G replacement/upgrade") or (cand5[0] if cand5 else "")

    # Feature table (fill missing via GPT)
    cur_feats = current_features_guess(life_row)
    r4_feats = dec_features_by_model(repl.get("repl_4g", ""), canon_make)
    r5_feats = dec_features_by_model(repl5, canon_make)
    if client is not None:
        r4_feats = gpt_fill_features("4G replacement", r4_feats, "")
        r5_feats = gpt_fill_features("5G replacement", r5_feats, "")
    table_md = build_features_table(cur_feats, r4_feats, r5_feats)

    if not repl5:
        repl5 = "Not listed"

    lines = []
    lines.append(f"1. Current device: **{current_name}**")
    lines.append(f"2. Status: **{status}**")
    lines.append(f"3. End of Sale date: **{eos}**")
    lines.append(f"4. End of Life date: **{eol}**")
    lines.append(f"5. 4G recommended replacement: **{fmt(repl.get('repl_4g'), 'Not applicable')}**")
    lines.append(f"6. 5G recommended replacement: **{repl5}**")

    lines.append("7. Antenna options (Parsec-only):")
    lines.append(f" - Stationary (Omni): **{fmt(st.get('name'))}** (Part #: {fmt(st.get('part_number'))}) — {fmt(st.get('description'))} — MIMO: {fmt(st.get('mimo'))} — {fmt(st.get('why'))}")
    lines.append(f" - Vehicle (Omni): **{fmt(vh.get('name'))}** (Part #: {fmt(vh.get('part_number'))}) — {fmt(vh.get('description'))} — MIMO: {fmt(vh.get('mimo'))} — {fmt(vh.get('why'))}")

    lines.append("8. Recommended features table:")
    lines.append(table_md)

    lines.append("\nSources (debug):")
    sources = repl.get("sources")
    for s in sources if isinstance(sources, list) else []:
        lines.append(f"- {s}")
    lines.append("- ParsecCatalog.pdf (local RAG)")
    lines.append("- dec2025routers.csv (features + fallback)")
    return "\n".join(lines)
723
 
 
 
 
 
724
  def run_lookup(user_text: str, st: Dict[str,Any]):
725
  user_text = str(user_text or "").strip()
726
  if not user_text:
 
731
  opts = res.get("options", [])
732
  choices = [o["label"] for o in opts]
733
  st2 = {"mode":"pick","options": opts}
734
+ return "Did you mean A or B? Pick one, then click Use selection.", gr.update(choices=choices, value=None, visible=True), gr.update(visible=True), st2
735
 
736
  if res.get("mode") != "ok":
737
  return "Not found.", gr.update(visible=False), gr.update(visible=False), {}
738
 
739
  life_row = df_eos.iloc[int(res["row_idx"])]
740
  eos, eol, status = row_to_dates_and_status(life_row)
741
+
742
+ repl = pick_replacements_lifecycle(life_row, status)
743
+
744
+ tech = "5G" if repl.get("repl_5g") and repl.get("repl_5g") != "Not applicable" else ("4G" if _device_is_4g(life_row) else "Unknown")
745
  mimo_guess = "4x4" if tech == "5G" else "2x2"
746
  ant = antenna_options_for(router_model=repl.get("repl_5g") or str(life_row.get("sku","")), tech=tech, mimo=mimo_guess)
747
+
748
  return assemble_output(life_row, status, eos, eol, repl, ant), gr.update(visible=False), gr.update(visible=False), {}
749
 
750
  def use_selection(selected_label: str, st: Dict[str,Any]):
 
763
 
764
  life_row = df_eos.iloc[int(chosen_row)]
765
  eos, eol, status = row_to_dates_and_status(life_row)
766
+ repl = pick_replacements_lifecycle(life_row, status)
767
+
768
+ tech = "5G" if repl.get("repl_5g") and repl.get("repl_5g") != "Not applicable" else ("4G" if _device_is_4g(life_row) else "Unknown")
769
  mimo_guess = "4x4" if tech == "5G" else "2x2"
770
  ant = antenna_options_for(router_model=repl.get("repl_5g") or str(life_row.get("sku","")), tech=tech, mimo=mimo_guess)
 
771
 
772
+ return assemble_output(life_row, status, eos, eol, repl, ant), gr.update(visible=False), gr.update(visible=False), {}
773
 
774
  with gr.Blocks(title="Only-Routers") as demo:
775
  gr.Markdown("## Only-Routers\nEnter a router SKU/model. If ambiguous, you’ll get A/B choices.")
 
785
  check_btn.click(fn=run_lookup, inputs=[user_text, st], outputs=[output_md, pick_dd, use_btn, st])
786
  use_btn.click(fn=use_selection, inputs=[pick_dd, st], outputs=[output_md, pick_dd, use_btn, st])
787
 
788
+ demo.launch()
only-routers_ai_poc_v4_5.ipynb ADDED
@@ -0,0 +1,831 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "762ab53b",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Only-Routers (v4.5)\n",
9
+ "This notebook mirrors the Space `app.py` logic.\n"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "markdown",
14
+ "id": "1993260c",
15
+ "metadata": {},
16
+ "source": [
17
+ "## Run\n",
18
+ "1) Upload the 3 data files and set OPENAI_API_KEY env var (or load key).\n",
19
+ "2) Run cells top-to-bottom.\n"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": null,
25
+ "id": "66695eb5",
26
+ "metadata": {},
27
+ "outputs": [],
28
+ "source": [
29
+ "import os\n",
30
+ "import re\n",
31
+ "import json\n",
32
+ "import math\n",
33
+ "import glob\n",
34
+ "import hashlib\n",
35
+ "from dataclasses import dataclass\n",
36
+ "from datetime import datetime, date\n",
37
+ "from typing import Dict, List, Optional, Tuple, Any\n",
38
+ "\n",
39
+ "import numpy as np\n",
40
+ "import pandas as pd\n",
41
+ "\n",
42
+ "import fitz # PyMuPDF\n",
43
+ "import faiss\n",
44
+ "from sentence_transformers import SentenceTransformer\n",
45
+ "from rapidfuzz import fuzz, process\n",
46
+ "\n",
47
+ "import gradio as gr\n",
48
+ "from openai import OpenAI\n",
49
+ "\n",
50
+ "\n",
51
+ "# ============================\n",
52
+ "# Settings\n",
53
+ "# ============================\n",
54
+ "TODAY = date(2026, 1, 18)\n",
55
+ "OPENAI_MODEL = \"gpt-5.2\"\n",
56
+ "OPENAI_REASONING = {\"effort\": \"high\"}\n",
57
+ "\n",
58
+ "MATCH_OK = 80\n",
59
+ "EMBED_MODEL_NAME = \"sentence-transformers/all-MiniLM-L6-v2\"\n",
60
+ "\n",
61
+ "PARSEC_CONTEXT_BEFORE = 900\n",
62
+ "PARSEC_CONTEXT_AFTER = 1600\n",
63
+ "\n",
64
+ "CACHE_DIR = os.path.join(os.getcwd(), \".onlyrouters_cache\")\n",
65
+ "os.makedirs(CACHE_DIR, exist_ok=True)\n",
66
+ "\n",
67
+ "\n",
68
+ "# ============================\n",
69
+ "# OpenAI client (HF Space secret: OPENAI_API_KEY)\n",
70
+ "# ============================\n",
71
+ "API_KEY = os.getenv(\"OPENAI_API_KEY\", \"\").strip()\n",
72
+ "client = OpenAI(api_key=API_KEY) if API_KEY else None\n",
73
+ "\n",
74
+ "\n",
75
+ "# ============================\n",
76
+ "# Small utilities\n",
77
+ "# ============================\n",
78
+ "def norm_text(s: Any) -> str:\n",
79
+ " try:\n",
80
+ " if s is None or (isinstance(s, float) and math.isnan(s)) or pd.isna(s):\n",
81
+ " return \"\"\n",
82
+ " except Exception:\n",
83
+ " pass\n",
84
+ " s = str(s).strip().lower()\n",
85
+ " s = re.sub(r\"[^a-z0-9\\s\\-\\/]\", \" \", s)\n",
86
+ " s = re.sub(r\"\\s+\", \" \", s).strip()\n",
87
+ " return s\n",
88
+ "\n",
89
+ "def _safe_str(v: Any) -> str:\n",
90
+ " if v is None or (isinstance(v, float) and pd.isna(v)) or pd.isna(v):\n",
91
+ " return \"\"\n",
92
+ " return str(v).strip()\n",
93
+ "\n",
94
+ "def _is_5g(modem_type: Any) -> bool:\n",
95
+ " s = norm_text(modem_type)\n",
96
+ " return (\"5g\" in s) or (\"nr\" in s)\n",
97
+ "\n",
98
+ "def _json_load_safe(s: str) -> Dict[str, Any]:\n",
99
+ " try:\n",
100
+ " return json.loads(s)\n",
101
+ " except Exception:\n",
102
+ " return {}\n",
103
+ "\n",
104
+ "def gpt_json(system: str, payload: Dict[str, Any], max_tokens: int = 700) -> Dict[str, Any]:\n",
105
+ " if client is None:\n",
106
+ " return {}\n",
107
+ " resp = client.responses.create(\n",
108
+ " model=OPENAI_MODEL,\n",
109
+ " reasoning=OPENAI_REASONING,\n",
110
+ " input=[\n",
111
+ " {\"role\": \"system\", \"content\": system},\n",
112
+ " {\"role\": \"user\", \"content\": json.dumps(payload)},\n",
113
+ " ],\n",
114
+ " max_output_tokens=max_tokens,\n",
115
+ " )\n",
116
+ " return _json_load_safe(getattr(resp, \"output_text\", \"\") or \"\")\n",
117
+ "\n",
118
+ "\n",
119
+ "# ============================\n",
120
+ "# Load data files (must exist in repo)\n",
121
+ "# ============================\n",
122
+ "EOS_PATH = \"routers_eos_eol_by_sku.csv\"\n",
123
+ "DEC_PATH = \"dec2025routers.csv\"\n",
124
+ "PARSEC_PDF = \"ParsecCatalog.pdf\"\n",
125
+ "\n",
126
+ "if not os.path.exists(EOS_PATH):\n",
127
+ " raise FileNotFoundError(f\"Missing {EOS_PATH} in repo.\")\n",
128
+ "if not os.path.exists(DEC_PATH):\n",
129
+ " raise FileNotFoundError(f\"Missing {DEC_PATH} in repo.\")\n",
130
+ "if not os.path.exists(PARSEC_PDF):\n",
131
+ " raise FileNotFoundError(f\"Missing {PARSEC_PDF} in repo.\")\n",
132
+ "\n",
133
+ "df_eos = pd.read_csv(EOS_PATH).copy()\n",
134
+ "df_dec = pd.read_csv(DEC_PATH).copy()\n",
135
+ "\n",
136
+ "# Region filter: keep USA / North America / blank / not specified\n",
137
+ "def _region_ok(x: Any) -> bool:\n",
138
+ " s = str(x or \"\").strip().lower()\n",
139
+ " if not s:\n",
140
+ " return True\n",
141
+ " if \"not specified\" in s:\n",
142
+ " return True\n",
143
+ " if \"north america\" in s:\n",
144
+ " return True\n",
145
+ " if re.search(r\"\\busa\\b\", s):\n",
146
+ " return True\n",
147
+ " if re.search(r\"\\bunited\\s+states\\b\", s):\n",
148
+ " return True\n",
149
+ " if re.search(r\"\\bu\\.?s\\.?\\b\", s):\n",
150
+ " return True\n",
151
+ " return False\n",
152
+ "\n",
153
+ "if \"region\" in df_eos.columns:\n",
154
+ " df_eos = df_eos[df_eos[\"region\"].apply(_region_ok)].reset_index(drop=True)\n",
155
+ "\n",
156
+ "# Optional \"Device Type\" column\n",
157
+ "device_type_col = None\n",
158
+ "for c in df_eos.columns:\n",
159
+ " if norm_text(c) == \"device type\":\n",
160
+ " device_type_col = c\n",
161
+ " break\n",
162
+ "\n",
163
+ "# Maker mapping\n",
164
+ "CANON_MAKER = {\n",
165
+ " \"CRADLEPOINT\": {\"cradlepoint\", \"ericsson\", \"ericsson enterprise wireless\"},\n",
166
+ " \"SIERRA\": {\"sierra\", \"sierra wireless\", \"semtech\", \"airlink\"},\n",
167
+ " \"FEENEY\": {\"feeney\", \"feeney wireless\", \"inseego\"},\n",
168
+ " \"DIGI\": {\"digi\", \"accelerated\", \"accelerated concepts\"},\n",
169
+ " \"CISCO_MERAKI\": {\"meraki\", \"cisco meraki\"},\n",
170
+ " \"CISCO\": {\"cisco\"},\n",
171
+ "}\n",
172
+ "DISPLAY_MAKER = {\n",
173
+ " \"CRADLEPOINT\": \"Cradlepoint\",\n",
174
+ " \"SIERRA\": \"Sierra Wireless\",\n",
175
+ " \"FEENEY\": \"Feeney Wireless\",\n",
176
+ " \"DIGI\": \"Digi\",\n",
177
+ " \"CISCO_MERAKI\": \"Cisco Meraki\",\n",
178
+ " \"CISCO\": \"Cisco\",\n",
179
+ " \"UNKNOWN\": \"Unknown\",\n",
180
+ "}\n",
181
+ "\n",
182
+ "def canon_maker_from_text(s: Any) -> str:\n",
183
+ " t = norm_text(s)\n",
184
+ " for canon, terms in CANON_MAKER.items():\n",
185
+ " for term in terms:\n",
186
+ " if term in t:\n",
187
+ " return canon\n",
188
+ " return \"UNKNOWN\"\n",
189
+ "\n",
190
+ "df_eos[\"_canon_make\"] = df_eos[\"manufacturer\"].apply(canon_maker_from_text) if \"manufacturer\" in df_eos.columns else \"UNKNOWN\"\n",
191
+ "df_eos[\"_norm_sku\"] = df_eos[\"sku\"].apply(norm_text) if \"sku\" in df_eos.columns else \"\"\n",
192
+ "df_eos[\"_norm_desc\"] = df_eos[\"description\"].apply(norm_text) if \"description\" in df_eos.columns else \"\"\n",
193
+ "df_eos[\"_norm_notes\"] = df_eos[\"notes\"].apply(norm_text) if \"notes\" in df_eos.columns else \"\"\n",
194
+ "\n",
195
+ "df_dec[\"_canon_make\"] = df_dec[\"Make\"].apply(canon_maker_from_text) if \"Make\" in df_dec.columns else \"UNKNOWN\"\n",
196
+ "df_dec[\"_norm_model\"] = df_dec[\"Model\"].apply(norm_text) if \"Model\" in df_dec.columns else \"\"\n",
197
+ "df_dec[\"_is5g\"] = df_dec[\"Modem Type\"].apply(_is_5g) if \"Modem Type\" in df_dec.columns else False\n",
198
+ "\n",
199
+ "def display_maker_for_row(life_row: pd.Series) -> str:\n",
200
+ " canon = str(life_row.get(\"_canon_make\",\"UNKNOWN\"))\n",
201
+ " if canon != \"DIGI\":\n",
202
+ " return DISPLAY_MAKER.get(canon, \"Unknown\")\n",
203
+ " desc = norm_text(life_row.get(\"description\",\"\"))\n",
204
+ " notes = norm_text(life_row.get(\"notes\",\"\"))\n",
205
+ " return \"Accelerated Concepts (now Digi)\" if (\"accelerated\" in desc or \"accelerated\" in notes) else \"Digi\"\n",
206
+ "\n",
207
+ "\n",
208
+ "# ============================\n",
209
+ "# Date helpers\n",
210
+ "# ============================\n",
211
+ "@dataclass\n",
212
+ "class ParsedDate:\n",
213
+ " raw: str\n",
214
+ " kind: str\n",
215
+ " value: Optional[date]\n",
216
+ "\n",
217
+ "def parse_date_field(x: Any) -> ParsedDate:\n",
218
+ " raw = str(x or \"\").strip()\n",
219
+ " if not raw:\n",
220
+ " return ParsedDate(raw=\"\", kind=\"missing\", value=None)\n",
221
+ "\n",
222
+ " if re.fullmatch(r\"\\d{4}\", raw):\n",
223
+ " y = int(raw)\n",
224
+ " if y == TODAY.year:\n",
225
+ " return ParsedDate(raw=raw, kind=\"year\", value=date(y, 1, 1))\n",
226
+ " if y < TODAY.year:\n",
227
+ " return ParsedDate(raw=raw, kind=\"year\", value=date(y, 1, 1))\n",
228
+ " return ParsedDate(raw=raw, kind=\"year\", value=date(y, 12, 31))\n",
229
+ "\n",
230
+ " if re.fullmatch(r\"\\d{4}-\\d{2}\", raw):\n",
231
+ " try:\n",
232
+ " y, m = raw.split(\"-\")\n",
233
+ " return ParsedDate(raw=raw, kind=\"year_month\", value=date(int(y), int(m), 1))\n",
234
+ " except Exception:\n",
235
+ " return ParsedDate(raw=raw, kind=\"bad\", value=None)\n",
236
+ "\n",
237
+ " if re.fullmatch(r\"\\d{4}-\\d{2}-\\d{2}\", raw):\n",
238
+ " try:\n",
239
+ " dt = datetime.strptime(raw, \"%Y-%m-%d\").date()\n",
240
+ " return ParsedDate(raw=raw, kind=\"full\", value=dt)\n",
241
+ " except Exception:\n",
242
+ " return ParsedDate(raw=raw, kind=\"bad\", value=None)\n",
243
+ "\n",
244
+ " return ParsedDate(raw=raw, kind=\"bad\", value=None)\n",
245
+ "\n",
246
+ "def display_date(parsed: ParsedDate) -> str:\n",
247
+ " if parsed.kind == \"missing\":\n",
248
+ " return \"Not listed\"\n",
249
+ " if parsed.kind == \"bad\":\n",
250
+ " return parsed.raw or \"Not listed\"\n",
251
+ " return parsed.raw\n",
252
+ "\n",
253
+ "def status_from_eos_eol(eos: ParsedDate, eol: ParsedDate) -> str:\n",
254
+ " if eos.value is None and eol.value is None:\n",
255
+ " return \"Unknown\"\n",
256
+ " if eol.value is not None and eol.value <= TODAY:\n",
257
+ " return \"End of Life\"\n",
258
+ " if eos.value is not None and eos.value <= TODAY:\n",
259
+ " return \"End of Sale\"\n",
260
+ " return \"Active\"\n",
261
+ "\n",
262
+ "def row_to_dates_and_status(life_row: pd.Series) -> Tuple[str, str, str]:\n",
263
+ " eos = parse_date_field(life_row.get(\"end_of_sale\"))\n",
264
+ " eol = parse_date_field(life_row.get(\"end_of_life\"))\n",
265
+ " return display_date(eos), display_date(eol), status_from_eos_eol(eos, eol)\n",
266
+ "\n",
267
+ "\n",
268
+ "# ============================\n",
269
+ "# Embeddings + indices\n",
270
+ "# ============================\n",
271
+ "embedder = SentenceTransformer(EMBED_MODEL_NAME)\n",
272
+ "\n",
273
+ "# Parsec cards around \"Standard SKU\"\n",
274
+ "def extract_pdf_text_pages(path: str) -> List[str]:\n",
275
+ " doc = fitz.open(path)\n",
276
+ " return [doc[i].get_text(\"text\") for i in range(len(doc))]\n",
277
+ "\n",
278
+ "def build_parsec_cards(pages: List[str]) -> List[str]:\n",
279
+ " cards = []\n",
280
+ " for p in pages:\n",
281
+ " for m in re.finditer(r\"Standard\\s+SKU:\", p):\n",
282
+ " start = max(0, m.start() - PARSEC_CONTEXT_BEFORE)\n",
283
+ " end = min(len(p), m.start() + PARSEC_CONTEXT_AFTER)\n",
284
+ " c = p[start:end].strip()\n",
285
+ " if len(c) >= 200:\n",
286
+ " cards.append(c)\n",
287
+ " out, seen = [], set()\n",
288
+ " for c in cards:\n",
289
+ " h = hashlib.sha1(c.encode(\"utf-8\")).hexdigest()\n",
290
+ " if h not in seen:\n",
291
+ " seen.add(h); out.append(c)\n",
292
+ " return out\n",
293
+ "\n",
294
+ "parsec_cards = build_parsec_cards(extract_pdf_text_pages(PARSEC_PDF))\n",
295
+ "parsec_emb = embedder.encode(parsec_cards, batch_size=64, show_progress_bar=False, normalize_embeddings=True)\n",
296
+ "parsec_emb = np.asarray(parsec_emb, dtype=np.float32)\n",
297
+ "parsec_index = faiss.IndexFlatIP(parsec_emb.shape[1])\n",
298
+ "parsec_index.add(parsec_emb)\n",
299
+ "\n",
300
+ "\n",
301
+ "# ============================\n",
302
+ "# Device resolution (exact SKU -> fuzzy candidates -> GPT choice -> A/B pick)\n",
303
+ "# ============================\n",
304
+ "def _label_for_row(i: int) -> str:\n",
305
+ " r = df_eos.iloc[i]\n",
306
+ " return f\"{r.get('sku','')} — {r.get('manufacturer','')} — {r.get('description','')}\"[:220]\n",
307
+ "\n",
308
+ "EOS_LABELS = [_label_for_row(i) for i in range(len(df_eos))]\n",
309
+ "EOS_CORPUS = []\n",
310
+ "for _, r in df_eos.iterrows():\n",
311
+ " EOS_CORPUS.append(\" \".join([\n",
312
+ " r.get(\"_norm_sku\",\"\"),\n",
313
+ " r.get(\"_canon_make\",\"\"),\n",
314
+ " r.get(\"_norm_desc\",\"\"),\n",
315
+ " r.get(\"_norm_notes\",\"\"),\n",
316
+ " ]))\n",
317
+ "\n",
318
+ "def local_candidates(query: str, top_k: int = 6) -> List[Tuple[int,int,str]]:\n",
319
+ " q = norm_text(query)\n",
320
+ " hits = process.extract(q, EOS_CORPUS, scorer=fuzz.WRatio, limit=top_k)\n",
321
+ " return [(int(idx), int(score), EOS_LABELS[int(idx)]) for _, score, idx in hits]\n",
322
+ "\n",
323
+ "def gpt_choose_device(user_text: str, candidates: List[Tuple[int,int,str]]) -> Dict[str, Any]:\n",
324
+ " if client is None:\n",
325
+ " return {}\n",
326
+ " sys = \"Pick which router the user meant. Never invent. Return strict JSON only.\"\n",
327
+ " payload = {\n",
328
+ " \"user_input\": user_text,\n",
329
+ " \"candidates\": [{\"row_idx\": i, \"score\": s, \"label\": lbl} for (i,s,lbl) in candidates],\n",
330
+ " \"rules\": [\n",
331
+ " \"If one candidate is clearly correct, return mode='ok' with row_idx.\",\n",
332
+ " \"If two are plausible, return mode='pick' with top 2 options.\"\n",
333
+ " ],\n",
334
+ " \"output_schema\": {\"mode\":\"ok|pick\",\"row_idx\":\"int\",\"options\":[{\"row_idx\":\"int\",\"label\":\"string\"}]}\n",
335
+ " }\n",
336
+ " return gpt_json(sys, payload, max_tokens=300)\n",
337
+ "\n",
338
+ "def resolve_device(user_text: str) -> Dict[str, Any]:\n",
339
+ " q = norm_text(user_text)\n",
340
+ "\n",
341
+ " exact_idxs = df_eos.index[df_eos[\"_norm_sku\"] == q].tolist()\n",
342
+ " if len(exact_idxs) == 1:\n",
343
+ " return {\"mode\":\"ok\",\"row_idx\": int(exact_idxs[0])}\n",
344
+ " if len(exact_idxs) > 1:\n",
345
+ " opts = [{\"row_idx\": int(i), \"label\": EOS_LABELS[int(i)]} for i in exact_idxs[:2]]\n",
346
+ " return {\"mode\":\"pick\",\"options\": opts}\n",
347
+ "\n",
348
+ " cands = local_candidates(user_text, top_k=6)\n",
349
+ " if not cands:\n",
350
+ " return {\"mode\":\"not_found\"}\n",
351
+ "\n",
352
+ " if cands[0][1] >= 95 and (len(cands) == 1 or (cands[0][1] - cands[1][1]) >= 8):\n",
353
+ " return {\"mode\":\"ok\",\"row_idx\": cands[0][0]}\n",
354
+ "\n",
355
+ " g = gpt_choose_device(user_text, cands)\n",
356
+ " if g.get(\"mode\") == \"ok\" and isinstance(g.get(\"row_idx\"), int):\n",
357
+ " return {\"mode\":\"ok\",\"row_idx\": int(g[\"row_idx\"])}\n",
358
+ "\n",
359
+ " if g.get(\"mode\") == \"pick\":\n",
360
+ " opts = g.get(\"options\", []) or []\n",
361
+ " opts2 = [{\"row_idx\": int(o[\"row_idx\"]), \"label\": str(o[\"label\"])} for o in opts[:2] if \"row_idx\" in o]\n",
362
+ " if opts2:\n",
363
+ " return {\"mode\":\"pick\",\"options\": opts2}\n",
364
+ "\n",
365
+ " # fallback\n",
366
+ " if len(cands) > 1:\n",
367
+ " return {\"mode\":\"pick\",\"options\":[{\"row_idx\":cands[0][0],\"label\":cands[0][2]},{\"row_idx\":cands[1][0],\"label\":cands[1][2]}]}\n",
368
+ " return {\"mode\":\"pick\",\"options\":[{\"row_idx\":cands[0][0],\"label\":cands[0][2]}]}\n",
369
+ "\n",
370
+ "\n",
371
+ "# ============================\n",
372
+ "# Replacements — source of truth is lifecycle CSV (GPT picks from lifecycle candidates when a value is missing; dec catalog is the last resort for 5G)\n",
373
+ "# ============================\n",
374
+ "def _extract_model_token(text: str) -> str:\n",
375
+ " s = _safe_str(text)\n",
376
+ " if not s:\n",
377
+ " return \"\"\n",
378
+ " parts = [p.strip() for p in s.split(\"|\") if p.strip()]\n",
379
+ " candidates = parts[::-1] if parts else [s]\n",
380
+ "\n",
381
+ " for cand in candidates:\n",
382
+ " m = re.search(r\"\\bIX\\d{2}\\b\", cand, flags=re.IGNORECASE)\n",
383
+ " if m:\n",
384
+ " return m.group(0).upper()\n",
385
+ " m = re.search(r\"\\b(R\\d{3,4}|E\\d{3,4}|S\\d{3,4})\\b\", cand, flags=re.IGNORECASE)\n",
386
+ " if m:\n",
387
+ " return m.group(0).upper()\n",
388
+ " m = re.search(r\"\\b[A-Z]{1,5}\\d{2,4}[A-Z]?\\b\", cand.upper())\n",
389
+ " if m:\n",
390
+ " return m.group(0).upper()\n",
391
+ "\n",
392
+ " return candidates[0][:60]\n",
393
+ "\n",
394
+ "def _candidate_5g_models_from_lifecycle(canon_make: str) -> List[str]:\n",
395
+ " pool = df_eos[df_eos[\"_canon_make\"] == canon_make].copy()\n",
396
+ " vals = pool[\"advanced_5g_option\"].tolist() if \"advanced_5g_option\" in pool.columns else []\n",
397
+ " out, seen = [], set()\n",
398
+ " for v in vals:\n",
399
+ " tok = _extract_model_token(v)\n",
400
+ " if tok and tok.lower() != \"nan\" and tok not in seen:\n",
401
+ " seen.add(tok); out.append(tok)\n",
402
+ " return out\n",
403
+ "\n",
404
+ "def _candidate_4g_models_from_lifecycle(canon_make: str) -> List[str]:\n",
405
+ " pool = df_eos[df_eos[\"_canon_make\"] == canon_make].copy()\n",
406
+ " vals = pool[\"suggested_replacement\"].tolist() if \"suggested_replacement\" in pool.columns else []\n",
407
+ " out, seen = [], set()\n",
408
+ " for v in vals:\n",
409
+ " tok = _extract_model_token(v)\n",
410
+ " if tok and tok.lower() != \"nan\" and tok not in seen:\n",
411
+ " seen.add(tok); out.append(tok)\n",
412
+ " return out\n",
413
+ "\n",
414
+ "def _gpt_pick_from_lifecycle_models(old_row: pd.Series, candidates: List[str], need: str) -> str:\n",
415
+ " if client is None or not candidates:\n",
416
+ " return \"\"\n",
417
+ " sys = \"Pick the best replacement model. Choose only from candidates. Return strict JSON only.\"\n",
418
+ " payload = {\n",
419
+ " \"old_device\": {\n",
420
+ " \"sku\": str(old_row.get(\"sku\",\"\")),\n",
421
+ " \"description\": str(old_row.get(\"description\",\"\")),\n",
422
+ " \"manufacturer\": str(old_row.get(\"manufacturer\",\"\")),\n",
423
+ " \"need\": need,\n",
424
+ " },\n",
425
+ " \"candidates\": candidates[:30],\n",
426
+ " \"output_schema\": {\"choice\":\"string\"}\n",
427
+ " }\n",
428
+ " out = gpt_json(sys, payload, max_tokens=220) or {}\n",
429
+ " choice = str(out.get(\"choice\",\"\") or \"\").strip()\n",
430
+ " return choice if choice in candidates else \"\"\n",
431
+ "\n",
432
+ "def _fallback_5g_from_dec(canon_make: str) -> str:\n",
433
+ " pool5 = df_dec[(df_dec[\"_canon_make\"] == canon_make) & (df_dec[\"_is5g\"] == True)]\n",
434
+ " return str(pool5.iloc[0][\"Model\"]).strip() if not pool5.empty else \"\"\n",
435
+ "\n",
436
+ "def _device_is_4g(life_row: pd.Series) -> bool:\n",
437
+ " t = norm_text(life_row.get(\"description\",\"\")) + \" \" + norm_text(life_row.get(\"notes\",\"\"))\n",
438
+ " return ((\"lte\" in t or \"4g\" in t) and (\"5g\" not in t and \"nr\" not in t))\n",
439
+ "\n",
440
+ "def pick_replacements_lifecycle(life_row: pd.Series, status: str) -> Dict[str, Any]:\n",
441
+ " canon = str(life_row.get(\"_canon_make\",\"UNKNOWN\"))\n",
442
+ " if canon == \"UNKNOWN\":\n",
443
+ " return {\"repl_4g\":\"Not applicable\",\"repl_5g\":\"\", \"why\":\"\", \"sources\":[]}\n",
444
+ "\n",
445
+ " is_4g_device = _device_is_4g(life_row)\n",
446
+ " needs_4g_repl = is_4g_device and (status in {\"End of Sale\",\"End of Life\"})\n",
447
+ " want_5g = is_4g_device or (status in {\"End of Sale\",\"End of Life\"})\n",
448
+ "\n",
449
+ " repl_4g = \"Not applicable\"\n",
450
+ " if needs_4g_repl:\n",
451
+ " repl_4g = _extract_model_token(_safe_str(life_row.get(\"suggested_replacement\",\"\")))\n",
452
+ " if not repl_4g:\n",
453
+ " cand4 = _candidate_4g_models_from_lifecycle(canon)\n",
454
+ " repl_4g = _gpt_pick_from_lifecycle_models(life_row, cand4, \"4G replacement\") or (cand4[0] if cand4 else \"\")\n",
455
+ " if not repl_4g:\n",
456
+ " repl_4g = \"Not applicable\"\n",
457
+ "\n",
458
+ " repl_5g = \"Not applicable\"\n",
459
+ " if want_5g:\n",
460
+ " repl_5g = _extract_model_token(_safe_str(life_row.get(\"advanced_5g_option\",\"\")))\n",
461
+ " if not repl_5g:\n",
462
+ " cand5 = _candidate_5g_models_from_lifecycle(canon)\n",
463
+ " repl_5g = _gpt_pick_from_lifecycle_models(life_row, cand5, \"5G replacement/upgrade\") or (cand5[0] if cand5 else \"\")\n",
464
+ " if not repl_5g:\n",
465
+ " # last resort: dec catalog\n",
466
+ " repl_5g = _fallback_5g_from_dec(canon)\n",
467
+ "\n",
468
+ " if repl_5g.lower() == \"nan\":\n",
469
+ " repl_5g = \"\"\n",
470
+ "\n",
471
+ " return {\n",
472
+ " \"repl_4g\": repl_4g,\n",
473
+ " \"repl_5g\": repl_5g,\n",
474
+ " \"why\": \"Lifecycle replacements (GPT fallback when missing).\",\n",
475
+ " \"sources\": [\"lifecycle_csv\"] + ([\"gpt\"] if client else []) + ([\"dec_fallback\"] if (want_5g and not _extract_model_token(_safe_str(life_row.get(\"advanced_5g_option\",\"\")))) else []),\n",
476
+ " }\n",
477
+ "\n",
478
+ "\n",
479
+ "# ============================\n",
480
+ "# Antennas (Parsec-only; family name extraction)\n",
481
+ "# ============================\n",
482
+ "PARSEC_FAMILY_WORDS = {\n",
483
+ " \"chinook\",\"labrador\",\"boxer\",\"bloodhound\",\"husky\",\"beagle\",\"mastiff\",\"collie\",\n",
484
+ " \"shepherd\",\"belgian\",\"australian\",\"terrier\",\"pyrenees\"\n",
485
+ "}\n",
486
+ "BAD_NAME_MARKERS = {\n",
487
+ " \"customization\", \"standard connectors\", \"connectors\", \"features\", \"benefits\",\n",
488
+ " \"specifications\", \"mechanical\", \"electrical\", \"mounting\", \"accessories\",\n",
489
+ " \"description:\", \"standard sku\"\n",
490
+ "}\n",
491
+ "\n",
492
+ "def _clean_line(s: str) -> str:\n",
493
+ " s = re.sub(r\"\\s+\", \" \", str(s or \"\").strip())\n",
494
+ " if re.fullmatch(r\"-[a-z0-9]+\", s.lower()):\n",
495
+ " return \"\"\n",
496
+ " return s\n",
497
+ "\n",
498
+ "def _is_bad_name_line(line: str) -> bool:\n",
499
+ " low = line.lower()\n",
500
+ " if any(m in low for m in BAD_NAME_MARKERS):\n",
501
+ " return True\n",
502
+ " if re.search(r\"\\b-[a-z0-9]{1,4}\\b\", low) and len(low) <= 25:\n",
503
+ " return True\n",
504
+ " return False\n",
505
+ "\n",
506
+ "def _family_from_line(line: str) -> str:\n",
507
+ " low = line.lower()\n",
508
+ " for fam in PARSEC_FAMILY_WORDS:\n",
509
+ " if fam in low:\n",
510
+ " return fam.capitalize()\n",
511
+ " return \"\"\n",
512
+ "\n",
513
+ "def _parsec_name_from_card(card_text: str) -> str:\n",
514
+ " lines = [_clean_line(ln) for ln in str(card_text or \"\").splitlines()]\n",
515
+ " lines = [ln for ln in lines if ln]\n",
516
+ "\n",
517
+ " for ln in lines:\n",
518
+ " if _is_bad_name_line(ln):\n",
519
+ " continue\n",
520
+ " fam = _family_from_line(ln)\n",
521
+ " if fam:\n",
522
+ " return fam\n",
523
+ "\n",
524
+ " sku_i = None\n",
525
+ " for i, ln in enumerate(lines):\n",
526
+ " if \"standard sku\" in ln.lower():\n",
527
+ " sku_i = i\n",
528
+ " break\n",
529
+ " if sku_i is not None:\n",
530
+ " window = lines[max(0, sku_i - 12):sku_i]\n",
531
+ " for ln in reversed(window):\n",
532
+ " if _is_bad_name_line(ln):\n",
533
+ " continue\n",
534
+ " if 3 <= len(ln) <= 40 and re.search(r\"[A-Za-z]\", ln):\n",
535
+ " return ln.split()[0].capitalize()\n",
536
+ "\n",
537
+ " return \"Parsec antenna\"\n",
538
+ "\n",
539
+ "def _parsec_part_from_card(t: str) -> str:\n",
540
+ " m = re.search(r\"Standard\\s+SKU:\\s*([A-Z0-9]+)\", t)\n",
541
+ " return m.group(1).strip() if m else \"\"\n",
542
+ "\n",
543
+ "def _parsec_desc_from_card(t: str) -> str:\n",
544
+ " m = re.search(r\"Description:\\s*(.+?)(?:\\n|$)\", t, flags=re.IGNORECASE)\n",
545
+ " return re.sub(r\"\\s+\",\" \",m.group(1).strip())[:220] if m else \"\"\n",
546
+ "\n",
547
+ "def parsec_retrieve(query: str, top_k: int = 10) -> List[Dict[str, Any]]:\n",
548
+ " qv = embedder.encode([query], normalize_embeddings=True)\n",
549
+ " qv = np.asarray(qv, dtype=np.float32)\n",
550
+ " scores, ids = parsec_index.search(qv, top_k)\n",
551
+ " out = []\n",
552
+ " for sc, i in zip(scores[0].tolist(), ids[0].tolist()):\n",
553
+ " if 0 <= int(i) < len(parsec_cards):\n",
554
+ " card = parsec_cards[int(i)]\n",
555
+ " out.append({\n",
556
+ " \"score\": float(sc),\n",
557
+ " \"name\": _parsec_name_from_card(card),\n",
558
+ " \"part_number\": _parsec_part_from_card(card),\n",
559
+ " \"description\": _parsec_desc_from_card(card),\n",
560
+ " \"card\": card[:1100],\n",
561
+ " })\n",
562
+ " return out\n",
563
+ "\n",
564
+ "def antenna_options_for(router_model: str, tech: str, mimo: str) -> Dict[str, Any]:\n",
565
+ " q_stationary = f\"{router_model} {tech} {mimo} omni stationary outdoor Parsec\"\n",
566
+ " q_vehicle = f\"{router_model} {tech} {mimo} omni vehicle mobile Parsec\"\n",
567
+ " cand_stationary = parsec_retrieve(q_stationary, top_k=10)\n",
568
+ " cand_vehicle = parsec_retrieve(q_vehicle, top_k=10)\n",
569
+ "\n",
570
+ " if client is None:\n",
571
+ " s = cand_stationary[0] if cand_stationary else {\"name\":\"Parsec antenna\",\"part_number\":\"\",\"description\":\"\"}\n",
572
+ " v = cand_vehicle[0] if cand_vehicle else {\"name\":\"Parsec antenna\",\"part_number\":\"\",\"description\":\"\"}\n",
573
+ " s.update({\"mimo\": mimo, \"why\": \"Stationary omni best match.\"})\n",
574
+ " v.update({\"mimo\": mimo, \"why\": \"Vehicle omni best match.\"})\n",
575
+ " return {\"stationary_omni\": s, \"vehicle_omni\": v, \"sources\":[\"parsec_rag\"]}\n",
576
+ "\n",
577
+ " sys = \"Select Parsec antennas. Choose only from candidates. Return strict JSON only.\"\n",
578
+ " payload = {\n",
579
+ " \"router_model\": router_model,\n",
580
+ " \"tech\": tech,\n",
581
+ " \"mimo\": mimo,\n",
582
+ " \"stationary_candidates\": cand_stationary,\n",
583
+ " \"vehicle_candidates\": cand_vehicle,\n",
584
+ " \"rules\": [\n",
585
+ " \"Return two options: stationary_omni and vehicle_omni.\",\n",
586
+ " \"Use only candidates. Prefer family names like Labrador/Chinook/Boxer.\",\n",
587
+ " \"Include name, part_number, description, mimo, why.\",\n",
588
+ " \"Return JSON only.\"\n",
589
+ " ],\n",
590
+ " \"output_schema\": {\n",
591
+ " \"stationary_omni\": {\"name\":\"string\",\"part_number\":\"string\",\"description\":\"string\",\"mimo\":\"2x2|4x4\",\"why\":\"string\"},\n",
592
+ " \"vehicle_omni\": {\"name\":\"string\",\"part_number\":\"string\",\"description\":\"string\",\"mimo\":\"2x2|4x4\",\"why\":\"string\"}\n",
593
+ " }\n",
594
+ " }\n",
595
+ " out = gpt_json(sys, payload, max_tokens=650) or {}\n",
596
+ "\n",
597
+ " def _fix(x: Dict[str, Any], default_why: str) -> Dict[str, str]:\n",
598
+ " return {\n",
599
+ " \"name\": str(x.get(\"name\",\"Parsec antenna\") or \"Parsec antenna\")[:110],\n",
600
+ " \"part_number\": str(x.get(\"part_number\",\"\") or \"\")[:40],\n",
601
+ " \"description\": str(x.get(\"description\",\"\") or \"\")[:220],\n",
602
+ " \"mimo\": str(x.get(\"mimo\", mimo) or mimo),\n",
603
+ " \"why\": str(x.get(\"why\", default_why) or default_why)[:160],\n",
604
+ " }\n",
605
+ "\n",
606
+ " s = _fix(out.get(\"stationary_omni\", {}) if isinstance(out, dict) else {}, \"Stationary omni best match.\")\n",
607
+ " v = _fix(out.get(\"vehicle_omni\", {}) if isinstance(out, dict) else {}, \"Vehicle omni best match.\")\n",
608
+ " if not s.get(\"part_number\") and cand_stationary:\n",
609
+ " top = cand_stationary[0]\n",
610
+ " s = {\"name\": top.get(\"name\",\"Parsec antenna\"), \"part_number\": top.get(\"part_number\",\"\"), \"description\": top.get(\"description\",\"\"), \"mimo\": mimo, \"why\":\"Stationary omni best match.\"}\n",
611
+ " if not v.get(\"part_number\") and cand_vehicle:\n",
612
+ " top = cand_vehicle[0]\n",
613
+ " v = {\"name\": top.get(\"name\",\"Parsec antenna\"), \"part_number\": top.get(\"part_number\",\"\"), \"description\": top.get(\"description\",\"\"), \"mimo\": mimo, \"why\":\"Vehicle omni best match.\"}\n",
614
+ " return {\"stationary_omni\": s, \"vehicle_omni\": v, \"sources\":[\"parsec_rag\",\"gpt\"]}\n",
615
+ "\n",
616
+ "\n",
617
+ "# ============================\n",
618
+ "# Feature table; fields that are empty, \"Not listed\" or \"nan\" are filled via GPT (no more ****)\n",
619
+ "# ============================\n",
620
+ "FEATURE_COLS = [\"Name\",\"Modem technology\",\"WiFi\",\"Ports\",\"Antennas\",\"Ruggedness\",\"Use case\"]\n",
621
+ "\n",
622
+ "def dec_features_by_model(model: str, canon_make: str) -> Dict[str, str]:\n",
623
+ " if not model or model == \"Not applicable\":\n",
624
+ " return {k:\"Not listed\" for k in FEATURE_COLS}\n",
625
+ " pool = df_dec[df_dec[\"_canon_make\"] == canon_make].copy()\n",
626
+ " if pool.empty:\n",
627
+ " return {k:\"Not listed\" for k in FEATURE_COLS}\n",
628
+ " hit = process.extractOne(norm_text(model), pool[\"_norm_model\"].tolist(), scorer=fuzz.WRatio)\n",
629
+ " if not hit or hit[1] < MATCH_OK:\n",
630
+ " return {k:\"Not listed\" for k in FEATURE_COLS}\n",
631
+ " r = pool.iloc[int(hit[2])]\n",
632
+ " ports = f\"WAN: {r.get('WAN ports and speed','')} | LAN: {r.get('LAN ports and speed','')}\"\n",
633
+ " return {\n",
634
+ " \"Name\": str(r.get(\"Model\",\"\")),\n",
635
+ " \"Modem technology\": str(r.get(\"Modem Type\",\"\")),\n",
636
+ " \"WiFi\": str(r.get(\"WiFi type\",\"\")),\n",
637
+ " \"Ports\": ports,\n",
638
+ " \"Antennas\": str(r.get(\"Antennas (internal/external/both)\",\"\")),\n",
639
+ " \"Ruggedness\": str(r.get(\"Ruggedization\",\"\")),\n",
640
+ " \"Use case\": str(r.get(\"Primary use case\",\"\")),\n",
641
+ " }\n",
642
+ "\n",
643
+ "def gpt_fill_features(device_label: str, feats: Dict[str,str], context: str) -> Dict[str,str]:\n",
644
+ " missing = [k for k,v in feats.items() if (not v) or v.strip().lower() in {\"not listed\",\"nan\"}]\n",
645
+ " if client is None or not missing:\n",
646
+ " return feats\n",
647
+ " sys = \"Fill missing router feature fields. Return strict JSON only.\"\n",
648
+ " payload = {\n",
649
+ " \"device\": device_label,\n",
650
+ " \"known\": feats,\n",
651
+ " \"context\": context[:2000],\n",
652
+ " \"fill_only\": missing,\n",
653
+ " \"rules\": [\"Fill only requested fields. Best guess if needed. Return JSON only.\"],\n",
654
+ " \"output_schema\": {k:\"string\" for k in missing}\n",
655
+ " }\n",
656
+ " out = gpt_json(sys, payload, max_tokens=350) or {}\n",
657
+ " for k in missing:\n",
658
+ " v = str(out.get(k,\"\") or \"\").strip()\n",
659
+ " if v:\n",
660
+ " feats[k] = v\n",
661
+ " return feats\n",
662
+ "\n",
663
+ "def current_features_guess(life_row: pd.Series) -> Dict[str,str]:\n",
664
+ " sku = str(life_row.get(\"sku\",\"\") or \"\").strip()\n",
665
+ " desc = str(life_row.get(\"description\",\"\") or \"\").strip()\n",
666
+ " notes = str(life_row.get(\"notes\",\"\") or \"\").strip()\n",
667
+ " base = {\n",
668
+ " \"Name\": sku,\n",
669
+ " \"Modem technology\": \"4G\" if _device_is_4g(life_row) else (\"5G\" if ((\"5g\" in (desc+notes).lower()) or (\"nr\" in (desc+notes).lower())) else \"Not listed\"),\n",
670
+ " \"WiFi\": \"Not listed\",\n",
671
+ " \"Ports\": \"Not listed\",\n",
672
+ " \"Antennas\": \"Not listed\",\n",
673
+ " \"Ruggedness\": \"Not listed\",\n",
674
+ " \"Use case\": \"Not listed\",\n",
675
+ " }\n",
676
+ " return gpt_fill_features(\"Current device\", base, f\"{desc}\\n{notes}\")\n",
677
+ "\n",
678
+ "def build_features_table(cur: Dict[str,str], r4: Dict[str,str], r5: Dict[str,str]) -> str:\n",
679
+ " cols = [\"Device\", \"Modem technology\", \"WiFi\", \"Ports\", \"Antennas\", \"Ruggedness\", \"Use case\"]\n",
680
+ " header = \"| \" + \" | \".join(cols) + \" |\"\n",
681
+ " sep = \"| \" + \" | \".join([\"---\"]*len(cols)) + \" |\"\n",
682
+ "\n",
683
+ " def row(name: str, feats: Dict[str,str]) -> str:\n",
684
+ " return \"| \" + \" | \".join([\n",
685
+ " name,\n",
686
+ " feats.get(\"Modem technology\",\"Not listed\"),\n",
687
+ " feats.get(\"WiFi\",\"Not listed\"),\n",
688
+ " feats.get(\"Ports\",\"Not listed\"),\n",
689
+ " feats.get(\"Antennas\",\"Not listed\"),\n",
690
+ " feats.get(\"Ruggedness\",\"Not listed\"),\n",
691
+ " feats.get(\"Use case\",\"Not listed\"),\n",
692
+ " ]) + \" |\"\n",
693
+ "\n",
694
+ " return \"\\n\".join([header, sep, row(\"Current\", cur), row(\"4G replacement\", r4), row(\"5G replacement\", r5)])\n",
695
+ "\n",
696
+ "\n",
697
+ "# ============================\n",
698
+ "# Output + Gradio UI\n",
699
+ "# ============================\n",
700
+ "def fmt(v: Any, fallback: str = \"Not listed\") -> str:\n",
701
+ " s = _safe_str(v)\n",
702
+ " if not s or s.lower() == \"nan\":\n",
703
+ " return fallback\n",
704
+ " return s\n",
705
+ "\n",
706
+ "def assemble_output(life_row: pd.Series, status: str, eos: str, eol: str, repl: Dict[str,Any], ant: Dict[str,Any]) -> str:\n",
707
+ " canon_make = str(life_row.get(\"_canon_make\",\"UNKNOWN\"))\n",
708
+ " current_name = f\"{life_row.get('sku','')} — {life_row.get('description','')}\".strip(\" —\")\n",
709
+ "\n",
710
+ " # Antenna\n",
711
+ " st = ant.get(\"stationary_omni\", {})\n",
712
+ " vh = ant.get(\"vehicle_omni\", {})\n",
713
+ "\n",
714
+ " # Feature table (fill missing via GPT)\n",
715
+ " cur_feats = current_features_guess(life_row)\n",
716
+ " r4_feats = dec_features_by_model(repl.get(\"repl_4g\",\"\"), canon_make)\n",
717
+ " r5_feats = dec_features_by_model(repl.get(\"repl_5g\",\"\"), canon_make)\n",
718
+ " if client is not None:\n",
719
+ " r4_feats = gpt_fill_features(\"4G replacement\", r4_feats, \"\")\n",
720
+ " r5_feats = gpt_fill_features(\"5G replacement\", r5_feats, \"\")\n",
721
+ " table_md = build_features_table(cur_feats, r4_feats, r5_feats)\n",
722
+ "\n",
723
+ " lines = []\n",
724
+ " lines.append(f\"1. Current device: **{current_name}**\")\n",
725
+ " lines.append(f\"2. Status: **{status}**\")\n",
726
+ " lines.append(f\"3. End of Sale date: **{eos}**\")\n",
727
+ " lines.append(f\"4. End of Life date: **{eol}**\")\n",
728
+ " lines.append(f\"5. 4G recommended replacement: **{fmt(repl.get('repl_4g'), 'Not applicable')}**\")\n",
729
+ " # If 5G is empty, force GPT to pick from lifecycle pool\n",
730
+ " repl5 = fmt(repl.get(\"repl_5g\"), \"\")\n",
731
+ " if (not repl5) and client is not None:\n",
732
+ " cand5 = _candidate_5g_models_from_lifecycle(str(life_row.get('_canon_make','UNKNOWN')))\n",
733
+ " repl5 = _gpt_pick_from_lifecycle_models(life_row, cand5, \"5G replacement/upgrade\") or (cand5[0] if cand5 else \"\")\n",
734
+ " if not repl5:\n",
735
+ " repl5 = \"Not listed\"\n",
736
+ " lines.append(f\"6. 5G recommended replacement: **{repl5}**\")\n",
737
+ "\n",
738
+ " lines.append(\"7. Antenna options (Parsec-only):\")\n",
739
+ " lines.append(f\" - Stationary (Omni): **{fmt(st.get('name'))}** (Part #: {fmt(st.get('part_number'))}) — {fmt(st.get('description'))} — MIMO: {fmt(st.get('mimo'))} — {fmt(st.get('why'))}\")\n",
740
+ " lines.append(f\" - Vehicle (Omni): **{fmt(vh.get('name'))}** (Part #: {fmt(vh.get('part_number'))}) — {fmt(vh.get('description'))} — MIMO: {fmt(vh.get('mimo'))} — {fmt(vh.get('why'))}\")\n",
741
+ "\n",
742
+ " lines.append(\"8. Recommended features table:\")\n",
743
+ " lines.append(table_md)\n",
744
+ "\n",
745
+ " lines.append(\"\\nSources (debug):\")\n",
746
+ " for s in repl.get(\"sources\", []) if isinstance(repl.get(\"sources\"), list) else []:\n",
747
+ " lines.append(f\"- {s}\")\n",
748
+ " lines.append(\"- ParsecCatalog.pdf (local RAG)\")\n",
749
+ " lines.append(\"- dec2025routers.csv (features + fallback)\")\n",
750
+ " return \"\\n\".join(lines)\n",
751
+ "\n",
752
+ "def run_lookup(user_text: str, st: Dict[str,Any]):\n",
753
+ " user_text = str(user_text or \"\").strip()\n",
754
+ " if not user_text:\n",
755
+ " return \"Enter a router SKU/model.\", gr.update(visible=False), gr.update(visible=False), {}\n",
756
+ "\n",
757
+ " res = resolve_device(user_text)\n",
758
+ " if res.get(\"mode\") == \"pick\":\n",
759
+ " opts = res.get(\"options\", [])\n",
760
+ " choices = [o[\"label\"] for o in opts]\n",
761
+ " st2 = {\"mode\":\"pick\",\"options\": opts}\n",
762
+ " return \"Did you mean A or B? Pick one, then click Use selection.\", gr.update(choices=choices, value=None, visible=True), gr.update(visible=True), st2\n",
763
+ "\n",
764
+ " if res.get(\"mode\") != \"ok\":\n",
765
+ " return \"Not found.\", gr.update(visible=False), gr.update(visible=False), {}\n",
766
+ "\n",
767
+ " life_row = df_eos.iloc[int(res[\"row_idx\"])]\n",
768
+ " eos, eol, status = row_to_dates_and_status(life_row)\n",
769
+ "\n",
770
+ " repl = pick_replacements_lifecycle(life_row, status)\n",
771
+ "\n",
772
+ " tech = \"5G\" if repl.get(\"repl_5g\") and repl.get(\"repl_5g\") != \"Not applicable\" else (\"4G\" if _device_is_4g(life_row) else \"Unknown\")\n",
773
+ " mimo_guess = \"4x4\" if tech == \"5G\" else \"2x2\"\n",
774
+ " ant = antenna_options_for(router_model=repl.get(\"repl_5g\") or str(life_row.get(\"sku\",\"\")), tech=tech, mimo=mimo_guess)\n",
775
+ "\n",
776
+ " return assemble_output(life_row, status, eos, eol, repl, ant), gr.update(visible=False), gr.update(visible=False), {}\n",
777
+ "\n",
778
+ "def use_selection(selected_label: str, st: Dict[str,Any]):\n",
779
+ " if not st or st.get(\"mode\") != \"pick\":\n",
780
+ " return \"Run a search first.\", gr.update(visible=False), gr.update(visible=False), {}\n",
781
+ " if not selected_label:\n",
782
+ " return \"Pick A or B first.\", gr.update(visible=True), gr.update(visible=True), st\n",
783
+ "\n",
784
+ " chosen_row = None\n",
785
+ " for o in st.get(\"options\", []):\n",
786
+ " if o.get(\"label\") == selected_label:\n",
787
+ " chosen_row = int(o[\"row_idx\"])\n",
788
+ " break\n",
789
+ " if chosen_row is None:\n",
790
+ " return \"Pick a valid option.\", gr.update(visible=True), gr.update(visible=True), st\n",
791
+ "\n",
792
+ " life_row = df_eos.iloc[int(chosen_row)]\n",
793
+ " eos, eol, status = row_to_dates_and_status(life_row)\n",
794
+ " repl = pick_replacements_lifecycle(life_row, status)\n",
795
+ "\n",
796
+ " tech = \"5G\" if repl.get(\"repl_5g\") and repl.get(\"repl_5g\") != \"Not applicable\" else (\"4G\" if _device_is_4g(life_row) else \"Unknown\")\n",
797
+ " mimo_guess = \"4x4\" if tech == \"5G\" else \"2x2\"\n",
798
+ " ant = antenna_options_for(router_model=repl.get(\"repl_5g\") or str(life_row.get(\"sku\",\"\")), tech=tech, mimo=mimo_guess)\n",
799
+ "\n",
800
+ " return assemble_output(life_row, status, eos, eol, repl, ant), gr.update(visible=False), gr.update(visible=False), {}\n",
801
+ "\n",
802
+ "with gr.Blocks(title=\"Only-Routers\") as demo:\n",
803
+ " gr.Markdown(\"## Only-Routers\\nEnter a router SKU/model. If ambiguous, you’ll get A/B choices.\")\n",
804
+ " user_text = gr.Textbox(label=\"Router SKU or model\", placeholder=\"Examples: IBR650B, AER1600, ES450, WR21\", lines=1)\n",
805
+ " st = gr.State({})\n",
806
+ "\n",
807
+ " check_btn = gr.Button(\"Check\", variant=\"primary\")\n",
808
+ " pick_dd = gr.Dropdown(label=\"Pick A or B\", choices=[], visible=False)\n",
809
+ " use_btn = gr.Button(\"Use selection\", visible=False)\n",
810
+ "\n",
811
+ " output_md = gr.Markdown()\n",
812
+ "\n",
813
+ " check_btn.click(fn=run_lookup, inputs=[user_text, st], outputs=[output_md, pick_dd, use_btn, st])\n",
814
+ " use_btn.click(fn=use_selection, inputs=[pick_dd, st], outputs=[output_md, pick_dd, use_btn, st])\n",
815
+ "\n",
816
+ "demo.launch()\n"
817
+ ]
818
+ }
819
+ ],
820
+ "metadata": {
821
+ "kernelspec": {
822
+ "display_name": "Python 3",
823
+ "name": "python3"
824
+ },
825
+ "language_info": {
826
+ "name": "python"
827
+ }
828
+ },
829
+ "nbformat": 4,
830
+ "nbformat_minor": 5
831
+ }