pharma-copilot

#1
by Mohibullah - opened
__pycache__/app.cpython-312.pyc DELETED
Binary file (387 Bytes)
 
__pycache__/gradio_pharmacopilot_demo.cpython-312.pyc DELETED
Binary file (61.4 kB)
 
data/training/bd_brand_to_generic.json CHANGED
@@ -479,11 +479,5 @@
479
  "fluclox": "flucloxacillin",
480
  "ambroy": "ambroxol",
481
  "dexter": "dexamethasone",
482
- "dextor": "dexamethasone",
483
- "ultrafen-plus": "diclofenac",
484
- "ultrafen plus": "diclofenac",
485
- "ultrafen": "diclofenac",
486
- "ultracalc-d": "calcium carbonate",
487
- "ultracalc d": "calcium carbonate",
488
- "cartilix": "glucosamine"
489
  }
 
479
  "fluclox": "flucloxacillin",
480
  "ambroy": "ambroxol",
481
  "dexter": "dexamethasone",
482
+ "dextor": "dexamethasone"
 
 
 
 
 
 
483
  }
gradio_pharmacopilot_demo.py CHANGED
@@ -2,7 +2,6 @@ from __future__ import annotations
2
 
3
  import json
4
  import os
5
- import re
6
  import unicodedata
7
  import time
8
  from difflib import SequenceMatcher, get_close_matches
@@ -46,139 +45,25 @@ def data_path(relative_path: str) -> Path:
46
  return DATA_DIR / relative
47
 
48
 
49
- MEDICINES_PATH = DATA_DIR / "medicines_master.json"
50
- BRAND_MAP_PATH = DATA_DIR / "training/bd_brand_to_generic.json"
51
  INVENTORY_PATH = data_path("inventory.json")
52
 
53
  MODEL_ID = os.getenv("PHARMACOPILOT_MODEL_ID", "openbmb/MiniCPM-V-4_5")
54
- NEMOTRON_MODEL_ID = os.getenv("NEMOTRON_MODEL_ID", "nvidia/Llama-3.1-Nemotron-Nano-8B-v1")
 
 
55
  NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY", "")
56
  NVIDIA_BASE_URL = os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com/v1")
57
  NVIDIA_NIM_MODEL = os.getenv("NVIDIA_NIM_MODEL", "nvidia/nvidia-nemotron-nano-9b-v2")
 
 
58
  ACCEPTANCE_THRESHOLD = int(os.getenv("PHARMACOPILOT_ACCEPTANCE_THRESHOLD", "75"))
59
  OCR_MODEL = None
60
  OCR_TOKENIZER = None
61
  NEMOTRON_MODEL = None
62
  NEMOTRON_TOKENIZER = None
63
 
64
- # ── Controlled substance lookup (DEA Schedules II-V) ─────────────────────────
65
- CONTROLLED_SUBSTANCES = {
66
- # Schedule II
67
- "oxycodone", "oxycontin", "hydrocodone", "vicodin", "morphine", "fentanyl",
68
- "methadone", "amphetamine", "adderall", "dextroamphetamine", "methamphetamine",
69
- "methylphenidate", "ritalin", "concerta", "codeine", "hydromorphone",
70
- "meperidine", "demerol", "tapentadol", "lisdexamfetamine", "vyvanse",
71
- # Schedule III
72
- "testosterone", "ketamine", "buprenorphine", "suboxone", "anabolic steroids",
73
- # Schedule IV
74
- "alprazolam", "xanax", "diazepam", "valium", "lorazepam", "ativan",
75
- "clonazepam", "klonopin", "zolpidem", "ambien", "tramadol", "carisoprodol",
76
- "midazolam", "temazepam", "triazolam", "phenobarbital",
77
- # Schedule V
78
- "pregabalin", "lyrica", "lacosamide", "ezogabine",
79
- }
80
-
81
-
82
- def is_controlled_substance(drug_name: str) -> bool:
83
- """Check if a drug name matches a known controlled substance."""
84
- if not drug_name:
85
- return False
86
- normalized = drug_name.strip().lower()
87
- for substance in CONTROLLED_SUBSTANCES:
88
- if substance in normalized or normalized in substance:
89
- return True
90
- return False
91
-
92
-
93
- # ── Prompts ──────────────────────────────────────────────────────────────────
94
- # Pass 1: MiniCPM-V reads ALL text from the prescription image
95
- # Pass 1A: Focused drug extraction — short, direct prompt to force reading Latin-script drug names
96
- DRUG_FOCUSED_PROMPT = """Look at this prescription image carefully. List ONLY the medicine/drug names and their dosages.
97
-
98
- Drug names on prescriptions are written in English/Latin letters like:
99
- - Tab. (tablet), Cap. (capsule), Syp. (syrup), Inj. (injection)
100
- - Examples: Tab. Paracetamol 500mg, Cap. Amoxicillin 250mg, Tab. Diclofenac 50mg
101
-
102
- For each drug, write:
103
- - The drug name exactly as written
104
- - The strength if visible (e.g., 50mg, 200mg)
105
- - The dosage pattern if visible (e.g., 1+0+1, 2+0+2)
106
-
107
- List them numbered. If you cannot read a drug name, write [ILLEGIBLE].
108
- Do NOT translate or explain. Just list the drugs."""
109
-
110
- # Pass 1B: Full prescription text extraction
111
- FULL_OCR_PROMPT = """Read this medical prescription image. It may have Bengali/Hindi/Urdu printed headers and English handwritten content.
112
-
113
- Extract ALL information in this format:
114
- DOCTOR: [name and credentials from printed header or stamp]
115
- CLINIC: [clinic/hospital name]
116
- PATIENT: [patient name — usually handwritten near top]
117
- DATE: [prescription date]
118
- CHIEF COMPLAINT: [the medical condition/reason for visit if noted]
119
- Rx:
120
- [list all drugs with strengths and dosage patterns]
121
- ADVICE: [follow-up instructions]
122
- SIGNATURE: [PRESENT or NOT VISIBLE]
123
-
124
- RULES:
125
- - Drug names are ALWAYS in English/Latin script (Tab., Cap., Syp.) — read them carefully
126
- - Dosage patterns like "2+0+2" mean morning+afternoon+night
127
- - Do NOT translate, correct spelling, or interpret — transcribe exactly as written
128
- - Read ALL numbered items"""
129
-
130
- # Pass 2: Nemotron structures the raw OCR into the clinical JSON schema
131
- STRUCTURING_PROMPT_TEMPLATE = """You are a HIPAA-compliant Clinical Data Extraction Agent.
132
-
133
- You have been given raw OCR text extracted from a medical prescription image. Parse this text into structured JSON.
134
-
135
- STRICT RULES:
136
- 1. ZERO HALLUCINATION: If a field is not found, output null. Do NOT guess.
137
- 2. NO CLINICAL TRANSLATION: Extract Sig/directions EXACTLY as written (e.g., "2+0+2", "1 tab PO BID"). Do NOT expand.
138
- 3. Assign confidence (0.00 to 1.00) based on clarity in the OCR text.
139
- 4. For drug_name: extract the FIRST/PRIMARY drug prescribed (e.g., "Tab. Diclofenac" → "Diclofenac"). If multiple drugs, use the first one.
140
- 5. For directions_sig: include the dosage pattern (e.g., "2+0+2" or "1+0+1") and any duration mentioned.
141
- 6. Dosage forms: Tab. = tablets, Cap. = capsules, Syp. = syrup, Inj. = injection, Susp. = suspension.
142
- 7. Look for patient name after "Name:" or "নাম:" fields. Look for date after "Date:" or "তারিখ:".
143
- 8. Doctor name is usually printed at the top or bottom of the prescription.
144
-
145
- RAW OCR TEXT:
146
- ---
147
- {ocr_text}
148
- ---
149
-
150
- Return ONLY valid JSON (no markdown, no explanation):
151
- {{
152
- "document_metadata": {{
153
- "is_controlled_substance": false,
154
- "overall_legibility_score": 0.0
155
- }},
156
- "patient_info": {{
157
- "name": {{ "value": null, "confidence": 0.0 }},
158
- "address": {{ "value": null, "confidence": 0.0 }},
159
- "date_of_birth": {{ "value": null, "confidence": 0.0 }},
160
- "phone_number": {{ "value": null, "confidence": 0.0 }}
161
- }},
162
- "prescriber_info": {{
163
- "name": {{ "value": null, "confidence": 0.0 }},
164
- "signature_present": {{ "value": false, "confidence": 0.0 }},
165
- "address": {{ "value": null, "confidence": 0.0 }},
166
- "dea_number": {{ "value": null, "confidence": 0.0 }},
167
- "npi_number": {{ "value": null, "confidence": 0.0 }},
168
- "phone_number": {{ "value": null, "confidence": 0.0 }}
169
- }},
170
- "prescription_details": {{
171
- "date_of_issuance": {{ "value": null, "confidence": 0.0 }},
172
- "drug_name": {{ "value": null, "confidence": 0.0 }},
173
- "strength": {{ "value": null, "confidence": 0.0 }},
174
- "dosage_form": {{ "value": null, "confidence": 0.0 }},
175
- "quantity": {{ "value": null, "confidence": 0.0 }},
176
- "directions_sig": {{ "value": null, "confidence": 0.0 }},
177
- "refills_authorized": {{ "value": null, "confidence": 0.0 }},
178
- "dispense_as_written": {{ "value": null, "confidence": 0.0 }}
179
- }}
180
- }}"""
181
-
182
 
183
  def load_json(path: Path, fallback: Any) -> Any:
184
  if not path.exists():
@@ -223,24 +108,7 @@ def normalize(text: str) -> str:
223
  return " ".join(text.strip().lower().split())
224
 
225
 
226
- # Build a comprehensive lookup map: normalized name -> (original casing, medicine dict)
227
- NAME_TO_MED = {}
228
- for m in MEDICINES:
229
- NAME_TO_MED[normalize(m["name"])] = (m["name"], m)
230
- if m.get("generic_name"):
231
- NAME_TO_MED[normalize(m["generic_name"])] = (m["generic_name"], m)
232
- for brand in m.get("brand_names") or []:
233
- NAME_TO_MED[normalize(brand)] = (brand, m)
234
-
235
- for brand, generic in BD_BRAND_TO_GENERIC.items():
236
- norm_gen = normalize(generic)
237
- res = NAME_TO_MED.get(norm_gen)
238
- if res:
239
- NAME_TO_MED[normalize(brand)] = (brand, res[1])
240
-
241
-
242
  def clean_prediction(raw_prediction: str) -> str:
243
- """Clean a raw OCR prediction for single-name extraction (legacy helper)."""
244
  text = str(raw_prediction or "").strip()
245
  text = text.replace("\r", "\n")
246
  text = text.split("\n")[0].strip() if "\n" in text else text
@@ -271,92 +139,31 @@ def label_for_medicine(ocr_text: str, medicine: dict[str, Any]) -> str:
271
  return brands[0] if brands else medicine["name"]
272
 
273
 
274
- def web_search_generic(drug_name: str) -> str | None:
275
- """Use a web search to find the generic name/active ingredient of a brand name."""
276
- import requests
277
- import re
278
-
279
- # Clean the drug name for search: extract the brand name itself
280
- # e.g., "Tab. Ultrafen-plus 500mg 2+0+2" -> "Ultrafen-plus"
281
- clean_brand = drug_name.strip()
282
- # Remove dosage forms
283
- for form in (r'\btab\b', r'\bcap\b', r'\bsyp\b', r'\binj\b', r'\bsusp\b', r'\btablet\b', r'\bcapsule\b', r'\bsyrup\b'):
284
- clean_brand = re.sub(form, '', clean_brand, flags=re.I)
285
- # Remove strengths
286
- clean_brand = re.sub(r'\b\d+\s*(mg|g|ml|mcg)\b', '', clean_brand, flags=re.I)
287
- # Remove dosage sigs
288
- clean_brand = re.sub(r'\b\d+\s*[\+\-]\s*\d+\s*[\+\-]\s*\d+\b', '', clean_brand)
289
- # Remove formatting characters
290
- clean_brand = " ".join(clean_brand.strip(" ,.-+()[]{}*/\\").split())
291
- if not clean_brand:
292
- clean_brand = drug_name
293
-
294
- query = f"{clean_brand} generic name active ingredient"
295
- headers = {
296
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
297
- }
298
- try:
299
- url = "https://lite.duckduckgo.com/lite/"
300
- res = requests.post(url, data={"q": query}, headers=headers, timeout=5)
301
- if res.status_code == 200:
302
- text = res.text
303
- # Extract text ONLY from the result snippet table cells to avoid global page noise
304
- snippets = re.findall(r'<td\s+class=["\']result-snippet["\'][\s\S]*?>([\s\S]*?)</td>', text)
305
- if snippets:
306
- snippets_text = " ".join(snippets)
307
- # Strip HTML tags
308
- clean_text = re.sub(r'<[^>]+>', ' ', snippets_text).lower()
309
- return clean_text
310
- except Exception:
311
- pass
312
- return None
313
-
314
-
315
- def resolve_generic_via_web(drug_name: str) -> dict[str, Any] | None:
316
- """Query the web for the brand name and find a matching generic name from MEDICINES."""
317
- search_text = web_search_generic(drug_name)
318
- if not search_text:
319
- return None
320
-
321
- best_med = None
322
- best_match_len = 0
323
-
324
- for med in MEDICINES:
325
- canon = med["name"].lower()
326
- if canon in search_text:
327
- if len(canon) > best_match_len:
328
- best_med = med
329
- best_match_len = len(canon)
330
-
331
- gen = med.get("generic_name", "").lower()
332
- if gen and gen in search_text:
333
- if len(gen) > best_match_len:
334
- best_med = med
335
- best_match_len = len(gen)
336
-
337
- return best_med
338
-
339
-
340
- def find_medicine_from_ocr(ocr_text: str, strength_hint: str | None = None) -> tuple[dict[str, Any], list[dict[str, Any]], str, int]:
341
- """Find medicine from OCR text with optional strength disambiguation."""
342
  query = normalize(ocr_text)
 
 
 
343
 
344
- # Direct lookup first
345
- direct_res = NAME_TO_MED.get(query)
 
 
 
 
346
 
347
  scored = []
348
- for norm_name, (orig_name, med) in NAME_TO_MED.items():
349
- score = SequenceMatcher(None, query, norm_name).ratio()
350
  if score > 0.35:
351
- # Boost score if strength matches
352
- if strength_hint and med.get("strength"):
353
- if normalize(strength_hint) in normalize(med["strength"]):
354
- score = min(1.0, score + 0.1)
355
- scored.append({"label": orig_name, "medicine": med, "score": score})
356
 
357
  scored.sort(key=lambda item: item["score"], reverse=True)
358
- if direct_res:
359
- medicine = direct_res[1]
360
  display_name = label_for_medicine(ocr_text, medicine)
361
  primary_score = 0.97
362
  elif scored:
@@ -365,25 +172,12 @@ def find_medicine_from_ocr(ocr_text: str, strength_hint: str | None = None) -> t
365
  display_name = best["label"]
366
  primary_score = best["score"]
367
  else:
368
- medicine = MEDICINES[0] if MEDICINES else {"id": "unknown", "name": "Unknown"}
369
  display_name = clean_prediction(ocr_text) or "Needs review"
370
  primary_score = 0.0
371
 
372
- confidence = max(0, min(99, round(primary_score * 100)))
373
-
374
- # Fallback to Internet Search Agent if confidence is low (below ACCEPTANCE_THRESHOLD)
375
- if confidence < ACCEPTANCE_THRESHOLD and query:
376
- web_med = resolve_generic_via_web(ocr_text)
377
- if web_med:
378
- medicine = web_med
379
- display_name = label_for_medicine(ocr_text, medicine)
380
- confidence = 95 # Web-verified match gets high confidence
381
- primary_score = 0.95
382
-
383
  top = [{"label": display_name, "medicine": medicine, "score": primary_score}]
384
  seen_ids = {medicine["id"]}
385
-
386
- # Re-score candidates or append them
387
  for item in scored:
388
  if item["medicine"]["id"] in seen_ids:
389
  continue
@@ -396,13 +190,14 @@ def find_medicine_from_ocr(ocr_text: str, strength_hint: str | None = None) -> t
396
  fallback_name = get_close_matches(query, list(BD_BRAND_TO_GENERIC.keys()), n=1)
397
  if fallback_name:
398
  mapped = BD_BRAND_TO_GENERIC[fallback_name[0]]
399
- res = NAME_TO_MED.get(normalize(mapped))
400
- if res and res[1]["id"] not in seen_ids:
401
- top.append({"label": fallback_name[0], "medicine": res[1], "score": 0.62})
402
- seen_ids.add(res[1]["id"])
403
  continue
404
  break
405
 
 
406
  return medicine, top, display_name, confidence
407
 
408
 
@@ -484,252 +279,19 @@ def extract_json_object(text: str) -> dict[str, Any]:
484
  return json.loads(cleaned)
485
 
486
 
487
- # ── Structured Extraction Parsing ────────────────────────────────────────────
488
-
489
- def _field(value: Any = None, confidence: float = 0.0) -> dict:
490
- return {"value": value, "confidence": confidence}
491
-
492
-
493
- def parse_drug_line(line: str) -> dict[str, Any]:
494
- """Parse a single raw OCR drug line into structured fields using regex."""
495
- line_clean = line.strip().removeprefix("-").strip()
496
- line_clean = re.sub(r'^\d+[\.\)]\s*', '', line_clean)
497
-
498
- dosage_form = None
499
- for form in ("tab.", "cap.", "syp.", "inj.", "susp.", "tablet", "capsule", "syrup"):
500
- if line_clean.lower().startswith(form):
501
- dosage_form = form.title()
502
- line_clean = line_clean[len(form):].strip()
503
- break
504
-
505
- strength = None
506
- m_str = re.search(r'\b\d+\s*(mg|g|ml|mcg)\b', line_clean, re.I)
507
- if m_str:
508
- strength = m_str.group(0)
509
- line_clean = line_clean.replace(strength, "").strip()
510
-
511
- sig = None
512
- m_sig = re.search(r'\b\d+[\+\-]\d+[\+\-]\d+\b', line_clean)
513
- if m_sig:
514
- sig = m_sig.group(0)
515
- line_clean = line_clean.replace(sig, "").strip()
516
- else:
517
- m_sig_text = re.search(r'\b(once daily|twice daily|daily|bid|tid|qid|qd|hs|po)\b', line_clean, re.I)
518
- if m_sig_text:
519
- sig = m_sig_text.group(0)
520
- line_clean = line_clean.replace(m_sig_text.group(0), "").strip()
521
-
522
- drug_name = " ".join(line_clean.strip(" ,.-+()[]{}*/\\").split())
523
- if not drug_name:
524
- drug_name = "Unknown"
525
-
526
- return {
527
- "drug_name": _field(drug_name, 0.8),
528
- "strength": _field(strength, 0.8 if strength else 0.0),
529
- "dosage_form": _field(dosage_form, 0.8 if dosage_form else 0.0),
530
- "directions_sig": _field(sig, 0.8 if sig else 0.0),
531
- "quantity": _field(None, 0.0)
532
- }
533
-
534
-
535
- def empty_extraction() -> dict[str, Any]:
536
- """Return a blank extraction schema."""
537
- return {
538
- "document_metadata": {
539
- "is_controlled_substance": False,
540
- "overall_legibility_score": 0.0,
541
- },
542
- "patient_info": {
543
- "name": _field(), "address": _field(),
544
- "date_of_birth": _field(), "phone_number": _field(),
545
- },
546
- "prescriber_info": {
547
- "name": _field(), "signature_present": _field(False),
548
- "address": _field(), "dea_number": _field(),
549
- "npi_number": _field(), "phone_number": _field(),
550
- },
551
- "prescription_details": {
552
- "date_of_issuance": _field(), "drug_name": _field(),
553
- "strength": _field(), "dosage_form": _field(),
554
- "quantity": _field(), "directions_sig": _field(),
555
- "refills_authorized": _field(), "dispense_as_written": _field(None),
556
- },
557
- "medications": [] # List of medication dicts
558
- }
559
-
560
-
561
- def calculate_fallback_legibility(extraction: dict[str, Any]) -> float:
562
- scores = []
563
- # 1. Non-medication sections
564
- for section_key in ("patient_info", "prescriber_info", "prescription_details"):
565
- section = extraction.get(section_key, {})
566
- for field_key, field in section.items():
567
- if isinstance(field, dict):
568
- val = field.get("value")
569
- conf = field.get("confidence", 0.0)
570
- if val is not None and val != "" and val is not False:
571
- scores.append(conf)
572
- # 2. Medications list
573
- for med in extraction.get("medications", []):
574
- for field_key, field in med.items():
575
- if isinstance(field, dict):
576
- val = field.get("value")
577
- conf = field.get("confidence", 0.0)
578
- if val is not None and val != "" and val is not False:
579
- scores.append(conf)
580
- if not scores:
581
- return 0.0
582
- def is_valid_drug_line(line: str) -> bool:
583
- line_lower = line.lower()
584
-
585
- # 1. Check if it contains standard drug forms
586
- if re.search(r'\b(tab\.|cap\.|syp\.|inj\.|tablet|capsule|syrup|suspension|injection|cream|ointment|gel|drop|drops|spray|inhaler)\b', line_lower):
587
- return True
588
-
589
- # 2. Check if it matches a known brand or generic name in the database
590
- cleaned = re.sub(r'^\d+[\.\)]?\s*', '', line_lower).strip()
591
- words = cleaned.split()
592
- if words:
593
- first_word = words[0].strip(" ,.-+()[]{}")
594
- if first_word in BD_BRAND_TO_GENERIC or normalize(first_word) in MED_BY_NAME:
595
- return True
596
- if len(words) > 1:
597
- two_words = " ".join(words[:2]).strip(" ,.-+()[]{}")
598
- if two_words in BD_BRAND_TO_GENERIC or normalize(two_words) in MED_BY_NAME:
599
- return True
600
-
601
- # 3. Check if it contains strength indicators or dosage patterns
602
- if re.search(r'\b\d+\s*(mg|g|ml|mcg|%)\b', line_lower) or re.search(r'\b\d+[\+\-]\d+[\+\-]\d+\b', line_lower):
603
- return True
604
-
605
- # 4. Check if it contains common sig keywords
606
- if re.search(r'\b(once daily|twice daily|daily|bid|tid|qid|qd|hs|po|cap|tab)\b', line_lower):
607
- return True
608
-
609
- return False
610
-
611
-
612
- def parse_structured_extraction(raw_text: str, ocr_text: str = "") -> dict[str, Any]:
613
- """Parse Nemotron output into the structured extraction schema.
614
- Falls back gracefully if JSON is malformed."""
615
- extraction = empty_extraction()
616
- try:
617
- parsed = extract_json_object(raw_text)
618
- # Merge parsed data into extraction, preserving schema structure
619
- if "document_metadata" in parsed:
620
- extraction["document_metadata"].update(parsed["document_metadata"])
621
-
622
- # Parse patient_info, prescriber_info, and prescription_details from stable schema
623
- for section in ("patient_info", "prescriber_info", "prescription_details"):
624
- if section in parsed:
625
- for key, val in parsed[section].items():
626
- if key in extraction[section]:
627
- if isinstance(val, dict) and "value" in val:
628
- extraction[section][key] = val
629
- else:
630
- extraction[section][key] = _field(val, 0.5)
631
- except Exception:
632
- pass
633
-
634
- # Extract all medications from OCR text (drug-focused section first)
635
- focused_section = ""
636
- focused_pass_match = re.search(r'=== DRUG EXTRACTION \(focused pass\) ===([\s\S]*?)(===|$)', ocr_text)
637
- if focused_pass_match:
638
- focused_section = focused_pass_match.group(1).strip()
639
-
640
- search_source = focused_section if focused_section else ocr_text
641
-
642
- drugs = []
643
- for line in search_source.split("\n"):
644
- line = line.strip()
645
- if not line:
646
- continue
647
- if focused_section:
648
- if "drug extraction" in line.lower() or "=== " in line:
649
- continue
650
- clean_line = re.sub(r'^\d+[\.\)]?\s*', '', line).strip()
651
- if is_valid_drug_line(clean_line):
652
- drugs.append(line)
653
- else:
654
- if re.search(r'\b(tab\.|cap\.|syp\.|inj\.|tablet|capsule|syrup|medicine|rx)\b', line, re.I) or re.match(r'^[\d\-]+[\.\)]?\s+', line):
655
- clean_line = re.sub(r'^\d+[\.\)]?\s*', '', line).strip()
656
- if is_valid_drug_line(clean_line):
657
- drugs.append(line)
658
-
659
- # Deduplicate extracted drug lines while preserving order
660
- seen_drugs = set()
661
- unique_drugs = []
662
- for d in drugs:
663
- d_clean = d.strip()
664
- norm_d = normalize(d_clean)
665
- if norm_d not in seen_drugs and d_clean:
666
- seen_drugs.add(norm_d)
667
- unique_drugs.append(d_clean)
668
- drugs = unique_drugs
669
-
670
- # Parse each matched drug line
671
- for d in drugs:
672
- parsed_med = parse_drug_line(d)
673
- extraction["medications"].append(parsed_med)
674
-
675
- # If still empty, fall back to the single drug parsed by Nemotron or clean_prediction
676
- if not extraction["medications"]:
677
- drug_val = get_field_value(extraction, "prescription_details", "drug_name") or clean_prediction(ocr_text)
678
- if drug_val:
679
- extraction["medications"].append({
680
- "drug_name": _field(drug_val, 0.5),
681
- "strength": extraction["prescription_details"].get("strength", _field()),
682
- "dosage_form": extraction["prescription_details"].get("dosage_form", _field()),
683
- "quantity": extraction["prescription_details"].get("quantity", _field()),
684
- "directions_sig": extraction["prescription_details"].get("directions_sig", _field()),
685
- })
686
-
687
- # Apply controlled substance check on all medications
688
- for med in extraction["medications"]:
689
- drug_val = med["drug_name"].get("value")
690
- if drug_val and is_controlled_substance(drug_val):
691
- extraction["document_metadata"]["is_controlled_substance"] = True
692
-
693
- # Fallback legibility calculation if overall_legibility_score is 0.0
694
- metadata = extraction.setdefault("document_metadata", {})
695
- legibility = metadata.get("overall_legibility_score", 0.0)
696
- if legibility == 0.0:
697
- metadata["overall_legibility_score"] = calculate_fallback_legibility(extraction)
698
-
699
- return extraction
700
-
701
-
702
- def get_field_value(extraction: dict, section: str, field: str) -> Any:
703
- """Safely get a field value from the extraction dict."""
704
- return extraction.get(section, {}).get(field, {}).get("value")
705
-
706
-
707
- def get_field_confidence(extraction: dict, section: str, field: str) -> float:
708
- """Safely get a field confidence from the extraction dict."""
709
- return extraction.get(section, {}).get(field, {}).get("confidence", 0.0)
710
-
711
-
712
- # ── Validation Prompt (enhanced) ─────────────────────────────────────────────
713
-
714
  def build_validation_prompt(
715
  ocr_text: str,
716
- extraction: dict[str, Any],
717
  medicine: dict[str, Any],
718
  display_name: str,
719
  confidence: int,
720
  retrieval_candidates: list[dict[str, Any]],
721
  ) -> str:
722
  validation_payload = {
723
- "raw_ocr_text": ocr_text,
724
- "extracted_drug_name": get_field_value(extraction, "prescription_details", "drug_name"),
725
- "extracted_strength": get_field_value(extraction, "prescription_details", "strength"),
726
- "extracted_sig": get_field_value(extraction, "prescription_details", "directions_sig"),
727
- "extracted_quantity": get_field_value(extraction, "prescription_details", "quantity"),
728
- "is_controlled_substance": extraction.get("document_metadata", {}).get("is_controlled_substance", False),
729
  "retrieved_display_name": display_name,
730
  "retrieved_canonical_name": medicine.get("name", "Unknown"),
731
  "retrieval_confidence": confidence,
732
- "retrieved_strength": first_strength(medicine.get("strength", "")),
733
  "category": medicine.get("category", "Unknown"),
734
  "top_candidates": [
735
  {
@@ -740,31 +302,17 @@ def build_validation_prompt(
740
  for item in retrieval_candidates[:3]
741
  ],
742
  }
743
-
744
- # Check for compliance issues
745
- compliance_flags = []
746
- is_controlled = extraction.get("document_metadata", {}).get("is_controlled_substance", False)
747
- if is_controlled:
748
- if not get_field_value(extraction, "patient_info", "address"):
749
- compliance_flags.append("MISSING_PATIENT_ADDRESS_FOR_CONTROLLED")
750
- if not get_field_value(extraction, "prescriber_info", "address"):
751
- compliance_flags.append("MISSING_PRESCRIBER_ADDRESS_FOR_CONTROLLED")
752
- if not get_field_value(extraction, "prescriber_info", "dea_number"):
753
- compliance_flags.append("MISSING_DEA_NUMBER_FOR_CONTROLLED")
754
- validation_payload["compliance_flags"] = compliance_flags
755
-
756
- return f"""You are a pharmacy prescription validation assistant.
757
 
758
  Input JSON:
759
  {json.dumps(validation_payload, ensure_ascii=False)}
760
 
761
  Task:
762
- 1. Decide whether the retrieved medicine is safe to accept based on the OCR extraction and retrieval match.
763
  2. Translate the prescription into a clean pharmacy instruction row.
764
- 3. Do NOT invent dose/timing/duration if not visible in the extracted data.
765
  4. If OCR and retrieved medicine clearly disagree, return needs_review.
766
- 5. If this is a controlled substance and mandatory fields are missing, note it in validation_note.
767
- 6. Check if extracted strength matches retrieved medicine strength.
768
 
769
  Return ONLY valid JSON with these keys:
770
  status: one of validated, needs_review
@@ -778,7 +326,6 @@ duration
778
  instructions
779
  validation_note
780
  ocr_text
781
- flags: list of any compliance or safety flags
782
  """
783
 
784
 
@@ -806,7 +353,7 @@ def validate_with_nvidia_nim(
806
  messages=[{"role": "user", "content": prompt}],
807
  temperature=0,
808
  top_p=1,
809
- max_tokens=512,
810
  )
811
  content = response.choices[0].message.content or ""
812
  plan = extract_json_object(content)
@@ -837,88 +384,8 @@ def validate_with_nvidia_nim(
837
  )
838
 
839
 
840
- def run_nemotron_inference(prompt: str) -> str:
841
- """Run Nemotron inference locally, returning the raw generated text."""
842
- global NEMOTRON_MODEL, NEMOTRON_TOKENIZER
843
- import torch
844
- from transformers import AutoModelForCausalLM, AutoTokenizer
845
-
846
- if NEMOTRON_MODEL is None or NEMOTRON_TOKENIZER is None:
847
- NEMOTRON_TOKENIZER = AutoTokenizer.from_pretrained(NEMOTRON_MODEL_ID, trust_remote_code=True)
848
- NEMOTRON_MODEL = AutoModelForCausalLM.from_pretrained(
849
- NEMOTRON_MODEL_ID,
850
- trust_remote_code=True,
851
- torch_dtype=torch.bfloat16,
852
- device_map="auto",
853
- ).eval()
854
-
855
- messages = [{"role": "user", "content": prompt}]
856
- if hasattr(NEMOTRON_TOKENIZER, "apply_chat_template"):
857
- input_ids = NEMOTRON_TOKENIZER.apply_chat_template(
858
- messages,
859
- add_generation_prompt=True,
860
- return_tensors="pt",
861
- )
862
- else:
863
- input_ids = NEMOTRON_TOKENIZER(prompt, return_tensors="pt").input_ids
864
-
865
- device = next(NEMOTRON_MODEL.parameters()).device
866
- input_ids = input_ids.to(device)
867
- with torch.inference_mode():
868
- output_ids = NEMOTRON_MODEL.generate(
869
- input_ids,
870
- do_sample=False,
871
- temperature=0.0,
872
- top_p=1.0,
873
- max_new_tokens=1024,
874
- pad_token_id=NEMOTRON_TOKENIZER.eos_token_id,
875
- )
876
- generated = output_ids[0][input_ids.shape[-1]:]
877
- return NEMOTRON_TOKENIZER.decode(generated, skip_special_tokens=True).strip()
878
-
879
-
880
- def run_nemotron_nim_inference(prompt: str) -> str:
881
- """Run Nemotron inference via NVIDIA NIM API, returning raw text."""
882
- from openai import OpenAI
883
- client = OpenAI(base_url=NVIDIA_BASE_URL, api_key=NVIDIA_API_KEY)
884
- response = client.chat.completions.create(
885
- model=NVIDIA_NIM_MODEL,
886
- messages=[{"role": "user", "content": prompt}],
887
- temperature=0,
888
- top_p=1,
889
- max_tokens=1024,
890
- )
891
- return response.choices[0].message.content or ""
892
-
893
-
894
- def structure_ocr_with_nemotron(ocr_text: str) -> dict[str, Any]:
895
- """Pass 2: Use Nemotron to structure raw OCR text into the clinical JSON schema."""
896
- prompt = STRUCTURING_PROMPT_TEMPLATE.format(ocr_text=ocr_text)
897
- try:
898
- content = run_nemotron_inference(prompt)
899
- return parse_structured_extraction(content, ocr_text)
900
- except Exception as exc_local:
901
- # Fallback to NVIDIA NIM API
902
- if NVIDIA_API_KEY:
903
- try:
904
- content = run_nemotron_nim_inference(prompt)
905
- return parse_structured_extraction(content, ocr_text)
906
- except Exception:
907
- pass
908
- # Last resort: return extraction with just the drug name parsed from OCR
909
- extraction = empty_extraction()
910
- drug_guess = clean_prediction(ocr_text)
911
- if drug_guess:
912
- extraction["prescription_details"]["drug_name"] = _field(drug_guess, 0.3)
913
- if is_controlled_substance(drug_guess):
914
- extraction["document_metadata"]["is_controlled_substance"] = True
915
- extraction["document_metadata"]["overall_legibility_score"] = 0.2
916
- return extraction
917
-
918
-
919
  def validate_with_nemotron(
920
  ocr_text: str,
921
- extraction: dict[str, Any],
922
  medicine: dict[str, Any],
923
  display_name: str,
924
  confidence: int,
@@ -926,9 +393,48 @@ def validate_with_nemotron(
926
  ) -> dict[str, Any]:
927
  global NEMOTRON_MODEL, NEMOTRON_TOKENIZER
928
 
929
- prompt = build_validation_prompt(ocr_text, extraction, medicine, display_name, confidence, retrieval_candidates)
 
 
 
 
 
930
  try:
931
- content = run_nemotron_inference(prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
932
  plan = extract_json_object(content)
933
  if plan.get("status") not in {"validated", "needs_review"}:
934
  plan["status"] = "needs_review"
@@ -968,6 +474,7 @@ def load_kpi_metrics(searches: int = 0) -> str:
968
  elif fallback_path.exists():
969
  text = fallback_path.read_text(encoding="utf-8", errors="ignore")
970
  if "ocr_accuracy" in text:
 
971
  ocr_accuracy = 0.37888446215139443
972
  retrieval_accuracy = 0.6055776892430279
973
 
@@ -1023,10 +530,10 @@ def pipeline_html(stage: int = 0, validation_status: str = "waiting") -> str:
1023
  }.get(validation_status, "Nemotron Review")
1024
  steps = [
1025
  ("Prescription", "uploaded"),
1026
- ("MiniCPM OCR", "2-pass extraction"),
1027
- ("Nemotron 8B", "structured JSON"),
1028
  ("Retrieval Engine", "ranked candidates"),
1029
  (validation_label, "returned a decision"),
 
1030
  ]
1031
  cards = []
1032
  logs = []
@@ -1074,7 +581,7 @@ def medicine_details_html(
1074
  )
1075
  return f"""
1076
  <div class="result-card">
1077
- <h3>Medicine Match</h3>
1078
  <dl class="details">
1079
  <dt>Medicine</dt><dd>{medicine_label}</dd>
1080
  <dt>Generic</dt><dd>{generic_label}</dd>
@@ -1086,7 +593,7 @@ def medicine_details_html(
1086
  </dl>
1087
  <div class="explain">
1088
  <h4>AI Explanation</h4>
1089
- <p><b>OCR detected:</b> \"{ocr_text[:200]}{'...' if len(ocr_text) > 200 else ''}\"</p>
1090
  <p><b>Retrieved:</b> {display_name} ({medicine.get('name', 'Unknown')})</p>
1091
  <p><b>Validation:</b> {validation_label}</p>
1092
  <p><b>Inventory:</b> {inventory_label}</p>
@@ -1095,164 +602,6 @@ def medicine_details_html(
1095
  """
1096
 
1097
 
1098
- def _confidence_badge(conf: float | None) -> str:
1099
- """Return a colored confidence badge."""
1100
- if conf is None:
1101
- color, bg = "#6b7280", "#f3f4f6"
1102
- pct = "0%"
1103
- else:
1104
- try:
1105
- conf_val = float(conf)
1106
- if conf_val >= 0.85:
1107
- color, bg = "#065f46", "#d1fae5"
1108
- elif conf_val >= 0.50:
1109
- color, bg = "#92400e", "#fef3c7"
1110
- elif conf_val > 0:
1111
- color, bg = "#991b1b", "#fee2e2"
1112
- else:
1113
- color, bg = "#6b7280", "#f3f4f6"
1114
- pct = f"{conf_val * 100:.0f}%"
1115
- except (ValueError, TypeError):
1116
- color, bg = "#6b7280", "#f3f4f6"
1117
- pct = "0%"
1118
- return f'<span style="background:{bg};color:{color};padding:2px 8px;border-radius:12px;font-size:11px;font-weight:700;">{pct}</span>'
1119
-
1120
-
1121
-
1122
- def _display_value(val: Any) -> str:
1123
- """Format a field value for display."""
1124
- if val is None:
1125
- return '<span style="color:#9ca3af;font-style:italic;">Not detected</span>'
1126
- if isinstance(val, bool):
1127
- return "Yes" if val else "No"
1128
- return str(val)
1129
-
1130
-
1131
- def extraction_card_html(extraction: dict[str, Any]) -> str:
1132
- """Build the full structured extraction card showing all extracted fields."""
1133
- sections = [
1134
- ("Patient Information", "patient_info", [
1135
- ("Name", "name"), ("Address", "address"),
1136
- ("Date of Birth", "date_of_birth"), ("Phone", "phone_number"),
1137
- ]),
1138
- ("Prescriber Information", "prescriber_info", [
1139
- ("Name", "name"), ("Signature Present", "signature_present"),
1140
- ("Address", "address"), ("DEA Number", "dea_number"),
1141
- ("NPI Number", "npi_number"), ("Phone", "phone_number"),
1142
- ]),
1143
- ]
1144
-
1145
- legibility = extraction.get("document_metadata", {}).get("overall_legibility_score", 0)
1146
- html_parts = [f'<div class="extraction-card">']
1147
- html_parts.append(f'<div class="extraction-header"><h3>Full Prescription Extraction</h3>')
1148
- html_parts.append(f'<span class="legibility-badge">Legibility: {_confidence_badge(legibility)}</span></div>')
1149
-
1150
- for section_title, section_key, fields in sections:
1151
- html_parts.append(f'<div class="extraction-section">')
1152
- html_parts.append(f'<h4>{section_title}</h4>')
1153
- html_parts.append('<dl class="extraction-fields">')
1154
- for label, field_key in fields:
1155
- field = extraction.get(section_key, {}).get(field_key, {})
1156
- val = field.get("value")
1157
- conf = field.get("confidence", 0.0)
1158
- html_parts.append(
1159
- f'<dt>{label}</dt>'
1160
- f'<dd>{_display_value(val)} {_confidence_badge(conf)}</dd>'
1161
- )
1162
- html_parts.append('</dl></div>')
1163
-
1164
- # Add the Medications list section
1165
- html_parts.append(f'<div class="extraction-section">')
1166
- html_parts.append(f'<h4>All Extracted Medications</h4>')
1167
- meds = extraction.get("medications", [])
1168
- if meds:
1169
- html_parts.append('<table class="candidate-table" style="width: 100%; border-collapse: collapse; margin-top: 8px;">')
1170
- html_parts.append('<thead><tr><th>#</th><th>Drug Name</th><th>Dosage Form</th><th>Strength</th><th>Directions (Sig)</th></tr></thead>')
1171
- html_parts.append('<tbody>')
1172
- for idx, med in enumerate(meds, start=1):
1173
- dname = med.get("drug_name", {}).get("value") or "Unknown"
1174
- dname_conf = med.get("drug_name", {}).get("confidence", 0.0)
1175
-
1176
- form = med.get("dosage_form", {}).get("value") or "-"
1177
- strength = med.get("strength", {}).get("value") or "-"
1178
- sig = med.get("directions_sig", {}).get("value") or "-"
1179
-
1180
- html_parts.append(
1181
- f'<tr>'
1182
- f'<td>{idx}</td>'
1183
- f'<td><strong>{dname}</strong> {_confidence_badge(dname_conf)}</td>'
1184
- f'<td>{form}</td>'
1185
- f'<td>{strength}</td>'
1186
- f'<td><code>{sig}</code></td>'
1187
- f'</tr>'
1188
- )
1189
- html_parts.append('</tbody></table>')
1190
- else:
1191
- html_parts.append('<p style="color: var(--muted); font-style: italic;">No medications detected.</p>')
1192
- html_parts.append('</div>')
1193
-
1194
- # Add Prescription Details (refills, date issued, etc.)
1195
- html_parts.append(f'<div class="extraction-section">')
1196
- html_parts.append(f'<h4>Prescription Metadata</h4>')
1197
- html_parts.append('<dl class="extraction-fields">')
1198
- meta_fields = [
1199
- ("Date Issued", "date_of_issuance"),
1200
- ("Refills Authorized", "refills_authorized"),
1201
- ("Dispense As Written", "dispense_as_written"),
1202
- ]
1203
- for label, field_key in meta_fields:
1204
- field = extraction.get("prescription_details", {}).get(field_key, {})
1205
- val = field.get("value")
1206
- conf = field.get("confidence", 0.0)
1207
- html_parts.append(
1208
- f'<dt>{label}</dt>'
1209
- f'<dd>{_display_value(val)} {_confidence_badge(conf)}</dd>'
1210
- )
1211
- html_parts.append('</dl></div>')
1212
-
1213
- html_parts.append('</div>')
1214
- return "\n".join(html_parts)
1215
-
1216
-
1217
- def compliance_banner_html(extraction: dict[str, Any]) -> str:
1218
- """Show controlled substance compliance status."""
1219
- is_controlled = extraction.get("document_metadata", {}).get("is_controlled_substance", False)
1220
- drug_name = get_field_value(extraction, "prescription_details", "drug_name") or "Unknown"
1221
-
1222
- if not is_controlled:
1223
- return f"""
1224
- <div class="compliance-banner compliance-ok">
1225
- <strong>✓ Non-Controlled Substance</strong>
1226
- <span>Drug: {drug_name} — Patient address, prescriber DEA, and prescriber address are optional.</span>
1227
- </div>
1228
- """
1229
-
1230
- # Check for missing mandatory fields
1231
- missing = []
1232
- if not get_field_value(extraction, "patient_info", "address"):
1233
- missing.append("Patient Address")
1234
- if not get_field_value(extraction, "prescriber_info", "address"):
1235
- missing.append("Prescriber Address")
1236
- if not get_field_value(extraction, "prescriber_info", "dea_number"):
1237
- missing.append("DEA Number")
1238
-
1239
- if missing:
1240
- missing_list = ", ".join(missing)
1241
- return f"""
1242
- <div class="compliance-banner compliance-alert">
1243
- <strong>⚠ CONTROLLED SUBSTANCE — MISSING MANDATORY FIELDS</strong>
1244
- <span>Drug: {drug_name} — Missing: {missing_list}. Federal law requires these for DEA Schedule II-V drugs.</span>
1245
- </div>
1246
- """
1247
- else:
1248
- return f"""
1249
- <div class="compliance-banner compliance-warn">
1250
- <strong>⚡ Controlled Substance Detected</strong>
1251
- <span>Drug: {drug_name} — All mandatory fields (patient address, prescriber address, DEA) are present. Verify before dispensing.</span>
1252
- </div>
1253
- """
1254
-
1255
-
1256
  def translated_prescription_html(plan: dict[str, Any]) -> str:
1257
  rows = [
1258
  ("Medicine", plan.get("medicine_name") or "Not confirmed"),
@@ -1266,19 +615,12 @@ def translated_prescription_html(plan: dict[str, Any]) -> str:
1266
  ]
1267
  row_html = "".join(f"<dt>{label}</dt><dd>{value}</dd>" for label, value in rows)
1268
  status = plan.get("status", "needs_review").replace("_", " ").title()
1269
- flags = plan.get("flags", [])
1270
- flags_html = ""
1271
- if flags:
1272
- flags_html = '<div class="validation-flags">' + " ".join(
1273
- f'<span class="flag-pill">{f}</span>' for f in flags
1274
- ) + '</div>'
1275
  return f"""
1276
  <div class="translated-card">
1277
  <div class="translated-head">
1278
  <h3>Translated Prescription</h3>
1279
  <span class="status-pill">{status}</span>
1280
  </div>
1281
- {flags_html}
1282
  <dl class="details translated-details">{row_html}</dl>
1283
  <p class="fine-print">Generated from OCR text and retrieval candidates. Confirm before dispensing.</p>
1284
  </div>
@@ -1328,50 +670,19 @@ def ocr_compare_html(
1328
  plan: dict[str, Any],
1329
  ) -> str:
1330
  corrected = display_name if plan.get("status") == "validated" else f"Needs review: {display_name}"
1331
- # Truncate long OCR text for display
1332
- ocr_display = ocr_text[:150] + "..." if len(ocr_text) > 150 else ocr_text
1333
  return f"""
1334
  <div class="compare-grid">
1335
- <div><span>Raw OCR Output</span><strong>{ocr_display}</strong></div>
1336
  <div><span>AI Corrected</span><strong>{corrected}</strong></div>
1337
  <div><span>Canonical</span><strong>{medicine['name'] if plan.get('status') == 'validated' else 'Not confirmed'}</strong></div>
1338
  </div>
1339
  """
1340
 
1341
 
1342
- # ── OCR Function (Pass 1: MiniCPM-V full text extraction) ────────────────────
1343
-
1344
- def _run_minicpm_single_pass(pil_image: Image.Image, prompt: str, max_tokens: int = 512) -> str:
1345
- """Run a single MiniCPM-V inference pass with the given prompt."""
1346
- global OCR_MODEL, OCR_TOKENIZER
1347
-
1348
- messages = [{"role": "user", "content": [pil_image.convert("RGB"), prompt]}]
1349
- kwargs = {
1350
- "image": None,
1351
- "msgs": messages,
1352
- "tokenizer": OCR_TOKENIZER,
1353
- "sampling": False,
1354
- "stream": False,
1355
- "max_new_tokens": max_tokens,
1356
- "enable_thinking": False,
1357
- "temperature": 0.0,
1358
- "top_p": 0.1,
1359
- }
1360
- try:
1361
- raw = OCR_MODEL.chat(**kwargs)
1362
- except TypeError:
1363
- kwargs.pop("temperature", None)
1364
- kwargs.pop("top_p", None)
1365
- raw = OCR_MODEL.chat(**kwargs)
1366
-
1367
- if not isinstance(raw, str):
1368
- raw = "".join(list(raw))
1369
- return raw.strip()
1370
-
1371
-
1372
- def run_minicpm_ocr(pil_image: Image.Image) -> tuple[str, Image.Image]:
1373
- """Multi-pass segment-and-crop OCR: Locate handwriting, draw bounding boxes, crop and perform targeted OCR."""
1374
  global OCR_MODEL, OCR_TOKENIZER
 
 
1375
 
1376
  try:
1377
  import torch
@@ -1390,117 +701,29 @@ def run_minicpm_ocr(pil_image: Image.Image) -> tuple[str, Image.Image]:
1390
  if torch.cuda.is_available():
1391
  OCR_MODEL = OCR_MODEL.cuda()
1392
 
1393
- # Pass 1A: Detect text regions using OpenCV image processing (horizontal line-removal + contour extraction)
1394
- # This acts as a robust engineering layout analysis (not just prompt engineering grounding)
1395
- import numpy as np
1396
- import cv2
1397
-
1398
- boxes = []
 
 
 
 
 
 
1399
  try:
1400
- # Convert PIL image to OpenCV grayscale
1401
- img_np = np.array(pil_image.convert("RGB"))
1402
- img_bgr = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
1403
- gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
1404
-
1405
- # Otsu's binarization
1406
- _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
1407
-
1408
- h_img, w_img = gray.shape
1409
-
1410
- # Detect and remove printed table/grid lines to isolate text
1411
- h_size = max(15, int(w_img * 0.04))
1412
- v_size = max(15, int(h_img * 0.04))
1413
-
1414
- horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (h_size, 1))
1415
- detect_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
1416
-
1417
- vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, v_size))
1418
- detect_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
1419
-
1420
- clean = cv2.subtract(thresh, detect_horizontal)
1421
- clean = cv2.subtract(clean, detect_vertical)
1422
-
1423
- # Dilation to merge characters horizontally into cohesive text blocks
1424
- d_w = max(5, int(w_img * 0.03))
1425
- d_h = max(2, int(h_img * 0.005))
1426
- kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (d_w, d_h))
1427
- dilated = cv2.dilate(clean, kernel, iterations=2)
1428
-
1429
- # Find external contours
1430
- contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
1431
-
1432
- for c in contours:
1433
- x, y, w, h = cv2.boundingRect(c)
1434
- # Filter contours to target horizontal text lines
1435
- if w > w_img * 0.04 and h > h_img * 0.01 and w < w_img * 0.95 and h < h_img * 0.2:
1436
- if w > h * 1.1:
1437
- # Convert to 0-1000 scale compatible with drawing/cropping code
1438
- ymin_n = int(y / h_img * 1000)
1439
- xmin_n = int(x / w_img * 1000)
1440
- ymax_n = int((y + h) / h_img * 1000)
1441
- xmax_n = int((x + w) / w_img * 1000)
1442
- boxes.append((ymin_n, xmin_n, ymax_n, xmax_n))
1443
-
1444
- # Sort boxes top-to-bottom
1445
- boxes.sort(key=lambda b: b[0])
1446
- except Exception as exc:
1447
- print(f"OpenCV layout extraction error: {exc}")
1448
- boxes = []
1449
-
1450
- width, height = pil_image.size
1451
- cropped_ocr_results = []
1452
-
1453
- # Create annotated image with green bounding boxes
1454
- annotated_image = pil_image.copy()
1455
- draw = ImageDraw.Draw(annotated_image)
1456
-
1457
- for i, (ymin_n, xmin_n, ymax_n, xmax_n) in enumerate(boxes, start=1):
1458
- ymin = int(ymin_n * height / 1000)
1459
- xmin = int(xmin_n * width / 1000)
1460
- ymax = int(ymax_n * height / 1000)
1461
- xmax = int(xmax_n * width / 1000)
1462
-
1463
- # Draw bounding box
1464
- draw.rectangle([xmin, ymin, xmax, ymax], outline="#10b981", width=4)
1465
- draw.rectangle([xmin, max(0, ymin - 20), xmin + 45, ymin], fill="#10b981")
1466
- draw.text((xmin + 5, max(0, ymin - 18)), f"Rx {i}", fill="white")
1467
-
1468
- # Crop region with 15px padding
1469
- padding = 15
1470
- crop_box = (
1471
- max(0, xmin - padding),
1472
- max(0, ymin - padding),
1473
- min(width, xmax + padding),
1474
- min(height, ymax + padding)
1475
- )
1476
- try:
1477
- cropped_img = pil_image.crop(crop_box)
1478
- crop_prompt = "Transcribe the handwritten clinical drug name, strength, or directions in this image crop."
1479
- crop_ocr = _run_minicpm_single_pass(cropped_img, crop_prompt, max_tokens=128)
1480
- if crop_ocr.strip():
1481
- cropped_ocr_results.append(f"{i}. {crop_ocr.strip()}")
1482
- except Exception:
1483
- pass
1484
-
1485
- # Compile the drug pass text
1486
- if cropped_ocr_results:
1487
- drug_pass = "\n".join(cropped_ocr_results)
1488
- else:
1489
- # Fallback: run standard focused drug pass on the whole image
1490
- drug_pass = _run_minicpm_single_pass(pil_image, DRUG_FOCUSED_PROMPT, max_tokens=512)
1491
-
1492
- # Pass 1B: Full prescription text extraction
1493
- full_pass = _run_minicpm_single_pass(pil_image, FULL_OCR_PROMPT, max_tokens=1024)
1494
-
1495
- combined = f"""=== DRUG EXTRACTION (focused pass) ===
1496
- {drug_pass}
1497
-
1498
- === FULL PRESCRIPTION TEXT ===
1499
- {full_pass}"""
1500
- return combined, annotated_image
1501
 
 
 
 
1502
 
1503
- # ── Main Analysis Pipeline ───────────────────────────────────────────────────
1504
 
1505
  @spaces.GPU(duration=300)
1506
  def analyze_prescription(image, progress=gr.Progress()):
@@ -1508,142 +731,54 @@ def analyze_prescription(image, progress=gr.Progress()):
1508
  if image is None:
1509
  raise gr.Error("Upload or capture a prescription image first.")
1510
 
1511
- # Step 1: Upload
1512
- progress(0.10, desc="Prescription uploaded")
1513
- time.sleep(0.1)
 
 
 
1514
 
1515
- # Step 2: MiniCPM-V full text OCR
1516
- progress(0.20, desc="MiniCPM-V multi-pass OCR (drug-focused + full text)...")
1517
- ocr_text, annotated_image = run_minicpm_ocr(image)
1518
  unload_ocr_model()
1519
 
1520
- # Step 3: Nemotron structuring
1521
- progress(0.45, desc="Nemotron structuring extracted text into clinical JSON...")
1522
- extraction = structure_ocr_with_nemotron(ocr_text)
1523
-
1524
- # Step 4: Retrieval & Step 5: Validation (for each medication)
1525
- progress(0.65, desc="Retrieval & validation of all medications...")
1526
-
1527
- medications = extraction.get("medications", [])
1528
- if not medications:
1529
- medications = [{
1530
- "drug_name": _field(clean_prediction(ocr_text), 0.3),
1531
- "strength": _field(None, 0.0),
1532
- "dosage_form": _field(None, 0.0),
1533
- "quantity": _field(None, 0.0),
1534
- "directions_sig": _field(None, 0.0)
1535
- }]
1536
-
1537
- med_results = []
1538
- for i, med_data in enumerate(medications):
1539
- dname = med_data["drug_name"].get("value") or "Unknown"
1540
- s_hint = med_data["strength"].get("value")
1541
-
1542
- # Retrieval
1543
- medicine, candidates, display_name, confidence = find_medicine_from_ocr(dname, s_hint)
1544
-
1545
- # Build temp extraction for validation of this specific drug
1546
- temp_extraction = {
1547
- **extraction,
1548
- "prescription_details": {
1549
- "date_of_issuance": extraction["prescription_details"].get("date_of_issuance", _field()),
1550
- "drug_name": med_data["drug_name"],
1551
- "strength": med_data["strength"],
1552
- "dosage_form": med_data["dosage_form"],
1553
- "quantity": med_data["quantity"],
1554
- "directions_sig": med_data["directions_sig"],
1555
- "refills_authorized": extraction["prescription_details"].get("refills_authorized", _field()),
1556
- "dispense_as_written": extraction["prescription_details"].get("dispense_as_written", _field(None)),
1557
- }
1558
- }
1559
-
1560
- # Validation
1561
- plan = validate_with_nemotron(ocr_text, temp_extraction, medicine, display_name, confidence, candidates)
1562
-
1563
- accepted = plan.get("status") == "validated" and confidence >= ACCEPTANCE_THRESHOLD
1564
- inventory = get_inventory(medicine)
1565
-
1566
- med_results.append({
1567
- "drug_name": dname,
1568
- "display_name": display_name,
1569
- "medicine": medicine,
1570
- "candidates": candidates,
1571
- "confidence": confidence,
1572
- "plan": plan,
1573
- "inventory": inventory,
1574
- "accepted": accepted,
1575
- "ocr_text": ocr_text,
1576
- })
1577
-
1578
  unload_nemotron_model()
1579
- progress(1.00, desc="Result prepared")
1580
-
1581
- active_idx = 0
1582
- active = med_results[active_idx]
1583
-
1584
- image_path = resolve_asset_path(active["medicine"].get("image_path"))
1585
- package_image_val = str(image_path) if image_path and active["accepted"] else None
1586
-
1587
  state = {
1588
- "medications": med_results,
1589
- "active_index": active_idx,
1590
- "patient_info": extraction.get("patient_info"),
1591
- "prescriber_info": extraction.get("prescriber_info"),
1592
- "document_metadata": extraction.get("document_metadata")
 
1593
  }
1594
  SESSION_SEARCHES += 1
1595
-
1596
- choices = [f"{idx+1}. {m['drug_name']} ({m['display_name']})" for idx, m in enumerate(med_results)]
1597
 
1598
  return (
1599
  load_kpi_metrics(SESSION_SEARCHES),
1600
- pipeline_html(5, active["plan"].get("status", "needs_review")),
1601
- compliance_banner_html(extraction),
1602
- extraction_card_html(extraction),
1603
- medicine_details_html(active["medicine"], active["inventory"], ocr_text, active["display_name"], active["confidence"], active["plan"]),
1604
- annotated_image,
1605
- package_image_val,
1606
- package_status_html(active["inventory"], active["accepted"]),
1607
- confidence_gauge(active["confidence"]),
1608
- candidates_html(active["candidates"]),
1609
- ocr_compare_html(active["medicine"], ocr_text, active["display_name"], active["confidence"], active["plan"]),
1610
- translated_prescription_html(active["plan"]),
1611
  gr.update(visible=True),
1612
- gr.update(visible=True, interactive=active["accepted"]),
1613
- gr.update(choices=choices, value=choices[active_idx], visible=len(choices) > 1),
1614
- state,
1615
- )
1616
-
1617
-
1618
- def select_medication(selected_label: str, state: dict[str, Any] | None):
1619
- if not state or "medications" not in state:
1620
- return [gr.update() for _ in range(9)]
1621
-
1622
- active_idx = 0
1623
- for i, m in enumerate(state["medications"]):
1624
- label = f"{i+1}. {m['drug_name']} ({m['display_name']})"
1625
- if label == selected_label:
1626
- active_idx = i
1627
- break
1628
-
1629
- state["active_index"] = active_idx
1630
- active = state["medications"][active_idx]
1631
-
1632
- image_path = resolve_asset_path(active["medicine"].get("image_path"))
1633
- package_image_val = str(image_path) if image_path and active["accepted"] else None
1634
-
1635
- inventory = active["inventory"]
1636
- ocr_text = active.get("ocr_text", "")
1637
-
1638
- return (
1639
- medicine_details_html(active["medicine"], inventory, ocr_text, active["display_name"], active["confidence"], active["plan"]),
1640
- package_image_val,
1641
- package_status_html(inventory, active["accepted"]),
1642
- confidence_gauge(active["confidence"]),
1643
- candidates_html(active["candidates"]),
1644
- ocr_compare_html(active["medicine"], ocr_text, active["display_name"], active["confidence"], active["plan"]),
1645
- translated_prescription_html(active["plan"]),
1646
- gr.update(visible=True, interactive=active["accepted"]),
1647
  state,
1648
  )
1649
 
@@ -1651,9 +786,7 @@ def select_medication(selected_label: str, state: dict[str, Any] | None):
1651
  def open_locator(state: dict[str, Any] | None):
1652
  if not state:
1653
  raise gr.Error("Analyze a prescription before opening the shelf scanner.")
1654
- active_idx = state.get("active_index", 0)
1655
- active = state["medications"][active_idx]
1656
- return gr.update(visible=True), f"Opening shelf scanner for {active['display_name']} on shelf {active['shelf']}."
1657
 
1658
 
1659
  def locate_on_shelf(shelf_image, state: dict[str, Any] | None):
@@ -1662,9 +795,6 @@ def locate_on_shelf(shelf_image, state: dict[str, Any] | None):
1662
  if shelf_image is None:
1663
  raise gr.Error("Upload or capture a shelf image first.")
1664
 
1665
- active_idx = state.get("active_index", 0)
1666
- active = state["medications"][active_idx]
1667
-
1668
  image = shelf_image.convert("RGB")
1669
  width, height = image.size
1670
  box = (
@@ -1680,16 +810,16 @@ def locate_on_shelf(shelf_image, state: dict[str, Any] | None):
1680
  outline="#10b981",
1681
  )
1682
  draw.rectangle((box[0], max(0, box[1] - 34), box[2], box[1]), fill="#10b981")
1683
- draw.text((box[0] + 10, max(2, box[1] - 27)), active["display_name"], fill="white")
1684
 
1685
  info = f"""
1686
  <div class="result-card compact">
1687
  <h3>Shelf Result</h3>
1688
  <dl class="details">
1689
- <dt>Found</dt><dd>{active['display_name']}</dd>
1690
- <dt>Canonical</dt><dd>{active['medicine']['name']}</dd>
1691
- <dt>Shelf</dt><dd>{active['shelf']}</dd>
1692
- <dt>Row</dt><dd>{active['row']}</dd>
1693
  <dt>Confidence</dt><dd>95%</dd>
1694
  </dl>
1695
  </div>
@@ -1899,94 +1029,6 @@ CSS = """
1899
  font-size: 13px;
1900
  }
1901
  .compact { margin-top: 0; }
1902
-
1903
- /* ── Extraction Card Styles ──────────────────────────────────────────────── */
1904
- .extraction-card {
1905
- border: 1px solid var(--line);
1906
- background: #ffffff;
1907
- border-radius: 8px;
1908
- padding: 20px;
1909
- margin-top: 12px;
1910
- box-shadow: 0 10px 26px rgba(15, 23, 42, 0.045);
1911
- }
1912
- .extraction-header {
1913
- display: flex;
1914
- justify-content: space-between;
1915
- align-items: center;
1916
- margin-bottom: 16px;
1917
- }
1918
- .extraction-header h3 { color: var(--ink) !important; margin: 0; font-size: 20px; }
1919
- .legibility-badge { font-size: 13px; color: var(--muted); }
1920
- .extraction-section {
1921
- border-top: 1px solid var(--line);
1922
- padding-top: 14px;
1923
- margin-top: 14px;
1924
- }
1925
- .extraction-section h4 {
1926
- color: var(--ink) !important;
1927
- margin: 0 0 10px;
1928
- font-size: 15px;
1929
- font-weight: 700;
1930
- }
1931
- .extraction-fields {
1932
- display: grid;
1933
- grid-template-columns: 160px 1fr;
1934
- gap: 6px 14px;
1935
- margin: 0;
1936
- }
1937
- .extraction-fields dt { color: var(--muted) !important; font-size: 13px; }
1938
- .extraction-fields dd { color: var(--ink) !important; margin: 0; font-weight: 600; font-size: 14px; }
1939
-
1940
- /* ── Compliance Banner Styles ────────────────────────────────────────────── */
1941
- .compliance-banner {
1942
- border-radius: 8px;
1943
- padding: 14px 18px;
1944
- margin-bottom: 12px;
1945
- display: flex;
1946
- flex-direction: column;
1947
- gap: 4px;
1948
- }
1949
- .compliance-banner strong { font-size: 14px; }
1950
- .compliance-banner span { font-size: 13px; }
1951
- .compliance-ok {
1952
- background: #ecfdf5;
1953
- border: 1px solid #86efac;
1954
- color: #065f46;
1955
- }
1956
- .compliance-ok strong { color: #065f46; }
1957
- .compliance-ok span { color: #047857; }
1958
- .compliance-warn {
1959
- background: #fffbeb;
1960
- border: 1px solid #fcd34d;
1961
- color: #92400e;
1962
- }
1963
- .compliance-warn strong { color: #92400e; }
1964
- .compliance-warn span { color: #b45309; }
1965
- .compliance-alert {
1966
- background: #fef2f2;
1967
- border: 1px solid #fca5a5;
1968
- color: #991b1b;
1969
- }
1970
- .compliance-alert strong { color: #991b1b; }
1971
- .compliance-alert span { color: #b91c1c; }
1972
-
1973
- /* ── Validation Flags ────────────────────────────────────────────────────── */
1974
- .validation-flags {
1975
- display: flex;
1976
- flex-wrap: wrap;
1977
- gap: 6px;
1978
- margin-bottom: 10px;
1979
- }
1980
- .flag-pill {
1981
- background: #fef3c7;
1982
- border: 1px solid #fcd34d;
1983
- color: #92400e;
1984
- border-radius: 999px;
1985
- padding: 3px 10px;
1986
- font-size: 11px;
1987
- font-weight: 700;
1988
- }
1989
-
1990
  .gradio-container button.primary,
1991
  .gradio-container button[variant="primary"] {
1992
  background: var(--green) !important;
@@ -2003,7 +1045,6 @@ CSS = """
2003
  .powered { text-align: left; margin-top: 10px; }
2004
  .metric-row, .flow, .stock-card, .compare-grid { grid-template-columns: 1fr; }
2005
  .details { grid-template-columns: 1fr; }
2006
- .extraction-fields { grid-template-columns: 1fr; }
2007
  .translated-head { align-items: flex-start; flex-direction: column; }
2008
  }
2009
  """
@@ -2048,22 +1089,12 @@ with gr.Blocks(title="PharmaCopilot") as demo:
2048
  pipeline = gr.HTML(pipeline_html(0))
2049
 
2050
  with gr.Group(visible=False, elem_classes=["app-shell"]) as result_section:
2051
- gr.Markdown("## Prescription Analysis Result")
2052
- medication_select = gr.Dropdown(
2053
- label="Select Medication to View/Verify",
2054
- choices=[],
2055
- interactive=True,
2056
- visible=False,
2057
- )
2058
- compliance_banner = gr.HTML()
2059
- extraction_card = gr.HTML()
2060
  with gr.Row():
2061
- with gr.Column(scale=4):
2062
  details = gr.HTML()
2063
  gauge = gr.Plot(label="Confidence Gauge")
2064
- with gr.Column(scale=4):
2065
- segmented_image = gr.Image(label="Segmented Bounding Boxes", height=360)
2066
- with gr.Column(scale=4):
2067
  package_image = gr.Image(label="Packaging Image", height=360)
2068
  stock = gr.HTML()
2069
  with gr.Accordion("Top Candidates", open=False):
@@ -2095,10 +1126,7 @@ with gr.Blocks(title="PharmaCopilot") as demo:
2095
  outputs=[
2096
  live_metrics,
2097
  pipeline,
2098
- compliance_banner,
2099
- extraction_card,
2100
  details,
2101
- segmented_image,
2102
  package_image,
2103
  stock,
2104
  gauge,
@@ -2107,22 +1135,6 @@ with gr.Blocks(title="PharmaCopilot") as demo:
2107
  translated_prescription,
2108
  result_section,
2109
  locate_btn,
2110
- medication_select,
2111
- state,
2112
- ],
2113
- )
2114
- medication_select.change(
2115
- select_medication,
2116
- inputs=[medication_select, state],
2117
- outputs=[
2118
- details,
2119
- package_image,
2120
- stock,
2121
- gauge,
2122
- candidates,
2123
- comparison,
2124
- translated_prescription,
2125
- locate_btn,
2126
  state,
2127
  ],
2128
  )
 
2
 
3
  import json
4
  import os
 
5
  import unicodedata
6
  import time
7
  from difflib import SequenceMatcher, get_close_matches
 
45
  return DATA_DIR / relative
46
 
47
 
48
+ MEDICINES_PATH = data_path("medicines_master.json")
49
+ BRAND_MAP_PATH = data_path("training/bd_brand_to_generic.json")
50
  INVENTORY_PATH = data_path("inventory.json")
51
 
52
  MODEL_ID = os.getenv("PHARMACOPILOT_MODEL_ID", "openbmb/MiniCPM-V-4_5")
53
+ LIVE_GPU_OCR = os.getenv("PHARMACOPILOT_LIVE_GPU_OCR", "1").lower() not in {"0", "false", "no"}
54
+ LIVE_NEMOTRON = os.getenv("PHARMACOPILOT_LIVE_NEMOTRON", "1").lower() not in {"0", "false", "no"}
55
+ NEMOTRON_MODEL_ID = os.getenv("NEMOTRON_MODEL_ID", "nvidia/Nemotron-Mini-4B-Instruct")
56
  NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY", "")
57
  NVIDIA_BASE_URL = os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com/v1")
58
  NVIDIA_NIM_MODEL = os.getenv("NVIDIA_NIM_MODEL", "nvidia/nvidia-nemotron-nano-9b-v2")
59
+ DEMO_OCR_TEXT = "Neuoxen"
60
+ DEMO_PROMPT = "Read the handwritten medicine name in the image. Return only the text."
61
  ACCEPTANCE_THRESHOLD = int(os.getenv("PHARMACOPILOT_ACCEPTANCE_THRESHOLD", "75"))
62
  OCR_MODEL = None
63
  OCR_TOKENIZER = None
64
  NEMOTRON_MODEL = None
65
  NEMOTRON_TOKENIZER = None
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  def load_json(path: Path, fallback: Any) -> Any:
69
  if not path.exists():
 
108
  return " ".join(text.strip().lower().split())
109
 
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  def clean_prediction(raw_prediction: str) -> str:
 
112
  text = str(raw_prediction or "").strip()
113
  text = text.replace("\r", "\n")
114
  text = text.split("\n")[0].strip() if "\n" in text else text
 
139
  return brands[0] if brands else medicine["name"]
140
 
141
 
142
+ def find_medicine_from_ocr(ocr_text: str) -> tuple[dict[str, Any], list[dict[str, Any]], str, int]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  query = normalize(ocr_text)
144
+ corrected_query = query
145
+ canonical = BD_BRAND_TO_GENERIC.get(corrected_query, corrected_query)
146
+ direct_medicine = MED_BY_NAME.get(normalize(canonical))
147
 
148
+ candidate_names = set()
149
+ for med in MEDICINES:
150
+ candidate_names.add(med["name"])
151
+ candidate_names.add(med.get("generic_name") or med["name"])
152
+ candidate_names.update(med.get("brand_names") or [])
153
+ candidate_names.update(BD_BRAND_TO_GENERIC.keys())
154
 
155
  scored = []
156
+ for name in candidate_names:
157
+ score = SequenceMatcher(None, query, normalize(name)).ratio()
158
  if score > 0.35:
159
+ mapped = BD_BRAND_TO_GENERIC.get(normalize(name), normalize(name))
160
+ med = MED_BY_NAME.get(mapped) or MED_BY_NAME.get(normalize(name))
161
+ if med:
162
+ scored.append({"label": name, "medicine": med, "score": score})
 
163
 
164
  scored.sort(key=lambda item: item["score"], reverse=True)
165
+ if direct_medicine:
166
+ medicine = direct_medicine
167
  display_name = label_for_medicine(ocr_text, medicine)
168
  primary_score = 0.97
169
  elif scored:
 
172
  display_name = best["label"]
173
  primary_score = best["score"]
174
  else:
175
+ medicine = MEDICINES[0]
176
  display_name = clean_prediction(ocr_text) or "Needs review"
177
  primary_score = 0.0
178
 
 
 
 
 
 
 
 
 
 
 
 
179
  top = [{"label": display_name, "medicine": medicine, "score": primary_score}]
180
  seen_ids = {medicine["id"]}
 
 
181
  for item in scored:
182
  if item["medicine"]["id"] in seen_ids:
183
  continue
 
190
  fallback_name = get_close_matches(query, list(BD_BRAND_TO_GENERIC.keys()), n=1)
191
  if fallback_name:
192
  mapped = BD_BRAND_TO_GENERIC[fallback_name[0]]
193
+ med = MED_BY_NAME.get(mapped)
194
+ if med and med["id"] not in seen_ids:
195
+ top.append({"label": fallback_name[0], "medicine": med, "score": 0.62})
196
+ seen_ids.add(med["id"])
197
  continue
198
  break
199
 
200
+ confidence = max(0, min(99, round(primary_score * 100)))
201
  return medicine, top, display_name, confidence
202
 
203
 
 
279
  return json.loads(cleaned)
280
 
281
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  def build_validation_prompt(
283
  ocr_text: str,
 
284
  medicine: dict[str, Any],
285
  display_name: str,
286
  confidence: int,
287
  retrieval_candidates: list[dict[str, Any]],
288
  ) -> str:
289
  validation_payload = {
290
+ "ocr_text": ocr_text,
 
 
 
 
 
291
  "retrieved_display_name": display_name,
292
  "retrieved_canonical_name": medicine.get("name", "Unknown"),
293
  "retrieval_confidence": confidence,
294
+ "strength": first_strength(medicine.get("strength", "")),
295
  "category": medicine.get("category", "Unknown"),
296
  "top_candidates": [
297
  {
 
302
  for item in retrieval_candidates[:3]
303
  ],
304
  }
305
+ return f"""
306
+ You are a pharmacy prescription validation assistant.
 
 
 
 
 
 
 
 
 
 
 
 
307
 
308
  Input JSON:
309
  {json.dumps(validation_payload, ensure_ascii=False)}
310
 
311
  Task:
312
+ 1. Decide whether the retrieved medicine is safe to accept.
313
  2. Translate the prescription into a clean pharmacy instruction row.
314
+ 3. Do not invent dose/timing/duration if it is not visible or inferable.
315
  4. If OCR and retrieved medicine clearly disagree, return needs_review.
 
 
316
 
317
  Return ONLY valid JSON with these keys:
318
  status: one of validated, needs_review
 
326
  instructions
327
  validation_note
328
  ocr_text
 
329
  """
330
 
331
 
 
353
  messages=[{"role": "user", "content": prompt}],
354
  temperature=0,
355
  top_p=1,
356
+ max_tokens=320,
357
  )
358
  content = response.choices[0].message.content or ""
359
  plan = extract_json_object(content)
 
384
  )
385
 
386
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
387
  def validate_with_nemotron(
388
  ocr_text: str,
 
389
  medicine: dict[str, Any],
390
  display_name: str,
391
  confidence: int,
 
393
  ) -> dict[str, Any]:
394
  global NEMOTRON_MODEL, NEMOTRON_TOKENIZER
395
 
396
+ if not LIVE_NEMOTRON:
397
+ return fallback_prescription_plan(
398
+ ocr_text, medicine, display_name, confidence, "Local Nemotron validation is disabled"
399
+ )
400
+
401
+ prompt = build_validation_prompt(ocr_text, medicine, display_name, confidence, retrieval_candidates)
402
  try:
403
+ import torch
404
+ from transformers import AutoModelForCausalLM, AutoTokenizer
405
+
406
+ if NEMOTRON_MODEL is None or NEMOTRON_TOKENIZER is None:
407
+ NEMOTRON_TOKENIZER = AutoTokenizer.from_pretrained(NEMOTRON_MODEL_ID, trust_remote_code=True)
408
+ NEMOTRON_MODEL = AutoModelForCausalLM.from_pretrained(
409
+ NEMOTRON_MODEL_ID,
410
+ trust_remote_code=True,
411
+ torch_dtype=torch.bfloat16,
412
+ device_map="auto",
413
+ ).eval()
414
+
415
+ messages = [{"role": "user", "content": prompt}]
416
+ if hasattr(NEMOTRON_TOKENIZER, "apply_chat_template"):
417
+ input_ids = NEMOTRON_TOKENIZER.apply_chat_template(
418
+ messages,
419
+ add_generation_prompt=True,
420
+ return_tensors="pt",
421
+ )
422
+ else:
423
+ input_ids = NEMOTRON_TOKENIZER(prompt, return_tensors="pt").input_ids
424
+
425
+ device = next(NEMOTRON_MODEL.parameters()).device
426
+ input_ids = input_ids.to(device)
427
+ with torch.inference_mode():
428
+ output_ids = NEMOTRON_MODEL.generate(
429
+ input_ids,
430
+ do_sample=False,
431
+ temperature=0.0,
432
+ top_p=1.0,
433
+ max_new_tokens=320,
434
+ pad_token_id=NEMOTRON_TOKENIZER.eos_token_id,
435
+ )
436
+ generated = output_ids[0][input_ids.shape[-1] :]
437
+ content = NEMOTRON_TOKENIZER.decode(generated, skip_special_tokens=True).strip()
438
  plan = extract_json_object(content)
439
  if plan.get("status") not in {"validated", "needs_review"}:
440
  plan["status"] = "needs_review"
 
474
  elif fallback_path.exists():
475
  text = fallback_path.read_text(encoding="utf-8", errors="ignore")
476
  if "ocr_accuracy" in text:
477
+ # Keep a conservative fallback tied to the checked-in report values.
478
  ocr_accuracy = 0.37888446215139443
479
  retrieval_accuracy = 0.6055776892430279
480
 
 
530
  }.get(validation_status, "Nemotron Review")
531
  steps = [
532
  ("Prescription", "uploaded"),
533
+ ("MiniCPM OCR", "ran on image"),
 
534
  ("Retrieval Engine", "ranked candidates"),
535
  (validation_label, "returned a decision"),
536
+ ("Pharmacy View", "prepared"),
537
  ]
538
  cards = []
539
  logs = []
 
581
  )
582
  return f"""
583
  <div class="result-card">
584
+ <h3>Prescription Details</h3>
585
  <dl class="details">
586
  <dt>Medicine</dt><dd>{medicine_label}</dd>
587
  <dt>Generic</dt><dd>{generic_label}</dd>
 
593
  </dl>
594
  <div class="explain">
595
  <h4>AI Explanation</h4>
596
+ <p><b>OCR detected:</b> "{ocr_text}"</p>
597
  <p><b>Retrieved:</b> {display_name} ({medicine.get('name', 'Unknown')})</p>
598
  <p><b>Validation:</b> {validation_label}</p>
599
  <p><b>Inventory:</b> {inventory_label}</p>
 
602
  """
603
 
604
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
605
  def translated_prescription_html(plan: dict[str, Any]) -> str:
606
  rows = [
607
  ("Medicine", plan.get("medicine_name") or "Not confirmed"),
 
615
  ]
616
  row_html = "".join(f"<dt>{label}</dt><dd>{value}</dd>" for label, value in rows)
617
  status = plan.get("status", "needs_review").replace("_", " ").title()
 
 
 
 
 
 
618
  return f"""
619
  <div class="translated-card">
620
  <div class="translated-head">
621
  <h3>Translated Prescription</h3>
622
  <span class="status-pill">{status}</span>
623
  </div>
 
624
  <dl class="details translated-details">{row_html}</dl>
625
  <p class="fine-print">Generated from OCR text and retrieval candidates. Confirm before dispensing.</p>
626
  </div>
 
670
  plan: dict[str, Any],
671
  ) -> str:
672
  corrected = display_name if plan.get("status") == "validated" else f"Needs review: {display_name}"
 
 
673
  return f"""
674
  <div class="compare-grid">
675
+ <div><span>OCR Output</span><strong>{ocr_text}</strong></div>
676
  <div><span>AI Corrected</span><strong>{corrected}</strong></div>
677
  <div><span>Canonical</span><strong>{medicine['name'] if plan.get('status') == 'validated' else 'Not confirmed'}</strong></div>
678
  </div>
679
  """
680
 
681
 
682
+ def run_minicpm_ocr(pil_image: Image.Image) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
683
  global OCR_MODEL, OCR_TOKENIZER
684
+ if not LIVE_GPU_OCR:
685
+ return DEMO_OCR_TEXT
686
 
687
  try:
688
  import torch
 
701
  if torch.cuda.is_available():
702
  OCR_MODEL = OCR_MODEL.cuda()
703
 
704
+ messages = [{"role": "user", "content": [pil_image.convert("RGB"), DEMO_PROMPT]}]
705
+ kwargs = {
706
+ "image": None,
707
+ "msgs": messages,
708
+ "tokenizer": OCR_TOKENIZER,
709
+ "sampling": False,
710
+ "stream": False,
711
+ "max_new_tokens": 20,
712
+ "enable_thinking": False,
713
+ "temperature": 0.0,
714
+ "top_p": 0.1,
715
+ }
716
  try:
717
+ raw_prediction = OCR_MODEL.chat(**kwargs)
718
+ except TypeError:
719
+ kwargs.pop("temperature", None)
720
+ kwargs.pop("top_p", None)
721
+ raw_prediction = OCR_MODEL.chat(**kwargs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
722
 
723
+ if not isinstance(raw_prediction, str):
724
+ raw_prediction = "".join(list(raw_prediction))
725
+ return clean_prediction(raw_prediction) or raw_prediction.strip()
726
 
 
727
 
728
  @spaces.GPU(duration=300)
729
  def analyze_prescription(image, progress=gr.Progress()):
 
731
  if image is None:
732
  raise gr.Error("Upload or capture a prescription image first.")
733
 
734
+ for pct, label in [
735
+ (0.20, "Prescription uploaded"),
736
+ (0.35, "MiniCPM OCR reading handwriting"),
737
+ ]:
738
+ progress(pct, desc=label)
739
+ time.sleep(0.15)
740
 
741
+ ocr_text = run_minicpm_ocr(image)
 
 
742
  unload_ocr_model()
743
 
744
+ for pct, label in [
745
+ (0.70, "Retrieval search over medicine aliases"),
746
+ (0.88, "Nemotron prescription validation"),
747
+ (1.00, "Result prepared"),
748
+ ]:
749
+ progress(pct, desc=label)
750
+ time.sleep(0.25)
751
+
752
+ medicine, candidates, display_name, confidence = find_medicine_from_ocr(ocr_text)
753
+ plan = validate_with_nemotron(ocr_text, medicine, display_name, confidence, candidates)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
754
  unload_nemotron_model()
755
+ accepted = plan.get("status") == "validated" and confidence >= ACCEPTANCE_THRESHOLD
756
+ inventory = get_inventory(medicine)
757
+ image_path = resolve_asset_path(medicine.get("image_path"))
758
+ package_image = str(image_path) if image_path and accepted else None
759
+
 
 
 
760
  state = {
761
+ "medicine_id": medicine["id"],
762
+ "medicine_name": medicine["name"],
763
+ "display_name": display_name,
764
+ "accepted": accepted,
765
+ "shelf": inventory["shelf"],
766
+ "row": inventory["row"],
767
  }
768
  SESSION_SEARCHES += 1
 
 
769
 
770
  return (
771
  load_kpi_metrics(SESSION_SEARCHES),
772
+ pipeline_html(5, plan.get("status", "needs_review")),
773
+ medicine_details_html(medicine, inventory, ocr_text, display_name, confidence, plan),
774
+ package_image,
775
+ package_status_html(inventory, accepted),
776
+ confidence_gauge(confidence),
777
+ candidates_html(candidates),
778
+ ocr_compare_html(medicine, ocr_text, display_name, confidence, plan),
779
+ translated_prescription_html(plan),
 
 
 
780
  gr.update(visible=True),
781
+ gr.update(visible=True, interactive=accepted),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
782
  state,
783
  )
784
 
 
786
  def open_locator(state: dict[str, Any] | None):
787
  if not state:
788
  raise gr.Error("Analyze a prescription before opening the shelf scanner.")
789
+ return gr.update(visible=True), f"Opening shelf scanner for {state['display_name']} on shelf {state['shelf']}."
 
 
790
 
791
 
792
  def locate_on_shelf(shelf_image, state: dict[str, Any] | None):
 
795
  if shelf_image is None:
796
  raise gr.Error("Upload or capture a shelf image first.")
797
 
 
 
 
798
  image = shelf_image.convert("RGB")
799
  width, height = image.size
800
  box = (
 
810
  outline="#10b981",
811
  )
812
  draw.rectangle((box[0], max(0, box[1] - 34), box[2], box[1]), fill="#10b981")
813
+ draw.text((box[0] + 10, max(2, box[1] - 27)), state["display_name"], fill="white")
814
 
815
  info = f"""
816
  <div class="result-card compact">
817
  <h3>Shelf Result</h3>
818
  <dl class="details">
819
+ <dt>Found</dt><dd>{state['display_name']}</dd>
820
+ <dt>Canonical</dt><dd>{state['medicine_name']}</dd>
821
+ <dt>Shelf</dt><dd>{state['shelf']}</dd>
822
+ <dt>Row</dt><dd>{state['row']}</dd>
823
  <dt>Confidence</dt><dd>95%</dd>
824
  </dl>
825
  </div>
 
1029
  font-size: 13px;
1030
  }
1031
  .compact { margin-top: 0; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1032
  .gradio-container button.primary,
1033
  .gradio-container button[variant="primary"] {
1034
  background: var(--green) !important;
 
1045
  .powered { text-align: left; margin-top: 10px; }
1046
  .metric-row, .flow, .stock-card, .compare-grid { grid-template-columns: 1fr; }
1047
  .details { grid-template-columns: 1fr; }
 
1048
  .translated-head { align-items: flex-start; flex-direction: column; }
1049
  }
1050
  """
 
1089
  pipeline = gr.HTML(pipeline_html(0))
1090
 
1091
  with gr.Group(visible=False, elem_classes=["app-shell"]) as result_section:
1092
+ gr.Markdown("## Medicine Result")
 
 
 
 
 
 
 
 
1093
  with gr.Row():
1094
+ with gr.Column(scale=5):
1095
  details = gr.HTML()
1096
  gauge = gr.Plot(label="Confidence Gauge")
1097
+ with gr.Column(scale=5):
 
 
1098
  package_image = gr.Image(label="Packaging Image", height=360)
1099
  stock = gr.HTML()
1100
  with gr.Accordion("Top Candidates", open=False):
 
1126
  outputs=[
1127
  live_metrics,
1128
  pipeline,
 
 
1129
  details,
 
1130
  package_image,
1131
  stock,
1132
  gauge,
 
1135
  translated_prescription,
1136
  result_section,
1137
  locate_btn,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1138
  state,
1139
  ],
1140
  )
requirements.txt CHANGED
@@ -10,5 +10,3 @@ sentencepiece
10
  protobuf
11
  einops
12
  timm
13
- openai
14
- opencv-python-headless
 
10
  protobuf
11
  einops
12
  timm