|
|
import asyncio
import json
import logging
import math
import os
import re
import shutil
import uuid
from typing import List, Dict, Optional, Any

import pdfplumber
from fastapi import FastAPI, HTTPException, UploadFile, File, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
|
|
|
|
|
# Directory that contains this module; every data path below is built from it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Folder where uploaded PDFs are written by the upload endpoint.
DATA_DIR = os.path.join(BASE_DIR, 'data', 'uploaded')
# JSON file read by load_master_index().
MASTER_INDEX_PATH = os.path.join(BASE_DIR, 'data', 'master_index.json')

# Create the upload folder at import time so the endpoints can write to it.
os.makedirs(DATA_DIR, exist_ok=True)

# Module-level cache for the parsed master index; populated by load_master_index().
_MASTER_INDEX_CACHE = {}
|
|
|
|
|
|
|
|
|
|
|
# Category name -> lowercase keywords used by determine_item_type().
#
# Order matters: determine_item_type() walks this dict in insertion order and
# returns the first category that has a matching keyword, so an earlier
# category wins whenever keywords overlap (e.g. "defibrillator" is listed under
# both "Cardiovascular Products" and "Emergency & Trauma Care"; the former is
# always the one returned).
CATEGORY_DEFINITIONS = {
    "Pharmaceuticals & Biologics": [
        "tablet", "tab", "capsule", "cap", "syrup", "suspension", "susp", "injection", "inj", "vial", "ampoule", "amp",
        "drops", "gtt", "inhaler", "vaccine", "insulin", "dose", "drug", "medication", "ointment", "cream", "gel",
        "lotion", "suppository", "supp", "antibiotic", "antiviral", "analgesic", "anesthetic", "hormone", "steroid",
        "vitamin", "mineral", "supplement", "lozenge", "patch", "solution", "powder for suspension", "elixir", "serum",
        "antitoxin"
    ],
    "Surgical Products": [
        "scalpel", "forceps", "retractor", "clamp", "suture", "stapler", "surgical mesh", "hemostatic", "sealant",
        "surgical drape", "surgical gown", "laparoscopic", "robotic surgery", "electrosurgical", "surgical laser",
        "surgical blade", "trocar", "surgical clip", "surgical scissor", "needle holder"
    ],
    "Orthopedic & Spine": [
        "orthopedic", "spine", "joint replacement", "trauma fixation", "bone plate", "bone screw", "intramedullary rod",
        "bone nail", "spinal implant", "spinal fusion", "bone graft", "orthopedic brace", "cast", "arthroscopy",
        "fixator", "prosthesis", "bone drill", "bone saw"
    ],
    "Cardiovascular Products": [
        "cardiac stent", "pacemaker", "defibrillator", "icd", "heart valve", "vascular graft", "cardiac catheter",
        "guidewire", "cardiac balloon", "ablation", "coronary", "angioplasty", "introducer sheath"
    ],
    "Medical Imaging Equipment": [
        "mri", "ct scanner", "x-ray", "ultrasound", "mammography", "fluoroscopy", "pet scanner", "c-arm",
        "medical imaging", "transducer", "x-ray film", "contrast media", "lead apron"
    ],
    "Diagnostic Products": [
        "diagnostic", "test kit", "glucose test", "reagent", "immunoassay", "chemistry analyzer", "hematology",
        "microbiology", "culture media", "pregnancy test", "covid", "rapid test", "urinalysis", "penlight",
        "specula", "otoscope", "ophthalmoscope", "lancet", "glucometer strips", "test strip"
    ],
    "Patient Monitoring Equipment": [
        "vital signs", "ecg", "ekg", "pulse oximeter", "blood pressure monitor", "sphygmomanometer",
        "medical thermometer", "capnography", "fetal monitor", "telemetry", "spo2 sensor", "bp cuff", "temperature probe"
    ],
    "Respiratory & Anesthesia": [
        "ventilator", "anesthesia machine", "oxygen concentrator", "nebulizer", "cpap", "bipap", "respiratory",
        "endotracheal", "tracheostomy", "spirometer", "oxygen mask", "breathing circuit", "nasal cannula",
        "resuscitator", "laryngoscope"
    ],
    "Infusion & Vascular Access": [
        "infusion pump", "syringe pump", "iv set", "iv catheter", "venous", "picc", "iv port", "dialysis catheter",
        "administration set", "extension set", "stopcock", "giving set", "saline", "dextrose", "ringer",
        "sodium chloride", "water for injection"
    ],
    "Wound Care & Tissue Management": [
        "wound dressing", "bandage", "gauze", "medical tape", "plaster", "adhesive", "wound foam", "alginate",
        "hydrocolloid", "compression bandage", "ostomy", "skin substitute", "negative pressure"
    ],
    "Dialysis & Renal Care": [
        "hemodialysis", "peritoneal", "dialyzer", "blood line", "fistula needle", "dialysis concentrate", "bicarbonate"
    ],
    "Ophthalmic Products": [
        "intraocular", "intraocular lens", "phaco", "vitrectomy", "lasik", "contact lens", "viscoelastic",
        "ophthalmic solution", "eye drops"
    ],
    "Dental Products": [
        "dental implant", "orthodontic", "dental bracket", "dental wire", "dental drill", "dental handpiece",
        "dental cement", "dental composite", "amalgam", "impression material", "teeth whitening", "dental chair"
    ],
    "Neurology & Neurosurgery": [
        "neurostimulation", "spinal cord stimulator", "neuro coil", "flow diverter", "cranial", "shunt",
        "neuro electrode", "eeg", "emg"
    ],
    "Laboratory Equipment & Supplies": [
        "microscope", "lab centrifuge", "incubator", "autoclave", "pipette", "glassware", "test tube", "petri dish",
        "flask", "beaker", "microscope slide", "cover glass", "fume hood", "biosafety cabinet"
    ],
    "Personal Protective Equipment (PPE)": [
        "ppe", "n95", "face shield", "safety eyewear", "goggles", "protective apron", "shoe cover", "head cover",
        "coverall", "isolation gown", "hazmat", "surgical mask"
    ],
    "Sterilization & Disinfection": [
        "sterilization", "disinfectant", "antiseptic", "povidone", "iodine", "chlorhexidine", "alcohol swab",
        "hand sanitizer", "medical soap", "enzymatic cleaner", "detergent", "washer disinfector", "sterilizer",
        "sterilization indicator"
    ],
    "Hospital Furniture & Equipment": [
        "hospital bed", "examination table", "stretcher", "medical trolley", "medical cart", "medical cabinet",
        "bedside locker", "overbed table", "iv pole", "wheelchair"
    ],
    "Rehabilitation & Physical Therapy": [
        "rehabilitation", "physiotherapy", "walker", "walking cane", "crutch", "exercise band", "traction",
        "electrotherapy", "massage table", "orthosis"
    ],
    "Home Healthcare Products": [
        "home care", "blood glucose meter", "hearing aid", "mobility aid", "bathroom safety", "commode"
    ],
    "Emergency & Trauma Care": [
        "emergency kit", "trauma kit", "first aid", "aed", "defibrillator", "manual resuscitator", "suction unit",
        "immobilizer", "cervical collar", "splint", "tourniquet", "crash cart"
    ],
    "Maternal & Neonatal Care": [
        "maternal", "neonatal", "infant incubator", "infant warmer", "phototherapy", "breast pump", "obstetric",
        "birthing bed", "fetal doppler", "umbilical"
    ],
    "Urology Products": [
        "urology", "foley catheter", "urine bag", "urinary drainage", "ureteral stent", "stone basket"
    ],
    "Gastroenterology & Endoscopy": [
        "endoscope", "gastroscope", "colonoscope", "biopsy forceps", "polypectomy snare", "gastric balloon", "ercp"
    ],
    "Oncology Products": [
        "oncology", "chemotherapy", "radiotherapy", "brachytherapy", "port-a-cath", "cancer diagnostic"
    ],
    "Pain Management": [
        "pain management", "pca pump", "epidural", "nerve block", "tens unit"
    ],
    "Sleep Medicine": [
        "sleep apnea", "cpap mask", "bipap mask", "sleep tubing", "polysomnography"
    ],
    "Telemedicine & Digital Health": [
        "telemedicine", "telehealth", "remote monitor", "medical software", "health app"
    ],
    "Blood Management": [
        "blood bag", "blood transfusion", "blood bank", "blood warmer", "apheresis"
    ],
    "Mortuary & Pathology": [
        "mortuary", "autopsy", "body bag", "morgue fridge", "dissection table", "microtome", "tissue processor"
    ],
    "Environmental Control": [
        "medical gas", "medical vacuum", "medical air plant", "gas manifold", "gas outlet", "gas alarm"
    ],
    "Mobility & Accessibility": [
        "patient lift", "patient hoist", "wheelchair ramp", "stair lift", "transfer board"
    ],
    "Bariatric Products": [
        "bariatric bed", "bariatric wheelchair", "heavy duty scale"
    ],
    "Medical Textiles": [
        "hospital linen", "bed sheet", "pillow case", "medical blanket", "towel", "privacy curtain", "medical uniform",
        "scrub suit", "lab coat"
    ],
    "Infection Control Products": [
        "waste bin", "sharps container", "biohazard bag", "spill kit", "air purifier"
    ],
    "Medical Gases & Cryogenics": [
        "gas cylinder", "oxygen regulator", "flowmeter", "liquid oxygen", "nitrogen tank"
    ],
    "Nutrition & Feeding": [
        "enteral feeding", "clinical nutrition", "nasogastric tube", "feeding pump", "feeding set", "peg tube"
    ],
    "Specimen Collection & Transport": [
        "specimen container", "sample collection", "transport media", "transport swab", "urine container",
        "stool container", "cool box", "transport bag"
    ],
    "Medical Software & IT": [
        "emr", "ehr", "pacs", "ris", "lis", "his", "hospital information system"
    ],
    "Aesthetics & Dermatology": [
        "dermatology", "aesthetic laser", "ipl", "dermal filler", "botulinum", "botox", "chemical peel",
        "microdermabrasion"
    ],

    # Listed last; this category name is also the value determine_item_type()
    # returns when no keyword in any category matches.
    "Medical Supplies & Consumables": [
        "syringe", "needle", "glove", "examination glove", "disposable", "consumable", "cotton wool", "alcohol prep",
        "urinal", "bedpan", "underpad", "tongue depressor", "applicator", "lubricant jelly", "cannula"

    ]
}
|
|
|
|
|
def load_master_index():
    """Return the master index, reading it from disk on first use.

    The parsed JSON is kept in the module-level ``_MASTER_INDEX_CACHE``. While
    that cache is empty (falsy) and ``MASTER_INDEX_PATH`` exists, the file is
    (re)read; when the file is absent the empty cache is returned unchanged.
    """
    global _MASTER_INDEX_CACHE

    needs_load = not _MASTER_INDEX_CACHE
    if needs_load and os.path.exists(MASTER_INDEX_PATH):
        with open(MASTER_INDEX_PATH, 'r', encoding='utf-8') as handle:
            _MASTER_INDEX_CACHE = json.load(handle)

    return _MASTER_INDEX_CACHE
|
|
|
|
|
def clean_text(text: Optional[str]) -> str:
    """Flatten a table cell to a single trimmed line.

    Newlines become spaces and surrounding whitespace is removed. ``None`` and
    the empty string both yield ``""``.
    """
    if not text:
        return ""
    single_line = text.replace('\n', ' ')
    return single_line.strip()
|
|
|
|
|
def is_garbage_row(row_text: str) -> bool:
    """Tell whether a row is form boilerplate rather than a line item.

    Returns True when the lower-cased text contains any of the known
    placeholder / header / footer fragments listed below.
    """
    boilerplate_fragments = (
        "click or tap",
        "enter text",
        "rfq reference",
        "signature",
        "date:",
        "authorized by",
        "page ",
        "payment terms",
    )
    lowered = row_text.lower()
    for fragment in boilerplate_fragments:
        if fragment in lowered:
            return True
    return False
|
|
|
|
|
def determine_item_type(description: str, form: str) -> str:
    """Classify an item into one of the CATEGORY_DEFINITIONS categories.

    The description and form/unit are joined and lower-cased, then each
    category's keywords are tried in definition order. Keywords are matched as
    whole words (regex ``\\b`` boundaries), so "fusion" does not match inside
    "infusion". The first category with a hit is returned; when nothing
    matches the result is 'Medical Supplies & Consumables'.
    """
    haystack = " ".join((description, form)).lower()

    def mentions(keyword: str) -> bool:
        # Whole-word search for one keyword in the combined text.
        return re.search(r'\b' + re.escape(keyword) + r'\b', haystack) is not None

    first_hit = next(
        (
            category
            for category, keywords in CATEGORY_DEFINITIONS.items()
            if any(mentions(keyword) for keyword in keywords)
        ),
        None,
    )
    if first_hit is not None:
        return first_hit

    return 'Medical Supplies & Consumables'
|
|
|
|
|
async def delete_file_safety_net(file_path: str, delay: int = 600):
    """Delete ``file_path`` after ``delay`` seconds as a best-effort cleanup.

    Args:
        file_path: Path of the file to remove.
        delay: Seconds to wait before attempting the removal.

    Never raises for filesystem problems: a file that is already gone is the
    expected case and is ignored; any other OS error is logged so a stuck file
    can be diagnosed instead of being silently swallowed.
    """
    await asyncio.sleep(delay)
    try:
        # Remove directly instead of checking existence first; the file may
        # disappear between the check and the removal.
        os.remove(file_path)
    except FileNotFoundError:
        # Already removed elsewhere; nothing left to do.
        return
    except OSError:
        logging.getLogger(__name__).warning(
            "Could not remove expired upload %s", file_path, exc_info=True
        )
|
|
|
|
|
def parse_pdf_file(file_path: str) -> List[Dict[str, Any]]:
    """Extract line items from every table in a PDF.

    Args:
        file_path: Path of the PDF to open with pdfplumber.

    Returns:
        One dict per recognised row with the keys ``inn_name`` (description),
        ``quantity`` (int), ``form`` (unit text, "Unit" when none is found),
        ``dosage`` (always "") and ``type`` (category from
        determine_item_type).

    Rows that are empty, look like boilerplate or a header, or contain no
    quantity are skipped. A row whose parsing fails unexpectedly is logged and
    skipped so one bad row does not abort the whole document.
    """
    extracted_items = []
    with pdfplumber.open(file_path) as pdf:
        for page in pdf.pages:
            for table in page.extract_tables():
                for row in table:
                    # Clean each cell once and drop the ones that end up empty.
                    cells = [text for text in (clean_text(cell) for cell in row) if text]
                    if not cells:
                        continue

                    row_text = " ".join(cells)
                    if is_garbage_row(row_text):
                        continue

                    # Skip the column-header row.
                    lowered = row_text.lower()
                    if "description" in lowered and "qty" in lowered:
                        continue

                    try:
                        item = _parse_table_row(cells)
                    except Exception:
                        # Best effort: keep going, but leave a trace of the row.
                        logging.getLogger(__name__).exception(
                            "Skipping unparseable table row: %r", cells
                        )
                        continue

                    if item is not None:
                        extracted_items.append(item)
    return extracted_items


def _find_quantity(cells: List[str]) -> Optional[tuple]:
    """Return ``(quantity, index)`` for the right-most integer-like cell, else None."""
    for idx in range(len(cells) - 1, -1, -1):
        # NOTE(review): '.' is stripped together with ',', so "12.50" is read
        # as 1250. Confirm whether decimals can appear in these columns.
        digits = cells[idx].replace(',', '').replace('.', '')
        # isdecimal() only accepts characters int() can convert; isdigit()
        # also accepts e.g. superscripts, which made int() raise and the whole
        # row vanish.
        if digits.isdecimal() and int(digits) < 1000000:
            return int(digits), idx
    return None


def _parse_table_row(cells: List[str]) -> Optional[Dict[str, Any]]:
    """Turn one cleaned, non-empty table row into a line-item dict, or None to skip it."""
    found = _find_quantity(cells)
    if found is None:
        return None
    qty, qty_idx = found

    # A leading "1" / "1." cell is treated as a serial number; the description
    # is then taken from the next cell.
    desc_idx = 1 if len(cells) > 1 and re.match(r'^\d+\.?$', cells[0]) else 0
    description = cells[desc_idx]
    if re.match(r'^\d+$', description):
        return None
    if is_garbage_row(description):
        return None

    # The cell just left of the quantity is used as the unit when it is short
    # and is not the description itself.
    unit = "Unit"
    if qty_idx > desc_idx:
        candidate = cells[qty_idx - 1]
        if len(candidate) < 20 and candidate != description:
            unit = candidate

    return {
        "inn_name": description,
        "quantity": qty,
        "form": unit,
        "dosage": "",
        "type": determine_item_type(description, unit),
    }
|
|
|
|
|
# The ASGI application object; the route decorators below register on it.
app = FastAPI()

# CORS is fully open: any origin, HTTP method and request header is accepted.
# NOTE(review): confirm the wildcard origin is intended outside local development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
|
@app.on_event("startup")
def startup() -> None:
    """Startup hook: call load_master_index() once so the cache is filled early."""
    load_master_index()
|
|
|
|
|
@app.post("/api/upload")
async def upload_document(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
    """Store an uploaded file under a fresh UUID and schedule its expiry.

    Args:
        background_tasks: Used to register the delayed cleanup task.
        file: The uploaded file; its bytes are copied to
            ``DATA_DIR/<document_id>.pdf``.

    Returns:
        ``{"document_id": <uuid string>, "message": "Upload successful"}``.

    If writing the file fails, the partial file is removed before the error
    propagates, because the cleanup task has not been scheduled at that point.
    """
    doc_id = str(uuid.uuid4())
    filename = f"{doc_id}.pdf"
    file_path = os.path.join(DATA_DIR, filename)

    try:
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
    except Exception:
        # Do not leave a truncated upload on disk; nothing else would delete it.
        try:
            os.remove(file_path)
        except OSError:
            # Cleanup is best effort; the original error is what matters.
            pass
        raise

    # Safety net: delete the file after 10 minutes if it was never parsed.
    background_tasks.add_task(delete_file_safety_net, file_path, 600)

    return {"document_id": doc_id, "message": "Upload successful"}
|
|
|
|
|
@app.post("/api/parse/{document_id}")
async def parse_document(document_id: str):
    """Parse a previously uploaded PDF and delete it afterwards.

    Args:
        document_id: The id returned by the upload endpoint (a canonical UUID
            string).

    Returns:
        ``{"document_id": ..., "data": {"line_items": [...]}}``.

    Raises:
        HTTPException: 404 when the id is not a canonical UUID or no such file
            exists; 500 ("Parsing failed") when the PDF cannot be parsed.

    The stored file is removed whether parsing succeeds or fails.
    """
    # The id comes straight from the URL and is joined into a filesystem path
    # that is then read and deleted, so only the exact form produced by the
    # upload endpoint (str(uuid4())) is accepted. This rules out separators
    # and ".." segments.
    try:
        canonical_id = str(uuid.UUID(document_id))
    except ValueError:
        canonical_id = None
    if canonical_id != document_id:
        raise HTTPException(status_code=404, detail="File not found")

    file_path = os.path.join(DATA_DIR, f"{document_id}.pdf")
    if not os.path.exists(file_path):
        raise HTTPException(status_code=404, detail="File not found")

    try:
        items = parse_pdf_file(file_path)
    except Exception as exc:
        logging.getLogger(__name__).exception("Parsing failed for document %s", document_id)
        raise HTTPException(status_code=500, detail="Parsing failed") from exc
    finally:
        # Single cleanup path for both outcomes; a failed delete must not mask
        # the parse result or the parse error.
        try:
            os.remove(file_path)
        except FileNotFoundError:
            pass
        except OSError:
            logging.getLogger(__name__).warning(
                "Could not remove parsed upload %s", file_path, exc_info=True
            )

    return {
        "document_id": document_id,
        "data": {"line_items": items},
    }
|
|
|
|
|
class MatchRequest(BaseModel):
    """Request body for the /api/match-all endpoint."""

    # Line items to match; match_all reads the 'inn_name' and 'quantity' keys.
    items: List[Dict[str, Any]]
    # Optional list of preference strings; defaults to an empty list.
    preferences: List[str] = []
|
|
|
|
|
@app.post("/api/match-all")
async def match_all(req: MatchRequest):
    """Pair every requested item with the vendors from the master index.

    Args:
        req: Request whose ``items`` are dicts carrying ``inn_name`` and
            ``quantity``.

    Returns:
        ``{"matches": [...]}`` with one entry per item holding ``medicine``,
        ``quantity``, ``top_vendor`` (first vendor or None) and
        ``other_vendors`` (up to four further vendors).

    A missing, null or non-numeric quantity falls back to 1 instead of failing
    the whole request.
    """
    index = load_master_index()
    vendors = index.get('vendors', [])

    if not req.items:
        return {"matches": []}

    # The vendor list does not depend on the item, so it is built once and the
    # same entries are reused for every result.
    matches = [
        {
            'vendor_id': v.get('vendor_id'),
            'name': v.get('legal_name'),
            'country': (v.get('countries_served') or ['Unknown'])[0],
            'landedCost': v.get('landedCost', 10),
            'deliveryDays': v.get('deliveryDays', 5),
            'availableQty': v.get('availableQty', 1000),
            'qualityScore': v.get('confidence_score', 80) / 10.0,
            'reliabilityScore': 5,
            'score': 9.5,
        }
        for v in vendors
    ]
    top_vendor = matches[0] if matches else None
    other_vendors = matches[1:5]

    results = [
        {
            "medicine": item.get('inn_name') or 'Unknown',
            "quantity": _coerce_quantity(item.get('quantity', 1)),
            "top_vendor": top_vendor,
            "other_vendors": other_vendors,
        }
        for item in req.items
    ]

    return {"matches": results}


def _coerce_quantity(raw: Any, default: int = 1) -> int:
    """Convert a client-supplied quantity to int, returning ``default`` when it cannot be converted."""
    try:
        return int(raw)
    except (TypeError, ValueError, OverflowError):
        return default
|
|
|
|
|
# When run as a script, serve the app with uvicorn on 127.0.0.1:5001.
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when the file is run directly.
    import uvicorn
    uvicorn.run(app, host="127.0.0.1", port=5001)