| import os |
| import json |
| import uuid |
| import math |
| import re |
| import shutil |
| import asyncio |
| import pdfplumber |
| from typing import List, Dict, Optional, Any |
| from fastapi import FastAPI, HTTPException, UploadFile, File, BackgroundTasks |
| from fastapi.middleware.cors import CORSMiddleware |
| from pydantic import BaseModel |
|
|
# Filesystem layout: everything is kept under a "data" directory that sits
# next to this module.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(BASE_DIR, 'data', 'uploaded')
MASTER_INDEX_PATH = os.path.join(BASE_DIR, 'data', 'master_index.json')

# Create the upload directory at import time so handlers can write to it
# without checking first.
os.makedirs(DATA_DIR, exist_ok=True)

# In-process cache for the parsed master index; filled by load_master_index().
_MASTER_INDEX_CACHE = {}
|
|
| |
| |
# Maps a product category to the lowercase keywords that identify it.
# Insertion order matters: determine_item_type() walks the categories in this
# order and returns the first one that has a matching keyword, so a keyword
# listed under two categories (e.g. "defibrillator") only ever resolves to the
# earlier one.
CATEGORY_DEFINITIONS = {
    "Pharmaceuticals & Biologics": [
        "tablet", "tab", "capsule", "cap", "syrup", "suspension", "susp", "injection", "inj", "vial", "ampoule", "amp",
        "drops", "gtt", "inhaler", "vaccine", "insulin", "dose", "drug", "medication", "ointment", "cream", "gel",
        "lotion", "suppository", "supp", "antibiotic", "antiviral", "analgesic", "anesthetic", "hormone", "steroid",
        "vitamin", "mineral", "supplement", "lozenge", "patch", "solution", "powder for suspension", "elixir", "serum",
        "antitoxin",
    ],
    "Surgical Products": [
        "scalpel", "forceps", "retractor", "clamp", "suture", "stapler", "surgical mesh", "hemostatic", "sealant",
        "surgical drape", "surgical gown", "laparoscopic", "robotic surgery", "electrosurgical", "surgical laser",
        "surgical blade", "trocar", "surgical clip", "surgical scissor", "needle holder",
    ],
    "Orthopedic & Spine": [
        "orthopedic", "spine", "joint replacement", "trauma fixation", "bone plate", "bone screw", "intramedullary rod",
        "bone nail", "spinal implant", "spinal fusion", "bone graft", "orthopedic brace", "cast", "arthroscopy",
        "fixator", "prosthesis", "bone drill", "bone saw",
    ],
    "Cardiovascular Products": [
        "cardiac stent", "pacemaker", "defibrillator", "icd", "heart valve", "vascular graft", "cardiac catheter",
        "guidewire", "cardiac balloon", "ablation", "coronary", "angioplasty", "introducer sheath",
    ],
    "Medical Imaging Equipment": [
        "mri", "ct scanner", "x-ray", "ultrasound", "mammography", "fluoroscopy", "pet scanner", "c-arm",
        "medical imaging", "transducer", "x-ray film", "contrast media", "lead apron",
    ],
    "Diagnostic Products": [
        "diagnostic", "test kit", "glucose test", "reagent", "immunoassay", "chemistry analyzer", "hematology",
        "microbiology", "culture media", "pregnancy test", "covid", "rapid test", "urinalysis", "penlight",
        "specula", "otoscope", "ophthalmoscope", "lancet", "glucometer strips", "test strip",
    ],
    "Patient Monitoring Equipment": [
        "vital signs", "ecg", "ekg", "pulse oximeter", "blood pressure monitor", "sphygmomanometer",
        "medical thermometer", "capnography", "fetal monitor", "telemetry", "spo2 sensor", "bp cuff", "temperature probe",
    ],
    "Respiratory & Anesthesia": [
        "ventilator", "anesthesia machine", "oxygen concentrator", "nebulizer", "cpap", "bipap", "respiratory",
        "endotracheal", "tracheostomy", "spirometer", "oxygen mask", "breathing circuit", "nasal cannula",
        "resuscitator", "laryngoscope",
    ],
    "Infusion & Vascular Access": [
        "infusion pump", "syringe pump", "iv set", "iv catheter", "venous", "picc", "iv port", "dialysis catheter",
        "administration set", "extension set", "stopcock", "giving set", "saline", "dextrose", "ringer",
        "sodium chloride", "water for injection",
    ],
    "Wound Care & Tissue Management": [
        "wound dressing", "bandage", "gauze", "medical tape", "plaster", "adhesive", "wound foam", "alginate",
        "hydrocolloid", "compression bandage", "ostomy", "skin substitute", "negative pressure",
    ],
    "Dialysis & Renal Care": [
        "hemodialysis", "peritoneal", "dialyzer", "blood line", "fistula needle", "dialysis concentrate", "bicarbonate",
    ],
    "Ophthalmic Products": [
        "intraocular", "intraocular lens", "phaco", "vitrectomy", "lasik", "contact lens", "viscoelastic",
        "ophthalmic solution", "eye drops",
    ],
    "Dental Products": [
        "dental implant", "orthodontic", "dental bracket", "dental wire", "dental drill", "dental handpiece",
        "dental cement", "dental composite", "amalgam", "impression material", "teeth whitening", "dental chair",
    ],
    "Neurology & Neurosurgery": [
        "neurostimulation", "spinal cord stimulator", "neuro coil", "flow diverter", "cranial", "shunt",
        "neuro electrode", "eeg", "emg",
    ],
    "Laboratory Equipment & Supplies": [
        "microscope", "lab centrifuge", "incubator", "autoclave", "pipette", "glassware", "test tube", "petri dish",
        "flask", "beaker", "microscope slide", "cover glass", "fume hood", "biosafety cabinet",
    ],
    "Personal Protective Equipment (PPE)": [
        "ppe", "n95", "face shield", "safety eyewear", "goggles", "protective apron", "shoe cover", "head cover",
        "coverall", "isolation gown", "hazmat", "surgical mask",
    ],
    "Sterilization & Disinfection": [
        "sterilization", "disinfectant", "antiseptic", "povidone", "iodine", "chlorhexidine", "alcohol swab",
        "hand sanitizer", "medical soap", "enzymatic cleaner", "detergent", "washer disinfector", "sterilizer",
        "sterilization indicator",
    ],
    "Hospital Furniture & Equipment": [
        "hospital bed", "examination table", "stretcher", "medical trolley", "medical cart", "medical cabinet",
        "bedside locker", "overbed table", "iv pole", "wheelchair",
    ],
    "Rehabilitation & Physical Therapy": [
        "rehabilitation", "physiotherapy", "walker", "walking cane", "crutch", "exercise band", "traction",
        "electrotherapy", "massage table", "orthosis",
    ],
    "Home Healthcare Products": [
        "home care", "blood glucose meter", "hearing aid", "mobility aid", "bathroom safety", "commode",
    ],
    "Emergency & Trauma Care": [
        "emergency kit", "trauma kit", "first aid", "aed", "defibrillator", "manual resuscitator", "suction unit",
        "immobilizer", "cervical collar", "splint", "tourniquet", "crash cart",
    ],
    "Maternal & Neonatal Care": [
        "maternal", "neonatal", "infant incubator", "infant warmer", "phototherapy", "breast pump", "obstetric",
        "birthing bed", "fetal doppler", "umbilical",
    ],
    "Urology Products": [
        "urology", "foley catheter", "urine bag", "urinary drainage", "ureteral stent", "stone basket",
    ],
    "Gastroenterology & Endoscopy": [
        "endoscope", "gastroscope", "colonoscope", "biopsy forceps", "polypectomy snare", "gastric balloon", "ercp",
    ],
    "Oncology Products": [
        "oncology", "chemotherapy", "radiotherapy", "brachytherapy", "port-a-cath", "cancer diagnostic",
    ],
    "Pain Management": [
        "pain management", "pca pump", "epidural", "nerve block", "tens unit",
    ],
    "Sleep Medicine": [
        "sleep apnea", "cpap mask", "bipap mask", "sleep tubing", "polysomnography",
    ],
    "Telemedicine & Digital Health": [
        "telemedicine", "telehealth", "remote monitor", "medical software", "health app",
    ],
    "Blood Management": [
        "blood bag", "blood transfusion", "blood bank", "blood warmer", "apheresis",
    ],
    "Mortuary & Pathology": [
        "mortuary", "autopsy", "body bag", "morgue fridge", "dissection table", "microtome", "tissue processor",
    ],
    "Environmental Control": [
        "medical gas", "medical vacuum", "medical air plant", "gas manifold", "gas outlet", "gas alarm",
    ],
    "Mobility & Accessibility": [
        "patient lift", "patient hoist", "wheelchair ramp", "stair lift", "transfer board",
    ],
    "Bariatric Products": [
        "bariatric bed", "bariatric wheelchair", "heavy duty scale",
    ],
    "Medical Textiles": [
        "hospital linen", "bed sheet", "pillow case", "medical blanket", "towel", "privacy curtain", "medical uniform",
        "scrub suit", "lab coat",
    ],
    "Infection Control Products": [
        "waste bin", "sharps container", "biohazard bag", "spill kit", "air purifier",
    ],
    "Medical Gases & Cryogenics": [
        "gas cylinder", "oxygen regulator", "flowmeter", "liquid oxygen", "nitrogen tank",
    ],
    "Nutrition & Feeding": [
        "enteral feeding", "clinical nutrition", "nasogastric tube", "feeding pump", "feeding set", "peg tube",
    ],
    "Specimen Collection & Transport": [
        "specimen container", "sample collection", "transport media", "transport swab", "urine container",
        "stool container", "cool box", "transport bag",
    ],
    "Medical Software & IT": [
        "emr", "ehr", "pacs", "ris", "lis", "his", "hospital information system",
    ],
    "Aesthetics & Dermatology": [
        "dermatology", "aesthetic laser", "ipl", "dermal filler", "botulinum", "botox", "chemical peel",
        "microdermabrasion",
    ],
    # Kept last so its generic keywords ("syringe", "needle", "disposable", ...)
    # are only consulted after every more specific category has been tried.
    "Medical Supplies & Consumables": [
        "syringe", "needle", "glove", "examination glove", "disposable", "consumable", "cotton wool", "alcohol prep",
        "urinal", "bedpan", "underpad", "tongue depressor", "applicator", "lubricant jelly", "cannula",
    ],
}
|
|
def load_master_index():
    """Return the master index, reading it from disk when the cache is empty.

    The parsed JSON from MASTER_INDEX_PATH is stored in the module-level
    _MASTER_INDEX_CACHE and reused on later calls. An empty cache counts as
    "not loaded", so a missing file (or one whose JSON is empty) is looked up
    again on every call. When the file does not exist the current (empty)
    cache is returned unchanged.
    """
    global _MASTER_INDEX_CACHE
    needs_load = not _MASTER_INDEX_CACHE
    if needs_load and os.path.exists(MASTER_INDEX_PATH):
        with open(MASTER_INDEX_PATH, 'r', encoding='utf-8') as index_file:
            _MASTER_INDEX_CACHE = json.load(index_file)
    return _MASTER_INDEX_CACHE
|
|
def clean_text(text: Optional[str]) -> str:
    """Flatten a table cell to one trimmed line.

    Newlines become single spaces and surrounding whitespace is stripped.
    ``None`` and other falsy values yield an empty string.
    """
    if not text:
        return ""
    single_line = text.replace('\n', ' ')
    return single_line.strip()
|
|
def is_garbage_row(row_text: str) -> bool:
    """Tell whether a row is form boilerplate rather than a line item.

    The check is a case-insensitive substring search for any of a fixed set
    of marker phrases.
    """
    boilerplate_markers = (
        "click or tap",
        "enter text",
        "rfq reference",
        "signature",
        "date:",
        "authorized by",
        "page ",
        "payment terms",
    )
    lowered = row_text.lower()
    for marker in boilerplate_markers:
        if marker in lowered:
            return True
    return False
|
|
def determine_item_type(description: str, form: str) -> str:
    """Classify an item into a product category by keyword.

    The description and form/unit are joined and lowercased, then each
    category in CATEGORY_DEFINITIONS is tried in definition order. A keyword
    matches only as a whole word or phrase (regex word boundaries), so
    'fusion' does not match inside 'infusion' -- and, by the same rule, a
    plural such as 'tablets' does not match the keyword 'tablet'.

    Returns the first category with a matching keyword, or
    'Medical Supplies & Consumables' when nothing matches.
    """
    haystack = " ".join((description, form)).lower()

    for category, keywords in CATEGORY_DEFINITIONS.items():
        # re.escape keeps keywords such as "x-ray" or "port-a-cath" literal.
        if any(re.search(r'\b' + re.escape(keyword) + r'\b', haystack) for keyword in keywords):
            return category

    return 'Medical Supplies & Consumables'
|
|
async def delete_file_safety_net(file_path: str, delay: int = 600):
    """Delete ``file_path`` after ``delay`` seconds, on a best-effort basis.

    Acts as a fallback so an uploaded file does not linger if nothing else
    removes it first. The coroutine never raises because of a failed
    deletion: a file that is already gone is ignored silently, and any other
    failure is logged as a warning.

    Args:
        file_path: Path of the file to remove.
        delay: Seconds to wait before attempting the removal.
    """
    import logging  # local import keeps this block self-contained

    await asyncio.sleep(delay)
    try:
        # Remove directly instead of checking existence first: the file may
        # disappear between a check and the removal.
        os.remove(file_path)
    except FileNotFoundError:
        # Expected when the file was already cleaned up elsewhere.
        pass
    except Exception:
        # Still best-effort, but leave a trace instead of hiding the failure.
        logging.getLogger(__name__).warning(
            "Could not delete temporary upload %s", file_path, exc_info=True
        )
|
|
def parse_pdf_file(file_path: str) -> List[Dict[str, Any]]:
    """Extract line items from every table in a PDF.

    Each table row is cleaned with clean_text (empty cells dropped) and then
    interpreted heuristically:

    * rows flagged by is_garbage_row, and header rows containing both
      "description" and "qty", are skipped;
    * the quantity is the right-most cell that is all digits once ',' and '.'
      are removed and whose value is below 1,000,000; rows without such a
      cell are skipped;
    * the description is the first cell, or the second when the first looks
      like a serial number ("3" or "3.");
    * the unit is the cell just left of the quantity when it is shorter than
      20 characters and is not the description itself, otherwise "Unit".

    Any exception raised while interpreting a single row causes that row to
    be skipped.

    Returns a list of dicts with keys ``inn_name``, ``quantity``, ``form``,
    ``dosage`` (always empty) and ``type``.
    """
    line_items: List[Dict[str, Any]] = []

    with pdfplumber.open(file_path) as document:
        for page in document.pages:
            for table in page.extract_tables():
                for raw_row in table:
                    cells = [
                        clean_text(cell)
                        for cell in raw_row
                        if cell is not None and clean_text(cell) != ""
                    ]
                    if not cells:
                        continue

                    joined = " ".join(cells)
                    if is_garbage_row(joined):
                        continue
                    lowered = joined.lower()
                    if "description" in lowered and "qty" in lowered:
                        continue

                    try:
                        # Scan from the right for the quantity column.
                        qty = 1
                        qty_idx = -1
                        for idx in reversed(range(len(cells))):
                            digits = cells[idx].replace(',', '').replace('.', '')
                            if digits.isdigit() and int(digits) < 1000000:
                                qty, qty_idx = int(digits), idx
                                break
                        if qty_idx == -1:
                            continue

                        # Step over a leading serial-number cell when present.
                        has_serial = re.match(r'^\d+\.?$', cells[0]) and len(cells) > 1
                        desc_idx = 1 if has_serial else 0
                        description = cells[desc_idx]
                        if re.match(r'^\d+$', description) or is_garbage_row(description):
                            continue

                        # The cell left of the quantity is taken as the unit
                        # when it is short and distinct from the description.
                        unit = "Unit"
                        if qty_idx > max(0, desc_idx):
                            candidate = cells[qty_idx - 1]
                            if candidate != description and len(candidate) < 20:
                                unit = candidate

                        line_items.append({
                            "inn_name": description,
                            "quantity": qty,
                            "form": unit,
                            "dosage": "",
                            "type": determine_item_type(description, unit),
                        })
                    except Exception:
                        continue

    return line_items
|
|
# The ASGI application object that the route decorators below attach to.
app = FastAPI()

# CORS is fully permissive here: every origin, method and header is allowed.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)
|
|
@app.on_event("startup")
def startup():
    """Warm the master-index cache when the application starts."""
    load_master_index()
|
|
@app.post("/api/upload")
async def upload_document(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
    """Store an uploaded file under a fresh UUID and return that id.

    The file is written to DATA_DIR as ``<uuid>.pdf``; the client-supplied
    filename is not used. A background task is scheduled to delete the file
    after 600 seconds.
    """
    doc_id = str(uuid.uuid4())
    file_path = os.path.join(DATA_DIR, f"{doc_id}.pdf")

    with open(file_path, "wb") as destination:
        shutil.copyfileobj(file.file, destination)

    # Fallback cleanup for the stored upload.
    background_tasks.add_task(delete_file_safety_net, file_path, 600)

    return {"document_id": doc_id, "message": "Upload successful"}
|
|
@app.post("/api/parse/{document_id}")
async def parse_document(document_id: str):
    """Parse a previously uploaded PDF and return its line items.

    ``document_id`` comes straight from the URL, so it is validated as a UUID
    before being used in a filesystem path; anything else is answered with
    404, exactly like an unknown id. This keeps values such as ``..\\..\\x``
    from addressing (and deleting) files outside DATA_DIR.

    The stored file is removed once parsing finishes, whether it succeeded
    or failed.

    Raises:
        HTTPException: 404 when the id is not a valid UUID or no file is
            stored for it; 500 when parsing fails.
    """
    try:
        # Rebuild the path from the canonical form, not from the raw input.
        canonical_id = str(uuid.UUID(document_id))
    except ValueError:
        raise HTTPException(status_code=404, detail="File not found") from None

    file_path = os.path.join(DATA_DIR, f"{canonical_id}.pdf")
    if not os.path.exists(file_path):
        raise HTTPException(status_code=404, detail="File not found")

    try:
        items = parse_pdf_file(file_path)
    except Exception as exc:
        raise HTTPException(status_code=500, detail="Parsing failed") from exc
    finally:
        # Single cleanup point for both outcomes; the file may already have
        # been removed by the time we get here.
        try:
            os.remove(file_path)
        except FileNotFoundError:
            pass

    return {
        "document_id": document_id,
        "data": {"line_items": items},
    }
|
|
class MatchRequest(BaseModel):
    """Request body accepted by the /api/match-all endpoint."""

    # Line items to match, as free-form dicts.
    items: List[Dict[str, Any]]
    # Optional list of preference strings; defaults to empty.
    preferences: List[str] = []
|
|
@app.post("/api/match-all")
async def match_all(req: MatchRequest):
    """Return vendor candidates for every requested line item.

    Candidates are built from the ``vendors`` list of the master index. The
    candidate list does not depend on the individual item, so it is built
    once and reused for each item. ``req.preferences`` is not consulted.

    Per item, ``inn_name`` falls back to 'Unknown' when missing or empty, and
    ``quantity`` falls back to 1 when it is missing or cannot be converted to
    an integer (e.g. ``null`` or a non-numeric string from the client).

    Returns:
        ``{"matches": [...]}`` with one entry per item holding the medicine
        name, the quantity, the first candidate as ``top_vendor`` (``None``
        when there are no vendors) and up to four further candidates as
        ``other_vendors``.
    """
    index = load_master_index()
    vendors = index.get('vendors', [])

    candidates = [
        {
            'vendor_id': v.get('vendor_id'),
            'name': v.get('legal_name'),
            'country': (v.get('countries_served') or ['Unknown'])[0],
            'landedCost': v.get('landedCost', 10),
            'deliveryDays': v.get('deliveryDays', 5),
            'availableQty': v.get('availableQty', 1000),
            'qualityScore': v.get('confidence_score', 80) / 10.0,
            'reliabilityScore': 5,
            'score': 9.5,
        }
        for v in vendors
    ]
    top_vendor = candidates[0] if candidates else None

    results = []
    for item in req.items:
        name = item.get('inn_name') or 'Unknown'
        try:
            qty = int(item.get('quantity', 1))
        except (TypeError, ValueError, OverflowError):
            # Client-supplied value is unusable; use the same default that
            # applies when the key is missing.
            qty = 1

        results.append({
            "medicine": name,
            "quantity": qty,
            "top_vendor": top_vendor,
            # A slice past the end is simply empty, so no length check needed.
            "other_vendors": candidates[1:5],
        })

    return {"matches": results}
|
|
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on the loopback
    # interface only.
    import uvicorn

    uvicorn.run(app, port=5001, host="127.0.0.1")