"""FastAPI application exposing the Medical Lab Report Analysis API.

Endpoints:
    GET  /                    service metadata
    GET  /health              liveness / model-loaded probe
    POST /test-analyze        plain multipart upload (testing)
    POST /analyze-lab-secure  encrypted + gzip-compressed JSON payload

Both analysis endpoints extract text (and, for PDFs, images) from the
uploaded report and hand the text to the lab processor loaded at startup.
"""
import base64
import gzip
import json
import os
import time
import traceback
import zlib
from typing import Any, Dict, Tuple

from fastapi import FastAPI, HTTPException, Request, File, UploadFile
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from starlette.middleware.gzip import GZipMiddleware

from .text_extractor import extract_text_from_pdf, extract_text_from_image
from .image_extractor import extract_images_from_pdf
from .lab_processor import RadioloLabProcessor
from .models import EncryptedRequest
from .crypto_utils import CryptoManager
from dotenv import load_dotenv

load_dotenv()

API_VERSION = "2.0.0"
SUPPORTED_TEST_COUNT = 16
# Extracted text shorter than this (after stripping) is treated as a failure.
MIN_TEXT_LENGTH = 10
# Number of characters of extracted text echoed back in responses.
TEXT_PREVIEW_CHARS = 500
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.tiff', '.bmp')

app = FastAPI(
    title="Medical Lab Report Analysis API",
    description="Extract structured lab test data from medical reports using NER + Regex with end-to-end encryption",
    version=API_VERSION,
    docs_url=None,
    redoc_url=None
)

# NOTE(review): wildcard origins combined with allow_credentials=True lets any
# site make credentialed requests; restrict allow_origins before exposing this
# service to browsers that carry cookies/auth headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
app.add_middleware(GZipMiddleware, minimum_size=1000)

# Populated by startup_event(); None until the model has been loaded.
lab_processor = None

# NOTE(review): ENCRYPTION_KEY may be unset, in which case None is passed to
# CryptoManager - confirm how CryptoManager handles a missing key.
SECRET_KEY = os.getenv("ENCRYPTION_KEY")
crypto_manager = CryptoManager(SECRET_KEY)


def _has_sufficient_text(text: Any) -> bool:
    """Return True when *text* holds at least MIN_TEXT_LENGTH stripped chars."""
    return bool(text) and len(text.strip()) >= MIN_TEXT_LENGTH


def _text_preview(text: str) -> str:
    """Return *text* truncated to TEXT_PREVIEW_CHARS, with "..." if cut."""
    if len(text) > TEXT_PREVIEW_CHARS:
        return text[:TEXT_PREVIEW_CHARS] + "..."
    return text


def _extract_content(
    file_bytes: bytes, file_type: str, insufficient_text_detail: str
) -> Tuple[str, bool, Any]:
    """Extract text and images from an uploaded report.

    Args:
        file_bytes: Raw bytes of the uploaded file.
        file_type: Either "pdf" or "image" (the caller validates this).
        insufficient_text_detail: Detail message for the 400 raised when too
            little text is extracted.

    Returns:
        ``(extracted_text, ocr_used, images)``. ``images`` is whatever
        ``extract_images_from_pdf`` returns for PDFs and ``[]`` for images.

    Raises:
        HTTPException: 400 when the extracted text is missing or too short.
    """
    # NOTE(review): the extractor calls below are synchronous and run on the
    # event loop; other requests wait while they execute.
    if file_type == "pdf":
        extracted_text, ocr_used = extract_text_from_pdf(file_bytes)
    else:
        extracted_text = extract_text_from_image(file_bytes)
        ocr_used = True

    # Check the text before extracting images so a failed extraction does not
    # pay for image processing as well.
    if not _has_sufficient_text(extracted_text):
        raise HTTPException(status_code=400, detail=insufficient_text_detail)

    images = extract_images_from_pdf(file_bytes) if file_type == "pdf" else []
    return extracted_text, ocr_used, images


def _build_response(
    *,
    processing_time: float,
    filename: Any,
    file_type: str,
    ocr_used: bool,
    text_key: str,
    extracted_text: str,
    images: Any,
    lab_analysis: Dict[str, Any],
) -> Dict[str, Any]:
    """Assemble the success payload shared by both analysis endpoints.

    ``text_key`` is the dictionary key under which the text preview is stored
    ("raw_text_preview" for the test endpoint, "raw_text" for the secure one).
    Keys from ``lab_analysis`` are merged last and therefore take precedence.
    """
    return {
        "status": "success",
        "processing_time": round(processing_time, 3),
        "filename": filename,
        "input_type": file_type,
        "ocr_used": ocr_used,
        "ocr_engine": "EasyOCR" if ocr_used else "PyMuPDF",
        text_key: _text_preview(extracted_text),
        "text_length": len(extracted_text),
        "images": images,
        **lab_analysis
    }


def _decode_payload(decrypted_data: bytes) -> Dict[str, Any]:
    """Turn decrypted bytes (base64 of gzip of JSON) into the payload dict.

    Raises:
        HTTPException: 400 when the data is not valid base64/gzip/JSON, is not
            a JSON object, or lacks the required ``file_data`` / ``file_type``
            fields.
    """
    try:
        compressed_bytes = base64.b64decode(decrypted_data.decode('utf-8'))
        decompressed_data = gzip.decompress(compressed_bytes)
        payload = json.loads(decompressed_data.decode('utf-8'))
    except (ValueError, OSError, EOFError, zlib.error) as exc:
        # ValueError covers binascii.Error, UnicodeDecodeError and
        # json.JSONDecodeError; OSError covers gzip.BadGzipFile.
        raise HTTPException(status_code=400, detail="Malformed encrypted payload") from exc

    if not isinstance(payload, dict):
        raise HTTPException(status_code=400, detail="Payload must be a JSON object")

    missing = [key for key in ('file_data', 'file_type') if key not in payload]
    if missing:
        raise HTTPException(
            status_code=400,
            detail=f"Missing required payload field(s): {', '.join(missing)}"
        )
    return payload


@app.on_event("startup")
async def startup_event():
    """Load the lab NER model into the module-level ``lab_processor``.

    The model path comes from ``LAB_NER_MODEL_PATH`` (default
    ``./models/radiolo_clinic_ner``).

    Raises:
        RuntimeError: when the model path does not exist.
        Exception: any error raised while constructing the processor is
            logged and re-raised so startup fails.
    """
    global lab_processor

    print("\n" + "=" * 70)
    print("MEDICAL LAB REPORT ANALYSIS API - STARTING UP")
    print("=" * 70)

    model_path = os.getenv("LAB_NER_MODEL_PATH", "./models/radiolo_clinic_ner")
    print(f"\nLoading Lab NER model from: {model_path}")

    if not os.path.exists(model_path):
        print(f"✗ ERROR: Model not found at {model_path}")
        raise RuntimeError("Lab NER model not found")

    try:
        lab_processor = RadioloLabProcessor(model_path)
        print("✅ API READY!")
        print("=" * 70 + "\n")
    except Exception as e:
        print(f"✗ FATAL ERROR: Failed to load model: {e}")
        raise


@app.on_event("shutdown")
async def shutdown_event():
    """Log that the API is shutting down."""
    print("\nAPI SHUTTING DOWN\n")


@app.get("/")
async def root():
    """Return static service metadata plus whether the model is loaded."""
    return {
        "status": "online",
        "api": "Medical Lab Report Analysis API",
        "version": API_VERSION,
        "model_loaded": lab_processor is not None,
        "features": {
            "encryption": "NaCl (XSalsa20-Poly1305)",
            "compression": "gzip",
            "ocr_engine": "EasyOCR",
            "ner_model": "Custom Lab NER",
            "patient_friendly_summary": "AI-Generated Explanations",
            "supported_tests": SUPPORTED_TEST_COUNT
        },
        "endpoints": {
            "health": "/health",
            "analyze": "/analyze-lab-secure",
            "test": "/test-analyze"
        },
        "supported_formats": ["pdf", "image"],
        "supported_lab_tests": [
            "Complete Blood Count (WBC, RBC, Hemoglobin, Hematocrit, Platelets)",
            "Liver Function (ALT, AST, ALP, Bilirubin, Albumin)",
            "Thyroid Function (TSH, Free T4)",
            "Metabolic Panel (Glucose, Creatinine, Urea, Cholesterol)"
        ]
    }


@app.get("/health")
async def health_check():
    """Return a health summary including whether the model is loaded."""
    return {
        "status": "healthy",
        "model_loaded": lab_processor is not None,
        "model_type": "Lab Report NER",
        "ocr_engine": "EasyOCR",
        "encryption": "NaCl (XSalsa20-Poly1305)",
        "compression": "gzip",
        "version": API_VERSION,
        "supported_tests": SUPPORTED_TEST_COUNT
    }


@app.post("/test-analyze", tags=["Testing"])
async def test_analyze(file: UploadFile = File(...)):
    """Analyze an unencrypted uploaded PDF or image (testing endpoint).

    The file type is inferred from the filename extension.

    Raises:
        HTTPException: 503 if the model is not loaded; 400 for an unsupported
            extension or insufficient extracted text; 500 for any other error.
    """
    start_time = time.time()

    try:
        if lab_processor is None:
            raise HTTPException(status_code=503, detail="Lab processor not loaded")

        file_bytes = await file.read()
        # UploadFile.filename may be None; fall back so the extension check
        # yields a 400 instead of crashing with AttributeError.
        filename = file.filename or "unknown"
        print(f"\n📄 Processing test file: {filename} ({len(file_bytes)} bytes)")

        lowered = filename.lower()
        if lowered.endswith('.pdf'):
            file_type = "pdf"
        elif lowered.endswith(IMAGE_EXTENSIONS):
            file_type = "image"
        else:
            raise HTTPException(status_code=400, detail="Unsupported file type. Use PDF or image files.")

        extracted_text, ocr_used, images = _extract_content(
            file_bytes,
            file_type,
            "Could not extract sufficient text from file"
        )

        print(f"✓ Extracted {len(extracted_text)} characters (OCR: {ocr_used})")
        print("🧠 Processing with NER + ClinicalDistilBERT...")

        lab_analysis = lab_processor.extract_and_format(
            extracted_text,
            report_id=f"test_{int(time.time())}",
            patient_id="TEST_PATIENT"
        )

        processing_time = time.time() - start_time
        print(f"✅ Processing complete in {processing_time:.2f}s")
        print(f" Tests extracted: {lab_analysis.get('metadata', {}).get('tests_extracted', 0)}\n")

        return _build_response(
            processing_time=processing_time,
            filename=filename,
            file_type=file_type,
            ocr_used=ocr_used,
            text_key="raw_text_preview",
            extracted_text=extracted_text,
            images=images,
            lab_analysis=lab_analysis,
        )

    except HTTPException:
        raise
    except Exception as e:
        print(f"❌ Error: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")


@app.post("/analyze-lab-secure", tags=["Lab Analysis"])
async def analyze_lab_secure(request: EncryptedRequest):
    """Analyze a report delivered as an encrypted, gzip-compressed payload.

    The request carries ``ciphertext`` and ``nonce``. After decryption the
    data is base64-decoded, gunzipped and parsed as JSON with the fields
    ``file_data`` (base64), ``file_type`` ("pdf" or "image") and the optional
    ``filename`` / ``patient_id``. The success response is encrypted with
    ``crypto_manager.encrypt_json``; error responses are returned in clear.

    Raises:
        HTTPException: 503 if the model is not loaded; 400 for a malformed
            payload, invalid ``file_type`` or insufficient extracted text;
            500 for any other error (including decryption failures).
    """
    start_time = time.time()

    try:
        if lab_processor is None:
            raise HTTPException(status_code=503, detail="Lab processor not loaded")

        decrypted_data = crypto_manager.decrypt(request.ciphertext, request.nonce)
        payload = _decode_payload(decrypted_data)

        filename = payload.get('filename', 'unknown')
        file_type = payload['file_type']

        if file_type == "pdf":
            insufficient_detail = "Could not extract text from PDF"
        elif file_type == "image":
            insufficient_detail = "Could not extract text from image"
        else:
            raise HTTPException(status_code=400, detail="Invalid file_type. Must be 'pdf' or 'image'")

        try:
            file_bytes = base64.b64decode(payload['file_data'])
        except (ValueError, TypeError) as exc:
            raise HTTPException(status_code=400, detail="file_data is not valid base64") from exc

        extracted_text, ocr_used, images = _extract_content(
            file_bytes, file_type, insufficient_detail
        )

        lab_analysis = lab_processor.extract_and_format(
            extracted_text,
            report_id=f"lab_{int(time.time())}",
            patient_id=payload.get('patient_id', 'unknown')
        )

        processing_time = time.time() - start_time

        response_data = _build_response(
            processing_time=processing_time,
            filename=filename,
            file_type=file_type,
            ocr_used=ocr_used,
            text_key="raw_text",
            extracted_text=extracted_text,
            images=images,
            lab_analysis=lab_analysis,
        )

        encrypted_response = crypto_manager.encrypt_json(response_data)

        return {
            "status": "success",
            "ciphertext": encrypted_response['ciphertext'],
            "nonce": encrypted_response['nonce']
        }

    except HTTPException:
        raise
    except Exception as e:
        # Log unexpected failures the same way /test-analyze does so they are
        # not silently converted into a 500 with no server-side trace.
        print(f"❌ Error: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")


@app.exception_handler(404)
async def not_found_handler(request: Request, exc):
    """Return a JSON 404 body listing the available endpoints."""
    return JSONResponse(
        status_code=404,
        content={
            "status": "error",
            "message": "Endpoint not found",
            "available_endpoints": ["/", "/health", "/test-analyze", "/analyze-lab-secure"]
        }
    )


@app.exception_handler(500)
async def internal_error_handler(request: Request, exc):
    """Return a JSON 500 body naming the exception type only."""
    return JSONResponse(
        status_code=500,
        content={
            "status": "error",
            "message": "Internal server error",
            "error_type": type(exc).__name__
        }
    )


if __name__ == "__main__":
    import uvicorn

    host = os.getenv("HOST", "0.0.0.0")
    port = int(os.getenv("PORT", "7860"))
    uvicorn.run("app.main:app", host=host, port=port, reload=False, log_level="info")