# Spaces: Sleeping
| from fastapi import FastAPI, HTTPException, Request, File, UploadFile | |
| from fastapi.responses import JSONResponse | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from starlette.middleware.gzip import GZipMiddleware | |
| import time | |
| import os | |
| import gzip | |
| import base64 | |
| import json | |
| from .text_extractor import extract_text_from_pdf, extract_text_from_image | |
| from .image_extractor import extract_images_from_pdf | |
| from .lab_processor import RadioloLabProcessor | |
| from .models import EncryptedRequest | |
| from .crypto_utils import CryptoManager | |
| from dotenv import load_dotenv | |
# Read a local .env file (python-dotenv) before any os.getenv() lookup in
# this module runs.
load_dotenv()

# The FastAPI application object. Passing None for docs_url / redoc_url
# turns off the interactive documentation pages.
app = FastAPI(
    docs_url=None,
    redoc_url=None,
    title="Medical Lab Report Analysis API",
    version="2.0.0",
    description="Extract structured lab test data from medical reports using NER + Regex with end-to-end encryption",
)
# CORS: any origin, method and header is accepted, and credentials are
# allowed.
# NOTE(review): wildcard origins combined with allow_credentials=True is the
# most permissive setting available — confirm this is intended for an API
# that handles medical reports.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)

# Response compression; minimum_size is the byte threshold below which
# responses are left uncompressed.
app.add_middleware(
    GZipMiddleware,
    minimum_size=1000,
)
# Set by startup_event(); remains None until the NER model has been loaded.
# The handlers below check it before doing any work.
lab_processor = None

# Key handed to CryptoManager for request/response encryption.
# NOTE(review): this is None when ENCRYPTION_KEY is unset — confirm that
# CryptoManager rejects (or deliberately handles) a missing key.
SECRET_KEY = os.environ.get("ENCRYPTION_KEY")
crypto_manager = CryptoManager(SECRET_KEY)
# NOTE(review): no startup-hook registration is visible in this view —
# confirm this coroutine is attached to `app`.
async def startup_event():
    """Load the lab NER model into the module-level ``lab_processor``.

    The model location is read from ``LAB_NER_MODEL_PATH`` and falls back to
    ``./models/radiolo_clinic_ner``. Progress is reported with ``print``.

    Raises:
        RuntimeError: the model path does not exist.
        Exception: whatever ``RadioloLabProcessor`` raises while loading is
            printed and then re-raised unchanged.
    """
    global lab_processor

    rule = "=" * 70
    print("\n" + rule)
    print("MEDICAL LAB REPORT ANALYSIS API - STARTING UP")
    print(rule)

    model_dir = os.getenv("LAB_NER_MODEL_PATH", "./models/radiolo_clinic_ner")
    print(f"\nLoading Lab NER model from: {model_dir}")

    # Refuse to start without a model on disk.
    model_present = os.path.exists(model_dir)
    if not model_present:
        print(f"✗ ERROR: Model not found at {model_dir}")
        raise RuntimeError("Lab NER model not found")

    try:
        lab_processor = RadioloLabProcessor(model_dir)
        print("✅ API READY!")
        print(rule + "\n")
    except Exception as load_error:
        print(f"✗ FATAL ERROR: Failed to load model: {load_error}")
        raise
# NOTE(review): no shutdown-hook registration is visible in this view —
# confirm this coroutine is attached to `app`.
async def shutdown_event():
    """Print a shutdown notice to stdout."""
    farewell = "\nAPI SHUTTING DOWN\n"
    print(farewell)
# NOTE(review): no route decorator is visible in this view — confirm this
# handler is registered on `app` (the 404 payload lists it as "/").
async def root():
    """Return a static description of the service.

    The only dynamic field is ``model_loaded``, which is True once the
    module-level ``lab_processor`` has been set.
    """
    feature_summary = {
        "encryption": "NaCl (XSalsa20-Poly1305)",
        "compression": "gzip",
        "ocr_engine": "EasyOCR",
        "ner_model": "Custom Lab NER",
        "patient_friendly_summary": "AI-Generated Explanations",
        "supported_tests": 16
    }
    endpoint_map = {
        "health": "/health",
        "analyze": "/analyze-lab-secure",
        "test": "/test-analyze"
    }
    lab_test_groups = [
        "Complete Blood Count (WBC, RBC, Hemoglobin, Hematocrit, Platelets)",
        "Liver Function (ALT, AST, ALP, Bilirubin, Albumin)",
        "Thyroid Function (TSH, Free T4)",
        "Metabolic Panel (Glucose, Creatinine, Urea, Cholesterol)"
    ]
    model_ready = lab_processor is not None

    return {
        "status": "online",
        "api": "Medical Lab Report Analysis API",
        "version": "2.0.0",
        "model_loaded": model_ready,
        "features": feature_summary,
        "endpoints": endpoint_map,
        "supported_formats": ["pdf", "image"],
        "supported_lab_tests": lab_test_groups
    }
# NOTE(review): no route decorator is visible in this view — confirm this
# handler is registered on `app` (root() advertises it as "/health").
async def health_check():
    """Return a flat health summary.

    ``model_loaded`` reflects whether the module-level ``lab_processor`` has
    been set; every other field is a constant.
    """
    model_ready = lab_processor is not None
    return dict(
        status="healthy",
        model_loaded=model_ready,
        model_type="Lab Report NER",
        ocr_engine="EasyOCR",
        encryption="NaCl (XSalsa20-Poly1305)",
        compression="gzip",
        version="2.0.0",
        supported_tests=16,
    )
# NOTE(review): no route decorator is visible in this view — confirm this
# handler is registered on `app` (root() advertises it as "/test-analyze").
async def test_analyze(file: UploadFile = File(...)):
    """Analyze an uploaded lab report without encryption (test endpoint).

    The file type is chosen from the filename extension: ``.pdf`` goes
    through the PDF text and image extractors, common image extensions go
    through the image text extractor. The extracted text is passed to
    ``lab_processor.extract_and_format`` and the result is merged into the
    response.

    Args:
        file: The uploaded PDF or image.

    Returns:
        A dict with processing metadata (timing, filename, input type, OCR
        flag, a text preview of at most 500 characters, extracted images)
        followed by the keys of the lab analysis.

    Raises:
        HTTPException: 503 if the model is not loaded; 400 for a missing or
            unsupported filename extension or when fewer than 10
            non-whitespace-trimmed characters were extracted; 500 for any
            other failure.
    """
    start_time = time.time()
    try:
        if not lab_processor:
            raise HTTPException(status_code=503, detail="Lab processor not loaded")

        file_bytes = await file.read()
        # An upload may arrive without a filename; treat that as an empty
        # name so it is rejected below as an unsupported type (400) instead
        # of crashing on None.lower() and surfacing as a 500.
        filename = file.filename or ""
        lowered_name = filename.lower()
        print(f"\n📄 Processing test file: {filename} ({len(file_bytes)} bytes)")

        if lowered_name.endswith('.pdf'):
            file_type = "pdf"
            extracted_text, ocr_used = extract_text_from_pdf(file_bytes)
            images = extract_images_from_pdf(file_bytes)
        elif lowered_name.endswith(('.jpg', '.jpeg', '.png', '.tiff', '.bmp')):
            file_type = "image"
            extracted_text = extract_text_from_image(file_bytes)
            ocr_used = True  # images always go through OCR
            images = []
        else:
            raise HTTPException(status_code=400, detail="Unsupported file type. Use PDF or image files.")

        if not extracted_text or len(extracted_text.strip()) < 10:
            raise HTTPException(status_code=400, detail="Could not extract sufficient text from file")

        print(f"✓ Extracted {len(extracted_text)} characters (OCR: {ocr_used})")
        print("🧠 Processing with NER + ClinicalDistilBERT...")
        lab_analysis = lab_processor.extract_and_format(
            extracted_text,
            report_id=f"test_{int(time.time())}",
            patient_id="TEST_PATIENT"
        )

        processing_time = time.time() - start_time
        print(f"✅ Processing complete in {processing_time:.2f}s")
        print(f" Tests extracted: {lab_analysis.get('metadata', {}).get('tests_extracted', 0)}\n")

        # Keys from lab_analysis are spread last, so they win on collision.
        response_data = {
            "status": "success",
            "processing_time": round(processing_time, 3),
            "filename": filename,
            "input_type": file_type,
            "ocr_used": ocr_used,
            "ocr_engine": "EasyOCR" if ocr_used else "PyMuPDF",
            "raw_text_preview": extracted_text[:500] + "..." if len(extracted_text) > 500 else extracted_text,
            "text_length": len(extracted_text),
            "images": images,
            **lab_analysis
        }
        return response_data
    except HTTPException:
        # Deliberate HTTP errors pass through with their original traceback.
        raise
    except Exception as e:
        import traceback
        print(f"❌ Error: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}") from e
# NOTE(review): no route decorator is visible in this view — confirm this
# handler is registered on `app` (root() advertises it as
# "/analyze-lab-secure").
async def analyze_lab_secure(request: EncryptedRequest):
    """Analyze an encrypted lab report and return an encrypted result.

    The request is decrypted with ``crypto_manager``; the plaintext is a
    base64 string wrapping gzip-compressed JSON. That JSON object must carry
    ``file_type`` (``"pdf"`` or ``"image"``) and base64 ``file_data``, and
    may carry ``filename`` and ``patient_id``. The analysis result is
    encrypted with ``crypto_manager.encrypt_json`` before being returned.

    Args:
        request: Carries the ``ciphertext`` and ``nonce`` to decrypt.

    Returns:
        A dict with ``status``, ``ciphertext`` and ``nonce``.

    Raises:
        HTTPException: 503 if the model is not loaded; 400 for a malformed
            payload, an invalid ``file_type`` or insufficient extracted
            text; 500 for any other failure.
    """
    start_time = time.time()
    try:
        if not lab_processor:
            raise HTTPException(status_code=503, detail="Lab processor not loaded")

        # NOTE(review): the exception type raised by a failed decrypt is not
        # visible here, so it still falls through to the generic 500 below.
        decrypted_data = crypto_manager.decrypt(request.ciphertext, request.nonce)

        # Everything in this try is driven by client-supplied bytes, so a
        # failure is a client error (400), not a server fault.
        import zlib  # local: only needed to name the decompression error
        try:
            compressed_bytes = base64.b64decode(decrypted_data.decode('utf-8'))
            decompressed_data = gzip.decompress(compressed_bytes)
            payload = json.loads(decompressed_data.decode('utf-8'))
            if not isinstance(payload, dict):
                raise ValueError("payload is not a JSON object")
            file_type = payload['file_type']
            file_bytes = base64.b64decode(payload['file_data'])
        except (ValueError, TypeError, KeyError, OSError, EOFError, zlib.error) as exc:
            raise HTTPException(status_code=400, detail="Malformed request payload") from exc
        filename = payload.get('filename', 'unknown')

        if file_type == "pdf":
            extracted_text, ocr_used = extract_text_from_pdf(file_bytes)
            if not extracted_text or len(extracted_text.strip()) < 10:
                raise HTTPException(status_code=400, detail="Could not extract text from PDF")
            images = extract_images_from_pdf(file_bytes)
        elif file_type == "image":
            extracted_text = extract_text_from_image(file_bytes)
            ocr_used = True  # images always go through OCR
            images = []
            if not extracted_text or len(extracted_text.strip()) < 10:
                raise HTTPException(status_code=400, detail="Could not extract text from image")
        else:
            raise HTTPException(status_code=400, detail="Invalid file_type. Must be 'pdf' or 'image'")

        lab_analysis = lab_processor.extract_and_format(
            extracted_text,
            report_id=f"lab_{int(time.time())}",
            patient_id=payload.get('patient_id', 'unknown')
        )
        processing_time = time.time() - start_time

        # Keys from lab_analysis are spread last, so they win on collision.
        response_data = {
            "status": "success",
            "processing_time": round(processing_time, 3),
            "filename": filename,
            "input_type": file_type,
            "ocr_used": ocr_used,
            "ocr_engine": "EasyOCR" if ocr_used else "PyMuPDF",
            "raw_text": extracted_text[:500] + "..." if len(extracted_text) > 500 else extracted_text,
            "text_length": len(extracted_text),
            "images": images,
            **lab_analysis
        }
        encrypted_response = crypto_manager.encrypt_json(response_data)
        return {
            "status": "success",
            "ciphertext": encrypted_response['ciphertext'],
            "nonce": encrypted_response['nonce']
        }
    except HTTPException:
        # Deliberate HTTP errors pass through with their original traceback.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}") from e
# NOTE(review): no exception-handler registration is visible in this view —
# confirm this is attached to `app` for 404 responses.
async def not_found_handler(request: Request, exc):
    """Build the JSON 404 response, listing the endpoints this API serves.

    Neither ``request`` nor ``exc`` is inspected.
    """
    known_paths = ["/", "/health", "/test-analyze", "/analyze-lab-secure"]
    body = {
        "status": "error",
        "message": "Endpoint not found",
        "available_endpoints": known_paths
    }
    return JSONResponse(status_code=404, content=body)
# NOTE(review): no exception-handler registration is visible in this view —
# confirm this is attached to `app` for 500 responses.
async def internal_error_handler(request: Request, exc):
    """Build the JSON 500 response.

    Only the exception's class name is exposed to the client; its message
    is not included. ``request`` is not inspected.
    """
    exception_name = type(exc).__name__
    body = {
        "status": "error",
        "message": "Internal server error",
        "error_type": exception_name
    }
    return JSONResponse(status_code=500, content=body)
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn. HOST and PORT can be
    # overridden through the environment.
    import uvicorn

    bind_host = os.environ.get("HOST", "0.0.0.0")
    bind_port = int(os.environ.get("PORT", 7860))
    uvicorn.run(
        "app.main:app",
        host=bind_host,
        port=bind_port,
        reload=False,
        log_level="info",
    )