"""API routes for extracting ICD-10 / CPT codes from provider notes.

Exposes two POST endpoints on ``router``:

* ``/coding`` - analyze provider notes supplied as JSON text.
* ``/upload-file`` - analyze provider notes supplied as an uploaded ``.txt``
  file, after attempting PII removal.
"""

import logging
import re

from fastapi import APIRouter, HTTPException, UploadFile, File

from models.request_models import ProviderNotesRequest
from models.response_models import CodingResponse, FileUploadResponse
from services.groq_service import groq_service
from services.file_service import file_service

router = APIRouter()
logger = logging.getLogger(__name__)

# Minimum number of characters (after stripping) accepted by ``/coding``.
_MIN_NOTES_CHARS = 10

# Minimum number of whitespace-separated words for text to count as medical.
_MIN_WORD_COUNT = 10

# Keywords of which at least one must appear in the text.
# NOTE: matching is plain substring matching on the lower-cased text, so
# e.g. 'plan' also matches inside 'explain'.
_MEDICAL_KEYWORDS = (
    'patient', 'diagnosis', 'symptom', 'treatment', 'exam', 'history',
    'complaint', 'pain', 'fever', 'cough', 'prescribed', 'medication',
    'procedure', 'surgery', 'vital', 'blood', 'pressure', 'heart',
    'lung', 'breath', 'chronic', 'acute', 'assessment', 'plan',
    'condition', 'disease', 'injury', 'wound', 'fracture', 'infection',
)

# Phrases whose presence (as a substring) rejects the text immediately.
_CASUAL_PHRASES = (
    'how are you', 'hello', 'hi there', 'good morning', 'good evening',
    'test test', 'sample text', 'doing fine', 'nice to meet',
    'what\'s up', 'how\'s it going',
)


def is_likely_medical_text(text: str) -> bool:
    """Return whether *text* looks like medical provider notes.

    The check is a cheap heuristic applied before calling the analysis
    service:

    1. If any casual phrase occurs in the lower-cased text, reject.
    2. Otherwise accept only if at least one medical keyword occurs and the
       text has at least ``_MIN_WORD_COUNT`` whitespace-separated words.

    Args:
        text: Raw provider-notes text.

    Returns:
        True if the text passes the heuristic, False otherwise.
    """
    lowered = text.lower()

    # Casual phrases are an immediate rejection, regardless of keywords.
    if any(phrase in lowered for phrase in _CASUAL_PHRASES):
        return False

    # Only "at least one" matters, so short-circuit instead of counting.
    has_medical_keyword = any(keyword in lowered for keyword in _MEDICAL_KEYWORDS)
    return has_medical_keyword and len(text.split()) >= _MIN_WORD_COUNT


@router.post("/coding", response_model=CodingResponse)
async def analyze_provider_notes(request: ProviderNotesRequest):
    """
    Analyze provider notes and extract ICD-10 and CPT codes

    This endpoint accepts provider notes as text input and returns:
    - ICD-10 diagnostic codes with explanations
    - CPT procedure codes with explanations
    - Overall encounter summary

    Text that fails ``is_likely_medical_text`` is not sent for analysis; an
    empty result with an explanatory summary is returned instead.

    Raises:
        HTTPException: 400 if the notes are missing/too short or the analysis
            raises ``ValueError``; 500 for any other failure.
    """
    try:
        notes = request.provider_notes

        # ``notes or ""`` keeps this log line from raising when notes is
        # missing, so the validation below can answer with a proper 400.
        logger.info("Received coding request (notes length: %d)", len(notes or ""))

        # Validate input
        if not notes or len(notes.strip()) < _MIN_NOTES_CHARS:
            raise HTTPException(
                status_code=400,
                detail="Provider notes must be at least 10 characters long"
            )

        # PRE-VALIDATION: Check if text appears to be medical
        if not is_likely_medical_text(notes):
            logger.warning("Input rejected - does not appear to be medical provider notes")
            return {
                "icd_codes": [],
                "cpt_codes": [],
                "overall_summary": "No medical coding applicable - input does not appear to contain clinical provider notes. Please provide legitimate medical documentation."
            }

        # Analyze with Groq
        # NOTE(review): this is a synchronous call inside an async handler; if
        # it performs network I/O it will block the event loop - confirm and
        # consider offloading to a thread pool.
        result = groq_service.analyze_provider_notes(notes)

        logger.info(
            "Analysis complete: %d ICD codes, %d CPT codes",
            len(result.get('icd_codes', [])),
            len(result.get('cpt_codes', [])),
        )

        return result

    except HTTPException:
        # Already a well-formed HTTP error; let FastAPI handle it unchanged.
        raise
    except ValueError as e:
        logger.error("Validation error: %s", e)
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        logger.error("Error processing request: %s", e, exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Error processing request: {str(e)}"
        ) from e


@router.post("/upload-file", response_model=FileUploadResponse)
async def upload_provider_notes_file(file: UploadFile = File(...)):
    """
    Upload a provider notes file (.txt), remove PII, and analyze

    Returns:
    - File processing info (PII removal stats)
    - ICD-10 codes with explanations
    - CPT codes with explanations
    - Overall summary

    Raises:
        HTTPException: 400 if the file is not a ``.txt`` file, is empty, or is
            not valid UTF-8; 500 for any other failure.
    """
    try:
        logger.info("Received file upload: %s", file.filename)

        # Validate file type. The filename may be absent on the upload, and
        # the extension check should not depend on letter case.
        filename = file.filename or ""
        if not filename.lower().endswith('.txt'):
            raise HTTPException(
                status_code=400,
                detail="Only .txt files are allowed"
            )

        # Read file content
        content = await file.read()
        text = content.decode('utf-8')

        logger.info("File read successfully (length: %d)", len(text))

        # An empty/whitespace-only file has nothing to analyze.
        if not text.strip():
            raise HTTPException(
                status_code=400,
                detail="Uploaded file is empty"
            )

        # Remove PII
        try:
            cleaned_text, pii_count = file_service.remove_pii(text)
        except Exception as pii_error:
            logger.error("WARNING: PII removal failed: %s", pii_error)
            # Continue without PII removal if it fails
            # NOTE(review): in this fallback the unredacted text is what gets
            # passed to the analysis service below - confirm this best-effort
            # behavior is acceptable for the data being handled.
            cleaned_text = text
            pii_count = 0
        else:
            logger.info("PII removal complete: %s entities removed", pii_count)

        # Analyze with Groq
        # NOTE(review): synchronous call inside an async handler - see the
        # same note in analyze_provider_notes.
        result = groq_service.analyze_provider_notes(cleaned_text)

        # Combine results
        response = {
            "success": True,
            "filename": file.filename,
            "extracted_text_length": len(text),
            "pii_removed": pii_count > 0,
            "pii_count": pii_count,
            "icd_codes": result.get("icd_codes", []),
            "cpt_codes": result.get("cpt_codes", []),
            "overall_summary": result.get("overall_summary", "")
        }

        logger.info("File processing complete")

        return response

    except HTTPException:
        # Already a well-formed HTTP error; let FastAPI handle it unchanged.
        raise
    except UnicodeDecodeError as e:
        logger.error("File encoding error")
        raise HTTPException(
            status_code=400,
            detail="File must be UTF-8 encoded text"
        ) from e
    except Exception as e:
        logger.error("Error processing uploaded file: %s", e, exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Error processing uploaded file: {str(e)}"
        ) from e