Distopia22's picture
Fixing model hallucination
1915c66
import logging
from fastapi import APIRouter, HTTPException, UploadFile, File
from models.request_models import ProviderNotesRequest
from models.response_models import CodingResponse, FileUploadResponse
from services.groq_service import groq_service
from services.file_service import file_service
import re
router = APIRouter()
logger = logging.getLogger(__name__)
# Keywords whose presence suggests clinical content. They are matched as plain
# substrings on purpose so inflected forms ("symptoms", "fractured") count too.
_MEDICAL_KEYWORDS = (
    'patient', 'diagnosis', 'symptom', 'treatment', 'exam', 'history',
    'complaint', 'pain', 'fever', 'cough', 'prescribed', 'medication',
    'procedure', 'surgery', 'vital', 'blood', 'pressure', 'heart',
    'lung', 'breath', 'chronic', 'acute', 'assessment', 'plan',
    'condition', 'disease', 'injury', 'wound', 'fracture', 'infection',
)

# Small-talk / placeholder phrases that mark the input as non-clinical.
_CASUAL_PHRASES = (
    'how are you', 'hello', 'hi there', 'good morning', 'good evening',
    'test test', 'sample text', 'doing fine', 'nice to meet',
    'what\'s up', 'how\'s it going',
)

# Casual phrases must match on word boundaries. A raw substring test wrongly
# rejects real notes: "latest test results" contains the characters
# "test test" even though the phrase itself never appears.
_CASUAL_PHRASE_RE = re.compile(
    r"\b(?:" + "|".join(re.escape(phrase) for phrase in _CASUAL_PHRASES) + r")\b"
)

# Minimum number of whitespace-separated words for text to be considered.
_MIN_WORD_COUNT = 10


def is_likely_medical_text(text: str) -> bool:
    """
    Pre-validate if text appears to be medical provider notes

    The check is a cheap heuristic, applied case-insensitively:
    - any casual / placeholder phrase (matched as whole words) rejects
      the text immediately;
    - otherwise the text must contain at least one medical keyword and
      at least 10 whitespace-separated words.

    Args:
        text: Raw text to classify.

    Returns True if likely medical, False otherwise
    """
    text_lower = text.lower()

    # Check for casual phrases (immediate rejection)
    if _CASUAL_PHRASE_RE.search(text_lower):
        return False

    # Require at least 1 medical keyword and minimum length
    has_medical_keyword = any(keyword in text_lower for keyword in _MEDICAL_KEYWORDS)
    return has_medical_keyword and len(text.split()) >= _MIN_WORD_COUNT
@router.post("/coding", response_model=CodingResponse)
async def analyze_provider_notes(request: ProviderNotesRequest):
    """
    Extract ICD-10 and CPT codes from free-text provider notes

    Flow:
    1. Notes that are missing, or shorter than 10 characters once
       surrounding whitespace is stripped, are refused with HTTP 400.
    2. Notes that fail the is_likely_medical_text pre-check get an empty
       coding result and never reach the model.
    3. Everything else is passed to groq_service.analyze_provider_notes
       and its result is returned unchanged.

    Raises:
        HTTPException: 400 for too-short notes or a ValueError raised
            while processing; 500 for any other unexpected error.
    """
    try:
        logger.info(f"Received coding request (notes length: {len(request.provider_notes)})")

        notes = request.provider_notes

        # Basic sanity check on the amount of text supplied
        has_enough_text = bool(notes) and len(notes.strip()) >= 10
        if not has_enough_text:
            raise HTTPException(
                status_code=400,
                detail="Provider notes must be at least 10 characters long"
            )

        if is_likely_medical_text(notes):
            # NOTE(review): this call is not awaited, so it presumably blocks
            # the event loop while the model responds - confirm whether it
            # should be off-loaded to a worker thread.
            result = groq_service.analyze_provider_notes(notes)
            logger.info(f"Analysis complete: {len(result.get('icd_codes', []))} ICD codes, {len(result.get('cpt_codes', []))} CPT codes")
            return result

        # Pre-check failed: answer with an empty coding result instead of
        # asking the model to code non-clinical text.
        logger.warning("Input rejected - does not appear to be medical provider notes")
        empty_result = {
            "icd_codes": [],
            "cpt_codes": [],
            "overall_summary": "No medical coding applicable - input does not appear to contain clinical provider notes. Please provide legitimate medical documentation."
        }
        return empty_result
    except HTTPException:
        # Already carries the intended status code; pass it through untouched
        raise
    except ValueError as e:
        logger.error(f"Validation error: {str(e)}")
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error processing request: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Error processing request: {str(e)}")
@router.post("/upload-file", response_model=FileUploadResponse)
async def upload_provider_notes_file(file: UploadFile = File(...)):
    """
    Upload a provider notes file (.txt), remove PII, and analyze

    Processing steps:
    1. Only files whose name ends in ".txt" (case-insensitive) are
       accepted; an upload without a filename is rejected the same way.
    2. The content is decoded as UTF-8; a leading byte-order mark is
       dropped.
    3. PII removal is attempted; if it fails, the original text is used.
    4. The same medical-text pre-check as the /coding endpoint is applied,
       and the model call is skipped for non-clinical input.
    5. Otherwise the cleaned text is analyzed via groq_service.

    Returns:
    - File processing info (PII removal stats)
    - ICD-10 codes with explanations
    - CPT codes with explanations
    - Overall summary

    Raises:
        HTTPException: 400 for a non-.txt upload or content that is not
            valid UTF-8; 500 for any other unexpected error.
    """
    try:
        logger.info(f"Received file upload: {file.filename}")

        # Validate file type. The filename can be absent on an upload, and
        # the extension may be upper-case (e.g. "NOTES.TXT").
        filename = file.filename or ""
        if not filename.lower().endswith('.txt'):
            raise HTTPException(
                status_code=400,
                detail="Only .txt files are allowed"
            )

        # Read file content. "utf-8-sig" decodes plain UTF-8 identically but
        # strips a leading BOM, which some editors prepend to text files.
        content = await file.read()
        text = content.decode('utf-8-sig')
        logger.info(f"File read successfully (length: {len(text)})")

        # Remove PII
        try:
            cleaned_text, pii_count = file_service.remove_pii(text)
            logger.info(f"PII removal complete: {pii_count} entities removed")
        except Exception as pii_error:
            logger.error(f"WARNING: PII removal failed: {str(pii_error)}")
            # Continue without PII removal if it fails
            # NOTE(review): this forwards un-redacted text to the analysis
            # service - confirm that best-effort behaviour is acceptable.
            cleaned_text = text
            pii_count = 0

        # PRE-VALIDATION: same guard as the /coding endpoint, so non-clinical
        # uploads do not reach the model and produce invented codes.
        if is_likely_medical_text(cleaned_text):
            # Analyze with Groq
            result = groq_service.analyze_provider_notes(cleaned_text)
        else:
            logger.warning("Input rejected - does not appear to be medical provider notes")
            result = {
                "icd_codes": [],
                "cpt_codes": [],
                "overall_summary": "No medical coding applicable - input does not appear to contain clinical provider notes. Please provide legitimate medical documentation."
            }

        # Combine results
        response = {
            "success": True,
            "filename": file.filename,
            "extracted_text_length": len(text),
            "pii_removed": pii_count > 0,
            "pii_count": pii_count,
            "icd_codes": result.get("icd_codes", []),
            "cpt_codes": result.get("cpt_codes", []),
            "overall_summary": result.get("overall_summary", "")
        }
        logger.info("File processing complete")
        return response
    except HTTPException:
        # Already carries the intended status code; pass it through untouched
        raise
    except UnicodeDecodeError:
        logger.error("File encoding error")
        raise HTTPException(status_code=400, detail="File must be UTF-8 encoded text")
    except Exception as e:
        logger.error(f"Error processing uploaded file: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Error processing uploaded file: {str(e)}")