Commit
·
1915c66
1
Parent(s):
764e30e
Fixing model hallucination
Browse files
- src/api/routes.py +56 -11
- src/config/settings.py +3 -3
- src/main.py +6 -6
- src/services/file_service.py +4 -7
- src/services/groq_service.py +29 -37
- src/services/regex_pii_remover.py +2 -2
- src/utils/prompts.py +50 -12
src/api/routes.py
CHANGED
|
@@ -4,11 +4,47 @@ from models.request_models import ProviderNotesRequest
|
|
| 4 |
from models.response_models import CodingResponse, FileUploadResponse
|
| 5 |
from services.groq_service import groq_service
|
| 6 |
from services.file_service import file_service
|
|
|
|
| 7 |
|
| 8 |
router = APIRouter()
|
| 9 |
logger = logging.getLogger(__name__)
|
| 10 |
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
@router.post("/coding", response_model=CodingResponse)
|
| 13 |
async def analyze_provider_notes(request: ProviderNotesRequest):
|
| 14 |
"""
|
|
@@ -20,7 +56,7 @@ async def analyze_provider_notes(request: ProviderNotesRequest):
|
|
| 20 |
- Overall encounter summary
|
| 21 |
"""
|
| 22 |
try:
|
| 23 |
-
logger.info(f"
|
| 24 |
|
| 25 |
# Validate input
|
| 26 |
if not request.provider_notes or len(request.provider_notes.strip()) < 10:
|
|
@@ -29,20 +65,29 @@ async def analyze_provider_notes(request: ProviderNotesRequest):
|
|
| 29 |
detail="Provider notes must be at least 10 characters long"
|
| 30 |
)
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
# Analyze with Groq
|
| 33 |
result = groq_service.analyze_provider_notes(request.provider_notes)
|
| 34 |
|
| 35 |
-
logger.info(f"
|
| 36 |
|
| 37 |
return result
|
| 38 |
|
| 39 |
except HTTPException:
|
| 40 |
raise
|
| 41 |
except ValueError as e:
|
| 42 |
-
logger.error(f"
|
| 43 |
raise HTTPException(status_code=400, detail=str(e))
|
| 44 |
except Exception as e:
|
| 45 |
-
logger.error(f"
|
| 46 |
raise HTTPException(status_code=500, detail=f"Error processing request: {str(e)}")
|
| 47 |
|
| 48 |
|
|
@@ -58,7 +103,7 @@ async def upload_provider_notes_file(file: UploadFile = File(...)):
|
|
| 58 |
- Overall summary
|
| 59 |
"""
|
| 60 |
try:
|
| 61 |
-
logger.info(f"
|
| 62 |
|
| 63 |
# Validate file type
|
| 64 |
if not file.filename.endswith('.txt'):
|
|
@@ -71,14 +116,14 @@ async def upload_provider_notes_file(file: UploadFile = File(...)):
|
|
| 71 |
content = await file.read()
|
| 72 |
text = content.decode('utf-8')
|
| 73 |
|
| 74 |
-
logger.info(f"
|
| 75 |
|
| 76 |
# Remove PII
|
| 77 |
try:
|
| 78 |
cleaned_text, pii_count = file_service.remove_pii(text)
|
| 79 |
-
logger.info(f"
|
| 80 |
except Exception as pii_error:
|
| 81 |
-
logger.error(f"
|
| 82 |
# Continue without PII removal if it fails
|
| 83 |
cleaned_text = text
|
| 84 |
pii_count = 0
|
|
@@ -98,15 +143,15 @@ async def upload_provider_notes_file(file: UploadFile = File(...)):
|
|
| 98 |
"overall_summary": result.get("overall_summary", "")
|
| 99 |
}
|
| 100 |
|
| 101 |
-
logger.info(f"
|
| 102 |
|
| 103 |
return response
|
| 104 |
|
| 105 |
except HTTPException:
|
| 106 |
raise
|
| 107 |
except UnicodeDecodeError:
|
| 108 |
-
logger.error("
|
| 109 |
raise HTTPException(status_code=400, detail="File must be UTF-8 encoded text")
|
| 110 |
except Exception as e:
|
| 111 |
-
logger.error(f"
|
| 112 |
raise HTTPException(status_code=500, detail=f"Error processing uploaded file: {str(e)}")
|
|
|
|
| 4 |
from models.response_models import CodingResponse, FileUploadResponse
|
| 5 |
from services.groq_service import groq_service
|
| 6 |
from services.file_service import file_service
|
| 7 |
+
import re
|
| 8 |
|
| 9 |
router = APIRouter()
|
| 10 |
logger = logging.getLogger(__name__)
|
| 11 |
|
| 12 |
|
| 13 |
+
def is_likely_medical_text(text: str) -> bool:
|
| 14 |
+
"""
|
| 15 |
+
Pre-validate if text appears to be medical provider notes
|
| 16 |
+
Returns True if likely medical, False otherwise
|
| 17 |
+
"""
|
| 18 |
+
text_lower = text.lower()
|
| 19 |
+
|
| 20 |
+
# Medical keywords that should be present
|
| 21 |
+
medical_keywords = [
|
| 22 |
+
'patient', 'diagnosis', 'symptom', 'treatment', 'exam', 'history',
|
| 23 |
+
'complaint', 'pain', 'fever', 'cough', 'prescribed', 'medication',
|
| 24 |
+
'procedure', 'surgery', 'vital', 'blood', 'pressure', 'heart',
|
| 25 |
+
'lung', 'breath', 'chronic', 'acute', 'assessment', 'plan',
|
| 26 |
+
'condition', 'disease', 'injury', 'wound', 'fracture', 'infection'
|
| 27 |
+
]
|
| 28 |
+
|
| 29 |
+
# Red flags for non-medical text
|
| 30 |
+
casual_phrases = [
|
| 31 |
+
'how are you', 'hello', 'hi there', 'good morning', 'good evening',
|
| 32 |
+
'test test', 'sample text', 'doing fine', 'nice to meet',
|
| 33 |
+
'what\'s up', 'how\'s it going'
|
| 34 |
+
]
|
| 35 |
+
|
| 36 |
+
# Check for casual phrases (immediate rejection)
|
| 37 |
+
for phrase in casual_phrases:
|
| 38 |
+
if phrase in text_lower:
|
| 39 |
+
return False
|
| 40 |
+
|
| 41 |
+
# Check for medical keywords (need at least 1)
|
| 42 |
+
medical_keyword_count = sum(1 for keyword in medical_keywords if keyword in text_lower)
|
| 43 |
+
|
| 44 |
+
# Require at least 1 medical keyword and minimum length
|
| 45 |
+
return medical_keyword_count >= 1 and len(text.split()) >= 10
|
| 46 |
+
|
| 47 |
+
|
| 48 |
@router.post("/coding", response_model=CodingResponse)
|
| 49 |
async def analyze_provider_notes(request: ProviderNotesRequest):
|
| 50 |
"""
|
|
|
|
| 56 |
- Overall encounter summary
|
| 57 |
"""
|
| 58 |
try:
|
| 59 |
+
logger.info(f"Received coding request (notes length: {len(request.provider_notes)})")
|
| 60 |
|
| 61 |
# Validate input
|
| 62 |
if not request.provider_notes or len(request.provider_notes.strip()) < 10:
|
|
|
|
| 65 |
detail="Provider notes must be at least 10 characters long"
|
| 66 |
)
|
| 67 |
|
| 68 |
+
# PRE-VALIDATION: Check if text appears to be medical
|
| 69 |
+
if not is_likely_medical_text(request.provider_notes):
|
| 70 |
+
logger.warning("Input rejected - does not appear to be medical provider notes")
|
| 71 |
+
return {
|
| 72 |
+
"icd_codes": [],
|
| 73 |
+
"cpt_codes": [],
|
| 74 |
+
"overall_summary": "No medical coding applicable - input does not appear to contain clinical provider notes. Please provide legitimate medical documentation."
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
# Analyze with Groq
|
| 78 |
result = groq_service.analyze_provider_notes(request.provider_notes)
|
| 79 |
|
| 80 |
+
logger.info(f"Analysis complete: {len(result.get('icd_codes', []))} ICD codes, {len(result.get('cpt_codes', []))} CPT codes")
|
| 81 |
|
| 82 |
return result
|
| 83 |
|
| 84 |
except HTTPException:
|
| 85 |
raise
|
| 86 |
except ValueError as e:
|
| 87 |
+
logger.error(f"Validation error: {str(e)}")
|
| 88 |
raise HTTPException(status_code=400, detail=str(e))
|
| 89 |
except Exception as e:
|
| 90 |
+
logger.error(f"Error processing request: {str(e)}", exc_info=True)
|
| 91 |
raise HTTPException(status_code=500, detail=f"Error processing request: {str(e)}")
|
| 92 |
|
| 93 |
|
|
|
|
| 103 |
- Overall summary
|
| 104 |
"""
|
| 105 |
try:
|
| 106 |
+
logger.info(f"Received file upload: {file.filename}")
|
| 107 |
|
| 108 |
# Validate file type
|
| 109 |
if not file.filename.endswith('.txt'):
|
|
|
|
| 116 |
content = await file.read()
|
| 117 |
text = content.decode('utf-8')
|
| 118 |
|
| 119 |
+
logger.info(f"File read successfully (length: {len(text)})")
|
| 120 |
|
| 121 |
# Remove PII
|
| 122 |
try:
|
| 123 |
cleaned_text, pii_count = file_service.remove_pii(text)
|
| 124 |
+
logger.info(f"PII removal complete: {pii_count} entities removed")
|
| 125 |
except Exception as pii_error:
|
| 126 |
+
logger.error(f"WARNING: PII removal failed: {str(pii_error)}")
|
| 127 |
# Continue without PII removal if it fails
|
| 128 |
cleaned_text = text
|
| 129 |
pii_count = 0
|
|
|
|
| 143 |
"overall_summary": result.get("overall_summary", "")
|
| 144 |
}
|
| 145 |
|
| 146 |
+
logger.info(f"File processing complete")
|
| 147 |
|
| 148 |
return response
|
| 149 |
|
| 150 |
except HTTPException:
|
| 151 |
raise
|
| 152 |
except UnicodeDecodeError:
|
| 153 |
+
logger.error("File encoding error")
|
| 154 |
raise HTTPException(status_code=400, detail="File must be UTF-8 encoded text")
|
| 155 |
except Exception as e:
|
| 156 |
+
logger.error(f"Error processing uploaded file: {str(e)}", exc_info=True)
|
| 157 |
raise HTTPException(status_code=500, detail=f"Error processing uploaded file: {str(e)}")
|
src/config/settings.py
CHANGED
|
@@ -19,8 +19,8 @@ settings = Settings()
|
|
| 19 |
|
| 20 |
# Validate API key on startup
|
| 21 |
if not settings.groq_api_key:
|
| 22 |
-
print("
|
| 23 |
print("Please set GROQ_API_KEY in Hugging Face Space secrets.")
|
| 24 |
else:
|
| 25 |
-
print(f"
|
| 26 |
-
print(f"
|
|
|
|
| 19 |
|
| 20 |
# Validate API key on startup
|
| 21 |
if not settings.groq_api_key:
|
| 22 |
+
print("WARNING: GROQ_API_KEY is not set! API will not function properly.")
|
| 23 |
print("Please set GROQ_API_KEY in Hugging Face Space secrets.")
|
| 24 |
else:
|
| 25 |
+
print(f"Groq API Key loaded (length: {len(settings.groq_api_key)})")
|
| 26 |
+
print(f"Using model: {settings.groq_model}")
|
src/main.py
CHANGED
|
@@ -21,22 +21,22 @@ async def lifespan(app: FastAPI):
|
|
| 21 |
"""
|
| 22 |
# Startup
|
| 23 |
logger.info("=" * 50)
|
| 24 |
-
logger.info(f"
|
| 25 |
logger.info("=" * 50)
|
| 26 |
|
| 27 |
if settings.groq_api_key:
|
| 28 |
-
logger.info(f"
|
| 29 |
-
logger.info(f"
|
| 30 |
else:
|
| 31 |
-
logger.error("
|
| 32 |
-
logger.error("
|
| 33 |
|
| 34 |
logger.info("=" * 50)
|
| 35 |
|
| 36 |
yield
|
| 37 |
|
| 38 |
# Shutdown
|
| 39 |
-
logger.info("
|
| 40 |
|
| 41 |
|
| 42 |
# Create FastAPI app with lifespan
|
|
|
|
| 21 |
"""
|
| 22 |
# Startup
|
| 23 |
logger.info("=" * 50)
|
| 24 |
+
logger.info(f"Starting {settings.api_title} v{settings.api_version}")
|
| 25 |
logger.info("=" * 50)
|
| 26 |
|
| 27 |
if settings.groq_api_key:
|
| 28 |
+
logger.info(f"Groq API Key: Configured (length: {len(settings.groq_api_key)})")
|
| 29 |
+
logger.info(f"Groq Model: {settings.groq_model}")
|
| 30 |
else:
|
| 31 |
+
logger.error("GROQ_API_KEY is NOT set!")
|
| 32 |
+
logger.error("WARNING: API will NOT function without valid API key")
|
| 33 |
|
| 34 |
logger.info("=" * 50)
|
| 35 |
|
| 36 |
yield
|
| 37 |
|
| 38 |
# Shutdown
|
| 39 |
+
logger.info("Shutting down API...")
|
| 40 |
|
| 41 |
|
| 42 |
# Create FastAPI app with lifespan
|
src/services/file_service.py
CHANGED
|
@@ -1,6 +1,3 @@
|
|
| 1 |
-
from fastapi import UploadFile, HTTPException
|
| 2 |
-
import os
|
| 3 |
-
from typing import Dict
|
| 4 |
import logging
|
| 5 |
from services.regex_pii_remover import RegexPIIRemover
|
| 6 |
|
|
@@ -11,7 +8,7 @@ class FileService:
|
|
| 11 |
def __init__(self):
|
| 12 |
"""Initialize file service with PII remover"""
|
| 13 |
self.pii_remover = RegexPIIRemover()
|
| 14 |
-
logger.info("
|
| 15 |
|
| 16 |
def remove_pii(self, text: str) -> tuple[str, int]:
|
| 17 |
"""
|
|
@@ -24,16 +21,16 @@ class FileService:
|
|
| 24 |
tuple: (cleaned_text, pii_count)
|
| 25 |
"""
|
| 26 |
try:
|
| 27 |
-
logger.info(f"
|
| 28 |
|
| 29 |
cleaned_text, pii_count = self.pii_remover.remove_pii(text)
|
| 30 |
|
| 31 |
-
logger.info(f"
|
| 32 |
|
| 33 |
return cleaned_text, pii_count
|
| 34 |
|
| 35 |
except Exception as e:
|
| 36 |
-
logger.error(f"
|
| 37 |
# Return original text if PII removal fails
|
| 38 |
return text, 0
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import logging
|
| 2 |
from services.regex_pii_remover import RegexPIIRemover
|
| 3 |
|
|
|
|
| 8 |
def __init__(self):
|
| 9 |
"""Initialize file service with PII remover"""
|
| 10 |
self.pii_remover = RegexPIIRemover()
|
| 11 |
+
logger.info("FileService initialized")
|
| 12 |
|
| 13 |
def remove_pii(self, text: str) -> tuple[str, int]:
|
| 14 |
"""
|
|
|
|
| 21 |
tuple: (cleaned_text, pii_count)
|
| 22 |
"""
|
| 23 |
try:
|
| 24 |
+
logger.info(f"Starting PII removal (text length: {len(text)})")
|
| 25 |
|
| 26 |
cleaned_text, pii_count = self.pii_remover.remove_pii(text)
|
| 27 |
|
| 28 |
+
logger.info(f"PII removal complete: {pii_count} entities removed")
|
| 29 |
|
| 30 |
return cleaned_text, pii_count
|
| 31 |
|
| 32 |
except Exception as e:
|
| 33 |
+
logger.error(f"Error during PII removal: {str(e)}")
|
| 34 |
# Return original text if PII removal fails
|
| 35 |
return text, 0
|
| 36 |
|
src/services/groq_service.py
CHANGED
|
@@ -8,22 +8,27 @@ logger = logging.getLogger(__name__)
|
|
| 8 |
|
| 9 |
class GroqService:
|
| 10 |
def __init__(self):
|
| 11 |
-
"""Initialize Groq client
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
|
| 17 |
-
# Simple initialization without extra parameters
|
| 18 |
-
self.client = Groq(
|
| 19 |
-
api_key=settings.groq_api_key
|
| 20 |
-
)
|
| 21 |
-
self.model = settings.groq_model
|
| 22 |
-
logger.info(f"✅ Groq client initialized successfully")
|
| 23 |
-
logger.info(f"✅ Using model: {self.model}")
|
| 24 |
-
except Exception as e:
|
| 25 |
-
logger.error(f"❌ Failed to initialize Groq client: {str(e)}")
|
| 26 |
-
raise
|
| 27 |
|
| 28 |
def analyze_provider_notes(self, provider_notes: str) -> dict:
|
| 29 |
"""
|
|
@@ -31,19 +36,18 @@ class GroqService:
|
|
| 31 |
|
| 32 |
Args:
|
| 33 |
provider_notes: Clinical provider notes text
|
| 34 |
-
response_format: Desired response format (default: "json")
|
| 35 |
|
| 36 |
Returns:
|
| 37 |
dict: Parsed coding response with ICD-10, CPT codes and summary
|
| 38 |
"""
|
| 39 |
try:
|
| 40 |
-
logger.info(f"
|
| 41 |
|
| 42 |
# Get the prompt
|
| 43 |
prompt = get_coding_prompt(provider_notes)
|
| 44 |
|
| 45 |
-
# Call Groq API
|
| 46 |
-
logger.info(f"
|
| 47 |
response = self.client.chat.completions.create(
|
| 48 |
model=self.model,
|
| 49 |
messages=[
|
|
@@ -63,8 +67,7 @@ class GroqService:
|
|
| 63 |
|
| 64 |
# Extract response
|
| 65 |
raw_response = response.choices[0].message.content
|
| 66 |
-
logger.info(f"
|
| 67 |
-
logger.debug(f"Raw response: {raw_response[:500]}...")
|
| 68 |
|
| 69 |
# Parse JSON
|
| 70 |
parsed_response = json.loads(raw_response)
|
|
@@ -72,31 +75,20 @@ class GroqService:
|
|
| 72 |
# Validate and structure response
|
| 73 |
result = self._structure_response(parsed_response)
|
| 74 |
|
| 75 |
-
logger.info(f"
|
| 76 |
|
| 77 |
return result
|
| 78 |
|
| 79 |
except json.JSONDecodeError as e:
|
| 80 |
-
logger.error(f"
|
| 81 |
-
logger.error(f"Raw response: {raw_response}")
|
| 82 |
raise ValueError(f"Failed to parse Groq response as JSON: {str(e)}")
|
| 83 |
|
| 84 |
except Exception as e:
|
| 85 |
-
logger.error(f"
|
| 86 |
raise
|
| 87 |
|
| 88 |
def _structure_response(self, parsed_response: dict) -> dict:
|
| 89 |
-
"""
|
| 90 |
-
Structure and validate the response from Groq
|
| 91 |
-
|
| 92 |
-
Args:
|
| 93 |
-
parsed_response: Raw parsed JSON from Groq
|
| 94 |
-
response_format: Desired response format (default: "json")
|
| 95 |
-
|
| 96 |
-
Returns:
|
| 97 |
-
dict: Properly structured response
|
| 98 |
-
"""
|
| 99 |
-
# Handle different possible response formats
|
| 100 |
icd_codes = []
|
| 101 |
cpt_codes = []
|
| 102 |
overall_summary = parsed_response.get("overall_summary", "")
|
|
@@ -129,5 +121,5 @@ class GroqService:
|
|
| 129 |
"overall_summary": overall_summary
|
| 130 |
}
|
| 131 |
|
| 132 |
-
# Global instance
|
| 133 |
groq_service = GroqService()
|
|
|
|
| 8 |
|
| 9 |
class GroqService:
|
| 10 |
def __init__(self):
|
| 11 |
+
"""Initialize Groq service (client created on first use)"""
|
| 12 |
+
self._client = None
|
| 13 |
+
self.model = settings.groq_model
|
| 14 |
+
|
| 15 |
+
@property
|
| 16 |
+
def client(self):
|
| 17 |
+
"""Lazy initialization of Groq client"""
|
| 18 |
+
if self._client is None:
|
| 19 |
+
if not settings.groq_api_key:
|
| 20 |
+
logger.error("GROQ_API_KEY is not set!")
|
| 21 |
+
raise ValueError("GROQ_API_KEY environment variable is required")
|
| 22 |
+
|
| 23 |
+
try:
|
| 24 |
+
logger.info("Initializing Groq client...")
|
| 25 |
+
self._client = Groq(api_key=settings.groq_api_key)
|
| 26 |
+
logger.info(f"Groq client initialized successfully")
|
| 27 |
+
except Exception as e:
|
| 28 |
+
logger.error(f"Failed to initialize Groq client: {str(e)}")
|
| 29 |
+
raise
|
| 30 |
|
| 31 |
+
return self._client
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
def analyze_provider_notes(self, provider_notes: str) -> dict:
|
| 34 |
"""
|
|
|
|
| 36 |
|
| 37 |
Args:
|
| 38 |
provider_notes: Clinical provider notes text
|
|
|
|
| 39 |
|
| 40 |
Returns:
|
| 41 |
dict: Parsed coding response with ICD-10, CPT codes and summary
|
| 42 |
"""
|
| 43 |
try:
|
| 44 |
+
logger.info(f"Analyzing provider notes (length: {len(provider_notes)})")
|
| 45 |
|
| 46 |
# Get the prompt
|
| 47 |
prompt = get_coding_prompt(provider_notes)
|
| 48 |
|
| 49 |
+
# Call Groq API (client initialized here if needed)
|
| 50 |
+
logger.info(f"Calling Groq API with model: {self.model}")
|
| 51 |
response = self.client.chat.completions.create(
|
| 52 |
model=self.model,
|
| 53 |
messages=[
|
|
|
|
| 67 |
|
| 68 |
# Extract response
|
| 69 |
raw_response = response.choices[0].message.content
|
| 70 |
+
logger.info(f"Received response from Groq (length: {len(raw_response)})")
|
|
|
|
| 71 |
|
| 72 |
# Parse JSON
|
| 73 |
parsed_response = json.loads(raw_response)
|
|
|
|
| 75 |
# Validate and structure response
|
| 76 |
result = self._structure_response(parsed_response)
|
| 77 |
|
| 78 |
+
logger.info(f"Successfully analyzed: {len(result.get('icd_codes', []))} ICD codes, {len(result.get('cpt_codes', []))} CPT codes")
|
| 79 |
|
| 80 |
return result
|
| 81 |
|
| 82 |
except json.JSONDecodeError as e:
|
| 83 |
+
logger.error(f"JSON parsing error: {str(e)}")
|
|
|
|
| 84 |
raise ValueError(f"Failed to parse Groq response as JSON: {str(e)}")
|
| 85 |
|
| 86 |
except Exception as e:
|
| 87 |
+
logger.error(f"Error analyzing provider notes: {str(e)}")
|
| 88 |
raise
|
| 89 |
|
| 90 |
def _structure_response(self, parsed_response: dict) -> dict:
|
| 91 |
+
"""Structure and validate the response from Groq"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
icd_codes = []
|
| 93 |
cpt_codes = []
|
| 94 |
overall_summary = parsed_response.get("overall_summary", "")
|
|
|
|
| 121 |
"overall_summary": overall_summary
|
| 122 |
}
|
| 123 |
|
| 124 |
+
# Global instance (client initialized on first use)
|
| 125 |
groq_service = GroqService()
|
src/services/regex_pii_remover.py
CHANGED
|
@@ -35,7 +35,7 @@ class RegexPIIRemover:
|
|
| 35 |
'dob': re.compile(r'(DOB|Date of Birth)[:\s]+\d{1,2}[/-]\d{1,2}[/-]\d{2,4}', re.IGNORECASE),
|
| 36 |
}
|
| 37 |
|
| 38 |
-
logger.info(f"
|
| 39 |
|
| 40 |
def remove_pii(self, text: str) -> tuple[str, int]:
|
| 41 |
"""
|
|
@@ -66,6 +66,6 @@ class RegexPIIRemover:
|
|
| 66 |
else:
|
| 67 |
cleaned_text = pattern.sub('[REDACTED]', cleaned_text)
|
| 68 |
|
| 69 |
-
logger.info(f"
|
| 70 |
|
| 71 |
return cleaned_text, total_removed
|
|
|
|
| 35 |
'dob': re.compile(r'(DOB|Date of Birth)[:\s]+\d{1,2}[/-]\d{1,2}[/-]\d{2,4}', re.IGNORECASE),
|
| 36 |
}
|
| 37 |
|
| 38 |
+
logger.info(f"RegexPIIRemover initialized with {len(self.patterns)} patterns")
|
| 39 |
|
| 40 |
def remove_pii(self, text: str) -> tuple[str, int]:
|
| 41 |
"""
|
|
|
|
| 66 |
else:
|
| 67 |
cleaned_text = pattern.sub('[REDACTED]', cleaned_text)
|
| 68 |
|
| 69 |
+
logger.info(f"Removed {total_removed} PII entities")
|
| 70 |
|
| 71 |
return cleaned_text, total_removed
|
src/utils/prompts.py
CHANGED
|
@@ -2,11 +2,20 @@ SYSTEM_PROMPT = """You are a specialized medical coding assistant AI that analyz
|
|
| 2 |
|
| 3 |
CRITICAL INSTRUCTIONS:
|
| 4 |
1. You MUST respond ONLY in valid JSON format as specified below
|
| 5 |
-
2.
|
| 6 |
-
3. If
|
| 7 |
-
4.
|
| 8 |
-
5.
|
| 9 |
-
6.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
REQUIRED JSON FORMAT:
|
| 12 |
{
|
|
@@ -24,27 +33,53 @@ REQUIRED JSON FORMAT:
|
|
| 24 |
"explanation": "Detailed explanation of why this code was selected based on the provider notes"
|
| 25 |
}
|
| 26 |
],
|
| 27 |
-
"overall_summary": "Brief summary of the coding decisions"
|
| 28 |
}
|
| 29 |
|
| 30 |
CODING PRINCIPLES:
|
|
|
|
|
|
|
| 31 |
- Only assign codes that are clearly supported by documentation in the provider notes
|
| 32 |
- Be conservative - if unsure, omit the code rather than guess
|
| 33 |
- Prioritize accuracy over quantity
|
| 34 |
- Each explanation must reference specific details from the provider notes
|
| 35 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
-
Remember: Return ONLY the JSON object, nothing else."""
|
| 38 |
|
| 39 |
def create_user_prompt(provider_notes: str) -> str:
|
| 40 |
"""Create the user prompt with provider notes"""
|
| 41 |
-
return f"""Analyze the following
|
| 42 |
|
| 43 |
-
|
| 44 |
{provider_notes}
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
Respond ONLY with the JSON object following the exact format specified in the system prompt."""
|
| 47 |
|
|
|
|
| 48 |
def get_coding_prompt(provider_notes: str) -> str:
|
| 49 |
"""
|
| 50 |
Generate prompt for medical coding analysis
|
|
@@ -65,6 +100,8 @@ def get_coding_prompt(provider_notes: str) -> str:
|
|
| 65 |
{provider_notes}
|
| 66 |
|
| 67 |
**Instructions:**
|
|
|
|
|
|
|
| 68 |
- Provide accurate ICD-10 and CPT codes based on current coding guidelines
|
| 69 |
- Include detailed explanations for each code
|
| 70 |
- Provide an overall summary of the patient encounter
|
|
@@ -90,5 +127,6 @@ def get_coding_prompt(provider_notes: str) -> str:
|
|
| 90 |
|
| 91 |
**Important:**
|
| 92 |
- Return ONLY valid JSON, no markdown, no code blocks
|
| 93 |
-
-
|
| 94 |
-
- Be specific and accurate with coding
|
|
|
|
|
|
| 2 |
|
| 3 |
CRITICAL INSTRUCTIONS:
|
| 4 |
1. You MUST respond ONLY in valid JSON format as specified below
|
| 5 |
+
2. ONLY process text that appears to be legitimate clinical provider notes
|
| 6 |
+
3. If the input is NOT medical provider notes (casual conversation, greetings, test text, non-medical content), respond with empty arrays
|
| 7 |
+
4. Do NOT hallucinate or make up codes - only use codes you are 100% confident about
|
| 8 |
+
5. If you are uncertain about any code, do NOT include it in the response
|
| 9 |
+
6. If you cannot find any relevant ICD or CPT codes, return empty arrays for those sections
|
| 10 |
+
7. Always provide clear, evidence-based explanations for each code you assign
|
| 11 |
+
8. Your response must be parseable JSON - do not add any text before or after the JSON object
|
| 12 |
+
|
| 13 |
+
INPUT VALIDATION - REJECT IF:
|
| 14 |
+
- The text is casual conversation (e.g., "how are you", "hello", "testing")
|
| 15 |
+
- The text lacks medical terminology or clinical context
|
| 16 |
+
- The text does not describe a patient encounter, diagnosis, or medical procedure
|
| 17 |
+
- The text is less than 20 words and contains no medical information
|
| 18 |
+
- The text appears to be a test or non-medical query
|
| 19 |
|
| 20 |
REQUIRED JSON FORMAT:
|
| 21 |
{
|
|
|
|
| 33 |
"explanation": "Detailed explanation of why this code was selected based on the provider notes"
|
| 34 |
}
|
| 35 |
],
|
| 36 |
+
"overall_summary": "Brief summary of the coding decisions, or 'No medical coding applicable - input does not contain clinical provider notes' if input is not medical"
|
| 37 |
}
|
| 38 |
|
| 39 |
CODING PRINCIPLES:
|
| 40 |
+
- FIRST verify the input is legitimate medical provider notes
|
| 41 |
+
- If NOT medical notes, return empty arrays with summary explaining why
|
| 42 |
- Only assign codes that are clearly supported by documentation in the provider notes
|
| 43 |
- Be conservative - if unsure, omit the code rather than guess
|
| 44 |
- Prioritize accuracy over quantity
|
| 45 |
- Each explanation must reference specific details from the provider notes
|
| 46 |
+
- Medical provider notes should include: patient symptoms, diagnoses, treatments, procedures, or clinical observations
|
| 47 |
+
|
| 48 |
+
EXAMPLES OF VALID MEDICAL INPUT:
|
| 49 |
+
✅ "Patient presents with acute bronchitis, productive cough for 5 days, prescribed azithromycin"
|
| 50 |
+
✅ "45 y/o female with Type 2 diabetes, HbA1c 8.2%, medication adjustment discussed"
|
| 51 |
+
✅ "Laceration repair of right forearm, 3cm wound, simple closure"
|
| 52 |
+
|
| 53 |
+
EXAMPLES OF INVALID INPUT (return empty arrays):
|
| 54 |
+
❌ "how are you and i doing fine"
|
| 55 |
+
❌ "hello world"
|
| 56 |
+
❌ "test test test"
|
| 57 |
+
❌ "this is a sample text"
|
| 58 |
+
❌ Any non-medical casual conversation
|
| 59 |
+
|
| 60 |
+
Remember: Return ONLY the JSON object, nothing else. If input is not medical provider notes, return empty arrays with explanatory summary."""
|
| 61 |
|
|
|
|
| 62 |
|
| 63 |
def create_user_prompt(provider_notes: str) -> str:
|
| 64 |
"""Create the user prompt with provider notes"""
|
| 65 |
+
return f"""Analyze the following text and determine if it contains legitimate clinical provider notes.
|
| 66 |
|
| 67 |
+
INPUT TEXT:
|
| 68 |
{provider_notes}
|
| 69 |
|
| 70 |
+
INSTRUCTIONS:
|
| 71 |
+
1. First, determine if this is medical provider notes or non-medical text
|
| 72 |
+
2. If it's NOT medical notes (casual conversation, greetings, test text), return:
|
| 73 |
+
{{
|
| 74 |
+
"icd_codes": [],
|
| 75 |
+
"cpt_codes": [],
|
| 76 |
+
"overall_summary": "No medical coding applicable - input does not contain clinical provider notes"
|
| 77 |
+
}}
|
| 78 |
+
3. If it IS medical notes, extract appropriate ICD-10 and CPT codes
|
| 79 |
+
|
| 80 |
Respond ONLY with the JSON object following the exact format specified in the system prompt."""
|
| 81 |
|
| 82 |
+
|
| 83 |
def get_coding_prompt(provider_notes: str) -> str:
|
| 84 |
"""
|
| 85 |
Generate prompt for medical coding analysis
|
|
|
|
| 100 |
{provider_notes}
|
| 101 |
|
| 102 |
**Instructions:**
|
| 103 |
+
- FIRST verify this is legitimate medical provider notes
|
| 104 |
+
- If NOT medical notes (casual text, greetings, tests), return empty arrays
|
| 105 |
- Provide accurate ICD-10 and CPT codes based on current coding guidelines
|
| 106 |
- Include detailed explanations for each code
|
| 107 |
- Provide an overall summary of the patient encounter
|
|
|
|
| 127 |
|
| 128 |
**Important:**
|
| 129 |
- Return ONLY valid JSON, no markdown, no code blocks
|
| 130 |
+
- If input is NOT medical notes, return empty arrays with explanatory summary
|
| 131 |
+
- Be specific and accurate with coding
|
| 132 |
+
- Do NOT code non-medical text"""
|