Distopia22 committed on
Commit
1915c66
·
1 Parent(s): 764e30e

Fixing model hallucination

Browse files
src/api/routes.py CHANGED
@@ -4,11 +4,47 @@ from models.request_models import ProviderNotesRequest
4
  from models.response_models import CodingResponse, FileUploadResponse
5
  from services.groq_service import groq_service
6
  from services.file_service import file_service
 
7
 
8
  router = APIRouter()
9
  logger = logging.getLogger(__name__)
10
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  @router.post("/coding", response_model=CodingResponse)
13
  async def analyze_provider_notes(request: ProviderNotesRequest):
14
  """
@@ -20,7 +56,7 @@ async def analyze_provider_notes(request: ProviderNotesRequest):
20
  - Overall encounter summary
21
  """
22
  try:
23
- logger.info(f"📥 Received coding request (notes length: {len(request.provider_notes)})")
24
 
25
  # Validate input
26
  if not request.provider_notes or len(request.provider_notes.strip()) < 10:
@@ -29,20 +65,29 @@ async def analyze_provider_notes(request: ProviderNotesRequest):
29
  detail="Provider notes must be at least 10 characters long"
30
  )
31
 
 
 
 
 
 
 
 
 
 
32
  # Analyze with Groq
33
  result = groq_service.analyze_provider_notes(request.provider_notes)
34
 
35
- logger.info(f"Analysis complete: {len(result.get('icd_codes', []))} ICD codes, {len(result.get('cpt_codes', []))} CPT codes")
36
 
37
  return result
38
 
39
  except HTTPException:
40
  raise
41
  except ValueError as e:
42
- logger.error(f"Validation error: {str(e)}")
43
  raise HTTPException(status_code=400, detail=str(e))
44
  except Exception as e:
45
- logger.error(f"Error processing request: {str(e)}", exc_info=True)
46
  raise HTTPException(status_code=500, detail=f"Error processing request: {str(e)}")
47
 
48
 
@@ -58,7 +103,7 @@ async def upload_provider_notes_file(file: UploadFile = File(...)):
58
  - Overall summary
59
  """
60
  try:
61
- logger.info(f"📤 Received file upload: {file.filename}")
62
 
63
  # Validate file type
64
  if not file.filename.endswith('.txt'):
@@ -71,14 +116,14 @@ async def upload_provider_notes_file(file: UploadFile = File(...)):
71
  content = await file.read()
72
  text = content.decode('utf-8')
73
 
74
- logger.info(f"📄 File read successfully (length: {len(text)})")
75
 
76
  # Remove PII
77
  try:
78
  cleaned_text, pii_count = file_service.remove_pii(text)
79
- logger.info(f"🔒 PII removal complete: {pii_count} entities removed")
80
  except Exception as pii_error:
81
- logger.error(f"⚠️ PII removal failed: {str(pii_error)}")
82
  # Continue without PII removal if it fails
83
  cleaned_text = text
84
  pii_count = 0
@@ -98,15 +143,15 @@ async def upload_provider_notes_file(file: UploadFile = File(...)):
98
  "overall_summary": result.get("overall_summary", "")
99
  }
100
 
101
- logger.info(f"File processing complete")
102
 
103
  return response
104
 
105
  except HTTPException:
106
  raise
107
  except UnicodeDecodeError:
108
- logger.error("File encoding error")
109
  raise HTTPException(status_code=400, detail="File must be UTF-8 encoded text")
110
  except Exception as e:
111
- logger.error(f"Error processing uploaded file: {str(e)}", exc_info=True)
112
  raise HTTPException(status_code=500, detail=f"Error processing uploaded file: {str(e)}")
 
4
  from models.response_models import CodingResponse, FileUploadResponse
5
  from services.groq_service import groq_service
6
  from services.file_service import file_service
7
+ import re
8
 
9
  router = APIRouter()
10
  logger = logging.getLogger(__name__)
11
 
12
 
13
+ def is_likely_medical_text(text: str) -> bool:
14
+ """
15
+ Pre-validate if text appears to be medical provider notes
16
+ Returns True if likely medical, False otherwise
17
+ """
18
+ text_lower = text.lower()
19
+
20
+ # Medical keywords that should be present
21
+ medical_keywords = [
22
+ 'patient', 'diagnosis', 'symptom', 'treatment', 'exam', 'history',
23
+ 'complaint', 'pain', 'fever', 'cough', 'prescribed', 'medication',
24
+ 'procedure', 'surgery', 'vital', 'blood', 'pressure', 'heart',
25
+ 'lung', 'breath', 'chronic', 'acute', 'assessment', 'plan',
26
+ 'condition', 'disease', 'injury', 'wound', 'fracture', 'infection'
27
+ ]
28
+
29
+ # Red flags for non-medical text
30
+ casual_phrases = [
31
+ 'how are you', 'hello', 'hi there', 'good morning', 'good evening',
32
+ 'test test', 'sample text', 'doing fine', 'nice to meet',
33
+ 'what\'s up', 'how\'s it going'
34
+ ]
35
+
36
+ # Check for casual phrases (immediate rejection)
37
+ for phrase in casual_phrases:
38
+ if phrase in text_lower:
39
+ return False
40
+
41
+ # Check for medical keywords (need at least 1)
42
+ medical_keyword_count = sum(1 for keyword in medical_keywords if keyword in text_lower)
43
+
44
+ # Require at least 1 medical keyword and minimum length
45
+ return medical_keyword_count >= 1 and len(text.split()) >= 10
46
+
47
+
48
  @router.post("/coding", response_model=CodingResponse)
49
  async def analyze_provider_notes(request: ProviderNotesRequest):
50
  """
 
56
  - Overall encounter summary
57
  """
58
  try:
59
+ logger.info(f"Received coding request (notes length: {len(request.provider_notes)})")
60
 
61
  # Validate input
62
  if not request.provider_notes or len(request.provider_notes.strip()) < 10:
 
65
  detail="Provider notes must be at least 10 characters long"
66
  )
67
 
68
+ # PRE-VALIDATION: Check if text appears to be medical
69
+ if not is_likely_medical_text(request.provider_notes):
70
+ logger.warning("Input rejected - does not appear to be medical provider notes")
71
+ return {
72
+ "icd_codes": [],
73
+ "cpt_codes": [],
74
+ "overall_summary": "No medical coding applicable - input does not appear to contain clinical provider notes. Please provide legitimate medical documentation."
75
+ }
76
+
77
  # Analyze with Groq
78
  result = groq_service.analyze_provider_notes(request.provider_notes)
79
 
80
+ logger.info(f"Analysis complete: {len(result.get('icd_codes', []))} ICD codes, {len(result.get('cpt_codes', []))} CPT codes")
81
 
82
  return result
83
 
84
  except HTTPException:
85
  raise
86
  except ValueError as e:
87
+ logger.error(f"Validation error: {str(e)}")
88
  raise HTTPException(status_code=400, detail=str(e))
89
  except Exception as e:
90
+ logger.error(f"Error processing request: {str(e)}", exc_info=True)
91
  raise HTTPException(status_code=500, detail=f"Error processing request: {str(e)}")
92
 
93
 
 
103
  - Overall summary
104
  """
105
  try:
106
+ logger.info(f"Received file upload: {file.filename}")
107
 
108
  # Validate file type
109
  if not file.filename.endswith('.txt'):
 
116
  content = await file.read()
117
  text = content.decode('utf-8')
118
 
119
+ logger.info(f"File read successfully (length: {len(text)})")
120
 
121
  # Remove PII
122
  try:
123
  cleaned_text, pii_count = file_service.remove_pii(text)
124
+ logger.info(f"PII removal complete: {pii_count} entities removed")
125
  except Exception as pii_error:
126
+ logger.error(f"WARNING: PII removal failed: {str(pii_error)}")
127
  # Continue without PII removal if it fails
128
  cleaned_text = text
129
  pii_count = 0
 
143
  "overall_summary": result.get("overall_summary", "")
144
  }
145
 
146
+ logger.info(f"File processing complete")
147
 
148
  return response
149
 
150
  except HTTPException:
151
  raise
152
  except UnicodeDecodeError:
153
+ logger.error("File encoding error")
154
  raise HTTPException(status_code=400, detail="File must be UTF-8 encoded text")
155
  except Exception as e:
156
+ logger.error(f"Error processing uploaded file: {str(e)}", exc_info=True)
157
  raise HTTPException(status_code=500, detail=f"Error processing uploaded file: {str(e)}")
src/config/settings.py CHANGED
@@ -19,8 +19,8 @@ settings = Settings()
19
 
20
  # Validate API key on startup
21
  if not settings.groq_api_key:
22
- print("⚠️ WARNING: GROQ_API_KEY is not set! API will not function properly.")
23
  print("Please set GROQ_API_KEY in Hugging Face Space secrets.")
24
  else:
25
- print(f"Groq API Key loaded (length: {len(settings.groq_api_key)})")
26
- print(f"Using model: {settings.groq_model}")
 
19
 
20
  # Validate API key on startup
21
  if not settings.groq_api_key:
22
+ print("WARNING: GROQ_API_KEY is not set! API will not function properly.")
23
  print("Please set GROQ_API_KEY in Hugging Face Space secrets.")
24
  else:
25
+ print(f"Groq API Key loaded (length: {len(settings.groq_api_key)})")
26
+ print(f"Using model: {settings.groq_model}")
src/main.py CHANGED
@@ -21,22 +21,22 @@ async def lifespan(app: FastAPI):
21
  """
22
  # Startup
23
  logger.info("=" * 50)
24
- logger.info(f"🚀 {settings.api_title} v{settings.api_version}")
25
  logger.info("=" * 50)
26
 
27
  if settings.groq_api_key:
28
- logger.info(f"Groq API Key: Configured (length: {len(settings.groq_api_key)})")
29
- logger.info(f"Groq Model: {settings.groq_model}")
30
  else:
31
- logger.error("GROQ_API_KEY is NOT set!")
32
- logger.error("⚠️ API will NOT function without valid API key")
33
 
34
  logger.info("=" * 50)
35
 
36
  yield
37
 
38
  # Shutdown
39
- logger.info("👋 Shutting down API...")
40
 
41
 
42
  # Create FastAPI app with lifespan
 
21
  """
22
  # Startup
23
  logger.info("=" * 50)
24
+ logger.info(f"Starting {settings.api_title} v{settings.api_version}")
25
  logger.info("=" * 50)
26
 
27
  if settings.groq_api_key:
28
+ logger.info(f"Groq API Key: Configured (length: {len(settings.groq_api_key)})")
29
+ logger.info(f"Groq Model: {settings.groq_model}")
30
  else:
31
+ logger.error("GROQ_API_KEY is NOT set!")
32
+ logger.error("WARNING: API will NOT function without valid API key")
33
 
34
  logger.info("=" * 50)
35
 
36
  yield
37
 
38
  # Shutdown
39
+ logger.info("Shutting down API...")
40
 
41
 
42
  # Create FastAPI app with lifespan
src/services/file_service.py CHANGED
@@ -1,6 +1,3 @@
1
- from fastapi import UploadFile, HTTPException
2
- import os
3
- from typing import Dict
4
  import logging
5
  from services.regex_pii_remover import RegexPIIRemover
6
 
@@ -11,7 +8,7 @@ class FileService:
11
  def __init__(self):
12
  """Initialize file service with PII remover"""
13
  self.pii_remover = RegexPIIRemover()
14
- logger.info("FileService initialized")
15
 
16
  def remove_pii(self, text: str) -> tuple[str, int]:
17
  """
@@ -24,16 +21,16 @@ class FileService:
24
  tuple: (cleaned_text, pii_count)
25
  """
26
  try:
27
- logger.info(f"🔒 Starting PII removal (text length: {len(text)})")
28
 
29
  cleaned_text, pii_count = self.pii_remover.remove_pii(text)
30
 
31
- logger.info(f"PII removal complete: {pii_count} entities removed")
32
 
33
  return cleaned_text, pii_count
34
 
35
  except Exception as e:
36
- logger.error(f"Error during PII removal: {str(e)}")
37
  # Return original text if PII removal fails
38
  return text, 0
39
 
 
 
 
 
1
  import logging
2
  from services.regex_pii_remover import RegexPIIRemover
3
 
 
8
  def __init__(self):
9
  """Initialize file service with PII remover"""
10
  self.pii_remover = RegexPIIRemover()
11
+ logger.info("FileService initialized")
12
 
13
  def remove_pii(self, text: str) -> tuple[str, int]:
14
  """
 
21
  tuple: (cleaned_text, pii_count)
22
  """
23
  try:
24
+ logger.info(f"Starting PII removal (text length: {len(text)})")
25
 
26
  cleaned_text, pii_count = self.pii_remover.remove_pii(text)
27
 
28
+ logger.info(f"PII removal complete: {pii_count} entities removed")
29
 
30
  return cleaned_text, pii_count
31
 
32
  except Exception as e:
33
+ logger.error(f"Error during PII removal: {str(e)}")
34
  # Return original text if PII removal fails
35
  return text, 0
36
 
src/services/groq_service.py CHANGED
@@ -8,22 +8,27 @@ logger = logging.getLogger(__name__)
8
 
9
  class GroqService:
10
  def __init__(self):
11
- """Initialize Groq client with API key from settings"""
12
- if not settings.groq_api_key:
13
- logger.error("❌ GROQ_API_KEY is not set!")
14
- raise ValueError("GROQ_API_KEY environment variable is required")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- try:
17
- # Simple initialization without extra parameters
18
- self.client = Groq(
19
- api_key=settings.groq_api_key
20
- )
21
- self.model = settings.groq_model
22
- logger.info(f"✅ Groq client initialized successfully")
23
- logger.info(f"✅ Using model: {self.model}")
24
- except Exception as e:
25
- logger.error(f"❌ Failed to initialize Groq client: {str(e)}")
26
- raise
27
 
28
  def analyze_provider_notes(self, provider_notes: str) -> dict:
29
  """
@@ -31,19 +36,18 @@ class GroqService:
31
 
32
  Args:
33
  provider_notes: Clinical provider notes text
34
- response_format: Desired response format (default: "json")
35
 
36
  Returns:
37
  dict: Parsed coding response with ICD-10, CPT codes and summary
38
  """
39
  try:
40
- logger.info(f"📝 Analyzing provider notes (length: {len(provider_notes)})")
41
 
42
  # Get the prompt
43
  prompt = get_coding_prompt(provider_notes)
44
 
45
- # Call Groq API
46
- logger.info(f"🚀 Calling Groq API with model: {self.model}")
47
  response = self.client.chat.completions.create(
48
  model=self.model,
49
  messages=[
@@ -63,8 +67,7 @@ class GroqService:
63
 
64
  # Extract response
65
  raw_response = response.choices[0].message.content
66
- logger.info(f"📥 Received response from Groq (length: {len(raw_response)})")
67
- logger.debug(f"Raw response: {raw_response[:500]}...")
68
 
69
  # Parse JSON
70
  parsed_response = json.loads(raw_response)
@@ -72,31 +75,20 @@ class GroqService:
72
  # Validate and structure response
73
  result = self._structure_response(parsed_response)
74
 
75
- logger.info(f"Successfully analyzed notes: {len(result.get('icd_codes', []))} ICD codes, {len(result.get('cpt_codes', []))} CPT codes")
76
 
77
  return result
78
 
79
  except json.JSONDecodeError as e:
80
- logger.error(f"JSON parsing error: {str(e)}")
81
- logger.error(f"Raw response: {raw_response}")
82
  raise ValueError(f"Failed to parse Groq response as JSON: {str(e)}")
83
 
84
  except Exception as e:
85
- logger.error(f"Error analyzing provider notes: {str(e)}")
86
  raise
87
 
88
  def _structure_response(self, parsed_response: dict) -> dict:
89
- """
90
- Structure and validate the response from Groq
91
-
92
- Args:
93
- parsed_response: Raw parsed JSON from Groq
94
- response_format: Desired response format (default: "json")
95
-
96
- Returns:
97
- dict: Properly structured response
98
- """
99
- # Handle different possible response formats
100
  icd_codes = []
101
  cpt_codes = []
102
  overall_summary = parsed_response.get("overall_summary", "")
@@ -129,5 +121,5 @@ class GroqService:
129
  "overall_summary": overall_summary
130
  }
131
 
132
- # Global instance
133
  groq_service = GroqService()
 
8
 
9
  class GroqService:
10
  def __init__(self):
11
+ """Initialize Groq service (client created on first use)"""
12
+ self._client = None
13
+ self.model = settings.groq_model
14
+
15
+ @property
16
+ def client(self):
17
+ """Lazy initialization of Groq client"""
18
+ if self._client is None:
19
+ if not settings.groq_api_key:
20
+ logger.error("GROQ_API_KEY is not set!")
21
+ raise ValueError("GROQ_API_KEY environment variable is required")
22
+
23
+ try:
24
+ logger.info("Initializing Groq client...")
25
+ self._client = Groq(api_key=settings.groq_api_key)
26
+ logger.info(f"Groq client initialized successfully")
27
+ except Exception as e:
28
+ logger.error(f"Failed to initialize Groq client: {str(e)}")
29
+ raise
30
 
31
+ return self._client
 
 
 
 
 
 
 
 
 
 
32
 
33
  def analyze_provider_notes(self, provider_notes: str) -> dict:
34
  """
 
36
 
37
  Args:
38
  provider_notes: Clinical provider notes text
 
39
 
40
  Returns:
41
  dict: Parsed coding response with ICD-10, CPT codes and summary
42
  """
43
  try:
44
+ logger.info(f"Analyzing provider notes (length: {len(provider_notes)})")
45
 
46
  # Get the prompt
47
  prompt = get_coding_prompt(provider_notes)
48
 
49
+ # Call Groq API (client initialized here if needed)
50
+ logger.info(f"Calling Groq API with model: {self.model}")
51
  response = self.client.chat.completions.create(
52
  model=self.model,
53
  messages=[
 
67
 
68
  # Extract response
69
  raw_response = response.choices[0].message.content
70
+ logger.info(f"Received response from Groq (length: {len(raw_response)})")
 
71
 
72
  # Parse JSON
73
  parsed_response = json.loads(raw_response)
 
75
  # Validate and structure response
76
  result = self._structure_response(parsed_response)
77
 
78
+ logger.info(f"Successfully analyzed: {len(result.get('icd_codes', []))} ICD codes, {len(result.get('cpt_codes', []))} CPT codes")
79
 
80
  return result
81
 
82
  except json.JSONDecodeError as e:
83
+ logger.error(f"JSON parsing error: {str(e)}")
 
84
  raise ValueError(f"Failed to parse Groq response as JSON: {str(e)}")
85
 
86
  except Exception as e:
87
+ logger.error(f"Error analyzing provider notes: {str(e)}")
88
  raise
89
 
90
  def _structure_response(self, parsed_response: dict) -> dict:
91
+ """Structure and validate the response from Groq"""
 
 
 
 
 
 
 
 
 
 
92
  icd_codes = []
93
  cpt_codes = []
94
  overall_summary = parsed_response.get("overall_summary", "")
 
121
  "overall_summary": overall_summary
122
  }
123
 
124
+ # Global instance (client initialized on first use)
125
  groq_service = GroqService()
src/services/regex_pii_remover.py CHANGED
@@ -35,7 +35,7 @@ class RegexPIIRemover:
35
  'dob': re.compile(r'(DOB|Date of Birth)[:\s]+\d{1,2}[/-]\d{1,2}[/-]\d{2,4}', re.IGNORECASE),
36
  }
37
 
38
- logger.info(f"RegexPIIRemover initialized with {len(self.patterns)} patterns")
39
 
40
  def remove_pii(self, text: str) -> tuple[str, int]:
41
  """
@@ -66,6 +66,6 @@ class RegexPIIRemover:
66
  else:
67
  cleaned_text = pattern.sub('[REDACTED]', cleaned_text)
68
 
69
- logger.info(f"🔒 Removed {total_removed} PII entities")
70
 
71
  return cleaned_text, total_removed
 
35
  'dob': re.compile(r'(DOB|Date of Birth)[:\s]+\d{1,2}[/-]\d{1,2}[/-]\d{2,4}', re.IGNORECASE),
36
  }
37
 
38
+ logger.info(f"RegexPIIRemover initialized with {len(self.patterns)} patterns")
39
 
40
  def remove_pii(self, text: str) -> tuple[str, int]:
41
  """
 
66
  else:
67
  cleaned_text = pattern.sub('[REDACTED]', cleaned_text)
68
 
69
+ logger.info(f"Removed {total_removed} PII entities")
70
 
71
  return cleaned_text, total_removed
src/utils/prompts.py CHANGED
@@ -2,11 +2,20 @@ SYSTEM_PROMPT = """You are a specialized medical coding assistant AI that analyz
2
 
3
  CRITICAL INSTRUCTIONS:
4
  1. You MUST respond ONLY in valid JSON format as specified below
5
- 2. Do NOT hallucinate or make up codes - only use codes you are confident about
6
- 3. If you are uncertain about any code, do NOT include it in the response
7
- 4. If you cannot find any relevant ICD or CPT codes, return empty arrays for those sections
8
- 5. Always provide clear, evidence-based explanations for each code you assign
9
- 6. Your response must be parseable JSON - do not add any text before or after the JSON object
 
 
 
 
 
 
 
 
 
10
 
11
  REQUIRED JSON FORMAT:
12
  {
@@ -24,27 +33,53 @@ REQUIRED JSON FORMAT:
24
  "explanation": "Detailed explanation of why this code was selected based on the provider notes"
25
  }
26
  ],
27
- "overall_summary": "Brief summary of the coding decisions"
28
  }
29
 
30
  CODING PRINCIPLES:
 
 
31
  - Only assign codes that are clearly supported by documentation in the provider notes
32
  - Be conservative - if unsure, omit the code rather than guess
33
  - Prioritize accuracy over quantity
34
  - Each explanation must reference specific details from the provider notes
35
- - If no relevant codes can be determined, respond with empty arrays
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- Remember: Return ONLY the JSON object, nothing else."""
38
 
39
  def create_user_prompt(provider_notes: str) -> str:
40
  """Create the user prompt with provider notes"""
41
- return f"""Analyze the following provider notes and extract appropriate ICD-10 and CPT codes.
42
 
43
- PROVIDER NOTES:
44
  {provider_notes}
45
 
 
 
 
 
 
 
 
 
 
 
46
  Respond ONLY with the JSON object following the exact format specified in the system prompt."""
47
 
 
48
  def get_coding_prompt(provider_notes: str) -> str:
49
  """
50
  Generate prompt for medical coding analysis
@@ -65,6 +100,8 @@ def get_coding_prompt(provider_notes: str) -> str:
65
  {provider_notes}
66
 
67
  **Instructions:**
 
 
68
  - Provide accurate ICD-10 and CPT codes based on current coding guidelines
69
  - Include detailed explanations for each code
70
  - Provide an overall summary of the patient encounter
@@ -90,5 +127,6 @@ def get_coding_prompt(provider_notes: str) -> str:
90
 
91
  **Important:**
92
  - Return ONLY valid JSON, no markdown, no code blocks
93
- - Include at least one ICD-10 code and one CPT code if applicable
94
- - Be specific and accurate with coding"""
 
 
2
 
3
  CRITICAL INSTRUCTIONS:
4
  1. You MUST respond ONLY in valid JSON format as specified below
5
+ 2. ONLY process text that appears to be legitimate clinical provider notes
6
+ 3. If the input is NOT medical provider notes (casual conversation, greetings, test text, non-medical content), respond with empty arrays
7
+ 4. Do NOT hallucinate or make up codes - only use codes you are 100% confident about
8
+ 5. If you are uncertain about any code, do NOT include it in the response
9
+ 6. If you cannot find any relevant ICD or CPT codes, return empty arrays for those sections
10
+ 7. Always provide clear, evidence-based explanations for each code you assign
11
+ 8. Your response must be parseable JSON - do not add any text before or after the JSON object
12
+
13
+ INPUT VALIDATION - REJECT IF:
14
+ - The text is casual conversation (e.g., "how are you", "hello", "testing")
15
+ - The text lacks medical terminology or clinical context
16
+ - The text does not describe a patient encounter, diagnosis, or medical procedure
17
+ - The text is less than 20 words and contains no medical information
18
+ - The text appears to be a test or non-medical query
19
 
20
  REQUIRED JSON FORMAT:
21
  {
 
33
  "explanation": "Detailed explanation of why this code was selected based on the provider notes"
34
  }
35
  ],
36
+ "overall_summary": "Brief summary of the coding decisions, or 'No medical coding applicable - input does not contain clinical provider notes' if input is not medical"
37
  }
38
 
39
  CODING PRINCIPLES:
40
+ - FIRST verify the input is legitimate medical provider notes
41
+ - If NOT medical notes, return empty arrays with summary explaining why
42
  - Only assign codes that are clearly supported by documentation in the provider notes
43
  - Be conservative - if unsure, omit the code rather than guess
44
  - Prioritize accuracy over quantity
45
  - Each explanation must reference specific details from the provider notes
46
+ - Medical provider notes should include: patient symptoms, diagnoses, treatments, procedures, or clinical observations
47
+
48
+ EXAMPLES OF VALID MEDICAL INPUT:
49
+ ✅ "Patient presents with acute bronchitis, productive cough for 5 days, prescribed azithromycin"
50
+ ✅ "45 y/o female with Type 2 diabetes, HbA1c 8.2%, medication adjustment discussed"
51
+ ✅ "Laceration repair of right forearm, 3cm wound, simple closure"
52
+
53
+ EXAMPLES OF INVALID INPUT (return empty arrays):
54
+ ❌ "how are you and i doing fine"
55
+ ❌ "hello world"
56
+ ❌ "test test test"
57
+ ❌ "this is a sample text"
58
+ ❌ Any non-medical casual conversation
59
+
60
+ Remember: Return ONLY the JSON object, nothing else. If input is not medical provider notes, return empty arrays with explanatory summary."""
61
 
 
62
 
63
  def create_user_prompt(provider_notes: str) -> str:
64
  """Create the user prompt with provider notes"""
65
+ return f"""Analyze the following text and determine if it contains legitimate clinical provider notes.
66
 
67
+ INPUT TEXT:
68
  {provider_notes}
69
 
70
+ INSTRUCTIONS:
71
+ 1. First, determine if this is medical provider notes or non-medical text
72
+ 2. If it's NOT medical notes (casual conversation, greetings, test text), return:
73
+ {{
74
+ "icd_codes": [],
75
+ "cpt_codes": [],
76
+ "overall_summary": "No medical coding applicable - input does not contain clinical provider notes"
77
+ }}
78
+ 3. If it IS medical notes, extract appropriate ICD-10 and CPT codes
79
+
80
  Respond ONLY with the JSON object following the exact format specified in the system prompt."""
81
 
82
+
83
  def get_coding_prompt(provider_notes: str) -> str:
84
  """
85
  Generate prompt for medical coding analysis
 
100
  {provider_notes}
101
 
102
  **Instructions:**
103
+ - FIRST verify this is legitimate medical provider notes
104
+ - If NOT medical notes (casual text, greetings, tests), return empty arrays
105
  - Provide accurate ICD-10 and CPT codes based on current coding guidelines
106
  - Include detailed explanations for each code
107
  - Provide an overall summary of the patient encounter
 
127
 
128
  **Important:**
129
  - Return ONLY valid JSON, no markdown, no code blocks
130
+ - If input is NOT medical notes, return empty arrays with explanatory summary
131
+ - Be specific and accurate with coding
132
+ - Do NOT code non-medical text"""