Commit
·
0b51e25
1
Parent(s):
ed12058
updated route
Browse files
- src/api/routes.py +5 -5
- src/config/settings.py +3 -3
- src/services/file_service.py +5 -5
- src/services/groq_service.py +6 -6
- src/services/regex_pii_remover.py +8 -8
src/api/routes.py
CHANGED
|
@@ -68,7 +68,7 @@ async def upload_provider_notes_file(file: UploadFile = File(...)):
|
|
| 68 |
FileUploadResponse with codes, explanations, and PII removal info
|
| 69 |
"""
|
| 70 |
try:
|
| 71 |
-
logger.info(f"
|
| 72 |
|
| 73 |
# Step 1: Extract text from file with automatic regex-based PII removal
|
| 74 |
extraction_result = await file_service.extract_text_from_file(
|
|
@@ -81,15 +81,15 @@ async def upload_provider_notes_file(file: UploadFile = File(...)):
|
|
| 81 |
text_length = extraction_result["text_length"]
|
| 82 |
pii_info = extraction_result["pii_info"]
|
| 83 |
|
| 84 |
-
logger.info(f"
|
| 85 |
|
| 86 |
if pii_info["pii_removed"]:
|
| 87 |
-
logger.info(f"
|
| 88 |
|
| 89 |
# Step 2: Process sanitized text through Groq LLM
|
| 90 |
coding_result = await groq_service.analyze_provider_notes(extracted_text)
|
| 91 |
|
| 92 |
-
logger.info(f"
|
| 93 |
|
| 94 |
# Step 3: Return combined response with PII info
|
| 95 |
return FileUploadResponse(
|
|
@@ -107,7 +107,7 @@ async def upload_provider_notes_file(file: UploadFile = File(...)):
|
|
| 107 |
except HTTPException:
|
| 108 |
raise
|
| 109 |
except Exception as e:
|
| 110 |
-
logger.error(f"
|
| 111 |
raise HTTPException(
|
| 112 |
status_code=500,
|
| 113 |
detail=f"Error processing uploaded file: {str(e)}"
|
|
|
|
| 68 |
FileUploadResponse with codes, explanations, and PII removal info
|
| 69 |
"""
|
| 70 |
try:
|
| 71 |
+
logger.info(f"Received file upload request: {file.filename}")
|
| 72 |
|
| 73 |
# Step 1: Extract text from file with automatic regex-based PII removal
|
| 74 |
extraction_result = await file_service.extract_text_from_file(
|
|
|
|
| 81 |
text_length = extraction_result["text_length"]
|
| 82 |
pii_info = extraction_result["pii_info"]
|
| 83 |
|
| 84 |
+
logger.info(f"Extracted {text_length} characters from {filename}")
|
| 85 |
|
| 86 |
if pii_info["pii_removed"]:
|
| 87 |
+
logger.info(f"Removed {pii_info['pii_count']} PII entities using regex before processing")
|
| 88 |
|
| 89 |
# Step 2: Process sanitized text through Groq LLM
|
| 90 |
coding_result = await groq_service.analyze_provider_notes(extracted_text)
|
| 91 |
|
| 92 |
+
logger.info(f"Successfully processed file: {filename}")
|
| 93 |
|
| 94 |
# Step 3: Return combined response with PII info
|
| 95 |
return FileUploadResponse(
|
|
|
|
| 107 |
except HTTPException:
|
| 108 |
raise
|
| 109 |
except Exception as e:
|
| 110 |
+
logger.error(f"Error in upload_provider_notes_file: {str(e)}")
|
| 111 |
raise HTTPException(
|
| 112 |
status_code=500,
|
| 113 |
detail=f"Error processing uploaded file: {str(e)}"
|
src/config/settings.py
CHANGED
|
@@ -16,8 +16,8 @@ class Settings:
|
|
| 16 |
"Please set it in Hugging Face Space Settings -> Repository secrets"
|
| 17 |
)
|
| 18 |
|
| 19 |
-
print(f"
|
| 20 |
-
print(f"
|
| 21 |
-
print(f"
|
| 22 |
|
| 23 |
settings = Settings()
|
|
|
|
| 16 |
"Please set it in Hugging Face Space Settings -> Repository secrets"
|
| 17 |
)
|
| 18 |
|
| 19 |
+
print(f"Settings loaded successfully")
|
| 20 |
+
print(f"Model ID: {self.MODEL_ID}")
|
| 21 |
+
print(f"API Key configured: {self.GROQ_API_KEY[:10]}...")
|
| 22 |
|
| 23 |
settings = Settings()
|
src/services/file_service.py
CHANGED
|
@@ -86,7 +86,7 @@ class FileService:
|
|
| 86 |
detail="Extracted text is too short. Please provide more detailed provider notes"
|
| 87 |
)
|
| 88 |
|
| 89 |
-
logger.info(f"
|
| 90 |
|
| 91 |
# Remove PII using regex if requested
|
| 92 |
pii_info = {
|
|
@@ -96,7 +96,7 @@ class FileService:
|
|
| 96 |
}
|
| 97 |
|
| 98 |
if remove_pii:
|
| 99 |
-
logger.info("
|
| 100 |
pii_result = regex_pii_remover.sanitize_provider_notes(text)
|
| 101 |
|
| 102 |
text = pii_result["sanitized_notes"]
|
|
@@ -107,9 +107,9 @@ class FileService:
|
|
| 107 |
}
|
| 108 |
|
| 109 |
if pii_result["was_pii_found"]:
|
| 110 |
-
logger.info(f"
|
| 111 |
else:
|
| 112 |
-
logger.info("
|
| 113 |
|
| 114 |
return {
|
| 115 |
"text": text,
|
|
@@ -122,7 +122,7 @@ class FileService:
|
|
| 122 |
except HTTPException:
|
| 123 |
raise
|
| 124 |
except Exception as e:
|
| 125 |
-
logger.error(f"
|
| 126 |
raise HTTPException(
|
| 127 |
status_code=500,
|
| 128 |
detail=f"Error processing file: {str(e)}"
|
|
|
|
| 86 |
detail="Extracted text is too short. Please provide more detailed provider notes"
|
| 87 |
)
|
| 88 |
|
| 89 |
+
logger.info(f"Successfully extracted {len(text)} characters from {file.filename}")
|
| 90 |
|
| 91 |
# Remove PII using regex if requested
|
| 92 |
pii_info = {
|
|
|
|
| 96 |
}
|
| 97 |
|
| 98 |
if remove_pii:
|
| 99 |
+
logger.info("Removing PII from extracted text using regex patterns...")
|
| 100 |
pii_result = regex_pii_remover.sanitize_provider_notes(text)
|
| 101 |
|
| 102 |
text = pii_result["sanitized_notes"]
|
|
|
|
| 107 |
}
|
| 108 |
|
| 109 |
if pii_result["was_pii_found"]:
|
| 110 |
+
logger.info(f"Removed {pii_result['pii_removed_count']} PII entities using regex")
|
| 111 |
else:
|
| 112 |
+
logger.info("No PII detected in text")
|
| 113 |
|
| 114 |
return {
|
| 115 |
"text": text,
|
|
|
|
| 122 |
except HTTPException:
|
| 123 |
raise
|
| 124 |
except Exception as e:
|
| 125 |
+
logger.error(f"Error extracting text from file: {str(e)}")
|
| 126 |
raise HTTPException(
|
| 127 |
status_code=500,
|
| 128 |
detail=f"Error processing file: {str(e)}"
|
src/services/groq_service.py
CHANGED
|
@@ -15,9 +15,9 @@ class GroqService:
|
|
| 15 |
print(f"🔧 Initializing Groq client...")
|
| 16 |
self.client = Groq(api_key=settings.GROQ_API_KEY)
|
| 17 |
self.model_id = settings.MODEL_ID
|
| 18 |
-
print(f"
|
| 19 |
except Exception as e:
|
| 20 |
-
print(f"
|
| 21 |
raise
|
| 22 |
|
| 23 |
async def analyze_provider_notes(self, provider_notes: str) -> dict:
|
|
@@ -25,7 +25,7 @@ class GroqService:
|
|
| 25 |
Analyze provider notes and return ICD and CPT codes with explanations
|
| 26 |
"""
|
| 27 |
try:
|
| 28 |
-
print(f"
|
| 29 |
# Create the chat completion with system and user prompts
|
| 30 |
chat_completion = self.client.chat.completions.create(
|
| 31 |
messages=[
|
|
@@ -45,16 +45,16 @@ class GroqService:
|
|
| 45 |
|
| 46 |
# Extract and parse the response
|
| 47 |
response_content = chat_completion.choices[0].message.content
|
| 48 |
-
print(f"
|
| 49 |
parsed_response = json.loads(response_content)
|
| 50 |
|
| 51 |
return parsed_response
|
| 52 |
|
| 53 |
except json.JSONDecodeError as e:
|
| 54 |
-
print(f"
|
| 55 |
raise ValueError(f"Failed to parse JSON response from model: {str(e)}")
|
| 56 |
except Exception as e:
|
| 57 |
-
print(f"
|
| 58 |
raise Exception(f"Error calling Groq API: {str(e)}")
|
| 59 |
|
| 60 |
# Don't initialize here - let it be initialized when imported
|
|
|
|
| 15 |
print(f"🔧 Initializing Groq client...")
|
| 16 |
self.client = Groq(api_key=settings.GROQ_API_KEY)
|
| 17 |
self.model_id = settings.MODEL_ID
|
| 18 |
+
print(f"Groq client initialized successfully with model: {self.model_id}")
|
| 19 |
except Exception as e:
|
| 20 |
+
print(f"Failed to initialize Groq client: {str(e)}")
|
| 21 |
raise
|
| 22 |
|
| 23 |
async def analyze_provider_notes(self, provider_notes: str) -> dict:
|
|
|
|
| 25 |
Analyze provider notes and return ICD and CPT codes with explanations
|
| 26 |
"""
|
| 27 |
try:
|
| 28 |
+
print(f"Analyzing provider notes...")
|
| 29 |
# Create the chat completion with system and user prompts
|
| 30 |
chat_completion = self.client.chat.completions.create(
|
| 31 |
messages=[
|
|
|
|
| 45 |
|
| 46 |
# Extract and parse the response
|
| 47 |
response_content = chat_completion.choices[0].message.content
|
| 48 |
+
print(f"Received response from Groq API")
|
| 49 |
parsed_response = json.loads(response_content)
|
| 50 |
|
| 51 |
return parsed_response
|
| 52 |
|
| 53 |
except json.JSONDecodeError as e:
|
| 54 |
+
print(f"JSON parsing error: {str(e)}")
|
| 55 |
raise ValueError(f"Failed to parse JSON response from model: {str(e)}")
|
| 56 |
except Exception as e:
|
| 57 |
+
print(f"Groq API error: {str(e)}")
|
| 58 |
raise Exception(f"Error calling Groq API: {str(e)}")
|
| 59 |
|
| 60 |
# Don't initialize here - let it be initialized when imported
|
src/services/regex_pii_remover.py
CHANGED
|
@@ -92,7 +92,7 @@ class RegexPIIRemover:
|
|
| 92 |
}
|
| 93 |
}
|
| 94 |
|
| 95 |
-
logger.info("
|
| 96 |
|
| 97 |
def detect_pii(self, text: str) -> List[Dict]:
|
| 98 |
"""
|
|
@@ -130,7 +130,7 @@ class RegexPIIRemover:
|
|
| 130 |
'description': config['description']
|
| 131 |
})
|
| 132 |
|
| 133 |
-
logger.info(f"
|
| 134 |
return findings
|
| 135 |
|
| 136 |
def remove_pii(self, text: str) -> Dict[str, any]:
|
|
@@ -161,7 +161,7 @@ class RegexPIIRemover:
|
|
| 161 |
'description': config['description']
|
| 162 |
})
|
| 163 |
sanitized_text = re.sub(config['pattern'], config['replacement'], sanitized_text)
|
| 164 |
-
logger.info(f"
|
| 165 |
|
| 166 |
# Apply medical-specific patterns
|
| 167 |
for entity_type, config in self.medical_patterns.items():
|
|
@@ -175,14 +175,14 @@ class RegexPIIRemover:
|
|
| 175 |
'description': config['description']
|
| 176 |
})
|
| 177 |
sanitized_text = re.sub(config['pattern'], config['replacement'], sanitized_text, flags=re.IGNORECASE)
|
| 178 |
-
logger.info(f"
|
| 179 |
|
| 180 |
was_pii_removed = sanitized_text != original_text
|
| 181 |
|
| 182 |
if was_pii_removed:
|
| 183 |
-
logger.info(f"
|
| 184 |
else:
|
| 185 |
-
logger.info("
|
| 186 |
|
| 187 |
return {
|
| 188 |
'sanitized_text': sanitized_text,
|
|
@@ -193,7 +193,7 @@ class RegexPIIRemover:
|
|
| 193 |
}
|
| 194 |
|
| 195 |
except Exception as e:
|
| 196 |
-
logger.error(f"
|
| 197 |
return {
|
| 198 |
'sanitized_text': text,
|
| 199 |
'original_text': text,
|
|
@@ -214,7 +214,7 @@ class RegexPIIRemover:
|
|
| 214 |
Returns:
|
| 215 |
Dictionary with sanitized notes and PII removal report
|
| 216 |
"""
|
| 217 |
-
logger.info("
|
| 218 |
result = self.remove_pii(notes)
|
| 219 |
|
| 220 |
return {
|
|
|
|
| 92 |
}
|
| 93 |
}
|
| 94 |
|
| 95 |
+
logger.info("Regex PII Remover initialized with pattern-based detection")
|
| 96 |
|
| 97 |
def detect_pii(self, text: str) -> List[Dict]:
|
| 98 |
"""
|
|
|
|
| 130 |
'description': config['description']
|
| 131 |
})
|
| 132 |
|
| 133 |
+
logger.info(f"Detected {len(findings)} PII entities using regex patterns")
|
| 134 |
return findings
|
| 135 |
|
| 136 |
def remove_pii(self, text: str) -> Dict[str, any]:
|
|
|
|
| 161 |
'description': config['description']
|
| 162 |
})
|
| 163 |
sanitized_text = re.sub(config['pattern'], config['replacement'], sanitized_text)
|
| 164 |
+
logger.info(f"Removed {count} {config['description']}")
|
| 165 |
|
| 166 |
# Apply medical-specific patterns
|
| 167 |
for entity_type, config in self.medical_patterns.items():
|
|
|
|
| 175 |
'description': config['description']
|
| 176 |
})
|
| 177 |
sanitized_text = re.sub(config['pattern'], config['replacement'], sanitized_text, flags=re.IGNORECASE)
|
| 178 |
+
logger.info(f"Removed {count} {config['description']}")
|
| 179 |
|
| 180 |
was_pii_removed = sanitized_text != original_text
|
| 181 |
|
| 182 |
if was_pii_removed:
|
| 183 |
+
logger.info(f"Total PII removals: {total_replacements} entities")
|
| 184 |
else:
|
| 185 |
+
logger.info("No PII detected in text")
|
| 186 |
|
| 187 |
return {
|
| 188 |
'sanitized_text': sanitized_text,
|
|
|
|
| 193 |
}
|
| 194 |
|
| 195 |
except Exception as e:
|
| 196 |
+
logger.error(f"Error removing PII: {str(e)}")
|
| 197 |
return {
|
| 198 |
'sanitized_text': text,
|
| 199 |
'original_text': text,
|
|
|
|
| 214 |
Returns:
|
| 215 |
Dictionary with sanitized notes and PII removal report
|
| 216 |
"""
|
| 217 |
+
logger.info("Starting PII sanitization of provider notes...")
|
| 218 |
result = self.remove_pii(notes)
|
| 219 |
|
| 220 |
return {
|