# Author (from page header): Distopia22
# Commit fd20bd2: Add automatic PII removal during file extraction
# File size: 4.31 kB
from fastapi import APIRouter, HTTPException, UploadFile, File
from models.request_models import ProviderNotesRequest, FileUploadResponse
from models.response_models import CodingResponse
from services.groq_service import groq_service
from services.file_service import file_service
import logging
# Router on which this module's endpoints are registered via @router.post.
router = APIRouter()
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# EXISTING ENDPOINT - DO NOT CHANGE
@router.post("/coding", response_model=CodingResponse)
async def analyze_provider_notes(request: ProviderNotesRequest):
    """
    Analyze provider notes and extract ICD-10 and CPT codes.

    This endpoint accepts provider notes as text input.

    Args:
        request: Request body carrying the ``provider_notes`` text.

    Returns:
        CodingResponse built from the "CPT", "CPT_explanation", "ICD" and
        "ICD_explanation" entries of the Groq service result. Missing
        entries fall back to an empty list / empty string.

    Raises:
        HTTPException: 400 when the notes are missing or shorter than 10
            characters once surrounding whitespace is stripped; 500 when
            any other error occurs while processing the request.
    """
    try:
        logger.info("Received coding request")

        # Get provider notes from request
        provider_notes = request.provider_notes
        if not provider_notes or len(provider_notes.strip()) < 10:
            raise HTTPException(
                status_code=400,
                detail="Provider notes must be at least 10 characters long"
            )

        # Process through Groq service
        result = await groq_service.analyze_provider_notes(provider_notes)
        logger.info("Successfully processed coding request")

        # Return response matching CodingResponse model
        return CodingResponse(
            cpt_codes=result.get("CPT", []),
            cpt_explanation=result.get("CPT_explanation", ""),
            icd_codes=result.get("ICD", []),
            icd_explanation=result.get("ICD_explanation", "")
        )
    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) pass through untouched.
        raise
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error(str(e)) lost.
        logger.exception("Error in analyze_provider_notes: %s", e)
        # NOTE(review): the detail echoes the raw exception text to the client;
        # kept as-is to preserve the existing response contract.
        raise HTTPException(
            status_code=500,
            detail=f"Error processing request: {str(e)}"
        ) from e
# UPDATED ENDPOINT - File Upload with PII Removal
@router.post("/upload-file", response_model=FileUploadResponse)
async def upload_provider_notes_file(file: UploadFile = File(...)):
    """
    Upload a TXT file containing provider notes and extract ICD-10 and CPT codes.

    This endpoint:
    1. Extracts text from the uploaded TXT file
    2. Automatically detects and removes patient personal information (PII)
    3. Processes the sanitized text through the LLM
    4. Returns ICD-10 and CPT codes

    Args:
        file: TXT file containing provider notes

    Returns:
        FileUploadResponse with codes, explanations, and PII removal info

    Raises:
        HTTPException: 400 when the extracted text is missing or shorter
            than 10 characters once surrounding whitespace is stripped
            (the same rule the /coding endpoint applies to its text input);
            500 when any other error occurs while processing the file.
    """
    # Same minimum as the text-based /coding endpoint.
    min_notes_length = 10

    try:
        logger.info("📁 Received file upload request: %s", file.filename)

        # Step 1: Extract text from file with automatic PII removal
        extraction_result = await file_service.extract_text_from_file(
            file=file,
            remove_pii=True  # Always remove PII for safety
        )
        extracted_text = extraction_result["text"]
        filename = extraction_result["filename"]
        text_length = extraction_result["text_length"]
        pii_info = extraction_result["pii_info"]

        logger.info("✅ Extracted %s characters from %s", text_length, filename)
        if pii_info["pii_removed"]:
            logger.info(
                "🔒 Removed %s PII entities before processing",
                pii_info["pii_count"],
            )

        # Reject empty / near-empty content before spending an LLM call on it;
        # the text may have shrunk after PII removal.
        if not extracted_text or len(extracted_text.strip()) < min_notes_length:
            raise HTTPException(
                status_code=400,
                detail="Uploaded file must contain at least 10 characters of provider notes"
            )

        # Step 2: Process sanitized text through Groq LLM
        coding_result = await groq_service.analyze_provider_notes(extracted_text)
        logger.info("✅ Successfully processed file: %s", filename)

        # Step 3: Return combined response with PII info
        return FileUploadResponse(
            success=True,
            filename=filename,
            extracted_text_length=text_length,
            pii_removed=pii_info["pii_removed"],
            pii_count=pii_info["pii_count"],
            cpt_codes=coding_result.get("CPT", []),
            cpt_explanation=coding_result.get("CPT_explanation", ""),
            icd_codes=coding_result.get("ICD", []),
            icd_explanation=coding_result.get("ICD_explanation", "")
        )
    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error(str(e)) lost.
        logger.exception("❌ Error in upload_provider_notes_file: %s", e)
        # NOTE(review): the detail echoes the raw exception text to the client;
        # confirm it can never contain unsanitized file content.
        raise HTTPException(
            status_code=500,
            detail=f"Error processing uploaded file: {str(e)}"
        ) from e