File size: 1,245 Bytes
a8f12f6
 
 
 
764e30e
a8f12f6
 
 
 
 
764e30e
 
 
 
a8f12f6
764e30e
a8f12f6
764e30e
a8f12f6
 
764e30e
a8f12f6
 
764e30e
a8f12f6
 
764e30e
fd20bd2
764e30e
fd20bd2
764e30e
a8f12f6
764e30e
a8f12f6
 
764e30e
 
 
a8f12f6
 
764e30e
a8f12f6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
from fastapi import UploadFile, HTTPException
import os
from typing import Dict
import logging
from services.regex_pii_remover import RegexPIIRemover

logger = logging.getLogger(__name__)


class FileService:
    """Thin service wrapper that delegates PII removal to ``RegexPIIRemover``."""

    def __init__(self):
        """Initialize file service with PII remover"""
        self.pii_remover = RegexPIIRemover()
        logger.info("✅ FileService initialized")

    def remove_pii(self, text: str) -> tuple[str, int]:
        """
        Remove PII from text using regex patterns

        This is a best-effort operation: any exception raised while
        removing PII is logged (with its traceback) and swallowed, and the
        input is handed back unchanged.

        Args:
            text: Input text containing potential PII

        Returns:
            tuple: (cleaned_text, pii_count). On failure this is
            ``(text, 0)`` -- i.e. the ORIGINAL, unredacted text. A count of
            0 therefore does not by itself prove the text is PII-free;
            callers that must not leak PII should not rely on it alone.
        """
        try:
            # Lazy %-style args: the rendered messages are identical to the
            # previous f-strings, but formatting is deferred to the logger.
            logger.info("🔒 Starting PII removal (text length: %s)", len(text))

            cleaned_text, pii_count = self.pii_remover.remove_pii(text)

            logger.info("✅ PII removal complete: %s entities removed", pii_count)

            return cleaned_text, pii_count

        except Exception as e:
            # logger.exception logs at ERROR level like before, but also
            # attaches the traceback so the swallowed failure is diagnosable.
            logger.exception("❌ Error during PII removal: %s", e)
            # Return original text if PII removal fails
            return text, 0


# Global instance: a module-level FileService created once, when this module
# is imported. Constructing it runs FileService.__init__, which instantiates
# RegexPIIRemover and logs an initialization message.
file_service = FileService()