Spaces:

Agents-MCP-Hackathon
/

PII-Image-Masking-mpc-server

Sleeping

App Files Files Community

Soroush committed on Jun 4, 2025

Commit

84e50e2

1 Parent(s): 82eb0e3

fixed

Browse files

Files changed (10) hide show

.gitignore +15 -0
.python-version +1 -0
README.md +1 -2
app.py +29 -0
gradio_ui.py +318 -0
pii_image_processing.py +613 -0
pyproject.toml +15 -0
requirements.txt +174 -0
tests/test_pii_image_processing.py +70 -0
uv.lock +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,15 @@

+# Python-generated files
+__pycache__/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+# Virtual environments
+.venv
+.env
+tmp/
+.gradio/

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.13

README.md CHANGED Viewed

@@ -1,4 +1,5 @@
 ---
 title: PII Image Masking Mpc Server
 emoji: 🐠
 colorFrom: pink
@@ -9,5 +10,3 @@ app_file: app.py
 pinned: false
 short_description: PII image masking mpc server using Mistral models
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+tags: [mcp-server-track]
 title: PII Image Masking Mpc Server
 emoji: 🐠
 colorFrom: pink
 pinned: false
 short_description: PII image masking mpc server using Mistral models
 ---

app.py ADDED Viewed

	@@ -0,0 +1,29 @@

+import os
+import gradio as gr
+from gradio_ui import PIIMaskingUI
+def main():
+    """Launch the PII Detection & Masking UI."""
+    # Create output directory if it doesn't exist
+    output_dir = "tmp"
+    os.makedirs(output_dir, exist_ok=True)
+    # Create the UI
+    ui = PIIMaskingUI(output_dir=output_dir)
+    # Get the Gradio Blocks interface
+    demo = ui.demo
+    # Launch the interface on a different port to avoid conflicts
+    demo.launch(
+        # share=True,
+        # debug=True,
+        server_name="0.0.0.0",
+        mcp_server=True,
+        server_port=7869  # let the port be selected
+    )
+if __name__ == "__main__":
+    main()

gradio_ui.py ADDED Viewed

	@@ -0,0 +1,318 @@

+import os
+import tempfile
+import gradio as gr
+from typing import Dict, Tuple, Optional
+from pii_image_processing import process_image_api, MistralModels, CoverStrategy
+from PIL import Image
+class PIIMaskingUI:
+    """
+    A Gradio-based UI for the PII detection and masking tool.
+    This class creates an interactive web interface that allows users to:
+    - Upload images containing potential PII
+    - Select from available Mistral models
+    - Configure masking strategies
+    - Define regulation-specific masking rules
+    - View and download results
+    """
+    # Available regulations and their descriptions
+    REGULATIONS = {
+        "GDPR": "General Data Protection Regulation (EU)",
+        "CCPA": "California Consumer Privacy Act",
+        "PIPEDA": "Personal Information Protection and Electronic Documents Act (Canada)",
+        "LGPD": "Lei Geral de Proteção de Dados (Brazil)",
+        "PECR": "Privacy and Electronic Communications Regulations (UK)",
+        "PDPA": "Personal Data Protection Act (Singapore)",
+        "HIPAA": "Health Insurance Portability and Accountability Act (USA)",
+    }
+    # Available masking strategies
+    STRATEGIES = {
+        "blur": "Blur the sensitive area",
+        "single_color": "Cover with a solid color",
+        "none": "No masking (just detection)"
+    }
+    def __init__(self, output_dir: str = "output"):
+        """
+        Initialize the UI.
+        Args:
+            output_dir: Directory to save processed images
+        """
+        self.output_dir = output_dir
+        os.makedirs(self.output_dir, exist_ok=True)
+        self.demo = self._create_interface()
+        self.demo.title = "PII Detection & Masking Tool - Mistral Models"
+        self.demo.description = f"""
+        Upload an image to detect and mask PII based on privacy regulations using custom Mistral model.
+        Available regulations include: {', '.join(self.REGULATIONS.keys())}.
+        Available masking strategies: {', '.join(self.STRATEGIES.keys())}.
+        The tool supports various Mistral models for image processing.(e.g., {', '.join([m.value for m in MistralModels])}).
+        """
+        print(self.demo.title)
+        print(self.demo.description)
+    def _create_interface(self) -> gr.Blocks:
+        """Create and return the Gradio interface."""
+        with gr.Blocks(title="PII Detection & Masking") as demo:
+            gr.Markdown("# PII Detection & Masking Tool")
+            gr.Markdown("Upload an image to detect and mask PII based on privacy regulations.")
+            with gr.Row():
+                with gr.Column(scale=1):
+                    # Input image
+                    image_input = gr.Image(type="filepath", label="Upload Image")
+                    # Model selection
+                    model_dropdown = gr.Dropdown(
+                        choices=[m.value for m in MistralModels],
+                        value=MistralModels.PIXTRAL_LARGE_LATEST.value,
+                        label="Mistral Model"
+                    )
+                    # Default strategy
+                    default_strategy = gr.Dropdown(
+                        choices=list(self.STRATEGIES.keys()),
+                        value="blur",
+                        label="Default Masking Strategy"
+                    )
+                    # Blur amount (only show if blur is selected)
+                    blur_amount = gr.Slider(
+                        minimum=1,
+                        maximum=20,
+                        value=5,
+                        step=1,
+                        label="Blur Intensity",
+                        visible=True
+                    )
+                    # Color picker (only show if single_color is selected)
+                    color_picker = gr.ColorPicker(
+                        label="Mask Color",
+                        value="#000000",
+                        visible=False
+                    )
+                    # Show/hide blur/color based on strategy
+                    def update_strategy_ui(strategy):
+                        return [
+                            gr.Slider(visible=strategy == "blur"),
+                            gr.ColorPicker(visible=strategy == "single_color")
+                        ]
+                    default_strategy.change(
+                        update_strategy_ui,
+                        inputs=[default_strategy],
+                        outputs=[blur_amount, color_picker]
+                    )
+                    # Regulation strategies
+                    with gr.Group():
+                        gr.Markdown("### Regulation-specific Strategies")
+                        gr.Markdown("Set masking strategy for each regulation (or 'none' to ignore)")
+                        self.regulation_uis = {}
+                        for reg, desc in self.REGULATIONS.items():
+                            with gr.Row():
+                                reg_label = gr.Textbox(
+                                    value=f"{reg} - {desc}",
+                                    label="Regulation",
+                                    interactive=False,
+                                    scale=2
+                                )
+                                reg_strategy = gr.Dropdown(
+                                    choices=list(self.STRATEGIES.keys()),
+                                    value="blur",
+                                    label=f"Strategy for {reg}",
+                                    scale=1
+                                )
+                                self.regulation_uis[reg] = reg_strategy
+                    # Process button
+                    process_btn = gr.Button("Process Image", variant="primary")
+                with gr.Column(scale=1):
+                    # Output image
+                    self.output_image = gr.Image(
+                        type="filepath",
+                        label="Processed Image",
+                        interactive=False
+                    )
+                    # Output JSON
+                    self.output_json = gr.JSON(
+                        label="Detection Results",
+                        visible=True
+                    )
+                    # Download button
+                    self.download_btn = gr.Button("Download Processed Image", visible=False)
+            # Process button click handler
+            process_btn.click(
+                fn=self.process_image,
+                inputs=[
+                    image_input,
+                    model_dropdown,
+                    default_strategy,
+                    blur_amount,
+                    color_picker,
+                    *[self.regulation_uis[reg] for reg in self.REGULATIONS]
+                ],
+                outputs=[
+                    self.output_image,
+                    self.output_json,
+                    self.download_btn
+                ]
+            )
+            # Download button handler
+            self.download_btn.click(
+                fn=self.download_file,
+                inputs=gr.State(value=None),  # Will be set by process_click
+                outputs=gr.File(label="Download Processed Image")
+            )
+        return demo
+    def process_image(
+        self,
+        image,
+        model_name: str,
+        default_strategy: str,
+        blur_amount: int,
+        color_hex: str,
+        *regulation_values
+    ) -> Tuple[Optional[str], dict, dict]:
+        """
+        PII Detection & Masking Tool - Mistral Models
+        Process an image with the given parameters.
+        Upload an image to detect and mask PII based on privacy regulations using custom Mistral model.
+        Available regulations include: GDPR, CCPA, PIPEDA, LGPD, PECR, PDPA, HIPAA.
+        Available masking strategies: blur, single_color, none.
+        The tool supports various Mistral models for image processing.
+        (Available models: pixtral-large-latest, mistral-ocr-latest, mistral-medium-2505).
+        ALL ENUM FIELDS ARE REQUIRED and must be provided. the string none is a valid value when is among the choices.
+        Args:
+            image: Input image (PIL.Image or file path)
+            model_name: Name of the Mistral model to use
+            default_strategy: Default masking strategy
+            blur_amount: Blur intensity (1-20)
+            color_hex: Hex color for single_color strategy
+            *regulation_values: List of strategy values for each regulation
+        Returns:
+            Tuple of (output_image_path, result_json, download_btn_visibility)
+        """
+        # Convert regulation values from list to dict
+        regulation_values = dict(zip(self.REGULATIONS.keys(), regulation_values))
+        # Convert hex color to RGB tuple
+        if color_hex.startswith('#'):
+            color_hex = color_hex.lstrip('#')
+            color = tuple(int(color_hex[i:i+2], 16) for i in (0, 2, 4))
+        else:
+            color = (0, 0, 0)  # Default to black
+        # Handle case when no image is provided
+        if image is None:
+            return None, {"error": "No image provided"}, gr.update(visible=False)
+        # Save uploaded image to temp file if it's not a path
+        if not isinstance(image, str):
+            temp_dir = tempfile.mkdtemp()
+            image_path = os.path.join(temp_dir, "input.jpg")
+            Image.fromarray(image).save(image_path)
+        else:
+            image_path = image
+        # Create output path
+        os.makedirs(self.output_dir, exist_ok=True)
+        try:
+            output_path = os.path.join(self.output_dir, f"processed_{os.path.basename(image_path)}")
+        except Exception as e:
+            import datetime
+            output_path = os.path.join(output_dir, f"processed_image_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.jpg")
+        print(f"Output path: {output_path}")
+        print("Adding .jpg extension if not present")
+        if not output_path.lower().endswith('.jpg'):
+            output_path += '.jpg'
+        # Filter out 'none' strategies (convert to None)
+        regulation_map = {
+            reg: strat if strat != "none" else None
+            for reg, strat in regulation_values.items()
+        }
+        try:
+            # Call the API
+            result = process_image_api(
+                image_path=image_path,
+                strategy_name=default_strategy if default_strategy != "none" else None,
+                blur_amount=blur_amount,
+                color=color,
+                output_path=output_path,
+                model=model_name,
+                regulation_map=regulation_map or None
+            )
+            # Cleanup temp file if we created one
+            if 'temp_dir' in locals():
+                import shutil
+                shutil.rmtree(temp_dir, ignore_errors=True)
+            # Return results
+            output_image = output_path if os.path.exists(output_path) else None
+            download_visible = output_image is not None
+            return output_image, result, gr.update(visible=download_visible)
+        except Exception as e:
+            return None, {"error": str(e), "success": False}, gr.update(visible=False)
+    def download_file(self, file_path: Optional[str] = None) -> Optional[str]:
+        """
+        Handle file download.
+        Args:
+            file_path: Path to the file to download
+        Returns:
+            Path to the file if it exists, None otherwise
+        Raises:
+            gr.Error: If the file doesn't exist
+        """
+        if file_path and os.path.exists(file_path):
+            return file_path
+        raise gr.Error("No processed file available for download")
+    def launch(self, **kwargs):
+        """Launch the Gradio interface."""
+        return self.demo.launch(**kwargs)
+def main():
+    """Launch the PII Masking UI."""
+    output_dir = "tmp"
+    os.makedirs(output_dir, exist_ok=True)
+    ui = PIIMaskingUI(output_dir=output_dir)
+    ui.demo.launch(
+        share=True,
+        debug=True,
+        server_name="0.0.0.0",
+        # server_port=7869,
+        mcp_server=True,
+    )
+if __name__ == "__main__":
+    main()

pii_image_processing.py ADDED Viewed

	@@ -0,0 +1,613 @@

+## Image Handler
+import base64
+import requests
+from io import BytesIO
+from PIL import Image
+class ImageHandler:
+    @staticmethod
+    def load_image_from_local(path: str) -> Image.Image:
+        try:
+            image = Image.open(path)
+            image.load()
+            return image
+        except Exception as e:
+            raise IOError(f"Error loading local image: {e}")
+    @staticmethod
+    def load_image_from_web(url: str) -> Image.Image:
+        try:
+            response = requests.get(url)
+            response.raise_for_status()
+            image = Image.open(BytesIO(response.content))
+            image.load()
+            return image
+        except Exception as e:
+            raise IOError(f"Error loading web image: {e}")
+    @staticmethod
+    def load_image_from_base64(base64_str: str) -> Image.Image:
+        try:
+            image_data = base64.b64decode(base64_str)
+            image = Image.open(BytesIO(image_data))
+            image.load()
+            return image
+        except Exception as e:
+            raise IOError(f"Error loading base64 image: {e}")
+    @staticmethod
+    def save_image(image: Image.Image, path: str) -> None:
+        try:
+            image.save(path)
+        except Exception as e:
+            raise IOError(f"Error saving image: {e}")
+    @staticmethod
+    def load_image(path: str) -> Image.Image:
+        if path.startswith('http://') or path.startswith('https://'):
+            return ImageHandler.load_image_from_web(path)
+        elif path.startswith('data:image/') and ';base64,' in path:
+            base64_str = path.split(';base64,')[1]
+            return ImageHandler.load_image_from_base64(base64_str)
+        else:
+            return ImageHandler.load_image_from_local(path)
+## Area Covering
+import random
+import copy
+from PIL import ImageFilter, ImageDraw
+class CoverStrategy:
+    def cover(self, image, coordinates):
+        raise NotImplementedError("Cover method must be implemented by subclasses")
+class BlurStrategy(CoverStrategy):
+    def __init__(self, blur_amount=5):
+        self.blur_amount = blur_amount
+    def cover(self, image, coordinates):
+        x1, y1 = int(coordinates.get('x1', 0)), int(coordinates.get('y1', 0))
+        x2, y2 = int(coordinates.get('x2', 0)), int(coordinates.get('y2', 0))
+        # Extract the region to blur
+        region = image.crop((x1, y1, x2, y2))
+        blurred_region = region.filter(ImageFilter.GaussianBlur(radius=self.blur_amount))
+        # Paste back the blurred region
+        image.paste(blurred_region, (x1, y1))
+        return image
+class SingleColorStrategy(CoverStrategy):
+    def __init__(self, color=(0, 0, 0)):
+        self.color = color
+    def cover(self, image, coordinates):
+        x1, y1 = int(coordinates.get('x1', 0)), int(coordinates.get('y1', 0))
+        x2, y2 = int(coordinates.get('x2', 0)), int(coordinates.get('y2', 0))
+        draw = ImageDraw.Draw(image)
+        draw.rectangle([x1, y1, x2, y2], fill=self.color)
+        return image
+class CoordinateBlurrer:
+    def __init__(self, strategy: CoverStrategy):
+        self.strategy = strategy
+    def blur_coordinates(self, data, blur_amount=5):
+        blurred_data = []
+        for item in data:
+            blurred_item = copy.deepcopy(item)
+            coords = blurred_item.get('coordinates', {})
+            blurred_coords = {}
+            for key, value in coords.items():
+                if isinstance(value, (int, float)):
+                    blurred_coords[key] = value + random.uniform(-blur_amount, blur_amount)
+                else:
+                    blurred_coords[key] = value
+            blurred_item['coordinates'] = blurred_coords
+            blurred_data.append(blurred_item)
+        return blurred_data
+    def cover_areas(self, image, data):
+        for item in data:
+            coords = item.get('coordinates', {})
+            image = self.strategy.cover(image, coords)
+        return image
+# PII Extractor
+from dotenv import load_dotenv
+load_dotenv()
+import base64
+import os
+from abc import ABC, abstractmethod
+from typing import List, Optional, Union, Dict, Any
+from pydantic import BaseModel
+class Coordinates(BaseModel):
+    # Rectangle of one detected item as two corners, (x1, y1) and (x2, y2).
+    # The default extraction prompt asks the model for these in pixels.
+    x1: int
+    y1: int
+    x2: int
+    y2: int
+class PIIItem(BaseModel):
+    # One PII finding, mirroring the per-element fields listed in the default prompt.
+    name: str
+    # Rectangle that contains the finding
+    coordinates: Coordinates
+    confidence: float
+    # The default prompt asks for one of: low, medium, high
+    severity: str
+    type: str
+    # Regulation codes such as "GDPR" or "HIPAA" that the finding may fall under
+    probable_regulations: List[str]
+class PIIResponse(BaseModel):
+    # Top-level shape of an extraction result; MistralPIIExtractor passes this
+    # model as response_format when calling the API.
+    piis: List[PIIItem]
+    containing_text: str
+class BaseVisionExtractor(ABC):
+    """Abstract base class for vision-based PII extractors"""
+    def __init__(self, api_key: Optional[str] = None, model: str = None):
+        self.api_key = api_key
+        self.model = model
+        self._client = None
+    @abstractmethod
+    def _initialize_client(self):
+        """Initialize the specific client (Mistral, OpenAI, etc.)"""
+        pass
+    @abstractmethod
+    def _create_messages(self, image_input: str, prompt: str) -> List[Dict[str, Any]]:
+        """Create messages in the format expected by the specific API"""
+        pass
+    @abstractmethod
+    def _make_request(self, messages: List[Dict[str, Any]]) -> Any:
+        """Make the actual API request"""
+        pass
+    @staticmethod
+    def encode_image_to_base64(image_path: str) -> Optional[str]:
+        """Encode a local image file to base64 string"""
+        try:
+            with open(image_path, "rb") as image_file:
+                return base64.b64encode(image_file.read()).decode('utf-8')
+        except FileNotFoundError:
+            print(f"Error: The file {image_path} was not found.")
+            return None
+        except Exception as e:
+            print(f"Error encoding image: {e}")
+            return None
+    @staticmethod
+    def is_url(input_string: str) -> bool:
+        """Check if the input is a URL"""
+        return input_string.startswith(('http://', 'https://'))
+    @staticmethod
+    def is_base64(input_string: str) -> bool:
+        """Check if the input is already base64 encoded"""
+        return input_string.startswith('data:image/')
+    def prepare_image_input(self, image_input: str) -> str:
+        """
+        Prepare image input - handles URL, base64, or local file path
+        Args:
+            image_input: Can be:
+                - URL (http://... or https://...)
+                - Base64 encoded string (data:image/...)
+                - Local file path
+        Returns:
+            Properly formatted image input for API
+        """
+        if self.is_url(image_input):
+            return image_input
+        elif self.is_base64(image_input):
+            return image_input
+        else:
+            # Assume it's a local file path
+            base64_image = self.encode_image_to_base64(image_input)
+            if base64_image:
+                # Detect image format from file extension
+                file_ext = image_input.lower().split('.')[-1]
+                if file_ext in ['jpg', 'jpeg']:
+                    mime_type = 'image/jpeg'
+                elif file_ext == 'png':
+                    mime_type = 'image/png'
+                elif file_ext == 'webp':
+                    mime_type = 'image/webp'
+                elif file_ext == 'gif':
+                    mime_type = 'image/gif'
+                else:
+                    mime_type = 'image/jpeg'  # Default fallback
+                return f"data:{mime_type};base64,{base64_image}"
+            else:
+                raise ValueError(f"Could not process image input: {image_input}")
+    def extract_pii(self, image_input: str, custom_prompt: Optional[str] = None) -> Any:
+        """Extract PII from image"""
+        if not self._client:
+            self._initialize_client()
+        prepared_image = self.prepare_image_input(image_input)
+        prompt = custom_prompt or self.get_default_prompt()
+        messages = self._create_messages(prepared_image, prompt)
+        return self._make_request(messages)
+    def get_default_prompt(self) -> str:
+        """Get the default PII extraction prompt"""
+        return """
+        Extract all the PII in the image and the corresponding coordinates (x1, y1, x2, y2) in the image. (units are pixel)
+        You must provide the smallest possible rectangle that contains the PII.
+        You must ensure that the provided rectangle covers the whole text containing that PII.
+        Provide the result in json which has a field called containing_text and
+        a field called piis which is a json array.
+        Each element of the array has the following fields:
+        - name
+        - coordinates
+           - x1
+           - y1
+           - x2
+           - y2
+        - confidence
+        - severity (low, medium, high)
+        - type
+        - probable_regulations (GDPR, HIPAA, CCPA, PECR, LGPD, PDPA)
+        ---- Additional information ----
+        REGULATIONS = {
+        "GDPR": "General Data Protection Regulation (EU)",
+        "CCPA": "California Consumer Privacy Act",
+        "PIPEDA": "Personal Information Protection and Electronic Documents Act (Canada)",
+        "LGPD": "Lei Geral de Proteção de Dados (Brazil)",
+        "PDPA": "Personal Data Protection Act (Singapore)",
+        "PECR": "Privacy and Electronic Communications Regulations (UK)",
+        "HIPAA": "Health Insurance Portability and Accountability Act (USA)",
+    }
+        """
+class MistralPIIExtractor(BaseVisionExtractor):
+    """Mistral-specific implementation"""
+    def __init__(self, api_key: Optional[str] = None, model: str = 'pixtral-large-latest'):
+        super().__init__(api_key or os.environ.get('MISTRAL_API_KEY'), model)
+    def _initialize_client(self):
+        """Initialize Mistral client"""
+        from mistralai import Mistral
+        self._client = Mistral(api_key=self.api_key)
+    def _create_messages(self, image_input: str, prompt: str) -> List[Dict[str, Any]]:
+        """Create messages in Mistral format"""
+        return [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": prompt
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": image_input
+                    }
+                ]
+            }
+        ]
+    def _make_request(self, messages: List[Dict[str, Any]]) -> str:
+        """Make request to Mistral API"""
+        chat_response = self._client.chat.parse(
+            model=self.model,
+            messages=messages,
+            response_format=PIIResponse,
+            temperature=0
+        )
+        return chat_response.choices[0].message.content
+class OpenAIPIIExtractor(BaseVisionExtractor):
+    """OpenAI-specific implementation (example of extensibility)"""
+    def __init__(self, api_key: Optional[str] = None, model: str = 'gpt-4-vision-preview'):
+        super().__init__(api_key or os.environ.get('OPENAI_API_KEY'), model)
+    def _initialize_client(self):
+        """Initialize OpenAI client"""
+        from openai import OpenAI
+        self._client = OpenAI(api_key=self.api_key)
+    def _create_messages(self, image_input: str, prompt: str) -> List[Dict[str, Any]]:
+        """Create messages in OpenAI format"""
+        return [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": prompt
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": image_input
+                        }
+                    }
+                ]
+            }
+        ]
+    def _make_request(self, messages: List[Dict[str, Any]]) -> str:
+        """Make request to OpenAI API"""
+        response = self._client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+            max_tokens=1000
+        )
+        return response.choices[0].message.content
+# Factory for easy model switching
+class PIIExtractorFactory:
+    """Factory to create different PII extractors"""
+    @staticmethod
+    def create_extractor(provider: str, **kwargs) -> BaseVisionExtractor:
+        """
+        Create a PII extractor for the specified provider
+        Args:
+            provider: 'mistral', 'openai', etc.
+            **kwargs: Additional arguments passed to the extractor
+        """
+        if provider.lower() == 'mistral':
+            return MistralPIIExtractor(**kwargs)
+        elif provider.lower() == 'openai':
+            return OpenAIPIIExtractor(**kwargs)
+        else:
+            raise ValueError(f"Unsupported provider: {provider}")
+# Image Processing Facade
+import json
+class ImageProcessingService:
+    @staticmethod
+    def process_image(image):
+        extracotr = MistralPIIExtractor()
+        try:
+            data_str = extracotr.extract_pii(image)
+            print(f'DEBUG - Extracted PII: {data_str}')
+            data = json.loads(data_str)
+            piis = data['piis']
+            containing_text = data['containing_text']
+            return piis, containing_text
+        except Exception as e:
+            print({"error": f"Failed to extract PII: {e}"})
+            raise e
+class MockImageProcessingService:
+    @staticmethod
+    def process_image(image):
+        # Mock processing that would typically use OCR or computer vision
+        return [
+            {
+                "name": "Trattoria Il Gabbiano",
+                "coordinates": {"x1": 50, "y1": 20, "x2": 280, "y2": 40},
+                "confidence": 0.99,
+                "severity": "low",
+                "type": "business_name"
+            },
+            {
+                "name": "Tarta sas di Fontana Stefania & c.",
+                "coordinates": {"x1": 90, "y1": 40, "x2": 320, "y2": 55},
+                "confidence": 0.98,
+                "severity": "medium",
+                "type": "business_name"
+            }
+        ], "the containing text mocked"
+class ImageProcessingFacade:
+    def __init__(self):
+        self.image_handler = ImageHandler()
+    def process(self, image_path, strategy_name='blur', blur_amount=5, color=(0, 0, 0), output_path=None):
+        try:
+            image = self.image_handler.load_image(image_path)
+        except Exception as e:
+            return {"error": f"Failed to load image: {e}"}
+        # Select covering strategy
+        if strategy_name == 'blur':
+            strategy = BlurStrategy(blur_amount)
+        elif strategy_name == 'single_color':
+            strategy = SingleColorStrategy(color)
+        else:
+            return {"error": f"Unknown strategy: {strategy_name}"}
+        # Process image with mock service
+        try:
+            piis, containing_text = ImageProcessingService.process_image(image_path)
+        except Exception as e:
+            return {"error": f"Failed to process image: {e}"}
+        # Apply coordinate blurring and area covering
+        try:
+            blurrer = CoordinateBlurrer(strategy)
+            blurred_data = blurrer.blur_coordinates(piis, blur_amount)
+            processed_image = blurrer.cover_areas(image.copy(), blurred_data)
+            # Save processed image if output path provided
+            if output_path:
+                self.image_handler.save_image(processed_image, output_path)
+            return {
+                "data": blurred_data,
+                "processed_image": processed_image,
+                "success": True
+            }
+        except Exception as e:
+            return {"error": f"Failed to process coordinates: {e}"}
+def process_image_api(image_path,
+    strategy_name='blur',
+    blur_amount=5,
+    color=(0, 0, 0),
+    output_path=None,
+    provider='mistral',
+    model=None,
+    regulation_map=None):
+    """
+    API function to process images with coordinate blurring and area covering.
+    Args:
+        image_path (str): Path to image (local, web URL, or base64)
+        strategy_name (str): Default covering strategy when regulation_map is not provided ('blur' or 'single_color')
+        blur_amount (int): Amount of blur for coordinates and blur strategy
+        color (tuple): RGB color for single_color strategy
+        output_path (str, optional): Path to save processed image
+        provider (str): PII extractor provider ('mistral' or 'openai')
+        model (str, optional): Model name for the PII extractor
+        regulation_map (dict, optional): Mapping of regulation names to strategy names or None
+    Returns:
+        dict: Processing results with data and success status
+    """
+    # Load image
+    try:
+        print(f"DEBUG - Loading image from: {image_path}")
+        image = ImageHandler.load_image(image_path)
+    except Exception as e:
+        return {"error": f"Failed to load image: {e}"}
+    # Create PII extractor
+    try:
+        extractor_kwargs = {}
+        if model is not None:
+            extractor_kwargs["model"] = model
+        extractor = PIIExtractorFactory.create_extractor(provider, **extractor_kwargs)
+    except Exception as e:
+        return {"error": f"Failed to create PII extractor: {e}"}
+    # Extract PII
+    try:
+        data_str = extractor.extract_pii(image_path)
+        data = json.loads(data_str)
+        piis = data.get("piis", [])
+    except Exception as e:
+        return {"error": f"Failed to extract PII: {e}"}
+    processed_data = []
+    processed_image = image.copy()
+    # Apply covering
+    try:
+        if regulation_map is not None:
+            for item in piis:
+                regs = item.get("probable_regulations", [])
+                strategy_for_item = None
+                for reg in regs:
+                    if reg in regulation_map:
+                        strategy_for_item = regulation_map[reg]
+                        break
+                if strategy_for_item is None:
+                    processed_data.append(item)
+                    continue
+                if strategy_for_item == "blur":
+                    strategy = BlurStrategy(blur_amount)
+                elif strategy_for_item == "single_color":
+                    strategy = SingleColorStrategy(color)
+                else:
+                    return {"error": f"Unknown strategy for regulation {reg}: {strategy_for_item}"}
+                blurrer = CoordinateBlurrer(strategy)
+                blurred_item = blurrer.blur_coordinates([item], blur_amount)[0]
+                processed_image = blurrer.cover_areas(processed_image, [blurred_item])
+                processed_data.append(blurred_item)
+        else:
+            if strategy_name == "blur":
+                strategy = BlurStrategy(blur_amount)
+            elif strategy_name == "single_color":
+                strategy = SingleColorStrategy(color)
+            else:
+                return {"error": f"Unknown strategy: {strategy_name}"}
+            blurrer = CoordinateBlurrer(strategy)
+            processed_data = blurrer.blur_coordinates(piis, blur_amount)
+            processed_image = blurrer.cover_areas(image.copy(), processed_data)
+    except Exception as e:
+        return {"error": f"Failed to apply covering: {e}"}
+    # Save processed image if provided
+    if output_path:
+        try:
+            ImageHandler.save_image(processed_image, output_path)
+        except Exception as e:
+            return {"error": f"Failed to save processed image: {e}"}
+    return {"data": processed_data, "processed_image": processed_image, "success": True}
+from enum import Enum
+class CoverStrategy(Enum):
+    """Names of the covering strategies; the values are the strings process_image_api compares strategy names against."""
+    # Selects BlurStrategy in process_image_api
+    BLUR = "blur"
+    # Selects SingleColorStrategy in process_image_api
+    SINGLE_COLOR = "single_color"
+class MistralModels(Enum):
+    """
+    Mistral model identifiers; each value is the model name as a string.
+    Alias targets as listed in the Mistral models overview
+    (https://docs.mistral.ai/getting-started/models/models_overview/) when this was written:
+        mistral-large-latest: currently points to mistral-large-2411.
+        pixtral-large-latest: currently points to pixtral-large-2411.
+        mistral-medium-latest: currently points to mistral-medium-2505.
+        mistral-moderation-latest: currently points to mistral-moderation-2411.
+        ministral-3b-latest: currently points to ministral-3b-2410.
+        ministral-8b-latest: currently points to ministral-8b-2410.
+        open-mistral-nemo: currently points to open-mistral-nemo-2407.
+        mistral-small-latest: currently points to mistral-small-2503.
+        devstral-small-latest: currently points to devstral-small-2505.
+        mistral-saba-latest: currently points to mistral-saba-2502.
+        codestral-latest: currently points to codestral-2501.
+        mistral-ocr-latest: currently points to mistral-ocr-2505.
+    """
+    # NOTE(review): presumably these values are passed as the extractor's `model` argument - confirm against callers
+    PIXTRAL_LARGE_LATEST = 'pixtral-large-latest'
+    MISTRAL_OCR_LATEST = 'mistral-ocr-latest'
+    # Disabled member, kept for reference
+    # MISTRAL_SABA_2502 = 'mistral-saba-2502'
+    # Pinned version rather than a "-latest" alias
+    MISTRAL_MEDIUM_2505 = 'mistral-medium-2505'
+if __name__ == "__main__":
+    myhome = os.environ.get('HOME')
+    image = os.path.join(myhome, "/Pictures/tmp/lo-scontrino-fiscale.jpg")
+    result = ImageProcessingService.process_image(image)
+    print(result)
+    # Process with blur strategy
+    result = process_image_api(
+        image_path=image,
+        strategy_name="blur",
+        blur_amount=3,
+        output_path="tmp/processed_image.jpg"
+    )
+    print("Result1")
+    print(result)
+    # Process with single color covering
+    result2 = process_image_api(
+        image_path="https://www.servizicontabiliefiscaliviterbo.it/wordpress/wp-content/uploads/2016/03/lo-scontrino-fiscale.jpg",
+        strategy_name="single_color",
+        color=(255, 0, 0),  # Red
+        blur_amount=2
+    )
+    print("Result2")
+    print(result2)

pyproject.toml ADDED Viewed

	@@ -0,0 +1,15 @@

+[project]
+name = "pii-detection-mcp-server"
+version = "0.1.0"
+description = "PII Detection and Masking Tool with Mistral AI"
+readme = "README.md"
+requires-python = ">=3.13"
+dependencies = [
+    "gradio>=4.0.0",
+    "mistralai>=1.8.1",
+    "pillow>=11.2.1",
+    "python-dotenv>=1.1.0",
+    "requests>=2.31.0",
+    "numpy>=1.24.0",
+    "gradio-screenrecorder>=0.0.1",
+]

requirements.txt ADDED Viewed

	@@ -0,0 +1,174 @@

+# This file was autogenerated by uv via the following command:
+#    uv pip compile pyproject.toml
+aiofiles==24.1.0
+    # via gradio
+annotated-types==0.7.0
+    # via pydantic
+anyio==4.9.0
+    # via
+    #   gradio
+    #   httpx
+    #   starlette
+audioop-lts==0.2.1
+    # via gradio
+certifi==2025.4.26
+    # via
+    #   httpcore
+    #   httpx
+    #   requests
+charset-normalizer==3.4.2
+    # via requests
+click==8.2.1
+    # via
+    #   typer
+    #   uvicorn
+eval-type-backport==0.2.2
+    # via mistralai
+fastapi==0.115.12
+    # via gradio
+ffmpy==0.6.0
+    # via gradio
+filelock==3.18.0
+    # via huggingface-hub
+fsspec==2025.5.1
+    # via
+    #   gradio-client
+    #   huggingface-hub
+gradio==5.32.1
+    # via
+    #   pii-detection-mcp-server (pyproject.toml)
+    #   gradio-screenrecorder
+gradio-client==1.10.2
+    # via gradio
+gradio-screenrecorder==0.0.1
+    # via pii-detection-mcp-server (pyproject.toml)
+groovy==0.1.2
+    # via gradio
+h11==0.16.0
+    # via
+    #   httpcore
+    #   uvicorn
+hf-xet==1.1.3
+    # via huggingface-hub
+httpcore==1.0.9
+    # via httpx
+httpx==0.28.1
+    # via
+    #   gradio
+    #   gradio-client
+    #   mistralai
+    #   safehttpx
+huggingface-hub==0.32.4
+    # via
+    #   gradio
+    #   gradio-client
+idna==3.10
+    # via
+    #   anyio
+    #   httpx
+    #   requests
+jinja2==3.1.6
+    # via gradio
+markdown-it-py==3.0.0
+    # via rich
+markupsafe==3.0.2
+    # via
+    #   gradio
+    #   jinja2
+mdurl==0.1.2
+    # via markdown-it-py
+mistralai==1.8.1
+    # via pii-detection-mcp-server (pyproject.toml)
+numpy==2.2.6
+    # via
+    #   pii-detection-mcp-server (pyproject.toml)
+    #   gradio
+    #   pandas
+orjson==3.10.18
+    # via gradio
+packaging==25.0
+    # via
+    #   gradio
+    #   gradio-client
+    #   huggingface-hub
+pandas==2.2.3
+    # via gradio
+pillow==11.2.1
+    # via
+    #   pii-detection-mcp-server (pyproject.toml)
+    #   gradio
+pydantic==2.11.5
+    # via
+    #   fastapi
+    #   gradio
+    #   mistralai
+pydantic-core==2.33.2
+    # via pydantic
+pydub==0.25.1
+    # via gradio
+pygments==2.19.1
+    # via rich
+python-dateutil==2.9.0.post0
+    # via
+    #   mistralai
+    #   pandas
+python-dotenv==1.1.0
+    # via pii-detection-mcp-server (pyproject.toml)
+python-multipart==0.0.20
+    # via gradio
+pytz==2025.2
+    # via pandas
+pyyaml==6.0.2
+    # via
+    #   gradio
+    #   huggingface-hub
+requests==2.32.3
+    # via
+    #   pii-detection-mcp-server (pyproject.toml)
+    #   huggingface-hub
+rich==14.0.0
+    # via typer
+ruff==0.11.12
+    # via gradio
+safehttpx==0.1.6
+    # via gradio
+semantic-version==2.10.0
+    # via gradio
+shellingham==1.5.4
+    # via typer
+six==1.17.0
+    # via python-dateutil
+sniffio==1.3.1
+    # via anyio
+starlette==0.46.2
+    # via
+    #   fastapi
+    #   gradio
+tomlkit==0.13.2
+    # via gradio
+tqdm==4.67.1
+    # via huggingface-hub
+typer==0.16.0
+    # via gradio
+typing-extensions==4.14.0
+    # via
+    #   fastapi
+    #   gradio
+    #   gradio-client
+    #   huggingface-hub
+    #   pydantic
+    #   pydantic-core
+    #   typer
+    #   typing-inspection
+typing-inspection==0.4.1
+    # via
+    #   mistralai
+    #   pydantic
+tzdata==2025.2
+    # via pandas
+urllib3==2.4.0
+    # via requests
+uvicorn==0.34.3
+    # via gradio
+websockets==15.0.1
+    # via gradio-client

tests/test_pii_image_processing.py ADDED Viewed

	@@ -0,0 +1,70 @@

+import unittest
+import os
+import json
+from PIL import Image
+from pii_image_processing import process_image_api, PIIExtractorFactory
+class DummyExtractor:
+    def __init__(self, model=None):
+        pass
+    def extract_pii(self, image_input):
+        sample = {
+            "piis": [
+                {
+                    "name": "TestPII",
+                    "coordinates": {"x1": 10, "y1": 10, "x2": 50, "y2": 50},
+                    "probable_regulations": ["GDPR"]
+                }
+            ],
+            "containing_text": "TestPII"
+        }
+        return json.dumps(sample)
+class TestProcessImageApi(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        # Monkey-patch factory to use dummy extractor
+        PIIExtractorFactory.create_extractor = staticmethod(lambda provider, **kwargs: DummyExtractor(**kwargs))
+        os.makedirs("tmp", exist_ok=True)
+        cls.test_image = "tmp/dummy_test.jpg"
+        Image.new("RGB", (100, 100), (128, 128, 128)).save(cls.test_image)
+    def test_blur_strategy(self):
+        out = "tmp/output_blur.jpg"
+        result = process_image_api(
+            self.test_image,
+            strategy_name="blur",
+            blur_amount=2,
+            output_path=out
+        )
+        self.assertTrue(result.get("success"))
+        self.assertTrue(os.path.exists(out))
+        self.assertEqual(len(result["data"]), 1)
+    def test_single_color_strategy(self):
+        out = "tmp/output_color.jpg"
+        result = process_image_api(
+            self.test_image,
+            strategy_name="single_color",
+            color=(255,0,0),
+            output_path=out
+        )
+        self.assertTrue(result.get("success"))
+        self.assertTrue(os.path.exists(out))
+        self.assertEqual(len(result["data"]), 1)
+    def test_regulation_map(self):
+        out = "tmp/output_reg.jpg"
+        reg_map = {"GDPR": "single_color"}
+        result = process_image_api(
+            self.test_image,
+            regulation_map=reg_map,
+            output_path=out
+        )
+        self.assertTrue(result.get("success"))
+        self.assertTrue(os.path.exists(out))
+        self.assertEqual(len(result["data"]), 1)
+# Run the tests in this module when the file is executed directly as a script
+if __name__ == "__main__":
+    unittest.main()

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff