Soroush committed on
Commit
84e50e2
·
1 Parent(s): 82eb0e3
.gitignore ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+
12
+ .env
13
+
14
+ tmp/
15
+ .gradio/
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.13
README.md CHANGED
@@ -1,4 +1,5 @@
1
  ---
 
2
  title: PII Image Masking MCP Server
3
  emoji: 🐠
4
  colorFrom: pink
@@ -9,5 +10,3 @@ app_file: app.py
9
  pinned: false
10
  short_description: PII image masking MCP server using Mistral models
11
  ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ tags: [mcp-server-track]
3
  title: PII Image Masking MCP Server
4
  emoji: 🐠
5
  colorFrom: pink
 
10
  pinned: false
11
  short_description: PII image masking MCP server using Mistral models
12
  ---
 
 
app.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from gradio_ui import PIIMaskingUI
4
+
5
+
6
def main():
    """Start the PII Detection & Masking app with its MCP endpoint enabled.

    Makes sure the ``tmp`` working directory exists, builds the Gradio UI on
    top of it and serves the resulting Blocks app on every network interface
    at the fixed port 7869.
    """
    workdir = "tmp"
    os.makedirs(workdir, exist_ok=True)

    # share/debug are intentionally left at their Gradio defaults here.
    launch_options = {
        "server_name": "0.0.0.0",
        "mcp_server": True,
        "server_port": 7869,
    }
    PIIMaskingUI(output_dir=workdir).demo.launch(**launch_options)


if __name__ == "__main__":
    main()
gradio_ui.py ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import gradio as gr
4
+ from typing import Dict, Tuple, Optional
5
+ from pii_image_processing import process_image_api, MistralModels, CoverStrategy
6
+ from PIL import Image
7
+
8
class PIIMaskingUI:
    """
    A Gradio-based UI for the PII detection and masking tool.

    This class creates an interactive web interface that allows users to:
    - Upload images containing potential PII
    - Select from available Mistral models
    - Configure masking strategies
    - Define regulation-specific masking rules
    - View and download results
    """

    # Available regulations and their descriptions
    REGULATIONS = {
        "GDPR": "General Data Protection Regulation (EU)",
        "CCPA": "California Consumer Privacy Act",
        "PIPEDA": "Personal Information Protection and Electronic Documents Act (Canada)",
        "LGPD": "Lei Geral de Proteção de Dados (Brazil)",
        "PECR": "Privacy and Electronic Communications Regulations (UK)",
        "PDPA": "Personal Data Protection Act (Singapore)",
        "HIPAA": "Health Insurance Portability and Accountability Act (USA)",
    }

    # Available masking strategies
    STRATEGIES = {
        "blur": "Blur the sensitive area",
        "single_color": "Cover with a solid color",
        "none": "No masking (just detection)"
    }

    def __init__(self, output_dir: str = "output"):
        """
        Initialize the UI.

        Args:
            output_dir: Directory to save processed images
        """
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)
        self.demo = self._create_interface()
        self.demo.title = "PII Detection & Masking Tool - Mistral Models"
        self.demo.description = f"""
        Upload an image to detect and mask PII based on privacy regulations using custom Mistral model.
        Available regulations include: {', '.join(self.REGULATIONS.keys())}.
        Available masking strategies: {', '.join(self.STRATEGIES.keys())}.
        The tool supports various Mistral models for image processing.(e.g., {', '.join([m.value for m in MistralModels])}).
        """
        print(self.demo.title)
        print(self.demo.description)

    def _create_interface(self) -> gr.Blocks:
        """Create and return the Gradio interface."""
        with gr.Blocks(title="PII Detection & Masking") as demo:
            gr.Markdown("# PII Detection & Masking Tool")
            gr.Markdown("Upload an image to detect and mask PII based on privacy regulations.")

            with gr.Row():
                with gr.Column(scale=1):
                    # Input image
                    image_input = gr.Image(type="filepath", label="Upload Image")

                    # Model selection
                    model_dropdown = gr.Dropdown(
                        choices=[m.value for m in MistralModels],
                        value=MistralModels.PIXTRAL_LARGE_LATEST.value,
                        label="Mistral Model"
                    )

                    # Default strategy
                    default_strategy = gr.Dropdown(
                        choices=list(self.STRATEGIES.keys()),
                        value="blur",
                        label="Default Masking Strategy"
                    )

                    # Blur amount (only shown while "blur" is selected)
                    blur_amount = gr.Slider(
                        minimum=1,
                        maximum=20,
                        value=5,
                        step=1,
                        label="Blur Intensity",
                        visible=True
                    )

                    # Color picker (only shown while "single_color" is selected)
                    color_picker = gr.ColorPicker(
                        label="Mask Color",
                        value="#000000",
                        visible=False
                    )

                    # Show/hide blur/color based on strategy
                    def update_strategy_ui(strategy):
                        return [
                            gr.Slider(visible=strategy == "blur"),
                            gr.ColorPicker(visible=strategy == "single_color")
                        ]

                    default_strategy.change(
                        update_strategy_ui,
                        inputs=[default_strategy],
                        outputs=[blur_amount, color_picker]
                    )

                    # Regulation strategies
                    with gr.Group():
                        gr.Markdown("### Regulation-specific Strategies")
                        gr.Markdown("Set masking strategy for each regulation (or 'none' to ignore)")

                        self.regulation_uis = {}
                        for reg, desc in self.REGULATIONS.items():
                            with gr.Row():
                                reg_label = gr.Textbox(
                                    value=f"{reg} - {desc}",
                                    label="Regulation",
                                    interactive=False,
                                    scale=2
                                )
                                reg_strategy = gr.Dropdown(
                                    choices=list(self.STRATEGIES.keys()),
                                    value="blur",
                                    label=f"Strategy for {reg}",
                                    scale=1
                                )
                                self.regulation_uis[reg] = reg_strategy

                    # Process button
                    process_btn = gr.Button("Process Image", variant="primary")

                with gr.Column(scale=1):
                    # Output image
                    self.output_image = gr.Image(
                        type="filepath",
                        label="Processed Image",
                        interactive=False
                    )

                    # Output JSON
                    self.output_json = gr.JSON(
                        label="Detection Results",
                        visible=True
                    )

                    # Download button
                    self.download_btn = gr.Button("Download Processed Image", visible=False)

            # Process button click handler. The dropdowns are passed in
            # REGULATIONS order, which process_image relies on when zipping.
            process_btn.click(
                fn=self.process_image,
                inputs=[
                    image_input,
                    model_dropdown,
                    default_strategy,
                    blur_amount,
                    color_picker,
                    *[self.regulation_uis[reg] for reg in self.REGULATIONS]
                ],
                outputs=[
                    self.output_image,
                    self.output_json,
                    self.download_btn
                ]
            )

            # Download button handler. The processed-image component is the
            # input so the handler receives the path of the image currently
            # shown; the previous throw-away gr.State(None) was never
            # updated, which made every download fail.
            self.download_btn.click(
                fn=self.download_file,
                inputs=[self.output_image],
                outputs=gr.File(label="Download Processed Image")
            )

        return demo

    @staticmethod
    def _parse_color(color_value) -> Tuple[int, int, int]:
        """Convert a colour string into an ``(r, g, b)`` tuple.

        Accepts ``#rgb``, ``#rrggbb`` (an optional alpha digit/pair is
        ignored) and ``rgb(...)`` / ``rgba(...)`` strings. Anything else,
        including ``None`` and malformed values, falls back to black instead
        of raising.
        """
        black = (0, 0, 0)
        if not isinstance(color_value, str):
            return black

        text = color_value.strip()
        if text.startswith("#"):
            digits = text[1:]
            if len(digits) in (3, 4):
                # Expand shorthand such as "#0af" to "#00aaff".
                digits = "".join(ch * 2 for ch in digits)
            if len(digits) not in (6, 8):
                return black
            try:
                return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))
            except ValueError:
                return black

        lowered = text.lower()
        if lowered.startswith(("rgb(", "rgba(")) and lowered.endswith(")"):
            # NOTE(review): some Gradio releases appear to emit rgba() strings
            # from ColorPicker -- confirm against the pinned version.
            parts = lowered[lowered.index("(") + 1:-1].split(",")
            if len(parts) < 3:
                return black
            try:
                channels = [float(part) for part in parts[:3]]
            except ValueError:
                return black
            return tuple(max(0, min(255, int(round(c)))) for c in channels)

        return black

    def process_image(
        self,
        image,
        model_name: str,
        default_strategy: str,
        blur_amount: int,
        color_hex: str,
        *regulation_values
    ) -> Tuple[Optional[str], dict, dict]:
        """
        PII Detection & Masking Tool - Mistral Models
        Process an image with the given parameters.

        Upload an image to detect and mask PII based on privacy regulations using custom Mistral model.
        Available regulations include: GDPR, CCPA, PIPEDA, LGPD, PECR, PDPA, HIPAA.
        Available masking strategies: blur, single_color, none.
        The tool supports various Mistral models for image processing.
        (Available models: pixtral-large-latest, mistral-ocr-latest, mistral-medium-2505).
        ALL ENUM FIELDS ARE REQUIRED and must be provided. the string none is a valid value when is among the choices.

        Args:
            image: Input image (file path / URL string, or an image array)
            model_name: Name of the Mistral model to use
            default_strategy: Default masking strategy
            blur_amount: Blur intensity (1-20)
            color_hex: Hex color for single_color strategy
            *regulation_values: List of strategy values for each regulation

        Returns:
            Tuple of (output_image_path, result_json, download_btn_visibility)
        """
        # Handle case when no image is provided
        if image is None:
            return None, {"error": "No image provided"}, gr.update(visible=False)

        color = self._parse_color(color_hex)

        # Pair each positional dropdown value with its regulation and turn the
        # UI's "none" choice into None ("do not mask for this regulation").
        regulation_map = {
            reg: (strat if strat != "none" else None)
            for reg, strat in zip(self.REGULATIONS, regulation_values)
        }

        temp_dir = None
        try:
            if isinstance(image, str):
                image_path = image
            else:
                # Non-path input: persist it so the API can read it from disk.
                # JPEG cannot store alpha, hence the RGB conversion.
                temp_dir = tempfile.mkdtemp()
                image_path = os.path.join(temp_dir, "input.jpg")
                Image.fromarray(image).convert("RGB").save(image_path)

            # Create output path
            os.makedirs(self.output_dir, exist_ok=True)
            output_path = os.path.join(self.output_dir, f"processed_{os.path.basename(image_path)}")
            print(f"Output path: {output_path}")
            print("Adding .jpg extension if not present")
            if not output_path.lower().endswith('.jpg'):
                output_path += '.jpg'

            # Call the API
            result = process_image_api(
                image_path=image_path,
                strategy_name=default_strategy if default_strategy != "none" else None,
                blur_amount=blur_amount,
                color=color,
                output_path=output_path,
                model=model_name,
                regulation_map=regulation_map or None
            )

            # Only trust the file on disk when this run succeeded; a file with
            # the same name may be left over from an earlier request.
            succeeded = isinstance(result, dict) and bool(result.get("success"))
            output_image = output_path if succeeded and os.path.exists(output_path) else None

            # The API result carries the PIL image object, which is not JSON
            # data; the image is already returned separately as a file path.
            if isinstance(result, dict):
                result = {key: value for key, value in result.items() if key != "processed_image"}

            return output_image, result, gr.update(visible=output_image is not None)

        except Exception as e:
            return None, {"error": str(e), "success": False}, gr.update(visible=False)

        finally:
            # Cleanup temp file if we created one (on success and on failure)
            if temp_dir is not None:
                import shutil
                shutil.rmtree(temp_dir, ignore_errors=True)

    def download_file(self, file_path: Optional[str] = None) -> Optional[str]:
        """
        Handle file download.

        Args:
            file_path: Path to the file to download

        Returns:
            Path to the file if it exists

        Raises:
            gr.Error: If the file doesn't exist
        """
        if file_path and os.path.exists(file_path):
            return file_path
        raise gr.Error("No processed file available for download")

    def launch(self, **kwargs):
        """Launch the Gradio interface."""
        return self.demo.launch(**kwargs)
300
+
301
+
302
def main():
    """Run the PII Masking UI as a stand-alone script.

    Uses ``tmp`` as the output directory and launches the Blocks app with a
    public share link, debug mode and the MCP server enabled, listening on
    all interfaces. No port is given, so Gradio chooses it.
    """
    workdir = "tmp"
    os.makedirs(workdir, exist_ok=True)

    launch_options = {
        "share": True,
        "debug": True,
        "server_name": "0.0.0.0",
        "mcp_server": True,
    }
    PIIMaskingUI(output_dir=workdir).demo.launch(**launch_options)


if __name__ == "__main__":
    main()
pii_image_processing.py ADDED
@@ -0,0 +1,613 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Image Handler
2
+
3
+ import base64
4
+ import requests
5
+ from io import BytesIO
6
+ from PIL import Image
7
+
8
class ImageHandler:
    """Load images from disk, HTTP(S) URLs or base64 data URIs, and save them.

    Every loader fully decodes the image (``Image.load``) before returning
    and re-raises any failure as ``IOError`` chained to the original
    exception.
    """

    @staticmethod
    def load_image_from_local(path: str) -> Image.Image:
        """Open and decode the image stored at the local path *path*.

        Raises:
            IOError: If the file cannot be opened or decoded.
        """
        try:
            image = Image.open(path)
            image.load()
            return image
        except Exception as e:
            raise IOError(f"Error loading local image: {e}") from e

    @staticmethod
    def load_image_from_web(url: str, timeout: float = 10) -> Image.Image:
        """Download *url* and decode the response body as an image.

        Args:
            url: HTTP or HTTPS address of the image.
            timeout: Seconds to wait for the server before giving up, so an
                unresponsive host cannot block the caller indefinitely.

        Raises:
            IOError: On network errors, non-2xx responses or undecodable data.
        """
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            image = Image.open(BytesIO(response.content))
            image.load()
            return image
        except Exception as e:
            raise IOError(f"Error loading web image: {e}") from e

    @staticmethod
    def load_image_from_base64(base64_str: str) -> Image.Image:
        """Decode a bare base64 payload (no ``data:`` prefix) into an image.

        Raises:
            IOError: If the payload is not valid base64 or not an image.
        """
        try:
            image_data = base64.b64decode(base64_str)
            image = Image.open(BytesIO(image_data))
            image.load()
            return image
        except Exception as e:
            raise IOError(f"Error loading base64 image: {e}") from e

    @staticmethod
    def save_image(image: Image.Image, path: str) -> None:
        """Write *image* to *path*; the format follows the file extension.

        Raises:
            IOError: If Pillow cannot write the file.
        """
        try:
            image.save(path)
        except Exception as e:
            raise IOError(f"Error saving image: {e}") from e

    @staticmethod
    def load_image(path: str) -> Image.Image:
        """Load an image, dispatching on the form of *path*.

        ``http://`` / ``https://`` values are downloaded, ``data:image/...``
        URIs containing ``;base64,`` are decoded in memory, and anything else
        is treated as a local file path.

        NOTE(review): when *path* comes from an untrusted caller this will
        fetch arbitrary URLs and read arbitrary local files -- consider
        restricting it at the call site.
        """
        if path.startswith(('http://', 'https://')):
            return ImageHandler.load_image_from_web(path)
        if path.startswith('data:image/') and ';base64,' in path:
            base64_str = path.split(';base64,')[1]
            return ImageHandler.load_image_from_base64(base64_str)
        return ImageHandler.load_image_from_local(path)
55
+
56
+
57
+ ## Area Covering
58
+
59
+ import random
60
+ import copy
61
+ from PIL import ImageFilter, ImageDraw
62
+
63
class CoverStrategy:
    """Interface for objects that hide one rectangular region of an image."""

    def cover(self, image, coordinates):
        """Hide the region described by *coordinates*; subclasses override this."""
        message = "Cover method must be implemented by subclasses"
        raise NotImplementedError(message)
66
+
67
class BlurStrategy(CoverStrategy):
    """Cover a region by replacing it with a Gaussian-blurred copy of itself."""

    def __init__(self, blur_amount=5):
        """Store *blur_amount*, used as the Gaussian blur radius."""
        self.blur_amount = blur_amount

    def cover(self, image, coordinates):
        """Blur the rectangle given by *coordinates* in place and return *image*.

        Args:
            image: PIL image to modify.
            coordinates: Mapping with ``x1``, ``y1``, ``x2``, ``y2`` (missing
                keys default to 0); values are truncated to ints.

        The box is normalised (corners may arrive in either order) and
        clamped to the image bounds. A box with no area inside the image is
        skipped rather than raising, so one bad detection does not abort the
        whole run.
        """
        x1, y1 = int(coordinates.get('x1', 0)), int(coordinates.get('y1', 0))
        x2, y2 = int(coordinates.get('x2', 0)), int(coordinates.get('y2', 0))

        left, right = sorted((x1, x2))
        top, bottom = sorted((y1, y2))

        # Clamp so the crop never includes padding from outside the image,
        # which would otherwise bleed into the blurred result.
        width, height = image.size
        left, top = max(0, left), max(0, top)
        right, bottom = min(width, right), min(height, bottom)
        if right <= left or bottom <= top:
            return image

        # Extract the region to blur
        region = image.crop((left, top, right, bottom))
        blurred_region = region.filter(ImageFilter.GaussianBlur(radius=self.blur_amount))

        # Paste back the blurred region
        image.paste(blurred_region, (left, top))
        return image
82
+
83
class SingleColorStrategy(CoverStrategy):
    """Cover a region by painting a filled rectangle over it."""

    def __init__(self, color=(0, 0, 0)):
        """Store the fill *color* (defaults to black)."""
        self.color = color

    def cover(self, image, coordinates):
        """Paint the rectangle given by *coordinates* in place and return *image*.

        Args:
            image: PIL image to modify.
            coordinates: Mapping with ``x1``, ``y1``, ``x2``, ``y2`` (missing
                keys default to 0); values are truncated to ints.

        The corners are sorted first so a box supplied with swapped corners
        is still drawn instead of making ``ImageDraw.rectangle`` raise.
        """
        x1, y1 = int(coordinates.get('x1', 0)), int(coordinates.get('y1', 0))
        x2, y2 = int(coordinates.get('x2', 0)), int(coordinates.get('y2', 0))

        left, right = sorted((x1, x2))
        top, bottom = sorted((y1, y2))

        draw = ImageDraw.Draw(image)
        draw.rectangle([left, top, right, bottom], fill=self.color)
        return image
94
+
95
class CoordinateBlurrer:
    """Jitter detection coordinates and apply a cover strategy to them."""

    def __init__(self, strategy: CoverStrategy):
        """Remember the strategy used by :meth:`cover_areas`."""
        self.strategy = strategy

    def blur_coordinates(self, data, blur_amount=5):
        """Return deep copies of *data* with randomly perturbed coordinates.

        Every numeric value in an item's ``coordinates`` mapping is shifted by
        a uniform random offset in ``[-blur_amount, blur_amount]``; non-numeric
        values are carried over as they are. The input items are not modified.

        NOTE(review): the offset moves the box that is later covered, so a
        large ``blur_amount`` can leave part of a detection exposed -- confirm
        this is intended.
        """
        jittered = []
        for entry in data:
            clone = copy.deepcopy(entry)
            clone['coordinates'] = {
                axis: (
                    number + random.uniform(-blur_amount, blur_amount)
                    if isinstance(number, (int, float))
                    else number
                )
                for axis, number in clone.get('coordinates', {}).items()
            }
            jittered.append(clone)
        return jittered

    def cover_areas(self, image, data):
        """Apply the strategy once per item in *data* and return the image."""
        result = image
        for entry in data:
            result = self.strategy.cover(result, entry.get('coordinates', {}))
        return result
121
+
122
+ # PII Extractor
123
+
124
+ from dotenv import load_dotenv
125
+ load_dotenv()
126
+ import base64
127
+ import os
128
+ from abc import ABC, abstractmethod
129
+ from typing import List, Optional, Union, Dict, Any
130
+ from pydantic import BaseModel
131
+
132
class Coordinates(BaseModel):
    """Rectangle of a detected PII region as four integer values.

    The extraction prompt in this file asks for pixel units.
    NOTE(review): presumably (x1, y1) is the top-left and (x2, y2) the
    bottom-right corner -- the model output is not validated for that.
    """
    x1: int
    y1: int
    x2: int
    y2: int
137
+
138
class PIIItem(BaseModel):
    """One PII detection, mirroring the per-element fields the prompt requests."""
    name: str
    coordinates: Coordinates
    confidence: float
    # The prompt asks for one of: low, medium, high (not enforced here).
    severity: str
    type: str
    # Regulation codes (e.g. "GDPR") the model considers applicable.
    probable_regulations: List[str]
145
+
146
class PIIResponse(BaseModel):
    """Top-level structured response: all detections plus the ``containing_text`` field."""
    piis: List[PIIItem]
    containing_text: str
149
+
150
class BaseVisionExtractor(ABC):
    """Abstract base class for vision-based PII extractors.

    Subclasses supply the provider-specific client, message format and
    request call; this class normalises the image input and provides the
    default extraction prompt.
    """

    # File extension -> MIME type used when building a data URI.
    _MIME_TYPES = {
        'jpg': 'image/jpeg',
        'jpeg': 'image/jpeg',
        'png': 'image/png',
        'webp': 'image/webp',
        'gif': 'image/gif',
    }

    def __init__(self, api_key: Optional[str] = None, model: Optional[str] = None):
        """Store credentials and model name; the client is created lazily."""
        self.api_key = api_key
        self.model = model
        self._client = None

    @abstractmethod
    def _initialize_client(self):
        """Initialize the specific client (Mistral, OpenAI, etc.)"""
        pass

    @abstractmethod
    def _create_messages(self, image_input: str, prompt: str) -> List[Dict[str, Any]]:
        """Create messages in the format expected by the specific API"""
        pass

    @abstractmethod
    def _make_request(self, messages: List[Dict[str, Any]]) -> Any:
        """Make the actual API request"""
        pass

    @staticmethod
    def encode_image_to_base64(image_path: str) -> Optional[str]:
        """Encode a local image file to a base64 string.

        Returns ``None`` (after printing the reason) when the file is missing
        or cannot be read.
        """
        try:
            with open(image_path, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode('utf-8')
        except FileNotFoundError:
            print(f"Error: The file {image_path} was not found.")
            return None
        except Exception as e:
            print(f"Error encoding image: {e}")
            return None

    @staticmethod
    def is_url(input_string: str) -> bool:
        """Check if the input is a URL"""
        return input_string.startswith(('http://', 'https://'))

    @staticmethod
    def is_base64(input_string: str) -> bool:
        """Check if the input is already a ``data:image/...`` URI"""
        return input_string.startswith('data:image/')

    def prepare_image_input(self, image_input: str) -> str:
        """
        Prepare image input - handles URL, base64, or local file path

        Args:
            image_input: Can be:
                - URL (http://... or https://...)
                - Base64 encoded string (data:image/...)
                - Local file path

        Returns:
            Properly formatted image input for API

        Raises:
            ValueError: If a local file cannot be read (or is empty).
        """
        # URLs and data URIs are already in a form the API accepts.
        if self.is_url(image_input) or self.is_base64(image_input):
            return image_input

        # Assume it's a local file path
        base64_image = self.encode_image_to_base64(image_input)
        if not base64_image:
            raise ValueError(f"Could not process image input: {image_input}")

        # Detect image format from file extension; unknown extensions are
        # sent as JPEG.
        file_ext = image_input.lower().split('.')[-1]
        mime_type = BaseVisionExtractor._MIME_TYPES.get(file_ext, 'image/jpeg')
        return f"data:{mime_type};base64,{base64_image}"

    def extract_pii(self, image_input: str, custom_prompt: Optional[str] = None) -> Any:
        """Extract PII from image.

        Creates the client on first use, normalises the image, builds the
        messages from *custom_prompt* (or the default prompt) and returns
        whatever the subclass's ``_make_request`` returns.
        """
        if not self._client:
            self._initialize_client()

        prepared_image = self.prepare_image_input(image_input)
        prompt = custom_prompt or self.get_default_prompt()
        messages = self._create_messages(prepared_image, prompt)

        return self._make_request(messages)

    def get_default_prompt(self) -> str:
        """Get the default PII extraction prompt.

        The ``probable_regulations`` list names every regulation described in
        the prompt's REGULATIONS table, including PIPEDA, so that each of
        them can appear in the model's answer.
        """
        return """
        Extract all the PII in the image and the corresponding coordinates (x1, y1, x2, y2) in the image. (units are pixel)
        You must provide the smallest possible rectangle that contains the PII.
        You must ensure that the provided rectangle covers the whole text containing that PII.
        Provide the result in json which has a field called containing_text and
        a field called piis which is a json array.
        Each element of the array has the following fields:
        - name
        - coordinates
        - x1
        - y1
        - x2
        - y2
        - confidence
        - severity (low, medium, high)
        - type
        - probable_regulations (GDPR, HIPAA, CCPA, PECR, LGPD, PDPA, PIPEDA)

        ---- Additional information ----
        REGULATIONS = {
        "GDPR": "General Data Protection Regulation (EU)",
        "CCPA": "California Consumer Privacy Act",
        "PIPEDA": "Personal Information Protection and Electronic Documents Act (Canada)",
        "LGPD": "Lei Geral de Proteção de Dados (Brazil)",
        "PDPA": "Personal Data Protection Act (Singapore)",
        "PECR": "Privacy and Electronic Communications Regulations (UK)",
        "HIPAA": "Health Insurance Portability and Accountability Act (USA)",
        }
        """
276
+
277
class MistralPIIExtractor(BaseVisionExtractor):
    """PII extractor backed by the Mistral chat API."""

    def __init__(self, api_key: Optional[str] = None, model: str = 'pixtral-large-latest'):
        """Use *api_key*, or the ``MISTRAL_API_KEY`` environment variable when it is falsy."""
        resolved_key = api_key or os.environ.get('MISTRAL_API_KEY')
        super().__init__(resolved_key, model)

    def _initialize_client(self):
        """Create the Mistral client; the SDK is imported only at this point."""
        from mistralai import Mistral
        self._client = Mistral(api_key=self.api_key)

    def _create_messages(self, image_input: str, prompt: str) -> List[Dict[str, Any]]:
        """Build a single user message holding the prompt text and the image."""
        text_part = {"type": "text", "text": prompt}
        image_part = {"type": "image_url", "image_url": image_input}
        return [{"role": "user", "content": [text_part, image_part]}]

    def _make_request(self, messages: List[Dict[str, Any]]) -> str:
        """Call ``chat.parse`` with ``PIIResponse`` as the response format.

        Returns the ``content`` of the first choice's message.
        """
        parsed = self._client.chat.parse(
            model=self.model,
            messages=messages,
            response_format=PIIResponse,
            temperature=0,
        )
        first_choice = parsed.choices[0]
        return first_choice.message.content
315
+
316
class OpenAIPIIExtractor(BaseVisionExtractor):
    """PII extractor backed by the OpenAI chat completions API (example of extensibility)."""

    def __init__(self, api_key: Optional[str] = None, model: str = 'gpt-4-vision-preview'):
        """Use *api_key*, or the ``OPENAI_API_KEY`` environment variable when it is falsy."""
        resolved_key = api_key or os.environ.get('OPENAI_API_KEY')
        super().__init__(resolved_key, model)

    def _initialize_client(self):
        """Create the OpenAI client; the SDK is imported only at this point."""
        from openai import OpenAI
        self._client = OpenAI(api_key=self.api_key)

    def _create_messages(self, image_input: str, prompt: str) -> List[Dict[str, Any]]:
        """Build a single user message; the image is wrapped as ``{"url": ...}``."""
        text_part = {"type": "text", "text": prompt}
        image_part = {"type": "image_url", "image_url": {"url": image_input}}
        return [{"role": "user", "content": [text_part, image_part]}]

    def _make_request(self, messages: List[Dict[str, Any]]) -> str:
        """Request a completion capped at 1000 tokens and return its message content.

        NOTE(review): no structured response format is requested here, so the
        content is not guaranteed to be the JSON that callers pass to
        ``json.loads`` -- confirm before relying on this provider.
        """
        completion = self._client.chat.completions.create(
            model=self.model,
            messages=messages,
            max_tokens=1000,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
355
+
356
+ # Factory for easy model switching
357
class PIIExtractorFactory:
    """Build the PII extractor that matches a provider name."""

    @staticmethod
    def create_extractor(provider: str, **kwargs) -> BaseVisionExtractor:
        """
        Instantiate the extractor for *provider*.

        Args:
            provider: 'mistral' or 'openai' (matched case-insensitively)
            **kwargs: Forwarded unchanged to the extractor's constructor

        Raises:
            ValueError: If the provider is not one of the supported names
        """
        key = provider.lower()
        if key == 'mistral':
            extractor_cls = MistralPIIExtractor
        elif key == 'openai':
            extractor_cls = OpenAIPIIExtractor
        else:
            raise ValueError(f"Unsupported provider: {provider}")
        return extractor_cls(**kwargs)
375
+
376
+
377
+ # Image Processing Facade
378
+
379
+ import json
380
+
381
class ImageProcessingService:
    """Run PII extraction with a default-configured Mistral extractor."""

    @staticmethod
    def process_image(image):
        """Extract PII from *image* and return ``(piis, containing_text)``.

        The extractor's JSON string is printed for debugging and then parsed.
        Any failure is printed as an error dict and re-raised to the caller.
        """
        extractor = MistralPIIExtractor()
        try:
            raw_json = extractor.extract_pii(image)
            print(f'DEBUG - Extracted PII: {raw_json}')
            payload = json.loads(raw_json)
            return payload['piis'], payload['containing_text']
        except Exception as e:
            print({"error": f"Failed to extract PII: {e}"})
            raise
395
+
396
class MockImageProcessingService:
    """Offline stand-in for the extraction service that returns canned detections."""

    @staticmethod
    def process_image(image):
        """Ignore *image* and return two fixed detections plus a placeholder text."""
        restaurant = {
            "name": "Trattoria Il Gabbiano",
            "coordinates": {"x1": 50, "y1": 20, "x2": 280, "y2": 40},
            "confidence": 0.99,
            "severity": "low",
            "type": "business_name",
        }
        company = {
            "name": "Tarta sas di Fontana Stefania & c.",
            "coordinates": {"x1": 90, "y1": 40, "x2": 320, "y2": 55},
            "confidence": 0.98,
            "severity": "medium",
            "type": "business_name",
        }
        detections = [restaurant, company]
        return detections, "the containing text mocked"
416
+
417
+
418
+
419
+
420
class ImageProcessingFacade:
    """One-call pipeline: load an image, detect PII, cover it, optionally save."""

    def __init__(self):
        """Create the image handler used for loading and saving."""
        self.image_handler = ImageHandler()

    def process(self, image_path, strategy_name='blur', blur_amount=5, color=(0, 0, 0), output_path=None):
        """Run the pipeline on *image_path* with a single covering strategy.

        Failures are reported as ``{"error": ...}`` dicts instead of being
        raised. On success the dict holds the jittered detections (``data``),
        the covered PIL image (``processed_image``) and ``success: True``.
        """
        # Step 1: load the picture.
        try:
            source = self.image_handler.load_image(image_path)
        except Exception as e:
            return {"error": f"Failed to load image: {e}"}

        # Step 2: pick the covering strategy.
        if strategy_name == 'single_color':
            cover_strategy = SingleColorStrategy(color)
        elif strategy_name == 'blur':
            cover_strategy = BlurStrategy(blur_amount)
        else:
            return {"error": f"Unknown strategy: {strategy_name}"}

        # Step 3: detect PII through the extraction service.
        try:
            detections, containing_text = ImageProcessingService.process_image(image_path)
        except Exception as e:
            return {"error": f"Failed to process image: {e}"}

        # Step 4: jitter the coordinates, cover the areas, save if requested.
        try:
            blurrer = CoordinateBlurrer(cover_strategy)
            jittered = blurrer.blur_coordinates(detections, blur_amount)
            covered = blurrer.cover_areas(source.copy(), jittered)

            if output_path:
                self.image_handler.save_image(covered, output_path)

            outcome = {"data": jittered, "processed_image": covered, "success": True}
            return outcome
        except Exception as e:
            return {"error": f"Failed to process coordinates: {e}"}
461
+
462
+
463
def process_image_api(image_path,
                      strategy_name='blur',
                      blur_amount=5,
                      color=(0, 0, 0),
                      output_path=None,
                      provider='mistral',
                      model=None,
                      regulation_map=None):
    """
    API function to process images with coordinate blurring and area covering.

    Args:
        image_path (str): Path to image (local, web URL, or base64)
        strategy_name (str): Default covering strategy when regulation_map is not provided ('blur' or 'single_color')
        blur_amount (int): Amount of blur for coordinates and blur strategy
        color (tuple): RGB color for single_color strategy
        output_path (str, optional): Path to save processed image
        provider (str): PII extractor provider ('mistral' or 'openai')
        model (str, optional): Model name for the PII extractor
        regulation_map (dict, optional): Mapping of regulation names to strategy names or None.
            A regulation mapped to None is ignored: an item is covered with the
            strategy of its first listed regulation that has a non-None entry,
            and left untouched when none of its regulations has one.

    Returns:
        dict: ``{"data", "processed_image", "success": True}`` on success,
        otherwise ``{"error": message}``. Errors are never raised.
    """
    def build_strategy(name):
        """Return the cover strategy for *name*, or None when it is unknown."""
        if name == "blur":
            return BlurStrategy(blur_amount)
        if name == "single_color":
            return SingleColorStrategy(color)
        return None

    # Load image
    try:
        print(f"DEBUG - Loading image from: {image_path}")
        image = ImageHandler.load_image(image_path)
    except Exception as e:
        return {"error": f"Failed to load image: {e}"}

    # Create PII extractor
    try:
        extractor_kwargs = {}
        if model is not None:
            extractor_kwargs["model"] = model
        extractor = PIIExtractorFactory.create_extractor(provider, **extractor_kwargs)
    except Exception as e:
        return {"error": f"Failed to create PII extractor: {e}"}

    # Extract PII
    try:
        data_str = extractor.extract_pii(image_path)
        data = json.loads(data_str)
        piis = data.get("piis", [])
    except Exception as e:
        return {"error": f"Failed to extract PII: {e}"}

    processed_data = []
    processed_image = image.copy()

    # Apply covering
    try:
        if regulation_map is not None:
            for item in piis:
                # `or []` tolerates an explicit null in the model output.
                regs = item.get("probable_regulations", []) or []

                # Skip regulations the caller chose to ignore (mapped to None)
                # so a later regulation that does require masking still applies.
                matched_reg = None
                strategy_for_item = None
                for reg in regs:
                    candidate = regulation_map.get(reg)
                    if candidate is not None:
                        matched_reg, strategy_for_item = reg, candidate
                        break

                if strategy_for_item is None:
                    processed_data.append(item)
                    continue

                strategy = build_strategy(strategy_for_item)
                if strategy is None:
                    return {"error": f"Unknown strategy for regulation {matched_reg}: {strategy_for_item}"}

                blurrer = CoordinateBlurrer(strategy)
                blurred_item = blurrer.blur_coordinates([item], blur_amount)[0]
                processed_image = blurrer.cover_areas(processed_image, [blurred_item])
                processed_data.append(blurred_item)
        else:
            strategy = build_strategy(strategy_name)
            if strategy is None:
                return {"error": f"Unknown strategy: {strategy_name}"}
            blurrer = CoordinateBlurrer(strategy)
            processed_data = blurrer.blur_coordinates(piis, blur_amount)
            processed_image = blurrer.cover_areas(processed_image, processed_data)
    except Exception as e:
        return {"error": f"Failed to apply covering: {e}"}

    # Save processed image if provided
    if output_path:
        try:
            image_to_save = processed_image
            # JPEG cannot store alpha or palette images; convert just for the
            # file so e.g. a transparent PNG can still be written to a .jpg.
            jpeg_target = str(output_path).lower().endswith((".jpg", ".jpeg"))
            jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
            if jpeg_target and processed_image.mode not in jpeg_modes:
                image_to_save = processed_image.convert("RGB")
            ImageHandler.save_image(image_to_save, output_path)
        except Exception as e:
            return {"error": f"Failed to save processed image: {e}"}

    return {"data": processed_data, "processed_image": processed_image, "success": True}
558
+
559
+
560
+ from enum import Enum
561
+
562
# NOTE(review): this Enum reuses the name of the CoverStrategy base class
# defined earlier in this module. From here on the module-level name refers
# to the Enum; the subclasses defined above keep their original base class.
class CoverStrategy(Enum):
    """Names of the covering strategies accepted as ``strategy_name`` values."""
    BLUR = "blur"
    SINGLE_COLOR = "single_color"
565
+
566
class MistralModels(Enum):
    # https://docs.mistral.ai/getting-started/models/models_overview/
    '''
    Model identifiers offered by this tool; each value is the model name
    string passed to the extractor.

    Alias notes recorded by the author:
    mistral-large-latest: currently points to mistral-large-2411.
    pixtral-large-latest: currently points to pixtral-large-2411.
    mistral-medium-latest: currently points to mistral-medium-2505.
    mistral-moderation-latest: currently points to mistral-moderation-2411.
    ministral-3b-latest: currently points to ministral-3b-2410.
    ministral-8b-latest: currently points to ministral-8b-2410.
    open-mistral-nemo: currently points to open-mistral-nemo-2407.
    mistral-small-latest: currently points to mistral-small-2503.
    devstral-small-latest: currently points to devstral-small-2505
    mistral-saba-latest: currently points to mistral-saba-2502.
    codestral-latest: currently points to codestral-2501.
    mistral-ocr-latest: currently points to mistral-ocr-2505.
    '''
    PIXTRAL_LARGE_LATEST = 'pixtral-large-latest'
    # NOTE(review): presumably an OCR-specific model -- confirm it works with
    # the chat call used by MistralPIIExtractor.
    MISTRAL_OCR_LATEST = 'mistral-ocr-latest'
    # MISTRAL_SABA_2502 = 'mistral-saba-2502'
    MISTRAL_MEDIUM_2505 = 'mistral-medium-2505'
586
+
587
if __name__ == "__main__":
    # Manual smoke test: needs a Mistral API key and a sample receipt image.
    # The path components are relative on purpose: a leading "/" would make
    # os.path.join discard the home directory entirely.
    image = os.path.join(
        os.path.expanduser("~"), "Pictures", "tmp", "lo-scontrino-fiscale.jpg"
    )
    result = ImageProcessingService.process_image(image)
    print(result)

    # Process with blur strategy
    result = process_image_api(
        image_path=image,
        strategy_name="blur",
        blur_amount=3,
        output_path="tmp/processed_image.jpg"
    )
    print("Result1")
    print(result)

    # Process with single color covering
    result2 = process_image_api(
        image_path="https://www.servizicontabiliefiscaliviterbo.it/wordpress/wp-content/uploads/2016/03/lo-scontrino-fiscale.jpg",
        strategy_name="single_color",
        color=(255, 0, 0),  # Red
        blur_amount=2
    )

    print("Result2")
    print(result2)
pyproject.toml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "pii-detection-mcp-server"
3
+ version = "0.1.0"
4
+ description = "PII Detection and Masking Tool with Mistral AI"
5
+ readme = "README.md"
6
+ requires-python = ">=3.13"
7
+ dependencies = [
8
+ "gradio>=4.0.0",
9
+ "mistralai>=1.8.1",
10
+ "pillow>=11.2.1",
11
+ "python-dotenv>=1.1.0",
12
+ "requests>=2.31.0",
13
+ "numpy>=1.24.0",
14
+ "gradio-screenrecorder>=0.0.1",
15
+ ]
requirements.txt ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv pip compile pyproject.toml
3
+ aiofiles==24.1.0
4
+ # via gradio
5
+ annotated-types==0.7.0
6
+ # via pydantic
7
+ anyio==4.9.0
8
+ # via
9
+ # gradio
10
+ # httpx
11
+ # starlette
12
+ audioop-lts==0.2.1
13
+ # via gradio
14
+ certifi==2025.4.26
15
+ # via
16
+ # httpcore
17
+ # httpx
18
+ # requests
19
+ charset-normalizer==3.4.2
20
+ # via requests
21
+ click==8.2.1
22
+ # via
23
+ # typer
24
+ # uvicorn
25
+ eval-type-backport==0.2.2
26
+ # via mistralai
27
+ fastapi==0.115.12
28
+ # via gradio
29
+ ffmpy==0.6.0
30
+ # via gradio
31
+ filelock==3.18.0
32
+ # via huggingface-hub
33
+ fsspec==2025.5.1
34
+ # via
35
+ # gradio-client
36
+ # huggingface-hub
37
+ gradio==5.32.1
38
+ # via
39
+ # pii-detection-mcp-server (pyproject.toml)
40
+ # gradio-screenrecorder
41
+ gradio-client==1.10.2
42
+ # via gradio
43
+ gradio-screenrecorder==0.0.1
44
+ # via pii-detection-mcp-server (pyproject.toml)
45
+ groovy==0.1.2
46
+ # via gradio
47
+ h11==0.16.0
48
+ # via
49
+ # httpcore
50
+ # uvicorn
51
+ hf-xet==1.1.3
52
+ # via huggingface-hub
53
+ httpcore==1.0.9
54
+ # via httpx
55
+ httpx==0.28.1
56
+ # via
57
+ # gradio
58
+ # gradio-client
59
+ # mistralai
60
+ # safehttpx
61
+ huggingface-hub==0.32.4
62
+ # via
63
+ # gradio
64
+ # gradio-client
65
+ idna==3.10
66
+ # via
67
+ # anyio
68
+ # httpx
69
+ # requests
70
+ jinja2==3.1.6
71
+ # via gradio
72
+ markdown-it-py==3.0.0
73
+ # via rich
74
+ markupsafe==3.0.2
75
+ # via
76
+ # gradio
77
+ # jinja2
78
+ mdurl==0.1.2
79
+ # via markdown-it-py
80
+ mistralai==1.8.1
81
+ # via pii-detection-mcp-server (pyproject.toml)
82
+ numpy==2.2.6
83
+ # via
84
+ # pii-detection-mcp-server (pyproject.toml)
85
+ # gradio
86
+ # pandas
87
+ orjson==3.10.18
88
+ # via gradio
89
+ packaging==25.0
90
+ # via
91
+ # gradio
92
+ # gradio-client
93
+ # huggingface-hub
94
+ pandas==2.2.3
95
+ # via gradio
96
+ pillow==11.2.1
97
+ # via
98
+ # pii-detection-mcp-server (pyproject.toml)
99
+ # gradio
100
+ pydantic==2.11.5
101
+ # via
102
+ # fastapi
103
+ # gradio
104
+ # mistralai
105
+ pydantic-core==2.33.2
106
+ # via pydantic
107
+ pydub==0.25.1
108
+ # via gradio
109
+ pygments==2.19.1
110
+ # via rich
111
+ python-dateutil==2.9.0.post0
112
+ # via
113
+ # mistralai
114
+ # pandas
115
+ python-dotenv==1.1.0
116
+ # via pii-detection-mcp-server (pyproject.toml)
117
+ python-multipart==0.0.20
118
+ # via gradio
119
+ pytz==2025.2
120
+ # via pandas
121
+ pyyaml==6.0.2
122
+ # via
123
+ # gradio
124
+ # huggingface-hub
125
+ requests==2.32.3
126
+ # via
127
+ # pii-detection-mcp-server (pyproject.toml)
128
+ # huggingface-hub
129
+ rich==14.0.0
130
+ # via typer
131
+ ruff==0.11.12
132
+ # via gradio
133
+ safehttpx==0.1.6
134
+ # via gradio
135
+ semantic-version==2.10.0
136
+ # via gradio
137
+ shellingham==1.5.4
138
+ # via typer
139
+ six==1.17.0
140
+ # via python-dateutil
141
+ sniffio==1.3.1
142
+ # via anyio
143
+ starlette==0.46.2
144
+ # via
145
+ # fastapi
146
+ # gradio
147
+ tomlkit==0.13.2
148
+ # via gradio
149
+ tqdm==4.67.1
150
+ # via huggingface-hub
151
+ typer==0.16.0
152
+ # via gradio
153
+ typing-extensions==4.14.0
154
+ # via
155
+ # fastapi
156
+ # gradio
157
+ # gradio-client
158
+ # huggingface-hub
159
+ # pydantic
160
+ # pydantic-core
161
+ # typer
162
+ # typing-inspection
163
+ typing-inspection==0.4.1
164
+ # via
165
+ # mistralai
166
+ # pydantic
167
+ tzdata==2025.2
168
+ # via pandas
169
+ urllib3==2.4.0
170
+ # via requests
171
+ uvicorn==0.34.3
172
+ # via gradio
173
+ websockets==15.0.1
174
+ # via gradio-client
tests/test_pii_image_processing.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+ import os
3
+ import json
4
+ from PIL import Image
5
+ from pii_image_processing import process_image_api, PIIExtractorFactory
6
+
7
class DummyExtractor:
    """Offline stand-in for a PII extractor that returns one fixed detection."""

    def __init__(self, model=None):
        """Accept ``model`` for signature compatibility; it is not used."""

    def extract_pii(self, image_input):
        """Return a JSON string describing a single hard-coded PII region.

        ``image_input`` is ignored; the payload is the same on every call.
        """
        bounding_box = dict(x1=10, y1=10, x2=50, y2=50)
        detection = dict(
            name="TestPII",
            coordinates=bounding_box,
            probable_regulations=["GDPR"],
        )
        payload = dict(piis=[detection], containing_text="TestPII")
        return json.dumps(payload)
23
+
24
class TestProcessImageApi(unittest.TestCase):
    """Run ``process_image_api`` with the PII extractor replaced by a stub.

    The extractor factory is patched for the lifetime of this class only and
    is restored afterwards, so other test modules see the real factory.
    """

    @classmethod
    def setUpClass(cls):
        """Install the stub extractor and write a 100x100 grey JPEG fixture."""
        # Read from the class __dict__ so the stored descriptor (e.g. a
        # staticmethod object) is captured as-is and can be put back verbatim.
        original = PIIExtractorFactory.__dict__.get("create_extractor")
        # Monkey-patch factory to use dummy extractor
        PIIExtractorFactory.create_extractor = staticmethod(
            lambda provider, **kwargs: DummyExtractor(**kwargs)
        )
        # Registered immediately so the patch is undone even if the fixture
        # creation below fails.
        cls.addClassCleanup(cls._restore_factory, original)

        os.makedirs("tmp", exist_ok=True)
        cls.test_image = "tmp/dummy_test.jpg"
        Image.new("RGB", (100, 100), (128, 128, 128)).save(cls.test_image)

    @staticmethod
    def _restore_factory(original):
        """Undo the ``create_extractor`` patch applied in ``setUpClass``."""
        if original is None:
            # The attribute was not defined directly on the class before the
            # patch; removing ours re-exposes whatever is inherited.
            del PIIExtractorFactory.create_extractor
        else:
            PIIExtractorFactory.create_extractor = original

    @staticmethod
    def _fresh_output(path):
        """Delete a leftover ``path`` so the later existence check is meaningful."""
        try:
            os.remove(path)
        except FileNotFoundError:
            pass
        return path

    def _assert_single_pii_written(self, result, out):
        """Check the call succeeded, wrote ``out`` and reported exactly one PII."""
        self.assertTrue(result.get("success"))
        self.assertTrue(os.path.exists(out))
        self.assertEqual(len(result["data"]), 1)

    def test_blur_strategy(self):
        """Blur strategy writes the output file and reports the stub's PII."""
        out = self._fresh_output("tmp/output_blur.jpg")
        result = process_image_api(
            self.test_image,
            strategy_name="blur",
            blur_amount=2,
            output_path=out
        )
        self._assert_single_pii_written(result, out)

    def test_single_color_strategy(self):
        """Single-colour strategy writes the output file and reports the stub's PII."""
        out = self._fresh_output("tmp/output_color.jpg")
        result = process_image_api(
            self.test_image,
            strategy_name="single_color",
            color=(255, 0, 0),
            output_path=out
        )
        self._assert_single_pii_written(result, out)

    def test_regulation_map(self):
        """A regulation-to-strategy map is accepted in place of a strategy name."""
        out = self._fresh_output("tmp/output_reg.jpg")
        reg_map = {"GDPR": "single_color"}
        result = process_image_api(
            self.test_image,
            regulation_map=reg_map,
            output_path=out
        )
        self._assert_single_pii_written(result, out)
68
+
69
+ if __name__ == "__main__":
70
+ unittest.main()
uv.lock ADDED
The diff for this file is too large to render. See raw diff