Spaces:
Runtime error
Runtime error
Upload app.py
Browse files
app.py
CHANGED
|
@@ -88,43 +88,45 @@ def read_document(file):
|
|
| 88 |
else:
|
| 89 |
return process_txt(file)
|
| 90 |
|
| 91 |
-
def save_redacted_file(original_file, redacted_text):
|
| 92 |
-
"""Saves redacted text back in the original format and returns a valid file path."""
|
| 93 |
-
|
| 94 |
ext = original_file.name.split(".")[-1].lower()
|
| 95 |
-
|
| 96 |
-
# Create a temporary file with a proper path
|
| 97 |
-
temp_dir = tempfile.gettempdir() # OS-specific temp folder
|
| 98 |
safe_filename = f"redacted_{os.path.basename(original_file.name)}"
|
| 99 |
redacted_file_path = os.path.join(temp_dir, safe_filename)
|
| 100 |
-
|
| 101 |
-
if ext == "
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
doc = Document()
|
| 107 |
-
for line in redacted_text.split("\n"):
|
| 108 |
-
doc.add_paragraph(line)
|
| 109 |
-
doc.save(redacted_file_path) # Save DOCX in temp folder
|
| 110 |
elif ext == "pptx":
|
| 111 |
-
ppt = Presentation()
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
ppt.save(redacted_file_path)
|
| 116 |
-
else:
|
| 117 |
with open(redacted_file_path, "w", encoding="utf-8") as f:
|
| 118 |
f.write(redacted_text)
|
| 119 |
-
|
| 120 |
-
return redacted_file_path
|
| 121 |
|
| 122 |
|
| 123 |
def process_file(file, selected_entities, redaction_method):
|
|
|
|
| 124 |
text = read_document(file)
|
| 125 |
redacted_text = redact_text(text, selected_entities, redaction_method)
|
| 126 |
-
redacted_file_path = save_redacted_file(file, redacted_text)
|
| 127 |
-
|
|
|
|
|
|
|
| 128 |
|
| 129 |
def select_all_entities():
|
| 130 |
return PII_ENTITIES
|
|
|
|
| 88 |
else:
|
| 89 |
return process_txt(file)
|
| 90 |
|
| 91 |
+
def save_redacted_file(original_file, redacted_text, selected_entities, redaction_method):
    """Write a redacted copy of the uploaded file and return its path.

    DOCX and PPTX inputs are re-opened from ``original_file.name`` and
    redacted in place (paragraph by paragraph / shape by shape) so the
    document structure is kept. Every other extension gets
    ``redacted_text`` written out as UTF-8 plain text.

    Args:
        original_file: Uploaded file object; only its ``name`` attribute
            (a filesystem path) is read.
        redacted_text: Already-redacted full text. Used only for the
            plain-text fallback branch.
        selected_entities: Entity selection forwarded to ``redact_text``.
        redaction_method: Redaction method forwarded to ``redact_text``.

    Returns:
        Path of the written file, named ``redacted_<original basename>``
        inside a freshly created temporary directory.
    """
    source_path = original_file.name
    # splitext gives "" for names without an extension instead of echoing
    # the whole filename back as the "extension".
    ext = os.path.splitext(source_path)[1].lstrip(".").lower()

    # A dedicated directory per call: writing straight into the shared temp
    # folder under a name derived from the upload lets two uploads with the
    # same filename overwrite each other's redacted output.
    output_dir = tempfile.mkdtemp(prefix="redacted_")
    redacted_file_path = os.path.join(
        output_dir, f"redacted_{os.path.basename(source_path)}"
    )

    if ext == "docx":
        doc = Document(source_path)
        # NOTE(review): only doc.paragraphs is walked; text held elsewhere
        # (e.g. tables, headers/footers) is presumably left unredacted - confirm.
        for para in doc.paragraphs:
            current = para.text
            cleaned = redact_text(current, selected_entities, redaction_method)
            # Assigning para.text rebuilds the paragraph as a single run, so
            # only touch paragraphs whose text actually changed.
            if cleaned != current:
                para.text = cleaned
        doc.save(redacted_file_path)
    elif ext == "pptx":
        ppt = Presentation(source_path)
        for slide in ppt.slides:
            for shape in slide.shapes:
                # Pictures, graphic frames, groups, etc. carry no text frame.
                if not shape.has_text_frame:
                    continue
                frame = shape.text_frame
                current = frame.text
                cleaned = redact_text(current, selected_entities, redaction_method)
                # Same reasoning as for DOCX: leave untouched shapes intact.
                if cleaned != current:
                    frame.text = cleaned
        ppt.save(redacted_file_path)
    else:
        # Fallback for every other extension: the output keeps the original
        # filename but contains plain text only.
        with open(redacted_file_path, "w", encoding="utf-8") as f:
            f.write(redacted_text)

    return redacted_file_path
|
| 120 |
|
| 121 |
|
| 122 |
def process_file(file, selected_entities, redaction_method):
    """Redact an uploaded file and return the redacted text plus output path.

    The document text is extracted with ``read_document``, passed through
    ``redact_text`` with the chosen entities and method, and a redacted
    copy of the file is produced by ``save_redacted_file``.

    Returns:
        A ``(redacted_text, redacted_file_path)`` tuple.
    """
    redacted_text = redact_text(
        read_document(file), selected_entities, redaction_method
    )
    output_path = save_redacted_file(
        file, redacted_text, selected_entities, redaction_method
    )
    return redacted_text, output_path
|
| 129 |
+
|
| 130 |
|
| 131 |
def select_all_entities():
    """Return the module-level ``PII_ENTITIES`` collection unchanged."""
    every_entity = PII_ENTITIES
    return every_entity
|