| |
| """Simple Gradio demo for the PDF attacker tools |
| |
| Allows entering text, choosing attack type, and downloading the generated PDF. |
| """ |
| import os |
| import time |
| from typing import Tuple |
|
|
| import PyPDF2 |
| import gradio as gr |
|
|
| from pdf_attacker import PDFAttacker |
|
|
|
|
# Module-level PDFAttacker instance, created once at import time and reused
# by generate_pdf for every request instead of constructing one per call.
attacker = PDFAttacker()
|
|
|
|
| def _ensure_tmp_dir() -> str: |
| """Ensure tmp dir exists and return its path""" |
| path = os.path.join(os.getcwd(), "tmp") |
| os.makedirs(path, exist_ok=True) |
| return path |
|
|
|
|
| def _extract_text_from_pdf(pdf_path: str) -> str: |
| """Extract text from a PDF file for preview""" |
| try: |
| with open(pdf_path, 'rb') as f: |
| reader = PyPDF2.PdfReader(f) |
| text = "" |
| for page in reader.pages: |
| page_text = page.extract_text() |
| if page_text: |
| text += page_text |
| return text.strip() |
| except Exception as e: |
| return f"Error extracting text: {e}" |
|
|
|
|
def generate_pdf(
    text: str,
    mode: str,
    attack_factor: float = 0.7,
    target_text: str = "",
) -> Tuple[str, str]:
    """Generate the selected PDF and return ``(pdf_path, extracted_text)``.

    Args:
        text: Input text. Runs of whitespace (including newlines) are
            collapsed to single spaces before generation; ``None`` is
            treated as empty text.
        mode: One of ``'normal'``, ``'attacked'`` or ``'targeted'``.
        attack_factor: Forwarded to ``create_attacked_pdf``; only used in
            ``'attacked'`` mode.
        target_text: Forwarded to ``create_targeted_pdf``; only used in
            ``'targeted'`` mode.

    Returns:
        On success, the path of the generated PDF and the text extracted
        back out of it. On failure, ``("", message)`` where ``message``
        describes the unknown mode or the generation error.
    """
    # Validate the mode first: it becomes part of the output filename, so
    # an unexpected value must be rejected before any path is built or any
    # directory is created.
    if mode not in ('normal', 'attacked', 'targeted'):
        return "", f"Unknown mode: {mode}"

    # Collapse all whitespace runs; tolerate None so a missing text value
    # does not raise AttributeError before the error handling below.
    clean_text = " ".join((text or "").split())

    tmp_dir = _ensure_tmp_dir()
    # Millisecond timestamp keeps successive outputs from overwriting
    # each other.
    timestamp = int(time.time() * 1000)
    output_path = os.path.join(tmp_dir, f"{mode}_{timestamp}.pdf")

    try:
        if mode == 'normal':
            attacker.create_normal_pdf(text=clean_text, output_path=output_path)
        elif mode == 'attacked':
            attacker.create_attacked_pdf(
                text=clean_text,
                output_path=output_path,
                attack_factor=attack_factor,
            )
        else:  # 'targeted' -- the only remaining validated mode
            attacker.create_targeted_pdf(
                text=clean_text,
                target_text=target_text,
                output_path=output_path,
            )
    except Exception as e:
        # Report generation failures in-band so the caller can show them
        # in the preview field instead of crashing.
        return "", f"Error generating PDF: {e}"

    # Read the PDF back so the caller can show what a text extractor sees.
    extracted = _extract_text_from_pdf(output_path)
    return output_path, extracted
|
|
|
|
def build_demo():
    """Assemble the Gradio Blocks UI and return it without launching.

    Layout: a heading, a row holding the input textbox next to a column of
    controls (mode, attack factor, target text, generate button), then the
    download widget and the extracted-text preview. Clicking the button
    runs ``generate_pdf`` and fills the last two widgets.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# PDF Humanizer: Attack demo\nGenerate PDFs that look normal but extract differently when copied")

        with gr.Row():
            input_box = gr.Textbox(lines=8, label="Input text", value="Enter or paste text here...")
            with gr.Column():
                mode_radio = gr.Radio(choices=['normal', 'attacked', 'targeted'], value='attacked', label='Mode')
                factor_slider = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, value=0.7, label='Attack factor (attacked mode)')
                target_box = gr.Textbox(lines=2, label='Target text (targeted mode)')
                run_button = gr.Button('Generate PDF')

        pdf_output = gr.File(label='Download generated PDF')
        preview_box = gr.Textbox(lines=8, label='Extracted text preview')

        def _on_generate(text, mode, attack_factor, target_text):
            pdf_path, preview = generate_pdf(text=text, mode=mode, attack_factor=attack_factor, target_text=target_text)
            # generate_pdf signals failure with an empty path; hand the
            # File component None in that case and keep the message.
            return (pdf_path or None), preview

        run_button.click(
            fn=_on_generate,
            inputs=[input_box, mode_radio, factor_slider, target_box],
            outputs=[pdf_output, preview_box],
        )

    return demo
|
|
|
|
if __name__ == '__main__':
    # Script entry point: serve the demo on all interfaces, port 7860.
    build_demo().launch(server_name='0.0.0.0', server_port=7860)
|
|