"""
Example client for PDF Redaction API
"""

import sys
from pathlib import Path
from typing import Optional

import requests

# For local development
API_URL = "http://localhost:7860"

# For HuggingFace Spaces (replace with your space URL)
# API_URL = "https://your-username-pdf-redaction-api.hf.space"

# Seconds to wait on lightweight endpoints (/health, /stats).
REQUEST_TIMEOUT = 30.0

# Seconds to wait on the upload and download calls. The upload gets a much
# larger budget because the server answers /redact only after processing.
REDACT_TIMEOUT = 600.0


def redact_pdf(
    api_url: str,
    pdf_path: str,
    output_path: str = "redacted.pdf",
    dpi: int = 300,
    entity_types: Optional[str] = None,
    timeout: Optional[float] = REDACT_TIMEOUT,
) -> bool:
    """
    Redact a PDF file using the API

    Uploads the file to ``{api_url}/redact``, prints the reported status and
    entities, then downloads the result from ``{api_url}/download/{job_id}``
    and writes it to ``output_path``.

    Args:
        api_url: Base URL of the API
        pdf_path: Path to the PDF file to redact
        output_path: Path to save the redacted PDF
        dpi: DPI for OCR processing
        entity_types: Comma-separated list of entity types to redact
        timeout: Seconds to wait for each HTTP request; None waits forever

    Returns:
        True if the redacted PDF was saved, False on any failure (the reason
        is printed).
    """
    # is_file() rather than exists(): a directory cannot be uploaded.
    if not Path(pdf_path).is_file():
        print(f"Error: File {pdf_path} not found")
        return False

    print(f"Uploading {pdf_path}...")

    params = {"dpi": dpi}
    if entity_types:
        params["entity_types"] = entity_types

    try:
        # Upload and redact; the context manager closes the file as soon as
        # the request has completed, even when the request raises.
        with open(pdf_path, "rb") as pdf_file:
            response = requests.post(
                f"{api_url}/redact",
                files={"file": pdf_file},
                params=params,
                timeout=timeout,
            )
        response.raise_for_status()

        result = response.json()
        print(f"\nStatus: {result['status']}")
        print(f"Message: {result['message']}")

        # Display found entities
        if result.get('entities'):
            print("\nEntities redacted:")
            for i, entity in enumerate(result['entities'], 1):
                print(f" {i}. {entity['entity_type']}: {entity['entity_text']} "
                      f"(Page {entity['page']}, {entity['word_count']} words)")

        # Download redacted file
        job_id = result['job_id']
        print("\nDownloading redacted PDF...")

        download_response = requests.get(
            f"{api_url}/download/{job_id}", timeout=timeout
        )
        download_response.raise_for_status()

        # Save file
        with open(output_path, "wb") as f:
            f.write(download_response.content)

        print(f"✓ Redacted PDF saved to: {output_path}")

        # Cleanup (optional)
        # requests.delete(f"{api_url}/cleanup/{job_id}")

        return True

    # RequestException derives from OSError, so it must be handled first.
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return False
    except (KeyError, TypeError, ValueError) as e:
        # The server replied, but not with the JSON shape this client expects.
        print(f"Error: unexpected response from API: {e!r}")
        return False
    except OSError as e:
        # Reading the input PDF or writing the output file failed.
        print(f"Error: {e}")
        return False


def check_health(api_url: str, timeout: Optional[float] = REQUEST_TIMEOUT) -> bool:
    """Check API health

    Args:
        api_url: Base URL of the API
        timeout: Seconds to wait for the response; None waits forever

    Returns:
        True if ``{api_url}/health`` answered with the expected fields,
        False otherwise (the reason is printed).
    """
    try:
        response = requests.get(f"{api_url}/health", timeout=timeout)
        response.raise_for_status()

        data = response.json()
        print(f"API Status: {data['status']}")
        print(f"Version: {data['version']}")
        print(f"Model Loaded: {data['model_loaded']}")
        return True

    except requests.exceptions.RequestException as e:
        print(f"Error checking health: {e}")
        return False
    except (KeyError, TypeError, ValueError) as e:
        print(f"Error checking health: unexpected response from API: {e!r}")
        return False


def get_stats(api_url: str, timeout: Optional[float] = REQUEST_TIMEOUT) -> bool:
    """Get API statistics

    Args:
        api_url: Base URL of the API
        timeout: Seconds to wait for the response; None waits forever

    Returns:
        True if ``{api_url}/stats`` answered with the expected fields,
        False otherwise (the reason is printed).
    """
    try:
        response = requests.get(f"{api_url}/stats", timeout=timeout)
        response.raise_for_status()

        data = response.json()
        print("API Statistics:")
        print(f" Pending uploads: {data['pending_uploads']}")
        print(f" Processed files: {data['processed_files']}")
        print(f" Model loaded: {data['model_loaded']}")
        return True

    except requests.exceptions.RequestException as e:
        print(f"Error getting stats: {e}")
        return False
    except (KeyError, TypeError, ValueError) as e:
        print(f"Error getting stats: unexpected response from API: {e!r}")
        return False


def main(argv: list) -> int:
    """Run the example client from the command line

    Args:
        argv: Full argument vector, including the program name at index 0

    Returns:
        Process exit code: 0 on success, 1 on a usage error or failed request.
    """
    if len(argv) < 2:
        print("Usage:")
        print(" python client_example.py <pdf_file> [output_file] [dpi]")
        print("\nOr check health:")
        print(" python client_example.py --health")
        print("\nOr get stats:")
        print(" python client_example.py --stats")
        return 1

    if argv[1] == "--health":
        ok = check_health(API_URL)
    elif argv[1] == "--stats":
        ok = get_stats(API_URL)
    else:
        pdf_path = argv[1]
        output_path = argv[2] if len(argv) > 2 else "redacted.pdf"

        try:
            dpi = int(argv[3]) if len(argv) > 3 else 300
        except ValueError:
            print(f"Error: dpi must be an integer, got {argv[3]!r}")
            return 1

        # Optional: Filter specific entity types
        # entity_types = "PER,ORG"  # Only redact persons and organizations
        entity_types = None  # Redact all entity types

        ok = redact_pdf(API_URL, pdf_path, output_path, dpi, entity_types)

    # Report failure to the shell so the script can be used in pipelines.
    return 0 if ok else 1


if __name__ == "__main__":
    sys.exit(main(sys.argv))