| """ |
| Example client for PDF Redaction API |
| """ |
| import requests |
| from pathlib import Path |
| import sys |
|
|
|
|
def redact_pdf(api_url: str, pdf_path: str, output_path: str = "redacted.pdf",
               dpi: int = 300, entity_types: "str | None" = None,
               timeout: "float | tuple | None" = None) -> bool:
    """
    Redact a PDF file using the API and save the result locally.

    The file is uploaded to ``{api_url}/redact``; the JSON reply is
    summarised on stdout and the redacted document is then fetched from
    ``{api_url}/download/{job_id}`` and written to ``output_path``.

    Args:
        api_url: Base URL of the API
        pdf_path: Path to the PDF file to redact
        output_path: Path to save the redacted PDF
        dpi: DPI for OCR processing (sent as the ``dpi`` query parameter)
        entity_types: Comma-separated list of entity types to redact;
            omitted from the request when empty or None
        timeout: Passed straight to ``requests`` for both HTTP calls.
            ``None`` (the default) waits indefinitely, which preserves the
            previous behaviour; pass a number of seconds or a
            ``(connect, read)`` tuple to bound the wait.

    Returns:
        True if the redacted PDF was downloaded and written, False on any
        failure (missing input file, HTTP/network error, malformed
        response, or local I/O error). Failures are reported on stdout.
    """
    # is_file() rather than exists(): a directory passes exists() and would
    # then make open() raise instead of producing a clean False.
    if not Path(pdf_path).is_file():
        print(f"Error: File {pdf_path} not found")
        return False

    print(f"Uploading {pdf_path}...")

    params = {"dpi": dpi}
    if entity_types:
        params["entity_types"] = entity_types

    try:
        # Scope the handle to the upload only; it is not needed afterwards.
        with open(pdf_path, "rb") as pdf_file:
            response = requests.post(f"{api_url}/redact", files={"file": pdf_file},
                                     params=params, timeout=timeout)
        response.raise_for_status()
        result = response.json()
    except (requests.exceptions.RequestException, OSError) as e:
        print(f"Error: {e}")
        return False
    except ValueError as e:
        # Older requests releases raise a plain ValueError for non-JSON bodies.
        print(f"Error: API returned a non-JSON response: {e}")
        return False

    try:
        print(f"\nStatus: {result['status']}")
        print(f"Message: {result['message']}")

        if result.get('entities'):
            print("\nEntities redacted:")
            for i, entity in enumerate(result['entities'], 1):
                print(f"  {i}. {entity['entity_type']}: {entity['entity_text']} "
                      f"(Page {entity['page']}, {entity['word_count']} words)")

        job_id = result['job_id']
    except (KeyError, TypeError) as e:
        print(f"Error: unexpected response format from API: {e!r}")
        return False

    print("\nDownloading redacted PDF...")

    try:
        download_response = requests.get(f"{api_url}/download/{job_id}", timeout=timeout)
        download_response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return False

    try:
        with open(output_path, "wb") as f:
            f.write(download_response.content)
    except OSError as e:
        print(f"Error: could not write {output_path}: {e}")
        return False

    print(f"✓ Redacted PDF saved to: {output_path}")
    return True
|
|
|
|
def check_health(api_url: str, timeout: float = 10.0) -> bool:
    """
    Check API health by querying ``{api_url}/health`` and printing the result.

    Args:
        api_url: Base URL of the API
        timeout: Seconds to wait for the server before giving up, so the
            probe cannot hang on an unresponsive host

    Returns:
        True if the endpoint answered successfully with the expected
        ``status``, ``version`` and ``model_loaded`` fields; False
        otherwise (the error is printed to stdout).
    """
    try:
        response = requests.get(f"{api_url}/health", timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error checking health: {e}")
        return False
    except ValueError as e:
        # Older requests releases raise a plain ValueError for non-JSON bodies.
        print(f"Error checking health: invalid JSON response ({e})")
        return False

    # Pull every field before printing so a malformed reply does not leave
    # a half-printed report behind.
    try:
        status = data['status']
        version = data['version']
        model_loaded = data['model_loaded']
    except (KeyError, TypeError) as e:
        print(f"Error checking health: unexpected response format ({e!r})")
        return False

    print(f"API Status: {status}")
    print(f"Version: {version}")
    print(f"Model Loaded: {model_loaded}")

    return True
|
|
|
|
def get_stats(api_url: str, timeout: float = 10.0) -> bool:
    """
    Get API statistics from ``{api_url}/stats`` and print them.

    Args:
        api_url: Base URL of the API
        timeout: Seconds to wait for the server before giving up, so the
            call cannot hang on an unresponsive host

    Returns:
        True if the endpoint answered successfully with the expected
        ``pending_uploads``, ``processed_files`` and ``model_loaded``
        fields; False otherwise (the error is printed to stdout).
    """
    try:
        response = requests.get(f"{api_url}/stats", timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error getting stats: {e}")
        return False
    except ValueError as e:
        # Older requests releases raise a plain ValueError for non-JSON bodies.
        print(f"Error getting stats: invalid JSON response ({e})")
        return False

    # Pull every field before printing so a malformed reply does not leave
    # a half-printed report behind.
    try:
        pending_uploads = data['pending_uploads']
        processed_files = data['processed_files']
        model_loaded = data['model_loaded']
    except (KeyError, TypeError) as e:
        print(f"Error getting stats: unexpected response format ({e!r})")
        return False

    print("API Statistics:")
    print(f"  Pending uploads: {pending_uploads}")
    print(f"  Processed files: {processed_files}")
    print(f"  Model loaded: {model_loaded}")

    return True
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point:
    #   <pdf_file> [output_file] [dpi]   redact a PDF
    #   --health                         query the health endpoint
    #   --stats                          query the statistics endpoint
    # The process exit status reflects success (0) or failure (1) so the
    # script can be used from shell pipelines and CI jobs.
    API_URL = "http://localhost:7860"

    if len(sys.argv) < 2:
        print("Usage:")
        print("  python client_example.py <pdf_file> [output_file] [dpi]")
        print("\nOr check health:")
        print("  python client_example.py --health")
        print("\nOr get stats:")
        print("  python client_example.py --stats")
        sys.exit(1)

    if sys.argv[1] == "--health":
        ok = check_health(API_URL)
    elif sys.argv[1] == "--stats":
        ok = get_stats(API_URL)
    else:
        pdf_path = sys.argv[1]
        output_path = sys.argv[2] if len(sys.argv) > 2 else "redacted.pdf"

        # Report a bad dpi argument as a usage error instead of a traceback.
        try:
            dpi = int(sys.argv[3]) if len(sys.argv) > 3 else 300
        except ValueError:
            print(f"Error: dpi must be an integer, got {sys.argv[3]!r}")
            sys.exit(1)

        # No CLI option for entity types; None sends no entity_types filter.
        entity_types = None

        ok = redact_pdf(API_URL, pdf_path, output_path, dpi, entity_types)

    # The helpers return True/False; propagate that as the exit status.
    sys.exit(0 if ok else 1)
|
|