# PDF-Redaction-API / client_example.py
# Sammi1211's picture
# adding url support
# af107f1
"""
Example client for PDF Redaction API
"""
import requests
from pathlib import Path
import sys
def redact_pdf(api_url: str, pdf_path: str, output_path: str = "redacted.pdf",
               dpi: int = 300, entity_types: str = None,
               timeout: float = 600.0):
    """
    Redact a PDF file using the API

    Uploads the PDF to ``{api_url}/redact``, prints the status, message and
    any entities reported in the JSON response, then downloads the result
    from ``{api_url}/download/{job_id}`` and writes it to ``output_path``.

    Args:
        api_url: Base URL of the API (a trailing slash is tolerated)
        pdf_path: Path to the PDF file to redact
        output_path: Path to save the redacted PDF
        dpi: DPI for OCR processing
        entity_types: Comma-separated list of entity types to redact;
            left out of the request when empty or None
        timeout: Timeout in seconds passed to each HTTP request; None
            disables it

    Returns:
        True if the redacted PDF was saved, False on any failure (the
        error is printed instead of raised).
    """
    # is_file() also rejects directories, which exists() would accept and
    # which would then blow up inside open().
    if not Path(pdf_path).is_file():
        print(f"Error: File {pdf_path} not found")
        return False

    base_url = api_url.rstrip("/")

    # Prepare request
    params = {"dpi": dpi}
    if entity_types:
        params["entity_types"] = entity_types

    print(f"Uploading {pdf_path}...")
    try:
        # Upload and redact; the context manager closes the file even if
        # the upload fails.
        with open(pdf_path, "rb") as pdf_file:
            response = requests.post(
                f"{base_url}/redact",
                files={"file": pdf_file},
                params=params,
                timeout=timeout,
            )
        response.raise_for_status()
        result = response.json()

        print(f"\nStatus: {result['status']}")
        print(f"Message: {result['message']}")

        # Display found entities
        if result.get('entities'):
            print("\nEntities redacted:")
            for i, entity in enumerate(result['entities'], 1):
                print(f" {i}. {entity['entity_type']}: {entity['entity_text']} "
                      f"(Page {entity['page']}, {entity['word_count']} words)")

        # Download redacted file
        job_id = result['job_id']
        print("\nDownloading redacted PDF...")
        download_response = requests.get(
            f"{base_url}/download/{job_id}", timeout=timeout
        )
        download_response.raise_for_status()

        # Save file
        with open(output_path, "wb") as f:
            f.write(download_response.content)
        print(f"✓ Redacted PDF saved to: {output_path}")

        # Cleanup (optional)
        # requests.delete(f"{base_url}/cleanup/{job_id}")
        return True
    except (requests.exceptions.RequestException, OSError) as e:
        # Network/HTTP failures and local file read/write failures.
        print(f"Error: {e}")
        return False
    except (KeyError, TypeError, ValueError) as e:
        # Response was not JSON or lacked an expected field.
        print(f"Error: Unexpected response from API: {e!r}")
        return False
def check_health(api_url: str, timeout: float = 30.0):
    """
    Check API health

    Requests ``{api_url}/health`` and prints the ``status``, ``version`` and
    ``model_loaded`` fields of the JSON response.

    Args:
        api_url: Base URL of the API (a trailing slash is tolerated)
        timeout: Timeout in seconds passed to the HTTP request; None
            disables it

    Returns:
        True if the endpoint answered with the expected fields, False
        otherwise (the error is printed instead of raised).
    """
    try:
        response = requests.get(f"{api_url.rstrip('/')}/health", timeout=timeout)
        response.raise_for_status()
        data = response.json()
        # Read every field before printing so a malformed payload does not
        # leave half-printed output behind.
        status = data['status']
        version = data['version']
        model_loaded = data['model_loaded']
    except requests.exceptions.RequestException as e:
        print(f"Error checking health: {e}")
        return False
    except (KeyError, TypeError, ValueError) as e:
        # Response was not JSON or lacked an expected field.
        print(f"Error checking health: unexpected response ({e!r})")
        return False

    print(f"API Status: {status}")
    print(f"Version: {version}")
    print(f"Model Loaded: {model_loaded}")
    return True
def get_stats(api_url: str, timeout: float = 30.0):
    """
    Get API statistics

    Requests ``{api_url}/stats`` and prints the ``pending_uploads``,
    ``processed_files`` and ``model_loaded`` fields of the JSON response.

    Args:
        api_url: Base URL of the API (a trailing slash is tolerated)
        timeout: Timeout in seconds passed to the HTTP request; None
            disables it

    Returns:
        True if the endpoint answered with the expected fields, False
        otherwise (the error is printed instead of raised).
    """
    try:
        response = requests.get(f"{api_url.rstrip('/')}/stats", timeout=timeout)
        response.raise_for_status()
        data = response.json()
        # Read every field before printing so a malformed payload does not
        # leave half-printed output behind.
        pending_uploads = data['pending_uploads']
        processed_files = data['processed_files']
        model_loaded = data['model_loaded']
    except requests.exceptions.RequestException as e:
        print(f"Error getting stats: {e}")
        return False
    except (KeyError, TypeError, ValueError) as e:
        # Response was not JSON or lacked an expected field.
        print(f"Error getting stats: unexpected response ({e!r})")
        return False

    print("API Statistics:")
    print(f" Pending uploads: {pending_uploads}")
    print(f" Processed files: {processed_files}")
    print(f" Model loaded: {model_loaded}")
    return True
if __name__ == "__main__":
    # Command-line entry point: dispatches to the health check, the stats
    # query, or a redaction run, and exits non-zero when the call fails.

    # Example usage
    # For local development
    API_URL = "http://localhost:7860"
    # For HuggingFace Spaces (replace with your space URL)
    # API_URL = "https://your-username-pdf-redaction-api.hf.space"

    if len(sys.argv) < 2:
        print("Usage:")
        print(" python client_example.py <pdf_file> [output_file] [dpi]")
        print("\nOr check health:")
        print(" python client_example.py --health")
        print("\nOr get stats:")
        print(" python client_example.py --stats")
        sys.exit(1)

    if sys.argv[1] == "--health":
        ok = check_health(API_URL)
    elif sys.argv[1] == "--stats":
        ok = get_stats(API_URL)
    else:
        pdf_path = sys.argv[1]
        output_path = sys.argv[2] if len(sys.argv) > 2 else "redacted.pdf"
        try:
            dpi = int(sys.argv[3]) if len(sys.argv) > 3 else 300
        except ValueError:
            # Report a bad dpi argument instead of dumping a traceback.
            print(f"Error: dpi must be an integer, got {sys.argv[3]!r}")
            sys.exit(1)

        # Optional: Filter specific entity types
        # entity_types = "PER,ORG" # Only redact persons and organizations
        entity_types = None  # Redact all entity types
        ok = redact_pdf(API_URL, pdf_path, output_path, dpi, entity_types)

    # Surface the result to the shell so scripts can detect failures.
    sys.exit(0 if ok else 1)