#!/usr/bin/env python3
"""
OCR Layout Detection API Client
================================

Simple script to interact with the OCR Layout Detection service.

Usage:
    python api_client.py FILE [options]

Examples:
    python api_client.py invoice.pdf
    python api_client.py document.jpg
    python api_client.py signature.png --signature-only
"""

import argparse
import json
import os
import shutil
import sys
from pathlib import Path

try:
    from gradio_client import Client, handle_file
except ImportError:
    # Defer the failure to the first API call so that `--help` and
    # save_results() remain usable, and so the user sees an actionable
    # message instead of a traceback at import time.
    Client = None
    handle_file = None

# API Configuration
SPACE_URL = "Ayaan-Sharif/ocr-layout-detection-poc"
HF_TOKEN = os.environ.get("HF_TOKEN")  # Read from environment variable if available

# Extensions accepted without a warning by the command-line interface.
SUPPORTED_EXTENSIONS = (".pdf", ".jpg", ".jpeg", ".png", ".tiff", ".bmp")


def _connect():
    """Return a client bound to SPACE_URL.

    Raises:
        RuntimeError: If the gradio_client package could not be imported.
    """
    if Client is None:
        raise RuntimeError(
            "gradio_client is not installed; run: pip install gradio_client"
        )
    return Client(SPACE_URL, hf_token=HF_TOKEN)


def analyze_document(file_path, mode="Fast", enable_ocr=True, enable_tables=True,
                     detect_signatures=False, signature_conf=0.05):
    """
    Analyze a document with layout detection and optional OCR.

    Args:
        file_path: Path to PDF or image file
        mode: "Fast" or "Accurate" processing mode
        enable_ocr: Extract text with OCR
        enable_tables: Detect and extract tables
        detect_signatures: Also detect signatures (slower)
        signature_conf: Confidence threshold for signatures (0.01-0.5)

    Returns:
        dict: Contains "visualization", "summary", "markdown", and "json"
        outputs, or None if the API call failed (the error is printed).
    """
    print(f"📄 Analyzing document: {file_path}")
    print(f"   Mode: {mode} | OCR: {enable_ocr} | Tables: {enable_tables} | "
          f"Signatures: {detect_signatures}")

    try:
        client = _connect()
        result = client.predict(
            file=handle_file(file_path),
            mode=mode,
            enable_ocr=enable_ocr,
            enable_tables=enable_tables,
            run_signature_yolo=detect_signatures,
            signature_conf=signature_conf,
            api_name="/gradio_interface",
        )

        # result is a tuple:
        # (visualization_image, summary_text, markdown_text, json_text)
        visualization, summary, markdown, json_output = result
    except Exception as e:
        # Top-level boundary for the CLI: report the failure and signal it
        # to the caller with None rather than propagating.
        print(f"❌ Error: {e}")
        return None

    print("✅ Analysis complete!")
    return {
        "visualization": visualization,
        "summary": summary,
        "markdown": markdown,
        "json": json_output,
    }


def detect_signatures_only(file_path, multiscale=True, conf=0.03, iou=0.45, augment=True):
    """
    Detect signatures only (faster, no OCR or layout analysis).

    Args:
        file_path: Path to PDF or image file
        multiscale: Try multiple scales (1.0, 1.5, 2.0) for better detection
        conf: Confidence threshold (0.01-0.5, lower = more detections)
        iou: IoU threshold for NMS (0.1-0.9)
        augment: Use augmentation (slower but better recall)

    Returns:
        dict: Contains "annotated_image", "summary", and "json" detections,
        or None if the API call failed (the error is printed).
    """
    print(f"✍️ Detecting signatures in: {file_path}")
    print(f"   Multiscale: {multiscale} | Conf: {conf} | IoU: {iou} | Augment: {augment}")

    try:
        client = _connect()
        result = client.predict(
            file=handle_file(file_path),
            try_scales=multiscale,
            conf=conf,
            iou=iou,
            augment=augment,
            api_name="/signature_only_infer",
        )

        # result is a tuple: (annotated_image, summary_text, json_detections)
        annotated_image, summary, json_output = result
    except Exception as e:
        # Same boundary handling as analyze_document.
        print(f"❌ Error: {e}")
        return None

    print("✅ Signature detection complete!")
    return {
        "annotated_image": annotated_image,
        "summary": summary,
        "json": json_output,
    }


def _result_file_path(value):
    """Return the local file path held by an image/file output, or None.

    The output may be either a plain path or a dict carrying a "path" key;
    both shapes are accepted.
    """
    if isinstance(value, dict):
        value = value.get("path")
    if value and isinstance(value, (str, os.PathLike)):
        return os.fspath(value)
    return None


def _copy_image(value, output_dir, filename, label):
    """Copy the image referenced by *value* to output_dir/filename if it exists."""
    source_path = _result_file_path(value)
    if not source_path or not os.path.exists(source_path):
        return
    output_path = os.path.join(output_dir, filename)
    shutil.copy(source_path, output_path)
    print(f"💾 Saved {label}: {output_path}")


def _write_text(content, output_dir, filename, label):
    """Write non-empty text *content* to output_dir/filename as UTF-8."""
    if not content:
        return
    output_path = os.path.join(output_dir, filename)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(content)
    print(f"💾 Saved {label}: {output_path}")


def save_results(results, output_dir="output"):
    """Save API results to files.

    Args:
        results: Dict returned by analyze_document() or
            detect_signatures_only(), or None (nothing is written).
        output_dir: Directory to write into; created if missing.

    Files written (each only when the corresponding value is non-empty):
        visualization.png, signatures_annotated.png, content.md,
        layout.json, summary.txt
    """
    os.makedirs(output_dir, exist_ok=True)

    if results is None:
        return

    # Save visualization/annotated image
    _copy_image(results.get("visualization"), output_dir,
                "visualization.png", "visualization")
    _copy_image(results.get("annotated_image"), output_dir,
                "signatures_annotated.png", "annotated image")

    # Save markdown content
    _write_text(results.get("markdown"), output_dir, "content.md", "markdown")

    # Save JSON output. The endpoint may hand back JSON text or an already
    # parsed object; serialize the latter so the write cannot fail.
    json_output = results.get("json")
    if json_output and not isinstance(json_output, str):
        json_output = json.dumps(json_output, ensure_ascii=False, indent=2)
    _write_text(json_output, output_dir, "layout.json", "JSON")

    # Save summary
    _write_text(results.get("summary"), output_dir, "summary.txt", "summary")


def main():
    """Parse command-line arguments, call the API, and save the results.

    Exits with status 1 if the input file is missing or processing fails.
    """
    parser = argparse.ArgumentParser(
        description="OCR Layout Detection API Client",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Full document analysis with OCR
  python api_client.py invoice.pdf

  # Accurate mode with signature detection
  python api_client.py document.pdf --mode Accurate --detect-signatures

  # Signature detection only (faster)
  python api_client.py contract.jpg --signature-only

  # Custom output directory
  python api_client.py file.pdf --output results/
        """
    )

    parser.add_argument("file", help="Path to document (PDF, JPG, PNG)")
    parser.add_argument("--mode", choices=["Fast", "Accurate"], default="Fast",
                        help="Processing mode (default: Fast)")
    parser.add_argument("--no-ocr", action="store_true", help="Disable OCR")
    parser.add_argument("--no-tables", action="store_true",
                        help="Disable table detection")
    parser.add_argument("--detect-signatures", action="store_true",
                        help="Also detect signatures in full analysis")
    parser.add_argument("--signature-conf", type=float, default=0.05,
                        help="Signature confidence threshold (default: 0.05)")
    parser.add_argument("--signature-only", action="store_true",
                        help="Only detect signatures (faster, no OCR)")
    parser.add_argument("--output", "-o", default="output",
                        help="Output directory (default: output)")

    args = parser.parse_args()

    # Validate file exists (a directory is not an acceptable input either)
    if not os.path.isfile(args.file):
        print(f"❌ Error: File not found: {args.file}")
        sys.exit(1)

    # Check file type; unknown extensions only warn, the service decides.
    ext = Path(args.file).suffix.lower()
    if ext not in SUPPORTED_EXTENSIONS:
        print(f"⚠️ Warning: Unsupported file type: {ext}")
        print("   Supported: .pdf, .jpg, .jpeg, .png, .tiff, .bmp")

    print(f"\n🚀 Starting API call to {SPACE_URL}\n")

    # Call appropriate API endpoint
    if args.signature_only:
        results = detect_signatures_only(args.file)
    else:
        results = analyze_document(
            args.file,
            mode=args.mode,
            enable_ocr=not args.no_ocr,
            enable_tables=not args.no_tables,
            detect_signatures=args.detect_signatures,
            signature_conf=args.signature_conf,
        )

    # Save results
    if results:
        print(f"\n📁 Saving results to: {args.output}/")
        save_results(results, args.output)
        print("\n✨ Done!")
    else:
        print("\n❌ Failed to process document")
        sys.exit(1)


if __name__ == "__main__":
    main()