|
|
|
|
|
""" |
|
|
OCR Layout Detection API Client
================================
Simple script to interact with the OCR Layout Detection service.

Usage:
    python api_client.py <path_to_file>

Examples:
    python api_client.py invoice.pdf
    python api_client.py document.jpg
    python api_client.py signature.png --signature-only
|
|
""" |
|
|
|
|
|
import os |
|
|
import sys |
|
|
import json |
|
|
import argparse |
|
|
from pathlib import Path |
|
|
from gradio_client import Client, handle_file |
|
|
|
|
|
|
|
|
|
|
|
# Identifier of the Space handed to gradio_client.Client for every request.
SPACE_URL = "Ayaan-Sharif/ocr-layout-detection-poc"

# Access token taken from the environment; None when HF_TOKEN is not set.
HF_TOKEN = os.getenv("HF_TOKEN")
|
|
|
|
|
|
|
|
def analyze_document(file_path, mode="Fast", enable_ocr=True, enable_tables=True,
                     detect_signatures=False, signature_conf=0.05):
    """
    Analyze a document with layout detection and optional OCR.

    Uploads the file to the ``/gradio_interface`` endpoint of the Space named
    by ``SPACE_URL`` and unpacks the four values the endpoint returns.

    Args:
        file_path: Path to PDF or image file
        mode: "Fast" or "Accurate" processing mode
        enable_ocr: Extract text with OCR
        enable_tables: Detect and extract tables
        detect_signatures: Also detect signatures (slower)
        signature_conf: Confidence threshold for signatures (0.01-0.5)

    Returns:
        dict | None: On success, a dict with the keys "visualization",
        "summary", "markdown" and "json". None when connecting, predicting
        or unpacking the response raised; the error is printed, not re-raised.
    """
    # NOTE(review): the leading glyphs in the "Analyzing" and "Error" messages
    # look like mis-decoded emoji whose original bytes are not recoverable
    # from this file; they are kept verbatim rather than guessed.
    print(f"π Analyzing document: {file_path}")
    print(f" Mode: {mode} | OCR: {enable_ocr} | Tables: {enable_tables} | Signatures: {detect_signatures}")

    try:
        client = Client(SPACE_URL, hf_token=HF_TOKEN)

        result = client.predict(
            file=handle_file(file_path),
            mode=mode,
            enable_ocr=enable_ocr,
            enable_tables=enable_tables,
            run_signature_yolo=detect_signatures,
            signature_conf=signature_conf,
            api_name="/gradio_interface"
        )

        # The endpoint is expected to return exactly four outputs; a different
        # count raises ValueError here and is reported by the handler below.
        visualization, summary, markdown, json_output = result

        # This literal used to be split over two physical lines (a mis-decoded
        # emoji carried a line-break character), which is a syntax error.
        print("✅ Analysis complete!")
        return {
            "visualization": visualization,
            "summary": summary,
            "markdown": markdown,
            "json": json_output
        }

    except Exception as e:
        # Boundary handler: callers rely on a None return to detect failure.
        print(f"β Error: {e}")
        return None
|
|
|
|
|
|
|
|
def detect_signatures_only(file_path, multiscale=True, conf=0.03, iou=0.45, augment=True):
    """
    Detect signatures only (faster, no OCR or layout analysis).

    Uploads the file to the ``/signature_only_infer`` endpoint of the Space
    named by ``SPACE_URL`` and unpacks the three values the endpoint returns.

    Args:
        file_path: Path to PDF or image file
        multiscale: Try multiple scales (1.0, 1.5, 2.0) for better detection
        conf: Confidence threshold (0.01-0.5, lower = more detections)
        iou: IoU threshold for NMS (0.1-0.9)
        augment: Use augmentation (slower but better recall)

    Returns:
        dict | None: On success, a dict with the keys "annotated_image",
        "summary" and "json". None when connecting, predicting or unpacking
        the response raised; the error is printed, not re-raised.
    """
    print(f"✍️ Detecting signatures in: {file_path}")
    print(f" Multiscale: {multiscale} | Conf: {conf} | IoU: {iou} | Augment: {augment}")

    try:
        client = Client(SPACE_URL, hf_token=HF_TOKEN)

        result = client.predict(
            file=handle_file(file_path),
            try_scales=multiscale,
            conf=conf,
            iou=iou,
            augment=augment,
            api_name="/signature_only_infer"
        )

        # The endpoint is expected to return exactly three outputs; a different
        # count raises ValueError here and is reported by the handler below.
        annotated_image, summary, json_output = result

        # This literal used to be split over two physical lines (a mis-decoded
        # emoji carried a line-break character), which is a syntax error.
        print("✅ Signature detection complete!")
        return {
            "annotated_image": annotated_image,
            "summary": summary,
            "json": json_output
        }

    except Exception as e:
        # Boundary handler: callers rely on a None return to detect failure.
        # NOTE(review): the leading glyph looks like a mis-decoded emoji whose
        # original bytes are not recoverable here; it is kept verbatim.
        print(f"β Error: {e}")
        return None
|
|
|
|
|
|
|
|
def _resolve_image_path(image):
    """Return the local file path carried by an image result entry."""
    # Some client versions hand back a plain file path instead of a dict with
    # a "path" key; accept both. NOTE(review): confirm which shape the
    # installed gradio_client actually returns for image outputs.
    if isinstance(image, (str, os.PathLike)):
        return os.fspath(image)
    return image.get("path")


def _copy_image(image, output_dir, filename, label):
    """Copy an image result into output_dir if its source file exists."""
    import shutil

    source = _resolve_image_path(image)
    if source and os.path.exists(source):
        destination = os.path.join(output_dir, filename)
        shutil.copy(source, destination)
        print(f"💾 Saved {label}: {destination}")


def _write_text(value, output_dir, filename, label):
    """Write a text result into output_dir, serialising non-strings as JSON."""
    # Serialise before opening the file so a failure cannot leave behind an
    # empty, truncated output file.
    if isinstance(value, str):
        text = value
    else:
        text = json.dumps(value, ensure_ascii=False, indent=2)
    destination = os.path.join(output_dir, filename)
    with open(destination, "w", encoding="utf-8") as f:
        f.write(text)
    print(f"💾 Saved {label}: {destination}")


def save_results(results, output_dir="output"):
    """Save API results to files.

    Args:
        results: Dict as returned by analyze_document or
            detect_signatures_only, or None. Missing or falsy entries are
            skipped.
        output_dir: Directory to write into; created if it does not exist
            (also when results is None).

    Files written, when the matching entry is present:
        visualization.png, signatures_annotated.png, content.md,
        layout.json, summary.txt

    Image entries may be a dict with a "path" key or a plain file path.
    Text entries that are not strings are written as JSON.
    """
    os.makedirs(output_dir, exist_ok=True)

    if results is None:
        return

    visualization = results.get("visualization")
    if visualization:
        _copy_image(visualization, output_dir, "visualization.png", "visualization")

    annotated = results.get("annotated_image")
    if annotated:
        _copy_image(annotated, output_dir, "signatures_annotated.png", "annotated image")

    markdown = results.get("markdown")
    if markdown:
        _write_text(markdown, output_dir, "content.md", "markdown")

    json_output = results.get("json")
    if json_output:
        _write_text(json_output, output_dir, "layout.json", "JSON")

    summary = results.get("summary")
    if summary:
        _write_text(summary, output_dir, "summary.txt", "summary")
|
|
|
|
|
|
|
|
def main():
    """Command-line entry point: parse arguments, call the service, save results.

    Runs detect_signatures_only when --signature-only is given, otherwise
    analyze_document, then passes a successful result to save_results.

    Exits with status 1 when the input path is not an existing file or when
    the API call returned no results.
    """
    parser = argparse.ArgumentParser(
        description="OCR Layout Detection API Client",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Full document analysis with OCR
  python api_client.py invoice.pdf

  # Accurate mode with signature detection
  python api_client.py document.pdf --mode Accurate --detect-signatures

  # Signature detection only (faster)
  python api_client.py contract.jpg --signature-only

  # Custom output directory
  python api_client.py file.pdf --output results/
"""
    )

    parser.add_argument("file", help="Path to document (PDF, JPG, PNG)")
    parser.add_argument("--mode", choices=["Fast", "Accurate"], default="Fast",
                        help="Processing mode (default: Fast)")
    parser.add_argument("--no-ocr", action="store_true", help="Disable OCR")
    parser.add_argument("--no-tables", action="store_true", help="Disable table detection")
    parser.add_argument("--detect-signatures", action="store_true",
                        help="Also detect signatures in full analysis")
    # No argparse default: when the flag is omitted each mode keeps its own
    # function default (0.05 for full analysis, 0.03 for signature-only).
    parser.add_argument("--signature-conf", type=float, default=None,
                        help="Signature confidence threshold "
                             "(default: 0.05, or 0.03 with --signature-only)")
    parser.add_argument("--signature-only", action="store_true",
                        help="Only detect signatures (faster, no OCR)")
    parser.add_argument("--output", "-o", default="output",
                        help="Output directory (default: output)")

    args = parser.parse_args()

    # NOTE(review): the leading glyphs in the "Error", "Starting", "Saving" and
    # "Failed" messages look like mis-decoded emoji whose original bytes are
    # not recoverable from this file; they are kept verbatim.

    # isfile rather than exists: a directory is not a usable input document.
    if not os.path.isfile(args.file):
        print(f"β Error: File not found: {args.file}")
        sys.exit(1)

    # An unknown extension only warns; the service still gets to try the file.
    ext = Path(args.file).suffix.lower()
    if ext not in [".pdf", ".jpg", ".jpeg", ".png", ".tiff", ".bmp"]:
        print(f"⚠️ Warning: Unsupported file type: {ext}")
        print(" Supported: .pdf, .jpg, .jpeg, .png, .tiff, .bmp")

    print(f"\nπ Starting API call to {SPACE_URL}\n")

    if args.signature_only:
        # Forward --signature-conf here too; it used to be silently ignored
        # in signature-only mode.
        if args.signature_conf is None:
            results = detect_signatures_only(args.file)
        else:
            results = detect_signatures_only(args.file, conf=args.signature_conf)
    else:
        conf_override = {}
        if args.signature_conf is not None:
            conf_override["signature_conf"] = args.signature_conf
        results = analyze_document(
            args.file,
            mode=args.mode,
            enable_ocr=not args.no_ocr,
            enable_tables=not args.no_tables,
            detect_signatures=args.detect_signatures,
            **conf_override
        )

    if results:
        print(f"\nπ Saving results to: {args.output}/")
        save_results(results, args.output)
        print("\n✨ Done!")
    else:
        print("\nβ Failed to process document")
        sys.exit(1)
|
|
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|