Ayaan Sharif committed on
Commit
255e6fd
·
1 Parent(s): 4182f7c

Add file validation and better error handling

Browse files

- Validate file existence before processing
- Check file extensions are in allowed list
- Handle both gr.File objects and direct paths
- Better error messages for debugging
- Add HF_TOKEN support to API client

Files changed (2) hide show
  1. api_client.py +240 -0
  2. app.py +38 -2
api_client.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ OCR Layout Detection API Client
4
+ ================================
5
+ Simple script to interact with the OCR Layout Detection service.
6
+
7
+ Usage:
8
+ python api_client.py <path_to_file>
9
+
10
+ Examples:
11
+ python api_client.py invoice.pdf
12
+ python api_client.py document.jpg
13
+ python api_client.py signature.png --signature-only
14
+ """
15
+
16
+ import os
17
+ import sys
18
+ import json
19
+ import argparse
20
+ from pathlib import Path
21
+ from gradio_client import Client, handle_file
22
+
23
+
24
# API configuration: target Hugging Face Space and optional access token.
SPACE_URL = "Ayaan-Sharif/ocr-layout-detection-poc"
HF_TOKEN = os.getenv("HF_TOKEN")  # None when the environment variable is unset
27
+
28
+
29
def analyze_document(file_path, mode="Fast", enable_ocr=True, enable_tables=True,
                     detect_signatures=False, signature_conf=0.05):
    """
    Run the full layout-analysis endpoint of the Space on one document.

    Args:
        file_path: Path to PDF or image file
        mode: "Fast" or "Accurate" processing mode
        enable_ocr: Extract text with OCR
        enable_tables: Detect and extract tables
        detect_signatures: Also detect signatures (slower)
        signature_conf: Confidence threshold for signatures (0.01-0.5)

    Returns:
        dict | None: On success a dict with the keys "visualization",
        "summary", "markdown" and "json". Any exception raised while
        connecting, predicting or unpacking is printed and None is
        returned instead.
    """
    print(f"📄 Analyzing document: {file_path}")
    print(f" Mode: {mode} | OCR: {enable_ocr} | Tables: {enable_tables} | Signatures: {detect_signatures}")

    outcome = None
    try:
        space = Client(SPACE_URL, hf_token=HF_TOKEN)
        outputs = space.predict(
            file=handle_file(file_path),
            mode=mode,
            enable_ocr=enable_ocr,
            enable_tables=enable_tables,
            run_signature_yolo=detect_signatures,
            signature_conf=signature_conf,
            api_name="/gradio_interface",
        )

        # The endpoint is expected to yield exactly four outputs, in this
        # order; a different count raises here and is reported below.
        visualization, summary, markdown, json_output = outputs

        print("✅ Analysis complete!")
        outcome = dict(
            visualization=visualization,
            summary=summary,
            markdown=markdown,
            json=json_output,
        )
    except Exception as err:
        print(f"❌ Error: {err}")

    return outcome
75
+
76
+
77
def detect_signatures_only(file_path, multiscale=True, conf=0.03, iou=0.45, augment=True):
    """
    Run the signature-only endpoint of the Space (no OCR or layout analysis).

    Args:
        file_path: Path to PDF or image file
        multiscale: Try multiple scales (1.0, 1.5, 2.0) for better detection
        conf: Confidence threshold (0.01-0.5, lower = more detections)
        iou: IoU threshold for NMS (0.1-0.9)
        augment: Use augmentation (slower but better recall)

    Returns:
        dict | None: On success a dict with the keys "annotated_image",
        "summary" and "json". Any exception raised along the way is
        printed and None is returned instead.
    """
    print(f"✍️ Detecting signatures in: {file_path}")
    print(f" Multiscale: {multiscale} | Conf: {conf} | IoU: {iou} | Augment: {augment}")

    outcome = None
    try:
        space = Client(SPACE_URL, hf_token=HF_TOKEN)
        outputs = space.predict(
            file=handle_file(file_path),
            try_scales=multiscale,
            conf=conf,
            iou=iou,
            augment=augment,
            api_name="/signature_only_infer",
        )

        # Three outputs are expected, in this order; a different count
        # raises here and is reported below.
        annotated_image, summary, json_output = outputs

        print("✅ Signature detection complete!")
        outcome = dict(
            annotated_image=annotated_image,
            summary=summary,
            json=json_output,
        )
    except Exception as err:
        print(f"❌ Error: {err}")

    return outcome
119
+
120
+
121
def _result_file_path(value):
    """Return the local file path carried by an image result, or None.

    Accepts either a plain path (str / os.PathLike) or a dict that stores
    the path under the "path" key.
    """
    if isinstance(value, dict):
        value = value.get("path")
    if isinstance(value, (str, os.PathLike)):
        return os.fspath(value)
    return None


def _result_text(value):
    """Return *value* as text, serialising non-string payloads as JSON."""
    if isinstance(value, str):
        return value
    return json.dumps(value, ensure_ascii=False, indent=2)


def save_results(results, output_dir="output"):
    """Save API results to files.

    Args:
        results: Dict as produced by analyze_document() or
            detect_signatures_only(), or None. Recognised keys are
            "visualization", "annotated_image", "markdown", "json" and
            "summary"; missing or empty entries are skipped.
        output_dir: Directory receiving the files; created if missing.

    Image entries may be a plain file path or a dict holding the path
    under "path"; the image is copied only if that path exists locally.
    Text entries that are not already strings (for example a parsed JSON
    payload) are serialised with json.dumps before being written.
    """
    import shutil

    os.makedirs(output_dir, exist_ok=True)

    if results is None:
        return

    # (result key, output file name, label used in the progress message)
    image_outputs = (
        ("visualization", "visualization.png", "visualization"),
        ("annotated_image", "signatures_annotated.png", "annotated image"),
    )
    for key, filename, label in image_outputs:
        source = _result_file_path(results.get(key))
        if source and os.path.exists(source):
            target = os.path.join(output_dir, filename)
            shutil.copy(source, target)
            print(f"💾 Saved {label}: {target}")

    text_outputs = (
        ("markdown", "content.md", "markdown"),
        ("json", "layout.json", "JSON"),
        ("summary", "summary.txt", "summary"),
    )
    for key, filename, label in text_outputs:
        content = results.get(key)
        if not content:
            continue
        target = os.path.join(output_dir, filename)
        with open(target, "w", encoding="utf-8") as f:
            f.write(_result_text(content))
        print(f"💾 Saved {label}: {target}")
165
+
166
+
167
def main(argv=None):
    """Command-line entry point for the API client.

    Args:
        argv: Optional list of argument strings. When None, argparse
            reads the arguments from sys.argv as before.

    Exits with status 1 when the input path is missing or is not a
    regular file, or when the selected API call returns no results.
    An unrecognised file extension only produces a warning; the request
    is still sent.
    """
    parser = argparse.ArgumentParser(
        description="OCR Layout Detection API Client",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Full document analysis with OCR
  python api_client.py invoice.pdf

  # Accurate mode with signature detection
  python api_client.py document.pdf --mode Accurate --detect-signatures

  # Signature detection only (faster)
  python api_client.py contract.jpg --signature-only

  # Custom output directory
  python api_client.py file.pdf --output results/
""",
    )

    parser.add_argument("file", help="Path to document (PDF, JPG, PNG)")
    parser.add_argument("--mode", choices=["Fast", "Accurate"], default="Fast",
                        help="Processing mode (default: Fast)")
    parser.add_argument("--no-ocr", action="store_true", help="Disable OCR")
    parser.add_argument("--no-tables", action="store_true", help="Disable table detection")
    parser.add_argument("--detect-signatures", action="store_true",
                        help="Also detect signatures in full analysis")
    parser.add_argument("--signature-conf", type=float, default=0.05,
                        help="Signature confidence threshold (default: 0.05)")
    parser.add_argument("--signature-only", action="store_true",
                        help="Only detect signatures (faster, no OCR)")
    parser.add_argument("--output", "-o", default="output",
                        help="Output directory (default: output)")

    args = parser.parse_args(argv)

    # Validate the input up front: a missing path or a directory would
    # otherwise only fail later, inside the upload, with an opaque error.
    if not os.path.exists(args.file):
        print(f"❌ Error: File not found: {args.file}")
        sys.exit(1)
    if not os.path.isfile(args.file):
        print(f"❌ Error: Not a file: {args.file}")
        sys.exit(1)

    # Check file type (warning only; the request is still attempted)
    supported_exts = (".pdf", ".jpg", ".jpeg", ".png", ".tiff", ".bmp")
    ext = Path(args.file).suffix.lower()
    if ext not in supported_exts:
        print(f"⚠️ Warning: Unsupported file type: {ext}")
        print(" Supported: .pdf, .jpg, .jpeg, .png, .tiff, .bmp")

    print(f"\n🚀 Starting API call to {SPACE_URL}\n")

    # Call appropriate API endpoint
    if args.signature_only:
        results = detect_signatures_only(args.file)
    else:
        results = analyze_document(
            args.file,
            mode=args.mode,
            enable_ocr=not args.no_ocr,
            enable_tables=not args.no_tables,
            detect_signatures=args.detect_signatures,
            signature_conf=args.signature_conf,
        )

    # Save results
    if results:
        print(f"\n📁 Saving results to: {args.output}/")
        save_results(results, args.output)
        print("\n✨ Done!")
    else:
        print("\n❌ Failed to process document")
        sys.exit(1)


if __name__ == "__main__":
    main()
app.py CHANGED
@@ -505,7 +505,26 @@ def gradio_interface(file, mode, enable_ocr, enable_tables, run_signature_yolo=F
505
  if file is None:
506
  return None, "Please upload a document", "", ""
507
 
508
- return process_document(file.name, mode, enable_ocr, enable_tables, run_signature_yolo, signature_conf)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
509
 
510
 
511
  # -------- Small preview helper (first page / image) --------
@@ -638,8 +657,25 @@ def signature_only_infer(
638
  if file is None:
639
  return None, "Upload an image or PDF", "[]"
640
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
641
  # Load source image (first page for PDFs)
642
- path = file.name
643
  ext = (os.path.splitext(path)[1] or "").lower()
644
  if ext in (".pdf",):
645
  doc = fitz.open(path)
 
505
  if file is None:
506
  return None, "Please upload a document", "", ""
507
 
508
+ # Get file path - handle both direct path and gr.File object
509
+ try:
510
+ if hasattr(file, 'name'):
511
+ file_path = file.name
512
+ else:
513
+ file_path = str(file)
514
+
515
+ # Validate file exists and has valid extension
516
+ if not os.path.exists(file_path):
517
+ return None, f"File not found: {file_path}", "", ""
518
+
519
+ ext = os.path.splitext(file_path)[1].lower()
520
+ valid_exts = [".pdf", ".jpg", ".jpeg", ".png", ".tiff", ".bmp"]
521
+ if ext not in valid_exts:
522
+ return None, f"Invalid file format: {ext}. Supported: {', '.join(valid_exts)}", "", ""
523
+
524
+ return process_document(file_path, mode, enable_ocr, enable_tables, run_signature_yolo, signature_conf)
525
+ except Exception as e:
526
+ error_msg = f"Error in gradio_interface: {str(e)}"
527
+ return None, error_msg, error_msg, error_msg
528
 
529
 
530
  # -------- Small preview helper (first page / image) --------
 
657
  if file is None:
658
  return None, "Upload an image or PDF", "[]"
659
 
660
+ try:
661
+ # Get file path - handle both direct path and gr.File object
662
+ if hasattr(file, 'name'):
663
+ path = file.name
664
+ else:
665
+ path = str(file)
666
+
667
+ # Validate file exists
668
+ if not os.path.exists(path):
669
+ return None, f"File not found: {path}", "[]"
670
+
671
+ ext = (os.path.splitext(path)[1] or "").lower()
672
+ valid_exts = [".pdf", ".jpg", ".jpeg", ".png", ".tiff", ".bmp"]
673
+ if ext not in valid_exts:
674
+ return None, f"Invalid file format: {ext}. Supported: {', '.join(valid_exts)}", "[]"
675
+ except Exception as e:
676
+ return None, f"Error validating file: {str(e)}", "[]"
677
+
678
  # Load source image (first page for PDFs)
 
679
  ext = (os.path.splitext(path)[1] or "").lower()
680
  if ext in (".pdf",):
681
  doc = fitz.open(path)