MHamdan Claude Opus 4.5 committed on
Commit
c1a790c
·
1 Parent(s): 5d516d9

Fix: Use backend API for document processing on Streamlit Cloud

Browse files
demo/pages/1_🔬_Live_Processing.py CHANGED
@@ -105,18 +105,51 @@ def process_document_actual(file_bytes: bytes, filename: str, options: dict) ->
105
  """
106
  Process document using the actual document processing pipeline.
107
  Returns processing results with all extracted data.
 
 
 
 
 
108
  """
109
  import tempfile
110
  import os
111
 
112
- # Create temp file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  suffix = Path(filename).suffix
114
  with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
115
  tmp.write(file_bytes)
116
  tmp_path = tmp.name
117
 
118
  try:
119
- # Try to use actual document processor
120
  try:
121
  from src.document.pipeline.processor import (
122
  DocumentProcessor,
 
105
  """
106
  Process document using the actual document processing pipeline.
107
  Returns processing results with all extracted data.
108
+
109
+ Priority:
110
+ 1. Backend API (GPU server) - if configured
111
+ 2. Local processing - if dependencies available
112
+ 3. Fallback text extraction
113
  """
114
  import tempfile
115
  import os
116
 
117
+ # First, try to use backend API if configured
118
+ try:
119
+ from backend_client import BackendClient, is_backend_configured
120
+ if is_backend_configured():
121
+ client = BackendClient()
122
+ response = client.process_document(
123
+ file_bytes=file_bytes,
124
+ filename=filename,
125
+ ocr_engine=options.get("ocr_engine", "paddleocr"),
126
+ max_pages=options.get("max_pages", 10),
127
+ enable_layout=options.get("enable_layout", True),
128
+ preserve_tables=options.get("preserve_tables", True),
129
+ )
130
+ if response.success:
131
+ return {
132
+ "success": True,
133
+ "raw_text": response.data.get("text", ""),
134
+ "chunks": response.data.get("chunks", []),
135
+ "ocr_regions": response.data.get("ocr_regions", []),
136
+ "layout_regions": response.data.get("layout_regions", []),
137
+ "page_count": response.data.get("page_count", 0),
138
+ "ocr_confidence": response.data.get("ocr_confidence", 0.0),
139
+ "layout_confidence": response.data.get("layout_confidence", 0.0),
140
+ }
141
+ # Backend failed, continue to local processing
142
+ except Exception as e:
143
+ pass # Backend not available, try local processing
144
+
145
+ # Create temp file for local processing
146
  suffix = Path(filename).suffix
147
  with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
148
  tmp.write(file_bytes)
149
  tmp_path = tmp.name
150
 
151
  try:
152
+ # Try to use actual document processor locally
153
  try:
154
  from src.document.pipeline.processor import (
155
  DocumentProcessor,