Spaces:

MHamdan
/

SPARKNET

Sleeping

App Files Files Community

MHamdan Claude Opus 4.5 committed on Jan 26

Commit

c1a790c

1 Parent(s): 5d516d9

Fix: Use backend API for document processing on Streamlit Cloud

Browse files

Files changed (1) hide show

demo/pages/1_🔬_Live_Processing.py +35 -2

demo/pages/1_🔬_Live_Processing.py CHANGED Viewed

@@ -105,18 +105,51 @@ def process_document_actual(file_bytes: bytes, filename: str, options: dict) ->
     """
     Process document using the actual document processing pipeline.
     Returns processing results with all extracted data.
     """
     import tempfile
     import os
-    # Create temp file
     suffix = Path(filename).suffix
     with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
         tmp.write(file_bytes)
         tmp_path = tmp.name
     try:
-        # Try to use actual document processor
         try:
             from src.document.pipeline.processor import (
                 DocumentProcessor,

     """
     Process document using the actual document processing pipeline.
     Returns processing results with all extracted data.
+    Priority:
+    1. Backend API (GPU server) - if configured
+    2. Local processing - if dependencies available
+    3. Fallback text extraction
     """
     import tempfile
     import os
+    # First, try to use backend API if configured
+    try:
+        from backend_client import BackendClient, is_backend_configured
+        if is_backend_configured():
+            client = BackendClient()
+            response = client.process_document(
+                file_bytes=file_bytes,
+                filename=filename,
+                ocr_engine=options.get("ocr_engine", "paddleocr"),
+                max_pages=options.get("max_pages", 10),
+                enable_layout=options.get("enable_layout", True),
+                preserve_tables=options.get("preserve_tables", True),
+            )
+            if response.success:
+                return {
+                    "success": True,
+                    "raw_text": response.data.get("text", ""),
+                    "chunks": response.data.get("chunks", []),
+                    "ocr_regions": response.data.get("ocr_regions", []),
+                    "layout_regions": response.data.get("layout_regions", []),
+                    "page_count": response.data.get("page_count", 0),
+                    "ocr_confidence": response.data.get("ocr_confidence", 0.0),
+                    "layout_confidence": response.data.get("layout_confidence", 0.0),
+                }
+            # Backend failed, continue to local processing
+    except Exception as e:
+        pass  # Backend not available, try local processing
+    # Create temp file for local processing
     suffix = Path(filename).suffix
     with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
         tmp.write(file_bytes)
         tmp_path = tmp.name
     try:
+        # Try to use actual document processor locally
         try:
             from src.document.pipeline.processor import (
                 DocumentProcessor,