Fix: Use backend API for document processing on Streamlit Cloud
Browse files
demo/pages/1_🔬_Live_Processing.py
CHANGED
|
@@ -105,18 +105,51 @@ def process_document_actual(file_bytes: bytes, filename: str, options: dict) ->
|
|
| 105 |
"""
|
| 106 |
Process document using the actual document processing pipeline.
|
| 107 |
Returns processing results with all extracted data.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
"""
|
| 109 |
import tempfile
|
| 110 |
import os
|
| 111 |
|
| 112 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
suffix = Path(filename).suffix
|
| 114 |
with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
|
| 115 |
tmp.write(file_bytes)
|
| 116 |
tmp_path = tmp.name
|
| 117 |
|
| 118 |
try:
|
| 119 |
-
# Try to use actual document processor
|
| 120 |
try:
|
| 121 |
from src.document.pipeline.processor import (
|
| 122 |
DocumentProcessor,
|
|
|
|
| 105 |
"""
|
| 106 |
Process document using the actual document processing pipeline.
|
| 107 |
Returns processing results with all extracted data.
|
| 108 |
+
|
| 109 |
+
Priority:
|
| 110 |
+
1. Backend API (GPU server) - if configured
|
| 111 |
+
2. Local processing - if dependencies available
|
| 112 |
+
3. Fallback text extraction
|
| 113 |
"""
|
| 114 |
import tempfile
|
| 115 |
import os
|
| 116 |
|
| 117 |
+
# First, try to use backend API if configured
|
| 118 |
+
try:
|
| 119 |
+
from backend_client import BackendClient, is_backend_configured
|
| 120 |
+
if is_backend_configured():
|
| 121 |
+
client = BackendClient()
|
| 122 |
+
response = client.process_document(
|
| 123 |
+
file_bytes=file_bytes,
|
| 124 |
+
filename=filename,
|
| 125 |
+
ocr_engine=options.get("ocr_engine", "paddleocr"),
|
| 126 |
+
max_pages=options.get("max_pages", 10),
|
| 127 |
+
enable_layout=options.get("enable_layout", True),
|
| 128 |
+
preserve_tables=options.get("preserve_tables", True),
|
| 129 |
+
)
|
| 130 |
+
if response.success:
|
| 131 |
+
return {
|
| 132 |
+
"success": True,
|
| 133 |
+
"raw_text": response.data.get("text", ""),
|
| 134 |
+
"chunks": response.data.get("chunks", []),
|
| 135 |
+
"ocr_regions": response.data.get("ocr_regions", []),
|
| 136 |
+
"layout_regions": response.data.get("layout_regions", []),
|
| 137 |
+
"page_count": response.data.get("page_count", 0),
|
| 138 |
+
"ocr_confidence": response.data.get("ocr_confidence", 0.0),
|
| 139 |
+
"layout_confidence": response.data.get("layout_confidence", 0.0),
|
| 140 |
+
}
|
| 141 |
+
# Backend failed, continue to local processing
|
| 142 |
+
except Exception as e:
|
| 143 |
+
pass # Backend not available, try local processing
|
| 144 |
+
|
| 145 |
+
# Create temp file for local processing
|
| 146 |
suffix = Path(filename).suffix
|
| 147 |
with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
|
| 148 |
tmp.write(file_bytes)
|
| 149 |
tmp_path = tmp.name
|
| 150 |
|
| 151 |
try:
|
| 152 |
+
# Try to use actual document processor locally
|
| 153 |
try:
|
| 154 |
from src.document.pipeline.processor import (
|
| 155 |
DocumentProcessor,
|