Spaces:
Sleeping
Sleeping
Update backend/app/openrouter_client.py
Browse files
backend/app/openrouter_client.py
CHANGED
|
@@ -55,7 +55,9 @@ def _pdf_to_images(pdf_bytes: bytes) -> List[bytes]:
|
|
| 55 |
def _image_bytes_to_base64(image_bytes: bytes) -> str:
|
| 56 |
"""Convert image bytes to base64 data URL (JPEG format)."""
|
| 57 |
b64 = base64.b64encode(image_bytes).decode("utf-8")
|
| 58 |
-
|
|
|
|
|
|
|
| 59 |
|
| 60 |
|
| 61 |
def _file_to_image_blocks(file_bytes: bytes, content_type: str) -> List[Dict[str, Any]]:
|
|
@@ -251,10 +253,38 @@ async def extract_fields_from_document(
|
|
| 251 |
"X-Title": "Document Capture Demo",
|
| 252 |
}
|
| 253 |
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
|
| 259 |
# OpenRouter returns choices[0].message.content
|
| 260 |
if "choices" not in data or len(data["choices"]) == 0:
|
|
|
|
| 55 |
def _image_bytes_to_base64(image_bytes: bytes) -> str:
|
| 56 |
"""Convert image bytes to base64 data URL (JPEG format)."""
|
| 57 |
b64 = base64.b64encode(image_bytes).decode("utf-8")
|
| 58 |
+
data_url = f"data:image/jpeg;base64,{b64}"
|
| 59 |
+
print(f"[DEBUG] Base64 encoded image: {len(image_bytes)} bytes -> {len(data_url)} chars")
|
| 60 |
+
return data_url
|
| 61 |
|
| 62 |
|
| 63 |
def _file_to_image_blocks(file_bytes: bytes, content_type: str) -> List[Dict[str, Any]]:
|
|
|
|
| 253 |
"X-Title": "Document Capture Demo",
|
| 254 |
}
|
| 255 |
|
| 256 |
+
# Calculate payload size
|
| 257 |
+
import sys
|
| 258 |
+
payload_str = json.dumps(payload)
|
| 259 |
+
payload_size_mb = len(payload_str.encode('utf-8')) / 1024 / 1024
|
| 260 |
+
|
| 261 |
+
print(f"[INFO] Sending request to OpenRouter API...")
|
| 262 |
+
print(f"[INFO] Payload size: {payload_size_mb:.2f} MB, Images: {len(image_blocks)} blocks")
|
| 263 |
+
print(f"[INFO] Model: {MODEL_NAME}")
|
| 264 |
+
|
| 265 |
+
if payload_size_mb > 10:
|
| 266 |
+
print(f"[WARNING] Payload is very large ({payload_size_mb:.2f} MB). This may cause slow responses or timeouts.")
|
| 267 |
+
|
| 268 |
+
try:
|
| 269 |
+
# Use a longer timeout for large documents - 10 minutes
|
| 270 |
+
timeout = httpx.Timeout(600.0, connect=30.0)  # 600s per read/write/pool operation (not a total deadline), 30s connect
|
| 271 |
+
async with httpx.AsyncClient(timeout=timeout) as client:
|
| 272 |
+
print(f"[INFO] Making POST request to {OPENROUTER_BASE_URL}...")
|
| 273 |
+
print(f"[INFO] Timeout set to 10 minutes for large document processing...")
|
| 274 |
+
resp = await client.post(OPENROUTER_BASE_URL, headers=headers, json=payload)
|
| 275 |
+
print(f"[INFO] Received response: Status {resp.status_code}")
|
| 276 |
+
resp.raise_for_status()
|
| 277 |
+
data = resp.json()
|
| 278 |
+
print(f"[INFO] Response parsed successfully")
|
| 279 |
+
except httpx.TimeoutException:
|
| 280 |
+
print(f"[ERROR] Request to OpenRouter timed out after 5 minutes")
|
| 281 |
+
raise RuntimeError("Request to OpenRouter API timed out. The document may be too large or the API is slow. Please try again or use a smaller document.")
|
| 282 |
+
except httpx.HTTPStatusError as e:
|
| 283 |
+
print(f"[ERROR] HTTP error from OpenRouter: {e.response.status_code} - {e.response.text[:500]}")
|
| 284 |
+
raise RuntimeError(f"OpenRouter API error: {e.response.status_code} - {str(e)}")
|
| 285 |
+
except Exception as e:
|
| 286 |
+
print(f"[ERROR] Unexpected error calling OpenRouter: {type(e).__name__}: {str(e)}")
|
| 287 |
+
raise RuntimeError(f"Failed to call OpenRouter API: {str(e)}")
|
| 288 |
|
| 289 |
# OpenRouter returns choices[0].message.content
|
| 290 |
if "choices" not in data or len(data["choices"]) == 0:
|