Spaces:
Running
Running
Update backend/app/openrouter_client.py
Browse files
backend/app/openrouter_client.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import os
|
| 2 |
import base64
|
| 3 |
import json
|
|
|
|
| 4 |
from typing import Any, Dict
|
| 5 |
|
| 6 |
import httpx
|
|
@@ -109,4 +110,33 @@ async def extract_fields_from_document(
|
|
| 109 |
text = content
|
| 110 |
|
| 111 |
# Try to parse JSON from the model output
|
| 112 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import base64
|
| 3 |
import json
|
| 4 |
+
import re
|
| 5 |
from typing import Any, Dict
|
| 6 |
|
| 7 |
import httpx
|
|
|
|
| 110 |
text = content
|
| 111 |
|
| 112 |
# Try to parse JSON from the model output
|
| 113 |
+
# The model might return JSON wrapped in markdown code blocks or with extra text
|
| 114 |
+
try:
|
| 115 |
+
# First, try direct JSON parsing
|
| 116 |
+
return json.loads(text)
|
| 117 |
+
except json.JSONDecodeError:
|
| 118 |
+
# Try to extract JSON from markdown code blocks
|
| 119 |
+
json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
|
| 120 |
+
if json_match:
|
| 121 |
+
try:
|
| 122 |
+
return json.loads(json_match.group(1))
|
| 123 |
+
except json.JSONDecodeError:
|
| 124 |
+
pass
|
| 125 |
+
|
| 126 |
+
# Try to find JSON object in the text (look for {...})
|
| 127 |
+
json_match = re.search(r'\{.*\}', text, re.DOTALL)
|
| 128 |
+
if json_match:
|
| 129 |
+
try:
|
| 130 |
+
return json.loads(json_match.group(0))
|
| 131 |
+
except json.JSONDecodeError:
|
| 132 |
+
pass
|
| 133 |
+
|
| 134 |
+
# If all parsing fails, return a default structure with the raw text
|
| 135 |
+
return {
|
| 136 |
+
"doc_type": "other",
|
| 137 |
+
"confidence": 50.0,
|
| 138 |
+
"fields": {
|
| 139 |
+
"raw_text": text[:500], # First 500 chars
|
| 140 |
+
"error": "Could not parse JSON from model response"
|
| 141 |
+
}
|
| 142 |
+
}
|