Seth0330 committed on
Commit
d03aadc
·
verified ·
1 Parent(s): da780d4

Update backend/app/openrouter_client.py

Browse files
Files changed (1) hide show
  1. backend/app/openrouter_client.py +31 -1
backend/app/openrouter_client.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import base64
3
  import json
 
4
  from typing import Any, Dict
5
 
6
  import httpx
@@ -109,4 +110,33 @@ async def extract_fields_from_document(
109
  text = content
110
 
111
  # Try to parse JSON from the model output
112
- return json.loads(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import base64
3
  import json
4
+ import re
5
  from typing import Any, Dict
6
 
7
  import httpx
 
110
  text = content
111
 
112
  # Try to parse JSON from the model output
113
+ # The model might return JSON wrapped in markdown code blocks or with extra text
114
+ try:
115
+ # First, try direct JSON parsing
116
+ return json.loads(text)
117
+ except json.JSONDecodeError:
118
+ # Try to extract JSON from markdown code blocks
119
+ json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
120
+ if json_match:
121
+ try:
122
+ return json.loads(json_match.group(1))
123
+ except json.JSONDecodeError:
124
+ pass
125
+
126
+ # Try to find JSON object in the text (look for {...})
127
+ json_match = re.search(r'\{.*\}', text, re.DOTALL)
128
+ if json_match:
129
+ try:
130
+ return json.loads(json_match.group(0))
131
+ except json.JSONDecodeError:
132
+ pass
133
+
134
+ # If all parsing fails, return a default structure with the raw text
135
+ return {
136
+ "doc_type": "other",
137
+ "confidence": 50.0,
138
+ "fields": {
139
+ "raw_text": text[:500], # First 500 chars
140
+ "error": "Could not parse JSON from model response"
141
+ }
142
+ }