Increase max_tokens for generation settings and enhance JSON extraction to handle various markdown formats
Browse files
- config/environments/default.yaml +5 -5
- utils.py +23 -17
config/environments/default.yaml
CHANGED
|
@@ -23,11 +23,11 @@ llama_index:
|
|
| 23 |
# Generation Settings
|
| 24 |
generation:
|
| 25 |
max_tokens:
|
| 26 |
-
openai:
|
| 27 |
-
anthropic:
|
| 28 |
-
gemini:
|
| 29 |
-
deepseek:
|
| 30 |
-
max_tokens_analysis:
|
| 31 |
temperature: 0.5
|
| 32 |
|
| 33 |
# Model Providers Configuration
|
|
|
|
| 23 |
# Generation Settings
|
| 24 |
generation:
|
| 25 |
max_tokens:
|
| 26 |
+
openai: 1024
|
| 27 |
+
anthropic: 1024
|
| 28 |
+
gemini: 1024
|
| 29 |
+
deepseek: 1024
|
| 30 |
+
max_tokens_analysis: 4000
|
| 31 |
temperature: 0.5
|
| 32 |
|
| 33 |
# Model Providers Configuration
|
utils.py
CHANGED
|
@@ -156,15 +156,19 @@ def extract_json_from_text(text: str) -> Optional[Dict]:
|
|
| 156 |
# 2. Try to find JSON within markdown or other text
|
| 157 |
text_to_parse = text.strip()
|
| 158 |
|
| 159 |
-
# Remove markdown code blocks
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
|
| 169 |
try:
|
| 170 |
return json.loads(text_to_parse)
|
|
@@ -172,14 +176,16 @@ def extract_json_from_text(text: str) -> Optional[Dict]:
|
|
| 172 |
pass
|
| 173 |
|
| 174 |
# 3. Last resort: find the first { and last }
|
|
|
|
| 175 |
start_idx = text_to_parse.find('{')
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
|
|
|
| 184 |
|
| 185 |
return None
|
|
|
|
| 156 |
# 2. Try to find JSON within markdown or other text
|
| 157 |
text_to_parse = text.strip()
|
| 158 |
|
| 159 |
+
# Remove markdown code blocks with triple backticks or triple single quotes
|
| 160 |
+
for delimiter in ["```json", "'''json", "```", "'''"]:
|
| 161 |
+
if delimiter in text_to_parse:
|
| 162 |
+
try:
|
| 163 |
+
parts = text_to_parse.split(delimiter)
|
| 164 |
+
if len(parts) > 1:
|
| 165 |
+
# Take the first content block after the delimiter
|
| 166 |
+
candidate = parts[1].split(delimiter.replace("json", ""))[0].strip()
|
| 167 |
+
if candidate:
|
| 168 |
+
text_to_parse = candidate
|
| 169 |
+
break
|
| 170 |
+
except Exception:
|
| 171 |
+
continue
|
| 172 |
|
| 173 |
try:
|
| 174 |
return json.loads(text_to_parse)
|
|
|
|
| 176 |
pass
|
| 177 |
|
| 178 |
# 3. Last resort: find the first { and last }
|
| 179 |
+
# Retry with each closing brace, scanning backwards from the end, so trailing noise after the JSON object is skipped
|
| 180 |
start_idx = text_to_parse.find('{')
|
| 181 |
+
if start_idx != -1:
|
| 182 |
+
# Step backwards from the end to find the last valid-looking closing brace
|
| 183 |
+
for end_idx in range(len(text_to_parse) - 1, start_idx, -1):
|
| 184 |
+
if text_to_parse[end_idx] == '}':
|
| 185 |
+
candidate = text_to_parse[start_idx:end_idx + 1]
|
| 186 |
+
try:
|
| 187 |
+
return json.loads(candidate)
|
| 188 |
+
except json.JSONDecodeError:
|
| 189 |
+
continue
|
| 190 |
|
| 191 |
return None
|