DocUA committed on
Commit
db52560
·
1 Parent(s): 59fabbc

Increase max_tokens for generation settings and enhance JSON extraction to handle various markdown formats

Browse files
Files changed (2) hide show
  1. config/environments/default.yaml +5 -5
  2. utils.py +23 -17
config/environments/default.yaml CHANGED
@@ -23,11 +23,11 @@ llama_index:
23
  # Generation Settings
24
  generation:
25
  max_tokens:
26
- openai: 512
27
- anthropic: 512
28
- gemini: 512
29
- deepseek: 512
30
- max_tokens_analysis: 2000
31
  temperature: 0.5
32
 
33
  # Model Providers Configuration
 
23
  # Generation Settings
24
  generation:
25
  max_tokens:
26
+ openai: 1024
27
+ anthropic: 1024
28
+ gemini: 1024
29
+ deepseek: 1024
30
+ max_tokens_analysis: 4000
31
  temperature: 0.5
32
 
33
  # Model Providers Configuration
utils.py CHANGED
@@ -156,15 +156,19 @@ def extract_json_from_text(text: str) -> Optional[Dict]:
156
  # 2. Try to find JSON within markdown or other text
157
  text_to_parse = text.strip()
158
 
159
- # Remove markdown code blocks
160
- if "```json" in text_to_parse:
161
- parts = text_to_parse.split("```json")
162
- if len(parts) > 1:
163
- text_to_parse = parts[1].split("```")[0].strip()
164
- elif "```" in text_to_parse:
165
- parts = text_to_parse.split("```")
166
- if len(parts) > 1:
167
- text_to_parse = parts[1].strip()
 
 
 
 
168
 
169
  try:
170
  return json.loads(text_to_parse)
@@ -172,14 +176,16 @@ def extract_json_from_text(text: str) -> Optional[Dict]:
172
  pass
173
 
174
  # 3. Last resort: find the first { and last }
 
175
  start_idx = text_to_parse.find('{')
176
- end_idx = text_to_parse.rfind('}')
177
-
178
- if start_idx != -1 and end_idx != -1:
179
- text_to_parse = text_to_parse[start_idx:end_idx + 1]
180
- try:
181
- return json.loads(text_to_parse)
182
- except json.JSONDecodeError:
183
- pass
 
184
 
185
  return None
 
156
  # 2. Try to find JSON within markdown or other text
157
  text_to_parse = text.strip()
158
 
159
+ # Remove markdown code blocks with triple backticks or triple single quotes
160
+ for delimiter in ["```json", "'''json", "```", "'''"]:
161
+ if delimiter in text_to_parse:
162
+ try:
163
+ parts = text_to_parse.split(delimiter)
164
+ if len(parts) > 1:
165
+ # Take the first content block after the delimiter
166
+ candidate = parts[1].split(delimiter.replace("json", ""))[0].strip()
167
+ if candidate:
168
+ text_to_parse = candidate
169
+ break
170
+ except Exception:
171
+ continue
172
 
173
  try:
174
  return json.loads(text_to_parse)
 
176
  pass
177
 
178
  # 3. Last resort: find the first { and last }
179
+ # Try to balance braces to handle potential truncation or trailing noise
180
  start_idx = text_to_parse.find('{')
181
+ if start_idx != -1:
182
+ # Step backwards from the end to find the last valid-looking closing brace
183
+ for end_idx in range(len(text_to_parse) - 1, start_idx, -1):
184
+ if text_to_parse[end_idx] == '}':
185
+ candidate = text_to_parse[start_idx:end_idx + 1]
186
+ try:
187
+ return json.loads(candidate)
188
+ except json.JSONDecodeError:
189
+ continue
190
 
191
  return None