umr2015 committed on
Commit
109a37f
·
verified ·
1 Parent(s): 043ea03

Tighten MiniCPM5 JSON constraint parsing

Browse files
Files changed (1) hide show
  1. app.py +41 -17
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import json
2
  import math
3
  import os
 
4
  import re
5
  from dataclasses import dataclass
6
  from functools import lru_cache
@@ -183,13 +184,29 @@ def normalize_constraints(raw: Dict[str, object]) -> Dict[str, object]:
183
 
184
 
185
  def extract_json_object(text: str) -> Optional[Dict[str, object]]:
186
- match = re.search(r"\{.*\}", text or "", re.DOTALL)
 
 
 
 
187
  if not match:
188
  return None
 
189
  try:
190
- parsed = json.loads(match.group(0))
191
  except json.JSONDecodeError:
192
- return None
 
 
 
 
 
 
 
 
 
 
 
193
  return parsed if isinstance(parsed, dict) else None
194
 
195
 
@@ -223,25 +240,32 @@ def minicpm_parse_dispatch_notes(notes: str) -> Tuple[Dict[str, object], str]:
223
  fallback["source"] = "rule-fallback"
224
  return fallback, "MiniCPM5 local runtime is unavailable, so the deterministic parser handled the notes."
225
 
226
- prompt = f"""You are a dispatch constraint parser for a small delivery route planner.
227
- Return only valid JSON with these keys:
228
- prefer_early_priority: boolean
229
- avoid_late_penalty: number between 0.5 and 10
230
- max_route_load: integer
231
- depot_start: minutes after midnight as integer
232
- soft_due_before: optional minutes after midnight
233
- boost_terms: short lowercase strings
234
-
235
- Dispatcher notes:
236
- {notes}
 
 
 
 
 
 
 
237
  """
238
  try:
239
  result = llm(
240
  prompt,
241
  max_tokens=180,
242
- temperature=0.1,
243
- top_p=0.9,
244
- stop=["\n\n"],
245
  )
246
  text = result["choices"][0]["text"]
247
  parsed = extract_json_object(text)
 
1
  import json
2
  import math
3
  import os
4
+ import ast
5
  import re
6
  from dataclasses import dataclass
7
  from functools import lru_cache
 
184
 
185
 
186
  def extract_json_object(text: str) -> Optional[Dict[str, object]]:
187
+ cleaned = (text or "").strip()
188
+ fenced = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", cleaned, re.DOTALL | re.IGNORECASE)
189
+ if fenced:
190
+ cleaned = fenced.group(1)
191
+ match = re.search(r"\{.*\}", cleaned, re.DOTALL)
192
  if not match:
193
  return None
194
+ raw = match.group(0)
195
  try:
196
+ parsed = json.loads(raw)
197
  except json.JSONDecodeError:
198
+ try:
199
+ parsed = ast.literal_eval(raw)
200
+ except (SyntaxError, ValueError):
201
+ normalized = (
202
+ raw.replace("True", "true")
203
+ .replace("False", "false")
204
+ .replace("None", "null")
205
+ )
206
+ try:
207
+ parsed = json.loads(normalized)
208
+ except json.JSONDecodeError:
209
+ return None
210
  return parsed if isinstance(parsed, dict) else None
211
 
212
 
 
240
  fallback["source"] = "rule-fallback"
241
  return fallback, "MiniCPM5 local runtime is unavailable, so the deterministic parser handled the notes."
242
 
243
+ prompt = f"""<|im_start|>system
244
+ You convert dispatcher notes into one compact JSON object.
245
+ Return JSON only. No markdown. No explanation.
246
+ Schema:
247
+ {{
248
+ "prefer_early_priority": true,
249
+ "avoid_late_penalty": 2.0,
250
+ "max_route_load": 18,
251
+ "depot_start": 480,
252
+ "soft_due_before": 720,
253
+ "boost_terms": ["school", "medical", "fresh"]
254
+ }}
255
+ Use minutes after midnight for times. Omit soft_due_before if no lunch/noon constraint is present.
256
+ <|im_end|>
257
+ <|im_start|>user
258
+ Dispatcher notes: {notes}
259
+ <|im_end|>
260
+ <|im_start|>assistant
261
  """
262
  try:
263
  result = llm(
264
  prompt,
265
  max_tokens=180,
266
+ temperature=0.0,
267
+ top_p=1.0,
268
+ stop=["<|im_end|>", "\n\n\n"],
269
  )
270
  text = result["choices"][0]["text"]
271
  parsed = extract_json_object(text)