umr2015 committed on
Commit
e28a9bc
·
verified ·
1 Parent(s): 109a37f

Default to fast parser with optional MiniCPM5 mode

Browse files
Files changed (4) hide show
  1. FIELD_NOTES.md +2 -1
  2. README.md +5 -2
  3. SUBMISSION.md +5 -4
  4. app.py +19 -6
FIELD_NOTES.md CHANGED
@@ -30,7 +30,8 @@ This directly targets the hackathon signals:
30
 
31
  - Backyard AI: practical helper for a local delivery operator.
32
  - Off the Grid: no cloud LLM API.
33
- - Llama Champion: MiniCPM5 GGUF is loaded through llama.cpp when available.
 
34
  - Sharing is Caring: the planner trace is included as `agent_trace.json`.
35
 
36
  ## What the model does
 
30
 
31
  - Backyard AI: practical helper for a local delivery operator.
32
  - Off the Grid: no cloud LLM API.
33
+ - Llama Champion: MiniCPM5 GGUF is available through llama.cpp, behind an
34
+ explicit checkbox so the public CPU Basic demo remains responsive.
35
  - Sharing is Caring: the planner trace is included as `agent_trace.json`.
36
 
37
  ## What the model does
README.md CHANGED
@@ -50,8 +50,11 @@ Spaces, and models under 32B parameters.
50
  - Cloud LLM APIs: none
51
 
52
  The Space preloads the Q4 MiniCPM5 GGUF file and installs the CPU llama.cpp
53
- wheel. If a runtime cold start still cannot load the model, the app falls back
54
- to a deterministic parser and makes that visible in the parser trace.
 
 
 
55
 
56
  The route optimizer never depends on hidden model output: every route, time
57
  window, lateness minute, and baseline delta is computed deterministically.
 
50
  - Cloud LLM APIs: none
51
 
52
  The Space preloads the Q4 MiniCPM5 GGUF file and installs the CPU llama.cpp
53
+ wheel. The public CPU Basic demo defaults to fast deterministic parsing so
54
+ judges can inspect the route planner immediately. The MiniCPM5 parser can be
55
+ enabled from the checkbox in the UI; if a runtime cold start cannot load the
56
+ model or the model returns invalid JSON, the app falls back to the deterministic
57
+ parser and makes that visible in the parser trace.
58
 
59
  The route optimizer never depends on hidden model output: every route, time
60
  window, lateness minute, and baseline delta is computed deterministically.
SUBMISSION.md CHANGED
@@ -8,7 +8,7 @@
8
  - Model: `openbmb/MiniCPM5-1B-GGUF`
9
  - File: `MiniCPM5-1B-Q4_K_M.gguf`
10
  - Parameters: 1.08B
11
- - Runtime path: local GGUF through `llama-cpp-python`
12
 
13
  ## Why It Fits Build Small
14
 
@@ -55,9 +55,10 @@ This makes the small model useful because the task is bounded:
55
  I built Tiny Dispatch Coach for the Build Small Hackathon:
56
 
57
  Small delivery teams often have messy notes, tight windows, and a van capacity
58
- constraint. This Gradio Space uses OpenBMB MiniCPM5-1B-GGUF to parse dispatch
59
- notes into route constraints, then a deterministic planner creates auditable
60
- driver routes.
 
61
 
62
  No cloud LLM API. Synthetic demo data only. 1.08B params.
63
 
 
8
  - Model: `openbmb/MiniCPM5-1B-GGUF`
9
  - File: `MiniCPM5-1B-Q4_K_M.gguf`
10
  - Parameters: 1.08B
11
+ - Runtime path: local GGUF through `llama-cpp-python`, enabled by checkbox
12
 
13
  ## Why It Fits Build Small
14
 
 
55
  I built Tiny Dispatch Coach for the Build Small Hackathon:
56
 
57
  Small delivery teams often have messy notes, tight windows, and a van capacity
58
+ constraint. This Gradio Space uses a MiniCPM5-ready constraint parser plus a
59
+ deterministic planner to create auditable driver routes. The OpenBMB
60
+ MiniCPM5-1B-GGUF path runs locally through llama.cpp when enabled; default fast
61
+ mode keeps the public CPU Basic demo responsive.
62
 
63
  No cloud LLM API. Synthetic demo data only. 1.08B params.
64
 
app.py CHANGED
@@ -224,8 +224,9 @@ def get_minicpm_llm():
224
  model_path = hf_hub_download(repo_id=MINICPM_REPO, filename=MINICPM_FILE)
225
  return Llama(
226
  model_path=model_path,
227
- n_ctx=2048,
228
  n_threads=max(1, min(4, os.cpu_count() or 2)),
 
229
  n_gpu_layers=0,
230
  verbose=False,
231
  )
@@ -233,8 +234,15 @@ def get_minicpm_llm():
233
  return None
234
 
235
 
236
- def minicpm_parse_dispatch_notes(notes: str) -> Tuple[Dict[str, object], str]:
237
  fallback = normalize_constraints(parse_dispatch_notes(notes))
 
 
 
 
 
 
 
238
  llm = get_minicpm_llm()
239
  if llm is None:
240
  fallback["source"] = "rule-fallback"
@@ -262,7 +270,7 @@ Dispatcher notes: {notes}
262
  try:
263
  result = llm(
264
  prompt,
265
- max_tokens=180,
266
  temperature=0.0,
267
  top_p=1.0,
268
  stop=["<|im_end|>", "\n\n\n"],
@@ -580,9 +588,9 @@ def route_map(plan: List[PlanStop]) -> str:
580
  """
581
 
582
 
583
- def analyze(file_obj, notes: str):
584
  stops = parse_orders(file_obj)
585
- constraints, model_trace = minicpm_parse_dispatch_notes(notes)
586
  auto_routes = build_capacity_routes(stops, constraints)
587
  manual = manual_route(stops)
588
  auto_plan, auto_metrics = simulate_routes(auto_routes, int(constraints["depot_start"]))
@@ -748,6 +756,11 @@ with gr.Blocks(
748
  value=DEFAULT_NOTES,
749
  lines=5,
750
  )
 
 
 
 
 
751
  run = gr.Button("Plan route", variant="primary")
752
  with gr.Column(scale=1):
753
  gr.HTML(
@@ -785,7 +798,7 @@ OpenBMB MiniCPM5, 1.08B parameters, local GGUF path, no cloud LLM API, synthetic
785
 
786
  run.click(
787
  analyze,
788
- inputs=[order_file, notes],
789
  outputs=[metrics, constraints, table, cards, map_html],
790
  )
791
 
 
224
  model_path = hf_hub_download(repo_id=MINICPM_REPO, filename=MINICPM_FILE)
225
  return Llama(
226
  model_path=model_path,
227
+ n_ctx=768,
228
  n_threads=max(1, min(4, os.cpu_count() or 2)),
229
+ n_batch=32,
230
  n_gpu_layers=0,
231
  verbose=False,
232
  )
 
234
  return None
235
 
236
 
237
+ def minicpm_parse_dispatch_notes(notes: str, use_minicpm: bool = False) -> Tuple[Dict[str, object], str]:
238
  fallback = normalize_constraints(parse_dispatch_notes(notes))
239
+ if not use_minicpm:
240
+ fallback["source"] = "rule-fallback"
241
+ return (
242
+ fallback,
243
+ "Fast CPU Basic mode used the deterministic parser. Enable MiniCPM5 parser to run the local GGUF model path.",
244
+ )
245
+
246
  llm = get_minicpm_llm()
247
  if llm is None:
248
  fallback["source"] = "rule-fallback"
 
270
  try:
271
  result = llm(
272
  prompt,
273
+ max_tokens=96,
274
  temperature=0.0,
275
  top_p=1.0,
276
  stop=["<|im_end|>", "\n\n\n"],
 
588
  """
589
 
590
 
591
+ def analyze(file_obj, notes: str, use_minicpm: bool):
592
  stops = parse_orders(file_obj)
593
+ constraints, model_trace = minicpm_parse_dispatch_notes(notes, use_minicpm)
594
  auto_routes = build_capacity_routes(stops, constraints)
595
  manual = manual_route(stops)
596
  auto_plan, auto_metrics = simulate_routes(auto_routes, int(constraints["depot_start"]))
 
756
  value=DEFAULT_NOTES,
757
  lines=5,
758
  )
759
+ use_minicpm = gr.Checkbox(
760
+ label="Use MiniCPM5 parser",
761
+ value=False,
762
+ info="Optional on CPU Basic. Default fast mode keeps the demo responsive.",
763
+ )
764
  run = gr.Button("Plan route", variant="primary")
765
  with gr.Column(scale=1):
766
  gr.HTML(
 
798
 
799
  run.click(
800
  analyze,
801
+ inputs=[order_file, notes, use_minicpm],
802
  outputs=[metrics, constraints, table, cards, map_html],
803
  )
804