Default to fast parser with optional MiniCPM5 mode
Browse files
- FIELD_NOTES.md +2 -1
- README.md +5 -2
- SUBMISSION.md +5 -4
- app.py +19 -6
FIELD_NOTES.md
CHANGED
|
@@ -30,7 +30,8 @@ This directly targets the hackathon signals:
|
|
| 30 |
|
| 31 |
- Backyard AI: practical helper for a local delivery operator.
|
| 32 |
- Off the Grid: no cloud LLM API.
|
| 33 |
-
- Llama Champion: MiniCPM5 GGUF is
|
|
|
|
| 34 |
- Sharing is Caring: the planner trace is included as `agent_trace.json`.
|
| 35 |
|
| 36 |
## What the model does
|
|
|
|
| 30 |
|
| 31 |
- Backyard AI: practical helper for a local delivery operator.
|
| 32 |
- Off the Grid: no cloud LLM API.
|
| 33 |
+
- Llama Champion: MiniCPM5 GGUF is available through llama.cpp, behind an
|
| 34 |
+
explicit checkbox so the public CPU Basic demo remains responsive.
|
| 35 |
- Sharing is Caring: the planner trace is included as `agent_trace.json`.
|
| 36 |
|
| 37 |
## What the model does
|
README.md
CHANGED
|
@@ -50,8 +50,11 @@ Spaces, and models under 32B parameters.
|
|
| 50 |
- Cloud LLM APIs: none
|
| 51 |
|
| 52 |
The Space preloads the Q4 MiniCPM5 GGUF file and installs the CPU llama.cpp
|
| 53 |
-
wheel.
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
The route optimizer never depends on hidden model output: every route, time
|
| 57 |
window, lateness minute, and baseline delta is computed deterministically.
|
|
|
|
| 50 |
- Cloud LLM APIs: none
|
| 51 |
|
| 52 |
The Space preloads the Q4 MiniCPM5 GGUF file and installs the CPU llama.cpp
|
| 53 |
+
wheel. The public CPU Basic demo defaults to fast deterministic parsing so
|
| 54 |
+
judges can inspect the route planner immediately. The MiniCPM5 parser can be
|
| 55 |
+
enabled from the checkbox in the UI; if a runtime cold start cannot load the
|
| 56 |
+
model or the model returns invalid JSON, the app falls back to the deterministic
|
| 57 |
+
parser and makes that visible in the parser trace.
|
| 58 |
|
| 59 |
The route optimizer never depends on hidden model output: every route, time
|
| 60 |
window, lateness minute, and baseline delta is computed deterministically.
|
SUBMISSION.md
CHANGED
|
@@ -8,7 +8,7 @@
|
|
| 8 |
- Model: `openbmb/MiniCPM5-1B-GGUF`
|
| 9 |
- File: `MiniCPM5-1B-Q4_K_M.gguf`
|
| 10 |
- Parameters: 1.08B
|
| 11 |
-
- Runtime path: local GGUF through `llama-cpp-python`
|
| 12 |
|
| 13 |
## Why It Fits Build Small
|
| 14 |
|
|
@@ -55,9 +55,10 @@ This makes the small model useful because the task is bounded:
|
|
| 55 |
I built Tiny Dispatch Coach for the Build Small Hackathon:
|
| 56 |
|
| 57 |
Small delivery teams often have messy notes, tight windows, and a van capacity
|
| 58 |
-
constraint. This Gradio Space uses
|
| 59 |
-
|
| 60 |
-
|
|
|
|
| 61 |
|
| 62 |
No cloud LLM API. Synthetic demo data only. 1.08B params.
|
| 63 |
|
|
|
|
| 8 |
- Model: `openbmb/MiniCPM5-1B-GGUF`
|
| 9 |
- File: `MiniCPM5-1B-Q4_K_M.gguf`
|
| 10 |
- Parameters: 1.08B
|
| 11 |
+
- Runtime path: local GGUF through `llama-cpp-python`, enabled by checkbox
|
| 12 |
|
| 13 |
## Why It Fits Build Small
|
| 14 |
|
|
|
|
| 55 |
I built Tiny Dispatch Coach for the Build Small Hackathon:
|
| 56 |
|
| 57 |
Small delivery teams often have messy notes, tight windows, and a van capacity
|
| 58 |
+
constraint. This Gradio Space uses a MiniCPM5-ready constraint parser plus a
|
| 59 |
+
deterministic planner to create auditable driver routes. The OpenBMB
|
| 60 |
+
MiniCPM5-1B-GGUF path runs locally through llama.cpp when enabled; default fast
|
| 61 |
+
mode keeps the public CPU Basic demo responsive.
|
| 62 |
|
| 63 |
No cloud LLM API. Synthetic demo data only. 1.08B params.
|
| 64 |
|
app.py
CHANGED
|
@@ -224,8 +224,9 @@ def get_minicpm_llm():
|
|
| 224 |
model_path = hf_hub_download(repo_id=MINICPM_REPO, filename=MINICPM_FILE)
|
| 225 |
return Llama(
|
| 226 |
model_path=model_path,
|
| 227 |
-
n_ctx=
|
| 228 |
n_threads=max(1, min(4, os.cpu_count() or 2)),
|
|
|
|
| 229 |
n_gpu_layers=0,
|
| 230 |
verbose=False,
|
| 231 |
)
|
|
@@ -233,8 +234,15 @@ def get_minicpm_llm():
|
|
| 233 |
return None
|
| 234 |
|
| 235 |
|
| 236 |
-
def minicpm_parse_dispatch_notes(notes: str) -> Tuple[Dict[str, object], str]:
|
| 237 |
fallback = normalize_constraints(parse_dispatch_notes(notes))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
llm = get_minicpm_llm()
|
| 239 |
if llm is None:
|
| 240 |
fallback["source"] = "rule-fallback"
|
|
@@ -262,7 +270,7 @@ Dispatcher notes: {notes}
|
|
| 262 |
try:
|
| 263 |
result = llm(
|
| 264 |
prompt,
|
| 265 |
-
max_tokens=
|
| 266 |
temperature=0.0,
|
| 267 |
top_p=1.0,
|
| 268 |
stop=["<|im_end|>", "\n\n\n"],
|
|
@@ -580,9 +588,9 @@ def route_map(plan: List[PlanStop]) -> str:
|
|
| 580 |
"""
|
| 581 |
|
| 582 |
|
| 583 |
-
def analyze(file_obj, notes: str):
|
| 584 |
stops = parse_orders(file_obj)
|
| 585 |
-
constraints, model_trace = minicpm_parse_dispatch_notes(notes)
|
| 586 |
auto_routes = build_capacity_routes(stops, constraints)
|
| 587 |
manual = manual_route(stops)
|
| 588 |
auto_plan, auto_metrics = simulate_routes(auto_routes, int(constraints["depot_start"]))
|
|
@@ -748,6 +756,11 @@ with gr.Blocks(
|
|
| 748 |
value=DEFAULT_NOTES,
|
| 749 |
lines=5,
|
| 750 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 751 |
run = gr.Button("Plan route", variant="primary")
|
| 752 |
with gr.Column(scale=1):
|
| 753 |
gr.HTML(
|
|
@@ -785,7 +798,7 @@ OpenBMB MiniCPM5, 1.08B parameters, local GGUF path, no cloud LLM API, synthetic
|
|
| 785 |
|
| 786 |
run.click(
|
| 787 |
analyze,
|
| 788 |
-
inputs=[order_file, notes],
|
| 789 |
outputs=[metrics, constraints, table, cards, map_html],
|
| 790 |
)
|
| 791 |
|
|
|
|
| 224 |
model_path = hf_hub_download(repo_id=MINICPM_REPO, filename=MINICPM_FILE)
|
| 225 |
return Llama(
|
| 226 |
model_path=model_path,
|
| 227 |
+
n_ctx=768,
|
| 228 |
n_threads=max(1, min(4, os.cpu_count() or 2)),
|
| 229 |
+
n_batch=32,
|
| 230 |
n_gpu_layers=0,
|
| 231 |
verbose=False,
|
| 232 |
)
|
|
|
|
| 234 |
return None
|
| 235 |
|
| 236 |
|
| 237 |
+
def minicpm_parse_dispatch_notes(notes: str, use_minicpm: bool = False) -> Tuple[Dict[str, object], str]:
|
| 238 |
fallback = normalize_constraints(parse_dispatch_notes(notes))
|
| 239 |
+
if not use_minicpm:
|
| 240 |
+
fallback["source"] = "rule-fallback"
|
| 241 |
+
return (
|
| 242 |
+
fallback,
|
| 243 |
+
"Fast CPU Basic mode used the deterministic parser. Enable MiniCPM5 parser to run the local GGUF model path.",
|
| 244 |
+
)
|
| 245 |
+
|
| 246 |
llm = get_minicpm_llm()
|
| 247 |
if llm is None:
|
| 248 |
fallback["source"] = "rule-fallback"
|
|
|
|
| 270 |
try:
|
| 271 |
result = llm(
|
| 272 |
prompt,
|
| 273 |
+
max_tokens=96,
|
| 274 |
temperature=0.0,
|
| 275 |
top_p=1.0,
|
| 276 |
stop=["<|im_end|>", "\n\n\n"],
|
|
|
|
| 588 |
"""
|
| 589 |
|
| 590 |
|
| 591 |
+
def analyze(file_obj, notes: str, use_minicpm: bool):
|
| 592 |
stops = parse_orders(file_obj)
|
| 593 |
+
constraints, model_trace = minicpm_parse_dispatch_notes(notes, use_minicpm)
|
| 594 |
auto_routes = build_capacity_routes(stops, constraints)
|
| 595 |
manual = manual_route(stops)
|
| 596 |
auto_plan, auto_metrics = simulate_routes(auto_routes, int(constraints["depot_start"]))
|
|
|
|
| 756 |
value=DEFAULT_NOTES,
|
| 757 |
lines=5,
|
| 758 |
)
|
| 759 |
+
use_minicpm = gr.Checkbox(
|
| 760 |
+
label="Use MiniCPM5 parser",
|
| 761 |
+
value=False,
|
| 762 |
+
info="Optional on CPU Basic. Default fast mode keeps the demo responsive.",
|
| 763 |
+
)
|
| 764 |
run = gr.Button("Plan route", variant="primary")
|
| 765 |
with gr.Column(scale=1):
|
| 766 |
gr.HTML(
|
|
|
|
| 798 |
|
| 799 |
run.click(
|
| 800 |
analyze,
|
| 801 |
+
inputs=[order_file, notes, use_minicpm],
|
| 802 |
outputs=[metrics, constraints, table, cards, map_html],
|
| 803 |
)
|
| 804 |
|