Spaces:
Sleeping
Sleeping
Commit
·
2790442
1
Parent(s):
e829b15
Fix prefetch init order
Browse files
app.py
CHANGED
|
@@ -94,6 +94,23 @@ def _start_prefetch_workers():
|
|
| 94 |
PREFETCH_EXECUTOR.submit(_prefetch_repo, repo)
|
| 95 |
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
_start_prefetch_workers()
|
| 98 |
|
| 99 |
# Try to import LLM Compressor (for quantization - optional, vLLM has native AWQ support)
|
|
@@ -163,23 +180,6 @@ STOP_SEQUENCES = [PLAN_END_TOKEN, "</json>", "</JSON>"]
|
|
| 163 |
|
| 164 |
ROUTER_SYSTEM_PROMPT = """You are the Router Agent coordinating Math, Code, and General-Search specialists.\nEmit EXACTLY ONE strict JSON object with keys route_plan, route_rationale, expected_artifacts,\nthinking_outline, handoff_plan, todo_list, difficulty, tags, acceptance_criteria, metrics.\nRules:\n- No markdown/code fences, no natural-language prologues or epilogues.\n- route_plan must be an ordered list of tool invocations such as /math(...), /code(...), /general-search(...).\n- todo_list must map each checklist item to the responsible tool.\n- metrics must include primary and secondary arrays (add optional *_guidance fields when they exist).\n- After the closing brace of the JSON object, immediately append the sentinel <|end_of_plan|>.\nExample output:\n{\n "route_plan": ["/general-search(...)"],\n "route_rationale": "...",\n ...\n}<|end_of_plan|>\nReturn nothing else."""
|
| 165 |
|
| 166 |
-
MODELS = {
|
| 167 |
-
"Router-Qwen3-32B-AWQ": {
|
| 168 |
-
"repo_id": "Alovestocode/router-qwen3-32b-merged-awq", # AWQ quantized model
|
| 169 |
-
"tokenizer_repo": "Alovestocode/router-qwen3-32b-merged", # Tokenizer from original repo
|
| 170 |
-
"description": "Router checkpoint on Qwen3 32B merged, optimized with AWQ quantization via vLLM.",
|
| 171 |
-
"params_b": 32.0,
|
| 172 |
-
"quantization": "awq", # vLLM will auto-detect AWQ
|
| 173 |
-
},
|
| 174 |
-
"Router-Gemma3-27B-AWQ": {
|
| 175 |
-
"repo_id": "Alovestocode/router-gemma3-merged-awq", # AWQ quantized model
|
| 176 |
-
"tokenizer_repo": "Alovestocode/router-gemma3-merged", # Tokenizer from original repo
|
| 177 |
-
"description": "Router checkpoint on Gemma3 27B merged, optimized with AWQ quantization via vLLM.",
|
| 178 |
-
"params_b": 27.0,
|
| 179 |
-
"quantization": "awq", # vLLM will auto-detect AWQ
|
| 180 |
-
},
|
| 181 |
-
}
|
| 182 |
-
|
| 183 |
REQUIRED_KEYS = [
|
| 184 |
"route_plan",
|
| 185 |
"route_rationale",
|
|
|
|
| 94 |
PREFETCH_EXECUTOR.submit(_prefetch_repo, repo)
|
| 95 |
|
| 96 |
|
| 97 |
+
MODELS = {
|
| 98 |
+
"Router-Qwen3-32B-AWQ": {
|
| 99 |
+
"repo_id": "Alovestocode/router-qwen3-32b-merged-awq", # AWQ quantized model
|
| 100 |
+
"tokenizer_repo": "Alovestocode/router-qwen3-32b-merged", # Tokenizer from original repo
|
| 101 |
+
"description": "Router checkpoint on Qwen3 32B merged, optimized with AWQ quantization via vLLM.",
|
| 102 |
+
"params_b": 32.0,
|
| 103 |
+
"quantization": "awq", # vLLM will auto-detect AWQ
|
| 104 |
+
},
|
| 105 |
+
"Router-Gemma3-27B-AWQ": {
|
| 106 |
+
"repo_id": "Alovestocode/router-gemma3-merged-awq", # AWQ quantized model
|
| 107 |
+
"tokenizer_repo": "Alovestocode/router-gemma3-merged", # Tokenizer from original repo
|
| 108 |
+
"description": "Router checkpoint on Gemma3 27B merged, optimized with AWQ quantization via vLLM.",
|
| 109 |
+
"params_b": 27.0,
|
| 110 |
+
"quantization": "awq", # vLLM will auto-detect AWQ
|
| 111 |
+
},
|
| 112 |
+
}
|
| 113 |
+
|
| 114 |
_start_prefetch_workers()
|
| 115 |
|
| 116 |
# Try to import LLM Compressor (for quantization - optional, vLLM has native AWQ support)
|
|
|
|
| 180 |
|
| 181 |
ROUTER_SYSTEM_PROMPT = """You are the Router Agent coordinating Math, Code, and General-Search specialists.\nEmit EXACTLY ONE strict JSON object with keys route_plan, route_rationale, expected_artifacts,\nthinking_outline, handoff_plan, todo_list, difficulty, tags, acceptance_criteria, metrics.\nRules:\n- No markdown/code fences, no natural-language prologues or epilogues.\n- route_plan must be an ordered list of tool invocations such as /math(...), /code(...), /general-search(...).\n- todo_list must map each checklist item to the responsible tool.\n- metrics must include primary and secondary arrays (add optional *_guidance fields when they exist).\n- After the closing brace of the JSON object, immediately append the sentinel <|end_of_plan|>.\nExample output:\n{\n "route_plan": ["/general-search(...)"],\n "route_rationale": "...",\n ...\n}<|end_of_plan|>\nReturn nothing else."""
|
| 182 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
REQUIRED_KEYS = [
|
| 184 |
"route_plan",
|
| 185 |
"route_rationale",
|