Alikestocode committed on
Commit
2790442
·
1 Parent(s): e829b15

Fix prefetch init order

Browse files
Files changed (1) hide show
  1. app.py +17 -17
app.py CHANGED
@@ -94,6 +94,23 @@ def _start_prefetch_workers():
94
  PREFETCH_EXECUTOR.submit(_prefetch_repo, repo)
95
 
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  _start_prefetch_workers()
98
 
99
  # Try to import LLM Compressor (for quantization - optional, vLLM has native AWQ support)
@@ -163,23 +180,6 @@ STOP_SEQUENCES = [PLAN_END_TOKEN, "</json>", "</JSON>"]
163
 
164
  ROUTER_SYSTEM_PROMPT = """You are the Router Agent coordinating Math, Code, and General-Search specialists.\nEmit EXACTLY ONE strict JSON object with keys route_plan, route_rationale, expected_artifacts,\nthinking_outline, handoff_plan, todo_list, difficulty, tags, acceptance_criteria, metrics.\nRules:\n- No markdown/code fences, no natural-language prologues or epilogues.\n- route_plan must be an ordered list of tool invocations such as /math(...), /code(...), /general-search(...).\n- todo_list must map each checklist item to the responsible tool.\n- metrics must include primary and secondary arrays (add optional *_guidance fields when they exist).\n- After the closing brace of the JSON object, immediately append the sentinel <|end_of_plan|>.\nExample output:\n{\n "route_plan": ["/general-search(...)"],\n "route_rationale": "...",\n ...\n}<|end_of_plan|>\nReturn nothing else."""
165
 
166
- MODELS = {
167
- "Router-Qwen3-32B-AWQ": {
168
- "repo_id": "Alovestocode/router-qwen3-32b-merged-awq", # AWQ quantized model
169
- "tokenizer_repo": "Alovestocode/router-qwen3-32b-merged", # Tokenizer from original repo
170
- "description": "Router checkpoint on Qwen3 32B merged, optimized with AWQ quantization via vLLM.",
171
- "params_b": 32.0,
172
- "quantization": "awq", # vLLM will auto-detect AWQ
173
- },
174
- "Router-Gemma3-27B-AWQ": {
175
- "repo_id": "Alovestocode/router-gemma3-merged-awq", # AWQ quantized model
176
- "tokenizer_repo": "Alovestocode/router-gemma3-merged", # Tokenizer from original repo
177
- "description": "Router checkpoint on Gemma3 27B merged, optimized with AWQ quantization via vLLM.",
178
- "params_b": 27.0,
179
- "quantization": "awq", # vLLM will auto-detect AWQ
180
- },
181
- }
182
-
183
  REQUIRED_KEYS = [
184
  "route_plan",
185
  "route_rationale",
 
94
  PREFETCH_EXECUTOR.submit(_prefetch_repo, repo)
95
 
96
 
97
+ MODELS = {
98
+ "Router-Qwen3-32B-AWQ": {
99
+ "repo_id": "Alovestocode/router-qwen3-32b-merged-awq", # AWQ quantized model
100
+ "tokenizer_repo": "Alovestocode/router-qwen3-32b-merged", # Tokenizer from original repo
101
+ "description": "Router checkpoint on Qwen3 32B merged, optimized with AWQ quantization via vLLM.",
102
+ "params_b": 32.0,
103
+ "quantization": "awq", # vLLM will auto-detect AWQ
104
+ },
105
+ "Router-Gemma3-27B-AWQ": {
106
+ "repo_id": "Alovestocode/router-gemma3-merged-awq", # AWQ quantized model
107
+ "tokenizer_repo": "Alovestocode/router-gemma3-merged", # Tokenizer from original repo
108
+ "description": "Router checkpoint on Gemma3 27B merged, optimized with AWQ quantization via vLLM.",
109
+ "params_b": 27.0,
110
+ "quantization": "awq", # vLLM will auto-detect AWQ
111
+ },
112
+ }
113
+
114
  _start_prefetch_workers()
115
 
116
  # Try to import LLM Compressor (for quantization - optional, vLLM has native AWQ support)
 
180
 
181
  ROUTER_SYSTEM_PROMPT = """You are the Router Agent coordinating Math, Code, and General-Search specialists.\nEmit EXACTLY ONE strict JSON object with keys route_plan, route_rationale, expected_artifacts,\nthinking_outline, handoff_plan, todo_list, difficulty, tags, acceptance_criteria, metrics.\nRules:\n- No markdown/code fences, no natural-language prologues or epilogues.\n- route_plan must be an ordered list of tool invocations such as /math(...), /code(...), /general-search(...).\n- todo_list must map each checklist item to the responsible tool.\n- metrics must include primary and secondary arrays (add optional *_guidance fields when they exist).\n- After the closing brace of the JSON object, immediately append the sentinel <|end_of_plan|>.\nExample output:\n{\n "route_plan": ["/general-search(...)"],\n "route_rationale": "...",\n ...\n}<|end_of_plan|>\nReturn nothing else."""
182
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  REQUIRED_KEYS = [
184
  "route_plan",
185
  "route_rationale",