restokes92 committed on
Commit
53943f9
·
verified ·
1 Parent(s): 245244e

Upload Kaiju Coder 7 runtime quantization recipe

Browse files
README.md CHANGED
@@ -1,5 +1,7 @@
1
  # Kaiju Coder 7 Runtime-Quantized Local Candidate
2
 
 
 
3
  This is the current working local quantized variant for Kaiju Coder 7. It is a
4
  runtime bitsandbytes vLLM serving path, not a separate persisted quantized
5
  weight artifact yet.
 
1
  # Kaiju Coder 7 Runtime-Quantized Local Candidate
2
 
3
+ ![RMDW logo](assets/RMDWlogo.png)
4
+
5
  This is the current working local quantized variant for Kaiju Coder 7. It is a
6
  runtime bitsandbytes vLLM serving path, not a separate persisted quantized
7
  weight artifact yet.
assets/RMDWlogo.png ADDED
scripts/kaiju_opencode_fast_proxy.py CHANGED
@@ -11,6 +11,7 @@ from __future__ import annotations
11
  import argparse
12
  import json
13
  import os
 
14
  import time
15
  import urllib.error
16
  import urllib.request
@@ -28,6 +29,10 @@ NORMAL_MAX_TOKENS = int(os.environ.get("KAIJU_NORMAL_MAX_TOKENS", "384"))
28
  WORK_MAX_TOKENS = int(os.environ.get("KAIJU_WORK_MAX_TOKENS", "1536"))
29
  ARTIFACT_MAX_TOKENS = int(os.environ.get("KAIJU_ARTIFACT_MAX_TOKENS", "4096"))
30
  MAX_REQUEST_BYTES = int(os.environ.get("KAIJU_MAX_REQUEST_BYTES", "2097152"))
 
 
 
 
31
 
32
 
33
  def normalize_messages(messages: Any) -> list[dict[str, Any]]:
@@ -36,20 +41,97 @@ def normalize_messages(messages: Any) -> list[dict[str, Any]]:
36
  return [message for message in messages if isinstance(message, dict)]
37
 
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  def message_text(messages: list[dict[str, Any]]) -> str:
40
  parts: list[str] = []
41
  for message in messages:
 
 
 
 
 
 
 
 
42
  content = message.get("content", "")
43
  if isinstance(content, str):
44
- parts.append(content)
45
- else:
46
- parts.append(json.dumps(content, ensure_ascii=False))
47
- return "\n".join(parts).lower()
 
 
 
 
 
 
 
 
 
 
48
 
49
 
50
- def classify_job(messages: list[dict[str, Any]]) -> str:
 
 
 
 
 
 
 
 
 
 
 
51
  text = message_text(messages)
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  artifact_terms = (
 
 
 
 
53
  "complete html",
54
  "html file",
55
  "one-file website",
@@ -57,6 +139,10 @@ def classify_job(messages: list[dict[str, Any]]) -> str:
57
  "build a website",
58
  "make a website",
59
  "full file",
 
 
 
 
60
  )
61
  work_terms = (
62
  "create ",
@@ -77,6 +163,278 @@ def classify_job(messages: list[dict[str, Any]]) -> str:
77
  return "normal"
78
 
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  def target_tokens(job_class: str) -> int:
81
  if job_class == "artifact":
82
  return ARTIFACT_MAX_TOKENS
@@ -154,6 +512,16 @@ class Handler(BaseHTTPRequestHandler):
154
  except Exception as error: # noqa: BLE001 - return request parse failures.
155
  self._json(HTTPStatus.BAD_REQUEST, {"error": {"message": str(error), "type": "bad_request"}})
156
  return
 
 
 
 
 
 
 
 
 
 
157
  self._forward_post("/chat/completions", body)
158
 
159
  def _headers(self) -> dict[str, str]:
 
11
  import argparse
12
  import json
13
  import os
14
+ import re
15
  import time
16
  import urllib.error
17
  import urllib.request
 
29
  WORK_MAX_TOKENS = int(os.environ.get("KAIJU_WORK_MAX_TOKENS", "1536"))
30
  ARTIFACT_MAX_TOKENS = int(os.environ.get("KAIJU_ARTIFACT_MAX_TOKENS", "4096"))
31
  MAX_REQUEST_BYTES = int(os.environ.get("KAIJU_MAX_REQUEST_BYTES", "2097152"))
32
# Environment values (compared after lowercasing) that switch a feature off;
# anything else, including an unset variable, leaves the feature enabled.
_FLAG_OFF_VALUES = frozenset({"0", "false", "no"})

# Whether qualifying prompts are answered with a synthesized tool call.
AUTOROUTE_ENABLED = os.environ.get("KAIJU_OPENCODE_AUTOROUTE", "1").lower() not in _FLAG_OFF_VALUES
# Whether tool results are answered with a synthesized summary.
SUMMARY_ENABLED = os.environ.get("KAIJU_OPENCODE_FAST_SUMMARY", "1").lower() not in _FLAG_OFF_VALUES
# Function names looked up in the request's ``tools`` list.
TOOL_NAME = "kaiju_artifact"
WRITE_TOOL_NAME = "kaiju_write_file"
36
 
37
 
38
  def normalize_messages(messages: Any) -> list[dict[str, Any]]:
 
41
  return [message for message in messages if isinstance(message, dict)]
42
 
43
 
44
def content_to_text(content: Any) -> str:
    """Flatten a chat message ``content`` value into plain text.

    Args:
        content: A string, a list of content parts, a dict, or any other
            JSON-serializable value.

    Returns:
        * For a string that looks like a JSON object/array and parses as one,
          the flattened form of the decoded value; otherwise the string
          unchanged.
        * For a list, the newline-joined ``text`` / text-typed ``content`` /
          ``output`` strings of its dict items, or the JSON dump of the list
          when no such string is found.
        * For a dict, the first string found under ``output``, ``text`` or
          ``content``, else its JSON dump.
        * For anything else, its JSON dump.
    """
    if isinstance(content, str):
        stripped = content.strip()
        if stripped.startswith(("{", "[")):
            try:
                return content_to_text(json.loads(stripped))
            except (ValueError, RecursionError):
                # Not valid JSON (JSONDecodeError is a ValueError) or nested
                # too deeply to decode: treat the string as ordinary text.
                return content
        return content
    if isinstance(content, list):
        parts: list[str] = []
        for item in content:
            if not isinstance(item, dict):
                continue
            if isinstance(item.get("text"), str):
                parts.append(item["text"])
            elif item.get("type") == "text" and isinstance(item.get("content"), str):
                parts.append(item["content"])
            elif isinstance(item.get("output"), str):
                parts.append(item["output"])
        if parts:
            return "\n".join(parts)
        return json.dumps(content, ensure_ascii=False)
    if isinstance(content, dict):
        for key in ("output", "text", "content"):
            if isinstance(content.get(key), str):
                return content[key]
        return json.dumps(content, ensure_ascii=False)
    return json.dumps(content, ensure_ascii=False)
73
+
74
+
75
def message_text(messages: list[dict[str, Any]]) -> str:
    """Return all message contents as one lowercased, newline-joined string."""
    flattened = (content_to_text(entry.get("content", "")) for entry in messages)
    return "\n".join(flattened).lower()
80
+
81
+
82
def latest_user_text(messages: list[dict[str, Any]]) -> str:
    """Return the text of the most recent ``role == "user"`` message.

    String content is returned unchanged. List content yields the
    newline-joined ``text`` (or text-typed ``content``) strings of its dict
    items. Any other content, or a list with no such strings, is returned as
    its JSON dump. Returns ``""`` when there is no user message.
    """
    newest = next((entry for entry in reversed(messages) if entry.get("role") == "user"), None)
    if newest is None:
        return ""
    content = newest.get("content", "")
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        fragments: list[str] = []
        for piece in content:
            if not isinstance(piece, dict):
                continue
            direct = piece.get("text")
            if isinstance(direct, str):
                fragments.append(direct)
                continue
            nested = piece.get("content")
            if piece.get("type") == "text" and isinstance(nested, str):
                fragments.append(nested)
        if fragments:
            return "\n".join(fragments)
    return json.dumps(content, ensure_ascii=False)
102
 
103
 
104
def clean_prompt(prompt: str) -> str:
    """Trim whitespace and remove one pair of matching outer quotes, if any."""
    trimmed = prompt.strip()
    quoted = len(trimmed) >= 2 and trimmed[0] in {"'", '"'} and trimmed[-1] == trimmed[0]
    return trimmed[1:-1].strip() if quoted else trimmed
109
+
110
+
111
def has_tool(messages: list[dict[str, Any]]) -> bool:
    """Report whether any message has role ``tool`` or a truthy ``tool_call_id``."""
    for entry in messages:
        if entry.get("role") == "tool" or entry.get("tool_call_id"):
            return True
    return False
113
+
114
+
115
def has_tool_result(messages: list[dict[str, Any]]) -> bool:
    """Report whether the conversation is waiting on a reply to a Kaiju tool result.

    True only when the final message is a tool result (role ``tool`` or a
    truthy ``tool_call_id``) and the lowercased transcript contains one of the
    Kaiju tool names or result markers.

    The final-message requirement matters: checking for a tool message
    anywhere in the history would keep matching on every later turn of the
    same conversation, so follow-up user messages would be answered with a
    stale summary instead of being forwarded upstream.
    """
    if not messages:
        return False
    last = messages[-1]
    if not (last.get("role") == "tool" or last.get("tool_call_id")):
        return False
    text = message_text(messages)
    markers = (
        TOOL_NAME,
        WRITE_TOOL_NAME,
        "wrote file:",
        "task type:",
        "artifact:",
        "manifest:",
        "changed files:",
    )
    return any(marker in text for marker in markers)
126
+
127
+
128
+ def classify_text(text: str) -> str:
129
+ text = text.lower()
130
  artifact_terms = (
131
+ "website",
132
+ "one-page",
133
+ "one page",
134
+ "homepage",
135
  "complete html",
136
  "html file",
137
  "one-file website",
 
139
  "build a website",
140
  "make a website",
141
  "full file",
142
+ "desktop",
143
+ "owner pack",
144
+ "operating pack",
145
+ "business suite",
146
  )
147
  work_terms = (
148
  "create ",
 
163
  return "normal"
164
 
165
 
166
def classify_job(messages: list[dict[str, Any]]) -> str:
    """Classify the conversation by its most recent user prompt."""
    newest_prompt = latest_user_text(messages)
    unquoted = clean_prompt(newest_prompt)
    return classify_text(unquoted)
168
+
169
+
170
def infer_kind(prompt: str) -> str:
    """Map a prompt to ``"website"``, ``"business_suite"`` or ``"auto"``.

    Matching is by case-insensitive substring; website terms take precedence.
    """
    lowered = prompt.lower()
    kinds_by_terms = (
        ("website", ("website", "landing page", "one-page", "one page", "homepage", "html")),
        ("business_suite", ("owner pack", "operating pack", "business suite")),
    )
    for kind, terms in kinds_by_terms:
        for term in terms:
            if term in lowered:
                return kind
    return "auto"
177
+
178
+
179
def infer_out_dir(prompt: str) -> str:
    """Infer an output directory under ``~/Desktop`` from the prompt.

    Args:
        prompt: The user's request text.

    Returns:
        ``~/Desktop/<name>`` when the prompt contains "folder named <name>"
        (whitespace runs in the name become hyphens); otherwise the default
        ``~/Desktop/Kaiju-Coder-7-Artifacts`` when the prompt mentions
        "desktop"; otherwise ``""``.
    """
    desktop = os.path.join(os.path.expanduser("~"), "Desktop")
    folder_match = re.search(r"folder named\s+([A-Za-z0-9_ -]{3,80})(?:\.|,|$)", prompt, re.IGNORECASE)
    if folder_match:
        raw_name = folder_match.group(1).strip().rstrip(" .")
        folder = re.sub(r"\s+", "-", re.sub(r"[^A-Za-z0-9_. -]", "", raw_name))
        # The capture may consist solely of spaces; an empty name would make
        # the Desktop directory itself the output target, so fall through.
        if folder:
            return os.path.join(desktop, folder)
    if "desktop" in prompt.lower():
        return os.path.join(desktop, "Kaiju-Coder-7-Artifacts")
    return ""
187
+
188
+
189
def should_synthesize_tool_call(body: dict[str, Any], messages: list[dict[str, Any]]) -> bool:
    """Decide whether to answer with a synthesized artifact tool call.

    Requires autorouting to be enabled, no tool messages in the conversation,
    an ``"artifact"`` classification, and the artifact tool in ``body``.
    """
    eligible = AUTOROUTE_ENABLED and not has_tool(messages)
    if eligible and classify_job(messages) == "artifact":
        return tool_available(body, TOOL_NAME)
    return False
195
+
196
+
197
def tool_available(body: dict[str, Any], name: str) -> bool:
    """Report whether ``body["tools"]`` declares a function tool called ``name``."""
    declared = body.get("tools")
    if not isinstance(declared, list):
        return False
    for entry in declared:
        if not isinstance(entry, dict) or entry.get("type") != "function":
            continue
        spec = entry.get("function")
        if isinstance(spec, dict) and spec.get("name") == name:
            return True
    return False
208
+
209
+
210
def parse_exact_file_write(prompt: str) -> dict[str, str] | None:
    """Parse a "create <path> with exactly: <content>" request.

    Returns a dict with ``file_path`` and ``content`` (both stripped), or
    ``None`` when the cleaned prompt does not match or either part is empty.
    """
    pattern = r"\bcreate\s+([A-Za-z0-9_./-]{1,160})\s+with exactly(?: this content and no extra characters)?:\s*(.+?)\s*$"
    found = re.search(pattern, clean_prompt(prompt), re.IGNORECASE | re.DOTALL)
    if found is None:
        return None
    target = found.group(1).strip()
    payload = found.group(2).strip()
    if target and payload:
        return {"file_path": target, "content": payload}
    return None
224
+
225
+
226
def should_synthesize_file_write(body: dict[str, Any], messages: list[dict[str, Any]]) -> bool:
    """Decide whether to answer with a synthesized file-write tool call.

    Requires autorouting to be enabled, no tool messages in the conversation,
    the write tool in ``body``, and a latest user prompt that parses as an
    exact file-write request.
    """
    eligible = AUTOROUTE_ENABLED and not has_tool(messages)
    if eligible and tool_available(body, WRITE_TOOL_NAME):
        parsed = parse_exact_file_write(latest_user_text(messages))
        return parsed is not None
    return False
232
+
233
+
234
def tool_call_arguments(prompt: str) -> dict[str, Any]:
    """Build the artifact tool's arguments from a user prompt.

    Always sets ``prompt``, ``kind`` and ``no_planner``; adds ``out_dir`` only
    when a directory can be inferred from the prompt.
    """
    cleaned = clean_prompt(prompt)
    arguments: dict[str, Any] = {"prompt": cleaned, "kind": infer_kind(cleaned), "no_planner": True}
    destination = infer_out_dir(cleaned)
    if destination:
        arguments["out_dir"] = destination
    return arguments
245
+
246
+
247
def completion_id(prefix: str = "chatcmpl-kaiju") -> str:
    """Return ``prefix`` followed by a hyphen and the current time in whole milliseconds."""
    millis = int(time.time() * 1000)
    return "-".join((prefix, str(millis)))
249
+
250
+
251
def write_sse(handler: BaseHTTPRequestHandler, chunks: list[dict[str, Any]]) -> None:
    """Send ``chunks`` as a complete server-sent-event response.

    Writes a 200 status and the event-stream headers, then one ``data:``
    event per chunk (compact JSON, flushed individually), then the
    ``[DONE]`` terminator.
    """
    handler.send_response(HTTPStatus.OK)
    response_headers = (
        ("content-type", "text/event-stream; charset=utf-8"),
        ("cache-control", "no-store, no-transform"),
        ("connection", "close"),
    )
    for header_name, header_value in response_headers:
        handler.send_header(header_name, header_value)
    handler.end_headers()
    for chunk in chunks:
        payload = json.dumps(chunk, separators=(",", ":"))
        handler.wfile.write(f"data: {payload}\n\n".encode("utf-8"))
        handler.wfile.flush()
    handler.wfile.write(b"data: [DONE]\n\n")
    handler.wfile.flush()
262
+
263
+
264
def split_json_arguments(args: dict[str, Any]) -> list[str]:
    """Serialize ``args`` as compact JSON and cut it into pieces of at most 768 characters."""
    piece_size = 768
    serialized = json.dumps(args, separators=(",", ":"), ensure_ascii=False)
    pieces = [serialized[start : start + piece_size] for start in range(0, len(serialized), piece_size)]
    return pieces if pieces else ["{}"]
267
+
268
+
269
def synthesize_function_call(handler: BaseHTTPRequestHandler, body: dict[str, Any], tool_name: str, args: dict[str, Any]) -> None:
    """Reply with a locally built assistant tool call instead of forwarding upstream.

    When ``body["stream"]`` is exactly ``True`` the call is sent as SSE chunks:
    an opening chunk naming the tool, one chunk per argument fragment, and a
    closing chunk with ``finish_reason`` ``"tool_calls"``. Otherwise a single
    ``chat.completion`` JSON body is sent through ``handler._json``.
    """
    created = int(time.time())
    model = str(body.get("model") or DEFAULT_MODEL)
    chat_id = completion_id()
    call_id = f"call_kaiju_{created}"

    def stream_chunk(delta: dict[str, Any], finish_reason: str | None) -> dict[str, Any]:
        # Every streamed chunk shares the same envelope; only the delta and
        # finish reason vary.
        return {
            "id": chat_id,
            "object": "chat.completion.chunk",
            "created": created,
            "model": model,
            "choices": [
                {
                    "index": 0,
                    "delta": delta,
                    "finish_reason": finish_reason,
                }
            ],
        }

    if body.get("stream") is True:
        opening_delta = {
            "role": "assistant",
            "tool_calls": [
                {
                    "index": 0,
                    "id": call_id,
                    "type": "function",
                    "function": {"name": tool_name, "arguments": ""},
                }
            ],
        }
        chunks = [stream_chunk(opening_delta, None)]
        for part in split_json_arguments(args):
            fragment_delta = {"tool_calls": [{"index": 0, "function": {"arguments": part}}]}
            chunks.append(stream_chunk(fragment_delta, None))
        chunks.append(stream_chunk({}, "tool_calls"))
        write_sse(handler, chunks)
        return

    tool_call = {
        "id": call_id,
        "type": "function",
        "function": {"name": tool_name, "arguments": json.dumps(args, separators=(",", ":"))},
    }
    response = {
        "id": chat_id,
        "object": "chat.completion",
        "created": created,
        "model": model,
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": None,
                    "tool_calls": [tool_call],
                },
                "finish_reason": "tool_calls",
            }
        ],
        "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
    }
    handler._json(HTTPStatus.OK, response)
354
+
355
+
356
def synthesize_tool_call(handler: BaseHTTPRequestHandler, body: dict[str, Any], prompt: str) -> None:
    """Reply with a synthesized artifact tool call built from ``prompt``."""
    artifact_args = tool_call_arguments(prompt)
    synthesize_function_call(handler, body, TOOL_NAME, artifact_args)
358
+
359
+
360
def synthesize_file_write_call(handler: BaseHTTPRequestHandler, body: dict[str, Any], prompt: str) -> None:
    """Reply with a synthesized file-write tool call parsed from ``prompt``.

    Raises:
        ValueError: If ``prompt`` does not parse as an exact file-write request.
    """
    write_args = parse_exact_file_write(prompt)
    if write_args is not None:
        synthesize_function_call(handler, body, WRITE_TOOL_NAME, write_args)
        return
    raise ValueError("prompt is not an exact file-write request")
365
+
366
+
367
def extract_tool_summary(messages: list[dict[str, Any]]) -> str:
    """Build a short completion summary from the most recent tool result.

    Args:
        messages: The normalized conversation messages.

    Returns:
        A "Kaiju artifact complete." summary listing any recognized
        ``Label: value`` lines; else a "File written." summary when a
        ``Wrote file:`` line is present; else a generic completion notice.
    """
    text = ""
    for message in reversed(messages):
        if message.get("role") == "tool" or message.get("tool_call_id"):
            text = content_to_text(message.get("content", ""))
            break
    if not text:
        # Fall back to the whole conversation with its original casing: the
        # labels below are matched case-sensitively, so a lowercased
        # transcript could never match them.
        text = "\n".join(content_to_text(message.get("content", "")) for message in messages)
    fields = []
    for label in ("Task type", "Artifact type", "Manifest", "Artifact", "Project/repo", "Changed files", "Opened artifact"):
        match = re.search(rf"^{re.escape(label)}:\s*(.+)$", text, re.MULTILINE)
        if match:
            fields.append(f"{label}: {match.group(1).strip()}")
    if fields:
        return "Kaiju artifact complete.\n\n" + "\n".join(fields)
    write_match = re.search(r"Wrote file:\s*(.+)$", text, re.MULTILINE)
    if write_match:
        return f"File written.\n\nPath: {write_match.group(1).strip()}"
    return "Kaiju artifact complete. Review the generated output folder and manifest from the tool result."
386
+
387
+
388
def synthesize_summary(handler: BaseHTTPRequestHandler, body: dict[str, Any], messages: list[dict[str, Any]]) -> None:
    """Reply with a locally built assistant summary of the latest tool result.

    When ``body["stream"]`` is exactly ``True`` the summary is sent as three
    SSE chunks (role, content, stop); otherwise as one ``chat.completion``
    JSON body through ``handler._json``.
    """
    created = int(time.time())
    model = str(body.get("model") or DEFAULT_MODEL)
    content = extract_tool_summary(messages)
    chat_id = completion_id("chatcmpl-kaiju-summary")

    def stream_chunk(delta: dict[str, Any], finish_reason: str | None) -> dict[str, Any]:
        # Shared envelope for every streamed chunk of this reply.
        return {
            "id": chat_id,
            "object": "chat.completion.chunk",
            "created": created,
            "model": model,
            "choices": [{"index": 0, "delta": delta, "finish_reason": finish_reason}],
        }

    if body.get("stream") is True:
        write_sse(
            handler,
            [
                stream_chunk({"role": "assistant"}, None),
                stream_chunk({"content": content}, None),
                stream_chunk({}, "stop"),
            ],
        )
        return

    response = {
        "id": chat_id,
        "object": "chat.completion",
        "created": created,
        "model": model,
        "choices": [
            {
                "index": 0,
                "message": {"role": "assistant", "content": content},
                "finish_reason": "stop",
            }
        ],
        "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
    }
    handler._json(HTTPStatus.OK, response)
436
+
437
+
438
  def target_tokens(job_class: str) -> int:
439
  if job_class == "artifact":
440
  return ARTIFACT_MAX_TOKENS
 
512
  except Exception as error: # noqa: BLE001 - return request parse failures.
513
  self._json(HTTPStatus.BAD_REQUEST, {"error": {"message": str(error), "type": "bad_request"}})
514
  return
515
+ messages = normalize_messages(body.get("messages"))
516
+ if should_synthesize_file_write(body, messages):
517
+ synthesize_file_write_call(self, body, latest_user_text(messages))
518
+ return
519
+ if should_synthesize_tool_call(body, messages):
520
+ synthesize_tool_call(self, body, latest_user_text(messages))
521
+ return
522
+ if SUMMARY_ENABLED and has_tool_result(messages):
523
+ synthesize_summary(self, body, messages)
524
+ return
525
  self._forward_post("/chat/completions", body)
526
 
527
  def _headers(self) -> dict[str, str]: