R-Kentaren committed on
Commit
54056c4
·
verified ·
1 Parent(s): 4176077

Upload folder using huggingface_hub

Browse files
code/config/constants.py CHANGED
@@ -12,8 +12,8 @@ MODEL_URL = "https://huggingface.co/openbmb/MiniCPM5-1B"
12
 
13
  # ─── Runtime Defaults ───────────────────────────────────────────────────
14
 
15
- DEFAULT_TEMPERATURE = 0.6
16
- DEFAULT_MAX_TOKENS = 4096
17
  PY_TIMEOUT_S = 15
18
  GRADIO_TIMEOUT_S = 30
19
  PY_MEM_LIMIT_MB = 1024
@@ -53,7 +53,12 @@ LANGUAGE_MAP: dict[str, list[str]] = {lang: frameworks for lang, frameworks in L
53
 
54
  # ─── System Prompt ───────────────────────────────────────────────────────
55
 
56
- SYSTEM_PROMPT = """You are a fullstack application code generator running locally. You help users build complete, runnable applications in any programming language and framework.
 
 
 
 
 
57
 
58
  When the user asks you to build an application:
59
  1. Generate complete, working code - not snippets or pseudocode
 
12
 
13
  # ─── Runtime Defaults ───────────────────────────────────────────────────
14
 
15
+ DEFAULT_TEMPERATURE = 0.4
16
+ DEFAULT_MAX_TOKENS = 2048
17
  PY_TIMEOUT_S = 15
18
  GRADIO_TIMEOUT_S = 30
19
  PY_MEM_LIMIT_MB = 1024
 
53
 
54
  # ─── System Prompt ───────────────────────────────────────────────────────
55
 
56
+ SYSTEM_PROMPT = """You are a code generator. Output ONLY the code. No thinking, no explanation, no commentary.
57
+
58
+ CRITICAL RULES:
59
+ - Do NOT use <think> or <thinking> tags. Do NOT reason aloud. Just output code directly.
60
+ - Do NOT write explanations before or after code. Just output the code.
61
+ - If you must explain something, keep it to ONE short sentence.
62
 
63
  When the user asks you to build an application:
64
  1. Generate complete, working code - not snippets or pseudocode
code/server/routes.py CHANGED
@@ -36,6 +36,7 @@ from code.execution.code_extractor import (
36
  extract_multi_file,
37
  is_gradio_code,
38
  normalize_language,
 
39
  )
40
  from code.execution.gradio_runner import run_gradio_app, stop_gradio_app
41
  from code.execution.python_runner import run_python
@@ -227,7 +228,9 @@ def handle_chat(
227
  final_response = ""
228
  for partial in call_model(messages):
229
  final_response = partial
230
- history[-1]["content"] = partial
 
 
231
  yield json.dumps({
232
  "type": "streaming",
233
  "status_text": "Generating...",
@@ -247,12 +250,14 @@ def handle_chat(
247
  })
248
  return
249
 
250
- # Extract code from response
251
- code, fence_lang = extract_code(final_response)
 
 
252
  target = normalize_language(target_language, fence_lang)
253
 
254
  # Also try multi-file extraction
255
- multi_files = extract_multi_file(final_response)
256
 
257
  if not code and not multi_files:
258
  yield json.dumps({
 
36
  extract_multi_file,
37
  is_gradio_code,
38
  normalize_language,
39
+ strip_thinking_blocks,
40
  )
41
  from code.execution.gradio_runner import run_gradio_app, stop_gradio_app
42
  from code.execution.python_runner import run_python
 
228
  final_response = ""
229
  for partial in call_model(messages):
230
  final_response = partial
231
+ # Strip thinking blocks so chat only shows clean output
232
+ clean_partial = strip_thinking_blocks(partial)
233
+ history[-1]["content"] = clean_partial
234
  yield json.dumps({
235
  "type": "streaming",
236
  "status_text": "Generating...",
 
250
  })
251
  return
252
 
253
+ # Extract code from the response after stripping thinking blocks
254
+ # (the chat history already holds the cleaned version)
255
+ clean_response = strip_thinking_blocks(final_response)
256
+ code, fence_lang = extract_code(clean_response)
257
  target = normalize_language(target_language, fence_lang)
258
 
259
  # Also try multi-file extraction
260
+ multi_files = extract_multi_file(clean_response)
261
 
262
  if not code and not multi_files:
263
  yield json.dumps({