Spaces:

R-Kentaren
/

fullstack-code-builder

Running

R-Kentaren committed on 16 days ago

Commit

54056c4

verified ·

1 Parent(s): 4176077

Upload folder using huggingface_hub

Files changed (2) hide show

code/config/constants.py CHANGED Viewed

@@ -12,8 +12,8 @@ MODEL_URL = "https://huggingface.co/openbmb/MiniCPM5-1B"
 # ─── Runtime Defaults ───────────────────────────────────────────────────
-DEFAULT_TEMPERATURE = 0.6
-DEFAULT_MAX_TOKENS = 4096
 PY_TIMEOUT_S = 15
 GRADIO_TIMEOUT_S = 30
 PY_MEM_LIMIT_MB = 1024
@@ -53,7 +53,12 @@ LANGUAGE_MAP: dict[str, list[str]] = {lang: frameworks for lang, frameworks in L
 # ─── System Prompt ───────────────────────────────────────────────────────
-SYSTEM_PROMPT = """You are a fullstack application code generator running locally. You help users build complete, runnable applications in any programming language and framework.
 When the user asks you to build an application:
 1. Generate complete, working code - not snippets or pseudocode

 # ─── Runtime Defaults ───────────────────────────────────────────────────
+DEFAULT_TEMPERATURE = 0.4
+DEFAULT_MAX_TOKENS = 2048
 PY_TIMEOUT_S = 15
 GRADIO_TIMEOUT_S = 30
 PY_MEM_LIMIT_MB = 1024
 # ─── System Prompt ───────────────────────────────────────────────────────
+SYSTEM_PROMPT = """You are a code generator. Output ONLY the code. No thinking, no explanation, no commentary.
+CRITICAL RULES:
+- Do NOT use <think> or <thinking> tags. Do NOT reason aloud. Just output code directly.
+- Do NOT write explanations before or after code. Just output the code.
+- If you must explain something, keep it to ONE short sentence.
 When the user asks you to build an application:
 1. Generate complete, working code - not snippets or pseudocode

code/server/routes.py CHANGED Viewed

@@ -36,6 +36,7 @@ from code.execution.code_extractor import (
     extract_multi_file,
     is_gradio_code,
     normalize_language,
 )
 from code.execution.gradio_runner import run_gradio_app, stop_gradio_app
 from code.execution.python_runner import run_python
@@ -227,7 +228,9 @@ def handle_chat(
     final_response = ""
     for partial in call_model(messages):
         final_response = partial
-        history[-1]["content"] = partial
         yield json.dumps({
             "type": "streaming",
             "status_text": "Generating...",
@@ -247,12 +250,14 @@ def handle_chat(
         })
         return
-    # Extract code from response
-    code, fence_lang = extract_code(final_response)
     target = normalize_language(target_language, fence_lang)
     # Also try multi-file extraction
-    multi_files = extract_multi_file(final_response)
     if not code and not multi_files:
         yield json.dumps({

     extract_multi_file,
     is_gradio_code,
     normalize_language,
+    strip_thinking_blocks,
 )
 from code.execution.gradio_runner import run_gradio_app, stop_gradio_app
 from code.execution.python_runner import run_python
     final_response = ""
     for partial in call_model(messages):
         final_response = partial
+        # Strip thinking blocks so chat only shows clean output
+        clean_partial = strip_thinking_blocks(partial)
+        history[-1]["content"] = clean_partial
         yield json.dumps({
             "type": "streaming",
             "status_text": "Generating...",
         })
         return
+    # Extract code from the response after stripping thinking blocks
+    # (the chat history already holds the clean version)
+    clean_response = strip_thinking_blocks(final_response)
+    code, fence_lang = extract_code(clean_response)
     target = normalize_language(target_language, fence_lang)
     # Also try multi-file extraction
+    multi_files = extract_multi_file(clean_response)
     if not code and not multi_files:
         yield json.dumps({