Spaces:

Paul1966-2
/

python-dev-assistant

Runtime error

App Files Files Community

Paul1966-2 committed 29 days ago

Commit

44d74c1

verified ·

1 Parent(s): 9f9e362

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -28

app.py CHANGED Viewed

@@ -1,67 +1,67 @@
 import os
 import gradio as gr
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
-# 🔧 CONFIGURATION: Change these to swap models
 # Pre-change side of the diff: model selection, download, and load.
 # MODEL_REPO / MODEL_FILE name the Hugging Face repo and the GGUF file fetched from it.
 MODEL_REPO = "bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF"
 MODEL_FILE = "Qwen2.5-Coder-1.5B-Instruct-Q4_K_M.gguf"
-# For 7B: MODEL_REPO = "bartowski/Qwen2.5-Coder-7B-Instruct-GGUF"
-#          MODEL_FILE = "Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf"
-# 1️⃣ Download model on first boot (cached automatically)
-print(f"⬇️ Downloading {MODEL_FILE} from {MODEL_REPO}...")
 # The value returned here is passed to Llama below as model_path.
 model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
-# 2️⃣ Initialize CPU-optimized LLM
 # Single module-level Llama instance; generate_python_code calls it through the global name `llm`.
 llm = Llama(
     model_path=model_path,
-    n_ctx=4096,          # Max context window
-    n_threads=2,         # Matches HF free tier vCPU count
     n_batch=512,
     verbose=False,
-    use_mlock=True       # Keep model in RAM (prevents swapping)
 )
-# 3️⃣ Generation function
 # Pre-change side of the diff: send user_prompt (plus a system prompt) to the
 # module-level `llm` and return the text of the first completion choice.
 def generate_python_code(user_prompt):
-    system_prompt = (
-        "You are an expert Python developer. Write clean, efficient, PEP-8 compliant code. "
-        "Include type hints, docstrings, and error handling where appropriate. "
-        "Output only the code block unless explicitly asked for explanations."
-    )
     # Chat-style message list: system instructions first, then the user's request.
     messages = [
-        {"role": "system", "content": system_prompt},
         {"role": "user", "content": user_prompt}
     ]
     output = llm.create_chat_completion(
         messages=messages,
         max_tokens=1024,
-        temperature=0.2,      # Low for deterministic code
         top_p=0.9,
         repeat_penalty=1.1,
         # NOTE(review): "```" as a stop string presumably also fires on an opening
         # Markdown fence, which would cut a fenced reply short — confirm.
-        stop=["</s>", "```"]  # Prevent runaway generation
     )
     # Only the first choice's message text is returned to the caller.
     return output["choices"][0]["message"]["content"]
 # 4️⃣ Gradio UI
 # Pre-change side of the diff: Gradio interface wired to generate_python_code,
 # with a 4-line text box as input and a Python code viewer as output.
 # NOTE(review): no demo.launch() call is visible in this part of the diff — confirm the file starts the app after this point.
 demo = gr.Interface(
     fn=generate_python_code,
-    inputs=gr.Textbox(
-        lines=4,
-        placeholder="e.g., Write an async function to fetch JSON from a URL, retry 3 times on failure, and parse specific fields...",
-        label="Python Task"
-    ),
-    outputs=gr.Code(language="python", label="Generated Code"),
     title="🐍 Python Dev Assistant",
     # The description is an f-string, so MODEL_FILE is read once, when the interface is constructed.
-    description=f"Running `{MODEL_FILE}` on HF Free CPU Tier. First load takes ~60s.",
     # Each example is a one-element list, matching the single input component.
     examples=[
-        ["Write a FastAPI route that accepts a CSV file and returns summary statistics"],
-        ["Refactor this list comprehension into a more readable loop with logging: `results = [x**2 for x in data if x > 0]`"],
-        ["Create a Pydantic model for a user profile with email validation and a custom validator for age > 18"]
     ]
 )

 import os
+import time
 import gradio as gr
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
+# 🔧 CONFIGURATION
 # Post-change side of the diff: model selection, download, and load, with startup timing.
 # MODEL_REPO / MODEL_FILE name the Hugging Face repo and the GGUF file fetched from it.
 MODEL_REPO = "bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF"
 MODEL_FILE = "Qwen2.5-Coder-1.5B-Instruct-Q4_K_M.gguf"
+print("⏳ Starting Python Dev Assistant Space...")
 # START_TIME is taken before the download step, so LOAD_TIME below covers the
 # download/cache lookup as well as loading the model, not the load alone.
+START_TIME = time.time()
+# 1️⃣ Download (only happens on first boot or cache miss)
+print(f"📦 Checking cache for {MODEL_FILE}...")
 # The value returned here is passed to Llama below as model_path.
 model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
+print(f"✅ Model cached at: {model_path}")
+# 2️⃣ Load into RAM (runs ONCE per Space startup)
+print("🧠 Loading model into memory...")
 # Single module-level Llama instance; generate_python_code calls it through the global name `llm`.
 llm = Llama(
     model_path=model_path,
+    n_ctx=4096,
     # NOTE(review): the pre-change comment says 2 threads matches the HF free tier vCPU count — confirm for the target hardware.
+    n_threads=2,
     n_batch=512,
     verbose=False,
     # NOTE(review): the pre-change comment says this keeps the model in RAM to prevent swapping — confirm the host permits memory locking.
+    use_mlock=True
 )
 # Elapsed seconds since START_TIME, rounded to one decimal; also shown in the UI description.
+LOAD_TIME = round(time.time() - START_TIME, 1)
+print(f"🚀 Model loaded in {LOAD_TIME}s. Ready for prompts!")
+# 3️⃣ Generation function (reuses `llm` every time)
 def _strip_code_fence(text):
     """Return the contents of a Markdown code fence that opens *text*.

     If *text* (ignoring surrounding whitespace) does not start with a
     triple-backtick fence it is returned unchanged. Otherwise the opening
     fence line (including any language tag) and everything from the
     closing fence onwards are removed.
     """
     stripped = text.strip()
     if not stripped.startswith("```"):
         return text
     # Skip the rest of the opening fence line, which may carry a language tag.
     _, _, remainder = stripped.partition("\n")
     # Keep what precedes the closing fence; if generation was cut off before
     # a closing fence was emitted, partition() leaves the whole remainder.
     code, _, _ = remainder.partition("```")
     return code.rstrip()


 def generate_python_code(user_prompt):
     """Generate Python code for *user_prompt* with the module-level ``llm``.

     Args:
         user_prompt: Free-text description of the coding task.

     Returns:
         The model's reply with any surrounding Markdown code fence removed,
         or an empty string when the prompt is empty or whitespace-only.
     """
     if not user_prompt or not user_prompt.strip():
         # Nothing to ask the model; skip a slow inference run.
         return ""
     inference_start = time.time()
     print(f"🔹 Processing prompt at {time.strftime('%H:%M:%S')}")
     messages = [
         {"role": "system", "content": "You are an expert Python developer. Write clean, PEP-8 compliant code with type hints. Output only code unless asked otherwise."},
         {"role": "user", "content": user_prompt},
     ]
     output = llm.create_chat_completion(
         messages=messages,
         max_tokens=1024,
         temperature=0.2,
         top_p=0.9,
         repeat_penalty=1.1,
         # "```" is deliberately not a stop sequence: a reply that opens with
         # a fence would be cut at its very first characters, leaving empty
         # output. max_tokens still bounds the generation length.
         stop=["</s>"],
     )
     inference_time = round(time.time() - inference_start, 2)
     print(f"✅ Done in {inference_time}s")
     # Treat a missing/None content field as an empty reply.
     content = output["choices"][0]["message"]["content"] or ""
     return _strip_code_fence(content)
 # 4️⃣ Gradio UI
 # Post-change side of the diff: Gradio interface wired to generate_python_code,
 # with a 4-line text box as input and a Python code viewer as output.
 # NOTE(review): no demo.launch() call is visible in this part of the diff — confirm the file starts the app after this point.
 demo = gr.Interface(
     fn=generate_python_code,
+    inputs=gr.Textbox(lines=4, placeholder="Describe your Python task..."),
+    outputs=gr.Code(language="python"),
     title="🐍 Python Dev Assistant",
     # The description is an f-string, so MODEL_FILE and LOAD_TIME are read once, when the interface is constructed.
+    description=f"Loaded `{MODEL_FILE}` in {LOAD_TIME}s. Model stays in RAM between prompts.",
     # Each example is a one-element list, matching the single input component.
     examples=[
+        ["Write a Pydantic v2 model for a User with email validation"],
+        ["Create an async retry wrapper for HTTP requests using aiohttp"]
     ]
 )