Spaces:

Paul1966-2
/

python-dev-assistant

Runtime error

App Files Files Community

python-dev-assistant / app.py

Paul1966-2

Update app.py

44d74c1 verified 29 days ago

Raw

History Blame Contribute Delete

2.24 kB

	import os
	import time
	import gradio as gr
	from huggingface_hub import hf_hub_download
	from llama_cpp import Llama

	# 🔧 CONFIGURATION
	MODEL_REPO = "bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF"
	MODEL_FILE = "Qwen2.5-Coder-1.5B-Instruct-Q4_K_M.gguf"

	# Keyword arguments passed to `Llama` together with the model path.
	_LLAMA_OPTIONS = {
	    "n_ctx": 4096,
	    "n_threads": 2,
	    "n_batch": 512,
	    "verbose": False,
	    "use_mlock": True,
	}

	print("⏳ Starting Python Dev Assistant Space...")
	# Reference point for LOAD_TIME; taken before the download step, so the
	# reported figure covers the cache check/download as well as the load.
	START_TIME = time.time()

	# Step 1: resolve the GGUF file through the Hugging Face Hub helper and
	# keep the local path it returns.
	print(f"📦 Checking cache for {MODEL_FILE}...")
	model_path = hf_hub_download(filename=MODEL_FILE, repo_id=MODEL_REPO)
	print(f"✅ Model cached at: {model_path}")

	# Step 2: create the single module-level `llm` object; it is built here,
	# at import time, and reused by the generation function on every call.
	print("🧠 Loading model into memory...")
	llm = Llama(model_path=model_path, **_LLAMA_OPTIONS)

	# Elapsed seconds since START_TIME, rounded to one decimal place.
	LOAD_TIME = round(time.time() - START_TIME, 1)
	print(f"🚀 Model loaded in {LOAD_TIME}s. Ready for prompts!")

	# 3️⃣ Generation function (reuses `llm` every time)
	def generate_python_code(user_prompt):
	    """Generate Python code for a natural-language task description.

	    Args:
	        user_prompt: Task description entered by the user. ``None``, an
	            empty string, or whitespace-only text returns ``""`` right
	            away without running the model.

	    Returns:
	        The assistant message text with Markdown code-fence marker lines
	        removed and surrounding whitespace stripped.
	    """
	    # Guard first: a blank prompt would otherwise trigger a full
	    # generation for a request that carries no task.
	    if not user_prompt or not user_prompt.strip():
	        return ""

	    inference_start = time.time()
	    print(f"🔹 Processing prompt at {time.strftime('%H:%M:%S')}")

	    messages = [
	        {"role": "system", "content": "You are an expert Python developer. Write clean, PEP-8 compliant code with type hints. Output only code unless asked otherwise."},
	        {"role": "user", "content": user_prompt},
	    ]

	    output = llm.create_chat_completion(
	        messages=messages,
	        max_tokens=1024,
	        temperature=0.2,
	        top_p=0.9,
	        repeat_penalty=1.1,
	        # "```" is deliberately not a stop sequence: a reply that opens
	        # with a fence such as "```python" would be cut off before any
	        # code is produced. Fence lines are removed afterwards instead.
	        stop=["</s>"],
	    )

	    inference_time = round(time.time() - inference_start, 2)
	    print(f"✅ Done in {inference_time}s")

	    # Treat a missing/None content field as empty text.
	    content = output["choices"][0]["message"]["content"] or ""
	    return _strip_code_fences(content)


	def _strip_code_fences(text):
	    """Return *text* without Markdown fence marker lines, stripped."""
	    kept_lines = [
	        line for line in text.splitlines()
	        if not line.lstrip().startswith("```")
	    ]
	    return "\n".join(kept_lines).strip()

	# 4️⃣ Gradio UI
	def _build_interface():
	    """Assemble the Gradio interface that fronts `generate_python_code`."""
	    prompt_box = gr.Textbox(lines=4, placeholder="Describe your Python task...")
	    code_view = gr.Code(language="python")

	    # Each inner list is one example row for the single text input.
	    sample_prompts = [
	        ["Write a Pydantic v2 model for a User with email validation"],
	        ["Create an async retry wrapper for HTTP requests using aiohttp"],
	    ]

	    # LOAD_TIME is fixed at startup, so the description is static text.
	    banner = f"Loaded `{MODEL_FILE}` in {LOAD_TIME}s. Model stays in RAM between prompts."

	    return gr.Interface(
	        fn=generate_python_code,
	        inputs=prompt_box,
	        outputs=code_view,
	        title="🐍 Python Dev Assistant",
	        description=banner,
	        examples=sample_prompts,
	    )


	demo = _build_interface()

	# Start the server only when this file is run as the main script.
	if __name__ == "__main__":
	    demo.launch()