# Source of a Hugging Face Space, captured from the web file viewer.
# Page status at capture time: "Runtime error". File size: 2,242 bytes.
# Blame hashes shown by the viewer: 9f9e362, 44d74c1.
import os
import time
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# 🔧 CONFIGURATION
# Repository and file name passed to hf_hub_download() below.
MODEL_REPO = "bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF"
MODEL_FILE = "Qwen2.5-Coder-1.5B-Instruct-Q4_K_M.gguf"

print("⏳ Starting Python Dev Assistant Space...")
# Taken before the download step, so LOAD_TIME covers download/cache lookup
# as well as loading the model.
START_TIME = time.time()

# 1️⃣ Download (only happens on first boot or cache miss)
print(f"📦 Checking cache for {MODEL_FILE}...")
model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
print(f"✅ Model cached at: {model_path}")

# 2️⃣ Load into RAM (runs ONCE per Space startup)
print("🧠 Loading model into memory...")
llm = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_threads=2,
    n_batch=512,
    verbose=False,
    use_mlock=True,
)
LOAD_TIME = round(time.time() - START_TIME, 1)
print(f"🚀 Model loaded in {LOAD_TIME}s. Ready for prompts!")
# 3️⃣ Generation function (reuses `llm` every time)
def _extract_code(text: str) -> str:
    """Return the body of the first Markdown code fence in *text*.

    If *text* contains no fence, it is returned stripped of surrounding
    whitespace. An unterminated fence (for example when generation hit
    ``max_tokens``) yields everything after the opening fence.
    """
    _, fence, after = text.partition("```")
    if not fence:
        return text.strip()
    first_line, newline, rest = after.partition("\n")
    tag = first_line.strip()
    # Skip the info string ("python", "py", ...) that follows the opening
    # fence; keep the first line when it looks like code instead.
    body = rest if newline and (not tag or tag.isalnum()) else after
    code, _, _ = body.partition("```")
    return code.strip()


def generate_python_code(user_prompt: str) -> str:
    """Generate Python code for *user_prompt* with the module-level ``llm``.

    Args:
        user_prompt: Natural-language description of the task.

    Returns:
        The generated code with any surrounding Markdown fence removed, or an
        empty string when the prompt is empty or whitespace-only.
    """
    # Nothing to generate for a blank prompt; skip the inference call.
    if not user_prompt or not user_prompt.strip():
        return ""

    inference_start = time.time()
    print(f"🔹 Processing prompt at {time.strftime('%H:%M:%S')}")
    messages = [
        {
            "role": "system",
            "content": "You are an expert Python developer. Write clean, PEP-8 compliant code with type hints. Output only code unless asked otherwise.",
        },
        {"role": "user", "content": user_prompt},
    ]
    # "```" is deliberately NOT a stop sequence: answers usually open with a
    # code fence, so stopping on it would cut generation off before any code
    # is produced. The fence is removed from the finished text instead.
    output = llm.create_chat_completion(
        messages=messages,
        max_tokens=1024,
        temperature=0.2,
        top_p=0.9,
        repeat_penalty=1.1,
        stop=["</s>"],
    )
    inference_time = round(time.time() - inference_start, 2)
    print(f"✅ Done in {inference_time}s")
    content = output["choices"][0]["message"]["content"] or ""
    return _extract_code(content)
# 4️⃣ Gradio UI
# One text box in, one Python code viewer out; every submission is handled by
# generate_python_code().
demo = gr.Interface(
    fn=generate_python_code,
    inputs=gr.Textbox(lines=4, placeholder="Describe your Python task..."),
    outputs=gr.Code(language="python"),
    title="🐍 Python Dev Assistant",
    description=f"Loaded `{MODEL_FILE}` in {LOAD_TIME}s. Model stays in RAM between prompts.",
    examples=[
        ["Write a Pydantic v2 model for a User with email validation"],
        ["Create an async retry wrapper for HTTP requests using aiohttp"],
    ],
)

if __name__ == "__main__":
    demo.launch()