Spaces:
Runtime error
Runtime error
| import os | |
| import time | |
| import gradio as gr | |
| from huggingface_hub import hf_hub_download | |
| from llama_cpp import Llama | |
# Configuration: which GGUF checkpoint to fetch from the Hugging Face Hub.
MODEL_REPO = "bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF"
MODEL_FILE = "Qwen2.5-Coder-1.5B-Instruct-Q4_K_M.gguf"

print("β³ Starting Python Dev Assistant Space...")
START_TIME = time.time()  # reference point for the startup duration reported below

# Step 1: resolve the model file to a local path (downloads on first boot or
# on a cache miss, per the original author's note).
print(f"π¦ Checking cache for {MODEL_FILE}...")
model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
print(f"β Model cached at: {model_path}")

# Step 2: load the model once at import time; every request reuses `llm`.
print("π§ Loading model into memory...")
_LLAMA_OPTIONS = {
    "n_ctx": 4096,
    "n_threads": 2,
    "n_batch": 512,
    "verbose": False,
    "use_mlock": True,
}
llm = Llama(model_path=model_path, **_LLAMA_OPTIONS)

# Startup time covers both the download/cache check and the load, in seconds.
LOAD_TIME = round(time.time() - START_TIME, 1)
print(f"π Model loaded in {LOAD_TIME}s. Ready for prompts!")
# Step 3: generation function (reuses the module-level `llm` on every call).
_FENCE = "```"


def _strip_code_fences(text: str) -> str:
    """Return only the code held inside Markdown fences in *text*.

    If *text* contains no fence it is returned with surrounding whitespace
    removed. When several fenced blocks are present their contents are joined
    with a blank line. An unterminated trailing fence (e.g. the reply was cut
    off by the token limit) is still treated as code.
    """
    pieces = text.split(_FENCE)
    if len(pieces) == 1:
        return text.strip()

    blocks = []
    # Splitting on the fence marker leaves fenced content at the odd indices.
    for fenced in pieces[1::2]:
        first_line, newline, rest = fenced.partition("\n")
        # The first line of a fenced block is the language tag ("python");
        # an inline fence without a newline has no tag to drop.
        code = (rest if newline else first_line).strip("\n")
        if code.strip():
            blocks.append(code)
    return "\n\n".join(blocks) if blocks else text.strip()


def generate_python_code(user_prompt):
    """Generate Python code for *user_prompt* with the preloaded model.

    Args:
        user_prompt: Free-text description of the task, as typed into the UI.

    Returns:
        The generated source code as a string with Markdown fence markup
        removed, or a short Python comment asking for input when the prompt
        is empty.
    """
    # Skip inference entirely for an empty or whitespace-only prompt.
    if not user_prompt or not user_prompt.strip():
        return "# Please describe the Python task you want help with."

    inference_start = time.time()
    print(f"πΉ Processing prompt at {time.strftime('%H:%M:%S')}")
    messages = [
        {"role": "system", "content": "You are an expert Python developer. Write clean, PEP-8 compliant code with type hints. Output only code unless asked otherwise."},
        {"role": "user", "content": user_prompt},
    ]
    # NOTE: "```" must not be a stop sequence. A reply that opens with a
    # fenced block would be cut off at the opening fence, yielding an empty
    # or truncated result. Fences are removed after generation instead.
    output = llm.create_chat_completion(
        messages=messages,
        max_tokens=1024,
        temperature=0.2,
        top_p=0.9,
        repeat_penalty=1.1,
        stop=["</s>"],
    )
    inference_time = round(time.time() - inference_start, 2)
    print(f"β Done in {inference_time}s")
    # Guard against a missing/None content field before post-processing.
    content = output["choices"][0]["message"]["content"] or ""
    return _strip_code_fences(content)
# Step 4: Gradio UI — one prompt box in, one Python code viewer out.
_prompt_box = gr.Textbox(lines=4, placeholder="Describe your Python task...")
_code_view = gr.Code(language="python")

# Clickable sample prompts; each inner list holds the value for the one input.
_example_prompts = [
    ["Write a Pydantic v2 model for a User with email validation"],
    ["Create an async retry wrapper for HTTP requests using aiohttp"],
]

demo = gr.Interface(
    fn=generate_python_code,
    inputs=_prompt_box,
    outputs=_code_view,
    title="π Python Dev Assistant",
    description=f"Loaded `{MODEL_FILE}` in {LOAD_TIME}s. Model stays in RAM between prompts.",
    examples=_example_prompts,
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()