"""Gradio chat front-end for a local GGUF model served through llama-cpp."""

import gradio as gr
from llama_cpp import Llama
import os

# Path is now local because we downloaded it in Dockerfile
model_path = "/app/coding-agent-qwen-sft-v3-GGUF.q4_k_m.gguf"

print(f"Checking for model at {model_path}...")
if not os.path.exists(model_path):
    print("MODEL NOT FOUND!")
    # Stop here with an explicit error instead of letting the loader fail
    # later with a less obvious message.
    raise FileNotFoundError(f"Model file not found: {model_path}")

llm = Llama(model_path=model_path, n_ctx=2048)


def generate(prompt, history=None):
    """Return the model's reply to a single user message.

    Args:
        prompt: The user's message text.
        history: Previous conversation turns as passed by
            ``gr.ChatInterface``. Accepted so the function matches the
            ``fn(message, history)`` callback signature; it is not used,
            so every request is answered as a fresh single-turn chat.

    Returns:
        The generated assistant text from the first completion choice.
    """
    # The prompt is wrapped in <|im_start|>/<|im_end|> chat markers;
    # generation stops at the first <|im_end|>.
    output = llm(
        f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n",
        max_tokens=1024,
        stop=["<|im_end|>"],
        repeat_penalty=1.2,
        temperature=0.4,
    )
    return output['choices'][0]['text']


# Listen on all interfaces, port 7860.
gr.ChatInterface(generate).launch(server_name="0.0.0.0", server_port=7860)