File size: 685 Bytes
3424ab1
 
0b79f85
3424ab1
0b79f85
 
3424ab1
0b79f85
 
 
3424ab1
0b79f85
 
 
3424ab1
 
 
 
 
 
 
 
 
0b79f85
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import gradio as gr
from llama_cpp import Llama
import os

# The model path is local because the Dockerfile downloads the file into the image.
model_path = "/app/coding-agent-qwen-sft-v3-GGUF.q4_k_m.gguf"

print(f"Checking for model at {model_path}...")
if not os.path.exists(model_path):
    print("MODEL NOT FOUND!")
    # Fail fast with an explicit error instead of continuing into the model
    # loader, which would otherwise fail on the same missing file with a
    # less direct message.
    raise FileNotFoundError(f"Model file not found: {model_path}")

# Loaded once at module level so every request reuses the same model instance.
llm = Llama(model_path=model_path, n_ctx=2048)

def generate(prompt, history=None):
    """Generate one assistant reply for the given user message.

    Args:
        prompt: The latest user message.
        history: Earlier conversation turns. ``gr.ChatInterface`` invokes its
            callback as ``fn(message, history)``, so this parameter must be
            accepted; it is not added to the model prompt, so each reply is
            produced from the latest message only.

    Returns:
        The text of the first completion choice returned by the model.
    """
    # Wrap the message in the <|im_start|>/<|im_end|> turn markers and leave
    # the assistant turn open so the model continues from there.
    chat_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
    output = llm(
        chat_prompt,
        max_tokens=1024,
        # Stop as soon as the model closes its own turn.
        stop=["<|im_end|>"],
        repeat_penalty=1.2,
        temperature=0.4,
    )
    return output['choices'][0]['text']

# Build the chat UI around `generate`, then serve it on all interfaces at port 7860.
chat_ui = gr.ChatInterface(generate)
chat_ui.launch(
    server_name="0.0.0.0",
    server_port=7860,
)