# Coder / app.py
# Hugging Face Space by Anonymous0045 — commit 2c0bdd9 (verified), 2.03 kB.
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import config
import multiprocessing
# Fetch the GGUF weights from the Hugging Face Hub (hf_hub_download caches
# locally, so this is a no-op after the first run).
print("Downloading model...")
model_path = hf_hub_download(
    repo_id=config.MODEL_REPO,
    filename=config.MODEL_FILE
)

# Load the model through the llama.cpp bindings, using every CPU core.
print("Loading model...")
cpu_threads = multiprocessing.cpu_count()
llm = Llama(
    model_path=model_path,
    n_ctx=config.CTX_SIZE,   # context window size in tokens (from config)
    n_threads=cpu_threads,
    n_batch=512,             # prompt-processing batch size
    use_mmap=True,           # memory-map weights rather than copying into RAM
    use_mlock=False,         # don't pin pages — lets small hosts swap if needed
    verbose=False
)
# System instruction prepended to every prompt sent to the model.
SYSTEM_PROMPT = """You are DeepSeek Coder, an expert programming assistant.
You write clean, correct, efficient code.
Always return only code unless explanation is requested.
"""


def format_prompt(message, history):
    """Build a flat chat transcript for the model.

    The transcript is the system prompt, each prior (user, assistant) turn
    from ``history``, then the new ``message``; it ends with "Assistant:" so
    the model continues from the assistant's side.
    """
    turns = [f"User: {u}\nAssistant: {a}\n" for u, a in history]
    return "".join([SYSTEM_PROMPT, "\n\n", *turns, f"User: {message}\nAssistant:"])
def generate(message, history):
    """Stream a completion from the model.

    Yields the *cumulative* response text after each new token, which is the
    shape Gradio expects for a streaming chat update.
    """
    full_prompt = format_prompt(message, history)
    pieces = []
    stream = llm(
        full_prompt,
        max_tokens=config.MAX_TOKENS,
        temperature=config.TEMPERATURE,
        stream=True,
    )
    for chunk in stream:
        pieces.append(chunk["choices"][0]["text"])
        yield "".join(pieces)
# --- Gradio UI ---------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# DeepSeek Coder 1.3B (Production GGUF)")
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(
        placeholder="Ask coding question...",
        container=False
    )
    clear = gr.Button("Clear")

    def user(user_message, history):
        # Record the user's turn with an empty assistant slot, and return ""
        # to clear the textbox.
        return "", history + [[user_message, ""]]

    def bot(history):
        # Stream the reply into the last history entry so the chat window
        # updates token by token; generate() gets the history *without* the
        # in-progress turn.
        user_message = history[-1][0]
        for response in generate(user_message, history[:-1]):
            history[-1][1] = response
            yield history

    # Two-step submit: first append the user turn, then stream the bot reply.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=True).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: [], None, chatbot, queue=False)

demo.queue()  # queuing is required for generator (streaming) callbacks
demo.launch(server_name="0.0.0.0", server_port=7860)