Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from llama_cpp import Llama | |
| from huggingface_hub import hf_hub_download | |
| import config | |
| import multiprocessing | |
| print("Downloading model...") | |
| model_path = hf_hub_download( | |
| repo_id=config.MODEL_REPO, | |
| filename=config.MODEL_FILE | |
| ) | |
| print("Loading model...") | |
| cpu_threads = multiprocessing.cpu_count() | |
| llm = Llama( | |
| model_path=model_path, | |
| n_ctx=config.CTX_SIZE, | |
| n_threads=cpu_threads, | |
| n_batch=512, | |
| use_mmap=True, | |
| use_mlock=False, | |
| verbose=False | |
| ) | |
# System prompt prepended to every conversation transcript sent to the model.
SYSTEM_PROMPT = """You are DeepSeek Coder, an expert programming assistant.
You write clean, correct, efficient code.
Always return only code unless explanation is requested.
"""
def format_prompt(message, history):
    """Build the plain-text transcript fed to the model.

    ``history`` is an iterable of (user, assistant) turn pairs; ``message``
    is the newest user turn, left open for the model to complete.
    """
    parts = [SYSTEM_PROMPT, "\n\n"]
    for user_turn, assistant_turn in history:
        parts.append(f"User: {user_turn}\nAssistant: {assistant_turn}\n")
    parts.append(f"User: {message}\nAssistant:")
    return "".join(parts)
def generate(message, history):
    """Stream the model's reply, yielding the cumulative text after each token.

    Yields successively longer prefixes of the final answer, which is the
    shape Gradio's streaming chat components expect.
    """
    prompt = format_prompt(message, history)
    chunks = []
    stream = llm(
        prompt,
        max_tokens=config.MAX_TOKENS,
        temperature=config.TEMPERATURE,
        stream=True,
    )
    for event in stream:
        chunks.append(event["choices"][0]["text"])
        yield "".join(chunks)
# --- UI -------------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# DeepSeek Coder 1.3B (Production GGUF)")
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(
        placeholder="Ask coding question...",
        container=False,
    )
    clear = gr.Button("Clear")

    def user(user_message, history):
        # Clear the textbox and append the new turn with an empty reply slot.
        return "", history + [[user_message, ""]]

    def bot(history):
        # Stream the assistant's reply into the last history entry so the
        # chat window updates token by token.
        user_message = history[-1][0]
        for partial in generate(user_message, history[:-1]):
            history[-1][1] = partial
            yield history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=True).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: [], None, chatbot, queue=False)

demo.queue()
demo.launch(server_name="0.0.0.0", server_port=7860)