| | import gradio as gr |
| | from llama_cpp import Llama |
| |
|
| | |
# File name of the quantized GGUF weights. No directory component is given,
# so it is resolved relative to the process's working directory.
MODEL_PATH = "DeepSeek-V3.1-Chat-Q4_K_M.gguf"

# Load the model once at import time so every chat request reuses the same
# instance: a 2048-token context window, running on 4 threads.
llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=4)
| |
|
def respond(message, history):
    """Generate the assistant's reply to ``message`` given the prior turns.

    The prompt is a plain ``User: ... / Assistant: ...`` transcript ending in
    an open ``Assistant:`` line for the model to complete.

    Args:
        message: The new user message.
        history: Prior turns as ``(user, bot)`` pairs, oldest first. ``None``
            is treated as an empty history.

    Returns:
        The generated completion text with surrounding whitespace stripped.
    """
    max_new_tokens = 512

    def token_count(text):
        # Counted per piece, so the total slightly over-estimates the real
        # prompt length; that errs on the safe side of the context limit.
        return len(llm.tokenize(text.encode("utf-8")))

    final_turn = f"User: {message}\nAssistant:"

    # Tokens left for history once the reply budget and the new message are
    # reserved. Without this cap a long conversation overflows the model's
    # context window and the completion call fails.
    budget = llm.n_ctx() - max_new_tokens - token_count(final_turn)

    # Walk from newest to oldest so the most recent turns are the ones kept.
    kept_turns = []
    for user, bot in reversed(list(history or [])):
        turn = f"User: {user}\nAssistant: {bot}\n"
        budget -= token_count(turn)
        if budget < 0:
            break
        kept_turns.append(turn)
    kept_turns.reverse()

    prompt = "".join(kept_turns) + final_turn

    output = llm(
        prompt,
        max_tokens=max_new_tokens,
        temperature=0.7,
        top_p=0.9,
        # Stop at the next user marker; otherwise the model may go on to
        # invent further "User:" turns after its own answer.
        stop=["\nUser:"],
    )
    return output["choices"][0]["text"].strip()
| |
|
# Build the chat UI: a title, the conversation view, a text input, and a
# button that clears the conversation.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 DeepSeek V3.1 Chatbot (Quantized, CPU)")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Tulis pesan di sini...")
    clear = gr.Button("Clear")

    def user_input(message, history):
        """Handle a submitted message and return the updated UI state.

        Args:
            message: Text submitted from the textbox.
            history: Current chatbot value as ``(user, bot)`` pairs; ``None``
                is treated as an empty conversation.

        Returns:
            A ``("", history)`` tuple: an empty string to clear the textbox
            and the conversation with the new turn appended.
        """
        # Work on a copy so the component's incoming value is never mutated,
        # and so a ``None`` history does not crash on ``append``.
        history = list(history or [])

        # Ignore blank submissions instead of running a full inference.
        if not message or not message.strip():
            return "", history

        response = respond(message, history)
        history.append((message, response))
        return "", history

    # Submitting the textbox runs the model, clears the box, and refreshes
    # the conversation view.
    msg.submit(user_input, [msg, chatbot], [msg, chatbot])
    # Clearing sets the chatbot value to None, bypassing the queue.
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()