import gradio as gr
from llama_cpp import Llama
# Path to the GGUF model file; replace it with your own
# (download it into the Space first, or use an hf:// link).
MODEL_PATH = "DeepSeek-V3.1-Chat-Q4_K_M.gguf"

# Load the quantized model (light enough for a 16 GB CPU machine).
llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=4)
def respond(message, history):
    """Generate the assistant's reply to ``message`` given the chat so far.

    The conversation is flattened into a plain "User:/Assistant:" transcript
    and sent to the module-level ``llm`` as a completion prompt.

    Args:
        message: The user's newest message.
        history: Sequence of ``(user_text, assistant_text)`` pairs for the
            earlier turns, oldest first. ``None`` is treated as empty.

    Returns:
        The generated reply with surrounding whitespace stripped.
    """
    max_new_tokens = 512
    turns = [f"User: {user}\nAssistant: {bot}\n" for user, bot in history or []]
    current = f"User: {message}\nAssistant:"

    # Leave room for the reply inside the model's context window: drop the
    # oldest turns until prompt + generation budget fits, instead of letting
    # a long conversation overflow the context.
    budget = llm.n_ctx() - max_new_tokens
    prompt = "".join(turns) + current
    while turns and len(llm.tokenize(prompt.encode("utf-8"))) > budget:
        del turns[0]
        prompt = "".join(turns) + current

    output = llm(
        prompt,
        max_tokens=max_new_tokens,
        temperature=0.7,
        top_p=0.9,
        # Without a stop sequence the model is free to keep writing the
        # transcript and invent the next "User:" turn inside its own reply.
        stop=["\nUser:"],
    )
    return output["choices"][0]["text"].strip()
# Build the chat UI: a chatbot pane, a text box for input and a clear button.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 DeepSeek V3.1 Chatbot (Quantized, CPU)")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Tulis pesan di sini...")
    clear = gr.Button("Clear")

    def user_input(message, history):
        """Handle a submitted message and return the updated UI state.

        Args:
            message: Text currently in the input box.
            history: Chat history held by the chatbot component, as a list
                of ``(user_text, assistant_text)`` pairs (``None`` is
                treated as empty).

        Returns:
            A ``(textbox_value, history)`` pair: an empty string to clear
            the input box, and the history including the new exchange.
        """
        # Work on a copy so the list handed in by the UI is not mutated.
        history = list(history or [])
        # Skip blank submissions: running the model on an empty prompt is
        # slow on CPU and only adds an empty turn to the conversation.
        if not message or not message.strip():
            return "", history
        response = respond(message, history)
        history.append((message, response))
        return "", history

    # Pressing Enter in the text box sends the message and refreshes the chat.
    msg.submit(user_input, [msg, chatbot], [msg, chatbot])
    # The clear button resets the chatbot component by setting it to None.
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()