"""Gradio chat UI backed by a quantized GGUF model served through llama-cpp-python."""

import gradio as gr
from llama_cpp import Llama

# Replace with the path to the GGUF model file (download it into the Space
# first, or use an hf:// link).
MODEL_PATH = "DeepSeek-V3.1-Chat-Q4_K_M.gguf"

# Context window passed to the model, and the generation cap per reply.
N_CTX = 2048
MAX_NEW_TOKENS = 512
# Tokens the prompt may occupy so that the reply still fits in the window.
PROMPT_TOKEN_BUDGET = N_CTX - MAX_NEW_TOKENS
# Without a stop sequence the model may keep writing the next "User:" turn
# itself; cut generation off as soon as it starts one.
STOP_SEQUENCES = ["User:"]

# Load the quantized model (light enough for a 16 GB CPU machine).
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=N_CTX,
    n_threads=4,
)


def _build_prompt(message, turns):
    """Render past (user, bot) turns plus the new message as one prompt string."""
    past = "".join(f"User: {user}\nAssistant: {bot}\n" for user, bot in turns)
    return f"{past}User: {message}\nAssistant:"


def _fit_prompt(message, history):
    """Build the prompt, dropping the oldest turns until it fits the token budget."""
    turns = list(history or [])
    prompt = _build_prompt(message, turns)
    while turns and len(llm.tokenize(prompt.encode("utf-8"))) > PROMPT_TOKEN_BUDGET:
        # Oldest context is the least relevant; discard it first.
        del turns[0]
        prompt = _build_prompt(message, turns)
    return prompt


def respond(message, history):
    """Generate the assistant's reply to ``message``.

    Args:
        message: The new user message.
        history: Iterable of ``(user, bot)`` pairs from earlier turns; ``None``
            is treated as an empty conversation.

    Returns:
        The generated reply text with surrounding whitespace stripped.
    """
    prompt = _fit_prompt(message, history)
    output = llm(
        prompt,
        max_tokens=MAX_NEW_TOKENS,
        temperature=0.7,
        top_p=0.9,
        stop=STOP_SEQUENCES,
    )
    return output["choices"][0]["text"].strip()


with gr.Blocks() as demo:
    gr.Markdown("# 🤖 DeepSeek V3.1 Chatbot (Quantized, CPU)")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Tulis pesan di sini...")
    clear = gr.Button("Clear")

    def user_input(message, history):
        """Handle a textbox submit: append the new exchange and clear the textbox.

        Returns a ``(textbox_value, chat_history)`` pair matching the
        ``[msg, chatbot]`` outputs wired below.
        """
        # The Clear button resets the chatbot to None, so history may not be a list.
        history = history if history is not None else []
        if not message or not message.strip():
            # Nothing to answer; skip the (slow) model call.
            return "", history
        response = respond(message, history)
        history.append((message, response))
        return "", history

    msg.submit(user_input, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()