File size: 1,110 Bytes
c3cbe3c
d188887
c3cbe3c
d188887
 
c3cbe3c
d188887
 
 
 
 
c3cbe3c
 
 
 
 
 
 
 
d188887
 
c3cbe3c
 
 
d188887
 
 
c3cbe3c
 
 
 
 
 
 
d188887
 
c3cbe3c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import gradio as gr
from llama_cpp import Llama

# Replace with the path to the GGUF model file (download it into the Space
# first, or use an hf:// link).
# NOTE(review): confirm that the loader actually accepts hf:// paths here.
MODEL_PATH = "DeepSeek-V3.1-Chat-Q4_K_M.gguf"

# Load the quantized model once, at import time. Per the original author's
# note it is meant to be light enough for a CPU machine with 16 GB of memory.
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,  # context window size, in tokens
    n_threads=4  # number of CPU threads used for inference
)

def respond(message, history):
    """Generate the assistant's reply to *message* given the chat *history*.

    The conversation is flattened into a plain "User: ... / Assistant: ..."
    transcript and handed to the module-level ``llm`` for completion.

    Args:
        message: The new user message.
        history: Iterable of ``(user, bot)`` pairs from earlier turns, oldest
            first. ``None`` is treated as an empty history, and a ``None``
            entry inside a pair is rendered as an empty string.

    Returns:
        The generated reply with surrounding whitespace stripped.
    """
    max_tokens = 512

    turns = [
        "User: {}\nAssistant: {}\n".format(
            "" if user is None else user,
            "" if bot is None else bot,
        )
        for user, bot in (history or [])
    ]
    final_turn = f"User: {message}\nAssistant:"
    prompt = "".join(turns) + final_turn

    # The prompt grows with every turn; once it no longer fits in the context
    # window the completion call raises. Drop the oldest turns until the
    # prompt plus the reply budget fits.
    budget = llm.n_ctx() - max_tokens
    while turns and len(llm.tokenize(prompt.encode("utf-8"))) > budget:
        turns.pop(0)
        prompt = "".join(turns) + final_turn

    output = llm(
        prompt,
        max_tokens=max_tokens,
        temperature=0.7,
        top_p=0.9,
        # Without a stop sequence the model keeps writing and invents the
        # next "User:" turn as part of its own answer.
        stop=["\nUser:"],
    )
    return output["choices"][0]["text"].strip()

# Build the chat UI: a title, the conversation view, an input box and a
# button that clears the conversation.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 DeepSeek V3.1 Chatbot (Quantized, CPU)")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Tulis pesan di sini...")
    clear = gr.Button("Clear")

    def user_input(message, history):
        """Handle a submitted message and return the updated UI state.

        Args:
            message: Text submitted from the input box.
            history: Current chatbot value as ``(user, bot)`` pairs; may be
                ``None`` or empty.

        Returns:
            A ``(textbox_value, history)`` pair: an empty string to clear the
            input box, and the history with the new exchange appended.
        """
        # Work on a copy so the value passed in by the UI is not mutated,
        # and tolerate a missing (None) chatbot value.
        updated = list(history or [])

        # Skip blank submissions instead of paying for a model generation.
        if not message or not message.strip():
            return "", updated

        reply = respond(message, updated)
        updated.append((message, reply))
        return "", updated

    # Submitting the textbox runs the model, clears the input box and
    # refreshes the conversation view.
    msg.submit(user_input, [msg, chatbot], [msg, chatbot])
    # The Clear button resets the conversation without going through the queue.
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()