import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, pipeline
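# Assumed dependencies (not pinned in the original file): gradio, transformers, torch,
# plus accelerate, which from_pretrained requires when a device_map is passed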

model_id = "deepseek-ai/DeepSeek-R1"

# Load & patch config
config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
if hasattr(config, "quantization_config"):
    delattr(config, "quantization_config")

# Load model on CPU
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    config=config,
    trust_remote_code=True,
    device_map={"": "cpu"}
)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Build the generation pipeline; the model is already placed on CPU via device_map,
# so no explicit device argument is needed here
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

def generate(prompt, max_new_tokens=256, temperature=0.7, top_p=0.95):
    # do_sample=True is required for temperature and top_p to have any effect
    out = pipe(prompt, max_new_tokens=max_new_tokens, do_sample=True,
               temperature=temperature, top_p=top_p)
    return out[0]["generated_text"]
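# Illustrative direct call (example prompt, not part of the original app):
#   generate("Summarize the transformer architecture in two sentences.", max_new_tokens=64)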

with gr.Blocks() as demo:
    gr.Markdown("## 🚀 DeepSeek‑R1 on CPU (no quantization)")
    prompt = gr.Textbox(label="Prompt")
    max_tokens = gr.Slider(64, 1024, value=256, step=16, label="Max new tokens")
    temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
    top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top‑p")
    output = gr.Textbox(label="Output")
    generate_btn = gr.Button("Generate")
    generate_btn.click(fn=generate, inputs=[prompt, max_tokens, temperature, top_p], outputs=output)
demo.launch()