"""Gradio demo: run DeepSeek-R1 text generation on CPU (no quantization)."""

import gradio as gr
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, pipeline

model_id = "deepseek-ai/DeepSeek-R1"

# Load & patch config: drop any baked-in quantization config so the model
# loads as plain (unquantized) weights on CPU.
config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
if hasattr(config, "quantization_config"):
    delattr(config, "quantization_config")

# Load model entirely on CPU ({"": "cpu"} maps every module to the CPU device).
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    config=config,
    trust_remote_code=True,
    device_map={"": "cpu"},
)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# device=-1 pins the pipeline to CPU.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)


def generate(prompt, max_new_tokens=256, temperature=0.7, top_p=0.95):
    """Generate a completion for *prompt*.

    Args:
        prompt: Input text to continue.
        max_new_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature (higher = more random).
        top_p: Nucleus-sampling probability mass.

    Returns:
        The generated text (pipeline default includes the prompt prefix).
    """
    out = pipe(
        prompt,
        max_new_tokens=max_new_tokens,
        # do_sample must be enabled, otherwise temperature/top_p are ignored
        # and generation is greedy regardless of the slider values.
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    return out[0]["generated_text"]


with gr.Blocks() as demo:
    gr.Markdown("## 🚀 DeepSeek‑R1 on CPU (no quantization)")
    prompt = gr.Textbox(label="Prompt")
    # step is keyword-only in current Gradio; positional passing misbinds it.
    max_tokens = gr.Slider(64, 1024, value=256, step=16, label="Max new tokens")
    temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
    top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top‑p")
    output = gr.Textbox(label="Output")
    # The button must be a gr.Button created INSIDE the Blocks context;
    # gr.Blocks has no .Button method, and components created after the
    # `with` block exits are not part of the layout.
    generate_btn = gr.Button("Generate")
    generate_btn.click(
        fn=generate,
        inputs=[prompt, max_tokens, temperature, top_p],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()