Spaces: Runtime error
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, pipeline

# Model to serve. NOTE(review): DeepSeek-R1 is an extremely large MoE model;
# loading it un-quantized on CPU requires hundreds of GB of RAM — confirm the
# host actually has the resources, or switch to a distilled variant.
model_id = "deepseek-ai/DeepSeek-R1"

# Load the config and strip any baked-in quantization settings so the model
# loads in full precision on CPU (the published config may reference a
# GPU-only quantization backend that would fail here).
config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
if hasattr(config, "quantization_config"):
    delattr(config, "quantization_config")

# Load the model weights entirely on CPU.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    config=config,
    trust_remote_code=True,
    device_map={"": "cpu"},
)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# device=-1 pins the pipeline to CPU, matching the device_map above.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)
def generate(prompt, max_new_tokens=256, temperature=0.7, top_p=0.95):
    """Generate a completion for *prompt* via the module-level pipeline.

    Args:
        prompt: Input text to continue.
        max_new_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature (higher = more random).
        top_p: Nucleus-sampling probability mass.

    Returns:
        The generated text (the pipeline's default includes the prompt
        followed by the completion).
    """
    # do_sample=True is required: the text-generation pipeline defaults to
    # greedy decoding, which silently ignores temperature and top_p.
    out = pipe(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    return out[0]["generated_text"]
# Build the Gradio UI. Components must be instantiated via the gr.* factories
# inside the Blocks context; the original `demo.Button(...)` call was a bug —
# gr.Blocks instances have no Button attribute.
with gr.Blocks() as demo:
    gr.Markdown("## 🚀 DeepSeek‑R1 on CPU (no quantization)")
    prompt = gr.Textbox(label="Prompt")
    # Slider(minimum, maximum, value, step, ...)
    max_tokens = gr.Slider(64, 1024, 256, 16, label="Max new tokens")
    temperature = gr.Slider(0.1, 1.5, 0.7, 0.1, label="Temperature")
    top_p = gr.Slider(0.1, 1.0, 0.95, 0.05, label="Top‑p")
    output = gr.Textbox(label="Output")
    generate_btn = gr.Button("Generate")
    generate_btn.click(
        fn=generate,
        inputs=[prompt, max_tokens, temperature, top_p],
        outputs=output,
    )

demo.launch()