# chat / app.py — Gradio Space demo (user 7nglzz, commit f632d29 "wowww")
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, pipeline
model_id = "deepseek-ai/DeepSeek-R1"
# Load & patch config: drop the checkpoint's bundled quantization_config so
# transformers loads plain (unquantized) weights instead of trying to set up
# a quantization backend on CPU.
config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
if hasattr(config, "quantization_config"):
    delattr(config, "quantization_config")
# Load model on CPU — device_map={"": "cpu"} pins every module to the CPU.
# NOTE(review): this checkpoint is extremely large; loading it unquantized on
# CPU assumes the host has enough RAM — verify for the target hardware.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    config=config,
    trust_remote_code=True,
    device_map={"": "cpu"}
)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# device=-1 keeps the generation pipeline on CPU as well.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)
def generate(prompt, max_new_tokens=256, temperature=0.7, top_p=0.95):
    """Generate a completion for *prompt* with the module-level pipeline.

    Args:
        prompt: Input text to complete.
        max_new_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature (higher = more random).
        top_p: Nucleus-sampling probability mass.

    Returns:
        The generated continuation only (the prompt is not echoed back).
    """
    out = pipe(
        prompt,
        max_new_tokens=int(max_new_tokens),  # Gradio sliders may deliver floats
        temperature=temperature,
        top_p=top_p,
        do_sample=True,  # without sampling, temperature/top_p are silently ignored
        return_full_text=False,  # don't prepend the prompt to the output
    )
    return out[0]["generated_text"]
# Build the UI. All components (including the button) must be created inside
# the Blocks context so they are rendered as part of the layout.
with gr.Blocks() as demo:
    gr.Markdown("## 🚀 DeepSeek‑R1 on CPU (no quantization)")
    prompt = gr.Textbox(label="Prompt")
    # Slider's `step` is keyword-only in current Gradio, so pass all
    # configuration by keyword rather than positionally.
    max_tokens = gr.Slider(minimum=64, maximum=1024, value=256, step=16, label="Max new tokens")
    temperature = gr.Slider(minimum=0.1, maximum=1.5, value=0.7, step=0.1, label="Temperature")
    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top‑p")
    output = gr.Textbox(label="Output")
    # BUG FIX: `demo.Button` does not exist — gr.Blocks has no Button
    # attribute. Buttons are created with gr.Button(...) and wired via .click.
    generate_btn = gr.Button("Generate")
    generate_btn.click(
        fn=generate,
        inputs=[prompt, max_tokens, temperature, top_p],
        outputs=output,
    )

demo.launch()