ar0551 committed on
Commit
0085f71
·
verified ·
1 Parent(s): 8d852ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -15
app.py CHANGED
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load model and tokenizer (GPU-oriented: half precision, automatic placement).
model_name = "deepseek-ai/DeepSeek-V3-0324"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)
# Function to generate one chat turn from the model.
def chat_with_bot(user_input, history):
    """Generate a reply to *user_input* given the conversation *history*.

    Parameters
    ----------
    user_input : str
        The latest user message.
    history : list[tuple[str, str]] | None
        Prior (user, bot) message pairs; ``None`` is treated as empty.

    Returns
    -------
    tuple[str, list]
        The model's response text and the updated history.
    """
    history = history or []

    # Rebuild the transcript in the model's chat-marker format.
    prompt = ""
    for user, bot in history:
        prompt += f"<|user|>\n{user}\n<|assistant|>\n{bot}\n"
    prompt += f"<|user|>\n{user_input}\n<|assistant|>\n"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Bug fix: decode only the newly generated tokens. The previous code
    # decoded the whole sequence with skip_special_tokens=True and then
    # split on "<|assistant|>\n" -- but skip_special_tokens strips those
    # markers when they are registered as special tokens, so the split
    # could yield the entire transcript instead of just the new reply.
    prompt_len = inputs["input_ids"].shape[1]
    response = tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()

    history.append((user_input, response))
    return response, history
# Gradio UI: wire the chat function into a ChatInterface and serve it.
example_prompts = [
    "Tell me a joke",
    "What's the capital of France?",
    "Explain quantum computing simply.",
]

chatbot_ui = gr.ChatInterface(
    fn=chat_with_bot,
    title="DeepSeek Chatbot",
    theme="soft",
    examples=example_prompts,
)

chatbot_ui.launch()
 
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Model name
model_name = "deepseek-ai/DeepSeek-V3-0324"

# Load the tokenizer and the model in full precision, pinned to the CPU.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.float32,  # CPU-compatible precision
    device_map={"": "cpu"},     # force every module onto the CPU
)
# Chat function: produce one assistant turn on the CPU-pinned model.
def chat_with_bot(user_input, history):
    """Generate a reply to *user_input* given the conversation *history*.

    Parameters
    ----------
    user_input : str
        The latest user message.
    history : list[tuple[str, str]] | None
        Prior (user, bot) message pairs; ``None`` is treated as empty.

    Returns
    -------
    tuple[str, list]
        The model's response text and the updated history.
    """
    history = history or []

    # Rebuild the transcript in the model's chat-marker format.
    prompt = ""
    for user, bot in history:
        prompt += f"<|user|>\n{user}\n<|assistant|>\n{bot}\n"
    prompt += f"<|user|>\n{user_input}\n<|assistant|>\n"

    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Bug fix: decode only the newly generated tokens. The previous code
    # decoded the whole sequence with skip_special_tokens=True and then
    # split on "<|assistant|>\n" -- but skip_special_tokens strips those
    # markers when they are registered as special tokens, so the split
    # could yield the entire transcript instead of just the new reply.
    prompt_len = inputs["input_ids"].shape[1]
    response = tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()

    history.append((user_input, response))
    return response, history
# Gradio UI: build the chat interface and start serving immediately.
example_prompts = [
    "Tell me a joke",
    "What's the capital of Italy?",
    "What is a black hole?",
]

demo = gr.ChatInterface(
    fn=chat_with_bot,
    title="DeepSeek CPU Chatbot",
    theme="soft",
    examples=example_prompts,
)
demo.launch()