ar0551 committed on
Commit
21ea286
·
verified ·
1 Parent(s): 1d119ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -25
app.py CHANGED
@@ -2,46 +2,32 @@ import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import torch
4
 
5
# Model name
# NOTE(review): DeepSeek-V3 is a very large MoE model — loading it in float32 on CPU
# is unlikely to fit in memory on typical hardware; this is presumably why the commit
# replaces it with a 1B-parameter model.
model_name = "deepseek-ai/DeepSeek-V3-0324"

# Load tokenizer and model in float32 for CPU.
# trust_remote_code=True executes Python code shipped in the model repo —
# acceptable only because the repo is trusted.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.float32,  # CPU-compatible precision
    device_map={"": "cpu"}  # Force CPU
)
16
 
17
- # Chat function
18
def chat_with_bot(user_input, history):
    """Produce one assistant reply for *user_input* and extend *history*.

    history is a list of (user, bot) pairs (or None/empty for a fresh chat);
    returns (response, updated_history).
    """
    history = history or []

    # Serialize earlier turns, then the new user message, into the chat markup.
    segments = [f"<|user|>\n{u}\n<|assistant|>\n{b}\n" for u, b in history]
    segments.append(f"<|user|>\n{user_input}\n<|assistant|>\n")
    prompt = "".join(segments)

    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Keep only the text after the final assistant marker as the reply.
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = decoded.split("<|assistant|>\n")[-1].strip()

    history.append((user_input, response))
    return response, history
40
 
41
# Gradio UI: wire the chat function into a web chat widget and serve it.
demo = gr.ChatInterface(
    fn=chat_with_bot,
    title="DeepSeek CPU Chatbot",
    theme="soft",
    examples=["Tell me a joke", "What's the capital of Italy?", "What is a black hole?"],
)
demo.launch()
 
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import torch
4
 
5
# Hugging Face model id: a small (~1B-parameter) causal LM that can run on CPU.
model_id = "tiiuae/falcon-rw-1b"

# Load tokenizer and model for CPU
# float32 keeps weights in a CPU-native dtype (no half-precision kernels required).
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32)
 
 
 
 
 
10
 
11
+ # Chat logic
12
def chat_with_bot(user_input, history):
    """Generate a reply to *user_input* given the prior conversation.

    Parameters
    ----------
    user_input : str
        The new user message.
    history : list[tuple[str, str]] | None
        Prior (user, bot) exchanges; None is treated as an empty history.

    Returns
    -------
    tuple[str, list[tuple[str, str]]]
        The bot response and the updated history.
    """
    history = history or []

    # Flatten the conversation into a plain-text prompt (base LM, no chat template).
    prompt = "".join(f"{user}\n{bot}\n" for user, bot in history) + f"{user_input}\n"

    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    prompt_token_len = inputs["input_ids"].shape[-1]

    # inference_mode: no autograd bookkeeping — less CPU memory and time.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,
            # Falcon's tokenizer defines no pad token; reuse EOS to avoid the
            # "Setting pad_token_id to eos_token_id" warning on every call.
            pad_token_id=tokenizer.eos_token_id,
        )

    # BUG FIX: the previous code sliced the *decoded string* by len(prompt).
    # Detokenized text is not guaranteed to reproduce the prompt character-for-
    # character, so the reply could be clipped or contain prompt residue.
    # Slice the generated *token ids* at the prompt's token length instead and
    # decode only the newly generated tokens.
    new_tokens = outputs[0][prompt_token_len:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    history.append((user_input, response))
    return response, history
26
 
27
# Gradio UI: expose chat_with_bot as a web chat app.
chat_app = gr.ChatInterface(
    fn=chat_with_bot,
    title="Chatbot (CPU-Friendly)",
    theme="soft",
    examples=["What's Falcon?", "Tell me something about space.", "What is time travel?"],
)
chat_app.launch()