ar0551 committed on
Commit
edad343
·
verified ·
1 Parent(s): ea8587f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -12
app.py CHANGED
@@ -1,35 +1,44 @@
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import torch
4
- import spaces
5
 
6
- model_id = "tiiuae/falcon-rw-1b"
7
 
8
- # Load tokenizer and model for CPU
9
  tokenizer = AutoTokenizer.from_pretrained(model_id)
10
- model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32)
 
 
 
11
 
12
- # Chat logic
13
- @spaces.GPU
14
  def chat_with_bot(user_input, history):
15
  history = history or []
16
  prompt = ""
17
  for user, bot in history:
18
- prompt += f"{user}\n{bot}\n"
19
- prompt += f"{user_input}\n"
20
 
21
  inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
22
- outputs = model.generate(**inputs, max_new_tokens=200, do_sample=True)
 
 
 
 
 
 
 
 
23
  decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
24
 
25
- response = decoded[len(prompt):].strip()
26
  history.append((user_input, response))
27
  return response, history
28
 
29
  # Gradio UI
30
  gr.ChatInterface(
31
  fn=chat_with_bot,
32
- title="Chatbot (CPU-Friendly)",
33
  theme="soft",
34
- examples=["What's Falcon?", "Tell me something about space.", "What is time travel?"]
35
  ).launch(share=True)
 
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Model checkpoint; phi-2 (2.7B) runs acceptably on CPU in float32.
model_id = "microsoft/phi-2"

# Load model and tokenizer (CPU + float32 — half precision is unreliable on CPU).
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float32,
).to("cpu")


# Chat function
def chat_with_bot(user_input, history):
    """Generate one chat reply using a role-tagged transcript prompt.

    Args:
        user_input: The latest user message.
        history: List of (user, bot) message pairs, or None on the first turn.

    Returns:
        (response, history): the new reply text and the updated history.
    """
    history = history or []

    # Rebuild the conversation as an explicit "User:/Assistant:" transcript.
    prompt = ""
    for user, bot in history:
        prompt += f"User: {user}\nAssistant: {bot}\n"
    prompt += f"User: {user_input}\nAssistant:"

    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    # Inference only: no_grad avoids building autograd graphs on CPU.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            eos_token_id=tokenizer.eos_token_id,
            # phi-2 defines no pad token; reuse EOS so generate() pads
            # correctly instead of warning and guessing.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens. Slicing the decoded string with
    # decoded[len(prompt):] is fragile: decode() may normalize whitespace and
    # special tokens, so the echoed prompt need not match the input text
    # character-for-character.
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    # Keep only the first line so a hallucinated next "User:" turn is dropped.
    response = response.strip().split("\n")[0]

    history.append((user_input, response))
    return response, history


# Gradio UI
gr.ChatInterface(
    fn=chat_with_bot,
    title="Phi-2 Chatbot (ZeroGPU Safe)",
    theme="soft",
    examples=["What is AI?", "Summarize the French Revolution.", "Tell me a space fact."]
).launch(share=True)