Trigger82 committed on
Commit
8e2859c
·
verified ·
1 Parent(s): d945e11

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -2
app.py CHANGED
@@ -3,8 +3,10 @@ import torch
3
  import gradio as gr
4
 
5
  model_id = "microsoft/phi-2"
 
 
6
  tokenizer = AutoTokenizer.from_pretrained(model_id)
7
- model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
8
 
9
  def chat(message):
10
  prompt = f"""
@@ -18,9 +20,11 @@ Avoid saying "as an AI" or sounding fake. Be real. Be humanlike. Be 𝕴 𝖆
18
 
19
  Now respond naturally to this message: {message}
20
  """
21
- inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
 
22
  outputs = model.generate(**inputs, max_new_tokens=200)
23
  result = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
24
  return result.split("Now respond naturally to this message:")[-1].strip()
25
 
26
  iface = gr.Interface(fn=chat, inputs="text", outputs="text")
 
3
  import gradio as gr
4
 
5
  model_id = "microsoft/phi-2"
6
+
7
+ # Load tokenizer and model on CPU
8
  tokenizer = AutoTokenizer.from_pretrained(model_id)
9
+ model = AutoModelForCausalLM.from_pretrained(model_id)
10
 
11
  def chat(message):
12
  prompt = f"""
 
20
 
21
  Now respond naturally to this message: {message}
22
  """
23
+ inputs = tokenizer(prompt, return_tensors="pt")
24
+ inputs = {k: v.to("cpu") for k, v in inputs.items()} # make sure inputs are on CPU
25
  outputs = model.generate(**inputs, max_new_tokens=200)
26
  result = tokenizer.decode(outputs[0], skip_special_tokens=True)
27
+ # Return only the AI response part, removing prompt
28
  return result.split("Now respond naturally to this message:")[-1].strip()
29
 
30
  iface = gr.Interface(fn=chat, inputs="text", outputs="text")