prakhardoneria committed on
Commit
d9fd894
·
verified ·
1 Parent(s): 129cbeb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -25
app.py CHANGED
@@ -1,34 +1,16 @@
 
1
  import torch
2
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
3
  import gradio as gr
4
 
5
- model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
6
 
7
  tokenizer = AutoTokenizer.from_pretrained(model_id)
8
- model = AutoModelForCausalLM.from_pretrained(
9
- model_id,
10
- torch_dtype=torch.float32 # use float32 for CPU compatibility
11
- ).to("cpu")
12
-
13
- streamer = TextStreamer(tokenizer, skip_prompt=True)
14
 
15
def chat(message, history):
    """Build a TinyLlama chat-template prompt from the full history and
    return the sampled reply for the latest user message.

    Parameters:
        message: the user's newest message.
        history: list of (user, bot) string pairs from prior turns,
            as supplied by gr.ChatInterface in tuple format.

    Returns:
        The model's reply text, with special tokens and the prompt echo
        stripped.
    """
    # Replay every prior turn in TinyLlama's <|user|>/<|assistant|> format.
    prompt = ""
    for user, bot in history:
        prompt += f"<|user|>\n{user.strip()}\n<|assistant|>\n{bot.strip()}\n"
    # Open an assistant turn so generation continues as the reply.
    prompt += f"<|user|>\n{message.strip()}\n<|assistant|>\n"

    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        # Llama-family tokenizers define no pad token; reuse EOS to
        # silence the generate() warning.
        pad_token_id=tokenizer.eos_token_id
    )
    # decode() returns prompt + completion; keep only the text after the
    # final assistant marker.
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    reply = text.split("<|assistant|>")[-1].strip()
    return reply
33
 
34
- gr.ChatInterface(chat, title="TinyLlama Chat").launch()
 
1
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
2
  import torch
 
3
  import gradio as gr
4
 
5
# Hugging Face Hub checkpoint to serve. Flan-T5 is an encoder-decoder
# (seq2seq) model, hence AutoModelForSeq2SeqLM below.
model_id = "google/flan-t5-small"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Explicit CPU placement — this deployment assumes no GPU is available.
model = AutoModelForSeq2SeqLM.from_pretrained(model_id).to("cpu")
10
def chat(message, history):
    """Generate a single-turn reply with Flan-T5.

    Parameters:
        message: the user's latest message.
        history: prior chat turns supplied by gr.ChatInterface. Accepted
            to satisfy the callback signature but currently unused — the
            model is prompted one turn at a time with no context.

    Returns:
        The decoded model output with special tokens removed.
    """
    # Flan-T5 is instruction-tuned; a short task prefix is the whole prompt.
    prompt = f"Answer: {message}"
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    # Generation needs no gradients; inference_mode() skips autograd
    # bookkeeping, cutting memory and CPU cost on this CPU-only setup.
    with torch.inference_mode():
        outputs = model.generate(**inputs, max_new_tokens=100)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
15
 
16
+ gr.ChatInterface(chat, title="Flan-T5 Chat (Small)").launch()