pradeeparul2 commited on
Commit
d9c2248
·
verified ·
1 Parent(s): 27a78ee

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -0
app.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Hugging Face model id served by this Space.
model_name = "Qwen/Qwen2.5-Coder-14B-Instruct"

# Load tokenizer and model once at startup; both are read by chat() below.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 4-bit quantization keeps the 14B model within typical Space GPU memory;
# device_map="auto" lets accelerate place the weights.
# NOTE(review): passing load_in_4bit directly is deprecated in newer
# transformers releases in favor of quantization_config=BitsAndBytesConfig(...)
# — confirm the pinned transformers version still accepts it.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,
    device_map="auto",
)
def chat(message, history):
    """Generate a single-turn reply to *message* with the loaded model.

    Invoked by gr.ChatInterface, which passes the running conversation as
    *history*; this implementation ignores prior turns and answers only the
    latest message.

    Args:
        message: The user's latest input string.
        history: Conversation history supplied by Gradio (unused here).

    Returns:
        The assistant's reply as a plain string — gr.ChatInterface appends
        it to the chat display itself.
    """
    # Wrap the single user turn in the model's chat template.
    messages = [{"role": "user", "content": message}]
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # do_sample=True is required for temperature to take effect;
        # without it generate() warns and falls back to greedy decoding.
        outputs = model.generate(
            **inputs, max_new_tokens=512, temperature=0.7, do_sample=True
        )
    # Decode only the newly generated tokens, skipping the echoed prompt.
    response = tokenizer.decode(
        outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True
    )
    # BUG FIX: gr.ChatInterface expects the callback to return the reply
    # string; the old `return history, ""` (plus mutating history) made the
    # UI render a tuple instead of the answer.
    return response
# Wire the chat callback into Gradio's standard chat UI.
demo = gr.ChatInterface(chat)

# Guard the launch so importing this module (e.g. from tests or tooling)
# does not start the web server as a side effect.
if __name__ == "__main__":
    demo.launch()