Rajiv4Code committed on
Commit
7ddd5cc
·
verified ·
1 Parent(s): d49a9b1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_NAME = "ibm-granite/granite-3.3-2b-instruct"

# Decide the target device once; dtype follows the same decision
# (fp16 is only worthwhile on GPU — on CPU it is slow and poorly supported).
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load tokenizer and model once at import time so every chat request
# reuses the same weights.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
)
model.to(DEVICE)
model.eval()  # inference only — disables dropout etc.
15
+
16
+
17
def chat(user_input, history):
    """Generate one assistant reply for *user_input* given the chat *history*.

    Intended as the ``fn`` callback of ``gr.ChatInterface``, which passes the
    new user message plus the prior history and expects ONLY the reply string
    back — the interface maintains the transcript itself.

    Args:
        user_input: The latest user message (str).
        history: Prior turns, either as (user, assistant) tuples (Gradio
            "tuples" format) or as {"role", "content"} dicts ("messages"
            format) — both are accepted.

    Returns:
        The decoded assistant reply (str).
    """
    messages = []

    # Rebuild the conversation in the chat-template role/content format,
    # accepting either history representation Gradio may hand over.
    for turn in history:
        if isinstance(turn, dict):
            # Messages format: already {"role": ..., "content": ...}.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Tuples format: one (user, assistant) pair per turn.
            user, assistant = turn
            messages.append({"role": "user", "content": user})
            messages.append({"role": "assistant", "content": assistant})

    messages.append({"role": "user", "content": user_input})

    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )

    # Decode only the newly generated tokens, skipping the prompt portion.
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:],
        skip_special_tokens=True,
    )

    # BUG FIX: the original returned (history, history), which is the
    # Blocks+Chatbot+State contract. gr.ChatInterface expects just the
    # reply string and manages the history itself.
    return response
51
+
52
+
53
# Build the chat UI; ChatInterface wires the textbox/chatbot widgets to
# `chat` and manages the conversation history on its own.
demo = gr.ChatInterface(
    fn=chat,
    title="IBM Granite 3.3 2B Instruct",
    description="Chat with IBM Granite using Hugging Face Transformers",
)

# Guard the launch so importing this module doesn't start a server.
# Hugging Face Spaces executes app.py as __main__, so behavior there
# is unchanged.
if __name__ == "__main__":
    demo.launch()