Rajiv4Code committed
Commit 91014aa · verified · 1 Parent(s): 7ddd5cc

Update app.py

Files changed (1):
  1. app.py +21 -32
app.py CHANGED
@@ -2,7 +2,7 @@ import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
-MODEL_NAME = "ibm-granite/granite-3.3-2b-instruct"
+MODEL_NAME = "ibm-granite/granite-3.0-2b-base"
 
 # Load tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
@@ -10,48 +10,37 @@ model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
 )
-model.to("cuda" if torch.cuda.is_available() else "cpu")
-model.eval()
-
-
-def chat(user_input, history):
-    messages = []
-
-    # Convert chat history to Granite chat format
-    for user, assistant in history:
-        messages.append({"role": "user", "content": user})
-        messages.append({"role": "assistant", "content": assistant})
-
-    messages.append({"role": "user", "content": user_input})
-
-    inputs = tokenizer.apply_chat_template(
-        messages,
-        add_generation_prompt=True,
-        tokenize=True,
-        return_dict=True,
-        return_tensors="pt",
-    ).to(model.device)
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model.to(device)
+model.eval()
+
+
+def generate_text(prompt, max_new_tokens=100, temperature=0.7):
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
 
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
-            max_new_tokens=100,
+            max_new_tokens=max_new_tokens,
             do_sample=True,
-            temperature=0.7,
+            temperature=temperature,
             top_p=0.9,
         )
 
-    response = tokenizer.decode(
-        outputs[0][inputs["input_ids"].shape[-1]:],
-        skip_special_tokens=True,
-    )
-
-    history.append((user_input, response))
-    return history, history
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
-gr.ChatInterface(
-    fn=chat,
-    title="IBM Granite 3.3 2B Instruct",
-    description="Chat with IBM Granite using Hugging Face Transformers",
-).launch()
+demo = gr.Interface(
+    fn=generate_text,
+    inputs=[
+        gr.Textbox(lines=5, label="Input Prompt"),
+        gr.Slider(10, 300, value=100, step=10, label="Max New Tokens"),
+        gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature"),
+    ],
+    outputs=gr.Textbox(lines=10, label="Generated Output"),
+    title="IBM Granite 3.0 – 2B Base",
+    description="Text generation using IBM Granite 3.0 2B Base model",
+)
+
+demo.launch()
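
For a quick sanity check of the code path this commit introduces, the snippet below runs the same tokenizer -> generate -> decode sequence as the new generate_text(), outside of Gradio. It is a standalone sketch, not part of the commit: the "Once upon a time" prompt and the 50-token budget are illustrative assumptions, while the model name, dtype selection, and sampling settings are copied from the diff above. Note that granite-3.0-2b-base is a base (non-instruct) checkpoint, which is why the updated app feeds it a raw prompt instead of applying a chat template.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_NAME = "ibm-granite/granite-3.0-2b-base"

# Same loading logic as the updated app.py.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.eval()

# Base models take plain text; no chat template is applied.
# The prompt here is an arbitrary example, not from the commit.
inputs = tokenizer("Once upon a time", return_tensors="pt").to(device)
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=50,  # illustrative; the app's default is 100
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))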