Wenye He committed on
Commit
ec86a60
·
verified ·
1 Parent(s): 9d099a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -7
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
3
  import torch
 
4
 
5
  MODEL_CONFIG = {
6
  "phi-3": {
@@ -10,9 +11,7 @@ MODEL_CONFIG = {
10
  "llama3-8b": {
11
  "model_name": "NousResearch/Meta-Llama-3-8B-Instruct",
12
  "template": """<|begin_of_text|><|start_header_id|>user<|end_header_id|>
13
-
14
  {message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
15
-
16
  """
17
  }
18
  }
@@ -41,13 +40,13 @@ class ChatModel:
41
  quantization_config=bnb_config,
42
  device_map="auto",
43
  torch_dtype=torch.float16,
44
- trust_remote_code=False
45
  )
46
 
47
  self.models[model_name] = model
48
  self.tokenizers[model_name] = tokenizer
49
 
50
  def generate(self, message, model_name, history):
 
51
  self.load_model(model_name)
52
  config = MODEL_CONFIG[model_name]
53
 
@@ -68,19 +67,26 @@ class ChatModel:
68
  )
69
 
70
  response = pipe(prompt)[0]['generated_text']
71
- return response.strip()
 
 
 
 
 
 
72
 
73
  model_handler = ChatModel()
74
 
75
  def chat(message, history, model_choice):
76
  try:
77
- response = model_handler.generate(message, model_choice, history)
78
- return [(message, response)]
 
79
  except Exception as e:
80
  return [(message, f"Error: {str(e)}")]
81
 
82
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
83
- gr.Markdown("# 🚀 Phi-3 vs Llama-3 Chatbot")
84
  with gr.Row():
85
  model_choice = gr.Dropdown(
86
  choices=["phi-3", "llama3-8b"],
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
3
  import torch
4
+ import time # Added for timing
5
 
6
  MODEL_CONFIG = {
7
  "phi-3": {
 
11
  "llama3-8b": {
12
  "model_name": "NousResearch/Meta-Llama-3-8B-Instruct",
13
  "template": """<|begin_of_text|><|start_header_id|>user<|end_header_id|>
 
14
  {message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 
15
  """
16
  }
17
  }
 
40
  quantization_config=bnb_config,
41
  device_map="auto",
42
  torch_dtype=torch.float16,
 
43
  )
44
 
45
  self.models[model_name] = model
46
  self.tokenizers[model_name] = tokenizer
47
 
48
  def generate(self, message, model_name, history):
49
+ start_time = time.time() # Start timing
50
  self.load_model(model_name)
51
  config = MODEL_CONFIG[model_name]
52
 
 
67
  )
68
 
69
  response = pipe(prompt)[0]['generated_text']
70
+
71
+ # Calculate metrics
72
+ elapsed_time = time.time() - start_time
73
+ tokens = len(self.tokenizers[model_name].encode(response))
74
+ tokens_per_sec = tokens / elapsed_time if elapsed_time > 0 else 0
75
+
76
+ return response, elapsed_time, tokens_per_sec
77
 
78
  model_handler = ChatModel()
79
 
80
  def chat(message, history, model_choice):
81
  try:
82
+ response, response_time, token_speed = model_handler.generate(message, model_choice, history)
83
+ formatted_response = f"{response}\n\n⏱️ Response Time: {response_time:.2f}s | 🚀 Speed: {token_speed:.2f} tokens/s"
84
+ return [(message, formatted_response)]
85
  except Exception as e:
86
  return [(message, f"Error: {str(e)}")]
87
 
88
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
89
+ gr.Markdown("# 🚀 LLM Chatbot with Performance Metrics")
90
  with gr.Row():
91
  model_choice = gr.Dropdown(
92
  choices=["phi-3", "llama3-8b"],