Wenye He committed on
Commit
1cb71a2
·
verified ·
1 Parent(s): c31bf37

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -5
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
3
  import torch
 
4
 
5
  MODEL_CONFIG = {
6
  "phi-3": {
@@ -48,6 +49,7 @@ class ChatModel:
48
  self.tokenizers[model_name] = tokenizer
49
 
50
  def generate(self, message, model_name, history):
 
51
  self.load_model(model_name)
52
  config = MODEL_CONFIG[model_name]
53
 
@@ -67,20 +69,27 @@ class ChatModel:
67
  return_full_text=False
68
  )
69
 
70
- response = pipe(prompt)[0]['generated_text']
71
- return response.strip()
 
 
 
 
 
 
72
 
73
  model_handler = ChatModel()
74
 
75
  def chat(message, history, model_choice):
76
  try:
77
- response = model_handler.generate(message, model_choice, history)
78
- return [(message, response)]
 
79
  except Exception as e:
80
  return [(message, f"Error: {str(e)}")]
81
 
82
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
83
- gr.Markdown("# 🚀 Phi-3 vs Llama-3 Chatbot")
84
  with gr.Row():
85
  model_choice = gr.Dropdown(
86
  choices=["phi-3", "llama3-8b"],
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
3
  import torch
4
+ import time # Added for timing
5
 
6
  MODEL_CONFIG = {
7
  "phi-3": {
 
49
  self.tokenizers[model_name] = tokenizer
50
 
51
  def generate(self, message, model_name, history):
52
+ start_time = time.time() # Start timing
53
  self.load_model(model_name)
54
  config = MODEL_CONFIG[model_name]
55
 
 
69
  return_full_text=False
70
  )
71
 
72
+ response = pipe(prompt)[0]['generated_text'].strip()
73
+
74
+ # Calculate metrics
75
+ elapsed_time = time.time() - start_time
76
+ tokens = len(self.tokenizers[model_name].encode(response))
77
+ tokens_per_sec = tokens / elapsed_time if elapsed_time > 0 else 0
78
+
79
+ return response, elapsed_time, tokens_per_sec
80
 
81
  model_handler = ChatModel()
82
 
83
  def chat(message, history, model_choice):
84
  try:
85
+ response, response_time, token_speed = model_handler.generate(message, model_choice, history)
86
+ formatted_response = f"{response}\n\n⏱️ Response Time: {response_time:.2f}s | 🚀 Speed: {token_speed:.2f} tokens/s"
87
+ return [(message, formatted_response)]
88
  except Exception as e:
89
  return [(message, f"Error: {str(e)}")]
90
 
91
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
92
+ gr.Markdown("# 🚀 LLM Chatbot with Performance Metrics")
93
  with gr.Row():
94
  model_choice = gr.Dropdown(
95
  choices=["phi-3", "llama3-8b"],