Wenye He committed: Update app.py
app.py CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
 import torch
+import time  # Added for timing
 
 MODEL_CONFIG = {
     "phi-3": {
@@ -10,9 +11,7 @@ MODEL_CONFIG = {
     "llama3-8b": {
         "model_name": "NousResearch/Meta-Llama-3-8B-Instruct",
         "template": """<|begin_of_text|><|start_header_id|>user<|end_header_id|>
-
 {message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
 """
     }
 }
@@ -41,13 +40,13 @@ class ChatModel:
             quantization_config=bnb_config,
             device_map="auto",
             torch_dtype=torch.float16,
-            trust_remote_code=False
         )
 
         self.models[model_name] = model
         self.tokenizers[model_name] = tokenizer
 
     def generate(self, message, model_name, history):
+        start_time = time.time()  # Start timing
         self.load_model(model_name)
         config = MODEL_CONFIG[model_name]
 
@@ -68,19 +67,26 @@ class ChatModel:
         )
 
         response = pipe(prompt)[0]['generated_text']
-
+
+        # Calculate metrics
+        elapsed_time = time.time() - start_time
+        tokens = len(self.tokenizers[model_name].encode(response))
+        tokens_per_sec = tokens / elapsed_time if elapsed_time > 0 else 0
+
+        return response, elapsed_time, tokens_per_sec
 
 model_handler = ChatModel()
 
 def chat(message, history, model_choice):
     try:
-        response = model_handler.generate(message, model_choice, history)
-
+        response, response_time, token_speed = model_handler.generate(message, model_choice, history)
+        formatted_response = f"{response}\n\n⏱️ Response Time: {response_time:.2f}s | 🚀 Speed: {token_speed:.2f} tokens/s"
+        return [(message, formatted_response)]
     except Exception as e:
         return [(message, f"Error: {str(e)}")]
 
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🚀
+    gr.Markdown("# 🚀 LLM Chatbot with Performance Metrics")
     with gr.Row():
         model_choice = gr.Dropdown(
             choices=["phi-3", "llama3-8b"],