Wenye He committed
Commit dd8d3db · verified · 1 Parent(s): 3a4c40c

Update app.py

Files changed (1)
  1. app.py +18 -44
app.py CHANGED
@@ -1,7 +1,6 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
 import torch
-import time
 
 MODEL_CONFIG = {
     "phi-3": {
@@ -49,64 +48,39 @@ class ChatModel:
         self.tokenizers[model_name] = tokenizer
 
     def generate(self, message, model_name, history):
-        start_time = time.time()
         self.load_model(model_name)
         config = MODEL_CONFIG[model_name]
 
         # Format prompt
         prompt = config["template"].format(message=message)
 
-        # Tokenize input with proper max_length handling
-        inputs = self.tokenizers[model_name](
-            prompt,
-            return_tensors="pt",
-            max_length=2048,
-            truncation=True
-        ).to("cuda")
-
-        # Generation parameters
-        generation_kwargs = {
-            "inputs": inputs.input_ids,
-            "max_new_tokens": 384,
-            "temperature": 0.7,
-            "top_p": 0.9,
-            "do_sample": True,
-            "pad_token_id": self.tokenizers[model_name].eos_token_id
-        }
-
-        # Phi-3 specific workaround
-        if "phi-3" in model_name:
-            generation_kwargs["attention_mask"] = inputs.attention_mask
-            generation_kwargs.pop("inputs")
-            generation_kwargs["input_ids"] = inputs.input_ids
-
-        outputs = self.models[model_name].generate(**generation_kwargs)
-
-        # Decode response
-        response = self.tokenizers[model_name].decode(
-            outputs[0][inputs.input_ids.shape[-1]:],
-            skip_special_tokens=True
-        ).strip()
-
-        # Calculate metrics
-        elapsed_time = time.time() - start_time
-        tokens = outputs[0].shape[-1] - inputs.input_ids.shape[-1]
-        tokens_per_sec = tokens / elapsed_time if elapsed_time > 0 else 0
+        # Create pipeline
+        pipe = pipeline(
+            "text-generation",
+            model=self.models[model_name],
+            tokenizer=self.tokenizers[model_name],
+            max_new_tokens=384,
+            temperature=0.7,
+            top_p=0.9,
+            repetition_penalty=1.1,
+            do_sample=True,
+            return_full_text=False
+        )
 
-        return response, elapsed_time, tokens_per_sec
+        response = pipe(prompt)[0]['generated_text']
+        return response.strip()
 
 model_handler = ChatModel()
 
 def chat(message, history, model_choice):
     try:
-        response, response_time, token_speed = model_handler.generate(message, model_choice, history)
-        formatted_response = f"{response}\n\n⏱️ Response Time: {response_time:.2f}s | 🚀 Speed: {token_speed:.2f} tokens/s"
-        return [(message, formatted_response)]
+        response = model_handler.generate(message, model_choice, history)
+        return [(message, response)]
     except Exception as e:
         return [(message, f"Error: {str(e)}")]
 
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🚀 LLM Chatbot with Performance Metrics")
+    gr.Markdown("# 🚀 Phi-3 vs Llama-3 Chatbot")
     with gr.Row():
         model_choice = gr.Dropdown(
             choices=["phi-3", "llama3-8b"],
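For reference, the pipeline-based path this commit switches to can be exercised on its own, outside the Gradio app. The sketch below is a minimal reconstruction under stated assumptions: the checkpoint name, the prompt template, and the dtype/device settings are illustrative and are not taken from app.py.

```python
# Minimal sketch of the pipeline-based generation flow adopted in this commit.
# Assumptions (not from app.py): the checkpoint name, the prompt template,
# and the dtype/device settings below.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_id = "microsoft/Phi-3-mini-4k-instruct"  # assumed checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # assumed; needs a GPU with bf16 support
    device_map="auto",           # requires the accelerate package
)

# Same sampling parameters as the new generate() method.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=384,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.1,
    do_sample=True,
    return_full_text=False,  # return only the newly generated text
)

prompt = "<|user|>\nHello, who are you?<|end|>\n<|assistant|>\n"  # assumed Phi-3 chat format
print(pipe(prompt)[0]["generated_text"].strip())
```

Compared with the removed manual path, `return_full_text=False` makes the prompt-stripping slice (`outputs[0][inputs.input_ids.shape[-1]:]`) unnecessary, and the pipeline handles tokenization, device placement, and decoding internally.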