Wenye He committed
Commit dd93054 · verified · 1 Parent(s): 7a75e11

Update app.py

Files changed (1)
  1. app.py +61 -58
app.py CHANGED
@@ -1,97 +1,100 @@
Previous version (lines prefixed "-" were removed):

 import gradio as gr
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

-# Model configurations
 MODEL_CONFIG = {
-    "llama": {
-        "model_name": "meta-llama/Llama-2-7b-chat-hf",
-        "template": "[INST] {message} [/INST]"
     },
-    "phi": {
-        "model_name": "microsoft/phi-2",
-        "template": "{message}"
     }
 }

 class ChatModel:
     def __init__(self):
-        self.model = None
-        self.tokenizer = None
-        self.current_model = None
-
     def load_model(self, model_name):
-        if model_name != self.current_model:
             config = MODEL_CONFIG[model_name]
-            self.tokenizer = AutoTokenizer.from_pretrained(config["model_name"])
-            self.model = AutoModelForCausalLM.from_pretrained(
                 config["model_name"],
-                torch_dtype=torch.float16,
-                device_map="auto"
             )
-            self.current_model = model_name
-
-    def format_message(self, message, model_name):
-        return MODEL_CONFIG[model_name]["template"].format(message=message)

     def generate(self, message, model_name, history):
         self.load_model(model_name)
-        formatted_message = self.format_message(message, model_name)

-        # Create pipeline for text generation
         pipe = pipeline(
             "text-generation",
-            model=self.model,
-            tokenizer=self.tokenizer,
-            device_map="auto"
-        )
-
-        # Generate response
-        response = pipe(
-            formatted_message,
-            max_length=200,
-            do_sample=True,
             temperature=0.7,
-            top_k=50,
-            top_p=0.95,
-            pad_token_id=self.tokenizer.eos_token_id
         )

-        return response[0]['generated_text'].replace(formatted_message, "").strip()

-# Initialize model handler
 model_handler = ChatModel()

 def chat(message, history, model_choice):
-    response = model_handler.generate(message, model_choice, history)
-    return [(message, response)]

 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🤖 Local LLM Chatbot\nSelect a model and start chatting!")
-
     with gr.Row():
         model_choice = gr.Dropdown(
-            choices=["llama", "phi"],
             label="Select Model",
-            value="phi"
         )
-
     chatbot = gr.Chatbot(height=400)
-    msg = gr.Textbox(label="Your Message", placeholder="Type your message here...")
-
     with gr.Row():
-        submit_btn = gr.Button("Send")
         clear_btn = gr.ClearButton([msg, chatbot])

-    msg.submit(
-        fn=chat,
-        inputs=[msg, chatbot, model_choice],
-        outputs=[chatbot]
-    )
-    submit_btn.click(
-        fn=chat,
-        inputs=[msg, chatbot, model_choice],
-        outputs=[chatbot]
-    )

 demo.launch()
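In the updated version below, Llama-2-7B and phi-2 give way to Phi-3-mini-4k-instruct and Meta-Llama-3-8B-Instruct, both loaded with 4-bit NF4 quantization via BitsAndBytesConfig (at roughly half a byte per weight, the 8B Llama-3 checkpoint needs about 4-5 GB instead of ~16 GB in fp16). Loaded model/tokenizer pairs are now cached in dicts rather than reloaded on every switch, each model gets its own chat template, Phi-3 opts into flash-attention (which requires the flash-attn package to be installed), and generation errors are caught and shown in the chat window.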
 
Updated version (lines prefixed "+" were added):

 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
 import torch

 MODEL_CONFIG = {
+    "phi-3": {
+        "model_name": "microsoft/phi-3-mini-4k-instruct",
+        "template": "<|user|>\n{message}<|end|>\n<|assistant|>"
     },
+    "llama3-8b": {
+        "model_name": "meta-llama/Meta-Llama-3-8B-Instruct",
+        "template": """<|begin_of_text|><|start_header_id|>user<|end_header_id|>
+
+{message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+"""
     }
 }

+# Quantization config for 4-bit loading
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.float16,
+    bnb_4bit_use_double_quant=True
+)
+
 class ChatModel:
     def __init__(self):
+        self.models = {}
+        self.tokenizers = {}
+
     def load_model(self, model_name):
+        if model_name not in self.models:
             config = MODEL_CONFIG[model_name]
+
+            tokenizer = AutoTokenizer.from_pretrained(config["model_name"])
+            tokenizer.pad_token = tokenizer.eos_token
+
+            model = AutoModelForCausalLM.from_pretrained(
                 config["model_name"],
+                quantization_config=bnb_config,
+                device_map="auto",
+                attn_implementation="flash_attention_2" if "phi-3" in model_name else None,
+                torch_dtype=torch.float16
             )
+
+            self.models[model_name] = model
+            self.tokenizers[model_name] = tokenizer

     def generate(self, message, model_name, history):
         self.load_model(model_name)
+        config = MODEL_CONFIG[model_name]

+        # Format prompt
+        prompt = config["template"].format(message=message)
+
+        # Create pipeline
         pipe = pipeline(
             "text-generation",
+            model=self.models[model_name],
+            tokenizer=self.tokenizers[model_name],
+            max_new_tokens=512,
             temperature=0.7,
+            top_p=0.9,
+            repetition_penalty=1.1,
+            do_sample=True,
+            return_full_text=False
         )

+        response = pipe(prompt)[0]['generated_text']
+        return response.strip()

 model_handler = ChatModel()

 def chat(message, history, model_choice):
+    try:
+        response = model_handler.generate(message, model_choice, history)
+        return [(message, response)]
+    except Exception as e:
+        return [(message, f"Error: {str(e)}")]

 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🚀 Phi-3 vs Llama-3 Chatbot")
     with gr.Row():
         model_choice = gr.Dropdown(
+            choices=["phi-3", "llama3-8b"],
             label="Select Model",
+            value="phi-3"
         )
     chatbot = gr.Chatbot(height=400)
+    msg = gr.Textbox(label="Message", placeholder="Type here...")
     with gr.Row():
+        submit_btn = gr.Button("Send", variant="primary")
         clear_btn = gr.ClearButton([msg, chatbot])

+    msg.submit(chat, [msg, chatbot, model_choice], chatbot)
+    submit_btn.click(chat, [msg, chatbot, model_choice], chatbot)

 demo.launch()
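One thing the update does not change: generate() accepts history but never reads it, and chat() returns a fresh single-pair list, so every reply replaces the whole Chatbot transcript. A minimal sketch of a handler that appends instead, assuming the same tuple-style Chatbot value (chat_append is hypothetical, not part of this commit):

# Hypothetical variant of chat(): keep prior turns visible by appending
# the new (user, bot) pair to the history the Chatbot component passes in.
def chat_append(message, history, model_choice):
    try:
        response = model_handler.generate(message, model_choice, history)
    except Exception as e:
        response = f"Error: {e}"
    history = history or []  # the Chatbot value may start out empty
    return history + [(message, response)]

Even with this change, the model itself would still only see the latest message unless generate() also folded history into the prompt.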
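The hand-written template strings must track each model's chat format exactly. Since both instruct tokenizers ship their own chat template, tokenizer.apply_chat_template could build the prompt instead; a sketch under that assumption, with build_prompt as a hypothetical helper:

# Hypothetical helper: derive the prompt from the tokenizer's bundled
# chat template instead of the hard-coded strings in MODEL_CONFIG.
def build_prompt(tokenizer, message):
    messages = [{"role": "user", "content": message}]
    return tokenizer.apply_chat_template(
        messages,
        tokenize=False,             # return the formatted string
        add_generation_prompt=True  # end with the assistant header
    )

generate() would then call build_prompt(self.tokenizers[model_name], message) in place of the .format(message=message) line, which also removes the risk of a template drifting out of sync with a model update.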