Spaces:

InnovisionLLC
/

example_test

Paused

App Files Community

Wenye He committed on Feb 17, 2025

Commit

9b42973

verified ·

1 Parent(s): 87c7b7d

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -48

app.py CHANGED Viewed

@@ -1,16 +1,15 @@
 import gradio as gr
-import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-# Model configurations
 MODEL_CONFIG = {
-    "llama": {
-        "model_name": "meta-llama/Llama-2-7b-chat-hf",
-        "template": "[INST] {message} [/INST]"
     },
-    "phi": {
-        "model_name": "microsoft/phi-2",
-        "template": "{message}"
     }
 }
@@ -27,39 +26,32 @@ class ChatModel:
             self.model = AutoModelForCausalLM.from_pretrained(
                 config["model_name"],
                 torch_dtype=torch.float16,
-                device_map="auto"
             )
             self.current_model = model_name
-    def format_message(self, message, model_name):
-        return MODEL_CONFIG[model_name]["template"].format(message=message)
     def generate(self, message, model_name, history):
         self.load_model(model_name)
-        formatted_message = self.format_message(message, model_name)
-        # Create pipeline for text generation
         pipe = pipeline(
             "text-generation",
             model=self.model,
             tokenizer=self.tokenizer,
-            device_map="auto"
-        )
-        # Generate response
-        response = pipe(
-            formatted_message,
-            max_length=200,
             do_sample=True,
-            temperature=0.7,
-            top_k=50,
-            top_p=0.95,
-            pad_token_id=self.tokenizer.eos_token_id
         )
-        return response[0]['generated_text'].replace(formatted_message, "").strip()
-# Initialize model handler
 model_handler = ChatModel()
 def chat(message, history, model_choice):
@@ -67,31 +59,19 @@ def chat(message, history, model_choice):
     return [(message, response)]
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🤖 Local LLM Chatbot\nSelect a model and start chatting!")
     with gr.Row():
         model_choice = gr.Dropdown(
-            choices=["llama", "phi"],
             label="Select Model",
-            value="phi"
         )
-    chatbot = gr.Chatbot(height=400)
-    msg = gr.Textbox(label="Your Message", placeholder="Type your message here...")
-    with gr.Row():
-        submit_btn = gr.Button("Send")
-        clear_btn = gr.ClearButton([msg, chatbot])
-    msg.submit(
-        fn=chat,
-        inputs=[msg, chatbot, model_choice],
-        outputs=[chatbot]
-    )
-    submit_btn.click(
-        fn=chat,
-        inputs=[msg, chatbot, model_choice],
-        outputs=[chatbot]
-    )
 demo.launch()

 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+import torch
 MODEL_CONFIG = {
+    "tinyllama": {
+        "model_name": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+        "template": "<|user|>\n{message}\n<|assistant|>"
     },
+    "phi-1.5": {
+        "model_name": "microsoft/phi-1_5",
+        "template": "Instruct: {message}\nOutput:"
     }
 }
             self.model = AutoModelForCausalLM.from_pretrained(
                 config["model_name"],
                 torch_dtype=torch.float16,
+                device_map="auto",
+                load_in_4bit=True  # Quantization to reduce memory usage
             )
             self.current_model = model_name
     def generate(self, message, model_name, history):
         self.load_model(model_name)
+        config = MODEL_CONFIG[model_name]
+        # Format prompt
+        prompt = config["template"].format(message=message)
+        # Create pipeline
         pipe = pipeline(
             "text-generation",
             model=self.model,
             tokenizer=self.tokenizer,
+            max_new_tokens=150,
+            temperature=0.3,
             do_sample=True,
+            device_map="auto"
         )
+        response = pipe(prompt)[0]['generated_text']
+        return response.split(prompt)[-1].strip()
 model_handler = ChatModel()
 def chat(message, history, model_choice):
     return [(message, response)]
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🚀 Free-Tier LLM Chatbot")
     with gr.Row():
         model_choice = gr.Dropdown(
+            choices=["tinyllama", "phi-1.5"],
             label="Select Model",
+            value="tinyllama"
         )
+    chatbot = gr.Chatbot(height=300)
+    msg = gr.Textbox(label="Message", placeholder="Type here...")
+    submit_btn = gr.Button("Send", variant="primary")
+    clear_btn = gr.ClearButton([msg, chatbot])
+    msg.submit(chat, [msg, chatbot, model_choice], chatbot)
+    submit_btn.click(chat, [msg, chatbot, model_choice], chatbot)
 demo.launch()