Spaces:

InnovisionLLC
/

example_test

Paused

App Files Files Community

Wenye He committed on Feb 17, 2025

Commit

97128d6

verified ·

1 Parent(s): cfb24bd

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -52

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ MODEL_CONFIG = {
         "template": "<|user|>\n{message}<|end|>\n<|assistant|>"
     },
     "llama3-8b": {
-        "model_name": "meta-llama/Meta-Llama-3-8B-Instruct",
         "template": """<|begin_of_text|><|start_header_id|>user<|end_header_id|>
 {message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
@@ -17,7 +17,6 @@ MODEL_CONFIG = {
     }
 }
-# Quantization config (4-bit)
 bnb_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_quant_type="nf4",
@@ -42,59 +41,12 @@ class ChatModel:
                 quantization_config=bnb_config,
                 device_map="auto",
                 torch_dtype=torch.float16,
-                low_cpu_mem_usage=True
             )
             self.models[model_name] = model
             self.tokenizers[model_name] = tokenizer
-    def generate(self, message, model_name, history):
-        self.load_model(model_name)
-        config = MODEL_CONFIG[model_name]
-        # Format prompt
-        prompt = config["template"].format(message=message)
-        # Create pipeline
-        pipe = pipeline(
-            "text-generation",
-            model=self.models[model_name],
-            tokenizer=self.tokenizers[model_name],
-            max_new_tokens=384,
-            temperature=0.7,
-            top_p=0.9,
-            repetition_penalty=1.1,
-            do_sample=True,
-            return_full_text=False
-        )
-        response = pipe(prompt)[0]['generated_text']
-        return response.strip()
-model_handler = ChatModel()
-def chat(message, history, model_choice):
-    try:
-        response = model_handler.generate(message, model_choice, history)
-        return [(message, response)]
-    except Exception as e:
-        return [(message, f"Error: {str(e)}")]
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🚀 Phi-3 vs Llama-3 Chatbot")
-    with gr.Row():
-        model_choice = gr.Dropdown(
-            choices=["phi-3", "llama3-8b"],
-            label="Select Model",
-            value="phi-3"
-        )
-    chatbot = gr.Chatbot(height=400)
-    msg = gr.Textbox(label="Message", placeholder="Type here...")
-    with gr.Row():
-        submit_btn = gr.Button("Send", variant="primary")
-        clear_btn = gr.ClearButton([msg, chatbot])
-    msg.submit(chat, [msg, chatbot, model_choice], chatbot)
-    submit_btn.click(chat, [msg, chatbot, model_choice], chatbot)
-demo.launch()

         "template": "<|user|>\n{message}<|end|>\n<|assistant|>"
     },
     "llama3-8b": {
+        "model_name": "NousResearch/Meta-Llama-3-8B-Instruct",
         "template": """<|begin_of_text|><|start_header_id|>user<|end_header_id|>
 {message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
     }
 }
 bnb_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_quant_type="nf4",
                 quantization_config=bnb_config,
                 device_map="auto",
                 torch_dtype=torch.float16,
+                trust_remote_code=True
             )
             self.models[model_name] = model
             self.tokenizers[model_name] = tokenizer
+    # ... (keep the rest of the code the same as previous version)
+# ... (remaining code identical to previous implementation)