Update app.py
app.py
CHANGED
@@ -1,108 +1,124 @@
+import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+
+# Local model loading
+models = {
+    "mistralai/Mistral-7B-Instruct-v0.3": AutoModelForCausalLM.from_pretrained(
+        "mistralai/Mistral-7B-Instruct-v0.3",
+        device_map="auto",
+        torch_dtype=torch.bfloat16
+    ),
+    "BICORP/Lake-1-Advanced": AutoModelForCausalLM.from_pretrained(
+        "BICORP/Lake-1-Advanced",
+        device_map="auto",
+        torch_dtype=torch.bfloat16
+    )
+}
+
+tokenizers = {
+    "mistralai/Mistral-7B-Instruct-v0.3": AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3"),
+    "BICORP/Lake-1-Advanced": AutoTokenizer.from_pretrained("BICORP/Lake-1-Advanced")
+}
+
+# Define presets (updated parameter names for local inference)
+presets = {
+    "mistralai/Mistral-7B-Instruct-v0.3": {
+        "Fast": {"max_new_tokens": 256, "temperature": 1.0, "top_p": 0.8},
+        "Normal": {"max_new_tokens": 512, "temperature": 0.6, "top_p": 0.75},
+        "Quality": {"max_new_tokens": 1024, "temperature": 0.45, "top_p": 0.60},
+        "Unreal Performance": {"max_new_tokens": 1048, "temperature": 0.5, "top_p": 0.7},
+    },
+    "BICORP/Lake-1-Advanced": {
+        "Fast": {"max_new_tokens": 800, "temperature": 1.0, "top_p": 0.9},
+        "Normal": {"max_new_tokens": 4000, "temperature": 0.7, "top_p": 0.95},
+        "Quality": {"max_new_tokens": 32000, "temperature": 0.5, "top_p": 0.90},
+        "Unreal Performance": {"max_new_tokens": 128000, "temperature": 0.6, "top_p": 0.75},
+    }
+}
+
+# System messages and model choices remain the same
+system_messages = {
+    "mistralai/Mistral-7B-Instruct-v0.3": "Your name is Lake 1 Base but mine is User",
+    "BICORP/Lake-1-Advanced": "Your name is Lake 1 Advanced [Alpha] but mine is User or what I will type as my name"
+}
+
+model_choices = [
+    ("mistralai/Mistral-7B-Instruct-v0.3", "Lake 1 Base"),
+    ("BICORP/Lake-1-Advanced", "Lake 1 Advanced [Alpha]")
+]
+
+pseudonyms = [model[1] for model in model_choices]
+
+def respond(
+    message,
+    history: list,
+    model_name,
+    preset_name
+):
+    # Get the correct model and tokenizer
+    model = models[model_name]
+    tokenizer = tokenizers[model_name]
+
+    # Get the system message for the model
+    system_message = system_messages[model_name]
+
+    messages = [{"role": "system", "content": system_message}]
+
+    # Ensure history is a list of dictionaries
+    for val in history:
+        if isinstance(val, dict) and 'role' in val and 'content' in val:
+            messages.append({"role": val['role'], "content": val['content']})
+
+    messages.append({"role": "user", "content": message})
+
+    # Get the preset settings
+    preset = presets[model_name][preset_name]
+    max_new_tokens = preset["max_new_tokens"]
+    temperature = preset["temperature"]
+    top_p = preset["top_p"]
+
+    # Prepare input for the model: apply_chat_template formats the conversation
+    # with the model's chat template and returns token ids
+    # (tokenizer.chat_template is a string attribute, not a callable)
+    input_ids = tokenizer.apply_chat_template(
+        messages, add_generation_prompt=True, return_tensors="pt"
+    ).to(model.device)
+
+    # Get the response from the model; do_sample=True is required for
+    # temperature and top_p to have any effect
+    outputs = model.generate(
+        input_ids,
+        max_new_tokens=max_new_tokens,
+        do_sample=True,
+        temperature=temperature,
+        top_p=top_p,
+    )
+
+    # Decode only the newly generated tokens, not the echoed prompt
+    final_response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
+
+    return final_response
+
+def respond_with_pseudonym(
+    message,
+    history: list,
+    selected_model,
+    selected_preset
+):
+    # Find the actual model name from the pseudonym
+    try:
+        model_name = next(model[0] for model in model_choices if model[1] == selected_model)
+    except StopIteration:
+        return f"Error: The selected model '{selected_model}' is not valid. Please select a valid model."
+
+    # Call the existing respond function
+    response = respond(message, history, model_name, selected_preset)
+
+    return response
+
+# Gradio Chat Interface
+demo = gr.ChatInterface(
+    fn=respond_with_pseudonym,
+    type="messages",  # deliver history as role/content dicts, matching the loop in respond()
+    additional_inputs=[
+        gr.Dropdown(choices=pseudonyms, label="Select Model", value=pseudonyms[0]),
+        gr.Dropdown(choices=list(presets[model_choices[0][0]].keys()), label="Select Preset", value="Fast")
+    ],
+)
+
+if __name__ == "__main__":
+    demo.launch()
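
A quick way to sanity-check the new local-inference path without launching the UI is to call respond_with_pseudonym directly. This is a sketch, not part of the commit: it assumes both checkpoints downloaded and fit in GPU memory, and it uses the "Lake 1 Base" pseudonym and "Fast" preset defined above.

    # Hypothetical smoke test, run from a Python shell on the Space's hardware
    history = []
    reply = respond_with_pseudonym("Hello, who are you?", history, "Lake 1 Base", "Fast")
    print(reply)

Note that the larger Lake-1-Advanced presets request up to 128000 new tokens in a single generate() call, which will likely exceed the model's context window and available memory, so the smaller presets are the realistic ones to test with.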