import os
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from huggingface_hub import snapshot_download

# Read your Hugging Face token from the environment
HF_TOKEN = os.getenv("HF_TOKEN")
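# (Assumption: a token is only required for gated or private repos such as
# mistralai/Mistral-7B-Instruct-v0.3; set it in your shell first, e.g.
# `export HF_TOKEN=...`)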

# Define model names and their local paths
model_names = {
    "mistralai/Mistral-7B-Instruct-v0.3": "mistral-7b-instruct",
    "BICORP/Lake-1-Advanced": "lake-1-advanced"
}
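# Note: the short aliases above are informational only; snapshot_download
# manages its own cache directories, so these values are never used as paths.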

# Download a full model snapshot from the Hugging Face Hub
# (hf_hub_download fetches a single file; snapshot_download mirrors the whole repo)
def download_model(repo_id):
    model_path = snapshot_download(repo_id=repo_id, token=HF_TOKEN)
    return model_path

# Load models and tokenizers
models = {}
tokenizers = {}

for name in model_names.keys():
    model_path = download_model(name)
    models[name] = AutoModelForCausalLM.from_pretrained(model_path)
    tokenizers[name] = AutoTokenizer.from_pretrained(model_path)
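# Caution: loading two 7B-class models in full precision needs tens of GB of
# memory; consider from_pretrained(..., torch_dtype=torch.float16) or loading
# one model at a time if memory is tight.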

# Define presets for each model
presets = {
    "mistralai/Mistral-7B-Instruct-v0.3": {
        "Fast": {"max_tokens": 256, "temperature": 1.0, "top_p": 0.8},
        "Normal": {"max_tokens": 512, "temperature": 0.6, "top_p": 0.75},
        "Quality": {"max_tokens": 1024, "temperature": 0.45, "top_p": 0.60},
        "Unreal Performance": {"max_tokens": 1048, "temperature": 0.5, "top_p": 0.7},
    },
    "BICORP/Lake-1-Advanced": {
        "Fast": {"max_tokens": 800, "temperature": 1.0, "top_p": 0.9},
        "Normal": {"max_tokens": 4000, "temperature": 0.7, "top_p": 0.95},
        "Quality": {"max_tokens": 32000, "temperature": 0.5, "top_p": 0.90},
        "Unreal Performance": {"max_tokens": 128000, "temperature": 0.6, "top_p": 0.75},
    }
}
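# Each preset trades speed for quality: max_tokens caps the generated length,
# temperature scales sampling randomness, and top_p restricts sampling to the
# smallest token set whose cumulative probability exceeds the threshold.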

# System messages for each model
system_messages = {
    "mistralai/Mistral-7B-Instruct-v0.3": "Your name is Lake 1 Base but mine is User",
    "BICORP/Lake-1-Advanced": "Your name is Lake 1 Advanced [Alpha] but mine is User or what I will type as my name"
}

# Model names and their pseudonyms
model_choices = [
    ("mistralai/Mistral-7B-Instruct-v0.3", "Lake 1 Base"),
    ("BICORP/Lake-1-Advanced", "Lake 1 Advanced [Alpha]")
]

# Pseudonyms shown in the dropdown, plus a reverse map back to repo ids
pseudonyms = [pseudonym for _, pseudonym in model_choices]
pseudonym_to_name = {pseudonym: name for name, pseudonym in model_choices}

def respond(
    message,
    history: list,
    model_name,
    preset_name
):
    # Get the correct model and tokenizer
    model = models[model_name]
    tokenizer = tokenizers[model_name]
    
    # Get the system message for the model
    system_message = system_messages[model_name]
    
    # Build a plain-text prompt: system message, prior turns, then the new user turn
    input_text = system_message + "\n" + "\n".join([f"{val['role']}: {val['content']}" for val in history]) + f"\nUser: {message}\nAssistant:"
    
    # Tokenize the input
    inputs = tokenizer.encode(input_text, return_tensors='pt')
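    # Keep the input tensors on the same device as the model (a no-op on CPU)
    inputs = inputs.to(model.device)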
    
    # Get the preset settings
    preset = presets[model_name][preset_name]
    max_tokens = preset["max_tokens"]
    temperature = preset["temperature"]
    top_p = preset["top_p"]

    # Generate a response (max_new_tokens caps only the generated text, not the
    # prompt; pad_token_id silences the missing-pad-token warning)
    with torch.no_grad():
        outputs = model.generate(
            inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )
    
    # Decode the response
    final_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    # Extract the assistant's reply (the text after the final "Assistant:" marker)
    assistant_response = final_response.split("Assistant:")[-1].strip()

    # Append the user message and assistant response to the history
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": assistant_response})

    return history
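# Example call (hypothetical values): respond("Hello!", [],
# "mistralai/Mistral-7B-Instruct-v0.3", "Fast") returns the updated history.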

# Gradio interface
def launch_interface():
    with gr.Blocks() as demo:
        gr.Markdown("## Chat with Lake 1 Models")
        
        model_selector = gr.Dropdown(choices=pseudonyms, label="Select Model")
        preset_selector = gr.Dropdown(choices=["Fast", "Normal", "Quality", "Unreal Performance"], label="Select Preset")
        message_input = gr.Textbox(label="Your Message")
        chat_history = gr.Chatbot(label="Chat History", type="messages")
        
        # Map the displayed pseudonym back to the underlying repo id, run the
        # model, then clear the textbox and return the updated history
        def submit_message(message, history, pseudonym, preset_name):
            model_name = pseudonym_to_name[pseudonym]
            history = respond(message, history, model_name, preset_name)
            return "", history
        
        submit_button = gr.Button("Send")
        submit_button.click(submit_message, inputs=[message_input, chat_history, model_selector, preset_selector], outputs=[message_input, chat_history])
        
    demo.launch()

if __name__ == "__main__":
    launch_interface()