File size: 2,577 Bytes
bf9137e
f9d8cf4
122221c
f9d8cf4
122221c
bf9137e
f9d8cf4
122221c
bf9137e
 
 
f9d8cf4
bf9137e
 
 
122221c
 
 
f9d8cf4
 
 
 
122221c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# NOTE: `hf_hub_list` does not exist in huggingface_hub; the supported
# file-listing API is `list_repo_files`, which returns plain filename strings.
from huggingface_hub import hf_hub_download, list_repo_files
from transformers import AutoModel, AutoTokenizer

import gradio as gr

# Hugging Face Hub repository holding the GGUF quantizations of the model.
model_repo = "mradermacher/Qwen2-2B-RepleteCoder-DHM-GGUF"

# Step 1: List and filter GGUF files with 'Q2_K'
def find_q2_k_files(repo_id):
    """Return the names of all '.gguf' files in *repo_id* containing 'Q2_K'.

    Parameters
    ----------
    repo_id : str
        Hugging Face Hub repository id, e.g. "user/repo".

    Returns
    -------
    list[str]
        Matching filenames exactly as reported by the Hub API.

    Raises
    ------
    ValueError
        If the repository contains no matching file.
    """
    # `hf_hub_list` does not exist in huggingface_hub; `list_repo_files` is
    # the documented listing API and returns plain strings (no `.filename`
    # attribute).  Imported locally so this fix is self-contained.
    from huggingface_hub import list_repo_files

    filenames = list_repo_files(repo_id)
    q2_k_files = [
        name for name in filenames
        if "Q2_K" in name and name.endswith(".gguf")
    ]
    if not q2_k_files:
        raise ValueError(f"No files containing 'Q2_K' found in the repository {repo_id}.")
    return q2_k_files

# Step 2: Load model and tokenizer
def load_q2_k_model(repo_id, filename):
    """Load a GGUF-quantized causal LM and its tokenizer from the Hub.

    Parameters
    ----------
    repo_id : str
        Hub repository id containing the GGUF file.
    filename : str
        Name of the .gguf file inside the repository.

    Returns
    -------
    tuple
        ``(model, tokenizer)`` ready for ``generate``/encoding.
    """
    # A .gguf file is a single quantized-weights blob, not a transformers
    # checkpoint directory, so it cannot be passed to from_pretrained as a
    # local path.  Transformers instead dequantizes GGUF weights when given
    # the repo id plus a `gguf_file` argument.  AutoModelForCausalLM (not
    # AutoModel) is required so the returned model has an LM head and a
    # usable `generate` method for chat.
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model = AutoModelForCausalLM.from_pretrained(repo_id, gguf_file=filename)
    tokenizer = AutoTokenizer.from_pretrained(repo_id, gguf_file=filename)
    return model, tokenizer

# Step 3: Chatbot logic
def chat(messages, model, tokenizer):
    """Generate one assistant reply for a ChatML-style conversation.

    Parameters
    ----------
    messages : list[dict]
        Conversation so far, as ``{"role": ..., "content": ...}`` dicts.
    model
        Causal LM exposing ``generate(**inputs)``.
    tokenizer
        Tokenizer callable returning ``input_ids`` and exposing ``decode``.

    Returns
    -------
    str
        The assistant's reply text only (prompt not echoed back).
    """
    # Build a ChatML prompt.  Qwen's template separates the role tag from the
    # content with a newline and terminates each turn with "<|im_end|>\n";
    # the original used spaces, which the model was not trained on.
    input_text = ""
    if not messages or messages[0]["role"] != "system":
        # Inject a default system turn when the caller did not supply one
        # (also covers an empty conversation).
        input_text += "<|im_start|>system\nYou are a helpful assistant<|im_end|>\n"
    for message in messages:
        input_text += f"<|im_start|>{message['role']}\n{message['content']}<|im_end|>\n"
    input_text += "<|im_start|>assistant\n"

    inputs = tokenizer(input_text, return_tensors="pt")
    outputs = model.generate(**inputs)
    # generate() returns prompt tokens followed by the completion; decode only
    # the newly generated part, otherwise the whole conversation is echoed
    # back to the user.
    prompt_len = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    return response

# Step 4: Gradio Interface

# Cache loaded (model, tokenizer) pairs per filename so that every chat turn
# does not re-download and re-dequantize the multi-GB GGUF file.
_MODEL_CACHE = {}


def chatbot_interface(selected_file, chat_history):
    """Generate the assistant's next turn and append it to the history.

    Parameters
    ----------
    selected_file : str
        GGUF filename chosen in the dropdown; used as the cache key.
    chat_history : list[dict]
        Conversation so far as role/content dicts; mutated in place.

    Returns
    -------
    tuple
        ``(chat_history, chat_history)`` — once for the Chatbot widget and
        once for the gr.State component.
    """
    if selected_file not in _MODEL_CACHE:
        _MODEL_CACHE[selected_file] = load_q2_k_model(model_repo, selected_file)
    model, tokenizer = _MODEL_CACHE[selected_file]
    response = chat(chat_history, model, tokenizer)
    chat_history.append({"role": "assistant", "content": response})
    return chat_history, chat_history

# Step 5: Build the UI
q2_k_files = find_q2_k_files(model_repo)
# Accept either plain filename strings or objects exposing `.filename`, so the
# dropdown is populated correctly regardless of which Hub listing API produced
# the entries.
file_options = [getattr(f, "filename", f) for f in q2_k_files]

with gr.Blocks() as demo:
    gr.Markdown("# Hugging Face Q2_K Chatbot")
    model_selector = gr.Dropdown(choices=file_options, label="Select Model File")
    chat_history = gr.State([])
    # History is kept as OpenAI-style {"role", "content"} dicts, so the widget
    # must run in "messages" mode — the default pair format rejects dicts.
    chatbot = gr.Chatbot(type="messages")
    user_input = gr.Textbox(label="Your Message")
    send_button = gr.Button("Send")

    # Append the user's turn, then delegate reply generation to
    # chatbot_interface (which loads/caches the selected model).
    def update_chat(history, user_message, selected_file):
        history.append({"role": "user", "content": user_message})
        return chatbot_interface(selected_file, history)

    send_button.click(
        update_chat,
        inputs=[chat_history, user_input, model_selector],
        outputs=[chatbot, chat_history],
    )

demo.launch()