|
|
import gradio as gr |
|
|
from llama_cpp import Llama |
|
|
from huggingface_hub import hf_hub_download |
|
|
|
|
|
|
|
|
# Human-readable model name -> quantized GGUF weight filename.
# Each filename must exist inside the matching Hugging Face repo in REPO_IDS.
MODELS = {
    "DeepSeek Chat (7B Chat)": "deepseek-llm-7b-chat-Q4_K_M.gguf",
    "LLaMA 2 7B Chat": "llama-2-7b-chat.Q4_K_M.gguf",
    "Smol 7B Chat (GGUF)": "smol-7b-Q4_K_M.gguf",
    "DeepSeek Coder (6.7B Instruct)": "deepseek-coder-6.7b-instruct-Q4_K_M.gguf",
    # Small model — presumably included for quick smoke testing; verify.
    "TinyLlama 1.1B Test": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
}
|
|
|
|
|
|
|
|
# Model name -> Hugging Face Hub repository that hosts the GGUF file
# named in MODELS.  Keys must stay in sync with MODELS / SYSTEM_PROMPTS.
REPO_IDS = {
    "DeepSeek Chat (7B Chat)": "TheBloke/deepseek-llm-7B-chat-GGUF",
    "LLaMA 2 7B Chat": "TheBloke/Llama-2-7B-Chat-GGUF",
    "Smol 7B Chat (GGUF)": "TheBloke/smol-7B-GGUF",
    "DeepSeek Coder (6.7B Instruct)": "second-state/Deepseek-Coder-6.7B-Instruct-GGUF",
    "TinyLlama 1.1B Test": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
}
|
|
|
|
|
|
|
|
# Model name -> system prompt prepended to every user message in
# generate_chat.  Missing keys fall back to an empty prompt there.
SYSTEM_PROMPTS = {
    "DeepSeek Chat (7B Chat)": "You are a helpful assistant for friendly conversation:",
    "LLaMA 2 7B Chat": "You are a helpful general AI assistant:",
    "Smol 7B Chat (GGUF)": "You are a friendly general chat assistant:",
    "DeepSeek Coder (6.7B Instruct)": "You are an instruction-following assistant:",
    "TinyLlama 1.1B Test": "You are a lightweight test chat assistant:"
}
|
|
|
|
|
# Cache of constructed Llama instances, keyed by display name, so each
# model is downloaded and loaded at most once per process.
loaded_models = {}
|
|
|
|
|
def load_model(choice):
    """Return a cached Llama instance for *choice*, loading it on first use.

    Args:
        choice: A key of MODELS / REPO_IDS identifying the model.

    Returns:
        The (possibly cached) llama_cpp.Llama instance for that model.

    Raises:
        KeyError: If *choice* is not a known model name.
    """
    if choice not in loaded_models:
        # Downloads the quantized GGUF weights (or reuses the local HF cache).
        model_path = hf_hub_download(repo_id=REPO_IDS[choice], filename=MODELS[choice])
        # BUG FIX: the Llama constructor takes `model_path=`, not `model=`;
        # the original keyword raised a TypeError on first load.
        loaded_models[choice] = Llama(model_path=model_path, n_ctx=2048)
    return loaded_models[choice]
|
|
|
|
|
def generate_chat(model_choice, history):
    """Answer the newest turn of *history* in place and return it.

    *history* is a list of (user, assistant) pairs whose last entry holds
    the pending user message; its assistant slot is filled with the
    model's completion.
    """
    llm = load_model(model_choice)
    latest_question = history[-1][0]
    system = SYSTEM_PROMPTS.get(model_choice, '')
    full_prompt = "\n".join([system, latest_question])
    completion = llm(full_prompt, max_tokens=150)
    reply = completion["choices"][0]["text"]
    history[-1] = (latest_question, reply)
    return history
|
|
|
|
|
|
|
|
def preset_text(text):
    """Return *text* unchanged; used to push a canned prompt into the chat box."""
    preset = text
    return preset
|
|
|
|
|
with gr.Blocks() as demo:
    gr.Markdown("# Playground.Deeemoi — Multi-Model Chat + Quick Prompts")

    # Model picker; its current value is forwarded to the chat function
    # via ChatInterface's additional_inputs.
    selector = gr.Dropdown(list(MODELS.keys()), label="Choose Model")

    with gr.Row():
        explain_btn = gr.Button("Explain")
        summarize_btn = gr.Button("Summarize")
        translate_btn = gr.Button("Translate to Spanish")
        code_help_btn = gr.Button("Code Help")

    # ChatInterface calls fn(message, history, *additional_inputs).
    # BUG FIX: the original lambda received the message in its `history`
    # parameter, and `model_choice=selector` passed the Dropdown *component*
    # instead of its selected value.  This adapter honors generate_chat's
    # (model_choice, history-of-pairs) contract and returns just the
    # assistant text, as ChatInterface expects.
    def _chat_fn(message, history, model_choice):
        updated = generate_chat(model_choice, list(history) + [(message, None)])
        return updated[-1][1]

    chat_ui = gr.ChatInterface(
        fn=_chat_fn,
        additional_inputs=[selector],
        title="Chat with Quantized GGUF Models"
    )

    # BUG FIX: the buttons previously targeted `chat_ui` (the ChatInterface
    # object, not an output component).  Write each preset into the chat
    # input textbox instead.
    explain_btn.click(lambda: preset_text("Explain this clearly:"), None, chat_ui.textbox)
    summarize_btn.click(lambda: preset_text("Summarize this text:"), None, chat_ui.textbox)
    translate_btn.click(lambda: preset_text("Translate to Spanish:"), None, chat_ui.textbox)
    code_help_btn.click(lambda: preset_text("Help with this code snippet:"), None, chat_ui.textbox)

    # Clear the conversation when the model changes.  BUG FIX: with
    # inputs=None the callback receives no arguments, so the original
    # `lambda _: []` raised a TypeError; also target the chatbot component.
    selector.change(lambda: [], None, chat_ui.chatbot)

demo.launch()