# app.py — Hugging Face Space "ai" by Erik22TY (commit a99a049, verified)
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# GGUF model filenames
# Maps each human-readable dropdown label to the quantized (Q4_K_M) GGUF
# filename inside the matching Hub repo (see REPO_IDS — keys must align).
MODELS = {
    "DeepSeek Chat (7B Chat)": "deepseek-llm-7b-chat-Q4_K_M.gguf",
    "LLaMA 2 7B Chat": "llama-2-7b-chat.Q4_K_M.gguf",
    "Smol 7B Chat (GGUF)": "smol-7b-Q4_K_M.gguf",
    "DeepSeek Coder (6.7B Instruct)": "deepseek-coder-6.7b-instruct-Q4_K_M.gguf",
    # Tiny test model
    "TinyLlama 1.1B Test": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
}
# Corresponding Hugging Face repo IDs
# Same keys as MODELS; value is the Hub repo that hosts that GGUF file,
# passed to hf_hub_download(repo_id=...) in load_model().
REPO_IDS = {
    "DeepSeek Chat (7B Chat)": "TheBloke/deepseek-llm-7B-chat-GGUF",
    "LLaMA 2 7B Chat": "TheBloke/Llama-2-7B-Chat-GGUF",
    "Smol 7B Chat (GGUF)": "TheBloke/smol-7B-GGUF",
    "DeepSeek Coder (6.7B Instruct)": "second-state/Deepseek-Coder-6.7B-Instruct-GGUF",
    # Tiny test model repo
    "TinyLlama 1.1B Test": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
}
# System prompts per model
# Same keys as MODELS; the value is prepended to the user message when
# building the prompt in generate_chat().
SYSTEM_PROMPTS = {
    "DeepSeek Chat (7B Chat)": "You are a helpful assistant for friendly conversation:",
    "LLaMA 2 7B Chat": "You are a helpful general AI assistant:",
    "Smol 7B Chat (GGUF)": "You are a friendly general chat assistant:",
    "DeepSeek Coder (6.7B Instruct)": "You are an instruction-following assistant:",
    "TinyLlama 1.1B Test": "You are a lightweight test chat assistant:"
}
# Cache: model display name -> constructed Llama instance, so each model is
# downloaded and loaded at most once per process.
loaded_models = {}


def load_model(choice):
    """Return a (cached) Llama instance for the given model display name.

    On first use, downloads the GGUF file for *choice* from the Hugging
    Face Hub and constructs a Llama wrapper around it; subsequent calls
    return the cached instance.

    Parameters:
        choice: a key of MODELS / REPO_IDS (the dropdown label).

    Returns:
        The llama_cpp.Llama instance for that model.

    Raises:
        KeyError: if *choice* is not a known model name.
    """
    if choice not in loaded_models:
        # Download GGUF model file from Hugging Face Hub
        model_path = hf_hub_download(repo_id=REPO_IDS[choice], filename=MODELS[choice])
        # BUG FIX: llama_cpp.Llama's constructor takes `model_path=`, not
        # `model=`; the original keyword raised a TypeError at load time.
        loaded_models[choice] = Llama(model_path=model_path, n_ctx=2048)
    return loaded_models[choice]
def generate_chat(model_choice, history):
    """Generate a reply for the latest user turn in *history*.

    Parameters:
        model_choice: key into MODELS/REPO_IDS/SYSTEM_PROMPTS selecting the
            model to run.
        history: list of (user_msg, bot_msg) pairs; the last entry is
            expected to carry the pending user message, and its bot slot is
            overwritten with the model output.

    Returns:
        The same history list with the final pair's reply filled in;
        returned unchanged when history is empty.
    """
    # Robustness: an empty history has no user turn to answer — the
    # original raised IndexError on history[-1] here.
    if not history:
        return history
    llm = load_model(model_choice)
    user_msg = history[-1][0]
    # Prompt is simply the per-model system prompt followed by the message.
    prompt = f"{SYSTEM_PROMPTS.get(model_choice, '')}\n{user_msg}"
    # Greedy completion capped at 150 tokens; take the first choice's text.
    out = llm(prompt, max_tokens=150)["choices"][0]["text"]
    history[-1] = (user_msg, out)
    return history
# Quick-preset helper: hands the chosen preset prompt string back unchanged
# so a button click can route it to an output component.
def preset_text(text):
    """Return *text* unchanged."""
    return text
# --- Gradio UI: model selector, quick-prompt buttons, and chat panel -----
with gr.Blocks() as demo:
    gr.Markdown("# Playground.Deeemoi — Multi-Model Chat + Quick Prompts")
    # Dropdown of model display names (keys shared by MODELS/REPO_IDS).
    selector = gr.Dropdown(list(MODELS.keys()), label="Choose Model")
    with gr.Row():
        explain_btn = gr.Button("Explain")
        summarize_btn = gr.Button("Summarize")
        translate_btn = gr.Button("Translate to Spanish")
        code_help_btn = gr.Button("Code Help")
    # NOTE(review): gr.ChatInterface invokes fn(message, history), but this
    # lambda takes only `history`, and `model_choice=selector` binds the
    # Dropdown *component object* as a default, not its current value.
    # This wiring likely does not pass the selected model at runtime —
    # verify against the Gradio ChatInterface docs (additional_inputs is
    # the documented way to feed extra components into fn).
    chat_ui = gr.ChatInterface(
        fn=lambda history, model_choice=selector: generate_chat(model_choice, history),
        title="Chat with Quantized GGUF Models"
    )
    # NOTE(review): each click targets `chat_ui` (a ChatInterface) as the
    # output; presumably the intent is to prefill the chat textbox with a
    # preset prompt — confirm the correct output component (e.g.
    # chat_ui.textbox) against the Gradio API.
    explain_btn.click(lambda: preset_text("Explain this clearly:"), None, chat_ui)
    summarize_btn.click(lambda: preset_text("Summarize this text:"), None, chat_ui)
    translate_btn.click(lambda: preset_text("Translate to Spanish:"), None, chat_ui)
    code_help_btn.click(lambda: preset_text("Help with this code snippet:"), None, chat_ui)
    # Reset the chat to an empty history whenever the model selection changes.
    selector.change(lambda _: [], None, chat_ui)

demo.launch()