"""Playground.Deeemoi — multi-model chat over quantized GGUF models.

Lets the user pick a GGUF model (downloaded on demand from the Hugging
Face Hub), chat with it through a Gradio ChatInterface, and prefill the
chat box with quick preset prompts.
"""

import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# GGUF model filenames
MODELS = {
    "DeepSeek Chat (7B Chat)": "deepseek-llm-7b-chat-Q4_K_M.gguf",
    "LLaMA 2 7B Chat": "llama-2-7b-chat.Q4_K_M.gguf",
    "Smol 7B Chat (GGUF)": "smol-7b-Q4_K_M.gguf",
    "DeepSeek Coder (6.7B Instruct)": "deepseek-coder-6.7b-instruct-Q4_K_M.gguf",
    # Tiny test model
    "TinyLlama 1.1B Test": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
}

# Corresponding Hugging Face repo IDs (keys must match MODELS)
REPO_IDS = {
    "DeepSeek Chat (7B Chat)": "TheBloke/deepseek-llm-7B-chat-GGUF",
    "LLaMA 2 7B Chat": "TheBloke/Llama-2-7B-Chat-GGUF",
    "Smol 7B Chat (GGUF)": "TheBloke/smol-7B-GGUF",
    "DeepSeek Coder (6.7B Instruct)": "second-state/Deepseek-Coder-6.7B-Instruct-GGUF",
    # Tiny test model repo
    "TinyLlama 1.1B Test": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
}

# System prompts per model
SYSTEM_PROMPTS = {
    "DeepSeek Chat (7B Chat)": "You are a helpful assistant for friendly conversation:",
    "LLaMA 2 7B Chat": "You are a helpful general AI assistant:",
    "Smol 7B Chat (GGUF)": "You are a friendly general chat assistant:",
    "DeepSeek Coder (6.7B Instruct)": "You are an instruction-following assistant:",
    "TinyLlama 1.1B Test": "You are a lightweight test chat assistant:",
}

# Cache of already-instantiated Llama objects, keyed by display name,
# so switching back to a model does not reload it from disk.
loaded_models = {}


def load_model(choice):
    """Return a cached Llama instance for *choice*, downloading on first use.

    Parameters:
        choice: a key of MODELS / REPO_IDS.

    Returns:
        llama_cpp.Llama loaded from the downloaded GGUF file.
    """
    if choice not in loaded_models:
        # Download GGUF model file from Hugging Face Hub (cached locally by hf_hub)
        model_path = hf_hub_download(repo_id=REPO_IDS[choice], filename=MODELS[choice])
        # BUG FIX: the Llama constructor's keyword is `model_path`, not `model` —
        # the original raised TypeError on the first load.
        loaded_models[choice] = Llama(model_path=model_path, n_ctx=2048)
    return loaded_models[choice]


def generate_chat(model_choice, history):
    """Answer the pending user turn in *history* with the chosen model.

    Parameters:
        model_choice: key of MODELS selecting which LLM to use.
        history: list of (user, assistant) pairs; the last pair holds the
            new user message (assistant slot ignored/overwritten).

    Returns:
        The same history list with the last pair completed.
    """
    llm = load_model(model_choice)
    user_msg = history[-1][0]
    prompt = f"{SYSTEM_PROMPTS.get(model_choice, '')}\n{user_msg}"
    out = llm(prompt, max_tokens=150)["choices"][0]["text"]
    history[-1] = (user_msg, out)
    return history


def _chat_fn(message, history, model_choice):
    """Adapter for gr.ChatInterface: fn(message, history, *additional_inputs).

    BUG FIX: the original passed a lambda whose first parameter was named
    `history` (so it actually received the user message) and defaulted
    `model_choice` to the Dropdown *component object*, not its value.
    Works on a copy so the ChatInterface-owned history is not mutated.
    """
    pending = list(history) + [(message, None)]
    return generate_chat(model_choice, pending)[-1][1]


# Quick preset helper — returns the text that should prefill the chat box.
def preset_text(text):
    return text


with gr.Blocks() as demo:
    gr.Markdown("# Playground.Deeemoi — Multi-Model Chat + Quick Prompts")
    selector = gr.Dropdown(list(MODELS.keys()), label="Choose Model")

    with gr.Row():
        explain_btn = gr.Button("Explain")
        summarize_btn = gr.Button("Summarize")
        translate_btn = gr.Button("Translate to Spanish")
        code_help_btn = gr.Button("Code Help")

    chat_ui = gr.ChatInterface(
        fn=_chat_fn,
        # Correct mechanism to feed the dropdown's *value* into the chat fn.
        additional_inputs=[selector],
        title="Chat with Quantized GGUF Models",
    )

    # BUG FIX: a ChatInterface is a container, not a valid event output —
    # presets must target its input textbox component.
    explain_btn.click(lambda: preset_text("Explain this clearly:"), None, chat_ui.textbox)
    summarize_btn.click(lambda: preset_text("Summarize this text:"), None, chat_ui.textbox)
    translate_btn.click(lambda: preset_text("Translate to Spanish:"), None, chat_ui.textbox)
    code_help_btn.click(lambda: preset_text("Help with this code snippet:"), None, chat_ui.textbox)

    # BUG FIX: with inputs=None Gradio calls the fn with zero arguments,
    # so `lambda _: []` raised TypeError; clear the chatbot on model switch.
    selector.change(lambda: [], None, chat_ui.chatbot)

# Guard the launch so importing this module does not start a server.
if __name__ == "__main__":
    demo.launch()