chat / app.py
ameliakris's picture
Replace model selection with specialized coding models
ffcd201
import spaces
import json
import subprocess
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent
from llama_cpp_agent import MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
import gradio as gr
from huggingface_hub import hf_hub_download
from ui import css
# Process-wide cache used by respond(): the currently loaded Llama instance
# and the GGUF filename it was loaded from. Both start out empty.
llm = llm_model = None
# Registry of selectable models, keyed by display name. Each entry holds:
#   filename      - GGUF file name (downloaded to / loaded from ./models)
#   repo_id       - Hugging Face repository that hosts the file
#   system_prompt - system message handed to the agent for this model
#   formatter     - name of the chat template, resolved inside respond()
#   description   - short blurb appended to the name in the model dropdown
MODELS = {
    "WhiteRabbitNeo 2.5 Qwen 2.5 Coder 7B": {
        "filename": "WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B-OBLITERATED-i1-Q5_K_M.gguf",
        "repo_id": "mradermacher/WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B-OBLITERATED-i1-GGUF",
        "system_prompt": (
            "You are WhiteRabbitNeo, an advanced AI coding assistant with deep "
            "expertise in software development, security analysis, and "
            "problem-solving. You provide detailed, accurate responses with "
            "proper code examples and thorough explanations."
        ),
        "formatter": "CHATML",
        "description": "Advanced coding assistant with security focus",
    },
    "Gemma 3 Prompt Coder 270m": {
        "filename": "Gemma-3-Prompt-Coder-270m-it-Uncensored-Q8_0.gguf",
        "repo_id": "mradermacher/Gemma-3-Prompt-Coder-270m-it-Uncensored-GGUF",
        "system_prompt": (
            "You are Gemma 3 Prompt Coder, a lightweight but powerful AI assistant "
            "specialized in coding and technical tasks. Provide clear, accurate "
            "responses with well-formatted code examples."
        ),
        "formatter": "CHATML",
        "description": "Ultra-fast lightweight coding specialist",
    },
    "DeepSeek V4 Pro": {
        "filename": "DeepSeek-V4-Pro-Q5_K_M.gguf",
        "repo_id": "unsloth/DeepSeek-V4-Pro-GGUF",
        "system_prompt": (
            "You are DeepSeek V4 Pro, an advanced AI assistant with extensive "
            "knowledge across multiple domains. Provide detailed, accurate, and "
            "well-reasoned responses with proper analysis and explanations."
        ),
        "formatter": "CHATML",
        "description": "Advanced multimodal reasoning model",
    },
    "Qwen 3.6 35B A3B Uncensored": {
        "filename": "Qwen3.6-35B-A3B-Uncensored-Q5_K_M.gguf",
        "repo_id": "HauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive-GGUF",
        "system_prompt": (
            "You are Qwen 3.6, an advanced AI assistant with aggressive reasoning "
            "capabilities and extensive knowledge. Provide direct, detailed "
            "responses with thorough analysis and strong reasoning."
        ),
        "formatter": "CHATML",
        "description": "Large model with aggressive reasoning",
    },
}
# Helper that downloads every configured model (not run automatically; the call below is commented out)
def download_models():
    """Fetch the GGUF file of every entry in MODELS into ``./models``.

    Progress is printed per model. Any exception raised while handling one
    model is reported on stdout and the loop moves on to the next entry, so
    a single failure never aborts the remaining downloads.
    """
    for label, spec in MODELS.items():
        try:
            print(f"Downloading {label}...")
            request = {
                "repo_id": spec["repo_id"],
                "filename": spec["filename"],
                "local_dir": "./models",
            }
            hf_hub_download(**request)
            print(f"✓ {label} downloaded successfully")
        except Exception as exc:
            # Best effort by design: report and continue with the next model.
            print(f"✗ Failed to download {label}: {exc}")
# Download models (commented out - uncomment to enable auto-download)
# download_models()
@spaces.GPU(duration=120)
def respond(
    message,
    history: list[tuple[str, str]],
    model_name,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    """Stream a chat completion for ``message`` from the selected model.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
        model_name: Either a key of ``MODELS`` or the dropdown label
            ``"<name> - <description>"``, which ``model_value_map``
            translates back to such a key.
        max_tokens: Copied to ``settings.max_tokens``.
        temperature: Copied to ``settings.temperature``.
        top_p: Copied to ``settings.top_p``.
        top_k: Copied to ``settings.top_k``.
        repeat_penalty: Copied to ``settings.repeat_penalty``.

    Yields:
        The response text accumulated so far, once per streamed chunk. If
        the model is unknown, fails to load, or generation raises, an
        ``"Error ..."`` message is yielded and the generator ends.
    """
    global llm
    global llm_model

    # The UI dropdown submits display labels ("<name> - <description>"),
    # whereas MODELS is keyed by the bare name. Translate the label first;
    # fall back to the raw value so callers that pass a key keep working.
    model_key = model_value_map.get(model_name, model_name)
    if model_key not in MODELS:
        yield f"Error: Model '{model_name}' not found in configuration."
        return

    model_config = MODELS[model_key]
    model_filename = model_config["filename"]
    system_prompt = model_config["system_prompt"]

    # Load or reload model if needed (nothing loaded yet, or a different
    # model was selected since the previous call).
    if llm is None or llm_model != model_filename:
        try:
            llm = Llama(
                model_path=f"models/{model_filename}",
                flash_attn=True,
                n_gpu_layers=81,
                n_batch=1024,
                n_ctx=8192,
                verbose=False
            )
            llm_model = model_filename
        except Exception as e:
            yield f"Error loading model: {str(e)}"
            return

    provider = LlamaCppPythonProvider(llm)

    # Map formatter names to actual types; unknown or missing names fall
    # back to CHATML.
    formatter_map = {
        "CHATML": MessagesFormatterType.CHATML,
        "MLCODESTRAL": MessagesFormatterType.MLCODESTRAL,
        "VICUNA": MessagesFormatterType.VICUNA,
    }
    formatter_type = formatter_map.get(
        model_config.get("formatter", "CHATML"),
        MessagesFormatterType.CHATML,
    )

    agent = LlamaCppAgent(
        provider,
        system_prompt=system_prompt,
        predefined_messages_formatter_type=formatter_type,
        debug_output=False
    )

    # Start from the provider defaults and override with the UI values.
    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    # Replay the previous turns into a fresh chat history object.
    messages = BasicChatHistory()
    for msn in history:
        user = {
            'role': Roles.user,
            'content': msn[0]
        }
        assistant = {
            'role': Roles.assistant,
            'content': msn[1]
        }
        messages.add_message(user)
        messages.add_message(assistant)

    try:
        stream = agent.get_chat_response(
            message,
            llm_sampling_settings=settings,
            chat_history=messages,
            returns_streaming_generator=True,
            print_output=False
        )
        # Yield the cumulative text so the UI can redraw the whole reply
        # after every chunk.
        outputs = ""
        for output in stream:
            outputs += output
            yield outputs
    except Exception as e:
        yield f"Error during generation: {str(e)}"
# Dropdown labels ("<name> - <description>") paired with the MODELS key each
# one stands for; the list feeds the dropdown, the dict maps a label back.
_labelled_models = [
    (f"{name} - {config['description']}", name)
    for name, config in MODELS.items()
]
model_choices = [label for label, _ in _labelled_models]
model_value_map = dict(_labelled_models)
# Extra controls shown alongside the chat box. Their order mirrors the extra
# parameters of respond(): model_name, max_tokens, temperature, top_p, top_k,
# repeat_penalty.
_additional_inputs = [
    gr.Dropdown(
        label="Model",
        info="Select the AI model to use",
        choices=model_choices,
        value=model_choices[0],
        allow_custom_value=False,
    ),
    gr.Slider(label="Max tokens", minimum=1, maximum=8192, value=4096, step=1),
    gr.Slider(label="Temperature", minimum=0.05, maximum=4.0, value=0.7, step=0.1),
    gr.Slider(label="Top-p", minimum=0.1, maximum=1.0, value=0.9, step=0.05),
    gr.Slider(label="Top-k", minimum=0, maximum=100, value=40, step=1),
    gr.Slider(label="Repetition penalty", minimum=0.0, maximum=2.0, value=1.0, step=0.1),
]

# Soft theme with a custom dark palette.
_theme = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="gray",
    font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"],
).set(
    body_background_fill_dark="#0f172a",
    block_background_fill_dark="#0f172a",
    block_border_width="1px",
    block_title_background_fill_dark="#070d1b",
    input_background_fill_dark="#0c1425",
    button_secondary_background_fill_dark="#070d1b",
    border_color_accent_dark="#21293b",
    border_color_primary_dark="#21293b",
    background_fill_secondary_dark="#0f172a",
    color_accent_soft_dark="transparent",
)

_chatbot = gr.Chatbot(scale=1, show_copy_button=True, likeable=True)

# NOTE(review): retry_btn / undo_btn / clear_btn and Chatbot(likeable=...)
# are not accepted by every Gradio major version - confirm against the
# release this Space pins before upgrading.
demo = gr.ChatInterface(
    respond,
    additional_inputs=_additional_inputs,
    theme=_theme,
    css=css,
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    submit_btn="Send",
    description="🐬 Cognitive Computations: Multi-Model Chat Interface",
    chatbot=_chatbot,
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()