| import spaces |
| import json |
| import subprocess |
| from llama_cpp import Llama |
| from llama_cpp_agent import LlamaCppAgent |
| from llama_cpp_agent import MessagesFormatterType |
| from llama_cpp_agent.providers import LlamaCppPythonProvider |
| from llama_cpp_agent.chat_history import BasicChatHistory |
| from llama_cpp_agent.chat_history.messages import Roles |
| import gradio as gr |
| from huggingface_hub import hf_hub_download |
| from ui import css |
# Inference state cached between calls to respond():
#   llm_model -- GGUF filename the cached instance was loaded from; respond()
#                compares it with the requested file to detect a model switch
#   llm       -- the loaded Llama instance, or None before the first load
llm_model = None
llm = None
|
|
| |
# Registry of selectable models, keyed by display name. Each entry holds:
#   filename      -- GGUF file name inside the Hugging Face repo
#   repo_id       -- Hugging Face repository the file is fetched from
#   system_prompt -- system message handed to the chat agent
#   formatter     -- chat-template name; respond() maps it to a
#                    MessagesFormatterType and falls back to CHATML
#   description   -- short blurb appended to the name in the model dropdown
MODELS = {
    "WhiteRabbitNeo 2.5 Qwen 2.5 Coder 7B": {
        "filename": "WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B-OBLITERATED-i1-Q5_K_M.gguf",
        "repo_id": "mradermacher/WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B-OBLITERATED-i1-GGUF",
        "system_prompt": (
            "You are WhiteRabbitNeo, an advanced AI coding assistant with deep "
            "expertise in software development, security analysis, and "
            "problem-solving. You provide detailed, accurate responses with "
            "proper code examples and thorough explanations."
        ),
        "formatter": "CHATML",
        "description": "Advanced coding assistant with security focus",
    },
    "Gemma 3 Prompt Coder 270m": {
        "filename": "Gemma-3-Prompt-Coder-270m-it-Uncensored-Q8_0.gguf",
        "repo_id": "mradermacher/Gemma-3-Prompt-Coder-270m-it-Uncensored-GGUF",
        "system_prompt": (
            "You are Gemma 3 Prompt Coder, a lightweight but powerful AI "
            "assistant specialized in coding and technical tasks. Provide "
            "clear, accurate responses with well-formatted code examples."
        ),
        "formatter": "CHATML",
        "description": "Ultra-fast lightweight coding specialist",
    },
    "DeepSeek V4 Pro": {
        "filename": "DeepSeek-V4-Pro-Q5_K_M.gguf",
        "repo_id": "unsloth/DeepSeek-V4-Pro-GGUF",
        "system_prompt": (
            "You are DeepSeek V4 Pro, an advanced AI assistant with extensive "
            "knowledge across multiple domains. Provide detailed, accurate, "
            "and well-reasoned responses with proper analysis and explanations."
        ),
        "formatter": "CHATML",
        "description": "Advanced multimodal reasoning model",
    },
    "Qwen 3.6 35B A3B Uncensored": {
        "filename": "Qwen3.6-35B-A3B-Uncensored-Q5_K_M.gguf",
        "repo_id": "HauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive-GGUF",
        "system_prompt": (
            "You are Qwen 3.6, an advanced AI assistant with aggressive "
            "reasoning capabilities and extensive knowledge. Provide direct, "
            "detailed responses with thorough analysis and strong reasoning."
        ),
        "formatter": "CHATML",
        "description": "Large model with aggressive reasoning",
    },
}
|
|
| |
def download_models(models=None, local_dir="./models"):
    """Download the GGUF file of every configured model.

    Each model is fetched independently with ``hf_hub_download``. A failure
    is reported on stdout and does not prevent the remaining models from
    being attempted.

    Args:
        models: Mapping of display name to a config dict providing
            ``repo_id`` and ``filename``. Defaults to the module-level
            ``MODELS`` registry.
        local_dir: Directory the files are written to.

    Returns:
        A dict mapping every model name to whatever ``hf_hub_download``
        returned for it (the local file path), or to ``None`` when that
        model could not be downloaded. Callers that ignore the return value
        behave exactly as before.
    """
    if models is None:
        models = MODELS

    results = {}
    for model_name, config in models.items():
        try:
            print(f"Downloading {model_name}...")
            results[model_name] = hf_hub_download(
                repo_id=config["repo_id"],
                filename=config["filename"],
                local_dir=local_dir,
            )
            print(f"✓ {model_name} downloaded successfully")
        except Exception as e:
            # Deliberately broad: this is a best-effort bulk fetch, so one
            # bad entry (missing key, unknown repo, network error) must not
            # abort the others. The failure is recorded for the caller.
            results[model_name] = None
            print(f"✗ Failed to download {model_name}: {e}")
    return results
|
|
| |
| |
|
|
def _resolve_model_name(choice):
    """Translate a model selection into its key in ``MODELS``.

    The model dropdown offers labels of the form ``"<name> - <description>"``
    while ``MODELS`` is keyed by the bare name, so both spellings are
    accepted here.

    Returns:
        The matching ``MODELS`` key, or ``None`` when nothing matches.
    """
    if choice in MODELS:
        return choice
    for name, config in MODELS.items():
        if choice == f"{name} - {config['description']}":
            return name
    return None


def _ensure_model_file(model_config):
    """Return the local path of a model's GGUF file, fetching it if missing.

    Raises:
        Whatever ``hf_hub_download`` raises when the file has to be fetched
        and the download fails.
    """
    import os

    model_path = f"models/{model_config['filename']}"
    if not os.path.isfile(model_path):
        # Same repo/filename/target directory that download_models() uses.
        hf_hub_download(
            repo_id=model_config["repo_id"],
            filename=model_config["filename"],
            local_dir="./models",
        )
    return model_path


@spaces.GPU(duration=120)
def respond(
    message,
    history: list[tuple[str, str]],
    model_name,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    """Stream a chat completion for ``message`` from the selected model.

    The loaded ``Llama`` instance is cached in the module globals ``llm`` /
    ``llm_model`` and only reloaded when a different model file is requested.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
        model_name: Either a key of ``MODELS`` or the dropdown label
            ``"<name> - <description>"`` built from it.
        max_tokens: Copied to the provider setting ``max_tokens``.
        temperature: Copied to the provider setting ``temperature``.
        top_p: Copied to the provider setting ``top_p``.
        top_k: Copied to the provider setting ``top_k``.
        repeat_penalty: Copied to the provider setting ``repeat_penalty``.

    Yields:
        The response text accumulated so far after every streamed chunk, or
        a single ``"Error ..."`` string when the model is unknown, cannot be
        loaded, or generation fails.
    """
    global llm
    global llm_model

    # The UI sends the dropdown label, not the MODELS key; map it back.
    resolved_name = _resolve_model_name(model_name)
    if resolved_name is None:
        yield f"Error: Model '{model_name}' not found in configuration."
        return

    model_config = MODELS[resolved_name]
    model_filename = model_config["filename"]
    system_prompt = model_config["system_prompt"]

    if llm is None or llm_model != model_filename:
        # Release the previously loaded model before constructing the next
        # one; otherwise both stay referenced until the assignment completes.
        llm = None
        llm_model = None
        try:
            llm = Llama(
                model_path=_ensure_model_file(model_config),
                flash_attn=True,
                n_gpu_layers=81,
                n_batch=1024,
                n_ctx=8192,
                verbose=False,
            )
            llm_model = model_filename
        except Exception as e:
            yield f"Error loading model: {str(e)}"
            return

    provider = LlamaCppPythonProvider(llm)

    formatter_map = {
        "CHATML": MessagesFormatterType.CHATML,
        "MLCODESTRAL": MessagesFormatterType.MLCODESTRAL,
        "VICUNA": MessagesFormatterType.VICUNA,
    }
    # Unknown or missing formatter names fall back to ChatML.
    formatter_type = formatter_map.get(
        model_config.get("formatter", "CHATML"),
        MessagesFormatterType.CHATML,
    )

    agent = LlamaCppAgent(
        provider,
        system_prompt=system_prompt,
        predefined_messages_formatter_type=formatter_type,
        debug_output=False,
    )

    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    # Replay the earlier turns into the agent's chat history.
    messages = BasicChatHistory()
    for turn in history:
        messages.add_message({"role": Roles.user, "content": turn[0]})
        messages.add_message({"role": Roles.assistant, "content": turn[1]})

    try:
        stream = agent.get_chat_response(
            message,
            llm_sampling_settings=settings,
            chat_history=messages,
            returns_streaming_generator=True,
            print_output=False,
        )

        # Yield the growing text (not the delta) after every chunk.
        outputs = ""
        for output in stream:
            outputs += output
            yield outputs
    except Exception as e:
        yield f"Error during generation: {str(e)}"
|
|
| |
# Dropdown labels have the form "<name> - <description>"; model_value_map
# translates such a label back into its MODELS key.
model_choices = [
    f"{name} - {config['description']}" for name, config in MODELS.items()
]
model_value_map = dict(zip(model_choices, MODELS))

# Extra controls, listed in the order of respond()'s parameters that follow
# (message, history). The dropdown's value is the label string itself.
_additional_inputs = [
    gr.Dropdown(
        choices=model_choices,
        value=model_choices[0],
        label="Model",
        info="Select the AI model to use",
        allow_custom_value=False,
    ),
    gr.Slider(minimum=1, maximum=8192, value=4096, step=1, label="Max tokens"),
    gr.Slider(minimum=0.05, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-p"),
    gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
    gr.Slider(minimum=0.0, maximum=2.0, value=1.0, step=0.1, label="Repetition penalty"),
]

# Soft theme with overrides for the dark-mode palette.
_theme = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="gray",
    font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"],
).set(
    body_background_fill_dark="#0f172a",
    block_background_fill_dark="#0f172a",
    block_border_width="1px",
    block_title_background_fill_dark="#070d1b",
    input_background_fill_dark="#0c1425",
    button_secondary_background_fill_dark="#070d1b",
    border_color_accent_dark="#21293b",
    border_color_primary_dark="#21293b",
    background_fill_secondary_dark="#0f172a",
    color_accent_soft_dark="transparent",
)

_chatbot = gr.Chatbot(scale=1, show_copy_button=True, likeable=True)

demo = gr.ChatInterface(
    respond,
    additional_inputs=_additional_inputs,
    theme=_theme,
    css=css,
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    submit_btn="Send",
    description="🐬 Cognitive Computations: Multi-Model Chat Interface",
    chatbot=_chatbot,
)


# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|