import spaces
import json
import subprocess
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent
from llama_cpp_agent import MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
import gradio as gr
from huggingface_hub import hf_hub_download
from ui import css
# Module-level cache of the currently loaded model. respond() reuses `llm` as
# long as `llm_model` (the GGUF filename it was loaded from) matches the
# filename of the requested model, and reloads otherwise.
llm = None
llm_model = None

# Registry of selectable models, keyed by display name. Every entry carries:
#   filename      - GGUF file name (looked up under ./models by respond())
#   repo_id       - Hugging Face repository the file is downloaded from
#   system_prompt - system message handed to the agent
#   formatter     - name of the chat template, resolved inside respond()
#   description   - short blurb appended to the name in the model dropdown
MODELS = {
    "WhiteRabbitNeo 2.5 Qwen 2.5 Coder 7B": dict(
        filename="WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B-OBLITERATED-i1-Q5_K_M.gguf",
        repo_id="mradermacher/WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B-OBLITERATED-i1-GGUF",
        system_prompt=(
            "You are WhiteRabbitNeo, an advanced AI coding assistant with deep "
            "expertise in software development, security analysis, and "
            "problem-solving. You provide detailed, accurate responses with "
            "proper code examples and thorough explanations."
        ),
        formatter="CHATML",
        description="Advanced coding assistant with security focus",
    ),
    "Gemma 3 Prompt Coder 270m": dict(
        filename="Gemma-3-Prompt-Coder-270m-it-Uncensored-Q8_0.gguf",
        repo_id="mradermacher/Gemma-3-Prompt-Coder-270m-it-Uncensored-GGUF",
        system_prompt=(
            "You are Gemma 3 Prompt Coder, a lightweight but powerful AI "
            "assistant specialized in coding and technical tasks. Provide clear, "
            "accurate responses with well-formatted code examples."
        ),
        formatter="CHATML",
        description="Ultra-fast lightweight coding specialist",
    ),
    "DeepSeek V4 Pro": dict(
        filename="DeepSeek-V4-Pro-Q5_K_M.gguf",
        repo_id="unsloth/DeepSeek-V4-Pro-GGUF",
        system_prompt=(
            "You are DeepSeek V4 Pro, an advanced AI assistant with extensive "
            "knowledge across multiple domains. Provide detailed, accurate, and "
            "well-reasoned responses with proper analysis and explanations."
        ),
        formatter="CHATML",
        description="Advanced multimodal reasoning model",
    ),
    "Qwen 3.6 35B A3B Uncensored": dict(
        filename="Qwen3.6-35B-A3B-Uncensored-Q5_K_M.gguf",
        repo_id="HauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive-GGUF",
        system_prompt=(
            "You are Qwen 3.6, an advanced AI assistant with aggressive reasoning "
            "capabilities and extensive knowledge. Provide direct, detailed "
            "responses with thorough analysis and strong reasoning."
        ),
        formatter="CHATML",
        description="Large model with aggressive reasoning",
    ),
}

# Helper that fetches every configured model (not run automatically; see the commented-out call below)
def download_models():
    """Fetch the GGUF file of every entry in MODELS into ``./models``.

    Progress is reported with ``print``. A failure for one model is printed
    and does not prevent the remaining models from being attempted.
    """
    for label in MODELS:
        spec = MODELS[label]
        try:
            print(f"Downloading {label}...")
            repo, gguf_name = spec["repo_id"], spec["filename"]
            hf_hub_download(repo_id=repo, filename=gguf_name, local_dir="./models")
            print(f"✓ {label} downloaded successfully")
        except Exception as err:
            # Best effort: report the problem and move on to the next model.
            print(f"✗ Failed to download {label}: {err}")

# Download models (commented out - uncomment to enable auto-download)
# download_models()

@spaces.GPU(duration=120)
def respond(
    message,
    history: list[tuple[str, str]],
    model_name,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    """Stream a chat reply to ``message`` from the selected local GGUF model.

    Args:
        message: The new user message.
        history: Previous turns as ``(user_text, assistant_text)`` pairs.
        model_name: Either a key of ``MODELS`` or one of the dropdown labels
            (``"<name> - <description>"``) listed in ``model_value_map``.
        max_tokens: Value assigned to the sampling setting ``max_tokens``.
        temperature: Value assigned to the sampling setting ``temperature``.
        top_p: Value assigned to the sampling setting ``top_p``.
        top_k: Value assigned to the sampling setting ``top_k``.
        repeat_penalty: Value assigned to the sampling setting
            ``repeat_penalty``.

    Yields:
        The reply accumulated so far, once per streamed chunk. On failure a
        single ``"Error ..."`` string is yielded instead of raising.
    """
    global llm
    global llm_model

    # The UI dropdown submits "<name> - <description>" labels while MODELS is
    # keyed by the bare name; translate the label back and let bare names
    # pass through unchanged.
    model_name = model_value_map.get(model_name, model_name)

    if model_name not in MODELS:
        yield f"Error: Model '{model_name}' not found in configuration."
        return

    model_config = MODELS[model_name]
    model_filename = model_config["filename"]
    system_prompt = model_config["system_prompt"]

    # Load or reload model if needed
    if llm is None or llm_model != model_filename:
        # Drop the reference to the previous model before building the next
        # one, so the two are not both held at the same time.
        llm = None
        llm_model = None
        try:
            llm = Llama(
                model_path=f"models/{model_filename}",
                flash_attn=True,
                n_gpu_layers=81,
                n_batch=1024,
                n_ctx=8192,
                verbose=False
            )
        except Exception as e:
            yield f"Error loading model: {e}"
            return
        llm_model = model_filename

    provider = LlamaCppPythonProvider(llm)

    # Map formatter names to actual types
    formatter_map = {
        "CHATML": MessagesFormatterType.CHATML,
        "MLCODESTRAL": MessagesFormatterType.MLCODESTRAL,
        "VICUNA": MessagesFormatterType.VICUNA,
    }

    # Unknown or missing formatter names fall back to ChatML.
    formatter_type = formatter_map.get(
        model_config.get("formatter", "CHATML"),
        MessagesFormatterType.CHATML,
    )

    agent = LlamaCppAgent(
        provider,
        system_prompt=system_prompt,
        predefined_messages_formatter_type=formatter_type,
        debug_output=False
    )

    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    messages = BasicChatHistory()

    # Replay earlier turns in order; a side of a pair that is None carries no
    # text and is skipped rather than sent as an empty message.
    for turn in history or []:
        for role, content in ((Roles.user, turn[0]), (Roles.assistant, turn[1])):
            if content is None:
                continue
            messages.add_message({'role': role, 'content': content})

    try:
        stream = agent.get_chat_response(
            message,
            llm_sampling_settings=settings,
            chat_history=messages,
            returns_streaming_generator=True,
            print_output=False
        )

        # Yield the growing reply each time a new chunk arrives.
        outputs = ""
        for output in stream:
            outputs += output
            yield outputs
    except Exception as e:
        yield f"Error during generation: {e}"

# Dropdown labels ("<name> - <description>") plus the reverse lookup from a
# label back to its MODELS key.
def _dropdown_label(name, config):
    """Return the dropdown label for one MODELS entry."""
    return f"{name} - {config['description']}"


model_choices = [_dropdown_label(name, config) for name, config in MODELS.items()]
model_value_map = dict(zip(model_choices, MODELS))

# Extra controls for the chat UI, listed in the same order as the parameters
# respond() takes after (message, history).
_extra_inputs = [
    gr.Dropdown(
        choices=model_choices,
        value=model_choices[0],
        label="Model",
        info="Select the AI model to use",
        allow_custom_value=False,
    ),
    gr.Slider(label="Max tokens", minimum=1, maximum=8192, value=4096, step=1),
    gr.Slider(label="Temperature", minimum=0.05, maximum=4.0, value=0.7, step=0.1),
    gr.Slider(label="Top-p", minimum=0.1, maximum=1.0, value=0.9, step=0.05),
    gr.Slider(label="Top-k", minimum=0, maximum=100, value=40, step=1),
    gr.Slider(label="Repetition penalty", minimum=0.0, maximum=2.0, value=1.0, step=0.1),
]

# Soft theme with custom colour and border overrides.
_theme_overrides = {
    "body_background_fill_dark": "#0f172a",
    "block_background_fill_dark": "#0f172a",
    "block_border_width": "1px",
    "block_title_background_fill_dark": "#070d1b",
    "input_background_fill_dark": "#0c1425",
    "button_secondary_background_fill_dark": "#070d1b",
    "border_color_accent_dark": "#21293b",
    "border_color_primary_dark": "#21293b",
    "background_fill_secondary_dark": "#0f172a",
    "color_accent_soft_dark": "transparent",
}
_theme = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="gray",
    font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"],
).set(**_theme_overrides)

_chatbot = gr.Chatbot(scale=1, show_copy_button=True, likeable=True)

# The chat application object; respond() produces the streamed replies.
demo = gr.ChatInterface(
    respond,
    additional_inputs=_extra_inputs,
    theme=_theme,
    css=css,
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    submit_btn="Send",
    description="🐬 Cognitive Computations: Multi-Model Chat Interface",
    chatbot=_chatbot,
)

# Launch the interface only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    demo.launch()