import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import warnings
# Suppress every Python warning, of any category, for the whole process.
# NOTE(review): presumably meant to keep noisy library warnings out of the
# console; it also hides deprecation warnings — confirm this is intended.
warnings.filterwarnings("ignore")

class MiMoChatBot:
    """Chat wrapper around the ``XiaomiMiMo/MiMo-V2-Flash`` causal language model.

    The instance starts unloaded; call :meth:`load_model` to build the
    tokenizer, model and text-generation pipeline, then
    :meth:`generate_response` to obtain replies.
    """

    # Number of most recent user/assistant exchanges included in the prompt.
    MAX_HISTORY_TURNS = 3

    def __init__(self):
        self.model_name = "XiaomiMiMo/MiMo-V2-Flash"
        self.tokenizer = None
        self.model = None
        self.pipeline = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

    def load_model(self):
        """Load the tokenizer, the model and the generation pipeline.

        The call is idempotent: once a pipeline exists, later calls return
        immediately instead of loading the weights again.

        Returns:
            True when the pipeline is ready, False when loading failed (the
            error is printed and not re-raised).
        """
        if self.pipeline is not None:
            return True

        on_gpu = self.device == "cuda"
        try:
            print(f"Carregando modelo {self.model_name}...")
            tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                torch_dtype=torch.float16 if on_gpu else torch.float32,
                device_map="auto" if on_gpu else None,
                trust_remote_code=True
            )

            if not on_gpu:
                model = model.to(self.device)

            # Build the pipeline with the default sampling configuration.
            generator = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                max_new_tokens=512,
                temperature=0.7,
                top_p=0.95,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id
            )
        except Exception as e:
            print(f"Erro ao carregar o modelo: {e}")
            return False

        # Publish the attributes only after everything succeeded, so a failed
        # load never leaves the instance half-initialised.
        self.tokenizer = tokenizer
        self.model = model
        self.pipeline = generator
        print("Modelo carregado com sucesso!")
        return True

    @staticmethod
    def _content_to_text(content):
        """Return the plain text of a chat message ``content`` value.

        Accepts a string, ``None``, a dict carrying a ``"text"`` key, or a
        list/tuple of such parts; anything else is converted with ``str``.
        """
        if content is None:
            return ""
        if isinstance(content, str):
            return content
        if isinstance(content, dict):
            return MiMoChatBot._content_to_text(content.get("text"))
        if isinstance(content, (list, tuple)):
            parts = (MiMoChatBot._content_to_text(part) for part in content)
            return "\n".join(part for part in parts if part)
        return str(content)

    @classmethod
    def _history_to_turns(cls, history):
        """Convert a chat history into a list of ``(user, assistant)`` text pairs.

        Two history shapes are supported: ``{"role": ..., "content": ...}``
        message dicts and ``(user_msg, bot_msg)`` pairs. A user message with
        no following assistant message becomes a turn with an empty reply;
        dict entries with any other role are ignored.
        """
        turns = []
        pending_user = None
        for entry in history or []:
            if isinstance(entry, dict):
                role = entry.get("role")
                text = cls._content_to_text(entry.get("content"))
                if role == "user":
                    if pending_user is not None:
                        turns.append((pending_user, ""))
                    pending_user = text
                elif role == "assistant":
                    turns.append((pending_user or "", text))
                    pending_user = None
            else:
                user_msg, bot_msg = entry
                if pending_user is not None:
                    turns.append((pending_user, ""))
                    pending_user = None
                turns.append(
                    (cls._content_to_text(user_msg), cls._content_to_text(bot_msg))
                )
        if pending_user is not None:
            turns.append((pending_user, ""))
        return turns

    def generate_response(self, message, history):
        """Generate a reply to ``message`` given the previous conversation.

        Args:
            message: The new user message.
            history: Previous conversation, either as role/content message
                dicts or as ``(user_msg, bot_msg)`` pairs. Only the last
                ``MAX_HISTORY_TURNS`` exchanges are placed in the prompt.

        Returns:
            The reply text, or a user-facing message when the model is not
            loaded yet, nothing was generated, or generation failed.
        """
        if self.pipeline is None:
            return "Modelo ainda está carregando. Por favor, aguarde..."

        try:
            # Build the prompt from the most recent exchanges.
            turns = self._history_to_turns(history)[-self.MAX_HISTORY_TURNS:]
            context = "".join(
                f"Usuário: {user_msg}\nAssistente: {bot_msg}\n"
                for user_msg, bot_msg in turns
            )
            context += f"Usuário: {message}\nAssistente: "

            response = self.pipeline(
                context,
                max_new_tokens=512,
                temperature=0.7,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
                eos_token_id=self.tokenizer.eos_token_id,
                # Ask for the continuation only, so the prompt does not have
                # to be removed by character offset.
                return_full_text=False,
            )

            generated_text = response[0]['generated_text']
            # Defensive: drop the prompt if it was echoed back anyway.
            if generated_text.startswith(context):
                generated_text = generated_text[len(context):]
            new_response = generated_text.strip()

            if new_response.startswith("Assistente: "):
                new_response = new_response[len("Assistente: "):]

            # The plain-text dialogue prompt lets the model keep writing the
            # next user turn; keep only the assistant's own reply.
            new_response = new_response.split("\nUsuário:", 1)[0].strip()

            return new_response if new_response else "Desculpe, não consegui gerar uma resposta."

        except Exception as e:
            return f"Erro ao gerar resposta: {e}"

# Module-level chatbot instance used by the Gradio callbacks defined below.
# Constructing it only records configuration (tokenizer, model and pipeline
# start as None); the weights are loaded later through load_model().
chatbot = MiMoChatBot()

def create_chatbot_app():
    """Build the Gradio interface for the chat application.

    Returns:
        The ``gr.Blocks`` app, not yet launched. Its load event calls
        ``chatbot.load_model()`` and writes the outcome to the status box.
    """

    def chat_function(message, history):
        """Handle one submitted message.

        Returns the updated history and an empty string that clears the
        input box. The incoming history list is copied, never mutated.
        """
        updated_history = list(history or [])
        if not message.strip():
            return updated_history, ""

        # Generate the reply from the conversation as it was before this message.
        response = chatbot.generate_response(message, list(updated_history))

        updated_history.append({"role": "user", "content": message})
        updated_history.append({"role": "assistant", "content": response})
        return updated_history, ""

    def clear_chat():
        """Reset both the conversation and the input box."""
        return [], ""

    def load_model_status():
        """Load the model and return the text shown in the status box."""
        if chatbot.load_model():
            return "✅ Modelo carregado com sucesso!"
        return "❌ Erro ao carregar modelo"

    # 🚨 Gradio 6: NO parameters in gr.Blocks() constructor!
    with gr.Blocks() as demo:
        # Header with branding
        gr.HTML("""
        <div style='text-align: center; margin-bottom: 20px;'>
            <h1>🤖 MiMo-V2-Flash Chat Assistant</h1>
            <p>Converse com o modelo de linguagem XiaomiMiMo/MiMo-V2-Flash</p>
            <p><a href='https://huggingface.co/spaces/akhaliq/anycoder' target='_blank' style='color: #007bff; text-decoration: none;'>Built with anycoder</a></p>
        </div>
        """)

        # Model status
        with gr.Row():
            model_status = gr.Textbox(
                value="🔄 Carregando modelo...",
                label="Status",
                interactive=False,
                scale=2
            )

        # Chat interface
        with gr.Column():
            # NOTE(review): confirm that show_copy_button, bubble_full_width
            # and type are still accepted by the installed Gradio version.
            chatbot_interface = gr.Chatbot(
                label="Conversa",
                height=500,
                show_copy_button=True,
                bubble_full_width=False,
                type="messages"
            )

            with gr.Row():
                msg_input = gr.Textbox(
                    label="Digite sua mensagem...",
                    placeholder="Olá! Como posso ajudar você hoje?",
                    scale=4,
                    container=False
                )

                with gr.Column(scale=1):
                    submit_btn = gr.Button("Enviar", variant="primary", size="sm")
                    clear_btn = gr.Button("Limpar", variant="secondary", size="sm")

        # Additional information
        with gr.Accordion("ℹ️ Informações do Modelo", open=False):
            gr.Markdown("""
            ### Sobre o MiMo-V2-Flash
            
            - **Modelo**: XiaomiMiMo/MiMo-V2-Flash
            - **Tipo**: Modelo de linguagem causal
            - **Idioma**: Principalmente Chinês, com capacidade limitada em outros idiomas
            - **Uso**: Conversação, geração de texto, assistência virtual
            
            **Dicas de uso:**
            - Seja claro e específico em suas perguntas
            - O modelo funciona melhor com contextos mais curtos
            - Para melhores resultados, use idiomas que o modelo foi treinado
            """)

        # Wire up events - 🚨 Gradio 6: Use api_visibility instead of api_name
        chat_inputs = [msg_input, chatbot_interface]
        chat_outputs = [chatbot_interface, msg_input]
        msg_input.submit(chat_function, chat_inputs, chat_outputs, api_visibility="public")
        submit_btn.click(chat_function, chat_inputs, chat_outputs, api_visibility="public")
        clear_btn.click(clear_chat, outputs=[chatbot_interface, msg_input], api_visibility="public")

        # Load the model once the interface has been created.
        demo.load(fn=load_model_status, outputs=model_status)

    return demo

# Build and launch the application when run as a script.
if __name__ == "__main__":
    app = create_chatbot_app()

    # 🚨 Gradio 6: ALL app parameters go in demo.launch()!
    # Base Soft theme: colours, font and sizing.
    soft_theme = gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="indigo",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter"),
        text_size="lg",
        spacing_size="lg",
        radius_size="md"
    )
    # Overrides for the primary button colours and the block title weight.
    theme_overrides = {
        "button_primary_background_fill": "*primary_600",
        "button_primary_background_fill_hover": "*primary_700",
        "block_title_text_weight": "600",
    }
    custom_theme = soft_theme.set(**theme_overrides)

    # Links shown in the page footer.
    footer_links = [
        {"label": "Modelo no Hugging Face", "url": "https://huggingface.co/XiaomiMiMo/MiMo-V2-Flash"},
        {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"}
    ]

    # 🚨 Gradio 6: theme goes in launch()
    app.launch(theme=custom_theme, footer_links=footer_links, share=True)