import os

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
from huggingface_hub import login

# Read the Hugging Face token from the environment; never hard-code it
HF_TOKEN = os.environ.get("HF_AUTH_TOKEN")

# Log in to the Hugging Face Hub (required for gated models such as Mistral-7B)
if HF_TOKEN:
    login(token=HF_TOKEN)


# Load the model and tokenizer
def load_model():
    print("Loading model...")
    base_model_name = "mistralai/Mistral-7B-v0.1"
    adapter_model_name = "Psalms23Wave/Alkebulan-AI"

    # Use CUDA if available, otherwise fall back to CPU
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")

    # Free up GPU memory before loading
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Load the base model (fp16 on GPU, fp32 on CPU)
    print(f"Loading base model: {base_model_name}")
    model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        token=HF_TOKEN,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        low_cpu_mem_usage=True,
        device_map="auto" if device == "cuda" else None,
    )

    # Attach the adapter weights; fall back to the base model on failure
    try:
        print(f"Loading adapter: {adapter_model_name}")
        model = PeftModel.from_pretrained(model, adapter_model_name, token=HF_TOKEN)
        print("Adapter loaded successfully")
    except Exception as e:
        print(f"Error loading adapter: {e}")
        print("Continuing with base model only")

    # Load the tokenizer from the adapter repo
    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(adapter_model_name, token=HF_TOKEN)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    print("Model and tokenizer loaded successfully!")
    return model, tokenizer


# Define the chatbot function
def chat(message, history, language, max_tokens, temperature, model, tokenizer):
    """Generate a response from the chatbot. `history` is accepted for
    interface compatibility but is not yet folded into the prompt."""
    try:
        # Build the prompt
        prompt = f"Language: {language}\nUser: {message}\nBot:"

        # Tokenize with a length cap so long inputs cannot exhaust memory
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

        # Generate the response
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                do_sample=True,
                temperature=temperature,
                top_p=0.95,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens, skipping the echoed prompt
        generated = outputs[0][inputs["input_ids"].shape[1]:]
        response = tokenizer.decode(generated, skip_special_tokens=True).strip()

        # Clean up memory
        del inputs, outputs
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        return response
    except Exception as e:
        return f"Sorry, I encountered an error: {e}. Please try again."


# Define the translation function
def translate(text, target_language, model, tokenizer):
    """Translate text to the target language."""
    try:
        prompt = f"Translate this to {target_language}: \"{text}\"\nTranslation:"
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

        # Lower temperature than chat: translations should be near-deterministic
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=200,
                do_sample=True,
                temperature=0.3,
                top_p=0.95,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Decode only the generated continuation after the prompt
        generated = outputs[0][inputs["input_ids"].shape[1]:]
        translation = tokenizer.decode(generated, skip_special_tokens=True).strip()

        del inputs, outputs
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        return translation
    except Exception as e:
        return f"Sorry, I encountered an error: {e}. Please try again."
# Gradio interface with lazy loading
def create_interface():
    # The model is loaded on first use so the UI starts instantly
    model_loaded = False
    model, tokenizer = None, None

    def load_model_if_needed():
        nonlocal model, tokenizer, model_loaded
        if not model_loaded:
            try:
                model, tokenizer = load_model()
                model_loaded = True
            except Exception as e:
                print(f"Error loading model: {e}")
                raise
        return model, tokenizer

    # Main interface function
    def chatbot_interface(message, history, language, max_tokens, temperature):
        try:
            # Load the model on first use
            model, tokenizer = load_model_if_needed()

            lowered = message.lower()
            if "translate" in lowered and " to " in lowered:
                # Handle "translate <text> to <language>", preserving the
                # original casing of the text and the language name
                after = message[lowered.index("translate") + len("translate"):]
                idx = after.lower().rfind(" to ")
                if idx != -1:
                    text_to_translate = after[:idx].strip()
                    target_lang = after[idx + len(" to "):].strip()
                    translation = translate(text_to_translate, target_lang, model, tokenizer)
                    response = f"Translation to {target_lang}: {translation}"
                else:
                    response = ("Please specify what to translate and the target "
                                "language, e.g. 'translate Hello to Luganda'")
            else:
                # Regular chat
                response = chat(message, history, language, max_tokens, temperature, model, tokenizer)
        except Exception as e:
            response = f"Sorry, I encountered an error: {e}. Please try again."

        # Append the turn to the history shown in the UI and kept in state
        updated_history = history + [(message, response)]
        return updated_history, updated_history

    # Create the Gradio interface
    with gr.Blocks(title="Alkebulan AI Chatbot") as demo:
        gr.Markdown("# Alkebulan AI Chatbot")
        gr.Markdown("Chat in Luganda, Iteso, Runyankore, Acholi, or Ateso!")

        chat_history = gr.State([])

        with gr.Row():
            with gr.Column(scale=1):
                language = gr.Dropdown(
                    label="Select Language",
                    choices=["Luganda", "Iteso", "Runyankore", "Acholi", "Ateso", "English"],
                    value="Luganda",
                )
                with gr.Accordion("Advanced Settings", open=False):
                    max_tokens_slider = gr.Slider(minimum=50, maximum=200, value=100, step=10, label="Max Tokens")
                    temperature_slider = gr.Slider(minimum=0.1, maximum=1.5, value=0.7, step=0.1, label="Temperature")
                gr.Markdown("""
                ## Examples:
                - Basic chat: just type a message in the selected language
                - Translate: "translate How are you to Luganda"
                """)
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(label="Chat History", height=400)
                with gr.Row():
                    message = gr.Textbox(
                        label="Your Message",
                        placeholder="Type your message here...",
                        scale=8,
                    )
                    submit = gr.Button("Send", scale=1)
                    clear = gr.Button("Clear", scale=1)

        # Wire up interactions: the Send button and the Enter key both submit
        submit.click(
            chatbot_interface,
            inputs=[message, chat_history, language, max_tokens_slider, temperature_slider],
            outputs=[chatbot, chat_history],
        )
        message.submit(
            chatbot_interface,
            inputs=[message, chat_history, language, max_tokens_slider, temperature_slider],
            outputs=[chatbot, chat_history],
        )
        clear.click(lambda: [], None, chatbot, queue=False)
        clear.click(lambda: [], None, chat_history, queue=False)

    return demo


# Launch the interface
if __name__ == "__main__":
    demo = create_interface()
    demo.launch(share=True)  # share=True exposes a temporary public URL