from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch


class IndonesianChatbot:
    def __init__(self):
        """Initialize the Indonesian chatbot with multiple model options."""
        self.device = 0 if torch.cuda.is_available() else -1
        self.models = {}
        self.tokenizers = {}

    def load_model(self, model_type="bahasa_gpt"):
        """Load an Indonesian chatbot model based on type."""
        if model_type == "bahasa_gpt" and "bahasa_gpt" not in self.models:
            # BahasaGPT - best for Indonesian chat (7B parameters)
            model_name = "Bahasalab/BahasaGpt-chat"
            self.tokenizers["bahasa_gpt"] = AutoTokenizer.from_pretrained(model_name)
            self.models["bahasa_gpt"] = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                device_map="auto" if torch.cuda.is_available() else None,
            )
        elif model_type == "indo_gpt" and "indo_gpt" not in self.models:
            # IndoGPT - a good alternative. Note: the originally listed
            # "indolem/indobart-v2" is a seq2seq (BART) checkpoint and does not
            # work with the "text-generation" pipeline; "indobenchmark/indogpt"
            # is a causal LM that does.
            model_name = "indobenchmark/indogpt"
            self.models["indo_gpt"] = pipeline(
                "text-generation",
                model=model_name,
                device=self.device,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            )
        elif model_type == "sea_llm" and "sea_llm" not in self.models:
            # SeaLLM - multilingual, including Indonesian
            model_name = "SeaLLMs/SeaLLM-7B-v2-Chat"
            self.tokenizers["sea_llm"] = AutoTokenizer.from_pretrained(model_name)
            self.models["sea_llm"] = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                device_map="auto" if torch.cuda.is_available() else None,
            )
        elif model_type == "gemma_id" and "gemma_id" not in self.models:
            # Gemma fine-tuned for Indonesian
            model_name = "google/gemma-2b-it"  # lightweight option
            self.models["gemma_id"] = pipeline(
                "text-generation",
                model=model_name,
                device=self.device,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            )

    def chat_with_bahasa_gpt(self, message, history=None, max_tokens=512, temperature=0.7):
        """Chat using the BahasaGPT model."""
        if "bahasa_gpt" not in self.models:
            self.load_model("bahasa_gpt")

        tokenizer = self.tokenizers["bahasa_gpt"]
        model = self.models["bahasa_gpt"]

        # Format the conversation history as Human/Assistant turns
        conversation = ""
        if history:
            for turn in history:
                role = turn.get("role", "user")
                content = turn.get("content", "")
                if role == "user":
                    conversation += f"Human: {content}\n"
                elif role == "assistant":
                    conversation += f"Assistant: {content}\n"
        conversation += f"Human: {message}\nAssistant:"

        # Tokenize and generate
        inputs = tokenizer.encode(conversation, return_tensors="pt")
        inputs = inputs.to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                inputs,
                max_new_tokens=max_tokens,
                temperature=temperature,
                do_sample=True,
                top_p=0.95,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens; slicing by prompt length is
        # more reliable than string-replacing the prompt out of the output.
        response = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
        return response.strip()

    def chat_with_sea_llm(self, message, history=None, max_tokens=512,
                          temperature=0.7, system_message=None):
        """Chat using the SeaLLM model."""
        if "sea_llm" not in self.models:
            self.load_model("sea_llm")

        tokenizer = self.tokenizers["sea_llm"]
        model = self.models["sea_llm"]

        if not system_message:
            # "You are an AI assistant that helps in Indonesian."
            system_message = "Kamu adalah asisten AI yang membantu dalam bahasa Indonesia."

        # Build the conversation and let the tokenizer apply the chat template
        # bundled with the checkpoint; this is more robust than hand-writing
        # role markers such as "<|user|>", which may not match the model's
        # actual prompt format.
        messages = [{"role": "system", "content": system_message}]
        if history:
            for turn in history:
                messages.append({
                    "role": turn.get("role", "user"),
                    "content": turn.get("content", ""),
                })
        messages.append({"role": "user", "content": message})

        inputs = tokenizer.apply_chat_template(
            messages, add_generation_prompt=True, return_tensors="pt"
        )
        inputs = inputs.to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                inputs,
                max_new_tokens=max_tokens,
                temperature=temperature,
                do_sample=True,
                top_p=0.95,
                eos_token_id=tokenizer.eos_token_id,
            )

        # Decode only the tokens generated after the prompt
        response = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
        return response.strip()

    def chat_with_pipeline(self, message, model_type="gemma_id", max_tokens=512, temperature=0.7):
        """Chat using pipeline-based models."""
        if model_type not in self.models:
            self.load_model(model_type)

        pipeline_model = self.models[model_type]

        # Simple Q&A prompt ("Pertanyaan" = question, "Jawaban" = answer).
        # Instruction-tuned models such as gemma-2b-it may respond better to
        # their own chat format, but this plain prompt works for quick tests.
        prompt = f"Pertanyaan: {message}\nJawaban:"
        result = pipeline_model(
            prompt,
            max_new_tokens=max_tokens,
            temperature=temperature,
            do_sample=True,
            top_p=0.95,
            truncation=True,
        )
        response = result[0]["generated_text"].replace(prompt, "").strip()
        return response


# Global chatbot instance
chatbot = IndonesianChatbot()


def chat_indonesian(message, history=None, system_message="", max_tokens=512,
                    temperature=0.7, model_type="bahasa_gpt"):
    """
    Main chat function for the Indonesian chatbot.

    Args:
        message (str): User message
        history (list): Conversation history
        system_message (str): System prompt (currently applied on the
            "sea_llm" path only)
        max_tokens (int): Maximum number of tokens to generate
        temperature (float): Sampling temperature
        model_type (str): "bahasa_gpt", "sea_llm", "indo_gpt", or "gemma_id"
    """
    try:
        if model_type == "bahasa_gpt":
            return chatbot.chat_with_bahasa_gpt(message, history, max_tokens, temperature)
        elif model_type == "sea_llm":
            return chatbot.chat_with_sea_llm(message, history, max_tokens,
                                             temperature, system_message or None)
        elif model_type in ["indo_gpt", "gemma_id"]:
            return chatbot.chat_with_pipeline(message, model_type, max_tokens, temperature)
        else:
            # Unknown model type: fall back to BahasaGPT
            return chatbot.chat_with_bahasa_gpt(message, history, max_tokens, temperature)
    except Exception as e:
        print(f"Chat error with {model_type}: {e}")
        # "Sorry, an error occurred: ..."
        return f"Maaf, terjadi kesalahan: {str(e)}"


# Compatibility wrapper
def chat_simple(message):
    """Simple wrapper for quick testing."""
    return chat_indonesian(message, model_type="bahasa_gpt")
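

# Example usage - a minimal sketch for local smoke testing, not part of the
# original script. It assumes the checkpoints named above are downloadable
# from the Hugging Face Hub and that enough GPU/CPU memory is available; the
# first call per model triggers a large download, and gated repositories
# (e.g. google/gemma-2b-it) additionally require an authenticated
# Hugging Face login.
if __name__ == "__main__":
    # Single-turn question via the lightweight pipeline path
    # ("Apa ibu kota Indonesia?" = "What is the capital of Indonesia?")
    print(chat_indonesian("Apa ibu kota Indonesia?", model_type="gemma_id"))

    # Multi-turn chat with explicit history, routed to SeaLLM
    # ("Hello, who are you?" / "I am an Indonesian-language AI assistant." /
    #  "Can you summarize our conversation?")
    history = [
        {"role": "user", "content": "Halo, siapa kamu?"},
        {"role": "assistant", "content": "Saya asisten AI berbahasa Indonesia."},
    ]
    print(chat_indonesian("Bisakah kamu merangkum percakapan kita?",
                          history=history, model_type="sea_llm"))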