import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch


class LLMModule:
    """Gradio-based chat harness for interactively testing small causal LLMs."""

    def __init__(self):
        # Display name -> Hugging Face model id.
        self.model_options = {
            "TinyLlama": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            "Phi-2": "microsoft/phi-2",
            "Qwen 0.5B": "Qwen/Qwen2.5-0.5B-Instruct",
        }
        self.current_model = None  # display name of the currently loaded model
        self.pipe = None           # transformers text-generation pipeline
        self.chat_history = []     # alternating {"role", "content"} dicts (user, assistant, ...)

    def load_model(self, model_name):
        """Load the selected model into a text-generation pipeline.

        Args:
            model_name: A key of ``self.model_options``.

        Returns:
            A status string for the UI. Resets chat history on success.
        """
        try:
            model_id = self.model_options[model_name]
            device = "cuda" if torch.cuda.is_available() else "cpu"
            self.pipe = pipeline(
                "text-generation",
                model=model_id,
                device=device,
                # fp16 only on GPU; CPU inference needs fp32.
                torch_dtype=torch.float16 if device == "cuda" else torch.float32,
            )
            self.current_model = model_name
            self.chat_history = []
            return f"✓ Loaded {model_name} on {device}"
        except Exception as e:
            return f"✗ Error loading model: {str(e)}"

    def _format_display(self):
        """Convert role/content history into (user, assistant) tuples for gr.Chatbot.

        User messages sit at even indices and assistant replies at the following
        odd index; a trailing unanswered user turn is skipped.
        """
        return [
            (self.chat_history[i]["content"], self.chat_history[i + 1]["content"])
            for i in range(0, len(self.chat_history) - 1, 2)
        ]

    def generate_response(self, message, max_tokens, temperature):
        """Generate an LLM reply for ``message``.

        Args:
            message: User prompt text.
            max_tokens: Maximum new tokens to generate (coerced to int).
            temperature: Sampling temperature (coerced to float).

        Returns:
            A ``(textbox_value, chat_display)`` tuple: the new content of the
            message textbox (empty on success, a warning/error string otherwise)
            and the (user, assistant) tuple list for the Chatbot component.
        """
        if self.pipe is None:
            return "⚠ Please load a model first", []

        if not message.strip():
            # Keep the existing conversation visible instead of returning
            # raw role/content dicts (which gr.Chatbot cannot render).
            return "⚠ Please enter a message", self._format_display()

        # Record the user turn before generating so history stays ordered.
        self.chat_history.append({"role": "user", "content": message})
        try:
            response = self.pipe(
                message,
                max_new_tokens=int(max_tokens),
                temperature=float(temperature),
                do_sample=True,
                top_p=0.9,
            )
            assistant_message = response[0]["generated_text"]
            # text-generation pipelines echo the prompt; strip it if present.
            if assistant_message.startswith(message):
                assistant_message = assistant_message[len(message):].strip()

            self.chat_history.append({"role": "assistant", "content": assistant_message})
            return "", self._format_display()
        except Exception as e:
            # Drop the unanswered user turn so the even/odd user/assistant
            # pairing in chat_history stays intact for later turns.
            self.chat_history.pop()
            return f"✗ Error generating response: {str(e)}", self._format_display()

    def clear_history(self):
        """Clear the chat history.

        Returns:
            ``([], "")`` — an empty Chatbot display and an empty message box.
        """
        self.chat_history = []
        return [], ""

    def create_interface(self):
        """Create the Gradio interface for LLM testing.

        Returns:
            The ``gr.Column`` containing the full chat UI, with event
            handlers wired to this instance's methods.
        """
        with gr.Column() as interface:
            gr.Markdown("## 🤖 LLM Testing")

            with gr.Row():
                model_selector = gr.Dropdown(
                    choices=list(self.model_options.keys()),
                    value="Qwen 0.5B",
                    label="Select LLM Model",
                )
                load_btn = gr.Button("Load Model", variant="primary")

            status = gr.Textbox(label="Status", interactive=False)

            gr.Markdown("### Chat Interface")
            chatbot = gr.Chatbot(label="Conversation", height=400)

            with gr.Row():
                message_input = gr.Textbox(
                    label="Message",
                    placeholder="Type your message...",
                    scale=4,
                )
                send_btn = gr.Button("Send", variant="secondary", scale=1)

            with gr.Row():
                max_tokens = gr.Slider(
                    minimum=50, maximum=500, value=150, step=10, label="Max Tokens"
                )
                temperature = gr.Slider(
                    minimum=0.1, maximum=1.5, value=0.7, step=0.1, label="Temperature"
                )

            clear_btn = gr.Button("Clear Chat", variant="stop")

            # Event wiring: load model, send (button click + Enter key), clear.
            load_btn.click(
                fn=self.load_model,
                inputs=[model_selector],
                outputs=[status],
            )
            send_btn.click(
                fn=self.generate_response,
                inputs=[message_input, max_tokens, temperature],
                outputs=[message_input, chatbot],
            )
            message_input.submit(
                fn=self.generate_response,
                inputs=[message_input, max_tokens, temperature],
                outputs=[message_input, chatbot],
            )
            clear_btn.click(
                fn=self.clear_history,
                outputs=[chatbot, message_input],
            )
        return interface