import gradio as gr
import torch
from PIL import Image
import json
import os
from transformers import AutoProcessor, AutoModelForImageTextToText
from typing import List, Dict, Any
import logging
import spaces

# Configure logging: INFO-level root configuration plus a module-scoped logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Model configuration
MODEL_ID = "Tonic/l-operator"
# Use the GPU whenever torch reports one, otherwise fall back to CPU
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Get Hugging Face token from environment variable (Spaces secrets).
# `os` is already imported at the top of the file, so no second import is needed.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    # Covers both an unset and an empty variable
    logger.warning("HF_TOKEN not found in environment variables. Model access may be restricted.")
    logger.warning("Please set HF_TOKEN in your environment variables or Spaces secrets.")

class LOperatorDemo:
    """Hold the L-Operator model and processor and expose the demo's inference calls.

    The model is not loaded at construction time. ``load_model`` must succeed
    (setting ``is_loaded`` to ``True``) before ``generate_action`` or
    ``chat_with_model`` will run the model; until then both return an error
    message instead.
    """

    def __init__(self):
        # Both stay None until load_model() succeeds.
        self.model = None
        self.processor = None
        self.is_loaded = False

    def load_model(self):
        """Load the L-Operator model and processor.

        This method does not enforce a timeout itself.

        Returns:
            A status string: a success message that includes the load time, or
            a message starting with "❌" describing why loading failed.
            Exceptions are caught and reported through the return value rather
            than propagated.
        """
        import time

        try:
            start_time = time.time()
            logger.info(f"Loading model {MODEL_ID} on device {DEVICE}")

            # Check if token is available.
            # NOTE(review): the token is only checked for presence; it is not
            # passed to from_pretrained explicitly - presumably the Hub client
            # reads it from the environment. Confirm.
            if not HF_TOKEN:
                return "❌ HF_TOKEN not found. Please set HF_TOKEN in Spaces secrets."

            # Load model with progress logging
            logger.info("Downloading and loading model weights...")
            self.model = AutoModelForImageTextToText.from_pretrained(
                MODEL_ID,
                device_map="auto",
                torch_dtype=torch.bfloat16 if DEVICE == "cuda" else torch.float32,
                trust_remote_code=True
            )

            # Load processor
            logger.info("Loading processor...")
            self.processor = AutoProcessor.from_pretrained(
                MODEL_ID,
                trust_remote_code=True
            )

            if DEVICE == "cpu":
                self.model = self.model.to(DEVICE)

            self.is_loaded = True
            load_time = time.time() - start_time
            logger.info(f"Model loaded successfully in {load_time:.1f} seconds")
            return f"✅ L-Operator model loaded successfully in {load_time:.1f} seconds"

        except Exception as e:
            logger.error(f"Error loading model: {str(e)}")
            return f"❌ Error loading model: {str(e)} - This may be a custom model requiring special handling"

    @spaces.GPU(duration=120)  # 2 minutes for action generation
    def generate_action(self, image: Image.Image, goal: str, instruction: str) -> str:
        """Generate an action for a screenshot, a goal and a step instruction.

        Args:
            image: Screenshot to condition on; converted to RGB if needed.
            goal: Overall goal, inserted into the prompt after "Goal:".
            instruction: Current step, inserted into the prompt after "Step:".

        Returns:
            The model's reply. If the reply parses as JSON it is re-serialized
            with two-space indentation; otherwise the raw text is returned.
            On failure (model not loaded, no image, any exception) a message
            starting with "❌" is returned instead of raising.
        """
        if not self.is_loaded:
            return "❌ Model not loaded. Please load the model first."

        if image is None:
            return "❌ Please upload an Android screenshot image."

        try:
            # Convert image to RGB if needed
            if image.mode != "RGB":
                image = image.convert("RGB")

            # Build conversation
            conversation = [
                {
                    "role": "system",
                    "content": [
                        {"type": "text", "text": "You are a helpful multimodal assistant by Liquid AI."}
                    ]
                },
                {
                    "role": "user",
                    "content": [
                        {"type": "image", "image": image},
                        {"type": "text", "text": f"Goal: {goal}\nStep: {instruction}\nRespond with a JSON action containing relevant keys (e.g., action_type, x, y, text, app_name, direction)."}
                    ]
                }
            ]

            # Process inputs. tokenize/return_dict are required to get tensors
            # (token ids plus image features) back instead of the rendered
            # prompt string.
            inputs = self.processor.apply_chat_template(
                conversation,
                add_generation_prompt=True,
                tokenize=True,
                return_dict=True,
                return_tensors="pt"
            ).to(self.model.device)

            # Generate response
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=128,
                    do_sample=True,
                    temperature=0.7,
                    top_p=0.9
                )

            # Drop the prompt tokens so only the newly generated text is decoded
            prompt_length = inputs["input_ids"].shape[1]
            response = self.processor.tokenizer.decode(
                outputs[0][prompt_length:],
                skip_special_tokens=True
            )

            # Try to parse as JSON for better formatting
            try:
                parsed_response = json.loads(response)
            except ValueError:
                # json.JSONDecodeError is a ValueError: not JSON, return as-is
                return response
            return json.dumps(parsed_response, indent=2)

        except Exception as e:
            logger.error(f"Error generating action: {str(e)}")
            return f"❌ Error generating action: {str(e)}"

    @staticmethod
    def _parse_goal_and_step(message: str):
        """Return ``(goal, instruction)`` from lines prefixed "Goal:" / "Step:".

        Lines are stripped before matching and only the leading prefix is
        removed. A missing part is returned as an empty string; if a prefix
        appears on several lines the last one wins.
        """
        goal = ""
        instruction = ""
        for raw_line in message.split('\n'):
            line = raw_line.strip()
            if line.startswith("Goal:"):
                goal = line[len("Goal:"):].strip()
            elif line.startswith("Step:"):
                instruction = line[len("Step:"):].strip()
        return goal, instruction

    @spaces.GPU(duration=90)  # 1.5 minutes for chat responses
    def chat_with_model(self, message: str, history: List[Dict[str, str]], image: Image.Image = None) -> List[Dict[str, str]]:
        """Chat interface function for Gradio.

        Args:
            message: User text. If it contains both "Goal:" and "Step:" those
                lines are parsed; otherwise the whole message is used as the
                step with a generic goal.
            history: Previous messages as ``{"role", "content"}`` dicts.
            image: Screenshot supplied alongside the message.

        Returns:
            ``history`` extended with the user message and one assistant
            message (the generated action or a "❌" error message).
        """
        # NOTE(review): gr.ChatInterface usually expects only the assistant
        # reply from its fn; returning the full history may duplicate earlier
        # messages in the UI - confirm against the Gradio version in use.
        def reply(text):
            return history + [
                {"role": "user", "content": message},
                {"role": "assistant", "content": text},
            ]

        if not self.is_loaded:
            return reply("❌ Model not loaded. Please load the model first.")

        if image is None:
            return reply("❌ Please upload an Android screenshot image.")

        try:
            # Extract goal and instruction from message
            if "Goal:" in message and "Step:" in message:
                # Parse structured input
                goal, instruction = self._parse_goal_and_step(message)
                if not goal or not instruction:
                    return reply("❌ Please provide both Goal and Step in your message.")
            else:
                # Treat as general instruction
                goal = "Complete the requested action"
                instruction = message

            # Generate action
            return reply(self.generate_action(image, goal, instruction))

        except Exception as e:
            logger.error(f"Error in chat: {str(e)}")
            return reply(f"❌ Error: {str(e)}")

# Initialize demo: module-level instance used by the loader and the Gradio
# callbacks in this file. Construction only sets placeholders; the model is
# loaded later via load_model().
demo_instance = LOperatorDemo()

def load_model_with_timeout(timeout_seconds=600):  # 10 minutes timeout
    """Load the model via ``demo_instance.load_model`` with a best-effort timeout.

    The timeout uses ``SIGALRM``, which can only be armed from the main thread
    and does not exist on every platform. When it cannot be used (for example
    when called from a worker thread), the model is loaded without a timeout
    instead of failing.

    Args:
        timeout_seconds: Seconds to wait before the alarm fires.

    Returns:
        The status string from ``load_model``, or a message starting with "❌"
        if loading timed out or raised.
    """
    import signal
    import threading

    def timeout_handler(signum, frame):
        raise TimeoutError("Model loading timed out")

    # signal.signal() raises ValueError outside the main thread, and SIGALRM is
    # missing on some platforms, so check both before arming the alarm.
    can_use_alarm = (
        hasattr(signal, "SIGALRM")
        and threading.current_thread() is threading.main_thread()
    )

    old_handler = None
    if can_use_alarm:
        # Set up the signal handler for timeout
        old_handler = signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(timeout_seconds)
    else:
        logger.warning("SIGALRM timeout unavailable in this context; loading model without a timeout.")

    try:
        logger.info("Loading L-Operator model with timeout protection...")
        # load_model() reports its own failures through its return value, so
        # the handlers below only see errors it does not catch itself.
        result = demo_instance.load_model()
        logger.info(f"Model loading result: {result}")
        return result
    except TimeoutError:
        logger.error("Model loading timed out - this may be due to network issues or large model size")
        return "❌ Model loading timed out. Please try again or check your internet connection."
    except Exception as e:
        logger.error(f"Error loading model: {str(e)}")
        return f"❌ Error loading model: {str(e)}"
    finally:
        if can_use_alarm:
            # Cancel any pending alarm and restore the original signal handler
            signal.alarm(0)
            signal.signal(signal.SIGALRM, old_handler)

# Load example episodes (lazy loading to avoid startup timeout)
def load_example_episodes():
    """Build the chat example rows from the extracted episode data.

    For each known episode, the metadata JSON is read and the first screenshot
    is checked for existence only (the image is not opened). Episodes whose
    metadata cannot be read or whose screenshot is missing are skipped.

    Returns:
        A list of ``[message, image_path]`` rows: the example message first,
        then the value for the additional image input. Empty if nothing could
        be loaded.
    """
    base_dir = "extracted_episodes_duckdb"
    episode_nums = ["13", "53", "73"]
    examples = []

    try:
        for episode_num in episode_nums:
            episode_dir = f"{base_dir}/episode_{episode_num}"

            # Read the metadata in the same iteration that uses it, so a failed
            # episode can never shift metadata onto another episode's number.
            try:
                with open(f"{episode_dir}/metadata.json", "r", encoding="utf-8") as f:
                    metadata = json.load(f)
            except Exception as e:
                logger.warning(f"Could not load metadata for episode_{episode_num}: {str(e)}")
                continue

            # Simple file existence check instead of PIL validation
            image_path = f"{episode_dir}/screenshots/screenshot_1.png"
            if not os.path.exists(image_path):
                continue

            goal_text = metadata.get('goal', f'Episode {episode_num} example')
            # Message first, then the additional (image) input
            examples.append([
                f"Episode {episode_num}: {goal_text[:50]}...",
                image_path
            ])

    except Exception as e:
        logger.error(f"Error loading examples: {str(e)}")
        examples = []

    logger.info(f"Loaded {len(examples)} examples (without validation for faster startup)")
    return examples

# Create Gradio interface
def create_demo():
    """Create the Gradio demo interface.

    Layout: a left column with the model status box, screenshot upload, goal
    and step text boxes and a "Generate Action" button; a right column with a
    chat interface (which receives the uploaded screenshot as an additional
    input) and a JSON view for the generated action.

    Wiring:
        * Page load triggers model loading (if not already loaded) and writes
          the resulting status string to the status box.
        * The button runs ``demo_instance.generate_action`` and shows the
          parsed result in the JSON view.

    Returns:
        The assembled ``gr.Blocks`` app (not yet launched).
    """

    with gr.Blocks(
        title="L-Operator: Android Device Control Demo",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        .chat-container {
            height: 600px;
        }
        """
    ) as demo:

        gr.Markdown("""
        # 🤖 L-Operator: Android Device Control Demo

        **Lightweight Multimodal Android Device Control Agent**

        This demo showcases the L-Operator model, a fine-tuned multimodal AI agent based on LiquidAI's LFM2-VL-1.6B model, 
        optimized for Android device control through visual understanding and action generation.

        ## 🚀 How to Use

        1. **Model Loading**: The L-Operator model loads automatically on startup
        2. **Upload Screenshot**: Upload an Android device screenshot
        3. **Provide Instructions**: Enter your goal and step instructions
        4. **Get Actions**: The model will generate JSON actions for Android device control

        ## 📋 Expected Output Format

        The model generates JSON actions in the following format:
        ```json
        {
          "action_type": "tap",
          "x": 540,
          "y": 1200,
          "text": "Settings",
          "app_name": "com.android.settings",
          "confidence": 0.92
        }
        ```

        ---
        """)

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 🤖 Model Status")
                model_status = gr.Textbox(
                    label="L-Operator Model",
                    value="🔄 Loading model on startup...",
                    interactive=False
                )

                gr.Markdown("### 📱 Input")
                image_input = gr.Image(
                    label="Android Screenshot",
                    type="pil",
                    height=400,
                    sources=["upload"]
                )

                gr.Markdown("### 📝 Instructions")
                goal_input = gr.Textbox(
                    label="Goal",
                    placeholder="e.g., Open the Settings app and navigate to Display settings",
                    lines=2
                )

                step_input = gr.Textbox(
                    label="Step Instruction",
                    placeholder="e.g., Tap on the Settings app icon on the home screen",
                    lines=2
                )

                generate_btn = gr.Button("🎯 Generate Action", variant="secondary")

            with gr.Column(scale=2):
                gr.Markdown("### 💬 Chat Interface")
                # The uploaded screenshot reaches the chat function through
                # additional_inputs, so no separate image->chat event is needed.
                chat_interface = gr.ChatInterface(
                    fn=demo_instance.chat_with_model,
                    additional_inputs=[image_input],
                    title="L-Operator Chat",
                    description="Chat with L-Operator using screenshots and text instructions",
                    examples=load_example_episodes(),
                    type="messages",
                    cache_examples=False
                )

                gr.Markdown("### 🎯 Action Output")
                action_output = gr.JSON(
                    label="Generated Action",
                    value={},
                    height=200
                )

        # Event handlers
        def on_generate_action(image, goal, step):
            """Validate the form, run the model and return a JSON-able result."""
            if image is None:
                return {"error": "Please upload an image"}

            if not goal or not step:
                return {"error": "Please provide both goal and step"}

            response = demo_instance.generate_action(image, goal, step)

            try:
                # Try to parse as JSON
                return json.loads(response)
            except (ValueError, TypeError):
                # Not valid JSON (including "❌ ..." error strings): show raw text
                return {"raw_response": response}

        # Update model status on page load (with timeout-protected model loading)
        def update_model_status():
            """Return the status text, loading the model first if necessary."""
            if demo_instance.is_loaded:
                return "✅ L-Operator model loaded and ready!"

            logger.info("Loading model on Gradio startup with timeout protection...")
            result = load_model_with_timeout(timeout_seconds=900)  # 15 minutes for Spaces
            logger.info(f"Model loading result: {result}")
            return result

        generate_btn.click(
            fn=on_generate_action,
            inputs=[image_input, goal_input, step_input],
            outputs=action_output
        )

        # Load model and update status on page load
        demo.load(
            fn=update_model_status,
            outputs=model_status
        )

        gr.Markdown("""
        ---

        ## 📊 Model Details

        | Property | Value |
        |----------|-------|
        | **Base Model** | LiquidAI/LFM2-VL-1.6B |
        | **Architecture** | LFM2-VL (1.6B parameters) |
        | **Fine-tuning** | LoRA (Low-Rank Adaptation) |
        | **Training Data** | Android control episodes with screenshots and actions |

        ## 🎯 Use Cases

        - **Mobile App Testing**: Automated UI testing for Android applications
        - **Accessibility Applications**: Voice-controlled device navigation
        - **Remote Support**: Remote device troubleshooting
        - **Development Workflows**: UI/UX testing automation

        ---

        **Made with ❤️ by Tonic** | [Model on Hugging Face](https://huggingface.co/Tonic/l-android-control) 
        """)

    return demo

# Script entry point: build the interface, then start the Gradio server
if __name__ == "__main__":
    # Server settings gathered in one mapping so they read as configuration
    launch_options = dict(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        debug=False,  # Disable debug to reduce startup time
        show_error=True,
        ssr_mode=False,
        max_threads=2,  # Limit threads to prevent resource exhaustion
        quiet=True,  # Reduce startup logging noise
    )

    try:
        logger.info("Creating Gradio demo interface...")
        demo = create_demo()

        logger.info("Launching Gradio server...")
        demo.launch(**launch_options)
    except Exception as e:
        # Log the failure, then re-raise so the process still exits with the error
        logger.error(f"Failed to launch Gradio app: {str(e)}")
        raise