import asyncio
import json
import re
import uuid
import time
import secrets
import base64
import mimetypes
from collections import defaultdict
from typing import Optional, Dict, List
from datetime import datetime, timezone, timedelta

import uvicorn

# nodriver for undetectable browser automation (replaces Camoufox)
try:
    import nodriver
    HAS_NODRIVER = True
except ImportError:
    HAS_NODRIVER = False
    print("=" * 60)
    print("❌ ERROR: nodriver not installed!")
    print("")
    print("   PROBLEM: nodriver is required for reCAPTCHA bypassing.")
    print("")
    print("   SOLUTION:")
    print("   1. Run: pip install nodriver")
    print("   2. Restart LMArenaBridge")
    print("=" * 60)

from fastapi import FastAPI, HTTPException, Depends, status, Form, Request, Response, Header
from starlette.responses import HTMLResponse, RedirectResponse, StreamingResponse
from fastapi.security import APIKeyHeader

import httpx

# curl_cffi for TLS fingerprint mimicking (bypasses Cloudflare JA3 detection)
try:
    from curl_cffi.requests import AsyncSession as CurlAsyncSession
    HAS_CURL_CFFI = True
except ImportError:
    HAS_CURL_CFFI = False
    CurlAsyncSession = None
    print("⚠️  curl_cffi not installed. Install with: pip install curl_cffi")
    print("   (Falling back to httpx - may trigger bot detection)")


# ============================================================
# CONFIGURATION
# ============================================================
# Set to True for detailed logging, False for minimal logging
DEBUG = True

# Port to run the server on
import os
PORT = int(os.environ.get("PORT", 7860))
HEADLESS = os.environ.get("HEADLESS", "false").lower() == "true"

# HTTP Status Codes
class HTTPStatus:
    # 1xx Informational
    CONTINUE = 100
    SWITCHING_PROTOCOLS = 101
    PROCESSING = 102
    EARLY_HINTS = 103
    
    # 2xx Success
    OK = 200
    CREATED = 201
    ACCEPTED = 202
    NON_AUTHORITATIVE_INFORMATION = 203
    NO_CONTENT = 204
    RESET_CONTENT = 205
    PARTIAL_CONTENT = 206
    MULTI_STATUS = 207
    
    # 3xx Redirection
    MULTIPLE_CHOICES = 300
    MOVED_PERMANENTLY = 301
    MOVED_TEMPORARILY = 302
    SEE_OTHER = 303
    NOT_MODIFIED = 304
    USE_PROXY = 305
    TEMPORARY_REDIRECT = 307
    PERMANENT_REDIRECT = 308
    
    # 4xx Client Errors
    BAD_REQUEST = 400
    UNAUTHORIZED = 401
    PAYMENT_REQUIRED = 402
    FORBIDDEN = 403
    NOT_FOUND = 404
    METHOD_NOT_ALLOWED = 405
    NOT_ACCEPTABLE = 406
    PROXY_AUTHENTICATION_REQUIRED = 407
    REQUEST_TIMEOUT = 408
    CONFLICT = 409
    GONE = 410
    LENGTH_REQUIRED = 411
    PRECONDITION_FAILED = 412
    REQUEST_TOO_LONG = 413
    REQUEST_URI_TOO_LONG = 414
    UNSUPPORTED_MEDIA_TYPE = 415
    REQUESTED_RANGE_NOT_SATISFIABLE = 416
    EXPECTATION_FAILED = 417
    IM_A_TEAPOT = 418
    INSUFFICIENT_SPACE_ON_RESOURCE = 419
    METHOD_FAILURE = 420
    MISDIRECTED_REQUEST = 421
    UNPROCESSABLE_ENTITY = 422
    LOCKED = 423
    FAILED_DEPENDENCY = 424
    UPGRADE_REQUIRED = 426
    PRECONDITION_REQUIRED = 428
    TOO_MANY_REQUESTS = 429
    REQUEST_HEADER_FIELDS_TOO_LARGE = 431
    UNAVAILABLE_FOR_LEGAL_REASONS = 451
    
    # 5xx Server Errors
    INTERNAL_SERVER_ERROR = 500
    NOT_IMPLEMENTED = 501
    BAD_GATEWAY = 502
    SERVICE_UNAVAILABLE = 503
    GATEWAY_TIMEOUT = 504
    HTTP_VERSION_NOT_SUPPORTED = 505
    INSUFFICIENT_STORAGE = 507
    NETWORK_AUTHENTICATION_REQUIRED = 511

# Status code descriptions for logging
STATUS_MESSAGES = {
    100: "Continue",
    101: "Switching Protocols",
    102: "Processing",
    103: "Early Hints",
    200: "OK - Success",
    201: "Created",
    202: "Accepted",
    203: "Non-Authoritative Information",
    204: "No Content",
    205: "Reset Content",
    206: "Partial Content",
    207: "Multi-Status",
    300: "Multiple Choices",
    301: "Moved Permanently",
    302: "Moved Temporarily",
    303: "See Other",
    304: "Not Modified",
    305: "Use Proxy",
    307: "Temporary Redirect",
    308: "Permanent Redirect",
    400: "Bad Request - Invalid request syntax",
    401: "Unauthorized - Invalid or expired token",
    402: "Payment Required",
    403: "Forbidden - Access denied",
    404: "Not Found - Resource doesn't exist",
    405: "Method Not Allowed",
    406: "Not Acceptable",
    407: "Proxy Authentication Required",
    408: "Request Timeout",
    409: "Conflict",
    410: "Gone - Resource permanently deleted",
    411: "Length Required",
    412: "Precondition Failed",
    413: "Request Too Long - Payload too large",
    414: "Request URI Too Long",
    415: "Unsupported Media Type",
    416: "Requested Range Not Satisfiable",
    417: "Expectation Failed",
    418: "I'm a Teapot",
    419: "Insufficient Space on Resource",
    420: "Method Failure",
    421: "Misdirected Request",
    422: "Unprocessable Entity",
    423: "Locked",
    424: "Failed Dependency",
    426: "Upgrade Required",
    428: "Precondition Required",
    429: "Too Many Requests - Rate limit exceeded",
    431: "Request Header Fields Too Large",
    451: "Unavailable For Legal Reasons",
    500: "Internal Server Error",
    501: "Not Implemented",
    502: "Bad Gateway",
    503: "Service Unavailable",
    504: "Gateway Timeout",
    505: "HTTP Version Not Supported",
    507: "Insufficient Storage",
    511: "Network Authentication Required"
}

def get_status_emoji(status_code: int) -> str:
    """Get emoji for status code"""
    if 200 <= status_code < 300:
        return "✅"
    elif 300 <= status_code < 400:
        return "↪️"
    elif 400 <= status_code < 500:
        if status_code == 401:
            return "🔒"
        elif status_code == 403:
            return "🚫"
        elif status_code == 404:
            return "❓"
        elif status_code == 429:
            return "⏱️"
        return "⚠️"
    elif 500 <= status_code < 600:
        return "❌"
    return "ℹ️"

def log_http_status(status_code: int, context: str = ""):
    """Log HTTP status with readable message"""
    emoji = get_status_emoji(status_code)
    message = STATUS_MESSAGES.get(status_code, f"Unknown Status {status_code}")
    if context:
        debug_print(f"{emoji} HTTP {status_code}: {message} ({context})")
    else:
        debug_print(f"{emoji} HTTP {status_code}: {message}")
# ============================================================

def debug_print(*args, **kwargs):
    """Print debug messages only if DEBUG is True"""
    if DEBUG:
        print(*args, **kwargs)

# --- New reCAPTCHA Functions ---

# Updated constants from gpt4free/g4f/Provider/needs_auth/LMArena.py
RECAPTCHA_SITEKEY = "6Led_uYrAAAAAKjxDIF58fgFtX3t8loNAK85bW9I"
RECAPTCHA_ACTION = "chat_submit"

async def initialize_nodriver_browser():
    """
    Opens a visible Chrome browser and navigates to LMArena.
    User must solve CAPTCHA manually. Browser stays open for session duration.
    """
    global NODRIVER_BROWSER, NODRIVER_TAB, BROWSER_READY
    
    if not HAS_NODRIVER:
        print("=" * 60)
        print("❌ ERROR: Chrome browser not found!")
        print("")
        print("   PROBLEM: nodriver requires Google Chrome to be installed.")
        print("")
        print("   SOLUTION:")
        print("   1. Download Chrome from: https://www.google.com/chrome/")
        print("   2. Install Chrome")
        print("   3. Restart LMArenaBridge")
        print("=" * 60)
        return False
    
    if BROWSER_READY and NODRIVER_TAB is not None:
        debug_print("   └── Browser already initialized, reusing session")
        return True
    
    print("")
    print("🌐 STEP 1/3: Launching Chrome browser...")
    print("   ├── Looking for Chrome installation...")
    
    # Create chrome profile directory path (for persistent login)
    import os
    chrome_profile_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "chrome_profile")
    
    try:
        # Start nodriver with visible browser and PERSISTENT profile
        NODRIVER_BROWSER = await nodriver.start(
            headless=HEADLESS,  # Toggleable via environment variable
            user_data_dir=chrome_profile_dir,  # 💾 Saves login across restarts!
            browser_args=[
                '--disable-blink-features=AutomationControlled',
                '--no-first-run',
                '--no-default-browser-check',
                '--no-sandbox',
                '--disable-dev-shm-usage',
            ]
        )
        print("   ├── ✅ Chrome launched successfully")
        print(f"   ├── 💾 Using persistent profile: {chrome_profile_dir}")
        print("   └── 🔄 Navigating to lmarena.ai...")
        
        # Navigate to LMArena
        NODRIVER_TAB = await NODRIVER_BROWSER.get("https://arena.ai/?mode=direct")
        
        # Capture User-Agent from the actual browser
        global USER_AGENT
        try:
            ua = await NODRIVER_TAB.evaluate("navigator.userAgent")
            if ua:
                USER_AGENT = ua
                debug_print(f"🕵️  Captured User-Agent: {USER_AGENT[:50]}...")
        except Exception as e:
            debug_print(f"⚠️  Failed to captures User-Agent: {e}")
            
        # Wait for page to settle
        await asyncio.sleep(3)
        
        print("")
        print("⏳ STEP 2/3: Waiting for CAPTCHA verification...")
        print("   ┌────────────────────────────────────────────────────────┐")
        print("   │  👆 ACTION REQUIRED: Please click the reCAPTCHA        │")
        print("   │     checkbox in the Chrome window that just opened!    │")
        print("   │                                                        │")
        print("   │  ⏱️  Timeout in 120 seconds...                         │")
        print("   └────────────────────────────────────────────────────────┘")
        
        # Wait for reCAPTCHA library to load and get first token
        captcha_solved = await wait_for_recaptcha_ready(timeout=120)
        
        if captcha_solved:
            print("")
            print("✅ STEP 2/3: CAPTCHA verified successfully!")
            BROWSER_READY = True
            return True
        else:
            print("")
            print("❌ ERROR: CAPTCHA verification timed out (120 seconds)")
            print("")
            print("   PROBLEM: You didn't click the reCAPTCHA checkbox in time.")
            print("")
            print("   SOLUTION:")
            print("   1. Restart the server: python src/main.py")
            print("   2. When Chrome opens, quickly click the \"I'm not a robot\" checkbox")
            print("   3. You have 2 minutes to complete this")
            return False
            
    except Exception as e:
        print(f"❌ ERROR: Failed to launch Chrome browser!")
        print(f"   └── Details: {e}")
        print("")
        print("   POSSIBLE CAUSES:")
        print("   1. Chrome not installed → Install from google.com/chrome")
        print("   2. Chrome in use by another process → Close other Chrome windows")
        print("   3. Permission issue → Run as administrator")
        return False


async def wait_for_recaptcha_ready(timeout: int = 120) -> bool:
    """
    Wait for user to complete CAPTCHA verification.
    Returns True when reCAPTCHA library is loaded and we can get tokens.
    """
    global NODRIVER_TAB, RECAPTCHA_TOKEN, RECAPTCHA_EXPIRY
    
    start_time = time.time()
    last_status_time = 0
    
    while time.time() - start_time < timeout:
        elapsed = int(time.time() - start_time)
        
        # Print status every 10 seconds
        if elapsed > 0 and elapsed % 10 == 0 and elapsed != last_status_time:
            last_status_time = elapsed
            remaining = timeout - elapsed
            print(f"⏳ Waiting for CAPTCHA... ({elapsed}s elapsed, {remaining}s remaining)")
        
        try:
            # Check if grecaptcha enterprise is available
            lib_ready = await NODRIVER_TAB.evaluate(
                "!!(window.grecaptcha && window.grecaptcha.enterprise)"
            )
            
            if lib_ready:
                # Try to get a token
                debug_print("   └── reCAPTCHA library detected, requesting token...")
                token = await get_recaptcha_token_from_browser()
                
                if token:
                    RECAPTCHA_TOKEN = token
                    RECAPTCHA_EXPIRY = datetime.now(timezone.utc) + timedelta(seconds=110)
                    print(f"   └── reCAPTCHA token acquired ({len(token)} chars)")
                    return True
                    
        except Exception as e:
            debug_print(f"   └── Check failed (normal during load): {e}")
        
        await asyncio.sleep(2)
    
    return False


async def get_recaptcha_token_from_browser() -> Optional[str]:
    """
    Gets a reCAPTCHA token from the persistent browser session.
    Uses a side-channel approach: sets global variable, triggers execute, polls for result.
    """
    global NODRIVER_TAB
    
    if NODRIVER_TAB is None:
        debug_print("❌ Browser tab not available")
        return None
    
    try:
        # Step 1: Initialize the global variable
        await NODRIVER_TAB.evaluate("window.__recaptcha_token = 'PENDING';")
        
        # Step 2: Trigger the reCAPTCHA execution (don't await the Promise)
        trigger_script = f"""
            (function() {{
                try {{
                    window.grecaptcha.enterprise.execute('{RECAPTCHA_SITEKEY}', {{ action: '{RECAPTCHA_ACTION}' }})
                    .then(function(token) {{
                        window.__recaptcha_token = token;
                    }})
                    .catch(function(err) {{
                        window.__recaptcha_token = 'ERROR: ' + err.toString();
                    }});
                }} catch (e) {{
                    window.__recaptcha_token = 'SYNC_ERROR: ' + e.toString();
                }}
            }})();
        """
        await NODRIVER_TAB.evaluate(trigger_script)
        
        # Step 3: Poll for the result
        for i in range(15):  # Max 15 seconds
            await asyncio.sleep(1)
            result = await NODRIVER_TAB.evaluate("window.__recaptcha_token")
            
            if result and result != 'PENDING':
                if isinstance(result, str) and result.startswith('ERROR'):
                    debug_print(f"   └── JS Error: {result}")
                    return None
                elif isinstance(result, str) and result.startswith('SYNC_ERROR'):
                    debug_print(f"   └── Sync Error: {result}")
                    return None
                elif isinstance(result, str) and len(result) > 100:
                    # Valid token!
                    return result
                else:
                    debug_print(f"   └── Unexpected result: {result}")
                    return None
        
        debug_print("   └── Token polling timed out")
        return None
            
    except Exception as e:
        debug_print(f"   └── Token request failed: {e}")
        return None


async def get_recaptcha_v3_token() -> Optional[str]:
    """
    Gets reCAPTCHA v3 token using the persistent nodriver browser session.
    If browser not initialized, returns None.
    """
    global RECAPTCHA_TOKEN, RECAPTCHA_EXPIRY, BROWSER_READY
    
    if not BROWSER_READY or NODRIVER_TAB is None:
        debug_print("❌ Browser not ready. Token refresh unavailable.")
        print("")
        print("❌ ERROR: Browser connection lost!")
        print("")
        print("   PROBLEM: The Chrome window was closed or crashed.")
        print("")
        print("   SOLUTION:")
        print("   1. Restart the server: python src/main.py")
        print("   2. When Chrome opens, click the CAPTCHA")
        print("   3. DO NOT close the Chrome window while using the bridge")
        return None
    
    current_time = datetime.now(timezone.utc).strftime("%H:%M:%S")
    debug_print(f"🔄 [{current_time}] Token refresh triggered")
    debug_print("   ├── Requesting new reCAPTCHA token...")
    
    token = await get_recaptcha_token_from_browser()
    
    if token:
        RECAPTCHA_TOKEN = token
        RECAPTCHA_EXPIRY = datetime.now(timezone.utc) + timedelta(seconds=110)
        next_refresh = (datetime.now(timezone.utc) + timedelta(seconds=100)).strftime("%H:%M:%S")
        debug_print(f"   ├── ✅ New token acquired ({len(token)} chars)")
        debug_print(f"   └── Next refresh at: {next_refresh}")
        return token
    else:
        debug_print("   └── ❌ Failed to get token")
        return None


async def refresh_recaptcha_token() -> Optional[str]:
    """
    Gets a FRESH reCAPTCHA token for each request.
    
    IMPORTANT: reCAPTCHA tokens are SINGLE-USE per Google docs.
    Once a token is verified by the server, it becomes immediately invalid.
    We MUST get a fresh token for every LMArena API request.
    """
    global RECAPTCHA_TOKEN, RECAPTCHA_EXPIRY
    
    current_time = datetime.now(timezone.utc)
    time_str = current_time.strftime("%H:%M:%S")
    
    debug_print(f"🔄 [{time_str}] Getting fresh reCAPTCHA token (tokens are single-use)...")
    
    # ALWAYS get a fresh token - tokens are single-use!
    for attempt in range(1, 4):
        new_token = await get_recaptcha_v3_token()
        
        if new_token:
            RECAPTCHA_TOKEN = new_token
            RECAPTCHA_EXPIRY = current_time + timedelta(seconds=110)
            debug_print(f"✅ [{time_str}] Fresh token acquired ({len(new_token)} chars)")
            return new_token
        
        if attempt < 3:
            wait_time = attempt * 2  # Shorter waits: 2s, 4s
            debug_print(f"⚠️ Token fetch failed (attempt {attempt}/3), retrying in {wait_time}s...")
            await asyncio.sleep(wait_time)
    
    # All attempts failed
    print("")
    print("❌ ERROR: Token refresh failed after 3 attempts!")
    print("")
    print("   PROBLEM: Cannot acquire new reCAPTCHA token.")
    print("")
    print("   SOLUTION:")
    print("   1. Check the Chrome window - you may need to solve CAPTCHA again")
    print("   2. If Chrome is unresponsive, restart the server")
    
    return None

# --- End New reCAPTCHA Functions ---

# Custom UUIDv7 implementation (using correct Unix epoch)
def uuid7():
    """
    Generate a UUIDv7 using Unix epoch (milliseconds since 1970-01-01)
    matching the browser's implementation.
    """
    timestamp_ms = int(time.time() * 1000)
    rand_a = secrets.randbits(12)
    rand_b = secrets.randbits(62)
    
    uuid_int = timestamp_ms << 80
    uuid_int |= (0x7000 | rand_a) << 64
    uuid_int |= (0x8000000000000000 | rand_b)
    
    hex_str = f"{uuid_int:032x}"
    return f"{hex_str[0:8]}-{hex_str[8:12]}-{hex_str[12:16]}-{hex_str[16:20]}-{hex_str[20:32]}"

# Image upload helper functions
async def upload_image_to_lmarena(image_data: bytes, mime_type: str, filename: str) -> Optional[tuple]:
    """
    Upload an image to LMArena R2 storage and return the key and download URL.
    
    Args:
        image_data: Binary image data
        mime_type: MIME type of the image (e.g., 'image/png')
        filename: Original filename for the image
    
    Returns:
        Tuple of (key, download_url) if successful, or None if upload fails
    """
    try:
        # Validate inputs
        if not image_data:
            debug_print("❌ Image data is empty")
            return None
        
        if not mime_type or not mime_type.startswith('image/'):
            debug_print(f"❌ Invalid MIME type: {mime_type}")
            return None
        
        # Step 1: Request upload URL
        debug_print(f"📤 Step 1: Requesting upload URL for {filename}")
        
        # Get Next-Action IDs from config
        config = get_config()
        upload_action_id = config.get("next_action_upload")
        signed_url_action_id = config.get("next_action_signed_url")
        
        if not upload_action_id or not signed_url_action_id:
            debug_print("❌ Next-Action IDs not found in config. Please refresh tokens from dashboard.")
            return None
        
        # Prepare headers for Next.js Server Action
        request_headers = get_request_headers()
        request_headers.update({
            "Accept": "text/x-component",
            "Content-Type": "text/plain;charset=UTF-8",
            "Next-Action": upload_action_id,
            "Referer": "https://arena.ai/?mode=direct",
        })
        
        async with httpx.AsyncClient() as client:
            try:
                response = await client.post(
                    "https://arena.ai/?mode=direct",
                    headers=request_headers,
                    content=json.dumps([filename, mime_type]),
                    timeout=30.0
                )
                response.raise_for_status()
            except httpx.TimeoutException:
                debug_print("❌ Timeout while requesting upload URL")
                return None
            except httpx.HTTPError as e:
                debug_print(f"❌ HTTP error while requesting upload URL: {e}")
                return None
            
            # Parse response - format: 0:{...}\n1:{...}\n
            try:
                lines = response.text.strip().split('\n')
                upload_data = None
                for line in lines:
                    if line.startswith('1:'):
                        upload_data = json.loads(line[2:])
                        break
                
                if not upload_data or not upload_data.get('success'):
                    debug_print(f"❌ Failed to get upload URL: {response.text[:200]}")
                    return None
                
                upload_url = upload_data['data']['uploadUrl']
                key = upload_data['data']['key']
                debug_print(f"✅ Got upload URL and key: {key}")
            except (json.JSONDecodeError, KeyError, IndexError) as e:
                debug_print(f"❌ Failed to parse upload URL response: {e}")
                return None
            
            # Step 2: Upload image to R2 storage
            debug_print(f"📤 Step 2: Uploading image to R2 storage ({len(image_data)} bytes)")
            try:
                response = await client.put(
                    upload_url,
                    content=image_data,
                    headers={"Content-Type": mime_type},
                    timeout=60.0
                )
                response.raise_for_status()
                debug_print(f"✅ Image uploaded successfully")
            except httpx.TimeoutException:
                debug_print("❌ Timeout while uploading image to R2 storage")
                return None
            except httpx.HTTPError as e:
                debug_print(f"❌ HTTP error while uploading image: {e}")
                return None
            
            # Step 3: Get signed download URL (uses different Next-Action)
            debug_print(f"📤 Step 3: Requesting signed download URL")
            request_headers_step3 = request_headers.copy()
            request_headers_step3["Next-Action"] = signed_url_action_id
            
            try:
                response = await client.post(
                    "https://arena.ai/?mode=direct",
                    headers=request_headers_step3,
                    content=json.dumps([key]),
                    timeout=30.0
                )
                response.raise_for_status()
            except httpx.TimeoutException:
                debug_print("❌ Timeout while requesting download URL")
                return None
            except httpx.HTTPError as e:
                debug_print(f"❌ HTTP error while requesting download URL: {e}")
                return None
            
            # Parse response
            try:
                lines = response.text.strip().split('\n')
                download_data = None
                for line in lines:
                    if line.startswith('1:'):
                        download_data = json.loads(line[2:])
                        break
                
                if not download_data or not download_data.get('success'):
                    debug_print(f"❌ Failed to get download URL: {response.text[:200]}")
                    return None
                
                download_url = download_data['data']['url']
                debug_print(f"✅ Got signed download URL: {download_url[:100]}...")
                return (key, download_url)
            except (json.JSONDecodeError, KeyError, IndexError) as e:
                debug_print(f"❌ Failed to parse download URL response: {e}")
                return None
            
    except Exception as e:
        debug_print(f"❌ Unexpected error uploading image: {type(e).__name__}: {e}")
        return None

async def process_message_content(content, model_capabilities: dict) -> tuple[str, List[dict]]:
    """
    Process message content, handle images if present and model supports them.
    
    Args:
        content: Message content (string or list of content parts)
        model_capabilities: Model's capability dictionary
    
    Returns:
        Tuple of (text_content, experimental_attachments)
    """
    # Check if model supports image input
    supports_images = model_capabilities.get('inputCapabilities', {}).get('image', False)
    
    # If content is a string, return it as-is
    if isinstance(content, str):
        return content, []
    
    # If content is a list (OpenAI format with multiple parts)
    if isinstance(content, list):
        text_parts = []
        attachments = []
        
        for part in content:
            if isinstance(part, dict):
                if part.get('type') == 'text':
                    text_parts.append(part.get('text', ''))
                    
                elif part.get('type') == 'image_url' and supports_images:
                    image_url = part.get('image_url', {})
                    if isinstance(image_url, dict):
                        url = image_url.get('url', '')
                    else:
                        url = image_url
                    
                    # Handle base64-encoded images
                    if url.startswith('data:'):
                        # Format: data:image/png;base64,iVBORw0KGgo...
                        try:
                            # Validate and parse data URI
                            if ',' not in url:
                                debug_print(f"❌ Invalid data URI format (no comma separator)")
                                continue
                            
                            header, data = url.split(',', 1)
                            
                            # Parse MIME type
                            if ';' not in header or ':' not in header:
                                debug_print(f"❌ Invalid data URI header format")
                                continue
                            
                            mime_type = header.split(';')[0].split(':')[1]
                            
                            # Validate MIME type
                            if not mime_type.startswith('image/'):
                                debug_print(f"❌ Invalid MIME type: {mime_type}")
                                continue
                            
                            # Decode base64
                            try:
                                image_data = base64.b64decode(data)
                            except Exception as e:
                                debug_print(f"❌ Failed to decode base64 data: {e}")
                                continue
                            
                            # Validate image size (max 10MB)
                            if len(image_data) > 10 * 1024 * 1024:
                                debug_print(f"❌ Image too large: {len(image_data)} bytes (max 10MB)")
                                continue
                            
                            # Generate filename
                            ext = mimetypes.guess_extension(mime_type) or '.png'
                            filename = f"upload-{uuid.uuid4()}{ext}"
                            
                            debug_print(f"🖼️  Processing base64 image: {filename}, size: {len(image_data)} bytes")
                            
                            # Upload to LMArena
                            upload_result = await upload_image_to_lmarena(image_data, mime_type, filename)
                            
                            if upload_result:
                                key, download_url = upload_result
                                # Add as attachment in LMArena format
                                attachments.append({
                                    "name": key,
                                    "contentType": mime_type,
                                    "url": download_url
                                })
                                debug_print(f"✅ Image uploaded and added to attachments")
                            else:
                                debug_print(f"⚠️  Failed to upload image, skipping")
                        except Exception as e:
                            debug_print(f"❌ Unexpected error processing base64 image: {type(e).__name__}: {e}")
                    
                    # Handle URL images (direct URLs)
                    elif url.startswith('http://') or url.startswith('https://'):
                        # For external URLs, we'd need to download and re-upload
                        # For now, skip this case
                        debug_print(f"⚠️  External image URLs not yet supported: {url[:100]}")
                        
                elif part.get('type') == 'image_url' and not supports_images:
                    debug_print(f"⚠️  Image provided but model doesn't support images")
        
        # Combine text parts
        text_content = '\n'.join(text_parts).strip()
        return text_content, attachments
    
    # Fallback
    return str(content), []

app = FastAPI()

# --- Constants & Global State ---
CONFIG_FILE = "config.json"
MODELS_FILE = "models.json"
API_KEY_HEADER = APIKeyHeader(name="Authorization", auto_error=False)

# In-memory stores
# { "api_key": { "conversation_id": session_data } }
chat_sessions: Dict[str, Dict[str, dict]] = defaultdict(dict)
# { "session_id": "username" }
dashboard_sessions = {}
# { "api_key": [timestamp1, timestamp2, ...] }
api_key_usage = defaultdict(list)
# { "model_id": count }
model_usage_stats = defaultdict(int)
# Token cycling: current index for round-robin selection
current_token_index = 0
# Track which token is assigned to each conversation (conversation_id -> token)
conversation_tokens: Dict[str, str] = {}
# Track failed tokens per request to avoid retrying with same token
request_failed_tokens: Dict[str, set] = {}

# --- New Global State for reCAPTCHA ---
RECAPTCHA_TOKEN: Optional[str] = None
# Initialize expiry far in the past to force a refresh on startup
RECAPTCHA_EXPIRY: datetime = datetime.now(timezone.utc) - timedelta(days=365)

# --- nodriver Browser Instance (persistent session) ---
# These stay alive for the entire server session
NODRIVER_BROWSER = None  # nodriver.Browser instance
NODRIVER_TAB = None      # nodriver.Tab instance (the page)
BROWSER_READY = False    # Flag to indicate browser is ready for token refresh
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" # Default fallback
LMARENA_REQUEST_LOCK = asyncio.Lock()  # Lock to serialize LMArena requests (prevents rate limiting)
LAST_LMARENA_REQUEST_TIME = 0.0  # Timestamp of last LMArena request (for rate limiting)

# --- Webshare Proxy Pool Configuration ---
# Enable/disable proxy rotation (set to True when proxies are configured)
PROXY_ROTATION_ENABLED = False  # Will be auto-enabled when proxies are added

# When True, each request creates a NEW session instead of reusing existing ones
# This bypasses LMArena's per-session rate limiting (they track by session ID, not just IP)
# Recommended: Enable this when using proxy rotation for unlimited parallel requests
FORCE_NEW_SESSION = True  # Always create fresh session (bypasses per-session rate limits)

# Proxy pool - Add your Webshare proxy credentials here
# Format: {"host": "IP", "port": PORT, "username": "user", "password": "pass"}
WEBSHARE_PROXY_POOL = [
    # Account 1 - 10 Proxies (wlnpiril)
    {"host": "142.111.48.253", "port": 7030, "username": "wlnpiril", "password": "rz8y4an5o6n1"},  # US - Los Angeles
    {"host": "23.95.150.145", "port": 6114, "username": "wlnpiril", "password": "rz8y4an5o6n1"},   # US - Buffalo
    {"host": "198.23.239.134", "port": 6540, "username": "wlnpiril", "password": "rz8y4an5o6n1"},  # US - Buffalo
    {"host": "107.172.163.27", "port": 6543, "username": "wlnpiril", "password": "rz8y4an5o6n1"},  # US - Bloomingdale
    {"host": "198.105.121.200", "port": 6462, "username": "wlnpiril", "password": "rz8y4an5o6n1"}, # UK - London
    {"host": "64.137.96.74", "port": 6641, "username": "wlnpiril", "password": "rz8y4an5o6n1"},    # Spain - Madrid
    {"host": "84.247.60.125", "port": 6095, "username": "wlnpiril", "password": "rz8y4an5o6n1"},   # Poland - Warsaw
    {"host": "216.10.27.159", "port": 6837, "username": "wlnpiril", "password": "rz8y4an5o6n1"},   # US - Dallas
    {"host": "23.26.71.145", "port": 5628, "username": "wlnpiril", "password": "rz8y4an5o6n1"},    # US - Orem
    {"host": "23.27.208.120", "port": 5830, "username": "wlnpiril", "password": "rz8y4an5o6n1"},   # US - Reston
    # Account 2 - 10 Proxies (wfpfhvqd)
    {"host": "142.111.48.253", "port": 7030, "username": "wfpfhvqd", "password": "akmgj7n23qgw"},  # US - Los Angeles
    {"host": "23.95.150.145", "port": 6114, "username": "wfpfhvqd", "password": "akmgj7n23qgw"},   # US - Buffalo
    {"host": "198.23.239.134", "port": 6540, "username": "wfpfhvqd", "password": "akmgj7n23qgw"},  # US - Buffalo
    {"host": "107.172.163.27", "port": 6543, "username": "wfpfhvqd", "password": "akmgj7n23qgw"},  # US - Bloomingdale
    {"host": "198.105.121.200", "port": 6462, "username": "wfpfhvqd", "password": "akmgj7n23qgw"}, # UK - London
    {"host": "64.137.96.74", "port": 6641, "username": "wfpfhvqd", "password": "akmgj7n23qgw"},    # Spain - Madrid
    {"host": "84.247.60.125", "port": 6095, "username": "wfpfhvqd", "password": "akmgj7n23qgw"},   # Poland - Warsaw
    {"host": "216.10.27.159", "port": 6837, "username": "wfpfhvqd", "password": "akmgj7n23qgw"},   # US - Dallas
    {"host": "23.26.71.145", "port": 5628, "username": "wfpfhvqd", "password": "akmgj7n23qgw"},    # US - Orem
    {"host": "23.27.208.120", "port": 5830, "username": "wfpfhvqd", "password": "akmgj7n23qgw"},   # US - Reston
    # Account 3 - 10 Proxies (qbwdhdrw)
    {"host": "142.111.48.253", "port": 7030, "username": "qbwdhdrw", "password": "9f9w1szgq7tu"},  # US - Los Angeles
    {"host": "23.95.150.145", "port": 6114, "username": "qbwdhdrw", "password": "9f9w1szgq7tu"},   # US - Buffalo
    {"host": "198.23.239.134", "port": 6540, "username": "qbwdhdrw", "password": "9f9w1szgq7tu"},  # US - Buffalo
    {"host": "107.172.163.27", "port": 6543, "username": "qbwdhdrw", "password": "9f9w1szgq7tu"},  # US - Bloomingdale
    {"host": "198.105.121.200", "port": 6462, "username": "qbwdhdrw", "password": "9f9w1szgq7tu"}, # UK - London
    {"host": "64.137.96.74", "port": 6641, "username": "qbwdhdrw", "password": "9f9w1szgq7tu"},    # Spain - Madrid
    {"host": "84.247.60.125", "port": 6095, "username": "qbwdhdrw", "password": "9f9w1szgq7tu"},   # Poland - Warsaw
    {"host": "216.10.27.159", "port": 6837, "username": "qbwdhdrw", "password": "9f9w1szgq7tu"},   # US - Dallas
    {"host": "23.26.71.145", "port": 5628, "username": "qbwdhdrw", "password": "9f9w1szgq7tu"},    # US - Orem
    {"host": "23.27.208.120", "port": 5830, "username": "qbwdhdrw", "password": "9f9w1szgq7tu"},   # US - Reston
    # Account 4 - 10 Proxies (vwqxqyew)
    {"host": "142.111.48.253", "port": 7030, "username": "vwqxqyew", "password": "4l6qlayr252q"},  # US - Los Angeles
    {"host": "23.95.150.145", "port": 6114, "username": "vwqxqyew", "password": "4l6qlayr252q"},   # US - Buffalo
    {"host": "198.23.239.134", "port": 6540, "username": "vwqxqyew", "password": "4l6qlayr252q"},  # US - Buffalo
    {"host": "107.172.163.27", "port": 6543, "username": "vwqxqyew", "password": "4l6qlayr252q"},  # US - Bloomingdale
    {"host": "198.105.121.200", "port": 6462, "username": "vwqxqyew", "password": "4l6qlayr252q"}, # UK - London
    {"host": "64.137.96.74", "port": 6641, "username": "vwqxqyew", "password": "4l6qlayr252q"},    # Spain - Madrid
    {"host": "84.247.60.125", "port": 6095, "username": "vwqxqyew", "password": "4l6qlayr252q"},   # Poland - Warsaw
    {"host": "216.10.27.159", "port": 6837, "username": "vwqxqyew", "password": "4l6qlayr252q"},   # US - Dallas
    {"host": "23.26.71.145", "port": 5628, "username": "vwqxqyew", "password": "4l6qlayr252q"},    # US - Orem
    {"host": "23.27.208.120", "port": 5830, "username": "vwqxqyew", "password": "4l6qlayr252q"},   # US - Reston
    # Account 5 - 10 Proxies (ynwjxcuz)
    {"host": "142.111.48.253", "port": 7030, "username": "ynwjxcuz", "password": "l90dlksfzyia"},  # US - Los Angeles
    {"host": "23.95.150.145", "port": 6114, "username": "ynwjxcuz", "password": "l90dlksfzyia"},   # US - Buffalo
    {"host": "198.23.239.134", "port": 6540, "username": "ynwjxcuz", "password": "l90dlksfzyia"},  # US - Buffalo
    {"host": "107.172.163.27", "port": 6543, "username": "ynwjxcuz", "password": "l90dlksfzyia"},  # US - Bloomingdale
    {"host": "198.105.121.200", "port": 6462, "username": "ynwjxcuz", "password": "l90dlksfzyia"}, # UK - London
    {"host": "64.137.96.74", "port": 6641, "username": "ynwjxcuz", "password": "l90dlksfzyia"},    # Spain - Madrid
    {"host": "84.247.60.125", "port": 6095, "username": "ynwjxcuz", "password": "l90dlksfzyia"},   # Poland - Warsaw
    {"host": "216.10.27.159", "port": 6837, "username": "ynwjxcuz", "password": "l90dlksfzyia"},   # US - Dallas
    {"host": "23.26.71.145", "port": 5628, "username": "ynwjxcuz", "password": "l90dlksfzyia"},    # US - Orem
    {"host": "23.27.208.120", "port": 5830, "username": "ynwjxcuz", "password": "l90dlksfzyia"},   # US - Reston
]

# Track which proxy to use next (round-robin)
CURRENT_PROXY_INDEX = 0
PROXY_USAGE_STATS = defaultdict(lambda: {"requests": 0, "errors": 0})

def get_next_proxy():
    """Get the next proxy from the pool in round-robin fashion."""
    global CURRENT_PROXY_INDEX
    
    if not WEBSHARE_PROXY_POOL:
        return None
    
    proxy = WEBSHARE_PROXY_POOL[CURRENT_PROXY_INDEX]
    CURRENT_PROXY_INDEX = (CURRENT_PROXY_INDEX + 1) % len(WEBSHARE_PROXY_POOL)
    
    # Track usage
    proxy_id = f"{proxy['host']}:{proxy['port']}"
    PROXY_USAGE_STATS[proxy_id]["requests"] += 1
    
    return proxy

def format_proxy_url(proxy: dict) -> str:
    """Format proxy dict into URL string for httpx/requests."""
    if not proxy:
        return None
    return f"http://{proxy['username']}:{proxy['password']}@{proxy['host']}:{proxy['port']}"

def get_proxy_for_browser() -> dict:
    """Get proxy configuration formatted for browser use."""
    proxy = get_next_proxy()
    if not proxy:
        return None
    return {
        "server": f"http://{proxy['host']}:{proxy['port']}",
        "username": proxy['username'],
        "password": proxy['password']
    }

# Auto-enable proxy rotation if proxies are configured
if WEBSHARE_PROXY_POOL:
    PROXY_ROTATION_ENABLED = True
    print(f"🔄 Proxy rotation ENABLED with {len(WEBSHARE_PROXY_POOL)} proxies")
else:
    print("⚠️  No proxies configured. Using direct connection (rate limits may apply)")
# --------------------------------------

# --- Helper Functions ---

def get_config():
    try:
        with open(CONFIG_FILE, "r") as f:
            config = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError) as e:
        debug_print(f"⚠️  Config file error: {e}, using defaults")
        config = {}
    except Exception as e:
        debug_print(f"⚠️  Unexpected error reading config: {e}, using defaults")
        config = {}

    # Ensure default keys exist
    try:
        config.setdefault("password", "admin")
        config.setdefault("auth_token", "")
        config.setdefault("auth_tokens", [])  # Multiple auth tokens
        config.setdefault("cf_clearance", "")
        config.setdefault("api_keys", [])
        config.setdefault("usage_stats", {})
    except Exception as e:
        debug_print(f"⚠️  Error setting config defaults: {e}")
    
    return config

def load_usage_stats():
    """Load usage stats from config into memory"""
    global model_usage_stats
    try:
        config = get_config()
        model_usage_stats = defaultdict(int, config.get("usage_stats", {}))
    except Exception as e:
        debug_print(f"⚠️  Error loading usage stats: {e}, using empty stats")
        model_usage_stats = defaultdict(int)

def save_config(config):
    try:
        # Persist in-memory stats to the config dict before saving
        config["usage_stats"] = dict(model_usage_stats)
        with open(CONFIG_FILE, "w") as f:
            json.dump(config, f, indent=4)
    except Exception as e:
        debug_print(f"❌ Error saving config: {e}")

def get_models():
    try:
        with open(MODELS_FILE, "r") as f:
            return json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        return []

def save_models(models):
    try:
        with open(MODELS_FILE, "w") as f:
            json.dump(models, f, indent=2)
    except Exception as e:
        debug_print(f"❌ Error saving models: {e}")


def get_request_headers():
    """Get request headers with the first available auth token (for compatibility)"""
    config = get_config()
    
    # Try to get token from auth_tokens first, then fallback to single token
    auth_tokens = config.get("auth_tokens", [])
    if auth_tokens:
        token = auth_tokens[0]  # Just use first token for non-API requests
    else:
        token = config.get("auth_token", "").strip()
        if not token:
            raise HTTPException(status_code=500, detail="Arena auth token not set in dashboard.")
    
    return get_request_headers_with_token(token)

def get_request_headers_with_token(token: str):
    """Get request headers with a specific auth token"""
    config = get_config()
    cf_clearance = config.get("cf_clearance", "").strip()
    
    # Check if the token is a full cookie string (contains multiple cookies or the key name)
    if "arena-auth-prod-v1=" in token:
        # User pasted the full cookie string! Use it directly.
        # Ensure cf_clearance is updated if present in the string? 
        # Actually, best to just use what user gave, but we might want to ensure cf_clearance is there.
        # If user gave full string, it likely has everything.
        cookie_header = token
    else:
        # Standard behavior: wrap the value
        cookie_header = f"cf_clearance={cf_clearance}; arena-auth-prod-v1={token}"

    return {
        "Content-Type": "text/plain;charset=UTF-8",
        "Cookie": cookie_header,
        "User-Agent": USER_AGENT,
    }

def get_next_auth_token(exclude_tokens: set = None):
    """Get next auth token using round-robin selection
    
    Args:
        exclude_tokens: Set of tokens to exclude from selection (e.g., already tried tokens)
    """
    global current_token_index
    config = get_config()
    
    # Get all available tokens
    auth_tokens = config.get("auth_tokens", [])
    if not auth_tokens:
        raise HTTPException(status_code=500, detail="No auth tokens configured")
    
    # Filter out excluded tokens
    if exclude_tokens:
        available_tokens = [t for t in auth_tokens if t not in exclude_tokens]
        if not available_tokens:
            raise HTTPException(status_code=500, detail="No more auth tokens available to try")
    else:
        available_tokens = auth_tokens
    
    # Round-robin selection from available tokens
    token = available_tokens[current_token_index % len(available_tokens)]
    current_token_index = (current_token_index + 1) % len(auth_tokens)
    return token

def remove_auth_token(token: str):
    """Remove an expired/invalid auth token from the list"""
    try:
        config = get_config()
        auth_tokens = config.get("auth_tokens", [])
        if token in auth_tokens:
            auth_tokens.remove(token)
            config["auth_tokens"] = auth_tokens
            save_config(config)
            debug_print(f"🗑️  Removed expired token from list: {token[:20]}...")
    except Exception as e:
        debug_print(f"⚠️  Error removing auth token: {e}")


async def make_lmarena_request_browser(url: str, payload: dict, method: str = "POST") -> dict:
    """Make LMArena API request through the real Chrome browser (bypasses all bot detection)
    
    This function uses the nodriver browser to execute JavaScript fetch() calls,
    ensuring the request comes from a real browser with proper cookies, TLS fingerprint,
    and session context.
    
    Args:
        url: Full URL to the LMArena API endpoint
        payload: JSON payload to send
        method: HTTP method (POST or PUT)
    
    Returns:
        dict with 'status_code' and 'text' (response body)
    """
    global NODRIVER_TAB, BROWSER_READY, LAST_LMARENA_REQUEST_TIME
    
    if not BROWSER_READY or NODRIVER_TAB is None:
        raise HTTPException(status_code=503, detail="Browser not ready for API calls")
    
    # Only use lock and rate limiting if proxy rotation is DISABLED
    # With rotating proxies, each request uses different IP = no rate limit concerns
    if not PROXY_ROTATION_ENABLED:
        # Acquire lock to serialize requests (parallel requests will queue up here)
        debug_print(f"🔒 Waiting to acquire request lock...")
        await LMARENA_REQUEST_LOCK.acquire()
        debug_print(f"🔓 Lock acquired!")
    else:
        proxy = get_next_proxy()
        proxy_id = f"{proxy['host']}:{proxy['port']}" if proxy else "direct"
        debug_print(f"🔄 Using rotating proxy: {proxy_id} (no lock needed)")
    
    try:
        # Rate limiting: only if proxy rotation is disabled
        if not PROXY_ROTATION_ENABLED:
            MIN_REQUEST_INTERVAL = 2.5
            current_time = time.time()
            if LAST_LMARENA_REQUEST_TIME > 0:
                elapsed = current_time - LAST_LMARENA_REQUEST_TIME
                if elapsed < MIN_REQUEST_INTERVAL:
                    wait_time = MIN_REQUEST_INTERVAL - elapsed
                    debug_print(f"⏳ Rate limiting: waiting {wait_time:.1f}s before next request")
                    await asyncio.sleep(wait_time)
            LAST_LMARENA_REQUEST_TIME = time.time()
    
        # CRITICAL: Refresh reCAPTCHA token AFTER acquiring lock
        # Token may have expired while waiting in queue
        debug_print(f"🔄 Refreshing reCAPTCHA token after lock...")
        fresh_token = await refresh_recaptcha_token()
        if fresh_token and 'recaptchaV3Token' in payload:
            payload['recaptchaV3Token'] = fresh_token
            debug_print(f"✅ Fresh token applied ({len(fresh_token)} chars)")
    
        debug_print(f"🌐 Making browser-based request to: {url}")
        debug_print(f"🔐 Using REAL Chrome browser (bypasses bot detection)")
    
        # Escape the payload for JavaScript
        payload_json = json.dumps(payload).replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
    
        # Generate unique request ID to avoid collisions
        request_id = f"lmab_{int(time.time() * 1000)}"
    
        # JavaScript code that stores result in window global (since evaluate() can't return async results)
        js_code = f"""
        (function() {{
            window.{request_id} = null;  // Reset
            fetch('{url}', {{
                method: '{method}',
                headers: {{
                    'Content-Type': 'application/json'
                }},
                body: '{payload_json}',
                credentials: 'include'
            }})
            .then(async (response) => {{
                const text = await response.text();
                window.{request_id} = {{
                    status_code: response.status,
                    text: text,
                    ok: response.ok,
                    done: true
                }};
            }})
            .catch((error) => {{
                window.{request_id} = {{
                    status_code: 0,
                    text: 'Fetch error: ' + error.message,
                    ok: false,
                    done: true
                }};
            }});
            return 'request_started';
        }})();
        """
    
        # Start the fetch request
        start_result = await NODRIVER_TAB.evaluate(js_code)
        debug_print(f"🚀 Browser fetch started: {start_result}")
        
        # Poll for result (timeout after 120 seconds)
        max_wait = 120
        poll_interval = 0.5
        waited = 0
        
        while waited < max_wait:
            await asyncio.sleep(poll_interval)
            waited += poll_interval
            
            # Check if result is ready
            result = await NODRIVER_TAB.evaluate(f"window.{request_id}")
            
            # Debug: log result type
            if result is not None:
                debug_print(f"🔍 Result type: {type(result).__name__}, value: {str(result)[:100]}")
            
            # Handle different return types from nodriver
            if result is not None:
                # nodriver returns JS objects as list of [key, {type, value}] pairs
                # e.g. [['status_code', {'type': 'number', 'value': 200}], ['text', {...}], ...]
                if isinstance(result, list) and len(result) > 0:
                    # Check if it's the nodriver format: list of 2-element lists
                    if isinstance(result[0], list) and len(result[0]) == 2:
                        # Convert nodriver format to dict
                        converted = {}
                        for item in result:
                            if isinstance(item, list) and len(item) == 2:
                                key = item[0]
                                value_wrapper = item[1]
                                if isinstance(value_wrapper, dict) and 'value' in value_wrapper:
                                    converted[key] = value_wrapper['value']
                                else:
                                    converted[key] = value_wrapper
                        result = converted
                        debug_print(f"✅ Converted nodriver format to dict: {list(result.keys())}")
                        debug_print(f"   done={result.get('done')}, status={result.get('status_code')}")
                    # If first element is a dict, take it (old handling)
                    elif isinstance(result[0], dict):
                        result = result[0]
                
                # Now check if it's a dict with 'done' key
                if isinstance(result, dict) and result.get("done"):
                    debug_print(f"🌐 Browser response status: {result.get('status_code', 'unknown')}")
                    
                    # Log first 200 chars of response for debugging
                    response_preview = str(result.get('text', ''))[:200]
                    debug_print(f"📄 Response preview: {response_preview}...")
                    
                    # Clean up window variable
                    await NODRIVER_TAB.evaluate(f"delete window.{request_id}")
                    
                    return {
                        "status_code": result.get("status_code", 500),
                        "text": result.get("text", ""),
                        "ok": result.get("ok", False)
                    }
            
            if waited % 5 == 0:
                debug_print(f"⏳ Waiting for browser response... ({int(waited)}s)")
        
        # Timeout
        debug_print(f"❌ Browser fetch timed out after {max_wait}s")
        await NODRIVER_TAB.evaluate(f"delete window.{request_id}")
        return {"status_code": 504, "text": "Browser request timed out"}
        
    except Exception as e:
        debug_print(f"❌ Browser request failed: {type(e).__name__}: {e}")
        return {"status_code": 500, "text": f"Browser error: {str(e)}"}
    finally:
        # Only release lock if we acquired it (proxy rotation disabled)
        if not PROXY_ROTATION_ENABLED:
            LMARENA_REQUEST_LOCK.release()
            debug_print(f"🔓 Request lock released")


async def make_lmarena_streaming_request_browser(url: str, payload: dict, method: str = "POST"):
    """Stream LMArena API response through the real Chrome browser (bypasses reCAPTCHA).
    
    This is an async generator that yields response chunks as they arrive.
    Uses browser's ReadableStream API to capture streaming data.
    
    Args:
        url: Full URL to the LMArena API endpoint
        payload: JSON payload to send
        method: HTTP method (POST or PUT)
    
    Yields:
        str: Each chunk of the streaming response as it arrives
    """
    global NODRIVER_TAB, BROWSER_READY, LAST_LMARENA_REQUEST_TIME
    
    if not BROWSER_READY or NODRIVER_TAB is None:
        raise HTTPException(status_code=503, detail="Browser not ready for API calls")
    
    # Only use lock and rate limiting if proxy rotation is DISABLED
    if not PROXY_ROTATION_ENABLED:
        # Acquire lock to serialize requests (parallel requests will queue up here)
        debug_print(f"🔒 [STREAM] Waiting to acquire request lock...")
        await LMARENA_REQUEST_LOCK.acquire()
        debug_print(f"🔓 [STREAM] Lock acquired!")
    else:
        proxy = get_next_proxy()
        proxy_id = f"{proxy['host']}:{proxy['port']}" if proxy else "direct"
        debug_print(f"🔄 [STREAM] Using rotating proxy: {proxy_id} (no lock needed)")
    
    # Rate limiting: only if proxy rotation is disabled
    if not PROXY_ROTATION_ENABLED:
        MIN_REQUEST_INTERVAL = 2.5
        current_time = time.time()
        if LAST_LMARENA_REQUEST_TIME > 0:
            elapsed = current_time - LAST_LMARENA_REQUEST_TIME
            if elapsed < MIN_REQUEST_INTERVAL:
                wait_time = MIN_REQUEST_INTERVAL - elapsed
                debug_print(f"⏳ Rate limiting: waiting {wait_time:.1f}s before next streaming request")
                await asyncio.sleep(wait_time)
        LAST_LMARENA_REQUEST_TIME = time.time()
    
    # CRITICAL: Refresh reCAPTCHA token AFTER acquiring lock
    # Token may have expired while waiting in queue
    debug_print(f"🔄 [STREAM] Refreshing reCAPTCHA token after lock...")
    fresh_token = await refresh_recaptcha_token()
    if fresh_token and 'recaptchaV3Token' in payload:
        payload['recaptchaV3Token'] = fresh_token
        debug_print(f"✅ [STREAM] Fresh token applied ({len(fresh_token)} chars)")
    
    debug_print(f"🌐 Making STREAMING browser request to: {url}")
    debug_print(f"🔐 Using REAL Chrome browser for streaming (bypasses reCAPTCHA)")
    
    # Escape the payload for JavaScript
    payload_json = json.dumps(payload).replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
    
    # Generate unique request ID
    request_id = f"lmab_stream_{int(time.time() * 1000)}"
    
    # JavaScript that uses ReadableStream to collect chunks
    # Stores chunks in an array that Python can poll
    js_code = f"""
    (function() {{
        window.{request_id} = {{
            chunks: [],
            done: false,
            error: null,
            status_code: 0
        }};
        
        fetch('{url}', {{
            method: '{method}',
            headers: {{
                'Content-Type': 'application/json'
            }},
            body: '{payload_json}',
            credentials: 'include'
        }})
        .then(async (response) => {{
            window.{request_id}.status_code = response.status;
            
            if (!response.ok) {{
                const text = await response.text();
                window.{request_id}.error = text;
                window.{request_id}.done = true;
                return;
            }}
            
            const reader = response.body.getReader();
            const decoder = new TextDecoder();
            
            while (true) {{
                const {{done, value}} = await reader.read();
                if (done) {{
                    window.{request_id}.done = true;
                    break;
                }}
                const chunk = decoder.decode(value, {{stream: true}});
                window.{request_id}.chunks.push(chunk);
            }}
        }})
        .catch((error) => {{
            window.{request_id}.error = 'Fetch error: ' + error.message;
            window.{request_id}.done = true;
        }});
        return 'streaming_started';
    }})();
    """
    
    try:
        # Start the streaming fetch
        start_result = await NODRIVER_TAB.evaluate(js_code)
        debug_print(f"🚀 Browser streaming started: {start_result}")
        
        # Poll for chunks and yield them as they arrive
        max_wait = 120
        poll_interval = 0.1  # Poll faster for streaming
        waited = 0
        last_chunk_index = 0
        
        while waited < max_wait:
            await asyncio.sleep(poll_interval)
            waited += poll_interval
            
            # Get current state
            state_js = f"""
            (function() {{
                const s = window.{request_id};
                if (!s) return null;
                return {{
                    chunk_count: s.chunks.length,
                    done: s.done,
                    error: s.error,
                    status_code: s.status_code
                }};
            }})();
            """
            state = await NODRIVER_TAB.evaluate(state_js)
            
            if state is None:
                continue
            
            # Handle nodriver list format conversion
            if isinstance(state, list):
                converted = {}
                for item in state:
                    if isinstance(item, list) and len(item) == 2:
                        key = item[0]
                        value_wrapper = item[1]
                        if isinstance(value_wrapper, dict) and 'value' in value_wrapper:
                            converted[key] = value_wrapper['value']
                        else:
                            converted[key] = value_wrapper
                state = converted
            
            if not isinstance(state, dict):
                continue
            
            chunk_count = state.get('chunk_count', 0)
            done = state.get('done', False)
            error = state.get('error')
            status_code = state.get('status_code', 0)
            
            # Check for error (nodriver returns {'type': 'null'} for JS null, which is truthy)
            # Only treat as error if it's an actual error string
            is_real_error = error and isinstance(error, str) and error != ""
            if is_real_error:
                debug_print(f"❌ Stream error: {error}")
                await NODRIVER_TAB.evaluate(f"delete window.{request_id}")
                raise HTTPException(status_code=status_code or 500, detail=f"Browser stream error: {error}")
            
            # Get new chunks if available
            if chunk_count > last_chunk_index:
                # Get all new chunks
                get_chunks_js = f"""
                (function() {{
                    const s = window.{request_id};
                    if (!s) return [];
                    return s.chunks.slice({last_chunk_index});
                }})();
                """
                new_chunks = await NODRIVER_TAB.evaluate(get_chunks_js)
                
                # Handle nodriver format for chunk array
                if isinstance(new_chunks, list):
                    for chunk_item in new_chunks:
                        # Extract chunk text
                        if isinstance(chunk_item, dict) and 'value' in chunk_item:
                            chunk_text = chunk_item['value']
                        elif isinstance(chunk_item, str):
                            chunk_text = chunk_item
                        else:
                            chunk_text = str(chunk_item) if chunk_item else ""
                        
                        if chunk_text:
                            yield chunk_text
                
                last_chunk_index = chunk_count
            
            # Check if done
            if done:
                debug_print(f"✅ Browser streaming completed. Status: {status_code}, Total chunks: {chunk_count}")
                break
            
            # Periodic status log
            if waited % 10 == 0 and waited > 0:
                debug_print(f"⏳ Streaming... ({int(waited)}s, {chunk_count} chunks)")
        
        # Clean up
        await NODRIVER_TAB.evaluate(f"delete window.{request_id}")
        
        if waited >= max_wait:
            debug_print(f"❌ Browser streaming timed out after {max_wait}s")
            raise HTTPException(status_code=504, detail="Browser streaming timed out")
            
    except HTTPException:
        raise
    except Exception as e:
        debug_print(f"❌ Browser streaming failed: {type(e).__name__}: {e}")
        try:
            await NODRIVER_TAB.evaluate(f"delete window.{request_id}")
        except:
            pass
        raise HTTPException(status_code=500, detail=f"Browser streaming error: {str(e)}")
    finally:
        # Only release lock if we acquired it (proxy rotation disabled)
        if not PROXY_ROTATION_ENABLED:
            LMARENA_REQUEST_LOCK.release()
            debug_print(f"🔓 [STREAM] Request lock released")


# --- Dashboard Authentication ---

async def get_current_session(request: Request):
    session_id = request.cookies.get("session_id")
    if session_id and session_id in dashboard_sessions:
        return dashboard_sessions[session_id]
    return None

# --- API Key Authentication & Rate Limiting ---

async def rate_limit_api_key(
    auth_header: Optional[str] = Depends(API_KEY_HEADER),
    x_api_key: Optional[str] = Header(None, alias="x-api-key")
):
    api_key_str = None
    
    # Check Authorization header (Bearer token)
    debug_print(f"🔑 Auth Debug: AuthHeader set? {auth_header is not None}, X-API-Key set? {x_api_key is not None}")
    
    if auth_header and auth_header.startswith("Bearer "):
        api_key_str = auth_header[7:].strip()
    
    # Check x-api-key header (Anthropic standard)
    if not api_key_str and x_api_key:
        api_key_str = x_api_key.strip()
        
    if not api_key_str:
        raise HTTPException(
            status_code=401, 
            detail="Missing or invalid authentication. Expected 'Authorization: Bearer KEY' or 'x-api-key: KEY'"
        )
    config = get_config()
    
    key_data = next((k for k in config["api_keys"] if k["key"] == api_key_str), None)
    if not key_data:
        raise HTTPException(status_code=401, detail="Invalid API Key.")

    # Rate Limiting
    rate_limit = key_data.get("rpm", 60)
    current_time = time.time()
    
    # Clean up old timestamps (older than 60 seconds)
    api_key_usage[api_key_str] = [t for t in api_key_usage[api_key_str] if current_time - t < 60]

    if len(api_key_usage[api_key_str]) >= rate_limit:
        # Calculate seconds until oldest request expires (60 seconds window)
        oldest_timestamp = min(api_key_usage[api_key_str])
        retry_after = int(60 - (current_time - oldest_timestamp))
        retry_after = max(1, retry_after)  # At least 1 second
        
        raise HTTPException(
            status_code=429,
            detail="Rate limit exceeded. Please try again later.",
            headers={"Retry-After": str(retry_after)}
        )
        
    api_key_usage[api_key_str].append(current_time)
    
    return key_data

# --- Core Logic ---

async def get_initial_data():
    """
    Extracts initial data from the nodriver browser session.
    Must be called AFTER initialize_nodriver_browser().
    Extracts: cf_clearance cookie, models list.
    """
    global NODRIVER_TAB
    
    print("")
    print("📦 STEP 3/3: Loading LMArena data...")
    
    if NODRIVER_TAB is None:
        print("   └── ❌ Browser not available, skipping data extraction")
        return
    
    try:
        config = get_config()
        
        # Extract cf_clearance from cookies
        print("   ├── Extracting Cloudflare clearance...")
        try:
            cookies = await NODRIVER_TAB.browser.cookies.get_all()
            cf_clearance_cookie = None
            for cookie in cookies:
                if cookie.name == "cf_clearance":
                    cf_clearance_cookie = cookie
                    break
            
            if cf_clearance_cookie:
                config["cf_clearance"] = cf_clearance_cookie.value
                save_config(config)
                print(f"   ├── ✅ cf_clearance saved")
            else:
                print("   ├── ⚠️ No cf_clearance cookie found (might not be needed)")
        except Exception as e:
            debug_print(f"   ├── ⚠️ Error extracting cookies: {e}")
        
        # Extract models from page content
        print("   ├── Extracting available models...")
        try:
            # Get the page HTML content
            body = await NODRIVER_TAB.get_content()
            
            # Try to find models in the page
            match = re.search(r'{\\\"initialModels\\\":(\\[.*?\\]),\\\"initialModel[A-Z]Id', body, re.DOTALL)
            if match:
                models_json = match.group(1).encode().decode('unicode_escape')
                models = json.loads(models_json)
                save_models(models)
                print(f"   ├── ✅ Found {len(models)} models")
            else:
                # Try alternative pattern
                match2 = re.search(r'"initialModels":(\[.*?\]),"initialModel', body, re.DOTALL)
                if match2:
                    models = json.loads(match2.group(1))
                    save_models(models)
                    print(f"   ├── ✅ Found {len(models)} models")
                else:
                    print("   ├── ⚠️ Could not find models in page (using cached)")
        except Exception as e:
            debug_print(f"   ├── ⚠️ Error extracting models: {e}")
        
        print("   └── ✅ Initial data extraction complete")
        
    except Exception as e:
        print(f"   └── ❌ Error during data extraction: {e}")

async def periodic_refresh_task():
    """Background task to refresh cf_clearance and models every 30 minutes"""
    while True:
        try:
            # Wait 30 minutes (1800 seconds)
            await asyncio.sleep(1800)
            debug_print("\n" + "="*60)
            debug_print("🔄 Starting scheduled 30-minute refresh...")
            debug_print("="*60)
            await get_initial_data()
            debug_print("✅ Scheduled refresh completed")
            debug_print("="*60 + "\n")
        except Exception as e:
            debug_print(f"❌ Error in periodic refresh task: {e}")
            # Continue the loop even if there's an error
            continue

@app.on_event("startup")
async def startup_event():
    try:
        # Print startup banner
        print("=" * 60)
        print("🚀 LMArena Bridge Server Starting...")
        print("=" * 60)
        
        # Load configuration
        config = get_config()
        save_config(config)
        save_models(get_models())
        load_usage_stats()
        
        api_key_count = len(config.get("api_keys", []))
        auth_token_count = len(config.get("auth_tokens", [])) or (1 if config.get("auth_token") else 0)
        
        print(f"📋 Configuration loaded from config.json")
        print(f"   ├── API Keys: {api_key_count} configured")
        print(f"   ├── Auth Tokens: {auth_token_count} configured")
        print(f"   └── Debug Mode: {'ON' if DEBUG else 'OFF'}")
        
        # 1. Initialize browser and solve CAPTCHA (this blocks until user solves)
        browser_ready = await initialize_nodriver_browser()
        
        if not browser_ready:
            print("")
            print("⚠️ WARNING: Server starting without browser (limited functionality)")
            print("   └── reCAPTCHA token refresh will not work")
            print("")
        else:
            # 2. Extract initial data from the browser session
            await get_initial_data()
        
        # 3. Start background tasks
        asyncio.create_task(periodic_refresh_task())
        
        # Print ready message
        print("")
        print("=" * 60)
        print("✅ SERVER READY!")
        print("=" * 60)
        print(f"📍 Dashboard:         http://localhost:{PORT}/dashboard")
        print(f"🔐 Login:             http://localhost:{PORT}/dash/login")
        print(f"📚 Universal API:     http://localhost:{PORT}/v1")
        if browser_ready:
            print("💡 Chrome window will stay open (do not close it!)")
        print("=" * 60)
        print("")
        
    except Exception as e:
        print(f"❌ Error during startup: {e}")
        import traceback
        traceback.print_exc()
        # Continue anyway - server should still start

# --- UI Endpoints (Login/Dashboard) ---

@app.get("/")
async def root():
    return {"status": "online", "message": "Arena Bridge is running"}

@app.get("/dash/login", response_class=HTMLResponse)
async def login_page(request: Request, error: Optional[str] = None):
    if await get_current_session(request):
        return RedirectResponse(url="/dashboard")
    
    error_msg = '<div class="error-message">Invalid password. Please try again.</div>' if error else ''
    
    return f"""
        <!DOCTYPE html>
        <html>
        <head>
            <title>Login - LMArena Bridge</title>
            <meta name="viewport" content="width=device-width, initial-scale=1.0">
            <style>
                * {{ margin: 0; padding: 0; box-sizing: border-box; }}
                body {{
                    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
                    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                    min-height: 100vh;
                    display: flex;
                    align-items: center;
                    justify-content: center;
                    padding: 20px;
                }}
                .login-container {{
                    background: white;
                    padding: 40px;
                    border-radius: 10px;
                    box-shadow: 0 10px 40px rgba(0,0,0,0.2);
                    width: 100%;
                    max-width: 400px;
                }}
                h1 {{
                    color: #333;
                    margin-bottom: 10px;
                    font-size: 28px;
                }}
                .subtitle {{
                    color: #666;
                    margin-bottom: 30px;
                    font-size: 14px;
                }}
                .form-group {{
                    margin-bottom: 20px;
                }}
                label {{
                    display: block;
                    margin-bottom: 8px;
                    color: #555;
                    font-weight: 500;
                }}
                input[type="password"] {{
                    width: 100%;
                    padding: 12px;
                    border: 2px solid #e1e8ed;
                    border-radius: 6px;
                    font-size: 16px;
                    transition: border-color 0.3s;
                }}
                input[type="password"]:focus {{
                    outline: none;
                    border-color: #667eea;
                }}
                button {{
                    width: 100%;
                    padding: 12px;
                    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                    color: white;
                    border: none;
                    border-radius: 6px;
                    font-size: 16px;
                    font-weight: 600;
                    cursor: pointer;
                    transition: transform 0.2s;
                }}
                button:hover {{
                    transform: translateY(-2px);
                }}
                button:active {{
                    transform: translateY(0);
                }}
                .error-message {{
                    background: #fee;
                    color: #c33;
                    padding: 12px;
                    border-radius: 6px;
                    margin-bottom: 20px;
                    border-left: 4px solid #c33;
                }}
            </style>
        </head>
        <body>
            <div class="login-container">
                <h1>LMArena Bridge</h1>
                <div class="subtitle">Sign in to access the dashboard</div>
                {error_msg}
                <form action="/dash/login" method="post">
                    <div class="form-group">
                        <label for="password">Password</label>
                        <input type="password" id="password" name="password" placeholder="Enter your password" required autofocus>
                    </div>
                    <button type="submit">Sign In</button>
                </form>
            </div>
        </body>
        </html>
    """

@app.post("/dash/login")
async def login_submit(response: Response, password: str = Form(...)):
    config = get_config()
    if password == config.get("password"):
        session_id = str(uuid.uuid4())
        dashboard_sessions[session_id] = "admin"
        response = RedirectResponse(url="/dashboard", status_code=status.HTTP_303_SEE_OTHER)
        response.set_cookie(key="session_id", value=session_id, httponly=True)
        return response
    return RedirectResponse(url="/dash/login?error=1", status_code=status.HTTP_303_SEE_OTHER)

@app.get("/logout")
async def logout(request: Request, response: Response):
    session_id = request.cookies.get("session_id")
    if session_id in dashboard_sessions:
        del dashboard_sessions[session_id]
    response = RedirectResponse(url="/dash/login", status_code=status.HTTP_303_SEE_OTHER)
    response.delete_cookie("session_id")
    return response

@app.get("/dashboard", response_class=HTMLResponse)
async def dashboard(session: str = Depends(get_current_session)):
    if not session:
        return RedirectResponse(url="/dash/login")

    try:
        config = get_config()
        models = get_models()
    except Exception as e:
        debug_print(f"❌ Error loading dashboard data: {e}")
        # Return error page
        return HTMLResponse(f"""
            <html><body style="font-family: sans-serif; padding: 40px; text-align: center;">
                <h1>⚠️ Dashboard Error</h1>
                <p>Failed to load configuration: {str(e)}</p>
                <p><a href="/logout">Logout</a> | <a href="/dashboard">Retry</a></p>
            </body></html>
        """, status_code=500)

    # Render API Keys
    keys_html = ""
    for key in config["api_keys"]:
        created_date = time.strftime('%Y-%m-%d %H:%M', time.localtime(key.get('created', 0)))
        keys_html += f"""
            <tr>
                <td><strong>{key['name']}</strong></td>
                <td><code class="api-key-code">{key['key']}</code></td>
                <td><span class="badge">{key['rpm']} RPM</span></td>
                <td><small>{created_date}</small></td>
                <td>
                    <form action='/delete-key' method='post' style='margin:0;'>
                        <input type='hidden' name='key_id' value='{key['key']}'>
                        <button type='submit' class='btn-delete'>Delete</button>
                    </form>
                </td>
            </tr>
        """

    # Render Models (limit to first 20 with text output)
    text_models = [m for m in models if m.get('capabilities', {}).get('outputCapabilities', {}).get('text')]
    models_html = ""
    for i, model in enumerate(text_models[:20]):
        rank = model.get('rank', '?')
        org = model.get('organization', 'Unknown')
        models_html += f"""
            <div class="model-card">
                <div class="model-header">
                    <span class="model-name">{model.get('publicName', 'Unnamed')}</span>
                    <span class="model-rank">Rank {rank}</span>
                </div>
                <div class="model-org">{org}</div>
            </div>
        """
    
    if not models_html:
        models_html = '<div class="no-data">No models found. Token may be invalid or expired.</div>'

    # Render Stats
    stats_html = ""
    if model_usage_stats:
        for model, count in sorted(model_usage_stats.items(), key=lambda x: x[1], reverse=True)[:10]:
            stats_html += f"<tr><td>{model}</td><td><strong>{count}</strong></td></tr>"
    else:
        stats_html = "<tr><td colspan='2' class='no-data'>No usage data yet</td></tr>"

    # Check token status - check BOTH auth_token (legacy single) and auth_tokens (new array)
    has_tokens = config.get("auth_token") or (config.get("auth_tokens") and len(config.get("auth_tokens", [])) > 0)
    token_status = "✅ Configured" if has_tokens else "❌ Not Set"
    token_class = "status-good" if has_tokens else "status-bad"
    
    cf_status = "✅ Configured" if config.get("cf_clearance") else "❌ Not Set"
    cf_class = "status-good" if config.get("cf_clearance") else "status-bad"
    
    # Get recent activity count (last 24 hours)
    recent_activity = sum(1 for timestamps in api_key_usage.values() for t in timestamps if time.time() - t < 86400)

    return f"""
        <!DOCTYPE html>
        <html>
        <head>
            <title>Namo LLM - Dashboard</title>
            <meta name="viewport" content="width=device-width, initial-scale=1.0">
            <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0/dist/chart.umd.js"></script>
            <link href="https://fonts.googleapis.com/css2?family=Outfit:wght@300;400;500;600;700&display=swap" rel="stylesheet">
            <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css" rel="stylesheet">
            <style>
                :root {{
                    --bg-color: #F0F4FA;
                    --card-bg: #FFFFFF;
                    --purple-light: #E0D9FD;
                    --purple-dark: #C5B5FA;
                    --green-accent: #C9F257;
                    --text-dark: #1A1A2E;
                    --text-grey: #8E92BC;
                    --border-radius: 24px;
                    --shadow: 0 10px 40px rgba(0,0,0,0.03);
                }}
                
                * {{ margin: 0; padding: 0; box-sizing: border-box; }}
                
                body {{
                    font-family: 'Outfit', sans-serif;
                    background-color: var(--bg-color);
                    color: var(--text-dark);
                    min-height: 100vh;
                    padding: 24px;
                }}

                .bento-container {{
                    display: grid;
                    grid-template-columns: 260px 1fr;
                    gap: 24px;
                    max-width: 1600px;
                    margin: 0 auto;
                    height: calc(100vh - 48px);
                }}

                /* Sidebar */
                .sidebar {{
                    background: var(--card-bg);
                    border-radius: var(--border-radius);
                    padding: 32px;
                    display: flex;
                    flex-direction: column;
                    box-shadow: var(--shadow);
                }}

                .logo {{
                    font-size: 24px;
                    font-weight: 700;
                    margin-bottom: 48px;
                    display: flex;
                    align-items: center;
                    gap: 12px;
                }}
                .logo i {{ color: #764ba2; }}

                .nav-menu {{ display: flex; flex-direction: column; gap: 12px; flex: 1; }}
                
                .nav-item {{
                    display: flex;
                    align-items: center;
                    gap: 16px;
                    padding: 14px 20px;
                    border-radius: 16px;
                    color: var(--text-grey);
                    text-decoration: none;
                    font-weight: 500;
                    transition: all 0.2s;
                }}
                
                .nav-item.active {{
                    background-color: var(--green-accent);
                    color: var(--text-dark);
                    font-weight: 600;
                }}
                
                .nav-item:hover:not(.active) {{
                    background-color: #f8f9fa;
                    color: var(--text-dark);
                }}

                /* Main Content */
                .main-column {{
                    display: flex;
                    flex-direction: column;
                    gap: 24px;
                    overflow-y: auto;
                    padding-right: 5px;
                }}
                
                /* Header */
                .header-card {{
                    background: var(--card-bg);
                    border-radius: var(--border-radius);
                    padding: 20px 32px;
                    display: flex;
                    justify-content: space-between;
                    align-items: center;
                    box-shadow: var(--shadow);
                }}
                
                .page-title {{ font-size: 24px; font-weight: 600; }}
                
                .search-bar {{
                    background: #F5F7FA;
                    border-radius: 12px;
                    padding: 12px 20px;
                    display: flex;
                    align-items: center;
                    gap: 10px;
                    width: 300px;
                    color: var(--text-grey);
                }}
                .search-bar input {{
                    border: none;
                    background: transparent;
                    outline: none;
                    font-family: inherit;
                    width: 100%;
                    color: var(--text-dark);
                }}

                .profile-section {{
                    display: flex;
                    align-items: center;
                    gap: 24px;
                }}
                
                .icon-btn {{
                    font-size: 20px;
                    color: var(--text-dark);
                    cursor: pointer;
                }}

                .user-badge {{
                    background: linear-gradient(135deg, #E0D9FD 0%, #C5B5FA 100%);
                    padding: 8px 16px;
                    border-radius: 50px;
                    display: flex;
                    align-items: center;
                    gap: 10px;
                    font-weight: 500;
                }}
                
                /* Dashboard Grid */
                .dashboard-grid {{
                    display: grid;
                    grid-template-columns: repeat(3, 1fr);
                    gap: 24px;
                }}

                .card {{
                    background: var(--card-bg);
                    border-radius: var(--border-radius);
                    padding: 24px;
                    box-shadow: var(--shadow);
                }}

                .full-width {{ grid-column: 1 / -1; }}
                .two-thirds {{ grid-column: span 2; }}
                
                /* Purple Hero Card */
                .hero-card {{
                    background: linear-gradient(135deg, #E0D9FD 0%, #C5B5FA 100%);
                    padding: 32px;
                    position: relative;
                    overflow: hidden;
                }}
                
                .hero-stat {{
                    font-size: 48px;
                    font-weight: 700;
                    margin-top: 10px;
                    margin-bottom: 5px;
                    color: #1A1A2E;
                }}
                
                .hero-label {{ font-size: 16px; font-weight: 500; opacity: 0.8; color: #1A1A2E; }}
                .avatars {{ display: flex; margin-top: 20px; }}
                .avatar-stack {{
                    width: 40px; height: 40px; border-radius: 50%; border: 2px solid white; margin-left: -10px; background: #ddd;
                    display: flex; align-items: center; justify-content: center; font-size: 12px; font-weight: bold;
                }}
                .avatar-stack:first-child {{ margin-left: 0; background: #FFD166; }}
                
                /* Green Chart Card */
                .usage-card {{
                    background: var(--card-bg);
                }}
                
                .card-title {{
                    font-size: 18px;
                    font-weight: 600;
                    margin-bottom: 24px;
                    display: flex;
                    justify-content: space-between;
                    align-items: center;
                }}

                /* Status Pills */
                .status-pill {{
                    padding: 6px 12px;
                    border-radius: 8px;
                    font-size: 12px;
                    font-weight: 600;
                }}
                .status-active {{ background: #C9F257; color: #1A1A2E; }}
                .status-inactive {{ background: #FFE5D9; color: #FF5C5C; }}

                /* Form Elements */
                .styled-input {{
                    width: 100%;
                    padding: 12px 16px;
                    border: 2px solid #F0F4FA;
                    border-radius: 12px;
                    font-family: inherit;
                    margin-bottom: 15px;
                    transition: all 0.2s;
                }}
                .styled-input:focus {{ border-color: #764ba2; outline: none; }}
                
                .btn-primary {{
                    background: var(--text-dark);
                    color: white;
                    padding: 12px 24px;
                    border-radius: 12px;
                    border: none;
                    font-weight: 600;
                    cursor: pointer;
                    width: 100%;
                    transition: transform 0.2s;
                }}
                .btn-primary:hover {{ transform: translateY(-2px); }}
                
                .btn-green {{
                    background: var(--green-accent);
                    color: var(--text-dark);
                    padding: 8px 16px;
                    border-radius: 10px;
                    border: none;
                    font-weight: 600;
                    cursor: pointer;
                }}
                .btn-delete {{ 
                    background: #FFF0F0; color: #FF5C5C; 
                    padding: 6px 12px; border-radius: 8px; border: none; cursor: pointer;
                }}
                
                /* Table Styles */
                table {{ width: 100%; border-collapse: separate; border-spacing: 0 8px; }}
                th {{ text-align: left; padding: 0 16px; color: var(--text-grey); font-weight: 500; font-size: 14px; }}
                td {{ background: #F8F9FB; padding: 16px; first-child: border-top-left-radius: 12px; }}
                tr td:first-child {{ border-top-left-radius: 12px; border-bottom-left-radius: 12px; }}
                tr td:last-child {{ border-top-right-radius: 12px; border-bottom-right-radius: 12px; }}

                .token-item {{
                    background: #FAFAFA;
                    border: 1px solid #EEE;
                    padding: 12px;
                    border-radius: 12px;
                    display: flex;
                    gap: 10px;
                    align-items: center;
                    margin-bottom: 8px;
                    width: 100%;
                }}

                .token-item code {{
                    font-family: monospace;
                    font-size: 12px;
                    color: #666;
                    flex: 1;
                    white-space: nowrap;
                    overflow: hidden;
                    text-overflow: ellipsis;
                }}

            </style>
        </head>
        <body>
            <div class="bento-container">
                <!-- Sidebar -->
                <div class="sidebar">
                    <div class="logo">
                        <i class="fa-solid fa-cube"></i>
                        Namo LLM
                    </div>
                    <div class="nav-menu">
                        <a href="#" class="nav-item active"><i class="fa-solid fa-chart-pie"></i> Dashboard</a>
                        <a href="#" class="nav-item"><i class="fa-solid fa-server"></i> Proxies</a>
                        <a href="#" class="nav-item"><i class="fa-solid fa-key"></i> Keys</a>
                        <a href="#" class="nav-item"><i class="fa-solid fa-gear"></i> Settings</a>
                    </div>
                    
                    <div style="margin-top: auto; padding: 20px; background: #F5F7FA; border-radius: 16px;">
                        <div style="font-size: 12px; color: #888; margin-bottom: 5px;">Server Status</div>
                        <div style="display: flex; align-items: center; gap: 8px;">
                             <div style="width: 8px; height: 8px; background: #00CC66; border-radius: 50%;"></div>
                             <span style="font-weight: 600; font-size: 14px;">Online</span>
                        </div>
                    </div>
                    
                    <a href="/logout" class="nav-item" style="margin-top: 10px; color: #FF5C5C;">
                        <i class="fa-solid fa-arrow-right-from-bracket"></i> Logout
                    </a>
                </div>

                <!-- Main Content -->
                <div class="main-column">
                    <!-- Header -->
                    <div class="header-card">
                        <div class="page-title">Dashboard</div>
                        
                        <div class="search-bar">
                            <i class="fa-solid fa-magnifying-glass"></i>
                            <input type="text" placeholder="Search...">
                        </div>
                        
                        <div class="profile-section">
                            <i class="fa-regular fa-bell icon-btn"></i>
                            <div class="user-badge">
                                <i class="fa-solid fa-user-astronaut"></i>
                                <span>Admin User</span>
                                <i class="fa-solid fa-angle-down" style="font-size: 12px;"></i>
                            </div>
                        </div>
                    </div>

                    <div class="dashboard-grid">
                        <!-- Hero Stat Card (Purple) -->
                        <div class="card hero-card">
                            <div class="hero-label">Total Requests</div>
                            <div class="hero-stat">{sum(model_usage_stats.values())}</div>
                            <div style="font-size: 14px; background: rgba(255,255,255,0.3); display: inline-block; padding: 4px 10px; border-radius: 20px;">
                                <i class="fa-solid fa-arrow-trend-up"></i> +12% this week
                            </div>
                            <div class="avatars">
                                <div class="avatar-stack">L</div>
                                <div class="avatar-stack">M</div>
                                <div class="avatar-stack">+3</div>
                            </div>
                        </div>

                        <!-- Secondary Stat Card 1 -->
                        <div class="card">
                            <div class="card-title">
                                <span>Active Models</span>
                                <span class="status-pill status-active" style="background: #E0E7FF; color: #4338CA;">{len(text_models)}</span>
                            </div>
                            <div style="font-size: 32px; font-weight: 700; margin-bottom: 5px;">{len(text_models)}</div>
                            <div style="color: grey; font-size: 13px;">Text generation enabled</div>
                            <div style="margin-top: 20px; height: 6px; background: #F0F0F0; border-radius: 10px; overflow: hidden;">
                                <div style="width: 85%; height: 100%; background: #4338CA;"></div>
                            </div>
                        </div>

                        <!-- Secondary Stat Card 2 (Green) -->
                        <div class="card" style="background: #C9F257;">
                            <div class="card-title">
                                <span>System Health</span>
                                <i class="fa-solid fa-heart-pulse"></i>
                            </div>
                            <div style="font-size: 32px; font-weight: 700; color: #1A1A2E;">98%</div>
                            <div style="color: #1A1A2E; opacity: 0.8; font-size: 13px;">Uptime this session</div>
                            <div style="margin-top: 20px; display: flex; gap: 5px;">
                                <div style="height: 30px; width: 6px; background: rgba(0,0,0,0.1); border-radius: 4px;"></div>
                                <div style="height: 20px; width: 6px; background: rgba(0,0,0,0.1); border-radius: 4px;"></div>
                                <div style="height: 40px; width: 6px; background: rgba(0,0,0,0.2); border-radius: 4px;"></div>
                                <div style="height: 35px; width: 6px; background: rgba(0,0,0,0.1); border-radius: 4px;"></div>
                            </div>
                        </div>

                        <!-- Main Graph Card -->
                        <div class="card two-thirds">
                            <div class="card-title">
                                <span>Proxy Usage</span>
                                <select style="border: none; background: #F5F7FA; padding: 5px 10px; border-radius: 8px; font-family: inherit;">
                                    <option>Last 24 Hours</option>
                                    <option>Last 7 Days</option>
                                </select>
                            </div>
                            <div style="height: 250px;">
                                <canvas id="modelBarChart"></canvas>
                            </div>
                        </div>

                        <!-- Tokens & Auth List -->
                        <div class="card">
                            <div class="card-title">
                                <span>Auth Tokens</span>
                                <form action="/refresh-tokens" method="post" style="display:inline;">
                                    <button style="border: none; background: none; cursor: pointer; color: #666;"><i class="fa-solid fa-sync"></i></button>
                                </form>
                            </div>
                            
                            <div style="max-height: 250px; overflow-y: auto; margin-bottom: 15px;">
                                {''.join([f'''
                                <div class="token-item">
                                    <div style="width: 8px; height: 8px; background: #C9F257; border-radius: 50%;"></div>
                                    <code>{token[:20]}...</code>
                                    <form action="/delete-auth-token" method="post" style="margin: 0;">
                                        <input type="hidden" name="token_index" value="{i}">
                                        <button type="submit" style="color: #FF5C5C; border: none; background: none; cursor: pointer;">x</button>
                                    </form>
                                </div>
                                ''' for i, token in enumerate(config.get("auth_tokens", []))])}
                                
                                {('<div style="text-align: center; color: #AAA; font-size: 13px;">No tokens set</div>' if not config.get("auth_tokens") else '')}
                            </div>

                            <form action="/add-auth-token" method="post">
                                <input type="text" name="new_auth_token" class="styled-input" placeholder="Paste Auth Token..." required style="margin-bottom: 10px; padding: 8px;">
                                <button type="submit" class="btn-green" style="width: 100%;">Add Token</button>
                            </form>
                        </div>
                        
                        <!-- Configuration / API Keys -->
                         <div class="card full-width">
                            <div class="card-title">
                                <span>API Keys Management</span>
                                <span class="status-pill status-active">{len(config['api_keys'])} Active Keys</span>
                            </div>
                            
                            <table>
                                <thead>
                                    <tr>
                                        <th>Name</th>
                                        <th>Key</th>
                                        <th>RPM Limit</th>
                                        <th>Created</th>
                                        <th>Action</th>
                                    </tr>
                                </thead>
                                <tbody>
                                    {keys_html if keys_html else '<tr><td colspan="5" style="text-align:center; color:#999;">No keys found</td></tr>'}
                                </tbody>
                            </table>
                            
                            <div style="margin-top: 20px; padding-top: 20px; border-top: 1px solid #EEE;">
                                <h4 style="margin-bottom: 15px;">Create New Key</h4>
                                <form action="/create-key" method="post" style="display: flex; gap: 15px;">
                                    <input type="text" name="name" class="styled-input" placeholder="Key Name" required style="margin:0; flex: 1;">
                                    <input type="number" name="rpm" class="styled-input" placeholder="RPM" value="60" required style="margin:0; width: 100px;">
                                    <button type="submit" class="btn-primary" style="width: auto;">Generate Key</button>
                                </form>
                            </div>
                        </div>

                    </div>
                </div>
            </div>

            <script>
                // Prepare data for charts
                const statsData = {json.dumps(dict(sorted(model_usage_stats.items(), key=lambda x: x[1], reverse=True)[:10]))};
                const modelNames = Object.keys(statsData);
                const modelCounts = Object.values(statsData);
                
                // Bento Style Chart Config
                Chart.defaults.font.family = "'Outfit', sans-serif";
                Chart.defaults.color = '#8E92BC';
                
                if (modelNames.length > 0) {{
                    const barCtx = document.getElementById('modelBarChart').getContext('2d');
                    // Create gradient
                    const gradient = barCtx.createLinearGradient(0, 0, 0, 400);
                    gradient.addColorStop(0, '#764ba2');
                    gradient.addColorStop(1, '#667eea');

                    new Chart(barCtx, {{
                        type: 'line',
                        data: {{
                            labels: modelNames,
                            datasets: [{{
                                label: 'Requests',
                                data: modelCounts,
                                backgroundColor: 'rgba(118, 75, 162, 0.1)',
                                borderColor: '#764ba2',
                                borderWidth: 3,
                                pointBackgroundColor: '#fff',
                                pointBorderColor: '#764ba2',
                                pointRadius: 6,
                                fill: true,
                                tension: 0.4
                            }}]
                        }},
                        options: {{
                            responsive: true,
                            maintainAspectRatio: false,
                            plugins: {{
                                legend: {{ display: false }},
                                tooltip: {{
                                    backgroundColor: '#1A1A2E',
                                    padding: 12,
                                    titleFont: {{ size: 13 }},
                                    bodyFont: {{ size: 14, weight: 'bold' }},
                                    cornerRadius: 8,
                                    displayColors: false
                                }}
                            }},
                            scales: {{
                                y: {{
                                    beginAtZero: true,
                                    grid: {{ color: '#F0F0F0', borderDash: [5, 5] }},
                                    border: {{ display: false }}
                                }},
                                x: {{
                                    grid: {{ display: false }},
                                    border: {{ display: false }}
                                }}
                            }}
                        }}
                    }});
                }}
            </script>
        </body>
        </html>
    """

@app.post("/update-auth-token")
async def update_auth_token(session: str = Depends(get_current_session), auth_token: str = Form(...)):
    if not session:
        return RedirectResponse(url="/dash/login")
    config = get_config()
    config["auth_token"] = auth_token.strip()
    save_config(config)
    return RedirectResponse(url="/dashboard", status_code=status.HTTP_303_SEE_OTHER)

@app.post("/create-key")
async def create_key(session: str = Depends(get_current_session), name: str = Form(...), rpm: int = Form(...)):
    if not session:
        return RedirectResponse(url="/dash/login")
    try:
        config = get_config()
        new_key = {
            "name": name.strip(),
            "key": f"sk-lmab-{uuid.uuid4()}",
            "rpm": max(1, min(rpm, 1000)),  # Clamp between 1-1000
            "created": int(time.time())
        }
        config["api_keys"].append(new_key)
        save_config(config)
    except Exception as e:
        debug_print(f"❌ Error creating key: {e}")
    return RedirectResponse(url="/dashboard", status_code=status.HTTP_303_SEE_OTHER)

@app.post("/delete-key")
async def delete_key(session: str = Depends(get_current_session), key_id: str = Form(...)):
    if not session:
        return RedirectResponse(url="/dash/login")
    try:
        config = get_config()
        config["api_keys"] = [k for k in config["api_keys"] if k["key"] != key_id]
        save_config(config)
    except Exception as e:
        debug_print(f"❌ Error deleting key: {e}")
    return RedirectResponse(url="/dashboard", status_code=status.HTTP_303_SEE_OTHER)

@app.post("/add-auth-token")
async def add_auth_token(session: str = Depends(get_current_session), new_auth_token: str = Form(...)):
    if not session:
        return RedirectResponse(url="/dash/login")
    try:
        config = get_config()
        token = new_auth_token.strip()
        if token and token not in config.get("auth_tokens", []):
            if "auth_tokens" not in config:
                config["auth_tokens"] = []
            config["auth_tokens"].append(token)
            save_config(config)
    except Exception as e:
        debug_print(f"❌ Error adding auth token: {e}")
    return RedirectResponse(url="/dashboard", status_code=status.HTTP_303_SEE_OTHER)

@app.post("/delete-auth-token")
async def delete_auth_token(session: str = Depends(get_current_session), token_index: int = Form(...)):
    if not session:
        return RedirectResponse(url="/dash/login")
    try:
        config = get_config()
        auth_tokens = config.get("auth_tokens", [])
        if 0 <= token_index < len(auth_tokens):
            auth_tokens.pop(token_index)
            config["auth_tokens"] = auth_tokens
            save_config(config)
    except Exception as e:
        debug_print(f"❌ Error deleting auth token: {e}")
    return RedirectResponse(url="/dashboard", status_code=status.HTTP_303_SEE_OTHER)

@app.post("/refresh-tokens")
async def refresh_tokens(session: str = Depends(get_current_session)):
    if not session:
        return RedirectResponse(url="/dash/login")
    try:
        await get_initial_data()
    except Exception as e:
        debug_print(f"❌ Error refreshing tokens: {e}")
    return RedirectResponse(url="/dashboard", status_code=status.HTTP_303_SEE_OTHER)

# --- OpenAI Compatible API Endpoints ---

@app.get("/v1/health")
@app.get("/api/v1/health")
async def health_check():
    """Health check endpoint for monitoring"""
    try:
        models = get_models()
        config = get_config()
        
        # Basic health checks
        has_cf_clearance = bool(config.get("cf_clearance"))
        has_models = len(models) > 0
        has_api_keys = len(config.get("api_keys", [])) > 0
        
        status = "healthy" if (has_cf_clearance and has_models) else "degraded"
        
        return {
            "status": status,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "checks": {
                "cf_clearance": has_cf_clearance,
                "models_loaded": has_models,
                "model_count": len(models),
                "api_keys_configured": has_api_keys
            }
        }
    except Exception as e:
        return {
            "status": "unhealthy",
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "error": str(e)
        }

@app.get("/v1/models")
@app.get("/api/v1/models")
async def list_models(api_key: dict = Depends(rate_limit_api_key)):
    try:
        models = get_models()
        
        # Filter for models with text OR search OR image output capability and an organization (exclude stealth models)
        # Always include image models - no special key needed
        valid_models = [m for m in models 
                       if (m.get('capabilities', {}).get('outputCapabilities', {}).get('text')
                           or m.get('capabilities', {}).get('outputCapabilities', {}).get('search')
                           or m.get('capabilities', {}).get('outputCapabilities', {}).get('image'))
                       and m.get('organization')]
        
        return {
            "object": "list",
            "data": [
                {
                    "id": model.get("publicName"),
                    "object": "model",
                    "created": int(time.time()),
                    "owned_by": model.get("organization", "lmarena")
                } for model in valid_models if model.get("publicName")
            ]
        }
    except Exception as e:
        debug_print(f"❌ Error listing models: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to load models: {str(e)}")

@app.post("/v1/chat/completions")
@app.post("/api/v1/chat/completions")
@app.post("/api/v1/responses")
@app.post("/v1/responses")
@app.post("/v1/v1/responses")
async def api_chat_completions(request: Request, api_key: dict = Depends(rate_limit_api_key)):
    debug_print("\n" + "="*80)
    debug_print("🔵 NEW API REQUEST RECEIVED")
    debug_print("="*80)
    
    try:
        # Parse request body with error handling
        try:
            body = await request.json()
        except json.JSONDecodeError as e:
            debug_print(f"❌ Invalid JSON in request body: {e}")
            raise HTTPException(status_code=400, detail=f"Invalid JSON in request body: {str(e)}")
        except Exception as e:
            debug_print(f"❌ Failed to read request body: {e}")
            raise HTTPException(status_code=400, detail=f"Failed to read request body: {str(e)}")
        
        debug_print(f"📥 Request body keys: {list(body.keys())}")
        
        # Validate required fields
        model_public_name = body.get("model")
        messages = body.get("messages", [])
        stream = body.get("stream", False)
        
        # TEMPORARY WORKAROUND: Force non-streaming mode
        # Streaming now uses browser-based streaming (bypasses reCAPTCHA!)
        # Implemented via make_lmarena_streaming_request_browser()
        

        debug_print(f"🌊 Stream mode: {stream}")
        debug_print(f"🤖 Requested model: {model_public_name}")
        debug_print(f"💬 Number of messages: {len(messages)}")
        
        if not model_public_name:
            debug_print("❌ Missing 'model' in request")
            raise HTTPException(status_code=400, detail="Missing 'model' in request body.")
        
        if not messages:
            debug_print("❌ Missing 'messages' in request")
            raise HTTPException(status_code=400, detail="Missing 'messages' in request body.")
        
        if not isinstance(messages, list):
            debug_print("❌ 'messages' must be an array")
            raise HTTPException(status_code=400, detail="'messages' must be an array.")
        
        if len(messages) == 0:
            debug_print("❌ 'messages' array is empty")
            raise HTTPException(status_code=400, detail="'messages' array cannot be empty.")

        # Find model ID from public name
        try:
            models = get_models()
            debug_print(f"📚 Total models loaded: {len(models)}")
        except Exception as e:
            debug_print(f"❌ Failed to load models: {e}")
            raise HTTPException(
                status_code=503,
                detail="Failed to load model list from LMArena. Please try again later."
            )
        
        model_id = None
        model_org = None
        model_capabilities = {}
        
        for m in models:
            if m.get("publicName") == model_public_name:
                model_id = m.get("id")
                model_org = m.get("organization")
                model_capabilities = m.get("capabilities", {})
                break
        
        if not model_id:
            debug_print(f"❌ Model '{model_public_name}' not found in model list")
            raise HTTPException(
                status_code=404, 
                detail=f"Model '{model_public_name}' not found. Use /api/v1/models to see available models."
            )
        
        # Check if model is a stealth model (no organization)
        if not model_org:
            debug_print(f"❌ Model '{model_public_name}' is a stealth model (no organization)")
            raise HTTPException(
                status_code=403,
                detail="You do not have access to stealth models. Contact cloudwaddie for more info."
            )
        
        debug_print(f"✅ Found model ID: {model_id}")
        debug_print(f"🔧 Model capabilities: {model_capabilities}")
        
        # Determine modality based on model capabilities
        # Priority: image > search > chat
        if model_capabilities.get('outputCapabilities', {}).get('image'):
            modality = "image"
        elif model_capabilities.get('outputCapabilities', {}).get('search'):
            modality = "search"
        else:
            modality = "chat"
        debug_print(f"🔍 Model modality: {modality}")

        # Log usage
        try:
            model_usage_stats[model_public_name] += 1
            # Save stats immediately after incrementing
            config = get_config()
            config["usage_stats"] = dict(model_usage_stats)
            save_config(config)
        except Exception as e:
            # Don't fail the request if usage logging fails
            debug_print(f"⚠️  Failed to log usage stats: {e}")

        # Extract system prompt if present and prepend to first user message
        system_prompt = ""
        system_messages = [m for m in messages if m.get("role") == "system"]
        if system_messages:
            # Handle content that might be a list (Claude CLI format) or string
            system_parts = []
            for m in system_messages:
                content = m.get("content", "")
                if isinstance(content, list):
                    # Extract text from content blocks
                    for block in content:
                        if isinstance(block, dict) and block.get("type") == "text":
                            system_parts.append(str(block.get("text", "")))
                        elif isinstance(block, str):
                            system_parts.append(block)
                else:
                    system_parts.append(str(content))
            system_prompt = "\n\n".join(system_parts)
            debug_print(f"📋 System prompt found: {system_prompt[:100]}..." if len(system_prompt) > 100 else f"📋 System prompt: {system_prompt}")
        
        # Process last message content (may include images)
        try:
            last_message_content = messages[-1].get("content", "")
            prompt, experimental_attachments = await process_message_content(last_message_content, model_capabilities)
            
            # If there's a system prompt and this is the first user message, prepend it
            if system_prompt:
                prompt = f"{system_prompt}\n\n{prompt}"
                debug_print(f"✅ System prompt prepended to user message")
        except Exception as e:
            debug_print(f"❌ Failed to process message content: {e}")
            raise HTTPException(
                status_code=400,
                detail=f"Failed to process message content: {str(e)}"
            )
        
        # Validate prompt
        if not prompt:
            # If no text but has attachments, that's okay for vision models
            if not experimental_attachments:
                debug_print("❌ Last message has no content")
                raise HTTPException(status_code=400, detail="Last message must have content.")
        
        # Log prompt length for debugging character limit issues
        debug_print(f"📝 User prompt length: {len(prompt)} characters")
        debug_print(f"🖼️  Attachments: {len(experimental_attachments)} images")
        debug_print(f"📝 User prompt preview: {prompt[:100]}..." if len(prompt) > 100 else f"📝 User prompt: {prompt}")
        
        # Check for reasonable character limit (LMArena appears to have limits)
        # Typical limit seems to be around 32K-64K characters based on testing
        MAX_PROMPT_LENGTH = 113567  # User hardcoded limit
        if len(prompt) > MAX_PROMPT_LENGTH:
            error_msg = f"Prompt too long ({len(prompt)} characters). LMArena has a character limit of approximately {MAX_PROMPT_LENGTH} characters. Please reduce the message size."
            debug_print(f"❌ {error_msg}")
            raise HTTPException(status_code=400, detail=error_msg)
        
        # Use API key + conversation tracking
        api_key_str = api_key["key"]

        # --- NEW: Get reCAPTCHA v3 Token for Payload ---
        recaptcha_token = await refresh_recaptcha_token()
        if not recaptcha_token:
            debug_print("❌ Cannot proceed, failed to get reCAPTCHA token.")
            raise HTTPException(
                status_code=503,
                detail="Service Unavailable: Failed to acquire reCAPTCHA token. The bridge server may be blocked."
            )
        debug_print(f"🔑 Using reCAPTCHA v3 token: {recaptcha_token[:20]}...")
        # -----------------------------------------------
        
        # Generate conversation ID from context (API key + model + first user message)
        import hashlib
        first_user_message = next((m.get("content", "") for m in messages if m.get("role") == "user"), "")
        if isinstance(first_user_message, list):
            # Handle array content format
            first_user_message = str(first_user_message)
        conversation_key = f"{api_key_str}_{model_public_name}_{first_user_message[:100]}"
        conversation_id = hashlib.sha256(conversation_key.encode()).hexdigest()[:16]
        
        debug_print(f"🔑 API Key: {api_key_str[:20]}...")
        debug_print(f"💭 Auto-generated Conversation ID: {conversation_id}")
        debug_print(f"🔑 Conversation key: {conversation_key[:100]}...")
        
        headers = get_request_headers()
        debug_print(f"📋 Headers prepared (auth token length: {len(headers.get('Cookie', '').split('arena-auth-prod-v1=')[-1].split(';')[0])} chars)")
        
        # Check if conversation exists for this API key
        # When FORCE_NEW_SESSION is enabled, always create new sessions to bypass per-session rate limits
        if FORCE_NEW_SESSION:
            session = None  # Force new session for every request
            debug_print("🔄 FORCE_NEW_SESSION enabled - creating fresh session (bypasses rate limits)")
        else:
            session = chat_sessions[api_key_str].get(conversation_id)
        
        # Detect retry: if session exists and last message is same user message (no assistant response after it)
        is_retry = False
        retry_message_id = None
        
        if session and len(session.get("messages", [])) >= 2:
            stored_messages = session["messages"]
            # Check if last stored message is from user with same content
            if stored_messages[-1]["role"] == "user" and stored_messages[-1]["content"] == prompt:
                # This is a retry - client sent same message again without assistant response
                is_retry = True
                retry_message_id = stored_messages[-1]["id"]
                # Get the assistant message ID that needs to be regenerated
                if len(stored_messages) >= 2 and stored_messages[-2]["role"] == "assistant":
                    # There was a previous assistant response - we'll retry that one
                    retry_message_id = stored_messages[-2]["id"]
                    debug_print(f"🔁 RETRY DETECTED - Regenerating assistant message {retry_message_id}")
        
        if is_retry and retry_message_id:
            debug_print(f"🔁 Using RETRY endpoint")
            # Use LMArena's retry endpoint
            # Format: PUT /nextjs-api/stream/retry-evaluation-session-message/{sessionId}/messages/{messageId}
            payload = {}
            url = f"https://arena.ai/nextjs-api/stream/retry-evaluation-session-message/{session['conversation_id']}/messages/{retry_message_id}"
            debug_print(f"📤 Target URL: {url}")
            debug_print(f"📦 Using PUT method for retry")
            http_method = "PUT"
        elif not session:
            debug_print("🆕 Creating NEW conversation session")
            # New conversation - Generate all IDs at once (like the browser does)
            session_id = str(uuid7())
            user_msg_id = str(uuid7())
            model_msg_id = str(uuid7())
            
            debug_print(f"🔑 Generated session_id: {session_id}")
            debug_print(f"👤 Generated user_msg_id: {user_msg_id}")
            debug_print(f"🤖 Generated model_msg_id: {model_msg_id}")
            
            payload = {
                "id": session_id,
                "mode": "direct",
                "modelAId": model_id,
                "userMessageId": user_msg_id,
                "modelAMessageId": model_msg_id,
                "userMessage": {
                    "content": prompt,
                    "experimental_attachments": experimental_attachments,
                    "metadata": {}
                },
                "modality": modality,
                "recaptchaV3Token": recaptcha_token, # <--- ADD TOKEN HERE
            }
            url = "https://arena.ai/nextjs-api/stream/create-evaluation"
            debug_print(f"📤 Target URL: {url}")
            debug_print(f"📦 Payload structure: Simple userMessage format")
            debug_print(f"🔍 Full payload: {json.dumps(payload, indent=2)}")
            http_method = "POST"
        else:
            debug_print("🔄 Using EXISTING conversation session")
            # Follow-up message - Generate new message IDs
            user_msg_id = str(uuid7())
            debug_print(f"👤 Generated followup user_msg_id: {user_msg_id}")
            model_msg_id = str(uuid7())
            debug_print(f"🤖 Generated followup model_msg_id: {model_msg_id}")
            
            payload = {
                "id": session["conversation_id"],
                "modelAId": model_id,
                "userMessageId": user_msg_id,
                "modelAMessageId": model_msg_id,
                "userMessage": {
                    "content": prompt,
                    "experimental_attachments": experimental_attachments,
                    "metadata": {}
                },
                "modality": modality,
                "recaptchaV3Token": recaptcha_token, # <--- ADD TOKEN HERE
            }
            url = f"https://arena.ai/nextjs-api/stream/post-to-evaluation/{session['conversation_id']}"
            debug_print(f"📤 Target URL: {url}")
            debug_print(f"📦 Payload structure: Simple userMessage format")
            debug_print(f"🔍 Full payload: {json.dumps(payload, indent=2)}")
            http_method = "POST"

        debug_print(f"\n🚀 Making API request to LMArena...")
        debug_print(f"⏱️  Timeout set to: 120 seconds")
        
        # Initialize failed tokens tracking for this request
        request_id = str(uuid.uuid4())
        failed_tokens = set()
        
        # Get initial auth token using round-robin (excluding any failed ones)
        current_token = get_next_auth_token(exclude_tokens=failed_tokens)
        headers = get_request_headers_with_token(current_token)
        debug_print(f"🔑 Using token (round-robin): {current_token[:20]}...")
        
        # Retry logic wrapper
        async def make_request_with_retry(url, payload, http_method, max_retries=3):
            """Make request with automatic retry on 429/401 errors"""
            nonlocal current_token, headers, failed_tokens
            
            for attempt in range(max_retries):
                try:
                    # Use browser-based request (bypasses ALL bot detection)
                    debug_print(f"🌐 Using REAL Chrome browser for API call (attempt {attempt + 1}/{max_retries})")
                    browser_response = await make_lmarena_request_browser(url, payload, method=http_method)
                    
                    # Create a response-like object for compatibility
                    class BrowserResponse:
                        def __init__(self, status_code, text):
                            self.status_code = status_code
                            self.text = text
                            self.headers = {}  # Empty headers for browser requests
                        def raise_for_status(self):
                            if self.status_code >= 400:
                                raise HTTPException(status_code=self.status_code, detail=f"Browser request failed: {self.text[:200]}")
                    
                    response = BrowserResponse(browser_response["status_code"], browser_response["text"])
                    
                    # Log status with human-readable message
                    log_http_status(response.status_code, "LMArena API (via Browser)")
                    
                    # Check for retry-able errors
                    if response.status_code == HTTPStatus.TOO_MANY_REQUESTS:
                        debug_print(f"⏱️  Attempt {attempt + 1}/{max_retries} - Rate limit")
                        if attempt < max_retries - 1:
                            await asyncio.sleep(2)  # Wait before retry
                            continue
                    
                    elif response.status_code == HTTPStatus.UNAUTHORIZED:
                        # Log the actual LMArena error response
                        debug_print(f"🔒 LMArena 401 Response: {response.text}")
                        debug_print(f"🔒 Attempt {attempt + 1}/{max_retries} - Auth failed")
                        if attempt < max_retries - 1:
                            await asyncio.sleep(1)
                            continue
                    
                    # If we get here, return the response (success or non-retryable error)
                    response.raise_for_status()
                    return response
                        
                except Exception as e:
                    # Catch browser and other exceptions
                    debug_print(f"❌ Request attempt {attempt + 1}/{max_retries} failed: {type(e).__name__}: {e}")
                    if attempt == max_retries - 1:
                        raise HTTPException(status_code=503, detail=f"Max retries exceeded: {type(e).__name__}: {str(e)}")
                    await asyncio.sleep(1)
                    continue
            
            # Should not reach here, but just in case
            raise HTTPException(status_code=503, detail="Max retries exceeded")
        
        # Handle streaming mode
        if stream:
            async def generate_stream():
                nonlocal current_token, headers
                chunk_id = f"chatcmpl-{uuid.uuid4()}"
                
                # Retry logic for streaming
                max_retries = 3
                for attempt in range(max_retries):
                    # Reset response data for each attempt
                    response_text = ""
                    reasoning_text = ""
                    citations = []
                    try:
                        # Use browser-based streaming (bypasses reCAPTCHA!)
                        debug_print(f"📡 Browser Streaming (attempt {attempt + 1}/{max_retries})")
                        debug_print(f"🔐 Using REAL Chrome browser for streaming")
                        
                        # Buffer for accumulating partial lines across chunks
                        line_buffer = ""
                        
                        async for raw_chunk in make_lmarena_streaming_request_browser(url, payload, method=http_method):
                            # Combine buffer with new chunk and split into lines
                            combined = line_buffer + raw_chunk
                            chunk_lines = combined.split('\n')
                            
                            # Keep the last partial line in buffer (if no trailing newline)
                            if not combined.endswith('\n'):
                                line_buffer = chunk_lines[-1]
                                chunk_lines = chunk_lines[:-1]
                            else:
                                line_buffer = ""
                            
                            for line in chunk_lines:
                                line = line.strip()
                                if not line:
                                    continue
                                
                                # Parse thinking/reasoning chunks: ag:"thinking text"
                                if line.startswith("ag:"):
                                    chunk_data = line[3:]
                                    try:
                                        reasoning_chunk = json.loads(chunk_data)
                                        reasoning_text += reasoning_chunk
                                        
                                        # Send SSE-formatted chunk with reasoning_content
                                        chunk_response = {
                                            "id": chunk_id,
                                            "object": "chat.completion.chunk",
                                            "created": int(time.time()),
                                            "model": model_public_name,
                                            "choices": [{
                                                "index": 0,
                                                "delta": {
                                                    "reasoning_content": reasoning_chunk
                                                },
                                                "finish_reason": None
                                            }]
                                        }
                                        yield f"data: {json.dumps(chunk_response)}\n\n"
                                        
                                    except json.JSONDecodeError:
                                        continue
                                
                                # Parse text chunks: a0:"Hello "
                                elif line.startswith("a0:"):
                                    chunk_data = line[3:]
                                    try:
                                        text_chunk = json.loads(chunk_data)
                                        response_text += text_chunk
                                        
                                        # Send SSE-formatted chunk
                                        chunk_response = {
                                            "id": chunk_id,
                                            "object": "chat.completion.chunk",
                                            "created": int(time.time()),
                                            "model": model_public_name,
                                            "choices": [{
                                                "index": 0,
                                                "delta": {
                                                    "content": text_chunk
                                                },
                                                "finish_reason": None
                                            }]
                                        }
                                        yield f"data: {json.dumps(chunk_response)}\n\n"
                                        
                                    except json.JSONDecodeError:
                                        continue
                                
                                # Parse image generation: a2:[{...}] (for image models)
                                elif line.startswith("a2:"):
                                    image_data = line[3:]
                                    try:
                                        image_list = json.loads(image_data)
                                        if isinstance(image_list, list) and len(image_list) > 0:
                                            image_obj = image_list[0]
                                            if image_obj.get('type') == 'image':
                                                image_url = image_obj.get('image', '')
                                                response_text = f"![Generated Image]({image_url})"
                                                
                                                chunk_response = {
                                                    "id": chunk_id,
                                                    "object": "chat.completion.chunk",
                                                    "created": int(time.time()),
                                                    "model": model_public_name,
                                                    "choices": [{
                                                        "index": 0,
                                                        "delta": {
                                                            "content": response_text
                                                        },
                                                        "finish_reason": None
                                                    }]
                                                }
                                                yield f"data: {json.dumps(chunk_response)}\n\n"
                                    except json.JSONDecodeError:
                                        pass
                                
                                # Parse citations/tool calls: ac:{...}
                                elif line.startswith("ac:"):
                                    citation_data = line[3:]
                                    try:
                                        citation_obj = json.loads(citation_data)
                                        if 'argsTextDelta' in citation_obj:
                                            args_data = json.loads(citation_obj['argsTextDelta'])
                                            if 'source' in args_data:
                                                source = args_data['source']
                                                if isinstance(source, list):
                                                    citations.extend(source)
                                                elif isinstance(source, dict):
                                                    citations.append(source)
                                        debug_print(f"  🔗 Citation added: {citation_obj.get('toolCallId')}")
                                    except json.JSONDecodeError:
                                        pass
                                
                                # Parse error messages: a3:"error"
                                elif line.startswith("a3:"):
                                    error_data = line[3:]
                                    try:
                                        error_message = json.loads(error_data)
                                        print(f"  ❌ Error in stream: {error_message}")
                                    except json.JSONDecodeError:
                                        pass
                                
                                # Parse metadata for finish: ad:{"finishReason":"stop"}
                                elif line.startswith("ad:"):
                                    metadata_data = line[3:]
                                    try:
                                        metadata = json.loads(metadata_data)
                                        finish_reason = metadata.get("finishReason", "stop")
                                        
                                        # Send final chunk with finish_reason
                                        final_chunk = {
                                            "id": chunk_id,
                                            "object": "chat.completion.chunk",
                                            "created": int(time.time()),
                                            "model": model_public_name,
                                            "choices": [{
                                                "index": 0,
                                                "delta": {},
                                                "finish_reason": finish_reason
                                            }]
                                        }
                                        yield f"data: {json.dumps(final_chunk)}\n\n"
                                    except json.JSONDecodeError:
                                        continue
                        
                        # Update session with completed message
                        assistant_message = {
                            "id": model_msg_id, 
                            "role": "assistant", 
                            "content": response_text.strip()
                        }
                        if reasoning_text:
                            assistant_message["reasoning_content"] = reasoning_text.strip()
                        if citations:
                            unique_citations = []
                            seen_urls = set()
                            for citation in citations:
                                citation_url = citation.get('url')
                                if citation_url and citation_url not in seen_urls:
                                    seen_urls.add(citation_url)
                                    unique_citations.append(citation)
                            assistant_message["citations"] = unique_citations
                        
                        if not session:
                            chat_sessions[api_key_str][conversation_id] = {
                                "conversation_id": session_id,
                                "model": model_public_name,
                                "messages": [
                                    {"id": user_msg_id, "role": "user", "content": prompt},
                                    assistant_message
                                ]
                            }
                            debug_print(f"💾 Saved new session for conversation {conversation_id}")
                        else:
                            chat_sessions[api_key_str][conversation_id]["messages"].append(
                                {"id": user_msg_id, "role": "user", "content": prompt}
                            )
                            chat_sessions[api_key_str][conversation_id]["messages"].append(
                                assistant_message
                            )
                            debug_print(f"💾 Updated existing session for conversation {conversation_id}")
                        
                        yield "data: [DONE]\n\n"
                        debug_print(f"✅ Stream completed - {len(response_text)} chars sent")
                        return  # Success, exit retry loop
                        
                    except HTTPException as e:
                        # Handle HTTPException from browser streaming
                        error_msg = str(e.detail)
                        print(f"❌ Stream error: {error_msg}")
                        
                        # Check for rate limit (429)
                        if e.status_code == 429 and attempt < max_retries - 1:
                            debug_print(f"⏱️  Rate limited, retrying...")
                            await asyncio.sleep(2)
                            continue
                        
                        error_chunk = {
                            "error": {
                                "message": error_msg,
                                "type": "api_error",
                                "code": e.status_code
                            }
                        }
                        yield f"data: {json.dumps(error_chunk)}\n\n"
                        return
                        
                    except Exception as e:
                        print(f"❌ Stream error: {str(e)}")
                        error_chunk = {
                            "error": {
                                "message": str(e),
                                "type": "internal_error"
                            }
                        }
                        yield f"data: {json.dumps(error_chunk)}\n\n"
                        return
            
            return StreamingResponse(generate_stream(), media_type="text/event-stream")
        
        # Handle non-streaming mode with retry
        try:
            response = await make_request_with_retry(url, payload, http_method)
            
            log_http_status(response.status_code, "LMArena API Response")
            debug_print(f"📏 Response length: {len(response.text)} characters")
            debug_print(f"📋 Response headers: {dict(response.headers)}")
            
            debug_print(f"🔍 Processing response...")
            debug_print(f"📄 First 500 chars of response:\n{response.text[:500]}")
            
            # Process response in lmarena format
            # Format: ag:"thinking" for reasoning, a0:"text chunk" for content, ac:{...} for citations, ad:{...} for metadata
            response_text = ""
            reasoning_text = ""
            citations = []
            finish_reason = None
            line_count = 0
            text_chunks_found = 0
            reasoning_chunks_found = 0
            citation_chunks_found = 0
            metadata_found = 0
            
            debug_print(f"📊 Parsing response lines...")
            
            error_message = None
            for line in response.text.splitlines():
                line_count += 1
                line = line.strip()
                if not line:
                    continue
                
                # Parse thinking/reasoning chunks: ag:"thinking text"
                if line.startswith("ag:"):
                    chunk_data = line[3:]  # Remove "ag:" prefix
                    reasoning_chunks_found += 1
                    try:
                        # Parse as JSON string (includes quotes)
                        reasoning_chunk = json.loads(chunk_data)
                        reasoning_text += reasoning_chunk
                        if reasoning_chunks_found <= 3:  # Log first 3 reasoning chunks
                            debug_print(f"  🧠 Reasoning chunk {reasoning_chunks_found}: {repr(reasoning_chunk[:50])}")
                    except json.JSONDecodeError as e:
                        debug_print(f"  ⚠️ Failed to parse reasoning chunk on line {line_count}: {chunk_data[:100]} - {e}")
                        continue
                
                # Parse text chunks: a0:"Hello "
                elif line.startswith("a0:"):
                    chunk_data = line[3:]  # Remove "a0:" prefix
                    text_chunks_found += 1
                    try:
                        # Parse as JSON string (includes quotes)
                        text_chunk = json.loads(chunk_data)
                        response_text += text_chunk
                        if text_chunks_found <= 3:  # Log first 3 chunks
                            debug_print(f"  ✅ Chunk {text_chunks_found}: {repr(text_chunk[:50])}")
                    except json.JSONDecodeError as e:
                        debug_print(f"  ⚠️ Failed to parse text chunk on line {line_count}: {chunk_data[:100]} - {e}")
                        continue
                
                # Parse image generation: a2:[{...}] (for image models)
                elif line.startswith("a2:"):
                    image_data = line[3:]  # Remove "a2:" prefix
                    try:
                        image_list = json.loads(image_data)
                        # OpenAI format expects URL in content
                        if isinstance(image_list, list) and len(image_list) > 0:
                            image_obj = image_list[0]
                            if image_obj.get('type') == 'image':
                                image_url = image_obj.get('image', '')
                                # Format as markdown
                                response_text = f"![Generated Image]({image_url})"
                    except json.JSONDecodeError as e:
                        debug_print(f"  ⚠️ Failed to parse image data on line {line_count}: {image_data[:100]} - {e}")
                        continue
                
                # Parse citations/tool calls: ac:{...} (for search models)
                elif line.startswith("ac:"):
                    citation_data = line[3:]  # Remove "ac:" prefix
                    citation_chunks_found += 1
                    try:
                        citation_obj = json.loads(citation_data)
                        # Extract source information from argsTextDelta
                        if 'argsTextDelta' in citation_obj:
                            args_data = json.loads(citation_obj['argsTextDelta'])
                            if 'source' in args_data:
                                source = args_data['source']
                                # Can be a single source or array of sources
                                if isinstance(source, list):
                                    citations.extend(source)
                                elif isinstance(source, dict):
                                    citations.append(source)
                        if citation_chunks_found <= 3:  # Log first 3 citations
                            debug_print(f"  🔗 Citation chunk {citation_chunks_found}: {citation_obj.get('toolCallId')}")
                    except json.JSONDecodeError as e:
                        debug_print(f"  ⚠️ Failed to parse citation chunk on line {line_count}: {citation_data[:100]} - {e}")
                        continue
                
                # Parse error messages: a3:"An error occurred"
                elif line.startswith("a3:"):
                    error_data = line[3:]  # Remove "a3:" prefix
                    try:
                        error_message = json.loads(error_data)
                        debug_print(f"  ❌ Error message received: {error_message}")
                    except json.JSONDecodeError as e:
                        debug_print(f"  ⚠️ Failed to parse error message on line {line_count}: {error_data[:100]} - {e}")
                        error_message = error_data
                
                # Parse metadata: ad:{"finishReason":"stop"}
                elif line.startswith("ad:"):
                    metadata_data = line[3:]  # Remove "ad:" prefix
                    metadata_found += 1
                    try:
                        metadata = json.loads(metadata_data)
                        finish_reason = metadata.get("finishReason")
                        debug_print(f"  📋 Metadata found: finishReason={finish_reason}")
                    except json.JSONDecodeError as e:
                        debug_print(f"  ⚠️ Failed to parse metadata on line {line_count}: {metadata_data[:100]} - {e}")
                        continue
                elif line.strip():  # Non-empty line that doesn't match expected format
                    if line_count <= 5:  # Log first 5 unexpected lines
                        debug_print(f"  ❓ Unexpected line format {line_count}: {line[:100]}")

            debug_print(f"\n📊 Parsing Summary:")
            debug_print(f"  - Total lines: {line_count}")
            debug_print(f"  - Reasoning chunks found: {reasoning_chunks_found}")
            debug_print(f"  - Text chunks found: {text_chunks_found}")
            debug_print(f"  - Citation chunks found: {citation_chunks_found}")
            debug_print(f"  - Metadata entries: {metadata_found}")
            debug_print(f"  - Final response length: {len(response_text)} chars")
            debug_print(f"  - Final reasoning length: {len(reasoning_text)} chars")
            debug_print(f"  - Citations found: {len(citations)}")
            debug_print(f"  - Finish reason: {finish_reason}")
            
            if not response_text:
                debug_print(f"\n⚠️  WARNING: Empty response text!")
                debug_print(f"📄 Full raw response:\n{response.text}")
                if error_message:
                    error_detail = f"LMArena API error: {error_message}"
                    print(f"❌ {error_detail}")
                    # Return OpenAI-compatible error response
                    return {
                        "error": {
                            "message": error_detail,
                            "type": "upstream_error",
                            "code": "lmarena_error"
                        }
                    }
                else:
                    error_detail = "LMArena API returned empty response. This could be due to: invalid auth token, expired cf_clearance, model unavailable, or API rate limiting."
                    debug_print(f"❌ {error_detail}")
                    # Return OpenAI-compatible error response
                    return {
                        "error": {
                            "message": error_detail,
                            "type": "upstream_error",
                            "code": "empty_response"
                        }
                    }
            else:
                debug_print(f"✅ Response text preview: {response_text[:200]}...")
            
            # Update session - Store message history with IDs (including reasoning and citations if present)
            assistant_message = {
                "id": model_msg_id, 
                "role": "assistant", 
                "content": response_text.strip()
            }
            if reasoning_text:
                assistant_message["reasoning_content"] = reasoning_text.strip()
            if citations:
                # Deduplicate citations by URL
                unique_citations = []
                seen_urls = set()
                for citation in citations:
                    citation_url = citation.get('url')
                    if citation_url and citation_url not in seen_urls:
                        seen_urls.add(citation_url)
                        unique_citations.append(citation)
                assistant_message["citations"] = unique_citations
            
            if not session:
                chat_sessions[api_key_str][conversation_id] = {
                    "conversation_id": session_id,
                    "model": model_public_name,
                    "messages": [
                        {"id": user_msg_id, "role": "user", "content": prompt},
                        assistant_message
                    ]
                }
                debug_print(f"💾 Saved new session for conversation {conversation_id}")
            else:
                # Append new messages to history
                chat_sessions[api_key_str][conversation_id]["messages"].append(
                    {"id": user_msg_id, "role": "user", "content": prompt}
                )
                chat_sessions[api_key_str][conversation_id]["messages"].append(
                    assistant_message
                )
                debug_print(f"💾 Updated existing session for conversation {conversation_id}")

            # Build message object with reasoning and citations if present
            message_obj = {
                "role": "assistant",
                "content": response_text.strip(),
            }
            if reasoning_text:
                message_obj["reasoning_content"] = reasoning_text.strip()
            if citations:
                # Deduplicate citations by URL
                unique_citations = []
                seen_urls = set()
                for citation in citations:
                    citation_url = citation.get('url')
                    if citation_url and citation_url not in seen_urls:
                        seen_urls.add(citation_url)
                        unique_citations.append(citation)
                message_obj["citations"] = unique_citations
                
                # Add citations as markdown footnotes
                if unique_citations:
                    footnotes = "\n\n---\n\n**Sources:**\n\n"
                    for i, citation in enumerate(unique_citations, 1):
                        title = citation.get('title', 'Untitled')
                        url = citation.get('url', '')
                        footnotes += f"{i}. [{title}]({url})\n"
                    message_obj["content"] = response_text.strip() + footnotes
            
            # Image models already have markdown formatting from parsing
            # No additional conversion needed
            
            # Calculate token counts (including reasoning tokens)
            prompt_tokens = len(prompt)
            completion_tokens = len(response_text)
            reasoning_tokens = len(reasoning_text)
            total_tokens = prompt_tokens + completion_tokens + reasoning_tokens
            
            # Build usage object with reasoning tokens if present
            usage_obj = {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": total_tokens
            }
            if reasoning_tokens > 0:
                usage_obj["reasoning_tokens"] = reasoning_tokens
            
            final_response = {
                "id": f"chatcmpl-{uuid.uuid4()}",
                "object": "chat.completion",
                "created": int(time.time()),
                "model": model_public_name,
                "conversation_id": conversation_id,
                "choices": [{
                    "index": 0,
                    "message": message_obj,
                    "finish_reason": "stop"
                }],
                "usage": usage_obj
            }
            
            debug_print(f"\n✅ REQUEST COMPLETED SUCCESSFULLY")
            debug_print("="*80)
            # LOG EXACT RESPONSE BEING SENT
            debug_print(f"📤 FINAL RESPONSE TO CLIENT:")
            debug_print(json.dumps(final_response, indent=2)[:1000])  # First 1000 chars
            debug_print("="*80 + "\n")
            
            return final_response


        except httpx.HTTPStatusError as e:
            # Log error status
            log_http_status(e.response.status_code, "Error Response")
            
            # Try to parse JSON error response from LMArena
            lmarena_error = None
            try:
                error_body = e.response.json()
                if isinstance(error_body, dict) and "error" in error_body:
                    lmarena_error = error_body["error"]
                    debug_print(f"📛 LMArena error message: {lmarena_error}")
            except:
                pass
            
            # Provide user-friendly error messages
            if e.response.status_code == HTTPStatus.TOO_MANY_REQUESTS:
                error_detail = "Rate limit exceeded on LMArena. Please try again in a few moments."
                error_type = "rate_limit_error"
            elif e.response.status_code == HTTPStatus.UNAUTHORIZED:
                error_detail = "Unauthorized: Your LMArena auth token has expired or is invalid. Please get a new auth token from the dashboard."
                error_type = "authentication_error"
            elif e.response.status_code == HTTPStatus.FORBIDDEN:
                error_detail = "Forbidden: Access to this resource is denied."
                error_type = "forbidden_error"
            elif e.response.status_code == HTTPStatus.NOT_FOUND:
                error_detail = "Not Found: The requested resource doesn't exist."
                error_type = "not_found_error"
            elif e.response.status_code == HTTPStatus.BAD_REQUEST:
                # Use LMArena's error message if available
                if lmarena_error:
                    error_detail = f"Bad Request: {lmarena_error}"
                else:
                    error_detail = "Bad Request: Invalid request parameters."
                error_type = "bad_request_error"
            elif e.response.status_code >= 500:
                error_detail = f"Server Error: LMArena API returned {e.response.status_code}"
                error_type = "server_error"
            else:
                # Use LMArena's error message if available
                if lmarena_error:
                    error_detail = f"LMArena API error: {lmarena_error}"
                else:
                    error_detail = f"LMArena API error: {e.response.status_code}"
                    try:
                        error_body = e.response.json()
                        error_detail += f" - {error_body}"
                    except:
                        error_detail += f" - {e.response.text[:200]}"
                error_type = "upstream_error"
            
            print(f"\n❌ HTTP STATUS ERROR")
            print(f"📛 Error detail: {error_detail}")
            print(f"📤 Request URL: {url}")
            debug_print(f"📤 Request payload (truncated): {json.dumps(payload, indent=2)[:500]}")
            debug_print(f"📥 Response text: {e.response.text[:500]}")
            print("="*80 + "\n")
            
            # Return OpenAI-compatible error response
            return {
                "error": {
                    "message": error_detail,
                    "type": error_type,
                    "code": f"http_{e.response.status_code}"
                }
            }
        
        except httpx.TimeoutException as e:
            print(f"\n⏱️  TIMEOUT ERROR")
            print(f"📛 Request timed out after 120 seconds")
            print(f"📤 Request URL: {url}")
            print("="*80 + "\n")
            # Return OpenAI-compatible error response
            return {
                "error": {
                    "message": "Request to LMArena API timed out after 120 seconds",
                    "type": "timeout_error",
                    "code": "request_timeout"
                }
            }
        
        except Exception as e:
            print(f"\n❌ UNEXPECTED ERROR IN HTTP CLIENT")
            print(f"📛 Error type: {type(e).__name__}")
            print(f"📛 Error message: {str(e)}")
            print(f"📤 Request URL: {url}")
            print("="*80 + "\n")
            # Return OpenAI-compatible error response
            return {
                "error": {
                    "message": f"Unexpected error: {str(e)}",
                    "type": "internal_error",
                    "code": type(e).__name__.lower()
                }
            }
                
    except HTTPException:
        raise
    except Exception as e:
        print(f"\n❌ TOP-LEVEL EXCEPTION")
        print(f"📛 Error type: {type(e).__name__}")
        print(f"📛 Error message: {str(e)}")
        print("="*80 + "\n")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")

# ============================================================
# ANTHROPIC-COMPATIBLE API ENDPOINTS
# ============================================================
# These endpoints allow Claude Code and other Anthropic SDK clients
# to use LMArenaBridge by translating between Anthropic and OpenAI formats.

def convert_anthropic_to_openai_messages(anthropic_messages: list, system: str = None) -> list:
    """Convert Anthropic message format to OpenAI message format"""
    openai_messages = []
    
    # Add system message if present
    if system:
        openai_messages.append({"role": "system", "content": system})
    
    for msg in anthropic_messages:
        role = msg.get("role", "user")
        content = msg.get("content", "")
        
        # Handle content that could be string or list of content blocks
        if isinstance(content, list):
            # Convert Anthropic content blocks to text
            text_parts = []
            for block in content:
                if isinstance(block, dict):
                    if block.get("type") == "text":
                        text_value = block.get("text", "")
                        # Handle case where text might be a list
                        if isinstance(text_value, list):
                            text_parts.extend([str(t) for t in text_value])
                        else:
                            text_parts.append(str(text_value))
                    elif block.get("type") == "image":
                        # Handle image content if present
                        source = block.get("source", {})
                        if source.get("type") == "base64":
                            media_type = source.get("media_type", "image/png")
                            data = source.get("data", "")
                            # Convert to OpenAI image_url format
                            openai_messages.append({
                                "role": role,
                                "content": [{
                                    "type": "image_url",
                                    "image_url": {
                                        "url": f"data:{media_type};base64,{data}"
                                    }
                                }]
                            })
                            continue
                elif isinstance(block, str):
                    text_parts.append(block)
                elif isinstance(block, list):
                    # Handle nested lists
                    text_parts.extend([str(item) for item in block])
            
            if text_parts:
                final_content = "\n".join(text_parts)
                openai_messages.append({"role": role, "content": final_content})
        else:
            openai_messages.append({"role": role, "content": str(content)})
    
    return openai_messages

def convert_openai_to_anthropic_response(openai_response: dict, model: str) -> dict:
    """Convert OpenAI response format to Anthropic response format"""
    # Handle error responses
    if "error" in openai_response:
        return {
            "type": "error",
            "error": {
                "type": "api_error",
                "message": openai_response["error"].get("message", "Unknown error")
            }
        }
    
    # Extract content from OpenAI response
    choices = openai_response.get("choices", [])
    if not choices:
        return {
            "type": "error",
            "error": {
                "type": "api_error", 
                "message": "No response from model"
            }
        }
    
    message = choices[0].get("message", {})
    content_text = message.get("content", "")
    finish_reason = choices[0].get("finish_reason", "end_turn")
    
    # Map OpenAI finish reasons to Anthropic stop reasons
    stop_reason_map = {
        "stop": "end_turn",
        "length": "max_tokens",
        "content_filter": "end_turn",
        "tool_calls": "tool_use",
        None: "end_turn"
    }
    stop_reason = stop_reason_map.get(finish_reason, "end_turn")
    
    # Build Anthropic response
    return {
        "id": f"msg_{uuid.uuid4().hex[:24]}",
        "type": "message",
        "role": "assistant",
        "content": [
            {
                "type": "text",
                "text": content_text
            }
        ],
        "model": model,
        "stop_reason": stop_reason,
        "stop_sequence": None,
        "usage": {
            "input_tokens": openai_response.get("usage", {}).get("prompt_tokens", 0),
            "output_tokens": openai_response.get("usage", {}).get("completion_tokens", 0)
        }
    }

@app.post("/v1/messages")
async def anthropic_messages(request: Request, api_key: dict = Depends(rate_limit_api_key)):
    """
    Anthropic-compatible /v1/messages endpoint.
    Translates Anthropic API format to OpenAI format, calls the internal OpenAI endpoint,
    then translates the response back to Anthropic format.
    """
    debug_print("\n" + "="*80)
    debug_print("🔷 NEW ANTHROPIC API REQUEST RECEIVED")
    debug_print("="*80)
    
    try:
        # Parse request body
        try:
            body = await request.json()
        except json.JSONDecodeError as e:
            debug_print(f"❌ Invalid JSON in request body: {e}")
            raise HTTPException(status_code=400, detail=f"Invalid JSON in request body: {str(e)}")
        
        debug_print(f"📥 Anthropic request body keys: {list(body.keys())}")
        
        # Extract Anthropic-specific fields
        model = body.get("model", "")
        messages = body.get("messages", [])
        system = body.get("system", "")
        max_tokens = body.get("max_tokens", 4096)
        stream = body.get("stream", False)
        
        debug_print(f"🤖 Requested model: {model}")
        debug_print(f"💬 Number of messages: {len(messages)}")
        debug_print(f"🌊 Stream mode: {stream}")
        
        if not model:
            raise HTTPException(status_code=400, detail="Missing 'model' in request body.")
        
        if not messages:
            raise HTTPException(status_code=400, detail="Missing 'messages' in request body.")
        
        # Convert Anthropic messages to OpenAI format
        openai_messages = convert_anthropic_to_openai_messages(messages, system)
        debug_print(f"🔄 Converted to {len(openai_messages)} OpenAI messages")
        
        # Build OpenAI-compatible request
        openai_body = {
            "model": model,
            "messages": openai_messages,
            "max_tokens": max_tokens,
            "stream": stream
        }
        
        # Get auth headers from the original request (support both Authorization and x-api-key)
        auth_header = request.headers.get("Authorization", "")
        x_api_key = request.headers.get("x-api-key", "")
        
        # Build headers for internal request
        internal_headers = {"Content-Type": "application/json"}
        if auth_header:
            internal_headers["Authorization"] = auth_header
        if x_api_key:
            internal_headers["x-api-key"] = x_api_key
        
        # Call the internal OpenAI endpoint
        debug_print(f"🔀 Forwarding to internal OpenAI endpoint...")
        
        if stream:
            # For streaming, we need to forward the stream and translate it
            async def anthropic_stream_generator():
                try:
                    async with httpx.AsyncClient(timeout=180.0) as client:
                        async with client.stream(
                            "POST",
                            f"http://localhost:{PORT}/api/v1/chat/completions",
                            headers=internal_headers,
                            json=openai_body
                        ) as response:
                            if response.status_code != 200:
                                error_text = await response.aread()
                                debug_print(f"❌ Internal OpenAI endpoint error: {response.status_code}")
                                error_event = {
                                    "type": "error",
                                    "error": {
                                        "type": "api_error",
                                        "message": error_text.decode()[:500]
                                    }
                                }
                                yield f"event: error\ndata: {json.dumps(error_event)}\n\n"
                                return
                            
                            # Send Anthropic message_start event
                            msg_id = f"msg_{uuid.uuid4().hex[:24]}"
                            start_event = {
                                "type": "message_start",
                                "message": {
                                    "id": msg_id,
                                    "type": "message",
                                    "role": "assistant",
                                    "content": [],
                                    "model": model,
                                    "stop_reason": None,
                                    "stop_sequence": None,
                                    "usage": {"input_tokens": 0, "output_tokens": 0}
                                }
                            }
                            yield f"event: message_start\ndata: {json.dumps(start_event)}\n\n"
                            
                            # Send content_block_start
                            block_start = {
                                "type": "content_block_start",
                                "index": 0,
                                "content_block": {"type": "text", "text": ""}
                            }
                            yield f"event: content_block_start\ndata: {json.dumps(block_start)}\n\n"
                            
                            output_tokens = 0
                            async for line in response.aiter_lines():
                                if not line:
                                    continue
                                
                                # OpenAI SSE format: data: {...}
                                if line.startswith("data: "):
                                    data_str = line[6:]
                                    if data_str == "[DONE]":
                                        break
                                    
                                    try:
                                        chunk = json.loads(data_str)
                                        # Extract content delta
                                        choices = chunk.get("choices", [])
                                        if choices:
                                            delta = choices[0].get("delta", {})
                                            content = delta.get("content", "")
                                            if content:
                                                output_tokens += 1
                                                # Send content_block_delta
                                                delta_event = {
                                                    "type": "content_block_delta",
                                                    "index": 0,
                                                    "delta": {"type": "text_delta", "text": content}
                                                }
                                                yield f"event: content_block_delta\ndata: {json.dumps(delta_event)}\n\n"
                                    except json.JSONDecodeError:
                                        pass
                            
                            # Send content_block_stop
                            yield f"event: content_block_stop\ndata: {{\"type\": \"content_block_stop\", \"index\": 0}}\n\n"
                            
                            # Send message_delta
                            message_delta = {
                                "type": "message_delta",
                                "delta": {"stop_reason": "end_turn", "stop_sequence": None},
                                "usage": {"output_tokens": output_tokens}
                            }
                            yield f"event: message_delta\ndata: {json.dumps(message_delta)}\n\n"
                            
                            # Send message_stop
                            yield f"event: message_stop\ndata: {{\"type\": \"message_stop\"}}\n\n"
                            
                except Exception as e:
                    debug_print(f"❌ Streaming error: {e}")
                    error_event = {
                        "type": "error",
                        "error": {"type": "api_error", "message": str(e)}
                    }
                    yield f"event: error\ndata: {json.dumps(error_event)}\n\n"
            
            return StreamingResponse(
                anthropic_stream_generator(),
                media_type="text/event-stream",
                headers={
                    "Cache-Control": "no-cache",
                    "Connection": "keep-alive",
                    "X-Accel-Buffering": "no"
                }
            )
        
        else:
            # Non-streaming - call internal endpoint and convert response
            try:
                async with httpx.AsyncClient(timeout=180.0) as client:
                    response = await client.post(
                        f"http://localhost:{PORT}/api/v1/chat/completions",
                        headers=internal_headers,
                        json=openai_body
                    )
                    
                    if response.status_code != 200:
                        debug_print(f"❌ Internal OpenAI endpoint error: {response.status_code}")
                        raise HTTPException(
                            status_code=response.status_code,
                            detail=response.text[:500]
                        )
                    
                    openai_response = response.json()
                    debug_print(f"✅ Got OpenAI response, converting to Anthropic format...")
                    
                    # Convert to Anthropic format
                    anthropic_response = convert_openai_to_anthropic_response(openai_response, model)
                    return anthropic_response
                    
            except httpx.TimeoutException:
                raise HTTPException(status_code=504, detail="Request timed out")
            except httpx.HTTPError as e:
                raise HTTPException(status_code=502, detail=f"Internal request failed: {str(e)}")
    
    except HTTPException:
        raise
    except Exception as e:
        debug_print(f"❌ Anthropic endpoint error: {e}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")

if __name__ == "__main__":
    print("=" * 60)
    print("🚀 LMArena Bridge Server Starting...")
    print("=" * 60)
    print(f"📍 Dashboard: http://localhost:{PORT}/dashboard")
    print(f"🔐 Login:     http://localhost:{PORT}/dash/login")
    print(f"📚 API Base URL: http://localhost:{PORT}/v1")
    print("=" * 60)
    uvicorn.run(app, host="0.0.0.0", port=PORT)