diff --git "a/src/main.py" "b/src/main.py" --- "a/src/main.py" +++ "b/src/main.py" @@ -11,13 +11,40 @@ from typing import Optional, Dict, List from datetime import datetime, timezone, timedelta import uvicorn -from camoufox.async_api import AsyncCamoufox -from fastapi import FastAPI, HTTPException, Depends, status, Form, Request, Response + +# nodriver for undetectable browser automation (replaces Camoufox) +try: + import nodriver + HAS_NODRIVER = True +except ImportError: + HAS_NODRIVER = False + print("=" * 60) + print("āŒ ERROR: nodriver not installed!") + print("") + print(" PROBLEM: nodriver is required for reCAPTCHA bypassing.") + print("") + print(" SOLUTION:") + print(" 1. Run: pip install nodriver") + print(" 2. Restart LMArenaBridge") + print("=" * 60) + +from fastapi import FastAPI, HTTPException, Depends, status, Form, Request, Response, Header from starlette.responses import HTMLResponse, RedirectResponse, StreamingResponse from fastapi.security import APIKeyHeader import httpx +# curl_cffi for TLS fingerprint mimicking (bypasses Cloudflare JA3 detection) +try: + from curl_cffi.requests import AsyncSession as CurlAsyncSession + HAS_CURL_CFFI = True +except ImportError: + HAS_CURL_CFFI = False + CurlAsyncSession = None + print("āš ļø curl_cffi not installed. Install with: pip install curl_cffi") + print(" (Falling back to httpx - may trigger bot detection)") + + # ============================================================ # CONFIGURATION # ============================================================ @@ -200,190 +227,292 @@ def debug_print(*args, **kwargs): RECAPTCHA_SITEKEY = "6Led_uYrAAAAAKjxDIF58fgFtX3t8loNAK85bW9I" RECAPTCHA_ACTION = "chat_submit" -async def click_turnstile(page): +async def initialize_nodriver_browser(): """ - Attempts to locate and click the Cloudflare Turnstile widget. - Based on gpt4free logic. + Opens a visible Chrome browser and navigates to LMArena. + User must solve CAPTCHA manually. 
Browser stays open for session duration. """ - debug_print(" šŸ–±ļø Attempting to click Cloudflare Turnstile...") - try: - # Common selectors used by LMArena's Turnstile implementation - selectors = [ - '#cf-turnstile', - 'iframe[src*="challenges.cloudflare.com"]', - '[style*="display: grid"] iframe' # The grid style often wraps the checkbox - ] - - for selector in selectors: - element = await page.query_selector(selector) - if element: - # Get bounding box to click specific coordinates if needed - box = await element.bounding_box() - if box: - x = box['x'] + (box['width'] / 2) - y = box['y'] + (box['height'] / 2) - debug_print(f" šŸŽÆ Found widget at {x},{y}. Clicking...") - await page.mouse.click(x, y) - await asyncio.sleep(2) - return True + global NODRIVER_BROWSER, NODRIVER_TAB, BROWSER_READY + + if not HAS_NODRIVER: + print("=" * 60) + print("āŒ ERROR: Chrome browser not found!") + print("") + print(" PROBLEM: nodriver requires Google Chrome to be installed.") + print("") + print(" SOLUTION:") + print(" 1. Download Chrome from: https://www.google.com/chrome/") + print(" 2. Install Chrome") + print(" 3. Restart LMArenaBridge") + print("=" * 60) return False + + if BROWSER_READY and NODRIVER_TAB is not None: + debug_print(" └── Browser already initialized, reusing session") + return True + + print("") + print("🌐 STEP 1/3: Launching Chrome browser...") + print(" ā”œā”€ā”€ Looking for Chrome installation...") + + # Create chrome profile directory path (for persistent login) + import os + chrome_profile_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "chrome_profile") + + try: + # Start nodriver with visible browser and PERSISTENT profile + NODRIVER_BROWSER = await nodriver.start( + headless=False, # VISIBLE browser for CAPTCHA solving + user_data_dir=chrome_profile_dir, # šŸ’¾ Saves login across restarts! 
+ browser_args=[ + '--disable-blink-features=AutomationControlled', + '--no-first-run', + '--no-default-browser-check', + ] + ) + print(" ā”œā”€ā”€ āœ… Chrome launched successfully") + print(f" ā”œā”€ā”€ šŸ’¾ Using persistent profile: {chrome_profile_dir}") + print(" └── šŸ”„ Navigating to lmarena.ai...") + + # Navigate to LMArena + NODRIVER_TAB = await NODRIVER_BROWSER.get("https://arena.ai/?mode=direct") + + # Capture User-Agent from the actual browser + global USER_AGENT + try: + ua = await NODRIVER_TAB.evaluate("navigator.userAgent") + if ua: + USER_AGENT = ua + debug_print(f"šŸ•µļø Captured User-Agent: {USER_AGENT[:50]}...") + except Exception as e: + debug_print(f"āš ļø Failed to capture User-Agent: {e}") + + # Wait for page to settle + await asyncio.sleep(3) + + print("") + print("ā³ STEP 2/3: Waiting for CAPTCHA verification...") + print(" ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”") + print(" │ šŸ‘† ACTION REQUIRED: Please click the reCAPTCHA │") + print(" │ checkbox in the Chrome window that just opened! │") + print(" │ │") + print(" │ ā±ļø Timeout in 120 seconds... │") + print(" ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜") + + # Wait for reCAPTCHA library to load and get first token + captcha_solved = await wait_for_recaptcha_ready(timeout=120) + + if captcha_solved: + print("") + print("āœ… STEP 2/3: CAPTCHA verified successfully!") + BROWSER_READY = True + return True + else: + print("") + print("āŒ ERROR: CAPTCHA verification timed out (120 seconds)") + print("") + print(" PROBLEM: You didn't click the reCAPTCHA checkbox in time.") + print("") + print(" SOLUTION:") + print(" 1. Restart the server: python src/main.py") + print(" 2. 
When Chrome opens, quickly click the \"I'm not a robot\" checkbox") + print(" 3. You have 2 minutes to complete this") + return False + except Exception as e: - debug_print(f" āš ļø Error clicking turnstile: {e}") + print(f"āŒ ERROR: Failed to launch Chrome browser!") + print(f" └── Details: {e}") + print("") + print(" POSSIBLE CAUSES:") + print(" 1. Chrome not installed → Install from google.com/chrome") + print(" 2. Chrome in use by another process → Close other Chrome windows") + print(" 3. Permission issue → Run as administrator") return False -async def get_recaptcha_v3_token() -> Optional[str]: + +async def wait_for_recaptcha_ready(timeout: int = 120) -> bool: """ - Retrieves reCAPTCHA v3 token using a 'Side-Channel' approach. - We write the token to a global window variable and poll for it, - bypassing Promise serialization issues in the Main World bridge. + Wait for user to complete CAPTCHA verification. + Returns True when reCAPTCHA library is loaded and we can get tokens. """ - debug_print("šŸ” Starting reCAPTCHA v3 token retrieval (Side-Channel Mode)...") + global NODRIVER_TAB, RECAPTCHA_TOKEN, RECAPTCHA_EXPIRY - config = get_config() - cf_clearance = config.get("cf_clearance", "") + start_time = time.time() + last_status_time = 0 - try: - async with AsyncCamoufox(headless=True, main_world_eval=True) as browser: - context = await browser.new_context() - if cf_clearance: - await context.add_cookies([{ - "name": "cf_clearance", - "value": cf_clearance, - "domain": ".lmarena.ai", - "path": "/" - }]) - - page = await context.new_page() - - debug_print(" 🌐 Navigating to lmarena.ai...") - await page.goto("https://lmarena.ai/", wait_until="domcontentloaded") - - # --- NEW: Cloudflare/Turnstile Pass-Through --- - debug_print(" šŸ›”ļø Checking for Cloudflare Turnstile...") + while time.time() - start_time < timeout: + elapsed = int(time.time() - start_time) + + # Print status every 10 seconds + if elapsed > 0 and elapsed % 10 == 0 and elapsed != 
last_status_time: + last_status_time = elapsed + remaining = timeout - elapsed + print(f"ā³ Waiting for CAPTCHA... ({elapsed}s elapsed, {remaining}s remaining)") + + try: + # Check if grecaptcha enterprise is available + lib_ready = await NODRIVER_TAB.evaluate( + "!!(window.grecaptcha && window.grecaptcha.enterprise)" + ) - # Allow time for the widget to render if it's going to - try: - # Check for challenge title or widget presence - for _ in range(5): - title = await page.title() - if "Just a moment" in title: - debug_print(" šŸ”’ Cloudflare challenge active. Attempting to click...") - clicked = await click_turnstile(page) - if clicked: - debug_print(" āœ… Clicked Turnstile.") - # Give it time to verify - await asyncio.sleep(3) - else: - # If title is normal, we might still have a widget on the page - await click_turnstile(page) - break - await asyncio.sleep(1) + if lib_ready: + # Try to get a token + debug_print(" └── reCAPTCHA library detected, requesting token...") + token = await get_recaptcha_token_from_browser() - # Wait for the page to actually settle into the main app - await page.wait_for_load_state("domcontentloaded") - except Exception as e: - debug_print(f" āš ļø Error handling Turnstile: {e}") - # ---------------------------------------------- - - # 1. Wake up the page (Humanize) - debug_print(" šŸ–±ļø Waking up page...") - await page.mouse.move(100, 100) - await page.mouse.wheel(0, 200) - await asyncio.sleep(2) # Vital "Human" pause - - # 2. Check for Library - debug_print(" ā³ Checking for library...") - lib_ready = await page.evaluate("mw:() => !!(window.grecaptcha && window.grecaptcha.enterprise)") - if not lib_ready: - debug_print(" āš ļø Library not found immediately. 
Waiting...") - await asyncio.sleep(3) - lib_ready = await page.evaluate("mw:() => !!(window.grecaptcha && window.grecaptcha.enterprise)") - if not lib_ready: - debug_print("āŒ reCAPTCHA library never loaded.") - return None + if token: + RECAPTCHA_TOKEN = token + RECAPTCHA_EXPIRY = datetime.now(timezone.utc) + timedelta(seconds=110) + print(f" └── reCAPTCHA token acquired ({len(token)} chars)") + return True + + except Exception as e: + debug_print(f" └── Check failed (normal during load): {e}") + + await asyncio.sleep(2) + + return False - # 3. SETUP: Initialize our global result variable - # We use a unique name to avoid conflicts - await page.evaluate("mw:window.__token_result = 'PENDING'") - # 4. TRIGGER: Execute reCAPTCHA and write to the variable - # We do NOT await the result here. We just fire the process. - debug_print(" šŸš€ Triggering reCAPTCHA execution...") - trigger_script = f"""mw:() => {{ +async def get_recaptcha_token_from_browser() -> Optional[str]: + """ + Gets a reCAPTCHA token from the persistent browser session. + Uses a side-channel approach: sets global variable, triggers execute, polls for result. 
+ """ + global NODRIVER_TAB + + if NODRIVER_TAB is None: + debug_print("āŒ Browser tab not available") + return None + + try: + # Step 1: Initialize the global variable + await NODRIVER_TAB.evaluate("window.__recaptcha_token = 'PENDING';") + + # Step 2: Trigger the reCAPTCHA execution (don't await the Promise) + trigger_script = f""" + (function() {{ try {{ window.grecaptcha.enterprise.execute('{RECAPTCHA_SITEKEY}', {{ action: '{RECAPTCHA_ACTION}' }}) - .then(token => {{ - window.__token_result = token; + .then(function(token) {{ + window.__recaptcha_token = token; }}) - .catch(err => {{ - window.__token_result = 'ERROR: ' + err.toString(); + .catch(function(err) {{ + window.__recaptcha_token = 'ERROR: ' + err.toString(); }}); }} catch (e) {{ - window.__token_result = 'SYNC_ERROR: ' + e.toString(); + window.__recaptcha_token = 'SYNC_ERROR: ' + e.toString(); }} - }}""" + }})(); + """ + await NODRIVER_TAB.evaluate(trigger_script) + + # Step 3: Poll for the result + for i in range(15): # Max 15 seconds + await asyncio.sleep(1) + result = await NODRIVER_TAB.evaluate("window.__recaptcha_token") - await page.evaluate(trigger_script) - - # 5. POLL: Watch the variable for changes - debug_print(" šŸ‘€ Polling for result...") - token = None + if result and result != 'PENDING': + if isinstance(result, str) and result.startswith('ERROR'): + debug_print(f" └── JS Error: {result}") + return None + elif isinstance(result, str) and result.startswith('SYNC_ERROR'): + debug_print(f" └── Sync Error: {result}") + return None + elif isinstance(result, str) and len(result) > 100: + # Valid token! 
+ return result + else: + debug_print(f" └── Unexpected result: {result}") + return None + + debug_print(" └── Token polling timed out") + return None - for i in range(20): # Wait up to 20 seconds - # Read the global variable - result = await page.evaluate("mw:window.__token_result") - - if result != 'PENDING': - if result and result.startswith('ERROR'): - debug_print(f"āŒ JS Execution Error: {result}") - return None - elif result and result.startswith('SYNC_ERROR'): - debug_print(f"āŒ JS Sync Error: {result}") - return None - else: - token = result - debug_print(f"āœ… Token captured! ({len(token)} chars)") - break - - if i % 2 == 0: - debug_print(f" ... waiting ({i}s)") - await asyncio.sleep(1) - - if token: - global RECAPTCHA_TOKEN, RECAPTCHA_EXPIRY - RECAPTCHA_TOKEN = token - RECAPTCHA_EXPIRY = datetime.now(timezone.utc) + timedelta(seconds=110) - return token - else: - debug_print("āŒ Timed out waiting for token variable to update.") - return None - except Exception as e: - debug_print(f"āŒ Unexpected error: {e}") + debug_print(f" └── Token request failed: {e}") + return None + + +async def get_recaptcha_v3_token() -> Optional[str]: + """ + Gets reCAPTCHA v3 token using the persistent nodriver browser session. + If browser not initialized, returns None. + """ + global RECAPTCHA_TOKEN, RECAPTCHA_EXPIRY, BROWSER_READY + + if not BROWSER_READY or NODRIVER_TAB is None: + debug_print("āŒ Browser not ready. Token refresh unavailable.") + print("") + print("āŒ ERROR: Browser connection lost!") + print("") + print(" PROBLEM: The Chrome window was closed or crashed.") + print("") + print(" SOLUTION:") + print(" 1. Restart the server: python src/main.py") + print(" 2. When Chrome opens, click the CAPTCHA") + print(" 3. 
DO NOT close the Chrome window while using the bridge") return None + + current_time = datetime.now(timezone.utc).strftime("%H:%M:%S") + debug_print(f"šŸ”„ [{current_time}] Token refresh triggered") + debug_print(" ā”œā”€ā”€ Requesting new reCAPTCHA token...") + + token = await get_recaptcha_token_from_browser() + + if token: + RECAPTCHA_TOKEN = token + RECAPTCHA_EXPIRY = datetime.now(timezone.utc) + timedelta(seconds=110) + next_refresh = (datetime.now(timezone.utc) + timedelta(seconds=100)).strftime("%H:%M:%S") + debug_print(f" ā”œā”€ā”€ āœ… New token acquired ({len(token)} chars)") + debug_print(f" └── Next refresh at: {next_refresh}") + return token + else: + debug_print(" └── āŒ Failed to get token") + return None + -async def refresh_recaptcha_token(): - """Checks if the global reCAPTCHA token is expired and refreshes it if necessary.""" +async def refresh_recaptcha_token() -> Optional[str]: + """ + Gets a FRESH reCAPTCHA token for each request. + + IMPORTANT: reCAPTCHA tokens are SINGLE-USE per Google docs. + Once a token is verified by the server, it becomes immediately invalid. + We MUST get a fresh token for every LMArena API request. + """ global RECAPTCHA_TOKEN, RECAPTCHA_EXPIRY current_time = datetime.now(timezone.utc) - # Check if token is expired (set a refresh margin of 10 seconds) - if RECAPTCHA_TOKEN is None or current_time > RECAPTCHA_EXPIRY - timedelta(seconds=10): - debug_print("šŸ”„ Recaptcha token expired or missing. Refreshing...") + time_str = current_time.strftime("%H:%M:%S") + + debug_print(f"šŸ”„ [{time_str}] Getting fresh reCAPTCHA token (tokens are single-use)...") + + # ALWAYS get a fresh token - tokens are single-use! 
+ for attempt in range(1, 4): new_token = await get_recaptcha_v3_token() + if new_token: RECAPTCHA_TOKEN = new_token - # reCAPTCHA v3 tokens typically last 120 seconds (2 minutes) - RECAPTCHA_EXPIRY = current_time + timedelta(seconds=120) - debug_print(f"āœ… Recaptcha token refreshed, expires at {RECAPTCHA_EXPIRY.isoformat()}") + RECAPTCHA_EXPIRY = current_time + timedelta(seconds=110) + debug_print(f"āœ… [{time_str}] Fresh token acquired ({len(new_token)} chars)") return new_token - else: - debug_print("āŒ Failed to refresh recaptcha token.") - # Set a short retry delay if refresh fails - RECAPTCHA_EXPIRY = current_time + timedelta(seconds=10) - return None + + if attempt < 3: + wait_time = attempt * 2 # Shorter waits: 2s, 4s + debug_print(f"āš ļø Token fetch failed (attempt {attempt}/3), retrying in {wait_time}s...") + await asyncio.sleep(wait_time) + + # All attempts failed + print("") + print("āŒ ERROR: Token refresh failed after 3 attempts!") + print("") + print(" PROBLEM: Cannot acquire new reCAPTCHA token.") + print("") + print(" SOLUTION:") + print(" 1. Check the Chrome window - you may need to solve CAPTCHA again") + print(" 2. 
If Chrome is unresponsive, restart the server") - return RECAPTCHA_TOKEN + return None # --- End New reCAPTCHA Functions --- @@ -445,13 +574,13 @@ async def upload_image_to_lmarena(image_data: bytes, mime_type: str, filename: s "Accept": "text/x-component", "Content-Type": "text/plain;charset=UTF-8", "Next-Action": upload_action_id, - "Referer": "https://lmarena.ai/?mode=direct", + "Referer": "https://arena.ai/?mode=direct", }) async with httpx.AsyncClient() as client: try: response = await client.post( - "https://lmarena.ai/?mode=direct", + "https://arena.ai/?mode=direct", headers=request_headers, content=json.dumps([filename, mime_type]), timeout=30.0 @@ -509,7 +638,7 @@ async def upload_image_to_lmarena(image_data: bytes, mime_type: str, filename: s try: response = await client.post( - "https://lmarena.ai/?mode=direct", + "https://arena.ai/?mode=direct", headers=request_headers_step3, content=json.dumps([key]), timeout=30.0 @@ -660,7 +789,7 @@ app = FastAPI() # --- Constants & Global State --- CONFIG_FILE = "config.json" MODELS_FILE = "models.json" -API_KEY_HEADER = APIKeyHeader(name="Authorization") +API_KEY_HEADER = APIKeyHeader(name="Authorization", auto_error=False) # In-memory stores # { "api_key": { "conversation_id": session_data } } @@ -682,6 +811,128 @@ request_failed_tokens: Dict[str, set] = {} RECAPTCHA_TOKEN: Optional[str] = None # Initialize expiry far in the past to force a refresh on startup RECAPTCHA_EXPIRY: datetime = datetime.now(timezone.utc) - timedelta(days=365) + +# --- nodriver Browser Instance (persistent session) --- +# These stay alive for the entire server session +NODRIVER_BROWSER = None # nodriver.Browser instance +NODRIVER_TAB = None # nodriver.Tab instance (the page) +BROWSER_READY = False # Flag to indicate browser is ready for token refresh +USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" # Default fallback +LMARENA_REQUEST_LOCK = asyncio.Lock() # Lock 
to serialize LMArena requests (prevents rate limiting) +LAST_LMARENA_REQUEST_TIME = 0.0 # Timestamp of last LMArena request (for rate limiting) + +# --- Webshare Proxy Pool Configuration --- +# Enable/disable proxy rotation (set to True when proxies are configured) +PROXY_ROTATION_ENABLED = False # Will be auto-enabled when proxies are added + +# When True, each request creates a NEW session instead of reusing existing ones +# This bypasses LMArena's per-session rate limiting (they track by session ID, not just IP) +# Recommended: Enable this when using proxy rotation for unlimited parallel requests +FORCE_NEW_SESSION = True # Always create fresh session (bypasses per-session rate limits) + +# Proxy pool - Add your Webshare proxy credentials here +# Format: {"host": "IP", "port": PORT, "username": "user", "password": "pass"} +WEBSHARE_PROXY_POOL = [ + # Account 1 - 10 Proxies (wlnpiril) + {"host": "142.111.48.253", "port": 7030, "username": "wlnpiril", "password": "rz8y4an5o6n1"}, # US - Los Angeles + {"host": "23.95.150.145", "port": 6114, "username": "wlnpiril", "password": "rz8y4an5o6n1"}, # US - Buffalo + {"host": "198.23.239.134", "port": 6540, "username": "wlnpiril", "password": "rz8y4an5o6n1"}, # US - Buffalo + {"host": "107.172.163.27", "port": 6543, "username": "wlnpiril", "password": "rz8y4an5o6n1"}, # US - Bloomingdale + {"host": "198.105.121.200", "port": 6462, "username": "wlnpiril", "password": "rz8y4an5o6n1"}, # UK - London + {"host": "64.137.96.74", "port": 6641, "username": "wlnpiril", "password": "rz8y4an5o6n1"}, # Spain - Madrid + {"host": "84.247.60.125", "port": 6095, "username": "wlnpiril", "password": "rz8y4an5o6n1"}, # Poland - Warsaw + {"host": "216.10.27.159", "port": 6837, "username": "wlnpiril", "password": "rz8y4an5o6n1"}, # US - Dallas + {"host": "23.26.71.145", "port": 5628, "username": "wlnpiril", "password": "rz8y4an5o6n1"}, # US - Orem + {"host": "23.27.208.120", "port": 5830, "username": "wlnpiril", "password": "rz8y4an5o6n1"}, # 
US - Reston + # Account 2 - 10 Proxies (wfpfhvqd) + {"host": "142.111.48.253", "port": 7030, "username": "wfpfhvqd", "password": "akmgj7n23qgw"}, # US - Los Angeles + {"host": "23.95.150.145", "port": 6114, "username": "wfpfhvqd", "password": "akmgj7n23qgw"}, # US - Buffalo + {"host": "198.23.239.134", "port": 6540, "username": "wfpfhvqd", "password": "akmgj7n23qgw"}, # US - Buffalo + {"host": "107.172.163.27", "port": 6543, "username": "wfpfhvqd", "password": "akmgj7n23qgw"}, # US - Bloomingdale + {"host": "198.105.121.200", "port": 6462, "username": "wfpfhvqd", "password": "akmgj7n23qgw"}, # UK - London + {"host": "64.137.96.74", "port": 6641, "username": "wfpfhvqd", "password": "akmgj7n23qgw"}, # Spain - Madrid + {"host": "84.247.60.125", "port": 6095, "username": "wfpfhvqd", "password": "akmgj7n23qgw"}, # Poland - Warsaw + {"host": "216.10.27.159", "port": 6837, "username": "wfpfhvqd", "password": "akmgj7n23qgw"}, # US - Dallas + {"host": "23.26.71.145", "port": 5628, "username": "wfpfhvqd", "password": "akmgj7n23qgw"}, # US - Orem + {"host": "23.27.208.120", "port": 5830, "username": "wfpfhvqd", "password": "akmgj7n23qgw"}, # US - Reston + # Account 3 - 10 Proxies (qbwdhdrw) + {"host": "142.111.48.253", "port": 7030, "username": "qbwdhdrw", "password": "9f9w1szgq7tu"}, # US - Los Angeles + {"host": "23.95.150.145", "port": 6114, "username": "qbwdhdrw", "password": "9f9w1szgq7tu"}, # US - Buffalo + {"host": "198.23.239.134", "port": 6540, "username": "qbwdhdrw", "password": "9f9w1szgq7tu"}, # US - Buffalo + {"host": "107.172.163.27", "port": 6543, "username": "qbwdhdrw", "password": "9f9w1szgq7tu"}, # US - Bloomingdale + {"host": "198.105.121.200", "port": 6462, "username": "qbwdhdrw", "password": "9f9w1szgq7tu"}, # UK - London + {"host": "64.137.96.74", "port": 6641, "username": "qbwdhdrw", "password": "9f9w1szgq7tu"}, # Spain - Madrid + {"host": "84.247.60.125", "port": 6095, "username": "qbwdhdrw", "password": "9f9w1szgq7tu"}, # Poland - Warsaw + {"host": 
"216.10.27.159", "port": 6837, "username": "qbwdhdrw", "password": "9f9w1szgq7tu"}, # US - Dallas + {"host": "23.26.71.145", "port": 5628, "username": "qbwdhdrw", "password": "9f9w1szgq7tu"}, # US - Orem + {"host": "23.27.208.120", "port": 5830, "username": "qbwdhdrw", "password": "9f9w1szgq7tu"}, # US - Reston + # Account 4 - 10 Proxies (vwqxqyew) + {"host": "142.111.48.253", "port": 7030, "username": "vwqxqyew", "password": "4l6qlayr252q"}, # US - Los Angeles + {"host": "23.95.150.145", "port": 6114, "username": "vwqxqyew", "password": "4l6qlayr252q"}, # US - Buffalo + {"host": "198.23.239.134", "port": 6540, "username": "vwqxqyew", "password": "4l6qlayr252q"}, # US - Buffalo + {"host": "107.172.163.27", "port": 6543, "username": "vwqxqyew", "password": "4l6qlayr252q"}, # US - Bloomingdale + {"host": "198.105.121.200", "port": 6462, "username": "vwqxqyew", "password": "4l6qlayr252q"}, # UK - London + {"host": "64.137.96.74", "port": 6641, "username": "vwqxqyew", "password": "4l6qlayr252q"}, # Spain - Madrid + {"host": "84.247.60.125", "port": 6095, "username": "vwqxqyew", "password": "4l6qlayr252q"}, # Poland - Warsaw + {"host": "216.10.27.159", "port": 6837, "username": "vwqxqyew", "password": "4l6qlayr252q"}, # US - Dallas + {"host": "23.26.71.145", "port": 5628, "username": "vwqxqyew", "password": "4l6qlayr252q"}, # US - Orem + {"host": "23.27.208.120", "port": 5830, "username": "vwqxqyew", "password": "4l6qlayr252q"}, # US - Reston + # Account 5 - 10 Proxies (ynwjxcuz) + {"host": "142.111.48.253", "port": 7030, "username": "ynwjxcuz", "password": "l90dlksfzyia"}, # US - Los Angeles + {"host": "23.95.150.145", "port": 6114, "username": "ynwjxcuz", "password": "l90dlksfzyia"}, # US - Buffalo + {"host": "198.23.239.134", "port": 6540, "username": "ynwjxcuz", "password": "l90dlksfzyia"}, # US - Buffalo + {"host": "107.172.163.27", "port": 6543, "username": "ynwjxcuz", "password": "l90dlksfzyia"}, # US - Bloomingdale + {"host": "198.105.121.200", "port": 6462, 
"username": "ynwjxcuz", "password": "l90dlksfzyia"}, # UK - London + {"host": "64.137.96.74", "port": 6641, "username": "ynwjxcuz", "password": "l90dlksfzyia"}, # Spain - Madrid + {"host": "84.247.60.125", "port": 6095, "username": "ynwjxcuz", "password": "l90dlksfzyia"}, # Poland - Warsaw + {"host": "216.10.27.159", "port": 6837, "username": "ynwjxcuz", "password": "l90dlksfzyia"}, # US - Dallas + {"host": "23.26.71.145", "port": 5628, "username": "ynwjxcuz", "password": "l90dlksfzyia"}, # US - Orem + {"host": "23.27.208.120", "port": 5830, "username": "ynwjxcuz", "password": "l90dlksfzyia"}, # US - Reston +] + +# Track which proxy to use next (round-robin) +CURRENT_PROXY_INDEX = 0 +PROXY_USAGE_STATS = defaultdict(lambda: {"requests": 0, "errors": 0}) + +def get_next_proxy(): + """Get the next proxy from the pool in round-robin fashion.""" + global CURRENT_PROXY_INDEX + + if not WEBSHARE_PROXY_POOL: + return None + + proxy = WEBSHARE_PROXY_POOL[CURRENT_PROXY_INDEX] + CURRENT_PROXY_INDEX = (CURRENT_PROXY_INDEX + 1) % len(WEBSHARE_PROXY_POOL) + + # Track usage + proxy_id = f"{proxy['host']}:{proxy['port']}" + PROXY_USAGE_STATS[proxy_id]["requests"] += 1 + + return proxy + +def format_proxy_url(proxy: dict) -> str: + """Format proxy dict into URL string for httpx/requests.""" + if not proxy: + return None + return f"http://{proxy['username']}:{proxy['password']}@{proxy['host']}:{proxy['port']}" + +def get_proxy_for_browser() -> dict: + """Get proxy configuration formatted for browser use.""" + proxy = get_next_proxy() + if not proxy: + return None + return { + "server": f"http://{proxy['host']}:{proxy['port']}", + "username": proxy['username'], + "password": proxy['password'] + } + +# Auto-enable proxy rotation if proxies are configured +if WEBSHARE_PROXY_POOL: + PROXY_ROTATION_ENABLED = True + print(f"šŸ”„ Proxy rotation ENABLED with {len(WEBSHARE_PROXY_POOL)} proxies") +else: + print("āš ļø No proxies configured. 
Using direct connection (rate limits may apply)") # -------------------------------------- # --- Helper Functions --- @@ -763,9 +1014,22 @@ def get_request_headers_with_token(token: str): """Get request headers with a specific auth token""" config = get_config() cf_clearance = config.get("cf_clearance", "").strip() + + # Check if the token is a full cookie string (contains multiple cookies or the key name) + if "arena-auth-prod-v1=" in token: + # User pasted the full cookie string! Use it directly. + # Ensure cf_clearance is updated if present in the string? + # Actually, best to just use what user gave, but we might want to ensure cf_clearance is there. + # If user gave full string, it likely has everything. + cookie_header = token + else: + # Standard behavior: wrap the value + cookie_header = f"cf_clearance={cf_clearance}; arena-auth-prod-v1={token}" + return { "Content-Type": "text/plain;charset=UTF-8", - "Cookie": f"cf_clearance={cf_clearance}; arena-auth-prod-v1={token}", + "Cookie": cookie_header, + "User-Agent": USER_AGENT, } def get_next_auth_token(exclude_tokens: set = None): @@ -808,6 +1072,412 @@ def remove_auth_token(token: str): except Exception as e: debug_print(f"āš ļø Error removing auth token: {e}") + +async def make_lmarena_request_browser(url: str, payload: dict, method: str = "POST") -> dict: + """Make LMArena API request through the real Chrome browser (bypasses all bot detection) + + This function uses the nodriver browser to execute JavaScript fetch() calls, + ensuring the request comes from a real browser with proper cookies, TLS fingerprint, + and session context. 
+ + Args: + url: Full URL to the LMArena API endpoint + payload: JSON payload to send + method: HTTP method (POST or PUT) + + Returns: + dict with 'status_code' and 'text' (response body) + """ + global NODRIVER_TAB, BROWSER_READY, LAST_LMARENA_REQUEST_TIME + + if not BROWSER_READY or NODRIVER_TAB is None: + raise HTTPException(status_code=503, detail="Browser not ready for API calls") + + # Only use lock and rate limiting if proxy rotation is DISABLED + # With rotating proxies, each request uses different IP = no rate limit concerns + if not PROXY_ROTATION_ENABLED: + # Acquire lock to serialize requests (parallel requests will queue up here) + debug_print(f"šŸ”’ Waiting to acquire request lock...") + await LMARENA_REQUEST_LOCK.acquire() + debug_print(f"šŸ”“ Lock acquired!") + else: + proxy = get_next_proxy() + proxy_id = f"{proxy['host']}:{proxy['port']}" if proxy else "direct" + debug_print(f"šŸ”„ Using rotating proxy: {proxy_id} (no lock needed)") + + try: + # Rate limiting: only if proxy rotation is disabled + if not PROXY_ROTATION_ENABLED: + MIN_REQUEST_INTERVAL = 2.5 + current_time = time.time() + if LAST_LMARENA_REQUEST_TIME > 0: + elapsed = current_time - LAST_LMARENA_REQUEST_TIME + if elapsed < MIN_REQUEST_INTERVAL: + wait_time = MIN_REQUEST_INTERVAL - elapsed + debug_print(f"ā³ Rate limiting: waiting {wait_time:.1f}s before next request") + await asyncio.sleep(wait_time) + LAST_LMARENA_REQUEST_TIME = time.time() + + # CRITICAL: Refresh reCAPTCHA token AFTER acquiring lock + # Token may have expired while waiting in queue + debug_print(f"šŸ”„ Refreshing reCAPTCHA token after lock...") + fresh_token = await refresh_recaptcha_token() + if fresh_token and 'recaptchaV3Token' in payload: + payload['recaptchaV3Token'] = fresh_token + debug_print(f"āœ… Fresh token applied ({len(fresh_token)} chars)") + + debug_print(f"🌐 Making browser-based request to: {url}") + debug_print(f"šŸ” Using REAL Chrome browser (bypasses bot detection)") + + # Escape the payload for 
JavaScript + payload_json = json.dumps(payload).replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n') + + # Generate unique request ID to avoid collisions + request_id = f"lmab_{int(time.time() * 1000)}" + + # JavaScript code that stores result in window global (since evaluate() can't return async results) + js_code = f""" + (function() {{ + window.{request_id} = null; // Reset + fetch('{url}', {{ + method: '{method}', + headers: {{ + 'Content-Type': 'application/json' + }}, + body: '{payload_json}', + credentials: 'include' + }}) + .then(async (response) => {{ + const text = await response.text(); + window.{request_id} = {{ + status_code: response.status, + text: text, + ok: response.ok, + done: true + }}; + }}) + .catch((error) => {{ + window.{request_id} = {{ + status_code: 0, + text: 'Fetch error: ' + error.message, + ok: false, + done: true + }}; + }}); + return 'request_started'; + }})(); + """ + + # Start the fetch request + start_result = await NODRIVER_TAB.evaluate(js_code) + debug_print(f"šŸš€ Browser fetch started: {start_result}") + + # Poll for result (timeout after 120 seconds) + max_wait = 120 + poll_interval = 0.5 + waited = 0 + + while waited < max_wait: + await asyncio.sleep(poll_interval) + waited += poll_interval + + # Check if result is ready + result = await NODRIVER_TAB.evaluate(f"window.{request_id}") + + # Debug: log result type + if result is not None: + debug_print(f"šŸ” Result type: {type(result).__name__}, value: {str(result)[:100]}") + + # Handle different return types from nodriver + if result is not None: + # nodriver returns JS objects as list of [key, {type, value}] pairs + # e.g. [['status_code', {'type': 'number', 'value': 200}], ['text', {...}], ...] 
+ if isinstance(result, list) and len(result) > 0: + # Check if it's the nodriver format: list of 2-element lists + if isinstance(result[0], list) and len(result[0]) == 2: + # Convert nodriver format to dict + converted = {} + for item in result: + if isinstance(item, list) and len(item) == 2: + key = item[0] + value_wrapper = item[1] + if isinstance(value_wrapper, dict) and 'value' in value_wrapper: + converted[key] = value_wrapper['value'] + else: + converted[key] = value_wrapper + result = converted + debug_print(f"āœ… Converted nodriver format to dict: {list(result.keys())}") + debug_print(f" done={result.get('done')}, status={result.get('status_code')}") + # If first element is a dict, take it (old handling) + elif isinstance(result[0], dict): + result = result[0] + + # Now check if it's a dict with 'done' key + if isinstance(result, dict) and result.get("done"): + debug_print(f"🌐 Browser response status: {result.get('status_code', 'unknown')}") + + # Log first 200 chars of response for debugging + response_preview = str(result.get('text', ''))[:200] + debug_print(f"šŸ“„ Response preview: {response_preview}...") + + # Clean up window variable + await NODRIVER_TAB.evaluate(f"delete window.{request_id}") + + return { + "status_code": result.get("status_code", 500), + "text": result.get("text", ""), + "ok": result.get("ok", False) + } + + if waited % 5 == 0: + debug_print(f"ā³ Waiting for browser response... 
({int(waited)}s)") + + # Timeout + debug_print(f"āŒ Browser fetch timed out after {max_wait}s") + await NODRIVER_TAB.evaluate(f"delete window.{request_id}") + return {"status_code": 504, "text": "Browser request timed out"} + + except Exception as e: + debug_print(f"āŒ Browser request failed: {type(e).__name__}: {e}") + return {"status_code": 500, "text": f"Browser error: {str(e)}"} + finally: + # Only release lock if we acquired it (proxy rotation disabled) + if not PROXY_ROTATION_ENABLED: + LMARENA_REQUEST_LOCK.release() + debug_print(f"šŸ”“ Request lock released") + + +async def make_lmarena_streaming_request_browser(url: str, payload: dict, method: str = "POST"): + """Stream LMArena API response through the real Chrome browser (bypasses reCAPTCHA). + + This is an async generator that yields response chunks as they arrive. + Uses browser's ReadableStream API to capture streaming data. + + Args: + url: Full URL to the LMArena API endpoint + payload: JSON payload to send + method: HTTP method (POST or PUT) + + Yields: + str: Each chunk of the streaming response as it arrives + """ + global NODRIVER_TAB, BROWSER_READY, LAST_LMARENA_REQUEST_TIME + + if not BROWSER_READY or NODRIVER_TAB is None: + raise HTTPException(status_code=503, detail="Browser not ready for API calls") + + # Only use lock and rate limiting if proxy rotation is DISABLED + if not PROXY_ROTATION_ENABLED: + # Acquire lock to serialize requests (parallel requests will queue up here) + debug_print(f"šŸ”’ [STREAM] Waiting to acquire request lock...") + await LMARENA_REQUEST_LOCK.acquire() + debug_print(f"šŸ”“ [STREAM] Lock acquired!") + else: + proxy = get_next_proxy() + proxy_id = f"{proxy['host']}:{proxy['port']}" if proxy else "direct" + debug_print(f"šŸ”„ [STREAM] Using rotating proxy: {proxy_id} (no lock needed)") + + # Rate limiting: only if proxy rotation is disabled + if not PROXY_ROTATION_ENABLED: + MIN_REQUEST_INTERVAL = 2.5 + current_time = time.time() + if LAST_LMARENA_REQUEST_TIME > 0: 
+ elapsed = current_time - LAST_LMARENA_REQUEST_TIME + if elapsed < MIN_REQUEST_INTERVAL: + wait_time = MIN_REQUEST_INTERVAL - elapsed + debug_print(f"ā³ Rate limiting: waiting {wait_time:.1f}s before next streaming request") + await asyncio.sleep(wait_time) + LAST_LMARENA_REQUEST_TIME = time.time() + + # CRITICAL: Refresh reCAPTCHA token AFTER acquiring lock + # Token may have expired while waiting in queue + debug_print(f"šŸ”„ [STREAM] Refreshing reCAPTCHA token after lock...") + fresh_token = await refresh_recaptcha_token() + if fresh_token and 'recaptchaV3Token' in payload: + payload['recaptchaV3Token'] = fresh_token + debug_print(f"āœ… [STREAM] Fresh token applied ({len(fresh_token)} chars)") + + debug_print(f"🌐 Making STREAMING browser request to: {url}") + debug_print(f"šŸ” Using REAL Chrome browser for streaming (bypasses reCAPTCHA)") + + # Escape the payload for JavaScript + payload_json = json.dumps(payload).replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n') + + # Generate unique request ID + request_id = f"lmab_stream_{int(time.time() * 1000)}" + + # JavaScript that uses ReadableStream to collect chunks + # Stores chunks in an array that Python can poll + js_code = f""" + (function() {{ + window.{request_id} = {{ + chunks: [], + done: false, + error: null, + status_code: 0 + }}; + + fetch('{url}', {{ + method: '{method}', + headers: {{ + 'Content-Type': 'application/json' + }}, + body: '{payload_json}', + credentials: 'include' + }}) + .then(async (response) => {{ + window.{request_id}.status_code = response.status; + + if (!response.ok) {{ + const text = await response.text(); + window.{request_id}.error = text; + window.{request_id}.done = true; + return; + }} + + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + + while (true) {{ + const {{done, value}} = await reader.read(); + if (done) {{ + window.{request_id}.done = true; + break; + }} + const chunk = decoder.decode(value, {{stream: true}}); + 
window.{request_id}.chunks.push(chunk); + }} + }}) + .catch((error) => {{ + window.{request_id}.error = 'Fetch error: ' + error.message; + window.{request_id}.done = true; + }}); + return 'streaming_started'; + }})(); + """ + + try: + # Start the streaming fetch + start_result = await NODRIVER_TAB.evaluate(js_code) + debug_print(f"šŸš€ Browser streaming started: {start_result}") + + # Poll for chunks and yield them as they arrive + max_wait = 120 + poll_interval = 0.1 # Poll faster for streaming + waited = 0 + last_chunk_index = 0 + + while waited < max_wait: + await asyncio.sleep(poll_interval) + waited += poll_interval + + # Get current state + state_js = f""" + (function() {{ + const s = window.{request_id}; + if (!s) return null; + return {{ + chunk_count: s.chunks.length, + done: s.done, + error: s.error, + status_code: s.status_code + }}; + }})(); + """ + state = await NODRIVER_TAB.evaluate(state_js) + + if state is None: + continue + + # Handle nodriver list format conversion + if isinstance(state, list): + converted = {} + for item in state: + if isinstance(item, list) and len(item) == 2: + key = item[0] + value_wrapper = item[1] + if isinstance(value_wrapper, dict) and 'value' in value_wrapper: + converted[key] = value_wrapper['value'] + else: + converted[key] = value_wrapper + state = converted + + if not isinstance(state, dict): + continue + + chunk_count = state.get('chunk_count', 0) + done = state.get('done', False) + error = state.get('error') + status_code = state.get('status_code', 0) + + # Check for error (nodriver returns {'type': 'null'} for JS null, which is truthy) + # Only treat as error if it's an actual error string + is_real_error = error and isinstance(error, str) and error != "" + if is_real_error: + debug_print(f"āŒ Stream error: {error}") + await NODRIVER_TAB.evaluate(f"delete window.{request_id}") + raise HTTPException(status_code=status_code or 500, detail=f"Browser stream error: {error}") + + # Get new chunks if available + if 
chunk_count > last_chunk_index: + # Get all new chunks + get_chunks_js = f""" + (function() {{ + const s = window.{request_id}; + if (!s) return []; + return s.chunks.slice({last_chunk_index}); + }})(); + """ + new_chunks = await NODRIVER_TAB.evaluate(get_chunks_js) + + # Handle nodriver format for chunk array + if isinstance(new_chunks, list): + for chunk_item in new_chunks: + # Extract chunk text + if isinstance(chunk_item, dict) and 'value' in chunk_item: + chunk_text = chunk_item['value'] + elif isinstance(chunk_item, str): + chunk_text = chunk_item + else: + chunk_text = str(chunk_item) if chunk_item else "" + + if chunk_text: + yield chunk_text + + last_chunk_index = chunk_count + + # Check if done + if done: + debug_print(f"āœ… Browser streaming completed. Status: {status_code}, Total chunks: {chunk_count}") + break + + # Periodic status log + if waited % 10 == 0 and waited > 0: + debug_print(f"ā³ Streaming... ({int(waited)}s, {chunk_count} chunks)") + + # Clean up + await NODRIVER_TAB.evaluate(f"delete window.{request_id}") + + if waited >= max_wait: + debug_print(f"āŒ Browser streaming timed out after {max_wait}s") + raise HTTPException(status_code=504, detail="Browser streaming timed out") + + except HTTPException: + raise + except Exception as e: + debug_print(f"āŒ Browser streaming failed: {type(e).__name__}: {e}") + try: + await NODRIVER_TAB.evaluate(f"delete window.{request_id}") + except: + pass + raise HTTPException(status_code=500, detail=f"Browser streaming error: {str(e)}") + finally: + # Only release lock if we acquired it (proxy rotation disabled) + if not PROXY_ROTATION_ENABLED: + LMARENA_REQUEST_LOCK.release() + debug_print(f"šŸ”“ [STREAM] Request lock released") + + # --- Dashboard Authentication --- async def get_current_session(request: Request): @@ -818,15 +1488,27 @@ async def get_current_session(request: Request): # --- API Key Authentication & Rate Limiting --- -async def rate_limit_api_key(key: str = Depends(API_KEY_HEADER)): - if 
not key.startswith("Bearer "): +async def rate_limit_api_key( + auth_header: Optional[str] = Depends(API_KEY_HEADER), + x_api_key: Optional[str] = Header(None, alias="x-api-key") +): + api_key_str = None + + # Check Authorization header (Bearer token) + debug_print(f"šŸ”‘ Auth Debug: AuthHeader set? {auth_header is not None}, X-API-Key set? {x_api_key is not None}") + + if auth_header and auth_header.startswith("Bearer "): + api_key_str = auth_header[7:].strip() + + # Check x-api-key header (Anthropic standard) + if not api_key_str and x_api_key: + api_key_str = x_api_key.strip() + + if not api_key_str: raise HTTPException( status_code=401, - detail="Invalid Authorization header. Expected 'Bearer YOUR_API_KEY'" + detail="Missing or invalid authentication. Expected 'Authorization: Bearer KEY' or 'x-api-key: KEY'" ) - - # Remove "Bearer " prefix and strip whitespace - api_key_str = key[7:].strip() config = get_config() key_data = next((k for k in config["api_keys"] if k["key"] == api_key_str), None) @@ -859,162 +1541,71 @@ async def rate_limit_api_key(key: str = Depends(API_KEY_HEADER)): # --- Core Logic --- async def get_initial_data(): - debug_print("Starting initial data retrieval...") + """ + Extracts initial data from the nodriver browser session. + Must be called AFTER initialize_nodriver_browser(). + Extracts: cf_clearance cookie, models list. 
+ """ + global NODRIVER_TAB + + print("") + print("šŸ“¦ STEP 3/3: Loading LMArena data...") + + if NODRIVER_TAB is None: + print(" └── āŒ Browser not available, skipping data extraction") + return + try: - async with AsyncCamoufox(headless=True, main_world_eval=True) as browser: - page = await browser.new_page() - - # Set up route interceptor BEFORE navigating - debug_print(" šŸŽÆ Setting up route interceptor for JS chunks...") - captured_responses = [] - - async def capture_js_route(route): - """Intercept and capture JS chunk responses""" - url = route.request.url - if '/_next/static/chunks/' in url and '.js' in url: - try: - # Fetch the original response - response = await route.fetch() - # Get the response body - body = await response.body() - text = body.decode('utf-8') - - # debug_print(f" šŸ“„ Captured JS chunk: {url.split('/')[-1][:50]}...") - captured_responses.append({'url': url, 'text': text}) - - # Continue with the original response (don't modify) - await route.fulfill(response=response, body=body) - except Exception as e: - debug_print(f" āš ļø Error capturing response: {e}") - # If something fails, just continue normally - await route.continue_() - else: - # Not a JS chunk, just continue normally - await route.continue_() - - # Register the route interceptor - await page.route('**/*', capture_js_route) - - debug_print("Navigating to lmarena.ai...") - await page.goto("https://lmarena.ai/", wait_until="domcontentloaded") - - debug_print("Waiting for Cloudflare challenge to complete...") - try: - await page.wait_for_function( - "() => document.title.indexOf('Just a moment...') === -1", - timeout=45000 - ) - debug_print("āœ… Cloudflare challenge passed.") - except Exception as e: - debug_print(f"āŒ Cloudflare challenge took too long or failed: {e}") - return - - # Give it time to capture all JS responses - await asyncio.sleep(5) - - # Extract cf_clearance - cookies = await page.context.cookies() - cf_clearance_cookie = next((c for c in cookies if 
c["name"] == "cf_clearance"), None) + config = get_config() + + # Extract cf_clearance from cookies + print(" ā”œā”€ā”€ Extracting Cloudflare clearance...") + try: + cookies = await NODRIVER_TAB.browser.cookies.get_all() + cf_clearance_cookie = None + for cookie in cookies: + if cookie.name == "cf_clearance": + cf_clearance_cookie = cookie + break - config = get_config() if cf_clearance_cookie: - config["cf_clearance"] = cf_clearance_cookie["value"] + config["cf_clearance"] = cf_clearance_cookie.value save_config(config) - debug_print(f"āœ… Saved cf_clearance token: {cf_clearance_cookie['value'][:20]}...") + print(f" ā”œā”€ā”€ āœ… cf_clearance saved") else: - debug_print("āš ļø Could not find cf_clearance cookie.") - - # Extract models - debug_print("Extracting models from page...") - try: - body = await page.content() - match = re.search(r'{\\"initialModels\\":(\[.*?\]),\\"initialModel[A-Z]Id', body, re.DOTALL) - if match: - models_json = match.group(1).encode().decode('unicode_escape') - models = json.loads(models_json) + print(" ā”œā”€ā”€ āš ļø No cf_clearance cookie found (might not be needed)") + except Exception as e: + debug_print(f" ā”œā”€ā”€ āš ļø Error extracting cookies: {e}") + + # Extract models from page content + print(" ā”œā”€ā”€ Extracting available models...") + try: + # Get the page HTML content + body = await NODRIVER_TAB.get_content() + + # Try to find models in the page + match = re.search(r'{\\\"initialModels\\\":(\\[.*?\\]),\\\"initialModel[A-Z]Id', body, re.DOTALL) + if match: + models_json = match.group(1).encode().decode('unicode_escape') + models = json.loads(models_json) + save_models(models) + print(f" ā”œā”€ā”€ āœ… Found {len(models)} models") + else: + # Try alternative pattern + match2 = re.search(r'"initialModels":(\[.*?\]),"initialModel', body, re.DOTALL) + if match2: + models = json.loads(match2.group(1)) save_models(models) - debug_print(f"āœ… Saved {len(models)} models") + print(f" ā”œā”€ā”€ āœ… Found {len(models)} models") 
else: - debug_print("āš ļø Could not find models in page") - except Exception as e: - debug_print(f"āŒ Error extracting models: {e}") - - # Extract Next-Action IDs from captured JavaScript responses - debug_print(f"\nExtracting Next-Action IDs from {len(captured_responses)} captured JS responses...") - try: - upload_action_id = None - signed_url_action_id = None - - if not captured_responses: - debug_print(" āš ļø No JavaScript responses were captured") - else: - debug_print(f" šŸ“¦ Processing {len(captured_responses)} JavaScript chunk files") - - for item in captured_responses: - url = item['url'] - text = item['text'] - - try: - # debug_print(f" šŸ”Ž Checking: {url.split('/')[-1][:50]}...") - - # Look for getSignedUrl action ID (ID captured in group 1) - signed_url_matches = re.findall( - r'\(0,[a-zA-Z].createServerReference\)\(\"([\w\d]*?)\",[a-zA-Z_$][\w$]*\.callServer,void 0,[a-zA-Z_$][\w$]*\.findSourceMapURL,["\']getSignedUrl["\']\)', - text - ) - - # Look for generateUploadUrl action ID (ID captured in group 1) - upload_matches = re.findall( - r'\(0,[a-zA-Z].createServerReference\)\(\"([\w\d]*?)\",[a-zA-Z_$][\w$]*\.callServer,void 0,[a-zA-Z_$][\w$]*\.findSourceMapURL,["\']generateUploadUrl["\']\)', - text - ) - - # Process matches - if signed_url_matches and not signed_url_action_id: - signed_url_action_id = signed_url_matches[0] - debug_print(f" šŸ“„ Found getSignedUrl action ID: {signed_url_action_id[:20]}...") - - if upload_matches and not upload_action_id: - upload_action_id = upload_matches[0] - debug_print(f" šŸ“¤ Found generateUploadUrl action ID: {upload_action_id[:20]}...") - - if upload_action_id and signed_url_action_id: - debug_print(f" āœ… Found both action IDs, stopping search") - break - - except Exception as e: - debug_print(f" āš ļø Error parsing response from {url}: {e}") - continue - - # Save the action IDs to config - if upload_action_id: - config["next_action_upload"] = upload_action_id - if signed_url_action_id: - 
config["next_action_signed_url"] = signed_url_action_id - - if upload_action_id and signed_url_action_id: - save_config(config) - debug_print(f"\nāœ… Saved both Next-Action IDs to config") - debug_print(f" Upload: {upload_action_id}") - debug_print(f" Signed URL: {signed_url_action_id}") - elif upload_action_id or signed_url_action_id: - save_config(config) - debug_print(f"\nāš ļø Saved partial Next-Action IDs:") - if upload_action_id: - debug_print(f" Upload: {upload_action_id}") - if signed_url_action_id: - debug_print(f" Signed URL: {signed_url_action_id}") - else: - debug_print(f"\nāš ļø Could not extract Next-Action IDs from JavaScript chunks") - debug_print(f" This is optional - image upload may not work without them") - - except Exception as e: - debug_print(f"āŒ Error extracting Next-Action IDs: {e}") - debug_print(f" This is optional - continuing without them") - - debug_print("āœ… Initial data retrieval complete") + print(" ā”œā”€ā”€ āš ļø Could not find models in page (using cached)") + except Exception as e: + debug_print(f" ā”œā”€ā”€ āš ļø Error extracting models: {e}") + + print(" └── āœ… Initial data extraction complete") + except Exception as e: - debug_print(f"āŒ An error occurred during initial data retrieval: {e}") + print(f" └── āŒ Error during data extraction: {e}") async def periodic_refresh_task(): """Background task to refresh cf_clearance and models every 30 minutes""" @@ -1036,25 +1627,57 @@ async def periodic_refresh_task(): @app.on_event("startup") async def startup_event(): try: - # Ensure config and models files exist - save_config(get_config()) + # Print startup banner + print("=" * 60) + print("šŸš€ LMArena Bridge Server Starting...") + print("=" * 60) + + # Load configuration + config = get_config() + save_config(config) save_models(get_models()) - # Load usage stats from config load_usage_stats() - # 1. First, get initial data (cookies, models, etc.) 
- # We await this so we have the cookie BEFORE trying reCAPTCHA - await get_initial_data() + api_key_count = len(config.get("api_keys", [])) + auth_token_count = len(config.get("auth_tokens", [])) or (1 if config.get("auth_token") else 0) - # 2. Now start the initial reCAPTCHA fetch (using the cookie we just got) - # Block startup until we have a token or fail, so we don't serve 403s - await refresh_recaptcha_token() + print(f"šŸ“‹ Configuration loaded from config.json") + print(f" ā”œā”€ā”€ API Keys: {api_key_count} configured") + print(f" ā”œā”€ā”€ Auth Tokens: {auth_token_count} configured") + print(f" └── Debug Mode: {'ON' if DEBUG else 'OFF'}") + + # 1. Initialize browser and solve CAPTCHA (this blocks until user solves) + browser_ready = await initialize_nodriver_browser() + + if not browser_ready: + print("") + print("āš ļø WARNING: Server starting without browser (limited functionality)") + print(" └── reCAPTCHA token refresh will not work") + print("") + else: + # 2. Extract initial data from the browser session + await get_initial_data() # 3. Start background tasks asyncio.create_task(periodic_refresh_task()) + # Print ready message + print("") + print("=" * 60) + print("āœ… SERVER READY!") + print("=" * 60) + print(f"šŸ“ Dashboard: http://localhost:{PORT}/dashboard") + print(f"šŸ“š OpenAI API: http://localhost:{PORT}/api/v1") + print(f"šŸ”· Anthropic API: http://localhost:{PORT}/v1") + if browser_ready: + print("šŸ’” Chrome window will stay open (do not close it!)") + print("=" * 60) + print("") + except Exception as e: - debug_print(f"āŒ Error during startup: {e}") + print(f"āŒ Error during startup: {e}") + import traceback + traceback.print_exc() # Continue anyway - server should still start # --- UI Endpoints (Login/Dashboard) --- @@ -1221,7 +1844,7 @@ async def dashboard(session: str = Depends(get_current_session)): {key['rpm']} RPM {created_date} -
+
@@ -1256,9 +1879,10 @@ async def dashboard(session: str = Depends(get_current_session)): else: stats_html = "No usage data yet" - # Check token status - token_status = "āœ… Configured" if config.get("auth_token") else "āŒ Not Set" - token_class = "status-good" if config.get("auth_token") else "status-bad" + # Check token status - check BOTH auth_token (legacy single) and auth_tokens (new array) + has_tokens = config.get("auth_token") or (config.get("auth_tokens") and len(config.get("auth_tokens", [])) > 0) + token_status = "āœ… Configured" if has_tokens else "āŒ Not Set" + token_class = "status-good" if has_tokens else "status-bad" cf_status = "āœ… Configured" if config.get("cf_clearance") else "āŒ Not Set" cf_class = "status-good" if config.get("cf_clearance") else "status-bad" @@ -1270,542 +1894,520 @@ async def dashboard(session: str = Depends(get_current_session)): - Dashboard - LMArena Bridge + Namo LLM - Dashboard + + -
-
-

šŸš€ LMArena Bridge Dashboard

- Logout -
-
- -
- -
-
-
{len(config['api_keys'])}
-
API Keys
-
-
-
{len(text_models)}
-
Available Models
+
+ +