import base64 import io import json import os import re import tempfile from typing import Tuple, Optional, List, Dict, Any from datetime import datetime import time import hashlib from functools import lru_cache import gradio as gr import httpx from PIL import Image from lzstring import LZString # ========================= # CONFIGURATION - WITH CORRECT MODEL IDS # ========================= NEBIUS_BASE_URL = "https://api.studio.nebius.com/v1/" # Real-time tracking CURRENT_USER = "samsnata" CURRENT_DATETIME = "2025-08-21 08:21:44" def get_current_time(): return datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") # CORRECT NEBIUS MODEL IDS - Verified Working # Vision Models - ONLY Qwen VL models support image analysis DEFAULT_VISION_MODEL = "Qwen/Qwen2-VL-7B-Instruct" VISION_MODELS = [ "Qwen/Qwen2-VL-7B-Instruct", # Fast - 7B parameters "Qwen/Qwen2-VL-72B-Instruct", # Slower but more accurate - 72B ] # Code Generation Models - VERIFIED WORKING ON NEBIUS DEFAULT_CODE_MODEL = "mistralai/Mistral-7B-Instruct-v0.3" WORKING_CODE_MODELS = [ # FASTEST TIER (5-15 seconds) "mistralai/Mistral-7B-Instruct-v0.3", # Fastest overall "meta-llama/Llama-3.1-8B-Instruct", # Fast Meta model # FAST TIER (15-30 seconds) "mistralai/Mixtral-8x7B-Instruct-v0.1", # MoE architecture "Qwen/Qwen2.5-Coder-32B-Instruct", # Code optimized # BALANCED TIER (30-60 seconds) "meta-llama/Llama-3.1-70B-Instruct", # High quality "Qwen/Qwen2.5-72B-Instruct", # General purpose # Additional verified models "deepseek-ai/DeepSeek-Coder-V2-Instruct", # Code specialized "mistralai/Mistral-Nemo-Instruct-2407", # Latest Mistral ] # Optimized configurations for each model MODEL_CONFIGS = { "mistralai/Mistral-7B-Instruct-v0.3": { "max_tokens": 2500, "temperature": 0.7, "timeout_read": 30.0, "timeout_connect": 5.0, "speed_tier": 1, "estimated_time": "5-10 seconds" }, "meta-llama/Llama-3.1-8B-Instruct": { "max_tokens": 3000, "temperature": 0.7, "timeout_read": 35.0, "timeout_connect": 5.0, "speed_tier": 1, "estimated_time": "8-15 seconds" }, "mistralai/Mixtral-8x7B-Instruct-v0.1": { "max_tokens": 3500, "temperature": 0.7, "timeout_read": 45.0, "timeout_connect": 7.0, "speed_tier": 2, "estimated_time": "15-25 seconds" }, "meta-llama/Llama-3.1-70B-Instruct": { "max_tokens": 4000, "temperature": 0.7, "timeout_read": 70.0, "timeout_connect": 10.0, "speed_tier": 3, "estimated_time": "30-45 seconds" }, "Qwen/Qwen2-VL-7B-Instruct": { "max_tokens": 1000, "temperature": 0.7, "timeout_read": 30.0, "timeout_connect": 5.0, "speed_tier": 1, "estimated_time": "5-10 seconds" }, "Qwen/Qwen2-VL-72B-Instruct": { "max_tokens": 1500, "temperature": 0.7, "timeout_read": 60.0, "timeout_connect": 10.0, "speed_tier": 3, "estimated_time": "20-30 seconds" }, "Qwen/Qwen2.5-Coder-32B-Instruct": { "max_tokens": 3500, "temperature": 0.6, "timeout_read": 50.0, "timeout_connect": 8.0, "speed_tier": 2, "estimated_time": "20-30 seconds" }, "Qwen/Qwen2.5-72B-Instruct": { "max_tokens": 3500, "temperature": 0.7, "timeout_read": 60.0, "timeout_connect": 10.0, "speed_tier": 3, "estimated_time": "30-40 seconds" } } # API Key DEFAULT_NEBIUS_API_KEY = ( "eyJhbGciOiJIUzI1NiIsImtpZCI6IlV6SXJWd1h0dnprLVRvdzlLZWstc0M1akptWXBvX1VaVkxUZlpnMDRlOFUiLCJ0eXAiOiJKV1QifQ.eyJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNTA1MTQzMDg2MDMwMzIxNDEwMiIsInNjb3BlIjoib3BlbmlkIG9mZmxpbmVfYWNjZXNzIiwiaXNzIjoiYXBpX2tleV9pc3N1ZXIiLCJhdWQiOlsiaHR0cHM6Ly9uZWJpdXMtaW5mZXJlbmNlLmV1LmF1dGgwLmNvbS9hcGkvdjIvIl0sImV4cCI6MTkwNjU5ODA0NCwidXVpZCI6ImNkOGFiMWZlLTIxN2QtNDJlMy04OWUwLWM1YTg4MjcwMGVhNyIsIm5hbWUiOiJodW5nZ2luZyIsImV4cGlyZXNfYXQiOiIyMDMwLTA2LTAyVDAyOjM0OjA0KzAwMDAifQ.MA52QuIiNruK7_lX688RXAEI2TkcCOjcf_02XrpnhI8" ) # ========================= # CONNECTION POOL AND CACHING # ========================= _connection_pool = None _pool_lock = False def get_connection_pool(): """Get or create a reusable connection pool.""" global _connection_pool, _pool_lock if _connection_pool is None and not _pool_lock: _pool_lock = True try: _connection_pool = httpx.Client( limits=httpx.Limits( max_keepalive_connections=20, max_connections=40, keepalive_expiry=30.0 ), timeout=httpx.Timeout(30.0, connect=5.0) ) finally: _pool_lock = False return _connection_pool @lru_cache(maxsize=32) def get_model_config(model: str) -> Dict[str, Any]: """Get cached model configuration.""" default = { "max_tokens": 2500, "temperature": 0.7, "timeout_read": 40.0, "timeout_connect": 8.0, "speed_tier": 2, "estimated_time": "15-30 seconds" } return MODEL_CONFIGS.get(model, default) # ========================= # CORE API FUNCTIONS # ========================= def get_api_key(user_key: str = "") -> str: """Get API key from user input, environment, or default.""" return (user_key or "").strip() or os.getenv("NEBIUS_API_KEY", "").strip() or DEFAULT_NEBIUS_API_KEY def test_model_availability(model: str, api_key: str) -> bool: """Test if a model is available on Nebius.""" try: headers = { "Authorization": f"Bearer {api_key}", "Content-Type": "application/json" } # Simple test message payload = { "model": model, "messages": [{"role": "user", "content": "test"}], "max_tokens": 10, "temperature": 0.1 } url = f"{NEBIUS_BASE_URL}chat/completions" with httpx.Client(timeout=httpx.Timeout(10.0)) as client: response = client.post(url, headers=headers, json=payload) return response.status_code == 200 except: return False def call_nebius_api_with_fallback( model: str, messages: list, api_key: str, max_tokens: Optional[int] = None, temperature: Optional[float] = None, fallback_models: Optional[List[str]] = None ) -> Tuple[str, str]: """Call Nebius API with automatic fallback to working models.""" if not api_key: raise ValueError("API key required") # Default fallback chain if fallback_models is None: fallback_models = [ "mistralai/Mistral-7B-Instruct-v0.3", "meta-llama/Llama-3.1-8B-Instruct", "Qwen/Qwen2.5-72B-Instruct" ] # Ensure we always have the requested model first models_to_try = [model] + [m for m in fallback_models if m != model] headers = { "Authorization": f"Bearer {api_key}", "Content-Type": "application/json", "Accept": "application/json" } url = f"{NEBIUS_BASE_URL}chat/completions" last_error = None for attempt, current_model in enumerate(models_to_try): config = get_model_config(current_model) # Use provided values or model defaults actual_max_tokens = min( max_tokens if max_tokens is not None else config["max_tokens"], config["max_tokens"] ) actual_temperature = temperature if temperature is not None else config["temperature"] payload = { "model": current_model, "messages": messages, "max_tokens": actual_max_tokens, "temperature": actual_temperature, "stream": False, "top_p": 0.95, "frequency_penalty": 0.0, "presence_penalty": 0.0 } print(f"[{get_current_time()}] {CURRENT_USER} - Attempting with {current_model}") start_time = time.time() try: # Use connection pool if available client = get_connection_pool() if client is None: client = httpx.Client( timeout=httpx.Timeout(config["timeout_read"], connect=config["timeout_connect"]) ) use_pool = False else: use_pool = True try: response = client.post( url, headers=headers, json=payload, timeout=config["timeout_read"] ) elapsed = time.time() - start_time print(f" Response in {elapsed:.1f}s - Status: {response.status_code}") if response.status_code == 200: data = response.json() choices = data.get("choices", []) if choices and len(choices) > 0: content = choices[0].get("message", {}).get("content", "") if content: return content, current_model raise ValueError("Empty response from API") elif response.status_code == 404: print(f" Model {current_model} not found, trying next...") last_error = f"Model {current_model} not available" continue elif response.status_code == 429: # Rate limited if attempt < len(models_to_try) - 1: print(f" Rate limited, waiting 2s and trying next model...") time.sleep(2) continue raise ValueError("Rate limited on all models") elif response.status_code >= 500: # Server error print(f" Server error {response.status_code}, trying next model...") last_error = f"Server error: {response.status_code}" continue else: last_error = f"API error {response.status_code}" continue finally: if not use_pool: client.close() except httpx.TimeoutException: print(f" Timeout after {config['timeout_read']}s") last_error = f"Timeout with {current_model}" # Try a faster model on timeout if attempt == 0 and current_model not in ["mistralai/Mistral-7B-Instruct-v0.3", "meta-llama/Llama-3.1-8B-Instruct"]: print(f" Switching to faster model due to timeout") continue except Exception as e: print(f" Error: {str(e)[:100]}") last_error = str(e) continue # All models failed raise RuntimeError(f"All models failed. Last error: {last_error}") # ========================= # MAIN FUNCTIONS # ========================= def analyze_image_fast( image: Optional[Image.Image], nebius_api_key: str = "", vision_model: str = DEFAULT_VISION_MODEL, turbo_mode: bool = True ) -> str: """Fast image analysis with vision model.""" if image is None: return "Error: No image provided." api_key = get_api_key(nebius_api_key) if not api_key: return "Error: API key required." # Force fastest vision model in turbo mode if turbo_mode or "72B" in vision_model: vision_model = "Qwen/Qwen2-VL-7B-Instruct" # Ensure we're using a vision model if "VL" not in vision_model: vision_model = "Qwen/Qwen2-VL-7B-Instruct" try: # Image optimization for speed max_size = 512 if turbo_mode else 768 quality = 75 if turbo_mode else 85 # Resize image image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS) # Convert to JPEG for smaller size buffered = io.BytesIO() image.save(buffered, format="JPEG", quality=quality, optimize=True) img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8") # Concise prompt for speed if turbo_mode: prompt = """Quick website analysis: 1. Layout structure (grid/flex/columns) 2. Main colors (2-3 hex codes) 3. Key components (header/nav/sections/footer) 4. Design style (modern/minimal/corporate) Be very concise.""" else: prompt = """Analyze this website screenshot: 1. Layout and structure 2. Color scheme with hex codes 3. Main UI components 4. Design style and theme 5. Notable features Be concise but complete.""" messages = [{ "role": "user", "content": [ {"type": "text", "text": prompt}, {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}} ] }] # Call with fallback content, used_model = call_nebius_api_with_fallback( model=vision_model, messages=messages, api_key=api_key, max_tokens=800 if turbo_mode else 1200, temperature=0.7, fallback_models=["Qwen/Qwen2-VL-7B-Instruct", "Qwen/Qwen2-VL-72B-Instruct"] ) return content except Exception as e: return f"Error analyzing image: {str(e)}" def generate_html_fast( description: str, nebius_api_key: str = "", code_model: str = DEFAULT_CODE_MODEL, turbo_mode: bool = True, quality_mode: str = "fast" ) -> str: """Fast HTML generation with automatic model selection.""" if not description or description.startswith("Error"): return "Error: Invalid description." api_key = get_api_key(nebius_api_key) if not api_key: return "Error: API key required." # Select models based on quality mode if quality_mode == "fast" or turbo_mode: primary_model = "mistralai/Mistral-7B-Instruct-v0.3" fallback_models = ["meta-llama/Llama-3.1-8B-Instruct", "Qwen/Qwen2.5-72B-Instruct"] max_tokens = 2000 if turbo_mode else 2500 elif quality_mode == "balanced": primary_model = "mistralai/Mixtral-8x7B-Instruct-v0.1" fallback_models = ["meta-llama/Llama-3.1-70B-Instruct", "mistralai/Mistral-7B-Instruct-v0.3"] max_tokens = 3000 else: # quality primary_model = "meta-llama/Llama-3.1-70B-Instruct" fallback_models = ["mistralai/Mixtral-8x7B-Instruct-v0.1", "Qwen/Qwen2.5-72B-Instruct"] max_tokens = 4000 # Override with user selection if provided if code_model in WORKING_CODE_MODELS: primary_model = code_model # Optimized prompt if turbo_mode: prompt = f"""Create HTML webpage: {description} Requirements: - Complete HTML with inline CSS/JS - TailwindCSS CDN - Responsive design - Modern look Return only HTML code.""" else: prompt = f"""Create a complete HTML webpage based on this description: {description} Requirements: - Single HTML file with inline CSS and JavaScript - Use TailwindCSS via CDN (version 3.x) - Fully responsive design - Modern, clean aesthetics - Semantic HTML5 structure - Smooth animations and transitions - Dark mode support Technical requirements: - Start with - Complete valid HTML structure - Optimized for performance Generated for: {CURRENT_USER} at {get_current_time()} Return only the complete HTML code, no explanations.""" try: start = time.time() print(f"[{get_current_time()}] Generating HTML in {quality_mode} mode") messages = [{"role": "user", "content": prompt}] # Call with fallback content, used_model = call_nebius_api_with_fallback( model=primary_model, messages=messages, api_key=api_key, max_tokens=max_tokens, temperature=0.7, fallback_models=fallback_models ) # Clean response html_code = content.strip() # Remove markdown code fences if "```" in html_code: html_code = re.sub(r'^```[a-z]*\n?', '', html_code) html_code = re.sub(r'\n?```$', '', html_code) html_code = html_code.strip() # Validate HTML html_lower = html_code.lower() if "" in html_lower: elapsed = time.time() - start print(f" Generated successfully with {used_model} in {elapsed:.1f}s") # Add metadata html_code = html_code.replace( "
", f"\n " ) return html_code else: return "Error: Invalid HTML structure generated. Please try again." except Exception as e: return f"Error generating HTML: {str(e)}" def process_ultra_fast( image: Image.Image, nebius_api_key: str = "", quality_mode: str = "fast", turbo_mode: bool = True ) -> Tuple[str, str, float]: """Complete fast pipeline for website generation.""" start_time = time.time() # Step 1: Image analysis description = analyze_image_fast( image, nebius_api_key, vision_model="Qwen/Qwen2-VL-7B-Instruct", turbo_mode=turbo_mode ) if description.startswith("Error"): return description, "Error: Analysis failed", time.time() - start_time analysis_time = time.time() - start_time print(f" Analysis completed in {analysis_time:.1f}s") # Step 2: Code generation code_start = time.time() html_code = generate_html_fast( description, nebius_api_key, code_model=DEFAULT_CODE_MODEL, turbo_mode=turbo_mode, quality_mode=quality_mode ) code_time = time.time() - code_start total_time = time.time() - start_time print(f" Code generation in {code_time:.1f}s") print(f" Total pipeline in {total_time:.1f}s") return description, html_code, total_time # ========================= # GRADIO UI # ========================= with gr.Blocks( theme=gr.themes.Soft( primary_hue="emerald", secondary_hue="blue" ), title=f"Ultra-Fast Website Generator - {CURRENT_USER}", css=""" .header { background: linear-gradient(135deg, #10b981 0%, #3b82f6 100%); padding: 1.5rem; border-radius: 12px; color: white; text-align: center; margin-bottom: 1.5rem; } .header h1 { font-size: 2.25rem; font-weight: 800; margin-bottom: 0.5rem; } .info-badge { display: inline-block; padding: 0.25rem 0.75rem; background: rgba(255,255,255,0.2); border-radius: 20px; margin: 0.25rem; font-size: 0.875rem; } .turbo-badge { background: #fbbf24 !important; color: #78350f !important; font-weight: bold; } .timer { font-size: 1.5rem; font-weight: bold; color: #10b981; text-align: center; padding: 1rem; background: #f0fdf4; border-radius: 8px; margin: 1rem 0; } .generate-btn { background: linear-gradient(135deg, #10b981 0%, #059669 100%) !important; color: white !important; font-weight: bold !important; font-size: 1.125rem !important; } .quality-info { padding: 0.5rem; background: #f3f4f6; border-radius: 6px; margin: 0.5rem 0; } """ ) as app: gr.HTML(f"""Generate production-ready websites in seconds using AI