"""Ultra-Fast Website Generator.

Gradio app that (1) analyzes a website screenshot with a Qwen vision model on
the Nebius inference API and (2) generates a standalone HTML page from that
analysis with a code model, with automatic fallback across models, connection
pooling, and a turbo mode that trades tokens/image size for speed.
"""

import atexit
import base64
import hashlib  # kept: present in the original import list
import io
import json
import os
import re
import tempfile
import threading
import time
from datetime import datetime, timezone
from functools import lru_cache  # kept: present in the original import list
from typing import Any, Dict, List, Optional, Tuple

import gradio as gr
import httpx
from PIL import Image
from lzstring import LZString

# =========================
# CONFIGURATION - WITH CORRECT MODEL IDS
# =========================
NEBIUS_BASE_URL = "https://api.studio.nebius.com/v1/"

# Real-time tracking (session metadata shown in the UI)
CURRENT_USER = "samsnata"
CURRENT_DATETIME = "2025-08-21 08:21:44"


def get_current_time() -> str:
    """Return the current UTC time formatted as 'YYYY-MM-DD HH:MM:SS'."""
    # datetime.utcnow() is deprecated; timezone-aware now() yields the same text.
    return datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")


# CORRECT NEBIUS MODEL IDS - Verified Working
# Vision Models - ONLY Qwen VL models support image analysis
DEFAULT_VISION_MODEL = "Qwen/Qwen2-VL-7B-Instruct"
VISION_MODELS = [
    "Qwen/Qwen2-VL-7B-Instruct",   # Fast - 7B parameters
    "Qwen/Qwen2-VL-72B-Instruct",  # Slower but more accurate - 72B
]

# Code Generation Models - VERIFIED WORKING ON NEBIUS
DEFAULT_CODE_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
WORKING_CODE_MODELS = [
    # FASTEST TIER (5-15 seconds)
    "mistralai/Mistral-7B-Instruct-v0.3",       # Fastest overall
    "meta-llama/Llama-3.1-8B-Instruct",         # Fast Meta model
    # FAST TIER (15-30 seconds)
    "mistralai/Mixtral-8x7B-Instruct-v0.1",     # MoE architecture
    "Qwen/Qwen2.5-Coder-32B-Instruct",          # Code optimized
    # BALANCED TIER (30-60 seconds)
    "meta-llama/Llama-3.1-70B-Instruct",        # High quality
    "Qwen/Qwen2.5-72B-Instruct",                # General purpose
    # Additional verified models
    "deepseek-ai/DeepSeek-Coder-V2-Instruct",   # Code specialized
    "mistralai/Mistral-Nemo-Instruct-2407",     # Latest Mistral
]

# Optimized configurations for each model: token budget, temperature,
# per-model HTTP timeouts, and a speed tier used only for documentation.
MODEL_CONFIGS = {
    "mistralai/Mistral-7B-Instruct-v0.3": {
        "max_tokens": 2500, "temperature": 0.7,
        "timeout_read": 30.0, "timeout_connect": 5.0,
        "speed_tier": 1, "estimated_time": "5-10 seconds",
    },
    "meta-llama/Llama-3.1-8B-Instruct": {
        "max_tokens": 3000, "temperature": 0.7,
        "timeout_read": 35.0, "timeout_connect": 5.0,
        "speed_tier": 1, "estimated_time": "8-15 seconds",
    },
    "mistralai/Mixtral-8x7B-Instruct-v0.1": {
        "max_tokens": 3500, "temperature": 0.7,
        "timeout_read": 45.0, "timeout_connect": 7.0,
        "speed_tier": 2, "estimated_time": "15-25 seconds",
    },
    "meta-llama/Llama-3.1-70B-Instruct": {
        "max_tokens": 4000, "temperature": 0.7,
        "timeout_read": 70.0, "timeout_connect": 10.0,
        "speed_tier": 3, "estimated_time": "30-45 seconds",
    },
    "Qwen/Qwen2-VL-7B-Instruct": {
        "max_tokens": 1000, "temperature": 0.7,
        "timeout_read": 30.0, "timeout_connect": 5.0,
        "speed_tier": 1, "estimated_time": "5-10 seconds",
    },
    "Qwen/Qwen2-VL-72B-Instruct": {
        "max_tokens": 1500, "temperature": 0.7,
        "timeout_read": 60.0, "timeout_connect": 10.0,
        "speed_tier": 3, "estimated_time": "20-30 seconds",
    },
    "Qwen/Qwen2.5-Coder-32B-Instruct": {
        "max_tokens": 3500, "temperature": 0.6,
        "timeout_read": 50.0, "timeout_connect": 8.0,
        "speed_tier": 2, "estimated_time": "20-30 seconds",
    },
    "Qwen/Qwen2.5-72B-Instruct": {
        "max_tokens": 3500, "temperature": 0.7,
        "timeout_read": 60.0, "timeout_connect": 10.0,
        "speed_tier": 3, "estimated_time": "30-40 seconds",
    },
}

# API Key
# SECURITY(review): a live-looking JWT is hardcoded in source. It should be
# revoked and moved to the NEBIUS_API_KEY environment variable / a secrets
# store; kept here only because the UI pre-fills it as a default value.
DEFAULT_NEBIUS_API_KEY = (
    "eyJhbGciOiJIUzI1NiIsImtpZCI6IlV6SXJWd1h0dnprLVRvdzlLZWstc0M1akptWXBvX1VaVkxUZlpnMDRlOFUiLCJ0eXAiOiJKV1QifQ.eyJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNTA1MTQzMDg2MDMwMzIxNDEwMiIsInNjb3BlIjoib3BlbmlkIG9mZmxpbmVfYWNjZXNzIiwiaXNzIjoiYXBpX2tleV9pc3N1ZXIiLCJhdWQiOlsiaHR0cHM6Ly9uZWJpdXMtaW5mZXJlbmNlLmV1LmF1dGgwLmNvbS9hcGkvdjIvIl0sImV4cCI6MTkwNjU5ODA0NCwidXVpZCI6ImNkOGFiMWZlLTIxN2QtNDJlMy04OWUwLWM1YTg4MjcwMGVhNyIsIm5hbWUiOiJodW5nZ2luZyIsImV4cGlyZXNfYXQiOiIyMDMwLTA2LTAyVDAyOjM0OjA0KzAwMDAifQ.MA52QuIiNruK7_lX688RXAEI2TkcCOjcf_02XrpnhI8"
)

# =========================
# CONNECTION POOL AND CACHING
# =========================
_connection_pool: Optional[httpx.Client] = None
# A real lock instead of the original module-level boolean "flag lock",
# which was not thread-safe and could let get_connection_pool return None
# under concurrent first calls.
_pool_guard = threading.Lock()


def get_connection_pool() -> Optional[httpx.Client]:
    """Get or create a reusable connection pool (thread-safe, lazy)."""
    global _connection_pool
    with _pool_guard:
        if _connection_pool is None:
            _connection_pool = httpx.Client(
                limits=httpx.Limits(
                    max_keepalive_connections=20,
                    max_connections=40,
                    keepalive_expiry=30.0,
                ),
                timeout=httpx.Timeout(30.0, connect=5.0),
            )
    return _connection_pool


def get_model_config(model: str) -> Dict[str, Any]:
    """Return the per-model configuration (a fresh copy, safe to mutate).

    The original memoized this with lru_cache and handed every caller the
    same mutable dict; a plain lookup is already O(1) and a copy prevents
    accidental corruption of MODEL_CONFIGS.
    """
    default = {
        "max_tokens": 2500, "temperature": 0.7,
        "timeout_read": 40.0, "timeout_connect": 8.0,
        "speed_tier": 2, "estimated_time": "15-30 seconds",
    }
    return dict(MODEL_CONFIGS.get(model, default))


# =========================
# CORE API FUNCTIONS
# =========================
def get_api_key(user_key: str = "") -> str:
    """Get API key from user input, environment, or default (in that order)."""
    return (
        (user_key or "").strip()
        or os.getenv("NEBIUS_API_KEY", "").strip()
        or DEFAULT_NEBIUS_API_KEY
    )


def test_model_availability(model: str, api_key: str) -> bool:
    """Return True if a tiny completion against *model* succeeds on Nebius."""
    try:
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }
        # Minimal probe request: 1 message, 10 tokens.
        payload = {
            "model": model,
            "messages": [{"role": "user", "content": "test"}],
            "max_tokens": 10,
            "temperature": 0.1,
        }
        url = f"{NEBIUS_BASE_URL}chat/completions"
        with httpx.Client(timeout=httpx.Timeout(10.0)) as client:
            response = client.post(url, headers=headers, json=payload)
            return response.status_code == 200
    except Exception:
        # Boundary probe: any network/HTTP failure simply means "unavailable".
        return False


def call_nebius_api_with_fallback(
    model: str,
    messages: list,
    api_key: str,
    max_tokens: Optional[int] = None,
    temperature: Optional[float] = None,
    fallback_models: Optional[List[str]] = None,
) -> Tuple[str, str]:
    """Call the Nebius chat-completions API, falling back across models.

    Tries *model* first, then each entry of *fallback_models*, handling
    404 (model missing), 429 (rate limit, 2s backoff), 5xx, timeouts, and
    empty responses by moving to the next candidate.

    Returns:
        (content, model_id_that_answered)

    Raises:
        ValueError: if *api_key* is empty.
        RuntimeError: if every candidate model fails.
    """
    if not api_key:
        raise ValueError("API key required")

    # Default fallback chain: fast and broadly available models.
    if fallback_models is None:
        fallback_models = [
            "mistralai/Mistral-7B-Instruct-v0.3",
            "meta-llama/Llama-3.1-8B-Instruct",
            "Qwen/Qwen2.5-72B-Instruct",
        ]

    # Ensure we always have the requested model first, without duplicates.
    models_to_try = [model] + [m for m in fallback_models if m != model]

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    url = f"{NEBIUS_BASE_URL}chat/completions"
    last_error = None

    for attempt, current_model in enumerate(models_to_try):
        config = get_model_config(current_model)

        # Caller-provided values are honored but capped at the model's budget.
        actual_max_tokens = min(
            max_tokens if max_tokens is not None else config["max_tokens"],
            config["max_tokens"],
        )
        actual_temperature = (
            temperature if temperature is not None else config["temperature"]
        )

        payload = {
            "model": current_model,
            "messages": messages,
            "max_tokens": actual_max_tokens,
            "temperature": actual_temperature,
            "stream": False,
            "top_p": 0.95,
            "frequency_penalty": 0.0,
            "presence_penalty": 0.0,
        }

        print(f"[{get_current_time()}] {CURRENT_USER} - Attempting with {current_model}")
        start_time = time.time()

        try:
            # Prefer the shared pool; fall back to a throwaway client.
            client = get_connection_pool()
            if client is None:
                client = httpx.Client(
                    timeout=httpx.Timeout(
                        config["timeout_read"], connect=config["timeout_connect"]
                    )
                )
                use_pool = False
            else:
                use_pool = True

            try:
                response = client.post(
                    url,
                    headers=headers,
                    json=payload,
                    timeout=config["timeout_read"],
                )
                elapsed = time.time() - start_time
                print(f" Response in {elapsed:.1f}s - Status: {response.status_code}")

                if response.status_code == 200:
                    data = response.json()
                    choices = data.get("choices", [])
                    if choices and len(choices) > 0:
                        content = choices[0].get("message", {}).get("content", "")
                        if content:
                            return content, current_model
                    raise ValueError("Empty response from API")
                elif response.status_code == 404:
                    print(f" Model {current_model} not found, trying next...")
                    last_error = f"Model {current_model} not available"
                    continue
                elif response.status_code == 429:
                    # Rate limited: brief backoff, then next candidate.
                    if attempt < len(models_to_try) - 1:
                        print(f" Rate limited, waiting 2s and trying next model...")
                        time.sleep(2)
                        continue
                    raise ValueError("Rate limited on all models")
                elif response.status_code >= 500:
                    # Server error: this model's backend is unhealthy.
                    print(f" Server error {response.status_code}, trying next model...")
                    last_error = f"Server error: {response.status_code}"
                    continue
                else:
                    last_error = f"API error {response.status_code}"
                    continue
            finally:
                # Throwaway clients must not leak; the pool stays open.
                if not use_pool:
                    client.close()

        except httpx.TimeoutException:
            print(f" Timeout after {config['timeout_read']}s")
            last_error = f"Timeout with {current_model}"
            # On a first-attempt timeout with a slow model, move straight on.
            if attempt == 0 and current_model not in [
                "mistralai/Mistral-7B-Instruct-v0.3",
                "meta-llama/Llama-3.1-8B-Instruct",
            ]:
                print(f" Switching to faster model due to timeout")
            continue
        except Exception as e:
            print(f" Error: {str(e)[:100]}")
            last_error = str(e)
            continue

    # All models failed
    raise RuntimeError(f"All models failed. Last error: {last_error}")


# =========================
# MAIN FUNCTIONS
# =========================
def analyze_image_fast(
    image: Optional[Image.Image],
    nebius_api_key: str = "",
    vision_model: str = DEFAULT_VISION_MODEL,
    turbo_mode: bool = True,
) -> str:
    """Analyze a website screenshot with a vision model; return a description.

    Returns an "Error: ..." string (never raises) so the UI pipeline can
    display failures inline.
    """
    if image is None:
        return "Error: No image provided."

    api_key = get_api_key(nebius_api_key)
    if not api_key:
        return "Error: API key required."

    # Force fastest vision model in turbo mode
    if turbo_mode or "72B" in vision_model:
        vision_model = "Qwen/Qwen2-VL-7B-Instruct"

    # Ensure we're using a vision model
    if "VL" not in vision_model:
        vision_model = "Qwen/Qwen2-VL-7B-Instruct"

    try:
        # Image optimization for speed
        max_size = 512 if turbo_mode else 768
        quality = 75 if turbo_mode else 85

        # Work on an RGB copy: convert() both avoids mutating the caller's
        # image (thumbnail() resizes in place) and makes JPEG save safe for
        # RGBA/palette inputs, which would otherwise raise.
        img = image.convert("RGB")
        img.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)

        # Convert to JPEG for smaller size
        buffered = io.BytesIO()
        img.save(buffered, format="JPEG", quality=quality, optimize=True)
        img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")

        # Concise prompt for speed
        if turbo_mode:
            prompt = """Quick website analysis:
1. Layout structure (grid/flex/columns)
2. Main colors (2-3 hex codes)
3. Key components (header/nav/sections/footer)
4. Design style (modern/minimal/corporate)
Be very concise."""
        else:
            prompt = """Analyze this website screenshot:
1. Layout and structure
2. Color scheme with hex codes
3. Main UI components
4. Design style and theme
5. Notable features
Be concise but complete."""

        messages = [{
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url",
                 "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}},
            ],
        }]

        # Call with fallback
        content, used_model = call_nebius_api_with_fallback(
            model=vision_model,
            messages=messages,
            api_key=api_key,
            max_tokens=800 if turbo_mode else 1200,
            temperature=0.7,
            fallback_models=["Qwen/Qwen2-VL-7B-Instruct",
                             "Qwen/Qwen2-VL-72B-Instruct"],
        )
        return content

    except Exception as e:
        return f"Error analyzing image: {str(e)}"


def generate_html_fast(
    description: str,
    nebius_api_key: str = "",
    code_model: str = DEFAULT_CODE_MODEL,
    turbo_mode: bool = True,
    quality_mode: str = "fast",
) -> str:
    """Generate a standalone HTML page from a text description.

    quality_mode selects the primary model tier ("fast" / "balanced" /
    "quality"); a user-selected code_model in WORKING_CODE_MODELS overrides
    the tier's primary. Returns the HTML, or an "Error: ..." string.
    """
    if not description or description.startswith("Error"):
        return "Error: Invalid description."

    api_key = get_api_key(nebius_api_key)
    if not api_key:
        return "Error: API key required."

    # Select models based on quality mode
    if quality_mode == "fast" or turbo_mode:
        primary_model = "mistralai/Mistral-7B-Instruct-v0.3"
        fallback_models = ["meta-llama/Llama-3.1-8B-Instruct",
                           "Qwen/Qwen2.5-72B-Instruct"]
        max_tokens = 2000 if turbo_mode else 2500
    elif quality_mode == "balanced":
        primary_model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
        fallback_models = ["meta-llama/Llama-3.1-70B-Instruct",
                           "mistralai/Mistral-7B-Instruct-v0.3"]
        max_tokens = 3000
    else:  # quality
        primary_model = "meta-llama/Llama-3.1-70B-Instruct"
        fallback_models = ["mistralai/Mixtral-8x7B-Instruct-v0.1",
                           "Qwen/Qwen2.5-72B-Instruct"]
        max_tokens = 4000

    # Override with user selection if provided
    if code_model in WORKING_CODE_MODELS:
        primary_model = code_model

    # Optimized prompt
    if turbo_mode:
        prompt = f"""Create HTML webpage: {description}

Requirements:
- Complete HTML with inline CSS/JS
- TailwindCSS CDN
- Responsive design
- Modern look
Return only HTML code."""
    else:
        prompt = f"""Create a complete HTML webpage based on this description:

{description}

Requirements:
- Single HTML file with inline CSS and JavaScript
- Use TailwindCSS via CDN (version 3.x)
- Fully responsive design
- Modern, clean aesthetics
- Semantic HTML5 structure
- Smooth animations and transitions
- Dark mode support

Technical requirements:
- Start with <!DOCTYPE html>
- Complete valid HTML structure
- Optimized for performance

Generated for: {CURRENT_USER} at {get_current_time()}

Return only the complete HTML code, no explanations."""

    try:
        start = time.time()
        print(f"[{get_current_time()}] Generating HTML in {quality_mode} mode")

        messages = [{"role": "user", "content": prompt}]

        # Call with fallback
        content, used_model = call_nebius_api_with_fallback(
            model=primary_model,
            messages=messages,
            api_key=api_key,
            max_tokens=max_tokens,
            temperature=0.7,
            fallback_models=fallback_models,
        )

        # Clean response
        html_code = content.strip()

        # Remove markdown code fences
        if "```" in html_code:
            html_code = re.sub(r'^```[a-z]*\n?', '', html_code)
            html_code = re.sub(r'\n?```$', '', html_code)
            html_code = html_code.strip()

        # Validate HTML. The original tested `"" in html_lower` (always True)
        # and then did `html_code.replace("", ...)`, which inserts the
        # replacement between EVERY character; both fixed here.
        html_lower = html_code.lower()
        if "<html" in html_lower or "<!doctype" in html_lower:
            elapsed = time.time() - start
            print(f" Generated successfully with {used_model} in {elapsed:.1f}s")
            # Add metadata once, right after <head> when present.
            meta_comment = (
                f"<!-- Generated by {CURRENT_USER} at {get_current_time()} "
                f"using {used_model} -->"
            )
            if "<head>" in html_code:
                html_code = html_code.replace(
                    "<head>", f"<head>\n    {meta_comment}", 1
                )
            return html_code
        else:
            return "Error: Invalid HTML structure generated. Please try again."

    except Exception as e:
        return f"Error generating HTML: {str(e)}"


def process_ultra_fast(
    image: Image.Image,
    nebius_api_key: str = "",
    quality_mode: str = "fast",
    turbo_mode: bool = True,
) -> Tuple[str, str, float]:
    """Complete pipeline: analyze the screenshot, then generate the HTML.

    Returns:
        (description, html_code, total_elapsed_seconds); on analysis failure
        the description carries the error and html_code is an error marker.
    """
    start_time = time.time()

    # Step 1: Image analysis
    description = analyze_image_fast(
        image,
        nebius_api_key,
        vision_model="Qwen/Qwen2-VL-7B-Instruct",
        turbo_mode=turbo_mode,
    )
    if description.startswith("Error"):
        return description, "Error: Analysis failed", time.time() - start_time

    analysis_time = time.time() - start_time
    print(f" Analysis completed in {analysis_time:.1f}s")

    # Step 2: Code generation
    code_start = time.time()
    html_code = generate_html_fast(
        description,
        nebius_api_key,
        code_model=DEFAULT_CODE_MODEL,
        turbo_mode=turbo_mode,
        quality_mode=quality_mode,
    )
    code_time = time.time() - code_start
    total_time = time.time() - start_time

    print(f" Code generation in {code_time:.1f}s")
    print(f" Total pipeline in {total_time:.1f}s")

    return description, html_code, total_time


# =========================
# GRADIO UI
# =========================
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue="emerald",
        secondary_hue="blue",
    ),
    title=f"Ultra-Fast Website Generator - {CURRENT_USER}",
    css="""
    .header { background: linear-gradient(135deg, #10b981 0%, #3b82f6 100%);
              padding: 1.5rem; border-radius: 12px; color: white;
              text-align: center; margin-bottom: 1.5rem; }
    .header h1 { font-size: 2.25rem; font-weight: 800; margin-bottom: 0.5rem; }
    .info-badge { display: inline-block; padding: 0.25rem 0.75rem;
                  background: rgba(255,255,255,0.2); border-radius: 20px;
                  margin: 0.25rem; font-size: 0.875rem; }
    .turbo-badge { background: #fbbf24 !important; color: #78350f !important;
                   font-weight: bold; }
    .timer { font-size: 1.5rem; font-weight: bold; color: #10b981;
             text-align: center; padding: 1rem; background: #f0fdf4;
             border-radius: 8px; margin: 1rem 0; }
    .generate-btn { background: linear-gradient(135deg, #10b981 0%, #059669 100%) !important;
                    color: white !important; font-weight: bold !important;
                    font-size: 1.125rem !important; }
    .quality-info { padding: 0.5rem; background: #f3f4f6; border-radius: 6px;
                    margin: 0.5rem 0; }
    """,
) as app:
    # Header banner. NOTE(review): the angle-bracket markup was stripped from
    # the original source by extraction; this reconstruction uses the CSS
    # classes defined above (.header, .info-badge, .turbo-badge).
    gr.HTML(f"""
    <div class="header">
        <h1>Ultra-Fast Website Generator</h1>
        <p>Generate production-ready websites in seconds using AI</p>
        <div>
            <span class="info-badge">User: {CURRENT_USER}</span>
            <span class="info-badge">Session: {CURRENT_DATETIME}</span>
            <span class="info-badge turbo-badge">TURBO OPTIMIZED</span>
        </div>
    </div>
    """)

    with gr.Row():
        with gr.Column(scale=1):
            # API Configuration
            nebius_key = gr.Textbox(
                label="Nebius API Key",
                type="password",
                value=DEFAULT_NEBIUS_API_KEY,
                info="Using default key if not provided",
            )

            # Speed Settings
            with gr.Group():
                gr.Markdown("### Speed Configuration")
                quality_mode = gr.Radio(
                    label="Generation Quality",
                    choices=[
                        ("Ultra Fast (5-10s) - Mistral 7B", "fast"),
                        ("Balanced (15-25s) - Mixtral 8x7B", "balanced"),
                        ("High Quality (30-45s) - Llama 70B", "quality"),
                    ],
                    value="fast",
                )
                turbo_mode = gr.Checkbox(
                    label="Turbo Mode - Maximum Speed (Reduces tokens and image size)",
                    value=True,
                )
                gr.HTML("""
                <div class="quality-info">
                    <b>Speed Optimization Tips:</b><br>
                    • Turbo Mode + Ultra Fast = 5-10 seconds<br>
                    • Smaller images process faster<br>
                    • Simple designs generate quicker<br>
                    • Connection pooling saves 2-3 seconds
                </div>
                """)

            # Image Input
            image_input = gr.Image(
                type="pil",
                label="Upload Website Screenshot",
                height=250,
            )

            # Generate Button
            generate_btn = gr.Button(
                "GENERATE WEBSITE",
                variant="primary",
                size="lg",
                elem_classes=["generate-btn"],
            )

            # Timer Display
            timer_display = gr.HTML(
                value='<div class="timer">Ready to generate</div>'
            )

        with gr.Column(scale=2):
            # Results
            with gr.Tabs():
                with gr.Tab("Analysis"):
                    description_output = gr.Textbox(
                        label="Image Analysis Result",
                        lines=5,
                        interactive=False,
                    )
                with gr.Tab("Generated Code"):
                    html_output = gr.Code(
                        label="HTML Code",
                        language="html",
                        lines=20,
                    )
                with gr.Tab("Performance"):
                    performance_display = gr.Markdown(
                        value="""### Performance Metrics

Waiting for generation..."""
                    )

            # Action Buttons
            with gr.Row():
                deploy_btn = gr.Button("Deploy to CodeSandbox", size="sm")
                download_btn = gr.Button("Download HTML", size="sm")
                copy_btn = gr.Button("Copy Code", size="sm")

            output_message = gr.Markdown()

    # Model Information
    with gr.Accordion("Model Performance Guide", open=False):
        gr.Markdown(f"""
### Available Models and Performance

**Vision Models (Image Analysis):**
- Qwen2-VL-7B: 5-10 seconds (Recommended)
- Qwen2-VL-72B: 20-30 seconds (More accurate)

**Code Generation Models:**

| Model | Speed | Quality | Parameters |
|-------|-------|---------|------------|
| Mistral-7B | 5-10s | Good | 7B |
| Llama-3.1-8B | 8-15s | Good | 8B |
| Mixtral-8x7B | 15-25s | Better | 56B (MoE) |
| Llama-3.1-70B | 30-45s | Best | 70B |

**Current Optimizations:**
- Connection pooling enabled
- Image compression active
- Token reduction in turbo mode
- Automatic model fallback on errors

**Session Info:**
- User: {CURRENT_USER}
- Started: {CURRENT_DATETIME}
- API Endpoint: {NEBIUS_BASE_URL}
""")

    # Event Handlers
    def generate_with_timer(img, api_key, quality, turbo):
        """Run the full pipeline and format UI outputs (4-tuple)."""
        if img is None:
            return (
                "Please upload an image",
                "",
                '<div class="timer">No image uploaded</div>',
                "### No generation performed",
            )

        try:
            # Process
            description, html_code, elapsed = process_ultra_fast(
                img, api_key, quality_mode=quality, turbo_mode=turbo
            )

            # Format results
            timer_html = (
                f'<div class="timer">Completed in {elapsed:.1f} seconds</div>'
            )

            # Performance metrics (breakdown percentages are estimates only)
            perf = f"""### Performance Report

**Total Time:** {elapsed:.1f} seconds
**Quality Mode:** {quality.upper()} {' + TURBO' if turbo else ''}

**Models Used:**
- Vision: Qwen2-VL-7B (Fast)
- Code: {'Mistral-7B' if quality == 'fast' else 'Mixtral-8x7B' if quality == 'balanced' else 'Llama-70B'}

**Time Breakdown:**
- Image Analysis: ~{elapsed * 0.3:.1f}s
- Code Generation: ~{elapsed * 0.6:.1f}s
- Network/Processing: ~{elapsed * 0.1:.1f}s

**Session:** {CURRENT_USER} at {get_current_time()}
"""
            return description, html_code, timer_html, perf

        except Exception as e:
            error_msg = str(e)
            timer_html = '<div class="timer">Error occurred</div>'
            return (
                f"Error: {error_msg}",
                "",
                timer_html,
                f"### Error Details\n\n{error_msg}",
            )

    generate_btn.click(
        fn=generate_with_timer,
        inputs=[image_input, nebius_key, quality_mode, turbo_mode],
        outputs=[description_output, html_output, timer_display,
                 performance_display],
    )

    # Deploy to CodeSandbox
    def deploy_to_codesandbox(html_code):
        """Build a CodeSandbox 'define' URL from the generated HTML."""
        if not html_code or html_code.startswith("Error"):
            return "No valid code to deploy"
        try:
            files = {
                "index.html": {"content": html_code, "isBinary": False}
            }
            params = {"files": files, "template": "static"}
            lz = LZString()
            compressed = lz.compressToBase64(json.dumps(params))
            # URL-safe base64 variant expected by the CodeSandbox endpoint.
            compressed = compressed.replace('+', '-').replace('/', '_').rstrip('=')
            url = f"https://codesandbox.io/api/v1/sandboxes/define?parameters={compressed}"
            return f"**[Open in CodeSandbox]({url})**\n\nGenerated by {CURRENT_USER}"
        except Exception as e:
            return f"Deployment error: {str(e)}"

    deploy_btn.click(
        fn=deploy_to_codesandbox,
        inputs=[html_output],
        outputs=[output_message],
    )

    # Download HTML
    def download_html(code):
        """Write the generated HTML to a temp file and report its path."""
        if not code or code.startswith("Error"):
            return "No code to download"
        # Explicit UTF-8: the generated HTML may contain non-ASCII text.
        tmp = tempfile.NamedTemporaryFile(
            delete=False, suffix=".html", mode='w', encoding='utf-8'
        )
        tmp.write(code)
        tmp.close()
        return f"File saved to: {tmp.name}"

    download_btn.click(
        fn=download_html,
        inputs=[html_output],
        outputs=[output_message],
    )

    # Copy instruction
    copy_btn.click(
        fn=lambda: "Select the code above and press Ctrl+C (or Cmd+C on Mac) to copy",
        outputs=[output_message],
    )


# Cleanup on exit
def cleanup():
    """Best-effort close of the shared HTTP pool at interpreter exit."""
    global _connection_pool
    if _connection_pool:
        try:
            _connection_pool.close()
        except Exception:
            # Deliberate best-effort: nothing useful to do at exit time.
            pass


atexit.register(cleanup)

if __name__ == "__main__":
    print(f"[{get_current_time()}] Ultra-Fast Website Generator starting")
    print(f"[{get_current_time()}] User: {CURRENT_USER}")
    print(f"[{get_current_time()}] Optimizations: Connection pooling, Model fallback, Turbo mode")
    app.launch(share=False)