diff --git "a/app.py" "b/app.py"
--- "a/app.py"
+++ "b/app.py"
@@ -34,7 +34,6 @@ import asyncio
 from datetime import datetime, timedelta
 from typing import Optional
 import dashscope
-from dashscope.utils.oss_utils import check_and_upload_local
 
 # Gradio supported languages for syntax highlighting
 GRADIO_SUPPORTED_LANGUAGES = [
@@ -1500,37 +1499,6 @@ Generate complete, working HTML code that can be run immediately.
 
 IMPORTANT: Always include "Built with anycoder" as clickable text in the header/top section of your application that links to https://huggingface.co/spaces/akhaliq/anycoder"""
 
-def validate_video_html(video_html: str) -> bool:
-    """Validate that the video HTML is well-formed and safe to insert."""
-    try:
-        # Basic checks for video HTML structure
-        if not video_html or not video_html.strip():
-            return False
-        
-        # Check for required video elements
-        if '<video' not in video_html or '</video>' not in video_html:
-            return False
-        
-        # Check for proper source tag
-        if '<source' not in video_html:
-            return False
-        
-        # Check for valid video source (data URI, HF URL, or file URL)
-        has_data_uri = 'data:video/mp4;base64,' in video_html
-        has_hf_url = 'https://huggingface.co/datasets/' in video_html and '/resolve/main/' in video_html
-        has_file_url = 'file://' in video_html
-        if not (has_data_uri or has_hf_url or has_file_url):
-            return False
-        
-        # Basic HTML structure validation
-        video_start = video_html.find('<video')
-        video_end = video_html.find('</video>') + 8
-        if video_start == -1 or video_end == 7:  # 7 means </video> not found
-            return False
-        
-        return True
-    except Exception:
-        return False
 
 
 # Stricter prompt for GLM-4.5V to ensure a complete, runnable HTML document with no escaped characters
@@ -3157,2312 +3125,201 @@ def inline_multipage_into_single_preview(files: Dict[str, str]) -> str:
             doc = doc[:i] + nav_script + doc[i:]
         else:
             doc = doc + nav_script
-    except Exception:
-        # Non-fatal in preview
-        pass
-
-    return doc
-
-def extract_html_document(text: str) -> str:
-    """Return substring starting from the first <!DOCTYPE html> or <html> if present, else original text.
-
-    This ignores prose or planning notes before the actual HTML so previews don't break.
-    """
-    if not text:
-        return text
-    lower = text.lower()
-    idx = lower.find("<!doctype html")
-    if idx == -1:
-        idx = lower.find("<html")
-    return text[idx:] if idx != -1 else text
-
-def parse_svelte_output(text):
-    """Parse Svelte output to extract individual files.
-
-    Supports dynamic multi-file using === filename === sections (preferred),
-    and falls back to ```svelte / ```css code blocks for minimal projects.
-    """
-    if not text:
-        return {}
-
-    # Preferred: multi-file sections (works for any filenames)
-    try:
-        files = parse_multipage_html_output(text) or {}
-    except Exception:
-        files = {}
-
-    if isinstance(files, dict) and files:
-        return files
-
-    # Fallback: code fences for minimal two-file output
-    import re
-    results = {}
-    svelte_match = re.search(r"```svelte\s*\n([\s\S]+?)\n```", text, re.IGNORECASE)
-    if svelte_match:
-        results['src/App.svelte'] = svelte_match.group(1).strip()
-    css_match = re.search(r"```css\s*\n([\s\S]+?)\n```", text, re.IGNORECASE)
-    if css_match:
-        results['src/app.css'] = css_match.group(1).strip()
-    return results
-
-def parse_react_output(text):
-    """Parse React/Next.js output to extract individual files.
-
-    Supports multi-file sections using === filename === sections.
-    """
-    if not text:
-        return {}
-
-    # Use the generic multipage parser
-    try:
-        files = parse_multipage_html_output(text) or {}
-    except Exception:
-        files = {}
-
-    return files if isinstance(files, dict) and files else {}
-
-def format_svelte_output(files):
-    """Format Svelte files into === filename === sections (generic)."""
-    return format_multipage_output(files)
-def infer_svelte_dependencies(files: Dict[str, str]) -> Dict[str, str]:
-    """Infer npm dependencies from Svelte/TS imports across generated files.
-
-    Returns mapping of package name -> semver (string). Uses conservative defaults
-    when versions aren't known. Adds special-cased versions when known.
-    """
-    import re as _re
-    deps: Dict[str, str] = {}
-    import_from = _re.compile(r"import\s+[^;]*?from\s+['\"]([^'\"]+)['\"]", _re.IGNORECASE)
-    bare_import = _re.compile(r"import\s+['\"]([^'\"]+)['\"]", _re.IGNORECASE)
-
-    def maybe_add(pkg: str):
-        if not pkg or pkg.startswith('.') or pkg.startswith('/') or pkg.startswith('http'):
-            return
-        if pkg.startswith('svelte'):
-            return
-        if pkg not in deps:
-            # Default to wildcard; adjust known packages below
-            deps[pkg] = "*"
-
-    for path, content in (files or {}).items():
-        if not isinstance(content, str):
-            continue
-        for m in import_from.finditer(content):
-            maybe_add(m.group(1))
-        for m in bare_import.finditer(content):
-            maybe_add(m.group(1))
-
-    # Pin known versions when sensible
-    if '@gradio/dataframe' in deps:
-        deps['@gradio/dataframe'] = '^0.19.1'
-
-    return deps
-
-def build_svelte_package_json(existing_json_text: str | None, detected_dependencies: Dict[str, str]) -> str:
-    """Create or merge a package.json for Svelte spaces.
-
-    - If existing_json_text is provided, merge detected deps into its dependencies.
-    - Otherwise, start from the template defaults provided by the user and add deps.
-    - Always preserve template scripts and devDependencies.
-    """
-    import json as _json
-    # Template from the user's Svelte space scaffold
-    template = {
-        "name": "svelte",
-        "private": True,
-        "version": "0.0.0",
-        "type": "module",
-        "scripts": {
-            "dev": "vite",
-            "build": "vite build",
-            "preview": "vite preview",
-            "check": "svelte-check --tsconfig ./tsconfig.app.json && tsc -p tsconfig.node.json"
-        },
-        "devDependencies": {
-            "@sveltejs/vite-plugin-svelte": "^5.0.3",
-            "@tsconfig/svelte": "^5.0.4",
-            "svelte": "^5.28.1",
-            "svelte-check": "^4.1.6",
-            "typescript": "~5.8.3",
-            "vite": "^6.3.5"
-        }
-    }
-
-    result = template
-    if existing_json_text:
-        try:
-            parsed = _json.loads(existing_json_text)
-            # Merge with template as base, keeping template scripts/devDependencies if missing in parsed
-            result = {
-                **template,
-                **{k: v for k, v in parsed.items() if k not in ("scripts", "devDependencies")},
-            }
-            # If parsed contains its own scripts/devDependencies, prefer parsed to respect user's file
-            if isinstance(parsed.get("scripts"), dict):
-                result["scripts"] = parsed["scripts"]
-            if isinstance(parsed.get("devDependencies"), dict):
-                result["devDependencies"] = parsed["devDependencies"]
-        except Exception:
-            # Fallback to template if parse fails
-            result = template
-
-    # Merge dependencies
-    existing_deps = result.get("dependencies", {})
-    if not isinstance(existing_deps, dict):
-        existing_deps = {}
-    merged = {**existing_deps, **(detected_dependencies or {})}
-    if merged:
-        result["dependencies"] = merged
-    else:
-        result.pop("dependencies", None)
-
-    return _json.dumps(result, indent=2, ensure_ascii=False) + "\n"
-
-def history_render(history: History):
-    return gr.update(visible=True), history
-
-def clear_history():
-    return [], [], None, ""  # Empty lists for both tuple format and chatbot messages, None for file, empty string for website URL
-
-def update_image_input_visibility(model):
-    """Update image input visibility based on selected model"""
-    is_ernie_vl = model.get("id") == "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT"
-    is_glm_vl = model.get("id") == "THUDM/GLM-4.1V-9B-Thinking"
-    is_glm_45v = model.get("id") == "zai-org/GLM-4.5V"
-    return gr.update(visible=is_ernie_vl or is_glm_vl or is_glm_45v)
-
-def process_image_for_model(image):
-    """Convert image to base64 for model input"""
-    if image is None:
-        return None
-    
-    # Convert numpy array to PIL Image if needed
-    import io
-    import base64
-    import numpy as np
-    from PIL import Image
-    
-    # Handle numpy array from Gradio
-    if isinstance(image, np.ndarray):
-        image = Image.fromarray(image)
-    
-    buffer = io.BytesIO()
-    image.save(buffer, format='PNG')
-    img_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
-    return f"data:image/png;base64,{img_str}"
-
-def compress_video_for_data_uri(video_bytes: bytes, max_size_mb: int = 8) -> bytes:
-    """Compress video bytes for data URI embedding with size limit"""
-    import subprocess
-    import tempfile
-    import os
-    
-    max_size = max_size_mb * 1024 * 1024
-    
-    # If already small enough, return as-is
-    if len(video_bytes) <= max_size:
-        return video_bytes
-    
-    print(f"[VideoCompress] Video size {len(video_bytes)} bytes exceeds {max_size_mb}MB limit, attempting compression")
-    
-    try:
-        # Create temp files
-        with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_input:
-            temp_input.write(video_bytes)
-            temp_input_path = temp_input.name
-        
-        temp_output_path = temp_input_path.replace('.mp4', '_compressed.mp4')
-        
-        try:
-            # Compress with ffmpeg - extremely aggressive settings for tiny preview size
-            subprocess.run([
-                'ffmpeg', '-i', temp_input_path, 
-                '-vcodec', 'libx264', '-crf', '40', '-preset', 'ultrafast',
-                '-vf', 'scale=320:-1', '-r', '10',  # Very low resolution and frame rate
-                '-an',  # Remove audio to save space
-                '-t', '10',  # Limit to first 10 seconds for preview
-                '-y', temp_output_path
-            ], check=True, capture_output=True, stderr=subprocess.DEVNULL)
-            
-            # Read compressed video
-            with open(temp_output_path, 'rb') as f:
-                compressed_bytes = f.read()
-            
-            print(f"[VideoCompress] Compressed from {len(video_bytes)} to {len(compressed_bytes)} bytes")
-            return compressed_bytes
-            
-        except (subprocess.CalledProcessError, FileNotFoundError):
-            print("[VideoCompress] ffmpeg compression failed, using original video")
-            return video_bytes
-        finally:
-            # Clean up temp files
-            for path in [temp_input_path, temp_output_path]:
-                try:
-                    if os.path.exists(path):
-                        os.remove(path)
-                except Exception:
-                    pass
-                    
-    except Exception as e:
-        print(f"[VideoCompress] Compression failed: {e}, using original video")
-        return video_bytes
-
-def compress_audio_for_data_uri(audio_bytes: bytes, max_size_mb: int = 4) -> bytes:
-    """Compress audio bytes for data URI embedding with size limit"""
-    import subprocess
-    import tempfile
-    import os
-    
-    max_size = max_size_mb * 1024 * 1024
-    
-    # If already small enough, return as-is
-    if len(audio_bytes) <= max_size:
-        return audio_bytes
-    
-    print(f"[AudioCompress] Audio size {len(audio_bytes)} bytes exceeds {max_size_mb}MB limit, attempting compression")
-    
-    try:
-        # Create temp files
-        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_input:
-            temp_input.write(audio_bytes)
-            temp_input_path = temp_input.name
-        
-        temp_output_path = temp_input_path.replace('.wav', '_compressed.mp3')
-        
-        try:
-            # Compress with ffmpeg - convert to MP3 with lower bitrate
-            subprocess.run([
-                'ffmpeg', '-i', temp_input_path,
-                '-codec:a', 'libmp3lame', '-b:a', '64k',  # Low bitrate MP3
-                '-y', temp_output_path
-            ], check=True, capture_output=True, stderr=subprocess.DEVNULL)
-            
-            # Read compressed audio
-            with open(temp_output_path, 'rb') as f:
-                compressed_bytes = f.read()
-            
-            print(f"[AudioCompress] Compressed from {len(audio_bytes)} to {len(compressed_bytes)} bytes")
-            return compressed_bytes
-            
-        except (subprocess.CalledProcessError, FileNotFoundError):
-            print("[AudioCompress] ffmpeg compression failed, using original audio")
-            return audio_bytes
-        finally:
-            # Clean up temp files
-            for path in [temp_input_path, temp_output_path]:
-                try:
-                    if os.path.exists(path):
-                        os.remove(path)
-                except Exception:
-                    pass
-                    
-    except Exception as e:
-        print(f"[AudioCompress] Compression failed: {e}, using original audio")
-        return audio_bytes
-
-# ---------------------------------------------------------------------------
-# General temp media file management (per-session tracking and cleanup)
-# ---------------------------------------------------------------------------
-MEDIA_TEMP_DIR = os.path.join(tempfile.gettempdir(), "anycoder_media")
-MEDIA_FILE_TTL_SECONDS = 6 * 60 * 60  # 6 hours
-_SESSION_MEDIA_FILES: Dict[str, List[str]] = {}
-_MEDIA_FILES_LOCK = threading.Lock()
-
-# Global dictionary to store temporary media files for the session
-temp_media_files = {}
-
-def _ensure_media_dir_exists() -> None:
-    """Ensure the media temp directory exists."""
-    try:
-        os.makedirs(MEDIA_TEMP_DIR, exist_ok=True)
-    except Exception:
-        pass
-
-def track_session_media_file(session_id: str | None, file_path: str) -> None:
-    """Track a media file for session-based cleanup."""
-    if not session_id or not file_path:
-        return
-    with _MEDIA_FILES_LOCK:
-        if session_id not in _SESSION_MEDIA_FILES:
-            _SESSION_MEDIA_FILES[session_id] = []
-        _SESSION_MEDIA_FILES[session_id].append(file_path)
-
-def cleanup_session_media(session_id: str | None) -> None:
-    """Clean up media files for a specific session."""
-    if not session_id:
-        return
-    with _MEDIA_FILES_LOCK:
-        files_to_clean = _SESSION_MEDIA_FILES.pop(session_id, [])
-    
-    for path in files_to_clean:
-        try:
-            if path and os.path.exists(path):
-                os.unlink(path)
-        except Exception:
-            # Best-effort cleanup
-            pass
-
-def reap_old_media(ttl_seconds: int = MEDIA_FILE_TTL_SECONDS) -> None:
-    """Delete old media files in the temp directory based on modification time."""
-    try:
-        _ensure_media_dir_exists()
-        now_ts = time.time()
-        for name in os.listdir(MEDIA_TEMP_DIR):
-            path = os.path.join(MEDIA_TEMP_DIR, name)
-            if os.path.isfile(path):
-                try:
-                    mtime = os.path.getmtime(path)
-                    if (now_ts - mtime) > ttl_seconds:
-                        os.unlink(path)
-                except Exception:
-                    pass
-    except Exception:
-        # Temp dir might not exist or be accessible; ignore
-        pass
-
-def cleanup_all_temp_media_on_startup() -> None:
-    """Clean up all temporary media files on app startup."""
-    try:
-        # Clean up temp_media_files registry
-        temp_media_files.clear()
-        
-        # Clean up actual files from disk (assume all are orphaned on startup)
-        _ensure_media_dir_exists()
-        for name in os.listdir(MEDIA_TEMP_DIR):
-            path = os.path.join(MEDIA_TEMP_DIR, name)
-            if os.path.isfile(path):
-                try:
-                    os.unlink(path)
-                except Exception:
-                    pass
-        
-        # Clear session tracking
-        with _MEDIA_FILES_LOCK:
-            _SESSION_MEDIA_FILES.clear()
-            
-        print("[StartupCleanup] Cleaned up orphaned temporary media files")
-    except Exception as e:
-        print(f"[StartupCleanup] Error during media cleanup: {str(e)}")
-
-def cleanup_all_temp_media_on_shutdown() -> None:
-    """Clean up all temporary media files on app shutdown."""
-    try:
-        print("[ShutdownCleanup] Cleaning up temporary media files...")
-        
-        # Clean up temp_media_files registry and remove files
-        for file_id, file_info in temp_media_files.items():
-            try:
-                if os.path.exists(file_info['path']):
-                    os.unlink(file_info['path'])
-            except Exception:
-                pass
-        temp_media_files.clear()
-        
-        # Clean up all session files
-        with _MEDIA_FILES_LOCK:
-            for session_id, file_paths in _SESSION_MEDIA_FILES.items():
-                for path in file_paths:
-                    try:
-                        if path and os.path.exists(path):
-                            os.unlink(path)
-                    except Exception:
-                        pass
-            _SESSION_MEDIA_FILES.clear()
-        
-        print("[ShutdownCleanup] Temporary media cleanup completed")
-    except Exception as e:
-        print(f"[ShutdownCleanup] Error during cleanup: {str(e)}")
-
-# Register shutdown cleanup handler
-atexit.register(cleanup_all_temp_media_on_shutdown)
-
-def create_temp_media_url(media_bytes: bytes, filename: str, media_type: str = "image", session_id: str | None = None) -> str:
-    """Create a temporary file and return a local URL for preview.
-    
-    Args:
-        media_bytes: Raw bytes of the media file
-        filename: Name for the file (will be made unique)
-        media_type: Type of media ('image', 'video', 'audio')
-        session_id: Session ID for tracking cleanup
-    
-    Returns:
-        Temporary file URL for preview or error message
-    """
-    try:
-        # Create unique filename with timestamp and UUID
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        unique_id = str(uuid.uuid4())[:8]
-        base_name, ext = os.path.splitext(filename)
-        unique_filename = f"{media_type}_{timestamp}_{unique_id}_{base_name}{ext}"
-        
-        # Create temporary file in the dedicated directory
-        _ensure_media_dir_exists()
-        temp_path = os.path.join(MEDIA_TEMP_DIR, unique_filename)
-        
-        # Write media bytes to temporary file
-        with open(temp_path, 'wb') as f:
-            f.write(media_bytes)
-        
-        # Track file for session-based cleanup
-        if session_id:
-            track_session_media_file(session_id, temp_path)
-        
-        # Store the file info for later upload
-        file_id = f"{media_type}_{unique_id}"
-        temp_media_files[file_id] = {
-            'path': temp_path,
-            'filename': filename,
-            'media_type': media_type,
-            'media_bytes': media_bytes
-        }
-        
-        # Return file:// URL for preview
-        file_url = f"file://{temp_path}"
-        print(f"[TempMedia] Created temporary {media_type} file: {file_url}")
-        return file_url
-        
-    except Exception as e:
-        print(f"[TempMedia] Failed to create temporary file: {str(e)}")
-        return f"Error creating temporary {media_type} file: {str(e)}"
-
-def upload_media_to_hf(media_bytes: bytes, filename: str, media_type: str = "image", token: gr.OAuthToken | None = None, use_temp: bool = True) -> str:
-    """Upload media file to user's Hugging Face account or create temporary file.
-    
-    Args:
-        media_bytes: Raw bytes of the media file
-        filename: Name for the file (will be made unique)
-        media_type: Type of media ('image', 'video', 'audio')
-        token: OAuth token from gr.login (takes priority over env var)
-        use_temp: If True, create temporary file for preview; if False, upload to HF
-    
-    Returns:
-        Permanent URL to the uploaded file, temporary URL, or error message
-    """
-    try:
-        # If use_temp is True, create temporary file for preview
-        if use_temp:
-            return create_temp_media_url(media_bytes, filename, media_type)
-        
-        # Otherwise, upload to Hugging Face for permanent URL
-        # Try to get token from OAuth first, then fall back to environment variable
-        hf_token = None
-        if token and token.token:
-            hf_token = token.token
-        else:
-            hf_token = os.getenv('HF_TOKEN')
-        
-        if not hf_token:
-            return "Error: Please log in with your Hugging Face account to upload media, or set HF_TOKEN environment variable."
-        
-        # Initialize HF API
-        api = HfApi(token=hf_token)
-        
-        # Get current user info to determine username
-        try:
-            user_info = api.whoami()
-            username = user_info.get('name', 'unknown-user')
-        except Exception as e:
-            print(f"[HFUpload] Could not get user info: {e}")
-            username = 'anycoder-user'
-        
-        # Create repository name for media storage
-        repo_name = f"{username}/anycoder-media"
-        
-        # Try to create the repository if it doesn't exist
-        try:
-            api.create_repo(
-                repo_id=repo_name,
-                repo_type="dataset",
-                private=False,
-                exist_ok=True
-            )
-            print(f"[HFUpload] Repository {repo_name} ready")
-        except Exception as e:
-            print(f"[HFUpload] Repository creation/access issue: {e}")
-            # Continue anyway, repo might already exist
-        
-        # Create unique filename with timestamp and UUID
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        unique_id = str(uuid.uuid4())[:8]
-        base_name, ext = os.path.splitext(filename)
-        unique_filename = f"{media_type}/{timestamp}_{unique_id}_{base_name}{ext}"
-        
-        # Create temporary file for upload
-        with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
-            temp_file.write(media_bytes)
-            temp_path = temp_file.name
-        
-        try:
-            # Upload file to HF repository
-            api.upload_file(
-                path_or_fileobj=temp_path,
-                path_in_repo=unique_filename,
-                repo_id=repo_name,
-                repo_type="dataset",
-                commit_message=f"Upload {media_type} generated by AnyCoder"
-            )
-            
-            # Generate permanent URL
-            permanent_url = f"https://huggingface.co/datasets/{repo_name}/resolve/main/{unique_filename}"
-            print(f"[HFUpload] Successfully uploaded {media_type} to {permanent_url}")
-            return permanent_url
-            
-        finally:
-            # Clean up temporary file
-            try:
-                os.unlink(temp_path)
-            except Exception:
-                pass
-                
-    except Exception as e:
-        print(f"[HFUpload] Upload failed: {str(e)}")
-        return f"Error uploading {media_type} to Hugging Face: {str(e)}"
-
-def upload_temp_files_to_hf_and_replace_urls(html_content: str, token: gr.OAuthToken | None = None) -> str:
-    """Upload all temporary media files to HF and replace their URLs in HTML content.
-    
-    Args:
-        html_content: HTML content containing temporary file URLs
-        token: OAuth token for HF authentication
-    
-    Returns:
-        Updated HTML content with permanent HF URLs
-    """
-    try:
-        if not temp_media_files:
-            print("[DeployUpload] No temporary media files to upload")
-            return html_content
-        
-        print(f"[DeployUpload] Uploading {len(temp_media_files)} temporary media files to HF")
-        updated_content = html_content
-        
-        for file_id, file_info in temp_media_files.items():
-            try:
-                # Upload to HF with permanent URL
-                permanent_url = upload_media_to_hf(
-                    file_info['media_bytes'],
-                    file_info['filename'], 
-                    file_info['media_type'],
-                    token,
-                    use_temp=False  # Force permanent upload
-                )
-                
-                if not permanent_url.startswith("Error"):
-                    # Replace the temporary file URL with permanent URL
-                    temp_url = f"file://{file_info['path']}"
-                    updated_content = updated_content.replace(temp_url, permanent_url)
-                    print(f"[DeployUpload] Replaced {temp_url} with {permanent_url}")
-                else:
-                    print(f"[DeployUpload] Failed to upload {file_id}: {permanent_url}")
-            
-            except Exception as e:
-                print(f"[DeployUpload] Error uploading {file_id}: {str(e)}")
-                continue
-        
-        # Clean up temporary files after upload
-        cleanup_temp_media_files()
-        
-        return updated_content
-        
-    except Exception as e:
-        print(f"[DeployUpload] Failed to upload temporary files: {str(e)}")
-        return html_content
-
-def cleanup_temp_media_files():
-    """Clean up temporary media files from disk and memory."""
-    try:
-        for file_id, file_info in temp_media_files.items():
-            try:
-                if os.path.exists(file_info['path']):
-                    os.remove(file_info['path'])
-                    print(f"[TempCleanup] Removed {file_info['path']}")
-            except Exception as e:
-                print(f"[TempCleanup] Failed to remove {file_info['path']}: {str(e)}")
-        
-        # Clear the global dictionary
-        temp_media_files.clear()
-        print("[TempCleanup] Cleared temporary media files registry")
-        
-    except Exception as e:
-        print(f"[TempCleanup] Error during cleanup: {str(e)}")
-def generate_image_to_image(input_image_data, prompt: str, token: gr.OAuthToken | None = None) -> str:
-    """Generate an image using image-to-image via OpenRouter.
-
-    Uses Google Gemini 2.5 Flash Image Preview via OpenRouter chat completions API.
-
-    Returns an HTML <img> tag whose src is an uploaded temporary URL.
-    """
-    try:
-        # Check for OpenRouter API key
-        openrouter_key = os.getenv('OPENROUTER_API_KEY')
-        if not openrouter_key:
-            return "Error: OPENROUTER_API_KEY environment variable is not set. Please set it to your OpenRouter API key."
-
-        # Normalize input image to bytes
-        import io
-        from PIL import Image
-        import base64
-        import requests
-        import json as _json
-        try:
-            import numpy as np
-        except Exception:
-            np = None
-
-        if hasattr(input_image_data, 'read'):
-            raw = input_image_data.read()
-            pil_image = Image.open(io.BytesIO(raw))
-        elif hasattr(input_image_data, 'mode') and hasattr(input_image_data, 'size'):
-            pil_image = input_image_data
-        elif np is not None and isinstance(input_image_data, np.ndarray):
-            pil_image = Image.fromarray(input_image_data)
-        elif isinstance(input_image_data, (bytes, bytearray)):
-            pil_image = Image.open(io.BytesIO(input_image_data))
-        else:
-            pil_image = Image.open(io.BytesIO(bytes(input_image_data)))
-
-        if pil_image.mode != 'RGB':
-            pil_image = pil_image.convert('RGB')
-
-        # Resize input image to avoid request body size limits
-        max_input_size = 1024
-        if pil_image.width > max_input_size or pil_image.height > max_input_size:
-            pil_image.thumbnail((max_input_size, max_input_size), Image.Resampling.LANCZOS)
-
-        # Convert to base64
-        import io as _io
-        buffered = _io.BytesIO()
-        pil_image.save(buffered, format='PNG')
-        img_b64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
-
-        # Call OpenRouter API
-        headers = {
-            "Authorization": f"Bearer {openrouter_key}",
-            "Content-Type": "application/json",
-            "HTTP-Referer": os.getenv("YOUR_SITE_URL", "https://example.com"),
-            "X-Title": os.getenv("YOUR_SITE_NAME", "AnyCoder Image I2I"),
-        }
-        payload = {
-            "model": "google/gemini-2.5-flash-image-preview:free",
-            "messages": [
-                {
-                    "role": "user",
-                    "content": [
-                        {"type": "text", "text": prompt},
-                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_b64}"}},
-                    ],
-                }
-            ],
-            "max_tokens": 2048,
-        }
-        
-        try:
-            resp = requests.post(
-                "https://openrouter.ai/api/v1/chat/completions",
-                headers=headers,
-                data=_json.dumps(payload),
-                timeout=60,
-            )
-            resp.raise_for_status()
-            result_data = resp.json()
-            
-            # Corrected response parsing logic
-            message = result_data.get('choices', [{}])[0].get('message', {})
-            
-            if message and 'images' in message and message['images']:
-                # Get the first image from the 'images' list
-                image_data = message['images'][0]
-                base64_string = image_data.get('image_url', {}).get('url', '')
-                
-                if base64_string and ',' in base64_string:
-                    # Remove the "data:image/png;base64," prefix
-                    base64_content = base64_string.split(',')[1]
-                    
-                    # Decode the base64 string and create a PIL image
-                    img_bytes = base64.b64decode(base64_content)
-                    edited_image = Image.open(_io.BytesIO(img_bytes))
-                    
-                    # Convert PIL image to JPEG bytes for upload
-                    out_buf = _io.BytesIO()
-                    edited_image.convert('RGB').save(out_buf, format='JPEG', quality=90, optimize=True)
-                    image_bytes = out_buf.getvalue()
-                else:
-                    raise RuntimeError(f"API returned an invalid image format. Response: {_json.dumps(result_data, indent=2)}")
-            else:
-                raise RuntimeError(f"API did not return an image. Full Response: {_json.dumps(result_data, indent=2)}")
-                
-        except requests.exceptions.HTTPError as err:
-            error_body = err.response.text
-            if err.response.status_code == 401:
-                return "Error: Authentication failed. Check your OpenRouter API key."
-            elif err.response.status_code == 429:
-                return "Error: Rate limit exceeded or insufficient credits. Check your OpenRouter account."
-            else:
-                return f"Error: An API error occurred: {error_body}"
-        except Exception as e:
-            return f"Error: An unexpected error occurred: {str(e)}"
-
-        # Upload and return HTML tag
-        filename = "image_to_image_result.jpg"
-        temp_url = upload_media_to_hf(image_bytes, filename, "image", token, use_temp=True)
-        if temp_url.startswith("Error"):
-            return temp_url
-        return f"<img src=\"{temp_url}\" alt=\"{prompt}\" style=\"max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0;\" loading=\"lazy\" />"
-    except Exception as e:
-        print(f"Image-to-image generation error: {str(e)}")
-        return f"Error generating image (image-to-image): {str(e)}"
-def generate_video_from_image(input_image_data, prompt: str, session_id: str | None = None, token: gr.OAuthToken | None = None) -> str:
-    """Generate a video from an input image and prompt using Hugging Face InferenceClient.
-
-    Returns an HTML <video> tag whose source points to a local file URL (file://...).
-    """
-    try:
-        print("[Image2Video] Starting video generation")
-        if not os.getenv('HF_TOKEN'):
-            print("[Image2Video] Missing HF_TOKEN")
-            return "Error: HF_TOKEN environment variable is not set. Please set it to your Hugging Face API token."
-
-        # Prepare client
-        client = InferenceClient(
-            provider="auto",
-            api_key=os.getenv('HF_TOKEN'),
-            bill_to="huggingface",
-        )
-        print(f"[Image2Video] InferenceClient initialized (provider=auto)")
-
-        # Normalize input image to bytes, with downscale/compress to cap request size
-        import io
-        from PIL import Image
-        try:
-            import numpy as np
-        except Exception:
-            np = None
-
-        def _load_pil(img_like) -> Image.Image:
-            if hasattr(img_like, 'read'):
-                return Image.open(io.BytesIO(img_like.read()))
-            if hasattr(img_like, 'mode') and hasattr(img_like, 'size'):
-                return img_like
-            if np is not None and isinstance(img_like, np.ndarray):
-                return Image.fromarray(img_like)
-            if isinstance(img_like, (bytes, bytearray)):
-                return Image.open(io.BytesIO(img_like))
-            return Image.open(io.BytesIO(bytes(img_like)))
-
-        pil_image = _load_pil(input_image_data)
-        if pil_image.mode != 'RGB':
-            pil_image = pil_image.convert('RGB')
-        try:
-            print(f"[Image2Video] Input PIL image size={pil_image.size} mode={pil_image.mode}")
-        except Exception:
-            pass
-
-        # Progressive encode to keep payload under ~3.9MB (below 4MB limit)
-        MAX_BYTES = 3_900_000
-        max_dim = 1024  # initial cap on longest edge
-        quality = 90
-
-        def encode_current(pil: Image.Image, q: int) -> bytes:
-            tmp = io.BytesIO()
-            pil.save(tmp, format='JPEG', quality=q, optimize=True)
-            return tmp.getvalue()
-
-        # Downscale while the longest edge exceeds max_dim
-        while max(pil_image.size) > max_dim:
-            ratio = max_dim / float(max(pil_image.size))
-            new_size = (max(1, int(pil_image.size[0] * ratio)), max(1, int(pil_image.size[1] * ratio)))
-            pil_image = pil_image.resize(new_size, Image.Resampling.LANCZOS)
-
-        encoded = encode_current(pil_image, quality)
-        # If still too big, iteratively reduce quality, then dimensions
-        while len(encoded) > MAX_BYTES and (quality > 40 or max(pil_image.size) > 640):
-            if quality > 40:
-                quality -= 10
-            else:
-                # reduce dims by 15% if already at low quality
-                new_w = max(1, int(pil_image.size[0] * 0.85))
-                new_h = max(1, int(pil_image.size[1] * 0.85))
-                pil_image = pil_image.resize((new_w, new_h), Image.Resampling.LANCZOS)
-            encoded = encode_current(pil_image, quality)
-
-        input_bytes = encoded
-
-        # Call image-to-video; require method support
-        model_id = "Lightricks/LTX-Video-0.9.8-13B-distilled"
-        image_to_video_method = getattr(client, "image_to_video", None)
-        if not callable(image_to_video_method):
-            print("[Image2Video] InferenceClient.image_to_video not available in this huggingface_hub version")
-            return (
-                "Error generating video (image-to-video): Your installed huggingface_hub version "
-                "does not expose InferenceClient.image_to_video. Please upgrade with "
-                "`pip install -U huggingface_hub` and try again."
-            )
-        print(f"[Image2Video] Calling image_to_video with model={model_id}, prompt length={len(prompt or '')}")
-        video_bytes = image_to_video_method(
-            input_bytes,
-            prompt=prompt,
-            model=model_id,
-        )
-        print(f"[Image2Video] Received video bytes: {len(video_bytes) if hasattr(video_bytes, '__len__') else 'unknown length'}")
-
-        # Create temporary URL for preview (will be uploaded to HF during deploy)
-        filename = "image_to_video_result.mp4"
-        temp_url = upload_media_to_hf(video_bytes, filename, "video", token, use_temp=True)
-        
-        # Check if creation was successful
-        if temp_url.startswith("Error"):
-            return temp_url
-        
-        video_html = (
-            f'<video controls autoplay muted loop playsinline '
-            f'style="max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0; display: block;" '
-            f'onloadstart="this.style.backgroundColor=\'#f0f0f0\'" '
-            f'onerror="this.style.display=\'none\'; console.error(\'Video failed to load\')">'
-            f'<source src="{temp_url}" type="video/mp4" />'
-            f'<p style="text-align: center; color: #666;">Your browser does not support the video tag.</p>'
-            f'</video>'
-        )
-        
-        print(f"[Image2Video] Successfully generated video HTML tag with temporary URL: {temp_url}")
-        
-        # Validate the generated video HTML
-        if not validate_video_html(video_html):
-            print("[Image2Video] Generated video HTML failed validation")
-            return "Error: Generated video HTML is malformed"
-        
-        return video_html
-    except Exception as e:
-        import traceback
-        print("[Image2Video] Exception during generation:")
-        traceback.print_exc()
-        print(f"Image-to-video generation error: {str(e)}")
-        return f"Error generating video (image-to-video): {str(e)}"
-
-def generate_video_from_text(prompt: str, session_id: str | None = None, token: gr.OAuthToken | None = None) -> str:
-    """Generate a video from a text prompt using Hugging Face InferenceClient.
-
-    Returns an HTML <video> tag with compressed data URI for deployment compatibility.
-    """
-    try:
-        print("[Text2Video] Starting video generation from text")
-        if not os.getenv('HF_TOKEN'):
-            print("[Text2Video] Missing HF_TOKEN")
-            return "Error: HF_TOKEN environment variable is not set. Please set it to your Hugging Face API token."
-
-        client = InferenceClient(
-            provider="auto",
-            api_key=os.getenv('HF_TOKEN'),
-            bill_to="huggingface",
-        )
-        print("[Text2Video] InferenceClient initialized (provider=auto)")
-
-        # Ensure the client has text_to_video (newer huggingface_hub)
-        text_to_video_method = getattr(client, "text_to_video", None)
-        if not callable(text_to_video_method):
-            print("[Text2Video] InferenceClient.text_to_video not available in this huggingface_hub version")
-            return (
-                "Error generating video (text-to-video): Your installed huggingface_hub version "
-                "does not expose InferenceClient.text_to_video. Please upgrade with "
-                "`pip install -U huggingface_hub` and try again."
-            )
-
-        model_id = "Wan-AI/Wan2.2-T2V-A14B"
-        prompt_str = (prompt or "").strip()
-        print(f"[Text2Video] Calling text_to_video with model={model_id}, prompt length={len(prompt_str)}")
-        video_bytes = text_to_video_method(
-            prompt_str,
-            model=model_id,
-        )
-        print(f"[Text2Video] Received video bytes: {len(video_bytes) if hasattr(video_bytes, '__len__') else 'unknown length'}")
-
-        # Create temporary URL for preview (will be uploaded to HF during deploy)
-        filename = "text_to_video_result.mp4"
-        temp_url = upload_media_to_hf(video_bytes, filename, "video", token, use_temp=True)
-        
-        # Check if creation was successful
-        if temp_url.startswith("Error"):
-            return temp_url
-        
-        video_html = (
-            f'<video controls autoplay muted loop playsinline '
-            f'style="max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0; display: block;" '
-            f'onloadstart="this.style.backgroundColor=\'#f0f0f0\'" '
-            f'onerror="this.style.display=\'none\'; console.error(\'Video failed to load\')">'
-            f'<source src="{temp_url}" type="video/mp4" />'
-            f'<p style="text-align: center; color: #666;">Your browser does not support the video tag.</p>'
-            f'</video>'
-        )
-        
-        print(f"[Text2Video] Successfully generated video HTML tag with temporary URL: {temp_url}")
-        
-        # Validate the generated video HTML
-        if not validate_video_html(video_html):
-            print("[Text2Video] Generated video HTML failed validation")
-            return "Error: Generated video HTML is malformed"
-        
-        return video_html
-    except Exception as e:
-        import traceback
-        print("[Text2Video] Exception during generation:")
-        traceback.print_exc()
-        print(f"Text-to-video generation error: {str(e)}")
-        return f"Error generating video (text-to-video): {str(e)}"
-
-def generate_video_from_video(input_video_data, prompt: str, session_id: str | None = None, token: gr.OAuthToken | None = None) -> str:
-    """Generate a video from an input video and prompt using Decart AI's Lucy Pro V2V API.
-    
-    Returns an HTML <video> tag whose source points to a temporary file URL.
-    """
-    try:
-        print("[Video2Video] Starting video generation from video")
-        
-        # Check for Decart API key
-        api_key = os.getenv('DECART_API_KEY')
-        if not api_key:
-            print("[Video2Video] Missing DECART_API_KEY")
-            return "Error: DECART_API_KEY environment variable is not set. Please set it to your Decart AI API token."
-        
-        # Normalize input video to bytes
-        import io
-        import tempfile
-        
-        def _load_video_bytes(video_like) -> bytes:
-            if hasattr(video_like, 'read'):
-                return video_like.read()
-            if isinstance(video_like, (bytes, bytearray)):
-                return bytes(video_like)
-            if hasattr(video_like, 'name'):  # File path
-                with open(video_like.name, 'rb') as f:
-                    return f.read()
-            # If it's a string, assume it's a file path
-            if isinstance(video_like, str):
-                with open(video_like, 'rb') as f:
-                    return f.read()
-            return bytes(video_like)
-        
-        video_bytes = _load_video_bytes(input_video_data)
-        print(f"[Video2Video] Input video size: {len(video_bytes)} bytes")
-        
-        # Prepare the API request
-        form_data = {
-            "prompt": prompt or "Enhance the video quality"
-        }
-        
-        # Create temporary file for video data
-        with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_file:
-            temp_file.write(video_bytes)
-            temp_file_path = temp_file.name
-        
-        try:
-            # Make API request to Decart AI
-            with open(temp_file_path, "rb") as video_file:
-                files = {"data": video_file}
-                headers = {"X-API-KEY": api_key}
-                
-                print(f"[Video2Video] Calling Decart API with prompt: {prompt}")
-                response = requests.post(
-                    "https://api.decart.ai/v1/generate/lucy-pro-v2v",
-                    headers=headers,
-                    data=form_data,
-                    files=files,
-                    timeout=300  # 5 minute timeout
-                )
-                
-                if response.status_code != 200:
-                    print(f"[Video2Video] API request failed with status {response.status_code}: {response.text}")
-                    return f"Error: Decart API request failed with status {response.status_code}"
-                
-                result_video_bytes = response.content
-                print(f"[Video2Video] Received video bytes: {len(result_video_bytes)}")
-                
-        finally:
-            # Clean up temporary file
-            try:
-                os.unlink(temp_file_path)
-            except Exception:
-                pass
-        
-        # Create temporary URL for preview (will be uploaded to HF during deploy)
-        filename = "video_to_video_result.mp4"
-        temp_url = upload_media_to_hf(result_video_bytes, filename, "video", token, use_temp=True)
-        
-        # Check if creation was successful
-        if temp_url.startswith("Error"):
-            return temp_url
-        
-        video_html = (
-            f'<video controls autoplay muted loop playsinline '
-            f'style="max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0; display: block;" '
-            f'onloadstart="this.style.backgroundColor=\'#f0f0f0\'" '
-            f'onerror="this.style.display=\'none\'; console.error(\'Video failed to load\')">'
-            f'<source src="{temp_url}" type="video/mp4" />'
-            f'<p style="text-align: center; color: #666;">Your browser does not support the video tag.</p>'
-            f'</video>'
-        )
-        
-        print(f"[Video2Video] Successfully generated video HTML tag with temporary URL: {temp_url}")
-        
-        # Validate the generated video HTML
-        if not validate_video_html(video_html):
-            print("[Video2Video] Generated video HTML failed validation")
-            return "Error: Generated video HTML is malformed"
-        
-        return video_html
-        
-    except Exception as e:
-        import traceback
-        print("[Video2Video] Exception during generation:")
-        traceback.print_exc()
-        print(f"Video-to-video generation error: {str(e)}")
-        return f"Error generating video (video-to-video): {str(e)}"
-
-def generate_music_from_text(prompt: str, music_length_ms: int = 30000, session_id: str | None = None, token: gr.OAuthToken | None = None) -> str:
-    """Generate music from a text prompt using ElevenLabs Music API and return an HTML <audio> tag.
-
-    Returns compressed data URI for deployment compatibility.
-    Requires ELEVENLABS_API_KEY in the environment.
-    """
-    try:
-        api_key = os.getenv('ELEVENLABS_API_KEY')
-        if not api_key:
-            return "Error: ELEVENLABS_API_KEY environment variable is not set."
-
-        headers = {
-            'Content-Type': 'application/json',
-            'xi-api-key': api_key,
-        }
-        payload = {
-            'prompt': (prompt or 'Epic orchestral theme with soaring strings and powerful brass'),
-            'music_length_ms': int(music_length_ms) if music_length_ms else 30000,
-        }
-
-        resp = requests.post('https://api.elevenlabs.io/v1/music/compose', headers=headers, json=payload)
-        try:
-            resp.raise_for_status()
-        except Exception as e:
-            return f"Error generating music: {getattr(e, 'response', resp).text if hasattr(e, 'response') else resp.text}"
-
-        # Create temporary URL for preview (will be uploaded to HF during deploy)
-        filename = "generated_music.mp3"
-        temp_url = upload_media_to_hf(resp.content, filename, "audio", token, use_temp=True)
-        
-        # Check if creation was successful
-        if temp_url.startswith("Error"):
-            return temp_url
-        
-        audio_html = (
-            "<div class=\"anycoder-music\" style=\"max-width:420px;margin:16px auto;padding:12px 16px;border:1px solid #e5e7eb;border-radius:12px;background:linear-gradient(180deg,#fafafa,#f3f4f6);box-shadow:0 2px 8px rgba(0,0,0,0.06)\">"
-            "  <div style=\"font-size:13px;color:#374151;margin-bottom:8px;display:flex;align-items:center;gap:6px\">"
-            "    <span>🎵 Generated music</span>"
-            "  </div>"
-            f"  <audio controls autoplay loop style=\"width:100%;outline:none;\">"
-            f"    <source src=\"{temp_url}\" type=\"audio/mpeg\" />"
-            "    Your browser does not support the audio element."
-            "  </audio>"
-            "</div>"
-        )
-        
-        print(f"[Music] Successfully generated music HTML tag with temporary URL: {temp_url}")
-        return audio_html
-    except Exception as e:
-        return f"Error generating music: {str(e)}"
-
-class WanAnimateApp:
-    """Wan2.2-Animate integration for character animation and video replacement using DashScope API"""
-    
-    def __init__(self):
-        self.api_key = os.getenv("DASHSCOPE_API_KEY")
-        if self.api_key:
-            dashscope.api_key = self.api_key
-        self.url = "https://dashscope.aliyuncs.com/api/v1/services/aigc/image2video/video-synthesis/"
-        self.get_url = "https://dashscope.aliyuncs.com/api/v1/tasks"
-
-    def check_task_status(self, task_id: str):
-        """Check the status of a specific animation task by TaskId"""
-        if not self.api_key:
-            return None, "Error: DASHSCOPE_API_KEY environment variable is not set"
-            
-        try:
-            get_url = f"{self.get_url}/{task_id}"
-            headers = {
-                "Authorization": f"Bearer {self.api_key}",
-                "Content-Type": "application/json"
-            }
-            
-            response = requests.get(get_url, headers=headers, timeout=30)
-            if response.status_code != 200:
-                error_msg = f"Failed to get task status: {response.status_code}: {response.text}"
-                return None, error_msg
-            
-            result = response.json()
-            task_status = result.get("output", {}).get("task_status")
-            
-            if task_status == "SUCCEEDED":
-                video_url = result["output"]["results"]["video_url"]
-                return video_url, "SUCCEEDED"
-            elif task_status == "FAILED":
-                error_msg = result.get("output", {}).get("message", "Unknown error")
-                code_msg = result.get("output", {}).get("code", "Unknown code")
-                return None, f"Task failed: {error_msg} Code: {code_msg}"
-            else:
-                return None, f"Task is still {task_status}"
-                
-        except Exception as e:
-            return None, f"Exception checking task status: {str(e)}"
-
-    def predict(self, ref_img, video, model_id, model):
-        """
-        Generate animated video using Wan2.2-Animate
-        
-        Args:
-            ref_img: Reference image file path
-            video: Template video file path  
-            model_id: Animation mode ("wan2.2-animate-move" or "wan2.2-animate-mix")
-            model: Inference quality ("wan-pro" or "wan-std")
-            
-        Returns:
-            Tuple of (video_url, status_message)
-        """
-        if not self.api_key:
-            return None, "Error: DASHSCOPE_API_KEY environment variable is not set"
-            
-        try:
-            # Upload files to OSS if needed and get URLs
-            _, image_url = check_and_upload_local(model_id, ref_img, self.api_key)
-            _, video_url = check_and_upload_local(model_id, video, self.api_key)
-
-            # Prepare the request payload
-            payload = {
-                "model": model_id,
-                "input": {
-                    "image_url": image_url,
-                    "video_url": video_url
-                },
-                "parameters": {
-                    "check_image": True,
-                    "mode": model,
-                }
-            }
-            
-            # Set up headers
-            headers = {
-                "X-DashScope-Async": "enable",
-                "X-DashScope-OssResourceResolve": "enable",
-                "Authorization": f"Bearer {self.api_key}",
-                "Content-Type": "application/json"
-            }
-            
-            # Make the initial API request
-            response = requests.post(self.url, json=payload, headers=headers)
-            
-            # Check if request was successful
-            if response.status_code != 200:
-                error_msg = f"Initial request failed with status code {response.status_code}: {response.text}"
-                print(f"[WanAnimate] {error_msg}")
-                return None, error_msg
-            
-            # Get the task ID from response
-            result = response.json()
-            task_id = result.get("output", {}).get("task_id")
-            if not task_id:
-                error_msg = "Failed to get task ID from response"
-                print(f"[WanAnimate] {error_msg}")
-                return None, error_msg
-            
-            # Poll for results
-            get_url = f"{self.get_url}/{task_id}"
-            headers = {
-                "Authorization": f"Bearer {self.api_key}",
-                "Content-Type": "application/json"
-            }
-            
-            max_attempts = 180  # 15 minutes max wait time (increased from 5 minutes)
-            attempt = 0
-            
-            while attempt < max_attempts:
-                try:
-                    response = requests.get(get_url, headers=headers, timeout=30)
-                    if response.status_code != 200:
-                        error_msg = f"Failed to get task status: {response.status_code}: {response.text}"
-                        print(f"[WanAnimate] {error_msg}")
-                        return None, error_msg
-                    
-                    result = response.json()
-                    task_status = result.get("output", {}).get("task_status")
-                    
-                    # Log progress every 20 attempts (100 seconds) to show activity
-                    if attempt % 20 == 0 or task_status in ["SUCCEEDED", "FAILED"]:
-                        print(f"[WanAnimate] Task status check {attempt + 1}/{max_attempts}: {task_status} (TaskId: {task_id})")
-                    
-                    if task_status == "SUCCEEDED":
-                        # Task completed successfully, return video URL
-                        video_url = result["output"]["results"]["video_url"]
-                        print(f"[WanAnimate] Animation completed successfully: {video_url}")
-                        return video_url, "SUCCEEDED"
-                    elif task_status == "FAILED":
-                        # Task failed, return error message
-                        error_msg = result.get("output", {}).get("message", "Unknown error")
-                        code_msg = result.get("output", {}).get("code", "Unknown code")
-                        full_error = f"Task failed: {error_msg} Code: {code_msg} TaskId: {task_id}"
-                        print(f"[WanAnimate] {full_error}")
-                        return None, full_error
-                    else:
-                        # Task is still running, wait and retry
-                        time.sleep(5)  # Wait 5 seconds before polling again
-                        attempt += 1
-                        
-                except requests.exceptions.RequestException as e:
-                    print(f"[WanAnimate] Network error during status check {attempt + 1}: {str(e)}")
-                    # For network errors, wait a bit longer before retrying
-                    time.sleep(10)
-                    attempt += 1
-                    continue
-            
-            # Timeout reached
-            timeout_msg = f"Animation generation timed out after {max_attempts * 5} seconds ({max_attempts * 5 // 60} minutes). TaskId: {task_id}. The animation may still be processing - please check back later or try with a simpler input."
-            print(f"[WanAnimate] {timeout_msg}")
-            return None, timeout_msg
-            
-        except Exception as e:
-            error_msg = f"Exception during animation generation: {str(e)}"
-            print(f"[WanAnimate] {error_msg}")
-            return None, error_msg
-
-def generate_animation_from_image_video(input_image_data, input_video_data, prompt: str, model_id: str = "wan2.2-animate-move", model: str = "wan-pro", session_id: str | None = None, token: gr.OAuthToken | None = None) -> str:
-    """Generate animated video from reference image and template video using Wan2.2-Animate.
-    
-    Returns an HTML <video> tag whose source points to a temporary file URL.
-    """
-    try:
-        print(f"[ImageVideo2Animation] Starting animation generation with model={model_id}, quality={model}")
-        
-        if not os.getenv("DASHSCOPE_API_KEY"):
-            print("[ImageVideo2Animation] Missing DASHSCOPE_API_KEY")
-            return "Error: DASHSCOPE_API_KEY environment variable is not set. Please configure your DashScope API key."
-
-        # Normalize inputs to file paths
-        def _save_to_temp_file(data, suffix):
-            if isinstance(data, str) and os.path.exists(data):
-                return data
-            elif hasattr(data, 'name') and os.path.exists(data.name):
-                return data.name
-            else:
-                # Save to temporary file
-                temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
-                if hasattr(data, 'read'):
-                    temp_file.write(data.read())
-                elif isinstance(data, (bytes, bytearray)):
-                    temp_file.write(data)
-                elif isinstance(data, np.ndarray):
-                    # Handle numpy array (likely image data)
-                    if suffix.lower() in ['.jpg', '.jpeg', '.png']:
-                        # Convert numpy array to image
-                        from PIL import Image
-                        if data.dtype != np.uint8:
-                            data = (data * 255).astype(np.uint8)
-                        if len(data.shape) == 3 and data.shape[2] == 3:
-                            # RGB image
-                            img = Image.fromarray(data, 'RGB')
-                        elif len(data.shape) == 3 and data.shape[2] == 4:
-                            # RGBA image
-                            img = Image.fromarray(data, 'RGBA')
-                        elif len(data.shape) == 2:
-                            # Grayscale image
-                            img = Image.fromarray(data, 'L')
-                        else:
-                            raise ValueError(f"Unsupported numpy array shape for image: {data.shape}")
-                        img.save(temp_file.name, format='JPEG' if suffix.lower() in ['.jpg', '.jpeg'] else 'PNG')
-                    else:
-                        raise ValueError(f"Cannot save numpy array as {suffix} format")
-                else:
-                    raise ValueError(f"Unsupported data type: {type(data)}")
-                temp_file.close()
-                return temp_file.name
-
-        ref_img_path = _save_to_temp_file(input_image_data, '.jpg')
-        video_path = _save_to_temp_file(input_video_data, '.mp4')
-        
-        print(f"[ImageVideo2Animation] Input files prepared: image={ref_img_path}, video={video_path}")
-
-        # Initialize WanAnimateApp and generate animation
-        wan_app = WanAnimateApp()
-        video_url, status = wan_app.predict(ref_img_path, video_path, model_id, model)
-        
-        if video_url and status == "SUCCEEDED":
-            print(f"[ImageVideo2Animation] Animation generated successfully: {video_url}")
-            
-            # Download the video and create temporary URL
-            try:
-                response = requests.get(video_url, timeout=60)
-                response.raise_for_status()
-                video_bytes = response.content
-                
-                filename = "wan_animate_result.mp4"
-                temp_url = upload_media_to_hf(video_bytes, filename, "video", token, use_temp=True)
-                
-                if temp_url.startswith("Error"):
-                    print(f"[ImageVideo2Animation] Failed to upload video: {temp_url}")
-                    return temp_url
-                
-                # Create video HTML tag
-                video_html = (
-                    f'<video controls autoplay muted loop playsinline '
-                    f'style="max-width:100%; height:auto; border-radius:8px; box-shadow:0 4px 8px rgba(0,0,0,0.1)" '
-                    f'onerror="this.style.display=\'none\'; console.error(\'Animation video failed to load\')">'
-                    f'<source src="{temp_url}" type="video/mp4" />'
-                    f'<p style="text-align: center; color: #666;">Your browser does not support the video tag.</p>'
-                    f'</video>'
-                )
-                
-                print(f"[ImageVideo2Animation] Successfully created animation HTML with temporary URL: {temp_url}")
-                return video_html
-                
-            except Exception as e:
-                error_msg = f"Failed to download generated animation: {str(e)}"
-                print(f"[ImageVideo2Animation] {error_msg}")
-                return f"Error: {error_msg}"
-        else:
-            # Provide more helpful error messages based on status
-            if "timed out" in str(status).lower():
-                error_msg = f"Animation generation timed out. This can happen with complex animations or during high server load. Please try again with simpler inputs or wait a few minutes before retrying. Details: {status}"
-            elif "taskid" in str(status).lower():
-                error_msg = f"Animation generation failed. You can check the status later using the TaskId from the error message. Details: {status}"
-            else:
-                error_msg = f"Animation generation failed: {status}"
-            print(f"[ImageVideo2Animation] {error_msg}")
-            return f"Error: {error_msg}"
-            
-    except Exception as e:
-        print(f"[ImageVideo2Animation] Exception during generation:")
-        print(f"Animation generation error: {str(e)}")
-        return f"Error generating animation: {str(e)}"
-
-def extract_image_prompts_from_text(text: str, num_images_needed: int = 1) -> list:
-    """Extract image generation prompts from the full text based on number of images needed"""
-    # Use the entire text as the base prompt for image generation
-    # Clean up the text and create variations for the required number of images
-    
-    # Clean the text
-    cleaned_text = text.strip()
-    if not cleaned_text:
-        return []
-    
-    # Create variations of the prompt for the required number of images
-    prompts = []
-    
-    # Generate exactly the number of images needed
-    for i in range(num_images_needed):
-        if i == 0:
-            # First image: Use the full prompt as-is
-            prompts.append(cleaned_text)
-        elif i == 1:
-            # Second image: Add "visual representation" to make it more image-focused
-            prompts.append(f"Visual representation of {cleaned_text}")
-        elif i == 2:
-            # Third image: Add "illustration" to create a different style
-            prompts.append(f"Illustration of {cleaned_text}")
-        else:
-            # For additional images, use different variations
-            variations = [
-                f"Digital art of {cleaned_text}",
-                f"Modern design of {cleaned_text}",
-                f"Professional illustration of {cleaned_text}",
-                f"Clean design of {cleaned_text}",
-                f"Beautiful visualization of {cleaned_text}",
-                f"Stylish representation of {cleaned_text}",
-                f"Contemporary design of {cleaned_text}",
-                f"Elegant illustration of {cleaned_text}"
-            ]
-            variation_index = (i - 3) % len(variations)
-            prompts.append(variations[variation_index])
-    
-    return prompts
-def create_image_replacement_blocks(html_content: str, user_prompt: str) -> str:
-    """Create search/replace blocks to replace placeholder images with generated Qwen images"""
-    if not user_prompt:
-        return ""
-    
-    # Find existing image placeholders in the HTML first
-    import re
-    
-    # Common patterns for placeholder images
-    placeholder_patterns = [
-        r'<img[^>]*src=["\'](?:placeholder|dummy|sample|example)[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://via\.placeholder\.com[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://picsum\.photos[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://dummyimage\.com[^"\']*["\'][^>]*>',
-        r'<img[^>]*alt=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*class=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*id=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']data:image[^"\']*["\'][^>]*>',  # Base64 images
-        r'<img[^>]*src=["\']#["\'][^>]*>',  # Empty src
-        r'<img[^>]*src=["\']about:blank["\'][^>]*>',  # About blank
-    ]
-    
-    # Find all placeholder images
-    placeholder_images = []
-    for pattern in placeholder_patterns:
-        matches = re.findall(pattern, html_content, re.IGNORECASE)
-        placeholder_images.extend(matches)
-    
-    # Filter out HF URLs from placeholders (they are real generated content)
-    placeholder_images = [img for img in placeholder_images if 'huggingface.co/datasets/' not in img]
-    
-    # If no placeholder images found, look for any img tags
-    if not placeholder_images:
-        img_pattern = r'<img[^>]*>'
-        # Case-insensitive to catch <IMG> or mixed-case tags
-        placeholder_images = re.findall(img_pattern, html_content, re.IGNORECASE)
-    
-    # Also look for div elements that might be image placeholders
-    div_placeholder_patterns = [
-        r'<div[^>]*class=["\'][^"\']*(?:image|img|photo|picture)[^"\']*["\'][^>]*>.*?</div>',
-        r'<div[^>]*id=["\'][^"\']*(?:image|img|photo|picture)[^"\']*["\'][^>]*>.*?</div>',
-    ]
-    
-    for pattern in div_placeholder_patterns:
-        matches = re.findall(pattern, html_content, re.IGNORECASE | re.DOTALL)
-        placeholder_images.extend(matches)
-    
-    # Count how many images we need to generate
-    num_images_needed = len(placeholder_images)
-    
-    if num_images_needed == 0:
-        return ""
-    
-    # Generate image prompts based on the number of images found
-    image_prompts = extract_image_prompts_from_text(user_prompt, num_images_needed)
-    
-    # Generate images for each prompt
-    generated_images = []
-    for i, prompt in enumerate(image_prompts):
-        image_html = generate_image_with_hunyuan(prompt, i, token=None)  # TODO: Pass token from parent context
-        if not image_html.startswith("Error"):
-            generated_images.append((i, image_html))
-    
-    if not generated_images:
-        return ""
-    
-    # Create search/replace blocks
-    replacement_blocks = []
-    
-    for i, (prompt_index, generated_image) in enumerate(generated_images):
-        if i < len(placeholder_images):
-            # Replace existing placeholder
-            placeholder = placeholder_images[i]
-            # Clean up the placeholder for better matching
-            placeholder_clean = re.sub(r'\s+', ' ', placeholder.strip())
-            
-            # Try multiple variations of the placeholder for better matching
-            placeholder_variations = [
-                placeholder_clean,
-                placeholder_clean.replace('"', "'"),
-                placeholder_clean.replace("'", '"'),
-                re.sub(r'\s+', ' ', placeholder_clean),
-                placeholder_clean.replace('  ', ' '),
-            ]
-            
-            # Create a replacement block for each variation
-            for variation in placeholder_variations:
-                replacement_blocks.append(f"""{SEARCH_START}
-{variation}
-{DIVIDER}
-{generated_image}
-{REPLACE_END}""")
-        else:
-            # Add new image if we have more generated images than placeholders
-            # Find a good insertion point (after body tag or main content)
-            if '<body' in html_content:
-                body_end = html_content.find('>', html_content.find('<body')) + 1
-                insertion_point = html_content[:body_end] + '\n    '
-                replacement_blocks.append(f"""{SEARCH_START}
-{insertion_point}
-{DIVIDER}
-{insertion_point}
-    {generated_image}
-{REPLACE_END}""")
-    
-    return '\n\n'.join(replacement_blocks)
-def create_image_replacement_blocks_text_to_image_single(html_content: str, prompt: str) -> str:
-    """Create search/replace blocks that generate and insert ONLY ONE text-to-image result.
-
-    Replaces the first detected placeholder; if none found, inserts one image near the top of <body>.
-    """
-    if not prompt or not prompt.strip():
-        return ""
-
-    import re
-
-    # Detect placeholders similarly to the multi-image version
-    placeholder_patterns = [
-        r'<img[^>]*src=["\'](?:placeholder|dummy|sample|example)[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://via\.placeholder\.com[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://picsum\.photos[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://dummyimage\.com[^"\']*["\'][^>]*>',
-        r'<img[^>]*alt=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*class=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*id=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']data:image[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']#["\'][^>]*>',
-        r'<img[^>]*src=["\']about:blank["\'][^>]*>',
-    ]
-
-    placeholder_images = []
-    for pattern in placeholder_patterns:
-        matches = re.findall(pattern, html_content, re.IGNORECASE)
-        if matches:
-            placeholder_images.extend(matches)
-    
-    # Filter out HF URLs from placeholders (they are real generated content)
-    placeholder_images = [img for img in placeholder_images if 'huggingface.co/datasets/' not in img]
-    
-    # Filter out HF URLs from placeholders (they are real generated content)
-    placeholder_images = [img for img in placeholder_images if 'huggingface.co/datasets/' not in img]
-
-    # Fallback to any <img> if no placeholders
-    if not placeholder_images:
-        img_pattern = r'<img[^>]*>'
-        placeholder_images = re.findall(img_pattern, html_content)
-
-    # Generate a single image
-    image_html = generate_image_with_hunyuan(prompt, 0, token=None)  # TODO: Pass token from parent context
-    if image_html.startswith("Error"):
-        return ""
-
-    # Replace first placeholder if present
-    if placeholder_images:
-        placeholder = placeholder_images[0]
-        placeholder_clean = re.sub(r'\s+', ' ', placeholder.strip())
-        placeholder_variations = [
-            placeholder_clean,
-            placeholder_clean.replace('"', "'"),
-            placeholder_clean.replace("'", '"'),
-            re.sub(r'\s+', ' ', placeholder_clean),
-            placeholder_clean.replace('  ', ' '),
-        ]
-        blocks = []
-        for variation in placeholder_variations:
-            blocks.append(f"""{SEARCH_START}
-{variation}
-{DIVIDER}
-{image_html}
-{REPLACE_END}""")
-        return '\n\n'.join(blocks)
-
-    # Otherwise insert after <body>
-    if '<body' in html_content:
-        body_end = html_content.find('>', html_content.find('<body')) + 1
-        insertion_point = html_content[:body_end] + '\n    '
-        return f"""{SEARCH_START}
-{insertion_point}
-{DIVIDER}
-{insertion_point}
-    {image_html}
-{REPLACE_END}"""
-
-    # If no <body>, just append
-    return f"{SEARCH_START}\n\n{DIVIDER}\n{image_html}\n{REPLACE_END}"
-
-def create_video_replacement_blocks_text_to_video(html_content: str, prompt: str, session_id: str | None = None) -> str:
-    """Create search/replace blocks that generate and insert ONLY ONE text-to-video result.
-
-    Replaces the first detected <img> placeholder; if none found, inserts one video near the top of <body>.
-    """
-    if not prompt or not prompt.strip():
-        return ""
-
-    import re
-
-    # Detect the same placeholders as image counterparts, to replace the first image slot with a video
-    placeholder_patterns = [
-        r'<img[^>]*src=["\'](?:placeholder|dummy|sample|example)[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://via\.placeholder\.com[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://picsum\.photos[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://dummyimage\.com[^"\']*["\'][^>]*>',
-        r'<img[^>]*alt=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*class=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*id=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']data:image[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']#["\'][^>]*>',
-        r'<img[^>]*src=["\']about:blank["\'][^>]*>',
-    ]
-
-    placeholder_images = []
-    for pattern in placeholder_patterns:
-        matches = re.findall(pattern, html_content, re.IGNORECASE)
-        if matches:
-            placeholder_images.extend(matches)
-    
-    # Filter out HF URLs from placeholders (they are real generated content)
-    placeholder_images = [img for img in placeholder_images if 'huggingface.co/datasets/' not in img]
-
-    if not placeholder_images:
-        img_pattern = r'<img[^>]*>'
-        placeholder_images = re.findall(img_pattern, html_content)
-
-    video_html = generate_video_from_text(prompt, session_id=session_id, token=None)  # TODO: Pass token from parent context
-    if video_html.startswith("Error"):
-        return ""
-
-    # Replace first placeholder if present
-    if placeholder_images:
-        placeholder = placeholder_images[0]
-        placeholder_clean = re.sub(r'\s+', ' ', placeholder.strip())
-        placeholder_variations = [
-            placeholder,
-            placeholder_clean,
-            placeholder_clean.replace('"', "'"),
-            placeholder_clean.replace("'", '"'),
-            re.sub(r'\s+', ' ', placeholder_clean),
-            placeholder_clean.replace('  ', ' '),
-        ]
-        blocks = []
-        for variation in placeholder_variations:
-            blocks.append(f"""{SEARCH_START}
-{variation}
-{DIVIDER}
-{video_html}
-{REPLACE_END}""")
-        return '\n\n'.join(blocks)
-
-    # Otherwise insert after <body> with proper container
-    if '<body' in html_content:
-        body_start = html_content.find('<body')
-        body_end = html_content.find('>', body_start) + 1
-        opening_body_tag = html_content[body_start:body_end]
-        
-        # Look for existing container elements to insert into
-        body_content_start = body_end
-        
-        # Try to find a good insertion point within existing content structure
-        patterns_to_try = [
-            r'<main[^>]*>',
-            r'<section[^>]*class="[^"]*hero[^"]*"[^>]*>',
-            r'<div[^>]*class="[^"]*container[^"]*"[^>]*>',
-            r'<header[^>]*>',
-        ]
-        
-        insertion_point = None
-        for pattern in patterns_to_try:
-            import re
-            match = re.search(pattern, html_content[body_content_start:], re.IGNORECASE)
-            if match:
-                match_end = body_content_start + match.end()
-                # Find the end of this tag
-                tag_content = html_content[body_content_start + match.start():match_end]
-                insertion_point = html_content[:match_end] + '\n        '
-                break
-        
-        if not insertion_point:
-            # Fallback to right after body tag with container div
-            insertion_point = html_content[:body_end] + '\n    '
-            video_with_container = f'<div class="video-container" style="margin: 20px 0; text-align: center;">\n        {video_html}\n    </div>'
-            return f"""{SEARCH_START}
-{insertion_point}
-{DIVIDER}
-{insertion_point}
-    {video_with_container}
-{REPLACE_END}"""
-        else:
-            return f"""{SEARCH_START}
-{insertion_point}
-{DIVIDER}
-{insertion_point}
-        {video_html}
-{REPLACE_END}"""
+    except Exception:
+        # Non-fatal in preview
+        pass
 
-    # If no <body>, just append
-    return f"{SEARCH_START}\n\n{DIVIDER}\n{video_html}\n{REPLACE_END}"
+    return doc
 
-def create_music_replacement_blocks_text_to_music(html_content: str, prompt: str, session_id: str | None = None) -> str:
-    """Create search/replace blocks that insert ONE generated <audio> near the top of <body>.
+def extract_html_document(text: str) -> str:
+    """Return substring starting from the first <!DOCTYPE html> or <html> if present, else original text.
 
-    Unlike images/videos which replace placeholders, music doesn't map to an <img> tag.
-    We simply insert an <audio> player after the opening <body>.
+    This ignores prose or planning notes before the actual HTML so previews don't break.
     """
-    if not prompt or not prompt.strip():
-        return ""
-
-    audio_html = generate_music_from_text(prompt, session_id=session_id, token=None)  # TODO: Pass token from parent context
-    if audio_html.startswith("Error"):
-        return ""
-
-    # Prefer inserting after the first <section>...</section> if present; else after <body>
-    import re
-    section_match = re.search(r"<section\b[\s\S]*?</section>", html_content, flags=re.IGNORECASE)
-    if section_match:
-        section_html = section_match.group(0)
-        section_clean = re.sub(r"\s+", " ", section_html.strip())
-        variations = [
-            section_html,
-            section_clean,
-            section_clean.replace('"', "'"),
-            section_clean.replace("'", '"'),
-            re.sub(r"\s+", " ", section_clean),
-        ]
-        blocks = []
-        for v in variations:
-            blocks.append(f"""{SEARCH_START}
-{v}
-{DIVIDER}
-{v}\n    {audio_html}
-{REPLACE_END}""")
-        return "\n\n".join(blocks)
-    if '<body' in html_content:
-        body_end = html_content.find('>', html_content.find('<body')) + 1
-        insertion_point = html_content[:body_end] + '\n    '
-        return f"""{SEARCH_START}
-{insertion_point}
-{DIVIDER}
-{insertion_point}
-    {audio_html}
-{REPLACE_END}"""
+    if not text:
+        return text
+    lower = text.lower()
+    idx = lower.find("<!doctype html")
+    if idx == -1:
+        idx = lower.find("<html")
+    return text[idx:] if idx != -1 else text
 
-    # If no <body>, just append
-    return f"{SEARCH_START}\n\n{DIVIDER}\n{audio_html}\n{REPLACE_END}"
-def create_image_replacement_blocks_from_input_image(html_content: str, user_prompt: str, input_image_data, max_images: int = 1) -> str:
-    """Create search/replace blocks using image-to-image generation with a provided input image.
+def parse_svelte_output(text):
+    """Parse Svelte output to extract individual files.
 
-    Mirrors placeholder detection from create_image_replacement_blocks but uses generate_image_to_image.
+    Supports dynamic multi-file using === filename === sections (preferred),
+    and falls back to ```svelte / ```css code blocks for minimal projects.
     """
-    if not user_prompt:
-        return ""
-
-    import re
-
-    placeholder_patterns = [
-        r'<img[^>]*src=["\'](?:placeholder|dummy|sample|example)[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://via\.placeholder\.com[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://picsum\.photos[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://dummyimage\.com[^"\']*["\'][^>]*>',
-        r'<img[^>]*alt=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*class=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*id=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']data:image[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']#["\'][^>]*>',
-        r'<img[^>]*src=["\']about:blank["\'][^>]*>',
-    ]
+    if not text:
+        return {}
 
-    placeholder_images = []
-    for pattern in placeholder_patterns:
-        matches = re.findall(pattern, html_content, re.IGNORECASE)
-        placeholder_images.extend(matches)
-    
-    # Filter out HF URLs from placeholders (they are real generated content)
-    placeholder_images = [img for img in placeholder_images if 'huggingface.co/datasets/' not in img]
+    # Preferred: multi-file sections (works for any filenames)
+    try:
+        files = parse_multipage_html_output(text) or {}
+    except Exception:
+        files = {}
 
-    if not placeholder_images:
-        img_pattern = r'<img[^>]*>'
-        placeholder_images = re.findall(img_pattern, html_content)
-        # Filter HF URLs from fallback images too
-        placeholder_images = [img for img in placeholder_images if 'huggingface.co/datasets/' not in img]
+    if isinstance(files, dict) and files:
+        return files
 
-    div_placeholder_patterns = [
-        r'<div[^>]*class=["\'][^"\']*(?:image|img|photo|picture)[^"\']*["\'][^>]*>.*?</div>',
-        r'<div[^>]*id=["\'][^"\']*(?:image|img|photo|picture)[^"\']*["\'][^>]*>.*?</div>',
-    ]
-    for pattern in div_placeholder_patterns:
-        matches = re.findall(pattern, html_content, re.IGNORECASE | re.DOTALL)
-        placeholder_images.extend(matches)
-
-    num_images_needed = len(placeholder_images)
-    num_to_replace = min(num_images_needed, max(0, int(max_images)))
-    if num_images_needed == 0:
-        # No placeholders; generate one image to append (only if at least one upload is present)
-        if num_to_replace <= 0:
-            return ""
-        prompts = extract_image_prompts_from_text(user_prompt, 1)
-        if not prompts:
-            return ""
-        image_html = generate_image_to_image(input_image_data, prompts[0], token=None)  # TODO: Pass token from parent context
-        if image_html.startswith("Error"):
-            return ""
-        return f"{SEARCH_START}\n\n{DIVIDER}\n<div class=\"generated-images\">{image_html}</div>\n{REPLACE_END}"
-
-    if num_to_replace <= 0:
-        return ""
-    image_prompts = extract_image_prompts_from_text(user_prompt, num_to_replace)
+    # Fallback: code fences for minimal two-file output
+    import re
+    results = {}
+    svelte_match = re.search(r"```svelte\s*\n([\s\S]+?)\n```", text, re.IGNORECASE)
+    if svelte_match:
+        results['src/App.svelte'] = svelte_match.group(1).strip()
+    css_match = re.search(r"```css\s*\n([\s\S]+?)\n```", text, re.IGNORECASE)
+    if css_match:
+        results['src/app.css'] = css_match.group(1).strip()
+    return results
 
-    generated_images = []
-    for i, prompt in enumerate(image_prompts):
-        image_html = generate_image_to_image(input_image_data, prompt, token=None)  # TODO: Pass token from parent context
-        if not image_html.startswith("Error"):
-            generated_images.append((i, image_html))
+def parse_react_output(text):
+    """Parse React/Next.js output to extract individual files.
 
-    if not generated_images:
-        return ""
+    Supports multi-file sections using === filename === sections.
+    """
+    if not text:
+        return {}
 
-    replacement_blocks = []
-    for i, (prompt_index, generated_image) in enumerate(generated_images):
-        if i < num_to_replace and i < len(placeholder_images):
-            placeholder = placeholder_images[i]
-            placeholder_clean = re.sub(r'\s+', ' ', placeholder.strip())
-            placeholder_variations = [
-                placeholder_clean,
-                placeholder_clean.replace('"', "'"),
-                placeholder_clean.replace("'", '"'),
-                re.sub(r'\s+', ' ', placeholder_clean),
-                placeholder_clean.replace('  ', ' '),
-            ]
-            for variation in placeholder_variations:
-                replacement_blocks.append(f"""{SEARCH_START}
-{variation}
-{DIVIDER}
-{generated_image}
-{REPLACE_END}""")
-        # Do not insert additional images beyond the uploaded count
+    # Use the generic multipage parser
+    try:
+        files = parse_multipage_html_output(text) or {}
+    except Exception:
+        files = {}
 
-    return '\n\n'.join(replacement_blocks)
+    return files if isinstance(files, dict) and files else {}
 
-def create_video_replacement_blocks_from_input_image(html_content: str, user_prompt: str, input_image_data, session_id: str | None = None) -> str:
-    """Create search/replace blocks that replace the first <img> (or placeholder) with a generated <video>.
+def format_svelte_output(files):
+    """Format Svelte files into === filename === sections (generic)."""
+    return format_multipage_output(files)
+def infer_svelte_dependencies(files: Dict[str, str]) -> Dict[str, str]:
+    """Infer npm dependencies from Svelte/TS imports across generated files.
 
-    Uses generate_video_from_image to produce a single video and swaps it in.
+    Returns mapping of package name -> semver (string). Uses conservative defaults
+    when versions aren't known. Adds special-cased versions when known.
     """
-    if not user_prompt:
-        return ""
-
-    import re
-    print("[Image2Video] Creating replacement blocks for video insertion")
-
-    placeholder_patterns = [
-        r'<img[^>]*src=["\'](?:placeholder|dummy|sample|example)[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://via\.placeholder\.com[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://picsum\.photos[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://dummyimage\.com[^"\']*["\'][^>]*>',
-        r'<img[^>]*alt=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*class=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*id=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']data:image[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']#["\'][^>]*>',
-        r'<img[^>]*src=["\']about:blank["\'][^>]*>',
-    ]
-
-    placeholder_images = []
-    for pattern in placeholder_patterns:
-        matches = re.findall(pattern, html_content, re.IGNORECASE)
-        if matches:
-            placeholder_images.extend(matches)
-    
-    # Filter out HF URLs from placeholders (they are real generated content)
-    placeholder_images = [img for img in placeholder_images if 'huggingface.co/datasets/' not in img]
+    import re as _re
+    deps: Dict[str, str] = {}
+    import_from = _re.compile(r"import\s+[^;]*?from\s+['\"]([^'\"]+)['\"]", _re.IGNORECASE)
+    bare_import = _re.compile(r"import\s+['\"]([^'\"]+)['\"]", _re.IGNORECASE)
 
-    if not placeholder_images:
-        img_pattern = r'<img[^>]*>'
-        placeholder_images = re.findall(img_pattern, html_content)
-    print(f"[Image2Video] Found {len(placeholder_images)} candidate <img> elements")
+    def maybe_add(pkg: str):
+        # Ignore empty, relative ('.'), absolute ('/'), and 'http...' import specifiers.
+        if not pkg or pkg.startswith(('.', '/', 'http')):
+            return
+        # Skip only Svelte itself and its submodules (e.g. 'svelte/store'); a bare prefix test would also drop packages like 'svelte-routing'.
+        if pkg == 'svelte' or pkg.startswith('svelte/'):
+            return
+        deps.setdefault(pkg, "*")  # wildcard default; known packages are pinned after the scan
 
-    video_html = generate_video_from_image(input_image_data, user_prompt, session_id=session_id, token=None)  # TODO: Pass token from parent context
-    try:
-        has_file_src = 'src="' in video_html and video_html.count('src="') >= 1 and 'data:video/mp4;base64' not in video_html.split('src="', 1)[1]
-        print(f"[Image2Video] Generated video HTML length={len(video_html)}; has_file_src={has_file_src}")
-    except Exception:
-        pass
-    if video_html.startswith("Error"):
-        print("[Image2Video] Video generation returned error; aborting replacement")
-        return ""
+    for path, content in (files or {}).items():
+        if not isinstance(content, str):
+            continue
+        for m in import_from.finditer(content):
+            maybe_add(m.group(1))
+        for m in bare_import.finditer(content):
+            maybe_add(m.group(1))
 
-    if placeholder_images:
-        placeholder = placeholder_images[0]
-        placeholder_clean = re.sub(r'\s+', ' ', placeholder.strip())
-        print("[Image2Video] Replacing first image placeholder with video")
-        placeholder_variations = [
-            # Try the exact string first to maximize replacement success
-            placeholder,
-            placeholder_clean,
-            placeholder_clean.replace('"', "'"),
-            placeholder_clean.replace("'", '"'),
-            re.sub(r'\s+', ' ', placeholder_clean),
-            placeholder_clean.replace('  ', ' '),
-        ]
-        blocks = []
-        for variation in placeholder_variations:
-            blocks.append(f"""{SEARCH_START}
-{variation}
-{DIVIDER}
-{video_html}
-{REPLACE_END}""")
-        return '\n\n'.join(blocks)
-
-    if '<body' in html_content:
-        body_start = html_content.find('<body')
-        body_end = html_content.find('>', body_start) + 1
-        opening_body_tag = html_content[body_start:body_end]
-        print("[Image2Video] No <img> found; inserting video right after the opening <body> tag")
-        print(f"[Image2Video] Opening <body> tag snippet: {opening_body_tag[:120]}")
-        return f"""{SEARCH_START}
-{opening_body_tag}
-{DIVIDER}
-{opening_body_tag}
-    {video_html}
-{REPLACE_END}"""
+    # Pin known versions when sensible
+    if '@gradio/dataframe' in deps:
+        deps['@gradio/dataframe'] = '^0.19.1'
 
-    print("[Image2Video] No <body> tag; appending video via replacement block")
-    return f"{SEARCH_START}\n\n{DIVIDER}\n{video_html}\n{REPLACE_END}"
+    return deps
 
-def create_video_replacement_blocks_from_input_video(html_content: str, user_prompt: str, input_video_data, session_id: str | None = None) -> str:
-    """Create search/replace blocks that replace the first <video> (or placeholder) with a generated <video>.
+def build_svelte_package_json(existing_json_text: str | None, detected_dependencies: Dict[str, str]) -> str:
+    """Create or merge a package.json for Svelte spaces.
 
-    Uses generate_video_from_video to produce a single video and swaps it in.
+    - If existing_json_text is provided, merge detected deps into its dependencies.
+    - Otherwise, start from the template defaults provided by the user and add deps.
+    - Always preserve template scripts and devDependencies.
     """
-    if not user_prompt:
-        return ""
-
-    import re
-    print("[Video2Video] Creating replacement blocks for video replacement")
-
-    # Look for existing video elements first
-    video_patterns = [
-        r'<video[^>]*>.*?</video>',
-        r'<video[^>]*/>',
-        r'<video[^>]*></video>',
-    ]
+    import json as _json
+    # Template from the user's Svelte space scaffold
+    template = {
+        "name": "svelte",
+        "private": True,
+        "version": "0.0.0",
+        "type": "module",
+        "scripts": {
+            "dev": "vite",
+            "build": "vite build",
+            "preview": "vite preview",
+            "check": "svelte-check --tsconfig ./tsconfig.app.json && tsc -p tsconfig.node.json"
+        },
+        "devDependencies": {
+            "@sveltejs/vite-plugin-svelte": "^5.0.3",
+            "@tsconfig/svelte": "^5.0.4",
+            "svelte": "^5.28.1",
+            "svelte-check": "^4.1.6",
+            "typescript": "~5.8.3",
+            "vite": "^6.3.5"
+        }
+    }
 
-    placeholder_videos = []
-    for pattern in video_patterns:
-        matches = re.findall(pattern, html_content, re.IGNORECASE | re.DOTALL)
-        if matches:
-            placeholder_videos.extend(matches)
-    
-    # If no videos found, look for video placeholders or divs that might represent videos
-    if not placeholder_videos:
-        placeholder_patterns = [
-            r'<div[^>]*class=["\'][^"\']*video[^"\']*["\'][^>]*>.*?</div>',
-            r'<div[^>]*id=["\'][^"\']*video[^"\']*["\'][^>]*>.*?</div>',
-            r'<iframe[^>]*src=["\'][^"\']*youtube[^"\']*["\'][^>]*>.*?</iframe>',
-            r'<iframe[^>]*src=["\'][^"\']*vimeo[^"\']*["\'][^>]*>.*?</iframe>',
-        ]
-        for pattern in placeholder_patterns:
-            matches = re.findall(pattern, html_content, re.IGNORECASE | re.DOTALL)
-            if matches:
-                placeholder_videos.extend(matches)
+    result = template
+    if existing_json_text:
+        try:
+            parsed = _json.loads(existing_json_text)
+            # Merge with template as base, keeping template scripts/devDependencies if missing in parsed
+            result = {
+                **template,
+                **{k: v for k, v in parsed.items() if k not in ("scripts", "devDependencies")},
+            }
+            # If parsed contains its own scripts/devDependencies, prefer parsed to respect user's file
+            if isinstance(parsed.get("scripts"), dict):
+                result["scripts"] = parsed["scripts"]
+            if isinstance(parsed.get("devDependencies"), dict):
+                result["devDependencies"] = parsed["devDependencies"]
+        except Exception:
+            # Fallback to template if parse fails
+            result = template
 
-    print(f"[Video2Video] Found {len(placeholder_videos)} candidate video elements")
+    # Merge dependencies
+    existing_deps = result.get("dependencies", {})
+    if not isinstance(existing_deps, dict):
+        existing_deps = {}
+    merged = {**existing_deps, **(detected_dependencies or {})}
+    if merged:
+        result["dependencies"] = merged
+    else:
+        result.pop("dependencies", None)
 
-    video_html = generate_video_from_video(input_video_data, user_prompt, session_id=session_id, token=None)
-    try:
-        has_file_src = 'src="' in video_html and video_html.count('src="') >= 1 and 'data:video/mp4;base64' not in video_html.split('src="', 1)[1]
-        print(f"[Video2Video] Generated video HTML length={len(video_html)}; has_file_src={has_file_src}")
-    except Exception:
-        pass
-    if video_html.startswith("Error"):
-        print("[Video2Video] Video generation returned error; aborting replacement")
-        return ""
+    return _json.dumps(result, indent=2, ensure_ascii=False) + "\n"
 
-    if placeholder_videos:
-        placeholder = placeholder_videos[0]
-        placeholder_clean = re.sub(r'\s+', ' ', placeholder.strip())
-        print("[Video2Video] Replacing first video placeholder with generated video")
-        placeholder_variations = [
-            # Try the exact string first to maximize replacement success
-            placeholder,
-            placeholder_clean,
-            placeholder_clean.replace('"', "'"),
-            placeholder_clean.replace("'", '"'),
-            re.sub(r'\s+', ' ', placeholder_clean),
-            placeholder_clean.replace('  ', ' '),
-        ]
-        blocks = []
-        for variation in placeholder_variations:
-            blocks.append(f"""{SEARCH_START}
-{variation}
-{DIVIDER}
-{video_html}
-{REPLACE_END}""")
-        return '\n\n'.join(blocks)
-
-    if '<body' in html_content:
-        body_start = html_content.find('<body')
-        body_end = html_content.find('>', body_start) + 1
-        opening_body_tag = html_content[body_start:body_end]
-        print("[Video2Video] No <video> found; inserting video right after the opening <body> tag")
-        print(f"[Video2Video] Opening <body> tag snippet: {opening_body_tag[:120]}")
-        return f"""{SEARCH_START}
-{opening_body_tag}
-{DIVIDER}
-{opening_body_tag}
-    {video_html}
-{REPLACE_END}"""
+def history_render(history: History):  # Returns a pair: a visible=True Gradio update and the history passed in, unchanged
+    return gr.update(visible=True), history
 
-    print("[Video2Video] No <body> tag; appending video via replacement block")
-    return f"{SEARCH_START}\n\n{DIVIDER}\n{video_html}\n{REPLACE_END}"
+def clear_history():  # Takes no input; always returns the same four "empty" values
+    return [], [], None, ""  # In order: empty list for the tuple-format history, empty list for chatbot messages, None for file, empty string for website URL
 
-def apply_generated_media_to_html_REMOVED():
-    """Apply text/image/video/music replacements to HTML content.
+def update_image_input_visibility(model):
+    """Return a gr.update whose visible flag is True only when model's "id" equals one of the three model ids checked below."""
+    is_ernie_vl = model.get("id") == "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT"  # model must support .get (e.g. a dict); for a dict, a missing "id" gives None and matches nothing
+    is_glm_vl = model.get("id") == "THUDM/GLM-4.1V-9B-Thinking"
+    is_glm_45v = model.get("id") == "zai-org/GLM-4.5V"
+    return gr.update(visible=is_ernie_vl or is_glm_vl or is_glm_45v)
 
-    - Works with single-document HTML strings
-    - Also supports multi-page outputs formatted as === filename === sections by
-      applying changes to the HTML entrypoint (index.html if present) and
-      returning the updated multi-page text.
-    """
-    # Detect multi-page sections and choose an entry HTML to modify
-    is_multipage = False
-    multipage_files: Dict[str, str] = {}
-    entry_html_path: str | None = None
-    try:
-        multipage_files = parse_multipage_html_output(html_content) or {}
-        if multipage_files:
-            is_multipage = True
-            if 'index.html' in multipage_files:
-                entry_html_path = 'index.html'
-            else:
-                html_paths = [p for p in multipage_files.keys() if p.lower().endswith('.html')]
-                entry_html_path = html_paths[0] if html_paths else None
-    except Exception:
-        is_multipage = False
-        multipage_files = {}
-        entry_html_path = None
+def process_image_for_model(image):
+    """Encode image as a PNG data URI string ("data:image/png;base64,..."); returns None when image is None."""
+    if image is None:
+        return None
+    
+    # Imports are local to the function and run only after the None check above
+    import io
+    import base64
+    import numpy as np
+    from PIL import Image
+    
+    # A numpy array (e.g. as delivered by Gradio) is converted to a PIL Image; any other value is used as-is
+    if isinstance(image, np.ndarray):
+        image = Image.fromarray(image)
+    
+    buffer = io.BytesIO()
+    image.save(buffer, format='PNG')  # always serialized as PNG into the in-memory buffer
+    img_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
+    return f"data:image/png;base64,{img_str}"
 
-    result = multipage_files.get(entry_html_path, html_content) if is_multipage and entry_html_path else html_content
-    try:
-        print(
-            f"[MediaApply] enable_i2v={enable_image_to_video}, enable_i2i={enable_image_to_image}, "
-            f"enable_t2i={enable_text_to_image}, enable_t2v={enable_text_to_video}, enable_v2v={enable_video_to_video}, enable_t2m={enable_text_to_music}, enable_iv2a={enable_image_video_to_animation}, has_image={input_image_data is not None}, has_video={input_video_data is not None}, has_anim_video={animation_video_data is not None}"
-        )
-        
-        # If image+video-to-animation is enabled, generate animated video and return.
-        if enable_image_video_to_animation and input_image_data is not None and animation_video_data is not None and (result.strip().startswith('<!DOCTYPE html>') or result.strip().startswith('<html')):
-            print(f"[MediaApply] Running image+video-to-animation with mode={animation_mode}, quality={animation_quality}")
-            try:
-                animation_html_tag = generate_animation_from_image_video(
-                    input_image_data, 
-                    animation_video_data, 
-                    user_prompt or "", 
-                    model_id=animation_mode, 
-                    model=animation_quality, 
-                    session_id=session_id, 
-                    token=token
-                )
-                if not (animation_html_tag or "").startswith("Error"):
-                    # Validate animation video HTML before attempting placement
-                    if validate_video_html(animation_html_tag):
-                        blocks_anim = llm_place_media(result, animation_html_tag, media_kind="video")
-                    else:
-                        print("[MediaApply] Generated animation HTML failed validation, skipping LLM placement")
-                        blocks_anim = ""
-                else:
-                    print(f"[MediaApply] Animation generation failed: {animation_html_tag}")
-                    blocks_anim = ""
-            except Exception as e:
-                print(f"[MediaApply] Exception during animation generation: {str(e)}")
-                blocks_anim = ""
-            
-            # If LLM placement failed, use fallback placement
-            if not blocks_anim:
-                # Create simple replacement block for animation video
-                blocks_anim = f"""{SEARCH_START}
-</head>
 
-{DIVIDER}
-</head>
-<div class="animation-container" style="margin: 20px 0; text-align: center;">
-    {animation_html_tag}
-</div>
-{REPLACE_END}"""
-            
-            if blocks_anim:
-                print("[MediaApply] Applying animation replacement blocks")
-                result = apply_search_replace_changes(result, blocks_anim)
-                if is_multipage and entry_html_path:
-                    multipage_files[entry_html_path] = result
-                    return format_multipage_output(multipage_files)
-                return result
-
-        # If image-to-video is enabled, replace the first image with a generated video and return.
-        if enable_image_to_video and input_image_data is not None and (result.strip().startswith('<!DOCTYPE html>') or result.strip().startswith('<html')):
-            i2v_prompt = (image_to_video_prompt or user_prompt or "").strip()
-            print(f"[MediaApply] Running image-to-video with prompt len={len(i2v_prompt)}")
-            try:
-                video_html_tag = generate_video_from_image(input_image_data, i2v_prompt, session_id=session_id, token=token)
-                if not (video_html_tag or "").startswith("Error"):
-                    # Validate video HTML before attempting placement
-                    if validate_video_html(video_html_tag):
-                        blocks_v = llm_place_media(result, video_html_tag, media_kind="video")
-                    else:
-                        print("[MediaApply] Generated video HTML failed validation, skipping LLM placement")
-                        blocks_v = ""
-                else:
-                    print(f"[MediaApply] Video generation failed: {video_html_tag}")
-                    blocks_v = ""
-            except Exception as e:
-                print(f"[MediaApply] Exception during image-to-video generation: {str(e)}")
-                blocks_v = ""
-            if not blocks_v:
-                blocks_v = create_video_replacement_blocks_from_input_image(result, i2v_prompt, input_image_data, session_id=session_id)
-            if blocks_v:
-                print("[MediaApply] Applying image-to-video replacement blocks")
-                before_len = len(result)
-                result_after = apply_search_replace_changes(result, blocks_v)
-                after_len = len(result_after)
-                changed = (result_after != result)
-                print(f"[MediaApply] i2v blocks length={len(blocks_v)}; html before={before_len}, after={after_len}, changed={changed}")
-                if not changed:
-                    print("[MediaApply] DEBUG: Replacement did not change content. Dumping first block:")
-                    try:
-                        first_block = blocks_v.split(REPLACE_END)[0][:1000]
-                        print(first_block)
-                    except Exception:
-                        pass
-                result = result_after
-            else:
-                print("[MediaApply] No i2v replacement blocks generated")
-            if is_multipage and entry_html_path:
-                multipage_files[entry_html_path] = result
-                return format_multipage_output(multipage_files)
-            return result
-
-        # If video-to-video is enabled, replace the first video with a generated video and return.
-        if enable_video_to_video and input_video_data is not None and (result.strip().startswith('<!DOCTYPE html>') or result.strip().startswith('<html')):
-            v2v_prompt = (video_to_video_prompt or user_prompt or "").strip()
-            print(f"[MediaApply] Running video-to-video with prompt len={len(v2v_prompt)}")
-            try:
-                video_html_tag = generate_video_from_video(input_video_data, v2v_prompt, session_id=session_id, token=token)
-                if not (video_html_tag or "").startswith("Error"):
-                    # Validate video HTML before attempting placement
-                    if validate_video_html(video_html_tag):
-                        blocks_v = llm_place_media(result, video_html_tag, media_kind="video")
-                    else:
-                        print("[MediaApply] Generated video HTML failed validation, skipping LLM placement")
-                        blocks_v = ""
-                else:
-                    print(f"[MediaApply] Video generation failed: {video_html_tag}")
-                    blocks_v = ""
-            except Exception as e:
-                print(f"[MediaApply] Exception during video-to-video generation: {str(e)}")
-                blocks_v = ""
-            if not blocks_v:
-                # Create fallback video replacement blocks
-                blocks_v = create_video_replacement_blocks_from_input_video(result, v2v_prompt, input_video_data, session_id=session_id)
-            if blocks_v:
-                print("[MediaApply] Applying video-to-video replacement blocks")
-                before_len = len(result)
-                result_after = apply_search_replace_changes(result, blocks_v)
-                after_len = len(result_after)
-                changed = (result_after != result)
-                print(f"[MediaApply] v2v blocks length={len(blocks_v)}; html before={before_len}, after={after_len}, changed={changed}")
-                if not changed:
-                    print("[MediaApply] DEBUG: Replacement did not change content. Dumping first block:")
-                    try:
-                        first_block = blocks_v.split(REPLACE_END)[0][:1000]
-                        print(first_block)
-                    except Exception:
-                        pass
-                result = result_after
-            else:
-                print("[MediaApply] No v2v replacement blocks generated")
-            if is_multipage and entry_html_path:
-                multipage_files[entry_html_path] = result
-                return format_multipage_output(multipage_files)
-            return result
-
-        # If text-to-video is enabled, insert a generated video (no input image required) and return.
-        if enable_text_to_video and (result.strip().startswith('<!DOCTYPE html>') or result.strip().startswith('<html')):
-            t2v_prompt = (text_to_video_prompt or user_prompt or "").strip()
-            print(f"[MediaApply] Running text-to-video with prompt len={len(t2v_prompt)}")
-            try:
-                video_html_tag = generate_video_from_text(t2v_prompt, session_id=session_id, token=token)
-                if not (video_html_tag or "").startswith("Error"):
-                    # Validate video HTML before attempting placement
-                    if validate_video_html(video_html_tag):
-                        blocks_tv = llm_place_media(result, video_html_tag, media_kind="video")
-                    else:
-                        print("[MediaApply] Generated video HTML failed validation, skipping LLM placement")
-                        blocks_tv = ""
-                else:
-                    print(f"[MediaApply] Video generation failed: {video_html_tag}")
-                    blocks_tv = ""
-            except Exception as e:
-                print(f"[MediaApply] Exception during text-to-video generation: {str(e)}")
-                blocks_tv = ""
-            if not blocks_tv:
-                blocks_tv = create_video_replacement_blocks_text_to_video(result, t2v_prompt, session_id=session_id)
-            if blocks_tv:
-                print("[MediaApply] Applying text-to-video replacement blocks")
-                result = apply_search_replace_changes(result, blocks_tv)
-            else:
-                print("[MediaApply] No t2v replacement blocks generated")
-            if is_multipage and entry_html_path:
-                multipage_files[entry_html_path] = result
-                return format_multipage_output(multipage_files)
-            return result
-
-        # If text-to-music is enabled, insert a generated audio player near the top of body and return.
-        if enable_text_to_music and (result.strip().startswith('<!DOCTYPE html>') or result.strip().startswith('<html')):
-            t2m_prompt = (text_to_music_prompt or user_prompt or "").strip()
-            print(f"[MediaApply] Running text-to-music with prompt len={len(t2m_prompt)}")
-            try:
-                audio_html_tag = generate_music_from_text(t2m_prompt, session_id=session_id, token=token)
-                if not (audio_html_tag or "").startswith("Error"):
-                    blocks_tm = llm_place_media(result, audio_html_tag, media_kind="audio")
-                else:
-                    blocks_tm = ""
-            except Exception:
-                blocks_tm = ""
-            if not blocks_tm:
-                blocks_tm = create_music_replacement_blocks_text_to_music(result, t2m_prompt, session_id=session_id)
-            if blocks_tm:
-                print("[MediaApply] Applying text-to-music replacement blocks")
-                result = apply_search_replace_changes(result, blocks_tm)
-            else:
-                print("[MediaApply] No t2m replacement blocks generated")
-            if is_multipage and entry_html_path:
-                multipage_files[entry_html_path] = result
-                return format_multipage_output(multipage_files)
-            return result
-
-        # If an input image is provided and image-to-image is enabled, we only replace one image
-        # and skip text-to-image to satisfy the requirement to replace exactly the number of uploaded images.
-        if enable_image_to_image and input_image_data is not None and (result.strip().startswith('<!DOCTYPE html>') or result.strip().startswith('<html')):
-            i2i_prompt = (image_to_image_prompt or user_prompt or "").strip()
-            try:
-                image_html_tag = generate_image_to_image(input_image_data, i2i_prompt, token=token)
-                if not (image_html_tag or "").startswith("Error"):
-                    blocks2 = llm_place_media(result, image_html_tag, media_kind="image")
-                else:
-                    blocks2 = ""
-            except Exception:
-                blocks2 = ""
-            if not blocks2:
-                blocks2 = create_image_replacement_blocks_from_input_image(result, i2i_prompt, input_image_data, max_images=1)
-            if blocks2:
-                result = apply_search_replace_changes(result, blocks2)
-            if is_multipage and entry_html_path:
-                multipage_files[entry_html_path] = result
-                return format_multipage_output(multipage_files)
-            return result
-
-        if enable_text_to_image and (result.strip().startswith('<!DOCTYPE html>') or result.strip().startswith('<html')):
-            t2i_prompt = (text_to_image_prompt or user_prompt or "").strip()
-            print(f"[MediaApply] Running text-to-image with prompt len={len(t2i_prompt)}")
-            # Single-image flow for text-to-image (LLM placement first, fallback deterministic)
-            try:
-                print(f"[MediaApply] Calling generate_image_with_hunyuan with prompt: {t2i_prompt[:50]}...")
-                image_html_tag = generate_image_with_hunyuan(t2i_prompt, 0, token=token)
-                print(f"[MediaApply] Image generation result: {image_html_tag[:200]}...")
-                if not (image_html_tag or "").startswith("Error"):
-                    print("[MediaApply] Attempting LLM placement of image...")
-                    blocks = llm_place_media(result, image_html_tag, media_kind="image")
-                    print(f"[MediaApply] LLM placement result: {len(blocks) if blocks else 0} chars")
-                else:
-                    print(f"[MediaApply] Image generation failed: {image_html_tag}")
-                    blocks = ""
-            except Exception as e:
-                print(f"[MediaApply] Exception during image generation: {str(e)}")
-                blocks = ""
-            if not blocks:
-                blocks = create_image_replacement_blocks_text_to_image_single(result, t2i_prompt)
-            if blocks:
-                print("[MediaApply] Applying text-to-image replacement blocks")
-                result = apply_search_replace_changes(result, blocks)
-    except Exception:
-        import traceback
-        print("[MediaApply] Exception during media application:")
-        traceback.print_exc()
-        return html_content
-    if is_multipage and entry_html_path:
-        multipage_files[entry_html_path] = result
-        return format_multipage_output(multipage_files)
-    return result
 
 def create_multimodal_message(text, image=None):
     """Create a chat message. For broad provider compatibility, always return content as a string.
@@ -6334,10 +4191,8 @@ Generate the exact search/replace blocks needed to make these changes."""
     try:
         cleanup_session_videos(session_id)
         cleanup_session_audio(session_id)
-        cleanup_session_media(session_id)
         reap_old_videos()
         reap_old_audio()
-        reap_old_media()
     except Exception:
         pass
 
@@ -10223,9 +8078,6 @@ if __name__ == "__main__":
     # Initialize FastRTC documentation system
     initialize_fastrtc_docs()
     
-    # Clean up any orphaned temporary files from previous runs
-    cleanup_all_temp_media_on_startup()
-    
     demo.queue(api_open=False, default_concurrency_limit=20).launch(
         show_api=False, 
         ssr_mode=True,