Spaces:
Running on Zero
Running on Zero
| """ | |
| ACE-Step v1.5 - HuggingFace Space Entry Point | |
| This file serves as the entry point for HuggingFace Space deployment. | |
| It initializes the service and launches the Gradio interface. | |
| ZeroGPU Support: | |
| - ZeroGPU uses the 'spaces' package to intercept CUDA operations | |
| - Models are loaded to "cuda" during startup but actual GPU allocation is deferred | |
| - Handlers are registered globally so forked processes inherit them without pickling | |
| - @spaces.GPU decorators are on top-level Gradio event handlers, not internal functions | |
| - nano-vllm uses direct CUDA APIs that bypass spaces interception, so we use PyTorch backend | |
| """ | |
| import os | |
| import sys | |
# Absolute directory that holds this script (app.py); project-relative paths
# elsewhere in the file are built from it.
current_dir = os.path.dirname(os.path.abspath(__file__))

# nano-vllm ships as a local package inside the repository. Put it at the
# front of the import path, but only when the directory is actually present.
nano_vllm_path = os.path.join(current_dir, "acestep", "third_parts", "nano-vllm")
if os.path.exists(nano_vllm_path):
    sys.path.insert(0, nano_vllm_path)

# Turn off Gradio analytics.
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"

# Remove proxy settings (both lower- and upper-case spellings) that may
# affect Gradio.
for proxy_var in (
    "http_proxy",
    "https_proxy",
    "HTTP_PROXY",
    "HTTPS_PROXY",
    "ALL_PROXY",
):
    os.environ.pop(proxy_var, None)
| # Import spaces for ZeroGPU support (must be imported before torch for proper interception) | |
| # This is a no-op if not running on HuggingFace Spaces | |
| try: | |
| import spaces | |
| HAS_SPACES = True | |
| except ImportError: | |
| HAS_SPACES = False | |
| import torch | |
| from acestep.handler import AceStepHandler | |
| from acestep.llm_inference import LLMHandler | |
| from acestep.dataset_handler import DatasetHandler | |
| from acestep.gradio_ui import create_gradio_interface | |
# Patch: automatically extract the file path string from the upload payload and convert non-standard phone formats (such as m4a) to standard WAV.
| original_load_audio_file = AceStepHandler._load_audio_file | |
| import subprocess | |
| import uuid | |
# Extensions commonly produced by phones/browsers that are transcoded to WAV
# before the file is handed to the audio loader.
_AUTO_CONVERT_EXTENSIONS = frozenset(
    {".m4a", ".aac", ".3gp", ".amr", ".webm", ".ogg", ".opus", ".mp4"}
)


def convert_to_standard_wav(audio_file_path: str) -> str:
    """Transcode phone-style audio containers to a 48 kHz stereo 16-bit WAV.

    The conversion is best-effort: whenever it cannot be performed the
    original path is returned unchanged so the caller can still try to load
    the file as-is.

    Args:
        audio_file_path: Path of the uploaded audio file. Falsy values and
            paths that do not exist are returned untouched.

    Returns:
        The path of a freshly written ``converted_<hex>.wav`` inside
        ``<current_dir>/data/shared_uploads`` when the extension is one of
        ``_AUTO_CONVERT_EXTENSIONS`` and ffmpeg succeeds; otherwise
        ``audio_file_path`` itself.
    """
    if not audio_file_path or not os.path.exists(audio_file_path):
        return audio_file_path

    ext = os.path.splitext(audio_file_path)[1].lower()
    if ext not in _AUTO_CONVERT_EXTENSIONS:
        return audio_file_path

    logger_name = "Auto-Convert"
    print(f"[{logger_name}] Detected format {ext}. Converting {audio_file_path} to standard WAV...")

    temp_wav = None
    try:
        uploads_dir = os.path.join(current_dir, "data", "shared_uploads")
        os.makedirs(uploads_dir, exist_ok=True)
        # Random name so concurrent conversions never collide.
        temp_wav = os.path.join(uploads_dir, f"converted_{uuid.uuid4().hex}.wav")
        cmd = [
            "ffmpeg", "-y",
            "-i", audio_file_path,
            "-ac", "2",
            "-ar", "48000",
            "-acodec", "pcm_s16le",
            temp_wav,
        ]
        subprocess.run(
            cmd,
            # Do not let ffmpeg read from the server's inherited stdin.
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except (OSError, subprocess.SubprocessError) as e:
        # OSError covers a missing ffmpeg binary and filesystem failures;
        # SubprocessError covers a non-zero ffmpeg exit status.
        detail = ""
        stderr_bytes = getattr(e, "stderr", None)
        if stderr_bytes:
            # str(CalledProcessError) omits the tool's output; show its tail.
            tail = stderr_bytes.decode("utf-8", errors="replace").strip()[-500:]
            detail = f"\n{tail}"
        print(f"[{logger_name}] Failed to convert: {e}{detail}", file=sys.stderr)
        # Remove a partially written output so failed runs do not pile up.
        if temp_wav and os.path.exists(temp_wav):
            try:
                os.remove(temp_wav)
            except OSError:
                pass  # cleanup is best-effort; the fallback below still applies
        return audio_file_path

    print(f"[{logger_name}] Converted successfully to: {temp_wav}")
    return temp_wav
def patched_load_audio_file(self, audio_file):
    """Normalize an upload payload to a path before the original loader runs.

    The payload is unwrapped in this order: a non-empty list is replaced by
    its first element; a dict is replaced by its truthy ``"path"`` entry,
    else its truthy ``"value"`` entry, else left as it is. If the result is
    a string it is passed through ``convert_to_standard_wav``. Whatever
    remains is forwarded to the original ``_load_audio_file``.

    Args:
        audio_file: A path string, a dict carrying ``"path"``/``"value"``,
            a list of either, or anything else (forwarded untouched).

    Returns:
        Whatever the original ``_load_audio_file`` returns.
    """
    if isinstance(audio_file, list) and audio_file:
        audio_file = audio_file[0]

    if isinstance(audio_file, dict):
        # Prefer "path", fall back to "value", otherwise keep the dict.
        audio_file = audio_file.get("path") or audio_file.get("value") or audio_file

    # Auto-convert mobile formats before handing the file to the decoder.
    if isinstance(audio_file, str):
        audio_file = convert_to_standard_wav(audio_file)

    return original_load_audio_file(self, audio_file)
| AceStepHandler._load_audio_file = patched_load_audio_file | |
# Patch: prevent the audio-separation crash on the Space itself and raise an explicit error when the reference audio file could not be processed.
| original_prepare_batch = AceStepHandler._prepare_batch | |
def patched_prepare_batch(self, *args, **kwargs):
    """Fail fast when the reference audio reached ``_prepare_batch`` as None.

    Raising here stops the run with a clear message instead of letting it
    continue and produce silent or default output.

    Args:
        *args: Positional arguments of the original ``_prepare_batch``;
            index 5 is checked for ``None`` (presumably the reference audio
            slot — TODO confirm against the wrapped signature).
        **kwargs: Keyword arguments of the original ``_prepare_batch``;
            ``refer_audios`` is checked for ``None`` when present.

    Returns:
        Whatever the original ``_prepare_batch`` returns.

    Raises:
        ValueError: If either checked reference-audio value is ``None``.
    """
    missing_by_keyword = "refer_audios" in kwargs and kwargs["refer_audios"] is None
    missing_by_position = len(args) > 5 and args[5] is None
    if missing_by_keyword or missing_by_position:
        raise ValueError("The uploaded Reference Audio could not be processed. Please make sure the audio file is valid, has sound, and is not corrupted.")
    return original_prepare_batch(self, *args, **kwargs)
| AceStepHandler._prepare_batch = patched_prepare_batch | |
# Patch: find and correct the generation-count (batch size) field at every stage of the generation call.
| import acestep.gradio_ui.events.results_handlers as res_h | |
| original_gen = res_h.generate_with_batch_management | |
def _batch_size_needs_reset(value):
    """Return True when *value* is None or a number smaller than 1."""
    # Non-numeric values are left alone: comparing them with ``< 1`` would
    # raise TypeError inside the patch itself.
    return value is None or (isinstance(value, (int, float)) and value < 1)


def patched_generate_with_batch_management(*args, **kwargs):
    """Wrap the original generator and coerce a missing/invalid batch size to 1.

    The same rule (None or a number below 1 becomes 1) is applied in three
    places:

    * positional argument 14 (presumably ``batch_size_input`` — TODO confirm
      against the wrapped function's signature),
    * the ``batch_size_input`` keyword argument,
    * any dict with a ``"batch_size_input"`` key found inside a yielded tuple
      (the dict is updated in place).

    Args:
        *args: Positional arguments forwarded to the original generator.
        **kwargs: Keyword arguments forwarded to the original generator.

    Yields:
        Each partial result of the original generator; tuple results are
        re-emitted as plain tuples, everything else is passed through as-is.
    """
    args_list = list(args)
    if len(args_list) > 14 and _batch_size_needs_reset(args_list[14]):
        args_list[14] = 1
    if "batch_size_input" in kwargs and _batch_size_needs_reset(kwargs["batch_size_input"]):
        kwargs["batch_size_input"] = 1

    for partial_result in original_gen(*args_list, **kwargs):
        if not isinstance(partial_result, tuple):
            yield partial_result
            continue
        for item in partial_result:
            if (
                isinstance(item, dict)
                and "batch_size_input" in item
                and _batch_size_needs_reset(item["batch_size_input"])
            ):
                item["batch_size_input"] = 1
        yield tuple(partial_result)
| res_h.generate_with_batch_management = patched_generate_with_batch_management | |
# Patch: remove the browser-side lock on numeric fields whose minimum value is 1.
| import gradio as gr | |
| original_number_init = gr.Number.__init__ | |
def patched_number_init(self, *args, **kwargs):
    """Construct a ``gr.Number`` with a ``minimum`` of 1 replaced by None.

    Only a ``minimum`` passed by keyword and equal to 1 is changed; every
    other argument is forwarded to the original ``__init__`` untouched.
    """
    if kwargs.get("minimum") == 1:
        # General fix for numeric fields constrained to a minimum of 1.
        kwargs["minimum"] = None
    original_number_init(self, *args, **kwargs)
| gr.Number.__init__ = patched_number_init | |
# Corrective patch: disable Flash Attention (incompatible on ZeroGPU) on both
# handler classes and make them report the stable "sdpa" implementation.
def _flash_attention_unavailable(self):
    """Always report that Flash Attention is not available."""
    return False


def _sdpa_attn_implementation(self):
    """Always select the "sdpa" attention implementation."""
    return "sdpa"


for _handler_cls in (AceStepHandler, LLMHandler):
    _handler_cls.is_flash_attention_available = _flash_attention_unavailable
    _handler_cls.is_flash_attn3_available = _flash_attention_unavailable
    _handler_cls.get_best_attn_implementation = _sdpa_attn_implementation
# Running on a HuggingFace Space is detected by the presence of SPACE_ID.
IS_HUGGINGFACE_SPACE = "SPACE_ID" in os.environ

# ZeroGPU is assumed for every HF Space (SPACE_HARDWARE is considered
# unreliable, so this is the safer default); it can also be forced anywhere
# by defining the ZEROGPU environment variable.
IS_ZEROGPU = IS_HUGGINGFACE_SPACE or "ZEROGPU" in os.environ
def get_gpu_memory_gb():
    """Return the total memory of CUDA device 0 in GiB, or 0 without a GPU.

    Any exception raised while querying CUDA is reported on stderr and
    treated as "no GPU" (0 is returned).
    """
    try:
        if not torch.cuda.is_available():
            return 0
        device_props = torch.cuda.get_device_properties(0)
        return device_props.total_memory / (1024**3)
    except Exception as e:
        print(f"Warning: Failed to detect GPU memory: {e}", file=sys.stderr)
        return 0
def get_persistent_storage_path():
    """Pick the directory used for storage, trying three locations in order.

    1. ``CHECKPOINT_DIR`` (development override): a trailing ``checkpoints``
       component is stripped; the result is used if it exists.
    2. ``/data`` (HuggingFace Space persistent storage): used if it exists
       and a small probe file can be written and removed.
    3. ``<current_dir>/data``: created on demand; not persistent.

    Returns:
        The selected directory path as a string.
    """
    # 1) Local checkpoint directory override (for development).
    override = os.environ.get("CHECKPOINT_DIR")
    if override:
        # Accept both POSIX and Windows separators before "checkpoints".
        if override.endswith(("/checkpoints", "\\checkpoints")):
            override = os.path.dirname(override)
        if not os.path.exists(override):
            print(f"Warning: CHECKPOINT_DIR path does not exist: {override}")
        else:
            print(f"Using local checkpoint directory (CHECKPOINT_DIR): {override}")
            return override

    # 2) HuggingFace Space persistent storage, if present and writable.
    hf_data_path = "/data"
    if os.path.exists(hf_data_path):
        probe_file = os.path.join(hf_data_path, ".write_test")
        try:
            with open(probe_file, 'w') as f:
                f.write("test")
            os.remove(probe_file)
            print(f"Using HuggingFace persistent storage: {hf_data_path}")
            return hf_data_path
        except OSError as e:  # PermissionError is a subclass of OSError
            print(f"Warning: /data exists but is not writable: {e}")

    # 3) App directory: non-persistent, but works without special config.
    fallback_path = os.path.join(current_dir, "data")
    os.makedirs(fallback_path, exist_ok=True)
    print(f"Using local storage (non-persistent): {fallback_path}")
    print("Note: To enable persistent storage, configure it in HuggingFace Space settings")
    return fallback_path
def main():
    """Main entry point for HuggingFace Space.

    Steps, in order:

    1. Read the DEBUG_UI flag; when set, model initialization is skipped.
    2. Resolve the storage path and probe GPU memory to decide on CPU
       offload (always disabled when ``IS_ZEROGPU`` is true).
    3. Create the DiT, LLM and dataset handlers and, unless DEBUG_UI is set,
       initialize the primary DiT model, the optional second DiT model and
       the 5Hz LM.
    4. Build the Gradio interface from the pre-initialized handlers, enable
       the request queue and launch the server on 0.0.0.0:7860.

    Environment variables read here: DEBUG_UI, SERVICE_MODE_DIT_MODEL,
    SERVICE_MODE_DIT_MODEL_2, SERVICE_MODE_LM_MODEL, SERVICE_MODE_BACKEND.
    """
    # Check for DEBUG_UI mode (skip model initialization for UI development)
    debug_ui = os.environ.get("DEBUG_UI", "").lower() in ("1", "true", "yes")
    if debug_ui:
        print("=" * 60)
        print("DEBUG_UI mode enabled - skipping model initialization")
        print("UI will be fully functional but generation is disabled")
        print("=" * 60)

    # Log ZeroGPU detection
    if IS_ZEROGPU:
        print("=" * 60)
        print("ZeroGPU environment detected")
        print("- Using spaces package for GPU allocation")
        print("- PyTorch backend forced for LLM (nano-vllm incompatible)")
        print("- GPU will be allocated on-demand during generation")
        print("=" * 60)

    # Get persistent storage path (auto-detect)
    persistent_storage_path = get_persistent_storage_path()

    # Detect GPU memory for auto-configuration
    # Note: In ZeroGPU, GPU may not be available during startup, so this may return 0
    gpu_memory_gb = get_gpu_memory_gb()

    # For ZeroGPU, we don't need CPU offload as GPU is allocated dynamically
    if IS_ZEROGPU:
        auto_offload = False
        print("ZeroGPU: CPU offload disabled (GPU allocated on-demand)")
    else:
        auto_offload = gpu_memory_gb > 0 and gpu_memory_gb < 16

    if not debug_ui and not IS_ZEROGPU:
        if auto_offload:
            print(f"Detected GPU memory: {gpu_memory_gb:.2f} GB (< 16GB)")
            print("Auto-enabling CPU offload to reduce GPU memory usage")
        elif gpu_memory_gb > 0:
            print(f"Detected GPU memory: {gpu_memory_gb:.2f} GB (>= 16GB)")
            print("CPU offload disabled by default")
        else:
            print("No GPU detected, running on CPU")

    # Create handler instances
    print("Creating handlers...")
    dit_handler = AceStepHandler(persistent_storage_path=persistent_storage_path)
    llm_handler = LLMHandler(persistent_storage_path=persistent_storage_path)
    dataset_handler = DatasetHandler()

    # Service mode configuration from environment variables
    config_path = os.environ.get(
        "SERVICE_MODE_DIT_MODEL",
        "acestep-v15-xl-turbo"
    )
    # Second DiT model - defaults to "acestep-v15-turbo" for the two-model
    # setup; an empty value disables the second model.
    config_path_2 = os.environ.get("SERVICE_MODE_DIT_MODEL_2", "acestep-v15-turbo").strip()
    lm_model_path = os.environ.get(
        "SERVICE_MODE_LM_MODEL",
        "acestep-5Hz-lm-1.7B"
    )

    # For ZeroGPU, force PyTorch backend (nano-vllm uses direct CUDA APIs)
    if IS_ZEROGPU:
        backend = "pt"
    else:
        backend = os.environ.get("SERVICE_MODE_BACKEND", "vllm")
    device = "auto"

    print("Service mode configuration:")
    print(f" DiT model 1: {config_path}")
    if config_path_2:
        print(f" DiT model 2: {config_path_2}")
    print(f" LM model: {lm_model_path}")
    print(f" Backend: {backend}")
    print(f" Offload to CPU: {auto_offload}")
    print(f" DEBUG_UI: {debug_ui}")
    print(f" ZeroGPU: {IS_ZEROGPU}")

    # Determine flash attention availability
    use_flash_attention = dit_handler.is_flash_attention_available()
    print(f" Flash Attention: {use_flash_attention}")

    # Initialize models (skip in DEBUG_UI mode)
    init_status = ""
    enable_generate = False
    dit_handler_2 = None

    if debug_ui:
        # In DEBUG_UI mode, skip all model initialization
        init_status = "⚠️ DEBUG_UI mode - models not loaded\nUI is functional but generation is disabled"
        enable_generate = False
        print("Skipping model initialization (DEBUG_UI mode)")
    else:
        # Initialize primary DiT model
        print(f"Initializing DiT model 1: {config_path}...")
        init_status, enable_generate = dit_handler.initialize_service(
            project_root=current_dir,
            config_path=config_path,
            device=device,
            use_flash_attention=use_flash_attention,
            compile_model=False,
            offload_to_cpu=auto_offload,
            offload_dit_to_cpu=False
        )
        if not enable_generate:
            print(f"Warning: DiT model 1 initialization issue: {init_status}", file=sys.stderr)
        else:
            print("DiT model 1 initialized successfully")

        # Initialize second DiT model if configured
        if config_path_2:
            print(f"Initializing DiT model 2: {config_path_2}...")
            dit_handler_2 = AceStepHandler(persistent_storage_path=persistent_storage_path)
            # Share VAE, text encoder, tokenizer and silence_latent from the
            # first handler to save memory
            init_status_2, enable_generate_2 = dit_handler_2.initialize_service(
                project_root=current_dir,
                config_path=config_path_2,
                device=device,
                use_flash_attention=use_flash_attention,
                compile_model=False,
                offload_to_cpu=auto_offload,
                offload_dit_to_cpu=False,
                # Share components from first handler
                shared_vae=dit_handler.vae,
                shared_text_encoder=dit_handler.text_encoder,
                shared_text_tokenizer=dit_handler.text_tokenizer,
                shared_silence_latent=dit_handler.silence_latent,
            )
            if not enable_generate_2:
                print(f"Warning: DiT model 2 initialization issue: {init_status_2}", file=sys.stderr)
                init_status += f"\n⚠️ DiT model 2 failed: {init_status_2}"
            else:
                print("DiT model 2 initialized successfully")
                init_status += f"\n✅ DiT model 2: {config_path_2}"

        # Initialize LM model
        checkpoint_dir = dit_handler._get_checkpoint_dir()
        print(f"Initializing 5Hz LM: {lm_model_path}...")
        lm_status, lm_success = llm_handler.initialize(
            checkpoint_dir=checkpoint_dir,
            lm_model_path=lm_model_path,
            backend=backend,
            device=device,
            offload_to_cpu=auto_offload,
            dtype=dit_handler.dtype
        )
        if lm_success:
            print("5Hz LM initialized successfully")
        else:
            print(f"Warning: 5Hz LM initialization failed: {lm_status}", file=sys.stderr)
        # The LM status line is shown in the UI whether or not loading succeeded.
        init_status += f"\n{lm_status}"

    # Build available models list for UI
    available_dit_models = [config_path]
    if config_path_2 and dit_handler_2 is not None:
        available_dit_models.append(config_path_2)

    # Prepare initialization parameters for UI
    init_params = {
        'pre_initialized': True,
        'service_mode': True,
        'checkpoint': None,
        'config_path': config_path,
        'config_path_2': config_path_2 if config_path_2 else None,
        'device': device,
        'init_llm': True,
        'lm_model_path': lm_model_path,
        'backend': backend,
        'use_flash_attention': use_flash_attention,
        'offload_to_cpu': auto_offload,
        'offload_dit_to_cpu': False,
        'init_status': init_status,
        'enable_generate': enable_generate,
        'dit_handler': dit_handler,
        'dit_handler_2': dit_handler_2,
        'available_dit_models': available_dit_models,
        'llm_handler': llm_handler,
        'language': 'en',
        'persistent_storage_path': persistent_storage_path,
        'debug_ui': debug_ui,
    }
    print("Service initialization completed!")

    # Create Gradio interface with pre-initialized handlers
    print("Creating Gradio interface...")
    demo = create_gradio_interface(
        dit_handler,
        llm_handler,
        dataset_handler,
        init_params=init_params,
        language='en'
    )

    # Dedicated shared-upload handler intended for passing audio files between
    # ZeroGPU containers.
    # NOTE(review): custom_upload is defined here but never registered on any
    # route in this function, so it is currently unreachable — confirm whether
    # it should be mounted on the app or removed.
    from fastapi import UploadFile, File
    import shutil

    async def custom_upload(file: UploadFile = File(...)):
        """Save an uploaded file under data/shared_uploads and return its path."""
        uploads_dir = os.path.join(current_dir, "data", "shared_uploads")
        os.makedirs(uploads_dir, exist_ok=True)
        # The filename is client-controlled: keep only its last path component
        # so values like "../../x" cannot escape uploads_dir, and fall back to
        # a generated name when nothing usable is left (or it is missing).
        safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
        if safe_name in ("", ".", ".."):
            safe_name = f"upload_{uuid.uuid4().hex}"
        file_path = os.path.join(uploads_dir, safe_name)
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
        return {"path": file_path}

    # Turn on the play and download buttons for every audio component
    for component in demo.blocks.values():
        if isinstance(component, gr.Audio):
            component.buttons = ["play", "download"]

    # Enable queue for multi-user support
    print("Enabling queue for multi-user support...")
    demo.queue(max_size=20)

    # Launch
    print("Launching server on 0.0.0.0:7860...")
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True,
    )
| if __name__ == "__main__": | |
| main() |