Ace-Step-v1.5 / app.py
Opera8's picture
Update app.py
ea53be2 verified
Raw
History Blame Contribute Delete
19.8 kB
"""
ACE-Step v1.5 - HuggingFace Space Entry Point
This file serves as the entry point for HuggingFace Space deployment.
It initializes the service and launches the Gradio interface.
ZeroGPU Support:
- ZeroGPU uses the 'spaces' package to intercept CUDA operations
- Models are loaded to "cuda" during startup but actual GPU allocation is deferred
- Handlers are registered globally so forked processes inherit them without pickling
- @spaces.GPU decorators are on top-level Gradio event handlers, not internal functions
- nano-vllm uses direct CUDA APIs that bypass spaces interception, so we use PyTorch backend
"""
import os
import sys
# Directory that contains this file (app.py); used as the project root.
current_dir = os.path.dirname(os.path.abspath(__file__))

# Put the bundled nano-vllm package (if present) at the front of sys.path.
nano_vllm_path = os.path.join(current_dir, "acestep", "third_parts", "nano-vllm")
if os.path.exists(nano_vllm_path):
    sys.path.insert(0, nano_vllm_path)

# Turn off Gradio analytics.
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"

# Remove proxy settings that may affect Gradio.
for proxy_var in ("http_proxy", "https_proxy", "HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY"):
    os.environ.pop(proxy_var, None)

# ZeroGPU support: `spaces` must be imported before torch for proper
# interception. HAS_SPACES records whether the package could be imported.
try:
    import spaces
except ImportError:
    HAS_SPACES = False
else:
    HAS_SPACES = True
import torch
from acestep.handler import AceStepHandler
from acestep.llm_inference import LLMHandler
from acestep.dataset_handler import DatasetHandler
from acestep.gradio_ui import create_gradio_interface
# Patch: automatically extract the plain file path and convert non-standard phone formats (e.g. m4a) to standard WAV
original_load_audio_file = AceStepHandler._load_audio_file
import subprocess
import uuid
def convert_to_standard_wav(audio_file_path: str) -> str:
    """
    Convert phone-style audio files to a standard WAV file using ffmpeg.

    Files whose (case-insensitive) extension is in the list below are
    transcoded to 48 kHz, stereo, 16-bit PCM WAV inside
    ``<current_dir>/data/shared_uploads``. Any other input (a falsy value, a
    path that does not exist, or a different extension) is returned unchanged.

    Args:
        audio_file_path: Path of the audio file to inspect.

    Returns:
        The path of the converted WAV file on success, otherwise
        ``audio_file_path`` unchanged. Conversion is best-effort: failures are
        reported on stderr and never raised to the caller.
    """
    if not audio_file_path or not os.path.exists(audio_file_path):
        return audio_file_path

    ext = os.path.splitext(audio_file_path)[1].lower()
    # Common phone/browser recording formats that are converted to WAV.
    if ext not in (".m4a", ".aac", ".3gp", ".amr", ".webm", ".ogg", ".opus", ".mp4"):
        return audio_file_path

    logger_name = "Auto-Convert"
    print(f"[{logger_name}] Detected format {ext}. Converting {audio_file_path} to standard WAV...")
    temp_wav = None
    try:
        uploads_dir = os.path.join(current_dir, "data", "shared_uploads")
        os.makedirs(uploads_dir, exist_ok=True)
        temp_wav = os.path.join(uploads_dir, f"converted_{uuid.uuid4().hex}.wav")
        # Transcode with the system ffmpeg binary.
        cmd = [
            "ffmpeg", "-y",
            "-i", audio_file_path,
            "-ac", "2",
            "-ar", "48000",
            "-acodec", "pcm_s16le",
            temp_wav,
        ]
        subprocess.run(
            cmd,
            stdin=subprocess.DEVNULL,  # keep ffmpeg from reading the server's stdin
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except Exception as e:
        # Deliberately broad: this is a best-effort fallback and must not raise.
        detail = str(e)
        if isinstance(e, subprocess.CalledProcessError) and e.stderr:
            # The exception text only carries the exit code; the reason is in
            # the last lines of ffmpeg's stderr.
            stderr_tail = e.stderr.decode(errors="replace").strip().splitlines()[-5:]
            detail = f"{detail}\n" + "\n".join(stderr_tail)
        print(f"[{logger_name}] Failed to convert: {detail}", file=sys.stderr)
        # Do not leave a truncated/empty WAV behind in the uploads directory.
        if temp_wav and os.path.exists(temp_wav):
            try:
                os.remove(temp_wav)
            except OSError:
                pass
        return audio_file_path

    print(f"[{logger_name}] Converted successfully to: {temp_wav}")
    return temp_wav
def patched_load_audio_file(self, audio_file):
    """
    Normalize the audio input before delegating to the original loader.

    A non-empty list is reduced to its first element. A dict is replaced by
    its truthy "path" value, or else its truthy "value" value; if neither is
    present the dict is passed through. A string path is run through
    convert_to_standard_wav before the original ``_load_audio_file`` is called.
    """
    source = audio_file
    if isinstance(source, list) and source:
        source = source[0]
    if isinstance(source, dict):
        source = source.get("path") or source.get("value") or source
    # Convert mobile formats before handing the file to the decoder.
    if isinstance(source, str):
        source = convert_to_standard_wav(source)
    return original_load_audio_file(self, source)


AceStepHandler._load_audio_file = patched_load_audio_file
# Patch: fail with an explicit error when the reference audio reaches
# _prepare_batch as None, instead of crashing later or producing silent/default
# output.
original_prepare_batch = AceStepHandler._prepare_batch


def patched_prepare_batch(self, *args, **kwargs):
    """
    Reject a ``None`` reference audio, then call the original ``_prepare_batch``.

    Raises:
        ValueError: if ``refer_audios`` is passed by keyword as None, or if the
            positional argument at index 5 is None.
    """
    # NOTE(review): index 5 is presumably the position of `refer_audios` in the
    # upstream signature (self excluded) - confirm against AceStepHandler.
    missing_by_keyword = "refer_audios" in kwargs and kwargs["refer_audios"] is None
    missing_by_position = len(args) > 5 and args[5] is None
    if missing_by_keyword or missing_by_position:
        raise ValueError("The uploaded Reference Audio could not be processed. Please make sure the audio file is valid, has sound, and is not corrupted.")
    return original_prepare_batch(self, *args, **kwargs)


AceStepHandler._prepare_batch = patched_prepare_batch
# Patch: find and correct the generation-count (batch size) field at every
# stage of the generate call, on the way in and in the yielded results.
import acestep.gradio_ui.events.results_handlers as res_h

original_gen = res_h.generate_with_batch_management


def _is_invalid_batch_size(value):
    """Return True when value is None or a number below 1."""
    return value is None or (isinstance(value, (int, float)) and value < 1)


def patched_generate_with_batch_management(*args, **kwargs):
    """
    Wrap ``generate_with_batch_management`` and clamp the batch size to 1.

    The positional argument at index 14 and the ``batch_size_input`` keyword
    are replaced by 1 when they are None or a number below 1. The same
    correction is applied to any dict carrying a ``batch_size_input`` key
    inside each tuple the wrapped generator yields. Non-numeric values are
    left untouched rather than compared, so they cannot raise TypeError here.

    Yields:
        Each partial result from the wrapped generator; tuples are re-emitted
        as plain tuples, everything else is passed through unchanged.
    """
    # NOTE(review): index 14 is presumably the position of `batch_size_input`
    # in the upstream signature - confirm against results_handlers.
    args_list = list(args)
    if len(args_list) > 14 and _is_invalid_batch_size(args_list[14]):
        args_list[14] = 1
    if "batch_size_input" in kwargs and _is_invalid_batch_size(kwargs["batch_size_input"]):
        kwargs["batch_size_input"] = 1

    for partial_result in original_gen(*args_list, **kwargs):
        if not isinstance(partial_result, tuple):
            yield partial_result
            continue
        for item in partial_result:
            if (
                isinstance(item, dict)
                and "batch_size_input" in item
                and _is_invalid_batch_size(item["batch_size_input"])
            ):
                item["batch_size_input"] = 1
        yield tuple(partial_result)


res_h.generate_with_batch_management = patched_generate_with_batch_management
# Patch: remove the browser-side lock on numeric fields declared with a
# minimum of 1.
import gradio as gr

original_number_init = gr.Number.__init__


def patched_number_init(self, *args, **kwargs):
    """
    Construct a ``gr.Number`` with a ``minimum=1`` keyword replaced by None.

    Only a ``minimum`` passed by keyword is inspected; every other argument is
    forwarded to the original ``__init__`` unchanged.
    """
    if kwargs.get("minimum") == 1:
        # Drops the lower bound for every field that asked for a minimum of 1.
        kwargs["minimum"] = None
    original_number_init(self, *args, **kwargs)


gr.Number.__init__ = patched_number_init
# Patch: disable Flash Attention (incompatible on ZeroGPU) on both handler
# classes and make them report "sdpa" as the attention implementation.
for _handler_cls in (AceStepHandler, LLMHandler):
    _handler_cls.is_flash_attention_available = lambda self: False
    _handler_cls.is_flash_attn3_available = lambda self: False
    _handler_cls.get_best_attn_implementation = lambda self: "sdpa"
del _handler_cls
# Environment detection.
# Running on a HuggingFace Space is inferred from the SPACE_ID variable.
IS_HUGGINGFACE_SPACE = "SPACE_ID" in os.environ
# SPACE_HARDWARE is considered unreliable, so every HF Space is treated as
# ZeroGPU (the safer default); the ZEROGPU variable forces it elsewhere.
IS_ZEROGPU = IS_HUGGINGFACE_SPACE or "ZEROGPU" in os.environ
def get_gpu_memory_gb():
    """
    Report the total memory of CUDA device 0 in GiB.

    Returns 0 when CUDA is unavailable or when the query fails; a failure is
    reported on stderr.
    """
    try:
        if not torch.cuda.is_available():
            return 0
        device_props = torch.cuda.get_device_properties(0)
        return device_props.total_memory / (1024**3)
    except Exception as e:
        print(f"Warning: Failed to detect GPU memory: {e}", file=sys.stderr)
        return 0
def get_persistent_storage_path():
    """
    Pick the storage root, trying three locations in order.

    1. ``CHECKPOINT_DIR`` (development override). A trailing ``/checkpoints``
       or ``\\checkpoints`` component is stripped; the result is used only if
       it exists.
    2. ``/data`` (HuggingFace Space persistent storage) if it exists and a
       probe file can be written and removed there.
    3. ``<current_dir>/data``, created on demand (non-persistent).

    Returns:
        The selected directory path.
    """
    override = os.environ.get("CHECKPOINT_DIR")
    if override:
        if override.endswith(("/checkpoints", "\\checkpoints")):
            override = os.path.dirname(override)
        if os.path.exists(override):
            print(f"Using local checkpoint directory (CHECKPOINT_DIR): {override}")
            return override
        print(f"Warning: CHECKPOINT_DIR path does not exist: {override}")

    # HuggingFace Space persistent storage: usable only if it is writable.
    hf_data_path = "/data"
    if os.path.exists(hf_data_path):
        probe = os.path.join(hf_data_path, ".write_test")
        try:
            with open(probe, 'w') as handle:
                handle.write("test")
            os.remove(probe)
        except OSError as e:  # PermissionError is a subclass of OSError
            print(f"Warning: /data exists but is not writable: {e}")
        else:
            print(f"Using HuggingFace persistent storage: {hf_data_path}")
            return hf_data_path

    # Last resort: a directory next to the app (works without special config).
    fallback_path = os.path.join(current_dir, "data")
    os.makedirs(fallback_path, exist_ok=True)
    print(f"Using local storage (non-persistent): {fallback_path}")
    print("Note: To enable persistent storage, configure it in HuggingFace Space settings")
    return fallback_path
def main():
    """
    Main entry point for the HuggingFace Space.

    Steps, in order:
      1. Read DEBUG_UI and log the detected environment.
      2. Choose the storage path and decide on CPU offload (always off on
         ZeroGPU, otherwise on when a GPU with less than 16 GB is detected).
      3. Create the handlers and, unless DEBUG_UI is set, initialize the
         primary DiT model, the optional second DiT model and the 5Hz LM.
      4. Build the Gradio interface, register the ``/custom_upload`` endpoint,
         enable the queue and launch the server on 0.0.0.0:7860.

    Configuration is read from the environment: DEBUG_UI,
    SERVICE_MODE_DIT_MODEL, SERVICE_MODE_DIT_MODEL_2, SERVICE_MODE_LM_MODEL and
    SERVICE_MODE_BACKEND.
    """
    # DEBUG_UI mode skips model initialization (for UI development).
    debug_ui = os.environ.get("DEBUG_UI", "").lower() in ("1", "true", "yes")
    if debug_ui:
        print("=" * 60)
        print("DEBUG_UI mode enabled - skipping model initialization")
        print("UI will be fully functional but generation is disabled")
        print("=" * 60)

    # Log ZeroGPU detection
    if IS_ZEROGPU:
        print("=" * 60)
        print("ZeroGPU environment detected")
        print("- Using spaces package for GPU allocation")
        print("- PyTorch backend forced for LLM (nano-vllm incompatible)")
        print("- GPU will be allocated on-demand during generation")
        print("=" * 60)

    # Get persistent storage path (auto-detect)
    persistent_storage_path = get_persistent_storage_path()

    # Detect GPU memory for auto-configuration.
    # Note: In ZeroGPU, GPU may not be available during startup, so this may return 0
    gpu_memory_gb = get_gpu_memory_gb()

    # For ZeroGPU, we don't need CPU offload as GPU is allocated dynamically
    if IS_ZEROGPU:
        auto_offload = False
        print("ZeroGPU: CPU offload disabled (GPU allocated on-demand)")
    else:
        auto_offload = 0 < gpu_memory_gb < 16

    if not debug_ui and not IS_ZEROGPU:
        if auto_offload:
            print(f"Detected GPU memory: {gpu_memory_gb:.2f} GB (< 16GB)")
            print("Auto-enabling CPU offload to reduce GPU memory usage")
        elif gpu_memory_gb > 0:
            print(f"Detected GPU memory: {gpu_memory_gb:.2f} GB (>= 16GB)")
            print("CPU offload disabled by default")
        else:
            print("No GPU detected, running on CPU")

    # Create handler instances
    print("Creating handlers...")
    dit_handler = AceStepHandler(persistent_storage_path=persistent_storage_path)
    llm_handler = LLMHandler(persistent_storage_path=persistent_storage_path)
    dataset_handler = DatasetHandler()

    # Service mode configuration from environment variables
    config_path = os.environ.get(
        "SERVICE_MODE_DIT_MODEL",
        "acestep-v15-xl-turbo"
    )
    # Second DiT model - defaults to acestep-v15-turbo for the two-model setup;
    # an empty (or whitespace-only) value disables it.
    config_path_2 = os.environ.get("SERVICE_MODE_DIT_MODEL_2", "acestep-v15-turbo").strip()
    lm_model_path = os.environ.get(
        "SERVICE_MODE_LM_MODEL",
        "acestep-5Hz-lm-1.7B"
    )

    # For ZeroGPU, force PyTorch backend (nano-vllm uses direct CUDA APIs)
    if IS_ZEROGPU:
        backend = "pt"
    else:
        backend = os.environ.get("SERVICE_MODE_BACKEND", "vllm")
    device = "auto"

    print("Service mode configuration:")
    print(f" DiT model 1: {config_path}")
    if config_path_2:
        print(f" DiT model 2: {config_path_2}")
    print(f" LM model: {lm_model_path}")
    print(f" Backend: {backend}")
    print(f" Offload to CPU: {auto_offload}")
    print(f" DEBUG_UI: {debug_ui}")
    print(f" ZeroGPU: {IS_ZEROGPU}")

    # Determine flash attention availability
    use_flash_attention = dit_handler.is_flash_attention_available()
    print(f" Flash Attention: {use_flash_attention}")

    # Initialize models (skip in DEBUG_UI mode)
    init_status = ""
    enable_generate = False
    dit_handler_2 = None

    if debug_ui:
        # In DEBUG_UI mode, skip all model initialization
        init_status = "⚠️ DEBUG_UI mode - models not loaded\nUI is functional but generation is disabled"
        enable_generate = False
        print("Skipping model initialization (DEBUG_UI mode)")
    else:
        # Initialize primary DiT model
        print(f"Initializing DiT model 1: {config_path}...")
        init_status, enable_generate = dit_handler.initialize_service(
            project_root=current_dir,
            config_path=config_path,
            device=device,
            use_flash_attention=use_flash_attention,
            compile_model=False,
            offload_to_cpu=auto_offload,
            offload_dit_to_cpu=False
        )
        if not enable_generate:
            print(f"Warning: DiT model 1 initialization issue: {init_status}", file=sys.stderr)
        else:
            print("DiT model 1 initialized successfully")

        # Initialize second DiT model if configured
        if config_path_2:
            print(f"Initializing DiT model 2: {config_path_2}...")
            dit_handler_2 = AceStepHandler(persistent_storage_path=persistent_storage_path)
            # Share VAE, text_encoder, and silence_latent from the first handler to save memory
            init_status_2, enable_generate_2 = dit_handler_2.initialize_service(
                project_root=current_dir,
                config_path=config_path_2,
                device=device,
                use_flash_attention=use_flash_attention,
                compile_model=False,
                offload_to_cpu=auto_offload,
                offload_dit_to_cpu=False,
                # Share components from first handler
                shared_vae=dit_handler.vae,
                shared_text_encoder=dit_handler.text_encoder,
                shared_text_tokenizer=dit_handler.text_tokenizer,
                shared_silence_latent=dit_handler.silence_latent,
            )
            # NOTE(review): when initialization fails, dit_handler_2 is still
            # non-None and is listed in available_dit_models below - confirm
            # that exposing a failed model in the UI is intended.
            if not enable_generate_2:
                print(f"Warning: DiT model 2 initialization issue: {init_status_2}", file=sys.stderr)
                init_status += f"\n⚠️ DiT model 2 failed: {init_status_2}"
            else:
                print("DiT model 2 initialized successfully")
                init_status += f"\n✅ DiT model 2: {config_path_2}"

        # Initialize LM model
        checkpoint_dir = dit_handler._get_checkpoint_dir()
        print(f"Initializing 5Hz LM: {lm_model_path}...")
        lm_status, lm_success = llm_handler.initialize(
            checkpoint_dir=checkpoint_dir,
            lm_model_path=lm_model_path,
            backend=backend,
            device=device,
            offload_to_cpu=auto_offload,
            dtype=dit_handler.dtype
        )
        if lm_success:
            print("5Hz LM initialized successfully")
        else:
            print(f"Warning: 5Hz LM initialization failed: {lm_status}", file=sys.stderr)
        # The LM status is shown in the UI whether or not it succeeded.
        init_status += f"\n{lm_status}"

    # Build available models list for UI
    available_dit_models = [config_path]
    if config_path_2 and dit_handler_2 is not None:
        available_dit_models.append(config_path_2)

    # Prepare initialization parameters for UI
    init_params = {
        'pre_initialized': True,
        'service_mode': True,
        'checkpoint': None,
        'config_path': config_path,
        'config_path_2': config_path_2 if config_path_2 else None,
        'device': device,
        'init_llm': True,
        'lm_model_path': lm_model_path,
        'backend': backend,
        'use_flash_attention': use_flash_attention,
        'offload_to_cpu': auto_offload,
        'offload_dit_to_cpu': False,
        'init_status': init_status,
        'enable_generate': enable_generate,
        'dit_handler': dit_handler,
        'dit_handler_2': dit_handler_2,
        'available_dit_models': available_dit_models,
        'llm_handler': llm_handler,
        'language': 'en',
        'persistent_storage_path': persistent_storage_path,
        'debug_ui': debug_ui,
    }
    print("Service initialization completed!")

    # Create Gradio interface with pre-initialized handlers
    print("Creating Gradio interface...")
    demo = create_gradio_interface(
        dit_handler,
        llm_handler,
        dataset_handler,
        init_params=init_params,
        language='en'
    )

    # Dedicated shared-upload endpoint so uploaded audio files can be shared
    # between ZeroGPU containers.
    from fastapi import UploadFile, File
    import shutil

    # Plain `def` (not `async def`): the body does blocking file I/O, and
    # FastAPI runs non-async path operations in a worker thread.
    @demo.app.post("/custom_upload")
    def custom_upload(file: UploadFile = File(...)):
        # Store uploads under the project's data directory.
        uploads_dir = os.path.join(current_dir, "data", "shared_uploads")
        os.makedirs(uploads_dir, exist_ok=True)
        # The client-supplied filename is untrusted: keep only its final
        # component so "../" segments or an absolute path cannot escape
        # uploads_dir, and tolerate a missing filename.
        client_name = (file.filename or "").replace("\\", "/").replace("\x00", "")
        client_name = os.path.basename(client_name)
        if client_name in ("", ".", ".."):
            client_name = "upload"
        # A unique prefix keeps same-named uploads from overwriting each
        # other; the original extension is preserved for format detection.
        file_path = os.path.join(uploads_dir, f"{uuid.uuid4().hex}_{client_name}")
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
        return {"path": file_path}

    # Enable the play and download buttons on every audio component.
    for component in demo.blocks.values():
        if isinstance(component, gr.Audio):
            component.buttons = ["play", "download"]

    # Enable queue for multi-user support
    print("Enabling queue for multi-user support...")
    demo.queue(max_size=20)

    # Launch
    print("Launching server on 0.0.0.0:7860...")
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True,
    )


if __name__ == "__main__":
    main()