Ace-Step-v1.5

Running on Zero

App Files Files Community

Ace-Step-v1.5 / app.py

Opera8

Update app.py

ea53be2 verified 12 days ago

Raw

History Blame Contribute Delete

19.8 kB

	"""
	ACE-Step v1.5 - HuggingFace Space Entry Point
	This file serves as the entry point for HuggingFace Space deployment.
	It initializes the service and launches the Gradio interface.
	ZeroGPU Support:
	- ZeroGPU uses the 'spaces' package to intercept CUDA operations
	- Models are loaded to "cuda" during startup but actual GPU allocation is deferred
	- Handlers are registered globally so forked processes inherit them without pickling
	- @spaces.GPU decorators are on top-level Gradio event handlers, not internal functions
	- nano-vllm uses direct CUDA APIs that bypass spaces interception, so we use PyTorch backend
	"""
	import os
	import sys

	# Absolute directory that contains this app.py; other paths are built from it.
	current_dir = os.path.dirname(os.path.abspath(__file__))

	# Make the bundled nano-vllm package importable when it ships with the repo.
	nano_vllm_path = os.path.join(current_dir, "acestep", "third_parts", "nano-vllm")
	if os.path.exists(nano_vllm_path):
	    sys.path.insert(0, nano_vllm_path)

	# Turn Gradio analytics off.
	os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"

	# Drop proxy settings that may affect Gradio.
	for proxy_var in ("http_proxy", "https_proxy", "HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY"):
	    os.environ.pop(proxy_var, None)

	# ZeroGPU support: `spaces` must be imported before torch so it can intercept
	# CUDA calls. When the package is not installed we just record its absence.
	try:
	    import spaces
	except ImportError:
	    HAS_SPACES = False
	else:
	    HAS_SPACES = True

	import torch
	from acestep.handler import AceStepHandler
	from acestep.llm_inference import LLMHandler
	from acestep.dataset_handler import DatasetHandler
	from acestep.gradio_ui import create_gradio_interface

	# Patch: automatically extract the file path from the uploaded value and convert non-standard phone formats (such as m4a) to standard WAV
	original_load_audio_file = AceStepHandler._load_audio_file
	import subprocess
	import uuid

	def convert_to_standard_wav(audio_file_path: str) -> str:
	    """Convert phone/browser audio containers to a standard WAV file with ffmpeg.

	    Files whose extension is one of the listed mobile formats are transcoded
	    to 16-bit PCM, stereo, 48 kHz WAV inside ``<current_dir>/data/shared_uploads``.
	    Every other input is returned unchanged.

	    Args:
	        audio_file_path: Path of the uploaded audio file (may be empty/None).

	    Returns:
	        The path of the converted WAV on success; otherwise the original
	        ``audio_file_path`` (missing file, extension not in the list, or
	        conversion failure). The function is best-effort and never raises
	        for conversion problems.
	    """
	    if not audio_file_path or not os.path.exists(audio_file_path):
	        return audio_file_path

	    ext = os.path.splitext(audio_file_path)[1].lower()
	    # Common phone/browser formats that need conversion to standard WAV.
	    if ext not in {".m4a", ".aac", ".3gp", ".amr", ".webm", ".ogg", ".opus", ".mp4"}:
	        return audio_file_path

	    logger_name = "Auto-Convert"
	    print(f"[{logger_name}] Detected format {ext}. Converting {audio_file_path} to standard WAV...")

	    temp_wav = None
	    try:
	        uploads_dir = os.path.join(current_dir, "data", "shared_uploads")
	        os.makedirs(uploads_dir, exist_ok=True)

	        temp_wav = os.path.join(uploads_dir, f"converted_{uuid.uuid4().hex}.wav")

	        # -nostdin (plus stdin=DEVNULL) stops ffmpeg from reading the
	        # server's standard input while it runs.
	        cmd = [
	            "ffmpeg", "-nostdin", "-y",
	            "-i", audio_file_path,
	            "-ac", "2",
	            "-ar", "48000",
	            "-acodec", "pcm_s16le",
	            temp_wav,
	        ]

	        subprocess.run(
	            cmd,
	            stdin=subprocess.DEVNULL,
	            stdout=subprocess.PIPE,
	            stderr=subprocess.PIPE,
	            check=True,
	        )
	    except subprocess.CalledProcessError as e:
	        # The exception text only carries the exit status; append the end of
	        # ffmpeg's captured stderr so the actual reason is visible in the log.
	        stderr_tail = (e.stderr or b"").decode("utf-8", errors="replace")[-500:]
	        print(f"[{logger_name}] Failed to convert: {e}\n{stderr_tail}", file=sys.stderr)
	    except Exception as e:
	        # Deliberately broad: conversion is best-effort and must not crash loading.
	        print(f"[{logger_name}] Failed to convert: {e}", file=sys.stderr)
	    else:
	        print(f"[{logger_name}] Converted successfully to: {temp_wav}")
	        return temp_wav

	    # Conversion failed: remove any partial output so broken files do not pile up.
	    if temp_wav and os.path.exists(temp_wav):
	        try:
	            os.remove(temp_wav)
	        except OSError:
	            pass
	    return audio_file_path

	def patched_load_audio_file(self, audio_file):
	    """Normalise the uploaded value to a path, convert it, then load it.

	    Accepts the value as a non-empty list (first element is used), a dict
	    (its truthy ``"path"``, otherwise its truthy ``"value"``), or anything
	    else, which is passed through untouched. String paths are run through
	    ``convert_to_standard_wav`` before being handed to the original loader.
	    """
	    # A non-empty list: only its first entry is considered.
	    if isinstance(audio_file, list) and audio_file:
	        audio_file = audio_file[0]

	    # A dict payload: prefer "path", fall back to "value", else keep the dict.
	    if isinstance(audio_file, dict):
	        audio_file = audio_file.get("path") or audio_file.get("value") or audio_file

	    # Convert mobile formats automatically before handing off to the decoder.
	    if isinstance(audio_file, str):
	        audio_file = convert_to_standard_wav(audio_file)

	    return original_load_audio_file(self, audio_file)


	AceStepHandler._load_audio_file = patched_load_audio_file

	# Patch: raise an explicit error when the reference audio was not processed,
	# instead of letting the Space crash later or produce silent/default output.
	original_prepare_batch = AceStepHandler._prepare_batch


	def patched_prepare_batch(self, *args, **kwargs):
	    """Wrap ``AceStepHandler._prepare_batch`` with a reference-audio check.

	    All positional and keyword arguments are forwarded unchanged to the
	    original method.

	    Raises:
	        ValueError: If ``refer_audios`` is passed by keyword as ``None``, or
	            if the sixth positional argument (index 5 after ``self``) is
	            ``None``.
	    """
	    # NOTE(review): index 5 is presumed to be the positional slot of
	    # `refer_audios` — confirm against AceStepHandler._prepare_batch.
	    missing_keyword = "refer_audios" in kwargs and kwargs["refer_audios"] is None
	    missing_positional = len(args) > 5 and args[5] is None
	    if missing_keyword or missing_positional:
	        raise ValueError("The uploaded Reference Audio could not be processed. Please make sure the audio file is valid, has sound, and is not corrupted.")

	    return original_prepare_batch(self, *args, **kwargs)


	AceStepHandler._prepare_batch = patched_prepare_batch

	# Patch: find and correct the "number of generations" (batch size) field at
	# every stage of the call: positional argument, keyword argument and results.
	import acestep.gradio_ui.events.results_handlers as res_h
	original_gen = res_h.generate_with_batch_management


	def _is_invalid_batch_size(value):
	    """Return True when a batch size is missing (None) or a number below 1."""
	    return value is None or (isinstance(value, (int, float)) and value < 1)


	def patched_generate_with_batch_management(*args, **kwargs):
	    """Wrap ``generate_with_batch_management`` and clamp invalid batch sizes to 1.

	    The positional argument at index 14 and the ``batch_size_input`` keyword
	    are replaced by 1 when they are ``None`` or a number below 1. Every tuple
	    yielded by the original generator is scanned for dicts that carry a
	    ``"batch_size_input"`` key, which get the same correction in place.

	    Yields:
	        The items produced by the original generator.
	    """
	    args_list = list(args)
	    # NOTE(review): index 14 is presumed to be the positional slot of
	    # `batch_size_input` — confirm against the wrapped function's signature.
	    if len(args_list) > 14 and _is_invalid_batch_size(args_list[14]):
	        args_list[14] = 1
	    if "batch_size_input" in kwargs and _is_invalid_batch_size(kwargs["batch_size_input"]):
	        kwargs["batch_size_input"] = 1

	    for partial_result in original_gen(*args_list, **kwargs):
	        if isinstance(partial_result, tuple):
	            for item in partial_result:
	                if (
	                    isinstance(item, dict)
	                    and "batch_size_input" in item
	                    and _is_invalid_batch_size(item["batch_size_input"])
	                ):
	                    # Dicts are fixed in place, so the tuple needs no rebuild.
	                    item["batch_size_input"] = 1
	        yield partial_result


	res_h.generate_with_batch_management = patched_generate_with_batch_management

	# Patch: remove the browser-side lock on numeric fields whose minimum is 1.
	import functools

	import gradio as gr
	original_number_init = gr.Number.__init__


	# functools.wraps keeps the original name and docstring and sets __wrapped__,
	# so inspect.signature() still reports the real gr.Number.__init__ parameters.
	@functools.wraps(original_number_init)
	def patched_number_init(self, *args, **kwargs):
	    """Construct ``gr.Number`` as usual, dropping a keyword ``minimum`` equal to 1.

	    Only a ``minimum`` passed by keyword is inspected; all other arguments
	    are forwarded unchanged to the original ``__init__``.
	    """
	    if kwargs.get("minimum") == 1:
	        kwargs["minimum"] = None  # general fix for fields limited to a minimum of 1
	    original_number_init(self, *args, **kwargs)


	gr.Number.__init__ = patched_number_init

	# Corrective patch: disable Flash Attention (incompatible on ZeroGPU) and use
	# the stable "sdpa" attention implementation for both handler classes.
	for _handler_cls in (AceStepHandler, LLMHandler):
	    _handler_cls.is_flash_attention_available = lambda self: False
	    _handler_cls.is_flash_attn3_available = lambda self: False
	    _handler_cls.get_best_attn_implementation = lambda self: "sdpa"
	del _handler_cls

	# Environment detection.
	# A HuggingFace Space is recognised by the presence of the SPACE_ID variable.
	IS_HUGGINGFACE_SPACE = "SPACE_ID" in os.environ
	# SPACE_HARDWARE is unreliable, so every HF Space is treated as ZeroGPU (the
	# safer default); the ZEROGPU variable forces the same mode elsewhere.
	IS_ZEROGPU = IS_HUGGINGFACE_SPACE or "ZEROGPU" in os.environ


	def get_gpu_memory_gb():
	    """
	    Report the total memory of CUDA device 0 in GB.

	    Returns 0 when CUDA is unavailable or when the query raises; in the
	    latter case a warning is printed to stderr.
	    """
	    try:
	        if not torch.cuda.is_available():
	            return 0
	        device_props = torch.cuda.get_device_properties(0)
	        return device_props.total_memory / (1024**3)
	    except Exception as e:
	        print(f"Warning: Failed to detect GPU memory: {e}", file=sys.stderr)
	        return 0


	def get_persistent_storage_path():
	    """
	    Pick the directory used for persistent storage.

	    Candidates are tried in order:
	    1. ``CHECKPOINT_DIR`` from the environment (a trailing ``checkpoints``
	       component is stripped), if that path exists.
	    2. ``/data``, if it exists and a probe file can be written there.
	    3. ``<current_dir>/data``, created on demand (non-persistent fallback).
	    """
	    # 1) Development override through CHECKPOINT_DIR.
	    override = os.environ.get("CHECKPOINT_DIR")
	    if override:
	        if override.endswith(("/checkpoints", "\\checkpoints")):
	            override = os.path.dirname(override)
	        if os.path.exists(override):
	            print(f"Using local checkpoint directory (CHECKPOINT_DIR): {override}")
	            return override
	        print(f"Warning: CHECKPOINT_DIR path does not exist: {override}")

	    # 2) HuggingFace Space persistent storage, verified with a write probe.
	    hf_data_path = "/data"
	    if os.path.exists(hf_data_path):
	        probe = os.path.join(hf_data_path, ".write_test")
	        try:
	            with open(probe, 'w') as handle:
	                handle.write("test")
	            os.remove(probe)
	        except OSError as e:  # PermissionError is a subclass of OSError
	            print(f"Warning: /data exists but is not writable: {e}")
	        else:
	            print(f"Using HuggingFace persistent storage: {hf_data_path}")
	            return hf_data_path

	    # 3) App-local directory: works without special config but is not persistent.
	    fallback_path = os.path.join(current_dir, "data")
	    os.makedirs(fallback_path, exist_ok=True)
	    print(f"Using local storage (non-persistent): {fallback_path}")
	    print("Note: To enable persistent storage, configure it in HuggingFace Space settings")
	    return fallback_path


	def _safe_upload_filename(filename):
	    """Return a unique, directory-free file name for a client-supplied name."""
	    # Normalise Windows separators so basename() also strips them on POSIX,
	    # and drop NUL bytes that open() would reject.
	    cleaned = (filename or "").replace("\\", "/").replace("\x00", "")
	    base = os.path.basename(cleaned).strip()
	    if base in ("", ".", ".."):
	        base = "upload"
	    # The random prefix keeps same-named uploads from overwriting each other;
	    # the original extension is preserved at the end of the name.
	    return f"{uuid.uuid4().hex}_{base}"


	def main():
	    """Main entry point for HuggingFace Space.

	    Reads configuration from environment variables, creates the handlers,
	    initializes the models (unless DEBUG_UI is set), builds the Gradio
	    interface, registers the ``/custom_upload`` endpoint and launches the
	    server on 0.0.0.0:7860.
	    """

	    # Check for DEBUG_UI mode (skip model initialization for UI development)
	    debug_ui = os.environ.get("DEBUG_UI", "").lower() in ("1", "true", "yes")
	    if debug_ui:
	        print("=" * 60)
	        print("DEBUG_UI mode enabled - skipping model initialization")
	        print("UI will be fully functional but generation is disabled")
	        print("=" * 60)

	    # Log ZeroGPU detection
	    if IS_ZEROGPU:
	        print("=" * 60)
	        print("ZeroGPU environment detected")
	        print("- Using spaces package for GPU allocation")
	        print("- PyTorch backend forced for LLM (nano-vllm incompatible)")
	        print("- GPU will be allocated on-demand during generation")
	        print("=" * 60)

	    # Get persistent storage path (auto-detect)
	    persistent_storage_path = get_persistent_storage_path()

	    # Detect GPU memory for auto-configuration
	    # Note: In ZeroGPU, GPU may not be available during startup, so this may return 0
	    gpu_memory_gb = get_gpu_memory_gb()

	    # For ZeroGPU, we don't need CPU offload as GPU is allocated dynamically
	    if IS_ZEROGPU:
	        auto_offload = False
	        print("ZeroGPU: CPU offload disabled (GPU allocated on-demand)")
	    else:
	        auto_offload = 0 < gpu_memory_gb < 16

	    if not debug_ui and not IS_ZEROGPU:
	        if auto_offload:
	            print(f"Detected GPU memory: {gpu_memory_gb:.2f} GB (< 16GB)")
	            print("Auto-enabling CPU offload to reduce GPU memory usage")
	        elif gpu_memory_gb > 0:
	            print(f"Detected GPU memory: {gpu_memory_gb:.2f} GB (>= 16GB)")
	            print("CPU offload disabled by default")
	        else:
	            print("No GPU detected, running on CPU")

	    # Create handler instances
	    print("Creating handlers...")
	    dit_handler = AceStepHandler(persistent_storage_path=persistent_storage_path)
	    llm_handler = LLMHandler(persistent_storage_path=persistent_storage_path)
	    dataset_handler = DatasetHandler()

	    # Service mode configuration from environment variables
	    config_path = os.environ.get(
	        "SERVICE_MODE_DIT_MODEL",
	        "acestep-v15-xl-turbo"
	    )
	    # Second DiT model - defaults to acestep-v15-turbo for the two-model setup;
	    # an empty value disables the second model.
	    config_path_2 = os.environ.get("SERVICE_MODE_DIT_MODEL_2", "acestep-v15-turbo").strip()

	    lm_model_path = os.environ.get(
	        "SERVICE_MODE_LM_MODEL",
	        "acestep-5Hz-lm-1.7B"
	    )
	    # For ZeroGPU, force PyTorch backend (nano-vllm uses direct CUDA APIs)
	    if IS_ZEROGPU:
	        backend = "pt"
	    else:
	        backend = os.environ.get("SERVICE_MODE_BACKEND", "vllm")
	    device = "auto"

	    print("Service mode configuration:")
	    print(f" DiT model 1: {config_path}")
	    if config_path_2:
	        print(f" DiT model 2: {config_path_2}")
	    print(f" LM model: {lm_model_path}")
	    print(f" Backend: {backend}")
	    print(f" Offload to CPU: {auto_offload}")
	    print(f" DEBUG_UI: {debug_ui}")
	    print(f" ZeroGPU: {IS_ZEROGPU}")

	    # Determine flash attention availability
	    use_flash_attention = dit_handler.is_flash_attention_available()
	    print(f" Flash Attention: {use_flash_attention}")

	    # Initialize models (skip in DEBUG_UI mode)
	    init_status = ""
	    enable_generate = False
	    dit_handler_2 = None

	    if debug_ui:
	        # In DEBUG_UI mode, skip all model initialization
	        init_status = "⚠️ DEBUG_UI mode - models not loaded\nUI is functional but generation is disabled"
	        enable_generate = False
	        print("Skipping model initialization (DEBUG_UI mode)")
	    else:
	        # Initialize primary DiT model
	        print(f"Initializing DiT model 1: {config_path}...")
	        init_status, enable_generate = dit_handler.initialize_service(
	            project_root=current_dir,
	            config_path=config_path,
	            device=device,
	            use_flash_attention=use_flash_attention,
	            compile_model=False,
	            offload_to_cpu=auto_offload,
	            offload_dit_to_cpu=False
	        )

	        if not enable_generate:
	            print(f"Warning: DiT model 1 initialization issue: {init_status}", file=sys.stderr)
	        else:
	            print("DiT model 1 initialized successfully")

	        # Initialize second DiT model if configured
	        if config_path_2:
	            print(f"Initializing DiT model 2: {config_path_2}...")
	            dit_handler_2 = AceStepHandler(persistent_storage_path=persistent_storage_path)

	            # Share VAE, text_encoder, and silence_latent from the first handler to save memory
	            init_status_2, enable_generate_2 = dit_handler_2.initialize_service(
	                project_root=current_dir,
	                config_path=config_path_2,
	                device=device,
	                use_flash_attention=use_flash_attention,
	                compile_model=False,
	                offload_to_cpu=auto_offload,
	                offload_dit_to_cpu=False,
	                # Share components from first handler
	                shared_vae=dit_handler.vae,
	                shared_text_encoder=dit_handler.text_encoder,
	                shared_text_tokenizer=dit_handler.text_tokenizer,
	                shared_silence_latent=dit_handler.silence_latent,
	            )

	            # NOTE(review): a handler whose initialization failed is still
	            # kept and later listed in available_dit_models — confirm intended.
	            if not enable_generate_2:
	                print(f"Warning: DiT model 2 initialization issue: {init_status_2}", file=sys.stderr)
	                init_status += f"\n⚠️ DiT model 2 failed: {init_status_2}"
	            else:
	                print("DiT model 2 initialized successfully")
	                init_status += f"\n✅ DiT model 2: {config_path_2}"

	        # Initialize LM model
	        checkpoint_dir = dit_handler._get_checkpoint_dir()
	        print(f"Initializing 5Hz LM: {lm_model_path}...")
	        lm_status, lm_success = llm_handler.initialize(
	            checkpoint_dir=checkpoint_dir,
	            lm_model_path=lm_model_path,
	            backend=backend,
	            device=device,
	            offload_to_cpu=auto_offload,
	            dtype=dit_handler.dtype
	        )

	        if lm_success:
	            print("5Hz LM initialized successfully")
	        else:
	            print(f"Warning: 5Hz LM initialization failed: {lm_status}", file=sys.stderr)
	        # The LM status is shown in the UI whether or not initialization succeeded.
	        init_status += f"\n{lm_status}"

	    # Build available models list for UI
	    available_dit_models = [config_path]
	    if config_path_2 and dit_handler_2 is not None:
	        available_dit_models.append(config_path_2)

	    # Prepare initialization parameters for UI
	    init_params = {
	        'pre_initialized': True,
	        'service_mode': True,
	        'checkpoint': None,
	        'config_path': config_path,
	        'config_path_2': config_path_2 if config_path_2 else None,
	        'device': device,
	        'init_llm': True,
	        'lm_model_path': lm_model_path,
	        'backend': backend,
	        'use_flash_attention': use_flash_attention,
	        'offload_to_cpu': auto_offload,
	        'offload_dit_to_cpu': False,
	        'init_status': init_status,
	        'enable_generate': enable_generate,
	        'dit_handler': dit_handler,
	        'dit_handler_2': dit_handler_2,
	        'available_dit_models': available_dit_models,
	        'llm_handler': llm_handler,
	        'language': 'en',
	        'persistent_storage_path': persistent_storage_path,
	        'debug_ui': debug_ui,
	    }

	    print("Service initialization completed!")

	    # Create Gradio interface with pre-initialized handlers
	    print("Creating Gradio interface...")
	    demo = create_gradio_interface(
	        dit_handler,
	        llm_handler,
	        dataset_handler,
	        init_params=init_params,
	        language='en'
	    )

	    # Dedicated shared-upload endpoint so uploaded audio files can be shared
	    # between ZeroGPU containers.
	    from fastapi import UploadFile, File
	    import shutil

	    @demo.app.post("/custom_upload")
	    async def custom_upload(file: UploadFile = File(...)):
	        # Create the upload directory inside the project's data folder.
	        uploads_dir = os.path.join(current_dir, "data", "shared_uploads")
	        os.makedirs(uploads_dir, exist_ok=True)

	        # The client-supplied name is untrusted: reduce it to a bare file name
	        # so it cannot escape uploads_dir, and make it unique.
	        file_path = os.path.join(uploads_dir, _safe_upload_filename(file.filename))
	        with open(file_path, "wb") as buffer:
	            shutil.copyfileobj(file.file, buffer)

	        return {"path": file_path}

	    # Enable the play/download buttons on every audio component.
	    for component in demo.blocks.values():
	        if isinstance(component, gr.Audio):
	            component.buttons = ["play", "download"]

	    # Enable queue for multi-user support
	    print("Enabling queue for multi-user support...")
	    demo.queue(max_size=20)

	    # Launch
	    print("Launching server on 0.0.0.0:7860...")
	    demo.launch(
	        server_name="0.0.0.0",
	        server_port=7860,
	        share=False,
	        show_error=True,
	    )


	if __name__ == "__main__":
	main()