| """ |
| ACE-Step v1.5 - HuggingFace Space Entry Point |
| |
| This file serves as the entry point for HuggingFace Space deployment. |
| It initializes the service and launches the Gradio interface. |
| """ |
| import os |
| import sys |
|
|
| |
# Absolute directory containing this file; used as the project root for
# checkpoint lookup and the non-persistent storage fallback.
current_dir = os.path.dirname(os.path.abspath(__file__))

# Make the vendored nano-vllm package importable when it is bundled with
# the repo (third_parts checkout); skipped silently when absent.
nano_vllm_path = os.path.join(current_dir, "acestep", "third_parts", "nano-vllm")
if os.path.exists(nano_vllm_path):
    sys.path.insert(0, nano_vllm_path)

# Disable Gradio telemetry before Gradio is imported below.
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"

# Scrub any inherited proxy settings (both lower- and upper-case variants);
# presumably so in-container/loopback HTTP traffic is not routed through a
# proxy — NOTE(review): confirm this is still required in the Space image.
for proxy_var in ['http_proxy', 'https_proxy', 'HTTP_PROXY', 'HTTPS_PROXY', 'ALL_PROXY']:
    os.environ.pop(proxy_var, None)
|
|
| import torch |
| from acestep.handler import AceStepHandler |
| from acestep.llm_inference import LLMHandler |
| from acestep.dataset_handler import DatasetHandler |
| from acestep.gradio_ui import create_gradio_interface |
|
|
|
|
def get_gpu_memory_gb():
    """Return the total memory of CUDA device 0, in gigabytes.

    Returns 0 when CUDA is unavailable or detection raises; detection
    failures are logged to stderr rather than propagated.
    """
    try:
        if not torch.cuda.is_available():
            return 0
        device_props = torch.cuda.get_device_properties(0)
        return device_props.total_memory / (1024 ** 3)
    except Exception as e:
        # Best-effort probe: never let GPU detection crash startup.
        print(f"Warning: Failed to detect GPU memory: {e}", file=sys.stderr)
        return 0
|
|
|
|
def get_persistent_storage_path():
    """
    Detect and return a writable persistent storage path.

    Resolution order:
      1. ``CHECKPOINT_DIR`` environment variable (local development
         override). The value may point either at the parent directory of
         the ``checkpoints`` folder or at the ``checkpoints`` folder itself
         (with or without a trailing slash); in the latter case its parent
         directory is used.
      2. ``/data`` — HuggingFace Space persistent storage (Docker SDK).
         Must be enabled in the Space settings; a write probe confirms it
         is actually writable, not just mounted read-only.
      3. ``<current_dir>/data`` — non-persistent fallback next to this
         file, created on demand.

    Returns:
        str: path to an existing directory.
    """
    checkpoint_dir_override = os.environ.get("CHECKPOINT_DIR")
    if checkpoint_dir_override:
        # Normalize away trailing separators so ".../checkpoints/" behaves
        # like ".../checkpoints" on both POSIX and Windows.
        normalized = checkpoint_dir_override.rstrip("/\\")
        parent = os.path.dirname(normalized)
        # If the user pointed directly at the 'checkpoints' folder, use its
        # parent: downstream code expects the parent directory. The `parent`
        # guard keeps a bare relative "checkpoints" untouched.
        if os.path.basename(normalized) == "checkpoints" and parent:
            checkpoint_dir_override = parent
        if os.path.exists(checkpoint_dir_override):
            print(f"Using local checkpoint directory (CHECKPOINT_DIR): {checkpoint_dir_override}")
            return checkpoint_dir_override
        else:
            print(f"Warning: CHECKPOINT_DIR path does not exist: {checkpoint_dir_override}")

    # HuggingFace Spaces mount persistent storage at /data (Docker SDK).
    hf_data_path = "/data"
    if os.path.exists(hf_data_path):
        try:
            # Existence does not imply writability (e.g. storage disabled
            # => read-only mount), so probe with a throwaway file.
            test_file = os.path.join(hf_data_path, ".write_test")
            with open(test_file, 'w') as f:
                f.write("test")
            os.remove(test_file)
            print(f"Using HuggingFace persistent storage: {hf_data_path}")
            return hf_data_path
        except (PermissionError, OSError) as e:
            print(f"Warning: /data exists but is not writable: {e}")

    # Last resort: a local, non-persistent directory next to this file.
    fallback_path = os.path.join(current_dir, "data")
    os.makedirs(fallback_path, exist_ok=True)
    print(f"Using local storage (non-persistent): {fallback_path}")
    print("Note: To enable persistent storage, configure it in HuggingFace Space settings")
    return fallback_path
|
|
|
|
def main():
    """Main entry point for HuggingFace Space"""
    # DEBUG_UI=1/true/yes skips all model loading so the Gradio UI can be
    # exercised quickly; generation stays disabled in this mode.
    debug_ui = os.environ.get("DEBUG_UI", "").lower() in ("1", "true", "yes")
    if debug_ui:
        print("=" * 60)
        print("DEBUG_UI mode enabled - skipping model initialization")
        print("UI will be fully functional but generation is disabled")
        print("=" * 60)

    # Resolve where checkpoints/output live (CHECKPOINT_DIR override,
    # /data persistent storage, or a local fallback).
    persistent_storage_path = get_persistent_storage_path()

    # Auto-enable CPU offload on GPUs with less than 16 GB of VRAM;
    # gpu_memory_gb == 0 means no GPU, which leaves offload disabled.
    gpu_memory_gb = get_gpu_memory_gb()
    auto_offload = gpu_memory_gb > 0 and gpu_memory_gb < 16

    if not debug_ui:
        if auto_offload:
            print(f"Detected GPU memory: {gpu_memory_gb:.2f} GB (< 16GB)")
            print("Auto-enabling CPU offload to reduce GPU memory usage")
        elif gpu_memory_gb > 0:
            print(f"Detected GPU memory: {gpu_memory_gb:.2f} GB (>= 16GB)")
            print("CPU offload disabled by default")
        else:
            print("No GPU detected, running on CPU")

    # Construct handlers up front; heavyweight model loading happens later
    # through their initialize methods.
    print("Creating handlers...")
    dit_handler = AceStepHandler(persistent_storage_path=persistent_storage_path)
    llm_handler = LLMHandler(persistent_storage_path=persistent_storage_path)
    dataset_handler = DatasetHandler()

    # Model/backend selection, all overridable via environment variables.
    config_path = os.environ.get(
        "SERVICE_MODE_DIT_MODEL",
        "acestep-v15-turbo"
    )
    # Optional second DiT model; an empty string disables it.
    config_path_2 = os.environ.get("SERVICE_MODE_DIT_MODEL_2", "acestep-v15-turbo-shift3").strip()

    lm_model_path = os.environ.get(
        "SERVICE_MODE_LM_MODEL",
        "acestep-5Hz-lm-1.7B"
    )
    backend = os.environ.get("SERVICE_MODE_BACKEND", "vllm")
    device = "auto"

    print(f"Service mode configuration:")
    print(f"  DiT model 1: {config_path}")
    if config_path_2:
        print(f"  DiT model 2: {config_path_2}")
    print(f"  LM model: {lm_model_path}")
    print(f"  Backend: {backend}")
    print(f"  Offload to CPU: {auto_offload}")
    print(f"  DEBUG_UI: {debug_ui}")

    # Flash attention availability is probed by the handler itself.
    use_flash_attention = dit_handler.is_flash_attention_available()
    print(f"  Flash Attention: {use_flash_attention}")

    # Defaults for the no-model path; overwritten below when models load.
    init_status = ""
    enable_generate = False
    dit_handler_2 = None

    if debug_ui:
        init_status = "⚠️ DEBUG_UI mode - models not loaded\nUI is functional but generation is disabled"
        enable_generate = False
        print("Skipping model initialization (DEBUG_UI mode)")
    else:
        # --- DiT model 1 (primary) ---
        print(f"Initializing DiT model 1: {config_path}...")
        init_status, enable_generate = dit_handler.initialize_service(
            project_root=current_dir,
            config_path=config_path,
            device=device,
            use_flash_attention=use_flash_attention,
            compile_model=False,
            offload_to_cpu=auto_offload,
            offload_dit_to_cpu=False
        )

        if not enable_generate:
            # Non-fatal: the UI still launches with generation disabled.
            print(f"Warning: DiT model 1 initialization issue: {init_status}", file=sys.stderr)
        else:
            print("DiT model 1 initialized successfully")

        # --- DiT model 2 (optional) ---
        if config_path_2:
            print(f"Initializing DiT model 2: {config_path_2}...")
            dit_handler_2 = AceStepHandler(persistent_storage_path=persistent_storage_path)

            # Model 2 reuses model 1's VAE, text encoder/tokenizer and
            # silence latent via the shared_* kwargs — presumably to avoid
            # loading duplicate copies; confirm in initialize_service.
            init_status_2, enable_generate_2 = dit_handler_2.initialize_service(
                project_root=current_dir,
                config_path=config_path_2,
                device=device,
                use_flash_attention=use_flash_attention,
                compile_model=False,
                offload_to_cpu=auto_offload,
                offload_dit_to_cpu=False,

                shared_vae=dit_handler.vae,
                shared_text_encoder=dit_handler.text_encoder,
                shared_text_tokenizer=dit_handler.text_tokenizer,
                shared_silence_latent=dit_handler.silence_latent,
            )

            if not enable_generate_2:
                # Keep going with model 1 only; surface the failure in the UI status.
                print(f"Warning: DiT model 2 initialization issue: {init_status_2}", file=sys.stderr)
                init_status += f"\n⚠️ DiT model 2 failed: {init_status_2}"
            else:
                print("DiT model 2 initialized successfully")
                init_status += f"\n✅ DiT model 2: {config_path_2}"

        # --- 5Hz language model ---
        # Reuses DiT model 1's resolved checkpoint dir and dtype.
        checkpoint_dir = dit_handler._get_checkpoint_dir()
        print(f"Initializing 5Hz LM: {lm_model_path}...")
        lm_status, lm_success = llm_handler.initialize(
            checkpoint_dir=checkpoint_dir,
            lm_model_path=lm_model_path,
            backend=backend,
            device=device,
            offload_to_cpu=auto_offload,
            dtype=dit_handler.dtype
        )

        if lm_success:
            print("5Hz LM initialized successfully")
            init_status += f"\n{lm_status}"
        else:
            # Non-fatal: status is appended either way so the UI shows it.
            print(f"Warning: 5Hz LM initialization failed: {lm_status}", file=sys.stderr)
            init_status += f"\n{lm_status}"

    # Models selectable in the UI: model 2 only if its handler was created.
    available_dit_models = [config_path]
    if config_path_2 and dit_handler_2 is not None:
        available_dit_models.append(config_path_2)

    # Everything the Gradio UI needs to reflect the pre-initialized state.
    init_params = {
        'pre_initialized': True,
        'service_mode': True,
        'checkpoint': None,
        'config_path': config_path,
        'config_path_2': config_path_2 if config_path_2 else None,
        'device': device,
        'init_llm': True,
        'lm_model_path': lm_model_path,
        'backend': backend,
        'use_flash_attention': use_flash_attention,
        'offload_to_cpu': auto_offload,
        'offload_dit_to_cpu': False,
        'init_status': init_status,
        'enable_generate': enable_generate,
        'dit_handler': dit_handler,
        'dit_handler_2': dit_handler_2,
        'available_dit_models': available_dit_models,
        'llm_handler': llm_handler,
        'language': 'en',
        'persistent_storage_path': persistent_storage_path,
        'debug_ui': debug_ui,
    }

    print("Service initialization completed!")

    print("Creating Gradio interface...")
    demo = create_gradio_interface(
        dit_handler,
        llm_handler,
        dataset_handler,
        init_params=init_params,
        language='en'
    )

    # Queue requests; concurrency 1 serializes generation on the single GPU.
    print("Enabling queue for multi-user support...")
    demo.queue(max_size=20, default_concurrency_limit=1)

    # 0.0.0.0:7860 is the standard bind address/port for HF Spaces.
    print("Launching server on 0.0.0.0:7860...")
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True,
    )
|
|
|
|
| if __name__ == "__main__": |
| main() |
|
|