"""
ACE-Step v1.5 - HuggingFace Space Entry Point
This file serves as the entry point for HuggingFace Space deployment.
It initializes the service and launches the Gradio interface.
"""
import os
import sys
# Get current directory (app.py location)
current_dir = os.path.dirname(os.path.abspath(__file__))
# Add nano-vllm to Python path (local package)
nano_vllm_path = os.path.join(current_dir, "acestep", "third_parts", "nano-vllm")
if os.path.exists(nano_vllm_path):
sys.path.insert(0, nano_vllm_path)
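# Inserting at index 0 makes the vendored nano-vllm shadow any pip-installed
# copy, so the Space always imports the bundled version.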
# Disable Gradio analytics
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
# Clear proxy settings that may affect Gradio
for proxy_var in ['http_proxy', 'https_proxy', 'HTTP_PROXY', 'HTTPS_PROXY', 'ALL_PROXY']:
os.environ.pop(proxy_var, None)
import torch
from acestep.handler import AceStepHandler
from acestep.llm_inference import LLMHandler
from acestep.dataset_handler import DatasetHandler
from acestep.gradio_ui import create_gradio_interface
def get_gpu_memory_gb():
"""
    Get the total memory of GPU 0 in GB. Returns 0 if no GPU is available or detection fails.
"""
try:
if torch.cuda.is_available():
total_memory = torch.cuda.get_device_properties(0).total_memory
memory_gb = total_memory / (1024**3)
return memory_gb
else:
return 0
except Exception as e:
print(f"Warning: Failed to detect GPU memory: {e}", file=sys.stderr)
return 0
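# Illustrative values (reported bytes / 1024**3, so slightly below the marketed
# size; e.g. a "16 GB" T4 typically reads as ~14.8):
#   get_gpu_memory_gb()  ->  14.8  (triggers CPU offload below)
#   get_gpu_memory_gb()  ->  0     (no CUDA device visible)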
def get_persistent_storage_path():
"""
Detect and return a writable persistent storage path.
    HuggingFace Space persistent storage notes:
    1. Persistent storage must be enabled in the Space settings
    2. The path is typically /data for the Docker SDK
    3. This function falls back to the app directory if /data is not writable
    Local development:
    - Set the CHECKPOINT_DIR environment variable to use local checkpoints
      Example: CHECKPOINT_DIR=/path/to/checkpoints python app.py
      The path should be the parent directory of the 'checkpoints' folder
"""
# Check for local checkpoint directory override (for development)
checkpoint_dir_override = os.environ.get("CHECKPOINT_DIR")
if checkpoint_dir_override:
# If user specifies the checkpoints folder directly, use its parent
if checkpoint_dir_override.endswith("/checkpoints") or checkpoint_dir_override.endswith("\\checkpoints"):
checkpoint_dir_override = os.path.dirname(checkpoint_dir_override)
if os.path.exists(checkpoint_dir_override):
print(f"Using local checkpoint directory (CHECKPOINT_DIR): {checkpoint_dir_override}")
return checkpoint_dir_override
else:
print(f"Warning: CHECKPOINT_DIR path does not exist: {checkpoint_dir_override}")
    # Next, try HuggingFace Space persistent storage
hf_data_path = "/data"
# Check if /data exists and is writable
if os.path.exists(hf_data_path):
try:
test_file = os.path.join(hf_data_path, ".write_test")
with open(test_file, 'w') as f:
f.write("test")
os.remove(test_file)
print(f"Using HuggingFace persistent storage: {hf_data_path}")
return hf_data_path
except (PermissionError, OSError) as e:
print(f"Warning: /data exists but is not writable: {e}")
# Fall back to app directory (non-persistent but works without special config)
fallback_path = os.path.join(current_dir, "data")
os.makedirs(fallback_path, exist_ok=True)
print(f"Using local storage (non-persistent): {fallback_path}")
print("Note: To enable persistent storage, configure it in HuggingFace Space settings")
return fallback_path
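# Resolution order at a glance (illustrative paths):
#   CHECKPOINT_DIR=/path/to/checkpoints python app.py  -> /path/to
#   Space with persistent storage enabled              -> /data
#   Otherwise                                          -> <app dir>/data (non-persistent)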
def main():
"""Main entry point for HuggingFace Space"""
# Check for DEBUG_UI mode (skip model initialization for UI development)
debug_ui = os.environ.get("DEBUG_UI", "").lower() in ("1", "true", "yes")
if debug_ui:
print("=" * 60)
print("DEBUG_UI mode enabled - skipping model initialization")
print("UI will be fully functional but generation is disabled")
print("=" * 60)
# Get persistent storage path (auto-detect)
persistent_storage_path = get_persistent_storage_path()
# Detect GPU memory for auto-configuration
gpu_memory_gb = get_gpu_memory_gb()
    auto_offload = 0 < gpu_memory_gb < 16  # offload only when a GPU is present but has < 16 GB
if not debug_ui:
if auto_offload:
print(f"Detected GPU memory: {gpu_memory_gb:.2f} GB (< 16GB)")
print("Auto-enabling CPU offload to reduce GPU memory usage")
elif gpu_memory_gb > 0:
print(f"Detected GPU memory: {gpu_memory_gb:.2f} GB (>= 16GB)")
print("CPU offload disabled by default")
else:
print("No GPU detected, running on CPU")
# Create handler instances
print("Creating handlers...")
dit_handler = AceStepHandler(persistent_storage_path=persistent_storage_path)
llm_handler = LLMHandler(persistent_storage_path=persistent_storage_path)
dataset_handler = DatasetHandler()
# Service mode configuration from environment variables
config_path = os.environ.get(
"SERVICE_MODE_DIT_MODEL",
"acestep-v15-turbo"
)
# Second DiT model - default to turbo-shift3 for two-model setup
config_path_2 = os.environ.get("SERVICE_MODE_DIT_MODEL_2", "acestep-v15-turbo-shift3").strip()
lm_model_path = os.environ.get(
"SERVICE_MODE_LM_MODEL",
"acestep-5Hz-lm-1.7B"
)
backend = os.environ.get("SERVICE_MODE_BACKEND", "vllm")
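    # Example override (values shown are the defaults above; illustrative):
    #   SERVICE_MODE_DIT_MODEL=acestep-v15-turbo SERVICE_MODE_BACKEND=vllm python app.py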
device = "auto"
print(f"Service mode configuration:")
print(f" DiT model 1: {config_path}")
if config_path_2:
print(f" DiT model 2: {config_path_2}")
print(f" LM model: {lm_model_path}")
print(f" Backend: {backend}")
print(f" Offload to CPU: {auto_offload}")
print(f" DEBUG_UI: {debug_ui}")
# Determine flash attention availability
use_flash_attention = dit_handler.is_flash_attention_available()
print(f" Flash Attention: {use_flash_attention}")
# Initialize models (skip in DEBUG_UI mode)
init_status = ""
enable_generate = False
dit_handler_2 = None
if debug_ui:
# In DEBUG_UI mode, skip all model initialization
init_status = "⚠️ DEBUG_UI mode - models not loaded\nUI is functional but generation is disabled"
enable_generate = False
print("Skipping model initialization (DEBUG_UI mode)")
else:
# Initialize primary DiT model
print(f"Initializing DiT model 1: {config_path}...")
init_status, enable_generate = dit_handler.initialize_service(
project_root=current_dir,
config_path=config_path,
device=device,
use_flash_attention=use_flash_attention,
compile_model=False,
offload_to_cpu=auto_offload,
offload_dit_to_cpu=False
)
if not enable_generate:
print(f"Warning: DiT model 1 initialization issue: {init_status}", file=sys.stderr)
else:
print("DiT model 1 initialized successfully")
# Initialize second DiT model if configured
if config_path_2:
print(f"Initializing DiT model 2: {config_path_2}...")
dit_handler_2 = AceStepHandler(persistent_storage_path=persistent_storage_path)
            # Share the VAE, text encoder, text tokenizer, and silence latent from the first handler to save memory
init_status_2, enable_generate_2 = dit_handler_2.initialize_service(
project_root=current_dir,
config_path=config_path_2,
device=device,
use_flash_attention=use_flash_attention,
compile_model=False,
offload_to_cpu=auto_offload,
offload_dit_to_cpu=False,
# Share components from first handler
shared_vae=dit_handler.vae,
shared_text_encoder=dit_handler.text_encoder,
shared_text_tokenizer=dit_handler.text_tokenizer,
shared_silence_latent=dit_handler.silence_latent,
)
if not enable_generate_2:
print(f"Warning: DiT model 2 initialization issue: {init_status_2}", file=sys.stderr)
init_status += f"\n⚠️ DiT model 2 failed: {init_status_2}"
else:
print("DiT model 2 initialized successfully")
init_status += f"\n✅ DiT model 2: {config_path_2}"
# Initialize LM model
checkpoint_dir = dit_handler._get_checkpoint_dir()
print(f"Initializing 5Hz LM: {lm_model_path}...")
lm_status, lm_success = llm_handler.initialize(
checkpoint_dir=checkpoint_dir,
lm_model_path=lm_model_path,
backend=backend,
device=device,
offload_to_cpu=auto_offload,
dtype=dit_handler.dtype
)
if lm_success:
print("5Hz LM initialized successfully")
init_status += f"\n{lm_status}"
else:
print(f"Warning: 5Hz LM initialization failed: {lm_status}", file=sys.stderr)
init_status += f"\n{lm_status}"
# Build available models list for UI
available_dit_models = [config_path]
if config_path_2 and dit_handler_2 is not None:
available_dit_models.append(config_path_2)
# Prepare initialization parameters for UI
init_params = {
'pre_initialized': True,
'service_mode': True,
'checkpoint': None,
'config_path': config_path,
'config_path_2': config_path_2 if config_path_2 else None,
'device': device,
'init_llm': True,
'lm_model_path': lm_model_path,
'backend': backend,
'use_flash_attention': use_flash_attention,
'offload_to_cpu': auto_offload,
'offload_dit_to_cpu': False,
'init_status': init_status,
'enable_generate': enable_generate,
'dit_handler': dit_handler,
'dit_handler_2': dit_handler_2,
'available_dit_models': available_dit_models,
'llm_handler': llm_handler,
'language': 'en',
'persistent_storage_path': persistent_storage_path,
'debug_ui': debug_ui,
}
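    # Passing pre_initialized=True lets the UI reuse these handler instances
    # instead of running its own initialization flow.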
print("Service initialization completed!")
# Create Gradio interface with pre-initialized handlers
print("Creating Gradio interface...")
demo = create_gradio_interface(
dit_handler,
llm_handler,
dataset_handler,
init_params=init_params,
language='en'
)
# Enable queue for multi-user support
print("Enabling queue for multi-user support...")
demo.queue(max_size=20, default_concurrency_limit=1)
    # Launch the server (HuggingFace Spaces expect the app on port 7860)
print("Launching server on 0.0.0.0:7860...")
demo.launch(
server_name="0.0.0.0",
server_port=7860,
share=False,
show_error=True,
)
if __name__ == "__main__":
main()