Spaces:

WeReCooking
/

ACE-Step-CPU

Running

App Files Files Community

ACE-Step-CPU / app.py

Nekochu

update defaults: LR 3e-4, rank 32, alpha 2x rank (per Side-Step author)

04c031f about 1 month ago

raw

history blame

29.4 kB

	"""ACE-Step 1.5 XL (CPU) - Gradio frontend + CLI for ace-server GGUF inference"""

	import os
	import sys
	import time
	import json
	import argparse
	import tempfile
	import subprocess
	import shutil
	import requests
	import logging

	from train_engine import (
	preprocess_audio,
	train_lora_generator,
	cancel_training,
	get_trained_loras as _get_trained_loras_engine,
	)

	logger = logging.getLogger(__name__)

	# ---------------------------------------------------------------------------
	# Configurable limits (edit here, not buried in code)
	# ---------------------------------------------------------------------------

	# Per-file cap, in seconds, applied to each training audio file.
	MAX_AUDIO_DURATION = 240
	# Hard wall-clock limit for one training run, in seconds (8 hours).
	MAX_TRAINING_TIME = 8 * 60 * 60
	# Maximum number of audio files accepted for a single training run.
	MAX_AUDIO_FILES = 50

	# ---------------------------------------------------------------------------
	# Paths & constants
	# ---------------------------------------------------------------------------

	# Base URL of the ace-server HTTP API (override with ACE_SERVER).
	ACE_SERVER = os.getenv("ACE_SERVER", "http://127.0.0.1:8085")

	# Directory for generated audio and training workspaces; created at import time.
	OUTPUT_DIR = os.getenv("ACE_OUTPUT_DIR", "/app/outputs")
	os.makedirs(OUTPUT_DIR, exist_ok=True)

	# Checkpoint directory handed to the training engine.
	ACE_CHECKPOINT_DIR = os.getenv("ACE_CHECKPOINT_DIR", "/app/checkpoints")
	# NOTE(review): the next two constants are not referenced in the visible part
	# of this file -- confirm they are still needed.
	ACE_SOURCE_DIR = "/app/ace-step-source"
	ACE_HF_MODEL = "ACE-Step/Ace-Step1.5"
	# LoRA adapters: written by training, passed to ace-server via --adapters.
	ADAPTER_DIR = os.getenv("ACE_ADAPTER_DIR", "/app/adapters")
	# GGUF model files: passed to ace-server via --models.
	MODELS_DIR = os.getenv("ACE_MODELS_DIR", "/app/models")

	# Executable launched by _start_ace_server().
	ACE_SERVER_BIN = "/app/ace-server"

	# HF repo for on-demand GGUF downloads
	GGUF_HF_REPO = "Serveurperso/ACE-Step-1.5-GGUF"

	# ---------------------------------------------------------------------------
	# ace-server helpers
	# ---------------------------------------------------------------------------

	def _server_ok():
	    """Return True when ace-server answers ``GET /health`` with HTTP 200.

	    Any exception raised while making the request is treated as "not healthy".
	    """
	    try:
	        response = requests.get(f"{ACE_SERVER}/health", timeout=5)
	        healthy = response.status_code == 200
	    except Exception:
	        healthy = False
	    return healthy


	def _get_props():
	    """Return the JSON body of ``GET /props``, or ``{}`` when unavailable.

	    An empty dict is returned for a non-200 response as well as for any
	    exception raised by the request or by JSON decoding.
	    """
	    try:
	        response = requests.get(f"{ACE_SERVER}/props", timeout=10)
	        return response.json() if response.status_code == 200 else {}
	    except Exception:
	        return {}


	def _poll_job(job_id, timeout=600, progress_cb=None):
	"""Poll a job until done/error/timeout. Returns (status, elapsed)."""
	t0 = time.time()
	while time.time() - t0 < timeout:
	try:
	r = requests.get(f"{ACE_SERVER}/job", params={"id": job_id}, timeout=10)
	data = r.json()
	status = data.get("status", "unknown")
	if progress_cb:
	progress_cb(status, data)
	if status in ("done", "error"):
	return status, time.time() - t0
	except Exception:
	pass
	time.sleep(2)
	return "timeout", time.time() - t0


	def _fetch_result(job_id, timeout=60):
	    """Request the result of a finished job and return the raw response.

	    Issues ``GET /job`` with ``id=<job_id>`` and ``result=1``; the caller is
	    responsible for checking the status code and decoding the body.
	    """
	    query = {"id": job_id, "result": 1}
	    return requests.get(f"{ACE_SERVER}/job", params=query, timeout=timeout)


	def _submit_job(endpoint, payload, label):
	    """POST ``payload`` to an ace-server endpoint and return the new job id."""
	    r = requests.post(f"{ACE_SERVER}/{endpoint}", json=payload, timeout=30)
	    if r.status_code != 200:
	        raise RuntimeError(f"{label} submit failed: {r.status_code} {r.text}")
	    job_id = r.json().get("id")
	    if job_id is None:
	        # Without an id, polling would spin uselessly until its timeout.
	        raise RuntimeError(f"{label} submit returned no job id: {r.text}")
	    return job_id


	def _run_pipeline(caption, lyrics, bpm, duration, seed, steps, output_format,
	                  adapter=None, lm_model=None, progress_cb=None):
	    """Run the full LM -> synth pipeline against ace-server.

	    Args:
	        caption: Music description; a default caption is used when empty.
	        lyrics: Lyrics text; blank lyrics become ``"[Instrumental]"``.
	        bpm, duration, seed, steps: Optional generation parameters; each is
	            only sent when set to a usable value (duration is capped at 300).
	        output_format: ``"wav"`` or ``"mp3"``; anything else falls back to mp3.
	        adapter: Optional LoRA adapter name.
	        lm_model: Optional LM model file name.
	        progress_cb: Optional callable ``progress_cb(phase, data)`` invoked at
	            the phases ``lm_submit``, ``lm_poll``, ``synth_submit``,
	            ``synth_poll`` and ``fetch``.

	    Returns:
	        ``(audio_path, status_msg)`` -- the audio is written to a new file in
	        ``OUTPUT_DIR``.

	    Raises:
	        RuntimeError: when a submit, job or fetch step fails.
	    """
	    t0 = time.time()

	    # -- Build LM request --
	    req = {"caption": caption or "upbeat electronic dance music"}
	    req["lyrics"] = lyrics if lyrics and lyrics.strip() else "[Instrumental]"

	    if bpm and int(bpm) > 0:
	        req["bpm"] = int(bpm)
	    if duration and float(duration) > 0:
	        req["duration"] = min(float(duration), 300)
	    if seed is not None and int(seed) >= 0:
	        req["seed"] = int(seed)
	    if steps and int(steps) > 0:
	        req["inference_steps"] = int(steps)
	    if adapter:
	        req["adapter"] = adapter
	    if lm_model:
	        req["model"] = lm_model

	    fmt = output_format if output_format in ("wav", "mp3") else "mp3"
	    synth_fmt = "wav16" if fmt == "wav" else "mp3"
	    suffix = f".{fmt}"

	    # -- LM phase --
	    if progress_cb:
	        progress_cb("lm_submit", None)
	    lm_job_id = _submit_job("lm", req, "LM")

	    if progress_cb:
	        progress_cb("lm_poll", {"job_id": lm_job_id})
	    lm_status, lm_elapsed = _poll_job(lm_job_id, timeout=900)
	    if lm_status != "done":
	        raise RuntimeError(f"LM {lm_status} after {lm_elapsed:.0f}s")

	    # Fetch LM result
	    r = _fetch_result(lm_job_id)
	    if r.status_code != 200:
	        raise RuntimeError(f"LM result fetch failed: {r.status_code}")
	    lm_results = r.json()
	    if not isinstance(lm_results, list) or len(lm_results) == 0:
	        raise RuntimeError(f"LM returned no results: {lm_results}")
	    synth_request = lm_results[0]

	    # -- Synth phase --
	    synth_request["output_format"] = synth_fmt
	    if adapter:
	        synth_request["adapter"] = adapter
	        # NOTE(review): the synth model override is assumed to belong to the
	        # adapter branch (the UI trains LoRAs with variant="turbo") -- confirm.
	        synth_request["synth_model"] = "acestep-v15-turbo-Q4_K_M.gguf"
	    if progress_cb:
	        progress_cb("synth_submit", None)
	    synth_job_id = _submit_job("synth", synth_request, "Synth")

	    if progress_cb:
	        progress_cb("synth_poll", {"job_id": synth_job_id})
	    synth_status, synth_elapsed = _poll_job(synth_job_id, timeout=600)
	    if synth_status != "done":
	        raise RuntimeError(f"Synth {synth_status} after {synth_elapsed:.0f}s")

	    # Fetch audio
	    if progress_cb:
	        progress_cb("fetch", None)
	    r = _fetch_result(synth_job_id, timeout=60)
	    if r.status_code != 200:
	        raise RuntimeError(f"Audio fetch failed: {r.status_code}")

	    # delete=False: the file must outlive this function; the context manager
	    # only guarantees the handle is closed even if the write fails.
	    with tempfile.NamedTemporaryFile(suffix=suffix, dir=OUTPUT_DIR, delete=False) as tmp:
	        tmp.write(r.content)

	    elapsed = time.time() - t0
	    msg = f"Done in {elapsed:.0f}s | {duration}s audio, {steps} steps, {fmt}"
	    return tmp.name, msg


	# ---------------------------------------------------------------------------
	# LM model scanning & on-demand download
	# ---------------------------------------------------------------------------

	# LM model preselected in the UI dropdown.
	DEFAULT_LM = "acestep-5Hz-lm-1.7B-Q8_0.gguf"

	# LM GGUF files offered in the UI, default first.
	AVAILABLE_LM_MODELS = [
	    DEFAULT_LM,
	    "acestep-5Hz-lm-0.6B-Q8_0.gguf",
	    "acestep-5Hz-lm-4B-Q5_K_M.gguf",
	]

	def _scan_lm_models():
	    """Build the LM dropdown choices from ``AVAILABLE_LM_MODELS``.

	    A model whose file exists in ``MODELS_DIR`` is listed under its plain
	    file name; every other model gets the suffix ``" [not installed]"``.
	    """
	    on_disk = set()
	    if os.path.isdir(MODELS_DIR):
	        on_disk = {
	            name for name in os.listdir(MODELS_DIR)
	            if "-lm-" in name and name.endswith(".gguf")
	        }
	    return [
	        model if model in on_disk else f"{model} [not installed]"
	        for model in AVAILABLE_LM_MODELS
	    ]


	def _download_lm_model(filename):
	    """Make sure ``filename`` exists in ``MODELS_DIR``, downloading if needed.

	    Returns the local path of the model file, or ``None`` when the download
	    (or the import of ``huggingface_hub``) fails; the failure is logged.
	    """
	    target = os.path.join(MODELS_DIR, filename)
	    if os.path.isfile(target):
	        return target
	    try:
	        # Imported lazily so the dependency is only needed for downloads.
	        from huggingface_hub import hf_hub_download
	        return hf_hub_download(
	            repo_id=GGUF_HF_REPO,
	            filename=filename,
	            local_dir=MODELS_DIR,
	        )
	    except Exception as exc:
	        logger.error("Failed to download %s: %s", filename, exc)
	        return None


	# ---------------------------------------------------------------------------
	# LoRA listing for UI dropdowns
	# ---------------------------------------------------------------------------

	def _list_lora_choices():
	    """List the LoRA dropdown entries: the 'None' option plus adapter folders.

	    Every sub-directory of ``ADAPTER_DIR`` counts as one adapter; plain files
	    are ignored, and a missing ``ADAPTER_DIR`` yields only the 'None' entry.
	    """
	    entries = ["None (no LoRA)"]
	    if not os.path.isdir(ADAPTER_DIR):
	        return entries
	    entries.extend(
	        name for name in os.listdir(ADAPTER_DIR)
	        if os.path.isdir(os.path.join(ADAPTER_DIR, name))
	    )
	    return entries


	# ---------------------------------------------------------------------------
	# ace-server stop/start helpers
	# ---------------------------------------------------------------------------

	# Handle of the ace-server child started by _start_ace_server(); None when
	# this process has not started one (or it has been stopped).
	_ace_proc = None

	def _stop_ace_server():
	    """Stop ace-server.

	    If this process started the server and it is still running, it is
	    terminated, then killed if it does not exit within 10 seconds. Otherwise
	    ``pkill ace-server`` is used as a best-effort fallback. The function
	    always sleeps one second before returning.
	    """
	    global _ace_proc
	    logger.info("[ace-server] Stopping...")
	    if _ace_proc and _ace_proc.poll() is None:
	        _ace_proc.terminate()
	        try:
	            _ace_proc.wait(timeout=10)
	        except subprocess.TimeoutExpired:
	            _ace_proc.kill()
	            # Reap the killed child so it does not linger as a zombie.
	            try:
	                _ace_proc.wait(timeout=10)
	            except subprocess.TimeoutExpired:
	                logger.warning("[ace-server] Process did not exit after kill")
	        _ace_proc = None
	        logger.info("[ace-server] Stopped (tracked PID)")
	    else:
	        try:
	            subprocess.run(["pkill", "ace-server"],
	                           stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL,
	                           timeout=10)
	            logger.info("[ace-server] Stopped (pkill)")
	        except Exception as exc:
	            # Best-effort: a missing or failing pkill must not abort the caller.
	            logger.warning("[ace-server] pkill failed: %s", exc)
	    time.sleep(1)


	def _start_ace_server():
	    """Start ace-server in the background and wait until it is healthy.

	    The server is launched on 127.0.0.1:8085 with ``MODELS_DIR`` and
	    ``ADAPTER_DIR``; health is then checked up to 30 times, 2 seconds apart.

	    Returns:
	        True once the health check succeeds; False when the process cannot
	        be launched, exits with a non-zero code during startup, or never
	        becomes healthy.
	    """
	    global _ace_proc
	    logger.info("[ace-server] Starting with --adapters %s", ADAPTER_DIR)
	    try:
	        _ace_proc = subprocess.Popen(
	            [ACE_SERVER_BIN, "--host", "127.0.0.1", "--port", "8085",
	             "--models", MODELS_DIR, "--adapters", ADAPTER_DIR, "--max-batch", "1"],
	        )
	    except Exception as exc:
	        logger.error("[ace-server] Failed to start: %s", exc)
	        return False

	    for _ in range(30):
	        if _server_ok():
	            logger.info("[ace-server] Healthy")
	            return True
	        # Fail fast when the child crashed instead of polling for a full
	        # minute. A zero exit code is tolerated and polling continues.
	        exit_code = _ace_proc.poll()
	        if exit_code not in (None, 0):
	            logger.error("[ace-server] Exited during startup with code %s", exit_code)
	            return False
	        time.sleep(2)
	    logger.error("[ace-server] Health check timeout")
	    return False


	# ---------------------------------------------------------------------------
	# CLI mode
	# ---------------------------------------------------------------------------

	def cli_main():
	    """Command-line entry point: generate one audio file via ace-server.

	    Parses the arguments, checks that the server is reachable, runs the
	    LM -> synth pipeline, and optionally moves the result to ``--output``.
	    Exits with status 1 when the server is unreachable or the pipeline fails.
	    """
	    global ACE_SERVER

	    parser = argparse.ArgumentParser(
	        description="ACE-Step 1.5 XL (CPU) - CLI inference via ace-server",
	    )
	    parser.add_argument("caption", nargs="?", default="upbeat electronic dance music",
	                        help="Music description / caption")
	    parser.add_argument("--lyrics", "-l", default="[Instrumental]",
	                        help="Lyrics text (use '[Instrumental]' for no vocals)")
	    parser.add_argument("--bpm", type=int, default=120, help="Beats per minute")
	    parser.add_argument("--duration", "-d", type=float, default=10,
	                        help="Duration in seconds (max 300)")
	    parser.add_argument("--steps", "-s", type=int, default=8,
	                        help="Inference steps (1-32)")
	    parser.add_argument("--seed", type=int, default=-1,
	                        help="Random seed (-1 for random)")
	    parser.add_argument("--format", "-f", choices=["wav", "mp3"], default="wav",
	                        help="Output audio format")
	    parser.add_argument("--adapter", "-a", default=None,
	                        help="LoRA adapter name")
	    parser.add_argument("-o", "--output", default=None,
	                        help="Output file path (default: auto in outputs dir)")
	    parser.add_argument("--server", default=None,
	                        help="ace-server URL (default: http://127.0.0.1:8085)")

	    args = parser.parse_args()

	    if args.server:
	        # The helper functions read the module-level URL, so override it there.
	        ACE_SERVER = args.server

	    if not _server_ok():
	        print(f"ERROR: ace-server not reachable at {ACE_SERVER}", file=sys.stderr)
	        sys.exit(1)

	    # A negative seed means "random": no seed is sent to the server.
	    seed = args.seed if args.seed >= 0 else None

	    def cli_progress(phase, data):
	        """Print one progress line per pipeline phase."""
	        job_note = f" (job {data['job_id']})" if data else ""
	        phases = {
	            "lm_submit": "Submitting LM job...",
	            "lm_poll": f"LM generating{job_note}...",
	            "synth_submit": "Submitting synth job...",
	            "synth_poll": f"Synthesizing{job_note}...",
	            "fetch": "Fetching audio...",
	        }
	        msg = phases.get(phase, phase)
	        print(f" [{phase}] {msg}")

	    print(f"ACE-Step CLI | caption: {args.caption}")
	    print(f" lyrics: {args.lyrics} | bpm: {args.bpm} | duration: {args.duration}s "
	          f"| steps: {args.steps} | seed: {args.seed} | format: {args.format}")

	    try:
	        audio_path, status = _run_pipeline(
	            caption=args.caption,
	            lyrics=args.lyrics,
	            bpm=args.bpm,
	            duration=args.duration,
	            seed=seed,
	            steps=args.steps,
	            output_format=args.format,
	            adapter=args.adapter,
	            progress_cb=cli_progress,
	        )
	    except (RuntimeError, requests.RequestException, ValueError) as e:
	        # Network failures and undecodable responses are reported like
	        # pipeline errors instead of surfacing as a raw traceback.
	        print(f"ERROR: {e}", file=sys.stderr)
	        sys.exit(1)

	    # Move to requested output path if specified
	    if args.output:
	        out_dir = os.path.dirname(os.path.abspath(args.output))
	        os.makedirs(out_dir, exist_ok=True)
	        shutil.move(audio_path, args.output)
	        audio_path = args.output

	    print(f" {status}")
	    print(f" Output: {audio_path}")


	# ---------------------------------------------------------------------------
	# Gradio UI mode
	# ---------------------------------------------------------------------------

	def gradio_main():
	    """Build the two-tab Gradio UI (generation and LoRA training) and launch it.

	    All handlers are closures so they can share the in-memory training log.
	    The app is launched on 0.0.0.0:7860 with the MCP server enabled.
	    """
	    import gradio as gr
	    import gc

	    # -- Persistent training log buffer (survives across yields) --
	    _train_log_lines = []

	    # -- Generate tab handler --
	    def generate_music(caption, lyrics, instrumental, bpm, duration, seed,
	                       steps, lora_select, lm_model_select,
	                       progress=gr.Progress(track_tqdm=True)):
	        """Run one generation and return ``(audio_path_or_None, status_text)``."""
	        if not _server_ok():
	            return None, "ace-server not running. Check logs."

	        if instrumental or not lyrics or lyrics.strip() == "":
	            lyrics = "[Instrumental]"

	        actual_seed = None if seed is None or int(seed) < 0 else int(seed)
	        adapter = None if lora_select == "None (no LoRA)" else lora_select
	        lm_model_file = lm_model_select.replace(" [not installed]", "") if lm_model_select else None
	        if lm_model_file and "[not installed]" in (lm_model_select or ""):
	            # The helper returns None on failure; stop here rather than
	            # submitting a job for a model file that is not on disk.
	            if _download_lm_model(lm_model_file) is None:
	                return None, f"Failed to download LM model: {lm_model_file}"
	        lm_model = lm_model_file

	        progress_map = {
	            "lm_submit": (0.05, "Submitting LM job..."),
	            "lm_poll": (0.10, "LM generating..."),
	            "synth_submit": (0.40, "Submitting synth job..."),
	            "synth_poll": (0.50, "Synthesizing audio..."),
	            "fetch": (0.90, "Fetching audio..."),
	        }

	        def gr_progress(phase, data):
	            pct, desc = progress_map.get(phase, (0.5, phase))
	            if data and "job_id" in data:
	                desc += f" (job {data['job_id']})"
	            progress(pct, desc=desc)

	        try:
	            audio_path, status = _run_pipeline(
	                caption=caption,
	                lyrics=lyrics,
	                bpm=bpm,
	                duration=duration,
	                seed=actual_seed,
	                steps=steps,
	                output_format="mp3",
	                adapter=adapter,
	                lm_model=lm_model,
	                progress_cb=gr_progress,
	            )
	            return audio_path, status
	        except RuntimeError as e:
	            return None, str(e)
	        except Exception as e:
	            return None, f"Unexpected error: {e}"

	    # -- Server info helper --
	    def get_server_status():
	        """Return a text summary: online/offline plus the server props as JSON."""
	        if not _server_ok():
	            return "ace-server: OFFLINE"
	        props = _get_props()
	        lines = ["ace-server: ONLINE"]
	        if props:
	            lines.append(json.dumps(props, indent=2))
	        return "\n".join(lines)

	    # -- Training generator (direct integration, no subprocess) --
	    def train_lora_ui(audio_files, lora_name, epochs, lr, rank):
	        """Generator that yields (train_log, train_btn_update, cancel_btn_update).

	        Validates the inputs, copies the uploads into a per-LoRA workspace,
	        stops ace-server, runs preprocessing and training, and restarts
	        ace-server afterwards.
	        """
	        _train_log_lines.clear()
	        train_start = time.time()

	        def _log(msg):
	            _train_log_lines.append(msg)

	        def _log_text():
	            return "\n".join(_train_log_lines)

	        def _busy():
	            # Training in progress: hide "Train", show "Cancel".
	            return _log_text(), gr.update(visible=False), gr.update(visible=True)

	        def _idle():
	            # Nothing running: show "Train", hide "Cancel".
	            return _log_text(), gr.update(visible=True), gr.update(visible=False)

	        # -- Validation --
	        if not audio_files:
	            _log("[FAIL] No audio files uploaded.")
	            yield _idle()
	            return

	        if len(audio_files) > MAX_AUDIO_FILES:
	            _log(f"[FAIL] Too many files ({len(audio_files)}). Max: {MAX_AUDIO_FILES}")
	            yield _idle()
	            return

	        # An emptied number field arrives as None; reject it with a readable
	        # message instead of crashing on float(None).
	        try:
	            lr = float(lr)
	        except (TypeError, ValueError):
	            lr = None
	        if lr is None or not lr > 0:
	            _log("[FAIL] Learning rate must be a positive number.")
	            yield _idle()
	            return

	        lora_name = (lora_name or "").strip() or "my-lora"
	        # Sanitize: alphanumeric, dash, underscore only
	        lora_name = "".join(c if c.isalnum() or c in "-_" else "-" for c in lora_name)

	        # The sliders allow larger values than these caps, so tell the user
	        # when a value was reduced instead of clamping silently.
	        requested_epochs = int(epochs)
	        epochs = max(1, min(requested_epochs, 10))
	        if epochs != requested_epochs:
	            _log(f"[WARN] Epochs clamped from {requested_epochs} to {epochs}")
	        requested_rank = int(rank)
	        rank = max(1, min(requested_rank, 64))
	        if rank != requested_rank:
	            _log(f"[WARN] Rank clamped from {requested_rank} to {rank}")

	        work_dir = os.path.join(OUTPUT_DIR, "train_workspace", lora_name)
	        os.makedirs(work_dir, exist_ok=True)
	        audio_dir = os.path.join(work_dir, "audio_input")
	        os.makedirs(audio_dir, exist_ok=True)
	        adapter_out = os.path.join(ADAPTER_DIR, lora_name)
	        os.makedirs(adapter_out, exist_ok=True)

	        # Copy uploaded audio files
	        _log(f"[INFO] Preparing {len(audio_files)} audio files...")
	        yield _busy()

	        for f in audio_files:
	            src = f.name if hasattr(f, "name") else str(f)
	            shutil.copy2(src, os.path.join(audio_dir, os.path.basename(src)))

	        _log(f"[INFO] LoRA: '{lora_name}' | Files: {len(audio_files)} | "
	             f"Epochs: {epochs} | LR: {lr} | Rank: {rank}")
	        yield _busy()

	        # Stop ace-server before training (frees memory)
	        _log("[INFO] Stopping ace-server for training...")
	        yield _busy()
	        _stop_ace_server()
	        gc.collect()

	        try:
	            # -- Phase 1: Preprocessing --
	            _log("[Step 1/2] Preprocessing audio...")
	            yield _busy()

	            preprocessed_dir = os.path.join(work_dir, "preprocessed_tensors")

	            def preprocess_progress(current, total, desc):
	                _log(f" {desc} ({current}/{total})")

	            result = preprocess_audio(
	                audio_dir=audio_dir,
	                output_dir=preprocessed_dir,
	                checkpoint_dir=ACE_CHECKPOINT_DIR,
	                device="cpu",
	                variant="turbo",
	                max_duration=float(MAX_AUDIO_DURATION),
	                progress_callback=preprocess_progress,
	                cancel_check=lambda: False,
	            )
	            yield _busy()

	            processed = result.get("processed", 0)
	            failed = result.get("failed", 0)
	            total = result.get("total", 0)
	            _log(f"[OK] Preprocessed: {processed}/{total} (failed: {failed})")
	            yield _busy()

	            if processed == 0:
	                _log("[FAIL] No files preprocessed successfully. Cannot train.")
	                yield _idle()
	            else:
	                gc.collect()

	                # -- Phase 2: Training --
	                _log("[Step 2/2] Training LoRA...")
	                yield _busy()

	                for msg in train_lora_generator(
	                    dataset_dir=preprocessed_dir,
	                    output_dir=adapter_out,
	                    checkpoint_dir=ACE_CHECKPOINT_DIR,
	                    epochs=epochs,
	                    lr=lr,
	                    rank=rank,
	                    alpha=rank * 2,
	                    dropout=0.0,
	                    batch_size=1,
	                    gradient_accumulation_steps=4,
	                    warmup_steps=100,
	                    weight_decay=0.01,
	                    max_grad_norm=1.0,
	                    save_every_n_epochs=max(1, epochs // 2),
	                    seed=42,
	                    variant="turbo",
	                    device="cpu",
	                    log_every=5,
	                ):
	                    # Timeout check
	                    elapsed = time.time() - train_start
	                    if elapsed > MAX_TRAINING_TIME:
	                        _log(f"[WARN] Training timed out after {int(elapsed)}s")
	                        cancel_training()
	                        break

	                    _log(msg)
	                    yield _busy()

	                    if msg.strip() == "[DONE]":
	                        break

	                _log(f"[INFO] Total time: {time.time() - train_start:.0f}s")
	                yield _busy()

	        except Exception as exc:
	            _log(f"[FAIL] Training error: {exc}")
	            import traceback
	            _log(traceback.format_exc())
	            yield _idle()

	        finally:
	            # Always restart ace-server. There must be no `yield` in this
	            # block: when the generator is closed early (GeneratorExit raised
	            # at a yield above), yielding here raises RuntimeError and the
	            # restart would be skipped, leaving the server down.
	            _log("[INFO] Restarting ace-server...")
	            gc.collect()
	            ok = _start_ace_server()
	            if ok:
	                _log("[OK] ace-server restarted successfully")
	            else:
	                _log("[WARN] ace-server may not have restarted -- check logs")

	        # Only reached when the consumer is still iterating.
	        yield _idle()

	    # -- Cancel handler --
	    def _on_cancel():
	        cancel_training()
	        logger.info("Cancel requested by user")
	        return "Cancelling after current epoch... please wait"

	    # -- Check log handler --
	    def _check_log():
	        if _train_log_lines:
	            return "\n".join(_train_log_lines)
	        return "No training log available."

	    # -- Build LM model choices --
	    def _lm_model_choices():
	        return _scan_lm_models()

	    # -- Build UI --
	    CSS = """
	    .compact-row { gap: 8px !important; }
	    .status-box textarea { font-family: monospace; font-size: 13px; }
	    """

	    # The default LM may be listed as "<file> [not installed]"; select the
	    # entry that is actually among the choices so the dropdown value is valid.
	    lm_choices = _lm_model_choices()
	    lm_default = next((c for c in lm_choices if c.startswith(DEFAULT_LM)), None)

	    with gr.Blocks(title="ACE-Step 1.5 XL (CPU)", css=CSS) as demo:

	        with gr.Tabs():
	            # ============================================================
	            # Tab 1: Generate Music
	            # ============================================================
	            with gr.Tab("Generate Music"):
	                gr.Markdown(
	                    "[ACE-Step 1.5 XL (CPU)](https://github.com/ace-step/ACE-Step-1.5) "
	                    "GGUF Q4_K_M via "
	                    "[acestep.cpp](https://github.com/ServeurpersoCom/acestep.cpp)"
	                )

	                with gr.Row(elem_classes="compact-row"):
	                    with gr.Column(scale=2):
	                        caption = gr.Textbox(
	                            label="Music Description",
	                            lines=2,
	                            value="upbeat electronic dance music, energetic synth leads",
	                        )
	                        lyrics = gr.Textbox(
	                            label="Lyrics",
	                            lines=3,
	                            value="[Instrumental]",
	                            placeholder="Enter lyrics or [Instrumental] for no vocals",
	                        )
	                    with gr.Column(scale=1):
	                        audio_out = gr.Audio(label="Output", type="filepath")
	                        status = gr.Textbox(
	                            label="Status",
	                            interactive=False,
	                            lines=2,
	                            elem_classes="status-box",
	                        )

	                with gr.Row(elem_classes="compact-row"):
	                    instrumental = gr.Checkbox(label="Instrumental", value=True, scale=1)
	                    bpm = gr.Number(label="BPM", value=120, minimum=0, maximum=300, scale=1)
	                    duration = gr.Slider(
	                        label="Duration (s)", minimum=10, maximum=120,
	                        value=10, step=5, scale=1,
	                    )
	                    steps = gr.Slider(
	                        label="Steps", minimum=1, maximum=32,
	                        value=8, step=1, scale=1,
	                    )
	                    seed = gr.Number(label="Seed (-1=random)", value=-1, scale=1)

	                with gr.Row(elem_classes="compact-row"):
	                    lora_select = gr.Dropdown(
	                        label="LoRA", choices=_list_lora_choices(),
	                        value="None (no LoRA)", scale=1,
	                        allow_custom_value=True,
	                    )
	                    lm_model_select = gr.Dropdown(
	                        label="LM Model", choices=lm_choices,
	                        value=lm_default, scale=1,
	                    )

	                with gr.Row(elem_classes="compact-row"):
	                    gen_btn = gr.Button("Generate Music", variant="primary", scale=2)
	                    status_btn = gr.Button("Server Status", scale=1)

	                gen_btn.click(
	                    fn=generate_music,
	                    inputs=[caption, lyrics, instrumental, bpm, duration,
	                            seed, steps, lora_select, lm_model_select],
	                    outputs=[audio_out, status],
	                    api_name="generate",
	                )

	                status_btn.click(
	                    fn=get_server_status,
	                    inputs=[],
	                    outputs=[status],
	                    api_name="server_status",
	                )

	            # ============================================================
	            # Tab 2: Train LoRA
	            # ============================================================
	            with gr.Tab("Train LoRA"):
	                gr.Markdown(
	                    "### LoRA Training\n"
	                    "Fine-tune ACE-Step on your audio. "
	                    "CPU training is slow -- ace-server stops during training."
	                )

	                with gr.Row(elem_classes="compact-row"):
	                    with gr.Column(scale=2):
	                        train_audio = gr.File(
	                            label="Training Audio Files",
	                            file_count="multiple",
	                            file_types=["audio"],
	                        )
	                    with gr.Column(scale=1):
	                        lora_name = gr.Textbox(label="LoRA Name", value="my-lora")
	                        train_epochs = gr.Slider(
	                            label="Epochs", minimum=1, maximum=1000,
	                            value=3, step=1,
	                        )
	                        train_lr = gr.Number(label="Learning Rate", value=3e-4)
	                        train_rank = gr.Slider(
	                            label="Rank (r)", minimum=1, maximum=128,
	                            value=32, step=1,
	                        )

	                with gr.Row(elem_classes="compact-row"):
	                    train_btn = gr.Button("Train", variant="primary", scale=2)
	                    cancel_btn = gr.Button("Cancel Training", variant="stop", visible=False, scale=1)
	                    log_btn = gr.Button("Check Log", scale=1)

	                train_log = gr.Textbox(
	                    label="Training Log",
	                    interactive=False,
	                    lines=12,
	                    elem_classes="status-box",
	                )

	                # Training generator -- yields (log, train_btn, cancel_btn)
	                train_event = train_btn.click(
	                    train_lora_ui,
	                    inputs=[train_audio, lora_name, train_epochs, train_lr, train_rank],
	                    outputs=[train_log, train_btn, cancel_btn],
	                    api_name="train_lora",
	                    concurrency_limit=1,
	                )

	                # After training completes, restore buttons and refresh LoRA dropdown
	                def _post_training():
	                    return (
	                        gr.update(visible=True),
	                        gr.update(visible=False),
	                        gr.update(choices=_list_lora_choices()),
	                    )

	                train_event.then(
	                    _post_training,
	                    outputs=[train_btn, cancel_btn, lora_select],
	                )

	                # Cancel: set the flag, update status
	                cancel_btn.click(
	                    _on_cancel,
	                    outputs=[train_log],
	                )

	                # Check log: show last training output
	                log_btn.click(
	                    _check_log,
	                    outputs=[train_log],
	                    api_name="check_log",
	                )

	    demo.launch(
	        server_name="0.0.0.0",
	        server_port=7860,
	        mcp_server=True,
	    )


	# ---------------------------------------------------------------------------
	# Entry point
	# ---------------------------------------------------------------------------

	if __name__ == "__main__":
	    # Any argument beyond the script name selects CLI mode; a bare invocation
	    # (start.sh calls `python3 /app/app.py`) launches the Gradio UI.
	    entry_point = cli_main if len(sys.argv) > 1 else gradio_main
	    entry_point()