Spaces:

varunm2004
/

Dataset-auto

Running

App Files Files Community

Dataset-auto / app.py

varunm2004

Update app.py

ed9dbbe verified 7 days ago

raw

history blame contribute delete

34.6 kB

	"""
	app.py — WAN 2.1 Dataset Creator (HuggingFace Spaces Edition)
	Gradio-powered UI for preparing video + caption datasets for WAN 2.1 LoRA training.

	Tabs:
	1. 🎬 Video Ingest — Upload, trim, validate source videos
	2. ✍️ Caption Studio — Write / template-build captions per clip
	3. ✅ Validator — Check pairs, naming, frame counts
	4. 📦 Export & Handoff — Final dataset summary + zip download

	Differences from Colab version:
	- No Google Drive sync (not available on HF Spaces)
	- Export produces a downloadable .zip instead of Drive copy
	- FFmpeg auto-installed at startup if not present
	- All paths relative to /tmp/dataset_builder (HF writable space)
	- share=False, server_name="0.0.0.0" for Spaces compatibility
	"""

	import os
	import sys
	import glob
	import json
	import shutil
	import warnings
	import subprocess
	import tempfile
	import zipfile
	from pathlib import Path
	from datetime import datetime

	# ── Auto-install FFmpeg on HuggingFace Spaces ─────────────────────────────────
	def _ensure_ffmpeg():
	    """Make sure an ``ffmpeg`` binary is callable, installing it via apt-get if not.

	    Runs ``ffmpeg -version``; when that fails, attempts ``apt-get update`` followed by
	    ``apt-get install ffmpeg``. The outcome of the install is checked and reported
	    instead of being assumed successful. Never raises: a failed install only prints
	    a message, so the app can still start (video probing will then report errors).
	    """
	    try:
	        subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
	        return
	    except (FileNotFoundError, subprocess.CalledProcessError):
	        print("⚙️ FFmpeg not found — installing via apt-get...")

	    try:
	        subprocess.run(["apt-get", "update", "-qq"], capture_output=True)
	        install = subprocess.run(
	            ["apt-get", "install", "-y", "-qq", "ffmpeg"],
	            capture_output=True,
	            text=True,
	        )
	    except OSError as exc:
	        # apt-get itself is missing or not executable on this image.
	        print(f"❌ Could not run apt-get: {exc}")
	        return

	    # Only claim success when apt-get succeeded AND the binary is now on PATH.
	    if install.returncode == 0 and shutil.which("ffmpeg"):
	        print("✅ FFmpeg installed.")
	    else:
	        print(f"❌ FFmpeg installation failed:\n{install.stderr[-500:]}")

	_ensure_ffmpeg()

	import gradio as gr

	# ── Path Setup (HF Spaces uses /tmp for writable storage) ─────────────────────
	# Working directories live under one writable root; all three are created up front.
	BASE_DIR = "/tmp/dataset_builder"
	VIDEO_DIR, CAPTION_DIR, EXPORT_DIR = (
	    os.path.join(BASE_DIR, sub) for sub in ("videos", "captions", "exports")
	)

	for d in (VIDEO_DIR, CAPTION_DIR, EXPORT_DIR):
	    os.makedirs(d, exist_ok=True)


	# ═════════════════════════════════════════════════════════════════════════════
	# HELPERS
	# ═════════════════════════════════════════════════════════════════════════════

	def _probe_video(path: str) -> dict:
	"""Use ffprobe to get video metadata."""
	try:
	cmd = [
	"ffprobe", "-v", "quiet", "-print_format", "json",
	"-show_streams", "-show_format", path
	]
	result = subprocess.run(cmd, capture_output=True, text=True, timeout=15)
	data = json.loads(result.stdout)
	vstream = next((s for s in data.get("streams", []) if s.get("codec_type") == "video"), {})
	duration = float(data.get("format", {}).get("duration", 0))

	fps_raw = vstream.get("r_frame_rate", "0/1")
	num, den = fps_raw.split("/")
	fps = round(float(num) / float(den), 2) if float(den) else 0

	w = int(vstream.get("width", 0))
	h = int(vstream.get("height", 0))
	frames = int(vstream.get("nb_frames", 0)) or int(duration * fps)

	return {"duration": round(duration, 2), "fps": fps, "width": w,
	"height": h, "frames": frames, "ok": True}
	except Exception as e:
	return {"duration": 0, "fps": 0, "width": 0, "height": 0,
	"frames": 0, "ok": False, "error": str(e)}


	def _sanitize_name(name: str) -> str:
	"""Lowercase, replace non-alphanumeric chars with underscores."""
	import re
	name = os.path.splitext(name)[0]
	name = name.lower()
	name = re.sub(r"[^a-z0-9_]", "_", name)
	name = re.sub(r"_+", "_", name).strip("_")
	return name


	def _get_all_videos() -> list:
	    """Return list of dicts for every video in VIDEO_DIR.

	    Each dict holds ``stem``, ``video_path``, ``caption_path``, ``has_caption``,
	    ``caption`` (stripped text, "" when there is no caption file) plus every key
	    returned by ``_probe_video``. Entries are ordered by sorted file path.
	    """
	    videos = []
	    for f in sorted(glob.glob(os.path.join(VIDEO_DIR, "*.mp4"))):
	        stem = Path(f).stem
	        cap_path = os.path.join(CAPTION_DIR, f"{stem}.txt")
	        has_caption = os.path.exists(cap_path)
	        caption = ""
	        if has_caption:
	            # Captions are written as UTF-8; read them the same way and close the handle.
	            with open(cap_path, encoding="utf-8", errors="replace") as fh:
	                caption = fh.read().strip()
	        videos.append({
	            "stem": stem,
	            "video_path": f,
	            "caption_path": cap_path,
	            "has_caption": has_caption,
	            "caption": caption,
	            **_probe_video(f),
	        })
	    return videos


	def _validation_issues(v: dict) -> list:
	issues = []
	if not v["has_caption"]: issues.append("❌ Missing caption file")
	if v["duration"] < 1: issues.append("⚠️ Duration < 1s (too short)")
	if v["duration"] > 10: issues.append("⚠️ Duration > 10s (trim recommended)")
	if v["frames"] < 8: issues.append("❌ Fewer than 8 frames")
	if v["fps"] < 18: issues.append("⚠️ Low FPS (< 18)")
	if v["width"] < 640: issues.append("⚠️ Resolution below 720p")
	if v["caption"] and len(v["caption"]) < 20:
	issues.append("⚠️ Caption very short (< 20 chars)")
	return issues


	# ═════════════════════════════════════════════════════════════════════════════
	# TAB 1 — VIDEO INGEST
	# ═════════════════════════════════════════════════════════════════════════════

	def ingest_videos(files):
	    """Copy uploaded files into VIDEO_DIR under sanitised names and probe each one.

	    Args:
	        files: Upload payload from the file component — either plain path strings
	            or objects exposing the path as ``.name``. May be empty/None.

	    Returns:
	        ``(log_text, gallery_markdown)``: one log line per file, plus the
	        refreshed gallery table.
	    """
	    if not files:
	        return "No files selected.", video_gallery_md()

	    log = []
	    for f in files:
	        # Accept both path strings and wrappers that carry the path in ``.name``.
	        src = getattr(f, "name", f)
	        stem = _sanitize_name(os.path.basename(src))
	        dest_name = f"{stem}.mp4"
	        dest = os.path.join(VIDEO_DIR, dest_name)

	        try:
	            shutil.copy(src, dest)
	        except OSError as exc:
	            # One unreadable upload must not abort the rest of the batch.
	            log.append(f"❌ {dest_name} — copy failed: {exc}")
	            continue

	        meta = _probe_video(dest)
	        if not meta["ok"]:
	            log.append(f"⚠️ {dest_name} — saved (ffprobe unavailable, verify manually)")
	            continue

	        warns = []
	        if meta["duration"] > 10:
	            warns.append(f"duration {meta['duration']}s > 10s")
	        if meta["frames"] < 8:
	            warns.append(f"only {meta['frames']} frames")
	        warn_str = f" ⚠️ {', '.join(warns)}" if warns else ""
	        # Plain "|" separators: the log is shown as plain text, so no escaping.
	        log.append(
	            f"✅ {dest_name} — {meta['duration']}s | "
	            f"{meta['fps']}fps | {meta['width']}×{meta['height']}{warn_str}"
	        )

	    return "\n".join(log), video_gallery_md()


	def trim_video(source_path, start_time, end_time, output_stem):
	    """Cut ``[start_time, end_time]`` out of a clip with FFmpeg and save it in VIDEO_DIR.

	    Args:
	        source_path: Path of the clip to trim.
	        start_time: Start offset in seconds.
	        end_time: End offset in seconds; must be greater than ``start_time``.
	        output_stem: Desired output stem; when blank, the sanitised source stem
	            plus ``_trimmed`` is used.

	    Returns:
	        ``(log_text, gallery_markdown)``. Errors are reported in the log text;
	        nothing is raised.
	    """
	    if not source_path:
	        return "❌ No source file path provided.", video_gallery_md()
	    if start_time is None or end_time is None or end_time <= start_time:
	        return "❌ End time must be greater than start time.", video_gallery_md()

	    requested = (output_stem or "").strip()
	    if requested:
	        stem = _sanitize_name(requested)
	    else:
	        stem = _sanitize_name(Path(source_path).stem) + "_trimmed"
	    dest = os.path.join(VIDEO_DIR, f"{stem}.mp4")

	    # ffmpeg -y would truncate its own input if source and output are the same file.
	    if os.path.abspath(source_path) == os.path.abspath(dest):
	        return (
	            "❌ Output would overwrite the source clip — choose a different stem.",
	            video_gallery_md(),
	        )

	    try:
	        cmd = [
	            "ffmpeg", "-y", "-i", source_path,
	            "-ss", str(start_time), "-to", str(end_time),
	            "-c:v", "libx264", "-c:a", "aac", dest
	        ]
	        result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
	        if result.returncode != 0:
	            return f"❌ FFmpeg error:\n{result.stderr[-500:]}", video_gallery_md()

	        meta = _probe_video(dest)
	        return (
	            f"✅ Trimmed → {stem}.mp4\n"
	            f" Duration: {meta['duration']}s | FPS: {meta['fps']} | "
	            f"{meta['width']}×{meta['height']} | Frames: {meta['frames']}"
	        ), video_gallery_md()
	    except Exception as e:
	        return f"❌ Error: {e}", video_gallery_md()


	def delete_video(stem):
	    """Delete ``<stem>.mp4`` from VIDEO_DIR and ``<stem>.txt`` from CAPTION_DIR.

	    Args:
	        stem: Clip stem typed by the user. Anything that is not a plain file
	            name (contains a path separator, or is ``.``/``..``) is rejected so
	            the deletion cannot escape the dataset folders.

	    Returns:
	        ``(log_text, gallery_markdown)``.
	    """
	    stem = (stem or "").strip()
	    if not stem:
	        return "❌ No stem provided.", video_gallery_md()

	    separators = [s for s in (os.sep, os.altsep) if s]
	    if stem in (".", "..") or any(s in stem for s in separators):
	        return f"❌ Invalid stem: '{stem}'", video_gallery_md()

	    msgs = []
	    for ext, folder in [(".mp4", VIDEO_DIR), (".txt", CAPTION_DIR)]:
	        path = os.path.join(folder, f"{stem}{ext}")
	        if os.path.exists(path):
	            os.remove(path)
	            msgs.append(f"🗑️ Deleted {stem}{ext}")

	    if not msgs:
	        msgs.append(f"⚠️ No files found for stem: '{stem}'")
	    return "\n".join(msgs), video_gallery_md()


	def video_gallery_md() -> str:
	    """Render every video in the dataset as a Markdown table.

	    Returns:
	        A table with stem, duration, FPS, resolution and a caption marker per
	        clip ("?" where probing gave no value), or a hint when no videos exist.
	    """
	    videos = _get_all_videos()
	    if not videos:
	        return "📭 No videos yet. Upload `.mp4` files above."

	    # Column separators must be bare "|" — an escaped "\|" is rendered as a
	    # literal pipe and the table collapses into plain text.
	    rows = []
	    for v in videos:
	        cap_icon = "✅" if v["has_caption"] else "⚠️"
	        dur = f"{v['duration']}s" if v["duration"] else "?"
	        fps = f"{v['fps']}fps" if v["fps"] else "?"
	        res = f"{v['width']}×{v['height']}" if v["width"] else "?"
	        rows.append(f"| `{v['stem']}` | {dur} | {fps} | {res} | {cap_icon} |")

	    header = (
	        f"### 🎬 {len(videos)} Video(s) in Dataset\n"
	        "| Stem | Duration | FPS | Resolution | Caption |\n"
	        "|------|----------|-----|------------|---------|"
	    )
	    return header + "\n" + "\n".join(rows)


	def get_video_stems():
	    """Return the stems of all ``.mp4`` files in VIDEO_DIR, in sorted path order."""
	    pattern = os.path.join(VIDEO_DIR, "*.mp4")
	    stems = []
	    for video_file in sorted(glob.glob(pattern)):
	        stems.append(Path(video_file).stem)
	    return stems


	# ═════════════════════════════════════════════════════════════════════════════
	# TAB 2 — CAPTION STUDIO
	# ═════════════════════════════════════════════════════════════════════════════

	def load_caption_for_stem(stem):
	    """Load the saved caption for one clip.

	    Args:
	        stem: Clip stem selected in the editor; may be empty/None.

	    Returns:
	        ``(caption_text, status_markdown)``. The caption is "" when nothing is
	        selected or no caption file exists yet.
	    """
	    if not stem:
	        return "", "Select a video above."
	    cap_path = os.path.join(CAPTION_DIR, f"{stem}.txt")
	    if os.path.exists(cap_path):
	        # Captions are saved as UTF-8; read them back the same way and close the file.
	        with open(cap_path, encoding="utf-8", errors="replace") as fh:
	            return fh.read(), f"📂 Loaded caption for `{stem}`"
	    return "", f"📭 No caption yet for `{stem}` — write one and save."


	def save_caption(stem, caption_text):
	    """Write the stripped caption to ``<stem>.txt`` in CAPTION_DIR.

	    Returns ``(status_markdown, caption_summary_markdown)``; nothing is written
	    when no stem is selected or the caption is blank.
	    """
	    if not stem:
	        status = "❌ No video selected."
	    elif not caption_text.strip():
	        status = "❌ Caption is empty."
	    else:
	        target = os.path.join(CAPTION_DIR, f"{stem}.txt")
	        with open(target, "w", encoding="utf-8") as out:
	            out.write(caption_text.strip())
	        status = f"✅ Saved caption for `{stem}`"
	    return status, caption_summary_md()


	def build_caption_from_template(subject, action, environment, lighting, camera):
	    """Assemble a caption draft from the template fields.

	    Sentence 1 is ``"A <subject> <action> <environment>."`` built from whichever
	    of those fields are filled in; lighting and camera each become their own
	    sentence. Blank or None fields are skipped. Trailing periods typed by the
	    user are removed before the final period is added (no ``..``), and the
	    lighting/camera sentences start with a capital letter.

	    Returns:
	        The sentences joined by single spaces, or "" when every field is blank.
	    """
	    def _clean(value):
	        return (value or "").strip().rstrip(".").rstrip()

	    def _sentence(text):
	        return f"{text[:1].upper()}{text[1:]}." if text else ""

	    parts = [p for p in map(_clean, (subject, action, environment)) if p]
	    s1 = ("A " + " ".join(parts) + ".") if parts else ""
	    s2 = _sentence(_clean(lighting))
	    s3 = _sentence(_clean(camera))
	    return " ".join(s for s in [s1, s2, s3] if s)


	def caption_summary_md() -> str:
	    """Render the caption status of every video as a Markdown table.

	    A clip counts as complete only when its caption file exists and is not
	    blank. Completed rows show the first 60 characters of the caption.

	    Returns:
	        The table with a "complete / total" heading, or a hint when no videos exist.
	    """
	    videos = _get_all_videos()
	    if not videos:
	        return "📭 No videos loaded yet."

	    rows = []
	    paired = 0
	    for v in videos:
	        caption = v["caption"]
	        if v["has_caption"] and caption:
	            paired += 1
	            # Newlines and pipes inside the caption would break the table row.
	            preview = caption[:60].replace("\n", " ").replace("|", "\\|")
	            if len(caption) > 60:
	                preview += "…"
	            rows.append(f"| `{v['stem']}` | ✅ | {preview} |")
	        else:
	            rows.append(f"| `{v['stem']}` | ⚠️ Missing | — |")

	    # Column separators are bare "|"; an escaped "\|" would not form a table.
	    header = (
	        f"### ✍️ Caption Status — {paired}/{len(videos)} complete\n"
	        "| Video | Status | Preview |\n|-------|--------|---------|"
	    )
	    return header + "\n" + "\n".join(rows)


	def generate_bulk_template():
	    """Build the bulk-editor text: one ``--- <stem>`` header per video followed by
	    its current caption (or a fill-in placeholder) and a blank line."""
	    videos = _get_all_videos()
	    if not videos:
	        return "📭 No videos loaded."

	    placeholder = "A [subject] [action] [environment]. [lighting]. [camera shot]."
	    sections = [
	        f"--- {v['stem']}\n{v['caption'] or placeholder}\n"
	        for v in videos
	    ]
	    return "\n".join(sections)


	def save_all_bulk_captions(bulk_text: str):
	    """Parse the bulk-editor text and write one caption file per section.

	    A section starts at a line beginning with ``---`` (the rest of that line is
	    the stem) and runs until the next such line. Lines before the first header
	    are ignored.

	    Args:
	        bulk_text: Full contents of the bulk editor.

	    Returns:
	        ``(status_markdown, caption_summary_markdown)``. Sections whose caption
	        is blank, or whose stem is not a plain file name, are skipped and listed
	        in the status instead of being written.
	    """
	    if not bulk_text or not bulk_text.strip():
	        return "❌ No text provided.", caption_summary_md()

	    saved, skipped = [], []

	    def _flush(stem, lines):
	        # Write one finished section; shared by the loop and the final section.
	        if not stem:
	            return
	        text = "\n".join(lines).strip()
	        separators = [s for s in (os.sep, os.altsep) if s]
	        unsafe = stem in (".", "..") or any(s in stem for s in separators)
	        if not text or unsafe:
	            skipped.append(stem)
	            return
	        cap_path = os.path.join(CAPTION_DIR, f"{stem}.txt")
	        # UTF-8 to match the per-clip editor's save path.
	        with open(cap_path, "w", encoding="utf-8") as f:
	            f.write(text)
	        saved.append(stem)

	    current_stem, current_lines = None, []
	    for line in bulk_text.splitlines():
	        if line.startswith("---"):
	            _flush(current_stem, current_lines)
	            current_stem = line.lstrip("- ").strip()
	            current_lines = []
	        elif current_stem is not None:
	            current_lines.append(line)
	    _flush(current_stem, current_lines)

	    status = f"✅ Saved {len(saved)} caption(s): {', '.join(saved)}"
	    if skipped:
	        status += f"\n\n⚠️ Skipped (empty caption or invalid stem): {', '.join(skipped)}"
	    return status, caption_summary_md()


	# ═════════════════════════════════════════════════════════════════════════════
	# TAB 3 — VALIDATOR
	# ═════════════════════════════════════════════════════════════════════════════

	def run_full_validation():
	    """Validate every video and report the result as Markdown.

	    Each clip is classified from its ``_validation_issues``: any "❌" issue
	    makes it an error, otherwise any "⚠️" issue makes it a warning, otherwise
	    it is ready.

	    Returns:
	        ``(summary_markdown, detail_table_markdown)``. When there are no
	        videos, a hint and an empty detail string.
	    """
	    videos = _get_all_videos()
	    if not videos:
	        return "📭 No videos to validate. Upload files in the Video Ingest tab.", ""

	    all_ok, has_warn, has_err = [], [], []
	    detail_rows = []

	    for v in videos:
	        issues = _validation_issues(v)
	        if any(i.startswith("❌") for i in issues):
	            has_err.append(v["stem"])
	            status = "❌ Error"
	        elif any(i.startswith("⚠️") for i in issues):
	            has_warn.append(v["stem"])
	            status = "⚠️ Warning"
	        else:
	            all_ok.append(v["stem"])
	            status = "✅ Ready"

	        # Inside a table cell the pipe must stay escaped; the column separators
	        # themselves must be bare "|" or the table does not render.
	        issue_str = " \\| ".join(issues) if issues else "—"
	        detail_rows.append(
	            f"| `{v['stem']}` | {v['duration']}s | {v['frames']} | {status} | {issue_str} |"
	        )

	    summary = (
	        f"### Validation Complete — {len(videos)} video(s)\n\n"
	        f"✅ Ready: {len(all_ok)} | "
	        f"⚠️ Warnings: {len(has_warn)} | "
	        f"❌ Errors: {len(has_err)}\n\n"
	    )
	    if has_err:
	        summary += f"Must fix before export: {', '.join(f'`{s}`' for s in has_err)}\n\n"
	    if has_warn:
	        summary += f"Review recommended: {', '.join(f'`{s}`' for s in has_warn)}\n\n"
	    if not has_err and not has_warn:
	        summary += "🎉 All clips are ready to export!\n\n"

	    header = (
	        "| Video | Duration | Frames | Status | Issues |\n"
	        "|-------|----------|--------|--------|--------|"
	    )
	    detail = header + "\n" + "\n".join(detail_rows)
	    return summary, detail


	def naming_check_report():
	    """Report video stems that break the naming rule (a-z, 0-9, underscore only).

	    Returns one Markdown line per finding, or a success message when every
	    stem is valid. A stem with uppercase letters yields both findings.
	    """
	    import re

	    videos = _get_all_videos()
	    if not videos:
	        return "📭 No videos loaded."

	    disallowed = re.compile(r"[^a-z0-9_]")
	    findings = []
	    for stem in (video["stem"] for video in videos):
	        if disallowed.search(stem):
	            findings.append(f"⚠️ `{stem}` — invalid characters (use a-z, 0-9, _ only)")
	        if stem.lower() != stem:
	            findings.append(f"⚠️ `{stem}` — contains uppercase")

	    return "\n".join(findings) or "✅ All filenames valid."


	# ═════════════════════════════════════════════════════════════════════════════
	# TAB 4 — EXPORT & DOWNLOAD
	# ═════════════════════════════════════════════════════════════════════════════

	def dataset_summary_md() -> str:
	    """Render the dataset overview (metrics table plus quick checklist) as Markdown.

	    A video counts as captioned only when its caption file exists and is not
	    blank, matching the caption status table. "Ready" counts clips with no
	    validation issues at all (errors or warnings).

	    Returns:
	        The Markdown text, or a hint when no videos exist.
	    """
	    videos = _get_all_videos()
	    if not videos:
	        return "📭 No dataset yet."

	    total = len(videos)
	    paired = sum(1 for v in videos if v["has_caption"] and v["caption"])
	    total_dur = sum(v["duration"] for v in videos)
	    ready = sum(1 for v in videos if not _validation_issues(v))

	    def _mark(ok, bad="⚠️"):
	        return "✅" if ok else bad

	    # Built line by line so no source indentation leaks into the Markdown
	    # (leading whitespace would turn the table into a code block).
	    lines = [
	        "### 📋 Dataset Summary",
	        "",
	        "| Metric | Value |",
	        "|--------|-------|",
	        f"| Total videos | {total} |",
	        f"| Captioned | {paired} / {total} |",
	        f"| Ready to encode | {ready} / {total} |",
	        f"| Total duration | {total_dur:.1f}s ({total_dur/60:.1f} min) |",
	        "",
	        "### Quick Checklist",
	        "",
	        f"- {_mark(total >= 10)} 10–20 clips (`{total}` loaded)",
	        f"- {_mark(all(v['duration'] >= 2 for v in videos))} All clips ≥ 2 seconds",
	        f"- {_mark(all(v['duration'] <= 10 for v in videos))} All clips ≤ 10 seconds",
	        f"- {_mark(all(v['frames'] >= 8 for v in videos), '❌')} All clips have ≥ 8 frames",
	        f"- {_mark(paired == total, '❌')} All videos have captions",
	        f"- {_mark(ready == total)} No validation errors",
	        "",
	    ]
	    return "\n".join(lines)


	def export_dataset_zip():
	    """Package every video and its caption into a timestamped zip in EXPORT_DIR.

	    Returns ``(status_markdown, zip_path_or_None, dataset_summary_markdown)``.
	    Nothing is written when there are no videos or when any clip still has an
	    "❌" validation issue.
	    """
	    videos = _get_all_videos()
	    if not videos:
	        return "❌ No videos to export.", None, dataset_summary_md()

	    blocked = []
	    for v in videos:
	        for issue in _validation_issues(v):
	            if issue.startswith("❌"):
	                blocked.append(v)
	                break
	    if blocked:
	        stems = ", ".join(f"`{v['stem']}`" for v in blocked)
	        return f"❌ Fix errors first: {stems}", None, dataset_summary_md()

	    archive_name = f"wan21_dataset_{datetime.now().strftime('%Y%m%d_%H%M%S')}.zip"
	    zip_path = os.path.join(EXPORT_DIR, archive_name)

	    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as archive:
	        for v in videos:
	            archive.write(v["video_path"], arcname=v["stem"] + ".mp4")
	            if v["has_caption"]:
	                archive.write(v["caption_path"], arcname=v["stem"] + ".txt")

	    size_mb = os.path.getsize(zip_path) / (1024 * 1024)
	    msg = (
	        f"✅ Exported {len(videos)} pairs → `{archive_name}` ({size_mb:.1f} MB)\n\n"
	        "Click Download ZIP below to save it."
	    )
	    return msg, zip_path, dataset_summary_md()


	# ═════════════════════════════════════════════════════════════════════════════
	# THEME & CSS
	# ═════════════════════════════════════════════════════════════════════════════

	# Dark violet theme. Every colour is applied to both the light and the dark
	# variant of its setting, so the pairs are generated from a single table.
	THEME = gr.themes.Base(
	    primary_hue=gr.themes.colors.violet,
	    secondary_hue=gr.themes.colors.purple,
	    neutral_hue=gr.themes.colors.slate,
	    font=gr.themes.GoogleFont("Inter"),
	).set(
	    **{
	        key: colour
	        for setting, colour in {
	            "body_background_fill": "#0a0a0f",
	            "block_background_fill": "#12121a",
	            "block_border_color": "#1e1e2e",
	            "block_label_text_color": "#a78bfa",
	            "block_title_text_color": "#e2e8f0",
	            "body_text_color": "#cbd5e1",
	            "button_primary_background_fill": "#7c3aed",
	            "button_primary_background_fill_hover": "#6d28d9",
	            "button_primary_text_color": "#ffffff",
	            "button_secondary_background_fill": "#1e1e2e",
	            "button_secondary_text_color": "#a78bfa",
	            "input_background_fill": "#1a1a2e",
	            "input_border_color": "#2d2d44",
	        }.items()
	        for key in (setting, f"{setting}_dark")
	    },
	    shadow_drop="0 4px 14px rgba(124, 58, 237, 0.08)",
	    shadow_drop_lg="0 8px 24px rgba(124, 58, 237, 0.12)",
	)

	# Custom stylesheet passed to gr.Blocks(css=...). It caps the container width
	# and defines the classes used by the UI's HTML/Markdown elements:
	# .main-title, .sub-title, .status-bar and .tip-box.
	CSS = """
	.gradio-container { max-width: 980px !important; margin: auto; }
	.main-title {
	text-align: center;
	background: linear-gradient(135deg, #7c3aed 0%, #a78bfa 50%, #c4b5fd 100%);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	font-size: 2.1rem;
	font-weight: 800;
	margin-bottom: 0.15rem;
	letter-spacing: -0.5px;
	}
	.sub-title {
	text-align: center;
	color: #64748b;
	font-size: 0.92rem;
	margin-bottom: 1rem;
	}
	.status-bar {
	padding: 10px 16px;
	background: linear-gradient(135deg, #1a1a2e, #16162a);
	border: 1px solid #2d2d44;
	border-radius: 8px;
	font-size: 0.9rem;
	}
	.tip-box {
	background: #13131f;
	border-left: 3px solid #7c3aed;
	border-radius: 0 8px 8px 0;
	padding: 10px 14px;
	margin: 6px 0;
	font-size: 0.88rem;
	color: #94a3b8;
	}
	"""


	# ═════════════════════════════════════════════════════════════════════════════
	# UI ASSEMBLY
	# ═════════════════════════════════════════════════════════════════════════════

	def build_ui():
	"""Assemble the four-tab Gradio interface and wire its event handlers.

	Builds the Video Ingest, Caption Studio, Validator and Export tabs inside a
	single gr.Blocks container and returns that container without launching it.
	"""
	# DeprecationWarnings are silenced only while the Blocks object is constructed.
	with warnings.catch_warnings():
	warnings.simplefilter("ignore", DeprecationWarning)
	blocks = gr.Blocks(theme=THEME, css=CSS, title="WAN 2.1 Dataset Creator")

	with blocks:

	gr.HTML("<div class='main-title'>🎬 WAN 2.1 Dataset Creator</div>")
	gr.HTML("<div class='sub-title'>Prepare · Caption · Validate · Export → WAN 2.1 Latent Cacher</div>")
	# The function itself (not its result) is passed as the value.
	# NOTE(review): presumably so Gradio evaluates it on page load — confirm against the Gradio docs.
	gr.Markdown(value=dataset_summary_md, elem_classes=["status-bar"])

	# ── TAB 1: VIDEO INGEST ───────────────────────────────────────────
	with gr.Tab("🎬 Video Ingest", id="ingest"):
	gr.Markdown(
	"### Step 1 & 2 — Upload & Trim Source Clips\n"
	"Upload raw `.mp4` files. Filenames are auto-sanitised to `lowercase_underscore`."
	)
	gr.HTML("<div class='tip-box'>💡 <b>Sweet spot:</b> 3–5 second clips at 720p+, 24–30fps. "
	"Aim for 10–20 clips per LoRA concept.</div>")

	gr.Markdown("#### 📤 Upload Videos")
	with gr.Row():
	upload_files = gr.File(
	label="Drop .mp4 files here",
	file_count="multiple",
	file_types=[".mp4"],
	scale=3,
	)
	upload_btn = gr.Button("⬆️ Ingest Files", variant="primary", scale=1)

	ingest_log = gr.Textbox(label="Ingest Log", lines=5, interactive=False)

	gr.Markdown("---\n#### ✂️ Trim a Clip with FFmpeg")
	with gr.Row():
	trim_source = gr.Textbox(
	label="Source path (full path in /tmp/dataset_builder/videos/)",
	placeholder="/tmp/dataset_builder/videos/raw_footage.mp4",
	scale=3,
	)
	trim_stem = gr.Textbox(label="Output stem name", placeholder="clip_01", scale=1)

	with gr.Row():
	trim_start = gr.Number(label="Start (seconds)", value=0, minimum=0)
	trim_end = gr.Number(label="End (seconds)", value=5, minimum=0)
	trim_btn = gr.Button("✂️ Trim & Save", variant="primary")

	trim_log = gr.Textbox(label="Trim Log", lines=3, interactive=False)

	gr.Markdown("---\n#### 🗑️ Remove a Clip")
	with gr.Row():
	del_stem = gr.Textbox(label="Stem to delete", placeholder="clip_01", scale=3)
	del_btn = gr.Button("🗑️ Delete", variant="secondary", scale=1)
	del_log = gr.Textbox(label="Delete Log", lines=2, interactive=False)

	gr.Markdown("---")
	refresh_gallery_btn = gr.Button("🔄 Refresh Gallery", variant="secondary")
	gallery_md = gr.Markdown(value=video_gallery_md)

	# NOTE(review): both the button click and the upload event call ingest_videos,
	# so the same files appear to be ingested on upload and again on click — confirm intended.
	upload_btn.click(fn=ingest_videos, inputs=[upload_files], outputs=[ingest_log, gallery_md])
	upload_files.upload(fn=ingest_videos, inputs=[upload_files], outputs=[ingest_log, gallery_md])
	trim_btn.click(fn=trim_video, inputs=[trim_source, trim_start, trim_end, trim_stem], outputs=[trim_log, gallery_md])
	del_btn.click(fn=delete_video, inputs=[del_stem], outputs=[del_log, gallery_md])
	refresh_gallery_btn.click(fn=video_gallery_md, outputs=[gallery_md])

	# ── TAB 2: CAPTION STUDIO ─────────────────────────────────────────
	with gr.Tab("✍️ Caption Studio", id="captions"):
	gr.Markdown(
	"### Step 3 — Write Captions\n"
	"Every `.mp4` needs a matching `.txt` caption describing subject, action, "
	"environment, lighting, and camera."
	)
	gr.HTML("<div class='tip-box'>💡 Good captions: <b>subject + action + environment + "
	"lighting + camera</b>. 1–3 sentences. Specific and consistent.</div>")

	with gr.Tabs():

	with gr.Tab("🖊️ Per-Clip Editor"):
	with gr.Row():
	# The choices are computed once, here, when the UI is built; the 🔄 button
	# wired below re-reads the video folder to refresh them.
	stem_dropdown = gr.Dropdown(
	label="Select Video",
	choices=get_video_stems(),
	scale=3,
	)
	refresh_stems_btn = gr.Button("🔄", scale=1, variant="secondary")

	load_status = gr.Markdown("")
	caption_box = gr.Textbox(
	label="Caption Text",
	lines=5,
	placeholder=(
	"A fluffy orange tabby cat playing with a red ball on a wooden floor.\n"
	"Warm natural sunlight streams through a window.\n"
	"Low angle shot with shallow depth of field."
	),
	)

	gr.Markdown("#### 🧩 Template Builder")
	gr.HTML("<div class='tip-box'>💡 Fill the fields and click Build to generate a caption draft, "
	"then edit it freely before saving.</div>")

	with gr.Row():
	t_subject = gr.Textbox(label="Subject", placeholder="fluffy orange tabby cat")
	t_action = gr.Textbox(label="Action", placeholder="playing with a red ball")
	with gr.Row():
	t_env = gr.Textbox(label="Environment", placeholder="on a wooden floor in a living room")
	t_lighting = gr.Textbox(label="Lighting", placeholder="warm natural sunlight from a window")
	t_camera = gr.Textbox(label="Camera / Shot", placeholder="low angle shot, shallow depth of field")

	with gr.Row():
	build_btn = gr.Button("🧩 Build from Template", variant="secondary")
	save_btn = gr.Button("💾 Save Caption", variant="primary")

	save_status = gr.Markdown("")
	cap_summary = gr.Markdown(value=caption_summary_md)

	# Selecting a clip loads its saved caption into the editor box.
	stem_dropdown.change(
	fn=load_caption_for_stem,
	inputs=[stem_dropdown],
	outputs=[caption_box, load_status],
	)
	refresh_stems_btn.click(
	fn=lambda: gr.Dropdown(choices=get_video_stems()),
	outputs=[stem_dropdown],
	)
	# The template result overwrites the caption box; it is saved only via Save Caption.
	build_btn.click(
	fn=build_caption_from_template,
	inputs=[t_subject, t_action, t_env, t_lighting, t_camera],
	outputs=[caption_box],
	)
	save_btn.click(
	fn=save_caption,
	inputs=[stem_dropdown, caption_box],
	outputs=[save_status, cap_summary],
	)

	with gr.Tab("📝 Bulk Editor"):
	gr.Markdown(
	"Edit all captions at once. Format:\n"
	"```\n--- stem_name\nYour caption text here.\n\n--- next_stem\n...\n```\n"
	"Click Load Template to pre-fill existing captions."
	)
	with gr.Row():
	load_bulk_btn = gr.Button("📂 Load Template", variant="secondary")
	save_bulk_btn = gr.Button("💾 Save All", variant="primary")
	bulk_box = gr.Textbox(label="Bulk Caption Editor", lines=22,
	placeholder="Click 'Load Template' to start…")
	bulk_status = gr.Markdown("")
	bulk_summary = gr.Markdown(value=caption_summary_md)

	load_bulk_btn.click(fn=generate_bulk_template, outputs=[bulk_box])
	save_bulk_btn.click(
	fn=save_all_bulk_captions,
	inputs=[bulk_box],
	outputs=[bulk_status, bulk_summary],
	)

	# ── TAB 3: VALIDATOR ──────────────────────────────────────────────
	with gr.Tab("✅ Validator", id="validate"):
	gr.Markdown(
	"### Step 4 — Validate Your Dataset\n"
	"Check every clip for duration, frame count, resolution, naming, and captions."
	)

	with gr.Row():
	validate_btn = gr.Button("▶ Run Full Validation", variant="primary", scale=2)
	naming_btn = gr.Button("🔤 Check Naming Only", variant="secondary", scale=1)

	validation_summary = gr.Markdown("")
	naming_report = gr.Markdown("")
	validation_detail = gr.Markdown("")

	gr.Markdown("---\n#### 📏 Requirements Reference")
	# NOTE(review): the `\|` sequences in the table below are not valid Python escapes
	# and reach Markdown as escaped pipes — verify that the table actually renders.
	gr.Markdown("""
	\| Parameter \| Requirement \|
	\|-----------\|-------------\|
	\| Format \| `.mp4` only \|
	\| Duration \| 2–10 seconds (sweet spot: 3–5s) \|
	\| FPS \| 24–30 fps recommended \|
	\| Resolution \| 720p+ (auto-resized to 480×832) \|
	\| Min frames \| 8 frames minimum \|
	\| Caption \| Required, 1–3 sentences \|
	\| Filename \| Lowercase, underscores, no spaces \|
	""")
	validate_btn.click(fn=run_full_validation, outputs=[validation_summary, validation_detail])
	naming_btn.click(fn=naming_check_report, outputs=[naming_report])

	# ── TAB 4: EXPORT ─────────────────────────────────────────────────
	with gr.Tab("📦 Export & Download", id="export"):
	gr.Markdown(
	"### Step 5 — Export Dataset\n"
	"Packages all validated video + caption pairs into a single `.zip` for download."
	)
	gr.Markdown(value=dataset_summary_md, label="Dataset Summary")
	gr.HTML("<div class='tip-box'>💡 Fix all ❌ validation errors before exporting. "
	"⚠️ warnings are safe to ignore.</div>")

	export_btn = gr.Button("📦 Build & Download ZIP", variant="primary", size="lg")
	export_status = gr.Markdown("")
	download_file = gr.File(label="⬇️ Download ZIP", visible=True)
	export_summary = gr.Markdown(value=dataset_summary_md)

	gr.Markdown("---\n#### 📋 Dataset Checklist")
	gr.Markdown("""
	```
	DATASET CHECKLIST
	─────────────────────────────────────────
	□ 10–20 clips, each 3–5 seconds
	□ All .mp4 format, 720p+, 24–30 fps
	□ Matching .txt caption for EVERY video
	□ Filenames: lowercase, underscores, no spaces
	□ Captions: 1–3 sentences —
	subject · action · environment · lighting · camera
	□ No watermarks, black frames, or blurry footage
	□ All pairs validated ✅ in Validator tab
	```
	""")

	gr.Markdown("---\n#### ⚠️ Common Issues")
	# NOTE(review): same `\|` concern as the requirements table in the Validator tab.
	gr.Markdown("""
	\| Problem \| Solution \|
	\|---------\|----------\|
	\| "Missing caption" \| Create `.txt` with exact same stem as `.mp4` \|
	\| "Only N frames, need 8" \| Clip too short — use ≥ 1 second at 24fps \|
	\| Blurry latent outputs \| Source too low-res — use 720p+ footage \|
	\| LoRA overfits \| More variety — different angles, lighting, backgrounds \|
	\| LoRA doesn't learn concept \| Captions too vague — be more specific \|
	""")

	# export_dataset_zip returns (status, zip path or None, refreshed summary).
	export_btn.click(
	fn=export_dataset_zip,
	outputs=[export_status, download_file, export_summary],
	)

	# ── FOOTER ────────────────────────────────────────────────────────
	gr.HTML(
	"<div style='text-align:center;color:#475569;font-size:0.8rem;margin-top:1rem;'>"
	"WAN 2.1 Dataset Creator • HuggingFace Spaces Edition • video pairs → safetensors"
	"</div>"
	)

	return blocks


	# ═════════════════════════════════════════════════════════════════════════════
	# LAUNCH (HuggingFace Spaces — no share=True needed)
	# ═════════════════════════════════════════════════════════════════════════════

	if __name__ == "__main__":
	    app = build_ui()
	    app.queue()
	    # The hosting platform serves the app itself, so no public share tunnel is
	    # requested; binding to 0.0.0.0 makes the server reachable from outside the
	    # container. This matches the settings stated in the module docstring.
	    app.launch(share=False, server_name="0.0.0.0")