Spaces:

Daankular
/

Sulphur

Running on Zero

App Files Files Community

Sulphur / app.py

Daankular

Pre-download Gemma at startup; bump guidance to 5.0 for prompt adherence

499128d 2 days ago

raw

history blame contribute delete

7.39 kB

	"""
	Sulphur — Image to Video (HF Spaces).
	Clones Wan2GP and downloads models on first run.
	Generation is handled by generate.py called as a subprocess inside @spaces.GPU.
	"""

	import os
	import sys
	import subprocess
	import shutil
	import tempfile
	import threading
	import json
	from pathlib import Path

	import gradio as gr
	import spaces

	# Optional Hugging Face access token; None when the HF_TOKEN env var is unset.
	# Passed as `token=` to every hf_hub_download call in this file.
	_HF_TOKEN = os.environ.get("HF_TOKEN")
	# Base directory for everything setup() creates: /data when that path exists,
	# otherwise the system temp directory.
	_PERSISTENT = Path("/data") if Path("/data").exists() else Path(tempfile.gettempdir())
	# Wan2GP checkout location and the sub-directories this app writes into.
	WAN2GP_ROOT = _PERSISTENT / "Wan2GP"
	CKPTS_DIR = WAN2GP_ROOT / "ckpts"
	LORAS_DIR = WAN2GP_ROOT / "loras" / "ltx2"
	FINETUNES_DIR = WAN2GP_ROOT / "finetunes"
	# generate.py is expected to sit next to this file; generate_video() runs it
	# as a subprocess.
	GENERATE_PY = Path(__file__).parent / "generate.py"

	# Asset lists: each entry is (repo_id, filename_in_repo, destination_dir) and is
	# handed to _download() by setup().
	SULPHUR_ASSETS = [
	("SulphurAI/Sulphur-2-base", "sulphur_distil_bf16.safetensors", CKPTS_DIR),
	]
	LTX_ASSETS = [
	("SulphurAI/Sulphur-2-base", "distill_loras/ltx-2.3-22b-distilled-lora-1.1_fro90_ceil72_condsafe.safetensors", LORAS_DIR),
	("DeepBeepMeep/LTX-2", "ltx-2.3-22b_vae.safetensors", CKPTS_DIR),
	("DeepBeepMeep/LTX-2", "ltx-2.3-22b_text_embedding_projection.safetensors", CKPTS_DIR),
	("DeepBeepMeep/LTX-2", "ltx-2.3-22b_embeddings_connector.safetensors", CKPTS_DIR),
	]

	# Finetune definition that setup() serialises to
	# FINETUNES_DIR / "sulphur_2_base.json".
	# NOTE(review): guidance_scale is 3.5 here while the UI slider defaults to 5.0;
	# confirm which value generate.py / Wan2GP actually applies.
	SULPHUR_FINETUNE = {
	"model": {
	"name": "Sulphur 2 Base",
	"visible": True,
	"architecture": "ltx2_22B",
	"parent_model_type": "ltx2_22B",
	"description": "LTX-2.3 fine-tuned i2v. Distilled checkpoint.",
	# Full distilled model — do NOT also preload the rank-768 LoRA (README: use one or the other)
	"URLs": [str(CKPTS_DIR / "sulphur_distil_bf16.safetensors")],
	"preload_URLs": [],
	},
	"num_inference_steps": 8,
	"video_length": 81,
	"resolution": "832x480",
	"guidance_scale": 3.5,
	"alt_guidance_scale": 3.5,
	}

	# Guard state used by setup() so that repeated calls become no-ops.
	_setup_lock = threading.Lock()
	_setup_done = False


	def _download(repo_id, filename, dest_dir):
	    """Fetch one Hub file into ``dest_dir``, stored flat under its basename.

	    Args:
	        repo_id: Hub repository id, e.g. ``"owner/name"``.
	        filename: Path of the file inside the repository; may contain
	            sub-folders.
	        dest_dir: ``Path`` of the directory that should end up holding the
	            file. Created if missing.

	    If ``dest_dir / <basename>`` already exists nothing is downloaded.
	    Otherwise the file is fetched with ``hf_hub_download(local_dir=dest_dir)``
	    and, when it landed in a sub-folder, moved up to ``dest_dir`` itself.
	    """
	    from huggingface_hub import hf_hub_download

	    dest_dir.mkdir(parents=True, exist_ok=True)
	    # Final, flattened location: any sub-folder part of `filename` is dropped.
	    flat_target = dest_dir / Path(filename).name

	    if not flat_target.exists():
	        print(f"[download] {repo_id}/{filename}")
	        hf_hub_download(
	            repo_id=repo_id,
	            filename=filename,
	            local_dir=str(dest_dir),
	            token=_HF_TOKEN,
	        )
	        # With local_dir the repo's folder layout is kept, so a nested
	        # filename lands below dest_dir; pull it up next to its siblings.
	        nested = dest_dir / filename
	        if nested.exists() and not flat_target.exists():
	            shutil.move(str(nested), str(flat_target))
	        return

	    print(f"[download] cached: {flat_target.name}")


	def setup():
	    """Prepare the Wan2GP checkout, model files and finetune JSON once per process.

	    Steps, in order:
	      1. Clone Wan2GP into ``WAN2GP_ROOT`` unless ``shared/api.py`` is already
	         present there.
	      2. Fetch every entry of ``SULPHUR_ASSETS`` and ``LTX_ASSETS`` through
	         ``_download()``.
	      3. Fetch the Gemma text encoder into its own sub-folder of ``CKPTS_DIR``.
	      4. Write ``SULPHUR_FINETUNE`` as JSON to
	         ``FINETUNES_DIR / "sulphur_2_base.json"``.

	    The lock is held for the whole routine and ``_setup_done`` is set only
	    after every step succeeded. A concurrent caller therefore waits until the
	    files are really there, and a failed attempt (network error, failed clone)
	    can be retried by calling ``setup()`` again instead of being silently
	    skipped.

	    Raises:
	        subprocess.CalledProcessError: if ``git clone`` exits non-zero.
	        Exception: anything raised by the Hub downloads or the file writes is
	            propagated unchanged.
	    """
	    global _setup_done
	    with _setup_lock:
	        if _setup_done:
	            return

	        if not (WAN2GP_ROOT / "shared" / "api.py").exists():
	            WAN2GP_ROOT.mkdir(parents=True, exist_ok=True)
	            print("[setup] Cloning Wan2GP...")
	            subprocess.run(
	                ["git", "clone", "--depth=1",
	                 "https://github.com/deepbeepmeep/Wan2GP.git", str(WAN2GP_ROOT)],
	                check=True,
	            )

	        for repo, fname, dest in SULPHUR_ASSETS + LTX_ASSETS:
	            _download(repo, fname, dest)

	        # Gemma text encoder — must stay in its subfolder (Wan2GP looks there by name)
	        _gemma_folder = "gemma-3-12b-it-qat-q4_0-unquantized"
	        _gemma_file = f"{_gemma_folder}_quanto_bf16_int8.safetensors"
	        gemma_dest = CKPTS_DIR / _gemma_folder / _gemma_file
	        if not gemma_dest.exists():
	            from huggingface_hub import hf_hub_download
	            print("[download] Gemma text encoder...")
	            hf_hub_download(
	                repo_id="DeepBeepMeep/LTX-2",
	                filename=f"{_gemma_folder}/{_gemma_file}",
	                local_dir=str(CKPTS_DIR),
	                token=_HF_TOKEN,
	            )
	        else:
	            print("[download] cached: Gemma text encoder")

	        FINETUNES_DIR.mkdir(parents=True, exist_ok=True)
	        (FINETUNES_DIR / "sulphur_2_base.json").write_text(json.dumps(SULPHUR_FINETUNE, indent=2))
	        print("[setup] Done.")

	        # Only now is it safe to let later callers skip the work.
	        _setup_done = True


	# Runs at import time, before the UI below is built: clones Wan2GP and
	# downloads the model files if they are not already on disk.
	setup()

	# "WIDTHxHEIGHT" choices offered by the Resolution dropdown; the selected
	# string is forwarded unchanged to generate.py via --resolution.
	RESOLUTIONS = ["832x480", "480x832", "640x640", "1024x576", "576x1024"]


	@spaces.GPU(duration=120)
	def generate_video(image, prompt, resolution, steps, guidance_scale, frames, seed):
	    """Run generate.py in a subprocess, streaming its log and then the video.

	    This is a generator used as a Gradio event handler with two outputs
	    (video, log text).

	    Args:
	        image: Path of the uploaded input image (the Image component is
	            declared with ``type="filepath"``).
	        prompt: Text prompt; must contain non-whitespace characters.
	        resolution: ``"WIDTHxHEIGHT"`` string, forwarded as ``--resolution``.
	        steps: Forwarded as ``--steps`` after ``int()``.
	        guidance_scale: Forwarded as ``--guidance_scale`` after ``float()``.
	        frames: Forwarded as ``--frames`` after ``int()``.
	        seed: Forwarded as ``--seed`` after ``int()``. ``None`` (an emptied
	            number field) is sent as ``-1``, which the UI labels as "random".

	    Yields:
	        ``(None, log_tail)`` while the subprocess is producing output, where
	        ``log_tail`` is the last 30 log lines. The last item is either
	        ``(None, full_log + error marker)`` on failure or
	        ``(video_path, full_log + done marker)`` on success.

	    Raises:
	        gr.Error: if no image was supplied or the prompt is empty.
	    """
	    if image is None:
	        raise gr.Error("Please upload an image.")
	    if not prompt or not prompt.strip():
	        raise gr.Error("Please enter a prompt.")

	    # A cleared Number field arrives as None; fall back to the UI's "random".
	    seed = -1 if seed is None else int(seed)

	    work_dir = Path(tempfile.mkdtemp())
	    out_file = work_dir / "output.mp4"
	    env = {**os.environ, "WAN2GP_ROOT": str(WAN2GP_ROOT)}

	    cmd = [
	        sys.executable, str(GENERATE_PY),
	        "--image", image,
	        "--prompt", prompt,
	        "--output", str(out_file),
	        "--model", "sulphur-2",
	        "--seed", str(seed),
	        "--resolution", resolution,
	        "--steps", str(int(steps)),
	        "--guidance_scale", str(float(guidance_scale)),
	        "--frames", str(int(frames)),
	    ]

	    log_lines = []

	    def _record(raw):
	        """Fold one raw output line into log_lines; return False if it was blank."""
	        line = raw.strip()
	        if not line:
	            return False
	        # Overwrite last line if it looks like a progress bar update
	        if log_lines and ("%" in line or "it/s" in line or "step" in line.lower()):
	            log_lines[-1] = line
	        else:
	            log_lines.append(line)
	        print(line)
	        return True

	    try:
	        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
	                                text=True, bufsize=0, env=env)
	        try:
	            buf = ""
	            while True:
	                chunk = proc.stdout.read(256)
	                if not chunk:
	                    break
	                buf += chunk
	                # Split on \r or \n — tqdm uses \r to overwrite progress lines
	                parts = buf.replace("\r", "\n").split("\n")
	                # The last piece may be an unfinished line; keep it for the next chunk.
	                buf = parts[-1]
	                for part in parts[:-1]:
	                    if _record(part):
	                        yield None, "\n".join(log_lines[-30:])

	            # Output that ended without a newline would otherwise be lost.
	            _record(buf)
	            proc.wait()
	        finally:
	            # Reached early when the generator is closed or an error occurs:
	            # do not leave the child process running or the pipe open.
	            if proc.poll() is None:
	                proc.kill()
	                proc.wait()
	            proc.stdout.close()

	        log = "\n".join(log_lines)

	        if proc.returncode != 0 or not out_file.exists():
	            yield None, log + "\n\n[ERROR] Generation failed."
	            return

	        # Reserve a path outside work_dir and close the handle before copying.
	        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as final:
	            final_path = final.name
	        shutil.copy2(out_file, final_path)
	        yield final_path, log + "\n\n[DONE]"
	    finally:
	        # The result (if any) has been copied out; drop the scratch directory.
	        shutil.rmtree(work_dir, ignore_errors=True)


	# Gradio UI: one row with two equal-scale columns, plus the click wiring.
	with gr.Blocks(title="Sulphur — Image to Video") as demo:
	gr.Markdown("# Sulphur — Image to Video\nPowered by Wan2GP · Sulphur-2 distilled finetune")
	with gr.Row():
	with gr.Column(scale=1):
	# type="filepath": the handler receives a path, which generate_video()
	# forwards to generate.py as --image.
	image_in = gr.Image(type="filepath", label="Input Image")
	prompt_in = gr.Textbox(label="Prompt", placeholder="Describe the motion…", lines=3)
	with gr.Accordion("Advanced", open=False):
	resolution_dd = gr.Dropdown(RESOLUTIONS, value="832x480", label="Resolution")
	steps_sl = gr.Slider(1, 50, value=8, step=1, label="Steps")
	guidance_sl = gr.Slider(1.0, 10.0, value=5.0, step=0.5, label="Guidance Scale")
	frames_sl = gr.Slider(17, 257, value=81, step=8, label="Frames")
	# precision=0 makes the field integer-valued; -1 is labelled as "random".
	seed_num = gr.Number(value=-1, label="Seed (-1 = random)", precision=0)
	run_btn = gr.Button("Generate", variant="primary")
	with gr.Column(scale=1):
	video_out = gr.Video(label="Output Video")
	log_out = gr.Textbox(label="Log", lines=10, interactive=False)

	# `inputs` are listed in the same order as generate_video's parameters;
	# each (video, log) tuple it yields updates the two outputs.
	run_btn.click(
	fn=generate_video,
	inputs=[image_in, prompt_in, resolution_dd, steps_sl, guidance_sl, frames_sl, seed_num],
	outputs=[video_out, log_out],
	)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	demo.launch(theme=gr.themes.Soft())