# Sulphur / app.py
# Committer (from page header): Daankular
# Latest commit: Pre-download Gemma at startup; bump guidance to 5.0 for prompt adherence
# Commit: 499128d
"""
Sulphur — Image to Video (HF Spaces).
Clones Wan2GP and downloads models on first run.
Generation is handled by generate.py called as a subprocess inside @spaces.GPU.
"""
import os
import sys
import subprocess
import shutil
import tempfile
import threading
import json
from pathlib import Path
import gradio as gr
import spaces
# Optional Hugging Face access token; None when HF_TOKEN is not set.
_HF_TOKEN = os.environ.get("HF_TOKEN")

# Storage root: /data when that directory exists, otherwise the system temp dir.
_PERSISTENT = Path("/data")
if not _PERSISTENT.exists():
    _PERSISTENT = Path(tempfile.gettempdir())

# Layout of the Wan2GP checkout and the folders this app writes into.
WAN2GP_ROOT = _PERSISTENT.joinpath("Wan2GP")
CKPTS_DIR = WAN2GP_ROOT.joinpath("ckpts")
LORAS_DIR = WAN2GP_ROOT.joinpath("loras", "ltx2")
FINETUNES_DIR = WAN2GP_ROOT.joinpath("finetunes")

# Generation script that lives next to this file.
GENERATE_PY = Path(__file__).parent.joinpath("generate.py")

# Each asset entry is (hub repo id, file path inside the repo, local destination dir).
SULPHUR_ASSETS = [
    (
        "SulphurAI/Sulphur-2-base",
        "sulphur_distil_bf16.safetensors",
        CKPTS_DIR,
    ),
]

LTX_ASSETS = [
    (
        "SulphurAI/Sulphur-2-base",
        "distill_loras/ltx-2.3-22b-distilled-lora-1.1_fro90_ceil72_condsafe.safetensors",
        LORAS_DIR,
    ),
    (
        "DeepBeepMeep/LTX-2",
        "ltx-2.3-22b_vae.safetensors",
        CKPTS_DIR,
    ),
    (
        "DeepBeepMeep/LTX-2",
        "ltx-2.3-22b_text_embedding_projection.safetensors",
        CKPTS_DIR,
    ),
    (
        "DeepBeepMeep/LTX-2",
        "ltx-2.3-22b_embeddings_connector.safetensors",
        CKPTS_DIR,
    ),
]

# Finetune definition that setup() serialises to FINETUNES_DIR as JSON.
# NOTE(review): the guidance defaults here are 3.5 while the UI slider defaults
# to 5.0 — confirm which value generate.py actually ends up using.
SULPHUR_FINETUNE = {
    "model": {
        "name": "Sulphur 2 Base",
        "visible": True,
        "architecture": "ltx2_22B",
        "parent_model_type": "ltx2_22B",
        "description": "LTX-2.3 fine-tuned i2v. Distilled checkpoint.",
        # Full distilled model — do NOT also preload the rank-768 LoRA
        # (README: use one or the other)
        "URLs": [str(CKPTS_DIR / "sulphur_distil_bf16.safetensors")],
        "preload_URLs": [],
    },
    "num_inference_steps": 8,
    "video_length": 81,
    "resolution": "832x480",
    "guidance_scale": 3.5,
    "alt_guidance_scale": 3.5,
}

# Guard and completion flag used by setup().
_setup_lock = threading.Lock()
_setup_done = False
def _download(repo_id: str, filename: str, dest_dir: Path) -> None:
    """Fetch one file from the Hugging Face Hub into *dest_dir*, flattened.

    The file ends up directly under *dest_dir* under its base name, even when
    *filename* contains a subfolder. If that target already exists nothing is
    downloaded.

    Args:
        repo_id: Hub repository id, e.g. ``"org/name"``.
        filename: Path of the file inside the repository.
        dest_dir: Local directory to place the file in; created if missing.
    """
    dest_dir.mkdir(parents=True, exist_ok=True)
    dest = dest_dir / Path(filename).name  # flat — strip any subfolder
    if dest.exists():
        print(f"[download] cached: {dest.name}")
        return

    # Imported only when a download is really needed, so a fully cached run
    # neither requires nor pays for importing huggingface_hub.
    from huggingface_hub import hf_hub_download

    print(f"[download] {repo_id}/{filename}")
    hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        local_dir=str(dest_dir),
        token=_HF_TOKEN,
    )

    # hf_hub_download preserves subfolder structure; flatten to dest_dir root.
    downloaded = dest_dir / filename
    if downloaded.exists() and not dest.exists():
        shutil.move(str(downloaded), str(dest))
def setup():
    """Prepare everything generation needs; safe to call more than once.

    Steps, in order:
      1. Clone Wan2GP into ``WAN2GP_ROOT`` unless ``shared/api.py`` is already there.
      2. Download every entry of ``SULPHUR_ASSETS`` and ``LTX_ASSETS``.
      3. Download the Gemma text encoder into its own subfolder of ``CKPTS_DIR``.
      4. Write ``SULPHUR_FINETUNE`` to ``FINETUNES_DIR / "sulphur_2_base.json"``.

    The whole sequence runs while holding ``_setup_lock`` and ``_setup_done``
    is only set after every step succeeded, so a failed attempt (for example a
    network error) is retried by the next call instead of being silently
    treated as complete, and concurrent callers wait for the work to finish.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` exits non-zero.
    """
    global _setup_done
    with _setup_lock:
        if _setup_done:
            return

        if not (WAN2GP_ROOT / "shared" / "api.py").exists():
            WAN2GP_ROOT.mkdir(parents=True, exist_ok=True)
            print("[setup] Cloning Wan2GP...")
            subprocess.run(
                [
                    "git", "clone", "--depth=1",
                    "https://github.com/deepbeepmeep/Wan2GP.git",
                    str(WAN2GP_ROOT),
                ],
                check=True,
            )

        for repo, fname, dest in SULPHUR_ASSETS + LTX_ASSETS:
            _download(repo, fname, dest)

        # Gemma text encoder — must stay in its subfolder (Wan2GP looks there by name)
        gemma_folder = "gemma-3-12b-it-qat-q4_0-unquantized"
        gemma_file = f"{gemma_folder}_quanto_bf16_int8.safetensors"
        gemma_dest = CKPTS_DIR / gemma_folder / gemma_file
        if gemma_dest.exists():
            print("[download] cached: Gemma text encoder")
        else:
            from huggingface_hub import hf_hub_download

            print("[download] Gemma text encoder...")
            hf_hub_download(
                repo_id="DeepBeepMeep/LTX-2",
                filename=f"{gemma_folder}/{gemma_file}",
                local_dir=str(CKPTS_DIR),
                token=_HF_TOKEN,
            )

        FINETUNES_DIR.mkdir(parents=True, exist_ok=True)
        (FINETUNES_DIR / "sulphur_2_base.json").write_text(
            json.dumps(SULPHUR_FINETUNE, indent=2),
            encoding="utf-8",
        )

        # Only now is it correct to short-circuit later calls.
        _setup_done = True
        print("[setup] Done.")
# Run the one-time setup when this module is imported, before the UI is built.
setup()
# Choices offered by the "Resolution" dropdown; the selected string is passed
# unchanged to generate.py via --resolution (presumably WIDTHxHEIGHT — confirm).
RESOLUTIONS = ["832x480", "480x832", "640x640", "1024x576", "576x1024"]
def _record_log_line(log_lines, line, prev_was_progress):
    """Add *line* to *log_lines* and report whether it was a progress update.

    A progress update replaces the previous entry only when that entry was
    itself a progress update, so ordinary log lines are never overwritten.
    """
    is_progress = "%" in line or "it/s" in line or "step" in line.lower()
    if is_progress and prev_was_progress and log_lines:
        log_lines[-1] = line
    else:
        log_lines.append(line)
    print(line)
    return is_progress


@spaces.GPU(duration=120)
def generate_video(image, prompt, resolution, steps, guidance_scale, frames, seed):
    """Run generate.py in a subprocess and stream its log to the UI.

    This is a generator. While the subprocess runs it yields
    ``(None, log_tail)`` where ``log_tail`` is the last 30 log lines. It then
    yields exactly one final pair: ``(video_path, full_log + "[DONE]")`` on
    success, or ``(None, full_log + "[ERROR] Generation failed.")`` when the
    subprocess exits non-zero or produced no output file.

    Args:
        image: Path of the input image (the UI supplies a filepath).
        prompt: Text prompt; must contain non-whitespace characters.
        resolution: Resolution string forwarded as ``--resolution``.
        steps: Forwarded as ``--steps`` after ``int()`` conversion.
        guidance_scale: Forwarded as ``--guidance_scale`` after ``float()``.
        frames: Forwarded as ``--frames`` after ``int()`` conversion.
        seed: Forwarded as ``--seed`` after ``int()`` conversion.

    Raises:
        gr.Error: if no image was supplied or the prompt is empty.
    """
    if image is None:
        raise gr.Error("Please upload an image.")
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a prompt.")

    # Scratch directory for the subprocess output; removed in the finally below.
    work_dir = Path(tempfile.mkdtemp())
    out_file = work_dir / "output.mp4"
    env = {**os.environ, "WAN2GP_ROOT": str(WAN2GP_ROOT)}
    cmd = [
        sys.executable, str(GENERATE_PY),
        "--image", image,
        "--prompt", prompt,
        "--output", str(out_file),
        "--model", "sulphur-2",
        "--seed", str(int(seed)),
        "--resolution", resolution,
        "--steps", str(int(steps)),
        "--guidance_scale", str(float(guidance_scale)),
        "--frames", str(int(frames)),
    ]

    log_lines = []
    was_progress = False
    proc = None
    try:
        proc = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=0,
            env=env,
        )
        buf = ""
        while True:
            chunk = proc.stdout.read(256)
            if not chunk:
                break
            buf += chunk
            # Split on \r or \n — tqdm uses \r to overwrite progress lines
            parts = buf.replace("\r", "\n").split("\n")
            buf = parts[-1]  # keep the unfinished tail for the next chunk
            for part in parts[:-1]:
                stripped = part.strip()
                if stripped:
                    was_progress = _record_log_line(log_lines, stripped, was_progress)
            yield None, "\n".join(log_lines[-30:])

        # Output that did not end with a newline would otherwise be lost.
        tail = buf.strip()
        if tail:
            _record_log_line(log_lines, tail, was_progress)

        proc.wait()
        log = "\n".join(log_lines)
        if proc.returncode != 0 or not out_file.exists():
            yield None, log + "\n\n[ERROR] Generation failed."
            return

        # Copy the result out of the scratch dir so it survives the cleanup.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as final:
            final_path = final.name
        shutil.copy2(out_file, final_path)
        yield final_path, log + "\n\n[DONE]"
    finally:
        if proc is not None:
            # Reached with a live process only if the generator was abandoned
            # or an exception occurred; do not leave the job running.
            if proc.poll() is None:
                proc.kill()
                proc.wait()
            if proc.stdout is not None:
                proc.stdout.close()
        shutil.rmtree(work_dir, ignore_errors=True)
# Two-column page: inputs and the Generate button on the left, the resulting
# video and the streamed log on the right.
with gr.Blocks(title="Sulphur — Image to Video") as demo:
    gr.Markdown("# Sulphur — Image to Video\nPowered by Wan2GP · Sulphur-2 distilled finetune")

    with gr.Row():
        with gr.Column(scale=1):
            image_in = gr.Image(type="filepath", label="Input Image")
            prompt_in = gr.Textbox(
                label="Prompt",
                placeholder="Describe the motion…",
                lines=3,
            )

            # Tuning knobs stay collapsed until the user opens the accordion.
            with gr.Accordion("Advanced", open=False):
                resolution_dd = gr.Dropdown(
                    choices=RESOLUTIONS,
                    value="832x480",
                    label="Resolution",
                )
                steps_sl = gr.Slider(minimum=1, maximum=50, value=8, step=1, label="Steps")
                guidance_sl = gr.Slider(
                    minimum=1.0,
                    maximum=10.0,
                    value=5.0,
                    step=0.5,
                    label="Guidance Scale",
                )
                frames_sl = gr.Slider(minimum=17, maximum=257, value=81, step=8, label="Frames")
                seed_num = gr.Number(value=-1, label="Seed (-1 = random)", precision=0)

            run_btn = gr.Button("Generate", variant="primary")

        with gr.Column(scale=1):
            video_out = gr.Video(label="Output Video")
            log_out = gr.Textbox(label="Log", lines=10, interactive=False)

    # generate_video yields (video, log) pairs, matching the two outputs below.
    run_btn.click(
        generate_video,
        inputs=[
            image_in,
            prompt_in,
            resolution_dd,
            steps_sl,
            guidance_sl,
            frames_sl,
            seed_num,
        ],
        outputs=[video_out, log_out],
    )

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(theme=gr.themes.Soft())