File size: 7,392 Bytes
7469732
f4ac1b9
c838058
 
7469732
 
 
 
 
 
 
 
f4ac1b9
7469732
 
 
 
 
c838058
 
 
 
 
7469732
c838058
7469732
 
4fe229d
7469732
f4ac1b9
483e424
f4ac1b9
7469732
f4ac1b9
7469732
 
f4ac1b9
7469732
 
 
 
 
 
4fe229d
7469732
4fe229d
7469732
 
 
 
 
 
 
 
 
 
 
 
c838058
7469732
 
0103a70
7469732
f4ac1b9
051110a
f4ac1b9
 
 
0103a70
 
 
 
051110a
 
7469732
 
 
 
 
 
 
 
 
f4ac1b9
 
7469732
f4ac1b9
 
7469732
acd62f0
f4ac1b9
 
051110a
499128d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7469732
 
f4ac1b9
 
7469732
 
 
 
c838058
7469732
 
ceb42ce
24a1162
 
 
 
 
 
c838058
 
 
 
 
 
 
 
24a1162
 
c838058
24a1162
 
 
c838058
 
 
24a1162
e131563
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c838058
24a1162
c838058
24a1162
c838058
24a1162
 
c838058
 
 
24a1162
7469732
 
ad05bf8
7469732
 
 
c838058
 
7469732
f4ac1b9
 
499128d
f4ac1b9
 
7469732
 
 
c838058
7469732
 
 
 
 
 
 
 
ad05bf8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
"""
Sulphur — Image to Video (HF Spaces).
Clones Wan2GP and downloads models on first run.
Generation is handled by generate.py called as a subprocess inside @spaces.GPU.
"""

import os
import sys
import subprocess
import shutil
import tempfile
import threading
import json
from pathlib import Path

import gradio as gr
import spaces

# Hub access token read from the environment; None when HF_TOKEN is unset.
# It is passed as `token=` to every hf_hub_download call in this file.
_HF_TOKEN    = os.environ.get("HF_TOKEN")
# Base directory for everything this app stores: /data when that path exists,
# otherwise the system temp directory.
# NOTE(review): presumably /data is the Space's persistent storage mount — confirm.
_PERSISTENT  = Path("/data") if Path("/data").exists() else Path(tempfile.gettempdir())
# Location of the Wan2GP checkout (cloned by setup()) and its sub-directories.
WAN2GP_ROOT  = _PERSISTENT / "Wan2GP"
CKPTS_DIR    = WAN2GP_ROOT / "ckpts"
LORAS_DIR    = WAN2GP_ROOT / "loras" / "ltx2"
FINETUNES_DIR = WAN2GP_ROOT / "finetunes"
# Script launched as a subprocess by generate_video(); expected next to this file.
GENERATE_PY  = Path(__file__).parent / "generate.py"

# Asset manifests: (repo_id, filename-in-repo, destination directory) triples.
# setup() passes each triple to _download(), which stores the file directly in
# the destination directory under its base name (any repo subfolder is dropped).
SULPHUR_ASSETS = [
    ("SulphurAI/Sulphur-2-base", "sulphur_distil_bf16.safetensors", CKPTS_DIR),
]
LTX_ASSETS = [
    ("SulphurAI/Sulphur-2-base", "distill_loras/ltx-2.3-22b-distilled-lora-1.1_fro90_ceil72_condsafe.safetensors", LORAS_DIR),
    ("DeepBeepMeep/LTX-2", "ltx-2.3-22b_vae.safetensors",                    CKPTS_DIR),
    ("DeepBeepMeep/LTX-2", "ltx-2.3-22b_text_embedding_projection.safetensors", CKPTS_DIR),
    ("DeepBeepMeep/LTX-2", "ltx-2.3-22b_embeddings_connector.safetensors",   CKPTS_DIR),
]

# Finetune definition that setup() serialises as JSON to
# FINETUNES_DIR / "sulphur_2_base.json".
# NOTE(review): the key names presumably follow Wan2GP's finetune schema —
# confirm against the Wan2GP documentation before changing them.
SULPHUR_FINETUNE = {
    "model": {
        "name": "Sulphur 2 Base",
        "visible": True,
        "architecture": "ltx2_22B",
        "parent_model_type": "ltx2_22B",
        "description": "LTX-2.3 fine-tuned i2v. Distilled checkpoint.",
        # Full distilled model — do NOT also preload the rank-768 LoRA (README: use one or the other)
        # Points at the local checkpoint path that setup() downloads into CKPTS_DIR.
        "URLs": [str(CKPTS_DIR / "sulphur_distil_bf16.safetensors")],
        "preload_URLs": [],
    },
    "num_inference_steps": 8,
    "video_length": 81,
    "resolution": "832x480",
    "guidance_scale": 3.5,
    "alt_guidance_scale": 3.5,
}

# State used by setup() so its work runs only once per process: the flag is
# checked and set while holding the lock.
_setup_lock = threading.Lock()
_setup_done = False


def _download(repo_id, filename, dest_dir):
    from huggingface_hub import hf_hub_download
    dest_dir.mkdir(parents=True, exist_ok=True)
    dest = dest_dir / Path(filename).name  # flat — strip any subfolder
    if dest.exists():
        print(f"[download] cached: {dest.name}")
        return
    print(f"[download] {repo_id}/{filename}")
    hf_hub_download(repo_id=repo_id, filename=filename,
                    local_dir=str(dest_dir), token=_HF_TOKEN)
    # hf_hub_download preserves subfolder structure; flatten to dest_dir root
    downloaded = dest_dir / filename
    if downloaded.exists() and not dest.exists():
        shutil.move(str(downloaded), str(dest))


def setup():
    """Prepare the Wan2GP checkout, model assets and finetune file once.

    Steps, in order:
      1. Shallow-clone Wan2GP into ``WAN2GP_ROOT`` unless ``shared/api.py``
         already exists there.
      2. Download every entry of ``SULPHUR_ASSETS`` and ``LTX_ASSETS`` through
         ``_download``.
      3. Download the Gemma text encoder, keeping its repo subfolder.
      4. Write ``SULPHUR_FINETUNE`` as JSON to
         ``FINETUNES_DIR / "sulphur_2_base.json"``.

    The lock is held for the whole run and ``_setup_done`` is set only after
    every step succeeded. Concurrent callers therefore wait until the assets
    are in place, and a failed run can be retried by calling ``setup()`` again.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` exits non-zero.
        Exceptions raised by the downloads or the file write propagate
        unchanged.
    """
    global _setup_done
    with _setup_lock:
        if _setup_done:
            return

        if not (WAN2GP_ROOT / "shared" / "api.py").exists():
            WAN2GP_ROOT.mkdir(parents=True, exist_ok=True)
            print("[setup] Cloning Wan2GP...")
            subprocess.run(
                ["git", "clone", "--depth=1",
                 "https://github.com/deepbeepmeep/Wan2GP.git", str(WAN2GP_ROOT)],
                check=True,
            )

        for repo, fname, dest in SULPHUR_ASSETS + LTX_ASSETS:
            _download(repo, fname, dest)

        # Gemma text encoder — must stay in its subfolder (Wan2GP looks there by name)
        _gemma_folder = "gemma-3-12b-it-qat-q4_0-unquantized"
        _gemma_file = f"{_gemma_folder}_quanto_bf16_int8.safetensors"
        gemma_dest = CKPTS_DIR / _gemma_folder / _gemma_file
        if not gemma_dest.exists():
            from huggingface_hub import hf_hub_download
            print("[download] Gemma text encoder...")
            hf_hub_download(
                repo_id="DeepBeepMeep/LTX-2",
                filename=f"{_gemma_folder}/{_gemma_file}",
                local_dir=str(CKPTS_DIR),
                token=_HF_TOKEN,
            )
        else:
            print("[download] cached: Gemma text encoder")

        FINETUNES_DIR.mkdir(parents=True, exist_ok=True)
        (FINETUNES_DIR / "sulphur_2_base.json").write_text(json.dumps(SULPHUR_FINETUNE, indent=2))

        # Mark completion last so an exception above leaves setup retryable.
        _setup_done = True
        print("[setup] Done.")


# Runs at import time, before the UI below is constructed: clones Wan2GP and
# downloads the model files when they are not already present.
setup()

# Choices offered by the "Resolution" dropdown; the selected string is handed
# to generate.py unchanged via --resolution.
RESOLUTIONS = ["832x480", "480x832", "640x640", "1024x576", "576x1024"]


def _iter_output_lines(proc):
    """Yield stripped, non-empty lines from ``proc.stdout`` as they arrive.

    Carriage returns are treated like newlines, and output that is still
    buffered when the stream ends (a last line without a terminator) is
    yielded as well.
    """
    buf = ""
    for chunk in iter(lambda: proc.stdout.read(256), ""):
        buf += chunk
        # Split on \r or \n — tqdm uses \r to overwrite progress lines
        *complete, buf = buf.replace("\r", "\n").split("\n")
        for part in complete:
            stripped = part.strip()
            if stripped:
                yield stripped
    tail = buf.strip()
    if tail:
        yield tail


def _record_log_line(log_lines, line, prev_was_progress):
    """Store ``line`` in ``log_lines`` and report whether it looked like progress.

    A progress-looking line replaces the previous entry only when that entry
    was itself a progress line, so ordinary messages are never overwritten.
    """
    is_progress = "%" in line or "it/s" in line or "step" in line.lower()
    if is_progress and prev_was_progress and log_lines:
        log_lines[-1] = line
    else:
        log_lines.append(line)
    return is_progress


@spaces.GPU(duration=120)
def generate_video(image, prompt, resolution, steps, guidance_scale, frames, seed):
    """Run generate.py in a subprocess and stream its log to the UI.

    Args:
        image: Path of the input image (the UI supplies a file path).
        prompt: Text prompt; must contain non-whitespace characters.
        resolution: Resolution string forwarded as ``--resolution``.
        steps: Step count, forwarded as an integer.
        guidance_scale: Guidance scale, forwarded as a float.
        frames: Frame count, forwarded as an integer.
        seed: Seed, forwarded as an integer; ``None`` is sent as ``-1``,
            which the UI labels as "random".

    Yields:
        ``(video_path, log_text)`` tuples. While the subprocess runs,
        ``video_path`` is None and ``log_text`` holds the last 30 log lines.
        The final tuple carries either the path of the finished ``.mp4`` with
        the full log plus ``[DONE]``, or None with the full log plus an
        ``[ERROR]`` marker.

    Raises:
        gr.Error: when no image or no prompt was provided.
    """
    if image is None:
        raise gr.Error("Please upload an image.")
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a prompt.")

    # A cleared number field arrives as None; fall back to the "random" value.
    seed_value = -1 if seed is None else int(seed)

    work_dir = Path(tempfile.mkdtemp())
    out_file = work_dir / "output.mp4"
    env = {**os.environ, "WAN2GP_ROOT": str(WAN2GP_ROOT)}

    cmd = [
        sys.executable, str(GENERATE_PY),
        "--image",  image,
        "--prompt", prompt,
        "--output", str(out_file),
        "--model",  "sulphur-2",
        "--seed",   str(seed_value),
        "--resolution", resolution,
        "--steps",  str(int(steps)),
        "--guidance_scale", str(float(guidance_scale)),
        "--frames", str(int(frames)),
    ]

    log_lines = []
    last_was_progress = False
    try:
        # The context manager closes the pipe and reaps the child on exit.
        with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                              text=True, bufsize=0, env=env) as proc:
            try:
                for line in _iter_output_lines(proc):
                    last_was_progress = _record_log_line(log_lines, line, last_was_progress)
                    print(line)
                    yield None, "\n".join(log_lines[-30:])
                proc.wait()
            finally:
                # Reached with a live child only when the generator is closed
                # or an error occurred mid-stream; do not leave it running.
                if proc.poll() is None:
                    proc.kill()

        log = "\n".join(log_lines)

        if proc.returncode != 0 or not out_file.exists():
            yield None, log + "\n\n[ERROR] Generation failed."
            return

        # Copy the result out of the work directory, which is deleted below.
        final = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
        final.close()  # only the name is needed; do not keep the handle open
        shutil.copy2(out_file, final.name)
        yield final.name, log + "\n\n[DONE]"
    finally:
        shutil.rmtree(work_dir, ignore_errors=True)


# UI definition: inputs in the left column, results in the right column.
with gr.Blocks(title="Sulphur — Image to Video") as demo:
    gr.Markdown("# Sulphur — Image to Video\nPowered by Wan2GP · Sulphur-2 distilled finetune")
    with gr.Row():
        with gr.Column(scale=1):
            # type="filepath": generate_video receives a path, which it
            # forwards to generate.py as --image.
            image_in  = gr.Image(type="filepath", label="Input Image")
            prompt_in = gr.Textbox(label="Prompt", placeholder="Describe the motion…", lines=3)
            with gr.Accordion("Advanced", open=False):
                resolution_dd = gr.Dropdown(RESOLUTIONS, value="832x480", label="Resolution")
                steps_sl      = gr.Slider(1, 50, value=8,   step=1,   label="Steps")
                # NOTE(review): the default here (5.0) differs from the
                # guidance_scale of 3.5 in SULPHUR_FINETUNE — confirm which
                # value is intended.
                guidance_sl   = gr.Slider(1.0, 10.0, value=5.0, step=0.5, label="Guidance Scale")
                frames_sl     = gr.Slider(17, 257, value=81, step=8,  label="Frames")
                seed_num      = gr.Number(value=-1, label="Seed (-1 = random)", precision=0)
            run_btn = gr.Button("Generate", variant="primary")
        with gr.Column(scale=1):
            video_out = gr.Video(label="Output Video")
            log_out   = gr.Textbox(label="Log", lines=10, interactive=False)

    # The order of `inputs` matches generate_video's positional parameters;
    # `outputs` receives the (video path, log text) tuples it yields.
    run_btn.click(
        fn=generate_video,
        inputs=[image_in, prompt_in, resolution_dd, steps_sl, guidance_sl, frames_sl, seed_num],
        outputs=[video_out, log_out],
    )

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(theme=gr.themes.Soft())