Update app.py

app.py CHANGED

@@ -3,7 +3,10 @@ os.system("pip install -q git+https://github.com/tolgacangoz/diffusers.git@integ
 os.system("pip install -q ./spaces-0.1.0-py3-none-any.whl || pip install -q spaces || true")
 from huggingface_hub import snapshot_download
 MODEL_ID = "tolgacangoz/Wan2.2-S2V-14B-Diffusers"
-
+try:
+    LOCAL_DIR = snapshot_download(repo_id=MODEL_ID, repo_type="model")
+except Exception:
+    LOCAL_DIR = MODEL_ID
 import gradio as gr
 from pathlib import Path
 from PIL import Image
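A note on the prefetch pattern above: snapshot_download pulls every file in the repo into the local Hub cache and returns that directory, so the later from_pretrained(LOCAL_DIR, ...) call can load from disk; and since from_pretrained also accepts a bare repo id, falling back to MODEL_ID keeps the app usable if prefetching fails. A minimal standalone sketch of the same idea (the helper name resolve_model_path is invented for illustration):

from huggingface_hub import snapshot_download

def resolve_model_path(repo_id: str) -> str:
    """Prefer a fully prefetched local snapshot; from_pretrained() also
    accepts a bare repo id, so that is a safe fallback."""
    try:
        # Downloads (or reuses) every file in the repo and returns the
        # local cache directory, e.g. ~/.cache/huggingface/hub/models--...
        return snapshot_download(repo_id=repo_id, repo_type="model")
    except Exception:
        return repo_id

print(resolve_model_path("tolgacangoz/Wan2.2-S2V-14B-Diffusers"))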
@@ -41,11 +44,29 @@ def load_audio(audio):
         return wav, sr
     except Exception:
         return None, None
+def to_pil(image):
+    if image is None:
+        return None
+    if isinstance(image, Image.Image):
+        return image.convert("RGB")
+    if isinstance(image, (str, Path)):
+        return Image.open(str(image)).convert("RGB")
+    arr = np.array(image)
+    if arr.dtype != np.uint8:
+        if arr.max() <= 1.0:
+            arr = (arr * 255).clip(0,255).astype("uint8")
+        else:
+            arr = arr.clip(0,255).astype("uint8")
+    if arr.ndim == 2:
+        arr = np.stack([arr]*3, axis=-1)
+    if arr.ndim == 3 and arr.shape[2] == 4:
+        arr = arr[..., :3]
+    return Image.fromarray(arr)
 @spaces.GPU(duration=120)
 def generate_video_gpu(image, audio_file):
     global pipe
     import torch
-    import tempfile, subprocess
+    import tempfile, subprocess, traceback
     from pathlib import Path as _P
     try:
         from diffusers import WanSpeechToVideoPipeline as PipelineClass
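The new to_pil helper defends against the different payloads a Gradio image input can deliver: PIL images, file paths, uint8 arrays, float arrays in [0, 1], RGBA, and grayscale all normalize to an RGB PIL.Image. A quick sanity check of the array cases, assuming the to_pil defined above is in scope:

import numpy as np

# Assumes the to_pil() defined in app.py above is importable.
float_img = np.random.rand(64, 64, 3)          # float64 in [0, 1]
assert to_pil(float_img).mode == "RGB"         # rescaled to uint8 RGB

rgba = np.zeros((64, 64, 4), dtype=np.uint8)   # RGBA upload
assert to_pil(rgba).mode == "RGB"              # alpha channel dropped

gray = np.zeros((64, 64), dtype=np.uint8)      # grayscale
assert to_pil(gray).mode == "RGB"              # stacked to 3 channels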
@@ -55,51 +76,66 @@ def generate_video_gpu(image, audio_file):
     except Exception:
         from diffusers import DiffusionPipeline as PipelineClass
     dtype = torch.float16
-    if pipe is None:
-        pipe = PipelineClass.from_pretrained(
-            LOCAL_DIR,
-            torch_dtype=dtype,
-            use_safetensors=True,
-            device_map="balanced"
-        )
-    audio_array, sample_rate = load_audio(audio_file)
-    if audio_array is None or sample_rate is None:
-        return None
-    init_image = image.convert("RGB")
-    out = pipe(
-        image=init_image,
-        audio=audio_array,
-        audio_sample_rate=sample_rate,
-        num_inference_steps=25,
-        guidance_scale=4.0,
-        frame_rate=16,
-        max_frames=64,
-    )
-    frames = getattr(out, "frames", getattr(out, "images", out))
-    out_path = "wan_s2v_output.mp4"
     try:
-        from diffusers.utils import export_to_video
-        export_to_video(frames, out_path, fps=16)
+        if pipe is None:
+            try:
+                pipe = PipelineClass.from_pretrained(
+                    LOCAL_DIR,
+                    torch_dtype=dtype,
+                    use_safetensors=True,
+                    device_map="balanced"
+                )
+            except Exception:
+                pipe = PipelineClass.from_pretrained(
+                    MODEL_ID,
+                    torch_dtype=dtype,
+                    use_safetensors=True,
+                    device_map="balanced"
+                )
+        audio_array, sample_rate = load_audio(audio_file)
+        if audio_array is None or sample_rate is None:
+            return None
+        init_image = to_pil(image)
+        if init_image is None:
+            return None
+        out = pipe(
+            image=init_image,
+            audio=audio_array,
+            audio_sample_rate=sample_rate,
+            num_inference_steps=25,
+            guidance_scale=4.0,
+            frame_rate=16,
+            max_frames=64,
+        )
+        frames = getattr(out, "frames", getattr(out, "images", out))
+        out_path = "wan_s2v_output.mp4"
+        try:
+            from diffusers.utils import export_to_video
+            export_to_video(frames, out_path, fps=16)
+        except Exception:
+            tmpdir = tempfile.mkdtemp()
+            for i, f in enumerate(frames):
+                fname = _P(tmpdir) / f"frame_{i:04d}.png"
+                if hasattr(f, "save"):
+                    f.save(fname)
+                else:
+                    Image.fromarray((np.array(f) * 255).clip(0,255).astype("uint8")).save(fname)
+            subprocess.run([
+                "ffmpeg", "-y", "-framerate", "16",
+                "-i", str(_P(tmpdir) / "frame_%04d.png"),
+                "-c:v", "libx264", "-pix_fmt", "yuv420p", out_path
+            ], check=True)
+        return out_path
     except Exception:
-        tmpdir = tempfile.mkdtemp()
-        for i, f in enumerate(frames):
-            fname = _P(tmpdir) / f"frame_{i:04d}.png"
-            if hasattr(f, "save"):
-                f.save(fname)
-            else:
-                Image.fromarray((np.array(f) * 255).astype("uint8")).save(fname)
-        subprocess.run([
-            "ffmpeg", "-y", "-framerate", "16",
-            "-i", str(_P(tmpdir) / "frame_%04d.png"),
-            "-c:v", "libx264", "-pix_fmt", "yuv420p", out_path
-        ], check=True)
-    return out_path
+        with open("error.log", "a") as _f:
+            _f.write(traceback.format_exc() + "\n")
+        return None
 def generate_video(image, audio):
     return generate_video_gpu(image, audio)
 with gr.Blocks() as demo:
     gr.Markdown("# Wan2.2-S2V Gradio Space")
     with gr.Row():
-        img = gr.Image(label="Imagen de referencia")
+        img = gr.Image(label="Imagen de referencia", type="numpy")
     audio = gr.Audio(label="Audio (.wav)", type="numpy")
     btn = gr.Button("Generar Video")
     out_video = gr.Video(label="Resultado de Video")
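On the export path: diffusers.utils.export_to_video writes a list of PIL images or arrays straight to mp4, and the ffmpeg branch reproduces the same result by dumping numbered PNGs first. A standalone sketch of that fallback with synthetic frames (frame size, count, and output name are arbitrary; requires ffmpeg on PATH):

import subprocess, tempfile
from pathlib import Path

import numpy as np
from PIL import Image

# Synthetic stand-in for pipeline output: 16 frames of a brightness ramp.
frames = [Image.fromarray(np.full((64, 64, 3), i * 16, dtype=np.uint8))
          for i in range(16)]

tmpdir = Path(tempfile.mkdtemp())
for i, frame in enumerate(frames):
    frame.save(tmpdir / f"frame_{i:04d}.png")

# yuv420p keeps the mp4 playable in browsers; -y overwrites old output.
subprocess.run([
    "ffmpeg", "-y", "-framerate", "16",
    "-i", str(tmpdir / "frame_%04d.png"),
    "-c:v", "libx264", "-pix_fmt", "yuv420p", "out.mp4",
], check=True)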
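The hunk context ends before the event wiring, so the .click() hookup is not visible in this diff; presumably it connects the pieces roughly like this (hypothetical, not part of the commit):

# Hypothetical wiring; the actual call sits outside this hunk's context.
btn.click(fn=generate_video, inputs=[img, audio], outputs=out_video)
demo.launch()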