banao-tech committed on
Commit
4fabefa
·
verified ·
1 Parent(s): 04f6d97

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -26
app.py CHANGED
@@ -3,54 +3,131 @@ import torch
3
  import subprocess
4
  import os
5
  from pathlib import Path
 
6
 
7
def setup():
    """Clone DiffSynth-Studio and install it editable (idempotent one-time setup).

    Raises:
        subprocess.CalledProcessError: if the clone or the install fails.
        Previously failures were silently ignored, leaving later inference
        runs to die with a confusing error far from the real cause.
    """
    if not Path("DiffSynth-Studio").exists():
        # List-form argv (shell=False) avoids shell parsing; check=True
        # surfaces failures instead of swallowing them — generate_video()
        # wraps setup() in try/except, so callers still get a readable message.
        subprocess.run(
            ["git", "clone", "https://github.com/modelscope/DiffSynth-Studio.git"],
            check=True,
        )
        subprocess.run(["pip", "install", "-e", "./DiffSynth-Studio"], check=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
def generate_video(image, audio):
    """Generate a lip-synced video from a face image and an audio clip.

    Args:
        image: filesystem path to the face image (gr.Image(type="filepath")).
        audio: filesystem path to the audio file (gr.Audio(type="filepath")).

    Returns:
        (video_path_or_None, status_message) tuple for the Gradio outputs.
    """
    try:
        if not image or not audio:
            return None, "Please upload both image and audio!"

        setup()

        output_path = Path("output.mp4").resolve()
        # Remove any output left by a previous run: otherwise a failed
        # generation would find the stale file and report success.
        output_path.unlink(missing_ok=True)

        # Absolute paths instead of the old f"../{path}" juggling, which
        # broke for the absolute paths Gradio actually hands us; list-form
        # argv (shell=False) also prevents shell injection via file names.
        cmd = [
            "python", "examples/video_generation/musetalk.py",
            "--image_path", str(Path(image).resolve()),
            "--audio_path", str(Path(audio).resolve()),
            "--output_path", str(output_path),
        ]
        result = subprocess.run(
            cmd, cwd="DiffSynth-Studio", capture_output=True, text=True
        )

        if output_path.exists():
            return str(output_path), "✅ Video generated successfully!"
        return None, f"❌ Failed: {result.stderr}"

    except Exception as e:
        # Top-level boundary for the Gradio callback: report, don't crash.
        return None, f"❌ Error: {str(e)}"
39
 
40
  # Gradio Interface
41
- with gr.Blocks(title="AI Lip Sync") as app:
42
- gr.Markdown("# 🎀 AI Lip Sync Generator")
43
- gr.Markdown("Upload a face image and audio to generate lip-synced video")
 
 
 
 
 
 
44
 
45
  with gr.Row():
46
- with gr.Column():
47
- image_input = gr.Image(type="filepath", label="πŸ“· Face Image")
48
- audio_input = gr.Audio(type="filepath", label="🎡 Audio File")
49
- generate_btn = gr.Button("πŸš€ Generate Video", variant="primary")
50
-
51
- with gr.Column():
52
- video_output = gr.Video(label="πŸ“Ή Generated Video")
53
- status_output = gr.Textbox(label="Status", lines=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  generate_btn.click(
56
  fn=generate_video,
 
3
  import subprocess
4
  import os
5
  from pathlib import Path
6
+ import shutil
7
 
8
def setup_hallo():
    """Install Hallo2 (repo clone, Python deps, model weights) on first run.

    A marker file records a completed install so later calls return
    immediately. Returns True on success, False on failure — errors are
    printed rather than raised so the Gradio callback can report them.
    """
    if Path("hallo2_installed.txt").exists():
        return True

    try:
        print("Installing Hallo2...")

        # Skip the clone if a previous, partially-failed setup already
        # created the checkout — `git clone` errors out when the target
        # directory exists, which used to make every retry fail.
        if not Path("/tmp/hallo2").exists():
            subprocess.run(
                ["git", "clone",
                 "https://github.com/fudan-generative-vision/hallo2.git",
                 "/tmp/hallo2"],
                check=True,
            )

        # Python dependencies (list-form argv: no shell parsing surprises).
        subprocess.run(
            ["pip", "install", "-q", "diffusers[torch]", "transformers",
             "av", "insightface", "onnxruntime-gpu"],
            check=True,
        )

        # Pretrained weights (~2 GB download on the first run).
        subprocess.run(
            ["huggingface-cli", "download", "fudan-generative-ai/hallo2",
             "--local-dir", "/tmp/hallo2/pretrained_models"],
            check=True,
        )

        # Only mark as installed once every step above has succeeded.
        Path("hallo2_installed.txt").touch()
        print("✅ Hallo2 setup complete!")
        return True

    except Exception as e:
        # Best-effort boundary: the caller turns False into a UI message.
        print(f"Setup error: {e}")
        return False
32
 
33
def generate_video(image, audio):
    """Generate a lip-synced video with Hallo2.

    Args:
        image: path to the portrait image (gr.Image(type="filepath")).
        audio: path to the driving audio (gr.Audio(type="filepath")).

    Returns:
        (video_path_or_None, status_message) tuple for the Gradio outputs.
    """
    try:
        if not image or not audio:
            return None, "❌ Please upload both image and audio!"

        # Install on first use; setup_hallo() prints its own error details.
        if not setup_hallo():
            return None, "❌ Setup failed"

        # Prepare output location.
        output_dir = Path("/tmp/outputs")
        output_dir.mkdir(exist_ok=True)
        output_file = output_dir / "result.mp4"
        # Drop any previous run's result: a stale file here would make a
        # failed generation look successful in the check below.
        output_file.unlink(missing_ok=True)

        # Copy inputs to fixed locations, keeping their real extensions —
        # force-renaming e.g. a PNG to .jpg (as before) can mislead
        # extension-based format detection downstream.
        img_path = Path("/tmp/input_img").with_suffix(Path(image).suffix or ".jpg")
        aud_path = Path("/tmp/input_audio").with_suffix(Path(audio).suffix or ".wav")
        shutil.copy(image, img_path)
        shutil.copy(audio, aud_path)

        print("🎬 Generating video...")

        # List-form argv plus cwd= instead of a shell f-string: no quoting
        # pitfalls and no `cd ... &&` chaining.
        cmd = [
            "python", "scripts/inference.py",
            "--source_image", str(img_path),
            "--driving_audio", str(aud_path),
            "--output", str(output_file),
            "--pose_weight", "1.0",
            "--face_weight", "1.0",
            "--lip_weight", "1.0",
        ]
        result = subprocess.run(
            cmd, cwd="/tmp/hallo2", capture_output=True, text=True
        )

        if output_file.exists():
            return str(output_file), "✅ Video generated successfully!"
        # Truncate stderr so the status textbox stays readable.
        return None, f"❌ Generation failed. Error: {result.stderr[:200]}"

    except Exception as e:
        # Boundary for the Gradio callback: surface the error, don't crash.
        return None, f"❌ Error: {str(e)}"
76
 
77
  # Gradio Interface
78
+ with gr.Blocks(theme=gr.themes.Soft(), title="AI Lip Sync") as app:
79
+
80
+ gr.Markdown("""
81
+ # 🎀 AI Lip Sync Generator
82
+
83
+ Upload a portrait image and audio to create a realistic lip-synced video!
84
+
85
+ **⚑ Fast generation on T4 GPU (~30-60 seconds)**
86
+ """)
87
 
88
  with gr.Row():
89
+ with gr.Column(scale=1):
90
+ gr.Markdown("### πŸ“€ Upload Files")
91
+ image_input = gr.Image(
92
+ type="filepath",
93
+ label="Portrait Image (JPG/PNG)",
94
+ height=300
95
+ )
96
+ audio_input = gr.Audio(
97
+ type="filepath",
98
+ label="Audio File (WAV/MP3)"
99
+ )
100
+
101
+ generate_btn = gr.Button(
102
+ "πŸš€ Generate Lip-Synced Video",
103
+ variant="primary",
104
+ size="lg"
105
+ )
106
+
107
+ with gr.Column(scale=1):
108
+ gr.Markdown("### πŸ“Ή Output")
109
+ video_output = gr.Video(
110
+ label="Generated Video",
111
+ height=400
112
+ )
113
+ status_output = gr.Textbox(
114
+ label="Status",
115
+ lines=3,
116
+ interactive=False
117
+ )
118
+
119
+ gr.Markdown("""
120
+ ---
121
+ ### πŸ’‘ Tips:
122
+ - Use clear, front-facing portrait images
123
+ - Best resolution: 512x512 or higher
124
+ - Audio length: Up to 60 seconds recommended
125
+ - First generation will download models (~2GB)
126
+
127
+ ### ⏱️ Performance:
128
+ - First run: ~3-5 minutes (model download)
129
+ - Subsequent runs: ~30-60 seconds per video
130
+ """)
131
 
132
  generate_btn.click(
133
  fn=generate_video,