banao-tech commited on
Commit
0cd19a9
·
verified ·
1 Parent(s): 80bb301

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -116
app.py CHANGED
@@ -1,139 +1,62 @@
1
  import gradio as gr
2
- import torch
3
- import subprocess
4
- import os
5
- from pathlib import Path
6
  import shutil
7
-
8
def setup_hallo():
    """Clone Hallo2, install its dependencies and download its weights.

    Runs only once: a marker file (``hallo2_installed.txt``) is touched on
    success and short-circuits every later call.

    Returns:
        bool: True when the environment is (already) set up, False when any
        installation step failed.
    """
    marker = Path("hallo2_installed.txt")
    if marker.exists():
        return True

    try:
        print("Installing Hallo2...")

        # List-form argv (shell=False) avoids shell quoting/injection issues.
        subprocess.run(
            ["git", "clone",
             "https://github.com/fudan-generative-vision/hallo2.git",
             "/tmp/hallo2"],
            check=True,
        )

        # Runtime dependencies of the inference script.
        subprocess.run(
            ["pip", "install", "-q", "diffusers[torch]", "transformers",
             "av", "insightface", "onnxruntime-gpu"],
            check=True,
        )

        # Pretrained weights (~2 GB; downloaded once).
        subprocess.run(
            ["huggingface-cli", "download", "fudan-generative-ai/hallo2",
             "--local-dir", "/tmp/hallo2/pretrained_models"],
            check=True,
        )

        marker.touch()
        print("✅ Hallo2 setup complete!")
        return True

    except Exception as e:  # best-effort boundary: report and signal failure
        print(f"Setup error: {e}")
        return False
32
 
33
def generate_video(image, audio):
    """Generate a lip-synced video from a portrait image and an audio clip.

    Args:
        image: filepath of the uploaded portrait (from ``gr.Image``).
        audio: filepath of the uploaded audio clip (from ``gr.Audio``).

    Returns:
        tuple: (path to the generated video or None, status message) for the
        Gradio ``Video`` and ``Textbox`` outputs.
    """
    try:
        if not image or not audio:
            return None, "❌ Please upload both image and audio!"

        # Lazily install Hallo2 on the first request.
        if not setup_hallo():
            return None, "❌ Setup failed"

        # Prepare paths.
        output_dir = Path("/tmp/outputs")
        output_dir.mkdir(exist_ok=True)
        output_file = output_dir / "result.mp4"
        # Remove any stale result so a failed run cannot be reported as a
        # success just because a previous video still exists on disk.
        if output_file.exists():
            output_file.unlink()

        # Copy inputs to fixed, shell-safe locations.
        img_path = "/tmp/input_img.jpg"
        aud_path = "/tmp/input_audio.wav"
        shutil.copy(image, img_path)
        shutil.copy(audio, aud_path)

        print("🎬 Generating video...")

        # List argv + cwd replaces the previous `cd ... && ...` shell string:
        # no quoting pitfalls and no shell injection surface.
        result = subprocess.run(
            ["python", "scripts/inference.py",
             "--source_image", img_path,
             "--driving_audio", aud_path,
             "--output", str(output_file),
             "--pose_weight", "1.0",
             "--face_weight", "1.0",
             "--lip_weight", "1.0"],
            cwd="/tmp/hallo2",
            capture_output=True,
            text=True,
        )

        if output_file.exists():
            return str(output_file), "✅ Video generated successfully!"
        else:
            return None, f"❌ Generation failed. Error: {result.stderr[:200]}"

    except Exception as e:  # top-level boundary: surface the error in the UI
        return None, f"❌ Error: {str(e)}"
76
 
77
# Gradio Interface: two-column layout — uploads on the left, results on the
# right — wired to generate_video().
with gr.Blocks(theme=gr.themes.Soft(), title="AI Lip Sync") as app:

    gr.Markdown("""
    # 🎤 AI Lip Sync Generator

    Upload a portrait image and audio to create a realistic lip-synced video!

    **⚡ Fast generation on T4 GPU (~30-60 seconds)**
    """)

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Files")
            image_input = gr.Image(
                type="filepath",
                label="Portrait Image (JPG/PNG)",
                height=300
            )
            audio_input = gr.Audio(
                type="filepath",
                label="Audio File (WAV/MP3)"
            )

            generate_btn = gr.Button(
                "🚀 Generate Lip-Synced Video",
                variant="primary",
                size="lg"
            )

        with gr.Column(scale=1):
            gr.Markdown("### 📹 Output")
            video_output = gr.Video(
                label="Generated Video",
                height=400
            )
            status_output = gr.Textbox(
                label="Status",
                lines=3,
                interactive=False
            )

    gr.Markdown("""
    ---
    ### 💡 Tips:
    - Use clear, front-facing portrait images
    - Best resolution: 512x512 or higher
    - Audio length: Up to 60 seconds recommended
    - First generation will download models (~2GB)

    ### ⏱️ Performance:
    - First run: ~3-5 minutes (model download)
    - Subsequent runs: ~30-60 seconds per video
    """)

    # Wire the button to the inference function.
    generate_btn.click(
        fn=generate_video,
        inputs=[image_input, audio_input],
        outputs=[video_output, status_output]
    )

if __name__ == "__main__":
    app.launch()
 
1
  import gradio as gr
2
+ from gradio_client import Client, handle_file
 
 
 
3
  import shutil
4
+ from pathlib import Path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
def generate_video(image, audio):
    """Generate a lip-synced video by delegating to a hosted HF Space API.

    Args:
        image: filepath of the uploaded face image (currently only validated,
            not forwarded — see NOTE below).
        audio: filepath of the uploaded audio clip.

    Returns:
        tuple: (local path of the resulting video or None, status message)
        for the Gradio ``Video`` and ``Textbox`` outputs.
    """
    try:
        if not image or not audio:
            return None, "❌ Please upload both files!"

        print("🔄 Connecting to API...")

        # Use the working MuseTalk space.
        client = Client("TMElyralab/MuseTalk")

        print("📤 Uploading files...")

        # NOTE(review): `image` is never sent — only the audio is forwarded
        # and `video_path` is None. MuseTalk drives an existing face video,
        # so this call probably needs a source video/image as well; confirm
        # the Space's /predict signature.
        result = client.predict(
            audio_path=handle_file(audio),
            video_path=None,
            bbox_shift=0,
            api_name="/predict"
        )

        # The client may return a path-like object rather than a plain str;
        # normalise before checking for existence.
        if result and Path(str(result)).exists():
            # Copy next to the app so Gradio can serve it.
            output = "result.mp4"
            shutil.copy(str(result), output)
            return output, "✅ Video generated successfully!"
        else:
            return None, "❌ API returned no result"

    except Exception as e:  # top-level boundary: surface the error in the UI
        return None, f"❌ Error: {str(e)}"
37
 
38
# Gradio UI: two-column layout — uploads on the left, result on the right —
# wired to the API-backed generate_video().
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🎬 AI Lip Sync Generator")
    gr.Markdown("Upload a face image and audio to create lip-synced video")

    with gr.Row():
        with gr.Column():
            img = gr.Image(type="filepath", label="📷 Face Image")
            aud = gr.Audio(type="filepath", label="🎵 Audio File")
            btn = gr.Button("🚀 Generate Video", variant="primary")

        with gr.Column():
            vid = gr.Video(label="📹 Result")
            status = gr.Textbox(label="Status", lines=2)

    # Wire the button to the API-backed generator.
    btn.click(generate_video, [img, aud], [vid, status])

    gr.Markdown("""
    ### 💡 Notes:
    - Uses MuseTalk API (no local installation needed)
    - Processing time: 30-90 seconds
    - Best with clear front-facing images
    """)

# Guard so importing this module does not start the server
# (behaviour when run directly, e.g. on a Space, is unchanged).
if __name__ == "__main__":
    app.launch()