banao-tech commited on
Commit
0cd19a9
·
verified ·
1 Parent(s): 80bb301

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -116
app.py CHANGED
@@ -1,139 +1,62 @@
1
  import gradio as gr
2
- import torch
3
- import subprocess
4
- import os
5
- from pathlib import Path
6
  import shutil
7
-
8
def setup_hallo():
    """Clone Hallo2, install its dependencies and download its weights.

    Runs only once: a marker file (``hallo2_installed.txt``) is touched on
    success and short-circuits every later call.

    Returns:
        bool: True when the environment is (already) set up, False when any
        installation step failed.
    """
    marker = Path("hallo2_installed.txt")
    if marker.exists():
        return True

    try:
        print("Installing Hallo2...")

        # List-form argv (shell=False) avoids shell quoting/injection issues.
        subprocess.run(
            ["git", "clone",
             "https://github.com/fudan-generative-vision/hallo2.git",
             "/tmp/hallo2"],
            check=True,
        )

        # Runtime dependencies of the inference script.
        subprocess.run(
            ["pip", "install", "-q", "diffusers[torch]", "transformers",
             "av", "insightface", "onnxruntime-gpu"],
            check=True,
        )

        # Pretrained weights (~2 GB; downloaded once).
        subprocess.run(
            ["huggingface-cli", "download", "fudan-generative-ai/hallo2",
             "--local-dir", "/tmp/hallo2/pretrained_models"],
            check=True,
        )

        marker.touch()
        print("✅ Hallo2 setup complete!")
        return True

    except Exception as e:  # best-effort boundary: report and signal failure
        print(f"Setup error: {e}")
        return False
32
 
33
def generate_video(image, audio):
    """Generate a lip-synced video from a portrait image and an audio clip.

    Args:
        image: filepath of the uploaded portrait (from ``gr.Image``).
        audio: filepath of the uploaded audio clip (from ``gr.Audio``).

    Returns:
        tuple: (path to the generated video or None, status message) for the
        Gradio ``Video`` and ``Textbox`` outputs.
    """
    try:
        if not image or not audio:
            return None, "❌ Please upload both image and audio!"

        # Lazily install Hallo2 on the first request.
        if not setup_hallo():
            return None, "❌ Setup failed"

        # Prepare paths.
        output_dir = Path("/tmp/outputs")
        output_dir.mkdir(exist_ok=True)
        output_file = output_dir / "result.mp4"
        # Remove any stale result so a failed run cannot be reported as a
        # success just because a previous video still exists on disk.
        if output_file.exists():
            output_file.unlink()

        # Copy inputs to fixed, shell-safe locations.
        img_path = "/tmp/input_img.jpg"
        aud_path = "/tmp/input_audio.wav"
        shutil.copy(image, img_path)
        shutil.copy(audio, aud_path)

        print("🎬 Generating video...")

        # List argv + cwd replaces the previous `cd ... && ...` shell string:
        # no quoting pitfalls and no shell injection surface.
        result = subprocess.run(
            ["python", "scripts/inference.py",
             "--source_image", img_path,
             "--driving_audio", aud_path,
             "--output", str(output_file),
             "--pose_weight", "1.0",
             "--face_weight", "1.0",
             "--lip_weight", "1.0"],
            cwd="/tmp/hallo2",
            capture_output=True,
            text=True,
        )

        if output_file.exists():
            return str(output_file), "✅ Video generated successfully!"
        else:
            return None, f"❌ Generation failed. Error: {result.stderr[:200]}"

    except Exception as e:  # top-level boundary: surface the error in the UI
        return None, f"❌ Error: {str(e)}"
76
 
77
# Gradio Interface: two-column layout — uploads on the left, results on the
# right — wired to generate_video().
with gr.Blocks(theme=gr.themes.Soft(), title="AI Lip Sync") as app:

    gr.Markdown("""
    # 🎤 AI Lip Sync Generator

    Upload a portrait image and audio to create a realistic lip-synced video!

    **⚡ Fast generation on T4 GPU (~30-60 seconds)**
    """)

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Files")
            image_input = gr.Image(
                type="filepath",
                label="Portrait Image (JPG/PNG)",
                height=300
            )
            audio_input = gr.Audio(
                type="filepath",
                label="Audio File (WAV/MP3)"
            )

            generate_btn = gr.Button(
                "🚀 Generate Lip-Synced Video",
                variant="primary",
                size="lg"
            )

        with gr.Column(scale=1):
            gr.Markdown("### 📹 Output")
            video_output = gr.Video(
                label="Generated Video",
                height=400
            )
            status_output = gr.Textbox(
                label="Status",
                lines=3,
                interactive=False
            )

    gr.Markdown("""
    ---
    ### 💡 Tips:
    - Use clear, front-facing portrait images
    - Best resolution: 512x512 or higher
    - Audio length: Up to 60 seconds recommended
    - First generation will download models (~2GB)

    ### ⏱️ Performance:
    - First run: ~3-5 minutes (model download)
    - Subsequent runs: ~30-60 seconds per video
    """)

    # Wire the button to the inference function.
    generate_btn.click(
        fn=generate_video,
        inputs=[image_input, audio_input],
        outputs=[video_output, status_output]
    )

if __name__ == "__main__":
    app.launch()
 
1
  import gradio as gr
2
+ from gradio_client import Client, handle_file
 
 
 
3
  import shutil
4
+ from pathlib import Path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
def generate_video(image, audio):
    """Generate a lip-synced video by delegating to a hosted HF Space API.

    Args:
        image: filepath of the uploaded face image (currently only validated,
            not forwarded — see NOTE below).
        audio: filepath of the uploaded audio clip.

    Returns:
        tuple: (local path of the resulting video or None, status message)
        for the Gradio ``Video`` and ``Textbox`` outputs.
    """
    try:
        if not image or not audio:
            return None, "❌ Please upload both files!"

        print("🔄 Connecting to API...")

        # Use the working MuseTalk space.
        client = Client("TMElyralab/MuseTalk")

        print("📤 Uploading files...")

        # NOTE(review): `image` is never sent — only the audio is forwarded
        # and `video_path` is None. MuseTalk drives an existing face video,
        # so this call probably needs a source video/image as well; confirm
        # the Space's /predict signature.
        result = client.predict(
            audio_path=handle_file(audio),
            video_path=None,
            bbox_shift=0,
            api_name="/predict"
        )

        # The client may return a path-like object rather than a plain str;
        # normalise before checking for existence.
        if result and Path(str(result)).exists():
            # Copy next to the app so Gradio can serve it.
            output = "result.mp4"
            shutil.copy(str(result), output)
            return output, "✅ Video generated successfully!"
        else:
            return None, "❌ API returned no result"

    except Exception as e:  # top-level boundary: surface the error in the UI
        return None, f"❌ Error: {str(e)}"
37
 
38
# Gradio UI: two-column layout — uploads on the left, result on the right —
# wired to the API-backed generate_video().
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🎬 AI Lip Sync Generator")
    gr.Markdown("Upload a face image and audio to create lip-synced video")

    with gr.Row():
        with gr.Column():
            img = gr.Image(type="filepath", label="📷 Face Image")
            aud = gr.Audio(type="filepath", label="🎵 Audio File")
            btn = gr.Button("🚀 Generate Video", variant="primary")

        with gr.Column():
            vid = gr.Video(label="📹 Result")
            status = gr.Textbox(label="Status", lines=2)

    # Wire the button to the API-backed generator.
    btn.click(generate_video, [img, aud], [vid, status])

    gr.Markdown("""
    ### 💡 Notes:
    - Uses MuseTalk API (no local installation needed)
    - Processing time: 30-90 seconds
    - Best with clear front-facing images
    """)

# Guard so importing this module does not start the server
# (behaviour when run directly, e.g. on a Space, is unchanged).
if __name__ == "__main__":
    app.launch()