AIBRUH committed on
Commit
fcd08af
·
verified ·
1 Parent(s): db72923

Upload folder using huggingface_hub

Browse files
hf-space-eve/README.md ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: EVE - Talking Avatar
3
+ emoji: 👩
4
+ colorFrom: purple
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 5.23.0
8
+ app_file: app.py
9
+ pinned: true
10
+ license: mit
11
+ ---
12
+
13
+ # EVE - Talking Avatar
14
+
15
+ Audio-driven talking avatar powered by **Wav2Lip** + **Edge TTS**.
16
+
17
+ ## Pipeline
18
+ Text → Edge TTS (WAV) → Wav2Lip (HF ZeroGPU) → Animated Video
19
+
20
+ ## Credits
21
+ - **Wav2Lip**: Rudrabha et al. (audio-driven lip sync)
22
+ - **Hallo4**: Fudan University Generative Vision Lab (SIGGRAPH Asia 2025)
23
+ - **Edge TTS**: Microsoft (en-US-AvaMultilingualNeural)
hf-space-eve/app.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """EVE — Talking Avatar Demo.
2
+
3
+ Pipeline: Text → Edge TTS (WAV) → Wav2Lip (HF ZeroGPU) → Animated Video
4
+
5
+ Uses the proven Wav2Lip pipeline for fast lip-sync animation.
6
+ Hallo4 (SIGGRAPH Asia 2025) available via separate L40S GPU job.
7
+ """
8
+
9
+ import asyncio
10
+ import os
11
+ import tempfile
12
+
13
+ import cv2
14
+ import gradio as gr
15
+ import numpy as np
16
+ import soundfile as sf
17
+
18
+
19
+ EDGE_TTS_VOICE = "en-US-AvaMultilingualNeural"
20
+
21
+
22
async def generate_tts(text: str) -> str:
    """Synthesize *text* with Edge TTS and return the path to a PCM-16 WAV.

    The speech is first saved by ``edge_tts`` to an intermediate ``.mp3``
    file, then re-read with ``soundfile`` and written back out as 16-bit PCM
    WAV at the same sample rate.

    Every call works on its own uniquely named temp files. Fixed file names
    in the shared temp directory would let two concurrent requests overwrite
    each other's audio.

    Args:
        text: The text to speak using ``EDGE_TTS_VOICE``.

    Returns:
        Path of the generated WAV file. The file is not deleted here; the
        caller owns it.

    Raises:
        Any exception raised by ``edge_tts`` or ``soundfile`` is propagated
        after the temp files created by this call have been removed.
    """
    # Imported before any temp file is created so a missing dependency
    # cannot leave stray files behind.
    import edge_tts

    mp3_fd, mp3_path = tempfile.mkstemp(prefix="eve_tts_", suffix=".mp3")
    os.close(mp3_fd)
    wav_fd, wav_path = tempfile.mkstemp(prefix="eve_tts_", suffix=".wav")
    os.close(wav_fd)

    try:
        communicate = edge_tts.Communicate(text, EDGE_TTS_VOICE)
        await communicate.save(mp3_path)

        data, sr = sf.read(mp3_path)
        sf.write(wav_path, data, sr, subtype="PCM_16")
    except BaseException:
        # Do not hand back (or leave around) an empty or partial WAV.
        if os.path.exists(wav_path):
            os.remove(wav_path)
        raise
    finally:
        # The MP3 is only an intermediate artifact.
        if os.path.exists(mp3_path):
            os.remove(mp3_path)

    return wav_path
35
+
36
+
37
def animate_with_wav2lip(image_path: str, wav_path: str) -> str | None:
    """Lip-sync a still image to an audio track via the remote Wav2Lip Space.

    Sends both files to the ``pragnakalp/Wav2lip-ZeroGPU`` Space through
    ``gradio_client`` and inspects the response.

    Args:
        image_path: Path of the reference face image to animate.
        wav_path: Path of the audio file driving the animation.

    Returns:
        The video path reported by the Space when that path exists locally,
        otherwise ``None``.
    """
    from gradio_client import Client, handle_file

    space = Client("pragnakalp/Wav2lip-ZeroGPU")
    response = space.predict(
        input_image=handle_file(image_path),
        input_audio=handle_file(wav_path),
        # NOTE(review): the spelling presumably mirrors the endpoint name
        # exposed by the remote Space; confirm before "fixing" it.
        api_name="/run_infrence",
    )

    # A dict response carries the path under "video"; anything else is
    # treated as the path itself.
    if isinstance(response, dict):
        candidate = response.get("video", response)
    else:
        candidate = response

    if not candidate or not os.path.exists(candidate):
        return None
    return candidate
52
+
53
+
54
def _save_reference_image(image) -> str | None:
    """Write *image* as a 512x512 PNG in a unique temp file and return its path.

    Accepts an RGB ``numpy`` array or a path string. Returns ``None`` when
    the input type is unsupported, the file cannot be decoded, or the PNG
    cannot be written.
    """
    if isinstance(image, np.ndarray):
        # Array input is converted from RGB to OpenCV's BGR order before
        # writing.
        frame = cv2.cvtColor(cv2.resize(image, (512, 512)), cv2.COLOR_RGB2BGR)
    elif isinstance(image, str):
        frame = cv2.imread(image)
        if frame is None:  # unreadable or missing file
            return None
        frame = cv2.resize(frame, (512, 512))
    else:
        return None

    # Unique file per request so concurrent users never share a reference.
    fd, img_path = tempfile.mkstemp(prefix="eve_ref_", suffix=".png")
    os.close(fd)
    if not cv2.imwrite(img_path, frame):
        os.remove(img_path)
        return None
    return img_path


def eve_speak(text: str, image, progress=gr.Progress()) -> str | None:
    """Run the full pipeline: text, then TTS, then Wav2Lip, then video path.

    Args:
        text: What Eve should say. Empty, whitespace-only, or ``None`` input
            produces no video.
        image: Reference face as a ``numpy`` array or a file path, or
            ``None`` to use the bundled ``eve-512.png`` next to this script.
        progress: Gradio progress tracker used to report pipeline stages.

    Returns:
        Path of the generated video, or ``None`` when there is nothing to
        say, no usable reference image, or the Wav2Lip step fails or yields
        no video.
    """
    if not text or not text.strip():
        return None

    owns_image = image is not None
    if owns_image:
        img_path = _save_reference_image(image)
        if img_path is None:
            return None
    else:
        # Use default Eve
        img_path = os.path.join(os.path.dirname(__file__), "eve-512.png")
        if not os.path.exists(img_path):
            return None

    try:
        progress(0.2, desc="Generating voice...")
        wav_path = asyncio.run(generate_tts(text))

        progress(0.4, desc="Animating face with Wav2Lip...")
        try:
            video = animate_with_wav2lip(img_path, wav_path)
            if video:
                progress(1.0, desc="Done!")
                return video
        except Exception as e:
            # Best effort: report the failure in the UI and the log, and
            # fall through to returning no video.
            progress(1.0, desc=f"Error: {str(e)[:80]}")
            print(f"Wav2Lip error: {e}")

        return None
    finally:
        # Only remove the temp copy we created, never the bundled default.
        if owns_image:
            try:
                os.remove(img_path)
            except OSError:
                pass
89
+
90
+
91
+ # ── Gradio UI ────────────────────────────────────────────────────────────────
92
# Resolve the bundled default portrait once; the image widget starts empty
# when the file is not shipped next to this script.
_default_face = os.path.join(os.path.dirname(__file__), "eve-512.png")
if not os.path.exists(_default_face):
    _default_face = None

with gr.Blocks(
    title="EVE - Talking Avatar",
    theme=gr.themes.Base(primary_hue="violet", neutral_hue="slate"),
    css="""
    .eve-title { text-align: center; font-size: 2.5em; font-weight: 200;
                 letter-spacing: 0.3em; color: #a78bfa; margin: 20px 0; }
    .eve-sub { text-align: center; color: #666; margin-bottom: 20px; }
    """,
) as demo:
    gr.HTML("<h1 class='eve-title'>E V E</h1>")
    gr.HTML("<p class='eve-sub'>Audio-driven talking avatar | Edge TTS + Wav2Lip</p>")

    with gr.Row():
        # Left: the generated video. Right: inputs and credits.
        with gr.Column(scale=2):
            output_video = gr.Video(label="Eve", autoplay=True, height=500)
        with gr.Column(scale=1):
            ref_image = gr.Image(
                label="Reference Face (or use default Eve)",
                type="numpy",
                value=_default_face,
            )
            text_input = gr.Textbox(
                label="Talk to Eve",
                placeholder="Type something for Eve to say...",
                lines=3,
                value="Hello! I am Eve, your digital companion. I am so happy to meet you!",
            )
            generate_btn = gr.Button("Make Eve Speak", variant="primary", size="lg")
            gr.HTML(
                "<div style='margin-top:15px;padding:10px;background:#1a1a2e;"
                "border-radius:8px;font-size:0.8em;color:#888;'>"
                "<b>Pipeline:</b> Text → Edge TTS → Wav2Lip (GPU) → Video<br>"
                "<b>Voice:</b> en-US-AvaMultilingualNeural<br>"
                "<b>Credits:</b> Wav2Lip (Rudrabha et al.), "
                "Hallo4 (Fudan Generative Vision, SIGGRAPH Asia 2025)"
                "</div>"
            )

    generate_btn.click(
        fn=eve_speak,
        inputs=[text_input, ref_image],
        outputs=[output_video],
    )

# Start the server only when executed as a script, so importing this module
# (tests, tooling) does not bind a port.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
hf-space-eve/requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio_client
2
+ edge-tts
3
+ soundfile
4
+ opencv-python-headless
5
+ numpy