Upload folder using huggingface_hub
Browse files- hf-space-eve/README.md +23 -0
- hf-space-eve/app.py +138 -0
- hf-space-eve/requirements.txt +5 -0
hf-space-eve/README.md
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: EVE - Talking Avatar
|
| 3 |
+
emoji: 👩
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: indigo
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 5.23.0
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: true
|
| 10 |
+
license: mit
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
# EVE - Talking Avatar
|
| 14 |
+
|
| 15 |
+
Audio-driven talking avatar powered by **Wav2Lip** + **Edge TTS**.
|
| 16 |
+
|
| 17 |
+
## Pipeline
|
| 18 |
+
Text → Edge TTS (WAV) → Wav2Lip (HF ZeroGPU) → Animated Video
|
| 19 |
+
|
| 20 |
+
## Credits
|
| 21 |
+
- **Wav2Lip**: Rudrabha et al. (audio-driven lip sync)
|
| 22 |
+
- **Hallo4**: Fudan University Generative Vision Lab (SIGGRAPH Asia 2025)
|
| 23 |
+
- **Edge TTS**: Microsoft (en-US-AvaMultilingualNeural)
|
hf-space-eve/app.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""EVE — Talking Avatar Demo.
|
| 2 |
+
|
| 3 |
+
Pipeline: Text → Edge TTS (WAV) → Wav2Lip (HF ZeroGPU) → Animated Video
|
| 4 |
+
|
| 5 |
+
Uses the proven Wav2Lip pipeline for fast lip-sync animation.
|
| 6 |
+
Hallo4 (SIGGRAPH Asia 2025) available via separate L40S GPU job.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import asyncio
|
| 10 |
+
import os
|
| 11 |
+
import tempfile
|
| 12 |
+
|
| 13 |
+
import cv2
|
| 14 |
+
import gradio as gr
|
| 15 |
+
import numpy as np
|
| 16 |
+
import soundfile as sf
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
EDGE_TTS_VOICE = "en-US-AvaMultilingualNeural"
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
async def generate_tts(text: str) -> str:
    """Synthesize *text* with Edge TTS and return the path to a WAV file.

    ``edge_tts`` first saves the speech to an intermediate ``.mp3`` file,
    which is then re-written with ``soundfile`` as 16-bit PCM WAV. Both
    files get unique names in the system temp directory, so overlapping
    calls cannot overwrite each other's audio.

    Args:
        text: The text to speak, using the ``EDGE_TTS_VOICE`` voice.

    Returns:
        Path of the generated WAV file. The caller owns the file and may
        delete it once it is no longer needed.

    Raises:
        Exception: Any error raised by ``edge_tts`` or ``soundfile`` is
            propagated after the partially written output is removed.
    """
    import edge_tts

    # mkstemp only reserves unique names here; the libraries below reopen the
    # files by path, so the descriptors are closed right away.
    mp3_fd, mp3_path = tempfile.mkstemp(prefix="eve_tts_", suffix=".mp3")
    os.close(mp3_fd)
    wav_fd, wav_path = tempfile.mkstemp(prefix="eve_tts_", suffix=".wav")
    os.close(wav_fd)

    try:
        communicate = edge_tts.Communicate(text, EDGE_TTS_VOICE)
        await communicate.save(mp3_path)

        # Re-encode the saved audio as 16-bit PCM WAV.
        data, sr = sf.read(mp3_path)
        sf.write(wav_path, data, sr, subtype="PCM_16")
    except BaseException:
        # Do not leave an empty or partial WAV behind when synthesis fails.
        if os.path.exists(wav_path):
            os.remove(wav_path)
        raise
    finally:
        # The MP3 is only an intermediate artifact.
        if os.path.exists(mp3_path):
            os.remove(mp3_path)

    return wav_path
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def animate_with_wav2lip(image_path: str, wav_path: str) -> str | None:
    """Lip-sync a still image to an audio file using the remote Wav2Lip Space.

    Args:
        image_path: Local path of the face image to animate.
        wav_path: Local path of the driving audio.

    Returns:
        The local path of the resulting video when the Space returned one
        that exists on disk, otherwise ``None``.
    """
    from gradio_client import Client, handle_file

    space = Client("pragnakalp/Wav2lip-ZeroGPU")
    # NOTE(review): the endpoint name is kept exactly as the original spells
    # it; presumably it matches the remote Space's API name — confirm before
    # "correcting" the spelling.
    response = space.predict(
        input_image=handle_file(image_path),
        input_audio=handle_file(wav_path),
        api_name="/run_infrence",
    )

    # A dict response carries the path under "video" (falling back to the
    # dict itself when the key is absent); anything else is used as-is.
    if isinstance(response, dict):
        candidate = response.get("video", response)
    else:
        candidate = response

    if not candidate:
        return None
    if os.path.exists(candidate):
        return candidate
    return None
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def _discard_file(path: str | None) -> None:
    """Delete *path* if it exists, ignoring filesystem errors."""
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        pass


def _prepare_reference_image(image) -> tuple[str | None, bool]:
    """Return ``(path, is_temporary)`` for the face image to animate.

    ``image`` may be ``None`` (use the bundled ``eve-512.png`` next to this
    file), a numpy array, or a path string. Arrays and paths are resized to
    512x512 and written to a unique temporary PNG. ``path`` is ``None`` when
    no usable image could be produced.
    """
    if image is None:
        default_path = os.path.join(os.path.dirname(__file__), "eve-512.png")
        if os.path.exists(default_path):
            return default_path, False
        return None, False

    if isinstance(image, np.ndarray):
        # Arrays are converted RGB -> BGR before OpenCV writes them.
        frame = cv2.cvtColor(cv2.resize(image, (512, 512)), cv2.COLOR_RGB2BGR)
    elif isinstance(image, str):
        frame = cv2.imread(image)
        if frame is None:
            # imread signals an unreadable/undecodable file by returning None.
            return None, False
        frame = cv2.resize(frame, (512, 512))
    else:
        # Unsupported input type: refuse rather than reuse a stale file.
        return None, False

    # A unique name keeps overlapping calls from overwriting each other.
    fd, img_path = tempfile.mkstemp(prefix="eve_ref_", suffix=".png")
    os.close(fd)
    if not cv2.imwrite(img_path, frame):
        _discard_file(img_path)
        return None, False
    return img_path, True


def eve_speak(text: str, image, progress=gr.Progress()) -> str | None:
    """Main pipeline: Text -> TTS -> Wav2Lip -> Video.

    Args:
        text: What Eve should say. Empty, whitespace-only or ``None`` text
            produces no video.
        image: Reference face as a numpy array, a file path, or ``None`` to
            use the bundled default portrait.
        progress: Gradio progress reporter, called with a fraction and a
            ``desc`` message at each stage.

    Returns:
        Path of the animated video, or ``None`` when there is nothing to
        say, no usable image, or a pipeline stage failed. Stage failures
        are reported through ``progress`` and printed.
    """
    if not text or not text.strip():
        return None

    img_path, img_is_temp = _prepare_reference_image(image)
    if img_path is None:
        return None

    wav_path = None
    try:
        progress(0.2, desc="Generating voice...")
        try:
            wav_path = asyncio.run(generate_tts(text))
        except Exception as e:
            # Report TTS failures the same way as Wav2Lip failures below.
            progress(1.0, desc=f"Error: {str(e)[:80]}")
            print(f"TTS error: {e}")
            return None

        progress(0.4, desc="Animating face with Wav2Lip...")
        try:
            video = animate_with_wav2lip(img_path, wav_path)
        except Exception as e:
            progress(1.0, desc=f"Error: {str(e)[:80]}")
            print(f"Wav2Lip error: {e}")
            return None

        if video:
            progress(1.0, desc="Done!")
            return video
        return None
    finally:
        # The inputs are no longer needed once the remote call has returned.
        _discard_file(wav_path)
        if img_is_temp:
            _discard_file(img_path)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
# ── Gradio UI ────────────────────────────────────────────────────────────────
|
| 92 |
+
# Path of the bundled default portrait, resolved once. The image widget
# starts empty when the file is not present next to this script.
_default_face = os.path.join(os.path.dirname(__file__), "eve-512.png")

with gr.Blocks(
    title="EVE - Talking Avatar",
    theme=gr.themes.Base(primary_hue="violet", neutral_hue="slate"),
    css="""
    .eve-title { text-align: center; font-size: 2.5em; font-weight: 200;
                 letter-spacing: 0.3em; color: #a78bfa; margin: 20px 0; }
    .eve-sub { text-align: center; color: #666; margin-bottom: 20px; }
    """,
) as demo:
    gr.HTML("<h1 class='eve-title'>E V E</h1>")
    gr.HTML("<p class='eve-sub'>Audio-driven talking avatar | Edge TTS + Wav2Lip</p>")

    with gr.Row():
        # Left: the generated video. Right: inputs and pipeline info.
        with gr.Column(scale=2):
            output_video = gr.Video(label="Eve", autoplay=True, height=500)
        with gr.Column(scale=1):
            ref_image = gr.Image(
                label="Reference Face (or use default Eve)",
                type="numpy",
                value=_default_face if os.path.exists(_default_face) else None,
            )
            text_input = gr.Textbox(
                label="Talk to Eve",
                placeholder="Type something for Eve to say...",
                lines=3,
                value="Hello! I am Eve, your digital companion. I am so happy to meet you!",
            )
            generate_btn = gr.Button("Make Eve Speak", variant="primary", size="lg")
            gr.HTML(
                "<div style='margin-top:15px;padding:10px;background:#1a1a2e;"
                "border-radius:8px;font-size:0.8em;color:#888;'>"
                # HTML entities keep the arrows intact regardless of file encoding.
                "<b>Pipeline:</b> Text &rarr; Edge TTS &rarr; Wav2Lip (GPU) &rarr; Video<br>"
                # Show the voice actually used by the TTS step.
                f"<b>Voice:</b> {EDGE_TTS_VOICE}<br>"
                "<b>Credits:</b> Wav2Lip (Rudrabha et al.), "
                "Hallo4 (Fudan Generative Vision, SIGGRAPH Asia 2025)"
                "</div>"
            )

    generate_btn.click(
        fn=eve_speak,
        inputs=[text_input, ref_image],
        outputs=[output_video],
    )

demo.launch(server_name="0.0.0.0", server_port=7860)
|
hf-space-eve/requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio_client
|
| 2 |
+
edge-tts
|
| 3 |
+
soundfile
|
| 4 |
+
opencv-python-headless
|
| 5 |
+
numpy
|