- Dockerfile +81 -0
- README.md +1 -4
- app.py +180 -186
- requirements.txt +13 -13
Dockerfile
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Image for the DreamTalk Gradio demo: CUDA-enabled PyTorch base, the dreamtalk
# repository (dev branch), its model checkpoints and the Python dependencies.
FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-devel

ENV DEBIAN_FRONTEND=noninteractive

# Set the MKL_THREADING_LAYER environment variable to GNU
ENV MKL_THREADING_LAYER=GNU

# Install system dependencies including those required for dlib.
# The apt package lists are removed in the same layer so they do not end up in the image.
RUN apt-get update && apt-get install -y \
    git \
    wget \
    libgl1-mesa-glx \
    libglib2.0-0 \
    ffmpeg \
    libx264-dev \
    build-essential \
    cmake \
    libopenblas-dev \
    liblapack-dev \
    libx11-dev \
    libgtk-3-dev \
    python3-dev \
    && rm -rf /var/lib/apt/lists/*

RUN useradd -m -u 1000 user

USER user

ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    PYTHONUNBUFFERED=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    GRADIO_SHARE=False \
    SYSTEM=spaces

# PYTHONPATH is set in its own instruction: variables referenced inside an ENV
# instruction are substituted with the value they had BEFORE that instruction,
# so $HOME would not yet be /home/user if it were defined in the block above.
ENV PYTHONPATH=$HOME/app

# Set the working directory to the user's home directory
WORKDIR $HOME/app

# Print detailed Python information
RUN python -c "import sys; print(f'Python {sys.version}')"

# Clone the repository (shallow clone: the history is not needed at runtime)
RUN git clone --depth 1 -b dev https://github.com/fffiloni/dreamtalk $HOME/app

# Download model checkpoints
RUN wget https://huggingface.co/camenduru/dreamtalk/resolve/main/damo/dreamtalk/checkpoints/denoising_network.pth -O $HOME/app/checkpoints/denoising_network.pth
RUN wget https://huggingface.co/camenduru/dreamtalk/resolve/main/damo/dreamtalk/checkpoints/renderer.pt -O $HOME/app/checkpoints/renderer.pt

# Install Python dependencies
RUN pip install --no-cache-dir \
    urllib3==1.26.6 \
    transformers==4.28.1 \
    yacs==0.1.8 \
    scipy==1.10.1 \
    scikit-image==0.20.0 \
    scikit-learn==1.2.2 \
    PyYAML==6.0 \
    Pillow==9.5.0 \
    numpy==1.24.2 \
    opencv-python==4.7.0.72 \
    imageio==2.27.0 \
    ffmpeg-python \
    av==11.0.0 \
    moviepy==1.0.3

RUN pip install --no-cache-dir gradio

# Install dlib with verbose output
RUN pip install --verbose --no-cache-dir dlib-bin

# Copy the app owned by the runtime user rather than root
COPY --chown=user app.py .

# Set the environment variable to specify the GPU device
ENV CUDA_DEVICE_ORDER=PCI_BUS_ID
ENV CUDA_VISIBLE_DEVICES=0

# Run your app.py script
CMD ["python", "app.py"]
|
README.md
CHANGED
|
@@ -3,10 +3,7 @@ title: Tkvn
|
|
| 3 |
emoji: 💬
|
| 4 |
colorFrom: yellow
|
| 5 |
colorTo: purple
|
| 6 |
-
sdk:
|
| 7 |
-
sdk_version: 5.0.1
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
-
|
| 12 |
-
An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
|
|
|
|
| 3 |
emoji: 💬
|
| 4 |
colorFrom: yellow
|
| 5 |
colorTo: purple
|
| 6 |
+
sdk: docker
|
|
|
|
| 7 |
app_file: app.py
|
| 8 |
pinned: false
|
| 9 |
---
|
|
|
|
|
|
app.py
CHANGED
|
@@ -1,200 +1,194 @@
|
|
| 1 |
-
import os
|
| 2 |
import gradio as gr
|
| 3 |
-
from
|
| 4 |
-
import
|
| 5 |
-
from pixeltable.iterators import FrameIterator
|
| 6 |
-
from pixeltable.functions.video import extract_audio
|
| 7 |
-
from pixeltable.functions.audio import get_metadata
|
| 8 |
-
|
| 9 |
-
# Lưu khóa API Groq
|
| 10 |
-
if 'GROQ_API_KEY' not in os.environ:
|
| 11 |
-
os.environ['GROQ_API_KEY'] = input('Nhập khóa API Groq của bạn: ')
|
| 12 |
-
|
| 13 |
-
MAX_VIDEO_SIZE_MB = 35
|
| 14 |
-
CONCURRENCY_LIMIT = 1
|
| 15 |
-
|
| 16 |
-
# Hàm gọi Groq API
|
| 17 |
-
def call_groq_api(prompt, model_name, max_tokens=500):
|
| 18 |
-
client = Groq(api_key=os.environ.get('GROQ_API_KEY'))
|
| 19 |
-
|
| 20 |
-
response = client.chat.completions.create(
|
| 21 |
-
model=model_name,
|
| 22 |
-
messages=prompt,
|
| 23 |
-
max_tokens=max_tokens
|
| 24 |
-
)
|
| 25 |
-
return response.choices[0].message.content
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
if not video_file:
|
| 32 |
-
return "Vui lòng tải lên tệp video.", None, None, None
|
| 33 |
-
|
| 34 |
try:
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
return f"Tệp video lớn hơn {MAX_VIDEO_SIZE_MB} MB. Vui lòng tải lên tệp nhỏ hơn.", None, None, None
|
| 39 |
-
|
| 40 |
-
progress(0.2, desc="Đang tạo bảng và cột tính toán...")
|
| 41 |
-
|
| 42 |
-
# Tạo bảng, view và cột tính toán
|
| 43 |
-
pxt.drop_dir('directory', force=True)
|
| 44 |
-
pxt.create_dir('directory')
|
| 45 |
-
|
| 46 |
-
t = pxt.create_table(
|
| 47 |
-
'directory.video_table', {
|
| 48 |
-
"video": pxt.Video,
|
| 49 |
-
"sm_type": pxt.String
|
| 50 |
-
}
|
| 51 |
-
)
|
| 52 |
-
|
| 53 |
-
frames_view = pxt.create_view(
|
| 54 |
-
"directory.frames",
|
| 55 |
-
t,
|
| 56 |
-
iterator=FrameIterator.create(video=t.video, fps=1)
|
| 57 |
)
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
# Định nghĩa hàm UDF để tạo prompt cho LLM
|
| 68 |
-
@pxt.udf
|
| 69 |
-
def prompt(A: str, B: str) -> list[dict]:
|
| 70 |
-
system_msg = 'Bạn là chuyên gia trong việc tạo nội dung mạng xã hội và tạo bài đăng hiệu quả dựa trên nội dung người dùng. Tuân thủ quy tắc và ràng buộc của nền tảng mạng xã hội.'
|
| 71 |
-
user_msg = f'A: "{A}" \n B: "{B}"'
|
| 72 |
-
return [
|
| 73 |
-
{'role': 'system', 'content': system_msg},
|
| 74 |
-
{'role': 'user', 'content': user_msg}
|
| 75 |
-
]
|
| 76 |
-
|
| 77 |
-
# Áp dụng UDF để tạo cột mới
|
| 78 |
-
t.add_computed_column(message=prompt(t.sm_type, t.transcription_text))
|
| 79 |
-
|
| 80 |
-
progress(0.6, desc="Đang gọi mô hình ngôn ngữ lớn...")
|
| 81 |
-
|
| 82 |
-
# Gọi Groq API để tạo bài đăng
|
| 83 |
-
t.add_computed_column(response=call_groq_api(messages=t.message, model_name="llama3-70b-8192", max_tokens=500))
|
| 84 |
-
t.add_computed_column(answer=t.response.choices[0].message.content)
|
| 85 |
-
|
| 86 |
-
progress(0.8, desc="Đang chèn video và trích xuất dữ liệu...")
|
| 87 |
-
|
| 88 |
-
# Chèn video vào bảng
|
| 89 |
-
t.insert([{
|
| 90 |
-
"video": video_file,
|
| 91 |
-
"sm_type": social_media_type
|
| 92 |
-
}])
|
| 93 |
-
|
| 94 |
-
# Truy xuất kết quả
|
| 95 |
-
social_media_post = t.select(t.answer).tail(1)['answer'][0]
|
| 96 |
-
audio = t.select(t.audio).tail(1)['audio'][0]
|
| 97 |
-
thumbnails = frames_view.select(frames_view.frame).tail(6)['frame']
|
| 98 |
-
transcription_text = t.select(t.transcription_text).tail(1)['transcription_text'][0]
|
| 99 |
-
|
| 100 |
-
# Hiển thị kết quả
|
| 101 |
-
return social_media_post, thumbnails, transcription_text, audio
|
| 102 |
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
def gradio_interface():
|
| 108 |
-
with gr.Blocks(theme=gr.themes.Base()) as demo:
|
| 109 |
-
gr.Markdown(
|
| 110 |
-
"""
|
| 111 |
-
📹 Công cụ Tạo Bài Đăng Mạng Xã Hội từ Video
|
| 112 |
-
Biến video của bạn thành nội dung mạng xã hội hấp dẫn b���ng AI
|
| 113 |
-
"""
|
| 114 |
-
)
|
| 115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
with gr.Row():
|
| 117 |
-
# Cột bên trái - Điều khiển đầu vào
|
| 118 |
with gr.Column():
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
with gr.Group():
|
| 126 |
-
gr.Markdown("### 🎯 Nền tảng Mục tiêu")
|
| 127 |
-
social_media_type = gr.Radio(
|
| 128 |
choices=[
|
| 129 |
-
"
|
| 130 |
-
"
|
| 131 |
-
"
|
| 132 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
],
|
| 134 |
-
value=
|
| 135 |
-
label="Chọn nơi bạn muốn chia sẻ:",
|
| 136 |
-
interactive=True
|
| 137 |
)
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
)
|
| 145 |
-
|
| 146 |
-
|
| 147 |
with gr.Column():
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
)
|
| 162 |
-
|
| 163 |
-
gr.Markdown("### 📝 Văn bản Đã Trích xuất")
|
| 164 |
-
df_output = gr.Textbox(
|
| 165 |
-
label="Văn bản Đã Trích xuất",
|
| 166 |
-
show_copy_button=True,
|
| 167 |
-
lines=8
|
| 168 |
-
)
|
| 169 |
-
|
| 170 |
-
gr.Markdown("### 🎵 Âm thanh Đã Trích xuất")
|
| 171 |
-
audio = gr.Audio(
|
| 172 |
-
label="Âm thanh",
|
| 173 |
-
show_download_button=True,
|
| 174 |
-
type="filepath"
|
| 175 |
-
)
|
| 176 |
-
|
| 177 |
-
# Kết nối nút nhấn với hàm xử lý
|
| 178 |
-
generate_btn.click(
|
| 179 |
-
fn=process_and_generate_post,
|
| 180 |
-
inputs=[video_input, social_media_type],
|
| 181 |
-
outputs=[output, thumbnail, df_output, audio],
|
| 182 |
-
api_name="generate",
|
| 183 |
-
show_progress="full",
|
| 184 |
-
trigger_mode='once'
|
| 185 |
-
)
|
| 186 |
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
# Khởi chạy giao diện Gradio
|
| 190 |
-
if __name__ == "__main__":
|
| 191 |
-
demo = gradio_interface()
|
| 192 |
-
demo.launch(
|
| 193 |
-
server_name="0.0.0.0", # Cho phép truy cập từ bên ngoài
|
| 194 |
-
server_port=7860, # Cổng mặc định của Gradio
|
| 195 |
-
share=False, # Tắt tính năng chia sẻ
|
| 196 |
-
show_api=False, # Ẩn tài liệu API
|
| 197 |
-
show_error=False, # Ẩn lỗi chi tiết
|
| 198 |
-
ssl_verify=True, # Bật xác minh SSL
|
| 199 |
-
quiet=True # Giảm đầu ra console
|
| 200 |
-
)
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from moviepy.editor import VideoFileClip
|
| 3 |
+
import datetime
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
+
import subprocess
|
| 6 |
+
import sys
|
| 7 |
+
|
| 8 |
+
def install_dlib():
    """Install dlib 19.24.1 with pip, using the interpreter running this script.

    Best-effort helper: a failing pip run is reported on stdout and is not
    re-raised.
    """
    pip_command = [
        sys.executable,
        "-m",
        "pip",
        "install",
        "--no-cache-dir",
        "dlib==19.24.1",
        "-vvv",
    ]
    try:
        subprocess.run(pip_command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Failed to install dlib: {e}")
    else:
        print("dlib successfully installed!")
|
| 17 |
+
|
| 18 |
+
#install_dlib()
|
| 19 |
+
|
| 20 |
+
def convert_to_mp4_with_aac(input_path, output_path):
    """Re-encode a video with the libx264 video codec and the AAC audio codec.

    Args:
        input_path: Path of the video file to read.
        output_path: Path the re-encoded video is written to.

    Returns:
        ``output_path``, unchanged, so the call can be used inline.
    """
    # Load the video
    video = VideoFileClip(input_path)
    try:
        # Write the clip out with the libx264 / aac codecs
        video.write_videofile(output_path, codec="libx264", audio_codec="aac")
    finally:
        # Always release the resources held by the clip, even when encoding
        # fails; otherwise every call leaves them open.
        video.close()

    return output_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
+
|
| 30 |
+
# Function to check if the audio file path exists in the list
|
| 31 |
+
def check_file_exists(file_path, audio_list):
    """Return whether ``file_path`` is one of the entries of ``audio_list``.

    This is a membership test on the given collection only; the file system
    is not consulted.
    """
    is_listed = file_path in audio_list
    return is_listed
|
| 33 |
+
|
| 34 |
+
def load_audio(audio_listed):
    """Map an audio file name to its path under ``data/audio/``.

    Returns ``None`` when ``audio_listed`` is ``None``; otherwise the name
    prefixed with ``data/audio/``.
    """
    return None if audio_listed is None else f"data/audio/{audio_listed}"
|
| 39 |
+
|
| 40 |
+
def execute_command(command: list[str]) -> None:
    """Run ``command`` as a subprocess and wait for it to finish.

    Args:
        command: The program followed by its arguments, as an argument list
            (no shell is involved).

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    subprocess.run(command, check=True)
|
| 42 |
+
|
| 43 |
+
def infer(audio_input, image_path, emotional_style):
    """Run the DreamTalk inference script and return the re-encoded result video.

    Args:
        audio_input: Path of the audio file, passed as ``--wav_path``.
        image_path: Path of the source image, passed as ``--image_path``.
        emotional_style: File name inside ``data/style_clip/3DMM/``, passed
            as ``--style_clip_path``.

    Returns:
        Path of the re-encoded video (``lipsynced_result_<timestamp>.mp4``,
        relative to the current working directory).

    Raises:
        gr.Error: If one of the inputs is missing.
        subprocess.CalledProcessError: If the inference script exits with a
            non-zero status.
    """
    # Fail early with a readable message instead of passing the literal
    # string "None" to the inference script.
    if not audio_input:
        raise gr.Error("Please provide an audio file.")
    if not image_path:
        raise gr.Error("Please provide an image.")
    if not emotional_style:
        raise gr.Error("Please choose an emotional style.")

    # Get the current timestamp
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

    output_name = f"lipsynced_result_{timestamp}"

    command = [
        # Use the interpreter running this app so the script sees the same
        # installed packages.
        sys.executable,
        "inference_for_demo_video.py",
        f"--wav_path={audio_input}",
        f"--style_clip_path=data/style_clip/3DMM/{emotional_style}",
        "--pose_path=data/pose/RichardShelby_front_neutral_level1_001.mat",
        f"--image_path={image_path}",
        "--cfg_scale=1.0",
        "--max_gen_len=30",
        f"--output_name={output_name}",
    ]

    execute_command(command)

    # Convert video to compatible codecs
    input_file = f"output_video/{output_name}.mp4"
    output_file = f"{output_name}.mp4"

    return convert_to_mp4_with_aac(input_file, output_file)
|
| 70 |
+
|
| 71 |
+
# Custom CSS for the page container, the project-links row and the run button.
css="""
#col-container{
    margin: 0 auto;
    max-width: 940px;
}
#project-links{
    margin: 0 0 12px !important;
    column-gap: 8px;
    display: flex;
    justify-content: center;
    flex-wrap: nowrap;
    flex-direction: row;
    align-items: center;
}
#run-btn{
    border: var(--button-border-width) solid var(--button-primary-border-color);
    background: var(--button-primary-background-fill);
    color: var(--button-primary-text-color);
}
#run-btn:hover{
    border-color: var(--button-primary-border-color-hover);
    background: var(--button-primary-background-fill-hover);
    color: var(--button-primary-text-color-hover);
}
"""

# Build the UI: inputs (image, audio, emotional style) on the left, the
# generated video and project links on the right.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML("""
        <h2 style="text-align: center;">DreamTalk</h2>
        <p style="text-align: center;">When Expressive Talking Head Generation Meets Diffusion Probabilistic Models</p>
        <p style="margin:12px auto;display: flex;justify-content: center;">
            <a href="https://huggingface.co/spaces/fffiloni/dreamtalk?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg.svg" alt="Duplicate this Space"></a>
        </p>

        """)
        with gr.Row():
            with gr.Column():
                image_path = gr.Image(label="Image", type="filepath", sources=["upload"])
                audio_input = gr.Audio(label="Audio input", type="filepath", sources=["upload"], value="data/audio/acknowledgement_english.m4a")
                with gr.Row():
                    audio_list = gr.Dropdown(
                        label="Choose an audio (optional)",
                        choices=[
                            "German1.wav", "German2.wav", "German3.wav", "German4.wav",
                            "acknowledgement_chinese.m4a", "acknowledgement_english.m4a",
                            "chinese1_haierlizhi.wav", "chinese2_guanyu.wav",
                            "french1.wav", "french2.wav", "french3.wav",
                            "italian1.wav", "italian2.wav", "italian3.wav",
                            "japan1.wav", "japan2.wav", "japan3.wav",
                            "korean1.wav", "korean2.wav", "korean3.wav",
                            "noisy_audio_cafeter_snr_0.wav", "noisy_audio_meeting_snr_0.wav", "noisy_audio_meeting_snr_10.wav", "noisy_audio_meeting_snr_20.wav", "noisy_audio_narrative.wav", "noisy_audio_office_snr_0.wav", "out_of_domain_narrative.wav",
                            "spanish1.wav", "spanish2.wav", "spanish3.wav"
                        ],
                        value = "acknowledgement_english.m4a"
                    )
                    # Picking a listed audio loads it into the audio input
                    audio_list.change(
                        fn = load_audio,
                        inputs = [audio_list],
                        outputs = [audio_input]
                    )
                    emotional_style = gr.Dropdown(
                        label = "emotional style",
                        choices = [
                            "M030_front_angry_level3_001.mat",
                            "M030_front_contempt_level3_001.mat",
                            "M030_front_disgusted_level3_001.mat",
                            "M030_front_fear_level3_001.mat",
                            "M030_front_happy_level3_001.mat",
                            "M030_front_neutral_level1_001.mat",
                            "M030_front_sad_level3_001.mat",
                            "M030_front_surprised_level3_001.mat",
                            "W009_front_angry_level3_001.mat",
                            "W009_front_contempt_level3_001.mat",
                            "W009_front_disgusted_level3_001.mat",
                            "W009_front_fear_level3_001.mat",
                            "W009_front_happy_level3_001.mat",
                            "W009_front_neutral_level1_001.mat",
                            "W009_front_sad_level3_001.mat",
                            "W009_front_surprised_level3_001.mat",
                            "W011_front_angry_level3_001.mat",
                            "W011_front_contempt_level3_001.mat",
                            "W011_front_disgusted_level3_001.mat",
                            "W011_front_fear_level3_001.mat",
                            "W011_front_happy_level3_001.mat",
                            "W011_front_neutral_level1_001.mat",
                            "W011_front_sad_level3_001.mat",
                            "W011_front_surprised_level3_001.mat"
                        ],
                        value = "M030_front_neutral_level1_001.mat"
                    )
                gr.Examples(
                    examples = [
                        "data/src_img/uncropped/face3.png",
                        "data/src_img/uncropped/male_face.png",
                        "data/src_img/uncropped/uncut_src_img.jpg",
                        "data/src_img/cropped/chpa5.png",
                        "data/src_img/cropped/cut_img.png",
                        "data/src_img/cropped/f30.png",
                        "data/src_img/cropped/menglu2.png",
                        "data/src_img/cropped/nscu2.png",
                        "data/src_img/cropped/zp1.png",
                        "data/src_img/cropped/zt12.png"
                    ],
                    inputs=[image_path],
                    examples_per_page=5
                )
                with gr.Row():
                    run_btn = gr.Button("Run", elem_id="run-btn")
            with gr.Column():
                output_video = gr.Video(format="mp4")
                gr.HTML("""
                <p id="project-links" align="center">
                <a href='https://dreamtalk-project.github.io/'><img src='https://img.shields.io/badge/Project-Page-Green'></a> <a href='https://arxiv.org/abs/2312.09767'><img src='https://img.shields.io/badge/Paper-Arxiv-red'></a> <a href='https://youtu.be/VF4vlE6ZqWQ'><img src='https://badges.aleen42.com/src/youtube.svg'></a>
                </p>
                <img src="https://github.com/ali-vilab/dreamtalk/raw/main/media/teaser.gif" style="margin: 0 auto;border-radius: 10px;" />
                """)

    # Run inference on click and show the resulting video
    run_btn.click(
        fn = infer,
        inputs = [audio_input, image_path, emotional_style],
        outputs = [output_video]
    )

# No explicit share=True: it requested a public tunnel and overrode the
# GRADIO_SHARE=False setting of the container environment.
demo.queue(max_size=20).launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,13 +1,13 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
Pillow
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
| 1 |
+
dlib==19.24.0
|
| 2 |
+
yacs==0.1.8
|
| 3 |
+
scipy==1.7.3
|
| 4 |
+
scikit-image==0.19.3
|
| 5 |
+
scikit-learn==1.0.2
|
| 6 |
+
PyYAML==6.0
|
| 7 |
+
Pillow==9.1.0
|
| 8 |
+
opencv-python
|
| 9 |
+
imageio==2.18.0
|
| 10 |
+
ffmpeg-python==0.2.0
|
| 11 |
+
av==10.0.0
|
| 12 |
+
moviepy<2
|
| 13 |
+
gradio
|