TDN-M committed on
Commit
f8cb197
·
1 Parent(s): 4e83d2f
Files changed (4) hide show
  1. Dockerfile +81 -0
  2. README.md +1 -4
  3. app.py +180 -186
  4. requirements.txt +13 -13
Dockerfile ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-devel

ENV DEBIAN_FRONTEND=noninteractive

# Set the MKL_THREADING_LAYER environment variable to GNU
ENV MKL_THREADING_LAYER=GNU

# Install system dependencies including those required for dlib.
# The apt package lists are deleted in the same layer so they do not bloat the image.
RUN apt-get update && apt-get install -y \
    git \
    wget \
    libgl1-mesa-glx \
    libglib2.0-0 \
    ffmpeg \
    libx264-dev \
    build-essential \
    cmake \
    libopenblas-dev \
    liblapack-dev \
    libx11-dev \
    libgtk-3-dev \
    python3-dev \
    && rm -rf /var/lib/apt/lists/*

RUN useradd -m -u 1000 user

USER user

# HOME gets its own ENV instruction: variables referenced inside a single ENV
# instruction are expanded with the values they had BEFORE that instruction,
# so "$HOME/app" below would otherwise not resolve to /home/user/app.
ENV HOME=/home/user

ENV PATH=/home/user/.local/bin:$PATH \
    PYTHONPATH=$HOME/app \
    PYTHONUNBUFFERED=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    GRADIO_SHARE=False \
    SYSTEM=spaces

# Set the working directory to the app directory inside the user's home
WORKDIR $HOME/app

# Print detailed Python information
RUN python -c "import sys; print(f'Python {sys.version}')"

# Clone the repository
RUN git clone -b dev https://github.com/fffiloni/dreamtalk $HOME/app

# Download model checkpoints
RUN wget https://huggingface.co/camenduru/dreamtalk/resolve/main/damo/dreamtalk/checkpoints/denoising_network.pth -O $HOME/app/checkpoints/denoising_network.pth
RUN wget https://huggingface.co/camenduru/dreamtalk/resolve/main/damo/dreamtalk/checkpoints/renderer.pt -O $HOME/app/checkpoints/renderer.pt

# Install Python dependencies
RUN pip install --no-cache-dir \
    urllib3==1.26.6 \
    transformers==4.28.1 \
    yacs==0.1.8 \
    scipy==1.10.1 \
    scikit-image==0.20.0 \
    scikit-learn==1.2.2 \
    PyYAML==6.0 \
    Pillow==9.5.0 \
    numpy==1.24.2 \
    opencv-python==4.7.0.72 \
    imageio==2.27.0 \
    ffmpeg-python \
    av==11.0.0 \
    moviepy==1.0.3

RUN pip install gradio

# Install dlib with verbose output
RUN pip install --verbose --no-cache-dir dlib-bin

# Overwrite the app.py that came with the cloned repository
COPY app.py .

# Set the environment variable to specify the GPU device
ENV CUDA_DEVICE_ORDER=PCI_BUS_ID
ENV CUDA_VISIBLE_DEVICES=0

# Run your app.py script
CMD ["python", "app.py"]
README.md CHANGED
@@ -3,10 +3,7 @@ title: Tkvn
3
  emoji: 💬
4
  colorFrom: yellow
5
  colorTo: purple
6
- sdk: gradio
7
- sdk_version: 5.0.1
8
  app_file: app.py
9
  pinned: false
10
  ---
11
-
12
- An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
 
3
  emoji: 💬
4
  colorFrom: yellow
5
  colorTo: purple
6
+ sdk: docker
 
7
  app_file: app.py
8
  pinned: false
9
  ---
 
 
app.py CHANGED
@@ -1,200 +1,194 @@
1
- import os
2
  import gradio as gr
3
- from groq import Groq
4
- import pixeltable as pxt
5
- from pixeltable.iterators import FrameIterator
6
- from pixeltable.functions.video import extract_audio
7
- from pixeltable.functions.audio import get_metadata
8
-
9
- # Lưu khóa API Groq
10
- if 'GROQ_API_KEY' not in os.environ:
11
- os.environ['GROQ_API_KEY'] = input('Nhập khóa API Groq của bạn: ')
12
-
13
- MAX_VIDEO_SIZE_MB = 35
14
- CONCURRENCY_LIMIT = 1
15
-
16
- # Hàm gọi Groq API
17
- def call_groq_api(prompt, model_name, max_tokens=500):
18
- client = Groq(api_key=os.environ.get('GROQ_API_KEY'))
19
-
20
- response = client.chat.completions.create(
21
- model=model_name,
22
- messages=prompt,
23
- max_tokens=max_tokens
24
- )
25
- return response.choices[0].message.content
26
 
27
- # Hàm xử lý chính
28
- def process_and_generate_post(video_file, social_media_type, progress=gr.Progress()):
29
- progress(0, desc="Đang khởi tạo...")
30
-
31
- if not video_file:
32
- return "Vui lòng tải lên tệp video.", None, None, None
33
-
34
  try:
35
- # Kiểm tra kích thước tệp video
36
- video_size = os.path.getsize(video_file) / (1024 * 1024) # Chuyển sang MB
37
- if video_size > MAX_VIDEO_SIZE_MB:
38
- return f"Tệp video lớn hơn {MAX_VIDEO_SIZE_MB} MB. Vui lòng tải lên tệp nhỏ hơn.", None, None, None
39
-
40
- progress(0.2, desc="Đang tạo bảng và cột tính toán...")
41
-
42
- # Tạo bảng, view và cột tính toán
43
- pxt.drop_dir('directory', force=True)
44
- pxt.create_dir('directory')
45
-
46
- t = pxt.create_table(
47
- 'directory.video_table', {
48
- "video": pxt.Video,
49
- "sm_type": pxt.String
50
- }
51
- )
52
-
53
- frames_view = pxt.create_view(
54
- "directory.frames",
55
- t,
56
- iterator=FrameIterator.create(video=t.video, fps=1)
57
  )
58
-
59
- # Tạo cột tính toán để lưu các phép biến đổi
60
- t.add_computed_column(audio=extract_audio(t.video, format='mp3'))
61
- t.add_computed_column(metadata=get_metadata(t.audio))
62
- t.add_computed_column(transcription=openai.transcriptions(audio=t.audio, model='whisper-1'))
63
- t.add_computed_column(transcription_text=t.transcription.text)
64
-
65
- progress(0.4, desc="Đang tạo hàm UDF...")
66
-
67
- # Định nghĩa hàm UDF để tạo prompt cho LLM
68
- @pxt.udf
69
- def prompt(A: str, B: str) -> list[dict]:
70
- system_msg = 'Bạn là chuyên gia trong việc tạo nội dung mạng xã hội và tạo bài đăng hiệu quả dựa trên nội dung người dùng. Tuân thủ quy tắc và ràng buộc của nền tảng mạng xã hội.'
71
- user_msg = f'A: "{A}" \n B: "{B}"'
72
- return [
73
- {'role': 'system', 'content': system_msg},
74
- {'role': 'user', 'content': user_msg}
75
- ]
76
-
77
- # Áp dụng UDF để tạo cột mới
78
- t.add_computed_column(message=prompt(t.sm_type, t.transcription_text))
79
-
80
- progress(0.6, desc="Đang gọi mô hình ngôn ngữ lớn...")
81
-
82
- # Gọi Groq API để tạo bài đăng
83
- t.add_computed_column(response=call_groq_api(messages=t.message, model_name="llama3-70b-8192", max_tokens=500))
84
- t.add_computed_column(answer=t.response.choices[0].message.content)
85
-
86
- progress(0.8, desc="Đang chèn video và trích xuất dữ liệu...")
87
-
88
- # Chèn video vào bảng
89
- t.insert([{
90
- "video": video_file,
91
- "sm_type": social_media_type
92
- }])
93
-
94
- # Truy xuất kết quả
95
- social_media_post = t.select(t.answer).tail(1)['answer'][0]
96
- audio = t.select(t.audio).tail(1)['audio'][0]
97
- thumbnails = frames_view.select(frames_view.frame).tail(6)['frame']
98
- transcription_text = t.select(t.transcription_text).tail(1)['transcription_text'][0]
99
-
100
- # Hiển thị kết quả
101
- return social_media_post, thumbnails, transcription_text, audio
102
 
103
- except Exception as e:
104
- return f"Đã xảy ra lỗi: {str(e)}", None, None, None
105
-
106
- # Giao diện Gradio
107
- def gradio_interface():
108
- with gr.Blocks(theme=gr.themes.Base()) as demo:
109
- gr.Markdown(
110
- """
111
- 📹 Công cụ Tạo Bài Đăng Mạng Xã Hội từ Video
112
- Biến video của bạn thành nội dung mạng xã hội hấp dẫn bằng AI
113
- """
114
- )
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  with gr.Row():
117
- # Cột bên trái - Điều khiển đầu vào
118
  with gr.Column():
119
- video_input = gr.File(
120
- label="Tải lên Video",
121
- file_types=[".mp4", ".avi", ".mov"],
122
- height='400px'
123
- )
124
-
125
- with gr.Group():
126
- gr.Markdown("### 🎯 Nền tảng Mục tiêu")
127
- social_media_type = gr.Radio(
128
  choices=[
129
- "X (Twitter)",
130
- "Facebook",
131
- "LinkedIn",
132
- "Instagram"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  ],
134
- value="X (Twitter)",
135
- label="Chọn nơi bạn muốn chia sẻ:",
136
- interactive=True
137
  )
138
-
139
- generate_btn = gr.Button(
140
- "🚀 Tạo Bài Đăng",
141
- scale=1,
142
- size="lg",
143
- variant="primary"
 
 
 
 
 
 
 
 
 
144
  )
145
-
146
- # Cột bên phải - Hiển thị kết quả
147
  with gr.Column():
148
- output = gr.Textbox(
149
- label="✨ Bài Đăng Đã Tạo",
150
- show_copy_button=True,
151
- lines=4
152
- )
153
-
154
- gr.Markdown("### 🖼️ Hình thu nhỏ")
155
- thumbnail = gr.Gallery(
156
- label="Chọn hình thu nhỏ ưa thích",
157
- show_download_button=True,
158
- show_fullscreen_button=True,
159
- height='200px',
160
- object_fit="contain"
161
- )
162
-
163
- gr.Markdown("### 📝 Văn bản Đã Trích xuất")
164
- df_output = gr.Textbox(
165
- label="Văn bản Đã Trích xuất",
166
- show_copy_button=True,
167
- lines=8
168
- )
169
-
170
- gr.Markdown("### 🎵 Âm thanh Đã Trích xuất")
171
- audio = gr.Audio(
172
- label="Âm thanh",
173
- show_download_button=True,
174
- type="filepath"
175
- )
176
-
177
- # Kết nối nút nhấn với hàm xử lý
178
- generate_btn.click(
179
- fn=process_and_generate_post,
180
- inputs=[video_input, social_media_type],
181
- outputs=[output, thumbnail, df_output, audio],
182
- api_name="generate",
183
- show_progress="full",
184
- trigger_mode='once'
185
- )
186
 
187
- return demo
188
-
189
- # Khởi chạy giao diện Gradio
190
- if __name__ == "__main__":
191
- demo = gradio_interface()
192
- demo.launch(
193
- server_name="0.0.0.0", # Cho phép truy cập từ bên ngoài
194
- server_port=7860, # Cổng mặc định của Gradio
195
- share=False, # Tắt tính năng chia sẻ
196
- show_api=False, # Ẩn tài liệu API
197
- show_error=False, # Ẩn lỗi chi tiết
198
- ssl_verify=True, # Bật xác minh SSL
199
- quiet=True # Giảm đầu ra console
200
- )
 
 
1
  import gradio as gr
2
+ from moviepy.editor import VideoFileClip
3
+ import datetime
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
+ import subprocess
6
+ import sys
7
+
8
def install_dlib():
    """Install dlib 19.24.1 with pip, using the interpreter running this script.

    pip is invoked as ``sys.executable -m pip`` so the package lands in the
    same environment as the app. The outcome is printed either way.

    Returns:
        bool: True when pip exited successfully, False when it exited with a
        non-zero status. The failure is reported rather than raised, so a
        missing dlib does not stop the app from starting.
    """
    pip_command = [
        sys.executable, "-m", "pip", "install",
        "--no-cache-dir", "dlib==19.24.1", "-vvv",
    ]
    try:
        subprocess.run(pip_command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Failed to install dlib: {e}")
        return False
    print("dlib successfully installed!")
    return True
17
+
18
+ #install_dlib()
19
+
20
def convert_to_mp4_with_aac(input_path, output_path):
    """Re-encode a video as H.264 video with AAC audio.

    Args:
        input_path: Path of the video file to read.
        output_path: Path the re-encoded file is written to.

    Returns:
        ``output_path``, unchanged, so the result can be handed straight to a
        Gradio output component.
    """
    video = VideoFileClip(input_path)
    try:
        video.write_videofile(output_path, codec="libx264", audio_codec="aac")
    finally:
        # Always release the clip's readers, even when encoding fails;
        # otherwise every request leaves them open.
        video.close()

    return output_path
 
 
 
 
 
 
 
 
28
 
29
+
30
def check_file_exists(file_path, audio_list):
    """Report whether ``file_path`` is one of the entries of ``audio_list``.

    Despite the name, this is a pure membership test: the filesystem is never
    consulted.

    Args:
        file_path: The value to look for.
        audio_list: Container of known audio entries.

    Returns:
        bool: True when ``file_path`` is contained in ``audio_list``.
    """
    return file_path in audio_list
33
+
34
def load_audio(audio_listed):
    """Map a file name chosen in the audio dropdown to its path under ``data/audio``.

    Args:
        audio_listed: File name selected in the dropdown, or ``None`` / an
            empty string when nothing is selected.

    Returns:
        ``"data/audio/<name>"`` for a selected file, or ``None`` when there is
        no selection.
    """
    # An empty selection is treated like None: building a path from it would
    # yield the directory "data/audio/" rather than an audio file.
    if not audio_listed:
        return None
    return f"data/audio/{audio_listed}"
39
+
40
def execute_command(command: "list[str]") -> None:
    """Run an external command and wait for it to finish.

    Args:
        command: The program and its arguments as an argv-style list. No shell
            is involved, so arguments are passed through verbatim.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    subprocess.run(command, check=True)
42
+
43
def infer(audio_input, image_path, emotional_style):
    """Generate a talking-head video and return the path of the playable file.

    Runs ``inference_for_demo_video.py`` as a subprocess, then re-encodes the
    video found at ``output_video/<output_name>.mp4`` to H.264/AAC in the
    current working directory.

    Args:
        audio_input: Path of the driving audio file.
        image_path: Path of the source portrait image.
        emotional_style: File name of a style clip under
            ``data/style_clip/3DMM``.

    Returns:
        Path of the re-encoded ``.mp4`` file.

    Raises:
        gr.Error: If any of the three inputs is missing.
        subprocess.CalledProcessError: If the inference script fails.
    """
    # Fail early with a readable message; otherwise a missing value would be
    # interpolated into the command line as the literal text "None".
    if not audio_input:
        raise gr.Error("Please provide an audio input.")
    if not image_path:
        raise gr.Error("Please provide an image.")
    if not emotional_style:
        raise gr.Error("Please choose an emotional style.")

    # Timestamped name so successive runs do not overwrite each other.
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    output_name = f"lipsynced_result_{timestamp}"

    command = [
        # Same interpreter as the app, rather than whichever "python" is first on PATH.
        sys.executable,
        "inference_for_demo_video.py",
        f"--wav_path={audio_input}",
        f"--style_clip_path=data/style_clip/3DMM/{emotional_style}",
        "--pose_path=data/pose/RichardShelby_front_neutral_level1_001.mat",
        f"--image_path={image_path}",
        "--cfg_scale=1.0",
        "--max_gen_len=30",
        f"--output_name={output_name}",
    ]

    execute_command(command)

    # Convert video to compatible codecs
    input_file = f"output_video/{output_name}.mp4"
    output_file = f"{output_name}.mp4"

    return convert_to_mp4_with_aac(input_file, output_file)
70
+
71
+ css="""
72
+ #col-container{
73
+ margin: 0 auto;
74
+ max-width: 940px;
75
+ }
76
+ #project-links{
77
+ margin: 0 0 12px !important;
78
+ column-gap: 8px;
79
+ display: flex;
80
+ justify-content: center;
81
+ flex-wrap: nowrap;
82
+ flex-direction: row;
83
+ align-items: center;
84
+ }
85
+ #run-btn{
86
+ border: var(--button-border-width) solid var(--button-primary-border-color);
87
+ background: var(--button-primary-background-fill);
88
+ color: var(--button-primary-text-color);
89
+ }
90
+ #run-btn:hover{
91
+ border-color: var(--button-primary-border-color-hover);
92
+ background: var(--button-primary-background-fill-hover);
93
+ color: var(--button-primary-text-color-hover);
94
+ }
95
+ """
96
+ with gr.Blocks(css=css) as demo:
97
+ with gr.Column(elem_id="col-container"):
98
+ gr.HTML("""
99
+ <h2 style="text-align: center;">DreamTalk</h2>
100
+ <p style="text-align: center;">When Expressive Talking Head Generation Meets Diffusion Probabilistic Models</p>
101
+ <p style="margin:12px auto;display: flex;justify-content: center;">
102
+ <a href="https://huggingface.co/spaces/fffiloni/dreamtalk?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg.svg" alt="Duplicate this Space"></a>
103
+ </p>
104
+
105
+ """)
106
  with gr.Row():
 
107
  with gr.Column():
108
+ image_path = gr.Image(label="Image", type="filepath", sources=["upload"])
109
+ audio_input = gr.Audio(label="Audio input", type="filepath", sources=["upload"], value="data/audio/acknowledgement_english.m4a")
110
+ with gr.Row():
111
+ audio_list = gr.Dropdown(
112
+ label="Choose an audio (optional)",
 
 
 
 
113
  choices=[
114
+ "German1.wav", "German2.wav", "German3.wav", "German4.wav",
115
+ "acknowledgement_chinese.m4a", "acknowledgement_english.m4a",
116
+ "chinese1_haierlizhi.wav", "chinese2_guanyu.wav",
117
+ "french1.wav", "french2.wav", "french3.wav",
118
+ "italian1.wav", "italian2.wav", "italian3.wav",
119
+ "japan1.wav", "japan2.wav", "japan3.wav",
120
+ "korean1.wav", "korean2.wav", "korean3.wav",
121
+ "noisy_audio_cafeter_snr_0.wav", "noisy_audio_meeting_snr_0.wav", "noisy_audio_meeting_snr_10.wav", "noisy_audio_meeting_snr_20.wav", "noisy_audio_narrative.wav", "noisy_audio_office_snr_0.wav", "out_of_domain_narrative.wav",
122
+ "spanish1.wav", "spanish2.wav", "spanish3.wav"
123
+ ],
124
+ value = "acknowledgement_english.m4a"
125
+ )
126
+ audio_list.change(
127
+ fn = load_audio,
128
+ inputs = [audio_list],
129
+ outputs = [audio_input]
130
+ )
131
+ emotional_style = gr.Dropdown(
132
+ label = "emotional style",
133
+ choices = [
134
+ "M030_front_angry_level3_001.mat",
135
+ "M030_front_contempt_level3_001.mat",
136
+ "M030_front_disgusted_level3_001.mat",
137
+ "M030_front_fear_level3_001.mat",
138
+ "M030_front_happy_level3_001.mat",
139
+ "M030_front_neutral_level1_001.mat",
140
+ "M030_front_sad_level3_001.mat",
141
+ "M030_front_surprised_level3_001.mat",
142
+ "W009_front_angry_level3_001.mat",
143
+ "W009_front_contempt_level3_001.mat",
144
+ "W009_front_disgusted_level3_001.mat",
145
+ "W009_front_fear_level3_001.mat",
146
+ "W009_front_happy_level3_001.mat",
147
+ "W009_front_neutral_level1_001.mat",
148
+ "W009_front_sad_level3_001.mat",
149
+ "W009_front_surprised_level3_001.mat",
150
+ "W011_front_angry_level3_001.mat",
151
+ "W011_front_contempt_level3_001.mat",
152
+ "W011_front_disgusted_level3_001.mat",
153
+ "W011_front_fear_level3_001.mat",
154
+ "W011_front_happy_level3_001.mat",
155
+ "W011_front_neutral_level1_001.mat",
156
+ "W011_front_sad_level3_001.mat",
157
+ "W011_front_surprised_level3_001.mat"
158
  ],
159
+ value = "M030_front_neutral_level1_001.mat"
 
 
160
  )
161
+ gr.Examples(
162
+ examples = [
163
+ "data/src_img/uncropped/face3.png",
164
+ "data/src_img/uncropped/male_face.png",
165
+ "data/src_img/uncropped/uncut_src_img.jpg",
166
+ "data/src_img/cropped/chpa5.png",
167
+ "data/src_img/cropped/cut_img.png",
168
+ "data/src_img/cropped/f30.png",
169
+ "data/src_img/cropped/menglu2.png",
170
+ "data/src_img/cropped/nscu2.png",
171
+ "data/src_img/cropped/zp1.png",
172
+ "data/src_img/cropped/zt12.png"
173
+ ],
174
+ inputs=[image_path],
175
+ examples_per_page=5
176
  )
177
+ with gr.Row():
178
+ run_btn = gr.Button("Run", elem_id="run-btn")
179
  with gr.Column():
180
+ output_video = gr.Video(format="mp4")
181
+ gr.HTML("""
182
+ <p id="project-links" align="center">
183
+ <a href='https://dreamtalk-project.github.io/'><img src='https://img.shields.io/badge/Project-Page-Green'></a> <a href='https://arxiv.org/abs/2312.09767'><img src='https://img.shields.io/badge/Paper-Arxiv-red'></a> <a href='https://youtu.be/VF4vlE6ZqWQ'><img src='https://badges.aleen42.com/src/youtube.svg'></a>
184
+ </p>
185
+ <img src="https://github.com/ali-vilab/dreamtalk/raw/main/media/teaser.gif" style="margin: 0 auto;border-radius: 10px;" />
186
+ """)
187
+
188
+ run_btn.click(
189
+ fn = infer,
190
+ inputs = [audio_input, image_path, emotional_style],
191
+ outputs = [output_video]
192
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
# No explicit share=True: the container is configured with GRADIO_SHARE=False,
# and forcing a share link would open a public tunnel whenever the app runs
# outside Spaces.
demo.queue(max_size=20).launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,13 +1,13 @@
1
- # Core libraries
2
- gradio
3
- groq
4
- pixeltable
5
- pytesseract
6
- PyPDF2
7
- Pillow
8
- # Additional dependencies
9
- requests
10
- numpy
11
- moviepy>=1.0.3
12
- ffmpeg
13
- openai
 
1
+ dlib==19.24.0
2
+ yacs==0.1.8
3
+ scipy==1.7.3
4
+ scikit-image==0.19.3
5
+ scikit-learn==1.0.2
6
+ PyYAML==6.0
7
+ Pillow==9.1.0
8
+ opencv-python
9
+ imageio==2.18.0
10
+ ffmpeg-python==0.2.0
11
+ av==10.0.0
12
+ moviepy<2
13
+ gradio