Spaces:

TDN-M
/

Tkvn

Sleeping

App Files Files Community

TDN-M committed on Jul 28, 2025

Commit

f8cb197

1 Parent(s): 4e83d2f

k

Browse files

Files changed (4) hide show

Dockerfile +81 -0
README.md +1 -4
app.py +180 -186
requirements.txt +13 -13

Dockerfile ADDED Viewed

	@@ -0,0 +1,81 @@

+# Base image: PyTorch 2.0.1 with CUDA 11.7 / cuDNN 8 ("devel" tag).
+FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-devel
+# Build-time only: stops apt from prompting during the image build without
+# leaking the setting into the environment of the running container.
+ARG DEBIAN_FRONTEND=noninteractive
+# Set the MKL_THREADING_LAYER environment variable to GNU
+ENV MKL_THREADING_LAYER=GNU
+# Install system dependencies including those required for dlib.
+# - --no-install-recommends keeps optional packages out of the image;
+#   ca-certificates is therefore listed explicitly because the later
+#   git clone / wget steps fetch over HTTPS.
+# - The apt package lists are removed in the same layer so they do not
+#   persist in the image.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    ca-certificates \
+    cmake \
+    ffmpeg \
+    git \
+    libgl1-mesa-glx \
+    libglib2.0-0 \
+    libgtk-3-dev \
+    liblapack-dev \
+    libopenblas-dev \
+    libx11-dev \
+    libx264-dev \
+    python3-dev \
+    wget \
+    && rm -rf /var/lib/apt/lists/*
+# Create the unprivileged account (UID 1000) that the remaining build steps
+# and the application run as.
+RUN useradd -m -u 1000 user
+USER user
+# NOTE: variable substitution inside a single ENV instruction uses the values
+# from *before* that instruction, so "$HOME" would not yet be /home/user here.
+# Paths derived from HOME are therefore written out literally.
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH \
+    PYTHONPATH=/home/user/app \
+    PYTHONUNBUFFERED=1 \
+    GRADIO_ALLOW_FLAGGING=never \
+    GRADIO_NUM_PORTS=1 \
+    GRADIO_SERVER_NAME=0.0.0.0 \
+    GRADIO_THEME=huggingface \
+    GRADIO_SHARE=False \
+    SYSTEM=spaces
+# Set the working directory to the application directory under the user's home
+WORKDIR $HOME/app
+# Print detailed Python information
+RUN python -c "import sys; print(f'Python {sys.version}')"
+# Clone the repository (shallow: only the tip of the dev branch is needed,
+# so the full git history is not stored in the image).
+# NOTE(review): "dev" is a moving branch; pin a commit if reproducible builds matter.
+RUN git clone -b dev --depth 1 https://github.com/fffiloni/dreamtalk $HOME/app
+# Download model checkpoints
+RUN wget https://huggingface.co/camenduru/dreamtalk/resolve/main/damo/dreamtalk/checkpoints/denoising_network.pth -O $HOME/app/checkpoints/denoising_network.pth
+RUN wget https://huggingface.co/camenduru/dreamtalk/resolve/main/damo/dreamtalk/checkpoints/renderer.pt -O $HOME/app/checkpoints/renderer.pt
+# Install Python dependencies (pinned; pip's download cache is not kept in the layer)
+RUN pip install --no-cache-dir \
+    urllib3==1.26.6 \
+    transformers==4.28.1 \
+    yacs==0.1.8 \
+    scipy==1.10.1 \
+    scikit-image==0.20.0 \
+    scikit-learn==1.2.2 \
+    PyYAML==6.0 \
+    Pillow==9.5.0 \
+    numpy==1.24.2 \
+    opencv-python==4.7.0.72 \
+    imageio==2.27.0 \
+    ffmpeg-python==0.2.0 \
+    av==11.0.0 \
+    moviepy==1.0.3
+# NOTE(review): gradio is unpinned, so each rebuild may pull a different
+# release; pin it once the version app.py was written against is confirmed.
+RUN pip install --no-cache-dir gradio
+# Install dlib with verbose output
+RUN pip install --verbose --no-cache-dir dlib-bin
+# Copy the entry script owned by the runtime user rather than root
+COPY --chown=user app.py .
+# Set the environment variable to specify the GPU device
+ENV CUDA_DEVICE_ORDER=PCI_BUS_ID
+ENV CUDA_VISIBLE_DEVICES=0
+# Documentation only: Gradio's default listening port
+EXPOSE 7860
+# Run your app.py script
+CMD ["python", "app.py"]

README.md CHANGED Viewed

@@ -3,10 +3,7 @@ title: Tkvn
 emoji: 💬
 colorFrom: yellow
 colorTo: purple
-sdk: gradio
-sdk_version: 5.0.1
 app_file: app.py
 pinned: false
 ---
-An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).

 emoji: 💬
 colorFrom: yellow
 colorTo: purple
+sdk: docker
 app_file: app.py
 pinned: false
 ---

app.py CHANGED Viewed

@@ -1,200 +1,194 @@
-import os
 import gradio as gr
-from groq import Groq
-import pixeltable as pxt
-from pixeltable.iterators import FrameIterator
-from pixeltable.functions.video import extract_audio
-from pixeltable.functions.audio import get_metadata
-# Lưu khóa API Groq
-if 'GROQ_API_KEY' not in os.environ:
-    os.environ['GROQ_API_KEY'] = input('Nhập khóa API Groq của bạn: ')
-MAX_VIDEO_SIZE_MB = 35
-CONCURRENCY_LIMIT = 1
-# Hàm gọi Groq API
-def call_groq_api(prompt, model_name, max_tokens=500):
-    client = Groq(api_key=os.environ.get('GROQ_API_KEY'))
-    response = client.chat.completions.create(
-        model=model_name,
-        messages=prompt,
-        max_tokens=max_tokens
-    )
-    return response.choices[0].message.content
-# Hàm xử lý chính
-def process_and_generate_post(video_file, social_media_type, progress=gr.Progress()):
-    progress(0, desc="Đang khởi tạo...")
-    if not video_file:
-        return "Vui lòng tải lên tệp video.", None, None, None
     try:
-        # Kiểm tra kích thước tệp video
-        video_size = os.path.getsize(video_file) / (1024 * 1024)  # Chuyển sang MB
-        if video_size > MAX_VIDEO_SIZE_MB:
-            return f"Tệp video lớn hơn {MAX_VIDEO_SIZE_MB} MB. Vui lòng tải lên tệp nhỏ hơn.", None, None, None
-        progress(0.2, desc="Đang tạo bảng và cột tính toán...")
-        # Tạo bảng, view và cột tính toán
-        pxt.drop_dir('directory', force=True)
-        pxt.create_dir('directory')
-        t = pxt.create_table(
-            'directory.video_table', {
-            "video": pxt.Video,
-            "sm_type": pxt.String
-            }
-        )
-        frames_view = pxt.create_view(
-            "directory.frames",
-            t,
-            iterator=FrameIterator.create(video=t.video, fps=1)
         )
-        # Tạo cột tính toán để lưu các phép biến đổi
-        t.add_computed_column(audio=extract_audio(t.video, format='mp3'))
-        t.add_computed_column(metadata=get_metadata(t.audio))
-        t.add_computed_column(transcription=openai.transcriptions(audio=t.audio, model='whisper-1'))
-        t.add_computed_column(transcription_text=t.transcription.text)
-        progress(0.4, desc="Đang tạo hàm UDF...")
-        # Định nghĩa hàm UDF để tạo prompt cho LLM
-        @pxt.udf
-        def prompt(A: str, B: str) -> list[dict]:
-            system_msg = 'Bạn là chuyên gia trong việc tạo nội dung mạng xã hội và tạo bài đăng hiệu quả dựa trên nội dung người dùng. Tuân thủ quy tắc và ràng buộc của nền tảng mạng xã hội.'
-            user_msg = f'A: "{A}" \n B: "{B}"'
-            return [
-                {'role': 'system', 'content': system_msg},
-                {'role': 'user', 'content': user_msg}
-            ]
-        # Áp dụng UDF để tạo cột mới
-        t.add_computed_column(message=prompt(t.sm_type, t.transcription_text))
-        progress(0.6, desc="Đang gọi mô hình ngôn ngữ lớn...")
-        # Gọi Groq API để tạo bài đăng
-        t.add_computed_column(response=call_groq_api(messages=t.message, model_name="llama3-70b-8192", max_tokens=500))
-        t.add_computed_column(answer=t.response.choices[0].message.content)
-        progress(0.8, desc="Đang chèn video và trích xuất dữ liệu...")
-        # Chèn video vào bảng
-        t.insert([{
-            "video": video_file,
-            "sm_type": social_media_type
-        }])
-        # Truy xuất kết quả
-        social_media_post = t.select(t.answer).tail(1)['answer'][0]
-        audio = t.select(t.audio).tail(1)['audio'][0]
-        thumbnails = frames_view.select(frames_view.frame).tail(6)['frame']
-        transcription_text = t.select(t.transcription_text).tail(1)['transcription_text'][0]
-        # Hiển thị kết quả
-        return social_media_post, thumbnails, transcription_text, audio
-    except Exception as e:
-        return f"Đã xảy ra lỗi: {str(e)}", None, None, None
-# Giao diện Gradio
-def gradio_interface():
-    with gr.Blocks(theme=gr.themes.Base()) as demo:
-        gr.Markdown(
-            """
-            📹 Công cụ Tạo Bài Đăng Mạng Xã Hội từ Video
-            Biến video của bạn thành nội dung mạng xã hội hấp dẫn bằng AI
-            """
-        )
         with gr.Row():
-            # Cột bên trái - Điều khiển đầu vào
             with gr.Column():
-                video_input = gr.File(
-                    label="Tải lên Video",
-                    file_types=[".mp4", ".avi", ".mov"],
-                    height='400px'
-                )
-                with gr.Group():
-                    gr.Markdown("### 🎯 Nền tảng Mục tiêu")
-                    social_media_type = gr.Radio(
                         choices=[
-                            "X (Twitter)",
-                            "Facebook",
-                            "LinkedIn",
-                            "Instagram"
                         ],
-                        value="X (Twitter)",
-                        label="Chọn nơi bạn muốn chia sẻ:",
-                        interactive=True
                     )
-                generate_btn = gr.Button(
-                    "🚀 Tạo Bài Đăng",
-                    scale=1,
-                    size="lg",
-                    variant="primary"
                 )
-            # Cột bên phải - Hiển thị kết quả
             with gr.Column():
-                output = gr.Textbox(
-                    label="✨ Bài Đăng Đã Tạo",
-                    show_copy_button=True,
-                    lines=4
-                )
-                gr.Markdown("### 🖼️ Hình thu nhỏ")
-                thumbnail = gr.Gallery(
-                    label="Chọn hình thu nhỏ ưa thích",
-                    show_download_button=True,
-                    show_fullscreen_button=True,
-                    height='200px',
-                    object_fit="contain"
-                )
-                gr.Markdown("### 📝 Văn bản Đã Trích xuất")
-                df_output = gr.Textbox(
-                    label="Văn bản Đã Trích xuất",
-                    show_copy_button=True,
-                    lines=8
-                )
-                gr.Markdown("### 🎵 Âm thanh Đã Trích xuất")
-                audio = gr.Audio(
-                    label="Âm thanh",
-                    show_download_button=True,
-                    type="filepath"
-                )
-        # Kết nối nút nhấn với hàm xử lý
-        generate_btn.click(
-            fn=process_and_generate_post,
-            inputs=[video_input, social_media_type],
-            outputs=[output, thumbnail, df_output, audio],
-            api_name="generate",
-            show_progress="full",
-            trigger_mode='once'
-        )
-    return demo
-# Khởi chạy giao diện Gradio
-if __name__ == "__main__":
-    demo = gradio_interface()
-    demo.launch(
-        server_name="0.0.0.0",  # Cho phép truy cập từ bên ngoài
-        server_port=7860,       # Cổng mặc định của Gradio
-        share=False,            # Tắt tính năng chia sẻ
-        show_api=False,         # Ẩn tài liệu API
-        show_error=False,       # Ẩn lỗi chi tiết
-        ssl_verify=True,        # Bật xác minh SSL
-        quiet=True              # Giảm đầu ra console
-    )

 import gradio as gr
+from moviepy.editor import VideoFileClip
+import datetime
+import subprocess
+import sys
+def install_dlib():
+    """Install dlib 19.24.1 with pip, using the interpreter that runs this script.
+
+    Prints a success message when pip exits cleanly. If pip exits with a
+    non-zero status, the CalledProcessError is caught and reported with
+    print(); it is not re-raised.
+    """
     try:
+        subprocess.run(
+            # "-m pip" on sys.executable targets the current interpreter's environment
+            [sys.executable, "-m", "pip", "install", "--no-cache-dir", "dlib==19.24.1", "-vvv"],
+            check=True
         )
+        print("dlib successfully installed!")
+    except subprocess.CalledProcessError as e:
+        print(f"Failed to install dlib: {e}")
+#install_dlib()
+def convert_to_mp4_with_aac(input_path, output_path):
+    """Re-encode a video as MP4 with H.264 video and AAC audio.
+
+    Args:
+        input_path: Path of the video file to read.
+        output_path: Path the re-encoded file is written to.
+
+    Returns:
+        ``output_path``, unchanged, once the file has been written.
+    """
+    # The context manager closes the clip (and the reader it holds open)
+    # even when encoding raises, instead of leaving it to garbage collection.
+    with VideoFileClip(input_path) as video:
+        # Set the output format to mp4 with AAC codec
+        video.write_videofile(output_path, codec="libx264", audio_codec="aac")
+    return output_path
+# Membership test: reports whether a path appears in the given audio list
+def check_file_exists(file_path, audio_list):
+    """Return True when ``file_path`` is contained in ``audio_list``, else False."""
+    is_listed = file_path in audio_list
+    return is_listed
+def load_audio(audio_listed):
+    """Map a selected audio file name to its path under ``data/audio/``.
+
+    Returns None when nothing is selected (``audio_listed`` is None).
+    """
+    return None if audio_listed is None else f"data/audio/{audio_listed}"
+def execute_command(command: list[str]) -> None:
+    """Run an external command and wait for it to finish.
+
+    Args:
+        command: Program and arguments as an argv-style list (this is what
+            ``infer`` passes). No shell is involved.
+
+    Raises:
+        subprocess.CalledProcessError: If the command exits with a non-zero status.
+    """
+    subprocess.run(command, check=True)
+def infer(audio_input, image_path, emotional_style):
+    """Generate a lip-synced video from an audio clip and a face image.
+
+    Runs ``inference_for_demo_video.py`` as a subprocess, then re-encodes the
+    video it writes under ``output_video/`` to MP4 with AAC audio.
+
+    Args:
+        audio_input: File path of the driving audio.
+        image_path: File path of the source image.
+        emotional_style: File name of a style clip under ``data/style_clip/3DMM/``.
+
+    Returns:
+        Path of the converted MP4 file, written to the current working directory.
+
+    Raises:
+        gr.Error: If the audio or the image is missing.
+        subprocess.CalledProcessError: If the inference script exits non-zero.
+    """
+    # Fail fast with a message the UI can display, instead of handing the
+    # literal string "None" to the inference script.
+    if not audio_input:
+        raise gr.Error("Please provide an audio input.")
+    if not image_path:
+        raise gr.Error("Please provide an image.")
+    # Get the current timestamp
+    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+    output_name = f"lipsynced_result_{timestamp}"
+    command = [
+        # Same interpreter as this process, consistent with install_dlib()
+        sys.executable,
+        "inference_for_demo_video.py",
+        f"--wav_path={audio_input}",
+        f"--style_clip_path=data/style_clip/3DMM/{emotional_style}",
+        "--pose_path=data/pose/RichardShelby_front_neutral_level1_001.mat",
+        f"--image_path={image_path}",
+        "--cfg_scale=1.0",
+        "--max_gen_len=30",
+        f"--output_name={output_name}"
+    ]
+    execute_command(command)
+    # Convert video to compatible codecs
+    input_file = f"output_video/{output_name}.mp4"
+    output_file = f"{output_name}.mp4"
+    result = convert_to_mp4_with_aac(input_file, output_file)
+    return result
+# Custom stylesheet handed to gr.Blocks(css=css). The selectors match the
+# element ids used in the layout: "col-container", "project-links", "run-btn".
+css="""
+#col-container{
+    margin: 0 auto;
+    max-width: 940px;
+}
+#project-links{
+    margin: 0 0 12px !important;
+    column-gap: 8px;
+    display: flex;
+    justify-content: center;
+    flex-wrap: nowrap;
+    flex-direction: row;
+    align-items: center;
+}
+#run-btn{
+    border: var(--button-border-width) solid var(--button-primary-border-color);
+    background: var(--button-primary-background-fill);
+    color: var(--button-primary-text-color);
+}
+#run-btn:hover{
+    border-color: var(--button-primary-border-color-hover);
+    background: var(--button-primary-background-fill-hover);
+    color: var(--button-primary-text-color-hover);
+}
+"""
+# Build the Gradio UI: one column with the inputs (image, audio, emotional
+# style, examples, Run button) and a second column with the output video.
+with gr.Blocks(css=css) as demo:
+    with gr.Column(elem_id="col-container"):
+        gr.HTML("""
+        <h2 style="text-align: center;">DreamTalk</h2>
+        <p style="text-align: center;">When Expressive Talking Head Generation Meets Diffusion Probabilistic Models</p>
+        <p style="margin:12px auto;display: flex;justify-content: center;">
+            <a href="https://huggingface.co/spaces/fffiloni/dreamtalk?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg.svg" alt="Duplicate this Space"></a>
+        </p>
+        """)
         with gr.Row():
             with gr.Column():
+                image_path = gr.Image(label="Image", type="filepath", sources=["upload"])
+                audio_input = gr.Audio(label="Audio input", type="filepath", sources=["upload"], value="data/audio/acknowledgement_english.m4a")
+                with gr.Row():
+                    audio_list = gr.Dropdown(
+                        label="Choose an audio (optional)",
                         choices=[
+                            "German1.wav", "German2.wav", "German3.wav", "German4.wav",
+                            "acknowledgement_chinese.m4a", "acknowledgement_english.m4a",
+                            "chinese1_haierlizhi.wav", "chinese2_guanyu.wav",
+                            "french1.wav", "french2.wav", "french3.wav",
+                            "italian1.wav", "italian2.wav", "italian3.wav",
+                            "japan1.wav", "japan2.wav", "japan3.wav",
+                            "korean1.wav", "korean2.wav", "korean3.wav",
+                            "noisy_audio_cafeter_snr_0.wav", "noisy_audio_meeting_snr_0.wav", "noisy_audio_meeting_snr_10.wav", "noisy_audio_meeting_snr_20.wav", "noisy_audio_narrative.wav", "noisy_audio_office_snr_0.wav", "out_of_domain_narrative.wav",
+                            "spanish1.wav", "spanish2.wav", "spanish3.wav"
+                            ],
+                        value = "acknowledgement_english.m4a"
+                    )
+                    # Choosing a dropdown entry is passed through load_audio
+                    # and the result is set on the audio input component
+                    audio_list.change(
+                        fn = load_audio,
+                        inputs = [audio_list],
+                        outputs = [audio_input]
+                    )
+                    emotional_style = gr.Dropdown(
+                        label = "emotional style",
+                        choices = [
+                            "M030_front_angry_level3_001.mat",
+                            "M030_front_contempt_level3_001.mat",
+                            "M030_front_disgusted_level3_001.mat",
+                            "M030_front_fear_level3_001.mat",
+                            "M030_front_happy_level3_001.mat",
+                            "M030_front_neutral_level1_001.mat",
+                            "M030_front_sad_level3_001.mat",
+                            "M030_front_surprised_level3_001.mat",
+                            "W009_front_angry_level3_001.mat",
+                            "W009_front_contempt_level3_001.mat",
+                            "W009_front_disgusted_level3_001.mat",
+                            "W009_front_fear_level3_001.mat",
+                            "W009_front_happy_level3_001.mat",
+                            "W009_front_neutral_level1_001.mat",
+                            "W009_front_sad_level3_001.mat",
+                            "W009_front_surprised_level3_001.mat",
+                            "W011_front_angry_level3_001.mat",
+                            "W011_front_contempt_level3_001.mat",
+                            "W011_front_disgusted_level3_001.mat",
+                            "W011_front_fear_level3_001.mat",
+                            "W011_front_happy_level3_001.mat",
+                            "W011_front_neutral_level1_001.mat",
+                            "W011_front_sad_level3_001.mat",
+                            "W011_front_surprised_level3_001.mat"
                         ],
+                        value = "M030_front_neutral_level1_001.mat"
                     )
+                # Example images wired to the image input component
+                gr.Examples(
+                    examples = [
+                        "data/src_img/uncropped/face3.png",
+                        "data/src_img/uncropped/male_face.png",
+                        "data/src_img/uncropped/uncut_src_img.jpg",
+                        "data/src_img/cropped/chpa5.png",
+                        "data/src_img/cropped/cut_img.png",
+                        "data/src_img/cropped/f30.png",
+                        "data/src_img/cropped/menglu2.png",
+                        "data/src_img/cropped/nscu2.png",
+                        "data/src_img/cropped/zp1.png",
+                        "data/src_img/cropped/zt12.png"
+                    ],
+                    inputs=[image_path],
+                    examples_per_page=5
                 )
+                with gr.Row():
+                    run_btn = gr.Button("Run", elem_id="run-btn")
             with gr.Column():
+                output_video = gr.Video(format="mp4")
+                gr.HTML("""
+                <p id="project-links" align="center">
+                  <a href='https://dreamtalk-project.github.io/'><img src='https://img.shields.io/badge/Project-Page-Green'></a> <a href='https://arxiv.org/abs/2312.09767'><img src='https://img.shields.io/badge/Paper-Arxiv-red'></a> <a href='https://youtu.be/VF4vlE6ZqWQ'><img src='https://badges.aleen42.com/src/youtube.svg'></a>
+                </p>
+                <img src="https://github.com/ali-vilab/dreamtalk/raw/main/media/teaser.gif" style="margin: 0 auto;border-radius: 10px;" />
+                """)
+    # Clicking Run calls infer() with the audio, image and style values and
+    # sends its return value to the output video component
+    run_btn.click(
+        fn = infer,
+        inputs = [audio_input, image_path, emotional_style],
+        outputs = [output_video]
+    )
+# Enable the request queue (max_size=20) and start the server.
+# NOTE(review): share=True is passed explicitly here, while the Dockerfile in
+# this same commit sets GRADIO_SHARE=False — confirm which one is intended.
+demo.queue(max_size=20).launch(share=True)

requirements.txt CHANGED Viewed

@@ -1,13 +1,13 @@
-# Core libraries
-gradio
-groq
-pixeltable
-pytesseract
-PyPDF2
-Pillow
-# Additional dependencies
-requests
-numpy
-moviepy>=1.0.3
-ffmpeg
-openai

+dlib==19.24.0
+yacs==0.1.8
+scipy==1.7.3
+scikit-image==0.19.3
+scikit-learn==1.0.2
+PyYAML==6.0
+Pillow==9.1.0
+opencv-python
+imageio==2.18.0
+ffmpeg-python==0.2.0
+av==10.0.0
+moviepy<2
+gradio