# File size: 5,252 Bytes
# 1ce4e4b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
from pathlib import Path

import gradio as gr

from pipeline import (
    copy_file_to_dir,
    extract_face_and_coords,
    make_run_dir,
    merge_synced_face,
)

# Absolute directory that contains this file.
BASE_DIR = Path(__file__).resolve().parent
# Working folder handed to make_run_dir by the step handlers.
WORK_DIR = BASE_DIR.joinpath("work")
# Created at import time so the handlers can rely on it existing.
WORK_DIR.mkdir(parents=True, exist_ok=True)


def _normalize_upload_path(file_obj):
    """Return the filesystem path of an uploaded file as a string.

    Args:
        file_obj: Value delivered by an upload component. May be ``None``
            (nothing uploaded), a path string, a ``pathlib.Path``, or a
            file-like object that exposes its path through a ``name``
            attribute (e.g. a ``tempfile`` wrapper).

    Returns:
        ``None`` when ``file_obj`` is ``None``; otherwise a ``str``. Strings
        are returned unchanged, so an empty string stays falsy for callers.
    """
    if file_obj is None:
        return None
    if isinstance(file_obj, str):
        return file_obj
    # Path objects also have a ``name`` attribute (the final component only),
    # so they must be handled before the file-like branch below.
    if isinstance(file_obj, Path):
        return str(file_obj)
    # File-like uploads carry their path in ``.name``; ``str()`` on such an
    # object would yield its repr, which is not a usable path.
    name = getattr(file_obj, "name", None)
    if isinstance(name, str) and name:
        return name
    return str(file_obj)


def run_step1(main_video):
    """Handle the "Run Step 1" button: extract the face clip and coordinates.

    The upload is copied into a run directory obtained from ``make_run_dir``
    and then passed to ``extract_face_and_coords``.

    Args:
        main_video: Upload value of the main/original video.

    Returns:
        A 4-tuple ``(status, preview_path, face_file_path, coords_path)``.
        The cropped face path is returned twice (preview and download). On
        any failure the status is ``"Step 1 failed: <error>"`` and the other
        three items are ``None``.
    """
    try:
        uploaded = _normalize_upload_path(main_video)
        if not uploaded:
            raise ValueError("Please upload the main/original video.")

        step_dir = make_run_dir(WORK_DIR, "step1")
        staged_video = copy_file_to_dir(uploaded, step_dir, "main_video.mp4")

        coords_file, face_clip, face_box = extract_face_and_coords(
            video_path=str(staged_video),
            output_dir=str(step_dir),
            coords_name="face_coords_avg.pkl",
            cropped_name="cropped_face.mp4",
        )
        message = f"Step 1 completed. Face bbox saved: {face_box}"
    except Exception as err:
        # UI boundary: report the problem in the status box instead of raising.
        return f"Step 1 failed: {err}", None, None, None

    return message, face_clip, face_clip, coords_file


def run_step3(main_video, synced_face_video, face_coords):
    """Handle the "Run Step 3" button: merge the synced face into the original.

    All three uploads are required. They are copied into a run directory
    obtained from ``make_run_dir`` and handed to ``merge_synced_face``.

    Args:
        main_video: Upload value of the original/main video.
        synced_face_video: Upload value of the synced face video from Step 2.
        face_coords: Upload value of the face coordinates file from Step 1.

    Returns:
        A 3-tuple ``(status, preview_path, download_path)``; the output path
        is returned twice. On any failure the status is
        ``"Step 3 failed: <error>"`` and the other two items are ``None``.
    """
    try:
        original_src, synced_src, coords_src = (
            _normalize_upload_path(upload)
            for upload in (main_video, synced_face_video, face_coords)
        )

        # Checked in this order so the first missing input is the one reported.
        required_inputs = (
            (original_src, "Please upload the original/main video."),
            (synced_src, "Please upload the synced face video from manual Step 2."),
            (coords_src, "Please upload face coordinates (.pkl) from Step 1."),
        )
        for source, complaint in required_inputs:
            if not source:
                raise ValueError(complaint)

        step_dir = make_run_dir(WORK_DIR, "step3")
        staged_original = copy_file_to_dir(original_src, step_dir, "original_video.mp4")
        staged_synced = copy_file_to_dir(synced_src, step_dir, "synced_face_video.mp4")
        staged_coords = copy_file_to_dir(coords_src, step_dir, "face_coords_avg.pkl")

        target = step_dir / "final_output_with_audio.mp4"
        merged_path, audio_source = merge_synced_face(
            original_video_path=str(staged_original),
            synced_face_video_path=str(staged_synced),
            face_coords_path=str(staged_coords),
            final_output_path=str(target),
        )

        outcome = (
            "Step 3 completed. Final video generated with audio from synced face video."
            if audio_source == "synced_face_video"
            else "Step 3 completed. Final video generated without muxed audio (audio track not found)."
        )
    except Exception as err:
        # UI boundary: report the problem in the status box instead of raising.
        return f"Step 3 failed: {err}", None, None

    return outcome, merged_path, merged_path


# Build the three-tab UI. Components are rendered in the order they are
# created, so statement order below defines the on-screen layout.
with gr.Blocks(title="Dub Module - Step 1 and Step 3") as demo:
    # Intro text shown above the tabs.
    gr.Markdown(
        """
# Dub Module Gradio App (Step 1 + Step 3)
Workflow follows `how_to.txt` in this repo with these app boundaries:
- Step 1 is in-app: extract cropped face + `face_coords_avg.pkl`.
- Step 2 is manual and outside the app.
- Step 3 is in-app: merge synced face video back to original and produce final video.
- Separate audio upload is skipped because synced face video audio is used.
        """
    )

    # Tab 1: one video upload in; status text, preview and two downloads out.
    with gr.Tab("Step 1 - Extract Face + Coordinates"):
        gr.Markdown("Upload the main video to generate cropped face video and face coordinates.")
        s1_video = gr.File(label="Main Video", file_types=["video"], type="filepath")
        s1_run = gr.Button("Run Step 1")
        s1_status = gr.Textbox(label="Status", interactive=False)
        s1_preview = gr.Video(label="Cropped Face Preview")
        s1_face_file = gr.File(label="Download Cropped Face Video")
        s1_coords_file = gr.File(label="Download Face Coordinates (.pkl)")

        # The outputs list must match the 4-tuple returned by run_step1.
        s1_run.click(
            fn=run_step1,
            inputs=[s1_video],
            outputs=[s1_status, s1_preview, s1_face_file, s1_coords_file],
        )

    # Tab 2: instructions only; no inputs, outputs or event handlers.
    with gr.Tab("Step 2 - Manual (Outside App)"):
        gr.Markdown(
            """
Do manual lip-sync generation outside this app using the Step 1 cropped face video.
Then return to Step 3 tab with:
1. Original main video
2. Synced face video (with audio)
3. `face_coords_avg.pkl`
            """
        )

    # Tab 3: three uploads in; status text, preview and one download out.
    with gr.Tab("Step 3 - Merge and Final Video"):
        gr.Markdown("Upload inputs from Step 1 and manual Step 2 to generate final output video.")
        s3_main_video = gr.File(label="Original Main Video", file_types=["video"], type="filepath")
        s3_synced_video = gr.File(label="Synced Face Video", file_types=["video"], type="filepath")
        s3_coords = gr.File(label="Face Coordinates (.pkl)", file_types=[".pkl"], type="filepath")
        s3_run = gr.Button("Run Step 3")
        s3_status = gr.Textbox(label="Status", interactive=False)
        s3_preview = gr.Video(label="Final Output Preview")
        s3_file = gr.File(label="Download Final Video")

        # The outputs list must match the 3-tuple returned by run_step3.
        s3_run.click(
            fn=run_step3,
            inputs=[s3_main_video, s3_synced_video, s3_coords],
            outputs=[s3_status, s3_preview, s3_file],
        )


# Launch the app only when this file is run as a script; importing the module
# builds `demo` without starting it.
if __name__ == "__main__":
    demo.launch()