# Hosting-page residue (not code): author "tushar310", commit message "v1",
# commit 1ce4e4b (verified).
from pathlib import Path
import gradio as gr
from pipeline import (
copy_file_to_dir,
extract_face_and_coords,
make_run_dir,
merge_synced_face,
)
# Absolute directory that contains this module.
BASE_DIR = Path(__file__).resolve().parent

# Working root handed to make_run_dir() by both steps; created eagerly at
# import time so it is already present when a step runs.
WORK_DIR = BASE_DIR.joinpath("work")
WORK_DIR.mkdir(exist_ok=True, parents=True)
def _normalize_upload_path(file_obj):
    """Return the filesystem path of an uploaded file as a string.

    Args:
        file_obj: The value delivered by an upload component. May be
            ``None`` (nothing uploaded), a ``str`` path, an ``os.PathLike``
            object, or a file-like wrapper whose ``name`` attribute holds
            the on-disk path.

    Returns:
        ``None`` when ``file_obj`` is ``None``; otherwise a ``str`` path.
        A ``str`` input is returned unchanged (including the empty string,
        which callers treat as "missing").
    """
    import os  # local import so the helper does not rely on file-level imports

    if file_obj is None:
        return None
    if isinstance(file_obj, str):
        return file_obj

    # Path-like objects must be handled before the ``.name`` check below:
    # pathlib paths also have a ``name`` attribute, but it is only the final
    # path component, not the full path.
    if isinstance(file_obj, os.PathLike):
        return os.fsdecode(file_obj)

    # Temp-file style wrappers keep the real location in ``.name``; calling
    # str() on them would produce an object repr instead of a usable path.
    name = getattr(file_obj, "name", None)
    if isinstance(name, str) and name:
        return name

    return str(file_obj)
def run_step1(main_video):
    """Run Step 1: extract the cropped face video and face coordinates.

    Args:
        main_video: Upload value for the main/original video.

    Returns:
        A 4-tuple ``(status, preview_video, face_file, coords_file)``.
        On success the cropped face path is returned twice (once for the
        preview, once for the download) followed by the coordinates path.
        On any failure the status carries the error text and the three
        remaining slots are ``None``.
    """
    try:
        source_video = _normalize_upload_path(main_video)
        if not source_video:
            raise ValueError("Please upload the main/original video.")

        workspace = make_run_dir(WORK_DIR, "step1")
        staged_video = copy_file_to_dir(source_video, workspace, "main_video.mp4")

        extraction = extract_face_and_coords(
            video_path=str(staged_video),
            output_dir=str(workspace),
            coords_name="face_coords_avg.pkl",
            cropped_name="cropped_face.mp4",
        )
        coords_file, face_clip, bbox = extraction

        # Built inside the try so a failure while formatting bbox is reported
        # through the same "Step 1 failed" channel.
        message = f"Step 1 completed. Face bbox saved: {bbox}"
    except Exception as exc:
        # UI boundary: report the problem in the status box instead of raising.
        return f"Step 1 failed: {exc}", None, None, None

    return message, face_clip, face_clip, coords_file
def run_step3(main_video, synced_face_video, face_coords):
    """Run Step 3: merge the synced face video back into the original.

    Args:
        main_video: Upload value for the original/main video.
        synced_face_video: Upload value for the synced face video produced
            in the manual Step 2.
        face_coords: Upload value for the face coordinates (.pkl) file
            produced in Step 1.

    Returns:
        A 3-tuple ``(status, preview_video, download_file)``. On success the
        output path is returned twice; on any failure the status carries the
        error text and the other two slots are ``None``.
    """
    try:
        original_src = _normalize_upload_path(main_video)
        synced_src = _normalize_upload_path(synced_face_video)
        coords_src = _normalize_upload_path(face_coords)

        # Checked in this order so the first missing input decides the message.
        required_uploads = (
            (original_src, "Please upload the original/main video."),
            (synced_src, "Please upload the synced face video from manual Step 2."),
            (coords_src, "Please upload face coordinates (.pkl) from Step 1."),
        )
        for upload, complaint in required_uploads:
            if not upload:
                raise ValueError(complaint)

        workspace = make_run_dir(WORK_DIR, "step3")
        staged_original = copy_file_to_dir(original_src, workspace, "original_video.mp4")
        staged_synced = copy_file_to_dir(synced_src, workspace, "synced_face_video.mp4")
        staged_coords = copy_file_to_dir(coords_src, workspace, "face_coords_avg.pkl")

        target = workspace / "final_output_with_audio.mp4"
        merged = merge_synced_face(
            original_video_path=str(staged_original),
            synced_face_video_path=str(staged_synced),
            face_coords_path=str(staged_coords),
            final_output_path=str(target),
        )
        output_path, audio_used = merged

        status = (
            "Step 3 completed. Final video generated with audio from synced face video."
            if audio_used == "synced_face_video"
            else "Step 3 completed. Final video generated without muxed audio (audio track not found)."
        )
    except Exception as exc:
        # UI boundary: report the problem in the status box instead of raising.
        return f"Step 3 failed: {exc}", None, None

    return status, output_path, output_path
# UI definition. Components are created in display order inside the Blocks
# context, so the statement order below is the page layout.
with gr.Blocks(title="Dub Module - Step 1 and Step 3") as demo:
    # Intro text. The Markdown content is kept at column 0 so the string
    # literal is unchanged.
    gr.Markdown(
        """
# Dub Module Gradio App (Step 1 + Step 3)
Workflow follows `how_to.txt` in this repo with these app boundaries:
- Step 1 is in-app: extract cropped face + `face_coords_avg.pkl`.
- Step 2 is manual and outside the app.
- Step 3 is in-app: merge synced face video back to original and produce final video.
- Separate audio upload is skipped because synced face video audio is used.
"""
    )
    # Tab 1: one video input; the button runs run_step1.
    with gr.Tab("Step 1 - Extract Face + Coordinates"):
        gr.Markdown("Upload the main video to generate cropped face video and face coordinates.")
        s1_video = gr.File(label="Main Video", file_types=["video"], type="filepath")
        s1_run = gr.Button("Run Step 1")
        s1_status = gr.Textbox(label="Status", interactive=False)
        s1_preview = gr.Video(label="Cropped Face Preview")
        s1_face_file = gr.File(label="Download Cropped Face Video")
        s1_coords_file = gr.File(label="Download Face Coordinates (.pkl)")
        # The outputs list must match run_step1's 4-tuple:
        # (status, cropped face path, cropped face path, coords path).
        s1_run.click(
            fn=run_step1,
            inputs=[s1_video],
            outputs=[s1_status, s1_preview, s1_face_file, s1_coords_file],
        )
    # Tab 2: instructions only; no components are wired to callbacks here.
    with gr.Tab("Step 2 - Manual (Outside App)"):
        gr.Markdown(
            """
Do manual lip-sync generation outside this app using the Step 1 cropped face video.
Then return to Step 3 tab with:
1. Original main video
2. Synced face video (with audio)
3. `face_coords_avg.pkl`
"""
        )
    # Tab 3: three file inputs; the button runs run_step3.
    with gr.Tab("Step 3 - Merge and Final Video"):
        gr.Markdown("Upload inputs from Step 1 and manual Step 2 to generate final output video.")
        s3_main_video = gr.File(label="Original Main Video", file_types=["video"], type="filepath")
        s3_synced_video = gr.File(label="Synced Face Video", file_types=["video"], type="filepath")
        s3_coords = gr.File(label="Face Coordinates (.pkl)", file_types=[".pkl"], type="filepath")
        s3_run = gr.Button("Run Step 3")
        s3_status = gr.Textbox(label="Status", interactive=False)
        s3_preview = gr.Video(label="Final Output Preview")
        s3_file = gr.File(label="Download Final Video")
        # Inputs are passed positionally as (main_video, synced_face_video,
        # face_coords); the outputs list must match run_step3's 3-tuple:
        # (status, output path, output path).
        s3_run.click(
            fn=run_step3,
            inputs=[s3_main_video, s3_synced_video, s3_coords],
            outputs=[s3_status, s3_preview, s3_file],
        )
# Launch only when executed as a script; importing the module still builds
# `demo` but does not launch it.
if __name__ == "__main__":
    demo.launch()