"""Gradio front end for the dub module: Step 1 (extract) and Step 3 (merge).

Step 1 extracts a cropped face video plus a face-coordinates file from the
main video.  Step 2 (lip-sync generation) happens outside this app.  Step 3
merges the synced face video back onto the original video.
"""

import logging
import os
from pathlib import Path

import gradio as gr

from pipeline import (
    copy_file_to_dir,
    extract_face_and_coords,
    make_run_dir,
    merge_synced_face,
)

logger = logging.getLogger(__name__)

BASE_DIR = Path(__file__).resolve().parent
WORK_DIR = BASE_DIR / "work"
# Created at import time so every handler can assume the directory exists.
WORK_DIR.mkdir(parents=True, exist_ok=True)


def _normalize_upload_path(file_obj):
    """Return the filesystem path of an uploaded file as ``str``, or ``None``.

    Args:
        file_obj: Value delivered by an upload component: ``None``, a path
            string, an ``os.PathLike`` object, or a file-like object that
            exposes its path through a ``name`` attribute.

    Returns:
        The path as ``str``; ``None`` when nothing was uploaded.
    """
    if file_obj is None:
        return None
    if isinstance(file_obj, str):
        return file_obj
    if isinstance(file_obj, os.PathLike):
        return os.fsdecode(file_obj)
    # File-like wrappers (e.g. tempfile objects) carry their path in ``.name``;
    # ``str()`` on them would produce the object repr instead of a usable path.
    name = getattr(file_obj, "name", None)
    if isinstance(name, str) and name:
        return name
    return str(file_obj)


def _require_upload(file_obj, message):
    """Normalize an upload to a path, raising ``ValueError(message)`` if absent."""
    path = _normalize_upload_path(file_obj)
    if not path:
        raise ValueError(message)
    return path


def run_step1(main_video):
    """Extract the cropped face video and face coordinates from the main video.

    Args:
        main_video: Uploaded main/original video (path or upload object).

    Returns:
        A 4-tuple ``(status, preview_path, cropped_face_path, coords_path)``
        matching the Step 1 output components.  On failure the status holds
        the error message and the three paths are ``None``.
    """
    try:
        main_path = _require_upload(
            main_video, "Please upload the main/original video."
        )

        run_dir = make_run_dir(WORK_DIR, "step1")
        local_main = copy_file_to_dir(main_path, run_dir, "main_video.mp4")

        coords_path, cropped_face_path, bbox = extract_face_and_coords(
            video_path=str(local_main),
            output_dir=str(run_dir),
            coords_name="face_coords_avg.pkl",
            cropped_name="cropped_face.mp4",
        )
    except Exception as exc:
        # UI boundary: report the error to the user, but keep the traceback
        # in the log so failures inside the pipeline remain diagnosable.
        logger.exception("Step 1 failed")
        return f"Step 1 failed: {exc}", None, None, None

    status = f"Step 1 completed. Face bbox saved: {bbox}"
    # The cropped face video feeds both the preview and the download widget.
    return status, cropped_face_path, cropped_face_path, coords_path


def run_step3(main_video, synced_face_video, face_coords):
    """Merge the synced face video back onto the original video.

    Args:
        main_video: Uploaded original/main video.
        synced_face_video: Uploaded synced face video from manual Step 2.
        face_coords: Uploaded face coordinates file (.pkl) from Step 1.

    Returns:
        A 3-tuple ``(status, preview_path, output_path)`` matching the Step 3
        output components.  On failure the status holds the error message and
        both paths are ``None``.
    """
    try:
        main_path = _require_upload(
            main_video, "Please upload the original/main video."
        )
        synced_path = _require_upload(
            synced_face_video,
            "Please upload the synced face video from manual Step 2.",
        )
        coords_path = _require_upload(
            face_coords, "Please upload face coordinates (.pkl) from Step 1."
        )

        run_dir = make_run_dir(WORK_DIR, "step3")
        local_main = copy_file_to_dir(main_path, run_dir, "original_video.mp4")
        local_synced = copy_file_to_dir(
            synced_path, run_dir, "synced_face_video.mp4"
        )
        # NOTE(review): this file is user-uploaded.  If the pipeline unpickles
        # it (the .pkl extension suggests so - confirm), loading it can execute
        # arbitrary code; only expose this app to trusted users.
        local_coords = copy_file_to_dir(
            coords_path, run_dir, "face_coords_avg.pkl"
        )

        final_path = run_dir / "final_output_with_audio.mp4"
        output_path, audio_used = merge_synced_face(
            original_video_path=str(local_main),
            synced_face_video_path=str(local_synced),
            face_coords_path=str(local_coords),
            final_output_path=str(final_path),
        )
    except Exception as exc:
        # UI boundary: report the error to the user, but keep the traceback
        # in the log so failures inside the pipeline remain diagnosable.
        logger.exception("Step 3 failed")
        return f"Step 3 failed: {exc}", None, None

    if audio_used == "synced_face_video":
        status = "Step 3 completed. Final video generated with audio from synced face video."
    else:
        status = "Step 3 completed. Final video generated without muxed audio (audio track not found)."
    # The final video feeds both the preview and the download widget.
    return status, output_path, output_path


with gr.Blocks(title="Dub Module - Step 1 and Step 3") as demo:
    gr.Markdown(
        """
        # Dub Module Gradio App (Step 1 + Step 3)
        Workflow follows `how_to.txt` in this repo with these app boundaries:
        - Step 1 is in-app: extract cropped face + `face_coords_avg.pkl`.
        - Step 2 is manual and outside the app.
        - Step 3 is in-app: merge synced face video back to original and produce final video.
        - Separate audio upload is skipped because synced face video audio is used.
        """
    )

    with gr.Tab("Step 1 - Extract Face + Coordinates"):
        gr.Markdown("Upload the main video to generate cropped face video and face coordinates.")
        s1_video = gr.File(label="Main Video", file_types=["video"], type="filepath")
        s1_run = gr.Button("Run Step 1")
        s1_status = gr.Textbox(label="Status", interactive=False)
        s1_preview = gr.Video(label="Cropped Face Preview")
        s1_face_file = gr.File(label="Download Cropped Face Video")
        s1_coords_file = gr.File(label="Download Face Coordinates (.pkl)")

        s1_run.click(
            fn=run_step1,
            inputs=[s1_video],
            outputs=[s1_status, s1_preview, s1_face_file, s1_coords_file],
        )

    with gr.Tab("Step 2 - Manual (Outside App)"):
        gr.Markdown(
            """
            Do manual lip-sync generation outside this app using the Step 1 cropped face video.
            Then return to Step 3 tab with:
            1. Original main video
            2. Synced face video (with audio)
            3. `face_coords_avg.pkl`
            """
        )

    with gr.Tab("Step 3 - Merge and Final Video"):
        gr.Markdown("Upload inputs from Step 1 and manual Step 2 to generate final output video.")
        s3_main_video = gr.File(label="Original Main Video", file_types=["video"], type="filepath")
        s3_synced_video = gr.File(label="Synced Face Video", file_types=["video"], type="filepath")
        s3_coords = gr.File(label="Face Coordinates (.pkl)", file_types=[".pkl"], type="filepath")
        s3_run = gr.Button("Run Step 3")
        s3_status = gr.Textbox(label="Status", interactive=False)
        s3_preview = gr.Video(label="Final Output Preview")
        s3_file = gr.File(label="Download Final Video")

        s3_run.click(
            fn=run_step3,
            inputs=[s3_main_video, s3_synced_video, s3_coords],
            outputs=[s3_status, s3_preview, s3_file],
        )


if __name__ == "__main__":
    demo.launch()