Spaces:

jbilcke-hf
/

ReCamMaster

Paused

App Files Files Community

jbilcke-hf committed on Jul 16

Commit

a383ee8

verified ·

1 Parent(s): 4bf9661

Update app.py

Browse files

Files changed (1) hide show

app.py +253 -57

app.py CHANGED Viewed

@@ -20,6 +20,10 @@ from huggingface_hub import hf_hub_download
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 # Camera transformation types
 CAMERA_TRANSFORMATIONS = {
     "1": "Pan Right",
@@ -39,13 +43,72 @@ model_manager = None
 pipe = None
 is_model_loaded = False
-def download_recammaster_checkpoint():
     """Download ReCamMaster checkpoint from HuggingFace using huggingface_hub"""
-    # Define paths
-    repo_id = "KwaiVGI/ReCamMaster-Wan2.1"
-    filename = "step20000.ckpt"
-    checkpoint_dir = Path("models/ReCamMaster/checkpoints")
-    checkpoint_path = checkpoint_dir / filename
     # Check if already exists
     if checkpoint_path.exists():
@@ -53,28 +116,156 @@ def download_recammaster_checkpoint():
         return checkpoint_path
     # Create directory if it doesn't exist
-    checkpoint_dir.mkdir(parents=True, exist_ok=True)
     # Download the checkpoint
     logger.info("Downloading ReCamMaster checkpoint from HuggingFace...")
-    logger.info(f"Repository: {repo_id}")
-    logger.info(f"File: {filename}")
     logger.info(f"Destination: {checkpoint_path}")
     try:
         # Download using huggingface_hub
         downloaded_path = hf_hub_download(
-            repo_id=repo_id,
-            filename=filename,
-            local_dir=checkpoint_dir,
             local_dir_use_symlinks=False
         )
         logger.info(f"✓ Successfully downloaded ReCamMaster checkpoint to {downloaded_path}!")
         return downloaded_path
     except Exception as e:
         logger.error(f"✗ Error downloading checkpoint: {e}")
         raise
 class Camera(object):
     def __init__(self, c2w):
         c2w_mat = np.array(c2w).reshape(4, 4)
@@ -117,40 +308,70 @@ def load_models(progress_callback=None):
     try:
         logger.info("Starting model loading...")
-        # First ensure the checkpoint is downloaded
         if progress_callback:
-            progress_callback(0.05, desc="Checking for ReCamMaster checkpoint...")
         try:
-            ckpt_path = download_recammaster_checkpoint()
             logger.info(f"Using checkpoint at {ckpt_path}")
         except Exception as e:
             error_msg = f"Error downloading ReCamMaster checkpoint: {str(e)}"
             logger.error(error_msg)
             return error_msg
         if progress_callback:
-            progress_callback(0.1, desc="Loading model manager...")
         # Load Wan2.1 pre-trained models
         model_manager = ModelManager(torch_dtype=torch.bfloat16, device="cpu")
         if progress_callback:
-            progress_callback(0.3, desc="Loading Wan2.1 models...")
-        model_manager.load_models([
-            "models/Wan-AI/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors",
-            "models/Wan-AI/Wan2.1-T2V-1.3B/models_t5_umt5-xxl-enc-bf16.pth",
-            "models/Wan-AI/Wan2.1-T2V-1.3B/Wan2.1_VAE.pth",
-        ])
         if progress_callback:
-            progress_callback(0.5, desc="Creating pipeline...")
         pipe = WanVideoReCamMasterPipeline.from_model_manager(model_manager, device="cuda")
         if progress_callback:
-            progress_callback(0.7, desc="Initializing ReCamMaster modules...")
         # Initialize additional modules introduced in ReCamMaster
         dim = pipe.dit.blocks[0].self_attn.q.weight.shape[0]
@@ -262,7 +483,7 @@ def process_video_for_recammaster(video_path, text_prompt, cam_type, height=480,
     video_tensor = frames.unsqueeze(0)  # Add batch dimension
     # Load camera trajectory
-    tgt_camera_path = "./example_test_data/cameras/camera_extrinsics.json"
     with open(tgt_camera_path, 'r') as file:
         cam_data = json.load(file)
@@ -360,23 +581,13 @@ def generate_recammaster_video(
         return None, f"Error: {str(e)}"
 # Create Gradio interface
-with gr.Blocks(title="ReCamMaster Demo") as demo:
-    # Show loading status
-    loading_status = gr.Textbox(
-        label="Model Loading Status",
-        value="Loading models, please wait...",
-        interactive=False,
-        visible=True
-    )
-    gr.Markdown("""
-    # 🎥 ReCamMaster Demo
     ReCamMaster allows you to re-capture videos with novel camera trajectories.
     Upload a video and select a camera transformation to see the magic!
-    **Note:** The ReCamMaster checkpoint will be automatically downloaded from HuggingFace when you start the app.
-    You still need to download Wan2.1 models using `python download_wan2.1.py` before running this demo.
     """)
     with gr.Row():
@@ -407,24 +618,8 @@ with gr.Blocks(title="ReCamMaster Demo") as demo:
             # Output section
             output_video = gr.Video(label="Output Video")
             status_output = gr.Textbox(label="Generation Status", interactive=False)
-    # Example videos
-    gr.Markdown("### Example Videos")
-    gr.Examples(
-        examples=[
-            ["example_test_data/videos/case0.mp4", "A person dancing", "1"],
-            ["example_test_data/videos/case1.mp4", "A scenic view", "5"],
-        ],
-        inputs=[video_input, text_prompt, camera_type],
-    )
-    # Load models automatically when the interface loads
-    def on_load():
-        status = load_models()
-        return gr.update(value=status, visible=True if "Error" in status else False)
-    demo.load(on_load, outputs=[loading_status])
     # Event handlers
     generate_btn.click(
         fn=generate_recammaster_video,
@@ -433,4 +628,5 @@ with gr.Blocks(title="ReCamMaster Demo") as demo:
     )
 if __name__ == "__main__":
     demo.launch(share=True)

 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# Get model storage path from environment variable or use default
+MODELS_ROOT_DIR = os.environ.get("RECAMMASTER_MODELS_DIR", "/data/models")
+logger.info(f"Using models root directory: {MODELS_ROOT_DIR}")
 # Camera transformation types
 CAMERA_TRANSFORMATIONS = {
     "1": "Pan Right",
 pipe = None
 is_model_loaded = False
+# Define model repositories and files
+WAN21_REPO_ID = "Wan-AI/Wan2.1-T2V-1.3B"
+WAN21_LOCAL_DIR = f"{MODELS_ROOT_DIR}/Wan-AI/Wan2.1-T2V-1.3B"
+WAN21_FILES = [
+    "diffusion_pytorch_model.safetensors",
+    "models_t5_umt5-xxl-enc-bf16.pth",
+    "Wan2.1_VAE.pth"
+]
+RECAMMASTER_REPO_ID = "KwaiVGI/ReCamMaster-Wan2.1"
+RECAMMASTER_CHECKPOINT_FILE = "step20000.ckpt"
+RECAMMASTER_LOCAL_DIR = f"{MODELS_ROOT_DIR}/ReCamMaster/checkpoints"
+# Define test data directory
+TEST_DATA_DIR = "example_test_data"
def download_wan21_models(progress_callback=None):
    """Make sure every Wan2.1 weight file listed in WAN21_FILES is on disk.

    Files already present under WAN21_LOCAL_DIR are reused; missing ones are
    fetched from the WAN21_REPO_ID repository with ``hf_hub_download``.

    Args:
        progress_callback: Optional callable invoked as
            ``progress_callback(fraction, desc=message)`` before each file is
            checked, before each download starts, and once at the end with
            ``1.0``.

    Returns:
        A list with one entry per file, in WAN21_FILES order: the stringified
        local path for files that already existed, or the value returned by
        ``hf_hub_download`` for files that were fetched.

    Raises:
        Exception: Any error raised while downloading a file is logged and
            re-raised unchanged.
    """
    target_dir = Path(WAN21_LOCAL_DIR)
    file_count = len(WAN21_FILES)
    resolved = []

    def report(fraction, message):
        # Progress reporting is optional; do nothing when no callback is set.
        if progress_callback:
            progress_callback(fraction, desc=message)

    # Create directory if it doesn't exist
    target_dir.mkdir(parents=True, exist_ok=True)

    for position, filename in enumerate(WAN21_FILES, start=1):
        fraction = (position - 1) / file_count
        candidate = target_dir / filename
        report(fraction, f"Checking Wan2.1 file {position}/{file_count}: {filename}")

        if not candidate.exists():
            logger.info(f"Downloading {filename} from {WAN21_REPO_ID}...")
            report(fraction, f"Downloading Wan2.1 file {position}/{file_count}: {filename}")
            try:
                fetched = hf_hub_download(
                    repo_id=WAN21_REPO_ID,
                    filename=filename,
                    local_dir=WAN21_LOCAL_DIR,
                    local_dir_use_symlinks=False
                )
                logger.info(f"✓ Successfully downloaded {filename} to {fetched}!")
                resolved.append(fetched)
            except Exception as e:
                logger.error(f"✗ Error downloading {filename}: {e}")
                raise
        else:
            # Nothing to fetch: reuse the file that is already on disk.
            logger.info(f"✓ {filename} already exists at {candidate}")
            resolved.append(str(candidate))

    report(1.0, "All Wan2.1 models downloaded successfully!")
    return resolved
def download_recammaster_checkpoint(progress_callback=None):
    """Download the ReCamMaster checkpoint from HuggingFace if it is missing.

    The checkpoint is stored as RECAMMASTER_CHECKPOINT_FILE inside
    RECAMMASTER_LOCAL_DIR. When that file already exists nothing is
    downloaded.

    Args:
        progress_callback: Optional callable invoked as
            ``progress_callback(fraction, desc=message)`` with ``0.0`` before
            the download starts and ``1.0`` once it has finished.

    Returns:
        pathlib.Path: Location of the checkpoint. Both the cache-hit branch
        and the download branch return a ``Path`` so callers always receive
        the same type.

    Raises:
        Exception: Any error raised by ``hf_hub_download`` is logged and
            re-raised unchanged.
    """
    checkpoint_dir = Path(RECAMMASTER_LOCAL_DIR)
    checkpoint_path = checkpoint_dir / RECAMMASTER_CHECKPOINT_FILE

    # Check if already exists
    if checkpoint_path.exists():
        # Log the cache hit, mirroring what download_wan21_models does for
        # files that are already present.
        logger.info(f"✓ ReCamMaster checkpoint already exists at {checkpoint_path}")
        return checkpoint_path

    # Create directory if it doesn't exist
    checkpoint_dir.mkdir(parents=True, exist_ok=True)

    # Download the checkpoint
    logger.info("Downloading ReCamMaster checkpoint from HuggingFace...")
    logger.info(f"Repository: {RECAMMASTER_REPO_ID}")
    logger.info(f"File: {RECAMMASTER_CHECKPOINT_FILE}")
    logger.info(f"Destination: {checkpoint_path}")

    if progress_callback:
        progress_callback(0.0, desc="Downloading ReCamMaster checkpoint...")

    try:
        # Download using huggingface_hub
        downloaded_path = hf_hub_download(
            repo_id=RECAMMASTER_REPO_ID,
            filename=RECAMMASTER_CHECKPOINT_FILE,
            local_dir=RECAMMASTER_LOCAL_DIR,
            local_dir_use_symlinks=False
        )
    except Exception as e:
        logger.error(f"✗ Error downloading checkpoint: {e}")
        raise

    logger.info(f"✓ Successfully downloaded ReCamMaster checkpoint to {downloaded_path}!")
    if progress_callback:
        progress_callback(1.0, desc="ReCamMaster checkpoint downloaded successfully!")

    # Normalise to Path so this branch matches the cache-hit return type.
    return Path(downloaded_path)
def _set_rotation_x(matrix, angle):
    """Write a rotation by *angle* radians about the X axis into *matrix*."""
    matrix[1, 1] = np.cos(angle)
    matrix[1, 2] = -np.sin(angle)
    matrix[2, 1] = np.sin(angle)
    matrix[2, 2] = np.cos(angle)


def _set_rotation_y(matrix, angle):
    """Write a rotation by *angle* radians about the Y axis into *matrix*."""
    matrix[0, 0] = np.cos(angle)
    matrix[0, 2] = np.sin(angle)
    matrix[2, 0] = -np.sin(angle)
    matrix[2, 2] = np.cos(angle)


def _sample_camera_matrix(cam_idx, frame_idx):
    """Return the 4x4 placeholder matrix for camera type *cam_idx* at *frame_idx*.

    These are simplistic sample motions, not calibrated trajectories. Any
    *cam_idx* outside 1-10 yields the identity matrix.
    """
    matrix = np.eye(4)
    if cam_idx == 1:  # Pan Right
        matrix[0, 3] = 0.01 * frame_idx  # Move right over time
    elif cam_idx == 2:  # Pan Left
        matrix[0, 3] = -0.01 * frame_idx  # Move left over time
    elif cam_idx == 3:  # Tilt Up
        _set_rotation_x(matrix, 0.005 * frame_idx)
    elif cam_idx == 4:  # Tilt Down (opposite direction)
        _set_rotation_x(matrix, -0.005 * frame_idx)
    elif cam_idx == 5:  # Zoom In
        matrix[2, 3] = -0.01 * frame_idx  # Move forward over time
    elif cam_idx == 6:  # Zoom Out
        matrix[2, 3] = 0.01 * frame_idx  # Move backward over time
    elif cam_idx == 7:  # Translate Up (with rotation)
        matrix[1, 3] = 0.01 * frame_idx  # Move up over time
        _set_rotation_y(matrix, 0.003 * frame_idx)
    elif cam_idx == 8:  # Translate Down (with rotation)
        matrix[1, 3] = -0.01 * frame_idx  # Move down over time
        _set_rotation_y(matrix, -0.003 * frame_idx)
    elif cam_idx in (9, 10):  # Arc Left / Arc Right (with rotation)
        angle = 0.005 * frame_idx if cam_idx == 9 else -0.005 * frame_idx
        radius = 2.0
        matrix[0, 3] = -radius * np.sin(angle)
        matrix[2, 3] = -radius * np.cos(angle) + radius
        # Rotate to look at center
        _set_rotation_y(matrix, angle + np.pi)
    return matrix


def _format_camera_matrix(matrix):
    """Serialise a 4x4 matrix as ``[r00 r01 r02 r03] [r10 ...] [...] [...]``."""
    return " ".join(
        "[" + " ".join(str(value) for value in row) + "]" for row in matrix
    )


def create_test_data_structure(progress_callback=None):
    """Create sample camera extrinsics data for testing.

    Ensures ``TEST_DATA_DIR/cameras`` and ``TEST_DATA_DIR/videos`` exist and,
    unless ``cameras/camera_extrinsics.json`` is already present, writes a
    JSON file with 81 frames (keys ``frame0`` .. ``frame80``), each holding
    ten camera entries (keys ``cam01`` .. ``cam10``).

    Each entry is a string with one bracketed group per matrix row. The
    previous implementation bracketed only the first three scalars
    (``[ a ] [ b ] [ c ] [ d e ... p ]``), so the string could not be split
    back into four rows of four values.

    Args:
        progress_callback: Optional callable invoked as
            ``progress_callback(fraction, desc=message)`` at each stage.

    Returns:
        None.
    """
    if progress_callback:
        progress_callback(0.0, desc="Creating test data structure...")

    # Create directories
    data_dir = Path(f"{TEST_DATA_DIR}/cameras")
    videos_dir = Path(f"{TEST_DATA_DIR}/videos")
    data_dir.mkdir(parents=True, exist_ok=True)
    videos_dir.mkdir(parents=True, exist_ok=True)

    camera_file = data_dir / "camera_extrinsics.json"

    # Skip if file already exists
    if camera_file.exists():
        logger.info(f"✓ Camera extrinsics already exist at {camera_file}")
        if progress_callback:
            progress_callback(1.0, desc="Test data structure already exists")
        return

    if progress_callback:
        progress_callback(0.3, desc="Generating camera extrinsics data...")

    # 81 frames, each with camera types 1-10
    camera_data = {
        f"frame{frame_idx}": {
            f"cam{cam_idx:02d}": _format_camera_matrix(
                _sample_camera_matrix(cam_idx, frame_idx)
            )
            for cam_idx in range(1, 11)
        }
        for frame_idx in range(81)
    }

    if progress_callback:
        progress_callback(0.7, desc="Saving camera extrinsics data...")

    # Save camera extrinsics to JSON file
    with open(camera_file, 'w', encoding="utf-8") as f:
        json.dump(camera_data, f, indent=2)

    logger.info(f"Created sample camera extrinsics at {camera_file}")
    logger.info(f"Created directory for example videos at {videos_dir}")

    if progress_callback:
        progress_callback(1.0, desc="Test data structure created successfully!")
 class Camera(object):
     def __init__(self, c2w):
         c2w_mat = np.array(c2w).reshape(4, 4)
     try:
         logger.info("Starting model loading...")
+        # First create the test data structure
+        if progress_callback:
+            progress_callback(0.05, desc="Setting up test data structure...")
+        try:
+            create_test_data_structure(progress_callback)
+        except Exception as e:
+            error_msg = f"Error creating test data structure: {str(e)}"
+            logger.error(error_msg)
+            return error_msg
+        # Second, ensure the checkpoint is downloaded
         if progress_callback:
+            progress_callback(0.1, desc="Checking for ReCamMaster checkpoint...")
         try:
+            ckpt_path = download_recammaster_checkpoint(progress_callback)
             logger.info(f"Using checkpoint at {ckpt_path}")
         except Exception as e:
             error_msg = f"Error downloading ReCamMaster checkpoint: {str(e)}"
             logger.error(error_msg)
             return error_msg
+        # Third, download Wan2.1 models if needed
+        if progress_callback:
+            progress_callback(0.2, desc="Checking for Wan2.1 models...")
+        try:
+            wan21_paths = download_wan21_models(progress_callback)
+            logger.info(f"Using Wan2.1 models: {wan21_paths}")
+        except Exception as e:
+            error_msg = f"Error downloading Wan2.1 models: {str(e)}"
+            logger.error(error_msg)
+            return error_msg
+        # Now, load the models
         if progress_callback:
+            progress_callback(0.4, desc="Loading model manager...")
         # Load Wan2.1 pre-trained models
         model_manager = ModelManager(torch_dtype=torch.bfloat16, device="cpu")
         if progress_callback:
+            progress_callback(0.5, desc="Loading Wan2.1 models...")
+        # Build full paths for the model files
+        model_files = [f"{WAN21_LOCAL_DIR}/{filename}" for filename in WAN21_FILES]
+        for model_file in model_files:
+            logger.info(f"Loading model from: {model_file}")
+            if not os.path.exists(model_file):
+                error_msg = f"Error: Model file not found: {model_file}"
+                logger.error(error_msg)
+                return error_msg
+        model_manager.load_models(model_files)
         if progress_callback:
+            progress_callback(0.7, desc="Creating pipeline...")
         pipe = WanVideoReCamMasterPipeline.from_model_manager(model_manager, device="cuda")
         if progress_callback:
+            progress_callback(0.8, desc="Initializing ReCamMaster modules...")
         # Initialize additional modules introduced in ReCamMaster
         dim = pipe.dit.blocks[0].self_attn.q.weight.shape[0]
     video_tensor = frames.unsqueeze(0)  # Add batch dimension
     # Load camera trajectory
+    tgt_camera_path = f"./{TEST_DATA_DIR}/cameras/camera_extrinsics.json"
     with open(tgt_camera_path, 'r') as file:
         cam_data = json.load(file)
         return None, f"Error: {str(e)}"
 # Create Gradio interface
+with gr.Blocks(title="ReCamMaster") as demo:
+    gr.Markdown(f"""
+    # 🎥 ReCamMaster
     ReCamMaster allows you to re-capture videos with novel camera trajectories.
     Upload a video and select a camera transformation to see the magic!
     """)
     with gr.Row():
             # Output section
             output_video = gr.Video(label="Output Video")
             status_output = gr.Textbox(label="Generation Status", interactive=False)
     # Event handlers
     generate_btn.click(
         fn=generate_recammaster_video,
     )
 if __name__ == "__main__":
+    load_models()
     demo.launch(share=True)