xizaoqu committed on
Commit ·
254ff82
1
Parent(s): 6e9cdb7
update
Browse files- algorithms/worldmem/df_video.py +0 -1
- app.py +6 -5
algorithms/worldmem/df_video.py
CHANGED
|
@@ -813,7 +813,6 @@ class WorldMemMinecraft(DiffusionForcingBase):
|
|
| 813 |
new_pose_condition = last_pose_condition + new_pose_condition_offset
|
| 814 |
new_pose_condition[:,3:] = new_pose_condition[:,3:] * 15
|
| 815 |
new_pose_condition[:,3:] %= 360
|
| 816 |
-
print(new_pose_condition)
|
| 817 |
self.actions = torch.cat([self.actions, curr_actions[None, None].to(device)])
|
| 818 |
self.poses = torch.cat([self.poses, new_pose_condition[None].to(device)])
|
| 819 |
new_c2w_mat = euler_to_camera_to_world_matrix(new_pose_condition)
|
|
|
|
| 813 |
new_pose_condition = last_pose_condition + new_pose_condition_offset
|
| 814 |
new_pose_condition[:,3:] = new_pose_condition[:,3:] * 15
|
| 815 |
new_pose_condition[:,3:] %= 360
|
|
|
|
| 816 |
self.actions = torch.cat([self.actions, curr_actions[None, None].to(device)])
|
| 817 |
self.poses = torch.cat([self.poses, new_pose_condition[None].to(device)])
|
| 818 |
new_c2w_mat = euler_to_camera_to_world_matrix(new_pose_condition)
|
app.py
CHANGED
|
@@ -173,13 +173,14 @@ def run(cfg: DictConfig):
|
|
| 173 |
memory_frames.append(load_image_as_tensor(DEFAULT_IMAGE))
|
| 174 |
|
| 175 |
@spaces.GPU()
|
| 176 |
-
def run_interactive(
|
|
|
|
| 177 |
new_frame = algo.interactive(first_frame,
|
| 178 |
action,
|
| 179 |
first_pose,
|
| 180 |
curr_frame,
|
| 181 |
device=device)
|
| 182 |
-
return
|
| 183 |
|
| 184 |
def set_denoising_steps(denoising_steps, sampling_timesteps_state):
|
| 185 |
algo.sampling_timesteps = denoising_steps
|
|
@@ -197,7 +198,7 @@ def run(cfg: DictConfig):
|
|
| 197 |
|
| 198 |
for i in range(len(actions)):
|
| 199 |
memory_curr_frame += 1
|
| 200 |
-
|
| 201 |
actions[i],
|
| 202 |
None,
|
| 203 |
memory_curr_frame,
|
|
@@ -229,7 +230,7 @@ def run(cfg: DictConfig):
|
|
| 229 |
memory_curr_frame = 0
|
| 230 |
input_history = ""
|
| 231 |
|
| 232 |
-
|
| 233 |
actions[0],
|
| 234 |
poses[0],
|
| 235 |
memory_curr_frame,
|
|
@@ -242,7 +243,7 @@ def run(cfg: DictConfig):
|
|
| 242 |
reset()
|
| 243 |
return SELECTED_IMAGE
|
| 244 |
|
| 245 |
-
|
| 246 |
actions[0],
|
| 247 |
poses[0],
|
| 248 |
memory_curr_frame,
|
|
|
|
| 173 |
memory_frames.append(load_image_as_tensor(DEFAULT_IMAGE))
|
| 174 |
|
| 175 |
@spaces.GPU()
|
| 176 |
+
def run_interactive(first_frame, action, first_pose, curr_frame, device):
|
| 177 |
+
global algo
|
| 178 |
new_frame = algo.interactive(first_frame,
|
| 179 |
action,
|
| 180 |
first_pose,
|
| 181 |
curr_frame,
|
| 182 |
device=device)
|
| 183 |
+
return new_frame
|
| 184 |
|
| 185 |
def set_denoising_steps(denoising_steps, sampling_timesteps_state):
|
| 186 |
algo.sampling_timesteps = denoising_steps
|
|
|
|
| 198 |
|
| 199 |
for i in range(len(actions)):
|
| 200 |
memory_curr_frame += 1
|
| 201 |
+
new_frame = run_interactive(memory_frames[0],
|
| 202 |
actions[i],
|
| 203 |
None,
|
| 204 |
memory_curr_frame,
|
|
|
|
| 230 |
memory_curr_frame = 0
|
| 231 |
input_history = ""
|
| 232 |
|
| 233 |
+
_ = run_interactive(memory_frames[0],
|
| 234 |
actions[0],
|
| 235 |
poses[0],
|
| 236 |
memory_curr_frame,
|
|
|
|
| 243 |
reset()
|
| 244 |
return SELECTED_IMAGE
|
| 245 |
|
| 246 |
+
_ = run_interactive(memory_frames[0],
|
| 247 |
actions[0],
|
| 248 |
poses[0],
|
| 249 |
memory_curr_frame,
|