xizaoqu committed on
Commit ·
254ff82
1
Parent(s): 6e9cdb7
update
Browse files- algorithms/worldmem/df_video.py +0 -1
- app.py +6 -5
algorithms/worldmem/df_video.py
CHANGED
|
@@ -813,7 +813,6 @@ class WorldMemMinecraft(DiffusionForcingBase):
|
|
| 813 |
new_pose_condition = last_pose_condition + new_pose_condition_offset
|
| 814 |
new_pose_condition[:,3:] = new_pose_condition[:,3:] * 15
|
| 815 |
new_pose_condition[:,3:] %= 360
|
| 816 |
-
print(new_pose_condition)
|
| 817 |
self.actions = torch.cat([self.actions, curr_actions[None, None].to(device)])
|
| 818 |
self.poses = torch.cat([self.poses, new_pose_condition[None].to(device)])
|
| 819 |
new_c2w_mat = euler_to_camera_to_world_matrix(new_pose_condition)
|
|
|
|
| 813 |
new_pose_condition = last_pose_condition + new_pose_condition_offset
|
| 814 |
new_pose_condition[:,3:] = new_pose_condition[:,3:] * 15
|
| 815 |
new_pose_condition[:,3:] %= 360
|
|
|
|
| 816 |
self.actions = torch.cat([self.actions, curr_actions[None, None].to(device)])
|
| 817 |
self.poses = torch.cat([self.poses, new_pose_condition[None].to(device)])
|
| 818 |
new_c2w_mat = euler_to_camera_to_world_matrix(new_pose_condition)
|
app.py
CHANGED
|
@@ -173,13 +173,14 @@ def run(cfg: DictConfig):
|
|
| 173 |
memory_frames.append(load_image_as_tensor(DEFAULT_IMAGE))
|
| 174 |
|
| 175 |
@spaces.GPU()
|
| 176 |
-
def run_interactive(
|
|
|
|
| 177 |
new_frame = algo.interactive(first_frame,
|
| 178 |
action,
|
| 179 |
first_pose,
|
| 180 |
curr_frame,
|
| 181 |
device=device)
|
| 182 |
-
return
|
| 183 |
|
| 184 |
def set_denoising_steps(denoising_steps, sampling_timesteps_state):
|
| 185 |
algo.sampling_timesteps = denoising_steps
|
|
@@ -197,7 +198,7 @@ def run(cfg: DictConfig):
|
|
| 197 |
|
| 198 |
for i in range(len(actions)):
|
| 199 |
memory_curr_frame += 1
|
| 200 |
-
|
| 201 |
actions[i],
|
| 202 |
None,
|
| 203 |
memory_curr_frame,
|
|
@@ -229,7 +230,7 @@ def run(cfg: DictConfig):
|
|
| 229 |
memory_curr_frame = 0
|
| 230 |
input_history = ""
|
| 231 |
|
| 232 |
-
|
| 233 |
actions[0],
|
| 234 |
poses[0],
|
| 235 |
memory_curr_frame,
|
|
@@ -242,7 +243,7 @@ def run(cfg: DictConfig):
|
|
| 242 |
reset()
|
| 243 |
return SELECTED_IMAGE
|
| 244 |
|
| 245 |
-
|
| 246 |
actions[0],
|
| 247 |
poses[0],
|
| 248 |
memory_curr_frame,
|
|
|
|
| 173 |
memory_frames.append(load_image_as_tensor(DEFAULT_IMAGE))
|
| 174 |
|
| 175 |
@spaces.GPU()
|
| 176 |
+
def run_interactive(first_frame, action, first_pose, curr_frame, device):
|
| 177 |
+
global algo
|
| 178 |
new_frame = algo.interactive(first_frame,
|
| 179 |
action,
|
| 180 |
first_pose,
|
| 181 |
curr_frame,
|
| 182 |
device=device)
|
| 183 |
+
return new_frame
|
| 184 |
|
| 185 |
def set_denoising_steps(denoising_steps, sampling_timesteps_state):
|
| 186 |
algo.sampling_timesteps = denoising_steps
|
|
|
|
| 198 |
|
| 199 |
for i in range(len(actions)):
|
| 200 |
memory_curr_frame += 1
|
| 201 |
+
new_frame = run_interactive(memory_frames[0],
|
| 202 |
actions[i],
|
| 203 |
None,
|
| 204 |
memory_curr_frame,
|
|
|
|
| 230 |
memory_curr_frame = 0
|
| 231 |
input_history = ""
|
| 232 |
|
| 233 |
+
_ = run_interactive(memory_frames[0],
|
| 234 |
actions[0],
|
| 235 |
poses[0],
|
| 236 |
memory_curr_frame,
|
|
|
|
| 243 |
reset()
|
| 244 |
return SELECTED_IMAGE
|
| 245 |
|
| 246 |
+
_ = run_interactive(memory_frames[0],
|
| 247 |
actions[0],
|
| 248 |
poses[0],
|
| 249 |
memory_curr_frame,
|