dwko commited on
Commit
f9f9c70
·
verified ·
1 Parent(s): 6d18ebb

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +110 -0
README.md CHANGED
@@ -95,4 +95,114 @@ Chain-of-Causation (per trajectory):
95
  [['Nudge to the left to pass the stopped truck encroaching into the lane.']]
96
  minADE: 1.7749525 meters
97
  Note: VLA-reasoning models produce nondeterministic outputs due to trajectory sampling, hardware differences, etc. With num_traj_samples=1 (set for GPU memory compatibility), variance in minADE is expected. For visual sanity checks, see notebooks/inference.ipynb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  ```
 
95
  [['Nudge to the left to pass the stopped truck encroaching into the lane.']]
96
  minADE: 1.7749525 meters
97
  Note: VLA-reasoning models produce nondeterministic outputs due to trajectory sampling, hardware differences, etc. With num_traj_samples=1 (set for GPU memory compatibility), variance in minADE is expected. For visual sanity checks, see notebooks/inference.ipynb
98
+ ```
99
+
100
+
101
+ I created the example below to test inference from a single image.
102
+
103
+ ```python
104
+ # Zero-initialized baseline: run inference from a single loaded image
105
+ import torch
106
+ import numpy as np
107
+ from PIL import Image
108
+ from alpamayo_r1.models.alpamayo_r1 import AlpamayoR1
109
+ from alpamayo_r1.load_physical_aiavdataset import load_physical_aiavdataset
110
+ from alpamayo_r1 import helper
111
+
112
+ num_history_steps = 16 # number of past (history) steps
113
+ num_future_steps = 64 # number of future steps
114
+
115
+ # Dummy position data (xyz coordinates)
116
+ ego_history_xyz = torch.zeros((1, 1, num_history_steps, 3)) # (batch, agent, steps, xyz)
117
+ ego_future_xyz = torch.zeros((1, 1, num_future_steps, 3))
118
+
119
+ # Dummy rotation data (3x3 rotation matrices)
120
+ ego_history_rot = torch.eye(3).repeat(1, 1, num_history_steps, 1, 1) # (1,1,steps,3,3)
121
+ ego_future_rot = torch.eye(3).repeat(1, 1, num_future_steps, 1, 1)
122
+
123
+ print("ego_history_xyz:", ego_history_xyz.shape)
124
+ print("ego_future_xyz:", ego_future_xyz.shape)
125
+ print("ego_history_rot:", ego_history_rot.shape)
126
+ print("ego_future_rot:", ego_future_rot.shape)
127
+ N_cameras = 1
128
+ camera_indices = torch.arange(N_cameras, dtype=torch.long) # (N_cameras,) - long dtype made explicit
129
+
130
+ data={
131
+ "camera_indices": camera_indices, # (N_cameras,)
132
+ "ego_history_xyz": ego_history_xyz, # (1, 1, num_history_steps, 3)
133
+ "ego_history_rot": ego_history_rot, # (1, 1, num_history_steps, 3, 3)
134
+ "ego_future_xyz": ego_future_xyz, # (1, 1, num_future_steps, 3)
135
+ "ego_future_rot": ego_future_rot, # (1, 1, num_future_steps, 3, 3)
136
+ # "relative_timestamps": relative_timestamps, # (N_cameras, num_frames)
137
+ # "absolute_timestamps": absolute_timestamps # (N_cameras, num_frames)
138
+ }
139
+ img_path = "IMG_20260116_065921.jpg"
140
+ # Path to the JPG file to run prediction on
141
+ image = Image.open(img_path).convert("RGB")
142
+ # helper.create_message expects a tensor input, so convert
143
+ # Convert the PIL Image to a numpy array, then cast to float32
144
+ image_array = np.array(image).astype(np.float32) / 255.0 # normalize to the 0-1 range
145
+ image_tensor = torch.from_numpy(image_array).unsqueeze(0) # [batch, H, W, C]
146
+ # Build the chat message
147
+ messages = helper.create_message(image_tensor)
148
+
149
+ # Example clip ID
150
+ model_path = "Alpamayo-R1-10B-4bit"
151
+ model = AlpamayoR1.from_pretrained(model_path, dtype=torch.bfloat16).to("cuda")
152
+ processor = helper.get_processor(model.tokenizer)
153
+
154
+
155
+
156
+ # Configuration values
157
+
158
+ inputs = processor.apply_chat_template(
159
+ messages,
160
+ tokenize=True,
161
+ add_generation_prompt=False,
162
+ continue_final_message=True,
163
+ return_dict=True,
164
+ return_tensors="pt",
165
+ )
166
+
167
+ model_inputs = {
168
+ "tokenized_data": inputs,
169
+ "ego_history_xyz": data["ego_history_xyz"],
170
+ "ego_history_rot": data["ego_history_rot"],
171
+ }
172
+
173
+ model_inputs = helper.to_device(model_inputs, "cuda")
174
+
175
+ torch.cuda.manual_seed_all(42)
176
+ with torch.autocast("cuda", dtype=torch.bfloat16):
177
+ pred_xyz, pred_rot, extra = model.sample_trajectories_from_data_with_vlm_rollout(
178
+ data=model_inputs,
179
+ top_p=0.98,
180
+ temperature=0.6,
181
+ num_traj_samples=1, # Feel free to raise this for more output trajectories and CoC traces.
182
+ max_generation_length=256,
183
+ return_extra=True,
184
+ )
185
+
186
+ # the size is [batch_size, num_traj_sets, num_traj_samples]
187
+ print("Chain-of-Causation (per trajectory):\n", extra["cot"][0])
188
+
189
+ gt_xy = data["ego_future_xyz"].cpu()[0, 0, :, :2].T.numpy()
190
+ pred_xy = pred_xyz.cpu().numpy()[0, 0, :, :, :2].transpose(0, 2, 1)
191
+ diff = np.linalg.norm(pred_xy - gt_xy[None, ...], axis=1).mean(-1)
192
+ min_ade = diff.min()
193
+ print("minADE:", min_ade, "meters")
194
+ print(
195
+ "Note: VLA-reasoning models produce nondeterministic outputs due to trajectory sampling, "
196
+ "hardware differences, etc. With num_traj_samples=1 (set for GPU memory compatibility), "
197
+ "variance in minADE is expected. For visual sanity checks, see notebooks/inference.ipynb"
198
+ )
199
+ ```
200
+
201
+ ```output
202
+
203
+ Chain-of-Causation (per trajectory):
204
+ [['Keep lane to continue driving since the lane ahead is clear.']]
205
+ minADE: 0.55852604 meters
206
+ Note: VLA-reasoning models produce nondeterministic outputs due to trajectory sampling, hardware differences, etc. With num_traj_samples=1 (set for GPU memory compatibility), variance in minADE is expected. For visual sanity checks, see notebooks/inference.ipynb
207
+
208
  ```