dwko
/

Alpamayo-R1-10B-4bit

@@ -95,4 +95,114 @@ Chain-of-Causation (per trajectory):
 [['Nudge to the left to pass the stopped truck encroaching into the lane.']]
 minADE: 1.7749525 meters
 Note: VLA-reasoning models produce nondeterministic outputs due to trajectory sampling, hardware differences, etc. With num_traj_samples=1 (set for GPU memory compatibility), variance in minADE is expected. For visual sanity checks, see notebooks/inference.ipynb
 ```

 [['Nudge to the left to pass the stopped truck encroaching into the lane.']]
 minADE: 1.7749525 meters
 Note: VLA-reasoning models produce nondeterministic outputs due to trajectory sampling, hardware differences, etc. With num_traj_samples=1 (set for GPU memory compatibility), variance in minADE is expected. For visual sanity checks, see notebooks/inference.ipynb
+```
+나는 이미지 1장만으로 판독하는 것을 테스트하려고 아래와 같은 예제를 만들었다.
+```python
+# Single-image example: builds dummy ego-motion inputs, loads one photo from disk,
+# runs trajectory sampling with the model, and prints the reasoning text and minADE.
+import torch
+import numpy as np
+from PIL import Image
+from alpamayo_r1.models.alpamayo_r1 import AlpamayoR1
+# NOTE(review): load_physical_aiavdataset is imported but not used anywhere in this example.
+from alpamayo_r1.load_physical_aiavdataset import load_physical_aiavdataset
+from alpamayo_r1 import helper
+num_history_steps = 16   # number of past steps
+num_future_steps = 64    # number of future steps
+# Dummy position data (xyz coordinates), all zeros
+ego_history_xyz = torch.zeros((1, 1, num_history_steps, 3))   # (batch, agent, steps, xyz)
+ego_future_xyz  = torch.zeros((1, 1, num_future_steps, 3))
+# Dummy rotation data (3x3 rotation matrices): the identity matrix repeated for every step
+ego_history_rot = torch.eye(3).repeat(1, 1, num_history_steps, 1, 1)  # (1,1,steps,3,3)
+ego_future_rot  = torch.eye(3).repeat(1, 1, num_future_steps, 1, 1)
+# Print the shapes of the dummy tensors as a sanity check
+print("ego_history_xyz:", ego_history_xyz.shape)
+print("ego_future_xyz:", ego_future_xyz.shape)
+print("ego_history_rot:", ego_history_rot.shape)
+print("ego_future_rot:", ego_future_rot.shape)
+N_cameras = 1
+camera_indices = torch.arange(N_cameras, dtype=torch.long)  # (N_cameras,) - explicitly long dtype
+# Container for the dummy inputs. Below, only "ego_history_xyz", "ego_history_rot" and
+# "ego_future_xyz" are read back; "camera_indices" and "ego_future_rot" are stored but unused here.
+data={
+        "camera_indices": camera_indices,  # (N_cameras,)
+        "ego_history_xyz": ego_history_xyz,  # (1, 1, num_history_steps, 3)
+        "ego_history_rot": ego_history_rot,  # (1, 1, num_history_steps, 3, 3)
+        "ego_future_xyz": ego_future_xyz,  # (1, 1, num_future_steps, 3)
+        "ego_future_rot": ego_future_rot,  # (1, 1, num_future_steps, 3, 3)
+#        "relative_timestamps": relative_timestamps,  # (N_cameras, num_frames)
+#        "absolute_timestamps": absolute_timestamps # (N_cameras, num_frames)
+}
+img_path = "IMG_20260116_065921.jpg"
+# Path of the JPG file to run the prediction on (set above)
+image = Image.open(img_path).convert("RGB")
+# helper.create_message expects a tensor input, so convert the image
+# Convert the PIL Image to a numpy array, then to float32
+image_array = np.array(image).astype(np.float32) / 255.0  # normalize to the 0-1 range
+image_tensor = torch.from_numpy(image_array).unsqueeze(0)  # [batch, H, W, C]
+# NOTE(review): the tensor is channel-last float32 in [0, 1]; confirm that
+# helper.create_message accepts this layout and value range - TODO confirm.
+# Build the chat messages
+messages = helper.create_message(image_tensor)
+# Model path passed to from_pretrained; the model is then moved to the GPU
+model_path = "Alpamayo-R1-10B-4bit"
+model = AlpamayoR1.from_pretrained(model_path, dtype=torch.bfloat16).to("cuda")
+processor = helper.get_processor(model.tokenizer)
+# Tokenize the chat messages into PyTorch tensors for the model
+inputs = processor.apply_chat_template(
+    messages,
+    tokenize=True,
+    add_generation_prompt=False,
+    continue_final_message=True,
+    return_dict=True,
+    return_tensors="pt",
+)
+# Only the tokenized prompt and the (dummy) ego history are handed to the model
+model_inputs = {
+    "tokenized_data": inputs,
+    "ego_history_xyz": data["ego_history_xyz"],
+    "ego_history_rot": data["ego_history_rot"],
+}
+model_inputs = helper.to_device(model_inputs, "cuda")
+# Seed the CUDA random number generators before sampling
+torch.cuda.manual_seed_all(42)
+with torch.autocast("cuda", dtype=torch.bfloat16):
+    pred_xyz, pred_rot, extra = model.sample_trajectories_from_data_with_vlm_rollout(
+        data=model_inputs,
+        top_p=0.98,
+        temperature=0.6,
+        num_traj_samples=1,  # Feel free to raise this for more output trajectories and CoC traces.
+        max_generation_length=256,
+        return_extra=True,
+    )
+# the size is [batch_size, num_traj_sets, num_traj_samples]
+print("Chain-of-Causation (per trajectory):\n", extra["cot"][0])
+# NOTE: the "ground truth" used here is the all-zero dummy ego_future_xyz defined above,
+# so the printed minADE is the mean xy distance of the prediction from the origin,
+# not an error against a real recorded trajectory.
+gt_xy = data["ego_future_xyz"].cpu()[0, 0, :, :2].T.numpy()
+pred_xy = pred_xyz.cpu().numpy()[0, 0, :, :, :2].transpose(0, 2, 1)
+# Per-sample average xy displacement over the steps, then the minimum over samples
+diff = np.linalg.norm(pred_xy - gt_xy[None, ...], axis=1).mean(-1)
+min_ade = diff.min()
+print("minADE:", min_ade, "meters")
+print(
+    "Note: VLA-reasoning models produce nondeterministic outputs due to trajectory sampling, "
+    "hardware differences, etc. With num_traj_samples=1 (set for GPU memory compatibility), "
+    "variance in minADE is expected. For visual sanity checks, see notebooks/inference.ipynb"
+)
+```
+```output
+Chain-of-Causation (per trajectory):
+ [['Keep lane to continue driving since the lane ahead is clear.']]
+minADE: 0.55852604 meters
+Note: VLA-reasoning models produce nondeterministic outputs due to trajectory sampling, hardware differences, etc. With num_traj_samples=1 (set for GPU memory compatibility), variance in minADE is expected. For visual sanity checks, see notebooks/inference.ipynb
 ```