File size: 8,278 Bytes
06fb74f 7791ace 06fb74f 1b6fafb 06fb74f 0c5a356 1b6fafb eb8d03a 82ca028 8dd509e 1b6fafb 82ca028 eb8d03a 8dd509e 6d18ebb 1b6fafb eb8d03a 06fb74f 844a84b 06fb74f eb8d03a 06fb74f 844a84b 06fb74f 026dcf0 06fb74f 18c18e4 eb8d03a 18c18e4 23a0a84 45e9d42 18c18e4 23a0a84 18c18e4 f9f9c70 2436f31 f9f9c70 23a0a84 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 |
---
license: apache-2.0
base_model:
- nvidia/Alpamayo-R1-10B
---
nvidia/Alpamayo-R1-10B 4bit Model.
이 모델은 자율주행 중 수집된 데이터로 이벤트를 예측하는 용도로 활용할 수 있습니다.
자율주행을 하는 게 아니라 자율주행 중 특정 상황이 발생한 것을 알려주는 기능을 합니다.
```Runinfo
Download the model to ./Alpamayo-R1-10B-4bit
Runs on GPUs with 12 GB or 16 GB of memory
With 12 GB of memory, num_frames must be 1 to 8; larger values cause OOM
Tested with Transformers 4.57.5 (does not run on 5.0.0rc)
nvidia/Alpamayo-R1-10B 이 대용량 메모리를 요구하고 4bit 로 로딩하여 저장한 모델입니다.
12G 에서도 실행 가능해졌습니다만 주어지는 프레임 수는 1~8 정도, 그 이상이면 OOM이 떨어집니다.
트랜스포머 버전 5.0.0rc에서는 동작하지 않습니다.
git clone https://github.com/NVlabs/alpamayo 하고
cd alpamayo
pip install . 로 설치해야 합니다만
pyproject.toml을 수정하는 게 좋습니다.
python 3.13을 사용하면 requires-python = "==3.13.*"
transformers 와 torch 라인을 제거하고 설치하면 설치된 버전이 교체되지 않습니다.
```
-----------------------------------
```python
import torch
import numpy as np
from alpamayo_r1.models.alpamayo_r1 import AlpamayoR1
from alpamayo_r1.load_physical_aiavdataset import load_physical_aiavdataset
from alpamayo_r1 import helper
# Example: run Alpamayo-R1 (4-bit checkpoint) on one clip from the
# physical-AI AV dataset and report the predicted reasoning trace and minADE.

# Load the checkpoint found at model_path and move it to the GPU.
model_path = "Alpamayo-R1-10B-4bit"
model = AlpamayoR1.from_pretrained(model_path, dtype=torch.bfloat16).to("cuda")
processor = helper.get_processor(model.tokenizer)

clip_id = "030c760c-ae38-49aa-9ad8-f5650a545d26"
print(f"Loading dataset for clip_id: {clip_id}...")
# Needs a Hugging Face access token to be set, or a prior `huggingface-cli login`.
data = load_physical_aiavdataset(clip_id, t0_us=15_100_000, num_frames=1)
print("Dataset loaded.")

# Merge the first two dimensions of image_frames before building the chat message.
messages = helper.create_message(data["image_frames"].flatten(0, 1))

inputs = processor.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=False,
    continue_final_message=True,
    return_dict=True,
    return_tensors="pt",
)

# Only the tokenized prompt and the ego history are passed to the model;
# the future trajectory in `data` is kept aside as ground truth.
model_inputs = {
    "tokenized_data": inputs,
    "ego_history_xyz": data["ego_history_xyz"],
    "ego_history_rot": data["ego_history_rot"],
}
model_inputs = helper.to_device(model_inputs, "cuda")

# Seed the CUDA RNGs so the sampled trajectory is as repeatable as possible.
torch.cuda.manual_seed_all(42)
with torch.autocast("cuda", dtype=torch.bfloat16):
    pred_xyz, pred_rot, extra = model.sample_trajectories_from_data_with_vlm_rollout(
        data=model_inputs,
        top_p=0.98,
        temperature=0.6,
        num_traj_samples=1,  # Feel free to raise this for more output trajectories and CoC traces.
        max_generation_length=256,
        return_extra=True,
    )

# extra["cot"] has size [batch_size, num_traj_sets, num_traj_samples].
print("Chain-of-Causation (per trajectory):\n", extra["cot"][0])

# Keep only the x/y components and put the coordinate axis before the time axis.
gt_xy = data["ego_future_xyz"].cpu()[0, 0, :, :2].T.numpy()
pred_xy = pred_xyz.cpu().numpy()[0, 0, :, :, :2].transpose(0, 2, 1)
# Euclidean distance per time step (norm over the x/y axis), averaged over
# time for each sampled trajectory; minADE is the best sample.
diff = np.linalg.norm(pred_xy - gt_xy[None, ...], axis=1).mean(-1)
min_ade = diff.min()
print("minADE:", min_ade, "meters")
print(
    "Note: VLA-reasoning models produce nondeterministic outputs due to trajectory sampling, "
    "hardware differences, etc. With num_traj_samples=1 (set for GPU memory compatibility), "
    "variance in minADE is expected. For visual sanity checks, see notebooks/inference.ipynb"
)
```
--------------------
```Result:
Chain-of-Causation (per trajectory):
[['Nudge to the left to pass the stopped truck encroaching into the lane.']]
minADE: 1.7749525 meters
Note: VLA-reasoning models produce nondeterministic outputs due to trajectory sampling, hardware differences, etc. With num_traj_samples=1 (set for GPU memory compatibility), variance in minADE is expected. For visual sanity checks, see notebooks/inference.ipynb
```
나는 1장의 이미지로 판독하는 것을 테스트하려고 아래와 같은 예제를 만들었다.
데이터 로딩 없이 기본 초기화를 하여 임의의 시점에서 시작하는 것에서 시작했다.
구동하기 위해서 최소 12G 이상인 GPU 카드를 사용해야 하고, 응답 속도도 꽤 지연이 걸려
실제 자동차에 적용하기에 무리인 것 같다.
```python
#ZeroTime init Base Image(1 photo on load image)
import torch
import numpy as np
from PIL import Image
from alpamayo_r1.models.alpamayo_r1 import AlpamayoR1
from alpamayo_r1.load_physical_aiavdataset import load_physical_aiavdataset
from alpamayo_r1 import helper
# Example: run Alpamayo-R1 (4-bit checkpoint) on a single JPG image, using
# dummy (all-zero / identity) ego-motion data instead of a dataset clip.

num_history_steps = 16  # number of past steps
num_future_steps = 64  # number of future steps

# Dummy position data (xyz coordinates).
ego_history_xyz = torch.zeros((1, 1, num_history_steps, 3))  # (batch, agent, steps, xyz)
ego_future_xyz = torch.zeros((1, 1, num_future_steps, 3))

# Dummy rotation data (3x3 rotation matrices): identity at every step.
ego_history_rot = torch.eye(3).repeat(1, 1, num_history_steps, 1, 1)  # (1, 1, steps, 3, 3)
ego_future_rot = torch.eye(3).repeat(1, 1, num_future_steps, 1, 1)

print("ego_history_xyz:", ego_history_xyz.shape)
print("ego_future_xyz:", ego_future_xyz.shape)
print("ego_history_rot:", ego_history_rot.shape)
print("ego_future_rot:", ego_future_rot.shape)

N_cameras = 1
camera_indices = torch.arange(N_cameras, dtype=torch.long)  # (N_cameras,) - long dtype made explicit

data = {
    "camera_indices": camera_indices,  # (N_cameras,)
    "ego_history_xyz": ego_history_xyz,  # (1, 1, num_history_steps, 3)
    "ego_history_rot": ego_history_rot,  # (1, 1, num_history_steps, 3, 3)
    "ego_future_xyz": ego_future_xyz,  # (1, 1, num_future_steps, 3)
    "ego_future_rot": ego_future_rot,  # (1, 1, num_future_steps, 3, 3)
    # "relative_timestamps": relative_timestamps,  # (N_cameras, num_frames)
    # "absolute_timestamps": absolute_timestamps  # (N_cameras, num_frames)
}

# Path of the JPG file to run the prediction on.
img_path = "IMG_20260116_065921.jpg"
image = Image.open(img_path).convert("RGB")

# helper.create_message expects tensor input, so convert the PIL image:
# PIL Image -> numpy array -> float32, normalized to the 0-1 range.
image_array = np.array(image).astype(np.float32) / 255.0
image_tensor = torch.from_numpy(image_array).unsqueeze(0)  # [batch, H, W, C]
# NOTE(review): the dataset example passes data["image_frames"].flatten(0, 1)
# unchanged; confirm that create_message/the processor accept channel-last
# float images in [0, 1] rather than the dataset's native layout and dtype.

# Build the chat message from the image.
messages = helper.create_message(image_tensor)

# Load the checkpoint found at model_path and move it to the GPU.
model_path = "Alpamayo-R1-10B-4bit"
model = AlpamayoR1.from_pretrained(model_path, dtype=torch.bfloat16).to("cuda")
processor = helper.get_processor(model.tokenizer)

# Tokenization settings.
inputs = processor.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=False,
    continue_final_message=True,
    return_dict=True,
    return_tensors="pt",
)

# Only the tokenized prompt and the (dummy) ego history are passed to the model.
model_inputs = {
    "tokenized_data": inputs,
    "ego_history_xyz": data["ego_history_xyz"],
    "ego_history_rot": data["ego_history_rot"],
}
model_inputs = helper.to_device(model_inputs, "cuda")

# Seed the CUDA RNGs so the sampled trajectory is as repeatable as possible.
torch.cuda.manual_seed_all(42)
with torch.autocast("cuda", dtype=torch.bfloat16):
    pred_xyz, pred_rot, extra = model.sample_trajectories_from_data_with_vlm_rollout(
        data=model_inputs,
        top_p=0.98,
        temperature=0.6,
        num_traj_samples=1,  # Feel free to raise this for more output trajectories and CoC traces.
        max_generation_length=256,
        return_extra=True,
    )

# the size is [batch_size, num_traj_sets, num_traj_samples]
print("Chain-of-Causation (per trajectory):\n", extra["cot"][0])

# NOTE: ego_future_xyz is all zeros here, so the "minADE" below only measures
# how far the predicted trajectory is from the origin, not real accuracy.
# Keep only the x/y components and put the coordinate axis before the time axis.
gt_xy = data["ego_future_xyz"].cpu()[0, 0, :, :2].T.numpy()
pred_xy = pred_xyz.cpu().numpy()[0, 0, :, :, :2].transpose(0, 2, 1)
# Euclidean distance per time step (norm over the x/y axis), averaged over
# time for each sampled trajectory; minADE is the best sample.
diff = np.linalg.norm(pred_xy - gt_xy[None, ...], axis=1).mean(-1)
min_ade = diff.min()
print("minADE:", min_ade, "meters")
print(
    "Note: VLA-reasoning models produce nondeterministic outputs due to trajectory sampling, "
    "hardware differences, etc. With num_traj_samples=1 (set for GPU memory compatibility), "
    "variance in minADE is expected. For visual sanity checks, see notebooks/inference.ipynb"
)
```
```output
Chain-of-Causation (per trajectory):
[['Keep lane to continue driving since the lane ahead is clear.']]
minADE: 0.55852604 meters
Note: VLA-reasoning models produce nondeterministic outputs due to trajectory sampling, hardware differences, etc. With num_traj_samples=1 (set for GPU memory compatibility), variance in minADE is expected. For visual sanity checks, see notebooks/inference.ipynb
``` |