dwko
/

Alpamayo-R1-10B-4bit

4-bit precision

Model card Files Files and versions

dwko committed on Jan 15

Commit

06fb74f

·

verified ·

1 Parent(s): 2c5c417

Update README.md

Files changed (1) hide show

README.md +70 -3

README.md CHANGED Viewed

@@ -1,3 +1,70 @@
----
-license: apache-2.0
----

+---
+license: apache-2.0
+---
+4-bit version of the nvidia/Alpamayo-R1-10B model.
+----  Download the model to ./Alpamayo-R1-10B-4bit
+----  Runnable on a GPU with 12 GB of memory; num_frames can be 1 to 8, larger values cause out-of-memory (OOM) errors.
+import torch
+import numpy as np
+from alpamayo_r1.models.alpamayo_r1 import AlpamayoR1
+from alpamayo_r1.load_physical_aiavdataset import load_physical_aiavdataset
+from alpamayo_r1 import helper
+# Local path to the downloaded 4-bit model
+model_path = "Alpamayo-R1-10B-4bit"
+model = AlpamayoR1.from_pretrained(model_path, dtype=torch.bfloat16).to("cuda")
+processor = helper.get_processor(model.tokenizer)
+# Example clip ID
+clip_id = "030c760c-ae38-49aa-9ad8-f5650a545d26"
+print(f"Loading dataset for clip_id: {clip_id}...")
+data = load_physical_aiavdataset(clip_id, t0_us=15_100_000,num_frames=1)
+print(f"{data}")
+print("Dataset loaded.")
+messages = helper.create_message(data["image_frames"].flatten(0, 1))
+inputs = processor.apply_chat_template(
+    messages,
+    tokenize=True,
+    add_generation_prompt=False,
+    continue_final_message=True,
+    return_dict=True,
+    return_tensors="pt",
+)
+model_inputs = {
+    "tokenized_data": inputs,
+    "ego_history_xyz": data["ego_history_xyz"],
+    "ego_history_rot": data["ego_history_rot"],
+}
+model_inputs = helper.to_device(model_inputs, "cuda")
+torch.cuda.manual_seed_all(42)
+with torch.autocast("cuda", dtype=torch.bfloat16):
+    pred_xyz, pred_rot, extra = model.sample_trajectories_from_data_with_vlm_rollout(
+        data=model_inputs,
+        top_p=0.98,
+        temperature=0.6,
+        num_traj_samples=1,  # Feel free to raise this for more output trajectories and CoC traces.
+        max_generation_length=256,
+        return_extra=True,
+    )
+# The CoC output is indexed as [batch_size, num_traj_sets, num_traj_samples]; [0] selects the first batch item.
+print("Chain-of-Causation (per trajectory):\n", extra["cot"][0])
+gt_xy = data["ego_future_xyz"].cpu()[0, 0, :, :2].T.numpy()
+pred_xy = pred_xyz.cpu().numpy()[0, 0, :, :, :2].transpose(0, 2, 1)
+diff = np.linalg.norm(pred_xy - gt_xy[None, ...], axis=1).mean(-1)
+min_ade = diff.min()
+print("minADE:", min_ade, "meters")
+print(
+    "Note: VLA-reasoning models produce nondeterministic outputs due to trajectory sampling, "
+    "hardware differences, etc. With num_traj_samples=1 (set for GPU memory compatibility), "
+    "variance in minADE is expected. For visual sanity checks, see notebooks/inference.ipynb"
+)