robomind-vla / tests.py
mitvho09's picture
RoboMind VLA: vision-language reward model for robot locomotion (built with Codex)
321ba64 verified
Raw
History Blame Contribute Delete
11.4 kB
"""
RoboMind VLA — Task 12: tests.py
End-to-end tests and verification for the entire pipeline.
Runs on Modal CPU for data checks, GPU for model checks.
Usage:
    modal run tests.py
"""
from __future__ import annotations
import json
import modal
# Container image used by every test function below: Debian slim with
# Python 3.11 plus the Python packages the tests import.
image = (
    modal.Image.debian_slim(python_version="3.11")
    .pip_install(
        "numpy<2",
        "torch==2.4.0",
        "torchvision==0.19.0",
        "transformers==4.40.0",
        "peft==0.11.1",
        "accelerate==0.30.1",
        "datasets",
        "pillow",
        "huggingface_hub",
    )
    # Build-time step: write a minimal `flash_attn` package (an empty
    # `__init__.py` and a `flash_attn_interface.py`) into site-packages.
    # Both stub functions raise NotImplementedError if they are ever called.
    # NOTE(review): presumably this lets code that imports flash_attn load
    # without the real package being installed — confirm.
    .run_commands(
        "python -c \""
        "import os, sys; "
        "d = os.path.join(sys.prefix, 'lib/python3.11/site-packages/flash_attn'); "
        "os.makedirs(d, exist_ok=True); "
        "open(os.path.join(d, '__init__.py'), 'w').write(''); "
        "open(os.path.join(d, 'flash_attn_interface.py'), 'w').write("
        "'def flash_attn_func(*a, **kw): raise NotImplementedError\\n"
        "def flash_attn_varlen_func(*a, **kw): raise NotImplementedError\\n'); "
        "print('flash_attn stub created')\""
    )
)
# Modal app that the decorated functions below are registered on.
app = modal.App("robomind-tests")
# Named Modal volume, looked up by name and created if it is missing; the
# functions that mount it do so at /data.
volume = modal.Volume.from_name("robomind-data", create_if_missing=True)
@app.function(image=image, secrets=[modal.Secret.from_name("huggingface-secret")], timeout=300)
def test_data_repos():
    """Test 1: Verify HF data repos exist and have correct structure.

    Lists the files of the rollouts repo, the judge dataset repo and the LoRA
    adapter repo on the Hugging Face Hub and records a few structural facts
    about each one. A failed lookup is reported in the result instead of
    being raised, so the remaining repos are still checked.

    Returns:
        dict: One entry per repo under the keys ``"rollouts"``, ``"dataset"``
        and ``"adapter"``. Each entry holds ``"exists": True`` plus per-repo
        details on success, or ``"exists": False`` and an ``"error"`` message
        when the lookup raised.
    """
    import os

    from huggingface_hub import HfApi, login
    from huggingface_hub.utils import RepositoryNotFoundError

    hf_token = os.environ.get("HF_TOKEN")
    if hf_token:
        login(token=hf_token)
    api = HfApi()

    def list_files(repo_id, repo_types):
        """Return the file list of ``repo_id`` under the first repo type that resolves.

        Only a "repository not found" answer moves on to the next type; the
        error from the last type tried propagates to the caller.
        """
        for repo_type in repo_types[:-1]:
            try:
                return api.list_repo_files(repo_id, repo_type=repo_type)
            except RepositoryNotFoundError:
                continue
        return api.list_repo_files(repo_id, repo_type=repo_types[-1])

    results = {}

    # Check robomind-rollouts. The repo type cannot be determined from this
    # file, so keep the original lookup (model) first and fall back to dataset.
    try:
        files = list_files("mitvho09/robomind-rollouts", ("model", "dataset"))
        mp4_count = sum(1 for name in files if name.endswith(".mp4"))
        has_metadata = "metadata.jsonl" in files
        results["rollouts"] = {
            "exists": True,
            "mp4_files": mp4_count,
            "has_metadata": has_metadata,
            "total_files": len(files),
        }
        print(f"[test] rollouts: {mp4_count} mp4s, metadata={has_metadata}")
    except Exception as e:
        results["rollouts"] = {"exists": False, "error": str(e)}
        print(f"[test] rollouts FAILED: {e}")

    # Check robomind-loco-judge-dataset. This repo is read with
    # `datasets.load_dataset` elsewhere in this file, so it lives in the
    # dataset namespace; the default (model) lookup would not find it.
    try:
        files = list_files("mitvho09/robomind-loco-judge-dataset", ("dataset",))
        results["dataset"] = {
            "exists": True,
            "total_files": len(files),
        }
        print(f"[test] dataset: {len(files)} files")
    except Exception as e:
        results["dataset"] = {"exists": False, "error": str(e)}
        print(f"[test] dataset FAILED: {e}")

    # Check adapter (a model repo, loaded through PEFT elsewhere in this file).
    try:
        files = list_files("mitvho09/robomind-minicpm-loco-lora", ("model",))
        has_adapter = "adapter_model.safetensors" in files
        has_config = "adapter_config.json" in files
        results["adapter"] = {
            "exists": True,
            "has_adapter_weights": has_adapter,
            "has_config": has_config,
            "total_files": len(files),
        }
        print(f"[test] adapter: weights={has_adapter}, config={has_config}")
    except Exception as e:
        results["adapter"] = {"exists": False, "error": str(e)}
        print(f"[test] adapter FAILED: {e}")

    return results
@app.function(image=image, timeout=300)
def test_dataset_structure():
    """Test 2: Verify dataset has correct schema and content.

    Loads the ``train`` split of the judge dataset and reports which required
    columns are present, the distinct ``env`` and ``tier`` values, image-count
    statistics per sample and the union of keys found in ``target_json``.

    A missing column is reported through its ``has_<col>`` flag and the
    statistics that depend on it fall back to empty/zero values, so the schema
    report is always returned instead of being lost to a ``KeyError``.

    Returns:
        dict: ``n_samples``, one ``has_<col>`` flag per required column,
        ``envs``, ``tiers``, ``min_images``, ``max_images``, ``avg_images``
        and ``target_json_keys``.
    """
    from datasets import load_dataset

    ds = load_dataset("mitvho09/robomind-loco-judge-dataset", split="train")
    results = {"n_samples": len(ds)}

    required_cols = ["env", "tier", "episode_id", "images", "target_json"]
    present = set(ds.column_names)
    for col in required_cols:
        results[f"has_{col}"] = col in present

    envs = set(ds["env"]) if "env" in present else set()
    tiers = set(ds["tier"]) if "tier" in present else set()
    results["envs"] = sorted(envs)
    results["tiers"] = sorted(tiers)

    # Walk the rows once and gather both per-row statistics in the same pass.
    has_images = "images" in present
    has_targets = "target_json" in present
    n_images = []
    all_keys = set()
    if has_images or has_targets:
        for row in ds:
            if has_images:
                n_images.append(len(row["images"]))
            if has_targets:
                all_keys.update(json.loads(row["target_json"]).keys())

    # Defaults cover an empty split or a missing "images" column.
    results["min_images"] = min(n_images, default=0)
    results["max_images"] = max(n_images, default=0)
    results["avg_images"] = sum(n_images) / len(n_images) if n_images else 0.0
    results["target_json_keys"] = sorted(all_keys)

    print(f"[test] dataset: {results['n_samples']} samples, {len(envs)} envs, {len(tiers)} tiers")
    print(f"[test] images per sample: min={results['min_images']}, max={results['max_images']}")
    print(f"[test] envs: {results['envs']}")
    print(f"[test] target keys: {results['target_json_keys']}")
    return results
@app.function(
    image=image,
    gpu="A100-40GB",
    volumes={"/data": volume},
    secrets=[modal.Secret.from_name("huggingface-secret")],
    timeout=600,
)
def test_model_load():
    """Test 3: Verify model loads correctly with LoRA adapter.

    Loads the tokenizer, then the base model, then the LoRA adapter. The first
    stage that raises is recorded in the returned dict (``<stage>_loaded`` set
    to False plus ``<stage>_error``) and the later stages are skipped. When
    every stage succeeds the dict also carries ``model_ready: True``.
    """
    import os

    import torch
    from transformers import AutoModel, AutoTokenizer
    from peft import PeftModel
    from huggingface_hub import login

    base_id = "openbmb/MiniCPM-V-2_6"
    adapter_id = "mitvho09/robomind-minicpm-loco-lora"

    # Authenticate only when the secret actually provided a token.
    token = os.environ.get("HF_TOKEN")
    if token:
        login(token=token)

    report = {}

    # Stage 1: tokenizer.
    try:
        tokenizer = AutoTokenizer.from_pretrained(base_id, trust_remote_code=True)
        report["tokenizer_loaded"] = True
        print("[test] tokenizer loaded")
    except Exception as exc:
        report.update({"tokenizer_loaded": False, "tokenizer_error": str(exc)})
        print(f"[test] tokenizer FAILED: {exc}")
        return report

    # Stage 2: base model in bfloat16, placed automatically on the device.
    try:
        backbone = AutoModel.from_pretrained(
            base_id,
            trust_remote_code=True,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
        report["base_model_loaded"] = True
        print("[test] base model loaded")
    except Exception as exc:
        report.update({"base_model_loaded": False, "base_model_error": str(exc)})
        print(f"[test] base model FAILED: {exc}")
        return report

    # Stage 3: LoRA adapter on top of the base model.
    try:
        adapted = PeftModel.from_pretrained(backbone, adapter_id)
        report["adapter_loaded"] = True
        print("[test] LoRA adapter loaded")
    except Exception as exc:
        report.update({"adapter_loaded": False, "adapter_error": str(exc)})
        print(f"[test] adapter FAILED: {exc}")
        return report

    adapted.eval()
    report["model_ready"] = True
    print("[test] model ready for inference")
    return report
@app.function(
    image=image,
    gpu="A100-40GB",
    volumes={"/data": volume},
    secrets=[modal.Secret.from_name("huggingface-secret")],
    timeout=600,
)
def test_inference():
    """Test 4: Run a dummy inference to verify the model generates output.

    Loads the base model with the LoRA adapter, feeds it three random-noise
    frames together with the judge instruction and records what comes back.

    Like the model-load test, a failure while loading or generating is
    reported in the returned dict rather than raised, so one broken stage does
    not abort the rest of the run.

    Returns:
        dict: On success ``response_length``, ``response_preview`` (first 300
        characters) and ``contains_json`` (True when the response contains
        both ``{`` and ``}``). On failure ``inference_ok: False`` and
        ``inference_error`` with the exception message.
    """
    import os
    import torch
    from transformers import AutoModel, AutoTokenizer
    from peft import PeftModel
    from huggingface_hub import login
    from PIL import Image
    import numpy as np

    hf_token = os.environ.get("HF_TOKEN")
    if hf_token:
        login(token=hf_token)

    results = {}

    instruction = (
        "You are RoboMind VLA, a vision-language reward model for humanoid "
        "locomotion. You are shown keyframes from a robot locomotion rollout. "
        "The robot was commanded to \"walk forward\". Analyze the rollout and "
        "respond with ONLY a JSON object with these exact keys: timestep_range, "
        "phase, command, command_followed, stability, fall_risk, gait_quality, "
        "predicted_reward, anomaly, explanation."
    )
    image_tokens = "\n".join(f"<image_{k:02d}>" for k in range(3))
    user_content = f"{image_tokens}\n{instruction}"

    try:
        tokenizer = AutoTokenizer.from_pretrained(
            "openbmb/MiniCPM-V-2_6", trust_remote_code=True
        )
        base_model = AutoModel.from_pretrained(
            "openbmb/MiniCPM-V-2_6",
            trust_remote_code=True,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
        model = PeftModel.from_pretrained(base_model, "mitvho09/robomind-minicpm-loco-lora")
        model.eval()

        # Random-noise frames; randint's upper bound is exclusive, so 256 is
        # needed to cover the full uint8 range.
        dummy_images = [
            Image.fromarray(np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8))
            for _ in range(3)
        ]

        # NOTE(review): assumes the model's remote `chat` implementation
        # accepts a list of PIL images through `image=` and resolves the
        # `<image_XX>` placeholders in the text — confirm against the
        # MiniCPM-V-2_6 model code.
        with torch.no_grad():
            output = model.chat(
                image=dummy_images,
                msgs=[{"role": "user", "content": user_content}],
                tokenizer=tokenizer,
                max_new_tokens=256,
            )
    except Exception as e:
        results["inference_ok"] = False
        results["inference_error"] = str(e)
        print(f"[test] inference FAILED: {e}")
        return results

    response = output if isinstance(output, str) else str(output)
    results["response_length"] = len(response)
    results["response_preview"] = response[:300]
    # Cheap structural check only: both braces appear somewhere in the text.
    has_json = "{" in response and "}" in response
    results["contains_json"] = has_json
    print(f"[test] inference OK: {len(response)} chars")
    print(f"[test] response: {response[:300]}")
    return results
@app.function(image=image, volumes={"/data": volume}, timeout=300)
def test_volume_data():
    """Test 5: Verify Modal volume has expected data.

    The volume is mounted at ``/data`` (the same mount point the GPU tests
    use); without that mount none of the paths below can exist inside the
    container.

    Returns:
        dict: Always contains ``minicpm_cloned``. Depending on what is found
        under ``/data/ft`` it also contains ``ft_images`` (number of ``.jpg``
        files), ``train_samples`` (length of ``train.json``) and
        ``lora_output_exists`` / ``has_adapter_in_output``.
    """
    import os

    results = {}
    ft_dir = "/data/ft"
    if os.path.isdir(ft_dir):
        images_dir = os.path.join(ft_dir, "images")
        if os.path.isdir(images_dir):
            n_images = sum(1 for name in os.listdir(images_dir) if name.endswith(".jpg"))
            results["ft_images"] = n_images
            print(f"[test] volume images: {n_images}")

        train_json = os.path.join(ft_dir, "train.json")
        if os.path.isfile(train_json):
            with open(train_json, encoding="utf-8") as fh:
                data = json.load(fh)
            results["train_samples"] = len(data)
            print(f"[test] volume train.json: {len(data)} samples")

        lora_dir = os.path.join(ft_dir, "lora_output")
        if os.path.isdir(lora_dir):
            results["lora_output_exists"] = True
            has_adapter = any("adapter_model" in name for name in os.listdir(lora_dir))
            results["has_adapter_in_output"] = has_adapter
            print(f"[test] LoRA output exists, adapter={has_adapter}")

    mcpm_dir = "/data/MiniCPM-V"
    results["minicpm_cloned"] = os.path.exists(mcpm_dir)
    print(f"[test] volume data: {results}")
    return results
def _result_failed(result):
    """Return True when a (possibly nested) test result records a failure.

    A failure is a value that is exactly ``False`` or a dict key ending in
    ``"error"``, looked for at any nesting depth. Free-text values are not
    scanned, so a model response that merely mentions the word "error" does
    not count.
    """
    if isinstance(result, dict):
        return any(
            (isinstance(key, str) and key.lower().endswith("error"))
            or _result_failed(value)
            for key, value in result.items()
        )
    return result is False


@app.local_entrypoint()
def main():
    """Run all tests.

    Invokes each remote test in order, then prints a PASS/FAIL line per test
    and the totals. A test whose remote call raises is recorded as
    ``{"error": ...}`` and counted as failed, so the remaining tests and the
    summary still run.

    Returns:
        dict: The result of every test, keyed by test name.
    """
    print("=" * 60)
    print("RoboMind VLA — End-to-End Tests")
    print("=" * 60)

    suite = [
        ("Test 1: HF Data Repos", "data_repos", test_data_repos),
        ("Test 2: Dataset Structure", "dataset_structure", test_dataset_structure),
        ("Test 3: Model Load", "model_load", test_model_load),
        ("Test 4: Inference", "inference", test_inference),
        ("Test 5: Volume Data", "volume_data", test_volume_data),
    ]

    all_results = {}
    for title, key, fn in suite:
        print(f"\n--- {title} ---")
        try:
            all_results[key] = fn.remote()
        except Exception as e:
            # Keep going: one crashing test must not hide the others.
            all_results[key] = {"error": f"{type(e).__name__}: {e}"}
            print(f"[test] {key} raised: {e}")

    print("\n" + "=" * 60)
    print("SUMMARY")
    print("=" * 60)
    passed = 0
    failed = 0
    for test_name, result in all_results.items():
        if _result_failed(result):
            status = "FAIL"
            failed += 1
        else:
            status = "PASS"
            passed += 1
        print(f" {test_name}: {status}")
    print(f"\nTotal: {passed} passed, {failed} failed, {passed + failed} total")
    return all_results