"""Pre-flight check for the RL4Phyx SFT training run.

Verifies that the model directory, the SFT dataset (and the images referenced
by its first sample), the training scripts, the checkpoint directory and the
GPUs are all in place, then prints an OK / ISSUES report.

Every check returns ``(ok, issues)``: two lists of human-readable messages.
A check never raises for an expected failure (missing file, empty dataset,
no GPU); it records an issue instead, so the final report is always printed.
"""

import json
import os
from typing import Iterable, List, Tuple

MODEL_PATH = "/workspace/rl4phyx/models/Qwen2.5-VL-3B-Instruct"
DATA_PATH = "/workspace/rl4phyx/RL4Phyx/SFT/sft_train/sft_train_formatted.jsonl"
SFT_DIR = "/workspace/rl4phyx/RL4Phyx/SFT"
REQUIRED_FILES = ("train_sft.py", "run_sft.sh", "ds_zero2.json")
OUTPUT_DIR = "/workspace/rl4phyx/RL4Phyx/SFT/checkpoints/"

_FILE_SCHEME = "file://"

Report = Tuple[List[str], List[str]]


def check_model(model_path: str = MODEL_PATH) -> Report:
    """Check that *model_path* is a directory holding ``.safetensors`` files."""
    if not os.path.isdir(model_path):
        return [], [f"Model NOT found: {model_path}"]
    shard_count = sum(
        name.endswith(".safetensors") for name in os.listdir(model_path)
    )
    if shard_count == 0:
        # A directory without weight files is not a usable model.
        return [], [f"Model has no .safetensors files: {model_path}"]
    return [f"Model: {shard_count} safetensors"], []


def image_paths(sample: object) -> List[str]:
    """Return the local paths of all image parts in one dataset sample.

    Looks at ``sample["messages"][i]["content"][j]`` entries whose ``"type"``
    is ``"image"`` and reads their ``"image"`` value, stripping a leading
    ``file://``. Anything that does not have that shape (non-dict sample,
    string content, image part without a string ``"image"``) is skipped.
    """
    if not isinstance(sample, dict):
        return []
    messages = sample.get("messages")
    if not isinstance(messages, list):
        return []
    found = []
    for message in messages:
        content = message.get("content") if isinstance(message, dict) else None
        if not isinstance(content, list):
            # Plain-text messages carry a string here; nothing to inspect.
            continue
        for part in content:
            if not isinstance(part, dict) or part.get("type") != "image":
                continue
            image = part.get("image")
            if not isinstance(image, str):
                continue
            if image.startswith(_FILE_SCHEME):
                image = image[len(_FILE_SCHEME):]
            found.append(image)
    return found


def check_data(data_path: str = DATA_PATH) -> Report:
    """Check the JSONL dataset and the images referenced by its first sample.

    Blank lines are not counted as samples. Only the first sample is parsed;
    each image it references is reported as OK when the file exists and as an
    issue when it does not.
    """
    if not os.path.isfile(data_path):
        return [], [f"Data NOT found: {data_path}"]

    first_line = None
    sample_count = 0
    try:
        with open(data_path, encoding="utf-8") as handle:
            for line in handle:
                if not line.strip():
                    continue
                if first_line is None:
                    first_line = line
                sample_count += 1
    except (OSError, UnicodeDecodeError) as exc:
        return [], [f"Data NOT readable: {data_path} ({exc})"]

    if first_line is None:
        return [], [f"SFT data is empty: {data_path}"]

    ok = [f"SFT data: {sample_count} samples"]
    issues = []
    try:
        sample = json.loads(first_line)
    except ValueError as exc:
        issues.append(f"SFT data: first sample is not valid JSON ({exc})")
        return ok, issues

    for image_path in image_paths(sample):
        if os.path.isfile(image_path):
            ok.append(f"Image exists: True ({image_path})")
        else:
            issues.append(f"Image NOT found: {image_path}")
    return ok, issues


def check_required_files(
    sft_dir: str = SFT_DIR, names: Iterable[str] = REQUIRED_FILES
) -> Report:
    """Check that every file in *names* exists inside *sft_dir*."""
    ok = []
    issues = []
    for name in names:
        if os.path.isfile(os.path.join(sft_dir, name)):
            ok.append(f"{name}: OK")
        else:
            issues.append(f"{name} NOT found")
    return ok, issues


def check_output_dir(out_dir: str = OUTPUT_DIR) -> Report:
    """Create *out_dir* if needed and check that it can be written to."""
    try:
        os.makedirs(out_dir, exist_ok=True)
    except OSError as exc:
        return [], [f"Output dir NOT creatable: {out_dir} ({exc})"]
    # W_OK alone is not enough to create entries in a directory; X_OK is
    # required as well.
    if not os.access(out_dir, os.W_OK | os.X_OK):
        return [], [f"Output dir NOT writable: {out_dir}"]
    return ["Output dir: writable"], []


def check_gpus() -> Report:
    """Report the number of CUDA devices torch can see and the first one's name."""
    try:
        # Imported lazily so the file-system checks still run without torch.
        import torch
    except ImportError as exc:
        return [], [f"torch NOT importable: {exc}"]
    gpu_count = torch.cuda.device_count()
    if gpu_count == 0:
        # get_device_name(0) would raise here instead of reporting.
        return [], ["No CUDA GPUs visible"]
    return [f"GPUs: {gpu_count} x {torch.cuda.get_device_name(0)}"], []


def run_checks() -> Report:
    """Run every check with its default path and merge the results in order."""
    ok = []
    issues = []
    for check in (
        check_model,
        check_data,
        check_required_files,
        check_output_dir,
        check_gpus,
    ):
        passed, failed = check()
        ok.extend(passed)
        issues.extend(failed)
    return ok, issues


def print_report(ok: List[str], issues: List[str]) -> None:
    """Print the OK section followed by the ISSUES section."""
    print("=== OK ===")
    for message in ok:
        print(message)
    print(f"\n=== ISSUES ({len(issues)}) ===")
    for message in issues:
        print(message)
    if not issues:
        print("None! Ready to train.")


if __name__ == "__main__":
    # Bound at module level so `ok` / `issues` stay inspectable (python -i).
    ok, issues = run_checks()
    print_report(ok, issues)