# rl4phyx-backup / root_scripts / check_ready.py
# YUNTA88's picture
# Upload root_scripts/check_ready.py with huggingface_hub
# 0e2fdc4 verified
import os, json
# Pre-flight readiness check for the RL4Phyx SFT run.
#
# Each check returns a pair ``(ok, issues)`` of message lists and never raises
# for an expected environment problem (missing file, empty dataset, no GPU), so
# the final report is always printed instead of dying on the first failure.

MODEL_PATH = "/workspace/rl4phyx/models/Qwen2.5-VL-3B-Instruct"
DATA_PATH = "/workspace/rl4phyx/RL4Phyx/SFT/sft_train/sft_train_formatted.jsonl"
SFT_DIR = "/workspace/rl4phyx/RL4Phyx/SFT"
REQUIRED_FILES = ("train_sft.py", "run_sft.sh", "ds_zero2.json")
OUTPUT_DIR = "/workspace/rl4phyx/RL4Phyx/SFT/checkpoints/"

_FILE_SCHEME = "file://"


def check_model(model_path: str) -> "tuple[list[str], list[str]]":
    """Check that the model directory exists and holds ``.safetensors`` files.

    Returns:
        ``(ok, issues)`` message lists. A directory without any
        ``.safetensors`` file is reported as an issue.
    """
    ok, issues = [], []
    if not os.path.isdir(model_path):
        issues.append(f"Model NOT found: {model_path}")
        return ok, issues
    try:
        names = os.listdir(model_path)
    except OSError as err:
        issues.append(f"Model dir NOT readable: {model_path} ({err})")
        return ok, issues
    shard_count = sum(name.endswith(".safetensors") for name in names)
    if shard_count:
        ok.append(f"Model: {shard_count} safetensors")
    else:
        issues.append(f"Model has no .safetensors files: {model_path}")
    return ok, issues


def sample_image_paths(sample) -> "list[str]":
    """Return the image paths referenced by the messages of one SFT sample.

    Looks at every message whose ``content`` is a list and collects the
    ``image`` value of each ``{"type": "image"}`` entry, with a leading
    ``file://`` scheme removed. Anything with an unexpected shape is skipped
    rather than raising.
    """
    paths = []
    messages = sample.get("messages") if isinstance(sample, dict) else None
    if not isinstance(messages, list):
        return paths
    for message in messages:
        content = message.get("content") if isinstance(message, dict) else None
        if not isinstance(content, list):
            continue  # plain-text content carries no image entries
        for part in content:
            if not isinstance(part, dict) or part.get("type") != "image":
                continue
            image = part.get("image")
            if not isinstance(image, str):
                continue
            # Strip the scheme only as a prefix, never from inside the path.
            if image.startswith(_FILE_SCHEME):
                image = image[len(_FILE_SCHEME):]
            paths.append(image)
    return paths


def check_sft_data(data_path: str) -> "tuple[list[str], list[str]]":
    """Check the SFT JSONL file and the images referenced by its first sample.

    The file is streamed line by line; blank lines are not counted as samples.
    Only the first sample is parsed, and each image it references is checked
    for existence on disk.

    Returns:
        ``(ok, issues)`` message lists. A missing/unreadable/empty file,
        invalid JSON in the first sample, or a missing image is an issue.
    """
    ok, issues = [], []
    if not os.path.isfile(data_path):
        issues.append(f"Data NOT found: {data_path}")
        return ok, issues

    first_line = None
    sample_count = 0
    try:
        with open(data_path, encoding="utf-8") as handle:
            for line in handle:
                if not line.strip():
                    continue
                if first_line is None:
                    first_line = line
                sample_count += 1
    except (OSError, UnicodeDecodeError) as err:
        issues.append(f"Data NOT readable: {data_path} ({err})")
        return ok, issues

    if first_line is None:
        issues.append(f"SFT data is empty: {data_path}")
        return ok, issues
    ok.append(f"SFT data: {sample_count} samples")

    try:
        sample = json.loads(first_line)
    except json.JSONDecodeError as err:
        issues.append(f"First SFT sample is not valid JSON: {err}")
        return ok, issues

    for image_path in sample_image_paths(sample):
        if os.path.isfile(image_path):
            ok.append(f"Image exists: True ({image_path})")
        else:
            issues.append(f"Image NOT found: {image_path}")
    return ok, issues


def check_required_files(sft_dir: str, names=REQUIRED_FILES) -> "tuple[list[str], list[str]]":
    """Check that every file in *names* exists directly under *sft_dir*."""
    ok, issues = [], []
    for name in names:
        if os.path.isfile(os.path.join(sft_dir, name)):
            ok.append(f"{name}: OK")
        else:
            issues.append(f"{name} NOT found")
    return ok, issues


def check_output_dir(out_dir: str) -> "tuple[list[str], list[str]]":
    """Create *out_dir* if needed and verify that it is actually writable."""
    ok, issues = [], []
    try:
        os.makedirs(out_dir, exist_ok=True)
    except OSError as err:
        issues.append(f"Output dir NOT creatable: {out_dir} ({err})")
        return ok, issues
    # Creating files inside a directory needs both write and search permission.
    if os.access(out_dir, os.W_OK | os.X_OK):
        ok.append("Output dir: writable")
    else:
        issues.append(f"Output dir NOT writable: {out_dir}")
    return ok, issues


def check_gpus() -> "tuple[list[str], list[str]]":
    """Report the visible CUDA devices, or an issue when there are none."""
    ok, issues = [], []
    try:
        # Imported lazily so the filesystem checks still run without PyTorch.
        import torch
    except ImportError as err:
        issues.append(f"PyTorch NOT importable: {err}")
        return ok, issues
    gpu_count = torch.cuda.device_count()
    if gpu_count < 1:
        # get_device_name(0) would raise here, hiding the rest of the report.
        issues.append("No CUDA GPUs visible to PyTorch")
        return ok, issues
    ok.append(f"GPUs: {gpu_count} x {torch.cuda.get_device_name(0)}")
    return ok, issues


def run_checks() -> "tuple[list[str], list[str]]":
    """Run every readiness check in order and merge their messages."""
    ok, issues = [], []
    results = (
        check_model(MODEL_PATH),
        check_sft_data(DATA_PATH),
        check_required_files(SFT_DIR),
        check_output_dir(OUTPUT_DIR),
        check_gpus(),
    )
    for passed, failed in results:
        ok.extend(passed)
        issues.extend(failed)
    return ok, issues


def main() -> None:
    """Print the OK list followed by the issue list."""
    ok, issues = run_checks()
    print("=== OK ===")
    for message in ok:
        print(message)
    print(f"\n=== ISSUES ({len(issues)}) ===")
    for message in issues:
        print(message)
    if not issues:
        print("None! Ready to train.")


if __name__ == "__main__":
    main()