File size: 1,428 Bytes
0e2fdc4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import os, json
# Results collected by the checks below: human-readable lines describing what
# passed (``ok``) and what would block training (``issues``).
issues = []
ok = []

model_path = "/workspace/rl4phyx/models/Qwen2.5-VL-3B-Instruct"
data_path = "/workspace/rl4phyx/RL4Phyx/SFT/sft_train/sft_train_formatted.jsonl"
sft_dir = "/workspace/rl4phyx/RL4Phyx/SFT"
required_files = ["train_sft.py", "run_sft.sh", "ds_zero2.json"]
out = "/workspace/rl4phyx/RL4Phyx/SFT/checkpoints/"


def check_model(path, passed, failed):
    """Record whether *path* is a directory containing ``.safetensors`` files.

    Appends one line to *passed* (with the file count) or to *failed* (when
    the directory is missing or holds no ``.safetensors`` file).
    """
    if not os.path.isdir(path):
        failed.append(f"Model NOT found: {path}")
        return
    shards = [name for name in os.listdir(path) if name.endswith(".safetensors")]
    if shards:
        passed.append(f"Model: {len(shards)} safetensors")
    else:
        # A directory without weights is not usable, so do not report it as OK.
        failed.append(f"Model dir has no .safetensors files: {path}")


def check_sft_data(path, passed, failed):
    """Count the lines of the JSONL file at *path* and probe its first sample.

    The first line is parsed as JSON; every ``{"type": "image"}`` part found
    in any message's list-valued ``content`` has its ``image`` path (with an
    optional ``file://`` prefix stripped) checked on disk. Existing images are
    appended to *passed*; missing images, an empty file, or an unparsable
    first line are appended to *failed*.
    """
    if not os.path.isfile(path):
        failed.append(f"Data NOT found: {path}")
        return

    # Stream the file: only the line count and the first line are needed.
    first_line = None
    count = 0
    with open(path, encoding="utf-8") as fh:
        for count, line in enumerate(fh, start=1):
            if first_line is None:
                first_line = line
    if first_line is None:
        failed.append(f"SFT data is empty: {path}")
        return
    passed.append(f"SFT data: {count} samples")

    try:
        first = json.loads(first_line)
    except json.JSONDecodeError as err:
        failed.append(f"SFT data: first line is not valid JSON ({err})")
        return

    messages = first.get("messages") if isinstance(first, dict) else None
    if not isinstance(messages, list):
        return
    for message in messages:
        content = message.get("content") if isinstance(message, dict) else None
        if not isinstance(content, list):
            continue  # plain-string content carries no image parts
        for part in content:
            if not isinstance(part, dict) or part.get("type") != "image":
                continue
            image = part.get("image")
            if not isinstance(image, str):
                continue
            if image.startswith("file://"):
                image = image[len("file://"):]
            if os.path.isfile(image):
                passed.append(f"Image exists: True ({image})")
            else:
                failed.append(f"Image NOT found: {image}")


def check_required_files(base, names, passed, failed):
    """Record, for each entry of *names*, whether it is a file under *base*."""
    for name in names:
        if os.path.isfile(os.path.join(base, name)):
            passed.append(f"{name}: OK")
        else:
            failed.append(f"{name} NOT found")


def check_output_dir(path, passed, failed):
    """Create *path* if needed and record whether it is writable."""
    try:
        os.makedirs(path, exist_ok=True)
    except OSError as err:
        failed.append(f"Output dir NOT creatable: {path} ({err})")
        return
    if os.access(path, os.W_OK):
        passed.append("Output dir: writable")
    else:
        failed.append(f"Output dir NOT writable: {path}")


def check_gpus(passed, failed):
    """Record the number and name of CUDA devices visible to torch."""
    # Imported lazily so a missing torch install becomes a reported issue
    # instead of aborting before the report is printed.
    try:
        import torch
    except ImportError as err:
        failed.append(f"torch NOT importable: {err}")
        return
    count = torch.cuda.device_count()
    if count == 0:
        # get_device_name(0) would raise here; report the problem instead.
        failed.append("No CUDA GPUs visible to torch")
        return
    passed.append(f"GPUs: {count} x {torch.cuda.get_device_name(0)}")


def print_report(passed, failed):
    """Print the passed checks followed by the issues found."""
    print("=== OK ===")
    for entry in passed:
        print(entry)
    print(f"\n=== ISSUES ({len(failed)}) ===")
    for entry in failed:
        print(entry)
    if not failed:
        print("None! Ready to train.")


def main():
    """Run every pre-flight check against the configured paths and report."""
    check_model(model_path, ok, issues)
    check_sft_data(data_path, ok, issues)
    check_required_files(sft_dir, required_files, ok, issues)
    check_output_dir(out, ok, issues)
    check_gpus(ok, issues)
    print_report(ok, issues)


if __name__ == "__main__":
    main()