File size: 1,202 Bytes
3040bf7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
"""Diagnose the training environment on the RunPod."""
import sys
# Report which Python interpreter is executing this script.
print("Python: {}".format(sys.version))

# Probe transformers: print its version, then confirm that
# TrainingArguments can be imported from it.
try:
    import transformers
    print("Transformers: {}".format(transformers.__version__))
    from transformers import TrainingArguments
    print("TrainingArguments: OK")
except Exception as exc:
    # Report the failure instead of raising so the script keeps going.
    print("Transformers error: {}".format(exc))

# Probe TRL: print the installed version, or the reason the import failed.
try:
    import trl
    print("TRL: {}".format(trl.__version__))
except Exception as exc:
    # Report the failure instead of raising so the script keeps going.
    print("TRL import error: {}".format(exc))

# Confirm that TRL exposes both GRPO entry points (config and trainer).
try:
    from trl import GRPOConfig, GRPOTrainer
    print("TRL GRPOTrainer: OK")
except Exception as exc:
    # Report the failure instead of raising so the script keeps going.
    print("TRL GRPOTrainer error: {}".format(exc))

# Confirm that unsloth.trainer exposes the Unsloth GRPO trainer and config.
try:
    from unsloth.trainer import UnslothGRPOTrainer, UnslothGRPOConfig
    print("UnslothGRPOTrainer: OK")
except Exception as exc:
    # Report the failure instead of raising so the script keeps going.
    print("UnslothGRPOTrainer error: {}".format(exc))

# Probe PyTorch: print its version, whether CUDA is available, and the name
# of device 0 when CUDA is available ('N/A' otherwise).
try:
    import torch
    print(
        "Torch: {}, CUDA: {}, GPU: {}".format(
            torch.__version__,
            torch.cuda.is_available(),
            torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'N/A',
        )
    )
except Exception as exc:
    # Report the failure instead of raising so the script keeps going.
    print("Torch error: {}".format(exc))

# Probe PEFT: print the installed version, or the reason the import failed.
try:
    import peft
    print("PEFT: {}".format(peft.__version__))
except Exception as exc:
    # Report the failure instead of raising so the script keeps going.
    print("PEFT error: {}".format(exc))