Upload ray_tune_logs/error.txt with huggingface_hub
Browse files- ray_tune_logs/error.txt +20 -0
ray_tune_logs/error.txt
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Failure # 3 (occurred at 2026-02-24_10-08-24)
|
| 2 |
+
[36mray::_Inner.train()[39m (pid=3649560, ip=10.2.1.31, actor_id=14bdf81e16347950468b531001000000, repr=TorchTrainer)
|
| 3 |
+
File "/common/home/users/m/mq.nguyen.2023/.conda/envs/sac_lm/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 331, in train
|
| 4 |
+
raise skipped from exception_cause(skipped)
|
| 5 |
+
File "/common/home/users/m/mq.nguyen.2023/.conda/envs/sac_lm/lib/python3.10/site-packages/ray/train/_internal/utils.py", line 57, in check_for_failure
|
| 6 |
+
ray.get(object_ref)
|
| 7 |
+
ray.exceptions.RayTaskError(TypeError): [36mray::_RayTrainWorker__execute.get_next()[39m (pid=3650408, ip=10.2.1.31, actor_id=d649fa4b329c5c67be9e5c5201000000, repr=<ray.train._internal.worker_group.RayTrainWorker object at 0x14d6dc9f2f20>)
|
| 8 |
+
File "/common/home/users/m/mq.nguyen.2023/.conda/envs/sac_lm/lib/python3.10/site-packages/ray/train/_internal/worker_group.py", line 35, in __execute
|
| 9 |
+
raise skipped from exception_cause(skipped)
|
| 10 |
+
File "/common/home/users/m/mq.nguyen.2023/.conda/envs/sac_lm/lib/python3.10/site-packages/ray/train/_internal/utils.py", line 160, in discard_return_wrapper
|
| 11 |
+
train_func(*args, **kwargs)
|
| 12 |
+
File "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/tutils/train_utils.py", line 2303, in train_offpolicy_fn
|
| 13 |
+
inner_training_loop(
|
| 14 |
+
File "/common/home/users/m/mq.nguyen.2023/.conda/envs/sac_lm/lib/python3.10/site-packages/accelerate/utils/memory.py", line 177, in decorator
|
| 15 |
+
return function(batch_size, *args, **kwargs)
|
| 16 |
+
File "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/tutils/train_utils.py", line 2183, in inner_training_loop
|
| 17 |
+
for k, v in rl_collate(rl_samples).items()}
|
| 18 |
+
File "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/tutils/utils.py", line 1251, in __call__
|
| 19 |
+
other_batch[k] += sample[k]
|
| 20 |
+
TypeError: 'int' object is not iterable
|