nbtpj committed on
Commit
0311933
·
verified ·
1 Parent(s): 8594ff4

Upload ray_tune_logs/error.txt with huggingface_hub

Browse files
Files changed (1) hide show
  1. ray_tune_logs/error.txt +20 -0
ray_tune_logs/error.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Failure # 3 (occurred at 2026-02-24_10-08-24)
2
+ ray::_Inner.train() (pid=3649560, ip=10.2.1.31, actor_id=14bdf81e16347950468b531001000000, repr=TorchTrainer)
3
+ File "/common/home/users/m/mq.nguyen.2023/.conda/envs/sac_lm/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 331, in train
4
+ raise skipped from exception_cause(skipped)
5
+ File "/common/home/users/m/mq.nguyen.2023/.conda/envs/sac_lm/lib/python3.10/site-packages/ray/train/_internal/utils.py", line 57, in check_for_failure
6
+ ray.get(object_ref)
7
+ ray.exceptions.RayTaskError(TypeError): ray::_RayTrainWorker__execute.get_next() (pid=3650408, ip=10.2.1.31, actor_id=d649fa4b329c5c67be9e5c5201000000, repr=<ray.train._internal.worker_group.RayTrainWorker object at 0x14d6dc9f2f20>)
8
+ File "/common/home/users/m/mq.nguyen.2023/.conda/envs/sac_lm/lib/python3.10/site-packages/ray/train/_internal/worker_group.py", line 35, in __execute
9
+ raise skipped from exception_cause(skipped)
10
+ File "/common/home/users/m/mq.nguyen.2023/.conda/envs/sac_lm/lib/python3.10/site-packages/ray/train/_internal/utils.py", line 160, in discard_return_wrapper
11
+ train_func(*args, **kwargs)
12
+ File "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/tutils/train_utils.py", line 2303, in train_offpolicy_fn
13
+ inner_training_loop(
14
+ File "/common/home/users/m/mq.nguyen.2023/.conda/envs/sac_lm/lib/python3.10/site-packages/accelerate/utils/memory.py", line 177, in decorator
15
+ return function(batch_size, *args, **kwargs)
16
+ File "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/tutils/train_utils.py", line 2183, in inner_training_loop
17
+ for k, v in rl_collate(rl_samples).items()}
18
+ File "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/tutils/utils.py", line 1251, in __call__
19
+ other_batch[k] += sample[k]
20
+ TypeError: 'int' object is not iterable