diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed11/checkpoint_p0/best_000048760_24969216_reward_1857.820.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed11/checkpoint_p0/best_000048760_24969216_reward_1857.820.pth new file mode 100644 index 0000000000000000000000000000000000000000..f06a18aa5dd4c6990ecbc8094cef56e10854238c --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed11/checkpoint_p0/best_000048760_24969216_reward_1857.820.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51df0ce7b8c5ecc55db9c68a7d43fa12b53018851a6f12010b5b5ad13bf75fa6 +size 20560697 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed11/checkpoint_p0/checkpoint_000030208_15466496.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed11/checkpoint_p0/checkpoint_000030208_15466496.pth new file mode 100644 index 0000000000000000000000000000000000000000..b87bc4c6a18cc9d353c9e2a9459be676bedac4cb --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed11/checkpoint_p0/checkpoint_000030208_15466496.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa97a0549cdcd1dcdfccfb52cd125a4d6015307ecbfb25c11deb54e8b94d601a +size 20561057 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed11/checkpoint_p0/checkpoint_000048840_25034752.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed11/checkpoint_p0/checkpoint_000048840_25034752.pth new file mode 100644 index 0000000000000000000000000000000000000000..cc0432b4158242b7fb2aa94dffb84b7486b6a707 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed11/checkpoint_p0/checkpoint_000048840_25034752.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bc1d095497373c1d2535a6b650fb630a01276732cad99f85d4ebe2517bcd919 +size 20561057 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed11/episode_metrics.jsonl b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed11/episode_metrics.jsonl index 0f876db182405d5a23acb334b7873abf4c65786b..3a741e7083834c1c9a532458874d09474bfdee3e 100644 --- a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed11/episode_metrics.jsonl +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed11/episode_metrics.jsonl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c927907159808ae8684f0ce82f3c4643009e96a8d0f7c76a9ebbe6bd4dcd41f9 -size 25614023 +oid sha256:af94c9275bde2e4e8ba98196a581c793623234f58e7367a6ecb052a314703f61 +size 21698537 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed11/git.diff b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed11/git.diff index d0a0248311155ef2db8d42aec19fc1abcda13cd5..466fb8b9b61c2e47b54ca5d7f5f930e28515b107 100644 --- a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed11/git.diff +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed11/git.diff @@ -24,3 +24,10 @@ index 18376d9..646fe8f 100644 return "train" +diff --git a/starVLA b/starVLA +index ab3380d..9d8c567 160000 +--- a/starVLA ++++ b/starVLA +@@ -1 +1 @@ +-Subproject commit ab3380dfbd1de9649c15d154cc41b97788674537 ++Subproject commit 9d8c567188a3aa2a825296016cf17f3977101d8f diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed11/sf_log.txt b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed11/sf_log.txt index 7367d89e39187effb22ef239457eecf763dedc03..d5f16a1bddd8ac00849464d3e2a25fdc64adfd42 100644 --- a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed11/sf_log.txt +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed11/sf_log.txt @@ -1,30 +1,30 @@ -[2026-06-06 15:37:32,976][29401] Saving configuration to results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed11/config.json... -[2026-06-06 15:37:33,093][29401] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-06 15:37:33,095][29401] Rollout worker 0 uses device cuda:0 -[2026-06-06 15:37:33,096][29401] Using GPUs [0] for process 1 (actually maps to GPUs [1]) -[2026-06-06 15:37:33,096][29401] Rollout worker 1 uses device cuda:0 -[2026-06-06 15:37:34,467][29401] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-06 15:37:34,468][29401] InferenceWorker_p0-w0: min num requests: 1 -[2026-06-06 15:37:34,479][29401] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-06 15:37:34,488][29401] Using GPUs [0] for process 1 (actually maps to GPUs [1]) -[2026-06-06 15:37:34,491][29401] Starting all processes... -[2026-06-06 15:37:34,491][29401] Starting process learner_proc0 -[2026-06-06 15:37:37,601][29401] Starting all processes... -[2026-06-06 15:37:37,611][29401] Starting process rollout_proc0 -[2026-06-06 15:37:37,612][29401] Starting process rollout_proc1 -[2026-06-06 15:37:37,614][29401] Starting process inference_proc0-0 -[2026-06-06 15:37:38,022][31857] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-06 15:37:38,022][31857] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for learning process 0 -[2026-06-06 15:37:38,023][31857] Num visible devices: 1 -[2026-06-06 15:37:38,024][31857] Setting fixed seed 11 -[2026-06-06 15:37:38,026][31857] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-06 15:37:38,026][31857] Initializing actor-critic model on device cuda:0 -[2026-06-06 15:37:38,026][31857] RunningMeanStd input shape: (3, 84, 84) -[2026-06-06 15:37:38,035][31857] RunningMeanStd input shape: (1,) -[2026-06-06 15:37:38,051][31857] ConvEncoder: input_channels=3 -[2026-06-06 15:37:38,199][31857] Conv encoder output size: 512 -[2026-06-06 15:37:38,202][31857] Created Actor Critic model with architecture: -[2026-06-06 15:37:38,203][31857] ActorCriticSharedWeights( +[2026-06-07 02:47:36,220][464927] Saving configuration to results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed11/config.json... +[2026-06-07 02:47:36,285][464927] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-07 02:47:36,285][464927] Rollout worker 0 uses device cuda:0 +[2026-06-07 02:47:36,286][464927] Using GPUs [0] for process 1 (actually maps to GPUs [1]) +[2026-06-07 02:47:36,286][464927] Rollout worker 1 uses device cuda:0 +[2026-06-07 02:47:37,685][464927] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-07 02:47:37,685][464927] InferenceWorker_p0-w0: min num requests: 1 +[2026-06-07 02:47:37,690][464927] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-07 02:47:37,694][464927] Using GPUs [0] for process 1 (actually maps to GPUs [1]) +[2026-06-07 02:47:37,695][464927] Starting all processes... +[2026-06-07 02:47:37,695][464927] Starting process learner_proc0 +[2026-06-07 02:47:38,858][464927] Starting all processes... +[2026-06-07 02:47:38,862][464927] Starting process inference_proc0-0 +[2026-06-07 02:47:38,863][464927] Starting process rollout_proc0 +[2026-06-07 02:47:38,863][464927] Starting process rollout_proc1 +[2026-06-07 02:47:39,650][472025] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-07 02:47:39,650][472025] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for learning process 0 +[2026-06-07 02:47:39,650][472025] Num visible devices: 1 +[2026-06-07 02:47:39,651][472025] Setting fixed seed 11 +[2026-06-07 02:47:39,652][472025] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-07 02:47:39,652][472025] Initializing actor-critic model on device cuda:0 +[2026-06-07 02:47:39,652][472025] RunningMeanStd input shape: (3, 84, 84) +[2026-06-07 02:47:39,661][472025] RunningMeanStd input shape: (1,) +[2026-06-07 02:47:39,670][472025] ConvEncoder: input_channels=3 +[2026-06-07 02:47:39,741][472025] Conv encoder output size: 512 +[2026-06-07 02:47:39,743][472025] Created Actor Critic model with architecture: +[2026-06-07 02:47:39,743][472025] ActorCriticSharedWeights( (obs_normalizer): ObservationNormalizer( (running_mean_std): RunningMeanStdDictInPlace( (running_mean_std): ModuleDict( @@ -65,6313 +65,5021 @@ (distribution_linear): Linear(in_features=512, out_features=2, bias=True) ) ) -[2026-06-06 15:37:38,226][31857] Using optimizer -[2026-06-06 15:37:40,142][31857] No checkpoints found -[2026-06-06 15:37:40,143][31857] Did not load from checkpoint, starting from scratch! -[2026-06-06 15:37:40,143][31857] Initialized policy 0 weights for model version 0 -[2026-06-06 15:37:40,147][31857] LearnerWorker_p0 finished initialization! -[2026-06-06 15:37:40,147][31857] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-06 15:37:41,630][32122] Worker 1 uses CPU cores [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255] -[2026-06-06 15:37:41,630][32122] Using GPUs [0] for process 1 (actually maps to GPUs [1]) -[2026-06-06 15:37:41,631][32122] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for actor process 1 -[2026-06-06 15:37:41,631][32122] Num visible devices: 1 -[2026-06-06 15:37:42,139][32123] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-06 15:37:42,140][32123] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for inference process 0 -[2026-06-06 15:37:42,140][32123] Num visible devices: 1 -[2026-06-06 15:37:42,187][32123] RunningMeanStd input shape: (3, 84, 84) -[2026-06-06 15:37:42,197][32123] RunningMeanStd input shape: (1,) -[2026-06-06 15:37:42,218][32123] ConvEncoder: input_channels=3 -[2026-06-06 15:37:42,279][29401] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2026-06-06 15:37:42,346][32123] Conv encoder output size: 512 -[2026-06-06 15:37:42,436][29401] Inference worker 0-0 is ready! -[2026-06-06 15:37:42,438][29401] All inference workers are ready! Signal rollout workers to start! -[2026-06-06 15:37:42,439][32122] EnvRunner 1-0 uses policy 0 -[2026-06-06 15:37:43,186][32121] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127] -[2026-06-06 15:37:43,188][32121] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-06 15:37:43,188][32121] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for actor process 0 -[2026-06-06 15:37:43,189][32121] Num visible devices: 1 -[2026-06-06 15:37:43,191][32121] EnvRunner 0-0 uses policy 0 -[2026-06-06 15:37:47,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 947.2. Samples: 4736. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2026-06-06 15:37:47,280][29401] Avg episode reward: [(0, '-7.528')] -[2026-06-06 15:37:49,501][31857] Signal inference workers to stop experience collection... -[2026-06-06 15:37:49,515][32123] InferenceWorker_p0-w0: stopping experience collection -[2026-06-06 15:37:51,317][31857] Signal inference workers to resume experience collection... -[2026-06-06 15:37:51,318][32123] InferenceWorker_p0-w0: resuming experience collection -[2026-06-06 15:37:51,619][31857] EvtLoop [learner_proc0_evt_loop, process=learner_proc0] unhandled exception in slot='on_new_training_batch' connected to emitter=Emitter(object_id='Batcher_0', signal_name='training_batches_available'), args=(1,) -Traceback (most recent call last): - File "/venv/latency/lib/python3.10/site-packages/signal_slot/signal_slot.py", line 355, in _process_signal - slot_callable(*args) - File "/workspace/latency-sensitive-bench/sample-factory/sample_factory/algo/learning/learner_worker.py", line 150, in on_new_training_batch - stats = self.learner.train(self.batcher.training_batches[batch_idx]) - File "/workspace/latency-sensitive-bench/sample-factory/sample_factory/algo/learning/learner.py", line 1036, in train - buff, experience_size, num_invalids = self._prepare_batch(batch) - File "/workspace/latency-sensitive-bench/sample-factory/sample_factory/algo/learning/learner.py", line 1006, in _prepare_batch - d[k] = v.reshape((dataset_size,) + tuple(v.shape[2:])) -torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 2.58 GiB. GPU 0 has a total capacity of 23.52 GiB of which 2.31 GiB is free. Process 1843854 has 4.12 GiB memory in use. Process 1844143 has 4.12 GiB memory in use. Process 1844906 has 684.00 MiB memory in use. Process 1847069 has 682.00 MiB memory in use. Process 2213740 has 1.74 GiB memory in use. Process 2218641 has 7.01 GiB memory in use. Process 2219377 has 1.16 GiB memory in use. Process 2219378 has 542.00 MiB memory in use. Process 2219375 has 1.16 GiB memory in use. Of the allocated memory 3.95 GiB is allocated by PyTorch, and 2.59 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) -[2026-06-06 15:37:51,620][31857] Unhandled exception CUDA out of memory. Tried to allocate 2.58 GiB. GPU 0 has a total capacity of 23.52 GiB of which 2.31 GiB is free. Process 1843854 has 4.12 GiB memory in use. Process 1844143 has 4.12 GiB memory in use. Process 1844906 has 684.00 MiB memory in use. Process 1847069 has 682.00 MiB memory in use. Process 2213740 has 1.74 GiB memory in use. Process 2218641 has 7.01 GiB memory in use. Process 2219377 has 1.16 GiB memory in use. Process 2219378 has 542.00 MiB memory in use. Process 2219375 has 1.16 GiB memory in use. Of the allocated memory 3.95 GiB is allocated by PyTorch, and 2.59 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) in evt loop learner_proc0_evt_loop -[2026-06-06 15:37:52,279][29401] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 3276.8). Total num frames: 32768. Throughput: 0: 7129.6. Samples: 71296. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:37:52,281][29401] Avg episode reward: [(0, '-7.102')] -[2026-06-06 15:37:54,448][29401] Heartbeat connected on Batcher_0 -[2026-06-06 15:37:54,468][29401] Heartbeat connected on InferenceWorker_p0-w0 -[2026-06-06 15:37:54,480][29401] Heartbeat connected on RolloutWorker_w0 -[2026-06-06 15:37:54,489][29401] Heartbeat connected on RolloutWorker_w1 -[2026-06-06 15:37:57,279][29401] Fps is (10 sec: 3276.7, 60 sec: 2184.5, 300 sec: 2184.5). Total num frames: 32768. Throughput: 0: 6971.7. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:37:57,282][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:38:02,279][29401] Fps is (10 sec: 0.0, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 32768. Throughput: 0: 5228.8. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:38:02,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:38:07,279][29401] Fps is (10 sec: 0.0, 60 sec: 1310.7, 300 sec: 1310.7). Total num frames: 32768. Throughput: 0: 4183.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:38:07,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:38:12,279][29401] Fps is (10 sec: 0.0, 60 sec: 1092.3, 300 sec: 1092.3). Total num frames: 32768. Throughput: 0: 3485.9. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:38:12,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:38:17,279][29401] Fps is (10 sec: 0.0, 60 sec: 936.2, 300 sec: 936.2). Total num frames: 32768. Throughput: 0: 2987.9. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:38:17,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:38:22,279][29401] Fps is (10 sec: 0.0, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 32768. Throughput: 0: 2614.4. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:38:22,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:38:27,279][29401] Fps is (10 sec: 0.0, 60 sec: 728.2, 300 sec: 728.2). Total num frames: 32768. Throughput: 0: 2323.9. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:38:27,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:38:32,279][29401] Fps is (10 sec: 0.0, 60 sec: 655.4, 300 sec: 655.4). Total num frames: 32768. Throughput: 0: 2218.7. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:38:32,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:38:37,279][29401] Fps is (10 sec: 0.0, 60 sec: 595.8, 300 sec: 595.8). Total num frames: 32768. Throughput: 0: 739.6. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:38:37,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:38:42,279][29401] Fps is (10 sec: 0.0, 60 sec: 546.1, 300 sec: 546.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:38:42,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:38:47,279][29401] Fps is (10 sec: 0.0, 60 sec: 546.1, 300 sec: 504.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:38:47,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:38:52,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 468.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:38:52,282][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:38:57,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 436.9). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:38:57,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:39:02,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 409.6). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:39:02,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:39:07,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 385.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:39:07,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:39:12,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 364.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:39:12,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:39:17,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 344.9). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:39:17,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:39:22,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 327.7). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:39:22,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:39:27,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 312.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:39:27,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:39:32,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 297.9). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:39:32,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:39:37,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 284.9). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:39:37,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:39:42,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 273.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:39:42,282][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:39:47,281][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 262.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:39:47,282][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:39:52,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 252.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:39:52,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:39:57,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 242.7). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:39:57,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:40:02,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 234.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:40:02,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:40:07,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 226.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:40:07,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:40:12,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 218.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:40:12,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:40:17,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 211.4). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:40:17,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:40:22,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 204.8). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:40:22,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:40:27,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 198.6). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:40:27,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:40:27,286][29401] Components not started: LearnerWorker_p0, wait_time=183.9 seconds -[2026-06-06 15:40:32,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 192.8). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:40:32,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:40:37,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 187.2). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:40:37,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:40:42,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 182.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:40:42,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:40:47,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 177.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:40:47,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:40:52,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 172.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:40:52,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:40:57,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 168.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:40:57,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:41:02,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 163.8). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:41:02,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:41:07,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 159.8). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:41:07,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:41:12,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 156.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:41:12,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:41:17,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 152.4). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:41:17,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:41:22,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 148.9). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:41:22,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:41:27,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 145.6). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:41:27,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:41:32,280][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 142.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:41:32,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:41:37,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 139.4). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:41:37,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:41:42,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 136.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:41:42,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:41:47,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 133.7). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:41:47,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:41:52,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 131.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:41:52,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:41:57,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 128.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:41:57,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:42:02,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 126.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:42:02,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:42:07,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 123.7). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:42:07,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:42:12,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 121.4). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:42:12,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:42:17,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 119.2). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:42:17,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:42:22,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 117.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:42:22,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:42:27,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 115.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:42:27,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:42:32,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 113.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:42:32,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:42:37,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 111.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:42:37,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:42:42,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 111.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:42:42,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:42:47,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:42:47,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:42:52,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:42:52,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:42:57,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:42:57,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:43:02,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:43:02,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:43:07,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:43:07,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:43:12,280][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:43:12,282][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:43:17,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:43:17,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:43:22,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:43:22,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:43:27,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:43:27,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:43:27,288][29401] Components not started: LearnerWorker_p0, wait_time=363.9 seconds -[2026-06-06 15:43:32,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:43:32,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:43:37,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:43:37,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:43:42,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:43:42,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:43:47,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:43:47,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:43:52,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:43:52,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:43:57,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:43:57,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:44:02,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:44:02,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:44:07,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:44:07,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:44:12,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:44:12,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:44:17,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:44:17,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:44:22,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:44:22,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:44:27,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:44:27,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:44:32,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:44:32,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:44:37,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:44:37,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:44:42,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:44:42,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:44:47,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:44:47,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:44:52,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:44:52,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:44:57,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:44:57,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:45:02,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:45:02,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:45:07,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:45:07,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:45:12,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:45:12,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:45:17,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:45:17,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:45:22,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:45:22,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:45:27,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:45:27,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:45:32,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:45:32,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:45:37,280][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:45:37,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:45:42,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:45:42,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:45:47,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:45:47,282][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:45:52,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:45:52,282][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:45:57,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:45:57,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:46:02,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:46:02,280][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:46:07,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:46:07,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:46:12,280][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:46:12,282][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:46:17,280][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:46:17,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:46:22,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:46:22,281][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:46:27,279][29401] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 104576. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-06 15:46:27,282][29401] Avg episode reward: [(0, '-7.388')] -[2026-06-06 15:46:27,291][29401] Components not started: LearnerWorker_p0, wait_time=543.9 seconds -[2026-06-06 15:46:27,292][29401] Components take too long to start: LearnerWorker_p0. Aborting the experiment! - - - -[2026-06-06 15:46:27,296][29401] Component Batcher_0 stopped! -[2026-06-06 15:46:27,296][31857] Stopping Batcher_0... -[2026-06-06 15:46:27,297][31857] Loop batcher_evt_loop terminating... -[2026-06-06 15:46:27,297][29401] Waiting for ['LearnerWorker_p0', 'InferenceWorker_p0-w0', 'RolloutWorker_w0', 'RolloutWorker_w1'] to stop... -[2026-06-06 15:46:27,297][32122] Stopping RolloutWorker_w1... -[2026-06-06 15:46:27,298][29401] Component RolloutWorker_w1 stopped! -[2026-06-06 15:46:27,298][32122] Loop rollout_proc1_evt_loop terminating... -[2026-06-06 15:46:27,299][29401] Waiting for ['LearnerWorker_p0', 'InferenceWorker_p0-w0', 'RolloutWorker_w0'] to stop... -[2026-06-06 15:46:27,299][29401] Component RolloutWorker_w0 stopped! -[2026-06-06 15:46:27,297][32121] Stopping RolloutWorker_w0... -[2026-06-06 15:46:27,299][29401] Waiting for ['LearnerWorker_p0', 'InferenceWorker_p0-w0'] to stop... -[2026-06-06 15:46:27,300][32121] Loop rollout_proc0_evt_loop terminating... -[2026-06-06 15:46:27,423][32123] Weights refcount: 2 0 -[2026-06-06 15:46:27,426][32123] Stopping InferenceWorker_p0-w0... -[2026-06-06 15:46:27,427][32123] Loop inference_proc0-0_evt_loop terminating... -[2026-06-06 15:46:27,427][29401] Component InferenceWorker_p0-w0 stopped! -[2026-06-06 15:46:27,428][29401] Waiting for ['LearnerWorker_p0'] to stop... -[2026-06-07 01:46:39,198][309672] Saving configuration to results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed11/config.json... -[2026-06-07 01:46:39,264][309672] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2026-06-07 01:46:39,264][309672] Rollout worker 0 uses device cuda:0 -[2026-06-07 01:46:39,265][309672] Using GPUs [0] for process 1 (actually maps to GPUs [0]) -[2026-06-07 01:46:39,265][309672] Rollout worker 1 uses device cuda:0 -[2026-06-07 01:46:41,256][309672] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2026-06-07 01:46:41,259][309672] InferenceWorker_p0-w0: min num requests: 1 -[2026-06-07 01:46:41,266][309672] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2026-06-07 01:46:41,271][309672] Using GPUs [0] for process 1 (actually maps to GPUs [0]) -[2026-06-07 01:46:41,273][309672] Starting all processes... -[2026-06-07 01:46:41,273][309672] Starting process learner_proc0 -[2026-06-07 01:46:42,745][309672] Starting all processes... -[2026-06-07 01:46:42,750][309672] Starting process inference_proc0-0 -[2026-06-07 01:46:42,751][309672] Starting process rollout_proc0 -[2026-06-07 01:46:42,752][309672] Starting process rollout_proc1 -[2026-06-07 01:46:43,336][314623] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2026-06-07 01:46:43,336][314623] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 -[2026-06-07 01:46:43,337][314623] Num visible devices: 1 -[2026-06-07 01:46:43,338][314623] Setting fixed seed 11 -[2026-06-07 01:46:43,339][314623] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2026-06-07 01:46:43,339][314623] Initializing actor-critic model on device cuda:0 -[2026-06-07 01:46:43,340][314623] RunningMeanStd input shape: (3, 84, 84) -[2026-06-07 01:46:43,349][314623] RunningMeanStd input shape: (1,) -[2026-06-07 01:46:43,367][314623] ConvEncoder: input_channels=3 -[2026-06-07 01:46:43,486][314623] Conv encoder output size: 512 -[2026-06-07 01:46:43,489][314623] Created Actor Critic model with architecture: -[2026-06-07 01:46:43,489][314623] ActorCriticSharedWeights( - (obs_normalizer): ObservationNormalizer( - (running_mean_std): RunningMeanStdDictInPlace( - (running_mean_std): ModuleDict( - (obs): RunningMeanStdInPlace() - ) - ) - ) - (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) - (encoder): MultiInputEncoder( - (encoders): ModuleDict( - (obs): ConvEncoder( - (enc): RecursiveScriptModule( - original_name=ConvEncoderImpl - (conv_head): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Conv2d) - (1): RecursiveScriptModule(original_name=ELU) - (2): RecursiveScriptModule(original_name=Conv2d) - (3): RecursiveScriptModule(original_name=ELU) - (4): RecursiveScriptModule(original_name=Conv2d) - (5): RecursiveScriptModule(original_name=ELU) - ) - (mlp_layers): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Linear) - (1): RecursiveScriptModule(original_name=ELU) - ) - ) - ) - ) - ) - (core): ModelCoreIdentity() - (decoder): MlpDecoder( - (mlp): Identity() - ) - (critic_linear): Linear(in_features=512, out_features=1, bias=True) - (action_parameterization): ActionParameterizationDefault( - (distribution_linear): Linear(in_features=512, out_features=2, bias=True) - ) -) -[2026-06-07 01:46:43,499][314623] Using optimizer -[2026-06-07 01:46:44,693][314623] No checkpoints found -[2026-06-07 01:46:44,693][314623] Did not load from checkpoint, starting from scratch! -[2026-06-07 01:46:44,693][314623] Initialized policy 0 weights for model version 0 -[2026-06-07 01:46:44,695][314623] LearnerWorker_p0 finished initialization! -[2026-06-07 01:46:44,695][314623] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2026-06-07 01:46:44,896][309672] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2026-06-07 01:46:45,986][315146] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127] -[2026-06-07 01:46:45,986][315146] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2026-06-07 01:46:45,987][315146] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for actor process 0 -[2026-06-07 01:46:45,988][315146] Num visible devices: 1 -[2026-06-07 01:46:46,008][315145] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2026-06-07 01:46:46,008][315145] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 -[2026-06-07 01:46:46,009][315145] Num visible devices: 1 -[2026-06-07 01:46:46,026][315145] RunningMeanStd input shape: (3, 84, 84) -[2026-06-07 01:46:46,034][315145] RunningMeanStd input shape: (1,) -[2026-06-07 01:46:46,053][315145] ConvEncoder: input_channels=3 -[2026-06-07 01:46:46,140][315145] Conv encoder output size: 512 -[2026-06-07 01:46:46,180][309672] Inference worker 0-0 is ready! -[2026-06-07 01:46:46,181][309672] All inference workers are ready! Signal rollout workers to start! -[2026-06-07 01:46:46,182][315146] EnvRunner 0-0 uses policy 0 -[2026-06-07 01:46:46,911][315147] Worker 1 uses CPU cores [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255] -[2026-06-07 01:46:46,911][315147] Using GPUs [0] for process 1 (actually maps to GPUs [0]) -[2026-06-07 01:46:46,912][315147] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for actor process 1 -[2026-06-07 01:46:46,913][315147] Num visible devices: 1 -[2026-06-07 01:46:46,914][315147] EnvRunner 1-0 uses policy 0 -[2026-06-07 01:46:49,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 5171.0. Samples: 25856. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2026-06-07 01:46:49,898][309672] Avg episode reward: [(0, '-7.552')] -[2026-06-07 01:46:51,594][314623] Signal inference workers to stop experience collection... -[2026-06-07 01:46:51,612][315145] InferenceWorker_p0-w0: stopping experience collection -[2026-06-07 01:46:51,622][315145] Updated weights for policy 0, policy_version 10 (0.0008) -[2026-06-07 01:46:53,432][314623] Signal inference workers to resume experience collection... -[2026-06-07 01:46:53,433][315145] InferenceWorker_p0-w0: resuming experience collection -[2026-06-07 01:46:53,658][314623] EvtLoop [learner_proc0_evt_loop, process=learner_proc0] unhandled exception in slot='on_new_training_batch' connected to emitter=Emitter(object_id='Batcher_0', signal_name='training_batches_available'), args=(1,) -Traceback (most recent call last): - File "/venv/latency/lib/python3.10/site-packages/signal_slot/signal_slot.py", line 355, in _process_signal - slot_callable(*args) - File "/workspace/latency-sensitive-bench/sample-factory/sample_factory/algo/learning/learner_worker.py", line 150, in on_new_training_batch - stats = self.learner.train(self.batcher.training_batches[batch_idx]) - File "/workspace/latency-sensitive-bench/sample-factory/sample_factory/algo/learning/learner.py", line 1036, in train - buff, experience_size, num_invalids = self._prepare_batch(batch) - File "/workspace/latency-sensitive-bench/sample-factory/sample_factory/algo/learning/learner.py", line 1006, in _prepare_batch - d[k] = v.reshape((dataset_size,) + tuple(v.shape[2:])) -torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 2.58 GiB. GPU 0 has a total capacity of 23.52 GiB of which 321.75 MiB is free. Process 1048753 has 1.74 GiB memory in use. Process 1048749 has 1.74 GiB memory in use. Process 1056272 has 7.01 GiB memory in use. Process 1056335 has 6.98 GiB memory in use. Process 1057215 has 542.00 MiB memory in use. Process 1057217 has 1.16 GiB memory in use. Process 1057219 has 1.16 GiB memory in use. Process 1057330 has 1.16 GiB memory in use. Process 1057329 has 526.00 MiB memory in use. Process 1057328 has 1.16 GiB memory in use. Of the allocated memory 3.95 GiB is allocated by PyTorch, and 2.59 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) -[2026-06-07 01:46:53,658][314623] Unhandled exception CUDA out of memory. Tried to allocate 2.58 GiB. GPU 0 has a total capacity of 23.52 GiB of which 321.75 MiB is free. Process 1048753 has 1.74 GiB memory in use. Process 1048749 has 1.74 GiB memory in use. Process 1056272 has 7.01 GiB memory in use. Process 1056335 has 6.98 GiB memory in use. Process 1057215 has 542.00 MiB memory in use. Process 1057217 has 1.16 GiB memory in use. Process 1057219 has 1.16 GiB memory in use. Process 1057330 has 1.16 GiB memory in use. Process 1057329 has 526.00 MiB memory in use. Process 1057328 has 1.16 GiB memory in use. Of the allocated memory 3.95 GiB is allocated by PyTorch, and 2.59 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) in evt loop learner_proc0_evt_loop -[2026-06-07 01:46:54,896][309672] Fps is (10 sec: 3276.7, 60 sec: 3276.7, 300 sec: 3276.7). Total num frames: 32768. Throughput: 0: 7052.7. Samples: 70528. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:46:54,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:46:59,896][309672] Fps is (10 sec: 3276.8, 60 sec: 2184.5, 300 sec: 2184.5). Total num frames: 32768. Throughput: 0: 6715.7. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:46:59,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:47:01,244][309672] Heartbeat connected on Batcher_0 -[2026-06-07 01:47:01,256][309672] Heartbeat connected on InferenceWorker_p0-w0 -[2026-06-07 01:47:01,267][309672] Heartbeat connected on RolloutWorker_w0 -[2026-06-07 01:47:01,272][309672] Heartbeat connected on RolloutWorker_w1 -[2026-06-07 01:47:04,896][309672] Fps is (10 sec: 0.0, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 32768. Throughput: 0: 5036.7. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:47:04,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:47:09,896][309672] Fps is (10 sec: 0.0, 60 sec: 1310.7, 300 sec: 1310.7). Total num frames: 32768. Throughput: 0: 4029.4. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:47:09,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:47:14,896][309672] Fps is (10 sec: 0.0, 60 sec: 1092.3, 300 sec: 1092.3). Total num frames: 32768. Throughput: 0: 3357.9. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:47:14,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:47:19,896][309672] Fps is (10 sec: 0.0, 60 sec: 936.2, 300 sec: 936.2). Total num frames: 32768. Throughput: 0: 2878.2. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:47:19,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:47:24,896][309672] Fps is (10 sec: 0.0, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 32768. Throughput: 0: 2518.4. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:47:24,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:47:29,896][309672] Fps is (10 sec: 0.0, 60 sec: 728.2, 300 sec: 728.2). Total num frames: 32768. Throughput: 0: 2238.6. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:47:29,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:47:34,896][309672] Fps is (10 sec: 0.0, 60 sec: 655.4, 300 sec: 655.4). Total num frames: 32768. Throughput: 0: 1664.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:47:34,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:47:39,896][309672] Fps is (10 sec: 0.0, 60 sec: 595.8, 300 sec: 595.8). Total num frames: 32768. Throughput: 0: 671.3. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:47:39,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:47:44,896][309672] Fps is (10 sec: 0.0, 60 sec: 546.1, 300 sec: 546.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:47:44,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:47:49,896][309672] Fps is (10 sec: 0.0, 60 sec: 546.1, 300 sec: 504.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:47:49,898][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:47:54,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 468.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:47:54,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:47:59,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 436.9). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:47:59,898][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:48:04,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 409.6). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:48:04,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:48:09,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 385.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:48:09,899][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:48:14,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 364.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:48:14,898][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:48:19,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 344.9). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:48:19,898][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:48:24,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 327.7). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:48:24,898][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:48:29,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 312.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:48:29,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:48:34,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 297.9). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:48:34,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:48:39,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 284.9). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:48:39,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:48:44,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 273.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:48:44,898][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:48:49,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 262.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:48:49,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:48:54,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 252.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:48:54,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:48:59,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 242.7). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:48:59,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:49:04,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 234.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:49:04,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:49:09,899][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 226.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:49:09,906][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:49:14,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 218.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:49:14,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:49:19,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 211.4). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:49:19,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:49:24,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 204.8). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:49:24,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:49:29,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 198.6). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:49:29,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:49:34,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 192.8). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:49:34,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:49:34,905][309672] Components not started: LearnerWorker_p0, wait_time=182.8 seconds -[2026-06-07 01:49:39,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 187.2). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:49:39,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:49:44,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 182.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:49:44,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:49:49,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 177.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:49:49,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:49:54,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 172.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:49:54,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:49:59,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 168.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:49:59,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:50:04,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 163.8). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:50:04,898][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:50:09,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 159.8). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:50:09,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:50:14,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 156.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:50:14,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:50:19,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 152.4). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:50:19,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:50:24,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 148.9). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:50:24,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:50:29,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 145.6). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:50:29,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:50:34,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 142.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:50:34,898][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:50:39,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 139.4). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:50:39,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:50:44,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 136.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:50:44,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:50:49,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 133.7). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:50:49,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:50:54,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 131.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:50:54,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:50:59,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 128.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:50:59,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:51:04,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 126.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:51:04,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:51:09,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 123.7). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:51:09,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:51:14,897][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 121.4). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:51:14,899][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:51:19,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 119.2). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:51:19,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:51:24,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 117.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:51:24,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:51:29,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 115.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:51:29,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:51:34,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 113.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:51:34,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:51:39,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 111.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:51:39,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:51:44,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 111.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:51:44,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:51:49,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:51:49,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:51:54,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:51:54,896][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:51:59,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:51:59,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:52:04,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:52:04,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:52:09,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:52:09,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:52:14,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:52:14,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:52:19,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:52:19,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:52:24,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:52:24,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:52:29,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:52:29,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:52:34,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:52:34,896][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:52:34,904][309672] Components not started: LearnerWorker_p0, wait_time=362.8 seconds -[2026-06-07 01:52:39,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:52:39,896][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:52:44,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:52:44,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:52:49,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:52:49,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:52:54,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:52:54,896][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:52:59,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:52:59,896][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:53:04,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:53:04,896][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:53:09,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:53:09,896][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:53:14,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:53:14,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:53:19,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:53:19,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:53:24,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:53:24,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:53:29,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:53:29,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:53:34,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:53:34,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:53:39,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:53:39,896][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:53:44,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:53:44,896][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:53:49,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:53:49,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:53:54,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:53:54,896][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:53:59,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:53:59,896][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:54:04,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:54:04,896][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:54:09,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:54:09,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:54:14,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:54:14,896][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:54:19,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:54:19,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:54:24,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:54:24,896][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:54:29,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:54:29,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:54:34,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:54:34,896][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:54:39,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:54:39,896][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:54:44,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:54:44,896][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:54:49,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:54:49,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:54:54,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:54:54,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:54:59,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:54:59,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:55:04,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:55:04,896][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:55:09,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:55:09,896][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:55:14,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:55:14,896][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:55:19,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:55:19,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:55:24,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:55:24,896][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:55:29,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:55:29,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:55:34,896][309672] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 01:55:34,897][309672] Avg episode reward: [(0, '-7.519')] -[2026-06-07 01:55:34,903][309672] Components not started: LearnerWorker_p0, wait_time=542.8 seconds -[2026-06-07 01:55:34,903][309672] Components take too long to start: LearnerWorker_p0. Aborting the experiment! - - - -[2026-06-07 01:55:34,906][315147] Stopping RolloutWorker_w1... -[2026-06-07 01:55:34,906][314623] Stopping Batcher_0... -[2026-06-07 01:55:34,906][315147] Loop rollout_proc1_evt_loop terminating... -[2026-06-07 01:55:34,906][309672] Component RolloutWorker_w1 stopped! -[2026-06-07 01:55:34,906][314623] Loop batcher_evt_loop terminating... -[2026-06-07 01:55:34,907][309672] Waiting for ['Batcher_0', 'LearnerWorker_p0', 'InferenceWorker_p0-w0', 'RolloutWorker_w0'] to stop... -[2026-06-07 01:55:34,906][315146] Stopping RolloutWorker_w0... -[2026-06-07 01:55:34,908][315146] Loop rollout_proc0_evt_loop terminating... -[2026-06-07 01:55:34,907][309672] Component Batcher_0 stopped! -[2026-06-07 01:55:34,908][309672] Waiting for ['LearnerWorker_p0', 'InferenceWorker_p0-w0', 'RolloutWorker_w0'] to stop... -[2026-06-07 01:55:34,909][309672] Component RolloutWorker_w0 stopped! -[2026-06-07 01:55:34,910][309672] Waiting for ['LearnerWorker_p0', 'InferenceWorker_p0-w0'] to stop... -[2026-06-07 01:55:34,979][315145] Weights refcount: 2 0 -[2026-06-07 01:55:34,980][315145] Stopping InferenceWorker_p0-w0... -[2026-06-07 01:55:34,980][315145] Loop inference_proc0-0_evt_loop terminating... -[2026-06-07 01:55:34,980][309672] Component InferenceWorker_p0-w0 stopped! -[2026-06-07 01:55:34,981][309672] Waiting for ['LearnerWorker_p0'] to stop... -[2026-06-07 02:10:24,296][321787] Saving configuration to results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed11/config.json... -[2026-06-07 02:10:24,361][321787] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-07 02:10:24,362][321787] Rollout worker 0 uses device cuda:0 -[2026-06-07 02:10:24,362][321787] Using GPUs [0] for process 1 (actually maps to GPUs [1]) -[2026-06-07 02:10:24,362][321787] Rollout worker 1 uses device cuda:0 -[2026-06-07 02:10:25,487][321787] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-07 02:10:25,488][321787] InferenceWorker_p0-w0: min num requests: 1 -[2026-06-07 02:10:25,494][321787] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-07 02:10:25,500][321787] Using GPUs [0] for process 1 (actually maps to GPUs [1]) -[2026-06-07 02:10:25,500][321787] Starting all processes... -[2026-06-07 02:10:25,501][321787] Starting process learner_proc0 -[2026-06-07 02:10:26,879][321787] Starting all processes... -[2026-06-07 02:10:26,884][321787] Starting process inference_proc0-0 -[2026-06-07 02:10:26,884][321787] Starting process rollout_proc0 -[2026-06-07 02:10:26,884][321787] Starting process rollout_proc1 -[2026-06-07 02:10:27,354][324273] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-07 02:10:27,354][324273] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for learning process 0 -[2026-06-07 02:10:27,354][324273] Num visible devices: 1 -[2026-06-07 02:10:27,355][324273] Setting fixed seed 11 -[2026-06-07 02:10:27,356][324273] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-07 02:10:27,356][324273] Initializing actor-critic model on device cuda:0 -[2026-06-07 02:10:27,357][324273] RunningMeanStd input shape: (3, 84, 84) -[2026-06-07 02:10:27,363][324273] RunningMeanStd input shape: (1,) -[2026-06-07 02:10:27,373][324273] ConvEncoder: input_channels=3 -[2026-06-07 02:10:27,481][324273] Conv encoder output size: 512 -[2026-06-07 02:10:27,483][324273] Created Actor Critic model with architecture: -[2026-06-07 02:10:27,483][324273] ActorCriticSharedWeights( - (obs_normalizer): ObservationNormalizer( - (running_mean_std): RunningMeanStdDictInPlace( - (running_mean_std): ModuleDict( - (obs): RunningMeanStdInPlace() - ) - ) - ) - (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) - (encoder): MultiInputEncoder( - (encoders): ModuleDict( - (obs): ConvEncoder( - (enc): RecursiveScriptModule( - original_name=ConvEncoderImpl - (conv_head): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Conv2d) - (1): RecursiveScriptModule(original_name=ELU) - (2): RecursiveScriptModule(original_name=Conv2d) - (3): RecursiveScriptModule(original_name=ELU) - (4): RecursiveScriptModule(original_name=Conv2d) - (5): RecursiveScriptModule(original_name=ELU) - ) - (mlp_layers): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Linear) - (1): RecursiveScriptModule(original_name=ELU) - ) - ) - ) - ) - ) - (core): ModelCoreIdentity() - (decoder): MlpDecoder( - (mlp): Identity() - ) - (critic_linear): Linear(in_features=512, out_features=1, bias=True) - (action_parameterization): ActionParameterizationDefault( - (distribution_linear): Linear(in_features=512, out_features=2, bias=True) - ) -) -[2026-06-07 02:10:27,500][324273] Using optimizer -[2026-06-07 02:10:28,672][324273] No checkpoints found -[2026-06-07 02:10:28,673][324273] Did not load from checkpoint, starting from scratch! -[2026-06-07 02:10:28,673][324273] Initialized policy 0 weights for model version 0 -[2026-06-07 02:10:28,675][324273] LearnerWorker_p0 finished initialization! -[2026-06-07 02:10:28,675][324273] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-07 02:10:29,201][324537] Worker 1 uses CPU cores [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255] -[2026-06-07 02:10:29,202][324537] Using GPUs [0] for process 1 (actually maps to GPUs [1]) -[2026-06-07 02:10:29,202][324537] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for actor process 1 -[2026-06-07 02:10:29,206][324537] Num visible devices: 1 -[2026-06-07 02:10:30,013][324535] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-07 02:10:30,014][324535] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for inference process 0 -[2026-06-07 02:10:30,015][324535] Num visible devices: 1 -[2026-06-07 02:10:30,042][324535] RunningMeanStd input shape: (3, 84, 84) -[2026-06-07 02:10:30,055][324535] RunningMeanStd input shape: (1,) -[2026-06-07 02:10:30,078][324535] ConvEncoder: input_channels=3 -[2026-06-07 02:10:30,162][324535] Conv encoder output size: 512 -[2026-06-07 02:10:30,182][324536] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127] -[2026-06-07 02:10:30,181][321787] Inference worker 0-0 is ready! -[2026-06-07 02:10:30,183][324536] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-07 02:10:30,183][324536] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for actor process 0 -[2026-06-07 02:10:30,183][321787] All inference workers are ready! Signal rollout workers to start! -[2026-06-07 02:10:30,183][324536] Num visible devices: 1 -[2026-06-07 02:10:30,184][324536] EnvRunner 0-0 uses policy 0 -[2026-06-07 02:10:30,185][324537] EnvRunner 1-0 uses policy 0 -[2026-06-07 02:10:31,010][321787] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2026-06-07 02:10:33,309][324273] Signal inference workers to stop experience collection... -[2026-06-07 02:10:33,315][324535] InferenceWorker_p0-w0: stopping experience collection -[2026-06-07 02:10:34,505][324273] Signal inference workers to resume experience collection... -[2026-06-07 02:10:34,506][324535] InferenceWorker_p0-w0: resuming experience collection -[2026-06-07 02:10:34,856][324535] Updated weights for policy 0, policy_version 73 (0.0075) -[2026-06-07 02:10:35,065][324535] Updated weights for policy 0, policy_version 83 (0.0007) -[2026-06-07 02:10:35,265][324535] Updated weights for policy 0, policy_version 93 (0.0007) -[2026-06-07 02:10:35,468][324535] Updated weights for policy 0, policy_version 103 (0.0007) -[2026-06-07 02:10:35,685][324535] Updated weights for policy 0, policy_version 113 (0.0008) -[2026-06-07 02:10:35,884][324535] Updated weights for policy 0, policy_version 123 (0.0007) -[2026-06-07 02:10:36,010][321787] Fps is (10 sec: 13107.7, 60 sec: 13107.7, 300 sec: 13107.7). Total num frames: 65536. Throughput: 0: 12902.9. Samples: 64512. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) -[2026-06-07 02:10:36,011][321787] Avg episode reward: [(0, '-7.219')] -[2026-06-07 02:10:36,327][324535] Updated weights for policy 0, policy_version 133 (0.0007) -[2026-06-07 02:10:36,521][324535] Updated weights for policy 0, policy_version 143 (0.0007) -[2026-06-07 02:10:36,747][324535] Updated weights for policy 0, policy_version 153 (0.0007) -[2026-06-07 02:10:36,960][324535] Updated weights for policy 0, policy_version 163 (0.0008) -[2026-06-07 02:10:37,157][324535] Updated weights for policy 0, policy_version 173 (0.0007) -[2026-06-07 02:10:37,382][324535] Updated weights for policy 0, policy_version 183 (0.0008) -[2026-06-07 02:10:37,895][324535] Updated weights for policy 0, policy_version 193 (0.0008) -[2026-06-07 02:10:38,118][324535] Updated weights for policy 0, policy_version 203 (0.0007) -[2026-06-07 02:10:38,318][324535] Updated weights for policy 0, policy_version 213 (0.0007) -[2026-06-07 02:10:38,516][324535] Updated weights for policy 0, policy_version 223 (0.0010) -[2026-06-07 02:10:38,744][324535] Updated weights for policy 0, policy_version 233 (0.0009) -[2026-06-07 02:10:38,949][324535] Updated weights for policy 0, policy_version 243 (0.0007) -[2026-06-07 02:10:39,142][324535] Updated weights for policy 0, policy_version 253 (0.0009) -[2026-06-07 02:10:39,618][324535] Updated weights for policy 0, policy_version 263 (0.0007) -[2026-06-07 02:10:39,837][324535] Updated weights for policy 0, policy_version 273 (0.0007) -[2026-06-07 02:10:40,034][324535] Updated weights for policy 0, policy_version 283 (0.0007) -[2026-06-07 02:10:40,240][324535] Updated weights for policy 0, policy_version 293 (0.0007) -[2026-06-07 02:10:40,440][324535] Updated weights for policy 0, policy_version 303 (0.0007) -[2026-06-07 02:10:40,658][324535] Updated weights for policy 0, policy_version 313 (0.0007) -[2026-06-07 02:10:41,010][321787] Fps is (10 sec: 16384.1, 60 sec: 16384.1, 300 sec: 16384.1). Total num frames: 163840. Throughput: 0: 15718.5. Samples: 157184. Policy #0 lag: (min: 63.0, avg: 89.7, max: 127.0) -[2026-06-07 02:10:41,012][321787] Avg episode reward: [(0, '-6.595')] -[2026-06-07 02:10:41,175][324535] Updated weights for policy 0, policy_version 323 (0.0008) -[2026-06-07 02:10:41,383][324535] Updated weights for policy 0, policy_version 333 (0.0007) -[2026-06-07 02:10:41,569][324535] Updated weights for policy 0, policy_version 343 (0.0007) -[2026-06-07 02:10:41,792][324535] Updated weights for policy 0, policy_version 353 (0.0007) -[2026-06-07 02:10:42,018][324535] Updated weights for policy 0, policy_version 363 (0.0010) -[2026-06-07 02:10:42,225][324535] Updated weights for policy 0, policy_version 373 (0.0008) -[2026-06-07 02:10:42,417][324535] Updated weights for policy 0, policy_version 383 (0.0007) -[2026-06-07 02:10:42,439][324273] Saving new best policy, reward=-6.595! -[2026-06-07 02:10:42,906][324535] Updated weights for policy 0, policy_version 393 (0.0007) -[2026-06-07 02:10:43,124][324535] Updated weights for policy 0, policy_version 403 (0.0008) -[2026-06-07 02:10:43,328][324535] Updated weights for policy 0, policy_version 413 (0.0008) -[2026-06-07 02:10:43,532][324535] Updated weights for policy 0, policy_version 423 (0.0007) -[2026-06-07 02:10:43,742][324535] Updated weights for policy 0, policy_version 433 (0.0007) -[2026-06-07 02:10:43,967][324535] Updated weights for policy 0, policy_version 443 (0.0008) -[2026-06-07 02:10:44,490][324535] Updated weights for policy 0, policy_version 453 (0.0007) -[2026-06-07 02:10:44,691][324535] Updated weights for policy 0, policy_version 463 (0.0008) -[2026-06-07 02:10:44,913][324535] Updated weights for policy 0, policy_version 473 (0.0006) -[2026-06-07 02:10:45,121][324535] Updated weights for policy 0, policy_version 483 (0.0007) -[2026-06-07 02:10:45,350][324535] Updated weights for policy 0, policy_version 493 (0.0009) -[2026-06-07 02:10:45,478][321787] Heartbeat connected on Batcher_0 -[2026-06-07 02:10:45,501][321787] Heartbeat connected on RolloutWorker_w1 -[2026-06-07 02:10:45,502][321787] Heartbeat connected on InferenceWorker_p0-w0 -[2026-06-07 02:10:45,533][321787] Heartbeat connected on RolloutWorker_w0 -[2026-06-07 02:10:45,540][324535] Updated weights for policy 0, policy_version 503 (0.0007) -[2026-06-07 02:10:45,747][321787] Heartbeat connected on LearnerWorker_p0 -[2026-06-07 02:10:46,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.5, 300 sec: 17476.5). Total num frames: 262144. Throughput: 0: 18688.2. Samples: 280320. Policy #0 lag: (min: 21.0, avg: 42.8, max: 85.0) -[2026-06-07 02:10:46,011][321787] Avg episode reward: [(0, '-4.786')] -[2026-06-07 02:10:46,093][324535] Updated weights for policy 0, policy_version 513 (0.0007) -[2026-06-07 02:10:46,296][324535] Updated weights for policy 0, policy_version 523 (0.0007) -[2026-06-07 02:10:46,500][324535] Updated weights for policy 0, policy_version 533 (0.0008) -[2026-06-07 02:10:46,691][324535] Updated weights for policy 0, policy_version 543 (0.0010) -[2026-06-07 02:10:46,912][324535] Updated weights for policy 0, policy_version 553 (0.0008) -[2026-06-07 02:10:47,114][324535] Updated weights for policy 0, policy_version 563 (0.0007) -[2026-06-07 02:10:47,311][324535] Updated weights for policy 0, policy_version 573 (0.0010) -[2026-06-07 02:10:47,366][324273] Saving new best policy, reward=-4.786! -[2026-06-07 02:10:47,779][324535] Updated weights for policy 0, policy_version 583 (0.0009) -[2026-06-07 02:10:47,962][324535] Updated weights for policy 0, policy_version 593 (0.0007) -[2026-06-07 02:10:48,209][324535] Updated weights for policy 0, policy_version 603 (0.0007) -[2026-06-07 02:10:48,424][324535] Updated weights for policy 0, policy_version 613 (0.0006) -[2026-06-07 02:10:48,626][324535] Updated weights for policy 0, policy_version 623 (0.0007) -[2026-06-07 02:10:48,835][324535] Updated weights for policy 0, policy_version 633 (0.0007) -[2026-06-07 02:10:49,394][324535] Updated weights for policy 0, policy_version 643 (0.0006) -[2026-06-07 02:10:49,612][324535] Updated weights for policy 0, policy_version 653 (0.0007) -[2026-06-07 02:10:49,814][324535] Updated weights for policy 0, policy_version 663 (0.0007) -[2026-06-07 02:10:50,019][324535] Updated weights for policy 0, policy_version 673 (0.0007) -[2026-06-07 02:10:50,242][324535] Updated weights for policy 0, policy_version 683 (0.0007) -[2026-06-07 02:10:50,460][324535] Updated weights for policy 0, policy_version 693 (0.0006) -[2026-06-07 02:10:50,665][324535] Updated weights for policy 0, policy_version 703 (0.0007) -[2026-06-07 02:10:51,011][321787] Fps is (10 sec: 19658.9, 60 sec: 18021.6, 300 sec: 18021.6). Total num frames: 360448. Throughput: 0: 17023.2. Samples: 340480. Policy #0 lag: (min: 24.0, avg: 61.6, max: 88.0) -[2026-06-07 02:10:51,013][321787] Avg episode reward: [(0, '0.834')] -[2026-06-07 02:10:51,197][324535] Updated weights for policy 0, policy_version 713 (0.0009) -[2026-06-07 02:10:51,410][324535] Updated weights for policy 0, policy_version 723 (0.0009) -[2026-06-07 02:10:51,623][324535] Updated weights for policy 0, policy_version 733 (0.0007) -[2026-06-07 02:10:51,828][324535] Updated weights for policy 0, policy_version 743 (0.0008) -[2026-06-07 02:10:52,042][324535] Updated weights for policy 0, policy_version 754 (0.0010) -[2026-06-07 02:10:52,249][324535] Updated weights for policy 0, policy_version 764 (0.0016) -[2026-06-07 02:10:52,340][324273] Saving new best policy, reward=0.834! -[2026-06-07 02:10:52,883][324535] Updated weights for policy 0, policy_version 774 (0.0010) -[2026-06-07 02:10:53,087][324535] Updated weights for policy 0, policy_version 784 (0.0011) -[2026-06-07 02:10:53,318][324535] Updated weights for policy 0, policy_version 795 (0.0010) -[2026-06-07 02:10:53,504][324535] Updated weights for policy 0, policy_version 805 (0.0007) -[2026-06-07 02:10:53,723][324535] Updated weights for policy 0, policy_version 815 (0.0006) -[2026-06-07 02:10:53,924][324535] Updated weights for policy 0, policy_version 825 (0.0007) -[2026-06-07 02:10:54,589][324535] Updated weights for policy 0, policy_version 837 (0.0007) -[2026-06-07 02:10:54,827][324535] Updated weights for policy 0, policy_version 848 (0.0007) -[2026-06-07 02:10:55,031][324535] Updated weights for policy 0, policy_version 858 (0.0007) -[2026-06-07 02:10:55,260][324535] Updated weights for policy 0, policy_version 868 (0.0007) -[2026-06-07 02:10:55,478][324535] Updated weights for policy 0, policy_version 878 (0.0007) -[2026-06-07 02:10:55,754][324535] Updated weights for policy 0, policy_version 890 (0.0007) -[2026-06-07 02:10:56,010][321787] Fps is (10 sec: 19660.8, 60 sec: 18350.2, 300 sec: 18350.2). Total num frames: 458752. Throughput: 0: 18068.6. Samples: 451712. Policy #0 lag: (min: 63.0, avg: 85.3, max: 127.0) -[2026-06-07 02:10:56,011][321787] Avg episode reward: [(0, '3.846')] -[2026-06-07 02:10:56,017][324273] Saving new best policy, reward=3.846! -[2026-06-07 02:10:56,547][324535] Updated weights for policy 0, policy_version 900 (0.0007) -[2026-06-07 02:10:56,743][324535] Updated weights for policy 0, policy_version 910 (0.0007) -[2026-06-07 02:10:56,927][324535] Updated weights for policy 0, policy_version 920 (0.0008) -[2026-06-07 02:10:57,175][324535] Updated weights for policy 0, policy_version 932 (0.0010) -[2026-06-07 02:10:57,415][324535] Updated weights for policy 0, policy_version 945 (0.0011) -[2026-06-07 02:10:57,612][324535] Updated weights for policy 0, policy_version 955 (0.0011) -[2026-06-07 02:10:58,486][324535] Updated weights for policy 0, policy_version 965 (0.0011) -[2026-06-07 02:10:58,674][324535] Updated weights for policy 0, policy_version 975 (0.0009) -[2026-06-07 02:10:58,887][324535] Updated weights for policy 0, policy_version 985 (0.0011) -[2026-06-07 02:10:59,134][324535] Updated weights for policy 0, policy_version 995 (0.0007) -[2026-06-07 02:10:59,323][324535] Updated weights for policy 0, policy_version 1005 (0.0007) -[2026-06-07 02:10:59,536][324535] Updated weights for policy 0, policy_version 1015 (0.0007) -[2026-06-07 02:11:00,453][324535] Updated weights for policy 0, policy_version 1026 (0.0007) -[2026-06-07 02:11:00,670][324535] Updated weights for policy 0, policy_version 1038 (0.0007) -[2026-06-07 02:11:00,876][324535] Updated weights for policy 0, policy_version 1048 (0.0007) -[2026-06-07 02:11:01,010][321787] Fps is (10 sec: 16385.7, 60 sec: 17476.4, 300 sec: 17476.4). Total num frames: 524288. Throughput: 0: 18457.7. Samples: 553728. Policy #0 lag: (min: 63.0, avg: 85.3, max: 127.0) -[2026-06-07 02:11:01,011][321787] Avg episode reward: [(0, '3.735')] -[2026-06-07 02:11:01,111][324535] Updated weights for policy 0, policy_version 1059 (0.0007) -[2026-06-07 02:11:01,359][324535] Updated weights for policy 0, policy_version 1071 (0.0007) -[2026-06-07 02:11:01,574][324535] Updated weights for policy 0, policy_version 1081 (0.0007) -[2026-06-07 02:11:02,490][324535] Updated weights for policy 0, policy_version 1091 (0.0007) -[2026-06-07 02:11:02,678][324535] Updated weights for policy 0, policy_version 1101 (0.0007) -[2026-06-07 02:11:02,894][324535] Updated weights for policy 0, policy_version 1111 (0.0007) -[2026-06-07 02:11:03,126][324535] Updated weights for policy 0, policy_version 1122 (0.0007) -[2026-06-07 02:11:03,336][324535] Updated weights for policy 0, policy_version 1132 (0.0007) -[2026-06-07 02:11:03,530][324535] Updated weights for policy 0, policy_version 1142 (0.0007) -[2026-06-07 02:11:03,733][324535] Updated weights for policy 0, policy_version 1152 (0.0007) -[2026-06-07 02:11:04,742][324535] Updated weights for policy 0, policy_version 1163 (0.0007) -[2026-06-07 02:11:04,929][324535] Updated weights for policy 0, policy_version 1173 (0.0007) -[2026-06-07 02:11:05,174][324535] Updated weights for policy 0, policy_version 1184 (0.0007) -[2026-06-07 02:11:05,364][324535] Updated weights for policy 0, policy_version 1194 (0.0007) -[2026-06-07 02:11:05,552][324535] Updated weights for policy 0, policy_version 1204 (0.0007) -[2026-06-07 02:11:05,772][324535] Updated weights for policy 0, policy_version 1215 (0.0007) -[2026-06-07 02:11:06,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17788.4, 300 sec: 17788.4). Total num frames: 622592. Throughput: 0: 17082.6. Samples: 597888. Policy #0 lag: (min: 0.0, avg: 13.2, max: 64.0) -[2026-06-07 02:11:06,011][321787] Avg episode reward: [(0, '4.102')] -[2026-06-07 02:11:06,017][324273] Saving new best policy, reward=4.102! -[2026-06-07 02:11:06,733][324535] Updated weights for policy 0, policy_version 1225 (0.0007) -[2026-06-07 02:11:06,924][324535] Updated weights for policy 0, policy_version 1235 (0.0007) -[2026-06-07 02:11:07,139][324535] Updated weights for policy 0, policy_version 1245 (0.0007) -[2026-06-07 02:11:07,357][324535] Updated weights for policy 0, policy_version 1255 (0.0007) -[2026-06-07 02:11:07,564][324535] Updated weights for policy 0, policy_version 1265 (0.0007) -[2026-06-07 02:11:07,760][324535] Updated weights for policy 0, policy_version 1275 (0.0007) -[2026-06-07 02:11:08,731][324535] Updated weights for policy 0, policy_version 1285 (0.0007) -[2026-06-07 02:11:08,950][324535] Updated weights for policy 0, policy_version 1295 (0.0007) -[2026-06-07 02:11:09,138][324535] Updated weights for policy 0, policy_version 1305 (0.0007) -[2026-06-07 02:11:09,340][324535] Updated weights for policy 0, policy_version 1315 (0.0007) -[2026-06-07 02:11:09,567][324535] Updated weights for policy 0, policy_version 1325 (0.0007) -[2026-06-07 02:11:09,777][324535] Updated weights for policy 0, policy_version 1335 (0.0006) -[2026-06-07 02:11:10,742][324535] Updated weights for policy 0, policy_version 1345 (0.0007) -[2026-06-07 02:11:10,948][324535] Updated weights for policy 0, policy_version 1355 (0.0007) -[2026-06-07 02:11:11,010][321787] Fps is (10 sec: 16383.3, 60 sec: 17203.1, 300 sec: 17203.1). Total num frames: 688128. Throughput: 0: 17359.9. Samples: 694400. Policy #0 lag: (min: 63.0, avg: 77.0, max: 127.0) -[2026-06-07 02:11:11,012][321787] Avg episode reward: [(0, '4.135')] -[2026-06-07 02:11:11,154][324535] Updated weights for policy 0, policy_version 1365 (0.0007) -[2026-06-07 02:11:11,384][324535] Updated weights for policy 0, policy_version 1376 (0.0007) -[2026-06-07 02:11:11,570][324535] Updated weights for policy 0, policy_version 1386 (0.0007) -[2026-06-07 02:11:11,788][324535] Updated weights for policy 0, policy_version 1396 (0.0007) -[2026-06-07 02:11:11,996][324535] Updated weights for policy 0, policy_version 1406 (0.0007) -[2026-06-07 02:11:12,017][324273] Saving new best policy, reward=4.135! -[2026-06-07 02:11:12,934][324535] Updated weights for policy 0, policy_version 1416 (0.0007) -[2026-06-07 02:11:13,134][324535] Updated weights for policy 0, policy_version 1426 (0.0007) -[2026-06-07 02:11:13,326][324535] Updated weights for policy 0, policy_version 1436 (0.0007) -[2026-06-07 02:11:13,530][324535] Updated weights for policy 0, policy_version 1446 (0.0007) -[2026-06-07 02:11:13,745][324535] Updated weights for policy 0, policy_version 1456 (0.0007) -[2026-06-07 02:11:13,975][324535] Updated weights for policy 0, policy_version 1467 (0.0007) -[2026-06-07 02:11:14,970][324535] Updated weights for policy 0, policy_version 1478 (0.0007) -[2026-06-07 02:11:15,169][324535] Updated weights for policy 0, policy_version 1488 (0.0007) -[2026-06-07 02:11:15,383][324535] Updated weights for policy 0, policy_version 1498 (0.0007) -[2026-06-07 02:11:15,599][324535] Updated weights for policy 0, policy_version 1509 (0.0007) -[2026-06-07 02:11:15,823][324535] Updated weights for policy 0, policy_version 1519 (0.0007) -[2026-06-07 02:11:16,003][324535] Updated weights for policy 0, policy_version 1529 (0.0007) -[2026-06-07 02:11:16,010][321787] Fps is (10 sec: 13107.2, 60 sec: 16748.2, 300 sec: 16748.2). Total num frames: 753664. Throughput: 0: 17610.0. Samples: 792448. Policy #0 lag: (min: 63.0, avg: 75.8, max: 127.0) -[2026-06-07 02:11:16,011][321787] Avg episode reward: [(0, '4.652')] -[2026-06-07 02:11:16,126][324273] Saving new best policy, reward=4.652! -[2026-06-07 02:11:16,909][324535] Updated weights for policy 0, policy_version 1539 (0.0007) -[2026-06-07 02:11:17,132][324535] Updated weights for policy 0, policy_version 1550 (0.0007) -[2026-06-07 02:11:17,333][324535] Updated weights for policy 0, policy_version 1560 (0.0007) -[2026-06-07 02:11:17,566][324535] Updated weights for policy 0, policy_version 1570 (0.0007) -[2026-06-07 02:11:17,774][324535] Updated weights for policy 0, policy_version 1580 (0.0008) -[2026-06-07 02:11:18,008][324535] Updated weights for policy 0, policy_version 1590 (0.0008) -[2026-06-07 02:11:18,201][324535] Updated weights for policy 0, policy_version 1600 (0.0007) -[2026-06-07 02:11:19,136][324535] Updated weights for policy 0, policy_version 1610 (0.0007) -[2026-06-07 02:11:19,370][324535] Updated weights for policy 0, policy_version 1621 (0.0007) -[2026-06-07 02:11:19,591][324535] Updated weights for policy 0, policy_version 1632 (0.0009) -[2026-06-07 02:11:19,792][324535] Updated weights for policy 0, policy_version 1642 (0.0007) -[2026-06-07 02:11:19,986][324535] Updated weights for policy 0, policy_version 1652 (0.0010) -[2026-06-07 02:11:20,209][324535] Updated weights for policy 0, policy_version 1662 (0.0012) -[2026-06-07 02:11:21,010][321787] Fps is (10 sec: 16384.6, 60 sec: 17039.4, 300 sec: 17039.4). Total num frames: 851968. Throughput: 0: 17115.0. Samples: 834688. Policy #0 lag: (min: 2.0, avg: 22.2, max: 66.0) -[2026-06-07 02:11:21,011][321787] Avg episode reward: [(0, '5.057')] -[2026-06-07 02:11:21,194][324535] Updated weights for policy 0, policy_version 1672 (0.0007) -[2026-06-07 02:11:21,401][324535] Updated weights for policy 0, policy_version 1682 (0.0007) -[2026-06-07 02:11:21,623][324535] Updated weights for policy 0, policy_version 1692 (0.0007) -[2026-06-07 02:11:21,839][324535] Updated weights for policy 0, policy_version 1702 (0.0007) -[2026-06-07 02:11:22,083][324535] Updated weights for policy 0, policy_version 1713 (0.0007) -[2026-06-07 02:11:22,278][324535] Updated weights for policy 0, policy_version 1723 (0.0007) -[2026-06-07 02:11:22,374][324273] Saving new best policy, reward=5.057! -[2026-06-07 02:11:23,218][324535] Updated weights for policy 0, policy_version 1733 (0.0007) -[2026-06-07 02:11:23,477][324535] Updated weights for policy 0, policy_version 1745 (0.0007) -[2026-06-07 02:11:23,712][324535] Updated weights for policy 0, policy_version 1756 (0.0007) -[2026-06-07 02:11:23,917][324535] Updated weights for policy 0, policy_version 1766 (0.0007) -[2026-06-07 02:11:24,155][324535] Updated weights for policy 0, policy_version 1777 (0.0008) -[2026-06-07 02:11:24,350][324535] Updated weights for policy 0, policy_version 1787 (0.0007) -[2026-06-07 02:11:25,271][324535] Updated weights for policy 0, policy_version 1797 (0.0007) -[2026-06-07 02:11:25,470][324535] Updated weights for policy 0, policy_version 1807 (0.0007) -[2026-06-07 02:11:25,679][324535] Updated weights for policy 0, policy_version 1818 (0.0007) -[2026-06-07 02:11:25,882][324535] Updated weights for policy 0, policy_version 1828 (0.0007) -[2026-06-07 02:11:26,010][321787] Fps is (10 sec: 16384.0, 60 sec: 16681.9, 300 sec: 16681.9). Total num frames: 917504. Throughput: 0: 17197.5. Samples: 931072. Policy #0 lag: (min: 42.0, avg: 66.5, max: 106.0) -[2026-06-07 02:11:26,011][321787] Avg episode reward: [(0, '5.525')] -[2026-06-07 02:11:26,086][324535] Updated weights for policy 0, policy_version 1839 (0.0007) -[2026-06-07 02:11:26,301][324535] Updated weights for policy 0, policy_version 1850 (0.0007) -[2026-06-07 02:11:26,433][324273] Saving new best policy, reward=5.525! -[2026-06-07 02:11:27,288][324535] Updated weights for policy 0, policy_version 1860 (0.0007) -[2026-06-07 02:11:27,530][324535] Updated weights for policy 0, policy_version 1871 (0.0007) -[2026-06-07 02:11:27,777][324535] Updated weights for policy 0, policy_version 1883 (0.0007) -[2026-06-07 02:11:28,003][324535] Updated weights for policy 0, policy_version 1893 (0.0007) -[2026-06-07 02:11:28,225][324535] Updated weights for policy 0, policy_version 1903 (0.0007) -[2026-06-07 02:11:28,430][324535] Updated weights for policy 0, policy_version 1913 (0.0007) -[2026-06-07 02:11:29,368][324535] Updated weights for policy 0, policy_version 1925 (0.0007) -[2026-06-07 02:11:29,592][324535] Updated weights for policy 0, policy_version 1936 (0.0007) -[2026-06-07 02:11:29,871][324535] Updated weights for policy 0, policy_version 1949 (0.0007) -[2026-06-07 02:11:30,100][324535] Updated weights for policy 0, policy_version 1961 (0.0007) -[2026-06-07 02:11:30,353][324535] Updated weights for policy 0, policy_version 1973 (0.0007) -[2026-06-07 02:11:31,010][321787] Fps is (10 sec: 16383.7, 60 sec: 16930.1, 300 sec: 16930.1). Total num frames: 1015808. Throughput: 0: 16620.0. Samples: 1028224. Policy #0 lag: (min: 2.0, avg: 26.4, max: 66.0) -[2026-06-07 02:11:31,012][321787] Avg episode reward: [(0, '5.805')] -[2026-06-07 02:11:31,023][324273] Saving new best policy, reward=5.805! -[2026-06-07 02:11:31,384][324535] Updated weights for policy 0, policy_version 1985 (0.0007) -[2026-06-07 02:11:31,609][324535] Updated weights for policy 0, policy_version 1995 (0.0007) -[2026-06-07 02:11:31,787][324535] Updated weights for policy 0, policy_version 2005 (0.0007) -[2026-06-07 02:11:31,997][324535] Updated weights for policy 0, policy_version 2015 (0.0007) -[2026-06-07 02:11:32,205][324535] Updated weights for policy 0, policy_version 2025 (0.0007) -[2026-06-07 02:11:32,423][324535] Updated weights for policy 0, policy_version 2035 (0.0007) -[2026-06-07 02:11:32,643][324535] Updated weights for policy 0, policy_version 2046 (0.0007) -[2026-06-07 02:11:33,685][324535] Updated weights for policy 0, policy_version 2059 (0.0007) -[2026-06-07 02:11:33,887][324535] Updated weights for policy 0, policy_version 2069 (0.0007) -[2026-06-07 02:11:34,074][324535] Updated weights for policy 0, policy_version 2079 (0.0007) -[2026-06-07 02:11:34,285][324535] Updated weights for policy 0, policy_version 2090 (0.0007) -[2026-06-07 02:11:34,533][324535] Updated weights for policy 0, policy_version 2102 (0.0007) -[2026-06-07 02:11:35,552][324535] Updated weights for policy 0, policy_version 2113 (0.0007) -[2026-06-07 02:11:35,756][324535] Updated weights for policy 0, policy_version 2123 (0.0007) -[2026-06-07 02:11:36,010][321787] Fps is (10 sec: 16384.0, 60 sec: 16930.1, 300 sec: 16636.1). Total num frames: 1081344. Throughput: 0: 16464.0. Samples: 1081344. Policy #0 lag: (min: 63.0, avg: 75.9, max: 127.0) -[2026-06-07 02:11:36,011][321787] Avg episode reward: [(0, '6.282')] -[2026-06-07 02:11:36,011][324535] Updated weights for policy 0, policy_version 2136 (0.0007) -[2026-06-07 02:11:36,238][324535] Updated weights for policy 0, policy_version 2147 (0.0007) -[2026-06-07 02:11:36,454][324535] Updated weights for policy 0, policy_version 2157 (0.0007) -[2026-06-07 02:11:36,670][324535] Updated weights for policy 0, policy_version 2168 (0.0007) -[2026-06-07 02:11:36,831][324273] Saving new best policy, reward=6.282! -[2026-06-07 02:11:37,660][324535] Updated weights for policy 0, policy_version 2178 (0.0007) -[2026-06-07 02:11:37,872][324535] Updated weights for policy 0, policy_version 2188 (0.0010) -[2026-06-07 02:11:38,093][324535] Updated weights for policy 0, policy_version 2198 (0.0011) -[2026-06-07 02:11:38,307][324535] Updated weights for policy 0, policy_version 2208 (0.0011) -[2026-06-07 02:11:38,510][324535] Updated weights for policy 0, policy_version 2218 (0.0010) -[2026-06-07 02:11:38,746][324535] Updated weights for policy 0, policy_version 2229 (0.0011) -[2026-06-07 02:11:38,968][324535] Updated weights for policy 0, policy_version 2240 (0.0011) -[2026-06-07 02:11:39,884][324535] Updated weights for policy 0, policy_version 2250 (0.0007) -[2026-06-07 02:11:40,116][324535] Updated weights for policy 0, policy_version 2261 (0.0007) -[2026-06-07 02:11:40,335][324535] Updated weights for policy 0, policy_version 2272 (0.0007) -[2026-06-07 02:11:40,592][324535] Updated weights for policy 0, policy_version 2284 (0.0007) -[2026-06-07 02:11:40,797][324535] Updated weights for policy 0, policy_version 2294 (0.0007) -[2026-06-07 02:11:41,004][324535] Updated weights for policy 0, policy_version 2304 (0.0007) -[2026-06-07 02:11:41,010][321787] Fps is (10 sec: 16384.3, 60 sec: 16930.2, 300 sec: 16852.2). Total num frames: 1179648. Throughput: 0: 16022.8. Samples: 1172736. Policy #0 lag: (min: 63.0, avg: 76.2, max: 127.0) -[2026-06-07 02:11:41,011][321787] Avg episode reward: [(0, '6.929')] -[2026-06-07 02:11:41,017][324273] Saving new best policy, reward=6.929! -[2026-06-07 02:11:41,999][324535] Updated weights for policy 0, policy_version 2316 (0.0007) -[2026-06-07 02:11:42,184][324535] Updated weights for policy 0, policy_version 2326 (0.0007) -[2026-06-07 02:11:42,402][324535] Updated weights for policy 0, policy_version 2336 (0.0007) -[2026-06-07 02:11:42,651][324535] Updated weights for policy 0, policy_version 2348 (0.0007) -[2026-06-07 02:11:42,839][324535] Updated weights for policy 0, policy_version 2358 (0.0007) -[2026-06-07 02:11:43,062][324535] Updated weights for policy 0, policy_version 2368 (0.0007) -[2026-06-07 02:11:44,052][324535] Updated weights for policy 0, policy_version 2379 (0.0007) -[2026-06-07 02:11:44,246][324535] Updated weights for policy 0, policy_version 2389 (0.0007) -[2026-06-07 02:11:44,464][324535] Updated weights for policy 0, policy_version 2399 (0.0007) -[2026-06-07 02:11:44,701][324535] Updated weights for policy 0, policy_version 2410 (0.0007) -[2026-06-07 02:11:44,905][324535] Updated weights for policy 0, policy_version 2420 (0.0007) -[2026-06-07 02:11:45,873][324535] Updated weights for policy 0, policy_version 2433 (0.0007) -[2026-06-07 02:11:46,010][321787] Fps is (10 sec: 16384.0, 60 sec: 16384.0, 300 sec: 16602.5). Total num frames: 1245184. Throughput: 0: 15775.3. Samples: 1263616. Policy #0 lag: (min: 29.0, avg: 42.6, max: 93.0) -[2026-06-07 02:11:46,011][321787] Avg episode reward: [(0, '7.572')] -[2026-06-07 02:11:46,072][324535] Updated weights for policy 0, policy_version 2443 (0.0007) -[2026-06-07 02:11:46,270][324535] Updated weights for policy 0, policy_version 2453 (0.0007) -[2026-06-07 02:11:46,486][324535] Updated weights for policy 0, policy_version 2463 (0.0007) -[2026-06-07 02:11:46,708][324535] Updated weights for policy 0, policy_version 2473 (0.0007) -[2026-06-07 02:11:46,930][324535] Updated weights for policy 0, policy_version 2483 (0.0007) -[2026-06-07 02:11:47,126][324535] Updated weights for policy 0, policy_version 2493 (0.0007) -[2026-06-07 02:11:47,182][324273] Saving new best policy, reward=7.572! -[2026-06-07 02:11:48,054][324535] Updated weights for policy 0, policy_version 2503 (0.0007) -[2026-06-07 02:11:48,268][324535] Updated weights for policy 0, policy_version 2513 (0.0007) -[2026-06-07 02:11:48,491][324535] Updated weights for policy 0, policy_version 2523 (0.0007) -[2026-06-07 02:11:48,701][324535] Updated weights for policy 0, policy_version 2533 (0.0007) -[2026-06-07 02:11:48,913][324535] Updated weights for policy 0, policy_version 2543 (0.0007) -[2026-06-07 02:11:49,171][324535] Updated weights for policy 0, policy_version 2555 (0.0007) -[2026-06-07 02:11:50,178][324535] Updated weights for policy 0, policy_version 2566 (0.0009) -[2026-06-07 02:11:50,392][324535] Updated weights for policy 0, policy_version 2576 (0.0010) -[2026-06-07 02:11:50,594][324535] Updated weights for policy 0, policy_version 2586 (0.0010) -[2026-06-07 02:11:50,839][324535] Updated weights for policy 0, policy_version 2598 (0.0011) -[2026-06-07 02:11:51,010][321787] Fps is (10 sec: 13107.2, 60 sec: 15838.1, 300 sec: 16384.0). Total num frames: 1310720. Throughput: 0: 16017.1. Samples: 1318656. Policy #0 lag: (min: 62.0, avg: 75.2, max: 126.0) -[2026-06-07 02:11:51,011][321787] Avg episode reward: [(0, '8.166')] -[2026-06-07 02:11:51,042][324535] Updated weights for policy 0, policy_version 2608 (0.0007) -[2026-06-07 02:11:51,260][324535] Updated weights for policy 0, policy_version 2618 (0.0007) -[2026-06-07 02:11:51,387][324273] Saving new best policy, reward=8.166! -[2026-06-07 02:11:52,213][324535] Updated weights for policy 0, policy_version 2629 (0.0007) -[2026-06-07 02:11:52,446][324535] Updated weights for policy 0, policy_version 2640 (0.0007) -[2026-06-07 02:11:52,651][324535] Updated weights for policy 0, policy_version 2650 (0.0007) -[2026-06-07 02:11:52,861][324535] Updated weights for policy 0, policy_version 2661 (0.0007) -[2026-06-07 02:11:53,059][324535] Updated weights for policy 0, policy_version 2671 (0.0007) -[2026-06-07 02:11:53,278][324535] Updated weights for policy 0, policy_version 2681 (0.0007) -[2026-06-07 02:11:54,238][324535] Updated weights for policy 0, policy_version 2691 (0.0007) -[2026-06-07 02:11:54,432][324535] Updated weights for policy 0, policy_version 2701 (0.0007) -[2026-06-07 02:11:54,629][324535] Updated weights for policy 0, policy_version 2711 (0.0007) -[2026-06-07 02:11:54,873][324535] Updated weights for policy 0, policy_version 2722 (0.0007) -[2026-06-07 02:11:55,085][324535] Updated weights for policy 0, policy_version 2732 (0.0007) -[2026-06-07 02:11:55,280][324535] Updated weights for policy 0, policy_version 2742 (0.0007) -[2026-06-07 02:11:56,010][321787] Fps is (10 sec: 16384.0, 60 sec: 15837.9, 300 sec: 16576.8). Total num frames: 1409024. Throughput: 0: 16014.4. Samples: 1415040. Policy #0 lag: (min: 41.0, avg: 71.9, max: 105.0) -[2026-06-07 02:11:56,011][321787] Avg episode reward: [(0, '8.595')] -[2026-06-07 02:11:56,016][324273] Saving new best policy, reward=8.595! -[2026-06-07 02:11:56,171][324535] Updated weights for policy 0, policy_version 2753 (0.0007) -[2026-06-07 02:11:56,378][324535] Updated weights for policy 0, policy_version 2764 (0.0007) -[2026-06-07 02:11:56,610][324535] Updated weights for policy 0, policy_version 2775 (0.0007) -[2026-06-07 02:11:56,832][324535] Updated weights for policy 0, policy_version 2786 (0.0007) -[2026-06-07 02:11:57,054][324535] Updated weights for policy 0, policy_version 2796 (0.0007) -[2026-06-07 02:11:57,274][324535] Updated weights for policy 0, policy_version 2807 (0.0007) -[2026-06-07 02:11:58,276][324535] Updated weights for policy 0, policy_version 2818 (0.0007) -[2026-06-07 02:11:58,499][324535] Updated weights for policy 0, policy_version 2829 (0.0007) -[2026-06-07 02:11:58,715][324535] Updated weights for policy 0, policy_version 2839 (0.0007) -[2026-06-07 02:11:58,925][324535] Updated weights for policy 0, policy_version 2849 (0.0007) -[2026-06-07 02:11:59,125][324535] Updated weights for policy 0, policy_version 2859 (0.0007) -[2026-06-07 02:11:59,312][324535] Updated weights for policy 0, policy_version 2869 (0.0007) -[2026-06-07 02:11:59,547][324273] Early stopping after 8 epochs (64 sgd steps), loss delta 0.0000001 -[2026-06-07 02:11:59,548][324535] Updated weights for policy 0, policy_version 2880 (0.0007) -[2026-06-07 02:12:00,514][324535] Updated weights for policy 0, policy_version 2891 (0.0007) -[2026-06-07 02:12:00,779][324535] Updated weights for policy 0, policy_version 2903 (0.0007) -[2026-06-07 02:12:00,975][324535] Updated weights for policy 0, policy_version 2913 (0.0006) -[2026-06-07 02:12:01,010][321787] Fps is (10 sec: 16384.0, 60 sec: 15837.9, 300 sec: 16384.0). Total num frames: 1474560. Throughput: 0: 16039.8. Samples: 1514240. Policy #0 lag: (min: 41.0, avg: 71.9, max: 105.0) -[2026-06-07 02:12:01,011][321787] Avg episode reward: [(0, '8.778')] -[2026-06-07 02:12:01,187][324535] Updated weights for policy 0, policy_version 2923 (0.0006) -[2026-06-07 02:12:01,395][324535] Updated weights for policy 0, policy_version 2933 (0.0007) -[2026-06-07 02:12:01,616][324535] Updated weights for policy 0, policy_version 2943 (0.0007) -[2026-06-07 02:12:01,634][324273] Saving new best policy, reward=8.778! -[2026-06-07 02:12:02,504][324535] Updated weights for policy 0, policy_version 2953 (0.0007) -[2026-06-07 02:12:02,777][324535] Updated weights for policy 0, policy_version 2966 (0.0007) -[2026-06-07 02:12:02,984][324535] Updated weights for policy 0, policy_version 2976 (0.0007) -[2026-06-07 02:12:03,292][324535] Updated weights for policy 0, policy_version 2991 (0.0007) -[2026-06-07 02:12:03,518][324535] Updated weights for policy 0, policy_version 3002 (0.0007) -[2026-06-07 02:12:04,517][324535] Updated weights for policy 0, policy_version 3013 (0.0009) -[2026-06-07 02:12:04,719][324535] Updated weights for policy 0, policy_version 3023 (0.0011) -[2026-06-07 02:12:04,968][324535] Updated weights for policy 0, policy_version 3035 (0.0011) -[2026-06-07 02:12:05,152][324535] Updated weights for policy 0, policy_version 3045 (0.0011) -[2026-06-07 02:12:05,399][324535] Updated weights for policy 0, policy_version 3057 (0.0012) -[2026-06-07 02:12:05,627][324535] Updated weights for policy 0, policy_version 3068 (0.0012) -[2026-06-07 02:12:06,010][321787] Fps is (10 sec: 16384.0, 60 sec: 15837.9, 300 sec: 16556.5). Total num frames: 1572864. Throughput: 0: 16005.7. Samples: 1554944. Policy #0 lag: (min: 63.0, avg: 77.1, max: 127.0) -[2026-06-07 02:12:06,011][321787] Avg episode reward: [(0, '9.303')] -[2026-06-07 02:12:06,016][324273] Saving new best policy, reward=9.303! -[2026-06-07 02:12:06,627][324535] Updated weights for policy 0, policy_version 3078 (0.0010) -[2026-06-07 02:12:06,849][324535] Updated weights for policy 0, policy_version 3089 (0.0011) -[2026-06-07 02:12:07,064][324535] Updated weights for policy 0, policy_version 3100 (0.0009) -[2026-06-07 02:12:07,353][324535] Updated weights for policy 0, policy_version 3113 (0.0012) -[2026-06-07 02:12:07,558][324535] Updated weights for policy 0, policy_version 3123 (0.0011) -[2026-06-07 02:12:07,796][324535] Updated weights for policy 0, policy_version 3134 (0.0012) -[2026-06-07 02:12:08,731][324535] Updated weights for policy 0, policy_version 3144 (0.0008) -[2026-06-07 02:12:08,926][324535] Updated weights for policy 0, policy_version 3154 (0.0007) -[2026-06-07 02:12:09,151][324535] Updated weights for policy 0, policy_version 3164 (0.0007) -[2026-06-07 02:12:09,347][324535] Updated weights for policy 0, policy_version 3174 (0.0007) -[2026-06-07 02:12:09,571][324535] Updated weights for policy 0, policy_version 3185 (0.0009) -[2026-06-07 02:12:09,798][324535] Updated weights for policy 0, policy_version 3196 (0.0011) -[2026-06-07 02:12:10,782][324535] Updated weights for policy 0, policy_version 3207 (0.0008) -[2026-06-07 02:12:10,990][324535] Updated weights for policy 0, policy_version 3218 (0.0007) -[2026-06-07 02:12:11,010][321787] Fps is (10 sec: 16383.9, 60 sec: 15838.0, 300 sec: 16384.0). Total num frames: 1638400. Throughput: 0: 16034.1. Samples: 1652608. Policy #0 lag: (min: 63.0, avg: 76.6, max: 127.0) -[2026-06-07 02:12:11,011][321787] Avg episode reward: [(0, '11.247')] -[2026-06-07 02:12:11,215][324535] Updated weights for policy 0, policy_version 3229 (0.0007) -[2026-06-07 02:12:11,425][324535] Updated weights for policy 0, policy_version 3240 (0.0007) -[2026-06-07 02:12:11,655][324535] Updated weights for policy 0, policy_version 3251 (0.0007) -[2026-06-07 02:12:11,921][324535] Updated weights for policy 0, policy_version 3262 (0.0007) -[2026-06-07 02:12:11,946][324273] Saving new best policy, reward=11.247! -[2026-06-07 02:12:12,870][324535] Updated weights for policy 0, policy_version 3272 (0.0007) -[2026-06-07 02:12:13,101][324535] Updated weights for policy 0, policy_version 3282 (0.0007) -[2026-06-07 02:12:13,292][324535] Updated weights for policy 0, policy_version 3292 (0.0010) -[2026-06-07 02:12:13,553][324535] Updated weights for policy 0, policy_version 3304 (0.0010) -[2026-06-07 02:12:13,754][324535] Updated weights for policy 0, policy_version 3315 (0.0007) -[2026-06-07 02:12:13,969][324535] Updated weights for policy 0, policy_version 3325 (0.0007) -[2026-06-07 02:12:14,870][324535] Updated weights for policy 0, policy_version 3335 (0.0007) -[2026-06-07 02:12:15,106][324535] Updated weights for policy 0, policy_version 3347 (0.0007) -[2026-06-07 02:12:15,330][324535] Updated weights for policy 0, policy_version 3358 (0.0007) -[2026-06-07 02:12:15,516][324535] Updated weights for policy 0, policy_version 3368 (0.0007) -[2026-06-07 02:12:15,701][324535] Updated weights for policy 0, policy_version 3378 (0.0007) -[2026-06-07 02:12:15,925][324535] Updated weights for policy 0, policy_version 3389 (0.0007) -[2026-06-07 02:12:16,010][321787] Fps is (10 sec: 16384.0, 60 sec: 16384.0, 300 sec: 16540.1). Total num frames: 1736704. Throughput: 0: 16039.9. Samples: 1750016. Policy #0 lag: (min: 52.0, avg: 63.4, max: 116.0) -[2026-06-07 02:12:16,011][321787] Avg episode reward: [(0, '11.827')] -[2026-06-07 02:12:16,017][324273] Saving new best policy, reward=11.827! -[2026-06-07 02:12:16,912][324535] Updated weights for policy 0, policy_version 3401 (0.0009) -[2026-06-07 02:12:17,149][324535] Updated weights for policy 0, policy_version 3412 (0.0007) -[2026-06-07 02:12:17,384][324535] Updated weights for policy 0, policy_version 3424 (0.0007) -[2026-06-07 02:12:17,603][324535] Updated weights for policy 0, policy_version 3434 (0.0007) -[2026-06-07 02:12:17,808][324535] Updated weights for policy 0, policy_version 3444 (0.0007) -[2026-06-07 02:12:18,761][324535] Updated weights for policy 0, policy_version 3457 (0.0007) -[2026-06-07 02:12:19,007][324535] Updated weights for policy 0, policy_version 3468 (0.0007) -[2026-06-07 02:12:19,219][324535] Updated weights for policy 0, policy_version 3478 (0.0007) -[2026-06-07 02:12:19,427][324535] Updated weights for policy 0, policy_version 3490 (0.0007) -[2026-06-07 02:12:19,671][324535] Updated weights for policy 0, policy_version 3501 (0.0007) -[2026-06-07 02:12:19,892][324535] Updated weights for policy 0, policy_version 3512 (0.0007) -[2026-06-07 02:12:20,842][324535] Updated weights for policy 0, policy_version 3523 (0.0007) -[2026-06-07 02:12:21,010][321787] Fps is (10 sec: 16384.1, 60 sec: 15837.9, 300 sec: 16384.0). Total num frames: 1802240. Throughput: 0: 15843.6. Samples: 1794304. Policy #0 lag: (min: 63.0, avg: 76.8, max: 127.0) -[2026-06-07 02:12:21,011][321787] Avg episode reward: [(0, '13.577')] -[2026-06-07 02:12:21,067][324535] Updated weights for policy 0, policy_version 3534 (0.0007) -[2026-06-07 02:12:21,274][324535] Updated weights for policy 0, policy_version 3544 (0.0007) -[2026-06-07 02:12:21,479][324535] Updated weights for policy 0, policy_version 3554 (0.0007) -[2026-06-07 02:12:21,675][324535] Updated weights for policy 0, policy_version 3564 (0.0007) -[2026-06-07 02:12:21,906][324535] Updated weights for policy 0, policy_version 3575 (0.0007) -[2026-06-07 02:12:22,078][324273] Saving new best policy, reward=13.577! -[2026-06-07 02:12:22,855][324535] Updated weights for policy 0, policy_version 3587 (0.0007) -[2026-06-07 02:12:23,070][324535] Updated weights for policy 0, policy_version 3597 (0.0007) -[2026-06-07 02:12:23,254][324535] Updated weights for policy 0, policy_version 3607 (0.0007) -[2026-06-07 02:12:23,462][324535] Updated weights for policy 0, policy_version 3617 (0.0007) -[2026-06-07 02:12:23,693][324535] Updated weights for policy 0, policy_version 3628 (0.0007) -[2026-06-07 02:12:23,891][324535] Updated weights for policy 0, policy_version 3638 (0.0007) -[2026-06-07 02:12:24,829][324535] Updated weights for policy 0, policy_version 3649 (0.0007) -[2026-06-07 02:12:25,052][324535] Updated weights for policy 0, policy_version 3659 (0.0007) -[2026-06-07 02:12:25,256][324535] Updated weights for policy 0, policy_version 3670 (0.0007) -[2026-06-07 02:12:25,442][324535] Updated weights for policy 0, policy_version 3680 (0.0007) -[2026-06-07 02:12:25,630][324535] Updated weights for policy 0, policy_version 3690 (0.0007) -[2026-06-07 02:12:25,838][324535] Updated weights for policy 0, policy_version 3700 (0.0007) -[2026-06-07 02:12:26,010][321787] Fps is (10 sec: 13107.2, 60 sec: 15837.9, 300 sec: 16241.6). Total num frames: 1867776. Throughput: 0: 15951.7. Samples: 1890560. Policy #0 lag: (min: 63.0, avg: 76.8, max: 127.0) -[2026-06-07 02:12:26,011][321787] Avg episode reward: [(0, '14.080')] -[2026-06-07 02:12:26,030][324535] Updated weights for policy 0, policy_version 3710 (0.0007) -[2026-06-07 02:12:26,062][324273] Saving new best policy, reward=14.080! -[2026-06-07 02:12:26,988][324535] Updated weights for policy 0, policy_version 3723 (0.0007) -[2026-06-07 02:12:27,218][324535] Updated weights for policy 0, policy_version 3734 (0.0007) -[2026-06-07 02:12:27,449][324535] Updated weights for policy 0, policy_version 3745 (0.0007) -[2026-06-07 02:12:27,664][324535] Updated weights for policy 0, policy_version 3756 (0.0007) -[2026-06-07 02:12:27,879][324535] Updated weights for policy 0, policy_version 3767 (0.0007) -[2026-06-07 02:12:28,751][324535] Updated weights for policy 0, policy_version 3777 (0.0007) -[2026-06-07 02:12:28,965][324535] Updated weights for policy 0, policy_version 3787 (0.0007) -[2026-06-07 02:12:29,216][324535] Updated weights for policy 0, policy_version 3799 (0.0007) -[2026-06-07 02:12:29,429][324535] Updated weights for policy 0, policy_version 3809 (0.0007) -[2026-06-07 02:12:29,640][324535] Updated weights for policy 0, policy_version 3819 (0.0010) -[2026-06-07 02:12:29,895][324535] Updated weights for policy 0, policy_version 3830 (0.0011) -[2026-06-07 02:12:30,817][324535] Updated weights for policy 0, policy_version 3842 (0.0010) -[2026-06-07 02:12:31,010][321787] Fps is (10 sec: 16384.0, 60 sec: 15837.9, 300 sec: 16384.0). Total num frames: 1966080. Throughput: 0: 16116.6. Samples: 1988864. Policy #0 lag: (min: 1.0, avg: 41.9, max: 65.0) -[2026-06-07 02:12:31,011][321787] Avg episode reward: [(0, '18.084')] -[2026-06-07 02:12:31,025][324535] Updated weights for policy 0, policy_version 3853 (0.0007) -[2026-06-07 02:12:31,247][324535] Updated weights for policy 0, policy_version 3863 (0.0007) -[2026-06-07 02:12:31,483][324535] Updated weights for policy 0, policy_version 3874 (0.0007) -[2026-06-07 02:12:31,699][324535] Updated weights for policy 0, policy_version 3885 (0.0007) -[2026-06-07 02:12:31,893][324535] Updated weights for policy 0, policy_version 3895 (0.0007) -[2026-06-07 02:12:32,077][324273] Saving new best policy, reward=18.084! -[2026-06-07 02:12:32,837][324535] Updated weights for policy 0, policy_version 3906 (0.0008) -[2026-06-07 02:12:33,062][324535] Updated weights for policy 0, policy_version 3917 (0.0008) -[2026-06-07 02:12:33,309][324535] Updated weights for policy 0, policy_version 3929 (0.0011) -[2026-06-07 02:12:33,517][324535] Updated weights for policy 0, policy_version 3939 (0.0009) -[2026-06-07 02:12:33,737][324535] Updated weights for policy 0, policy_version 3950 (0.0008) -[2026-06-07 02:12:33,973][324535] Updated weights for policy 0, policy_version 3961 (0.0011) -[2026-06-07 02:12:34,888][324535] Updated weights for policy 0, policy_version 3972 (0.0008) -[2026-06-07 02:12:35,107][324535] Updated weights for policy 0, policy_version 3983 (0.0007) -[2026-06-07 02:12:35,330][324535] Updated weights for policy 0, policy_version 3995 (0.0007) -[2026-06-07 02:12:35,553][324535] Updated weights for policy 0, policy_version 4006 (0.0007) -[2026-06-07 02:12:35,810][324535] Updated weights for policy 0, policy_version 4018 (0.0007) -[2026-06-07 02:12:35,996][324535] Updated weights for policy 0, policy_version 4028 (0.0007) -[2026-06-07 02:12:36,010][321787] Fps is (10 sec: 16384.0, 60 sec: 15837.9, 300 sec: 16253.0). Total num frames: 2031616. Throughput: 0: 16105.2. Samples: 2043392. Policy #0 lag: (min: 63.0, avg: 75.6, max: 127.0) -[2026-06-07 02:12:36,011][321787] Avg episode reward: [(0, '19.128')] -[2026-06-07 02:12:36,068][324273] Saving new best policy, reward=19.128! -[2026-06-07 02:12:36,955][324535] Updated weights for policy 0, policy_version 4038 (0.0007) -[2026-06-07 02:12:37,160][324535] Updated weights for policy 0, policy_version 4048 (0.0007) -[2026-06-07 02:12:37,352][324535] Updated weights for policy 0, policy_version 4058 (0.0006) -[2026-06-07 02:12:37,582][324535] Updated weights for policy 0, policy_version 4069 (0.0007) -[2026-06-07 02:12:37,780][324535] Updated weights for policy 0, policy_version 4080 (0.0007) -[2026-06-07 02:12:38,003][324535] Updated weights for policy 0, policy_version 4091 (0.0007) -[2026-06-07 02:12:38,891][324535] Updated weights for policy 0, policy_version 4101 (0.0007) -[2026-06-07 02:12:39,132][324535] Updated weights for policy 0, policy_version 4112 (0.0007) -[2026-06-07 02:12:39,338][324535] Updated weights for policy 0, policy_version 4122 (0.0007) -[2026-06-07 02:12:39,554][324535] Updated weights for policy 0, policy_version 4132 (0.0007) -[2026-06-07 02:12:39,769][324535] Updated weights for policy 0, policy_version 4142 (0.0007) -[2026-06-07 02:12:40,025][324535] Updated weights for policy 0, policy_version 4154 (0.0007) -[2026-06-07 02:12:40,892][324535] Updated weights for policy 0, policy_version 4164 (0.0007) -[2026-06-07 02:12:41,010][321787] Fps is (10 sec: 16384.0, 60 sec: 15837.9, 300 sec: 16384.0). Total num frames: 2129920. Throughput: 0: 16184.9. Samples: 2143360. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) -[2026-06-07 02:12:41,011][321787] Avg episode reward: [(0, '21.898')] -[2026-06-07 02:12:41,096][324535] Updated weights for policy 0, policy_version 4175 (0.0007) -[2026-06-07 02:12:41,328][324535] Updated weights for policy 0, policy_version 4186 (0.0007) -[2026-06-07 02:12:41,543][324535] Updated weights for policy 0, policy_version 4197 (0.0007) -[2026-06-07 02:12:41,783][324535] Updated weights for policy 0, policy_version 4209 (0.0007) -[2026-06-07 02:12:42,029][324535] Updated weights for policy 0, policy_version 4221 (0.0009) -[2026-06-07 02:12:42,071][324273] Saving new best policy, reward=21.898! -[2026-06-07 02:12:42,975][324535] Updated weights for policy 0, policy_version 4232 (0.0008) -[2026-06-07 02:12:43,216][324535] Updated weights for policy 0, policy_version 4243 (0.0007) -[2026-06-07 02:12:43,426][324535] Updated weights for policy 0, policy_version 4253 (0.0007) -[2026-06-07 02:12:43,656][324535] Updated weights for policy 0, policy_version 4263 (0.0007) -[2026-06-07 02:12:43,854][324535] Updated weights for policy 0, policy_version 4273 (0.0008) -[2026-06-07 02:12:44,082][324535] Updated weights for policy 0, policy_version 4284 (0.0007) -[2026-06-07 02:12:44,931][324535] Updated weights for policy 0, policy_version 4294 (0.0007) -[2026-06-07 02:12:45,133][324535] Updated weights for policy 0, policy_version 4304 (0.0006) -[2026-06-07 02:12:45,349][324535] Updated weights for policy 0, policy_version 4314 (0.0007) -[2026-06-07 02:12:45,597][324535] Updated weights for policy 0, policy_version 4326 (0.0011) -[2026-06-07 02:12:45,804][324535] Updated weights for policy 0, policy_version 4336 (0.0011) -[2026-06-07 02:12:46,010][321787] Fps is (10 sec: 16383.6, 60 sec: 15837.8, 300 sec: 16262.6). Total num frames: 2195456. Throughput: 0: 16159.2. Samples: 2241408. Policy #0 lag: (min: 27.0, avg: 72.6, max: 91.0) -[2026-06-07 02:12:46,012][321787] Avg episode reward: [(0, '25.830')] -[2026-06-07 02:12:46,018][324535] Updated weights for policy 0, policy_version 4346 (0.0010) -[2026-06-07 02:12:46,126][324273] Saving new best policy, reward=25.830! -[2026-06-07 02:12:46,913][324535] Updated weights for policy 0, policy_version 4358 (0.0011) -[2026-06-07 02:12:47,164][324535] Updated weights for policy 0, policy_version 4370 (0.0011) -[2026-06-07 02:12:47,358][324535] Updated weights for policy 0, policy_version 4380 (0.0011) -[2026-06-07 02:12:47,556][324535] Updated weights for policy 0, policy_version 4390 (0.0011) -[2026-06-07 02:12:47,806][324535] Updated weights for policy 0, policy_version 4401 (0.0012) -[2026-06-07 02:12:47,998][324535] Updated weights for policy 0, policy_version 4411 (0.0011) -[2026-06-07 02:12:48,878][324535] Updated weights for policy 0, policy_version 4422 (0.0011) -[2026-06-07 02:12:49,088][324535] Updated weights for policy 0, policy_version 4432 (0.0008) -[2026-06-07 02:12:49,284][324535] Updated weights for policy 0, policy_version 4442 (0.0007) -[2026-06-07 02:12:49,476][324535] Updated weights for policy 0, policy_version 4452 (0.0007) -[2026-06-07 02:12:49,678][324535] Updated weights for policy 0, policy_version 4462 (0.0007) -[2026-06-07 02:12:49,884][324535] Updated weights for policy 0, policy_version 4472 (0.0007) -[2026-06-07 02:12:50,721][324535] Updated weights for policy 0, policy_version 4482 (0.0007) -[2026-06-07 02:12:50,937][324535] Updated weights for policy 0, policy_version 4492 (0.0007) -[2026-06-07 02:12:51,010][321787] Fps is (10 sec: 16384.0, 60 sec: 16384.0, 300 sec: 16384.0). Total num frames: 2293760. Throughput: 0: 16219.0. Samples: 2284800. Policy #0 lag: (min: 31.0, avg: 45.2, max: 95.0) -[2026-06-07 02:12:51,011][321787] Avg episode reward: [(0, '28.776')] -[2026-06-07 02:12:51,167][324535] Updated weights for policy 0, policy_version 4503 (0.0007) -[2026-06-07 02:12:51,379][324535] Updated weights for policy 0, policy_version 4513 (0.0007) -[2026-06-07 02:12:51,583][324535] Updated weights for policy 0, policy_version 4524 (0.0007) -[2026-06-07 02:12:51,784][324535] Updated weights for policy 0, policy_version 4534 (0.0007) -[2026-06-07 02:12:51,999][324273] Saving new best policy, reward=28.776! -[2026-06-07 02:12:52,001][324535] Updated weights for policy 0, policy_version 4544 (0.0007) -[2026-06-07 02:12:52,906][324535] Updated weights for policy 0, policy_version 4555 (0.0007) -[2026-06-07 02:12:53,141][324535] Updated weights for policy 0, policy_version 4566 (0.0007) -[2026-06-07 02:12:53,367][324535] Updated weights for policy 0, policy_version 4577 (0.0007) -[2026-06-07 02:12:53,582][324535] Updated weights for policy 0, policy_version 4588 (0.0007) -[2026-06-07 02:12:53,784][324535] Updated weights for policy 0, policy_version 4598 (0.0007) -[2026-06-07 02:12:54,639][324535] Updated weights for policy 0, policy_version 4609 (0.0007) -[2026-06-07 02:12:54,843][324535] Updated weights for policy 0, policy_version 4619 (0.0007) -[2026-06-07 02:12:55,035][324535] Updated weights for policy 0, policy_version 4629 (0.0007) -[2026-06-07 02:12:55,231][324535] Updated weights for policy 0, policy_version 4639 (0.0007) -[2026-06-07 02:12:55,464][324535] Updated weights for policy 0, policy_version 4649 (0.0007) -[2026-06-07 02:12:55,663][324535] Updated weights for policy 0, policy_version 4659 (0.0007) -[2026-06-07 02:12:55,902][324535] Updated weights for policy 0, policy_version 4669 (0.0007) -[2026-06-07 02:12:56,010][321787] Fps is (10 sec: 19661.3, 60 sec: 16384.0, 300 sec: 16497.0). Total num frames: 2392064. Throughput: 0: 16341.4. Samples: 2387968. Policy #0 lag: (min: 42.0, avg: 85.8, max: 106.0) -[2026-06-07 02:12:56,011][321787] Avg episode reward: [(0, '37.061')] -[2026-06-07 02:12:56,017][324273] Saving new best policy, reward=37.061! -[2026-06-07 02:12:56,711][324535] Updated weights for policy 0, policy_version 4679 (0.0007) -[2026-06-07 02:12:56,925][324535] Updated weights for policy 0, policy_version 4689 (0.0007) -[2026-06-07 02:12:57,133][324535] Updated weights for policy 0, policy_version 4699 (0.0007) -[2026-06-07 02:12:57,356][324535] Updated weights for policy 0, policy_version 4709 (0.0007) -[2026-06-07 02:12:57,553][324535] Updated weights for policy 0, policy_version 4719 (0.0007) -[2026-06-07 02:12:57,760][324535] Updated weights for policy 0, policy_version 4729 (0.0007) -[2026-06-07 02:12:58,619][324535] Updated weights for policy 0, policy_version 4739 (0.0007) -[2026-06-07 02:12:58,826][324535] Updated weights for policy 0, policy_version 4749 (0.0007) -[2026-06-07 02:12:59,029][324535] Updated weights for policy 0, policy_version 4759 (0.0007) -[2026-06-07 02:12:59,240][324535] Updated weights for policy 0, policy_version 4769 (0.0007) -[2026-06-07 02:12:59,442][324535] Updated weights for policy 0, policy_version 4779 (0.0007) -[2026-06-07 02:12:59,662][324535] Updated weights for policy 0, policy_version 4790 (0.0007) -[2026-06-07 02:13:00,559][324535] Updated weights for policy 0, policy_version 4802 (0.0007) -[2026-06-07 02:13:00,769][324535] Updated weights for policy 0, policy_version 4812 (0.0007) -[2026-06-07 02:13:00,972][324535] Updated weights for policy 0, policy_version 4822 (0.0007) -[2026-06-07 02:13:01,010][321787] Fps is (10 sec: 16384.0, 60 sec: 16384.0, 300 sec: 16384.0). Total num frames: 2457600. Throughput: 0: 16483.6. Samples: 2491776. Policy #0 lag: (min: 42.0, avg: 85.8, max: 106.0) -[2026-06-07 02:13:01,011][321787] Avg episode reward: [(0, '37.961')] -[2026-06-07 02:13:01,186][324535] Updated weights for policy 0, policy_version 4832 (0.0007) -[2026-06-07 02:13:01,392][324535] Updated weights for policy 0, policy_version 4842 (0.0007) -[2026-06-07 02:13:01,615][324535] Updated weights for policy 0, policy_version 4852 (0.0007) -[2026-06-07 02:13:01,826][324535] Updated weights for policy 0, policy_version 4862 (0.0007) -[2026-06-07 02:13:01,866][324273] Saving new best policy, reward=37.961! -[2026-06-07 02:13:02,628][324535] Updated weights for policy 0, policy_version 4872 (0.0008) -[2026-06-07 02:13:02,838][324535] Updated weights for policy 0, policy_version 4882 (0.0010) -[2026-06-07 02:13:03,041][324535] Updated weights for policy 0, policy_version 4892 (0.0011) -[2026-06-07 02:13:03,264][324535] Updated weights for policy 0, policy_version 4903 (0.0010) -[2026-06-07 02:13:03,475][324535] Updated weights for policy 0, policy_version 4913 (0.0007) -[2026-06-07 02:13:03,677][324535] Updated weights for policy 0, policy_version 4923 (0.0007) -[2026-06-07 02:13:04,486][324535] Updated weights for policy 0, policy_version 4933 (0.0007) -[2026-06-07 02:13:04,682][324535] Updated weights for policy 0, policy_version 4943 (0.0007) -[2026-06-07 02:13:04,925][324535] Updated weights for policy 0, policy_version 4954 (0.0007) -[2026-06-07 02:13:05,135][324535] Updated weights for policy 0, policy_version 4964 (0.0007) -[2026-06-07 02:13:05,332][324535] Updated weights for policy 0, policy_version 4974 (0.0006) -[2026-06-07 02:13:05,573][324535] Updated weights for policy 0, policy_version 4985 (0.0006) -[2026-06-07 02:13:06,010][321787] Fps is (10 sec: 16384.1, 60 sec: 16384.0, 300 sec: 16489.7). Total num frames: 2555904. Throughput: 0: 16580.3. Samples: 2540416. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) -[2026-06-07 02:13:06,011][321787] Avg episode reward: [(0, '47.094')] -[2026-06-07 02:13:06,015][324273] Saving new best policy, reward=46.550! -[2026-06-07 02:13:06,369][324535] Updated weights for policy 0, policy_version 4995 (0.0010) -[2026-06-07 02:13:06,589][324535] Updated weights for policy 0, policy_version 5006 (0.0011) -[2026-06-07 02:13:06,805][324535] Updated weights for policy 0, policy_version 5016 (0.0011) -[2026-06-07 02:13:07,013][324535] Updated weights for policy 0, policy_version 5026 (0.0011) -[2026-06-07 02:13:07,215][324535] Updated weights for policy 0, policy_version 5036 (0.0011) -[2026-06-07 02:13:07,415][324535] Updated weights for policy 0, policy_version 5046 (0.0011) -[2026-06-07 02:13:07,620][324535] Updated weights for policy 0, policy_version 5056 (0.0011) -[2026-06-07 02:13:08,477][324535] Updated weights for policy 0, policy_version 5067 (0.0007) -[2026-06-07 02:13:08,671][324535] Updated weights for policy 0, policy_version 5077 (0.0007) -[2026-06-07 02:13:08,873][324535] Updated weights for policy 0, policy_version 5087 (0.0007) -[2026-06-07 02:13:09,075][324535] Updated weights for policy 0, policy_version 5097 (0.0007) -[2026-06-07 02:13:09,319][324535] Updated weights for policy 0, policy_version 5108 (0.0007) -[2026-06-07 02:13:09,518][324535] Updated weights for policy 0, policy_version 5118 (0.0007) -[2026-06-07 02:13:10,328][324535] Updated weights for policy 0, policy_version 5128 (0.0010) -[2026-06-07 02:13:10,557][324535] Updated weights for policy 0, policy_version 5139 (0.0011) -[2026-06-07 02:13:10,762][324535] Updated weights for policy 0, policy_version 5149 (0.0010) -[2026-06-07 02:13:10,963][324535] Updated weights for policy 0, policy_version 5159 (0.0007) -[2026-06-07 02:13:11,010][321787] Fps is (10 sec: 16383.7, 60 sec: 16384.0, 300 sec: 16384.0). Total num frames: 2621440. Throughput: 0: 16685.4. Samples: 2641408. Policy #0 lag: (min: 63.0, avg: 76.7, max: 127.0) -[2026-06-07 02:13:11,011][321787] Avg episode reward: [(0, '45.285')] -[2026-06-07 02:13:11,195][324535] Updated weights for policy 0, policy_version 5170 (0.0007) -[2026-06-07 02:13:11,399][324535] Updated weights for policy 0, policy_version 5181 (0.0009) -[2026-06-07 02:13:12,209][324535] Updated weights for policy 0, policy_version 5191 (0.0011) -[2026-06-07 02:13:12,413][324535] Updated weights for policy 0, policy_version 5201 (0.0010) -[2026-06-07 02:13:12,618][324535] Updated weights for policy 0, policy_version 5211 (0.0011) -[2026-06-07 02:13:12,823][324535] Updated weights for policy 0, policy_version 5221 (0.0011) -[2026-06-07 02:13:13,027][324535] Updated weights for policy 0, policy_version 5231 (0.0007) -[2026-06-07 02:13:13,235][324535] Updated weights for policy 0, policy_version 5241 (0.0007) -[2026-06-07 02:13:14,033][324535] Updated weights for policy 0, policy_version 5251 (0.0007) -[2026-06-07 02:13:14,253][324535] Updated weights for policy 0, policy_version 5262 (0.0007) -[2026-06-07 02:13:14,470][324535] Updated weights for policy 0, policy_version 5272 (0.0008) -[2026-06-07 02:13:14,669][324535] Updated weights for policy 0, policy_version 5282 (0.0010) -[2026-06-07 02:13:14,863][324535] Updated weights for policy 0, policy_version 5292 (0.0010) -[2026-06-07 02:13:15,063][324535] Updated weights for policy 0, policy_version 5302 (0.0010) -[2026-06-07 02:13:15,243][324535] Updated weights for policy 0, policy_version 5312 (0.0010) -[2026-06-07 02:13:16,010][321787] Fps is (10 sec: 16383.9, 60 sec: 16384.0, 300 sec: 16483.3). Total num frames: 2719744. Throughput: 0: 16742.4. Samples: 2742272. Policy #0 lag: (min: 63.0, avg: 76.7, max: 127.0) -[2026-06-07 02:13:16,011][321787] Avg episode reward: [(0, '65.997')] -[2026-06-07 02:13:16,077][324535] Updated weights for policy 0, policy_version 5322 (0.0011) -[2026-06-07 02:13:16,284][324535] Updated weights for policy 0, policy_version 5332 (0.0011) -[2026-06-07 02:13:16,507][324535] Updated weights for policy 0, policy_version 5342 (0.0011) -[2026-06-07 02:13:16,705][324535] Updated weights for policy 0, policy_version 5352 (0.0011) -[2026-06-07 02:13:16,909][324535] Updated weights for policy 0, policy_version 5362 (0.0011) -[2026-06-07 02:13:17,118][324535] Updated weights for policy 0, policy_version 5372 (0.0010) -[2026-06-07 02:13:17,214][324273] Saving new best policy, reward=65.997! -[2026-06-07 02:13:17,943][324535] Updated weights for policy 0, policy_version 5382 (0.0007) -[2026-06-07 02:13:18,157][324535] Updated weights for policy 0, policy_version 5392 (0.0007) -[2026-06-07 02:13:18,385][324535] Updated weights for policy 0, policy_version 5402 (0.0007) -[2026-06-07 02:13:18,583][324535] Updated weights for policy 0, policy_version 5412 (0.0007) -[2026-06-07 02:13:18,789][324535] Updated weights for policy 0, policy_version 5422 (0.0007) -[2026-06-07 02:13:18,996][324535] Updated weights for policy 0, policy_version 5432 (0.0007) -[2026-06-07 02:13:19,768][324535] Updated weights for policy 0, policy_version 5442 (0.0007) -[2026-06-07 02:13:19,970][324535] Updated weights for policy 0, policy_version 5452 (0.0011) -[2026-06-07 02:13:20,169][324535] Updated weights for policy 0, policy_version 5462 (0.0009) -[2026-06-07 02:13:20,371][324535] Updated weights for policy 0, policy_version 5472 (0.0009) -[2026-06-07 02:13:20,582][324535] Updated weights for policy 0, policy_version 5482 (0.0007) -[2026-06-07 02:13:20,798][324535] Updated weights for policy 0, policy_version 5493 (0.0007) -[2026-06-07 02:13:21,010][321787] Fps is (10 sec: 16384.3, 60 sec: 16384.0, 300 sec: 16384.0). Total num frames: 2785280. Throughput: 0: 16833.4. Samples: 2800896. Policy #0 lag: (min: 63.0, avg: 76.9, max: 127.0) -[2026-06-07 02:13:21,011][321787] Avg episode reward: [(0, '70.819')] -[2026-06-07 02:13:21,023][324273] Saving new best policy, reward=70.819! -[2026-06-07 02:13:21,678][324535] Updated weights for policy 0, policy_version 5505 (0.0007) -[2026-06-07 02:13:21,908][324535] Updated weights for policy 0, policy_version 5516 (0.0011) -[2026-06-07 02:13:22,127][324535] Updated weights for policy 0, policy_version 5526 (0.0007) -[2026-06-07 02:13:22,333][324535] Updated weights for policy 0, policy_version 5536 (0.0007) -[2026-06-07 02:13:22,549][324535] Updated weights for policy 0, policy_version 5546 (0.0007) -[2026-06-07 02:13:22,739][324535] Updated weights for policy 0, policy_version 5556 (0.0007) -[2026-06-07 02:13:22,956][324535] Updated weights for policy 0, policy_version 5566 (0.0006) -[2026-06-07 02:13:23,739][324535] Updated weights for policy 0, policy_version 5576 (0.0007) -[2026-06-07 02:13:23,944][324535] Updated weights for policy 0, policy_version 5586 (0.0007) -[2026-06-07 02:13:24,182][324535] Updated weights for policy 0, policy_version 5597 (0.0007) -[2026-06-07 02:13:24,421][324535] Updated weights for policy 0, policy_version 5608 (0.0007) -[2026-06-07 02:13:24,620][324535] Updated weights for policy 0, policy_version 5618 (0.0007) -[2026-06-07 02:13:24,836][324535] Updated weights for policy 0, policy_version 5628 (0.0007) -[2026-06-07 02:13:25,669][324535] Updated weights for policy 0, policy_version 5640 (0.0007) -[2026-06-07 02:13:25,875][324535] Updated weights for policy 0, policy_version 5650 (0.0007) -[2026-06-07 02:13:26,010][321787] Fps is (10 sec: 16384.0, 60 sec: 16930.1, 300 sec: 16477.6). Total num frames: 2883584. Throughput: 0: 16850.5. Samples: 2901632. Policy #0 lag: (min: 63.0, avg: 77.1, max: 127.0) -[2026-06-07 02:13:26,011][321787] Avg episode reward: [(0, '65.716')] -[2026-06-07 02:13:26,129][324535] Updated weights for policy 0, policy_version 5662 (0.0007) -[2026-06-07 02:13:26,322][324535] Updated weights for policy 0, policy_version 5672 (0.0007) -[2026-06-07 02:13:26,524][324535] Updated weights for policy 0, policy_version 5682 (0.0007) -[2026-06-07 02:13:26,740][324535] Updated weights for policy 0, policy_version 5693 (0.0007) -[2026-06-07 02:13:27,577][324535] Updated weights for policy 0, policy_version 5704 (0.0009) -[2026-06-07 02:13:27,792][324535] Updated weights for policy 0, policy_version 5714 (0.0011) -[2026-06-07 02:13:27,990][324535] Updated weights for policy 0, policy_version 5724 (0.0008) -[2026-06-07 02:13:28,185][324535] Updated weights for policy 0, policy_version 5734 (0.0009) -[2026-06-07 02:13:28,430][324535] Updated weights for policy 0, policy_version 5746 (0.0011) -[2026-06-07 02:13:28,661][324535] Updated weights for policy 0, policy_version 5757 (0.0011) -[2026-06-07 02:13:29,457][324535] Updated weights for policy 0, policy_version 5768 (0.0008) -[2026-06-07 02:13:29,677][324535] Updated weights for policy 0, policy_version 5778 (0.0007) -[2026-06-07 02:13:29,858][324535] Updated weights for policy 0, policy_version 5788 (0.0007) -[2026-06-07 02:13:30,069][324535] Updated weights for policy 0, policy_version 5798 (0.0007) -[2026-06-07 02:13:30,254][324535] Updated weights for policy 0, policy_version 5808 (0.0007) -[2026-06-07 02:13:30,459][324535] Updated weights for policy 0, policy_version 5818 (0.0007) -[2026-06-07 02:13:31,010][321787] Fps is (10 sec: 19660.8, 60 sec: 16930.1, 300 sec: 16566.1). Total num frames: 2981888. Throughput: 0: 16901.8. Samples: 3001984. Policy #0 lag: (min: 63.0, avg: 77.1, max: 127.0) -[2026-06-07 02:13:31,011][321787] Avg episode reward: [(0, '79.682')] -[2026-06-07 02:13:31,017][324273] Saving new best policy, reward=79.682! -[2026-06-07 02:13:31,279][324535] Updated weights for policy 0, policy_version 5828 (0.0007) -[2026-06-07 02:13:31,503][324535] Updated weights for policy 0, policy_version 5838 (0.0007) -[2026-06-07 02:13:31,716][324535] Updated weights for policy 0, policy_version 5848 (0.0007) -[2026-06-07 02:13:31,954][324535] Updated weights for policy 0, policy_version 5859 (0.0007) -[2026-06-07 02:13:32,154][324535] Updated weights for policy 0, policy_version 5869 (0.0007) -[2026-06-07 02:13:32,371][324535] Updated weights for policy 0, policy_version 5879 (0.0007) -[2026-06-07 02:13:33,134][324535] Updated weights for policy 0, policy_version 5889 (0.0007) -[2026-06-07 02:13:33,362][324535] Updated weights for policy 0, policy_version 5899 (0.0007) -[2026-06-07 02:13:33,608][324535] Updated weights for policy 0, policy_version 5911 (0.0007) -[2026-06-07 02:13:33,850][324535] Updated weights for policy 0, policy_version 5922 (0.0011) -[2026-06-07 02:13:34,045][324535] Updated weights for policy 0, policy_version 5932 (0.0011) -[2026-06-07 02:13:34,268][324535] Updated weights for policy 0, policy_version 5943 (0.0007) -[2026-06-07 02:13:35,053][324535] Updated weights for policy 0, policy_version 5953 (0.0007) -[2026-06-07 02:13:35,258][324535] Updated weights for policy 0, policy_version 5963 (0.0007) -[2026-06-07 02:13:35,473][324535] Updated weights for policy 0, policy_version 5973 (0.0007) -[2026-06-07 02:13:35,672][324535] Updated weights for policy 0, policy_version 5983 (0.0007) -[2026-06-07 02:13:35,945][324535] Updated weights for policy 0, policy_version 5995 (0.0007) -[2026-06-07 02:13:36,010][321787] Fps is (10 sec: 16384.1, 60 sec: 16930.2, 300 sec: 16472.6). Total num frames: 3047424. Throughput: 0: 17211.8. Samples: 3059328. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) -[2026-06-07 02:13:36,011][321787] Avg episode reward: [(0, '78.132')] -[2026-06-07 02:13:36,161][324535] Updated weights for policy 0, policy_version 6005 (0.0007) -[2026-06-07 02:13:36,365][324535] Updated weights for policy 0, policy_version 6015 (0.0007) -[2026-06-07 02:13:37,175][324535] Updated weights for policy 0, policy_version 6025 (0.0007) -[2026-06-07 02:13:37,403][324535] Updated weights for policy 0, policy_version 6037 (0.0007) -[2026-06-07 02:13:37,618][324535] Updated weights for policy 0, policy_version 6047 (0.0007) -[2026-06-07 02:13:37,836][324535] Updated weights for policy 0, policy_version 6058 (0.0007) -[2026-06-07 02:13:38,053][324535] Updated weights for policy 0, policy_version 6069 (0.0007) -[2026-06-07 02:13:38,257][324535] Updated weights for policy 0, policy_version 6079 (0.0007) -[2026-06-07 02:13:39,058][324535] Updated weights for policy 0, policy_version 6089 (0.0007) -[2026-06-07 02:13:39,261][324535] Updated weights for policy 0, policy_version 6099 (0.0008) -[2026-06-07 02:13:39,465][324535] Updated weights for policy 0, policy_version 6109 (0.0007) -[2026-06-07 02:13:39,662][324535] Updated weights for policy 0, policy_version 6119 (0.0007) -[2026-06-07 02:13:39,867][324535] Updated weights for policy 0, policy_version 6129 (0.0007) -[2026-06-07 02:13:40,080][324535] Updated weights for policy 0, policy_version 6139 (0.0008) -[2026-06-07 02:13:40,841][324535] Updated weights for policy 0, policy_version 6149 (0.0007) -[2026-06-07 02:13:41,010][321787] Fps is (10 sec: 16384.0, 60 sec: 16930.1, 300 sec: 16556.5). Total num frames: 3145728. Throughput: 0: 17163.4. Samples: 3160320. Policy #0 lag: (min: 20.0, avg: 64.1, max: 78.0) -[2026-06-07 02:13:41,011][321787] Avg episode reward: [(0, '74.984')] -[2026-06-07 02:13:41,050][324535] Updated weights for policy 0, policy_version 6160 (0.0007) -[2026-06-07 02:13:41,268][324535] Updated weights for policy 0, policy_version 6170 (0.0007) -[2026-06-07 02:13:41,495][324535] Updated weights for policy 0, policy_version 6182 (0.0006) -[2026-06-07 02:13:41,690][324535] Updated weights for policy 0, policy_version 6192 (0.0007) -[2026-06-07 02:13:41,899][324535] Updated weights for policy 0, policy_version 6202 (0.0007) -[2026-06-07 02:13:42,731][324535] Updated weights for policy 0, policy_version 6213 (0.0007) -[2026-06-07 02:13:42,998][324535] Updated weights for policy 0, policy_version 6225 (0.0007) -[2026-06-07 02:13:43,209][324535] Updated weights for policy 0, policy_version 6235 (0.0007) -[2026-06-07 02:13:43,462][324535] Updated weights for policy 0, policy_version 6247 (0.0007) -[2026-06-07 02:13:43,677][324535] Updated weights for policy 0, policy_version 6257 (0.0007) -[2026-06-07 02:13:43,882][324535] Updated weights for policy 0, policy_version 6267 (0.0006) -[2026-06-07 02:13:44,677][324535] Updated weights for policy 0, policy_version 6277 (0.0007) -[2026-06-07 02:13:44,863][324535] Updated weights for policy 0, policy_version 6287 (0.0007) -[2026-06-07 02:13:45,082][324535] Updated weights for policy 0, policy_version 6297 (0.0007) -[2026-06-07 02:13:45,336][324535] Updated weights for policy 0, policy_version 6309 (0.0007) -[2026-06-07 02:13:45,565][324535] Updated weights for policy 0, policy_version 6320 (0.0007) -[2026-06-07 02:13:45,773][324535] Updated weights for policy 0, policy_version 6330 (0.0007) -[2026-06-07 02:13:46,010][321787] Fps is (10 sec: 19660.6, 60 sec: 17476.3, 300 sec: 16636.1). Total num frames: 3244032. Throughput: 0: 17117.9. Samples: 3262080. Policy #0 lag: (min: 20.0, avg: 64.1, max: 78.0) -[2026-06-07 02:13:46,011][321787] Avg episode reward: [(0, '85.446')] -[2026-06-07 02:13:46,016][324273] Saving new best policy, reward=85.446! -[2026-06-07 02:13:46,591][324535] Updated weights for policy 0, policy_version 6340 (0.0007) -[2026-06-07 02:13:46,816][324535] Updated weights for policy 0, policy_version 6350 (0.0007) -[2026-06-07 02:13:47,032][324535] Updated weights for policy 0, policy_version 6360 (0.0007) -[2026-06-07 02:13:47,246][324535] Updated weights for policy 0, policy_version 6370 (0.0007) -[2026-06-07 02:13:47,453][324535] Updated weights for policy 0, policy_version 6380 (0.0007) -[2026-06-07 02:13:47,674][324535] Updated weights for policy 0, policy_version 6390 (0.0007) -[2026-06-07 02:13:47,870][324535] Updated weights for policy 0, policy_version 6400 (0.0007) -[2026-06-07 02:13:48,630][324535] Updated weights for policy 0, policy_version 6410 (0.0007) -[2026-06-07 02:13:48,858][324535] Updated weights for policy 0, policy_version 6421 (0.0007) -[2026-06-07 02:13:49,071][324535] Updated weights for policy 0, policy_version 6431 (0.0007) -[2026-06-07 02:13:49,269][324535] Updated weights for policy 0, policy_version 6441 (0.0007) -[2026-06-07 02:13:49,499][324535] Updated weights for policy 0, policy_version 6452 (0.0007) -[2026-06-07 02:13:49,703][324535] Updated weights for policy 0, policy_version 6462 (0.0007) -[2026-06-07 02:13:50,457][324535] Updated weights for policy 0, policy_version 6472 (0.0007) -[2026-06-07 02:13:50,694][324535] Updated weights for policy 0, policy_version 6483 (0.0007) -[2026-06-07 02:13:50,892][324535] Updated weights for policy 0, policy_version 6493 (0.0007) -[2026-06-07 02:13:51,010][321787] Fps is (10 sec: 16383.9, 60 sec: 16930.1, 300 sec: 16547.8). Total num frames: 3309568. Throughput: 0: 17240.1. Samples: 3316224. Policy #0 lag: (min: 63.0, avg: 77.7, max: 127.0) -[2026-06-07 02:13:51,011][321787] Avg episode reward: [(0, '99.830')] -[2026-06-07 02:13:51,105][324535] Updated weights for policy 0, policy_version 6503 (0.0007) -[2026-06-07 02:13:51,324][324535] Updated weights for policy 0, policy_version 6513 (0.0007) -[2026-06-07 02:13:51,526][324535] Updated weights for policy 0, policy_version 6523 (0.0007) -[2026-06-07 02:13:51,614][324273] Saving new best policy, reward=99.830! -[2026-06-07 02:13:52,298][324535] Updated weights for policy 0, policy_version 6533 (0.0007) -[2026-06-07 02:13:52,495][324535] Updated weights for policy 0, policy_version 6543 (0.0007) -[2026-06-07 02:13:52,709][324535] Updated weights for policy 0, policy_version 6553 (0.0007) -[2026-06-07 02:13:52,913][324535] Updated weights for policy 0, policy_version 6563 (0.0007) -[2026-06-07 02:13:53,106][324535] Updated weights for policy 0, policy_version 6573 (0.0007) -[2026-06-07 02:13:53,303][324535] Updated weights for policy 0, policy_version 6583 (0.0007) -[2026-06-07 02:13:54,090][324535] Updated weights for policy 0, policy_version 6593 (0.0007) -[2026-06-07 02:13:54,320][324535] Updated weights for policy 0, policy_version 6604 (0.0007) -[2026-06-07 02:13:54,523][324535] Updated weights for policy 0, policy_version 6614 (0.0007) -[2026-06-07 02:13:54,771][324535] Updated weights for policy 0, policy_version 6625 (0.0007) -[2026-06-07 02:13:54,973][324535] Updated weights for policy 0, policy_version 6635 (0.0007) -[2026-06-07 02:13:55,176][324535] Updated weights for policy 0, policy_version 6645 (0.0007) -[2026-06-07 02:13:55,383][324535] Updated weights for policy 0, policy_version 6655 (0.0007) -[2026-06-07 02:13:56,010][321787] Fps is (10 sec: 16384.0, 60 sec: 16930.1, 300 sec: 16623.8). Total num frames: 3407872. Throughput: 0: 17305.7. Samples: 3420160. Policy #0 lag: (min: 10.0, avg: 54.8, max: 79.0) -[2026-06-07 02:13:56,011][321787] Avg episode reward: [(0, '101.606')] -[2026-06-07 02:13:56,159][324535] Updated weights for policy 0, policy_version 6665 (0.0007) -[2026-06-07 02:13:56,387][324535] Updated weights for policy 0, policy_version 6675 (0.0007) -[2026-06-07 02:13:56,618][324535] Updated weights for policy 0, policy_version 6686 (0.0007) -[2026-06-07 02:13:56,832][324535] Updated weights for policy 0, policy_version 6696 (0.0007) -[2026-06-07 02:13:57,039][324535] Updated weights for policy 0, policy_version 6706 (0.0007) -[2026-06-07 02:13:57,237][324535] Updated weights for policy 0, policy_version 6716 (0.0007) -[2026-06-07 02:13:57,325][324273] Saving new best policy, reward=101.606! -[2026-06-07 02:13:58,043][324535] Updated weights for policy 0, policy_version 6726 (0.0007) -[2026-06-07 02:13:58,235][324535] Updated weights for policy 0, policy_version 6736 (0.0007) -[2026-06-07 02:13:58,465][324535] Updated weights for policy 0, policy_version 6747 (0.0007) -[2026-06-07 02:13:58,673][324535] Updated weights for policy 0, policy_version 6757 (0.0007) -[2026-06-07 02:13:58,880][324535] Updated weights for policy 0, policy_version 6767 (0.0007) -[2026-06-07 02:13:59,073][324535] Updated weights for policy 0, policy_version 6777 (0.0007) -[2026-06-07 02:13:59,834][324535] Updated weights for policy 0, policy_version 6787 (0.0007) -[2026-06-07 02:14:00,039][324535] Updated weights for policy 0, policy_version 6797 (0.0007) -[2026-06-07 02:14:00,247][324535] Updated weights for policy 0, policy_version 6808 (0.0007) -[2026-06-07 02:14:00,462][324535] Updated weights for policy 0, policy_version 6818 (0.0007) -[2026-06-07 02:14:00,663][324535] Updated weights for policy 0, policy_version 6828 (0.0007) -[2026-06-07 02:14:00,874][324535] Updated weights for policy 0, policy_version 6838 (0.0007) -[2026-06-07 02:14:01,011][321787] Fps is (10 sec: 16383.3, 60 sec: 16930.0, 300 sec: 16540.0). Total num frames: 3473408. Throughput: 0: 17319.6. Samples: 3521664. Policy #0 lag: (min: 10.0, avg: 54.8, max: 79.0) -[2026-06-07 02:14:01,011][321787] Avg episode reward: [(0, '108.056')] -[2026-06-07 02:14:01,076][324273] Saving new best policy, reward=108.056! -[2026-06-07 02:14:01,079][324535] Updated weights for policy 0, policy_version 6848 (0.0007) -[2026-06-07 02:14:01,852][324535] Updated weights for policy 0, policy_version 6858 (0.0007) -[2026-06-07 02:14:02,076][324535] Updated weights for policy 0, policy_version 6868 (0.0007) -[2026-06-07 02:14:02,267][324535] Updated weights for policy 0, policy_version 6878 (0.0007) -[2026-06-07 02:14:02,463][324535] Updated weights for policy 0, policy_version 6888 (0.0007) -[2026-06-07 02:14:02,684][324535] Updated weights for policy 0, policy_version 6898 (0.0007) -[2026-06-07 02:14:02,911][324535] Updated weights for policy 0, policy_version 6908 (0.0007) -[2026-06-07 02:14:03,703][324535] Updated weights for policy 0, policy_version 6918 (0.0007) -[2026-06-07 02:14:03,886][324535] Updated weights for policy 0, policy_version 6928 (0.0007) -[2026-06-07 02:14:04,095][324535] Updated weights for policy 0, policy_version 6938 (0.0007) -[2026-06-07 02:14:04,303][324535] Updated weights for policy 0, policy_version 6948 (0.0007) -[2026-06-07 02:14:04,532][324535] Updated weights for policy 0, policy_version 6959 (0.0007) -[2026-06-07 02:14:04,738][324535] Updated weights for policy 0, policy_version 6969 (0.0007) -[2026-06-07 02:14:05,518][324535] Updated weights for policy 0, policy_version 6979 (0.0007) -[2026-06-07 02:14:05,719][324535] Updated weights for policy 0, policy_version 6989 (0.0006) -[2026-06-07 02:14:05,926][324535] Updated weights for policy 0, policy_version 6999 (0.0007) -[2026-06-07 02:14:06,010][321787] Fps is (10 sec: 16383.8, 60 sec: 16930.1, 300 sec: 16612.6). Total num frames: 3571712. Throughput: 0: 17117.8. Samples: 3571200. Policy #0 lag: (min: 52.0, avg: 66.5, max: 116.0) -[2026-06-07 02:14:06,011][321787] Avg episode reward: [(0, '102.920')] -[2026-06-07 02:14:06,133][324535] Updated weights for policy 0, policy_version 7009 (0.0007) -[2026-06-07 02:14:06,349][324535] Updated weights for policy 0, policy_version 7019 (0.0007) -[2026-06-07 02:14:06,564][324535] Updated weights for policy 0, policy_version 7029 (0.0007) -[2026-06-07 02:14:06,761][324535] Updated weights for policy 0, policy_version 7039 (0.0007) -[2026-06-07 02:14:07,522][324535] Updated weights for policy 0, policy_version 7049 (0.0007) -[2026-06-07 02:14:07,735][324535] Updated weights for policy 0, policy_version 7059 (0.0007) -[2026-06-07 02:14:07,930][324535] Updated weights for policy 0, policy_version 7069 (0.0007) -[2026-06-07 02:14:08,120][324535] Updated weights for policy 0, policy_version 7079 (0.0007) -[2026-06-07 02:14:08,321][324535] Updated weights for policy 0, policy_version 7089 (0.0007) -[2026-06-07 02:14:08,520][324535] Updated weights for policy 0, policy_version 7099 (0.0007) -[2026-06-07 02:14:09,312][324535] Updated weights for policy 0, policy_version 7109 (0.0007) -[2026-06-07 02:14:09,509][324535] Updated weights for policy 0, policy_version 7119 (0.0007) -[2026-06-07 02:14:09,718][324535] Updated weights for policy 0, policy_version 7129 (0.0007) -[2026-06-07 02:14:09,982][324535] Updated weights for policy 0, policy_version 7141 (0.0008) -[2026-06-07 02:14:10,216][324535] Updated weights for policy 0, policy_version 7151 (0.0007) -[2026-06-07 02:14:10,406][324535] Updated weights for policy 0, policy_version 7161 (0.0007) -[2026-06-07 02:14:11,010][321787] Fps is (10 sec: 19661.8, 60 sec: 17476.3, 300 sec: 16681.9). Total num frames: 3670016. Throughput: 0: 17345.4. Samples: 3682176. Policy #0 lag: (min: 37.0, avg: 51.5, max: 101.0) -[2026-06-07 02:14:11,011][321787] Avg episode reward: [(0, '112.924')] -[2026-06-07 02:14:11,182][324535] Updated weights for policy 0, policy_version 7171 (0.0009) -[2026-06-07 02:14:11,385][324535] Updated weights for policy 0, policy_version 7181 (0.0010) -[2026-06-07 02:14:11,593][324535] Updated weights for policy 0, policy_version 7191 (0.0010) -[2026-06-07 02:14:11,820][324535] Updated weights for policy 0, policy_version 7202 (0.0010) -[2026-06-07 02:14:12,043][324535] Updated weights for policy 0, policy_version 7212 (0.0010) -[2026-06-07 02:14:12,239][324535] Updated weights for policy 0, policy_version 7222 (0.0007) -[2026-06-07 02:14:12,446][324273] Saving new best policy, reward=112.924! -[2026-06-07 02:14:12,448][324535] Updated weights for policy 0, policy_version 7232 (0.0007) -[2026-06-07 02:14:13,207][324535] Updated weights for policy 0, policy_version 7242 (0.0007) -[2026-06-07 02:14:13,401][324535] Updated weights for policy 0, policy_version 7252 (0.0007) -[2026-06-07 02:14:13,611][324535] Updated weights for policy 0, policy_version 7262 (0.0006) -[2026-06-07 02:14:13,829][324535] Updated weights for policy 0, policy_version 7272 (0.0006) -[2026-06-07 02:14:14,026][324535] Updated weights for policy 0, policy_version 7282 (0.0006) -[2026-06-07 02:14:14,232][324535] Updated weights for policy 0, policy_version 7292 (0.0007) -[2026-06-07 02:14:15,045][324535] Updated weights for policy 0, policy_version 7302 (0.0007) -[2026-06-07 02:14:15,263][324535] Updated weights for policy 0, policy_version 7312 (0.0007) -[2026-06-07 02:14:15,462][324535] Updated weights for policy 0, policy_version 7322 (0.0007) -[2026-06-07 02:14:15,659][324535] Updated weights for policy 0, policy_version 7332 (0.0007) -[2026-06-07 02:14:15,857][324535] Updated weights for policy 0, policy_version 7342 (0.0007) -[2026-06-07 02:14:16,010][321787] Fps is (10 sec: 16384.2, 60 sec: 16930.1, 300 sec: 16602.5). Total num frames: 3735552. Throughput: 0: 17382.4. Samples: 3784192. Policy #0 lag: (min: 37.0, avg: 51.5, max: 101.0) -[2026-06-07 02:14:16,011][321787] Avg episode reward: [(0, '114.167')] -[2026-06-07 02:14:16,061][324535] Updated weights for policy 0, policy_version 7352 (0.0007) -[2026-06-07 02:14:16,230][324273] Saving new best policy, reward=114.167! -[2026-06-07 02:14:16,838][324535] Updated weights for policy 0, policy_version 7362 (0.0007) -[2026-06-07 02:14:17,046][324535] Updated weights for policy 0, policy_version 7372 (0.0007) -[2026-06-07 02:14:17,244][324535] Updated weights for policy 0, policy_version 7382 (0.0007) -[2026-06-07 02:14:17,478][324535] Updated weights for policy 0, policy_version 7392 (0.0007) -[2026-06-07 02:14:17,681][324535] Updated weights for policy 0, policy_version 7402 (0.0007) -[2026-06-07 02:14:17,917][324535] Updated weights for policy 0, policy_version 7413 (0.0007) -[2026-06-07 02:14:18,125][324535] Updated weights for policy 0, policy_version 7423 (0.0007) -[2026-06-07 02:14:18,873][324535] Updated weights for policy 0, policy_version 7433 (0.0011) -[2026-06-07 02:14:19,100][324535] Updated weights for policy 0, policy_version 7444 (0.0007) -[2026-06-07 02:14:19,321][324535] Updated weights for policy 0, policy_version 7454 (0.0007) -[2026-06-07 02:14:19,537][324535] Updated weights for policy 0, policy_version 7464 (0.0008) -[2026-06-07 02:14:19,754][324535] Updated weights for policy 0, policy_version 7475 (0.0007) -[2026-06-07 02:14:19,958][324535] Updated weights for policy 0, policy_version 7485 (0.0007) -[2026-06-07 02:14:20,750][324535] Updated weights for policy 0, policy_version 7495 (0.0007) -[2026-06-07 02:14:20,939][324535] Updated weights for policy 0, policy_version 7505 (0.0007) -[2026-06-07 02:14:21,010][321787] Fps is (10 sec: 16383.8, 60 sec: 17476.2, 300 sec: 16668.9). Total num frames: 3833856. Throughput: 0: 17134.9. Samples: 3830400. Policy #0 lag: (min: 31.0, avg: 46.3, max: 95.0) -[2026-06-07 02:14:21,011][321787] Avg episode reward: [(0, '123.616')] -[2026-06-07 02:14:21,146][324535] Updated weights for policy 0, policy_version 7515 (0.0007) -[2026-06-07 02:14:21,352][324535] Updated weights for policy 0, policy_version 7525 (0.0007) -[2026-06-07 02:14:21,580][324535] Updated weights for policy 0, policy_version 7535 (0.0007) -[2026-06-07 02:14:21,788][324535] Updated weights for policy 0, policy_version 7545 (0.0007) -[2026-06-07 02:14:21,922][324273] Saving new best policy, reward=123.616! -[2026-06-07 02:14:22,543][324535] Updated weights for policy 0, policy_version 7555 (0.0007) -[2026-06-07 02:14:22,764][324535] Updated weights for policy 0, policy_version 7565 (0.0007) -[2026-06-07 02:14:22,970][324535] Updated weights for policy 0, policy_version 7575 (0.0007) -[2026-06-07 02:14:23,164][324535] Updated weights for policy 0, policy_version 7585 (0.0007) -[2026-06-07 02:14:23,362][324535] Updated weights for policy 0, policy_version 7595 (0.0006) -[2026-06-07 02:14:23,556][324535] Updated weights for policy 0, policy_version 7605 (0.0007) -[2026-06-07 02:14:23,776][324535] Updated weights for policy 0, policy_version 7615 (0.0008) -[2026-06-07 02:14:24,602][324535] Updated weights for policy 0, policy_version 7626 (0.0007) -[2026-06-07 02:14:24,812][324535] Updated weights for policy 0, policy_version 7636 (0.0007) -[2026-06-07 02:14:25,013][324535] Updated weights for policy 0, policy_version 7646 (0.0007) -[2026-06-07 02:14:25,215][324535] Updated weights for policy 0, policy_version 7656 (0.0007) -[2026-06-07 02:14:25,456][324535] Updated weights for policy 0, policy_version 7667 (0.0007) -[2026-06-07 02:14:25,669][324535] Updated weights for policy 0, policy_version 7677 (0.0007) -[2026-06-07 02:14:26,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 16732.6). Total num frames: 3932160. Throughput: 0: 17356.8. Samples: 3941376. Policy #0 lag: (min: 63.0, avg: 77.7, max: 127.0) -[2026-06-07 02:14:26,011][321787] Avg episode reward: [(0, '124.916')] -[2026-06-07 02:14:26,018][324273] Saving new best policy, reward=124.916! -[2026-06-07 02:14:26,423][324535] Updated weights for policy 0, policy_version 7687 (0.0007) -[2026-06-07 02:14:26,636][324535] Updated weights for policy 0, policy_version 7697 (0.0007) -[2026-06-07 02:14:26,855][324535] Updated weights for policy 0, policy_version 7708 (0.0007) -[2026-06-07 02:14:27,077][324535] Updated weights for policy 0, policy_version 7718 (0.0007) -[2026-06-07 02:14:27,279][324535] Updated weights for policy 0, policy_version 7728 (0.0007) -[2026-06-07 02:14:27,465][324535] Updated weights for policy 0, policy_version 7738 (0.0007) -[2026-06-07 02:14:28,227][324535] Updated weights for policy 0, policy_version 7748 (0.0007) -[2026-06-07 02:14:28,472][324535] Updated weights for policy 0, policy_version 7760 (0.0009) -[2026-06-07 02:14:28,689][324535] Updated weights for policy 0, policy_version 7770 (0.0012) -[2026-06-07 02:14:28,926][324535] Updated weights for policy 0, policy_version 7781 (0.0009) -[2026-06-07 02:14:29,139][324535] Updated weights for policy 0, policy_version 7791 (0.0013) -[2026-06-07 02:14:29,353][324535] Updated weights for policy 0, policy_version 7801 (0.0011) -[2026-06-07 02:14:30,138][324535] Updated weights for policy 0, policy_version 7811 (0.0010) -[2026-06-07 02:14:30,345][324535] Updated weights for policy 0, policy_version 7821 (0.0007) -[2026-06-07 02:14:30,572][324535] Updated weights for policy 0, policy_version 7831 (0.0007) -[2026-06-07 02:14:30,764][324535] Updated weights for policy 0, policy_version 7841 (0.0007) -[2026-06-07 02:14:30,965][324535] Updated weights for policy 0, policy_version 7851 (0.0007) -[2026-06-07 02:14:31,010][321787] Fps is (10 sec: 16384.2, 60 sec: 16930.1, 300 sec: 16657.1). Total num frames: 3997696. Throughput: 0: 17390.9. Samples: 4044672. Policy #0 lag: (min: 63.0, avg: 77.7, max: 127.0) -[2026-06-07 02:14:31,011][321787] Avg episode reward: [(0, '134.862')] -[2026-06-07 02:14:31,164][324535] Updated weights for policy 0, policy_version 7861 (0.0007) -[2026-06-07 02:14:31,373][324535] Updated weights for policy 0, policy_version 7871 (0.0007) -[2026-06-07 02:14:31,392][324273] Saving new best policy, reward=134.862! -[2026-06-07 02:14:32,107][324535] Updated weights for policy 0, policy_version 7881 (0.0007) -[2026-06-07 02:14:32,368][324535] Updated weights for policy 0, policy_version 7893 (0.0007) -[2026-06-07 02:14:32,588][324535] Updated weights for policy 0, policy_version 7903 (0.0007) -[2026-06-07 02:14:32,809][324535] Updated weights for policy 0, policy_version 7913 (0.0007) -[2026-06-07 02:14:32,988][324535] Updated weights for policy 0, policy_version 7923 (0.0007) -[2026-06-07 02:14:33,215][324535] Updated weights for policy 0, policy_version 7934 (0.0007) -[2026-06-07 02:14:33,978][324535] Updated weights for policy 0, policy_version 7944 (0.0007) -[2026-06-07 02:14:34,183][324535] Updated weights for policy 0, policy_version 7954 (0.0007) -[2026-06-07 02:14:34,400][324535] Updated weights for policy 0, policy_version 7964 (0.0007) -[2026-06-07 02:14:34,606][324535] Updated weights for policy 0, policy_version 7974 (0.0007) -[2026-06-07 02:14:34,806][324535] Updated weights for policy 0, policy_version 7984 (0.0007) -[2026-06-07 02:14:35,022][324535] Updated weights for policy 0, policy_version 7994 (0.0007) -[2026-06-07 02:14:35,781][324535] Updated weights for policy 0, policy_version 8004 (0.0007) -[2026-06-07 02:14:36,005][324535] Updated weights for policy 0, policy_version 8014 (0.0007) -[2026-06-07 02:14:36,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.2, 300 sec: 16718.4). Total num frames: 4096000. Throughput: 0: 17149.2. Samples: 4087936. Policy #0 lag: (min: 78.0, avg: 117.0, max: 147.0) -[2026-06-07 02:14:36,011][321787] Avg episode reward: [(0, '152.872')] -[2026-06-07 02:14:36,209][324535] Updated weights for policy 0, policy_version 8024 (0.0007) -[2026-06-07 02:14:36,438][324535] Updated weights for policy 0, policy_version 8034 (0.0007) -[2026-06-07 02:14:36,632][324535] Updated weights for policy 0, policy_version 8044 (0.0007) -[2026-06-07 02:14:36,834][324535] Updated weights for policy 0, policy_version 8054 (0.0007) -[2026-06-07 02:14:37,023][324273] Saving new best policy, reward=152.872! -[2026-06-07 02:14:37,024][324535] Updated weights for policy 0, policy_version 8064 (0.0007) -[2026-06-07 02:14:37,833][324535] Updated weights for policy 0, policy_version 8075 (0.0007) -[2026-06-07 02:14:38,041][324535] Updated weights for policy 0, policy_version 8085 (0.0007) -[2026-06-07 02:14:38,255][324535] Updated weights for policy 0, policy_version 8095 (0.0010) -[2026-06-07 02:14:38,474][324535] Updated weights for policy 0, policy_version 8106 (0.0010) -[2026-06-07 02:14:38,691][324535] Updated weights for policy 0, policy_version 8117 (0.0012) -[2026-06-07 02:14:38,910][324535] Updated weights for policy 0, policy_version 8127 (0.0007) -[2026-06-07 02:14:39,688][324535] Updated weights for policy 0, policy_version 8137 (0.0007) -[2026-06-07 02:14:39,898][324535] Updated weights for policy 0, policy_version 8148 (0.0007) -[2026-06-07 02:14:40,102][324535] Updated weights for policy 0, policy_version 8158 (0.0007) -[2026-06-07 02:14:40,313][324535] Updated weights for policy 0, policy_version 8168 (0.0007) -[2026-06-07 02:14:40,519][324535] Updated weights for policy 0, policy_version 8178 (0.0007) -[2026-06-07 02:14:40,715][324535] Updated weights for policy 0, policy_version 8188 (0.0007) -[2026-06-07 02:14:41,010][321787] Fps is (10 sec: 19660.7, 60 sec: 17476.2, 300 sec: 16777.2). Total num frames: 4194304. Throughput: 0: 17245.8. Samples: 4196224. Policy #0 lag: (min: 78.0, avg: 117.0, max: 147.0) -[2026-06-07 02:14:41,011][321787] Avg episode reward: [(0, '158.423')] -[2026-06-07 02:14:41,016][324273] Saving new best policy, reward=158.423! -[2026-06-07 02:14:41,482][324535] Updated weights for policy 0, policy_version 8198 (0.0007) -[2026-06-07 02:14:41,701][324535] Updated weights for policy 0, policy_version 8208 (0.0007) -[2026-06-07 02:14:41,919][324535] Updated weights for policy 0, policy_version 8218 (0.0007) -[2026-06-07 02:14:42,119][324535] Updated weights for policy 0, policy_version 8228 (0.0007) -[2026-06-07 02:14:42,345][324535] Updated weights for policy 0, policy_version 8239 (0.0007) -[2026-06-07 02:14:42,554][324535] Updated weights for policy 0, policy_version 8249 (0.0007) -[2026-06-07 02:14:43,326][324535] Updated weights for policy 0, policy_version 8259 (0.0007) -[2026-06-07 02:14:43,543][324535] Updated weights for policy 0, policy_version 8269 (0.0007) -[2026-06-07 02:14:43,749][324535] Updated weights for policy 0, policy_version 8279 (0.0011) -[2026-06-07 02:14:43,937][324535] Updated weights for policy 0, policy_version 8289 (0.0007) -[2026-06-07 02:14:44,137][324535] Updated weights for policy 0, policy_version 8299 (0.0007) -[2026-06-07 02:14:44,347][324535] Updated weights for policy 0, policy_version 8309 (0.0007) -[2026-06-07 02:14:44,549][324535] Updated weights for policy 0, policy_version 8319 (0.0007) -[2026-06-07 02:14:45,300][324535] Updated weights for policy 0, policy_version 8329 (0.0007) -[2026-06-07 02:14:45,498][324535] Updated weights for policy 0, policy_version 8339 (0.0007) -[2026-06-07 02:14:45,714][324535] Updated weights for policy 0, policy_version 8349 (0.0007) -[2026-06-07 02:14:45,928][324535] Updated weights for policy 0, policy_version 8359 (0.0007) -[2026-06-07 02:14:46,010][321787] Fps is (10 sec: 16383.8, 60 sec: 16930.1, 300 sec: 16705.3). Total num frames: 4259840. Throughput: 0: 17365.5. Samples: 4303104. Policy #0 lag: (min: 63.0, avg: 79.0, max: 127.0) -[2026-06-07 02:14:46,011][321787] Avg episode reward: [(0, '173.289')] -[2026-06-07 02:14:46,117][324535] Updated weights for policy 0, policy_version 8369 (0.0007) -[2026-06-07 02:14:46,327][324535] Updated weights for policy 0, policy_version 8379 (0.0007) -[2026-06-07 02:14:46,418][324273] Saving new best policy, reward=173.289! -[2026-06-07 02:14:47,076][324535] Updated weights for policy 0, policy_version 8389 (0.0007) -[2026-06-07 02:14:47,291][324535] Updated weights for policy 0, policy_version 8399 (0.0007) -[2026-06-07 02:14:47,495][324535] Updated weights for policy 0, policy_version 8409 (0.0007) -[2026-06-07 02:14:47,732][324535] Updated weights for policy 0, policy_version 8419 (0.0007) -[2026-06-07 02:14:47,934][324535] Updated weights for policy 0, policy_version 8429 (0.0007) -[2026-06-07 02:14:48,131][324535] Updated weights for policy 0, policy_version 8439 (0.0007) -[2026-06-07 02:14:48,883][324535] Updated weights for policy 0, policy_version 8449 (0.0007) -[2026-06-07 02:14:49,082][324535] Updated weights for policy 0, policy_version 8459 (0.0007) -[2026-06-07 02:14:49,284][324535] Updated weights for policy 0, policy_version 8469 (0.0007) -[2026-06-07 02:14:49,513][324535] Updated weights for policy 0, policy_version 8479 (0.0007) -[2026-06-07 02:14:49,718][324535] Updated weights for policy 0, policy_version 8489 (0.0007) -[2026-06-07 02:14:49,914][324535] Updated weights for policy 0, policy_version 8499 (0.0007) -[2026-06-07 02:14:50,136][324535] Updated weights for policy 0, policy_version 8510 (0.0007) -[2026-06-07 02:14:50,889][324535] Updated weights for policy 0, policy_version 8520 (0.0007) -[2026-06-07 02:14:51,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 16762.1). Total num frames: 4358144. Throughput: 0: 17260.1. Samples: 4347904. Policy #0 lag: (min: 63.0, avg: 79.0, max: 127.0) -[2026-06-07 02:14:51,011][321787] Avg episode reward: [(0, '175.615')] -[2026-06-07 02:14:51,112][324535] Updated weights for policy 0, policy_version 8530 (0.0007) -[2026-06-07 02:14:51,348][324535] Updated weights for policy 0, policy_version 8541 (0.0007) -[2026-06-07 02:14:51,553][324535] Updated weights for policy 0, policy_version 8551 (0.0007) -[2026-06-07 02:14:51,760][324535] Updated weights for policy 0, policy_version 8561 (0.0007) -[2026-06-07 02:14:51,955][324535] Updated weights for policy 0, policy_version 8571 (0.0007) -[2026-06-07 02:14:52,060][324273] Saving new best policy, reward=175.615! -[2026-06-07 02:14:52,722][324535] Updated weights for policy 0, policy_version 8581 (0.0007) -[2026-06-07 02:14:52,931][324535] Updated weights for policy 0, policy_version 8591 (0.0007) -[2026-06-07 02:14:53,115][324535] Updated weights for policy 0, policy_version 8601 (0.0007) -[2026-06-07 02:14:53,300][324535] Updated weights for policy 0, policy_version 8611 (0.0007) -[2026-06-07 02:14:53,498][324535] Updated weights for policy 0, policy_version 8621 (0.0007) -[2026-06-07 02:14:53,708][324535] Updated weights for policy 0, policy_version 8631 (0.0007) -[2026-06-07 02:14:54,485][324535] Updated weights for policy 0, policy_version 8641 (0.0007) -[2026-06-07 02:14:54,717][324535] Updated weights for policy 0, policy_version 8652 (0.0007) -[2026-06-07 02:14:54,934][324535] Updated weights for policy 0, policy_version 8662 (0.0007) -[2026-06-07 02:14:55,143][324535] Updated weights for policy 0, policy_version 8673 (0.0007) -[2026-06-07 02:14:55,352][324535] Updated weights for policy 0, policy_version 8683 (0.0007) -[2026-06-07 02:14:55,568][324535] Updated weights for policy 0, policy_version 8694 (0.0007) -[2026-06-07 02:14:55,767][324535] Updated weights for policy 0, policy_version 8704 (0.0007) -[2026-06-07 02:14:56,010][321787] Fps is (10 sec: 19661.0, 60 sec: 17476.3, 300 sec: 16816.8). Total num frames: 4456448. Throughput: 0: 17223.1. Samples: 4457216. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) -[2026-06-07 02:14:56,011][321787] Avg episode reward: [(0, '204.029')] -[2026-06-07 02:14:56,015][324273] Saving new best policy, reward=204.029! -[2026-06-07 02:14:56,583][324535] Updated weights for policy 0, policy_version 8716 (0.0007) -[2026-06-07 02:14:56,798][324535] Updated weights for policy 0, policy_version 8726 (0.0007) -[2026-06-07 02:14:57,004][324535] Updated weights for policy 0, policy_version 8736 (0.0007) -[2026-06-07 02:14:57,216][324535] Updated weights for policy 0, policy_version 8746 (0.0007) -[2026-06-07 02:14:57,425][324535] Updated weights for policy 0, policy_version 8756 (0.0007) -[2026-06-07 02:14:57,635][324535] Updated weights for policy 0, policy_version 8766 (0.0007) -[2026-06-07 02:14:58,410][324535] Updated weights for policy 0, policy_version 8777 (0.0007) -[2026-06-07 02:14:58,611][324535] Updated weights for policy 0, policy_version 8787 (0.0007) -[2026-06-07 02:14:58,806][324535] Updated weights for policy 0, policy_version 8797 (0.0007) -[2026-06-07 02:14:59,024][324535] Updated weights for policy 0, policy_version 8807 (0.0007) -[2026-06-07 02:14:59,245][324535] Updated weights for policy 0, policy_version 8818 (0.0007) -[2026-06-07 02:14:59,445][324535] Updated weights for policy 0, policy_version 8828 (0.0007) -[2026-06-07 02:15:00,185][324535] Updated weights for policy 0, policy_version 8838 (0.0007) -[2026-06-07 02:15:00,393][324535] Updated weights for policy 0, policy_version 8848 (0.0007) -[2026-06-07 02:15:00,628][324535] Updated weights for policy 0, policy_version 8859 (0.0007) -[2026-06-07 02:15:00,855][324535] Updated weights for policy 0, policy_version 8869 (0.0007) -[2026-06-07 02:15:01,010][321787] Fps is (10 sec: 16383.9, 60 sec: 17476.4, 300 sec: 16748.1). Total num frames: 4521984. Throughput: 0: 17314.1. Samples: 4563328. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) -[2026-06-07 02:15:01,011][321787] Avg episode reward: [(0, '206.751')] -[2026-06-07 02:15:01,069][324535] Updated weights for policy 0, policy_version 8879 (0.0007) -[2026-06-07 02:15:01,262][324535] Updated weights for policy 0, policy_version 8889 (0.0007) -[2026-06-07 02:15:01,406][324273] Saving new best policy, reward=206.751! -[2026-06-07 02:15:01,976][324535] Updated weights for policy 0, policy_version 8899 (0.0007) -[2026-06-07 02:15:02,198][324535] Updated weights for policy 0, policy_version 8910 (0.0007) -[2026-06-07 02:15:02,418][324535] Updated weights for policy 0, policy_version 8920 (0.0007) -[2026-06-07 02:15:02,632][324535] Updated weights for policy 0, policy_version 8930 (0.0007) -[2026-06-07 02:15:02,827][324535] Updated weights for policy 0, policy_version 8940 (0.0007) -[2026-06-07 02:15:03,075][324535] Updated weights for policy 0, policy_version 8951 (0.0007) -[2026-06-07 02:15:03,816][324535] Updated weights for policy 0, policy_version 8961 (0.0007) -[2026-06-07 02:15:04,020][324535] Updated weights for policy 0, policy_version 8971 (0.0007) -[2026-06-07 02:15:04,243][324535] Updated weights for policy 0, policy_version 8981 (0.0007) -[2026-06-07 02:15:04,474][324535] Updated weights for policy 0, policy_version 8991 (0.0007) -[2026-06-07 02:15:04,669][324535] Updated weights for policy 0, policy_version 9001 (0.0007) -[2026-06-07 02:15:04,879][324535] Updated weights for policy 0, policy_version 9011 (0.0007) -[2026-06-07 02:15:05,081][324535] Updated weights for policy 0, policy_version 9021 (0.0007) -[2026-06-07 02:15:05,817][324535] Updated weights for policy 0, policy_version 9031 (0.0007) -[2026-06-07 02:15:06,010][321787] Fps is (10 sec: 16383.7, 60 sec: 17476.2, 300 sec: 16801.0). Total num frames: 4620288. Throughput: 0: 17265.8. Samples: 4607360. Policy #0 lag: (min: 44.0, avg: 59.9, max: 108.0) -[2026-06-07 02:15:06,012][321787] Avg episode reward: [(0, '219.455')] -[2026-06-07 02:15:06,017][324535] Updated weights for policy 0, policy_version 9041 (0.0007) -[2026-06-07 02:15:06,265][324535] Updated weights for policy 0, policy_version 9052 (0.0007) -[2026-06-07 02:15:06,462][324535] Updated weights for policy 0, policy_version 9062 (0.0007) -[2026-06-07 02:15:06,700][324535] Updated weights for policy 0, policy_version 9073 (0.0007) -[2026-06-07 02:15:06,902][324535] Updated weights for policy 0, policy_version 9083 (0.0007) -[2026-06-07 02:15:06,995][324273] Saving new best policy, reward=219.455! -[2026-06-07 02:15:07,661][324535] Updated weights for policy 0, policy_version 9093 (0.0007) -[2026-06-07 02:15:07,868][324535] Updated weights for policy 0, policy_version 9103 (0.0007) -[2026-06-07 02:15:08,081][324535] Updated weights for policy 0, policy_version 9113 (0.0007) -[2026-06-07 02:15:08,287][324535] Updated weights for policy 0, policy_version 9123 (0.0007) -[2026-06-07 02:15:08,492][324535] Updated weights for policy 0, policy_version 9133 (0.0007) -[2026-06-07 02:15:08,697][324535] Updated weights for policy 0, policy_version 9143 (0.0009) -[2026-06-07 02:15:09,474][324535] Updated weights for policy 0, policy_version 9153 (0.0011) -[2026-06-07 02:15:09,687][324535] Updated weights for policy 0, policy_version 9163 (0.0011) -[2026-06-07 02:15:09,895][324535] Updated weights for policy 0, policy_version 9173 (0.0011) -[2026-06-07 02:15:10,113][324535] Updated weights for policy 0, policy_version 9183 (0.0011) -[2026-06-07 02:15:10,337][324535] Updated weights for policy 0, policy_version 9193 (0.0011) -[2026-06-07 02:15:10,548][324535] Updated weights for policy 0, policy_version 9203 (0.0011) -[2026-06-07 02:15:10,759][324535] Updated weights for policy 0, policy_version 9213 (0.0010) -[2026-06-07 02:15:11,010][321787] Fps is (10 sec: 19660.9, 60 sec: 17476.3, 300 sec: 16852.1). Total num frames: 4718592. Throughput: 0: 17220.3. Samples: 4716288. Policy #0 lag: (min: 44.0, avg: 59.9, max: 108.0) -[2026-06-07 02:15:11,011][321787] Avg episode reward: [(0, '216.903')] -[2026-06-07 02:15:11,530][324535] Updated weights for policy 0, policy_version 9224 (0.0011) -[2026-06-07 02:15:11,772][324535] Updated weights for policy 0, policy_version 9235 (0.0009) -[2026-06-07 02:15:11,978][324535] Updated weights for policy 0, policy_version 9246 (0.0007) -[2026-06-07 02:15:12,190][324535] Updated weights for policy 0, policy_version 9256 (0.0007) -[2026-06-07 02:15:12,398][324535] Updated weights for policy 0, policy_version 9266 (0.0007) -[2026-06-07 02:15:12,604][324535] Updated weights for policy 0, policy_version 9276 (0.0007) -[2026-06-07 02:15:13,372][324535] Updated weights for policy 0, policy_version 9286 (0.0006) -[2026-06-07 02:15:13,576][324535] Updated weights for policy 0, policy_version 9296 (0.0007) -[2026-06-07 02:15:13,809][324535] Updated weights for policy 0, policy_version 9306 (0.0010) -[2026-06-07 02:15:14,008][324535] Updated weights for policy 0, policy_version 9316 (0.0012) -[2026-06-07 02:15:14,210][324535] Updated weights for policy 0, policy_version 9326 (0.0011) -[2026-06-07 02:15:14,416][324535] Updated weights for policy 0, policy_version 9336 (0.0011) -[2026-06-07 02:15:15,180][324535] Updated weights for policy 0, policy_version 9346 (0.0011) -[2026-06-07 02:15:15,406][324535] Updated weights for policy 0, policy_version 9356 (0.0011) -[2026-06-07 02:15:15,612][324535] Updated weights for policy 0, policy_version 9366 (0.0008) -[2026-06-07 02:15:15,815][324535] Updated weights for policy 0, policy_version 9376 (0.0007) -[2026-06-07 02:15:16,010][321787] Fps is (10 sec: 16384.3, 60 sec: 17476.3, 300 sec: 16786.4). Total num frames: 4784128. Throughput: 0: 17282.9. Samples: 4822400. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) -[2026-06-07 02:15:16,011][321787] Avg episode reward: [(0, '222.428')] -[2026-06-07 02:15:16,016][324535] Updated weights for policy 0, policy_version 9386 (0.0007) -[2026-06-07 02:15:16,235][324535] Updated weights for policy 0, policy_version 9396 (0.0008) -[2026-06-07 02:15:16,306][324273] Early stopping after 7 epochs (56 sgd steps), loss delta 0.0000002 -[2026-06-07 02:15:16,307][324273] Saving new best policy, reward=222.428! -[2026-06-07 02:15:17,001][324535] Updated weights for policy 0, policy_version 9406 (0.0011) -[2026-06-07 02:15:17,216][324535] Updated weights for policy 0, policy_version 9416 (0.0010) -[2026-06-07 02:15:17,431][324535] Updated weights for policy 0, policy_version 9426 (0.0011) -[2026-06-07 02:15:17,624][324535] Updated weights for policy 0, policy_version 9436 (0.0011) -[2026-06-07 02:15:17,828][324535] Updated weights for policy 0, policy_version 9446 (0.0011) -[2026-06-07 02:15:18,047][324535] Updated weights for policy 0, policy_version 9456 (0.0011) -[2026-06-07 02:15:18,812][324535] Updated weights for policy 0, policy_version 9467 (0.0011) -[2026-06-07 02:15:19,014][324535] Updated weights for policy 0, policy_version 9477 (0.0011) -[2026-06-07 02:15:19,230][324535] Updated weights for policy 0, policy_version 9487 (0.0011) -[2026-06-07 02:15:19,441][324535] Updated weights for policy 0, policy_version 9497 (0.0010) -[2026-06-07 02:15:19,649][324535] Updated weights for policy 0, policy_version 9507 (0.0011) -[2026-06-07 02:15:19,858][324535] Updated weights for policy 0, policy_version 9517 (0.0010) -[2026-06-07 02:15:20,081][324535] Updated weights for policy 0, policy_version 9527 (0.0011) -[2026-06-07 02:15:20,828][324535] Updated weights for policy 0, policy_version 9537 (0.0011) -[2026-06-07 02:15:21,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 16836.0). Total num frames: 4882432. Throughput: 0: 17339.7. Samples: 4868224. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) -[2026-06-07 02:15:21,011][321787] Avg episode reward: [(0, '239.379')] -[2026-06-07 02:15:21,078][324535] Updated weights for policy 0, policy_version 9549 (0.0011) -[2026-06-07 02:15:21,301][324535] Updated weights for policy 0, policy_version 9559 (0.0011) -[2026-06-07 02:15:21,522][324535] Updated weights for policy 0, policy_version 9569 (0.0011) -[2026-06-07 02:15:21,726][324535] Updated weights for policy 0, policy_version 9579 (0.0011) -[2026-06-07 02:15:21,921][324535] Updated weights for policy 0, policy_version 9589 (0.0011) -[2026-06-07 02:15:21,971][324273] Saving new best policy, reward=239.379! -[2026-06-07 02:15:22,654][324535] Updated weights for policy 0, policy_version 9599 (0.0008) -[2026-06-07 02:15:22,855][324535] Updated weights for policy 0, policy_version 9609 (0.0007) -[2026-06-07 02:15:23,052][324535] Updated weights for policy 0, policy_version 9619 (0.0009) -[2026-06-07 02:15:23,281][324535] Updated weights for policy 0, policy_version 9630 (0.0007) -[2026-06-07 02:15:23,494][324535] Updated weights for policy 0, policy_version 9640 (0.0007) -[2026-06-07 02:15:23,698][324535] Updated weights for policy 0, policy_version 9650 (0.0007) -[2026-06-07 02:15:24,447][324535] Updated weights for policy 0, policy_version 9660 (0.0007) -[2026-06-07 02:15:24,634][324535] Updated weights for policy 0, policy_version 9670 (0.0007) -[2026-06-07 02:15:24,859][324535] Updated weights for policy 0, policy_version 9680 (0.0007) -[2026-06-07 02:15:25,088][324535] Updated weights for policy 0, policy_version 9690 (0.0007) -[2026-06-07 02:15:25,298][324535] Updated weights for policy 0, policy_version 9700 (0.0007) -[2026-06-07 02:15:25,523][324535] Updated weights for policy 0, policy_version 9710 (0.0007) -[2026-06-07 02:15:25,705][324535] Updated weights for policy 0, policy_version 9720 (0.0011) -[2026-06-07 02:15:26,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 16883.9). Total num frames: 4980736. Throughput: 0: 17456.4. Samples: 4981760. Policy #0 lag: (min: 21.0, avg: 37.3, max: 85.0) -[2026-06-07 02:15:26,011][321787] Avg episode reward: [(0, '244.156')] -[2026-06-07 02:15:26,016][324273] Saving new best policy, reward=244.156! -[2026-06-07 02:15:26,472][324535] Updated weights for policy 0, policy_version 9730 (0.0011) -[2026-06-07 02:15:26,699][324535] Updated weights for policy 0, policy_version 9740 (0.0011) -[2026-06-07 02:15:26,894][324535] Updated weights for policy 0, policy_version 9750 (0.0007) -[2026-06-07 02:15:27,116][324535] Updated weights for policy 0, policy_version 9760 (0.0007) -[2026-06-07 02:15:27,301][324535] Updated weights for policy 0, policy_version 9770 (0.0007) -[2026-06-07 02:15:27,538][324535] Updated weights for policy 0, policy_version 9781 (0.0007) -[2026-06-07 02:15:28,286][324535] Updated weights for policy 0, policy_version 9791 (0.0010) -[2026-06-07 02:15:28,488][324535] Updated weights for policy 0, policy_version 9801 (0.0011) -[2026-06-07 02:15:28,716][324535] Updated weights for policy 0, policy_version 9811 (0.0009) -[2026-06-07 02:15:28,936][324535] Updated weights for policy 0, policy_version 9821 (0.0007) -[2026-06-07 02:15:29,152][324535] Updated weights for policy 0, policy_version 9831 (0.0007) -[2026-06-07 02:15:29,362][324535] Updated weights for policy 0, policy_version 9841 (0.0007) -[2026-06-07 02:15:30,138][324535] Updated weights for policy 0, policy_version 9851 (0.0007) -[2026-06-07 02:15:30,379][324535] Updated weights for policy 0, policy_version 9862 (0.0007) -[2026-06-07 02:15:30,579][324535] Updated weights for policy 0, policy_version 9872 (0.0007) -[2026-06-07 02:15:30,800][324535] Updated weights for policy 0, policy_version 9882 (0.0007) -[2026-06-07 02:15:30,999][324535] Updated weights for policy 0, policy_version 9892 (0.0007) -[2026-06-07 02:15:31,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 16883.8). Total num frames: 5046272. Throughput: 0: 17376.7. Samples: 5085056. Policy #0 lag: (min: 21.0, avg: 37.3, max: 85.0) -[2026-06-07 02:15:31,011][321787] Avg episode reward: [(0, '260.367')] -[2026-06-07 02:15:31,206][324535] Updated weights for policy 0, policy_version 9902 (0.0008) -[2026-06-07 02:15:31,419][324273] Saving new best policy, reward=260.367! -[2026-06-07 02:15:31,421][324535] Updated weights for policy 0, policy_version 9912 (0.0008) -[2026-06-07 02:15:32,186][324535] Updated weights for policy 0, policy_version 9922 (0.0007) -[2026-06-07 02:15:32,397][324535] Updated weights for policy 0, policy_version 9932 (0.0007) -[2026-06-07 02:15:32,612][324535] Updated weights for policy 0, policy_version 9942 (0.0007) -[2026-06-07 02:15:32,803][324535] Updated weights for policy 0, policy_version 9952 (0.0007) -[2026-06-07 02:15:33,014][324535] Updated weights for policy 0, policy_version 9962 (0.0007) -[2026-06-07 02:15:33,223][324535] Updated weights for policy 0, policy_version 9972 (0.0007) -[2026-06-07 02:15:33,969][324535] Updated weights for policy 0, policy_version 9982 (0.0007) -[2026-06-07 02:15:34,168][324535] Updated weights for policy 0, policy_version 9992 (0.0009) -[2026-06-07 02:15:34,379][324535] Updated weights for policy 0, policy_version 10002 (0.0011) -[2026-06-07 02:15:34,611][324535] Updated weights for policy 0, policy_version 10013 (0.0011) -[2026-06-07 02:15:34,820][324535] Updated weights for policy 0, policy_version 10023 (0.0012) -[2026-06-07 02:15:35,021][324535] Updated weights for policy 0, policy_version 10033 (0.0009) -[2026-06-07 02:15:35,808][324535] Updated weights for policy 0, policy_version 10044 (0.0007) -[2026-06-07 02:15:36,010][321787] Fps is (10 sec: 16383.9, 60 sec: 17476.2, 300 sec: 16883.9). Total num frames: 5144576. Throughput: 0: 17390.9. Samples: 5130496. Policy #0 lag: (min: 42.0, avg: 70.1, max: 106.0) -[2026-06-07 02:15:36,011][321787] Avg episode reward: [(0, '273.438')] -[2026-06-07 02:15:36,017][324535] Updated weights for policy 0, policy_version 10055 (0.0007) -[2026-06-07 02:15:36,229][324535] Updated weights for policy 0, policy_version 10065 (0.0007) -[2026-06-07 02:15:36,422][324535] Updated weights for policy 0, policy_version 10075 (0.0009) -[2026-06-07 02:15:36,633][324535] Updated weights for policy 0, policy_version 10086 (0.0011) -[2026-06-07 02:15:36,873][324535] Updated weights for policy 0, policy_version 10098 (0.0011) -[2026-06-07 02:15:36,997][324273] Saving new best policy, reward=273.438! -[2026-06-07 02:15:37,684][324535] Updated weights for policy 0, policy_version 10108 (0.0008) -[2026-06-07 02:15:37,881][324535] Updated weights for policy 0, policy_version 10118 (0.0009) -[2026-06-07 02:15:38,092][324535] Updated weights for policy 0, policy_version 10128 (0.0007) -[2026-06-07 02:15:38,308][324535] Updated weights for policy 0, policy_version 10138 (0.0007) -[2026-06-07 02:15:38,498][324535] Updated weights for policy 0, policy_version 10148 (0.0007) -[2026-06-07 02:15:38,686][324535] Updated weights for policy 0, policy_version 10158 (0.0008) -[2026-06-07 02:15:38,881][324535] Updated weights for policy 0, policy_version 10168 (0.0007) -[2026-06-07 02:15:39,641][324535] Updated weights for policy 0, policy_version 10179 (0.0007) -[2026-06-07 02:15:39,851][324535] Updated weights for policy 0, policy_version 10189 (0.0007) -[2026-06-07 02:15:40,072][324535] Updated weights for policy 0, policy_version 10199 (0.0007) -[2026-06-07 02:15:40,263][324535] Updated weights for policy 0, policy_version 10209 (0.0007) -[2026-06-07 02:15:40,499][324535] Updated weights for policy 0, policy_version 10220 (0.0007) -[2026-06-07 02:15:40,735][324535] Updated weights for policy 0, policy_version 10231 (0.0007) -[2026-06-07 02:15:41,010][321787] Fps is (10 sec: 19660.9, 60 sec: 17476.3, 300 sec: 16883.9). Total num frames: 5242880. Throughput: 0: 17459.2. Samples: 5242880. Policy #0 lag: (min: 22.0, avg: 37.9, max: 86.0) -[2026-06-07 02:15:41,011][321787] Avg episode reward: [(0, '292.960')] -[2026-06-07 02:15:41,016][324273] Saving new best policy, reward=292.960! -[2026-06-07 02:15:41,503][324535] Updated weights for policy 0, policy_version 10241 (0.0009) -[2026-06-07 02:15:41,700][324535] Updated weights for policy 0, policy_version 10251 (0.0008) -[2026-06-07 02:15:41,925][324535] Updated weights for policy 0, policy_version 10262 (0.0007) -[2026-06-07 02:15:42,146][324535] Updated weights for policy 0, policy_version 10273 (0.0007) -[2026-06-07 02:15:42,342][324535] Updated weights for policy 0, policy_version 10283 (0.0007) -[2026-06-07 02:15:42,575][324535] Updated weights for policy 0, policy_version 10294 (0.0007) -[2026-06-07 02:15:43,320][324535] Updated weights for policy 0, policy_version 10304 (0.0007) -[2026-06-07 02:15:43,574][324535] Updated weights for policy 0, policy_version 10316 (0.0007) -[2026-06-07 02:15:43,782][324535] Updated weights for policy 0, policy_version 10326 (0.0007) -[2026-06-07 02:15:44,008][324535] Updated weights for policy 0, policy_version 10336 (0.0008) -[2026-06-07 02:15:44,212][324535] Updated weights for policy 0, policy_version 10346 (0.0008) -[2026-06-07 02:15:44,418][324535] Updated weights for policy 0, policy_version 10356 (0.0008) -[2026-06-07 02:15:45,176][324535] Updated weights for policy 0, policy_version 10366 (0.0007) -[2026-06-07 02:15:45,385][324535] Updated weights for policy 0, policy_version 10376 (0.0007) -[2026-06-07 02:15:45,594][324535] Updated weights for policy 0, policy_version 10386 (0.0007) -[2026-06-07 02:15:45,809][324535] Updated weights for policy 0, policy_version 10396 (0.0008) -[2026-06-07 02:15:46,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 16772.8). Total num frames: 5308416. Throughput: 0: 17447.9. Samples: 5348480. Policy #0 lag: (min: 22.0, avg: 37.9, max: 86.0) -[2026-06-07 02:15:46,011][321787] Avg episode reward: [(0, '278.257')] -[2026-06-07 02:15:46,024][324535] Updated weights for policy 0, policy_version 10406 (0.0006) -[2026-06-07 02:15:46,232][324535] Updated weights for policy 0, policy_version 10416 (0.0007) -[2026-06-07 02:15:47,001][324535] Updated weights for policy 0, policy_version 10426 (0.0007) -[2026-06-07 02:15:47,199][324535] Updated weights for policy 0, policy_version 10436 (0.0010) -[2026-06-07 02:15:47,413][324535] Updated weights for policy 0, policy_version 10446 (0.0012) -[2026-06-07 02:15:47,623][324535] Updated weights for policy 0, policy_version 10456 (0.0008) -[2026-06-07 02:15:47,855][324535] Updated weights for policy 0, policy_version 10466 (0.0007) -[2026-06-07 02:15:48,074][324535] Updated weights for policy 0, policy_version 10476 (0.0007) -[2026-06-07 02:15:48,269][324535] Updated weights for policy 0, policy_version 10486 (0.0007) -[2026-06-07 02:15:49,051][324535] Updated weights for policy 0, policy_version 10497 (0.0007) -[2026-06-07 02:15:49,247][324535] Updated weights for policy 0, policy_version 10507 (0.0007) -[2026-06-07 02:15:49,458][324535] Updated weights for policy 0, policy_version 10517 (0.0007) -[2026-06-07 02:15:49,663][324535] Updated weights for policy 0, policy_version 10527 (0.0007) -[2026-06-07 02:15:49,882][324535] Updated weights for policy 0, policy_version 10537 (0.0007) -[2026-06-07 02:15:50,099][324535] Updated weights for policy 0, policy_version 10547 (0.0007) -[2026-06-07 02:15:50,908][324535] Updated weights for policy 0, policy_version 10557 (0.0007) -[2026-06-07 02:15:51,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 16772.8). Total num frames: 5406720. Throughput: 0: 17456.4. Samples: 5392896. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) -[2026-06-07 02:15:51,011][321787] Avg episode reward: [(0, '274.211')] -[2026-06-07 02:15:51,109][324535] Updated weights for policy 0, policy_version 10567 (0.0007) -[2026-06-07 02:15:51,327][324535] Updated weights for policy 0, policy_version 10578 (0.0007) -[2026-06-07 02:15:51,543][324535] Updated weights for policy 0, policy_version 10588 (0.0008) -[2026-06-07 02:15:51,745][324535] Updated weights for policy 0, policy_version 10598 (0.0007) -[2026-06-07 02:15:51,944][324535] Updated weights for policy 0, policy_version 10608 (0.0008) -[2026-06-07 02:15:52,711][324535] Updated weights for policy 0, policy_version 10618 (0.0007) -[2026-06-07 02:15:52,930][324535] Updated weights for policy 0, policy_version 10628 (0.0007) -[2026-06-07 02:15:53,132][324535] Updated weights for policy 0, policy_version 10638 (0.0008) -[2026-06-07 02:15:53,355][324535] Updated weights for policy 0, policy_version 10648 (0.0008) -[2026-06-07 02:15:53,569][324535] Updated weights for policy 0, policy_version 10658 (0.0008) -[2026-06-07 02:15:53,771][324535] Updated weights for policy 0, policy_version 10668 (0.0008) -[2026-06-07 02:15:53,978][324535] Updated weights for policy 0, policy_version 10678 (0.0007) -[2026-06-07 02:15:54,766][324535] Updated weights for policy 0, policy_version 10689 (0.0007) -[2026-06-07 02:15:54,955][324535] Updated weights for policy 0, policy_version 10699 (0.0008) -[2026-06-07 02:15:55,197][324535] Updated weights for policy 0, policy_version 10709 (0.0007) -[2026-06-07 02:15:55,420][324535] Updated weights for policy 0, policy_version 10720 (0.0008) -[2026-06-07 02:15:55,630][324535] Updated weights for policy 0, policy_version 10730 (0.0011) -[2026-06-07 02:15:55,825][324535] Updated weights for policy 0, policy_version 10740 (0.0011) -[2026-06-07 02:15:56,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 16883.9). Total num frames: 5505024. Throughput: 0: 17399.5. Samples: 5499264. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) -[2026-06-07 02:15:56,011][321787] Avg episode reward: [(0, '280.626')] -[2026-06-07 02:15:56,606][324535] Updated weights for policy 0, policy_version 10750 (0.0008) -[2026-06-07 02:15:56,817][324535] Updated weights for policy 0, policy_version 10760 (0.0008) -[2026-06-07 02:15:57,062][324535] Updated weights for policy 0, policy_version 10771 (0.0007) -[2026-06-07 02:15:57,265][324535] Updated weights for policy 0, policy_version 10781 (0.0007) -[2026-06-07 02:15:57,483][324535] Updated weights for policy 0, policy_version 10791 (0.0007) -[2026-06-07 02:15:57,673][324535] Updated weights for policy 0, policy_version 10801 (0.0007) -[2026-06-07 02:15:58,379][324535] Updated weights for policy 0, policy_version 10811 (0.0007) -[2026-06-07 02:15:58,584][324535] Updated weights for policy 0, policy_version 10821 (0.0007) -[2026-06-07 02:15:58,791][324535] Updated weights for policy 0, policy_version 10831 (0.0007) -[2026-06-07 02:15:58,991][324535] Updated weights for policy 0, policy_version 10841 (0.0007) -[2026-06-07 02:15:59,197][324535] Updated weights for policy 0, policy_version 10851 (0.0007) -[2026-06-07 02:15:59,402][324535] Updated weights for policy 0, policy_version 10861 (0.0007) -[2026-06-07 02:15:59,618][324535] Updated weights for policy 0, policy_version 10871 (0.0007) -[2026-06-07 02:16:00,387][324535] Updated weights for policy 0, policy_version 10881 (0.0007) -[2026-06-07 02:16:00,606][324535] Updated weights for policy 0, policy_version 10892 (0.0007) -[2026-06-07 02:16:00,804][324535] Updated weights for policy 0, policy_version 10902 (0.0007) -[2026-06-07 02:16:01,010][324535] Updated weights for policy 0, policy_version 10912 (0.0007) -[2026-06-07 02:16:01,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 16772.8). Total num frames: 5570560. Throughput: 0: 17479.1. Samples: 5608960. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) -[2026-06-07 02:16:01,011][321787] Avg episode reward: [(0, '297.702')] -[2026-06-07 02:16:01,222][324535] Updated weights for policy 0, policy_version 10922 (0.0007) -[2026-06-07 02:16:01,419][324535] Updated weights for policy 0, policy_version 10932 (0.0007) -[2026-06-07 02:16:01,493][324273] Saving new best policy, reward=297.702! -[2026-06-07 02:16:02,143][324535] Updated weights for policy 0, policy_version 10942 (0.0007) -[2026-06-07 02:16:02,351][324535] Updated weights for policy 0, policy_version 10952 (0.0007) -[2026-06-07 02:16:02,569][324535] Updated weights for policy 0, policy_version 10962 (0.0007) -[2026-06-07 02:16:02,762][324535] Updated weights for policy 0, policy_version 10972 (0.0007) -[2026-06-07 02:16:02,976][324535] Updated weights for policy 0, policy_version 10982 (0.0007) -[2026-06-07 02:16:03,191][324535] Updated weights for policy 0, policy_version 10992 (0.0007) -[2026-06-07 02:16:03,978][324535] Updated weights for policy 0, policy_version 11003 (0.0007) -[2026-06-07 02:16:04,199][324535] Updated weights for policy 0, policy_version 11013 (0.0007) -[2026-06-07 02:16:04,421][324535] Updated weights for policy 0, policy_version 11023 (0.0007) -[2026-06-07 02:16:04,630][324535] Updated weights for policy 0, policy_version 11033 (0.0007) -[2026-06-07 02:16:04,862][324535] Updated weights for policy 0, policy_version 11043 (0.0007) -[2026-06-07 02:16:05,098][324535] Updated weights for policy 0, policy_version 11054 (0.0007) -[2026-06-07 02:16:05,326][324535] Updated weights for policy 0, policy_version 11064 (0.0007) -[2026-06-07 02:16:06,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 16883.9). Total num frames: 5668864. Throughput: 0: 17450.7. Samples: 5653504. Policy #0 lag: (min: 41.0, avg: 74.8, max: 105.0) -[2026-06-07 02:16:06,011][321787] Avg episode reward: [(0, '324.979')] -[2026-06-07 02:16:06,103][324535] Updated weights for policy 0, policy_version 11074 (0.0007) -[2026-06-07 02:16:06,304][324535] Updated weights for policy 0, policy_version 11084 (0.0007) -[2026-06-07 02:16:06,503][324535] Updated weights for policy 0, policy_version 11094 (0.0007) -[2026-06-07 02:16:06,700][324535] Updated weights for policy 0, policy_version 11104 (0.0007) -[2026-06-07 02:16:06,887][324535] Updated weights for policy 0, policy_version 11114 (0.0007) -[2026-06-07 02:16:07,089][324535] Updated weights for policy 0, policy_version 11124 (0.0007) -[2026-06-07 02:16:07,167][324273] Saving new best policy, reward=324.979! -[2026-06-07 02:16:07,890][324535] Updated weights for policy 0, policy_version 11135 (0.0007) -[2026-06-07 02:16:08,101][324535] Updated weights for policy 0, policy_version 11145 (0.0007) -[2026-06-07 02:16:08,294][324535] Updated weights for policy 0, policy_version 11155 (0.0006) -[2026-06-07 02:16:08,488][324535] Updated weights for policy 0, policy_version 11165 (0.0007) -[2026-06-07 02:16:08,705][324535] Updated weights for policy 0, policy_version 11175 (0.0007) -[2026-06-07 02:16:08,925][324535] Updated weights for policy 0, policy_version 11185 (0.0007) -[2026-06-07 02:16:09,710][324535] Updated weights for policy 0, policy_version 11195 (0.0007) -[2026-06-07 02:16:09,903][324535] Updated weights for policy 0, policy_version 11205 (0.0007) -[2026-06-07 02:16:10,115][324535] Updated weights for policy 0, policy_version 11215 (0.0007) -[2026-06-07 02:16:10,358][324535] Updated weights for policy 0, policy_version 11225 (0.0006) -[2026-06-07 02:16:10,559][324535] Updated weights for policy 0, policy_version 11235 (0.0007) -[2026-06-07 02:16:10,772][324535] Updated weights for policy 0, policy_version 11245 (0.0007) -[2026-06-07 02:16:10,977][324535] Updated weights for policy 0, policy_version 11255 (0.0007) -[2026-06-07 02:16:11,010][321787] Fps is (10 sec: 19660.7, 60 sec: 17476.3, 300 sec: 16994.9). Total num frames: 5767168. Throughput: 0: 17251.5. Samples: 5758080. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) -[2026-06-07 02:16:11,011][321787] Avg episode reward: [(0, '338.609')] -[2026-06-07 02:16:11,015][324273] Saving new best policy, reward=338.609! -[2026-06-07 02:16:11,722][324535] Updated weights for policy 0, policy_version 11265 (0.0007) -[2026-06-07 02:16:11,927][324535] Updated weights for policy 0, policy_version 11275 (0.0007) -[2026-06-07 02:16:12,136][324535] Updated weights for policy 0, policy_version 11285 (0.0007) -[2026-06-07 02:16:12,347][324535] Updated weights for policy 0, policy_version 11295 (0.0007) -[2026-06-07 02:16:12,581][324535] Updated weights for policy 0, policy_version 11306 (0.0007) -[2026-06-07 02:16:12,807][324535] Updated weights for policy 0, policy_version 11316 (0.0007) -[2026-06-07 02:16:13,545][324535] Updated weights for policy 0, policy_version 11326 (0.0008) -[2026-06-07 02:16:13,742][324535] Updated weights for policy 0, policy_version 11336 (0.0006) -[2026-06-07 02:16:13,947][324535] Updated weights for policy 0, policy_version 11346 (0.0006) -[2026-06-07 02:16:14,168][324535] Updated weights for policy 0, policy_version 11356 (0.0007) -[2026-06-07 02:16:14,370][324535] Updated weights for policy 0, policy_version 11366 (0.0006) -[2026-06-07 02:16:14,567][324535] Updated weights for policy 0, policy_version 11376 (0.0007) -[2026-06-07 02:16:15,357][324535] Updated weights for policy 0, policy_version 11386 (0.0007) -[2026-06-07 02:16:15,566][324535] Updated weights for policy 0, policy_version 11396 (0.0009) -[2026-06-07 02:16:15,760][324535] Updated weights for policy 0, policy_version 11406 (0.0010) -[2026-06-07 02:16:15,962][324535] Updated weights for policy 0, policy_version 11416 (0.0007) -[2026-06-07 02:16:16,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 16883.9). Total num frames: 5832704. Throughput: 0: 17447.8. Samples: 5870208. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) -[2026-06-07 02:16:16,011][321787] Avg episode reward: [(0, '326.681')] -[2026-06-07 02:16:16,175][324535] Updated weights for policy 0, policy_version 11426 (0.0007) -[2026-06-07 02:16:16,385][324535] Updated weights for policy 0, policy_version 11436 (0.0007) -[2026-06-07 02:16:16,586][324535] Updated weights for policy 0, policy_version 11446 (0.0007) -[2026-06-07 02:16:17,316][324535] Updated weights for policy 0, policy_version 11456 (0.0007) -[2026-06-07 02:16:17,537][324535] Updated weights for policy 0, policy_version 11466 (0.0007) -[2026-06-07 02:16:17,742][324535] Updated weights for policy 0, policy_version 11476 (0.0007) -[2026-06-07 02:16:17,956][324535] Updated weights for policy 0, policy_version 11486 (0.0007) -[2026-06-07 02:16:18,169][324535] Updated weights for policy 0, policy_version 11496 (0.0007) -[2026-06-07 02:16:18,385][324535] Updated weights for policy 0, policy_version 11506 (0.0007) -[2026-06-07 02:16:19,127][324535] Updated weights for policy 0, policy_version 11516 (0.0007) -[2026-06-07 02:16:19,328][324535] Updated weights for policy 0, policy_version 11526 (0.0007) -[2026-06-07 02:16:19,530][324535] Updated weights for policy 0, policy_version 11536 (0.0007) -[2026-06-07 02:16:19,730][324535] Updated weights for policy 0, policy_version 11546 (0.0006) -[2026-06-07 02:16:19,945][324535] Updated weights for policy 0, policy_version 11556 (0.0007) -[2026-06-07 02:16:20,157][324535] Updated weights for policy 0, policy_version 11566 (0.0007) -[2026-06-07 02:16:20,345][324535] Updated weights for policy 0, policy_version 11576 (0.0007) -[2026-06-07 02:16:21,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 16994.9). Total num frames: 5931008. Throughput: 0: 17405.2. Samples: 5913728. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) -[2026-06-07 02:16:21,011][321787] Avg episode reward: [(0, '328.999')] -[2026-06-07 02:16:21,083][324535] Updated weights for policy 0, policy_version 11586 (0.0007) -[2026-06-07 02:16:21,321][324535] Updated weights for policy 0, policy_version 11597 (0.0007) -[2026-06-07 02:16:21,512][324535] Updated weights for policy 0, policy_version 11607 (0.0007) -[2026-06-07 02:16:21,718][324535] Updated weights for policy 0, policy_version 11617 (0.0007) -[2026-06-07 02:16:21,929][324535] Updated weights for policy 0, policy_version 11627 (0.0007) -[2026-06-07 02:16:22,145][324535] Updated weights for policy 0, policy_version 11637 (0.0007) -[2026-06-07 02:16:22,911][324535] Updated weights for policy 0, policy_version 11647 (0.0007) -[2026-06-07 02:16:23,133][324535] Updated weights for policy 0, policy_version 11657 (0.0007) -[2026-06-07 02:16:23,333][324535] Updated weights for policy 0, policy_version 11667 (0.0007) -[2026-06-07 02:16:23,536][324535] Updated weights for policy 0, policy_version 11677 (0.0007) -[2026-06-07 02:16:23,765][324535] Updated weights for policy 0, policy_version 11687 (0.0007) -[2026-06-07 02:16:23,976][324535] Updated weights for policy 0, policy_version 11697 (0.0007) -[2026-06-07 02:16:24,727][324535] Updated weights for policy 0, policy_version 11707 (0.0007) -[2026-06-07 02:16:24,929][324535] Updated weights for policy 0, policy_version 11717 (0.0007) -[2026-06-07 02:16:25,177][324535] Updated weights for policy 0, policy_version 11728 (0.0007) -[2026-06-07 02:16:25,366][324535] Updated weights for policy 0, policy_version 11738 (0.0007) -[2026-06-07 02:16:25,574][324535] Updated weights for policy 0, policy_version 11748 (0.0007) -[2026-06-07 02:16:25,803][324535] Updated weights for policy 0, policy_version 11758 (0.0007) -[2026-06-07 02:16:26,002][324535] Updated weights for policy 0, policy_version 11768 (0.0007) -[2026-06-07 02:16:26,010][321787] Fps is (10 sec: 19660.9, 60 sec: 17476.3, 300 sec: 16994.9). Total num frames: 6029312. Throughput: 0: 17203.2. Samples: 6017024. Policy #0 lag: (min: 43.0, avg: 70.2, max: 107.0) -[2026-06-07 02:16:26,011][321787] Avg episode reward: [(0, '336.773')] -[2026-06-07 02:16:26,720][324535] Updated weights for policy 0, policy_version 11779 (0.0010) -[2026-06-07 02:16:26,948][324535] Updated weights for policy 0, policy_version 11789 (0.0008) -[2026-06-07 02:16:27,164][324535] Updated weights for policy 0, policy_version 11799 (0.0007) -[2026-06-07 02:16:27,375][324535] Updated weights for policy 0, policy_version 11809 (0.0007) -[2026-06-07 02:16:27,598][324535] Updated weights for policy 0, policy_version 11820 (0.0007) -[2026-06-07 02:16:27,794][324535] Updated weights for policy 0, policy_version 11830 (0.0007) -[2026-06-07 02:16:28,558][324535] Updated weights for policy 0, policy_version 11840 (0.0007) -[2026-06-07 02:16:28,773][324535] Updated weights for policy 0, policy_version 11850 (0.0007) -[2026-06-07 02:16:28,981][324535] Updated weights for policy 0, policy_version 11860 (0.0007) -[2026-06-07 02:16:29,199][324535] Updated weights for policy 0, policy_version 11870 (0.0007) -[2026-06-07 02:16:29,402][324535] Updated weights for policy 0, policy_version 11880 (0.0007) -[2026-06-07 02:16:29,613][324535] Updated weights for policy 0, policy_version 11890 (0.0007) -[2026-06-07 02:16:30,361][324535] Updated weights for policy 0, policy_version 11900 (0.0007) -[2026-06-07 02:16:30,581][324535] Updated weights for policy 0, policy_version 11911 (0.0007) -[2026-06-07 02:16:30,796][324535] Updated weights for policy 0, policy_version 11921 (0.0007) -[2026-06-07 02:16:31,001][324535] Updated weights for policy 0, policy_version 11931 (0.0007) -[2026-06-07 02:16:31,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 16994.9). Total num frames: 6094848. Throughput: 0: 17345.4. Samples: 6129024. Policy #0 lag: (min: 43.0, avg: 70.2, max: 107.0) -[2026-06-07 02:16:31,011][321787] Avg episode reward: [(0, '359.641')] -[2026-06-07 02:16:31,217][324535] Updated weights for policy 0, policy_version 11941 (0.0007) -[2026-06-07 02:16:31,412][324535] Updated weights for policy 0, policy_version 11951 (0.0007) -[2026-06-07 02:16:31,605][324273] Saving new best policy, reward=359.641! -[2026-06-07 02:16:32,205][324535] Updated weights for policy 0, policy_version 11961 (0.0007) -[2026-06-07 02:16:32,413][324535] Updated weights for policy 0, policy_version 11971 (0.0007) -[2026-06-07 02:16:32,627][324535] Updated weights for policy 0, policy_version 11981 (0.0007) -[2026-06-07 02:16:32,844][324535] Updated weights for policy 0, policy_version 11991 (0.0007) -[2026-06-07 02:16:33,037][324535] Updated weights for policy 0, policy_version 12001 (0.0007) -[2026-06-07 02:16:33,278][324535] Updated weights for policy 0, policy_version 12012 (0.0007) -[2026-06-07 02:16:33,500][324535] Updated weights for policy 0, policy_version 12022 (0.0007) -[2026-06-07 02:16:34,231][324535] Updated weights for policy 0, policy_version 12032 (0.0007) -[2026-06-07 02:16:34,442][324535] Updated weights for policy 0, policy_version 12043 (0.0007) -[2026-06-07 02:16:34,642][324535] Updated weights for policy 0, policy_version 12053 (0.0007) -[2026-06-07 02:16:34,860][324535] Updated weights for policy 0, policy_version 12063 (0.0007) -[2026-06-07 02:16:35,103][324535] Updated weights for policy 0, policy_version 12075 (0.0007) -[2026-06-07 02:16:35,303][324535] Updated weights for policy 0, policy_version 12085 (0.0007) -[2026-06-07 02:16:36,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 16994.9). Total num frames: 6193152. Throughput: 0: 17351.1. Samples: 6173696. Policy #0 lag: (min: 20.0, avg: 35.0, max: 84.0) -[2026-06-07 02:16:36,011][321787] Avg episode reward: [(0, '352.229')] -[2026-06-07 02:16:36,056][324535] Updated weights for policy 0, policy_version 12095 (0.0007) -[2026-06-07 02:16:36,241][324535] Updated weights for policy 0, policy_version 12105 (0.0007) -[2026-06-07 02:16:36,426][324535] Updated weights for policy 0, policy_version 12115 (0.0007) -[2026-06-07 02:16:36,634][324535] Updated weights for policy 0, policy_version 12125 (0.0007) -[2026-06-07 02:16:36,853][324535] Updated weights for policy 0, policy_version 12135 (0.0007) -[2026-06-07 02:16:37,069][324535] Updated weights for policy 0, policy_version 12145 (0.0007) -[2026-06-07 02:16:37,855][324535] Updated weights for policy 0, policy_version 12155 (0.0007) -[2026-06-07 02:16:38,051][324535] Updated weights for policy 0, policy_version 12165 (0.0007) -[2026-06-07 02:16:38,270][324535] Updated weights for policy 0, policy_version 12175 (0.0007) -[2026-06-07 02:16:38,467][324535] Updated weights for policy 0, policy_version 12185 (0.0007) -[2026-06-07 02:16:38,688][324535] Updated weights for policy 0, policy_version 12196 (0.0007) -[2026-06-07 02:16:38,906][324535] Updated weights for policy 0, policy_version 12206 (0.0007) -[2026-06-07 02:16:39,103][324535] Updated weights for policy 0, policy_version 12216 (0.0007) -[2026-06-07 02:16:39,848][324535] Updated weights for policy 0, policy_version 12226 (0.0007) -[2026-06-07 02:16:40,039][324535] Updated weights for policy 0, policy_version 12236 (0.0007) -[2026-06-07 02:16:40,274][324535] Updated weights for policy 0, policy_version 12247 (0.0007) -[2026-06-07 02:16:40,479][324535] Updated weights for policy 0, policy_version 12257 (0.0007) -[2026-06-07 02:16:40,676][324535] Updated weights for policy 0, policy_version 12267 (0.0007) -[2026-06-07 02:16:40,870][324535] Updated weights for policy 0, policy_version 12277 (0.0007) -[2026-06-07 02:16:41,010][321787] Fps is (10 sec: 19660.7, 60 sec: 17476.3, 300 sec: 17106.0). Total num frames: 6291456. Throughput: 0: 17297.1. Samples: 6277632. Policy #0 lag: (min: 20.0, avg: 35.0, max: 84.0) -[2026-06-07 02:16:41,011][321787] Avg episode reward: [(0, '359.072')] -[2026-06-07 02:16:41,621][324535] Updated weights for policy 0, policy_version 12287 (0.0007) -[2026-06-07 02:16:41,860][324535] Updated weights for policy 0, policy_version 12298 (0.0007) -[2026-06-07 02:16:42,087][324535] Updated weights for policy 0, policy_version 12308 (0.0007) -[2026-06-07 02:16:42,297][324535] Updated weights for policy 0, policy_version 12318 (0.0007) -[2026-06-07 02:16:42,505][324535] Updated weights for policy 0, policy_version 12328 (0.0007) -[2026-06-07 02:16:42,713][324535] Updated weights for policy 0, policy_version 12338 (0.0007) -[2026-06-07 02:16:43,471][324535] Updated weights for policy 0, policy_version 12348 (0.0007) -[2026-06-07 02:16:43,682][324535] Updated weights for policy 0, policy_version 12358 (0.0006) -[2026-06-07 02:16:43,879][324535] Updated weights for policy 0, policy_version 12368 (0.0006) -[2026-06-07 02:16:44,103][324535] Updated weights for policy 0, policy_version 12378 (0.0007) -[2026-06-07 02:16:44,309][324535] Updated weights for policy 0, policy_version 12388 (0.0007) -[2026-06-07 02:16:44,529][324535] Updated weights for policy 0, policy_version 12398 (0.0007) -[2026-06-07 02:16:44,746][324535] Updated weights for policy 0, policy_version 12408 (0.0007) -[2026-06-07 02:16:45,491][324535] Updated weights for policy 0, policy_version 12418 (0.0007) -[2026-06-07 02:16:45,718][324535] Updated weights for policy 0, policy_version 12429 (0.0007) -[2026-06-07 02:16:45,946][324535] Updated weights for policy 0, policy_version 12440 (0.0008) -[2026-06-07 02:16:46,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17106.0). Total num frames: 6356992. Throughput: 0: 17382.4. Samples: 6391168. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) -[2026-06-07 02:16:46,011][321787] Avg episode reward: [(0, '354.065')] -[2026-06-07 02:16:46,156][324535] Updated weights for policy 0, policy_version 12450 (0.0007) -[2026-06-07 02:16:46,364][324535] Updated weights for policy 0, policy_version 12461 (0.0007) -[2026-06-07 02:16:46,559][324535] Updated weights for policy 0, policy_version 12471 (0.0007) -[2026-06-07 02:16:47,343][324535] Updated weights for policy 0, policy_version 12481 (0.0007) -[2026-06-07 02:16:47,576][324535] Updated weights for policy 0, policy_version 12492 (0.0007) -[2026-06-07 02:16:47,779][324535] Updated weights for policy 0, policy_version 12502 (0.0007) -[2026-06-07 02:16:47,980][324535] Updated weights for policy 0, policy_version 12512 (0.0007) -[2026-06-07 02:16:48,178][324535] Updated weights for policy 0, policy_version 12522 (0.0007) -[2026-06-07 02:16:48,383][324535] Updated weights for policy 0, policy_version 12532 (0.0007) -[2026-06-07 02:16:49,144][324535] Updated weights for policy 0, policy_version 12542 (0.0007) -[2026-06-07 02:16:49,340][324535] Updated weights for policy 0, policy_version 12552 (0.0007) -[2026-06-07 02:16:49,572][324535] Updated weights for policy 0, policy_version 12563 (0.0007) -[2026-06-07 02:16:49,791][324535] Updated weights for policy 0, policy_version 12574 (0.0007) -[2026-06-07 02:16:50,011][324535] Updated weights for policy 0, policy_version 12585 (0.0007) -[2026-06-07 02:16:50,224][324535] Updated weights for policy 0, policy_version 12595 (0.0007) -[2026-06-07 02:16:50,983][324535] Updated weights for policy 0, policy_version 12605 (0.0007) -[2026-06-07 02:16:51,010][321787] Fps is (10 sec: 16383.9, 60 sec: 17476.2, 300 sec: 17106.0). Total num frames: 6455296. Throughput: 0: 17371.0. Samples: 6435200. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) -[2026-06-07 02:16:51,011][321787] Avg episode reward: [(0, '378.358')] -[2026-06-07 02:16:51,180][324535] Updated weights for policy 0, policy_version 12615 (0.0007) -[2026-06-07 02:16:51,387][324535] Updated weights for policy 0, policy_version 12625 (0.0007) -[2026-06-07 02:16:51,587][324535] Updated weights for policy 0, policy_version 12636 (0.0007) -[2026-06-07 02:16:51,848][324535] Updated weights for policy 0, policy_version 12647 (0.0007) -[2026-06-07 02:16:52,053][324535] Updated weights for policy 0, policy_version 12657 (0.0007) -[2026-06-07 02:16:52,192][324273] Saving new best policy, reward=378.358! -[2026-06-07 02:16:52,807][324535] Updated weights for policy 0, policy_version 12667 (0.0007) -[2026-06-07 02:16:53,003][324535] Updated weights for policy 0, policy_version 12677 (0.0007) -[2026-06-07 02:16:53,216][324535] Updated weights for policy 0, policy_version 12687 (0.0007) -[2026-06-07 02:16:53,426][324535] Updated weights for policy 0, policy_version 12697 (0.0007) -[2026-06-07 02:16:53,650][324535] Updated weights for policy 0, policy_version 12707 (0.0007) -[2026-06-07 02:16:53,865][324535] Updated weights for policy 0, policy_version 12717 (0.0007) -[2026-06-07 02:16:54,065][324535] Updated weights for policy 0, policy_version 12727 (0.0007) -[2026-06-07 02:16:54,848][324535] Updated weights for policy 0, policy_version 12737 (0.0007) -[2026-06-07 02:16:55,029][324535] Updated weights for policy 0, policy_version 12747 (0.0007) -[2026-06-07 02:16:55,237][324535] Updated weights for policy 0, policy_version 12757 (0.0007) -[2026-06-07 02:16:55,444][324535] Updated weights for policy 0, policy_version 12767 (0.0007) -[2026-06-07 02:16:55,648][324535] Updated weights for policy 0, policy_version 12777 (0.0007) -[2026-06-07 02:16:55,840][324535] Updated weights for policy 0, policy_version 12787 (0.0007) -[2026-06-07 02:16:56,010][321787] Fps is (10 sec: 19660.7, 60 sec: 17476.2, 300 sec: 17217.1). Total num frames: 6553600. Throughput: 0: 17385.2. Samples: 6540416. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) -[2026-06-07 02:16:56,011][321787] Avg episode reward: [(0, '384.911')] -[2026-06-07 02:16:56,015][324273] Saving new best policy, reward=384.911! -[2026-06-07 02:16:56,617][324535] Updated weights for policy 0, policy_version 12797 (0.0007) -[2026-06-07 02:16:56,814][324535] Updated weights for policy 0, policy_version 12807 (0.0007) -[2026-06-07 02:16:57,024][324535] Updated weights for policy 0, policy_version 12817 (0.0007) -[2026-06-07 02:16:57,236][324535] Updated weights for policy 0, policy_version 12827 (0.0007) -[2026-06-07 02:16:57,455][324535] Updated weights for policy 0, policy_version 12838 (0.0007) -[2026-06-07 02:16:57,666][324535] Updated weights for policy 0, policy_version 12848 (0.0007) -[2026-06-07 02:16:58,425][324535] Updated weights for policy 0, policy_version 12858 (0.0007) -[2026-06-07 02:16:58,615][324535] Updated weights for policy 0, policy_version 12868 (0.0007) -[2026-06-07 02:16:58,820][324535] Updated weights for policy 0, policy_version 12878 (0.0007) -[2026-06-07 02:16:59,016][324535] Updated weights for policy 0, policy_version 12888 (0.0007) -[2026-06-07 02:16:59,243][324535] Updated weights for policy 0, policy_version 12899 (0.0007) -[2026-06-07 02:16:59,447][324535] Updated weights for policy 0, policy_version 12909 (0.0007) -[2026-06-07 02:16:59,647][324535] Updated weights for policy 0, policy_version 12919 (0.0007) -[2026-06-07 02:17:00,405][324535] Updated weights for policy 0, policy_version 12929 (0.0007) -[2026-06-07 02:17:00,616][324535] Updated weights for policy 0, policy_version 12939 (0.0007) -[2026-06-07 02:17:00,830][324535] Updated weights for policy 0, policy_version 12949 (0.0007) -[2026-06-07 02:17:01,010][321787] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17106.0). Total num frames: 6619136. Throughput: 0: 17396.6. Samples: 6653056. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) -[2026-06-07 02:17:01,011][321787] Avg episode reward: [(0, '405.430')] -[2026-06-07 02:17:01,038][324535] Updated weights for policy 0, policy_version 12959 (0.0007) -[2026-06-07 02:17:01,246][324535] Updated weights for policy 0, policy_version 12969 (0.0007) -[2026-06-07 02:17:01,459][324535] Updated weights for policy 0, policy_version 12979 (0.0007) -[2026-06-07 02:17:01,560][324273] Saving new best policy, reward=405.430! -[2026-06-07 02:17:02,226][324535] Updated weights for policy 0, policy_version 12989 (0.0007) -[2026-06-07 02:17:02,440][324535] Updated weights for policy 0, policy_version 12999 (0.0007) -[2026-06-07 02:17:02,639][324535] Updated weights for policy 0, policy_version 13009 (0.0007) -[2026-06-07 02:17:02,842][324535] Updated weights for policy 0, policy_version 13019 (0.0007) -[2026-06-07 02:17:03,050][324535] Updated weights for policy 0, policy_version 13029 (0.0007) -[2026-06-07 02:17:03,262][324535] Updated weights for policy 0, policy_version 13040 (0.0007) -[2026-06-07 02:17:04,041][324535] Updated weights for policy 0, policy_version 13050 (0.0007) -[2026-06-07 02:17:04,259][324535] Updated weights for policy 0, policy_version 13061 (0.0006) -[2026-06-07 02:17:04,476][324535] Updated weights for policy 0, policy_version 13071 (0.0007) -[2026-06-07 02:17:04,675][324535] Updated weights for policy 0, policy_version 13081 (0.0007) -[2026-06-07 02:17:04,886][324535] Updated weights for policy 0, policy_version 13091 (0.0007) -[2026-06-07 02:17:05,094][324535] Updated weights for policy 0, policy_version 13101 (0.0007) -[2026-06-07 02:17:05,314][324535] Updated weights for policy 0, policy_version 13111 (0.0007) -[2026-06-07 02:17:06,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17217.1). Total num frames: 6717440. Throughput: 0: 17427.9. Samples: 6697984. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) -[2026-06-07 02:17:06,011][321787] Avg episode reward: [(0, '412.102')] -[2026-06-07 02:17:06,045][324535] Updated weights for policy 0, policy_version 13121 (0.0006) -[2026-06-07 02:17:06,251][324535] Updated weights for policy 0, policy_version 13131 (0.0008) -[2026-06-07 02:17:06,466][324535] Updated weights for policy 0, policy_version 13141 (0.0007) -[2026-06-07 02:17:06,664][324535] Updated weights for policy 0, policy_version 13151 (0.0007) -[2026-06-07 02:17:06,863][324535] Updated weights for policy 0, policy_version 13161 (0.0007) -[2026-06-07 02:17:07,055][324535] Updated weights for policy 0, policy_version 13171 (0.0007) -[2026-06-07 02:17:07,149][324273] Saving new best policy, reward=412.102! -[2026-06-07 02:17:07,812][324535] Updated weights for policy 0, policy_version 13181 (0.0007) -[2026-06-07 02:17:08,019][324535] Updated weights for policy 0, policy_version 13191 (0.0007) -[2026-06-07 02:17:08,255][324535] Updated weights for policy 0, policy_version 13202 (0.0007) -[2026-06-07 02:17:08,470][324535] Updated weights for policy 0, policy_version 13212 (0.0007) -[2026-06-07 02:17:08,685][324535] Updated weights for policy 0, policy_version 13222 (0.0007) -[2026-06-07 02:17:08,887][324535] Updated weights for policy 0, policy_version 13232 (0.0007) -[2026-06-07 02:17:09,653][324535] Updated weights for policy 0, policy_version 13242 (0.0010) -[2026-06-07 02:17:09,855][324535] Updated weights for policy 0, policy_version 13252 (0.0006) -[2026-06-07 02:17:10,060][324535] Updated weights for policy 0, policy_version 13262 (0.0007) -[2026-06-07 02:17:10,275][324535] Updated weights for policy 0, policy_version 13272 (0.0007) -[2026-06-07 02:17:10,503][324535] Updated weights for policy 0, policy_version 13283 (0.0007) -[2026-06-07 02:17:10,704][324535] Updated weights for policy 0, policy_version 13293 (0.0007) -[2026-06-07 02:17:10,885][324535] Updated weights for policy 0, policy_version 13303 (0.0007) -[2026-06-07 02:17:11,010][321787] Fps is (10 sec: 19660.6, 60 sec: 17476.3, 300 sec: 17217.1). Total num frames: 6815744. Throughput: 0: 17499.0. Samples: 6804480. Policy #0 lag: (min: 20.0, avg: 35.5, max: 84.0) -[2026-06-07 02:17:11,011][321787] Avg episode reward: [(0, '460.678')] -[2026-06-07 02:17:11,016][324273] Saving new best policy, reward=460.678! -[2026-06-07 02:17:11,668][324535] Updated weights for policy 0, policy_version 13313 (0.0007) -[2026-06-07 02:17:11,890][324535] Updated weights for policy 0, policy_version 13323 (0.0007) -[2026-06-07 02:17:12,090][324535] Updated weights for policy 0, policy_version 13333 (0.0007) -[2026-06-07 02:17:12,300][324535] Updated weights for policy 0, policy_version 13343 (0.0007) -[2026-06-07 02:17:12,502][324535] Updated weights for policy 0, policy_version 13353 (0.0008) -[2026-06-07 02:17:12,716][324535] Updated weights for policy 0, policy_version 13363 (0.0007) -[2026-06-07 02:17:13,449][324535] Updated weights for policy 0, policy_version 13373 (0.0007) -[2026-06-07 02:17:13,653][324535] Updated weights for policy 0, policy_version 13383 (0.0007) -[2026-06-07 02:17:13,859][324535] Updated weights for policy 0, policy_version 13393 (0.0007) -[2026-06-07 02:17:14,074][324535] Updated weights for policy 0, policy_version 13404 (0.0007) -[2026-06-07 02:17:14,272][324535] Updated weights for policy 0, policy_version 13414 (0.0007) -[2026-06-07 02:17:14,468][324535] Updated weights for policy 0, policy_version 13424 (0.0007) -[2026-06-07 02:17:15,238][324535] Updated weights for policy 0, policy_version 13434 (0.0008) -[2026-06-07 02:17:15,459][324535] Updated weights for policy 0, policy_version 13444 (0.0007) -[2026-06-07 02:17:15,688][324535] Updated weights for policy 0, policy_version 13455 (0.0007) -[2026-06-07 02:17:15,875][324535] Updated weights for policy 0, policy_version 13465 (0.0007) -[2026-06-07 02:17:16,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17217.1). Total num frames: 6881280. Throughput: 0: 17476.3. Samples: 6915456. Policy #0 lag: (min: 20.0, avg: 35.5, max: 84.0) -[2026-06-07 02:17:16,011][321787] Avg episode reward: [(0, '467.730')] -[2026-06-07 02:17:16,104][324535] Updated weights for policy 0, policy_version 13475 (0.0007) -[2026-06-07 02:17:16,306][324535] Updated weights for policy 0, policy_version 13485 (0.0007) -[2026-06-07 02:17:16,525][324535] Updated weights for policy 0, policy_version 13495 (0.0007) -[2026-06-07 02:17:16,537][324273] Saving new best policy, reward=467.730! -[2026-06-07 02:17:17,306][324535] Updated weights for policy 0, policy_version 13506 (0.0007) -[2026-06-07 02:17:17,523][324535] Updated weights for policy 0, policy_version 13516 (0.0007) -[2026-06-07 02:17:17,748][324535] Updated weights for policy 0, policy_version 13526 (0.0007) -[2026-06-07 02:17:17,953][324535] Updated weights for policy 0, policy_version 13536 (0.0007) -[2026-06-07 02:17:18,164][324535] Updated weights for policy 0, policy_version 13546 (0.0007) -[2026-06-07 02:17:18,368][324535] Updated weights for policy 0, policy_version 13556 (0.0007) -[2026-06-07 02:17:19,098][324535] Updated weights for policy 0, policy_version 13566 (0.0007) -[2026-06-07 02:17:19,289][324535] Updated weights for policy 0, policy_version 13576 (0.0007) -[2026-06-07 02:17:19,497][324535] Updated weights for policy 0, policy_version 13586 (0.0007) -[2026-06-07 02:17:19,720][324535] Updated weights for policy 0, policy_version 13597 (0.0008) -[2026-06-07 02:17:19,940][324535] Updated weights for policy 0, policy_version 13607 (0.0008) -[2026-06-07 02:17:20,164][324535] Updated weights for policy 0, policy_version 13617 (0.0007) -[2026-06-07 02:17:20,915][324535] Updated weights for policy 0, policy_version 13627 (0.0007) -[2026-06-07 02:17:21,010][321787] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17328.2). Total num frames: 6979584. Throughput: 0: 17473.4. Samples: 6960000. Policy #0 lag: (min: 5.0, avg: 21.3, max: 69.0) -[2026-06-07 02:17:21,011][321787] Avg episode reward: [(0, '448.859')] -[2026-06-07 02:17:21,127][324535] Updated weights for policy 0, policy_version 13637 (0.0007) -[2026-06-07 02:17:21,349][324535] Updated weights for policy 0, policy_version 13647 (0.0007) -[2026-06-07 02:17:21,571][324535] Updated weights for policy 0, policy_version 13657 (0.0007) -[2026-06-07 02:17:21,780][324535] Updated weights for policy 0, policy_version 13667 (0.0007) -[2026-06-07 02:17:21,999][324535] Updated weights for policy 0, policy_version 13677 (0.0008) -[2026-06-07 02:17:22,235][324535] Updated weights for policy 0, policy_version 13687 (0.0010) -[2026-06-07 02:17:22,976][324535] Updated weights for policy 0, policy_version 13697 (0.0011) -[2026-06-07 02:17:23,178][324535] Updated weights for policy 0, policy_version 13707 (0.0011) -[2026-06-07 02:17:23,383][324535] Updated weights for policy 0, policy_version 13717 (0.0010) -[2026-06-07 02:17:23,588][324535] Updated weights for policy 0, policy_version 13727 (0.0011) -[2026-06-07 02:17:23,815][324535] Updated weights for policy 0, policy_version 13737 (0.0010) -[2026-06-07 02:17:24,017][324535] Updated weights for policy 0, policy_version 13747 (0.0010) -[2026-06-07 02:17:24,790][324535] Updated weights for policy 0, policy_version 13757 (0.0011) -[2026-06-07 02:17:25,008][324535] Updated weights for policy 0, policy_version 13767 (0.0011) -[2026-06-07 02:17:25,221][324535] Updated weights for policy 0, policy_version 13777 (0.0011) -[2026-06-07 02:17:25,427][324535] Updated weights for policy 0, policy_version 13787 (0.0011) -[2026-06-07 02:17:25,631][324535] Updated weights for policy 0, policy_version 13797 (0.0011) -[2026-06-07 02:17:25,830][324535] Updated weights for policy 0, policy_version 13807 (0.0011) -[2026-06-07 02:17:26,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17328.2). Total num frames: 7077888. Throughput: 0: 17473.4. Samples: 7063936. Policy #0 lag: (min: 5.0, avg: 21.3, max: 69.0) -[2026-06-07 02:17:26,011][321787] Avg episode reward: [(0, '419.108')] -[2026-06-07 02:17:26,627][324535] Updated weights for policy 0, policy_version 13818 (0.0008) -[2026-06-07 02:17:26,864][324535] Updated weights for policy 0, policy_version 13829 (0.0007) -[2026-06-07 02:17:27,095][324535] Updated weights for policy 0, policy_version 13839 (0.0008) -[2026-06-07 02:17:27,309][324535] Updated weights for policy 0, policy_version 13849 (0.0007) -[2026-06-07 02:17:27,521][324535] Updated weights for policy 0, policy_version 13859 (0.0007) -[2026-06-07 02:17:27,724][324535] Updated weights for policy 0, policy_version 13869 (0.0007) -[2026-06-07 02:17:27,959][324535] Updated weights for policy 0, policy_version 13880 (0.0007) -[2026-06-07 02:17:28,700][324535] Updated weights for policy 0, policy_version 13890 (0.0007) -[2026-06-07 02:17:28,919][324535] Updated weights for policy 0, policy_version 13900 (0.0007) -[2026-06-07 02:17:29,131][324535] Updated weights for policy 0, policy_version 13910 (0.0007) -[2026-06-07 02:17:29,327][324535] Updated weights for policy 0, policy_version 13920 (0.0007) -[2026-06-07 02:17:29,525][324535] Updated weights for policy 0, policy_version 13930 (0.0007) -[2026-06-07 02:17:29,718][324535] Updated weights for policy 0, policy_version 13940 (0.0007) -[2026-06-07 02:17:30,481][324535] Updated weights for policy 0, policy_version 13951 (0.0007) -[2026-06-07 02:17:30,685][324535] Updated weights for policy 0, policy_version 13961 (0.0007) -[2026-06-07 02:17:30,883][324535] Updated weights for policy 0, policy_version 13971 (0.0007) -[2026-06-07 02:17:31,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17328.2). Total num frames: 7143424. Throughput: 0: 17456.4. Samples: 7176704. Policy #0 lag: (min: 63.0, avg: 78.7, max: 127.0) -[2026-06-07 02:17:31,011][321787] Avg episode reward: [(0, '428.717')] -[2026-06-07 02:17:31,105][324535] Updated weights for policy 0, policy_version 13981 (0.0007) -[2026-06-07 02:17:31,306][324535] Updated weights for policy 0, policy_version 13991 (0.0007) -[2026-06-07 02:17:31,514][324535] Updated weights for policy 0, policy_version 14001 (0.0007) -[2026-06-07 02:17:32,278][324535] Updated weights for policy 0, policy_version 14011 (0.0007) -[2026-06-07 02:17:32,490][324535] Updated weights for policy 0, policy_version 14021 (0.0007) -[2026-06-07 02:17:32,672][324535] Updated weights for policy 0, policy_version 14031 (0.0007) -[2026-06-07 02:17:32,880][324535] Updated weights for policy 0, policy_version 14041 (0.0007) -[2026-06-07 02:17:33,084][324535] Updated weights for policy 0, policy_version 14051 (0.0007) -[2026-06-07 02:17:33,306][324535] Updated weights for policy 0, policy_version 14061 (0.0007) -[2026-06-07 02:17:33,520][324535] Updated weights for policy 0, policy_version 14071 (0.0007) -[2026-06-07 02:17:34,289][324535] Updated weights for policy 0, policy_version 14081 (0.0007) -[2026-06-07 02:17:34,517][324535] Updated weights for policy 0, policy_version 14091 (0.0007) -[2026-06-07 02:17:34,750][324535] Updated weights for policy 0, policy_version 14103 (0.0007) -[2026-06-07 02:17:34,954][324535] Updated weights for policy 0, policy_version 14113 (0.0007) -[2026-06-07 02:17:35,164][324535] Updated weights for policy 0, policy_version 14123 (0.0007) -[2026-06-07 02:17:35,374][324535] Updated weights for policy 0, policy_version 14133 (0.0007) -[2026-06-07 02:17:36,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17328.2). Total num frames: 7241728. Throughput: 0: 17490.5. Samples: 7222272. Policy #0 lag: (min: 63.0, avg: 78.7, max: 127.0) -[2026-06-07 02:17:36,011][321787] Avg episode reward: [(0, '439.783')] -[2026-06-07 02:17:36,103][324535] Updated weights for policy 0, policy_version 14143 (0.0007) -[2026-06-07 02:17:36,308][324535] Updated weights for policy 0, policy_version 14153 (0.0007) -[2026-06-07 02:17:36,522][324535] Updated weights for policy 0, policy_version 14164 (0.0007) -[2026-06-07 02:17:36,721][324535] Updated weights for policy 0, policy_version 14174 (0.0007) -[2026-06-07 02:17:36,932][324535] Updated weights for policy 0, policy_version 14184 (0.0006) -[2026-06-07 02:17:37,144][324535] Updated weights for policy 0, policy_version 14194 (0.0006) -[2026-06-07 02:17:37,907][324535] Updated weights for policy 0, policy_version 14204 (0.0007) -[2026-06-07 02:17:38,117][324535] Updated weights for policy 0, policy_version 14214 (0.0007) -[2026-06-07 02:17:38,320][324535] Updated weights for policy 0, policy_version 14224 (0.0007) -[2026-06-07 02:17:38,541][324535] Updated weights for policy 0, policy_version 14234 (0.0007) -[2026-06-07 02:17:38,727][324535] Updated weights for policy 0, policy_version 14244 (0.0007) -[2026-06-07 02:17:38,931][324535] Updated weights for policy 0, policy_version 14254 (0.0007) -[2026-06-07 02:17:39,138][324535] Updated weights for policy 0, policy_version 14264 (0.0007) -[2026-06-07 02:17:39,900][324535] Updated weights for policy 0, policy_version 14274 (0.0007) -[2026-06-07 02:17:40,120][324535] Updated weights for policy 0, policy_version 14284 (0.0007) -[2026-06-07 02:17:40,326][324535] Updated weights for policy 0, policy_version 14294 (0.0007) -[2026-06-07 02:17:40,531][324535] Updated weights for policy 0, policy_version 14304 (0.0007) -[2026-06-07 02:17:40,739][324535] Updated weights for policy 0, policy_version 14314 (0.0007) -[2026-06-07 02:17:40,951][324535] Updated weights for policy 0, policy_version 14324 (0.0007) -[2026-06-07 02:17:41,010][321787] Fps is (10 sec: 16383.9, 60 sec: 16930.1, 300 sec: 17328.2). Total num frames: 7307264. Throughput: 0: 17493.4. Samples: 7327616. Policy #0 lag: (min: 63.0, avg: 78.7, max: 127.0) -[2026-06-07 02:17:41,011][321787] Avg episode reward: [(0, '431.268')] -[2026-06-07 02:17:41,686][324535] Updated weights for policy 0, policy_version 14334 (0.0007) -[2026-06-07 02:17:41,887][324535] Updated weights for policy 0, policy_version 14344 (0.0007) -[2026-06-07 02:17:42,113][324535] Updated weights for policy 0, policy_version 14354 (0.0007) -[2026-06-07 02:17:42,319][324535] Updated weights for policy 0, policy_version 14364 (0.0007) -[2026-06-07 02:17:42,557][324535] Updated weights for policy 0, policy_version 14375 (0.0007) -[2026-06-07 02:17:42,752][324535] Updated weights for policy 0, policy_version 14385 (0.0007) -[2026-06-07 02:17:43,515][324535] Updated weights for policy 0, policy_version 14395 (0.0007) -[2026-06-07 02:17:43,726][324535] Updated weights for policy 0, policy_version 14405 (0.0007) -[2026-06-07 02:17:43,918][324535] Updated weights for policy 0, policy_version 14415 (0.0007) -[2026-06-07 02:17:44,168][324535] Updated weights for policy 0, policy_version 14426 (0.0007) -[2026-06-07 02:17:44,378][324535] Updated weights for policy 0, policy_version 14436 (0.0007) -[2026-06-07 02:17:44,608][324535] Updated weights for policy 0, policy_version 14447 (0.0006) -[2026-06-07 02:17:45,371][324535] Updated weights for policy 0, policy_version 14457 (0.0007) -[2026-06-07 02:17:45,586][324535] Updated weights for policy 0, policy_version 14467 (0.0007) -[2026-06-07 02:17:45,810][324535] Updated weights for policy 0, policy_version 14477 (0.0007) -[2026-06-07 02:17:46,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17328.2). Total num frames: 7405568. Throughput: 0: 17496.2. Samples: 7440384. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) -[2026-06-07 02:17:46,011][321787] Avg episode reward: [(0, '440.785')] -[2026-06-07 02:17:46,025][324535] Updated weights for policy 0, policy_version 14487 (0.0007) -[2026-06-07 02:17:46,228][324535] Updated weights for policy 0, policy_version 14497 (0.0007) -[2026-06-07 02:17:46,452][324535] Updated weights for policy 0, policy_version 14507 (0.0007) -[2026-06-07 02:17:46,673][324535] Updated weights for policy 0, policy_version 14517 (0.0007) -[2026-06-07 02:17:47,403][324535] Updated weights for policy 0, policy_version 14527 (0.0007) -[2026-06-07 02:17:47,612][324535] Updated weights for policy 0, policy_version 14537 (0.0009) -[2026-06-07 02:17:47,810][324535] Updated weights for policy 0, policy_version 14547 (0.0009) -[2026-06-07 02:17:48,018][324535] Updated weights for policy 0, policy_version 14557 (0.0007) -[2026-06-07 02:17:48,215][324535] Updated weights for policy 0, policy_version 14567 (0.0007) -[2026-06-07 02:17:48,423][324535] Updated weights for policy 0, policy_version 14577 (0.0007) -[2026-06-07 02:17:49,170][324535] Updated weights for policy 0, policy_version 14587 (0.0007) -[2026-06-07 02:17:49,377][324535] Updated weights for policy 0, policy_version 14597 (0.0009) -[2026-06-07 02:17:49,590][324535] Updated weights for policy 0, policy_version 14607 (0.0007) -[2026-06-07 02:17:49,801][324535] Updated weights for policy 0, policy_version 14617 (0.0007) -[2026-06-07 02:17:50,008][324535] Updated weights for policy 0, policy_version 14627 (0.0008) -[2026-06-07 02:17:50,206][324535] Updated weights for policy 0, policy_version 14637 (0.0007) -[2026-06-07 02:17:50,420][324535] Updated weights for policy 0, policy_version 14647 (0.0009) -[2026-06-07 02:17:51,010][321787] Fps is (10 sec: 19660.6, 60 sec: 17476.3, 300 sec: 17328.2). Total num frames: 7503872. Throughput: 0: 17479.1. Samples: 7484544. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) -[2026-06-07 02:17:51,011][321787] Avg episode reward: [(0, '417.892')] -[2026-06-07 02:17:51,148][324535] Updated weights for policy 0, policy_version 14657 (0.0007) -[2026-06-07 02:17:51,360][324535] Updated weights for policy 0, policy_version 14667 (0.0007) -[2026-06-07 02:17:51,572][324535] Updated weights for policy 0, policy_version 14677 (0.0007) -[2026-06-07 02:17:51,776][324535] Updated weights for policy 0, policy_version 14687 (0.0007) -[2026-06-07 02:17:51,992][324535] Updated weights for policy 0, policy_version 14697 (0.0007) -[2026-06-07 02:17:52,207][324535] Updated weights for policy 0, policy_version 14707 (0.0007) -[2026-06-07 02:17:52,950][324535] Updated weights for policy 0, policy_version 14717 (0.0007) -[2026-06-07 02:17:53,154][324535] Updated weights for policy 0, policy_version 14727 (0.0007) -[2026-06-07 02:17:53,383][324535] Updated weights for policy 0, policy_version 14738 (0.0007) -[2026-06-07 02:17:53,594][324535] Updated weights for policy 0, policy_version 14748 (0.0007) -[2026-06-07 02:17:53,796][324535] Updated weights for policy 0, policy_version 14758 (0.0007) -[2026-06-07 02:17:54,018][324535] Updated weights for policy 0, policy_version 14768 (0.0007) -[2026-06-07 02:17:54,816][324535] Updated weights for policy 0, policy_version 14780 (0.0009) -[2026-06-07 02:17:55,012][324535] Updated weights for policy 0, policy_version 14790 (0.0007) -[2026-06-07 02:17:55,228][324535] Updated weights for policy 0, policy_version 14800 (0.0007) -[2026-06-07 02:17:55,439][324535] Updated weights for policy 0, policy_version 14810 (0.0007) -[2026-06-07 02:17:55,667][324535] Updated weights for policy 0, policy_version 14820 (0.0007) -[2026-06-07 02:17:55,873][324535] Updated weights for policy 0, policy_version 14830 (0.0007) -[2026-06-07 02:17:56,010][321787] Fps is (10 sec: 16383.9, 60 sec: 16930.1, 300 sec: 17328.2). Total num frames: 7569408. Throughput: 0: 17393.8. Samples: 7587200. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) -[2026-06-07 02:17:56,011][321787] Avg episode reward: [(0, '428.877')] -[2026-06-07 02:17:56,087][324535] Updated weights for policy 0, policy_version 14840 (0.0007) -[2026-06-07 02:17:56,842][324535] Updated weights for policy 0, policy_version 14850 (0.0007) -[2026-06-07 02:17:57,054][324535] Updated weights for policy 0, policy_version 14860 (0.0008) -[2026-06-07 02:17:57,271][324535] Updated weights for policy 0, policy_version 14871 (0.0007) -[2026-06-07 02:17:57,482][324535] Updated weights for policy 0, policy_version 14881 (0.0007) -[2026-06-07 02:17:57,694][324535] Updated weights for policy 0, policy_version 14891 (0.0007) -[2026-06-07 02:17:57,912][324535] Updated weights for policy 0, policy_version 14901 (0.0007) -[2026-06-07 02:17:58,676][324535] Updated weights for policy 0, policy_version 14912 (0.0007) -[2026-06-07 02:17:58,906][324535] Updated weights for policy 0, policy_version 14923 (0.0007) -[2026-06-07 02:17:59,119][324535] Updated weights for policy 0, policy_version 14933 (0.0007) -[2026-06-07 02:17:59,320][324535] Updated weights for policy 0, policy_version 14943 (0.0007) -[2026-06-07 02:17:59,530][324535] Updated weights for policy 0, policy_version 14954 (0.0007) -[2026-06-07 02:17:59,776][324535] Updated weights for policy 0, policy_version 14965 (0.0007) -[2026-06-07 02:18:00,525][324535] Updated weights for policy 0, policy_version 14975 (0.0007) -[2026-06-07 02:18:00,723][324535] Updated weights for policy 0, policy_version 14985 (0.0007) -[2026-06-07 02:18:00,932][324535] Updated weights for policy 0, policy_version 14995 (0.0007) -[2026-06-07 02:18:01,010][321787] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17328.2). Total num frames: 7667712. Throughput: 0: 17442.1. Samples: 7700352. Policy #0 lag: (min: 38.0, avg: 53.3, max: 102.0) -[2026-06-07 02:18:01,011][321787] Avg episode reward: [(0, '439.813')] -[2026-06-07 02:18:01,138][324535] Updated weights for policy 0, policy_version 15005 (0.0007) -[2026-06-07 02:18:01,353][324535] Updated weights for policy 0, policy_version 15015 (0.0007) -[2026-06-07 02:18:01,553][324535] Updated weights for policy 0, policy_version 15025 (0.0007) -[2026-06-07 02:18:02,313][324535] Updated weights for policy 0, policy_version 15035 (0.0007) -[2026-06-07 02:18:02,516][324535] Updated weights for policy 0, policy_version 15045 (0.0007) -[2026-06-07 02:18:02,717][324535] Updated weights for policy 0, policy_version 15055 (0.0007) -[2026-06-07 02:18:02,926][324535] Updated weights for policy 0, policy_version 15065 (0.0007) -[2026-06-07 02:18:03,128][324535] Updated weights for policy 0, policy_version 15075 (0.0007) -[2026-06-07 02:18:03,331][324535] Updated weights for policy 0, policy_version 15085 (0.0007) -[2026-06-07 02:18:03,537][324535] Updated weights for policy 0, policy_version 15095 (0.0007) -[2026-06-07 02:18:04,278][324535] Updated weights for policy 0, policy_version 15105 (0.0007) -[2026-06-07 02:18:04,479][324535] Updated weights for policy 0, policy_version 15115 (0.0007) -[2026-06-07 02:18:04,674][324535] Updated weights for policy 0, policy_version 15125 (0.0007) -[2026-06-07 02:18:04,888][324535] Updated weights for policy 0, policy_version 15135 (0.0007) -[2026-06-07 02:18:05,102][324535] Updated weights for policy 0, policy_version 15145 (0.0007) -[2026-06-07 02:18:05,301][324535] Updated weights for policy 0, policy_version 15155 (0.0009) -[2026-06-07 02:18:06,010][321787] Fps is (10 sec: 19660.6, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 7766016. Throughput: 0: 17504.6. Samples: 7747712. Policy #0 lag: (min: 38.0, avg: 53.3, max: 102.0) -[2026-06-07 02:18:06,011][321787] Avg episode reward: [(0, '466.920')] -[2026-06-07 02:18:06,037][324535] Updated weights for policy 0, policy_version 15165 (0.0009) -[2026-06-07 02:18:06,255][324535] Updated weights for policy 0, policy_version 15176 (0.0010) -[2026-06-07 02:18:06,490][324535] Updated weights for policy 0, policy_version 15187 (0.0011) -[2026-06-07 02:18:06,692][324535] Updated weights for policy 0, policy_version 15197 (0.0007) -[2026-06-07 02:18:06,911][324535] Updated weights for policy 0, policy_version 15207 (0.0007) -[2026-06-07 02:18:07,126][324535] Updated weights for policy 0, policy_version 15217 (0.0007) -[2026-06-07 02:18:07,872][324535] Updated weights for policy 0, policy_version 15227 (0.0007) -[2026-06-07 02:18:08,071][324535] Updated weights for policy 0, policy_version 15237 (0.0007) -[2026-06-07 02:18:08,275][324535] Updated weights for policy 0, policy_version 15247 (0.0007) -[2026-06-07 02:18:08,493][324535] Updated weights for policy 0, policy_version 15257 (0.0007) -[2026-06-07 02:18:08,730][324535] Updated weights for policy 0, policy_version 15268 (0.0007) -[2026-06-07 02:18:08,931][324535] Updated weights for policy 0, policy_version 15278 (0.0007) -[2026-06-07 02:18:09,124][324535] Updated weights for policy 0, policy_version 15288 (0.0007) -[2026-06-07 02:18:09,909][324535] Updated weights for policy 0, policy_version 15298 (0.0007) -[2026-06-07 02:18:10,135][324535] Updated weights for policy 0, policy_version 15308 (0.0007) -[2026-06-07 02:18:10,358][324535] Updated weights for policy 0, policy_version 15318 (0.0007) -[2026-06-07 02:18:10,570][324535] Updated weights for policy 0, policy_version 15328 (0.0007) -[2026-06-07 02:18:10,783][324535] Updated weights for policy 0, policy_version 15338 (0.0007) -[2026-06-07 02:18:10,995][324535] Updated weights for policy 0, policy_version 15348 (0.0007) -[2026-06-07 02:18:11,010][321787] Fps is (10 sec: 16384.1, 60 sec: 16930.2, 300 sec: 17328.2). Total num frames: 7831552. Throughput: 0: 17499.0. Samples: 7851392. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) -[2026-06-07 02:18:11,011][321787] Avg episode reward: [(0, '478.011')] -[2026-06-07 02:18:11,086][324273] Saving new best policy, reward=478.011! -[2026-06-07 02:18:11,744][324535] Updated weights for policy 0, policy_version 15358 (0.0007) -[2026-06-07 02:18:11,958][324535] Updated weights for policy 0, policy_version 15368 (0.0007) -[2026-06-07 02:18:12,222][324535] Updated weights for policy 0, policy_version 15379 (0.0007) -[2026-06-07 02:18:12,436][324535] Updated weights for policy 0, policy_version 15389 (0.0011) -[2026-06-07 02:18:12,616][324535] Updated weights for policy 0, policy_version 15399 (0.0009) -[2026-06-07 02:18:12,804][324535] Updated weights for policy 0, policy_version 15409 (0.0007) -[2026-06-07 02:18:13,577][324535] Updated weights for policy 0, policy_version 15419 (0.0007) -[2026-06-07 02:18:13,795][324535] Updated weights for policy 0, policy_version 15429 (0.0007) -[2026-06-07 02:18:13,994][324535] Updated weights for policy 0, policy_version 15439 (0.0007) -[2026-06-07 02:18:14,190][324535] Updated weights for policy 0, policy_version 15449 (0.0007) -[2026-06-07 02:18:14,406][324535] Updated weights for policy 0, policy_version 15459 (0.0007) -[2026-06-07 02:18:14,625][324535] Updated weights for policy 0, policy_version 15469 (0.0007) -[2026-06-07 02:18:14,824][324535] Updated weights for policy 0, policy_version 15479 (0.0007) -[2026-06-07 02:18:15,578][324535] Updated weights for policy 0, policy_version 15489 (0.0007) -[2026-06-07 02:18:15,789][324535] Updated weights for policy 0, policy_version 15500 (0.0006) -[2026-06-07 02:18:16,004][324535] Updated weights for policy 0, policy_version 15510 (0.0007) -[2026-06-07 02:18:16,010][321787] Fps is (10 sec: 16383.9, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 7929856. Throughput: 0: 17481.9. Samples: 7963392. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) -[2026-06-07 02:18:16,011][321787] Avg episode reward: [(0, '491.346')] -[2026-06-07 02:18:16,235][324535] Updated weights for policy 0, policy_version 15520 (0.0007) -[2026-06-07 02:18:16,424][324535] Updated weights for policy 0, policy_version 15530 (0.0007) -[2026-06-07 02:18:16,664][324535] Updated weights for policy 0, policy_version 15541 (0.0006) -[2026-06-07 02:18:16,714][324273] Saving new best policy, reward=491.346! -[2026-06-07 02:18:17,419][324535] Updated weights for policy 0, policy_version 15552 (0.0007) -[2026-06-07 02:18:17,629][324535] Updated weights for policy 0, policy_version 15562 (0.0007) -[2026-06-07 02:18:17,830][324535] Updated weights for policy 0, policy_version 15572 (0.0007) -[2026-06-07 02:18:18,038][324535] Updated weights for policy 0, policy_version 15582 (0.0007) -[2026-06-07 02:18:18,238][324535] Updated weights for policy 0, policy_version 15592 (0.0007) -[2026-06-07 02:18:18,438][324535] Updated weights for policy 0, policy_version 15602 (0.0007) -[2026-06-07 02:18:19,242][324535] Updated weights for policy 0, policy_version 15612 (0.0007) -[2026-06-07 02:18:19,439][324535] Updated weights for policy 0, policy_version 15622 (0.0007) -[2026-06-07 02:18:19,661][324535] Updated weights for policy 0, policy_version 15632 (0.0007) -[2026-06-07 02:18:19,869][324535] Updated weights for policy 0, policy_version 15642 (0.0007) -[2026-06-07 02:18:20,104][324535] Updated weights for policy 0, policy_version 15653 (0.0007) -[2026-06-07 02:18:20,302][324535] Updated weights for policy 0, policy_version 15663 (0.0007) -[2026-06-07 02:18:21,010][321787] Fps is (10 sec: 19660.7, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 8028160. Throughput: 0: 17516.1. Samples: 8010496. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) -[2026-06-07 02:18:21,011][321787] Avg episode reward: [(0, '492.868')] -[2026-06-07 02:18:21,049][324535] Updated weights for policy 0, policy_version 15673 (0.0007) -[2026-06-07 02:18:21,260][324535] Updated weights for policy 0, policy_version 15683 (0.0007) -[2026-06-07 02:18:21,466][324535] Updated weights for policy 0, policy_version 15693 (0.0007) -[2026-06-07 02:18:21,674][324535] Updated weights for policy 0, policy_version 15703 (0.0007) -[2026-06-07 02:18:21,872][324535] Updated weights for policy 0, policy_version 15713 (0.0007) -[2026-06-07 02:18:22,064][324535] Updated weights for policy 0, policy_version 15723 (0.0007) -[2026-06-07 02:18:22,268][324535] Updated weights for policy 0, policy_version 15733 (0.0007) -[2026-06-07 02:18:22,318][324273] Saving new best policy, reward=492.868! -[2026-06-07 02:18:23,065][324535] Updated weights for policy 0, policy_version 15744 (0.0007) -[2026-06-07 02:18:23,291][324535] Updated weights for policy 0, policy_version 15754 (0.0007) -[2026-06-07 02:18:23,514][324535] Updated weights for policy 0, policy_version 15765 (0.0007) -[2026-06-07 02:18:23,721][324535] Updated weights for policy 0, policy_version 15775 (0.0007) -[2026-06-07 02:18:23,928][324535] Updated weights for policy 0, policy_version 15785 (0.0007) -[2026-06-07 02:18:24,120][324535] Updated weights for policy 0, policy_version 15795 (0.0007) -[2026-06-07 02:18:24,907][324535] Updated weights for policy 0, policy_version 15805 (0.0007) -[2026-06-07 02:18:25,114][324535] Updated weights for policy 0, policy_version 15815 (0.0007) -[2026-06-07 02:18:25,313][324535] Updated weights for policy 0, policy_version 15825 (0.0006) -[2026-06-07 02:18:25,540][324535] Updated weights for policy 0, policy_version 15836 (0.0007) -[2026-06-07 02:18:25,741][324535] Updated weights for policy 0, policy_version 15846 (0.0007) -[2026-06-07 02:18:25,952][324535] Updated weights for policy 0, policy_version 15856 (0.0007) -[2026-06-07 02:18:26,010][321787] Fps is (10 sec: 16384.2, 60 sec: 16930.1, 300 sec: 17328.2). Total num frames: 8093696. Throughput: 0: 17470.5. Samples: 8113792. Policy #0 lag: (min: 20.0, avg: 36.5, max: 84.0) -[2026-06-07 02:18:26,011][321787] Avg episode reward: [(0, '509.584')] -[2026-06-07 02:18:26,109][324273] Saving new best policy, reward=509.584! -[2026-06-07 02:18:26,722][324535] Updated weights for policy 0, policy_version 15866 (0.0007) -[2026-06-07 02:18:26,953][324535] Updated weights for policy 0, policy_version 15877 (0.0007) -[2026-06-07 02:18:27,165][324535] Updated weights for policy 0, policy_version 15887 (0.0007) -[2026-06-07 02:18:27,360][324535] Updated weights for policy 0, policy_version 15897 (0.0007) -[2026-06-07 02:18:27,591][324535] Updated weights for policy 0, policy_version 15908 (0.0007) -[2026-06-07 02:18:27,796][324535] Updated weights for policy 0, policy_version 15918 (0.0007) -[2026-06-07 02:18:27,973][324535] Updated weights for policy 0, policy_version 15928 (0.0007) -[2026-06-07 02:18:28,769][324535] Updated weights for policy 0, policy_version 15938 (0.0012) -[2026-06-07 02:18:28,962][324535] Updated weights for policy 0, policy_version 15948 (0.0011) -[2026-06-07 02:18:29,176][324535] Updated weights for policy 0, policy_version 15958 (0.0008) -[2026-06-07 02:18:29,383][324535] Updated weights for policy 0, policy_version 15968 (0.0007) -[2026-06-07 02:18:29,604][324535] Updated weights for policy 0, policy_version 15979 (0.0007) -[2026-06-07 02:18:29,809][324535] Updated weights for policy 0, policy_version 15989 (0.0007) -[2026-06-07 02:18:30,576][324535] Updated weights for policy 0, policy_version 15999 (0.0007) -[2026-06-07 02:18:30,791][324535] Updated weights for policy 0, policy_version 16009 (0.0007) -[2026-06-07 02:18:31,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 8192000. Throughput: 0: 17490.5. Samples: 8227456. Policy #0 lag: (min: 20.0, avg: 36.5, max: 84.0) -[2026-06-07 02:18:31,011][321787] Avg episode reward: [(0, '541.264')] -[2026-06-07 02:18:31,017][324535] Updated weights for policy 0, policy_version 16020 (0.0007) -[2026-06-07 02:18:31,228][324535] Updated weights for policy 0, policy_version 16030 (0.0007) -[2026-06-07 02:18:31,426][324535] Updated weights for policy 0, policy_version 16040 (0.0007) -[2026-06-07 02:18:31,656][324535] Updated weights for policy 0, policy_version 16050 (0.0007) -[2026-06-07 02:18:31,775][324273] Saving new best policy, reward=541.264! -[2026-06-07 02:18:32,455][324535] Updated weights for policy 0, policy_version 16060 (0.0007) -[2026-06-07 02:18:32,662][324535] Updated weights for policy 0, policy_version 16070 (0.0007) -[2026-06-07 02:18:32,862][324535] Updated weights for policy 0, policy_version 16080 (0.0007) -[2026-06-07 02:18:33,071][324535] Updated weights for policy 0, policy_version 16090 (0.0007) -[2026-06-07 02:18:33,274][324535] Updated weights for policy 0, policy_version 16100 (0.0007) -[2026-06-07 02:18:33,465][324535] Updated weights for policy 0, policy_version 16110 (0.0006) -[2026-06-07 02:18:33,671][324535] Updated weights for policy 0, policy_version 16120 (0.0007) -[2026-06-07 02:18:34,454][324535] Updated weights for policy 0, policy_version 16130 (0.0007) -[2026-06-07 02:18:34,656][324535] Updated weights for policy 0, policy_version 16140 (0.0007) -[2026-06-07 02:18:34,885][324535] Updated weights for policy 0, policy_version 16151 (0.0007) -[2026-06-07 02:18:35,103][324535] Updated weights for policy 0, policy_version 16161 (0.0007) -[2026-06-07 02:18:35,303][324535] Updated weights for policy 0, policy_version 16171 (0.0007) -[2026-06-07 02:18:35,513][324535] Updated weights for policy 0, policy_version 16181 (0.0007) -[2026-06-07 02:18:36,010][321787] Fps is (10 sec: 19661.1, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 8290304. Throughput: 0: 17581.6. Samples: 8275712. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) -[2026-06-07 02:18:36,011][321787] Avg episode reward: [(0, '549.499')] -[2026-06-07 02:18:36,252][324535] Updated weights for policy 0, policy_version 16191 (0.0008) -[2026-06-07 02:18:36,451][324535] Updated weights for policy 0, policy_version 16201 (0.0007) -[2026-06-07 02:18:36,671][324535] Updated weights for policy 0, policy_version 16211 (0.0009) -[2026-06-07 02:18:36,871][324535] Updated weights for policy 0, policy_version 16221 (0.0011) -[2026-06-07 02:18:37,083][324535] Updated weights for policy 0, policy_version 16231 (0.0009) -[2026-06-07 02:18:37,285][324535] Updated weights for policy 0, policy_version 16241 (0.0007) -[2026-06-07 02:18:37,420][324273] Saving new best policy, reward=549.499! -[2026-06-07 02:18:38,070][324535] Updated weights for policy 0, policy_version 16251 (0.0009) -[2026-06-07 02:18:38,291][324535] Updated weights for policy 0, policy_version 16261 (0.0011) -[2026-06-07 02:18:38,485][324535] Updated weights for policy 0, policy_version 16271 (0.0010) -[2026-06-07 02:18:38,727][324535] Updated weights for policy 0, policy_version 16283 (0.0007) -[2026-06-07 02:18:38,922][324535] Updated weights for policy 0, policy_version 16293 (0.0007) -[2026-06-07 02:18:39,125][324535] Updated weights for policy 0, policy_version 16303 (0.0008) -[2026-06-07 02:18:39,927][324535] Updated weights for policy 0, policy_version 16313 (0.0010) -[2026-06-07 02:18:40,135][324535] Updated weights for policy 0, policy_version 16324 (0.0011) -[2026-06-07 02:18:40,346][324535] Updated weights for policy 0, policy_version 16334 (0.0007) -[2026-06-07 02:18:40,568][324535] Updated weights for policy 0, policy_version 16344 (0.0006) -[2026-06-07 02:18:40,780][324535] Updated weights for policy 0, policy_version 16354 (0.0006) -[2026-06-07 02:18:41,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17328.2). Total num frames: 8355840. Throughput: 0: 17570.2. Samples: 8377856. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) -[2026-06-07 02:18:41,011][321787] Avg episode reward: [(0, '568.643')] -[2026-06-07 02:18:41,029][324535] Updated weights for policy 0, policy_version 16366 (0.0006) -[2026-06-07 02:18:41,234][324273] Saving new best policy, reward=568.643! -[2026-06-07 02:18:41,847][324535] Updated weights for policy 0, policy_version 16377 (0.0006) -[2026-06-07 02:18:42,056][324535] Updated weights for policy 0, policy_version 16387 (0.0006) -[2026-06-07 02:18:42,272][324535] Updated weights for policy 0, policy_version 16397 (0.0006) -[2026-06-07 02:18:42,481][324535] Updated weights for policy 0, policy_version 16407 (0.0007) -[2026-06-07 02:18:42,703][324535] Updated weights for policy 0, policy_version 16417 (0.0006) -[2026-06-07 02:18:42,913][324535] Updated weights for policy 0, policy_version 16427 (0.0007) -[2026-06-07 02:18:43,123][324535] Updated weights for policy 0, policy_version 16437 (0.0007) -[2026-06-07 02:18:43,885][324535] Updated weights for policy 0, policy_version 16447 (0.0007) -[2026-06-07 02:18:44,089][324535] Updated weights for policy 0, policy_version 16457 (0.0007) -[2026-06-07 02:18:44,299][324535] Updated weights for policy 0, policy_version 16467 (0.0007) -[2026-06-07 02:18:44,518][324535] Updated weights for policy 0, policy_version 16477 (0.0007) -[2026-06-07 02:18:44,733][324535] Updated weights for policy 0, policy_version 16487 (0.0007) -[2026-06-07 02:18:44,928][324535] Updated weights for policy 0, policy_version 16497 (0.0007) -[2026-06-07 02:18:45,695][324535] Updated weights for policy 0, policy_version 16507 (0.0007) -[2026-06-07 02:18:45,903][324535] Updated weights for policy 0, policy_version 16517 (0.0007) -[2026-06-07 02:18:46,010][321787] Fps is (10 sec: 16383.9, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 8454144. Throughput: 0: 17442.1. Samples: 8485248. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) -[2026-06-07 02:18:46,011][321787] Avg episode reward: [(0, '569.445')] -[2026-06-07 02:18:46,126][324535] Updated weights for policy 0, policy_version 16527 (0.0007) -[2026-06-07 02:18:46,311][324535] Updated weights for policy 0, policy_version 16537 (0.0006) -[2026-06-07 02:18:46,500][324535] Updated weights for policy 0, policy_version 16547 (0.0007) -[2026-06-07 02:18:46,699][324535] Updated weights for policy 0, policy_version 16557 (0.0007) -[2026-06-07 02:18:46,919][324535] Updated weights for policy 0, policy_version 16567 (0.0007) -[2026-06-07 02:18:46,940][324273] Saving new best policy, reward=569.445! -[2026-06-07 02:18:47,710][324535] Updated weights for policy 0, policy_version 16577 (0.0007) -[2026-06-07 02:18:47,906][324535] Updated weights for policy 0, policy_version 16587 (0.0007) -[2026-06-07 02:18:48,108][324535] Updated weights for policy 0, policy_version 16597 (0.0007) -[2026-06-07 02:18:48,322][324535] Updated weights for policy 0, policy_version 16607 (0.0007) -[2026-06-07 02:18:48,532][324535] Updated weights for policy 0, policy_version 16617 (0.0007) -[2026-06-07 02:18:48,727][324535] Updated weights for policy 0, policy_version 16627 (0.0007) -[2026-06-07 02:18:49,504][324535] Updated weights for policy 0, policy_version 16637 (0.0007) -[2026-06-07 02:18:49,726][324535] Updated weights for policy 0, policy_version 16648 (0.0007) -[2026-06-07 02:18:49,925][324535] Updated weights for policy 0, policy_version 16658 (0.0007) -[2026-06-07 02:18:50,137][324535] Updated weights for policy 0, policy_version 16668 (0.0007) -[2026-06-07 02:18:50,359][324535] Updated weights for policy 0, policy_version 16679 (0.0007) -[2026-06-07 02:18:50,559][324535] Updated weights for policy 0, policy_version 16689 (0.0007) -[2026-06-07 02:18:51,010][321787] Fps is (10 sec: 19660.7, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 8552448. Throughput: 0: 17615.7. Samples: 8540416. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) -[2026-06-07 02:18:51,011][321787] Avg episode reward: [(0, '574.111')] -[2026-06-07 02:18:51,016][324273] Saving new best policy, reward=574.111! -[2026-06-07 02:18:51,356][324535] Updated weights for policy 0, policy_version 16700 (0.0007) -[2026-06-07 02:18:51,567][324535] Updated weights for policy 0, policy_version 16710 (0.0007) -[2026-06-07 02:18:51,768][324535] Updated weights for policy 0, policy_version 16720 (0.0007) -[2026-06-07 02:18:51,981][324535] Updated weights for policy 0, policy_version 16730 (0.0007) -[2026-06-07 02:18:52,187][324535] Updated weights for policy 0, policy_version 16740 (0.0007) -[2026-06-07 02:18:52,373][324535] Updated weights for policy 0, policy_version 16750 (0.0007) -[2026-06-07 02:18:52,574][324535] Updated weights for policy 0, policy_version 16760 (0.0007) -[2026-06-07 02:18:53,364][324535] Updated weights for policy 0, policy_version 16770 (0.0007) -[2026-06-07 02:18:53,579][324535] Updated weights for policy 0, policy_version 16780 (0.0007) -[2026-06-07 02:18:53,776][324535] Updated weights for policy 0, policy_version 16790 (0.0007) -[2026-06-07 02:18:53,998][324535] Updated weights for policy 0, policy_version 16800 (0.0007) -[2026-06-07 02:18:54,210][324535] Updated weights for policy 0, policy_version 16810 (0.0007) -[2026-06-07 02:18:54,429][324535] Updated weights for policy 0, policy_version 16820 (0.0007) -[2026-06-07 02:18:55,188][324535] Updated weights for policy 0, policy_version 16831 (0.0007) -[2026-06-07 02:18:55,427][324535] Updated weights for policy 0, policy_version 16842 (0.0007) -[2026-06-07 02:18:55,614][324535] Updated weights for policy 0, policy_version 16852 (0.0007) -[2026-06-07 02:18:55,818][324535] Updated weights for policy 0, policy_version 16862 (0.0007) -[2026-06-07 02:18:56,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.3). Total num frames: 8617984. Throughput: 0: 17629.9. Samples: 8644736. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) -[2026-06-07 02:18:56,011][321787] Avg episode reward: [(0, '585.287')] -[2026-06-07 02:18:56,031][324535] Updated weights for policy 0, policy_version 16872 (0.0007) -[2026-06-07 02:18:56,264][324535] Updated weights for policy 0, policy_version 16884 (0.0007) -[2026-06-07 02:18:56,350][324273] Saving new best policy, reward=585.287! -[2026-06-07 02:18:57,085][324535] Updated weights for policy 0, policy_version 16894 (0.0007) -[2026-06-07 02:18:57,297][324535] Updated weights for policy 0, policy_version 16904 (0.0007) -[2026-06-07 02:18:57,502][324535] Updated weights for policy 0, policy_version 16914 (0.0007) -[2026-06-07 02:18:57,706][324535] Updated weights for policy 0, policy_version 16924 (0.0007) -[2026-06-07 02:18:57,957][324535] Updated weights for policy 0, policy_version 16935 (0.0007) -[2026-06-07 02:18:58,170][324535] Updated weights for policy 0, policy_version 16945 (0.0007) -[2026-06-07 02:18:58,888][324535] Updated weights for policy 0, policy_version 16955 (0.0007) -[2026-06-07 02:18:59,094][324535] Updated weights for policy 0, policy_version 16965 (0.0007) -[2026-06-07 02:18:59,314][324535] Updated weights for policy 0, policy_version 16976 (0.0007) -[2026-06-07 02:18:59,529][324535] Updated weights for policy 0, policy_version 16986 (0.0007) -[2026-06-07 02:18:59,732][324535] Updated weights for policy 0, policy_version 16996 (0.0007) -[2026-06-07 02:18:59,938][324535] Updated weights for policy 0, policy_version 17006 (0.0007) -[2026-06-07 02:19:00,157][324535] Updated weights for policy 0, policy_version 17016 (0.0007) -[2026-06-07 02:19:00,907][324535] Updated weights for policy 0, policy_version 17026 (0.0007) -[2026-06-07 02:19:01,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 8716288. Throughput: 0: 17408.1. Samples: 8746752. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) -[2026-06-07 02:19:01,011][321787] Avg episode reward: [(0, '583.837')] -[2026-06-07 02:19:01,106][324535] Updated weights for policy 0, policy_version 17036 (0.0007) -[2026-06-07 02:19:01,322][324535] Updated weights for policy 0, policy_version 17046 (0.0007) -[2026-06-07 02:19:01,512][324535] Updated weights for policy 0, policy_version 17056 (0.0007) -[2026-06-07 02:19:01,722][324535] Updated weights for policy 0, policy_version 17066 (0.0007) -[2026-06-07 02:19:01,944][324535] Updated weights for policy 0, policy_version 17076 (0.0007) -[2026-06-07 02:19:02,680][324535] Updated weights for policy 0, policy_version 17086 (0.0007) -[2026-06-07 02:19:02,905][324535] Updated weights for policy 0, policy_version 17096 (0.0007) -[2026-06-07 02:19:03,113][324535] Updated weights for policy 0, policy_version 17106 (0.0007) -[2026-06-07 02:19:03,328][324535] Updated weights for policy 0, policy_version 17116 (0.0007) -[2026-06-07 02:19:03,534][324535] Updated weights for policy 0, policy_version 17126 (0.0008) -[2026-06-07 02:19:03,739][324535] Updated weights for policy 0, policy_version 17136 (0.0007) -[2026-06-07 02:19:04,494][324535] Updated weights for policy 0, policy_version 17147 (0.0008) -[2026-06-07 02:19:04,698][324535] Updated weights for policy 0, policy_version 17157 (0.0007) -[2026-06-07 02:19:04,902][324535] Updated weights for policy 0, policy_version 17167 (0.0007) -[2026-06-07 02:19:05,111][324535] Updated weights for policy 0, policy_version 17177 (0.0007) -[2026-06-07 02:19:05,311][324535] Updated weights for policy 0, policy_version 17187 (0.0007) -[2026-06-07 02:19:05,557][324535] Updated weights for policy 0, policy_version 17199 (0.0007) -[2026-06-07 02:19:06,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 8814592. Throughput: 0: 17644.1. Samples: 8804480. Policy #0 lag: (min: 108.0, avg: 131.9, max: 177.0) -[2026-06-07 02:19:06,011][321787] Avg episode reward: [(0, '606.595')] -[2026-06-07 02:19:06,016][324273] Saving new best policy, reward=606.595! -[2026-06-07 02:19:06,319][324535] Updated weights for policy 0, policy_version 17209 (0.0007) -[2026-06-07 02:19:06,518][324535] Updated weights for policy 0, policy_version 17219 (0.0007) -[2026-06-07 02:19:06,729][324535] Updated weights for policy 0, policy_version 17229 (0.0007) -[2026-06-07 02:19:06,922][324535] Updated weights for policy 0, policy_version 17239 (0.0007) -[2026-06-07 02:19:07,160][324535] Updated weights for policy 0, policy_version 17250 (0.0007) -[2026-06-07 02:19:07,385][324535] Updated weights for policy 0, policy_version 17261 (0.0007) -[2026-06-07 02:19:07,571][324535] Updated weights for policy 0, policy_version 17271 (0.0007) -[2026-06-07 02:19:08,334][324535] Updated weights for policy 0, policy_version 17281 (0.0007) -[2026-06-07 02:19:08,535][324535] Updated weights for policy 0, policy_version 17291 (0.0007) -[2026-06-07 02:19:08,743][324535] Updated weights for policy 0, policy_version 17301 (0.0007) -[2026-06-07 02:19:08,980][324535] Updated weights for policy 0, policy_version 17312 (0.0007) -[2026-06-07 02:19:09,192][324535] Updated weights for policy 0, policy_version 17322 (0.0007) -[2026-06-07 02:19:09,460][324535] Updated weights for policy 0, policy_version 17334 (0.0007) -[2026-06-07 02:19:10,241][324535] Updated weights for policy 0, policy_version 17345 (0.0008) -[2026-06-07 02:19:10,461][324535] Updated weights for policy 0, policy_version 17355 (0.0007) -[2026-06-07 02:19:10,663][324535] Updated weights for policy 0, policy_version 17365 (0.0007) -[2026-06-07 02:19:10,871][324535] Updated weights for policy 0, policy_version 17375 (0.0007) -[2026-06-07 02:19:11,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 8880128. Throughput: 0: 17598.6. Samples: 8905728. Policy #0 lag: (min: 108.0, avg: 131.9, max: 177.0) -[2026-06-07 02:19:11,011][321787] Avg episode reward: [(0, '612.649')] -[2026-06-07 02:19:11,067][324535] Updated weights for policy 0, policy_version 17385 (0.0007) -[2026-06-07 02:19:11,261][324535] Updated weights for policy 0, policy_version 17395 (0.0007) -[2026-06-07 02:19:11,367][324273] Saving new best policy, reward=612.649! -[2026-06-07 02:19:12,068][324535] Updated weights for policy 0, policy_version 17405 (0.0007) -[2026-06-07 02:19:12,270][324535] Updated weights for policy 0, policy_version 17415 (0.0007) -[2026-06-07 02:19:12,491][324535] Updated weights for policy 0, policy_version 17425 (0.0007) -[2026-06-07 02:19:12,704][324535] Updated weights for policy 0, policy_version 17435 (0.0007) -[2026-06-07 02:19:12,922][324535] Updated weights for policy 0, policy_version 17445 (0.0007) -[2026-06-07 02:19:13,142][324535] Updated weights for policy 0, policy_version 17455 (0.0007) -[2026-06-07 02:19:13,880][324535] Updated weights for policy 0, policy_version 17465 (0.0007) -[2026-06-07 02:19:14,092][324535] Updated weights for policy 0, policy_version 17475 (0.0007) -[2026-06-07 02:19:14,304][324535] Updated weights for policy 0, policy_version 17485 (0.0007) -[2026-06-07 02:19:14,528][324535] Updated weights for policy 0, policy_version 17495 (0.0007) -[2026-06-07 02:19:14,734][324535] Updated weights for policy 0, policy_version 17505 (0.0007) -[2026-06-07 02:19:14,931][324535] Updated weights for policy 0, policy_version 17515 (0.0006) -[2026-06-07 02:19:15,154][324535] Updated weights for policy 0, policy_version 17525 (0.0007) -[2026-06-07 02:19:15,910][324535] Updated weights for policy 0, policy_version 17535 (0.0007) -[2026-06-07 02:19:16,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 8978432. Throughput: 0: 17351.1. Samples: 9008256. Policy #0 lag: (min: 108.0, avg: 131.9, max: 177.0) -[2026-06-07 02:19:16,011][321787] Avg episode reward: [(0, '615.920')] -[2026-06-07 02:19:16,119][324535] Updated weights for policy 0, policy_version 17545 (0.0007) -[2026-06-07 02:19:16,331][324535] Updated weights for policy 0, policy_version 17555 (0.0007) -[2026-06-07 02:19:16,546][324535] Updated weights for policy 0, policy_version 17565 (0.0007) -[2026-06-07 02:19:16,737][324535] Updated weights for policy 0, policy_version 17575 (0.0007) -[2026-06-07 02:19:16,947][324535] Updated weights for policy 0, policy_version 17585 (0.0007) -[2026-06-07 02:19:17,085][324273] Saving new best policy, reward=615.920! -[2026-06-07 02:19:17,706][324535] Updated weights for policy 0, policy_version 17595 (0.0007) -[2026-06-07 02:19:17,906][324535] Updated weights for policy 0, policy_version 17605 (0.0007) -[2026-06-07 02:19:18,179][324535] Updated weights for policy 0, policy_version 17615 (0.0007) -[2026-06-07 02:19:18,378][324535] Updated weights for policy 0, policy_version 17625 (0.0007) -[2026-06-07 02:19:18,593][324535] Updated weights for policy 0, policy_version 17635 (0.0006) -[2026-06-07 02:19:18,791][324535] Updated weights for policy 0, policy_version 17645 (0.0007) -[2026-06-07 02:19:19,021][324535] Updated weights for policy 0, policy_version 17656 (0.0007) -[2026-06-07 02:19:19,792][324535] Updated weights for policy 0, policy_version 17666 (0.0007) -[2026-06-07 02:19:19,999][324535] Updated weights for policy 0, policy_version 17676 (0.0007) -[2026-06-07 02:19:20,198][324535] Updated weights for policy 0, policy_version 17686 (0.0007) -[2026-06-07 02:19:20,428][324535] Updated weights for policy 0, policy_version 17697 (0.0007) -[2026-06-07 02:19:20,655][324535] Updated weights for policy 0, policy_version 17707 (0.0007) -[2026-06-07 02:19:20,852][324535] Updated weights for policy 0, policy_version 17717 (0.0007) -[2026-06-07 02:19:21,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 9076736. Throughput: 0: 17541.7. Samples: 9065088. Policy #0 lag: (min: 78.0, avg: 98.5, max: 144.0) -[2026-06-07 02:19:21,011][321787] Avg episode reward: [(0, '640.983')] -[2026-06-07 02:19:21,015][324273] Saving new best policy, reward=640.983! -[2026-06-07 02:19:21,615][324535] Updated weights for policy 0, policy_version 17727 (0.0007) -[2026-06-07 02:19:21,832][324535] Updated weights for policy 0, policy_version 17737 (0.0007) -[2026-06-07 02:19:22,042][324535] Updated weights for policy 0, policy_version 17747 (0.0007) -[2026-06-07 02:19:22,245][324535] Updated weights for policy 0, policy_version 17757 (0.0007) -[2026-06-07 02:19:22,484][324535] Updated weights for policy 0, policy_version 17768 (0.0007) -[2026-06-07 02:19:22,686][324535] Updated weights for policy 0, policy_version 17778 (0.0007) -[2026-06-07 02:19:23,445][324535] Updated weights for policy 0, policy_version 17788 (0.0007) -[2026-06-07 02:19:23,662][324535] Updated weights for policy 0, policy_version 17798 (0.0007) -[2026-06-07 02:19:23,861][324535] Updated weights for policy 0, policy_version 17808 (0.0007) -[2026-06-07 02:19:24,089][324535] Updated weights for policy 0, policy_version 17819 (0.0007) -[2026-06-07 02:19:24,304][324535] Updated weights for policy 0, policy_version 17829 (0.0007) -[2026-06-07 02:19:24,512][324535] Updated weights for policy 0, policy_version 17839 (0.0007) -[2026-06-07 02:19:25,269][324535] Updated weights for policy 0, policy_version 17849 (0.0007) -[2026-06-07 02:19:25,483][324535] Updated weights for policy 0, policy_version 17859 (0.0007) -[2026-06-07 02:19:25,681][324535] Updated weights for policy 0, policy_version 17869 (0.0007) -[2026-06-07 02:19:25,893][324535] Updated weights for policy 0, policy_version 17879 (0.0007) -[2026-06-07 02:19:26,010][321787] Fps is (10 sec: 16383.8, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 9142272. Throughput: 0: 17553.0. Samples: 9167744. Policy #0 lag: (min: 78.0, avg: 98.5, max: 144.0) -[2026-06-07 02:19:26,011][321787] Avg episode reward: [(0, '638.998')] -[2026-06-07 02:19:26,088][324535] Updated weights for policy 0, policy_version 17889 (0.0007) -[2026-06-07 02:19:26,283][324535] Updated weights for policy 0, policy_version 17899 (0.0007) -[2026-06-07 02:19:26,487][324535] Updated weights for policy 0, policy_version 17909 (0.0007) -[2026-06-07 02:19:27,254][324535] Updated weights for policy 0, policy_version 17919 (0.0007) -[2026-06-07 02:19:27,466][324535] Updated weights for policy 0, policy_version 17929 (0.0007) -[2026-06-07 02:19:27,660][324535] Updated weights for policy 0, policy_version 17939 (0.0007) -[2026-06-07 02:19:27,851][324535] Updated weights for policy 0, policy_version 17949 (0.0007) -[2026-06-07 02:19:28,057][324535] Updated weights for policy 0, policy_version 17959 (0.0007) -[2026-06-07 02:19:28,257][324535] Updated weights for policy 0, policy_version 17969 (0.0007) -[2026-06-07 02:19:29,021][324535] Updated weights for policy 0, policy_version 17979 (0.0007) -[2026-06-07 02:19:29,237][324535] Updated weights for policy 0, policy_version 17989 (0.0007) -[2026-06-07 02:19:29,449][324535] Updated weights for policy 0, policy_version 17999 (0.0007) -[2026-06-07 02:19:29,653][324535] Updated weights for policy 0, policy_version 18009 (0.0007) -[2026-06-07 02:19:29,857][324535] Updated weights for policy 0, policy_version 18019 (0.0007) -[2026-06-07 02:19:30,053][324535] Updated weights for policy 0, policy_version 18029 (0.0007) -[2026-06-07 02:19:30,250][324535] Updated weights for policy 0, policy_version 18039 (0.0007) -[2026-06-07 02:19:30,983][324535] Updated weights for policy 0, policy_version 18049 (0.0007) -[2026-06-07 02:19:31,010][321787] Fps is (10 sec: 16383.7, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 9240576. Throughput: 0: 17456.3. Samples: 9270784. Policy #0 lag: (min: 4.0, avg: 19.2, max: 68.0) -[2026-06-07 02:19:31,011][321787] Avg episode reward: [(0, '632.217')] -[2026-06-07 02:19:31,190][324535] Updated weights for policy 0, policy_version 18059 (0.0007) -[2026-06-07 02:19:31,420][324535] Updated weights for policy 0, policy_version 18070 (0.0007) -[2026-06-07 02:19:31,625][324535] Updated weights for policy 0, policy_version 18080 (0.0007) -[2026-06-07 02:19:31,829][324535] Updated weights for policy 0, policy_version 18090 (0.0006) -[2026-06-07 02:19:32,046][324535] Updated weights for policy 0, policy_version 18100 (0.0007) -[2026-06-07 02:19:32,838][324535] Updated weights for policy 0, policy_version 18111 (0.0007) -[2026-06-07 02:19:33,054][324535] Updated weights for policy 0, policy_version 18121 (0.0007) -[2026-06-07 02:19:33,249][324535] Updated weights for policy 0, policy_version 18131 (0.0007) -[2026-06-07 02:19:33,467][324535] Updated weights for policy 0, policy_version 18141 (0.0007) -[2026-06-07 02:19:33,681][324535] Updated weights for policy 0, policy_version 18151 (0.0007) -[2026-06-07 02:19:33,893][324535] Updated weights for policy 0, policy_version 18161 (0.0007) -[2026-06-07 02:19:34,631][324535] Updated weights for policy 0, policy_version 18171 (0.0007) -[2026-06-07 02:19:34,842][324535] Updated weights for policy 0, policy_version 18181 (0.0007) -[2026-06-07 02:19:35,054][324535] Updated weights for policy 0, policy_version 18191 (0.0007) -[2026-06-07 02:19:35,258][324535] Updated weights for policy 0, policy_version 18201 (0.0007) -[2026-06-07 02:19:35,453][324535] Updated weights for policy 0, policy_version 18211 (0.0007) -[2026-06-07 02:19:35,662][324535] Updated weights for policy 0, policy_version 18221 (0.0007) -[2026-06-07 02:19:35,870][324535] Updated weights for policy 0, policy_version 18231 (0.0008) -[2026-06-07 02:19:36,010][321787] Fps is (10 sec: 19660.9, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 9338880. Throughput: 0: 17513.3. Samples: 9328512. Policy #0 lag: (min: 4.0, avg: 19.2, max: 68.0) -[2026-06-07 02:19:36,011][321787] Avg episode reward: [(0, '649.116')] -[2026-06-07 02:19:36,015][324273] Saving new best policy, reward=649.116! -[2026-06-07 02:19:36,649][324535] Updated weights for policy 0, policy_version 18242 (0.0007) -[2026-06-07 02:19:36,864][324535] Updated weights for policy 0, policy_version 18252 (0.0007) -[2026-06-07 02:19:37,087][324535] Updated weights for policy 0, policy_version 18263 (0.0007) -[2026-06-07 02:19:37,299][324535] Updated weights for policy 0, policy_version 18273 (0.0007) -[2026-06-07 02:19:37,544][324535] Updated weights for policy 0, policy_version 18285 (0.0007) -[2026-06-07 02:19:37,767][324535] Updated weights for policy 0, policy_version 18295 (0.0007) -[2026-06-07 02:19:38,547][324535] Updated weights for policy 0, policy_version 18306 (0.0007) -[2026-06-07 02:19:38,759][324535] Updated weights for policy 0, policy_version 18316 (0.0007) -[2026-06-07 02:19:38,972][324535] Updated weights for policy 0, policy_version 18326 (0.0007) -[2026-06-07 02:19:39,194][324535] Updated weights for policy 0, policy_version 18336 (0.0007) -[2026-06-07 02:19:39,394][324535] Updated weights for policy 0, policy_version 18346 (0.0007) -[2026-06-07 02:19:39,598][324535] Updated weights for policy 0, policy_version 18356 (0.0007) -[2026-06-07 02:19:40,324][324535] Updated weights for policy 0, policy_version 18366 (0.0007) -[2026-06-07 02:19:40,535][324535] Updated weights for policy 0, policy_version 18376 (0.0007) -[2026-06-07 02:19:40,733][324535] Updated weights for policy 0, policy_version 18386 (0.0007) -[2026-06-07 02:19:40,939][324535] Updated weights for policy 0, policy_version 18396 (0.0007) -[2026-06-07 02:19:41,010][321787] Fps is (10 sec: 16384.3, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 9404416. Throughput: 0: 17462.0. Samples: 9430528. Policy #0 lag: (min: 4.0, avg: 19.2, max: 68.0) -[2026-06-07 02:19:41,011][321787] Avg episode reward: [(0, '657.286')] -[2026-06-07 02:19:41,132][324535] Updated weights for policy 0, policy_version 18406 (0.0007) -[2026-06-07 02:19:41,326][324535] Updated weights for policy 0, policy_version 18416 (0.0007) -[2026-06-07 02:19:41,478][324273] Saving new best policy, reward=657.286! -[2026-06-07 02:19:42,081][324535] Updated weights for policy 0, policy_version 18426 (0.0007) -[2026-06-07 02:19:42,285][324535] Updated weights for policy 0, policy_version 18436 (0.0007) -[2026-06-07 02:19:42,499][324535] Updated weights for policy 0, policy_version 18446 (0.0007) -[2026-06-07 02:19:42,723][324535] Updated weights for policy 0, policy_version 18456 (0.0007) -[2026-06-07 02:19:42,932][324535] Updated weights for policy 0, policy_version 18466 (0.0007) -[2026-06-07 02:19:43,163][324535] Updated weights for policy 0, policy_version 18477 (0.0007) -[2026-06-07 02:19:43,378][324535] Updated weights for policy 0, policy_version 18487 (0.0007) -[2026-06-07 02:19:44,131][324535] Updated weights for policy 0, policy_version 18497 (0.0007) -[2026-06-07 02:19:44,344][324535] Updated weights for policy 0, policy_version 18507 (0.0007) -[2026-06-07 02:19:44,546][324535] Updated weights for policy 0, policy_version 18517 (0.0007) -[2026-06-07 02:19:44,759][324535] Updated weights for policy 0, policy_version 18528 (0.0007) -[2026-06-07 02:19:44,963][324535] Updated weights for policy 0, policy_version 18538 (0.0007) -[2026-06-07 02:19:45,176][324535] Updated weights for policy 0, policy_version 18548 (0.0007) -[2026-06-07 02:19:45,928][324535] Updated weights for policy 0, policy_version 18558 (0.0007) -[2026-06-07 02:19:46,010][321787] Fps is (10 sec: 16383.9, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 9502720. Throughput: 0: 17476.2. Samples: 9533184. Policy #0 lag: (min: 126.0, avg: 142.5, max: 191.0) -[2026-06-07 02:19:46,011][321787] Avg episode reward: [(0, '662.584')] -[2026-06-07 02:19:46,170][324535] Updated weights for policy 0, policy_version 18569 (0.0007) -[2026-06-07 02:19:46,380][324535] Updated weights for policy 0, policy_version 18579 (0.0007) -[2026-06-07 02:19:46,607][324535] Updated weights for policy 0, policy_version 18589 (0.0008) -[2026-06-07 02:19:46,813][324535] Updated weights for policy 0, policy_version 18599 (0.0007) -[2026-06-07 02:19:47,036][324535] Updated weights for policy 0, policy_version 18609 (0.0008) -[2026-06-07 02:19:47,175][324273] Saving new best policy, reward=662.584! -[2026-06-07 02:19:47,763][324535] Updated weights for policy 0, policy_version 18619 (0.0007) -[2026-06-07 02:19:47,973][324535] Updated weights for policy 0, policy_version 18629 (0.0007) -[2026-06-07 02:19:48,198][324535] Updated weights for policy 0, policy_version 18640 (0.0007) -[2026-06-07 02:19:48,411][324535] Updated weights for policy 0, policy_version 18650 (0.0008) -[2026-06-07 02:19:48,616][324535] Updated weights for policy 0, policy_version 18660 (0.0008) -[2026-06-07 02:19:48,813][324535] Updated weights for policy 0, policy_version 18670 (0.0008) -[2026-06-07 02:19:49,017][324535] Updated weights for policy 0, policy_version 18680 (0.0008) -[2026-06-07 02:19:49,755][324535] Updated weights for policy 0, policy_version 18690 (0.0011) -[2026-06-07 02:19:49,957][324535] Updated weights for policy 0, policy_version 18700 (0.0011) -[2026-06-07 02:19:50,160][324535] Updated weights for policy 0, policy_version 18710 (0.0012) -[2026-06-07 02:19:50,371][324535] Updated weights for policy 0, policy_version 18720 (0.0011) -[2026-06-07 02:19:50,565][324535] Updated weights for policy 0, policy_version 18730 (0.0011) -[2026-06-07 02:19:50,761][324535] Updated weights for policy 0, policy_version 18740 (0.0011) -[2026-06-07 02:19:51,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 9601024. Throughput: 0: 17464.9. Samples: 9590400. Policy #0 lag: (min: 126.0, avg: 142.5, max: 191.0) -[2026-06-07 02:19:51,011][321787] Avg episode reward: [(0, '665.613')] -[2026-06-07 02:19:51,016][324273] Saving new best policy, reward=665.613! -[2026-06-07 02:19:51,519][324535] Updated weights for policy 0, policy_version 18750 (0.0009) -[2026-06-07 02:19:51,720][324535] Updated weights for policy 0, policy_version 18760 (0.0006) -[2026-06-07 02:19:51,918][324535] Updated weights for policy 0, policy_version 18770 (0.0006) -[2026-06-07 02:19:52,150][324535] Updated weights for policy 0, policy_version 18780 (0.0007) -[2026-06-07 02:19:52,359][324535] Updated weights for policy 0, policy_version 18790 (0.0007) -[2026-06-07 02:19:52,576][324535] Updated weights for policy 0, policy_version 18800 (0.0007) -[2026-06-07 02:19:53,317][324535] Updated weights for policy 0, policy_version 18810 (0.0007) -[2026-06-07 02:19:53,524][324535] Updated weights for policy 0, policy_version 18820 (0.0007) -[2026-06-07 02:19:53,750][324535] Updated weights for policy 0, policy_version 18831 (0.0007) -[2026-06-07 02:19:53,935][324535] Updated weights for policy 0, policy_version 18841 (0.0007) -[2026-06-07 02:19:54,149][324535] Updated weights for policy 0, policy_version 18851 (0.0007) -[2026-06-07 02:19:54,373][324535] Updated weights for policy 0, policy_version 18861 (0.0007) -[2026-06-07 02:19:54,581][324535] Updated weights for policy 0, policy_version 18871 (0.0007) -[2026-06-07 02:19:55,345][324535] Updated weights for policy 0, policy_version 18881 (0.0007) -[2026-06-07 02:19:55,552][324535] Updated weights for policy 0, policy_version 18892 (0.0009) -[2026-06-07 02:19:55,757][324535] Updated weights for policy 0, policy_version 18902 (0.0011) -[2026-06-07 02:19:55,985][324535] Updated weights for policy 0, policy_version 18913 (0.0010) -[2026-06-07 02:19:56,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 9666560. Throughput: 0: 17459.2. Samples: 9691392. Policy #0 lag: (min: 126.0, avg: 142.5, max: 191.0) -[2026-06-07 02:19:56,011][321787] Avg episode reward: [(0, '674.001')] -[2026-06-07 02:19:56,196][324535] Updated weights for policy 0, policy_version 18923 (0.0007) -[2026-06-07 02:19:56,411][324535] Updated weights for policy 0, policy_version 18933 (0.0007) -[2026-06-07 02:19:56,471][324273] Saving new best policy, reward=674.001! -[2026-06-07 02:19:57,157][324535] Updated weights for policy 0, policy_version 18943 (0.0008) -[2026-06-07 02:19:57,366][324535] Updated weights for policy 0, policy_version 18953 (0.0011) -[2026-06-07 02:19:57,570][324535] Updated weights for policy 0, policy_version 18963 (0.0007) -[2026-06-07 02:19:57,791][324535] Updated weights for policy 0, policy_version 18973 (0.0007) -[2026-06-07 02:19:58,017][324535] Updated weights for policy 0, policy_version 18983 (0.0007) -[2026-06-07 02:19:58,237][324535] Updated weights for policy 0, policy_version 18993 (0.0007) -[2026-06-07 02:19:58,989][324535] Updated weights for policy 0, policy_version 19004 (0.0007) -[2026-06-07 02:19:59,209][324535] Updated weights for policy 0, policy_version 19014 (0.0007) -[2026-06-07 02:19:59,417][324535] Updated weights for policy 0, policy_version 19024 (0.0007) -[2026-06-07 02:19:59,640][324535] Updated weights for policy 0, policy_version 19034 (0.0007) -[2026-06-07 02:19:59,849][324535] Updated weights for policy 0, policy_version 19044 (0.0007) -[2026-06-07 02:20:00,054][324535] Updated weights for policy 0, policy_version 19054 (0.0007) -[2026-06-07 02:20:00,272][324535] Updated weights for policy 0, policy_version 19064 (0.0007) -[2026-06-07 02:20:01,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.3). Total num frames: 9764864. Throughput: 0: 17430.8. Samples: 9792640. Policy #0 lag: (min: 5.0, avg: 20.3, max: 69.0) -[2026-06-07 02:20:01,011][321787] Avg episode reward: [(0, '689.980')] -[2026-06-07 02:20:01,023][324535] Updated weights for policy 0, policy_version 19074 (0.0007) -[2026-06-07 02:20:01,246][324535] Updated weights for policy 0, policy_version 19084 (0.0007) -[2026-06-07 02:20:01,457][324535] Updated weights for policy 0, policy_version 19094 (0.0007) -[2026-06-07 02:20:01,658][324535] Updated weights for policy 0, policy_version 19104 (0.0007) -[2026-06-07 02:20:01,877][324535] Updated weights for policy 0, policy_version 19114 (0.0007) -[2026-06-07 02:20:02,088][324535] Updated weights for policy 0, policy_version 19124 (0.0007) -[2026-06-07 02:20:02,156][324273] Saving new best policy, reward=689.980! -[2026-06-07 02:20:02,810][324535] Updated weights for policy 0, policy_version 19134 (0.0007) -[2026-06-07 02:20:03,043][324535] Updated weights for policy 0, policy_version 19144 (0.0010) -[2026-06-07 02:20:03,231][324535] Updated weights for policy 0, policy_version 19154 (0.0011) -[2026-06-07 02:20:03,429][324535] Updated weights for policy 0, policy_version 19164 (0.0007) -[2026-06-07 02:20:03,634][324535] Updated weights for policy 0, policy_version 19174 (0.0006) -[2026-06-07 02:20:03,858][324535] Updated weights for policy 0, policy_version 19184 (0.0007) -[2026-06-07 02:20:04,616][324535] Updated weights for policy 0, policy_version 19195 (0.0007) -[2026-06-07 02:20:04,840][324535] Updated weights for policy 0, policy_version 19205 (0.0007) -[2026-06-07 02:20:05,037][324535] Updated weights for policy 0, policy_version 19215 (0.0007) -[2026-06-07 02:20:05,232][324535] Updated weights for policy 0, policy_version 19225 (0.0007) -[2026-06-07 02:20:05,464][324535] Updated weights for policy 0, policy_version 19235 (0.0007) -[2026-06-07 02:20:05,670][324535] Updated weights for policy 0, policy_version 19245 (0.0007) -[2026-06-07 02:20:05,882][324535] Updated weights for policy 0, policy_version 19255 (0.0007) -[2026-06-07 02:20:06,010][321787] Fps is (10 sec: 19661.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 9863168. Throughput: 0: 17442.1. Samples: 9849984. Policy #0 lag: (min: 5.0, avg: 20.3, max: 69.0) -[2026-06-07 02:20:06,011][321787] Avg episode reward: [(0, '692.906')] -[2026-06-07 02:20:06,018][324273] Saving new best policy, reward=692.906! -[2026-06-07 02:20:06,638][324535] Updated weights for policy 0, policy_version 19265 (0.0007) -[2026-06-07 02:20:06,856][324535] Updated weights for policy 0, policy_version 19275 (0.0007) -[2026-06-07 02:20:07,073][324535] Updated weights for policy 0, policy_version 19285 (0.0007) -[2026-06-07 02:20:07,281][324535] Updated weights for policy 0, policy_version 19295 (0.0007) -[2026-06-07 02:20:07,491][324535] Updated weights for policy 0, policy_version 19305 (0.0007) -[2026-06-07 02:20:07,697][324535] Updated weights for policy 0, policy_version 19315 (0.0007) -[2026-06-07 02:20:08,441][324535] Updated weights for policy 0, policy_version 19326 (0.0007) -[2026-06-07 02:20:08,649][324535] Updated weights for policy 0, policy_version 19336 (0.0007) -[2026-06-07 02:20:08,865][324535] Updated weights for policy 0, policy_version 19346 (0.0007) -[2026-06-07 02:20:09,072][324535] Updated weights for policy 0, policy_version 19356 (0.0007) -[2026-06-07 02:20:09,271][324535] Updated weights for policy 0, policy_version 19366 (0.0007) -[2026-06-07 02:20:09,472][324535] Updated weights for policy 0, policy_version 19376 (0.0007) -[2026-06-07 02:20:10,245][324535] Updated weights for policy 0, policy_version 19387 (0.0007) -[2026-06-07 02:20:10,457][324535] Updated weights for policy 0, policy_version 19397 (0.0007) -[2026-06-07 02:20:10,673][324535] Updated weights for policy 0, policy_version 19407 (0.0007) -[2026-06-07 02:20:10,881][324535] Updated weights for policy 0, policy_version 19417 (0.0007) -[2026-06-07 02:20:11,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 9928704. Throughput: 0: 17410.9. Samples: 9951232. Policy #0 lag: (min: 5.0, avg: 20.3, max: 69.0) -[2026-06-07 02:20:11,011][321787] Avg episode reward: [(0, '786.422')] -[2026-06-07 02:20:11,115][324535] Updated weights for policy 0, policy_version 19428 (0.0007) -[2026-06-07 02:20:11,307][324535] Updated weights for policy 0, policy_version 19438 (0.0007) -[2026-06-07 02:20:11,504][324273] Saving new best policy, reward=786.422! -[2026-06-07 02:20:11,506][324535] Updated weights for policy 0, policy_version 19448 (0.0007) -[2026-06-07 02:20:12,265][324535] Updated weights for policy 0, policy_version 19458 (0.0007) -[2026-06-07 02:20:12,464][324535] Updated weights for policy 0, policy_version 19468 (0.0007) -[2026-06-07 02:20:12,661][324535] Updated weights for policy 0, policy_version 19478 (0.0007) -[2026-06-07 02:20:12,884][324535] Updated weights for policy 0, policy_version 19488 (0.0007) -[2026-06-07 02:20:13,102][324535] Updated weights for policy 0, policy_version 19498 (0.0007) -[2026-06-07 02:20:13,314][324535] Updated weights for policy 0, policy_version 19508 (0.0007) -[2026-06-07 02:20:14,059][324535] Updated weights for policy 0, policy_version 19518 (0.0007) -[2026-06-07 02:20:14,264][324535] Updated weights for policy 0, policy_version 19528 (0.0007) -[2026-06-07 02:20:14,490][324535] Updated weights for policy 0, policy_version 19538 (0.0007) -[2026-06-07 02:20:14,703][324535] Updated weights for policy 0, policy_version 19548 (0.0007) -[2026-06-07 02:20:14,901][324535] Updated weights for policy 0, policy_version 19558 (0.0007) -[2026-06-07 02:20:15,114][324535] Updated weights for policy 0, policy_version 19568 (0.0007) -[2026-06-07 02:20:15,834][324535] Updated weights for policy 0, policy_version 19578 (0.0007) -[2026-06-07 02:20:16,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 10027008. Throughput: 0: 17399.5. Samples: 10053760. Policy #0 lag: (min: 40.0, avg: 71.5, max: 103.0) -[2026-06-07 02:20:16,011][321787] Avg episode reward: [(0, '786.303')] -[2026-06-07 02:20:16,020][324535] Updated weights for policy 0, policy_version 19588 (0.0007) -[2026-06-07 02:20:16,234][324535] Updated weights for policy 0, policy_version 19598 (0.0007) -[2026-06-07 02:20:16,433][324535] Updated weights for policy 0, policy_version 19608 (0.0007) -[2026-06-07 02:20:16,628][324535] Updated weights for policy 0, policy_version 19618 (0.0006) -[2026-06-07 02:20:16,825][324535] Updated weights for policy 0, policy_version 19628 (0.0007) -[2026-06-07 02:20:17,023][324535] Updated weights for policy 0, policy_version 19638 (0.0007) -[2026-06-07 02:20:17,770][324535] Updated weights for policy 0, policy_version 19648 (0.0007) -[2026-06-07 02:20:17,968][324535] Updated weights for policy 0, policy_version 19658 (0.0007) -[2026-06-07 02:20:18,197][324535] Updated weights for policy 0, policy_version 19668 (0.0007) -[2026-06-07 02:20:18,406][324535] Updated weights for policy 0, policy_version 19678 (0.0007) -[2026-06-07 02:20:18,596][324535] Updated weights for policy 0, policy_version 19688 (0.0007) -[2026-06-07 02:20:18,813][324535] Updated weights for policy 0, policy_version 19698 (0.0007) -[2026-06-07 02:20:19,556][324535] Updated weights for policy 0, policy_version 19708 (0.0007) -[2026-06-07 02:20:19,763][324535] Updated weights for policy 0, policy_version 19718 (0.0007) -[2026-06-07 02:20:19,960][324535] Updated weights for policy 0, policy_version 19728 (0.0007) -[2026-06-07 02:20:20,174][324535] Updated weights for policy 0, policy_version 19738 (0.0007) -[2026-06-07 02:20:20,380][324535] Updated weights for policy 0, policy_version 19748 (0.0007) -[2026-06-07 02:20:20,579][324535] Updated weights for policy 0, policy_version 19758 (0.0007) -[2026-06-07 02:20:21,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 10125312. Throughput: 0: 17388.1. Samples: 10110976. Policy #0 lag: (min: 40.0, avg: 71.5, max: 103.0) -[2026-06-07 02:20:21,011][321787] Avg episode reward: [(0, '797.123')] -[2026-06-07 02:20:21,015][324273] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed11/checkpoint_p0/checkpoint_000019768_10125312.pth... -[2026-06-07 02:20:21,050][324273] Saving new best policy, reward=797.123! -[2026-06-07 02:20:21,375][324535] Updated weights for policy 0, policy_version 19769 (0.0007) -[2026-06-07 02:20:21,569][324535] Updated weights for policy 0, policy_version 19779 (0.0007) -[2026-06-07 02:20:21,785][324535] Updated weights for policy 0, policy_version 19789 (0.0007) -[2026-06-07 02:20:21,976][324535] Updated weights for policy 0, policy_version 19799 (0.0007) -[2026-06-07 02:20:22,197][324535] Updated weights for policy 0, policy_version 19809 (0.0007) -[2026-06-07 02:20:22,416][324535] Updated weights for policy 0, policy_version 19819 (0.0007) -[2026-06-07 02:20:22,614][324535] Updated weights for policy 0, policy_version 19829 (0.0007) -[2026-06-07 02:20:23,368][324535] Updated weights for policy 0, policy_version 19839 (0.0007) -[2026-06-07 02:20:23,559][324535] Updated weights for policy 0, policy_version 19849 (0.0007) -[2026-06-07 02:20:23,803][324535] Updated weights for policy 0, policy_version 19860 (0.0007) -[2026-06-07 02:20:24,013][324535] Updated weights for policy 0, policy_version 19870 (0.0007) -[2026-06-07 02:20:24,212][324535] Updated weights for policy 0, policy_version 19880 (0.0007) -[2026-06-07 02:20:24,413][324535] Updated weights for policy 0, policy_version 19890 (0.0007) -[2026-06-07 02:20:25,187][324535] Updated weights for policy 0, policy_version 19900 (0.0007) -[2026-06-07 02:20:25,404][324535] Updated weights for policy 0, policy_version 19910 (0.0007) -[2026-06-07 02:20:25,610][324535] Updated weights for policy 0, policy_version 19920 (0.0007) -[2026-06-07 02:20:25,823][324535] Updated weights for policy 0, policy_version 19930 (0.0007) -[2026-06-07 02:20:26,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 10190848. Throughput: 0: 17390.9. Samples: 10213120. Policy #0 lag: (min: 40.0, avg: 71.5, max: 103.0) -[2026-06-07 02:20:26,011][321787] Avg episode reward: [(0, '803.658')] -[2026-06-07 02:20:26,030][324535] Updated weights for policy 0, policy_version 19940 (0.0007) -[2026-06-07 02:20:26,244][324535] Updated weights for policy 0, policy_version 19950 (0.0007) -[2026-06-07 02:20:26,453][324273] Saving new best policy, reward=803.658! -[2026-06-07 02:20:26,455][324535] Updated weights for policy 0, policy_version 19960 (0.0007) -[2026-06-07 02:20:27,190][324535] Updated weights for policy 0, policy_version 19970 (0.0007) -[2026-06-07 02:20:27,438][324535] Updated weights for policy 0, policy_version 19981 (0.0007) -[2026-06-07 02:20:27,681][324535] Updated weights for policy 0, policy_version 19992 (0.0007) -[2026-06-07 02:20:27,884][324535] Updated weights for policy 0, policy_version 20002 (0.0007) -[2026-06-07 02:20:28,080][324535] Updated weights for policy 0, policy_version 20012 (0.0007) -[2026-06-07 02:20:28,293][324535] Updated weights for policy 0, policy_version 20022 (0.0007) -[2026-06-07 02:20:29,032][324535] Updated weights for policy 0, policy_version 20032 (0.0007) -[2026-06-07 02:20:29,245][324535] Updated weights for policy 0, policy_version 20042 (0.0007) -[2026-06-07 02:20:29,447][324535] Updated weights for policy 0, policy_version 20052 (0.0007) -[2026-06-07 02:20:29,650][324535] Updated weights for policy 0, policy_version 20062 (0.0007) -[2026-06-07 02:20:29,872][324535] Updated weights for policy 0, policy_version 20072 (0.0007) -[2026-06-07 02:20:30,067][324535] Updated weights for policy 0, policy_version 20082 (0.0007) -[2026-06-07 02:20:30,805][324535] Updated weights for policy 0, policy_version 20093 (0.0007) -[2026-06-07 02:20:31,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 10289152. Throughput: 0: 17362.5. Samples: 10314496. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) -[2026-06-07 02:20:31,011][321787] Avg episode reward: [(0, '802.721')] -[2026-06-07 02:20:31,019][324535] Updated weights for policy 0, policy_version 20103 (0.0007) -[2026-06-07 02:20:31,218][324535] Updated weights for policy 0, policy_version 20113 (0.0007) -[2026-06-07 02:20:31,433][324535] Updated weights for policy 0, policy_version 20123 (0.0007) -[2026-06-07 02:20:31,645][324535] Updated weights for policy 0, policy_version 20133 (0.0007) -[2026-06-07 02:20:31,859][324535] Updated weights for policy 0, policy_version 20143 (0.0007) -[2026-06-07 02:20:32,610][324535] Updated weights for policy 0, policy_version 20153 (0.0007) -[2026-06-07 02:20:32,825][324535] Updated weights for policy 0, policy_version 20163 (0.0007) -[2026-06-07 02:20:33,047][324535] Updated weights for policy 0, policy_version 20173 (0.0007) -[2026-06-07 02:20:33,263][324535] Updated weights for policy 0, policy_version 20183 (0.0007) -[2026-06-07 02:20:33,469][324535] Updated weights for policy 0, policy_version 20193 (0.0007) -[2026-06-07 02:20:33,692][324535] Updated weights for policy 0, policy_version 20203 (0.0007) -[2026-06-07 02:20:33,904][324535] Updated weights for policy 0, policy_version 20214 (0.0007) -[2026-06-07 02:20:34,680][324535] Updated weights for policy 0, policy_version 20224 (0.0007) -[2026-06-07 02:20:34,894][324535] Updated weights for policy 0, policy_version 20235 (0.0007) -[2026-06-07 02:20:35,105][324535] Updated weights for policy 0, policy_version 20245 (0.0007) -[2026-06-07 02:20:35,316][324535] Updated weights for policy 0, policy_version 20255 (0.0007) -[2026-06-07 02:20:35,516][324535] Updated weights for policy 0, policy_version 20265 (0.0007) -[2026-06-07 02:20:35,704][324535] Updated weights for policy 0, policy_version 20275 (0.0007) -[2026-06-07 02:20:36,010][321787] Fps is (10 sec: 19660.9, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 10387456. Throughput: 0: 17362.5. Samples: 10371712. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) -[2026-06-07 02:20:36,011][321787] Avg episode reward: [(0, '827.546')] -[2026-06-07 02:20:36,015][324273] Saving new best policy, reward=827.546! -[2026-06-07 02:20:36,459][324535] Updated weights for policy 0, policy_version 20285 (0.0007) -[2026-06-07 02:20:36,663][324535] Updated weights for policy 0, policy_version 20295 (0.0007) -[2026-06-07 02:20:36,877][324535] Updated weights for policy 0, policy_version 20306 (0.0007) -[2026-06-07 02:20:37,097][324535] Updated weights for policy 0, policy_version 20316 (0.0007) -[2026-06-07 02:20:37,301][324535] Updated weights for policy 0, policy_version 20326 (0.0007) -[2026-06-07 02:20:37,512][324535] Updated weights for policy 0, policy_version 20336 (0.0007) -[2026-06-07 02:20:38,243][324535] Updated weights for policy 0, policy_version 20346 (0.0007) -[2026-06-07 02:20:38,442][324535] Updated weights for policy 0, policy_version 20356 (0.0006) -[2026-06-07 02:20:38,661][324535] Updated weights for policy 0, policy_version 20366 (0.0006) -[2026-06-07 02:20:38,888][324535] Updated weights for policy 0, policy_version 20376 (0.0007) -[2026-06-07 02:20:39,129][324535] Updated weights for policy 0, policy_version 20387 (0.0007) -[2026-06-07 02:20:39,347][324535] Updated weights for policy 0, policy_version 20397 (0.0007) -[2026-06-07 02:20:39,570][324535] Updated weights for policy 0, policy_version 20407 (0.0007) -[2026-06-07 02:20:40,295][324535] Updated weights for policy 0, policy_version 20417 (0.0007) -[2026-06-07 02:20:40,526][324535] Updated weights for policy 0, policy_version 20428 (0.0007) -[2026-06-07 02:20:40,745][324535] Updated weights for policy 0, policy_version 20438 (0.0007) -[2026-06-07 02:20:40,965][324535] Updated weights for policy 0, policy_version 20448 (0.0007) -[2026-06-07 02:20:41,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 10452992. Throughput: 0: 17368.2. Samples: 10472960. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) -[2026-06-07 02:20:41,011][321787] Avg episode reward: [(0, '832.188')] -[2026-06-07 02:20:41,168][324535] Updated weights for policy 0, policy_version 20458 (0.0007) -[2026-06-07 02:20:41,375][324535] Updated weights for policy 0, policy_version 20469 (0.0007) -[2026-06-07 02:20:41,449][324273] Saving new best policy, reward=832.188! -[2026-06-07 02:20:42,150][324535] Updated weights for policy 0, policy_version 20479 (0.0007) -[2026-06-07 02:20:42,363][324535] Updated weights for policy 0, policy_version 20489 (0.0008) -[2026-06-07 02:20:42,571][324535] Updated weights for policy 0, policy_version 20499 (0.0007) -[2026-06-07 02:20:42,788][324535] Updated weights for policy 0, policy_version 20509 (0.0007) -[2026-06-07 02:20:42,982][324535] Updated weights for policy 0, policy_version 20519 (0.0006) -[2026-06-07 02:20:43,193][324535] Updated weights for policy 0, policy_version 20529 (0.0007) -[2026-06-07 02:20:43,918][324535] Updated weights for policy 0, policy_version 20539 (0.0007) -[2026-06-07 02:20:44,131][324535] Updated weights for policy 0, policy_version 20549 (0.0007) -[2026-06-07 02:20:44,358][324535] Updated weights for policy 0, policy_version 20560 (0.0007) -[2026-06-07 02:20:44,589][324535] Updated weights for policy 0, policy_version 20571 (0.0007) -[2026-06-07 02:20:44,788][324535] Updated weights for policy 0, policy_version 20581 (0.0007) -[2026-06-07 02:20:45,015][324535] Updated weights for policy 0, policy_version 20591 (0.0007) -[2026-06-07 02:20:45,737][324535] Updated weights for policy 0, policy_version 20601 (0.0007) -[2026-06-07 02:20:45,942][324535] Updated weights for policy 0, policy_version 20611 (0.0007) -[2026-06-07 02:20:46,010][321787] Fps is (10 sec: 16383.9, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 10551296. Throughput: 0: 17396.6. Samples: 10575488. Policy #0 lag: (min: 14.0, avg: 29.9, max: 78.0) -[2026-06-07 02:20:46,011][321787] Avg episode reward: [(0, '872.885')] -[2026-06-07 02:20:46,153][324535] Updated weights for policy 0, policy_version 20621 (0.0007) -[2026-06-07 02:20:46,342][324535] Updated weights for policy 0, policy_version 20631 (0.0007) -[2026-06-07 02:20:46,564][324535] Updated weights for policy 0, policy_version 20642 (0.0007) -[2026-06-07 02:20:46,759][324535] Updated weights for policy 0, policy_version 20652 (0.0008) -[2026-06-07 02:20:46,959][324535] Updated weights for policy 0, policy_version 20662 (0.0006) -[2026-06-07 02:20:47,003][324273] Saving new best policy, reward=872.885! -[2026-06-07 02:20:47,737][324535] Updated weights for policy 0, policy_version 20672 (0.0007) -[2026-06-07 02:20:47,944][324535] Updated weights for policy 0, policy_version 20682 (0.0007) -[2026-06-07 02:20:48,149][324535] Updated weights for policy 0, policy_version 20692 (0.0007) -[2026-06-07 02:20:48,362][324535] Updated weights for policy 0, policy_version 20702 (0.0007) -[2026-06-07 02:20:48,569][324535] Updated weights for policy 0, policy_version 20712 (0.0007) -[2026-06-07 02:20:48,809][324535] Updated weights for policy 0, policy_version 20723 (0.0007) -[2026-06-07 02:20:49,628][324535] Updated weights for policy 0, policy_version 20736 (0.0007) -[2026-06-07 02:20:49,860][324535] Updated weights for policy 0, policy_version 20747 (0.0007) -[2026-06-07 02:20:50,071][324535] Updated weights for policy 0, policy_version 20757 (0.0007) -[2026-06-07 02:20:50,284][324535] Updated weights for policy 0, policy_version 20767 (0.0008) -[2026-06-07 02:20:50,507][324535] Updated weights for policy 0, policy_version 20778 (0.0007) -[2026-06-07 02:20:50,713][324535] Updated weights for policy 0, policy_version 20788 (0.0007) -[2026-06-07 02:20:51,010][321787] Fps is (10 sec: 19660.9, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 10649600. Throughput: 0: 17371.0. Samples: 10631680. Policy #0 lag: (min: 14.0, avg: 29.9, max: 78.0) -[2026-06-07 02:20:51,011][321787] Avg episode reward: [(0, '874.190')] -[2026-06-07 02:20:51,017][324273] Saving new best policy, reward=874.190! -[2026-06-07 02:20:51,484][324535] Updated weights for policy 0, policy_version 20798 (0.0007) -[2026-06-07 02:20:51,658][324535] Updated weights for policy 0, policy_version 20808 (0.0007) -[2026-06-07 02:20:51,885][324535] Updated weights for policy 0, policy_version 20819 (0.0007) -[2026-06-07 02:20:52,124][324535] Updated weights for policy 0, policy_version 20830 (0.0007) -[2026-06-07 02:20:52,343][324535] Updated weights for policy 0, policy_version 20840 (0.0007) -[2026-06-07 02:20:52,581][324535] Updated weights for policy 0, policy_version 20850 (0.0007) -[2026-06-07 02:20:53,312][324535] Updated weights for policy 0, policy_version 20860 (0.0007) -[2026-06-07 02:20:53,524][324535] Updated weights for policy 0, policy_version 20870 (0.0007) -[2026-06-07 02:20:53,755][324535] Updated weights for policy 0, policy_version 20881 (0.0007) -[2026-06-07 02:20:53,979][324535] Updated weights for policy 0, policy_version 20891 (0.0007) -[2026-06-07 02:20:54,224][324535] Updated weights for policy 0, policy_version 20902 (0.0007) -[2026-06-07 02:20:54,467][324535] Updated weights for policy 0, policy_version 20913 (0.0007) -[2026-06-07 02:20:55,232][324535] Updated weights for policy 0, policy_version 20923 (0.0007) -[2026-06-07 02:20:55,418][324535] Updated weights for policy 0, policy_version 20933 (0.0012) -[2026-06-07 02:20:55,642][324535] Updated weights for policy 0, policy_version 20944 (0.0011) -[2026-06-07 02:20:55,856][324535] Updated weights for policy 0, policy_version 20954 (0.0011) -[2026-06-07 02:20:56,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 10715136. Throughput: 0: 17359.6. Samples: 10732416. Policy #0 lag: (min: 14.0, avg: 29.9, max: 78.0) -[2026-06-07 02:20:56,011][321787] Avg episode reward: [(0, '904.384')] -[2026-06-07 02:20:56,065][324535] Updated weights for policy 0, policy_version 20964 (0.0011) -[2026-06-07 02:20:56,277][324535] Updated weights for policy 0, policy_version 20974 (0.0011) -[2026-06-07 02:20:56,473][324273] Saving new best policy, reward=904.384! -[2026-06-07 02:20:56,477][324535] Updated weights for policy 0, policy_version 20984 (0.0011) -[2026-06-07 02:20:57,258][324535] Updated weights for policy 0, policy_version 20995 (0.0009) -[2026-06-07 02:20:57,473][324535] Updated weights for policy 0, policy_version 21005 (0.0010) -[2026-06-07 02:20:57,688][324535] Updated weights for policy 0, policy_version 21015 (0.0011) -[2026-06-07 02:20:57,892][324535] Updated weights for policy 0, policy_version 21025 (0.0010) -[2026-06-07 02:20:58,101][324535] Updated weights for policy 0, policy_version 21035 (0.0011) -[2026-06-07 02:20:58,327][324535] Updated weights for policy 0, policy_version 21045 (0.0011) -[2026-06-07 02:20:59,102][324535] Updated weights for policy 0, policy_version 21055 (0.0011) -[2026-06-07 02:20:59,310][324535] Updated weights for policy 0, policy_version 21065 (0.0012) -[2026-06-07 02:20:59,497][324535] Updated weights for policy 0, policy_version 21075 (0.0010) -[2026-06-07 02:20:59,721][324535] Updated weights for policy 0, policy_version 21085 (0.0011) -[2026-06-07 02:20:59,956][324535] Updated weights for policy 0, policy_version 21096 (0.0011) -[2026-06-07 02:21:00,185][324535] Updated weights for policy 0, policy_version 21107 (0.0011) -[2026-06-07 02:21:00,928][324535] Updated weights for policy 0, policy_version 21117 (0.0009) -[2026-06-07 02:21:01,010][321787] Fps is (10 sec: 16383.8, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 10813440. Throughput: 0: 17345.4. Samples: 10834304. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) -[2026-06-07 02:21:01,011][321787] Avg episode reward: [(0, '902.397')] -[2026-06-07 02:21:01,147][324535] Updated weights for policy 0, policy_version 21127 (0.0007) -[2026-06-07 02:21:01,372][324535] Updated weights for policy 0, policy_version 21138 (0.0007) -[2026-06-07 02:21:01,561][324535] Updated weights for policy 0, policy_version 21148 (0.0007) -[2026-06-07 02:21:01,754][324535] Updated weights for policy 0, policy_version 21158 (0.0007) -[2026-06-07 02:21:01,962][324535] Updated weights for policy 0, policy_version 21168 (0.0011) -[2026-06-07 02:21:02,712][324535] Updated weights for policy 0, policy_version 21178 (0.0007) -[2026-06-07 02:21:02,941][324535] Updated weights for policy 0, policy_version 21189 (0.0007) -[2026-06-07 02:21:03,145][324535] Updated weights for policy 0, policy_version 21199 (0.0007) -[2026-06-07 02:21:03,373][324535] Updated weights for policy 0, policy_version 21210 (0.0007) -[2026-06-07 02:21:03,582][324535] Updated weights for policy 0, policy_version 21220 (0.0007) -[2026-06-07 02:21:03,803][324535] Updated weights for policy 0, policy_version 21231 (0.0007) -[2026-06-07 02:21:04,543][324535] Updated weights for policy 0, policy_version 21241 (0.0007) -[2026-06-07 02:21:04,743][324535] Updated weights for policy 0, policy_version 21251 (0.0007) -[2026-06-07 02:21:04,936][324535] Updated weights for policy 0, policy_version 21261 (0.0008) -[2026-06-07 02:21:05,141][324535] Updated weights for policy 0, policy_version 21271 (0.0007) -[2026-06-07 02:21:05,351][324535] Updated weights for policy 0, policy_version 21281 (0.0007) -[2026-06-07 02:21:05,570][324535] Updated weights for policy 0, policy_version 21291 (0.0007) -[2026-06-07 02:21:05,758][324535] Updated weights for policy 0, policy_version 21301 (0.0007) -[2026-06-07 02:21:06,010][321787] Fps is (10 sec: 19660.7, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 10911744. Throughput: 0: 17348.3. Samples: 10891648. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) -[2026-06-07 02:21:06,011][321787] Avg episode reward: [(0, '892.895')] -[2026-06-07 02:21:06,541][324535] Updated weights for policy 0, policy_version 21312 (0.0007) -[2026-06-07 02:21:06,759][324535] Updated weights for policy 0, policy_version 21322 (0.0007) -[2026-06-07 02:21:06,980][324535] Updated weights for policy 0, policy_version 21333 (0.0007) -[2026-06-07 02:21:07,178][324535] Updated weights for policy 0, policy_version 21343 (0.0007) -[2026-06-07 02:21:07,384][324535] Updated weights for policy 0, policy_version 21353 (0.0007) -[2026-06-07 02:21:07,592][324535] Updated weights for policy 0, policy_version 21363 (0.0007) -[2026-06-07 02:21:08,333][324535] Updated weights for policy 0, policy_version 21373 (0.0007) -[2026-06-07 02:21:08,530][324535] Updated weights for policy 0, policy_version 21383 (0.0007) -[2026-06-07 02:21:08,736][324535] Updated weights for policy 0, policy_version 21393 (0.0007) -[2026-06-07 02:21:08,948][324535] Updated weights for policy 0, policy_version 21403 (0.0007) -[2026-06-07 02:21:09,164][324535] Updated weights for policy 0, policy_version 21413 (0.0007) -[2026-06-07 02:21:09,372][324535] Updated weights for policy 0, policy_version 21423 (0.0007) -[2026-06-07 02:21:10,114][324535] Updated weights for policy 0, policy_version 21433 (0.0007) -[2026-06-07 02:21:10,334][324535] Updated weights for policy 0, policy_version 21443 (0.0007) -[2026-06-07 02:21:10,557][324535] Updated weights for policy 0, policy_version 21454 (0.0007) -[2026-06-07 02:21:10,768][324535] Updated weights for policy 0, policy_version 21464 (0.0007) -[2026-06-07 02:21:10,962][324535] Updated weights for policy 0, policy_version 21474 (0.0007) -[2026-06-07 02:21:11,010][321787] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 10977280. Throughput: 0: 17362.5. Samples: 10994432. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) -[2026-06-07 02:21:11,011][321787] Avg episode reward: [(0, '920.797')] -[2026-06-07 02:21:11,164][324535] Updated weights for policy 0, policy_version 21484 (0.0007) -[2026-06-07 02:21:11,377][324535] Updated weights for policy 0, policy_version 21494 (0.0007) -[2026-06-07 02:21:11,420][324273] Saving new best policy, reward=920.797! -[2026-06-07 02:21:12,119][324535] Updated weights for policy 0, policy_version 21504 (0.0007) -[2026-06-07 02:21:12,345][324535] Updated weights for policy 0, policy_version 21514 (0.0007) -[2026-06-07 02:21:12,572][324535] Updated weights for policy 0, policy_version 21525 (0.0007) -[2026-06-07 02:21:12,790][324535] Updated weights for policy 0, policy_version 21535 (0.0008) -[2026-06-07 02:21:12,998][324535] Updated weights for policy 0, policy_version 21545 (0.0007) -[2026-06-07 02:21:13,221][324535] Updated weights for policy 0, policy_version 21555 (0.0007) -[2026-06-07 02:21:13,952][324535] Updated weights for policy 0, policy_version 21565 (0.0007) -[2026-06-07 02:21:14,171][324535] Updated weights for policy 0, policy_version 21575 (0.0007) -[2026-06-07 02:21:14,382][324535] Updated weights for policy 0, policy_version 21585 (0.0008) -[2026-06-07 02:21:14,595][324535] Updated weights for policy 0, policy_version 21595 (0.0010) -[2026-06-07 02:21:14,805][324535] Updated weights for policy 0, policy_version 21605 (0.0011) -[2026-06-07 02:21:15,013][324535] Updated weights for policy 0, policy_version 21615 (0.0011) -[2026-06-07 02:21:15,749][324535] Updated weights for policy 0, policy_version 21625 (0.0007) -[2026-06-07 02:21:15,959][324535] Updated weights for policy 0, policy_version 21636 (0.0007) -[2026-06-07 02:21:16,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 11075584. Throughput: 0: 17385.3. Samples: 11096832. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) -[2026-06-07 02:21:16,011][321787] Avg episode reward: [(0, '891.392')] -[2026-06-07 02:21:16,188][324535] Updated weights for policy 0, policy_version 21646 (0.0007) -[2026-06-07 02:21:16,405][324535] Updated weights for policy 0, policy_version 21656 (0.0007) -[2026-06-07 02:21:16,604][324535] Updated weights for policy 0, policy_version 21666 (0.0007) -[2026-06-07 02:21:16,834][324535] Updated weights for policy 0, policy_version 21677 (0.0007) -[2026-06-07 02:21:17,043][324535] Updated weights for policy 0, policy_version 21687 (0.0007) -[2026-06-07 02:21:17,774][324535] Updated weights for policy 0, policy_version 21697 (0.0006) -[2026-06-07 02:21:17,985][324535] Updated weights for policy 0, policy_version 21707 (0.0007) -[2026-06-07 02:21:18,196][324535] Updated weights for policy 0, policy_version 21717 (0.0007) -[2026-06-07 02:21:18,397][324535] Updated weights for policy 0, policy_version 21727 (0.0007) -[2026-06-07 02:21:18,615][324535] Updated weights for policy 0, policy_version 21737 (0.0007) -[2026-06-07 02:21:18,817][324535] Updated weights for policy 0, policy_version 21747 (0.0007) -[2026-06-07 02:21:19,606][324535] Updated weights for policy 0, policy_version 21758 (0.0007) -[2026-06-07 02:21:19,799][324535] Updated weights for policy 0, policy_version 21768 (0.0008) -[2026-06-07 02:21:20,031][324535] Updated weights for policy 0, policy_version 21779 (0.0009) -[2026-06-07 02:21:20,237][324535] Updated weights for policy 0, policy_version 21789 (0.0011) -[2026-06-07 02:21:20,451][324535] Updated weights for policy 0, policy_version 21799 (0.0008) -[2026-06-07 02:21:20,664][324535] Updated weights for policy 0, policy_version 21809 (0.0007) -[2026-06-07 02:21:21,010][321787] Fps is (10 sec: 19660.6, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 11173888. Throughput: 0: 17365.3. Samples: 11153152. Policy #0 lag: (min: 63.0, avg: 79.3, max: 127.0) -[2026-06-07 02:21:21,011][321787] Avg episode reward: [(0, '889.906')] -[2026-06-07 02:21:21,417][324535] Updated weights for policy 0, policy_version 21819 (0.0007) -[2026-06-07 02:21:21,638][324535] Updated weights for policy 0, policy_version 21830 (0.0010) -[2026-06-07 02:21:21,856][324535] Updated weights for policy 0, policy_version 21840 (0.0011) -[2026-06-07 02:21:22,064][324535] Updated weights for policy 0, policy_version 21850 (0.0011) -[2026-06-07 02:21:22,257][324535] Updated weights for policy 0, policy_version 21860 (0.0007) -[2026-06-07 02:21:22,472][324535] Updated weights for policy 0, policy_version 21870 (0.0007) -[2026-06-07 02:21:22,674][324535] Updated weights for policy 0, policy_version 21880 (0.0008) -[2026-06-07 02:21:23,449][324535] Updated weights for policy 0, policy_version 21890 (0.0007) -[2026-06-07 02:21:23,656][324535] Updated weights for policy 0, policy_version 21900 (0.0007) -[2026-06-07 02:21:23,884][324535] Updated weights for policy 0, policy_version 21911 (0.0007) -[2026-06-07 02:21:24,099][324535] Updated weights for policy 0, policy_version 21921 (0.0007) -[2026-06-07 02:21:24,303][324535] Updated weights for policy 0, policy_version 21931 (0.0007) -[2026-06-07 02:21:24,502][324535] Updated weights for policy 0, policy_version 21941 (0.0007) -[2026-06-07 02:21:25,243][324535] Updated weights for policy 0, policy_version 21951 (0.0007) -[2026-06-07 02:21:25,455][324535] Updated weights for policy 0, policy_version 21961 (0.0007) -[2026-06-07 02:21:25,669][324535] Updated weights for policy 0, policy_version 21971 (0.0007) -[2026-06-07 02:21:25,899][324535] Updated weights for policy 0, policy_version 21982 (0.0007) -[2026-06-07 02:21:26,010][321787] Fps is (10 sec: 16383.6, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 11239424. Throughput: 0: 17405.1. Samples: 11256192. Policy #0 lag: (min: 63.0, avg: 79.3, max: 127.0) -[2026-06-07 02:21:26,011][321787] Avg episode reward: [(0, '914.640')] -[2026-06-07 02:21:26,104][324535] Updated weights for policy 0, policy_version 21992 (0.0007) -[2026-06-07 02:21:26,308][324535] Updated weights for policy 0, policy_version 22002 (0.0007) -[2026-06-07 02:21:27,037][324535] Updated weights for policy 0, policy_version 22012 (0.0006) -[2026-06-07 02:21:27,252][324535] Updated weights for policy 0, policy_version 22022 (0.0007) -[2026-06-07 02:21:27,467][324535] Updated weights for policy 0, policy_version 22032 (0.0007) -[2026-06-07 02:21:27,700][324535] Updated weights for policy 0, policy_version 22042 (0.0007) -[2026-06-07 02:21:27,889][324535] Updated weights for policy 0, policy_version 22052 (0.0007) -[2026-06-07 02:21:28,096][324535] Updated weights for policy 0, policy_version 22062 (0.0007) -[2026-06-07 02:21:28,302][324535] Updated weights for policy 0, policy_version 22072 (0.0007) -[2026-06-07 02:21:29,042][324535] Updated weights for policy 0, policy_version 22082 (0.0007) -[2026-06-07 02:21:29,251][324535] Updated weights for policy 0, policy_version 22093 (0.0007) -[2026-06-07 02:21:29,457][324535] Updated weights for policy 0, policy_version 22103 (0.0007) -[2026-06-07 02:21:29,661][324535] Updated weights for policy 0, policy_version 22113 (0.0007) -[2026-06-07 02:21:29,900][324535] Updated weights for policy 0, policy_version 22123 (0.0007) -[2026-06-07 02:21:30,099][324535] Updated weights for policy 0, policy_version 22133 (0.0007) -[2026-06-07 02:21:30,848][324535] Updated weights for policy 0, policy_version 22143 (0.0007) -[2026-06-07 02:21:31,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 11337728. Throughput: 0: 17385.2. Samples: 11357824. Policy #0 lag: (min: 63.0, avg: 79.3, max: 127.0) -[2026-06-07 02:21:31,011][321787] Avg episode reward: [(0, '915.820')] -[2026-06-07 02:21:31,039][324535] Updated weights for policy 0, policy_version 22153 (0.0011) -[2026-06-07 02:21:31,248][324535] Updated weights for policy 0, policy_version 22163 (0.0011) -[2026-06-07 02:21:31,458][324535] Updated weights for policy 0, policy_version 22173 (0.0011) -[2026-06-07 02:21:31,656][324535] Updated weights for policy 0, policy_version 22183 (0.0011) -[2026-06-07 02:21:31,871][324535] Updated weights for policy 0, policy_version 22193 (0.0010) -[2026-06-07 02:21:32,629][324535] Updated weights for policy 0, policy_version 22203 (0.0010) -[2026-06-07 02:21:32,840][324535] Updated weights for policy 0, policy_version 22213 (0.0007) -[2026-06-07 02:21:33,039][324535] Updated weights for policy 0, policy_version 22223 (0.0008) -[2026-06-07 02:21:33,256][324535] Updated weights for policy 0, policy_version 22233 (0.0008) -[2026-06-07 02:21:33,446][324535] Updated weights for policy 0, policy_version 22243 (0.0011) -[2026-06-07 02:21:33,641][324535] Updated weights for policy 0, policy_version 22253 (0.0009) -[2026-06-07 02:21:33,853][324535] Updated weights for policy 0, policy_version 22263 (0.0011) -[2026-06-07 02:21:34,613][324535] Updated weights for policy 0, policy_version 22273 (0.0009) -[2026-06-07 02:21:34,826][324535] Updated weights for policy 0, policy_version 22283 (0.0007) -[2026-06-07 02:21:35,035][324535] Updated weights for policy 0, policy_version 22293 (0.0007) -[2026-06-07 02:21:35,240][324535] Updated weights for policy 0, policy_version 22303 (0.0007) -[2026-06-07 02:21:35,449][324535] Updated weights for policy 0, policy_version 22313 (0.0009) -[2026-06-07 02:21:35,665][324535] Updated weights for policy 0, policy_version 22323 (0.0011) -[2026-06-07 02:21:36,010][321787] Fps is (10 sec: 19661.2, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 11436032. Throughput: 0: 17413.7. Samples: 11415296. Policy #0 lag: (min: 1.0, avg: 32.2, max: 65.0) -[2026-06-07 02:21:36,011][321787] Avg episode reward: [(0, '899.912')] -[2026-06-07 02:21:36,414][324535] Updated weights for policy 0, policy_version 22333 (0.0009) -[2026-06-07 02:21:36,614][324535] Updated weights for policy 0, policy_version 22343 (0.0007) -[2026-06-07 02:21:36,821][324535] Updated weights for policy 0, policy_version 22353 (0.0007) -[2026-06-07 02:21:37,008][324535] Updated weights for policy 0, policy_version 22363 (0.0007) -[2026-06-07 02:21:37,215][324535] Updated weights for policy 0, policy_version 22373 (0.0007) -[2026-06-07 02:21:37,439][324535] Updated weights for policy 0, policy_version 22383 (0.0007) -[2026-06-07 02:21:38,198][324535] Updated weights for policy 0, policy_version 22393 (0.0007) -[2026-06-07 02:21:38,402][324535] Updated weights for policy 0, policy_version 22403 (0.0007) -[2026-06-07 02:21:38,597][324535] Updated weights for policy 0, policy_version 22413 (0.0007) -[2026-06-07 02:21:38,820][324535] Updated weights for policy 0, policy_version 22423 (0.0007) -[2026-06-07 02:21:39,024][324535] Updated weights for policy 0, policy_version 22433 (0.0007) -[2026-06-07 02:21:39,231][324535] Updated weights for policy 0, policy_version 22443 (0.0007) -[2026-06-07 02:21:39,442][324535] Updated weights for policy 0, policy_version 22453 (0.0007) -[2026-06-07 02:21:40,177][324535] Updated weights for policy 0, policy_version 22463 (0.0007) -[2026-06-07 02:21:40,403][324535] Updated weights for policy 0, policy_version 22473 (0.0007) -[2026-06-07 02:21:40,617][324535] Updated weights for policy 0, policy_version 22483 (0.0009) -[2026-06-07 02:21:40,834][324535] Updated weights for policy 0, policy_version 22493 (0.0010) -[2026-06-07 02:21:41,010][321787] Fps is (10 sec: 16383.9, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 11501568. Throughput: 0: 17433.6. Samples: 11516928. Policy #0 lag: (min: 1.0, avg: 32.2, max: 65.0) -[2026-06-07 02:21:41,011][321787] Avg episode reward: [(0, '899.352')] -[2026-06-07 02:21:41,069][324535] Updated weights for policy 0, policy_version 22504 (0.0007) -[2026-06-07 02:21:41,285][324535] Updated weights for policy 0, policy_version 22514 (0.0007) -[2026-06-07 02:21:42,022][324535] Updated weights for policy 0, policy_version 22525 (0.0007) -[2026-06-07 02:21:42,246][324535] Updated weights for policy 0, policy_version 22535 (0.0007) -[2026-06-07 02:21:42,462][324535] Updated weights for policy 0, policy_version 22545 (0.0011) -[2026-06-07 02:21:42,666][324535] Updated weights for policy 0, policy_version 22555 (0.0011) -[2026-06-07 02:21:42,889][324535] Updated weights for policy 0, policy_version 22565 (0.0011) -[2026-06-07 02:21:43,088][324535] Updated weights for policy 0, policy_version 22575 (0.0011) -[2026-06-07 02:21:43,848][324535] Updated weights for policy 0, policy_version 22585 (0.0007) -[2026-06-07 02:21:44,050][324535] Updated weights for policy 0, policy_version 22595 (0.0007) -[2026-06-07 02:21:44,273][324535] Updated weights for policy 0, policy_version 22606 (0.0007) -[2026-06-07 02:21:44,486][324535] Updated weights for policy 0, policy_version 22617 (0.0007) -[2026-06-07 02:21:44,686][324535] Updated weights for policy 0, policy_version 22627 (0.0007) -[2026-06-07 02:21:44,888][324535] Updated weights for policy 0, policy_version 22637 (0.0008) -[2026-06-07 02:21:45,103][324535] Updated weights for policy 0, policy_version 22647 (0.0008) -[2026-06-07 02:21:45,833][324535] Updated weights for policy 0, policy_version 22657 (0.0007) -[2026-06-07 02:21:46,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 11599872. Throughput: 0: 17470.6. Samples: 11620480. Policy #0 lag: (min: 1.0, avg: 32.2, max: 65.0) -[2026-06-07 02:21:46,011][321787] Avg episode reward: [(0, '893.814')] -[2026-06-07 02:21:46,024][324535] Updated weights for policy 0, policy_version 22667 (0.0007) -[2026-06-07 02:21:46,218][324535] Updated weights for policy 0, policy_version 22677 (0.0007) -[2026-06-07 02:21:46,436][324535] Updated weights for policy 0, policy_version 22687 (0.0007) -[2026-06-07 02:21:46,646][324535] Updated weights for policy 0, policy_version 22697 (0.0007) -[2026-06-07 02:21:46,841][324535] Updated weights for policy 0, policy_version 22707 (0.0007) -[2026-06-07 02:21:47,582][324535] Updated weights for policy 0, policy_version 22717 (0.0007) -[2026-06-07 02:21:47,791][324535] Updated weights for policy 0, policy_version 22727 (0.0006) -[2026-06-07 02:21:48,003][324535] Updated weights for policy 0, policy_version 22737 (0.0007) -[2026-06-07 02:21:48,205][324535] Updated weights for policy 0, policy_version 22747 (0.0007) -[2026-06-07 02:21:48,419][324535] Updated weights for policy 0, policy_version 22757 (0.0007) -[2026-06-07 02:21:48,625][324535] Updated weights for policy 0, policy_version 22767 (0.0007) -[2026-06-07 02:21:49,401][324535] Updated weights for policy 0, policy_version 22778 (0.0007) -[2026-06-07 02:21:49,605][324535] Updated weights for policy 0, policy_version 22788 (0.0007) -[2026-06-07 02:21:49,818][324535] Updated weights for policy 0, policy_version 22798 (0.0007) -[2026-06-07 02:21:50,014][324535] Updated weights for policy 0, policy_version 22808 (0.0007) -[2026-06-07 02:21:50,216][324535] Updated weights for policy 0, policy_version 22818 (0.0007) -[2026-06-07 02:21:50,436][324535] Updated weights for policy 0, policy_version 22828 (0.0007) -[2026-06-07 02:21:50,658][324535] Updated weights for policy 0, policy_version 22838 (0.0007) -[2026-06-07 02:21:51,010][321787] Fps is (10 sec: 19660.6, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 11698176. Throughput: 0: 17402.3. Samples: 11674752. Policy #0 lag: (min: 58.0, avg: 77.2, max: 122.0) -[2026-06-07 02:21:51,012][321787] Avg episode reward: [(0, '910.601')] -[2026-06-07 02:21:51,401][324535] Updated weights for policy 0, policy_version 22848 (0.0007) -[2026-06-07 02:21:51,590][324535] Updated weights for policy 0, policy_version 22858 (0.0007) -[2026-06-07 02:21:51,822][324535] Updated weights for policy 0, policy_version 22869 (0.0007) -[2026-06-07 02:21:52,071][324535] Updated weights for policy 0, policy_version 22880 (0.0007) -[2026-06-07 02:21:52,276][324535] Updated weights for policy 0, policy_version 22890 (0.0007) -[2026-06-07 02:21:52,484][324535] Updated weights for policy 0, policy_version 22900 (0.0007) -[2026-06-07 02:21:53,250][324535] Updated weights for policy 0, policy_version 22910 (0.0007) -[2026-06-07 02:21:53,463][324535] Updated weights for policy 0, policy_version 22920 (0.0007) -[2026-06-07 02:21:53,676][324535] Updated weights for policy 0, policy_version 22930 (0.0007) -[2026-06-07 02:21:53,902][324535] Updated weights for policy 0, policy_version 22940 (0.0007) -[2026-06-07 02:21:54,101][324535] Updated weights for policy 0, policy_version 22950 (0.0007) -[2026-06-07 02:21:54,304][324535] Updated weights for policy 0, policy_version 22960 (0.0007) -[2026-06-07 02:21:55,081][324535] Updated weights for policy 0, policy_version 22970 (0.0007) -[2026-06-07 02:21:55,306][324535] Updated weights for policy 0, policy_version 22981 (0.0007) -[2026-06-07 02:21:55,514][324535] Updated weights for policy 0, policy_version 22991 (0.0007) -[2026-06-07 02:21:55,720][324535] Updated weights for policy 0, policy_version 23001 (0.0007) -[2026-06-07 02:21:55,932][324535] Updated weights for policy 0, policy_version 23011 (0.0007) -[2026-06-07 02:21:56,010][321787] Fps is (10 sec: 16383.9, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 11763712. Throughput: 0: 17379.5. Samples: 11776512. Policy #0 lag: (min: 58.0, avg: 77.2, max: 122.0) -[2026-06-07 02:21:56,011][321787] Avg episode reward: [(0, '952.816')] -[2026-06-07 02:21:56,125][324535] Updated weights for policy 0, policy_version 23021 (0.0007) -[2026-06-07 02:21:56,351][324535] Updated weights for policy 0, policy_version 23031 (0.0007) -[2026-06-07 02:21:56,355][324273] Saving new best policy, reward=952.816! -[2026-06-07 02:21:57,132][324535] Updated weights for policy 0, policy_version 23041 (0.0008) -[2026-06-07 02:21:57,342][324535] Updated weights for policy 0, policy_version 23051 (0.0012) -[2026-06-07 02:21:57,551][324535] Updated weights for policy 0, policy_version 23061 (0.0011) -[2026-06-07 02:21:57,766][324535] Updated weights for policy 0, policy_version 23071 (0.0011) -[2026-06-07 02:21:57,978][324535] Updated weights for policy 0, policy_version 23081 (0.0011) -[2026-06-07 02:21:58,169][324535] Updated weights for policy 0, policy_version 23091 (0.0011) -[2026-06-07 02:21:58,938][324535] Updated weights for policy 0, policy_version 23101 (0.0009) -[2026-06-07 02:21:59,141][324535] Updated weights for policy 0, policy_version 23111 (0.0007) -[2026-06-07 02:21:59,355][324535] Updated weights for policy 0, policy_version 23121 (0.0007) -[2026-06-07 02:21:59,599][324535] Updated weights for policy 0, policy_version 23132 (0.0007) -[2026-06-07 02:21:59,812][324535] Updated weights for policy 0, policy_version 23142 (0.0007) -[2026-06-07 02:22:00,004][324535] Updated weights for policy 0, policy_version 23152 (0.0007) -[2026-06-07 02:22:00,749][324535] Updated weights for policy 0, policy_version 23162 (0.0007) -[2026-06-07 02:22:00,959][324535] Updated weights for policy 0, policy_version 23172 (0.0007) -[2026-06-07 02:22:01,010][321787] Fps is (10 sec: 16384.3, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 11862016. Throughput: 0: 17402.3. Samples: 11879936. Policy #0 lag: (min: 58.0, avg: 77.2, max: 122.0) -[2026-06-07 02:22:01,011][321787] Avg episode reward: [(0, '943.869')] -[2026-06-07 02:22:01,154][324535] Updated weights for policy 0, policy_version 23182 (0.0007) -[2026-06-07 02:22:01,354][324535] Updated weights for policy 0, policy_version 23192 (0.0007) -[2026-06-07 02:22:01,568][324535] Updated weights for policy 0, policy_version 23202 (0.0007) -[2026-06-07 02:22:01,769][324535] Updated weights for policy 0, policy_version 23212 (0.0007) -[2026-06-07 02:22:01,958][324535] Updated weights for policy 0, policy_version 23222 (0.0007) -[2026-06-07 02:22:02,735][324535] Updated weights for policy 0, policy_version 23232 (0.0007) -[2026-06-07 02:22:02,921][324535] Updated weights for policy 0, policy_version 23242 (0.0009) -[2026-06-07 02:22:03,148][324535] Updated weights for policy 0, policy_version 23252 (0.0011) -[2026-06-07 02:22:03,344][324535] Updated weights for policy 0, policy_version 23262 (0.0011) -[2026-06-07 02:22:03,530][324535] Updated weights for policy 0, policy_version 23272 (0.0007) -[2026-06-07 02:22:03,734][324535] Updated weights for policy 0, policy_version 23282 (0.0007) -[2026-06-07 02:22:04,484][324535] Updated weights for policy 0, policy_version 23292 (0.0007) -[2026-06-07 02:22:04,690][324535] Updated weights for policy 0, policy_version 23302 (0.0007) -[2026-06-07 02:22:04,899][324535] Updated weights for policy 0, policy_version 23312 (0.0007) -[2026-06-07 02:22:05,096][324535] Updated weights for policy 0, policy_version 23322 (0.0007) -[2026-06-07 02:22:05,293][324535] Updated weights for policy 0, policy_version 23332 (0.0007) -[2026-06-07 02:22:05,515][324535] Updated weights for policy 0, policy_version 23342 (0.0007) -[2026-06-07 02:22:05,719][324535] Updated weights for policy 0, policy_version 23352 (0.0007) -[2026-06-07 02:22:06,010][321787] Fps is (10 sec: 19660.6, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 11960320. Throughput: 0: 17433.6. Samples: 11937664. Policy #0 lag: (min: 58.0, avg: 77.2, max: 122.0) -[2026-06-07 02:22:06,012][321787] Avg episode reward: [(0, '972.728')] -[2026-06-07 02:22:06,016][324273] Saving new best policy, reward=972.728! -[2026-06-07 02:22:06,493][324535] Updated weights for policy 0, policy_version 23363 (0.0007) -[2026-06-07 02:22:06,709][324535] Updated weights for policy 0, policy_version 23373 (0.0007) -[2026-06-07 02:22:06,910][324535] Updated weights for policy 0, policy_version 23383 (0.0007) -[2026-06-07 02:22:07,142][324535] Updated weights for policy 0, policy_version 23393 (0.0007) -[2026-06-07 02:22:07,358][324535] Updated weights for policy 0, policy_version 23403 (0.0007) -[2026-06-07 02:22:07,577][324535] Updated weights for policy 0, policy_version 23413 (0.0007) -[2026-06-07 02:22:08,333][324535] Updated weights for policy 0, policy_version 23423 (0.0007) -[2026-06-07 02:22:08,518][324535] Updated weights for policy 0, policy_version 23433 (0.0006) -[2026-06-07 02:22:08,728][324535] Updated weights for policy 0, policy_version 23443 (0.0007) -[2026-06-07 02:22:08,922][324535] Updated weights for policy 0, policy_version 23453 (0.0007) -[2026-06-07 02:22:09,124][324535] Updated weights for policy 0, policy_version 23463 (0.0007) -[2026-06-07 02:22:09,342][324535] Updated weights for policy 0, policy_version 23473 (0.0007) -[2026-06-07 02:22:10,111][324535] Updated weights for policy 0, policy_version 23483 (0.0007) -[2026-06-07 02:22:10,325][324535] Updated weights for policy 0, policy_version 23493 (0.0009) -[2026-06-07 02:22:10,528][324535] Updated weights for policy 0, policy_version 23503 (0.0007) -[2026-06-07 02:22:10,720][324535] Updated weights for policy 0, policy_version 23513 (0.0007) -[2026-06-07 02:22:10,953][324535] Updated weights for policy 0, policy_version 23524 (0.0007) -[2026-06-07 02:22:11,010][321787] Fps is (10 sec: 16383.9, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 12025856. Throughput: 0: 17430.8. Samples: 12040576. Policy #0 lag: (min: 42.0, avg: 65.1, max: 106.0) -[2026-06-07 02:22:11,011][321787] Avg episode reward: [(0, '991.988')] -[2026-06-07 02:22:11,160][324535] Updated weights for policy 0, policy_version 23534 (0.0007) -[2026-06-07 02:22:11,378][324273] Saving new best policy, reward=991.988! -[2026-06-07 02:22:11,380][324535] Updated weights for policy 0, policy_version 23544 (0.0007) -[2026-06-07 02:22:12,164][324535] Updated weights for policy 0, policy_version 23554 (0.0007) -[2026-06-07 02:22:12,378][324535] Updated weights for policy 0, policy_version 23564 (0.0007) -[2026-06-07 02:22:12,589][324535] Updated weights for policy 0, policy_version 23574 (0.0007) -[2026-06-07 02:22:12,789][324535] Updated weights for policy 0, policy_version 23584 (0.0007) -[2026-06-07 02:22:12,984][324535] Updated weights for policy 0, policy_version 23594 (0.0007) -[2026-06-07 02:22:13,188][324535] Updated weights for policy 0, policy_version 23604 (0.0006) -[2026-06-07 02:22:13,969][324535] Updated weights for policy 0, policy_version 23614 (0.0007) -[2026-06-07 02:22:14,192][324535] Updated weights for policy 0, policy_version 23624 (0.0007) -[2026-06-07 02:22:14,394][324535] Updated weights for policy 0, policy_version 23634 (0.0007) -[2026-06-07 02:22:14,606][324535] Updated weights for policy 0, policy_version 23644 (0.0007) -[2026-06-07 02:22:14,824][324535] Updated weights for policy 0, policy_version 23654 (0.0007) -[2026-06-07 02:22:15,041][324535] Updated weights for policy 0, policy_version 23664 (0.0007) -[2026-06-07 02:22:15,808][324535] Updated weights for policy 0, policy_version 23674 (0.0007) -[2026-06-07 02:22:16,007][324535] Updated weights for policy 0, policy_version 23684 (0.0007) -[2026-06-07 02:22:16,010][321787] Fps is (10 sec: 16384.2, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 12124160. Throughput: 0: 17476.2. Samples: 12144256. Policy #0 lag: (min: 42.0, avg: 65.1, max: 106.0) -[2026-06-07 02:22:16,011][321787] Avg episode reward: [(0, '985.441')] -[2026-06-07 02:22:16,221][324535] Updated weights for policy 0, policy_version 23694 (0.0007) -[2026-06-07 02:22:16,439][324535] Updated weights for policy 0, policy_version 23704 (0.0007) -[2026-06-07 02:22:16,666][324535] Updated weights for policy 0, policy_version 23715 (0.0007) -[2026-06-07 02:22:16,850][324535] Updated weights for policy 0, policy_version 23725 (0.0007) -[2026-06-07 02:22:17,060][324535] Updated weights for policy 0, policy_version 23735 (0.0007) -[2026-06-07 02:22:17,837][324535] Updated weights for policy 0, policy_version 23745 (0.0007) -[2026-06-07 02:22:18,022][324535] Updated weights for policy 0, policy_version 23755 (0.0007) -[2026-06-07 02:22:18,248][324535] Updated weights for policy 0, policy_version 23766 (0.0007) -[2026-06-07 02:22:18,473][324535] Updated weights for policy 0, policy_version 23776 (0.0007) -[2026-06-07 02:22:18,685][324535] Updated weights for policy 0, policy_version 23786 (0.0007) -[2026-06-07 02:22:18,898][324535] Updated weights for policy 0, policy_version 23796 (0.0007) -[2026-06-07 02:22:19,655][324535] Updated weights for policy 0, policy_version 23806 (0.0007) -[2026-06-07 02:22:19,870][324535] Updated weights for policy 0, policy_version 23816 (0.0007) -[2026-06-07 02:22:20,084][324535] Updated weights for policy 0, policy_version 23826 (0.0007) -[2026-06-07 02:22:20,290][324535] Updated weights for policy 0, policy_version 23836 (0.0007) -[2026-06-07 02:22:20,508][324535] Updated weights for policy 0, policy_version 23846 (0.0007) -[2026-06-07 02:22:20,711][324535] Updated weights for policy 0, policy_version 23856 (0.0008) -[2026-06-07 02:22:21,010][321787] Fps is (10 sec: 19660.9, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 12222464. Throughput: 0: 17479.1. Samples: 12201856. Policy #0 lag: (min: 42.0, avg: 65.1, max: 106.0) -[2026-06-07 02:22:21,011][321787] Avg episode reward: [(0, '1004.779')] -[2026-06-07 02:22:21,015][324273] Saving new best policy, reward=1004.779! -[2026-06-07 02:22:21,470][324535] Updated weights for policy 0, policy_version 23866 (0.0007) -[2026-06-07 02:22:21,672][324535] Updated weights for policy 0, policy_version 23876 (0.0007) -[2026-06-07 02:22:21,885][324535] Updated weights for policy 0, policy_version 23886 (0.0007) -[2026-06-07 02:22:22,104][324535] Updated weights for policy 0, policy_version 23896 (0.0007) -[2026-06-07 02:22:22,322][324535] Updated weights for policy 0, policy_version 23906 (0.0007) -[2026-06-07 02:22:22,532][324535] Updated weights for policy 0, policy_version 23916 (0.0007) -[2026-06-07 02:22:22,745][324535] Updated weights for policy 0, policy_version 23926 (0.0008) -[2026-06-07 02:22:23,522][324535] Updated weights for policy 0, policy_version 23936 (0.0007) -[2026-06-07 02:22:23,723][324535] Updated weights for policy 0, policy_version 23946 (0.0007) -[2026-06-07 02:22:23,931][324535] Updated weights for policy 0, policy_version 23956 (0.0007) -[2026-06-07 02:22:24,142][324535] Updated weights for policy 0, policy_version 23966 (0.0007) -[2026-06-07 02:22:24,352][324535] Updated weights for policy 0, policy_version 23976 (0.0007) -[2026-06-07 02:22:24,578][324535] Updated weights for policy 0, policy_version 23986 (0.0007) -[2026-06-07 02:22:25,350][324535] Updated weights for policy 0, policy_version 23996 (0.0007) -[2026-06-07 02:22:25,565][324535] Updated weights for policy 0, policy_version 24006 (0.0007) -[2026-06-07 02:22:25,776][324535] Updated weights for policy 0, policy_version 24016 (0.0007) -[2026-06-07 02:22:25,976][324535] Updated weights for policy 0, policy_version 24026 (0.0007) -[2026-06-07 02:22:26,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 12288000. Throughput: 0: 17479.1. Samples: 12303488. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) -[2026-06-07 02:22:26,011][321787] Avg episode reward: [(0, '1001.315')] -[2026-06-07 02:22:26,187][324535] Updated weights for policy 0, policy_version 24036 (0.0007) -[2026-06-07 02:22:26,396][324535] Updated weights for policy 0, policy_version 24046 (0.0007) -[2026-06-07 02:22:26,605][324535] Updated weights for policy 0, policy_version 24056 (0.0007) -[2026-06-07 02:22:27,373][324535] Updated weights for policy 0, policy_version 24066 (0.0007) -[2026-06-07 02:22:27,590][324535] Updated weights for policy 0, policy_version 24076 (0.0007) -[2026-06-07 02:22:27,820][324535] Updated weights for policy 0, policy_version 24087 (0.0007) -[2026-06-07 02:22:28,020][324535] Updated weights for policy 0, policy_version 24097 (0.0006) -[2026-06-07 02:22:28,238][324535] Updated weights for policy 0, policy_version 24107 (0.0007) -[2026-06-07 02:22:28,465][324535] Updated weights for policy 0, policy_version 24117 (0.0007) -[2026-06-07 02:22:29,253][324535] Updated weights for policy 0, policy_version 24128 (0.0006) -[2026-06-07 02:22:29,447][324535] Updated weights for policy 0, policy_version 24138 (0.0006) -[2026-06-07 02:22:29,674][324535] Updated weights for policy 0, policy_version 24148 (0.0006) -[2026-06-07 02:22:29,884][324535] Updated weights for policy 0, policy_version 24158 (0.0006) -[2026-06-07 02:22:30,098][324535] Updated weights for policy 0, policy_version 24168 (0.0007) -[2026-06-07 02:22:30,308][324535] Updated weights for policy 0, policy_version 24178 (0.0007) -[2026-06-07 02:22:31,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 12386304. Throughput: 0: 17473.4. Samples: 12406784. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) -[2026-06-07 02:22:31,011][321787] Avg episode reward: [(0, '1008.428')] -[2026-06-07 02:22:31,120][324535] Updated weights for policy 0, policy_version 24188 (0.0007) -[2026-06-07 02:22:31,359][324535] Updated weights for policy 0, policy_version 24199 (0.0007) -[2026-06-07 02:22:31,370][324273] Early stopping after 2 epochs (16 sgd steps), loss delta 0.0000008 -[2026-06-07 02:22:31,371][324273] Saving new best policy, reward=1008.428! -[2026-06-07 02:22:32,360][324535] Updated weights for policy 0, policy_version 24209 (0.0006) -[2026-06-07 02:22:32,579][324535] Updated weights for policy 0, policy_version 24219 (0.0006) -[2026-06-07 02:22:32,787][324535] Updated weights for policy 0, policy_version 24229 (0.0006) -[2026-06-07 02:22:33,000][324535] Updated weights for policy 0, policy_version 24239 (0.0006) -[2026-06-07 02:22:33,239][324535] Updated weights for policy 0, policy_version 24250 (0.0006) -[2026-06-07 02:22:33,442][324535] Updated weights for policy 0, policy_version 24260 (0.0008) -[2026-06-07 02:22:34,187][324535] Updated weights for policy 0, policy_version 24270 (0.0007) -[2026-06-07 02:22:34,394][324535] Updated weights for policy 0, policy_version 24280 (0.0007) -[2026-06-07 02:22:34,618][324535] Updated weights for policy 0, policy_version 24290 (0.0007) -[2026-06-07 02:22:34,822][324535] Updated weights for policy 0, policy_version 24300 (0.0007) -[2026-06-07 02:22:35,028][324535] Updated weights for policy 0, policy_version 24310 (0.0007) -[2026-06-07 02:22:35,233][324535] Updated weights for policy 0, policy_version 24320 (0.0007) -[2026-06-07 02:22:35,979][324535] Updated weights for policy 0, policy_version 24330 (0.0007) -[2026-06-07 02:22:36,010][321787] Fps is (10 sec: 19660.7, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 12484608. Throughput: 0: 17752.2. Samples: 12473600. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) -[2026-06-07 02:22:36,011][321787] Avg episode reward: [(0, '1008.241')] -[2026-06-07 02:22:36,185][324535] Updated weights for policy 0, policy_version 24340 (0.0007) -[2026-06-07 02:22:36,386][324535] Updated weights for policy 0, policy_version 24350 (0.0008) -[2026-06-07 02:22:36,598][324535] Updated weights for policy 0, policy_version 24360 (0.0010) -[2026-06-07 02:22:36,807][324535] Updated weights for policy 0, policy_version 24370 (0.0008) -[2026-06-07 02:22:37,006][324535] Updated weights for policy 0, policy_version 24380 (0.0007) -[2026-06-07 02:22:37,237][324535] Updated weights for policy 0, policy_version 24391 (0.0007) -[2026-06-07 02:22:37,991][324535] Updated weights for policy 0, policy_version 24401 (0.0009) -[2026-06-07 02:22:38,213][324535] Updated weights for policy 0, policy_version 24411 (0.0007) -[2026-06-07 02:22:38,419][324535] Updated weights for policy 0, policy_version 24421 (0.0007) -[2026-06-07 02:22:38,609][324535] Updated weights for policy 0, policy_version 24431 (0.0007) -[2026-06-07 02:22:38,843][324535] Updated weights for policy 0, policy_version 24441 (0.0007) -[2026-06-07 02:22:39,029][324535] Updated weights for policy 0, policy_version 24451 (0.0007) -[2026-06-07 02:22:39,805][324535] Updated weights for policy 0, policy_version 24461 (0.0008) -[2026-06-07 02:22:40,028][324535] Updated weights for policy 0, policy_version 24471 (0.0009) -[2026-06-07 02:22:40,233][324535] Updated weights for policy 0, policy_version 24481 (0.0008) -[2026-06-07 02:22:40,442][324535] Updated weights for policy 0, policy_version 24491 (0.0007) -[2026-06-07 02:22:40,668][324535] Updated weights for policy 0, policy_version 24501 (0.0007) -[2026-06-07 02:22:40,877][324535] Updated weights for policy 0, policy_version 24511 (0.0007) -[2026-06-07 02:22:41,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 12550144. Throughput: 0: 17794.9. Samples: 12577280. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) -[2026-06-07 02:22:41,011][321787] Avg episode reward: [(0, '1000.104')] -[2026-06-07 02:22:41,659][324535] Updated weights for policy 0, policy_version 24521 (0.0007) -[2026-06-07 02:22:41,833][324535] Updated weights for policy 0, policy_version 24531 (0.0007) -[2026-06-07 02:22:42,042][324535] Updated weights for policy 0, policy_version 24541 (0.0007) -[2026-06-07 02:22:42,245][324535] Updated weights for policy 0, policy_version 24551 (0.0007) -[2026-06-07 02:22:42,439][324535] Updated weights for policy 0, policy_version 24561 (0.0007) -[2026-06-07 02:22:42,647][324535] Updated weights for policy 0, policy_version 24571 (0.0007) -[2026-06-07 02:22:42,864][324535] Updated weights for policy 0, policy_version 24581 (0.0007) -[2026-06-07 02:22:43,622][324535] Updated weights for policy 0, policy_version 24591 (0.0007) -[2026-06-07 02:22:43,848][324535] Updated weights for policy 0, policy_version 24601 (0.0007) -[2026-06-07 02:22:44,058][324535] Updated weights for policy 0, policy_version 24611 (0.0007) -[2026-06-07 02:22:44,268][324535] Updated weights for policy 0, policy_version 24621 (0.0007) -[2026-06-07 02:22:44,487][324535] Updated weights for policy 0, policy_version 24631 (0.0007) -[2026-06-07 02:22:44,685][324535] Updated weights for policy 0, policy_version 24641 (0.0007) -[2026-06-07 02:22:45,425][324535] Updated weights for policy 0, policy_version 24651 (0.0007) -[2026-06-07 02:22:45,648][324535] Updated weights for policy 0, policy_version 24661 (0.0007) -[2026-06-07 02:22:45,857][324535] Updated weights for policy 0, policy_version 24671 (0.0007) -[2026-06-07 02:22:46,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 12648448. Throughput: 0: 17985.4. Samples: 12689280. Policy #0 lag: (min: 53.0, avg: 68.3, max: 117.0) -[2026-06-07 02:22:46,011][321787] Avg episode reward: [(0, '1029.259')] -[2026-06-07 02:22:46,054][324535] Updated weights for policy 0, policy_version 24681 (0.0007) -[2026-06-07 02:22:46,258][324535] Updated weights for policy 0, policy_version 24691 (0.0007) -[2026-06-07 02:22:46,487][324535] Updated weights for policy 0, policy_version 24701 (0.0007) -[2026-06-07 02:22:46,687][324535] Updated weights for policy 0, policy_version 24711 (0.0007) -[2026-06-07 02:22:46,699][324273] Saving new best policy, reward=1029.259! -[2026-06-07 02:22:47,434][324535] Updated weights for policy 0, policy_version 24721 (0.0007) -[2026-06-07 02:22:47,639][324535] Updated weights for policy 0, policy_version 24731 (0.0007) -[2026-06-07 02:22:47,839][324535] Updated weights for policy 0, policy_version 24741 (0.0007) -[2026-06-07 02:22:48,056][324535] Updated weights for policy 0, policy_version 24751 (0.0007) -[2026-06-07 02:22:48,258][324535] Updated weights for policy 0, policy_version 24761 (0.0007) -[2026-06-07 02:22:48,484][324535] Updated weights for policy 0, policy_version 24771 (0.0007) -[2026-06-07 02:22:49,229][324535] Updated weights for policy 0, policy_version 24781 (0.0007) -[2026-06-07 02:22:49,425][324535] Updated weights for policy 0, policy_version 24791 (0.0007) -[2026-06-07 02:22:49,602][324535] Updated weights for policy 0, policy_version 24801 (0.0007) -[2026-06-07 02:22:49,821][324535] Updated weights for policy 0, policy_version 24811 (0.0007) -[2026-06-07 02:22:50,015][324535] Updated weights for policy 0, policy_version 24821 (0.0007) -[2026-06-07 02:22:50,223][324535] Updated weights for policy 0, policy_version 24831 (0.0007) -[2026-06-07 02:22:50,972][324535] Updated weights for policy 0, policy_version 24841 (0.0007) -[2026-06-07 02:22:51,010][321787] Fps is (10 sec: 19660.4, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 12746752. Throughput: 0: 17752.2. Samples: 12736512. Policy #0 lag: (min: 53.0, avg: 68.3, max: 117.0) -[2026-06-07 02:22:51,011][321787] Avg episode reward: [(0, '1034.507')] -[2026-06-07 02:22:51,171][324535] Updated weights for policy 0, policy_version 24851 (0.0007) -[2026-06-07 02:22:51,382][324535] Updated weights for policy 0, policy_version 24861 (0.0007) -[2026-06-07 02:22:51,612][324535] Updated weights for policy 0, policy_version 24872 (0.0007) -[2026-06-07 02:22:51,816][324535] Updated weights for policy 0, policy_version 24882 (0.0007) -[2026-06-07 02:22:52,016][324535] Updated weights for policy 0, policy_version 24892 (0.0007) -[2026-06-07 02:22:52,237][324535] Updated weights for policy 0, policy_version 24902 (0.0007) -[2026-06-07 02:22:52,264][324273] Saving new best policy, reward=1034.507! -[2026-06-07 02:22:53,004][324535] Updated weights for policy 0, policy_version 24912 (0.0007) -[2026-06-07 02:22:53,236][324535] Updated weights for policy 0, policy_version 24922 (0.0008) -[2026-06-07 02:22:53,437][324535] Updated weights for policy 0, policy_version 24932 (0.0010) -[2026-06-07 02:22:53,642][324535] Updated weights for policy 0, policy_version 24942 (0.0011) -[2026-06-07 02:22:53,839][324535] Updated weights for policy 0, policy_version 24952 (0.0010) -[2026-06-07 02:22:54,045][324535] Updated weights for policy 0, policy_version 24962 (0.0011) -[2026-06-07 02:22:54,806][324535] Updated weights for policy 0, policy_version 24972 (0.0009) -[2026-06-07 02:22:55,017][324535] Updated weights for policy 0, policy_version 24982 (0.0007) -[2026-06-07 02:22:55,201][324535] Updated weights for policy 0, policy_version 24992 (0.0007) -[2026-06-07 02:22:55,424][324535] Updated weights for policy 0, policy_version 25002 (0.0007) -[2026-06-07 02:22:55,665][324535] Updated weights for policy 0, policy_version 25013 (0.0007) -[2026-06-07 02:22:55,871][324535] Updated weights for policy 0, policy_version 25023 (0.0007) -[2026-06-07 02:22:56,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 12812288. Throughput: 0: 17766.4. Samples: 12840064. Policy #0 lag: (min: 53.0, avg: 68.3, max: 117.0) -[2026-06-07 02:22:56,011][321787] Avg episode reward: [(0, '1096.315')] -[2026-06-07 02:22:56,055][324273] Saving new best policy, reward=1096.315! -[2026-06-07 02:22:56,635][324535] Updated weights for policy 0, policy_version 25033 (0.0007) -[2026-06-07 02:22:56,834][324535] Updated weights for policy 0, policy_version 25043 (0.0007) -[2026-06-07 02:22:57,054][324535] Updated weights for policy 0, policy_version 25053 (0.0007) -[2026-06-07 02:22:57,261][324535] Updated weights for policy 0, policy_version 25063 (0.0007) -[2026-06-07 02:22:57,461][324535] Updated weights for policy 0, policy_version 25073 (0.0007) -[2026-06-07 02:22:57,687][324535] Updated weights for policy 0, policy_version 25084 (0.0007) -[2026-06-07 02:22:57,887][324535] Updated weights for policy 0, policy_version 25094 (0.0007) -[2026-06-07 02:22:58,646][324535] Updated weights for policy 0, policy_version 25104 (0.0009) -[2026-06-07 02:22:58,862][324535] Updated weights for policy 0, policy_version 25114 (0.0011) -[2026-06-07 02:22:59,064][324535] Updated weights for policy 0, policy_version 25124 (0.0009) -[2026-06-07 02:22:59,283][324535] Updated weights for policy 0, policy_version 25134 (0.0007) -[2026-06-07 02:22:59,470][324535] Updated weights for policy 0, policy_version 25144 (0.0007) -[2026-06-07 02:22:59,692][324535] Updated weights for policy 0, policy_version 25154 (0.0007) -[2026-06-07 02:23:00,436][324535] Updated weights for policy 0, policy_version 25164 (0.0007) -[2026-06-07 02:23:00,642][324535] Updated weights for policy 0, policy_version 25174 (0.0007) -[2026-06-07 02:23:00,858][324535] Updated weights for policy 0, policy_version 25184 (0.0007) -[2026-06-07 02:23:01,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 12910592. Throughput: 0: 17979.7. Samples: 12953344. Policy #0 lag: (min: 53.0, avg: 68.3, max: 117.0) -[2026-06-07 02:23:01,011][321787] Avg episode reward: [(0, '1111.664')] -[2026-06-07 02:23:01,089][324535] Updated weights for policy 0, policy_version 25194 (0.0007) -[2026-06-07 02:23:01,290][324535] Updated weights for policy 0, policy_version 25204 (0.0007) -[2026-06-07 02:23:01,501][324535] Updated weights for policy 0, policy_version 25214 (0.0007) -[2026-06-07 02:23:01,723][324273] Saving new best policy, reward=1111.664! -[2026-06-07 02:23:02,311][324535] Updated weights for policy 0, policy_version 25225 (0.0007) -[2026-06-07 02:23:02,530][324535] Updated weights for policy 0, policy_version 25236 (0.0007) -[2026-06-07 02:23:02,744][324535] Updated weights for policy 0, policy_version 25246 (0.0007) -[2026-06-07 02:23:02,966][324535] Updated weights for policy 0, policy_version 25256 (0.0007) -[2026-06-07 02:23:03,210][324535] Updated weights for policy 0, policy_version 25268 (0.0007) -[2026-06-07 02:23:03,409][324535] Updated weights for policy 0, policy_version 25278 (0.0007) -[2026-06-07 02:23:03,603][324535] Updated weights for policy 0, policy_version 25288 (0.0007) -[2026-06-07 02:23:04,386][324535] Updated weights for policy 0, policy_version 25298 (0.0007) -[2026-06-07 02:23:04,601][324535] Updated weights for policy 0, policy_version 25308 (0.0007) -[2026-06-07 02:23:04,789][324535] Updated weights for policy 0, policy_version 25318 (0.0007) -[2026-06-07 02:23:04,998][324535] Updated weights for policy 0, policy_version 25328 (0.0007) -[2026-06-07 02:23:05,210][324535] Updated weights for policy 0, policy_version 25338 (0.0008) -[2026-06-07 02:23:05,401][324535] Updated weights for policy 0, policy_version 25348 (0.0007) -[2026-06-07 02:23:06,010][321787] Fps is (10 sec: 19660.5, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 13008896. Throughput: 0: 17709.4. Samples: 12998784. Policy #0 lag: (min: 5.0, avg: 20.4, max: 69.0) -[2026-06-07 02:23:06,012][321787] Avg episode reward: [(0, '1154.353')] -[2026-06-07 02:23:06,167][324535] Updated weights for policy 0, policy_version 25358 (0.0007) -[2026-06-07 02:23:06,382][324535] Updated weights for policy 0, policy_version 25368 (0.0007) -[2026-06-07 02:23:06,595][324535] Updated weights for policy 0, policy_version 25378 (0.0007) -[2026-06-07 02:23:06,798][324535] Updated weights for policy 0, policy_version 25388 (0.0007) -[2026-06-07 02:23:06,983][324535] Updated weights for policy 0, policy_version 25398 (0.0007) -[2026-06-07 02:23:07,210][324535] Updated weights for policy 0, policy_version 25408 (0.0007) -[2026-06-07 02:23:07,373][324273] Saving new best policy, reward=1154.353! -[2026-06-07 02:23:07,995][324535] Updated weights for policy 0, policy_version 25418 (0.0007) -[2026-06-07 02:23:08,198][324535] Updated weights for policy 0, policy_version 25428 (0.0007) -[2026-06-07 02:23:08,420][324535] Updated weights for policy 0, policy_version 25438 (0.0007) -[2026-06-07 02:23:08,663][324535] Updated weights for policy 0, policy_version 25449 (0.0007) -[2026-06-07 02:23:08,894][324535] Updated weights for policy 0, policy_version 25460 (0.0007) -[2026-06-07 02:23:09,099][324535] Updated weights for policy 0, policy_version 25470 (0.0007) -[2026-06-07 02:23:09,865][324535] Updated weights for policy 0, policy_version 25481 (0.0007) -[2026-06-07 02:23:10,054][324535] Updated weights for policy 0, policy_version 25491 (0.0007) -[2026-06-07 02:23:10,254][324535] Updated weights for policy 0, policy_version 25501 (0.0007) -[2026-06-07 02:23:10,469][324535] Updated weights for policy 0, policy_version 25511 (0.0007) -[2026-06-07 02:23:10,681][324535] Updated weights for policy 0, policy_version 25521 (0.0007) -[2026-06-07 02:23:10,894][324535] Updated weights for policy 0, policy_version 25531 (0.0007) -[2026-06-07 02:23:11,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 13074432. Throughput: 0: 17757.8. Samples: 13102592. Policy #0 lag: (min: 5.0, avg: 20.4, max: 69.0) -[2026-06-07 02:23:11,011][321787] Avg episode reward: [(0, '1201.115')] -[2026-06-07 02:23:11,094][324535] Updated weights for policy 0, policy_version 25541 (0.0007) -[2026-06-07 02:23:11,146][324273] Saving new best policy, reward=1201.115! -[2026-06-07 02:23:11,859][324535] Updated weights for policy 0, policy_version 25551 (0.0007) -[2026-06-07 02:23:12,057][324535] Updated weights for policy 0, policy_version 25561 (0.0007) -[2026-06-07 02:23:12,267][324535] Updated weights for policy 0, policy_version 25571 (0.0007) -[2026-06-07 02:23:12,502][324535] Updated weights for policy 0, policy_version 25582 (0.0007) -[2026-06-07 02:23:12,702][324535] Updated weights for policy 0, policy_version 25592 (0.0007) -[2026-06-07 02:23:12,924][324535] Updated weights for policy 0, policy_version 25602 (0.0007) -[2026-06-07 02:23:13,702][324535] Updated weights for policy 0, policy_version 25612 (0.0007) -[2026-06-07 02:23:13,901][324535] Updated weights for policy 0, policy_version 25622 (0.0007) -[2026-06-07 02:23:14,101][324535] Updated weights for policy 0, policy_version 25632 (0.0007) -[2026-06-07 02:23:14,293][324535] Updated weights for policy 0, policy_version 25642 (0.0007) -[2026-06-07 02:23:14,498][324535] Updated weights for policy 0, policy_version 25652 (0.0007) -[2026-06-07 02:23:14,726][324535] Updated weights for policy 0, policy_version 25662 (0.0007) -[2026-06-07 02:23:14,932][324535] Updated weights for policy 0, policy_version 25672 (0.0007) -[2026-06-07 02:23:15,671][324535] Updated weights for policy 0, policy_version 25682 (0.0007) -[2026-06-07 02:23:15,868][324535] Updated weights for policy 0, policy_version 25692 (0.0007) -[2026-06-07 02:23:16,010][321787] Fps is (10 sec: 16384.3, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 13172736. Throughput: 0: 17888.7. Samples: 13211776. Policy #0 lag: (min: 5.0, avg: 20.4, max: 69.0) -[2026-06-07 02:23:16,011][321787] Avg episode reward: [(0, '1219.959')] -[2026-06-07 02:23:16,080][324535] Updated weights for policy 0, policy_version 25702 (0.0007) -[2026-06-07 02:23:16,320][324535] Updated weights for policy 0, policy_version 25713 (0.0007) -[2026-06-07 02:23:16,521][324535] Updated weights for policy 0, policy_version 25723 (0.0007) -[2026-06-07 02:23:16,732][324535] Updated weights for policy 0, policy_version 25733 (0.0007) -[2026-06-07 02:23:16,786][324273] Saving new best policy, reward=1219.959! -[2026-06-07 02:23:17,519][324535] Updated weights for policy 0, policy_version 25743 (0.0007) -[2026-06-07 02:23:17,767][324535] Updated weights for policy 0, policy_version 25754 (0.0007) -[2026-06-07 02:23:17,971][324535] Updated weights for policy 0, policy_version 25764 (0.0007) -[2026-06-07 02:23:18,172][324535] Updated weights for policy 0, policy_version 25774 (0.0007) -[2026-06-07 02:23:18,400][324535] Updated weights for policy 0, policy_version 25784 (0.0007) -[2026-06-07 02:23:18,621][324535] Updated weights for policy 0, policy_version 25794 (0.0007) -[2026-06-07 02:23:19,339][324535] Updated weights for policy 0, policy_version 25804 (0.0008) -[2026-06-07 02:23:19,559][324535] Updated weights for policy 0, policy_version 25814 (0.0011) -[2026-06-07 02:23:19,768][324535] Updated weights for policy 0, policy_version 25824 (0.0009) -[2026-06-07 02:23:19,976][324535] Updated weights for policy 0, policy_version 25834 (0.0007) -[2026-06-07 02:23:20,170][324535] Updated weights for policy 0, policy_version 25844 (0.0007) -[2026-06-07 02:23:20,374][324535] Updated weights for policy 0, policy_version 25854 (0.0008) -[2026-06-07 02:23:20,601][324535] Updated weights for policy 0, policy_version 25864 (0.0007) -[2026-06-07 02:23:21,010][321787] Fps is (10 sec: 19660.9, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 13271040. Throughput: 0: 17527.5. Samples: 13262336. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) -[2026-06-07 02:23:21,011][321787] Avg episode reward: [(0, '1223.344')] -[2026-06-07 02:23:21,342][324535] Updated weights for policy 0, policy_version 25874 (0.0007) -[2026-06-07 02:23:21,553][324535] Updated weights for policy 0, policy_version 25884 (0.0007) -[2026-06-07 02:23:21,766][324535] Updated weights for policy 0, policy_version 25894 (0.0007) -[2026-06-07 02:23:21,956][324535] Updated weights for policy 0, policy_version 25904 (0.0007) -[2026-06-07 02:23:22,167][324535] Updated weights for policy 0, policy_version 25914 (0.0007) -[2026-06-07 02:23:22,378][324535] Updated weights for policy 0, policy_version 25924 (0.0007) -[2026-06-07 02:23:22,455][324273] Saving new best policy, reward=1223.344! -[2026-06-07 02:23:23,156][324535] Updated weights for policy 0, policy_version 25934 (0.0007) -[2026-06-07 02:23:23,408][324535] Updated weights for policy 0, policy_version 25946 (0.0007) -[2026-06-07 02:23:23,625][324535] Updated weights for policy 0, policy_version 25956 (0.0007) -[2026-06-07 02:23:23,827][324535] Updated weights for policy 0, policy_version 25966 (0.0007) -[2026-06-07 02:23:24,048][324535] Updated weights for policy 0, policy_version 25976 (0.0007) -[2026-06-07 02:23:24,266][324535] Updated weights for policy 0, policy_version 25986 (0.0007) -[2026-06-07 02:23:25,009][324535] Updated weights for policy 0, policy_version 25996 (0.0008) -[2026-06-07 02:23:25,214][324535] Updated weights for policy 0, policy_version 26006 (0.0011) -[2026-06-07 02:23:25,403][324535] Updated weights for policy 0, policy_version 26016 (0.0011) -[2026-06-07 02:23:25,602][324535] Updated weights for policy 0, policy_version 26026 (0.0008) -[2026-06-07 02:23:25,822][324535] Updated weights for policy 0, policy_version 26036 (0.0007) -[2026-06-07 02:23:26,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 13336576. Throughput: 0: 17501.9. Samples: 13364864. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) -[2026-06-07 02:23:26,011][321787] Avg episode reward: [(0, '1203.470')] -[2026-06-07 02:23:26,033][324535] Updated weights for policy 0, policy_version 26046 (0.0007) -[2026-06-07 02:23:26,237][324535] Updated weights for policy 0, policy_version 26056 (0.0007) -[2026-06-07 02:23:26,973][324535] Updated weights for policy 0, policy_version 26066 (0.0007) -[2026-06-07 02:23:27,192][324535] Updated weights for policy 0, policy_version 26076 (0.0007) -[2026-06-07 02:23:27,411][324535] Updated weights for policy 0, policy_version 26086 (0.0007) -[2026-06-07 02:23:27,627][324535] Updated weights for policy 0, policy_version 26096 (0.0007) -[2026-06-07 02:23:27,876][324535] Updated weights for policy 0, policy_version 26108 (0.0007) -[2026-06-07 02:23:28,079][324535] Updated weights for policy 0, policy_version 26118 (0.0007) -[2026-06-07 02:23:28,798][324535] Updated weights for policy 0, policy_version 26128 (0.0007) -[2026-06-07 02:23:29,000][324535] Updated weights for policy 0, policy_version 26138 (0.0007) -[2026-06-07 02:23:29,215][324535] Updated weights for policy 0, policy_version 26148 (0.0007) -[2026-06-07 02:23:29,408][324535] Updated weights for policy 0, policy_version 26158 (0.0007) -[2026-06-07 02:23:29,615][324535] Updated weights for policy 0, policy_version 26168 (0.0007) -[2026-06-07 02:23:29,818][324535] Updated weights for policy 0, policy_version 26178 (0.0007) -[2026-06-07 02:23:30,578][324535] Updated weights for policy 0, policy_version 26188 (0.0007) -[2026-06-07 02:23:30,779][324535] Updated weights for policy 0, policy_version 26198 (0.0007) -[2026-06-07 02:23:30,980][324535] Updated weights for policy 0, policy_version 26208 (0.0007) -[2026-06-07 02:23:31,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 13434880. Throughput: 0: 17442.1. Samples: 13474176. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) -[2026-06-07 02:23:31,011][321787] Avg episode reward: [(0, '1228.968')] -[2026-06-07 02:23:31,206][324535] Updated weights for policy 0, policy_version 26218 (0.0007) -[2026-06-07 02:23:31,412][324535] Updated weights for policy 0, policy_version 26228 (0.0007) -[2026-06-07 02:23:31,608][324535] Updated weights for policy 0, policy_version 26238 (0.0007) -[2026-06-07 02:23:31,809][324273] Saving new best policy, reward=1228.968! -[2026-06-07 02:23:31,811][324535] Updated weights for policy 0, policy_version 26248 (0.0007) -[2026-06-07 02:23:32,614][324535] Updated weights for policy 0, policy_version 26259 (0.0007) -[2026-06-07 02:23:32,807][324535] Updated weights for policy 0, policy_version 26269 (0.0007) -[2026-06-07 02:23:33,059][324535] Updated weights for policy 0, policy_version 26280 (0.0007) -[2026-06-07 02:23:33,260][324535] Updated weights for policy 0, policy_version 26290 (0.0007) -[2026-06-07 02:23:33,471][324535] Updated weights for policy 0, policy_version 26300 (0.0007) -[2026-06-07 02:23:33,690][324535] Updated weights for policy 0, policy_version 26311 (0.0007) -[2026-06-07 02:23:34,455][324535] Updated weights for policy 0, policy_version 26321 (0.0007) -[2026-06-07 02:23:34,674][324535] Updated weights for policy 0, policy_version 26331 (0.0007) -[2026-06-07 02:23:34,885][324535] Updated weights for policy 0, policy_version 26342 (0.0007) -[2026-06-07 02:23:35,112][324535] Updated weights for policy 0, policy_version 26352 (0.0007) -[2026-06-07 02:23:35,319][324535] Updated weights for policy 0, policy_version 26362 (0.0007) -[2026-06-07 02:23:35,544][324535] Updated weights for policy 0, policy_version 26372 (0.0007) -[2026-06-07 02:23:36,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 13533184. Throughput: 0: 17513.3. Samples: 13524608. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) -[2026-06-07 02:23:36,011][321787] Avg episode reward: [(0, '1232.252')] -[2026-06-07 02:23:36,015][324273] Saving new best policy, reward=1232.252! -[2026-06-07 02:23:36,324][324535] Updated weights for policy 0, policy_version 26382 (0.0007) -[2026-06-07 02:23:36,527][324535] Updated weights for policy 0, policy_version 26392 (0.0007) -[2026-06-07 02:23:36,755][324535] Updated weights for policy 0, policy_version 26403 (0.0007) -[2026-06-07 02:23:36,956][324535] Updated weights for policy 0, policy_version 26413 (0.0007) -[2026-06-07 02:23:37,150][324535] Updated weights for policy 0, policy_version 26423 (0.0007) -[2026-06-07 02:23:37,359][324535] Updated weights for policy 0, policy_version 26433 (0.0008) -[2026-06-07 02:23:38,116][324535] Updated weights for policy 0, policy_version 26443 (0.0007) -[2026-06-07 02:23:38,333][324535] Updated weights for policy 0, policy_version 26453 (0.0007) -[2026-06-07 02:23:38,544][324535] Updated weights for policy 0, policy_version 26463 (0.0007) -[2026-06-07 02:23:38,781][324535] Updated weights for policy 0, policy_version 26473 (0.0007) -[2026-06-07 02:23:38,999][324535] Updated weights for policy 0, policy_version 26483 (0.0007) -[2026-06-07 02:23:39,211][324535] Updated weights for policy 0, policy_version 26493 (0.0007) -[2026-06-07 02:23:39,430][324535] Updated weights for policy 0, policy_version 26503 (0.0007) -[2026-06-07 02:23:40,163][324535] Updated weights for policy 0, policy_version 26513 (0.0007) -[2026-06-07 02:23:40,363][324535] Updated weights for policy 0, policy_version 26523 (0.0007) -[2026-06-07 02:23:40,569][324535] Updated weights for policy 0, policy_version 26533 (0.0007) -[2026-06-07 02:23:40,767][324535] Updated weights for policy 0, policy_version 26543 (0.0007) -[2026-06-07 02:23:40,980][324535] Updated weights for policy 0, policy_version 26553 (0.0007) -[2026-06-07 02:23:41,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 13598720. Throughput: 0: 17496.2. Samples: 13627392. Policy #0 lag: (min: 63.0, avg: 79.2, max: 127.0) -[2026-06-07 02:23:41,011][321787] Avg episode reward: [(0, '1282.272')] -[2026-06-07 02:23:41,200][324535] Updated weights for policy 0, policy_version 26563 (0.0007) -[2026-06-07 02:23:41,297][324273] Saving new best policy, reward=1282.272! -[2026-06-07 02:23:41,930][324535] Updated weights for policy 0, policy_version 26573 (0.0007) -[2026-06-07 02:23:42,153][324535] Updated weights for policy 0, policy_version 26583 (0.0007) -[2026-06-07 02:23:42,362][324535] Updated weights for policy 0, policy_version 26593 (0.0007) -[2026-06-07 02:23:42,559][324535] Updated weights for policy 0, policy_version 26603 (0.0007) -[2026-06-07 02:23:42,761][324535] Updated weights for policy 0, policy_version 26613 (0.0007) -[2026-06-07 02:23:42,978][324535] Updated weights for policy 0, policy_version 26623 (0.0007) -[2026-06-07 02:23:43,739][324535] Updated weights for policy 0, policy_version 26633 (0.0007) -[2026-06-07 02:23:43,948][324535] Updated weights for policy 0, policy_version 26643 (0.0007) -[2026-06-07 02:23:44,133][324535] Updated weights for policy 0, policy_version 26653 (0.0007) -[2026-06-07 02:23:44,346][324535] Updated weights for policy 0, policy_version 26663 (0.0007) -[2026-06-07 02:23:44,549][324535] Updated weights for policy 0, policy_version 26673 (0.0007) -[2026-06-07 02:23:44,763][324535] Updated weights for policy 0, policy_version 26684 (0.0007) -[2026-06-07 02:23:44,997][324535] Updated weights for policy 0, policy_version 26695 (0.0007) -[2026-06-07 02:23:45,810][324535] Updated weights for policy 0, policy_version 26706 (0.0007) -[2026-06-07 02:23:46,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 13697024. Throughput: 0: 17325.5. Samples: 13732992. Policy #0 lag: (min: 63.0, avg: 79.2, max: 127.0) -[2026-06-07 02:23:46,011][321787] Avg episode reward: [(0, '1252.159')] -[2026-06-07 02:23:46,020][324535] Updated weights for policy 0, policy_version 26716 (0.0007) -[2026-06-07 02:23:46,234][324535] Updated weights for policy 0, policy_version 26726 (0.0007) -[2026-06-07 02:23:46,421][324535] Updated weights for policy 0, policy_version 26736 (0.0007) -[2026-06-07 02:23:46,642][324535] Updated weights for policy 0, policy_version 26746 (0.0007) -[2026-06-07 02:23:46,841][324535] Updated weights for policy 0, policy_version 26756 (0.0007) -[2026-06-07 02:23:47,605][324535] Updated weights for policy 0, policy_version 26767 (0.0007) -[2026-06-07 02:23:47,846][324535] Updated weights for policy 0, policy_version 26777 (0.0007) -[2026-06-07 02:23:48,051][324535] Updated weights for policy 0, policy_version 26787 (0.0007) -[2026-06-07 02:23:48,255][324535] Updated weights for policy 0, policy_version 26797 (0.0007) -[2026-06-07 02:23:48,461][324535] Updated weights for policy 0, policy_version 26807 (0.0007) -[2026-06-07 02:23:48,670][324535] Updated weights for policy 0, policy_version 26817 (0.0007) -[2026-06-07 02:23:49,459][324535] Updated weights for policy 0, policy_version 26828 (0.0007) -[2026-06-07 02:23:49,662][324535] Updated weights for policy 0, policy_version 26838 (0.0007) -[2026-06-07 02:23:49,872][324535] Updated weights for policy 0, policy_version 26848 (0.0007) -[2026-06-07 02:23:50,093][324535] Updated weights for policy 0, policy_version 26858 (0.0007) -[2026-06-07 02:23:50,305][324535] Updated weights for policy 0, policy_version 26868 (0.0007) -[2026-06-07 02:23:50,542][324535] Updated weights for policy 0, policy_version 26879 (0.0007) -[2026-06-07 02:23:51,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 13795328. Throughput: 0: 17510.5. Samples: 13786752. Policy #0 lag: (min: 63.0, avg: 79.2, max: 127.0) -[2026-06-07 02:23:51,011][321787] Avg episode reward: [(0, '1240.531')] -[2026-06-07 02:23:51,295][324535] Updated weights for policy 0, policy_version 26889 (0.0007) -[2026-06-07 02:23:51,484][324535] Updated weights for policy 0, policy_version 26899 (0.0007) -[2026-06-07 02:23:51,706][324535] Updated weights for policy 0, policy_version 26909 (0.0007) -[2026-06-07 02:23:51,914][324535] Updated weights for policy 0, policy_version 26919 (0.0007) -[2026-06-07 02:23:52,123][324535] Updated weights for policy 0, policy_version 26929 (0.0007) -[2026-06-07 02:23:52,341][324535] Updated weights for policy 0, policy_version 26939 (0.0007) -[2026-06-07 02:23:52,557][324535] Updated weights for policy 0, policy_version 26949 (0.0007) -[2026-06-07 02:23:53,283][324535] Updated weights for policy 0, policy_version 26959 (0.0007) -[2026-06-07 02:23:53,501][324535] Updated weights for policy 0, policy_version 26969 (0.0007) -[2026-06-07 02:23:53,721][324535] Updated weights for policy 0, policy_version 26980 (0.0007) -[2026-06-07 02:23:53,924][324535] Updated weights for policy 0, policy_version 26990 (0.0007) -[2026-06-07 02:23:54,130][324535] Updated weights for policy 0, policy_version 27000 (0.0007) -[2026-06-07 02:23:54,345][324535] Updated weights for policy 0, policy_version 27010 (0.0007) -[2026-06-07 02:23:55,089][324535] Updated weights for policy 0, policy_version 27020 (0.0007) -[2026-06-07 02:23:55,300][324535] Updated weights for policy 0, policy_version 27030 (0.0007) -[2026-06-07 02:23:55,523][324535] Updated weights for policy 0, policy_version 27040 (0.0007) -[2026-06-07 02:23:55,728][324535] Updated weights for policy 0, policy_version 27050 (0.0007) -[2026-06-07 02:23:55,937][324535] Updated weights for policy 0, policy_version 27060 (0.0007) -[2026-06-07 02:23:56,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 13860864. Throughput: 0: 17464.9. Samples: 13888512. Policy #0 lag: (min: 63.0, avg: 79.2, max: 127.0) -[2026-06-07 02:23:56,011][321787] Avg episode reward: [(0, '1233.670')] -[2026-06-07 02:23:56,135][324535] Updated weights for policy 0, policy_version 27070 (0.0007) -[2026-06-07 02:23:56,342][324535] Updated weights for policy 0, policy_version 27080 (0.0007) -[2026-06-07 02:23:57,094][324535] Updated weights for policy 0, policy_version 27090 (0.0007) -[2026-06-07 02:23:57,297][324535] Updated weights for policy 0, policy_version 27100 (0.0007) -[2026-06-07 02:23:57,527][324535] Updated weights for policy 0, policy_version 27110 (0.0007) -[2026-06-07 02:23:57,729][324535] Updated weights for policy 0, policy_version 27120 (0.0007) -[2026-06-07 02:23:57,936][324535] Updated weights for policy 0, policy_version 27130 (0.0007) -[2026-06-07 02:23:58,140][324535] Updated weights for policy 0, policy_version 27140 (0.0007) -[2026-06-07 02:23:58,876][324535] Updated weights for policy 0, policy_version 27152 (0.0007) -[2026-06-07 02:23:59,081][324535] Updated weights for policy 0, policy_version 27162 (0.0007) -[2026-06-07 02:23:59,287][324535] Updated weights for policy 0, policy_version 27173 (0.0009) -[2026-06-07 02:23:59,514][324535] Updated weights for policy 0, policy_version 27183 (0.0011) -[2026-06-07 02:23:59,720][324535] Updated weights for policy 0, policy_version 27193 (0.0011) -[2026-06-07 02:23:59,941][324535] Updated weights for policy 0, policy_version 27203 (0.0011) -[2026-06-07 02:24:00,718][324535] Updated weights for policy 0, policy_version 27213 (0.0011) -[2026-06-07 02:24:00,924][324535] Updated weights for policy 0, policy_version 27224 (0.0011) -[2026-06-07 02:24:01,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 13959168. Throughput: 0: 17345.4. Samples: 13992320. Policy #0 lag: (min: 100.0, avg: 127.3, max: 167.0) -[2026-06-07 02:24:01,011][321787] Avg episode reward: [(0, '1275.759')] -[2026-06-07 02:24:01,138][324535] Updated weights for policy 0, policy_version 27234 (0.0008) -[2026-06-07 02:24:01,349][324535] Updated weights for policy 0, policy_version 27244 (0.0007) -[2026-06-07 02:24:01,550][324535] Updated weights for policy 0, policy_version 27254 (0.0007) -[2026-06-07 02:24:01,765][324535] Updated weights for policy 0, policy_version 27264 (0.0007) -[2026-06-07 02:24:02,495][324535] Updated weights for policy 0, policy_version 27274 (0.0008) -[2026-06-07 02:24:02,703][324535] Updated weights for policy 0, policy_version 27284 (0.0007) -[2026-06-07 02:24:02,907][324535] Updated weights for policy 0, policy_version 27294 (0.0007) -[2026-06-07 02:24:03,115][324535] Updated weights for policy 0, policy_version 27304 (0.0007) -[2026-06-07 02:24:03,331][324535] Updated weights for policy 0, policy_version 27314 (0.0007) -[2026-06-07 02:24:03,591][324535] Updated weights for policy 0, policy_version 27326 (0.0007) -[2026-06-07 02:24:03,807][324535] Updated weights for policy 0, policy_version 27336 (0.0007) -[2026-06-07 02:24:04,522][324535] Updated weights for policy 0, policy_version 27346 (0.0007) -[2026-06-07 02:24:04,736][324535] Updated weights for policy 0, policy_version 27356 (0.0007) -[2026-06-07 02:24:04,944][324535] Updated weights for policy 0, policy_version 27366 (0.0007) -[2026-06-07 02:24:05,167][324535] Updated weights for policy 0, policy_version 27376 (0.0007) -[2026-06-07 02:24:05,368][324535] Updated weights for policy 0, policy_version 27386 (0.0007) -[2026-06-07 02:24:05,572][324535] Updated weights for policy 0, policy_version 27396 (0.0007) -[2026-06-07 02:24:06,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 14057472. Throughput: 0: 17410.8. Samples: 14045824. Policy #0 lag: (min: 100.0, avg: 127.3, max: 167.0) -[2026-06-07 02:24:06,011][321787] Avg episode reward: [(0, '1244.169')] -[2026-06-07 02:24:06,348][324535] Updated weights for policy 0, policy_version 27407 (0.0007) -[2026-06-07 02:24:06,591][324535] Updated weights for policy 0, policy_version 27418 (0.0007) -[2026-06-07 02:24:06,828][324535] Updated weights for policy 0, policy_version 27429 (0.0007) -[2026-06-07 02:24:07,027][324535] Updated weights for policy 0, policy_version 27439 (0.0007) -[2026-06-07 02:24:07,259][324535] Updated weights for policy 0, policy_version 27450 (0.0007) -[2026-06-07 02:24:07,486][324535] Updated weights for policy 0, policy_version 27460 (0.0007) -[2026-06-07 02:24:08,214][324535] Updated weights for policy 0, policy_version 27470 (0.0007) -[2026-06-07 02:24:08,430][324535] Updated weights for policy 0, policy_version 27480 (0.0007) -[2026-06-07 02:24:08,647][324535] Updated weights for policy 0, policy_version 27490 (0.0007) -[2026-06-07 02:24:08,848][324535] Updated weights for policy 0, policy_version 27500 (0.0007) -[2026-06-07 02:24:09,078][324535] Updated weights for policy 0, policy_version 27510 (0.0007) -[2026-06-07 02:24:09,277][324535] Updated weights for policy 0, policy_version 27520 (0.0007) -[2026-06-07 02:24:10,038][324535] Updated weights for policy 0, policy_version 27530 (0.0007) -[2026-06-07 02:24:10,254][324535] Updated weights for policy 0, policy_version 27540 (0.0007) -[2026-06-07 02:24:10,461][324535] Updated weights for policy 0, policy_version 27550 (0.0007) -[2026-06-07 02:24:10,679][324535] Updated weights for policy 0, policy_version 27560 (0.0007) -[2026-06-07 02:24:10,900][324535] Updated weights for policy 0, policy_version 27571 (0.0007) -[2026-06-07 02:24:11,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 14123008. Throughput: 0: 17385.2. Samples: 14147200. Policy #0 lag: (min: 100.0, avg: 127.3, max: 167.0) -[2026-06-07 02:24:11,011][321787] Avg episode reward: [(0, '1265.150')] -[2026-06-07 02:24:11,111][324535] Updated weights for policy 0, policy_version 27581 (0.0007) -[2026-06-07 02:24:11,320][324535] Updated weights for policy 0, policy_version 27591 (0.0007) -[2026-06-07 02:24:12,061][324535] Updated weights for policy 0, policy_version 27601 (0.0007) -[2026-06-07 02:24:12,280][324535] Updated weights for policy 0, policy_version 27611 (0.0007) -[2026-06-07 02:24:12,478][324535] Updated weights for policy 0, policy_version 27621 (0.0007) -[2026-06-07 02:24:12,688][324535] Updated weights for policy 0, policy_version 27631 (0.0007) -[2026-06-07 02:24:12,903][324535] Updated weights for policy 0, policy_version 27641 (0.0007) -[2026-06-07 02:24:13,118][324535] Updated weights for policy 0, policy_version 27651 (0.0007) -[2026-06-07 02:24:13,819][324535] Updated weights for policy 0, policy_version 27661 (0.0007) -[2026-06-07 02:24:14,034][324535] Updated weights for policy 0, policy_version 27671 (0.0007) -[2026-06-07 02:24:14,240][324535] Updated weights for policy 0, policy_version 27681 (0.0007) -[2026-06-07 02:24:14,461][324535] Updated weights for policy 0, policy_version 27691 (0.0007) -[2026-06-07 02:24:14,669][324535] Updated weights for policy 0, policy_version 27701 (0.0007) -[2026-06-07 02:24:14,886][324535] Updated weights for policy 0, policy_version 27711 (0.0007) -[2026-06-07 02:24:15,675][324535] Updated weights for policy 0, policy_version 27721 (0.0007) -[2026-06-07 02:24:15,888][324535] Updated weights for policy 0, policy_version 27731 (0.0007) -[2026-06-07 02:24:16,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 14221312. Throughput: 0: 17271.5. Samples: 14251392. Policy #0 lag: (min: 100.0, avg: 127.3, max: 167.0) -[2026-06-07 02:24:16,011][321787] Avg episode reward: [(0, '1246.093')] -[2026-06-07 02:24:16,089][324535] Updated weights for policy 0, policy_version 27741 (0.0007) -[2026-06-07 02:24:16,306][324535] Updated weights for policy 0, policy_version 27751 (0.0007) -[2026-06-07 02:24:16,536][324535] Updated weights for policy 0, policy_version 27761 (0.0007) -[2026-06-07 02:24:16,748][324535] Updated weights for policy 0, policy_version 27771 (0.0007) -[2026-06-07 02:24:16,974][324535] Updated weights for policy 0, policy_version 27782 (0.0007) -[2026-06-07 02:24:17,683][324535] Updated weights for policy 0, policy_version 27792 (0.0007) -[2026-06-07 02:24:17,913][324535] Updated weights for policy 0, policy_version 27803 (0.0007) -[2026-06-07 02:24:18,122][324535] Updated weights for policy 0, policy_version 27813 (0.0007) -[2026-06-07 02:24:18,345][324535] Updated weights for policy 0, policy_version 27823 (0.0007) -[2026-06-07 02:24:18,550][324535] Updated weights for policy 0, policy_version 27833 (0.0007) -[2026-06-07 02:24:18,769][324535] Updated weights for policy 0, policy_version 27843 (0.0007) -[2026-06-07 02:24:19,522][324535] Updated weights for policy 0, policy_version 27853 (0.0007) -[2026-06-07 02:24:19,732][324535] Updated weights for policy 0, policy_version 27863 (0.0007) -[2026-06-07 02:24:19,960][324535] Updated weights for policy 0, policy_version 27873 (0.0007) -[2026-06-07 02:24:20,194][324535] Updated weights for policy 0, policy_version 27884 (0.0007) -[2026-06-07 02:24:20,395][324535] Updated weights for policy 0, policy_version 27894 (0.0007) -[2026-06-07 02:24:20,605][324273] Early stopping after 7 epochs (56 sgd steps), loss delta 0.0000002 -[2026-06-07 02:24:20,608][324535] Updated weights for policy 0, policy_version 27904 (0.0007) -[2026-06-07 02:24:21,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 14319616. Throughput: 0: 17373.9. Samples: 14306432. Policy #0 lag: (min: 21.0, avg: 36.6, max: 85.0) -[2026-06-07 02:24:21,011][321787] Avg episode reward: [(0, '1247.028')] -[2026-06-07 02:24:21,419][324535] Updated weights for policy 0, policy_version 27914 (0.0007) -[2026-06-07 02:24:21,625][324535] Updated weights for policy 0, policy_version 27924 (0.0007) -[2026-06-07 02:24:21,826][324535] Updated weights for policy 0, policy_version 27934 (0.0007) -[2026-06-07 02:24:22,035][324535] Updated weights for policy 0, policy_version 27944 (0.0007) -[2026-06-07 02:24:22,267][324535] Updated weights for policy 0, policy_version 27955 (0.0007) -[2026-06-07 02:24:22,535][324535] Updated weights for policy 0, policy_version 27967 (0.0007) -[2026-06-07 02:24:23,286][324535] Updated weights for policy 0, policy_version 27977 (0.0007) -[2026-06-07 02:24:23,497][324535] Updated weights for policy 0, policy_version 27987 (0.0007) -[2026-06-07 02:24:23,706][324535] Updated weights for policy 0, policy_version 27997 (0.0007) -[2026-06-07 02:24:23,934][324535] Updated weights for policy 0, policy_version 28007 (0.0007) -[2026-06-07 02:24:24,187][324535] Updated weights for policy 0, policy_version 28019 (0.0007) -[2026-06-07 02:24:24,386][324535] Updated weights for policy 0, policy_version 28029 (0.0007) -[2026-06-07 02:24:25,144][324535] Updated weights for policy 0, policy_version 28039 (0.0007) -[2026-06-07 02:24:25,345][324535] Updated weights for policy 0, policy_version 28049 (0.0007) -[2026-06-07 02:24:25,555][324535] Updated weights for policy 0, policy_version 28059 (0.0007) -[2026-06-07 02:24:25,772][324535] Updated weights for policy 0, policy_version 28070 (0.0007) -[2026-06-07 02:24:25,988][324535] Updated weights for policy 0, policy_version 28080 (0.0007) -[2026-06-07 02:24:26,010][321787] Fps is (10 sec: 16383.9, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 14385152. Throughput: 0: 17351.1. Samples: 14408192. Policy #0 lag: (min: 21.0, avg: 36.6, max: 85.0) -[2026-06-07 02:24:26,011][321787] Avg episode reward: [(0, '1233.459')] -[2026-06-07 02:24:26,218][324535] Updated weights for policy 0, policy_version 28090 (0.0008) -[2026-06-07 02:24:26,966][324535] Updated weights for policy 0, policy_version 28100 (0.0009) -[2026-06-07 02:24:27,169][324535] Updated weights for policy 0, policy_version 28110 (0.0007) -[2026-06-07 02:24:27,391][324535] Updated weights for policy 0, policy_version 28120 (0.0007) -[2026-06-07 02:24:27,605][324535] Updated weights for policy 0, policy_version 28130 (0.0007) -[2026-06-07 02:24:27,840][324535] Updated weights for policy 0, policy_version 28140 (0.0007) -[2026-06-07 02:24:28,034][324535] Updated weights for policy 0, policy_version 28150 (0.0007) -[2026-06-07 02:24:28,807][324535] Updated weights for policy 0, policy_version 28161 (0.0007) -[2026-06-07 02:24:29,003][324535] Updated weights for policy 0, policy_version 28171 (0.0007) -[2026-06-07 02:24:29,185][324535] Updated weights for policy 0, policy_version 28181 (0.0007) -[2026-06-07 02:24:29,383][324535] Updated weights for policy 0, policy_version 28191 (0.0007) -[2026-06-07 02:24:29,628][324535] Updated weights for policy 0, policy_version 28203 (0.0007) -[2026-06-07 02:24:29,831][324535] Updated weights for policy 0, policy_version 28213 (0.0007) -[2026-06-07 02:24:30,039][324535] Updated weights for policy 0, policy_version 28223 (0.0007) -[2026-06-07 02:24:30,819][324535] Updated weights for policy 0, policy_version 28233 (0.0007) -[2026-06-07 02:24:31,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 14483456. Throughput: 0: 17336.9. Samples: 14513152. Policy #0 lag: (min: 21.0, avg: 36.6, max: 85.0) -[2026-06-07 02:24:31,011][321787] Avg episode reward: [(0, '1248.745')] -[2026-06-07 02:24:31,033][324535] Updated weights for policy 0, policy_version 28243 (0.0007) -[2026-06-07 02:24:31,263][324535] Updated weights for policy 0, policy_version 28253 (0.0007) -[2026-06-07 02:24:31,468][324535] Updated weights for policy 0, policy_version 28263 (0.0007) -[2026-06-07 02:24:31,680][324535] Updated weights for policy 0, policy_version 28273 (0.0007) -[2026-06-07 02:24:31,870][324535] Updated weights for policy 0, policy_version 28283 (0.0007) -[2026-06-07 02:24:32,588][324535] Updated weights for policy 0, policy_version 28293 (0.0007) -[2026-06-07 02:24:32,796][324535] Updated weights for policy 0, policy_version 28303 (0.0007) -[2026-06-07 02:24:32,993][324535] Updated weights for policy 0, policy_version 28313 (0.0007) -[2026-06-07 02:24:33,208][324535] Updated weights for policy 0, policy_version 28323 (0.0007) -[2026-06-07 02:24:33,404][324535] Updated weights for policy 0, policy_version 28333 (0.0007) -[2026-06-07 02:24:33,634][324535] Updated weights for policy 0, policy_version 28343 (0.0007) -[2026-06-07 02:24:34,372][324535] Updated weights for policy 0, policy_version 28353 (0.0007) -[2026-06-07 02:24:34,580][324535] Updated weights for policy 0, policy_version 28363 (0.0007) -[2026-06-07 02:24:34,791][324535] Updated weights for policy 0, policy_version 28373 (0.0007) -[2026-06-07 02:24:34,987][324535] Updated weights for policy 0, policy_version 28383 (0.0007) -[2026-06-07 02:24:35,194][324535] Updated weights for policy 0, policy_version 28393 (0.0007) -[2026-06-07 02:24:35,397][324535] Updated weights for policy 0, policy_version 28403 (0.0007) -[2026-06-07 02:24:35,616][324535] Updated weights for policy 0, policy_version 28413 (0.0008) -[2026-06-07 02:24:36,010][321787] Fps is (10 sec: 19660.7, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 14581760. Throughput: 0: 17305.5. Samples: 14565504. Policy #0 lag: (min: 21.0, avg: 36.6, max: 85.0) -[2026-06-07 02:24:36,011][321787] Avg episode reward: [(0, '1239.510')] -[2026-06-07 02:24:36,358][324535] Updated weights for policy 0, policy_version 28423 (0.0007) -[2026-06-07 02:24:36,556][324535] Updated weights for policy 0, policy_version 28433 (0.0007) -[2026-06-07 02:24:36,766][324535] Updated weights for policy 0, policy_version 28443 (0.0007) -[2026-06-07 02:24:36,976][324535] Updated weights for policy 0, policy_version 28453 (0.0011) -[2026-06-07 02:24:37,182][324535] Updated weights for policy 0, policy_version 28463 (0.0011) -[2026-06-07 02:24:37,420][324535] Updated weights for policy 0, policy_version 28474 (0.0011) -[2026-06-07 02:24:38,177][324535] Updated weights for policy 0, policy_version 28484 (0.0010) -[2026-06-07 02:24:38,399][324535] Updated weights for policy 0, policy_version 28494 (0.0007) -[2026-06-07 02:24:38,590][324535] Updated weights for policy 0, policy_version 28504 (0.0007) -[2026-06-07 02:24:38,806][324535] Updated weights for policy 0, policy_version 28514 (0.0007) -[2026-06-07 02:24:39,025][324535] Updated weights for policy 0, policy_version 28524 (0.0007) -[2026-06-07 02:24:39,224][324535] Updated weights for policy 0, policy_version 28534 (0.0007) -[2026-06-07 02:24:39,437][324535] Updated weights for policy 0, policy_version 28544 (0.0008) -[2026-06-07 02:24:40,208][324535] Updated weights for policy 0, policy_version 28554 (0.0007) -[2026-06-07 02:24:40,440][324535] Updated weights for policy 0, policy_version 28565 (0.0007) -[2026-06-07 02:24:40,661][324535] Updated weights for policy 0, policy_version 28576 (0.0007) -[2026-06-07 02:24:40,865][324535] Updated weights for policy 0, policy_version 28586 (0.0007) -[2026-06-07 02:24:41,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 14647296. Throughput: 0: 17288.5. Samples: 14666496. Policy #0 lag: (min: 21.0, avg: 36.6, max: 85.0) -[2026-06-07 02:24:41,011][321787] Avg episode reward: [(0, '1225.517')] -[2026-06-07 02:24:41,071][324535] Updated weights for policy 0, policy_version 28596 (0.0007) -[2026-06-07 02:24:41,308][324535] Updated weights for policy 0, policy_version 28607 (0.0007) -[2026-06-07 02:24:42,063][324535] Updated weights for policy 0, policy_version 28618 (0.0009) -[2026-06-07 02:24:42,255][324535] Updated weights for policy 0, policy_version 28628 (0.0007) -[2026-06-07 02:24:42,450][324535] Updated weights for policy 0, policy_version 28638 (0.0007) -[2026-06-07 02:24:42,679][324535] Updated weights for policy 0, policy_version 28649 (0.0007) -[2026-06-07 02:24:42,881][324535] Updated weights for policy 0, policy_version 28659 (0.0007) -[2026-06-07 02:24:43,102][324535] Updated weights for policy 0, policy_version 28669 (0.0007) -[2026-06-07 02:24:43,864][324535] Updated weights for policy 0, policy_version 28679 (0.0007) -[2026-06-07 02:24:44,069][324535] Updated weights for policy 0, policy_version 28689 (0.0007) -[2026-06-07 02:24:44,279][324535] Updated weights for policy 0, policy_version 28699 (0.0007) -[2026-06-07 02:24:44,525][324535] Updated weights for policy 0, policy_version 28710 (0.0007) -[2026-06-07 02:24:44,728][324535] Updated weights for policy 0, policy_version 28720 (0.0007) -[2026-06-07 02:24:44,939][324535] Updated weights for policy 0, policy_version 28730 (0.0007) -[2026-06-07 02:24:45,686][324535] Updated weights for policy 0, policy_version 28740 (0.0007) -[2026-06-07 02:24:45,886][324535] Updated weights for policy 0, policy_version 28750 (0.0007) -[2026-06-07 02:24:46,010][321787] Fps is (10 sec: 16384.3, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 14745600. Throughput: 0: 17317.0. Samples: 14771584. Policy #0 lag: (min: 63.0, avg: 78.0, max: 127.0) -[2026-06-07 02:24:46,011][321787] Avg episode reward: [(0, '1211.836')] -[2026-06-07 02:24:46,080][324535] Updated weights for policy 0, policy_version 28760 (0.0007) -[2026-06-07 02:24:46,294][324535] Updated weights for policy 0, policy_version 28770 (0.0007) -[2026-06-07 02:24:46,517][324535] Updated weights for policy 0, policy_version 28780 (0.0007) -[2026-06-07 02:24:46,705][324535] Updated weights for policy 0, policy_version 28790 (0.0007) -[2026-06-07 02:24:47,490][324535] Updated weights for policy 0, policy_version 28801 (0.0007) -[2026-06-07 02:24:47,702][324535] Updated weights for policy 0, policy_version 28811 (0.0007) -[2026-06-07 02:24:47,911][324535] Updated weights for policy 0, policy_version 28821 (0.0007) -[2026-06-07 02:24:48,114][324535] Updated weights for policy 0, policy_version 28831 (0.0007) -[2026-06-07 02:24:48,334][324535] Updated weights for policy 0, policy_version 28842 (0.0007) -[2026-06-07 02:24:48,550][324535] Updated weights for policy 0, policy_version 28852 (0.0007) -[2026-06-07 02:24:48,775][324535] Updated weights for policy 0, policy_version 28862 (0.0007) -[2026-06-07 02:24:49,546][324535] Updated weights for policy 0, policy_version 28872 (0.0007) -[2026-06-07 02:24:49,739][324535] Updated weights for policy 0, policy_version 28882 (0.0007) -[2026-06-07 02:24:49,982][324535] Updated weights for policy 0, policy_version 28893 (0.0007) -[2026-06-07 02:24:50,184][324535] Updated weights for policy 0, policy_version 28903 (0.0007) -[2026-06-07 02:24:50,371][324535] Updated weights for policy 0, policy_version 28913 (0.0011) -[2026-06-07 02:24:50,615][324535] Updated weights for policy 0, policy_version 28924 (0.0011) -[2026-06-07 02:24:51,010][321787] Fps is (10 sec: 19660.5, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 14843904. Throughput: 0: 17328.3. Samples: 14825600. Policy #0 lag: (min: 63.0, avg: 78.0, max: 127.0) -[2026-06-07 02:24:51,011][321787] Avg episode reward: [(0, '1189.076')] -[2026-06-07 02:24:51,353][324535] Updated weights for policy 0, policy_version 28934 (0.0007) -[2026-06-07 02:24:51,562][324535] Updated weights for policy 0, policy_version 28944 (0.0007) -[2026-06-07 02:24:51,766][324535] Updated weights for policy 0, policy_version 28954 (0.0007) -[2026-06-07 02:24:51,992][324535] Updated weights for policy 0, policy_version 28964 (0.0007) -[2026-06-07 02:24:52,202][324535] Updated weights for policy 0, policy_version 28974 (0.0007) -[2026-06-07 02:24:52,439][324535] Updated weights for policy 0, policy_version 28985 (0.0007) -[2026-06-07 02:24:53,197][324535] Updated weights for policy 0, policy_version 28995 (0.0007) -[2026-06-07 02:24:53,413][324535] Updated weights for policy 0, policy_version 29005 (0.0007) -[2026-06-07 02:24:53,644][324535] Updated weights for policy 0, policy_version 29015 (0.0007) -[2026-06-07 02:24:53,844][324535] Updated weights for policy 0, policy_version 29025 (0.0007) -[2026-06-07 02:24:54,047][324535] Updated weights for policy 0, policy_version 29035 (0.0007) -[2026-06-07 02:24:54,249][324535] Updated weights for policy 0, policy_version 29045 (0.0007) -[2026-06-07 02:24:54,457][324535] Updated weights for policy 0, policy_version 29055 (0.0007) -[2026-06-07 02:24:55,226][324535] Updated weights for policy 0, policy_version 29066 (0.0007) -[2026-06-07 02:24:55,442][324535] Updated weights for policy 0, policy_version 29076 (0.0007) -[2026-06-07 02:24:55,663][324535] Updated weights for policy 0, policy_version 29087 (0.0007) -[2026-06-07 02:24:55,852][324535] Updated weights for policy 0, policy_version 29097 (0.0007) -[2026-06-07 02:24:56,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 14909440. Throughput: 0: 17336.9. Samples: 14927360. Policy #0 lag: (min: 63.0, avg: 78.0, max: 127.0) -[2026-06-07 02:24:56,011][321787] Avg episode reward: [(0, '1251.005')] -[2026-06-07 02:24:56,074][324535] Updated weights for policy 0, policy_version 29107 (0.0007) -[2026-06-07 02:24:56,329][324535] Updated weights for policy 0, policy_version 29119 (0.0007) -[2026-06-07 02:24:57,055][324535] Updated weights for policy 0, policy_version 29129 (0.0007) -[2026-06-07 02:24:57,253][324535] Updated weights for policy 0, policy_version 29139 (0.0007) -[2026-06-07 02:24:57,450][324535] Updated weights for policy 0, policy_version 29149 (0.0007) -[2026-06-07 02:24:57,666][324535] Updated weights for policy 0, policy_version 29159 (0.0007) -[2026-06-07 02:24:57,888][324535] Updated weights for policy 0, policy_version 29169 (0.0007) -[2026-06-07 02:24:58,107][324535] Updated weights for policy 0, policy_version 29179 (0.0007) -[2026-06-07 02:24:58,829][324535] Updated weights for policy 0, policy_version 29189 (0.0007) -[2026-06-07 02:24:59,015][324535] Updated weights for policy 0, policy_version 29199 (0.0007) -[2026-06-07 02:24:59,227][324535] Updated weights for policy 0, policy_version 29209 (0.0007) -[2026-06-07 02:24:59,428][324535] Updated weights for policy 0, policy_version 29219 (0.0007) -[2026-06-07 02:24:59,645][324535] Updated weights for policy 0, policy_version 29229 (0.0007) -[2026-06-07 02:24:59,854][324535] Updated weights for policy 0, policy_version 29239 (0.0007) -[2026-06-07 02:25:00,600][324535] Updated weights for policy 0, policy_version 29249 (0.0007) -[2026-06-07 02:25:00,809][324535] Updated weights for policy 0, policy_version 29259 (0.0007) -[2026-06-07 02:25:01,010][321787] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 15007744. Throughput: 0: 17379.6. Samples: 15033472. Policy #0 lag: (min: 63.0, avg: 78.0, max: 127.0) -[2026-06-07 02:25:01,011][321787] Avg episode reward: [(0, '1218.393')] -[2026-06-07 02:25:01,017][324535] Updated weights for policy 0, policy_version 29269 (0.0007) -[2026-06-07 02:25:01,228][324535] Updated weights for policy 0, policy_version 29279 (0.0007) -[2026-06-07 02:25:01,426][324535] Updated weights for policy 0, policy_version 29289 (0.0007) -[2026-06-07 02:25:01,637][324535] Updated weights for policy 0, policy_version 29299 (0.0007) -[2026-06-07 02:25:01,838][324535] Updated weights for policy 0, policy_version 29309 (0.0007) -[2026-06-07 02:25:02,560][324535] Updated weights for policy 0, policy_version 29319 (0.0007) -[2026-06-07 02:25:02,776][324535] Updated weights for policy 0, policy_version 29329 (0.0007) -[2026-06-07 02:25:02,980][324535] Updated weights for policy 0, policy_version 29339 (0.0007) -[2026-06-07 02:25:03,205][324535] Updated weights for policy 0, policy_version 29350 (0.0007) -[2026-06-07 02:25:03,425][324535] Updated weights for policy 0, policy_version 29361 (0.0007) -[2026-06-07 02:25:03,631][324535] Updated weights for policy 0, policy_version 29371 (0.0007) -[2026-06-07 02:25:04,354][324535] Updated weights for policy 0, policy_version 29381 (0.0007) -[2026-06-07 02:25:04,577][324535] Updated weights for policy 0, policy_version 29391 (0.0007) -[2026-06-07 02:25:04,783][324535] Updated weights for policy 0, policy_version 29401 (0.0007) -[2026-06-07 02:25:05,014][324535] Updated weights for policy 0, policy_version 29412 (0.0007) -[2026-06-07 02:25:05,217][324535] Updated weights for policy 0, policy_version 29422 (0.0007) -[2026-06-07 02:25:05,410][324535] Updated weights for policy 0, policy_version 29432 (0.0007) -[2026-06-07 02:25:06,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 15106048. Throughput: 0: 17345.4. Samples: 15086976. Policy #0 lag: (min: 49.0, avg: 82.0, max: 113.0) -[2026-06-07 02:25:06,011][321787] Avg episode reward: [(0, '1223.791')] -[2026-06-07 02:25:06,187][324535] Updated weights for policy 0, policy_version 29442 (0.0007) -[2026-06-07 02:25:06,388][324535] Updated weights for policy 0, policy_version 29452 (0.0007) -[2026-06-07 02:25:06,609][324535] Updated weights for policy 0, policy_version 29462 (0.0007) -[2026-06-07 02:25:06,815][324535] Updated weights for policy 0, policy_version 29472 (0.0007) -[2026-06-07 02:25:07,031][324535] Updated weights for policy 0, policy_version 29482 (0.0007) -[2026-06-07 02:25:07,218][324535] Updated weights for policy 0, policy_version 29492 (0.0007) -[2026-06-07 02:25:07,427][324535] Updated weights for policy 0, policy_version 29502 (0.0007) -[2026-06-07 02:25:08,191][324535] Updated weights for policy 0, policy_version 29512 (0.0007) -[2026-06-07 02:25:08,389][324535] Updated weights for policy 0, policy_version 29522 (0.0008) -[2026-06-07 02:25:08,577][324535] Updated weights for policy 0, policy_version 29532 (0.0010) -[2026-06-07 02:25:08,786][324535] Updated weights for policy 0, policy_version 29542 (0.0007) -[2026-06-07 02:25:09,016][324535] Updated weights for policy 0, policy_version 29553 (0.0007) -[2026-06-07 02:25:09,253][324535] Updated weights for policy 0, policy_version 29564 (0.0007) -[2026-06-07 02:25:09,983][324535] Updated weights for policy 0, policy_version 29574 (0.0007) -[2026-06-07 02:25:10,207][324535] Updated weights for policy 0, policy_version 29584 (0.0007) -[2026-06-07 02:25:10,411][324535] Updated weights for policy 0, policy_version 29594 (0.0007) -[2026-06-07 02:25:10,622][324535] Updated weights for policy 0, policy_version 29604 (0.0007) -[2026-06-07 02:25:10,845][324535] Updated weights for policy 0, policy_version 29614 (0.0007) -[2026-06-07 02:25:11,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 15171584. Throughput: 0: 17342.6. Samples: 15188608. Policy #0 lag: (min: 49.0, avg: 82.0, max: 113.0) -[2026-06-07 02:25:11,011][321787] Avg episode reward: [(0, '1238.871')] -[2026-06-07 02:25:11,050][324535] Updated weights for policy 0, policy_version 29624 (0.0007) -[2026-06-07 02:25:11,811][324535] Updated weights for policy 0, policy_version 29634 (0.0007) -[2026-06-07 02:25:12,011][324535] Updated weights for policy 0, policy_version 29644 (0.0007) -[2026-06-07 02:25:12,227][324535] Updated weights for policy 0, policy_version 29654 (0.0007) -[2026-06-07 02:25:12,442][324535] Updated weights for policy 0, policy_version 29664 (0.0007) -[2026-06-07 02:25:12,650][324535] Updated weights for policy 0, policy_version 29674 (0.0007) -[2026-06-07 02:25:12,834][324535] Updated weights for policy 0, policy_version 29684 (0.0007) -[2026-06-07 02:25:13,035][324535] Updated weights for policy 0, policy_version 29694 (0.0007) -[2026-06-07 02:25:13,804][324535] Updated weights for policy 0, policy_version 29705 (0.0007) -[2026-06-07 02:25:14,009][324535] Updated weights for policy 0, policy_version 29715 (0.0007) -[2026-06-07 02:25:14,219][324535] Updated weights for policy 0, policy_version 29725 (0.0007) -[2026-06-07 02:25:14,421][324535] Updated weights for policy 0, policy_version 29735 (0.0010) -[2026-06-07 02:25:14,627][324535] Updated weights for policy 0, policy_version 29745 (0.0010) -[2026-06-07 02:25:14,848][324535] Updated weights for policy 0, policy_version 29755 (0.0007) -[2026-06-07 02:25:15,595][324535] Updated weights for policy 0, policy_version 29765 (0.0010) -[2026-06-07 02:25:15,788][324535] Updated weights for policy 0, policy_version 29775 (0.0011) -[2026-06-07 02:25:16,006][324535] Updated weights for policy 0, policy_version 29785 (0.0011) -[2026-06-07 02:25:16,010][321787] Fps is (10 sec: 16383.9, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 15269888. Throughput: 0: 17410.8. Samples: 15296640. Policy #0 lag: (min: 49.0, avg: 82.0, max: 113.0) -[2026-06-07 02:25:16,011][321787] Avg episode reward: [(0, '1244.067')] -[2026-06-07 02:25:16,238][324535] Updated weights for policy 0, policy_version 29796 (0.0011) -[2026-06-07 02:25:16,473][324535] Updated weights for policy 0, policy_version 29807 (0.0011) -[2026-06-07 02:25:16,666][324535] Updated weights for policy 0, policy_version 29817 (0.0011) -[2026-06-07 02:25:17,484][324535] Updated weights for policy 0, policy_version 29828 (0.0011) -[2026-06-07 02:25:17,684][324535] Updated weights for policy 0, policy_version 29838 (0.0010) -[2026-06-07 02:25:17,920][324535] Updated weights for policy 0, policy_version 29849 (0.0008) -[2026-06-07 02:25:18,128][324535] Updated weights for policy 0, policy_version 29859 (0.0007) -[2026-06-07 02:25:18,342][324535] Updated weights for policy 0, policy_version 29869 (0.0007) -[2026-06-07 02:25:18,536][324535] Updated weights for policy 0, policy_version 29879 (0.0007) -[2026-06-07 02:25:19,306][324535] Updated weights for policy 0, policy_version 29889 (0.0007) -[2026-06-07 02:25:19,525][324535] Updated weights for policy 0, policy_version 29899 (0.0007) -[2026-06-07 02:25:19,747][324535] Updated weights for policy 0, policy_version 29909 (0.0007) -[2026-06-07 02:25:19,944][324535] Updated weights for policy 0, policy_version 29919 (0.0007) -[2026-06-07 02:25:20,165][324535] Updated weights for policy 0, policy_version 29929 (0.0007) -[2026-06-07 02:25:20,395][324535] Updated weights for policy 0, policy_version 29939 (0.0007) -[2026-06-07 02:25:20,595][324535] Updated weights for policy 0, policy_version 29949 (0.0007) -[2026-06-07 02:25:21,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 15368192. Throughput: 0: 17373.9. Samples: 15347328. Policy #0 lag: (min: 49.0, avg: 82.0, max: 113.0) -[2026-06-07 02:25:21,011][321787] Avg episode reward: [(0, '1267.435')] -[2026-06-07 02:25:21,342][324535] Updated weights for policy 0, policy_version 29959 (0.0007) -[2026-06-07 02:25:21,539][324535] Updated weights for policy 0, policy_version 29969 (0.0007) -[2026-06-07 02:25:21,738][324535] Updated weights for policy 0, policy_version 29979 (0.0007) -[2026-06-07 02:25:21,951][324535] Updated weights for policy 0, policy_version 29989 (0.0007) -[2026-06-07 02:25:22,161][324535] Updated weights for policy 0, policy_version 29999 (0.0007) -[2026-06-07 02:25:22,392][324535] Updated weights for policy 0, policy_version 30009 (0.0007) -[2026-06-07 02:25:23,151][324535] Updated weights for policy 0, policy_version 30019 (0.0007) -[2026-06-07 02:25:23,365][324535] Updated weights for policy 0, policy_version 30029 (0.0007) -[2026-06-07 02:25:23,600][324535] Updated weights for policy 0, policy_version 30040 (0.0007) -[2026-06-07 02:25:23,813][324535] Updated weights for policy 0, policy_version 30050 (0.0007) -[2026-06-07 02:25:24,021][324535] Updated weights for policy 0, policy_version 30060 (0.0007) -[2026-06-07 02:25:24,265][324535] Updated weights for policy 0, policy_version 30071 (0.0007) -[2026-06-07 02:25:24,994][324535] Updated weights for policy 0, policy_version 30081 (0.0007) -[2026-06-07 02:25:25,194][324535] Updated weights for policy 0, policy_version 30091 (0.0007) -[2026-06-07 02:25:25,418][324535] Updated weights for policy 0, policy_version 30101 (0.0008) -[2026-06-07 02:25:25,638][324535] Updated weights for policy 0, policy_version 30111 (0.0012) -[2026-06-07 02:25:25,852][324535] Updated weights for policy 0, policy_version 30121 (0.0011) -[2026-06-07 02:25:26,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 15433728. Throughput: 0: 17393.7. Samples: 15449216. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:25:26,011][321787] Avg episode reward: [(0, '1302.017')] -[2026-06-07 02:25:26,065][324535] Updated weights for policy 0, policy_version 30131 (0.0011) -[2026-06-07 02:25:26,256][324535] Updated weights for policy 0, policy_version 30141 (0.0011) -[2026-06-07 02:25:26,323][324273] Saving new best policy, reward=1302.017! -[2026-06-07 02:25:27,050][324535] Updated weights for policy 0, policy_version 30152 (0.0007) -[2026-06-07 02:25:27,291][324535] Updated weights for policy 0, policy_version 30163 (0.0007) -[2026-06-07 02:25:27,503][324535] Updated weights for policy 0, policy_version 30173 (0.0007) -[2026-06-07 02:25:27,737][324535] Updated weights for policy 0, policy_version 30184 (0.0007) -[2026-06-07 02:25:27,966][324535] Updated weights for policy 0, policy_version 30194 (0.0007) -[2026-06-07 02:25:28,194][324535] Updated weights for policy 0, policy_version 30205 (0.0008) -[2026-06-07 02:25:28,950][324535] Updated weights for policy 0, policy_version 30216 (0.0008) -[2026-06-07 02:25:29,156][324535] Updated weights for policy 0, policy_version 30226 (0.0007) -[2026-06-07 02:25:29,364][324535] Updated weights for policy 0, policy_version 30237 (0.0007) -[2026-06-07 02:25:29,594][324535] Updated weights for policy 0, policy_version 30248 (0.0007) -[2026-06-07 02:25:29,841][324535] Updated weights for policy 0, policy_version 30259 (0.0007) -[2026-06-07 02:25:30,033][324535] Updated weights for policy 0, policy_version 30269 (0.0007) -[2026-06-07 02:25:30,828][324535] Updated weights for policy 0, policy_version 30280 (0.0007) -[2026-06-07 02:25:31,010][321787] Fps is (10 sec: 16383.7, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 15532032. Throughput: 0: 17373.8. Samples: 15553408. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:25:31,011][321787] Avg episode reward: [(0, '1290.062')] -[2026-06-07 02:25:31,044][324535] Updated weights for policy 0, policy_version 30290 (0.0007) -[2026-06-07 02:25:31,244][324535] Updated weights for policy 0, policy_version 30300 (0.0007) -[2026-06-07 02:25:31,454][324535] Updated weights for policy 0, policy_version 30310 (0.0008) -[2026-06-07 02:25:31,665][324535] Updated weights for policy 0, policy_version 30320 (0.0011) -[2026-06-07 02:25:31,869][324535] Updated weights for policy 0, policy_version 30330 (0.0011) -[2026-06-07 02:25:32,623][324535] Updated weights for policy 0, policy_version 30340 (0.0010) -[2026-06-07 02:25:32,825][324535] Updated weights for policy 0, policy_version 30350 (0.0011) -[2026-06-07 02:25:33,034][324535] Updated weights for policy 0, policy_version 30360 (0.0011) -[2026-06-07 02:25:33,250][324535] Updated weights for policy 0, policy_version 30370 (0.0012) -[2026-06-07 02:25:33,469][324535] Updated weights for policy 0, policy_version 30380 (0.0011) -[2026-06-07 02:25:33,681][324535] Updated weights for policy 0, policy_version 30390 (0.0011) -[2026-06-07 02:25:33,871][324535] Updated weights for policy 0, policy_version 30400 (0.0011) -[2026-06-07 02:25:34,638][324535] Updated weights for policy 0, policy_version 30410 (0.0007) -[2026-06-07 02:25:34,853][324535] Updated weights for policy 0, policy_version 30420 (0.0007) -[2026-06-07 02:25:35,059][324535] Updated weights for policy 0, policy_version 30430 (0.0009) -[2026-06-07 02:25:35,252][324535] Updated weights for policy 0, policy_version 30440 (0.0011) -[2026-06-07 02:25:35,477][324535] Updated weights for policy 0, policy_version 30450 (0.0010) -[2026-06-07 02:25:35,689][324535] Updated weights for policy 0, policy_version 30460 (0.0011) -[2026-06-07 02:25:36,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 15630336. Throughput: 0: 17408.0. Samples: 15608960. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:25:36,011][321787] Avg episode reward: [(0, '1308.303')] -[2026-06-07 02:25:36,015][324273] Saving new best policy, reward=1308.303! -[2026-06-07 02:25:36,441][324535] Updated weights for policy 0, policy_version 30470 (0.0008) -[2026-06-07 02:25:36,651][324535] Updated weights for policy 0, policy_version 30480 (0.0007) -[2026-06-07 02:25:36,848][324535] Updated weights for policy 0, policy_version 30490 (0.0007) -[2026-06-07 02:25:37,063][324535] Updated weights for policy 0, policy_version 30500 (0.0007) -[2026-06-07 02:25:37,271][324535] Updated weights for policy 0, policy_version 30510 (0.0007) -[2026-06-07 02:25:37,474][324535] Updated weights for policy 0, policy_version 30520 (0.0007) -[2026-06-07 02:25:38,230][324535] Updated weights for policy 0, policy_version 30531 (0.0007) -[2026-06-07 02:25:38,437][324535] Updated weights for policy 0, policy_version 30541 (0.0007) -[2026-06-07 02:25:38,645][324535] Updated weights for policy 0, policy_version 30551 (0.0010) -[2026-06-07 02:25:38,852][324535] Updated weights for policy 0, policy_version 30561 (0.0011) -[2026-06-07 02:25:39,061][324535] Updated weights for policy 0, policy_version 30571 (0.0007) -[2026-06-07 02:25:39,264][324535] Updated weights for policy 0, policy_version 30581 (0.0007) -[2026-06-07 02:25:39,467][324535] Updated weights for policy 0, policy_version 30591 (0.0007) -[2026-06-07 02:25:40,243][324535] Updated weights for policy 0, policy_version 30601 (0.0007) -[2026-06-07 02:25:40,464][324535] Updated weights for policy 0, policy_version 30611 (0.0007) -[2026-06-07 02:25:40,668][324535] Updated weights for policy 0, policy_version 30621 (0.0007) -[2026-06-07 02:25:40,854][324535] Updated weights for policy 0, policy_version 30631 (0.0007) -[2026-06-07 02:25:41,010][321787] Fps is (10 sec: 16384.3, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 15695872. Throughput: 0: 17390.9. Samples: 15709952. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:25:41,011][321787] Avg episode reward: [(0, '1297.378')] -[2026-06-07 02:25:41,060][324535] Updated weights for policy 0, policy_version 30641 (0.0007) -[2026-06-07 02:25:41,301][324535] Updated weights for policy 0, policy_version 30652 (0.0007) -[2026-06-07 02:25:42,064][324535] Updated weights for policy 0, policy_version 30662 (0.0007) -[2026-06-07 02:25:42,284][324535] Updated weights for policy 0, policy_version 30672 (0.0007) -[2026-06-07 02:25:42,503][324535] Updated weights for policy 0, policy_version 30682 (0.0007) -[2026-06-07 02:25:42,721][324535] Updated weights for policy 0, policy_version 30692 (0.0007) -[2026-06-07 02:25:42,953][324535] Updated weights for policy 0, policy_version 30702 (0.0007) -[2026-06-07 02:25:43,164][324535] Updated weights for policy 0, policy_version 30712 (0.0007) -[2026-06-07 02:25:43,887][324535] Updated weights for policy 0, policy_version 30722 (0.0007) -[2026-06-07 02:25:44,111][324535] Updated weights for policy 0, policy_version 30732 (0.0007) -[2026-06-07 02:25:44,307][324535] Updated weights for policy 0, policy_version 30742 (0.0007) -[2026-06-07 02:25:44,506][324535] Updated weights for policy 0, policy_version 30752 (0.0007) -[2026-06-07 02:25:44,710][324535] Updated weights for policy 0, policy_version 30762 (0.0007) -[2026-06-07 02:25:44,928][324535] Updated weights for policy 0, policy_version 30773 (0.0007) -[2026-06-07 02:25:45,138][324535] Updated weights for policy 0, policy_version 30783 (0.0007) -[2026-06-07 02:25:45,878][324535] Updated weights for policy 0, policy_version 30793 (0.0007) -[2026-06-07 02:25:46,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 15794176. Throughput: 0: 17345.4. Samples: 15814016. Policy #0 lag: (min: 63.0, avg: 79.2, max: 127.0) -[2026-06-07 02:25:46,011][321787] Avg episode reward: [(0, '1299.375')] -[2026-06-07 02:25:46,106][324535] Updated weights for policy 0, policy_version 30803 (0.0007) -[2026-06-07 02:25:46,338][324535] Updated weights for policy 0, policy_version 30814 (0.0007) -[2026-06-07 02:25:46,522][324535] Updated weights for policy 0, policy_version 30824 (0.0007) -[2026-06-07 02:25:46,757][324535] Updated weights for policy 0, policy_version 30834 (0.0009) -[2026-06-07 02:25:46,957][324535] Updated weights for policy 0, policy_version 30844 (0.0007) -[2026-06-07 02:25:47,713][324535] Updated weights for policy 0, policy_version 30854 (0.0007) -[2026-06-07 02:25:47,914][324535] Updated weights for policy 0, policy_version 30864 (0.0007) -[2026-06-07 02:25:48,121][324535] Updated weights for policy 0, policy_version 30874 (0.0007) -[2026-06-07 02:25:48,346][324535] Updated weights for policy 0, policy_version 30884 (0.0007) -[2026-06-07 02:25:48,562][324535] Updated weights for policy 0, policy_version 30894 (0.0007) -[2026-06-07 02:25:48,770][324535] Updated weights for policy 0, policy_version 30904 (0.0007) -[2026-06-07 02:25:49,504][324535] Updated weights for policy 0, policy_version 30914 (0.0007) -[2026-06-07 02:25:49,707][324535] Updated weights for policy 0, policy_version 30924 (0.0007) -[2026-06-07 02:25:49,910][324535] Updated weights for policy 0, policy_version 30934 (0.0007) -[2026-06-07 02:25:50,110][324535] Updated weights for policy 0, policy_version 30944 (0.0007) -[2026-06-07 02:25:50,335][324535] Updated weights for policy 0, policy_version 30954 (0.0007) -[2026-06-07 02:25:50,562][324535] Updated weights for policy 0, policy_version 30965 (0.0007) -[2026-06-07 02:25:50,619][324273] Early stopping after 7 epochs (56 sgd steps), loss delta 0.0000009 -[2026-06-07 02:25:51,010][321787] Fps is (10 sec: 19660.3, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 15892480. Throughput: 0: 17433.5. Samples: 15871488. Policy #0 lag: (min: 63.0, avg: 79.2, max: 127.0) -[2026-06-07 02:25:51,012][321787] Avg episode reward: [(0, '1315.095')] -[2026-06-07 02:25:51,019][324273] Saving new best policy, reward=1315.095! -[2026-06-07 02:25:51,350][324535] Updated weights for policy 0, policy_version 30975 (0.0007) -[2026-06-07 02:25:51,568][324535] Updated weights for policy 0, policy_version 30985 (0.0007) -[2026-06-07 02:25:51,785][324535] Updated weights for policy 0, policy_version 30996 (0.0007) -[2026-06-07 02:25:51,994][324535] Updated weights for policy 0, policy_version 31006 (0.0007) -[2026-06-07 02:25:52,233][324535] Updated weights for policy 0, policy_version 31017 (0.0007) -[2026-06-07 02:25:52,435][324535] Updated weights for policy 0, policy_version 31027 (0.0007) -[2026-06-07 02:25:53,185][324535] Updated weights for policy 0, policy_version 31037 (0.0009) -[2026-06-07 02:25:53,412][324535] Updated weights for policy 0, policy_version 31047 (0.0011) -[2026-06-07 02:25:53,622][324535] Updated weights for policy 0, policy_version 31057 (0.0011) -[2026-06-07 02:25:53,830][324535] Updated weights for policy 0, policy_version 31067 (0.0011) -[2026-06-07 02:25:54,031][324535] Updated weights for policy 0, policy_version 31077 (0.0011) -[2026-06-07 02:25:54,243][324535] Updated weights for policy 0, policy_version 31087 (0.0011) -[2026-06-07 02:25:55,008][324535] Updated weights for policy 0, policy_version 31097 (0.0011) -[2026-06-07 02:25:55,221][324535] Updated weights for policy 0, policy_version 31107 (0.0007) -[2026-06-07 02:25:55,454][324535] Updated weights for policy 0, policy_version 31118 (0.0007) -[2026-06-07 02:25:55,663][324535] Updated weights for policy 0, policy_version 31128 (0.0007) -[2026-06-07 02:25:55,857][324535] Updated weights for policy 0, policy_version 31138 (0.0007) -[2026-06-07 02:25:56,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 15958016. Throughput: 0: 17467.7. Samples: 15974656. Policy #0 lag: (min: 63.0, avg: 79.2, max: 127.0) -[2026-06-07 02:25:56,011][321787] Avg episode reward: [(0, '1350.990')] -[2026-06-07 02:25:56,065][324535] Updated weights for policy 0, policy_version 31148 (0.0007) -[2026-06-07 02:25:56,266][324535] Updated weights for policy 0, policy_version 31158 (0.0011) -[2026-06-07 02:25:56,297][324273] Saving new best policy, reward=1350.990! -[2026-06-07 02:25:57,025][324535] Updated weights for policy 0, policy_version 31168 (0.0011) -[2026-06-07 02:25:57,232][324535] Updated weights for policy 0, policy_version 31178 (0.0011) -[2026-06-07 02:25:57,436][324535] Updated weights for policy 0, policy_version 31188 (0.0011) -[2026-06-07 02:25:57,641][324535] Updated weights for policy 0, policy_version 31198 (0.0011) -[2026-06-07 02:25:57,855][324535] Updated weights for policy 0, policy_version 31209 (0.0011) -[2026-06-07 02:25:58,061][324535] Updated weights for policy 0, policy_version 31219 (0.0011) -[2026-06-07 02:25:58,884][324535] Updated weights for policy 0, policy_version 31229 (0.0012) -[2026-06-07 02:25:59,067][324535] Updated weights for policy 0, policy_version 31239 (0.0011) -[2026-06-07 02:25:59,278][324535] Updated weights for policy 0, policy_version 31249 (0.0011) -[2026-06-07 02:25:59,468][324535] Updated weights for policy 0, policy_version 31259 (0.0010) -[2026-06-07 02:25:59,690][324535] Updated weights for policy 0, policy_version 31269 (0.0007) -[2026-06-07 02:25:59,900][324535] Updated weights for policy 0, policy_version 31279 (0.0007) -[2026-06-07 02:26:00,629][324535] Updated weights for policy 0, policy_version 31289 (0.0007) -[2026-06-07 02:26:00,841][324535] Updated weights for policy 0, policy_version 31299 (0.0007) -[2026-06-07 02:26:01,010][321787] Fps is (10 sec: 16384.4, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 16056320. Throughput: 0: 17436.5. Samples: 16081280. Policy #0 lag: (min: 63.0, avg: 79.2, max: 127.0) -[2026-06-07 02:26:01,011][321787] Avg episode reward: [(0, '1382.288')] -[2026-06-07 02:26:01,052][324535] Updated weights for policy 0, policy_version 31309 (0.0007) -[2026-06-07 02:26:01,266][324535] Updated weights for policy 0, policy_version 31319 (0.0007) -[2026-06-07 02:26:01,473][324535] Updated weights for policy 0, policy_version 31329 (0.0007) -[2026-06-07 02:26:01,693][324535] Updated weights for policy 0, policy_version 31339 (0.0007) -[2026-06-07 02:26:01,914][324535] Updated weights for policy 0, policy_version 31349 (0.0007) -[2026-06-07 02:26:01,955][324273] Saving new best policy, reward=1382.288! -[2026-06-07 02:26:02,663][324535] Updated weights for policy 0, policy_version 31359 (0.0007) -[2026-06-07 02:26:02,871][324535] Updated weights for policy 0, policy_version 31369 (0.0007) -[2026-06-07 02:26:03,072][324535] Updated weights for policy 0, policy_version 31379 (0.0007) -[2026-06-07 02:26:03,308][324535] Updated weights for policy 0, policy_version 31389 (0.0007) -[2026-06-07 02:26:03,548][324535] Updated weights for policy 0, policy_version 31400 (0.0007) -[2026-06-07 02:26:03,777][324535] Updated weights for policy 0, policy_version 31410 (0.0007) -[2026-06-07 02:26:04,514][324535] Updated weights for policy 0, policy_version 31420 (0.0007) -[2026-06-07 02:26:04,718][324535] Updated weights for policy 0, policy_version 31430 (0.0007) -[2026-06-07 02:26:04,937][324535] Updated weights for policy 0, policy_version 31440 (0.0007) -[2026-06-07 02:26:05,149][324535] Updated weights for policy 0, policy_version 31450 (0.0007) -[2026-06-07 02:26:05,354][324535] Updated weights for policy 0, policy_version 31460 (0.0011) -[2026-06-07 02:26:05,575][324535] Updated weights for policy 0, policy_version 31470 (0.0011) -[2026-06-07 02:26:05,786][324535] Updated weights for policy 0, policy_version 31480 (0.0011) -[2026-06-07 02:26:06,010][321787] Fps is (10 sec: 19660.9, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 16154624. Throughput: 0: 17507.6. Samples: 16135168. Policy #0 lag: (min: 63.0, avg: 79.2, max: 127.0) -[2026-06-07 02:26:06,011][321787] Avg episode reward: [(0, '1387.104')] -[2026-06-07 02:26:06,015][324273] Saving new best policy, reward=1387.104! -[2026-06-07 02:26:06,553][324535] Updated weights for policy 0, policy_version 31491 (0.0011) -[2026-06-07 02:26:06,769][324535] Updated weights for policy 0, policy_version 31501 (0.0011) -[2026-06-07 02:26:06,977][324535] Updated weights for policy 0, policy_version 31511 (0.0011) -[2026-06-07 02:26:07,178][324535] Updated weights for policy 0, policy_version 31521 (0.0011) -[2026-06-07 02:26:07,393][324535] Updated weights for policy 0, policy_version 31531 (0.0011) -[2026-06-07 02:26:07,602][324535] Updated weights for policy 0, policy_version 31541 (0.0011) -[2026-06-07 02:26:08,377][324535] Updated weights for policy 0, policy_version 31551 (0.0008) -[2026-06-07 02:26:08,586][324535] Updated weights for policy 0, policy_version 31561 (0.0007) -[2026-06-07 02:26:08,812][324535] Updated weights for policy 0, policy_version 31571 (0.0007) -[2026-06-07 02:26:09,040][324535] Updated weights for policy 0, policy_version 31582 (0.0007) -[2026-06-07 02:26:09,229][324535] Updated weights for policy 0, policy_version 31592 (0.0007) -[2026-06-07 02:26:09,451][324535] Updated weights for policy 0, policy_version 31602 (0.0007) -[2026-06-07 02:26:10,189][324535] Updated weights for policy 0, policy_version 31612 (0.0009) -[2026-06-07 02:26:10,394][324535] Updated weights for policy 0, policy_version 31622 (0.0011) -[2026-06-07 02:26:10,649][324535] Updated weights for policy 0, policy_version 31634 (0.0011) -[2026-06-07 02:26:10,855][324535] Updated weights for policy 0, policy_version 31644 (0.0012) -[2026-06-07 02:26:11,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 16220160. Throughput: 0: 17501.9. Samples: 16236800. Policy #0 lag: (min: 63.0, avg: 79.2, max: 127.0) -[2026-06-07 02:26:11,011][321787] Avg episode reward: [(0, '1355.417')] -[2026-06-07 02:26:11,061][324535] Updated weights for policy 0, policy_version 31654 (0.0011) -[2026-06-07 02:26:11,258][324535] Updated weights for policy 0, policy_version 31664 (0.0007) -[2026-06-07 02:26:12,025][324535] Updated weights for policy 0, policy_version 31675 (0.0008) -[2026-06-07 02:26:12,215][324535] Updated weights for policy 0, policy_version 31685 (0.0012) -[2026-06-07 02:26:12,412][324535] Updated weights for policy 0, policy_version 31695 (0.0011) -[2026-06-07 02:26:12,630][324535] Updated weights for policy 0, policy_version 31706 (0.0010) -[2026-06-07 02:26:12,828][324535] Updated weights for policy 0, policy_version 31716 (0.0012) -[2026-06-07 02:26:13,067][324535] Updated weights for policy 0, policy_version 31727 (0.0009) -[2026-06-07 02:26:13,823][324535] Updated weights for policy 0, policy_version 31737 (0.0011) -[2026-06-07 02:26:14,032][324535] Updated weights for policy 0, policy_version 31747 (0.0010) -[2026-06-07 02:26:14,219][324535] Updated weights for policy 0, policy_version 31757 (0.0008) -[2026-06-07 02:26:14,452][324535] Updated weights for policy 0, policy_version 31768 (0.0007) -[2026-06-07 02:26:14,654][324535] Updated weights for policy 0, policy_version 31778 (0.0008) -[2026-06-07 02:26:14,865][324535] Updated weights for policy 0, policy_version 31788 (0.0007) -[2026-06-07 02:26:15,089][324535] Updated weights for policy 0, policy_version 31798 (0.0007) -[2026-06-07 02:26:15,816][324535] Updated weights for policy 0, policy_version 31808 (0.0008) -[2026-06-07 02:26:16,002][324535] Updated weights for policy 0, policy_version 31818 (0.0010) -[2026-06-07 02:26:16,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 16318464. Throughput: 0: 17504.8. Samples: 16341120. Policy #0 lag: (min: 63.0, avg: 79.2, max: 127.0) -[2026-06-07 02:26:16,011][321787] Avg episode reward: [(0, '1354.067')] -[2026-06-07 02:26:16,204][324535] Updated weights for policy 0, policy_version 31828 (0.0011) -[2026-06-07 02:26:16,396][324535] Updated weights for policy 0, policy_version 31838 (0.0011) -[2026-06-07 02:26:16,596][324535] Updated weights for policy 0, policy_version 31848 (0.0008) -[2026-06-07 02:26:16,790][324535] Updated weights for policy 0, policy_version 31858 (0.0007) -[2026-06-07 02:26:17,599][324535] Updated weights for policy 0, policy_version 31868 (0.0009) -[2026-06-07 02:26:17,816][324535] Updated weights for policy 0, policy_version 31878 (0.0010) -[2026-06-07 02:26:18,028][324535] Updated weights for policy 0, policy_version 31888 (0.0009) -[2026-06-07 02:26:18,232][324535] Updated weights for policy 0, policy_version 31898 (0.0007) -[2026-06-07 02:26:18,452][324535] Updated weights for policy 0, policy_version 31908 (0.0007) -[2026-06-07 02:26:18,656][324535] Updated weights for policy 0, policy_version 31918 (0.0007) -[2026-06-07 02:26:18,860][324535] Updated weights for policy 0, policy_version 31928 (0.0007) -[2026-06-07 02:26:19,577][324535] Updated weights for policy 0, policy_version 31938 (0.0007) -[2026-06-07 02:26:19,794][324535] Updated weights for policy 0, policy_version 31948 (0.0007) -[2026-06-07 02:26:19,985][324535] Updated weights for policy 0, policy_version 31958 (0.0007) -[2026-06-07 02:26:20,193][324535] Updated weights for policy 0, policy_version 31968 (0.0007) -[2026-06-07 02:26:20,403][324535] Updated weights for policy 0, policy_version 31978 (0.0007) -[2026-06-07 02:26:20,609][324535] Updated weights for policy 0, policy_version 31988 (0.0007) -[2026-06-07 02:26:21,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 16416768. Throughput: 0: 17521.8. Samples: 16397440. Policy #0 lag: (min: 63.0, avg: 79.2, max: 127.0) -[2026-06-07 02:26:21,011][321787] Avg episode reward: [(0, '1429.767')] -[2026-06-07 02:26:21,016][324273] Saving new best policy, reward=1429.767! -[2026-06-07 02:26:21,309][324535] Updated weights for policy 0, policy_version 31998 (0.0007) -[2026-06-07 02:26:21,532][324535] Updated weights for policy 0, policy_version 32008 (0.0007) -[2026-06-07 02:26:21,747][324535] Updated weights for policy 0, policy_version 32018 (0.0007) -[2026-06-07 02:26:21,948][324535] Updated weights for policy 0, policy_version 32028 (0.0007) -[2026-06-07 02:26:22,169][324535] Updated weights for policy 0, policy_version 32038 (0.0007) -[2026-06-07 02:26:22,379][324535] Updated weights for policy 0, policy_version 32048 (0.0007) -[2026-06-07 02:26:23,092][324535] Updated weights for policy 0, policy_version 32058 (0.0007) -[2026-06-07 02:26:23,310][324535] Updated weights for policy 0, policy_version 32068 (0.0007) -[2026-06-07 02:26:23,518][324535] Updated weights for policy 0, policy_version 32078 (0.0007) -[2026-06-07 02:26:23,731][324535] Updated weights for policy 0, policy_version 32088 (0.0007) -[2026-06-07 02:26:23,941][324535] Updated weights for policy 0, policy_version 32098 (0.0007) -[2026-06-07 02:26:24,148][324535] Updated weights for policy 0, policy_version 32108 (0.0007) -[2026-06-07 02:26:24,351][324535] Updated weights for policy 0, policy_version 32118 (0.0007) -[2026-06-07 02:26:25,076][324535] Updated weights for policy 0, policy_version 32128 (0.0007) -[2026-06-07 02:26:25,301][324535] Updated weights for policy 0, policy_version 32138 (0.0007) -[2026-06-07 02:26:25,518][324535] Updated weights for policy 0, policy_version 32149 (0.0007) -[2026-06-07 02:26:25,731][324535] Updated weights for policy 0, policy_version 32159 (0.0010) -[2026-06-07 02:26:25,949][324535] Updated weights for policy 0, policy_version 32170 (0.0008) -[2026-06-07 02:26:26,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 16482304. Throughput: 0: 17541.7. Samples: 16499328. Policy #0 lag: (min: 63.0, avg: 79.2, max: 127.0) -[2026-06-07 02:26:26,011][321787] Avg episode reward: [(0, '1448.559')] -[2026-06-07 02:26:26,165][324535] Updated weights for policy 0, policy_version 32180 (0.0011) -[2026-06-07 02:26:26,252][324273] Saving new best policy, reward=1448.559! -[2026-06-07 02:26:26,943][324535] Updated weights for policy 0, policy_version 32190 (0.0009) -[2026-06-07 02:26:27,147][324535] Updated weights for policy 0, policy_version 32200 (0.0007) -[2026-06-07 02:26:27,345][324535] Updated weights for policy 0, policy_version 32210 (0.0007) -[2026-06-07 02:26:27,540][324535] Updated weights for policy 0, policy_version 32220 (0.0007) -[2026-06-07 02:26:27,759][324535] Updated weights for policy 0, policy_version 32230 (0.0007) -[2026-06-07 02:26:27,957][324535] Updated weights for policy 0, policy_version 32240 (0.0007) -[2026-06-07 02:26:28,670][324535] Updated weights for policy 0, policy_version 32250 (0.0007) -[2026-06-07 02:26:28,879][324535] Updated weights for policy 0, policy_version 32260 (0.0009) -[2026-06-07 02:26:29,100][324535] Updated weights for policy 0, policy_version 32270 (0.0007) -[2026-06-07 02:26:29,309][324535] Updated weights for policy 0, policy_version 32280 (0.0006) -[2026-06-07 02:26:29,544][324535] Updated weights for policy 0, policy_version 32291 (0.0008) -[2026-06-07 02:26:29,754][324535] Updated weights for policy 0, policy_version 32301 (0.0007) -[2026-06-07 02:26:29,961][324535] Updated weights for policy 0, policy_version 32311 (0.0007) -[2026-06-07 02:26:30,680][324535] Updated weights for policy 0, policy_version 32321 (0.0009) -[2026-06-07 02:26:30,884][324535] Updated weights for policy 0, policy_version 32331 (0.0009) -[2026-06-07 02:26:31,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 16580608. Throughput: 0: 17635.6. Samples: 16607616. Policy #0 lag: (min: 14.0, avg: 30.2, max: 78.0) -[2026-06-07 02:26:31,011][321787] Avg episode reward: [(0, '1526.911')] -[2026-06-07 02:26:31,086][324535] Updated weights for policy 0, policy_version 32341 (0.0011) -[2026-06-07 02:26:31,300][324535] Updated weights for policy 0, policy_version 32351 (0.0011) -[2026-06-07 02:26:31,513][324535] Updated weights for policy 0, policy_version 32361 (0.0008) -[2026-06-07 02:26:31,709][324535] Updated weights for policy 0, policy_version 32371 (0.0011) -[2026-06-07 02:26:31,816][324273] Saving new best policy, reward=1526.911! -[2026-06-07 02:26:32,497][324535] Updated weights for policy 0, policy_version 32381 (0.0011) -[2026-06-07 02:26:32,694][324535] Updated weights for policy 0, policy_version 32391 (0.0008) -[2026-06-07 02:26:32,896][324535] Updated weights for policy 0, policy_version 32401 (0.0007) -[2026-06-07 02:26:33,095][324535] Updated weights for policy 0, policy_version 32411 (0.0007) -[2026-06-07 02:26:33,292][324535] Updated weights for policy 0, policy_version 32421 (0.0007) -[2026-06-07 02:26:33,511][324535] Updated weights for policy 0, policy_version 32431 (0.0007) -[2026-06-07 02:26:34,284][324535] Updated weights for policy 0, policy_version 32442 (0.0007) -[2026-06-07 02:26:34,496][324535] Updated weights for policy 0, policy_version 32452 (0.0007) -[2026-06-07 02:26:34,691][324535] Updated weights for policy 0, policy_version 32462 (0.0011) -[2026-06-07 02:26:34,926][324535] Updated weights for policy 0, policy_version 32472 (0.0009) -[2026-06-07 02:26:35,144][324535] Updated weights for policy 0, policy_version 32483 (0.0010) -[2026-06-07 02:26:35,330][324535] Updated weights for policy 0, policy_version 32493 (0.0009) -[2026-06-07 02:26:35,548][324535] Updated weights for policy 0, policy_version 32503 (0.0009) -[2026-06-07 02:26:36,010][321787] Fps is (10 sec: 19660.7, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 16678912. Throughput: 0: 17487.7. Samples: 16658432. Policy #0 lag: (min: 14.0, avg: 30.2, max: 78.0) -[2026-06-07 02:26:36,011][321787] Avg episode reward: [(0, '1597.762')] -[2026-06-07 02:26:36,309][324535] Updated weights for policy 0, policy_version 32513 (0.0007) -[2026-06-07 02:26:36,513][324535] Updated weights for policy 0, policy_version 32523 (0.0009) -[2026-06-07 02:26:36,753][324535] Updated weights for policy 0, policy_version 32534 (0.0011) -[2026-06-07 02:26:36,965][324535] Updated weights for policy 0, policy_version 32544 (0.0011) -[2026-06-07 02:26:37,175][324535] Updated weights for policy 0, policy_version 32554 (0.0008) -[2026-06-07 02:26:37,389][324535] Updated weights for policy 0, policy_version 32564 (0.0008) -[2026-06-07 02:26:37,467][324273] Saving new best policy, reward=1597.762! -[2026-06-07 02:26:38,150][324535] Updated weights for policy 0, policy_version 32574 (0.0007) -[2026-06-07 02:26:38,366][324535] Updated weights for policy 0, policy_version 32584 (0.0006) -[2026-06-07 02:26:38,572][324535] Updated weights for policy 0, policy_version 32594 (0.0006) -[2026-06-07 02:26:38,774][324535] Updated weights for policy 0, policy_version 32604 (0.0007) -[2026-06-07 02:26:38,991][324535] Updated weights for policy 0, policy_version 32614 (0.0007) -[2026-06-07 02:26:39,181][324535] Updated weights for policy 0, policy_version 32624 (0.0007) -[2026-06-07 02:26:39,924][324535] Updated weights for policy 0, policy_version 32634 (0.0007) -[2026-06-07 02:26:40,133][324535] Updated weights for policy 0, policy_version 32644 (0.0007) -[2026-06-07 02:26:40,356][324535] Updated weights for policy 0, policy_version 32654 (0.0007) -[2026-06-07 02:26:40,562][324535] Updated weights for policy 0, policy_version 32664 (0.0007) -[2026-06-07 02:26:40,761][324535] Updated weights for policy 0, policy_version 32674 (0.0008) -[2026-06-07 02:26:40,971][324535] Updated weights for policy 0, policy_version 32684 (0.0011) -[2026-06-07 02:26:41,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 16744448. Throughput: 0: 17473.4. Samples: 16760960. Policy #0 lag: (min: 14.0, avg: 30.2, max: 78.0) -[2026-06-07 02:26:41,011][321787] Avg episode reward: [(0, '1620.484')] -[2026-06-07 02:26:41,169][324535] Updated weights for policy 0, policy_version 32694 (0.0011) -[2026-06-07 02:26:41,216][324273] Saving new best policy, reward=1620.484! -[2026-06-07 02:26:41,925][324535] Updated weights for policy 0, policy_version 32704 (0.0008) -[2026-06-07 02:26:42,132][324535] Updated weights for policy 0, policy_version 32714 (0.0008) -[2026-06-07 02:26:42,341][324535] Updated weights for policy 0, policy_version 32724 (0.0007) -[2026-06-07 02:26:42,544][324535] Updated weights for policy 0, policy_version 32734 (0.0008) -[2026-06-07 02:26:42,769][324535] Updated weights for policy 0, policy_version 32744 (0.0008) -[2026-06-07 02:26:42,991][324535] Updated weights for policy 0, policy_version 32754 (0.0008) -[2026-06-07 02:26:43,719][324535] Updated weights for policy 0, policy_version 32764 (0.0007) -[2026-06-07 02:26:43,936][324535] Updated weights for policy 0, policy_version 32774 (0.0007) -[2026-06-07 02:26:44,134][324535] Updated weights for policy 0, policy_version 32784 (0.0010) -[2026-06-07 02:26:44,360][324535] Updated weights for policy 0, policy_version 32794 (0.0009) -[2026-06-07 02:26:44,555][324535] Updated weights for policy 0, policy_version 32804 (0.0007) -[2026-06-07 02:26:44,789][324535] Updated weights for policy 0, policy_version 32815 (0.0008) -[2026-06-07 02:26:45,573][324535] Updated weights for policy 0, policy_version 32826 (0.0007) -[2026-06-07 02:26:45,767][324535] Updated weights for policy 0, policy_version 32836 (0.0007) -[2026-06-07 02:26:45,997][324535] Updated weights for policy 0, policy_version 32846 (0.0011) -[2026-06-07 02:26:46,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 16842752. Throughput: 0: 17496.2. Samples: 16868608. Policy #0 lag: (min: 14.0, avg: 30.2, max: 78.0) -[2026-06-07 02:26:46,011][321787] Avg episode reward: [(0, '1666.499')] -[2026-06-07 02:26:46,218][324535] Updated weights for policy 0, policy_version 32856 (0.0010) -[2026-06-07 02:26:46,428][324535] Updated weights for policy 0, policy_version 32866 (0.0011) -[2026-06-07 02:26:46,664][324535] Updated weights for policy 0, policy_version 32876 (0.0014) -[2026-06-07 02:26:46,887][324535] Updated weights for policy 0, policy_version 32886 (0.0016) -[2026-06-07 02:26:46,925][324273] Saving new best policy, reward=1666.499! -[2026-06-07 02:26:47,612][324535] Updated weights for policy 0, policy_version 32896 (0.0018) -[2026-06-07 02:26:47,836][324535] Updated weights for policy 0, policy_version 32906 (0.0017) -[2026-06-07 02:26:48,058][324535] Updated weights for policy 0, policy_version 32916 (0.0020) -[2026-06-07 02:26:48,266][324535] Updated weights for policy 0, policy_version 32926 (0.0022) -[2026-06-07 02:26:48,493][324535] Updated weights for policy 0, policy_version 32936 (0.0019) -[2026-06-07 02:26:48,721][324535] Updated weights for policy 0, policy_version 32946 (0.0017) -[2026-06-07 02:26:49,414][324535] Updated weights for policy 0, policy_version 32956 (0.0012) -[2026-06-07 02:26:49,631][324535] Updated weights for policy 0, policy_version 32966 (0.0011) -[2026-06-07 02:26:49,846][324535] Updated weights for policy 0, policy_version 32976 (0.0008) -[2026-06-07 02:26:50,071][324535] Updated weights for policy 0, policy_version 32986 (0.0007) -[2026-06-07 02:26:50,274][324535] Updated weights for policy 0, policy_version 32996 (0.0007) -[2026-06-07 02:26:50,483][324535] Updated weights for policy 0, policy_version 33006 (0.0007) -[2026-06-07 02:26:50,712][324535] Updated weights for policy 0, policy_version 33016 (0.0008) -[2026-06-07 02:26:51,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 16941056. Throughput: 0: 17430.7. Samples: 16919552. Policy #0 lag: (min: 14.0, avg: 30.2, max: 78.0) -[2026-06-07 02:26:51,011][321787] Avg episode reward: [(0, '1725.152')] -[2026-06-07 02:26:51,016][324273] Saving new best policy, reward=1725.152! -[2026-06-07 02:26:51,515][324535] Updated weights for policy 0, policy_version 33027 (0.0007) -[2026-06-07 02:26:51,702][324535] Updated weights for policy 0, policy_version 33037 (0.0007) -[2026-06-07 02:26:51,893][324535] Updated weights for policy 0, policy_version 33047 (0.0007) -[2026-06-07 02:26:52,116][324535] Updated weights for policy 0, policy_version 33057 (0.0007) -[2026-06-07 02:26:52,314][324535] Updated weights for policy 0, policy_version 33067 (0.0007) -[2026-06-07 02:26:52,522][324535] Updated weights for policy 0, policy_version 33077 (0.0007) -[2026-06-07 02:26:53,269][324535] Updated weights for policy 0, policy_version 33087 (0.0010) -[2026-06-07 02:26:53,489][324535] Updated weights for policy 0, policy_version 33097 (0.0011) -[2026-06-07 02:26:53,690][324535] Updated weights for policy 0, policy_version 33107 (0.0011) -[2026-06-07 02:26:53,900][324535] Updated weights for policy 0, policy_version 33117 (0.0011) -[2026-06-07 02:26:54,118][324535] Updated weights for policy 0, policy_version 33127 (0.0011) -[2026-06-07 02:26:54,318][324535] Updated weights for policy 0, policy_version 33137 (0.0011) -[2026-06-07 02:26:55,051][324535] Updated weights for policy 0, policy_version 33147 (0.0011) -[2026-06-07 02:26:55,266][324535] Updated weights for policy 0, policy_version 33157 (0.0011) -[2026-06-07 02:26:55,503][324535] Updated weights for policy 0, policy_version 33168 (0.0010) -[2026-06-07 02:26:55,717][324535] Updated weights for policy 0, policy_version 33178 (0.0011) -[2026-06-07 02:26:55,930][324535] Updated weights for policy 0, policy_version 33188 (0.0009) -[2026-06-07 02:26:56,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 17006592. Throughput: 0: 17410.8. Samples: 17020288. Policy #0 lag: (min: 63.0, avg: 78.6, max: 127.0) -[2026-06-07 02:26:56,011][321787] Avg episode reward: [(0, '1715.412')] -[2026-06-07 02:26:56,132][324535] Updated weights for policy 0, policy_version 33198 (0.0011) -[2026-06-07 02:26:56,333][324535] Updated weights for policy 0, policy_version 33208 (0.0011) -[2026-06-07 02:26:57,085][324535] Updated weights for policy 0, policy_version 33218 (0.0007) -[2026-06-07 02:26:57,289][324535] Updated weights for policy 0, policy_version 33228 (0.0007) -[2026-06-07 02:26:57,497][324535] Updated weights for policy 0, policy_version 33238 (0.0007) -[2026-06-07 02:26:57,706][324535] Updated weights for policy 0, policy_version 33248 (0.0007) -[2026-06-07 02:26:57,900][324535] Updated weights for policy 0, policy_version 33258 (0.0007) -[2026-06-07 02:26:58,112][324535] Updated weights for policy 0, policy_version 33268 (0.0007) -[2026-06-07 02:26:58,862][324535] Updated weights for policy 0, policy_version 33278 (0.0007) -[2026-06-07 02:26:59,099][324535] Updated weights for policy 0, policy_version 33289 (0.0007) -[2026-06-07 02:26:59,314][324535] Updated weights for policy 0, policy_version 33299 (0.0007) -[2026-06-07 02:26:59,540][324535] Updated weights for policy 0, policy_version 33311 (0.0007) -[2026-06-07 02:26:59,754][324535] Updated weights for policy 0, policy_version 33321 (0.0007) -[2026-06-07 02:26:59,970][324535] Updated weights for policy 0, policy_version 33331 (0.0007) -[2026-06-07 02:27:00,731][324535] Updated weights for policy 0, policy_version 33341 (0.0007) -[2026-06-07 02:27:00,933][324535] Updated weights for policy 0, policy_version 33352 (0.0007) -[2026-06-07 02:27:01,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.3). Total num frames: 17104896. Throughput: 0: 17419.4. Samples: 17124992. Policy #0 lag: (min: 63.0, avg: 78.6, max: 127.0) -[2026-06-07 02:27:01,011][321787] Avg episode reward: [(0, '1738.042')] -[2026-06-07 02:27:01,142][324535] Updated weights for policy 0, policy_version 33362 (0.0007) -[2026-06-07 02:27:01,351][324535] Updated weights for policy 0, policy_version 33372 (0.0007) -[2026-06-07 02:27:01,563][324535] Updated weights for policy 0, policy_version 33382 (0.0007) -[2026-06-07 02:27:01,777][324535] Updated weights for policy 0, policy_version 33392 (0.0007) -[2026-06-07 02:27:01,930][324273] Saving new best policy, reward=1738.042! -[2026-06-07 02:27:02,549][324535] Updated weights for policy 0, policy_version 33402 (0.0007) -[2026-06-07 02:27:02,760][324535] Updated weights for policy 0, policy_version 33412 (0.0007) -[2026-06-07 02:27:02,958][324535] Updated weights for policy 0, policy_version 33422 (0.0007) -[2026-06-07 02:27:03,176][324535] Updated weights for policy 0, policy_version 33432 (0.0007) -[2026-06-07 02:27:03,376][324535] Updated weights for policy 0, policy_version 33442 (0.0007) -[2026-06-07 02:27:03,587][324535] Updated weights for policy 0, policy_version 33452 (0.0007) -[2026-06-07 02:27:03,815][324535] Updated weights for policy 0, policy_version 33462 (0.0007) -[2026-06-07 02:27:04,595][324535] Updated weights for policy 0, policy_version 33472 (0.0007) -[2026-06-07 02:27:04,823][324535] Updated weights for policy 0, policy_version 33483 (0.0007) -[2026-06-07 02:27:05,036][324535] Updated weights for policy 0, policy_version 33493 (0.0007) -[2026-06-07 02:27:05,257][324535] Updated weights for policy 0, policy_version 33503 (0.0007) -[2026-06-07 02:27:05,483][324535] Updated weights for policy 0, policy_version 33514 (0.0007) -[2026-06-07 02:27:05,684][324535] Updated weights for policy 0, policy_version 33524 (0.0007) -[2026-06-07 02:27:06,010][321787] Fps is (10 sec: 19660.7, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 17203200. Throughput: 0: 17388.1. Samples: 17179904. Policy #0 lag: (min: 63.0, avg: 78.6, max: 127.0) -[2026-06-07 02:27:06,011][321787] Avg episode reward: [(0, '1741.515')] -[2026-06-07 02:27:06,015][324273] Saving new best policy, reward=1741.515! -[2026-06-07 02:27:06,425][324535] Updated weights for policy 0, policy_version 33534 (0.0007) -[2026-06-07 02:27:06,646][324535] Updated weights for policy 0, policy_version 33545 (0.0007) -[2026-06-07 02:27:06,900][324535] Updated weights for policy 0, policy_version 33556 (0.0007) -[2026-06-07 02:27:07,104][324535] Updated weights for policy 0, policy_version 33566 (0.0007) -[2026-06-07 02:27:07,309][324535] Updated weights for policy 0, policy_version 33576 (0.0007) -[2026-06-07 02:27:07,534][324535] Updated weights for policy 0, policy_version 33586 (0.0007) -[2026-06-07 02:27:08,282][324535] Updated weights for policy 0, policy_version 33596 (0.0007) -[2026-06-07 02:27:08,477][324535] Updated weights for policy 0, policy_version 33606 (0.0007) -[2026-06-07 02:27:08,693][324535] Updated weights for policy 0, policy_version 33616 (0.0007) -[2026-06-07 02:27:08,898][324535] Updated weights for policy 0, policy_version 33626 (0.0007) -[2026-06-07 02:27:09,107][324535] Updated weights for policy 0, policy_version 33636 (0.0007) -[2026-06-07 02:27:09,308][324535] Updated weights for policy 0, policy_version 33646 (0.0007) -[2026-06-07 02:27:09,505][324535] Updated weights for policy 0, policy_version 33656 (0.0007) -[2026-06-07 02:27:10,305][324535] Updated weights for policy 0, policy_version 33666 (0.0007) -[2026-06-07 02:27:10,528][324535] Updated weights for policy 0, policy_version 33676 (0.0007) -[2026-06-07 02:27:10,734][324535] Updated weights for policy 0, policy_version 33686 (0.0007) -[2026-06-07 02:27:10,953][324535] Updated weights for policy 0, policy_version 33697 (0.0007) -[2026-06-07 02:27:11,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 17268736. Throughput: 0: 17388.1. Samples: 17281792. Policy #0 lag: (min: 63.0, avg: 78.6, max: 127.0) -[2026-06-07 02:27:11,011][321787] Avg episode reward: [(0, '1779.740')] -[2026-06-07 02:27:11,146][324535] Updated weights for policy 0, policy_version 33707 (0.0007) -[2026-06-07 02:27:11,347][324535] Updated weights for policy 0, policy_version 33717 (0.0007) -[2026-06-07 02:27:11,409][324273] Saving new best policy, reward=1779.740! -[2026-06-07 02:27:12,096][324535] Updated weights for policy 0, policy_version 33727 (0.0007) -[2026-06-07 02:27:12,303][324535] Updated weights for policy 0, policy_version 33737 (0.0007) -[2026-06-07 02:27:12,520][324535] Updated weights for policy 0, policy_version 33747 (0.0007) -[2026-06-07 02:27:12,706][324535] Updated weights for policy 0, policy_version 33757 (0.0007) -[2026-06-07 02:27:12,929][324535] Updated weights for policy 0, policy_version 33767 (0.0007) -[2026-06-07 02:27:13,149][324535] Updated weights for policy 0, policy_version 33777 (0.0007) -[2026-06-07 02:27:13,969][324535] Updated weights for policy 0, policy_version 33788 (0.0007) -[2026-06-07 02:27:14,194][324535] Updated weights for policy 0, policy_version 33798 (0.0007) -[2026-06-07 02:27:14,386][324535] Updated weights for policy 0, policy_version 33808 (0.0007) -[2026-06-07 02:27:14,578][324535] Updated weights for policy 0, policy_version 33818 (0.0007) -[2026-06-07 02:27:14,797][324535] Updated weights for policy 0, policy_version 33828 (0.0007) -[2026-06-07 02:27:15,019][324535] Updated weights for policy 0, policy_version 33838 (0.0007) -[2026-06-07 02:27:15,226][324535] Updated weights for policy 0, policy_version 33848 (0.0007) -[2026-06-07 02:27:15,973][324535] Updated weights for policy 0, policy_version 33858 (0.0007) -[2026-06-07 02:27:16,010][321787] Fps is (10 sec: 16383.7, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 17367040. Throughput: 0: 17291.3. Samples: 17385728. Policy #0 lag: (min: 63.0, avg: 78.6, max: 127.0) -[2026-06-07 02:27:16,012][321787] Avg episode reward: [(0, '1791.476')] -[2026-06-07 02:27:16,184][324535] Updated weights for policy 0, policy_version 33868 (0.0007) -[2026-06-07 02:27:16,386][324535] Updated weights for policy 0, policy_version 33878 (0.0007) -[2026-06-07 02:27:16,596][324535] Updated weights for policy 0, policy_version 33888 (0.0007) -[2026-06-07 02:27:16,794][324535] Updated weights for policy 0, policy_version 33898 (0.0007) -[2026-06-07 02:27:17,015][324535] Updated weights for policy 0, policy_version 33909 (0.0007) -[2026-06-07 02:27:17,075][324273] Saving new best policy, reward=1791.476! -[2026-06-07 02:27:17,812][324535] Updated weights for policy 0, policy_version 33920 (0.0007) -[2026-06-07 02:27:18,029][324535] Updated weights for policy 0, policy_version 33930 (0.0007) -[2026-06-07 02:27:18,216][324535] Updated weights for policy 0, policy_version 33940 (0.0007) -[2026-06-07 02:27:18,420][324535] Updated weights for policy 0, policy_version 33950 (0.0007) -[2026-06-07 02:27:18,610][324535] Updated weights for policy 0, policy_version 33960 (0.0007) -[2026-06-07 02:27:18,819][324535] Updated weights for policy 0, policy_version 33970 (0.0010) -[2026-06-07 02:27:18,933][324273] Early stopping after 8 epochs (64 sgd steps), loss delta 0.0000006 -[2026-06-07 02:27:19,597][324535] Updated weights for policy 0, policy_version 33980 (0.0008) -[2026-06-07 02:27:19,795][324535] Updated weights for policy 0, policy_version 33990 (0.0007) -[2026-06-07 02:27:19,996][324535] Updated weights for policy 0, policy_version 34000 (0.0007) -[2026-06-07 02:27:20,218][324535] Updated weights for policy 0, policy_version 34010 (0.0007) -[2026-06-07 02:27:20,439][324535] Updated weights for policy 0, policy_version 34020 (0.0007) -[2026-06-07 02:27:20,654][324535] Updated weights for policy 0, policy_version 34030 (0.0007) -[2026-06-07 02:27:20,841][324535] Updated weights for policy 0, policy_version 34040 (0.0007) -[2026-06-07 02:27:21,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 17465344. Throughput: 0: 17436.5. Samples: 17443072. Policy #0 lag: (min: 14.0, avg: 29.9, max: 78.0) -[2026-06-07 02:27:21,011][321787] Avg episode reward: [(0, '1796.715')] -[2026-06-07 02:27:21,015][324273] Saving new best policy, reward=1796.715! -[2026-06-07 02:27:21,613][324535] Updated weights for policy 0, policy_version 34050 (0.0008) -[2026-06-07 02:27:21,803][324535] Updated weights for policy 0, policy_version 34060 (0.0007) -[2026-06-07 02:27:22,013][324535] Updated weights for policy 0, policy_version 34070 (0.0007) -[2026-06-07 02:27:22,202][324535] Updated weights for policy 0, policy_version 34080 (0.0008) -[2026-06-07 02:27:22,405][324535] Updated weights for policy 0, policy_version 34090 (0.0007) -[2026-06-07 02:27:22,642][324535] Updated weights for policy 0, policy_version 34101 (0.0008) -[2026-06-07 02:27:23,409][324535] Updated weights for policy 0, policy_version 34111 (0.0007) -[2026-06-07 02:27:23,612][324535] Updated weights for policy 0, policy_version 34121 (0.0007) -[2026-06-07 02:27:23,805][324535] Updated weights for policy 0, policy_version 34131 (0.0009) -[2026-06-07 02:27:24,032][324535] Updated weights for policy 0, policy_version 34142 (0.0011) -[2026-06-07 02:27:24,255][324535] Updated weights for policy 0, policy_version 34152 (0.0011) -[2026-06-07 02:27:24,455][324535] Updated weights for policy 0, policy_version 34162 (0.0011) -[2026-06-07 02:27:25,201][324535] Updated weights for policy 0, policy_version 34172 (0.0007) -[2026-06-07 02:27:25,400][324535] Updated weights for policy 0, policy_version 34182 (0.0007) -[2026-06-07 02:27:25,642][324535] Updated weights for policy 0, policy_version 34192 (0.0007) -[2026-06-07 02:27:25,856][324535] Updated weights for policy 0, policy_version 34202 (0.0007) -[2026-06-07 02:27:26,010][321787] Fps is (10 sec: 16384.4, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 17530880. Throughput: 0: 17459.2. Samples: 17546624. Policy #0 lag: (min: 14.0, avg: 29.9, max: 78.0) -[2026-06-07 02:27:26,011][321787] Avg episode reward: [(0, '1805.702')] -[2026-06-07 02:27:26,091][324535] Updated weights for policy 0, policy_version 34213 (0.0007) -[2026-06-07 02:27:26,281][324535] Updated weights for policy 0, policy_version 34223 (0.0007) -[2026-06-07 02:27:26,482][324273] Saving new best policy, reward=1805.702! -[2026-06-07 02:27:27,048][324535] Updated weights for policy 0, policy_version 34233 (0.0007) -[2026-06-07 02:27:27,259][324535] Updated weights for policy 0, policy_version 34243 (0.0007) -[2026-06-07 02:27:27,451][324535] Updated weights for policy 0, policy_version 34253 (0.0007) -[2026-06-07 02:27:27,657][324535] Updated weights for policy 0, policy_version 34263 (0.0007) -[2026-06-07 02:27:27,856][324535] Updated weights for policy 0, policy_version 34273 (0.0007) -[2026-06-07 02:27:28,078][324535] Updated weights for policy 0, policy_version 34283 (0.0007) -[2026-06-07 02:27:28,279][324535] Updated weights for policy 0, policy_version 34293 (0.0007) -[2026-06-07 02:27:29,052][324535] Updated weights for policy 0, policy_version 34303 (0.0007) -[2026-06-07 02:27:29,266][324535] Updated weights for policy 0, policy_version 34313 (0.0007) -[2026-06-07 02:27:29,453][324535] Updated weights for policy 0, policy_version 34323 (0.0007) -[2026-06-07 02:27:29,670][324535] Updated weights for policy 0, policy_version 34333 (0.0007) -[2026-06-07 02:27:29,896][324535] Updated weights for policy 0, policy_version 34343 (0.0007) -[2026-06-07 02:27:30,090][324535] Updated weights for policy 0, policy_version 34353 (0.0007) -[2026-06-07 02:27:30,852][324535] Updated weights for policy 0, policy_version 34363 (0.0007) -[2026-06-07 02:27:31,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 17629184. Throughput: 0: 17336.9. Samples: 17648768. Policy #0 lag: (min: 14.0, avg: 29.9, max: 78.0) -[2026-06-07 02:27:31,011][321787] Avg episode reward: [(0, '1811.352')] -[2026-06-07 02:27:31,054][324535] Updated weights for policy 0, policy_version 34373 (0.0008) -[2026-06-07 02:27:31,253][324535] Updated weights for policy 0, policy_version 34383 (0.0008) -[2026-06-07 02:27:31,474][324535] Updated weights for policy 0, policy_version 34393 (0.0008) -[2026-06-07 02:27:31,687][324535] Updated weights for policy 0, policy_version 34403 (0.0007) -[2026-06-07 02:27:31,902][324535] Updated weights for policy 0, policy_version 34413 (0.0007) -[2026-06-07 02:27:32,096][324535] Updated weights for policy 0, policy_version 34423 (0.0007) -[2026-06-07 02:27:32,116][324273] Saving new best policy, reward=1811.352! -[2026-06-07 02:27:32,861][324535] Updated weights for policy 0, policy_version 34433 (0.0008) -[2026-06-07 02:27:33,081][324535] Updated weights for policy 0, policy_version 34443 (0.0007) -[2026-06-07 02:27:33,273][324535] Updated weights for policy 0, policy_version 34453 (0.0007) -[2026-06-07 02:27:33,505][324535] Updated weights for policy 0, policy_version 34463 (0.0008) -[2026-06-07 02:27:33,734][324535] Updated weights for policy 0, policy_version 34474 (0.0007) -[2026-06-07 02:27:33,963][324535] Updated weights for policy 0, policy_version 34484 (0.0007) -[2026-06-07 02:27:34,712][324535] Updated weights for policy 0, policy_version 34494 (0.0007) -[2026-06-07 02:27:34,924][324535] Updated weights for policy 0, policy_version 34504 (0.0007) -[2026-06-07 02:27:35,145][324535] Updated weights for policy 0, policy_version 34514 (0.0007) -[2026-06-07 02:27:35,365][324535] Updated weights for policy 0, policy_version 34524 (0.0007) -[2026-06-07 02:27:35,582][324535] Updated weights for policy 0, policy_version 34534 (0.0007) -[2026-06-07 02:27:35,822][324535] Updated weights for policy 0, policy_version 34545 (0.0010) -[2026-06-07 02:27:36,010][321787] Fps is (10 sec: 19660.7, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 17727488. Throughput: 0: 17496.2. Samples: 17706880. Policy #0 lag: (min: 14.0, avg: 29.9, max: 78.0) -[2026-06-07 02:27:36,011][321787] Avg episode reward: [(0, '1815.322')] -[2026-06-07 02:27:36,016][324273] Saving new best policy, reward=1815.322! -[2026-06-07 02:27:36,552][324535] Updated weights for policy 0, policy_version 34555 (0.0009) -[2026-06-07 02:27:36,763][324535] Updated weights for policy 0, policy_version 34565 (0.0007) -[2026-06-07 02:27:36,990][324535] Updated weights for policy 0, policy_version 34575 (0.0008) -[2026-06-07 02:27:37,184][324535] Updated weights for policy 0, policy_version 34585 (0.0007) -[2026-06-07 02:27:37,412][324535] Updated weights for policy 0, policy_version 34595 (0.0007) -[2026-06-07 02:27:37,615][324535] Updated weights for policy 0, policy_version 34605 (0.0007) -[2026-06-07 02:27:37,822][324535] Updated weights for policy 0, policy_version 34615 (0.0007) -[2026-06-07 02:27:38,603][324535] Updated weights for policy 0, policy_version 34625 (0.0007) -[2026-06-07 02:27:38,842][324535] Updated weights for policy 0, policy_version 34636 (0.0007) -[2026-06-07 02:27:39,057][324535] Updated weights for policy 0, policy_version 34646 (0.0007) -[2026-06-07 02:27:39,268][324535] Updated weights for policy 0, policy_version 34656 (0.0007) -[2026-06-07 02:27:39,474][324535] Updated weights for policy 0, policy_version 34666 (0.0007) -[2026-06-07 02:27:39,674][324535] Updated weights for policy 0, policy_version 34676 (0.0007) -[2026-06-07 02:27:40,471][324535] Updated weights for policy 0, policy_version 34687 (0.0007) -[2026-06-07 02:27:40,662][324535] Updated weights for policy 0, policy_version 34697 (0.0007) -[2026-06-07 02:27:40,866][324535] Updated weights for policy 0, policy_version 34707 (0.0007) -[2026-06-07 02:27:41,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 17793024. Throughput: 0: 17501.9. Samples: 17807872. Policy #0 lag: (min: 8.0, avg: 43.3, max: 72.0) -[2026-06-07 02:27:41,011][321787] Avg episode reward: [(0, '1868.997')] -[2026-06-07 02:27:41,077][324535] Updated weights for policy 0, policy_version 34717 (0.0007) -[2026-06-07 02:27:41,282][324535] Updated weights for policy 0, policy_version 34727 (0.0007) -[2026-06-07 02:27:41,485][324535] Updated weights for policy 0, policy_version 34737 (0.0007) -[2026-06-07 02:27:41,626][324273] Saving new best policy, reward=1868.997! -[2026-06-07 02:27:42,277][324535] Updated weights for policy 0, policy_version 34747 (0.0007) -[2026-06-07 02:27:42,493][324535] Updated weights for policy 0, policy_version 34757 (0.0007) -[2026-06-07 02:27:42,693][324535] Updated weights for policy 0, policy_version 34767 (0.0007) -[2026-06-07 02:27:42,892][324535] Updated weights for policy 0, policy_version 34777 (0.0007) -[2026-06-07 02:27:43,102][324535] Updated weights for policy 0, policy_version 34787 (0.0007) -[2026-06-07 02:27:43,326][324535] Updated weights for policy 0, policy_version 34798 (0.0007) -[2026-06-07 02:27:43,521][324535] Updated weights for policy 0, policy_version 34808 (0.0007) -[2026-06-07 02:27:44,337][324535] Updated weights for policy 0, policy_version 34818 (0.0007) -[2026-06-07 02:27:44,539][324535] Updated weights for policy 0, policy_version 34828 (0.0007) -[2026-06-07 02:27:44,736][324535] Updated weights for policy 0, policy_version 34838 (0.0007) -[2026-06-07 02:27:44,953][324535] Updated weights for policy 0, policy_version 34848 (0.0007) -[2026-06-07 02:27:45,148][324535] Updated weights for policy 0, policy_version 34858 (0.0007) -[2026-06-07 02:27:45,395][324535] Updated weights for policy 0, policy_version 34869 (0.0007) -[2026-06-07 02:27:46,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17439.3). Total num frames: 17891328. Throughput: 0: 17499.0. Samples: 17912448. Policy #0 lag: (min: 8.0, avg: 43.3, max: 72.0) -[2026-06-07 02:27:46,011][321787] Avg episode reward: [(0, '1844.771')] -[2026-06-07 02:27:46,141][324535] Updated weights for policy 0, policy_version 34879 (0.0007) -[2026-06-07 02:27:46,334][324535] Updated weights for policy 0, policy_version 34889 (0.0007) -[2026-06-07 02:27:46,532][324535] Updated weights for policy 0, policy_version 34899 (0.0007) -[2026-06-07 02:27:46,767][324535] Updated weights for policy 0, policy_version 34910 (0.0007) -[2026-06-07 02:27:46,970][324535] Updated weights for policy 0, policy_version 34920 (0.0007) -[2026-06-07 02:27:47,161][324535] Updated weights for policy 0, policy_version 34930 (0.0010) -[2026-06-07 02:27:47,927][324535] Updated weights for policy 0, policy_version 34940 (0.0009) -[2026-06-07 02:27:48,141][324535] Updated weights for policy 0, policy_version 34950 (0.0007) -[2026-06-07 02:27:48,349][324535] Updated weights for policy 0, policy_version 34960 (0.0007) -[2026-06-07 02:27:48,563][324535] Updated weights for policy 0, policy_version 34970 (0.0007) -[2026-06-07 02:27:48,785][324535] Updated weights for policy 0, policy_version 34980 (0.0007) -[2026-06-07 02:27:48,989][324535] Updated weights for policy 0, policy_version 34990 (0.0007) -[2026-06-07 02:27:49,184][324535] Updated weights for policy 0, policy_version 35000 (0.0007) -[2026-06-07 02:27:49,920][324535] Updated weights for policy 0, policy_version 35010 (0.0007) -[2026-06-07 02:27:50,139][324535] Updated weights for policy 0, policy_version 35020 (0.0007) -[2026-06-07 02:27:50,337][324535] Updated weights for policy 0, policy_version 35030 (0.0007) -[2026-06-07 02:27:50,544][324535] Updated weights for policy 0, policy_version 35040 (0.0007) -[2026-06-07 02:27:50,750][324535] Updated weights for policy 0, policy_version 35050 (0.0007) -[2026-06-07 02:27:50,963][324535] Updated weights for policy 0, policy_version 35060 (0.0007) -[2026-06-07 02:27:51,010][321787] Fps is (10 sec: 16384.0, 60 sec: 16930.1, 300 sec: 17439.2). Total num frames: 17956864. Throughput: 0: 17573.0. Samples: 17970688. Policy #0 lag: (min: 8.0, avg: 43.3, max: 72.0) -[2026-06-07 02:27:51,011][321787] Avg episode reward: [(0, '1838.429')] -[2026-06-07 02:27:51,760][324535] Updated weights for policy 0, policy_version 35070 (0.0007) -[2026-06-07 02:27:51,958][324535] Updated weights for policy 0, policy_version 35080 (0.0007) -[2026-06-07 02:27:52,171][324535] Updated weights for policy 0, policy_version 35090 (0.0007) -[2026-06-07 02:27:52,366][324535] Updated weights for policy 0, policy_version 35100 (0.0007) -[2026-06-07 02:27:52,557][324535] Updated weights for policy 0, policy_version 35110 (0.0007) -[2026-06-07 02:27:52,759][324535] Updated weights for policy 0, policy_version 35120 (0.0007) -[2026-06-07 02:27:53,553][324535] Updated weights for policy 0, policy_version 35130 (0.0007) -[2026-06-07 02:27:53,766][324535] Updated weights for policy 0, policy_version 35140 (0.0007) -[2026-06-07 02:27:53,974][324535] Updated weights for policy 0, policy_version 35150 (0.0007) -[2026-06-07 02:27:54,179][324535] Updated weights for policy 0, policy_version 35160 (0.0007) -[2026-06-07 02:27:54,387][324535] Updated weights for policy 0, policy_version 35170 (0.0007) -[2026-06-07 02:27:54,604][324535] Updated weights for policy 0, policy_version 35180 (0.0007) -[2026-06-07 02:27:54,818][324535] Updated weights for policy 0, policy_version 35190 (0.0007) -[2026-06-07 02:27:55,567][324535] Updated weights for policy 0, policy_version 35200 (0.0007) -[2026-06-07 02:27:55,785][324535] Updated weights for policy 0, policy_version 35210 (0.0007) -[2026-06-07 02:27:55,985][324535] Updated weights for policy 0, policy_version 35220 (0.0007) -[2026-06-07 02:27:56,010][321787] Fps is (10 sec: 16383.9, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 18055168. Throughput: 0: 17592.9. Samples: 18073472. Policy #0 lag: (min: 8.0, avg: 43.3, max: 72.0) -[2026-06-07 02:27:56,011][321787] Avg episode reward: [(0, '1841.526')] -[2026-06-07 02:27:56,181][324535] Updated weights for policy 0, policy_version 35230 (0.0007) -[2026-06-07 02:27:56,390][324535] Updated weights for policy 0, policy_version 35240 (0.0007) -[2026-06-07 02:27:56,604][324535] Updated weights for policy 0, policy_version 35250 (0.0006) -[2026-06-07 02:27:57,406][324535] Updated weights for policy 0, policy_version 35260 (0.0007) -[2026-06-07 02:27:57,613][324535] Updated weights for policy 0, policy_version 35270 (0.0007) -[2026-06-07 02:27:57,833][324535] Updated weights for policy 0, policy_version 35280 (0.0007) -[2026-06-07 02:27:58,053][324535] Updated weights for policy 0, policy_version 35290 (0.0011) -[2026-06-07 02:27:58,266][324535] Updated weights for policy 0, policy_version 35300 (0.0011) -[2026-06-07 02:27:58,466][324535] Updated weights for policy 0, policy_version 35310 (0.0010) -[2026-06-07 02:27:58,656][324535] Updated weights for policy 0, policy_version 35320 (0.0007) -[2026-06-07 02:27:59,409][324535] Updated weights for policy 0, policy_version 35330 (0.0007) -[2026-06-07 02:27:59,668][324535] Updated weights for policy 0, policy_version 35341 (0.0007) -[2026-06-07 02:27:59,871][324535] Updated weights for policy 0, policy_version 35351 (0.0007) -[2026-06-07 02:28:00,077][324535] Updated weights for policy 0, policy_version 35361 (0.0007) -[2026-06-07 02:28:00,286][324535] Updated weights for policy 0, policy_version 35371 (0.0007) -[2026-06-07 02:28:00,466][324535] Updated weights for policy 0, policy_version 35381 (0.0007) -[2026-06-07 02:28:01,010][321787] Fps is (10 sec: 19660.9, 60 sec: 17476.3, 300 sec: 17439.3). Total num frames: 18153472. Throughput: 0: 17567.4. Samples: 18176256. Policy #0 lag: (min: 8.0, avg: 43.3, max: 72.0) -[2026-06-07 02:28:01,011][321787] Avg episode reward: [(0, '1828.869')] -[2026-06-07 02:28:01,227][324535] Updated weights for policy 0, policy_version 35391 (0.0007) -[2026-06-07 02:28:01,442][324535] Updated weights for policy 0, policy_version 35401 (0.0007) -[2026-06-07 02:28:01,660][324535] Updated weights for policy 0, policy_version 35411 (0.0007) -[2026-06-07 02:28:01,897][324535] Updated weights for policy 0, policy_version 35422 (0.0007) -[2026-06-07 02:28:02,099][324535] Updated weights for policy 0, policy_version 35432 (0.0007) -[2026-06-07 02:28:02,285][324535] Updated weights for policy 0, policy_version 35442 (0.0007) -[2026-06-07 02:28:03,054][324535] Updated weights for policy 0, policy_version 35452 (0.0007) -[2026-06-07 02:28:03,254][324535] Updated weights for policy 0, policy_version 35462 (0.0007) -[2026-06-07 02:28:03,450][324535] Updated weights for policy 0, policy_version 35472 (0.0007) -[2026-06-07 02:28:03,667][324535] Updated weights for policy 0, policy_version 35482 (0.0007) -[2026-06-07 02:28:03,870][324535] Updated weights for policy 0, policy_version 35492 (0.0007) -[2026-06-07 02:28:04,081][324535] Updated weights for policy 0, policy_version 35502 (0.0007) -[2026-06-07 02:28:04,277][324535] Updated weights for policy 0, policy_version 35512 (0.0007) -[2026-06-07 02:28:05,023][324535] Updated weights for policy 0, policy_version 35522 (0.0007) -[2026-06-07 02:28:05,243][324535] Updated weights for policy 0, policy_version 35532 (0.0007) -[2026-06-07 02:28:05,432][324535] Updated weights for policy 0, policy_version 35542 (0.0007) -[2026-06-07 02:28:05,655][324535] Updated weights for policy 0, policy_version 35552 (0.0007) -[2026-06-07 02:28:05,867][324535] Updated weights for policy 0, policy_version 35562 (0.0007) -[2026-06-07 02:28:06,010][321787] Fps is (10 sec: 16383.9, 60 sec: 16930.1, 300 sec: 17439.2). Total num frames: 18219008. Throughput: 0: 17573.0. Samples: 18233856. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:28:06,011][321787] Avg episode reward: [(0, '1829.346')] -[2026-06-07 02:28:06,100][324535] Updated weights for policy 0, policy_version 35572 (0.0007) -[2026-06-07 02:28:06,823][324535] Updated weights for policy 0, policy_version 35582 (0.0007) -[2026-06-07 02:28:07,031][324535] Updated weights for policy 0, policy_version 35592 (0.0007) -[2026-06-07 02:28:07,259][324535] Updated weights for policy 0, policy_version 35603 (0.0007) -[2026-06-07 02:28:07,487][324535] Updated weights for policy 0, policy_version 35614 (0.0007) -[2026-06-07 02:28:07,709][324535] Updated weights for policy 0, policy_version 35624 (0.0007) -[2026-06-07 02:28:07,909][324535] Updated weights for policy 0, policy_version 35634 (0.0007) -[2026-06-07 02:28:08,666][324535] Updated weights for policy 0, policy_version 35644 (0.0007) -[2026-06-07 02:28:08,857][324535] Updated weights for policy 0, policy_version 35654 (0.0007) -[2026-06-07 02:28:09,062][324535] Updated weights for policy 0, policy_version 35664 (0.0007) -[2026-06-07 02:28:09,271][324535] Updated weights for policy 0, policy_version 35674 (0.0007) -[2026-06-07 02:28:09,481][324535] Updated weights for policy 0, policy_version 35684 (0.0007) -[2026-06-07 02:28:09,701][324535] Updated weights for policy 0, policy_version 35694 (0.0007) -[2026-06-07 02:28:09,917][324535] Updated weights for policy 0, policy_version 35704 (0.0007) -[2026-06-07 02:28:10,662][324535] Updated weights for policy 0, policy_version 35714 (0.0007) -[2026-06-07 02:28:10,855][324535] Updated weights for policy 0, policy_version 35724 (0.0007) -[2026-06-07 02:28:11,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 18317312. Throughput: 0: 17530.3. Samples: 18335488. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:28:11,011][321787] Avg episode reward: [(0, '1770.495')] -[2026-06-07 02:28:11,083][324535] Updated weights for policy 0, policy_version 35734 (0.0007) -[2026-06-07 02:28:11,324][324535] Updated weights for policy 0, policy_version 35745 (0.0007) -[2026-06-07 02:28:11,534][324535] Updated weights for policy 0, policy_version 35755 (0.0007) -[2026-06-07 02:28:11,730][324535] Updated weights for policy 0, policy_version 35765 (0.0007) -[2026-06-07 02:28:12,506][324535] Updated weights for policy 0, policy_version 35775 (0.0006) -[2026-06-07 02:28:12,714][324535] Updated weights for policy 0, policy_version 35785 (0.0006) -[2026-06-07 02:28:12,931][324535] Updated weights for policy 0, policy_version 35795 (0.0007) -[2026-06-07 02:28:13,168][324535] Updated weights for policy 0, policy_version 35806 (0.0006) -[2026-06-07 02:28:13,389][324535] Updated weights for policy 0, policy_version 35816 (0.0006) -[2026-06-07 02:28:13,602][324535] Updated weights for policy 0, policy_version 35826 (0.0006) -[2026-06-07 02:28:14,376][324535] Updated weights for policy 0, policy_version 35836 (0.0007) -[2026-06-07 02:28:14,595][324535] Updated weights for policy 0, policy_version 35847 (0.0007) -[2026-06-07 02:28:14,824][324535] Updated weights for policy 0, policy_version 35857 (0.0007) -[2026-06-07 02:28:15,020][324535] Updated weights for policy 0, policy_version 35867 (0.0007) -[2026-06-07 02:28:15,229][324535] Updated weights for policy 0, policy_version 35877 (0.0007) -[2026-06-07 02:28:15,429][324535] Updated weights for policy 0, policy_version 35887 (0.0007) -[2026-06-07 02:28:16,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 18415616. Throughput: 0: 17536.0. Samples: 18437888. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:28:16,011][321787] Avg episode reward: [(0, '1734.329')] -[2026-06-07 02:28:16,215][324535] Updated weights for policy 0, policy_version 35897 (0.0007) -[2026-06-07 02:28:16,430][324535] Updated weights for policy 0, policy_version 35907 (0.0007) -[2026-06-07 02:28:16,631][324535] Updated weights for policy 0, policy_version 35917 (0.0007) -[2026-06-07 02:28:16,868][324535] Updated weights for policy 0, policy_version 35929 (0.0007) -[2026-06-07 02:28:17,098][324535] Updated weights for policy 0, policy_version 35939 (0.0007) -[2026-06-07 02:28:17,299][324535] Updated weights for policy 0, policy_version 35949 (0.0007) -[2026-06-07 02:28:17,515][324535] Updated weights for policy 0, policy_version 35959 (0.0007) -[2026-06-07 02:28:18,247][324535] Updated weights for policy 0, policy_version 35969 (0.0007) -[2026-06-07 02:28:18,462][324535] Updated weights for policy 0, policy_version 35979 (0.0007) -[2026-06-07 02:28:18,691][324535] Updated weights for policy 0, policy_version 35989 (0.0007) -[2026-06-07 02:28:18,884][324535] Updated weights for policy 0, policy_version 35999 (0.0007) -[2026-06-07 02:28:19,089][324535] Updated weights for policy 0, policy_version 36009 (0.0007) -[2026-06-07 02:28:19,297][324535] Updated weights for policy 0, policy_version 36019 (0.0007) -[2026-06-07 02:28:20,092][324535] Updated weights for policy 0, policy_version 36029 (0.0007) -[2026-06-07 02:28:20,317][324535] Updated weights for policy 0, policy_version 36040 (0.0007) -[2026-06-07 02:28:20,523][324535] Updated weights for policy 0, policy_version 36050 (0.0007) -[2026-06-07 02:28:20,729][324535] Updated weights for policy 0, policy_version 36060 (0.0007) -[2026-06-07 02:28:20,943][324535] Updated weights for policy 0, policy_version 36070 (0.0007) -[2026-06-07 02:28:21,010][321787] Fps is (10 sec: 16384.0, 60 sec: 16930.1, 300 sec: 17439.2). Total num frames: 18481152. Throughput: 0: 17538.9. Samples: 18496128. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:28:21,011][321787] Avg episode reward: [(0, '1740.719')] -[2026-06-07 02:28:21,145][324535] Updated weights for policy 0, policy_version 36080 (0.0008) -[2026-06-07 02:28:21,901][324535] Updated weights for policy 0, policy_version 36090 (0.0008) -[2026-06-07 02:28:22,111][324535] Updated weights for policy 0, policy_version 36100 (0.0007) -[2026-06-07 02:28:22,354][324535] Updated weights for policy 0, policy_version 36111 (0.0007) -[2026-06-07 02:28:22,572][324535] Updated weights for policy 0, policy_version 36121 (0.0007) -[2026-06-07 02:28:22,781][324535] Updated weights for policy 0, policy_version 36131 (0.0007) -[2026-06-07 02:28:22,992][324535] Updated weights for policy 0, policy_version 36141 (0.0007) -[2026-06-07 02:28:23,211][324535] Updated weights for policy 0, policy_version 36151 (0.0007) -[2026-06-07 02:28:23,951][324535] Updated weights for policy 0, policy_version 36162 (0.0007) -[2026-06-07 02:28:24,156][324535] Updated weights for policy 0, policy_version 36172 (0.0006) -[2026-06-07 02:28:24,350][324535] Updated weights for policy 0, policy_version 36182 (0.0007) -[2026-06-07 02:28:24,575][324535] Updated weights for policy 0, policy_version 36192 (0.0007) -[2026-06-07 02:28:24,778][324535] Updated weights for policy 0, policy_version 36202 (0.0007) -[2026-06-07 02:28:24,992][324535] Updated weights for policy 0, policy_version 36212 (0.0007) -[2026-06-07 02:28:25,788][324535] Updated weights for policy 0, policy_version 36222 (0.0007) -[2026-06-07 02:28:26,003][324535] Updated weights for policy 0, policy_version 36232 (0.0007) -[2026-06-07 02:28:26,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 18579456. Throughput: 0: 17570.1. Samples: 18598528. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:28:26,011][321787] Avg episode reward: [(0, '1667.080')] -[2026-06-07 02:28:26,214][324535] Updated weights for policy 0, policy_version 36242 (0.0007) -[2026-06-07 02:28:26,453][324535] Updated weights for policy 0, policy_version 36253 (0.0007) -[2026-06-07 02:28:26,664][324535] Updated weights for policy 0, policy_version 36263 (0.0007) -[2026-06-07 02:28:26,899][324535] Updated weights for policy 0, policy_version 36273 (0.0007) -[2026-06-07 02:28:27,649][324535] Updated weights for policy 0, policy_version 36283 (0.0007) -[2026-06-07 02:28:27,869][324535] Updated weights for policy 0, policy_version 36294 (0.0007) -[2026-06-07 02:28:28,074][324535] Updated weights for policy 0, policy_version 36304 (0.0007) -[2026-06-07 02:28:28,285][324535] Updated weights for policy 0, policy_version 36314 (0.0007) -[2026-06-07 02:28:28,491][324535] Updated weights for policy 0, policy_version 36324 (0.0007) -[2026-06-07 02:28:28,701][324535] Updated weights for policy 0, policy_version 36334 (0.0007) -[2026-06-07 02:28:28,913][324535] Updated weights for policy 0, policy_version 36344 (0.0007) -[2026-06-07 02:28:29,671][324535] Updated weights for policy 0, policy_version 36354 (0.0011) -[2026-06-07 02:28:29,878][324535] Updated weights for policy 0, policy_version 36364 (0.0008) -[2026-06-07 02:28:30,100][324535] Updated weights for policy 0, policy_version 36374 (0.0007) -[2026-06-07 02:28:30,325][324535] Updated weights for policy 0, policy_version 36384 (0.0007) -[2026-06-07 02:28:30,542][324535] Updated weights for policy 0, policy_version 36394 (0.0007) -[2026-06-07 02:28:30,756][324535] Updated weights for policy 0, policy_version 36404 (0.0007) -[2026-06-07 02:28:31,010][321787] Fps is (10 sec: 19660.6, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 18677760. Throughput: 0: 17533.1. Samples: 18701440. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) -[2026-06-07 02:28:31,011][321787] Avg episode reward: [(0, '1625.021')] -[2026-06-07 02:28:31,517][324535] Updated weights for policy 0, policy_version 36414 (0.0007) -[2026-06-07 02:28:31,709][324535] Updated weights for policy 0, policy_version 36424 (0.0007) -[2026-06-07 02:28:31,939][324535] Updated weights for policy 0, policy_version 36435 (0.0008) -[2026-06-07 02:28:32,145][324535] Updated weights for policy 0, policy_version 36445 (0.0008) -[2026-06-07 02:28:32,369][324535] Updated weights for policy 0, policy_version 36455 (0.0007) -[2026-06-07 02:28:32,592][324535] Updated weights for policy 0, policy_version 36466 (0.0007) -[2026-06-07 02:28:33,320][324535] Updated weights for policy 0, policy_version 36476 (0.0007) -[2026-06-07 02:28:33,539][324535] Updated weights for policy 0, policy_version 36486 (0.0007) -[2026-06-07 02:28:33,743][324535] Updated weights for policy 0, policy_version 36496 (0.0007) -[2026-06-07 02:28:33,971][324535] Updated weights for policy 0, policy_version 36506 (0.0007) -[2026-06-07 02:28:34,181][324535] Updated weights for policy 0, policy_version 36516 (0.0007) -[2026-06-07 02:28:34,395][324535] Updated weights for policy 0, policy_version 36526 (0.0007) -[2026-06-07 02:28:34,606][324535] Updated weights for policy 0, policy_version 36536 (0.0007) -[2026-06-07 02:28:35,360][324535] Updated weights for policy 0, policy_version 36546 (0.0007) -[2026-06-07 02:28:35,586][324535] Updated weights for policy 0, policy_version 36556 (0.0007) -[2026-06-07 02:28:35,798][324535] Updated weights for policy 0, policy_version 36567 (0.0011) -[2026-06-07 02:28:36,010][321787] Fps is (10 sec: 16383.8, 60 sec: 16930.1, 300 sec: 17439.2). Total num frames: 18743296. Throughput: 0: 17499.0. Samples: 18758144. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) -[2026-06-07 02:28:36,011][321787] Avg episode reward: [(0, '1686.528')] -[2026-06-07 02:28:36,013][324535] Updated weights for policy 0, policy_version 36577 (0.0010) -[2026-06-07 02:28:36,239][324535] Updated weights for policy 0, policy_version 36588 (0.0007) -[2026-06-07 02:28:36,437][324535] Updated weights for policy 0, policy_version 36598 (0.0007) -[2026-06-07 02:28:37,188][324535] Updated weights for policy 0, policy_version 36608 (0.0007) -[2026-06-07 02:28:37,426][324535] Updated weights for policy 0, policy_version 36619 (0.0007) -[2026-06-07 02:28:37,636][324535] Updated weights for policy 0, policy_version 36629 (0.0007) -[2026-06-07 02:28:37,857][324535] Updated weights for policy 0, policy_version 36639 (0.0007) -[2026-06-07 02:28:38,066][324535] Updated weights for policy 0, policy_version 36649 (0.0007) -[2026-06-07 02:28:38,260][324535] Updated weights for policy 0, policy_version 36659 (0.0007) -[2026-06-07 02:28:39,023][324535] Updated weights for policy 0, policy_version 36669 (0.0007) -[2026-06-07 02:28:39,230][324535] Updated weights for policy 0, policy_version 36679 (0.0007) -[2026-06-07 02:28:39,454][324535] Updated weights for policy 0, policy_version 36689 (0.0007) -[2026-06-07 02:28:39,664][324535] Updated weights for policy 0, policy_version 36700 (0.0007) -[2026-06-07 02:28:39,863][324535] Updated weights for policy 0, policy_version 36710 (0.0007) -[2026-06-07 02:28:40,085][324535] Updated weights for policy 0, policy_version 36720 (0.0007) -[2026-06-07 02:28:40,870][324535] Updated weights for policy 0, policy_version 36730 (0.0007) -[2026-06-07 02:28:41,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 18841600. Throughput: 0: 17496.2. Samples: 18860800. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) -[2026-06-07 02:28:41,011][321787] Avg episode reward: [(0, '1698.867')] -[2026-06-07 02:28:41,083][324535] Updated weights for policy 0, policy_version 36740 (0.0007) -[2026-06-07 02:28:41,293][324535] Updated weights for policy 0, policy_version 36750 (0.0007) -[2026-06-07 02:28:41,503][324535] Updated weights for policy 0, policy_version 36761 (0.0007) -[2026-06-07 02:28:41,709][324535] Updated weights for policy 0, policy_version 36771 (0.0007) -[2026-06-07 02:28:41,914][324535] Updated weights for policy 0, policy_version 36781 (0.0007) -[2026-06-07 02:28:42,125][324535] Updated weights for policy 0, policy_version 36791 (0.0007) -[2026-06-07 02:28:42,865][324535] Updated weights for policy 0, policy_version 36801 (0.0007) -[2026-06-07 02:28:43,097][324535] Updated weights for policy 0, policy_version 36812 (0.0007) -[2026-06-07 02:28:43,306][324535] Updated weights for policy 0, policy_version 36822 (0.0007) -[2026-06-07 02:28:43,502][324535] Updated weights for policy 0, policy_version 36832 (0.0007) -[2026-06-07 02:28:43,708][324535] Updated weights for policy 0, policy_version 36842 (0.0007) -[2026-06-07 02:28:43,926][324535] Updated weights for policy 0, policy_version 36852 (0.0007) -[2026-06-07 02:28:44,693][324535] Updated weights for policy 0, policy_version 36862 (0.0007) -[2026-06-07 02:28:44,902][324535] Updated weights for policy 0, policy_version 36872 (0.0007) -[2026-06-07 02:28:45,143][324535] Updated weights for policy 0, policy_version 36883 (0.0007) -[2026-06-07 02:28:45,344][324535] Updated weights for policy 0, policy_version 36893 (0.0007) -[2026-06-07 02:28:45,563][324535] Updated weights for policy 0, policy_version 36903 (0.0007) -[2026-06-07 02:28:45,766][324535] Updated weights for policy 0, policy_version 36913 (0.0007) -[2026-06-07 02:28:46,010][321787] Fps is (10 sec: 19661.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 18939904. Throughput: 0: 17501.9. Samples: 18963840. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) -[2026-06-07 02:28:46,011][321787] Avg episode reward: [(0, '1702.296')] -[2026-06-07 02:28:46,498][324535] Updated weights for policy 0, policy_version 36923 (0.0007) -[2026-06-07 02:28:46,701][324535] Updated weights for policy 0, policy_version 36933 (0.0007) -[2026-06-07 02:28:46,909][324535] Updated weights for policy 0, policy_version 36943 (0.0007) -[2026-06-07 02:28:47,102][324535] Updated weights for policy 0, policy_version 36953 (0.0007) -[2026-06-07 02:28:47,291][324535] Updated weights for policy 0, policy_version 36963 (0.0007) -[2026-06-07 02:28:47,510][324535] Updated weights for policy 0, policy_version 36973 (0.0007) -[2026-06-07 02:28:47,696][324535] Updated weights for policy 0, policy_version 36983 (0.0007) -[2026-06-07 02:28:48,447][324535] Updated weights for policy 0, policy_version 36993 (0.0007) -[2026-06-07 02:28:48,670][324535] Updated weights for policy 0, policy_version 37003 (0.0007) -[2026-06-07 02:28:48,907][324535] Updated weights for policy 0, policy_version 37014 (0.0007) -[2026-06-07 02:28:49,096][324535] Updated weights for policy 0, policy_version 37024 (0.0007) -[2026-06-07 02:28:49,308][324535] Updated weights for policy 0, policy_version 37034 (0.0007) -[2026-06-07 02:28:49,517][324535] Updated weights for policy 0, policy_version 37044 (0.0007) -[2026-06-07 02:28:50,212][324535] Updated weights for policy 0, policy_version 37054 (0.0007) -[2026-06-07 02:28:50,432][324535] Updated weights for policy 0, policy_version 37064 (0.0007) -[2026-06-07 02:28:50,629][324535] Updated weights for policy 0, policy_version 37074 (0.0007) -[2026-06-07 02:28:50,827][324535] Updated weights for policy 0, policy_version 37084 (0.0007) -[2026-06-07 02:28:51,010][321787] Fps is (10 sec: 16383.8, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 19005440. Throughput: 0: 17499.0. Samples: 19021312. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) -[2026-06-07 02:28:51,011][321787] Avg episode reward: [(0, '1702.933')] -[2026-06-07 02:28:51,040][324535] Updated weights for policy 0, policy_version 37094 (0.0007) -[2026-06-07 02:28:51,270][324535] Updated weights for policy 0, policy_version 37105 (0.0007) -[2026-06-07 02:28:52,005][324535] Updated weights for policy 0, policy_version 37115 (0.0007) -[2026-06-07 02:28:52,218][324535] Updated weights for policy 0, policy_version 37125 (0.0007) -[2026-06-07 02:28:52,430][324535] Updated weights for policy 0, policy_version 37135 (0.0007) -[2026-06-07 02:28:52,647][324535] Updated weights for policy 0, policy_version 37145 (0.0007) -[2026-06-07 02:28:52,851][324535] Updated weights for policy 0, policy_version 37155 (0.0007) -[2026-06-07 02:28:53,039][324535] Updated weights for policy 0, policy_version 37165 (0.0007) -[2026-06-07 02:28:53,255][324535] Updated weights for policy 0, policy_version 37175 (0.0007) -[2026-06-07 02:28:53,980][324535] Updated weights for policy 0, policy_version 37185 (0.0007) -[2026-06-07 02:28:54,185][324535] Updated weights for policy 0, policy_version 37195 (0.0007) -[2026-06-07 02:28:54,394][324535] Updated weights for policy 0, policy_version 37205 (0.0007) -[2026-06-07 02:28:54,618][324535] Updated weights for policy 0, policy_version 37216 (0.0007) -[2026-06-07 02:28:54,861][324535] Updated weights for policy 0, policy_version 37227 (0.0007) -[2026-06-07 02:28:55,087][324535] Updated weights for policy 0, policy_version 37238 (0.0007) -[2026-06-07 02:28:55,835][324535] Updated weights for policy 0, policy_version 37248 (0.0007) -[2026-06-07 02:28:56,010][321787] Fps is (10 sec: 16383.9, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 19103744. Throughput: 0: 17516.1. Samples: 19123712. Policy #0 lag: (min: 63.0, avg: 79.3, max: 127.0) -[2026-06-07 02:28:56,011][321787] Avg episode reward: [(0, '1744.487')] -[2026-06-07 02:28:56,047][324535] Updated weights for policy 0, policy_version 37258 (0.0007) -[2026-06-07 02:28:56,249][324535] Updated weights for policy 0, policy_version 37268 (0.0007) -[2026-06-07 02:28:56,454][324535] Updated weights for policy 0, policy_version 37278 (0.0007) -[2026-06-07 02:28:56,678][324535] Updated weights for policy 0, policy_version 37288 (0.0007) -[2026-06-07 02:28:56,897][324535] Updated weights for policy 0, policy_version 37298 (0.0007) -[2026-06-07 02:28:57,634][324535] Updated weights for policy 0, policy_version 37308 (0.0007) -[2026-06-07 02:28:57,830][324535] Updated weights for policy 0, policy_version 37318 (0.0007) -[2026-06-07 02:28:58,068][324535] Updated weights for policy 0, policy_version 37329 (0.0007) -[2026-06-07 02:28:58,289][324535] Updated weights for policy 0, policy_version 37339 (0.0007) -[2026-06-07 02:28:58,502][324535] Updated weights for policy 0, policy_version 37349 (0.0007) -[2026-06-07 02:28:58,706][324535] Updated weights for policy 0, policy_version 37359 (0.0007) -[2026-06-07 02:28:59,472][324535] Updated weights for policy 0, policy_version 37369 (0.0007) -[2026-06-07 02:28:59,683][324535] Updated weights for policy 0, policy_version 37379 (0.0007) -[2026-06-07 02:28:59,881][324535] Updated weights for policy 0, policy_version 37389 (0.0007) -[2026-06-07 02:29:00,087][324535] Updated weights for policy 0, policy_version 37399 (0.0007) -[2026-06-07 02:29:00,278][324535] Updated weights for policy 0, policy_version 37409 (0.0007) -[2026-06-07 02:29:00,494][324535] Updated weights for policy 0, policy_version 37419 (0.0007) -[2026-06-07 02:29:00,694][324535] Updated weights for policy 0, policy_version 37429 (0.0007) -[2026-06-07 02:29:01,010][321787] Fps is (10 sec: 19661.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 19202048. Throughput: 0: 17482.0. Samples: 19224576. Policy #0 lag: (min: 63.0, avg: 79.3, max: 127.0) -[2026-06-07 02:29:01,011][321787] Avg episode reward: [(0, '1800.062')] -[2026-06-07 02:29:01,464][324535] Updated weights for policy 0, policy_version 37439 (0.0007) -[2026-06-07 02:29:01,672][324535] Updated weights for policy 0, policy_version 37449 (0.0007) -[2026-06-07 02:29:01,911][324535] Updated weights for policy 0, policy_version 37460 (0.0007) -[2026-06-07 02:29:02,126][324535] Updated weights for policy 0, policy_version 37470 (0.0007) -[2026-06-07 02:29:02,344][324535] Updated weights for policy 0, policy_version 37480 (0.0007) -[2026-06-07 02:29:02,573][324535] Updated weights for policy 0, policy_version 37490 (0.0007) -[2026-06-07 02:29:03,295][324535] Updated weights for policy 0, policy_version 37500 (0.0007) -[2026-06-07 02:29:03,497][324535] Updated weights for policy 0, policy_version 37510 (0.0007) -[2026-06-07 02:29:03,693][324535] Updated weights for policy 0, policy_version 37520 (0.0007) -[2026-06-07 02:29:03,907][324535] Updated weights for policy 0, policy_version 37530 (0.0007) -[2026-06-07 02:29:04,122][324535] Updated weights for policy 0, policy_version 37540 (0.0007) -[2026-06-07 02:29:04,314][324535] Updated weights for policy 0, policy_version 37550 (0.0007) -[2026-06-07 02:29:04,510][324535] Updated weights for policy 0, policy_version 37560 (0.0007) -[2026-06-07 02:29:05,269][324535] Updated weights for policy 0, policy_version 37570 (0.0007) -[2026-06-07 02:29:05,469][324535] Updated weights for policy 0, policy_version 37580 (0.0007) -[2026-06-07 02:29:05,683][324535] Updated weights for policy 0, policy_version 37590 (0.0007) -[2026-06-07 02:29:05,889][324535] Updated weights for policy 0, policy_version 37600 (0.0007) -[2026-06-07 02:29:06,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 19267584. Throughput: 0: 17479.1. Samples: 19282688. Policy #0 lag: (min: 63.0, avg: 79.3, max: 127.0) -[2026-06-07 02:29:06,011][321787] Avg episode reward: [(0, '1805.767')] -[2026-06-07 02:29:06,089][324535] Updated weights for policy 0, policy_version 37610 (0.0007) -[2026-06-07 02:29:06,295][324535] Updated weights for policy 0, policy_version 37620 (0.0007) -[2026-06-07 02:29:07,044][324535] Updated weights for policy 0, policy_version 37630 (0.0007) -[2026-06-07 02:29:07,245][324535] Updated weights for policy 0, policy_version 37640 (0.0007) -[2026-06-07 02:29:07,461][324535] Updated weights for policy 0, policy_version 37650 (0.0007) -[2026-06-07 02:29:07,675][324535] Updated weights for policy 0, policy_version 37660 (0.0007) -[2026-06-07 02:29:07,883][324535] Updated weights for policy 0, policy_version 37670 (0.0007) -[2026-06-07 02:29:08,115][324535] Updated weights for policy 0, policy_version 37681 (0.0007) -[2026-06-07 02:29:08,876][324535] Updated weights for policy 0, policy_version 37691 (0.0007) -[2026-06-07 02:29:09,100][324535] Updated weights for policy 0, policy_version 37702 (0.0007) -[2026-06-07 02:29:09,303][324535] Updated weights for policy 0, policy_version 37712 (0.0007) -[2026-06-07 02:29:09,515][324535] Updated weights for policy 0, policy_version 37722 (0.0007) -[2026-06-07 02:29:09,735][324535] Updated weights for policy 0, policy_version 37733 (0.0007) -[2026-06-07 02:29:09,988][324535] Updated weights for policy 0, policy_version 37745 (0.0007) -[2026-06-07 02:29:10,762][324535] Updated weights for policy 0, policy_version 37755 (0.0007) -[2026-06-07 02:29:10,963][324535] Updated weights for policy 0, policy_version 37765 (0.0007) -[2026-06-07 02:29:11,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 19365888. Throughput: 0: 17470.6. Samples: 19384704. Policy #0 lag: (min: 63.0, avg: 79.3, max: 127.0) -[2026-06-07 02:29:11,011][321787] Avg episode reward: [(0, '1792.230')] -[2026-06-07 02:29:11,172][324535] Updated weights for policy 0, policy_version 37775 (0.0007) -[2026-06-07 02:29:11,386][324535] Updated weights for policy 0, policy_version 37785 (0.0007) -[2026-06-07 02:29:11,593][324535] Updated weights for policy 0, policy_version 37795 (0.0007) -[2026-06-07 02:29:11,826][324535] Updated weights for policy 0, policy_version 37805 (0.0007) -[2026-06-07 02:29:12,022][324535] Updated weights for policy 0, policy_version 37815 (0.0007) -[2026-06-07 02:29:12,783][324535] Updated weights for policy 0, policy_version 37825 (0.0007) -[2026-06-07 02:29:12,977][324535] Updated weights for policy 0, policy_version 37835 (0.0007) -[2026-06-07 02:29:13,192][324535] Updated weights for policy 0, policy_version 37845 (0.0007) -[2026-06-07 02:29:13,407][324535] Updated weights for policy 0, policy_version 37855 (0.0007) -[2026-06-07 02:29:13,598][324535] Updated weights for policy 0, policy_version 37865 (0.0007) -[2026-06-07 02:29:13,808][324535] Updated weights for policy 0, policy_version 37875 (0.0007) -[2026-06-07 02:29:14,572][324535] Updated weights for policy 0, policy_version 37885 (0.0007) -[2026-06-07 02:29:14,796][324535] Updated weights for policy 0, policy_version 37895 (0.0007) -[2026-06-07 02:29:15,000][324535] Updated weights for policy 0, policy_version 37905 (0.0007) -[2026-06-07 02:29:15,201][324535] Updated weights for policy 0, policy_version 37915 (0.0007) -[2026-06-07 02:29:15,413][324535] Updated weights for policy 0, policy_version 37925 (0.0007) -[2026-06-07 02:29:15,616][324535] Updated weights for policy 0, policy_version 37935 (0.0007) -[2026-06-07 02:29:16,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 19464192. Throughput: 0: 17450.7. Samples: 19486720. Policy #0 lag: (min: 63.0, avg: 79.3, max: 127.0) -[2026-06-07 02:29:16,011][321787] Avg episode reward: [(0, '1817.824')] -[2026-06-07 02:29:16,384][324535] Updated weights for policy 0, policy_version 37945 (0.0007) -[2026-06-07 02:29:16,585][324535] Updated weights for policy 0, policy_version 37955 (0.0007) -[2026-06-07 02:29:16,812][324535] Updated weights for policy 0, policy_version 37966 (0.0007) -[2026-06-07 02:29:17,076][324535] Updated weights for policy 0, policy_version 37978 (0.0007) -[2026-06-07 02:29:17,267][324535] Updated weights for policy 0, policy_version 37988 (0.0007) -[2026-06-07 02:29:17,473][324535] Updated weights for policy 0, policy_version 37998 (0.0007) -[2026-06-07 02:29:17,681][324535] Updated weights for policy 0, policy_version 38008 (0.0007) -[2026-06-07 02:29:18,435][324535] Updated weights for policy 0, policy_version 38018 (0.0007) -[2026-06-07 02:29:18,633][324535] Updated weights for policy 0, policy_version 38028 (0.0007) -[2026-06-07 02:29:18,829][324535] Updated weights for policy 0, policy_version 38038 (0.0007) -[2026-06-07 02:29:19,038][324535] Updated weights for policy 0, policy_version 38048 (0.0007) -[2026-06-07 02:29:19,236][324535] Updated weights for policy 0, policy_version 38058 (0.0007) -[2026-06-07 02:29:19,441][324535] Updated weights for policy 0, policy_version 38068 (0.0007) -[2026-06-07 02:29:20,215][324535] Updated weights for policy 0, policy_version 38078 (0.0007) -[2026-06-07 02:29:20,401][324535] Updated weights for policy 0, policy_version 38088 (0.0007) -[2026-06-07 02:29:20,619][324535] Updated weights for policy 0, policy_version 38098 (0.0007) -[2026-06-07 02:29:20,868][324535] Updated weights for policy 0, policy_version 38109 (0.0007) -[2026-06-07 02:29:21,010][321787] Fps is (10 sec: 16383.8, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 19529728. Throughput: 0: 17459.2. Samples: 19543808. Policy #0 lag: (min: 63.0, avg: 79.3, max: 127.0) -[2026-06-07 02:29:21,011][321787] Avg episode reward: [(0, '1793.962')] -[2026-06-07 02:29:21,066][324535] Updated weights for policy 0, policy_version 38119 (0.0007) -[2026-06-07 02:29:21,274][324535] Updated weights for policy 0, policy_version 38129 (0.0007) -[2026-06-07 02:29:22,039][324535] Updated weights for policy 0, policy_version 38139 (0.0007) -[2026-06-07 02:29:22,244][324535] Updated weights for policy 0, policy_version 38149 (0.0007) -[2026-06-07 02:29:22,436][324535] Updated weights for policy 0, policy_version 38159 (0.0007) -[2026-06-07 02:29:22,646][324535] Updated weights for policy 0, policy_version 38169 (0.0007) -[2026-06-07 02:29:22,858][324535] Updated weights for policy 0, policy_version 38179 (0.0007) -[2026-06-07 02:29:23,083][324535] Updated weights for policy 0, policy_version 38190 (0.0007) -[2026-06-07 02:29:23,266][324535] Updated weights for policy 0, policy_version 38200 (0.0009) -[2026-06-07 02:29:23,997][324535] Updated weights for policy 0, policy_version 38210 (0.0011) -[2026-06-07 02:29:24,208][324535] Updated weights for policy 0, policy_version 38220 (0.0011) -[2026-06-07 02:29:24,424][324535] Updated weights for policy 0, policy_version 38230 (0.0007) -[2026-06-07 02:29:24,609][324535] Updated weights for policy 0, policy_version 38240 (0.0007) -[2026-06-07 02:29:24,802][324535] Updated weights for policy 0, policy_version 38250 (0.0007) -[2026-06-07 02:29:25,013][324535] Updated weights for policy 0, policy_version 38260 (0.0007) -[2026-06-07 02:29:25,768][324535] Updated weights for policy 0, policy_version 38270 (0.0007) -[2026-06-07 02:29:25,980][324535] Updated weights for policy 0, policy_version 38280 (0.0007) -[2026-06-07 02:29:26,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 19628032. Throughput: 0: 17464.9. Samples: 19646720. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) -[2026-06-07 02:29:26,011][321787] Avg episode reward: [(0, '1778.823')] -[2026-06-07 02:29:26,213][324535] Updated weights for policy 0, policy_version 38291 (0.0008) -[2026-06-07 02:29:26,401][324535] Updated weights for policy 0, policy_version 38301 (0.0007) -[2026-06-07 02:29:26,607][324535] Updated weights for policy 0, policy_version 38311 (0.0007) -[2026-06-07 02:29:26,811][324535] Updated weights for policy 0, policy_version 38321 (0.0007) -[2026-06-07 02:29:27,543][324535] Updated weights for policy 0, policy_version 38331 (0.0008) -[2026-06-07 02:29:27,749][324535] Updated weights for policy 0, policy_version 38341 (0.0008) -[2026-06-07 02:29:27,982][324535] Updated weights for policy 0, policy_version 38352 (0.0009) -[2026-06-07 02:29:28,182][324535] Updated weights for policy 0, policy_version 38362 (0.0007) -[2026-06-07 02:29:28,386][324535] Updated weights for policy 0, policy_version 38372 (0.0007) -[2026-06-07 02:29:28,585][324535] Updated weights for policy 0, policy_version 38382 (0.0007) -[2026-06-07 02:29:28,782][324535] Updated weights for policy 0, policy_version 38392 (0.0007) -[2026-06-07 02:29:29,531][324535] Updated weights for policy 0, policy_version 38402 (0.0007) -[2026-06-07 02:29:29,723][324535] Updated weights for policy 0, policy_version 38412 (0.0007) -[2026-06-07 02:29:29,926][324535] Updated weights for policy 0, policy_version 38422 (0.0007) -[2026-06-07 02:29:30,130][324535] Updated weights for policy 0, policy_version 38432 (0.0007) -[2026-06-07 02:29:30,339][324535] Updated weights for policy 0, policy_version 38442 (0.0007) -[2026-06-07 02:29:30,534][324535] Updated weights for policy 0, policy_version 38452 (0.0007) -[2026-06-07 02:29:31,010][321787] Fps is (10 sec: 19661.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 19726336. Throughput: 0: 17425.1. Samples: 19747968. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) -[2026-06-07 02:29:31,011][321787] Avg episode reward: [(0, '1770.884')] -[2026-06-07 02:29:31,299][324535] Updated weights for policy 0, policy_version 38462 (0.0007) -[2026-06-07 02:29:31,501][324535] Updated weights for policy 0, policy_version 38472 (0.0007) -[2026-06-07 02:29:31,707][324535] Updated weights for policy 0, policy_version 38482 (0.0007) -[2026-06-07 02:29:31,908][324535] Updated weights for policy 0, policy_version 38492 (0.0007) -[2026-06-07 02:29:32,111][324535] Updated weights for policy 0, policy_version 38502 (0.0007) -[2026-06-07 02:29:32,310][324535] Updated weights for policy 0, policy_version 38512 (0.0007) -[2026-06-07 02:29:33,069][324535] Updated weights for policy 0, policy_version 38522 (0.0007) -[2026-06-07 02:29:33,282][324535] Updated weights for policy 0, policy_version 38532 (0.0007) -[2026-06-07 02:29:33,491][324535] Updated weights for policy 0, policy_version 38542 (0.0007) -[2026-06-07 02:29:33,696][324535] Updated weights for policy 0, policy_version 38552 (0.0007) -[2026-06-07 02:29:33,885][324535] Updated weights for policy 0, policy_version 38562 (0.0007) -[2026-06-07 02:29:34,089][324535] Updated weights for policy 0, policy_version 38572 (0.0006) -[2026-06-07 02:29:34,277][324535] Updated weights for policy 0, policy_version 38582 (0.0007) -[2026-06-07 02:29:35,033][324535] Updated weights for policy 0, policy_version 38592 (0.0007) -[2026-06-07 02:29:35,251][324535] Updated weights for policy 0, policy_version 38602 (0.0007) -[2026-06-07 02:29:35,463][324535] Updated weights for policy 0, policy_version 38612 (0.0007) -[2026-06-07 02:29:35,672][324535] Updated weights for policy 0, policy_version 38622 (0.0007) -[2026-06-07 02:29:35,877][324535] Updated weights for policy 0, policy_version 38632 (0.0007) -[2026-06-07 02:29:36,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 19791872. Throughput: 0: 17433.7. Samples: 19805824. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) -[2026-06-07 02:29:36,011][321787] Avg episode reward: [(0, '1744.018')] -[2026-06-07 02:29:36,075][324535] Updated weights for policy 0, policy_version 38642 (0.0007) -[2026-06-07 02:29:36,794][324535] Updated weights for policy 0, policy_version 38652 (0.0007) -[2026-06-07 02:29:36,994][324535] Updated weights for policy 0, policy_version 38662 (0.0007) -[2026-06-07 02:29:37,199][324535] Updated weights for policy 0, policy_version 38672 (0.0007) -[2026-06-07 02:29:37,412][324535] Updated weights for policy 0, policy_version 38682 (0.0007) -[2026-06-07 02:29:37,624][324535] Updated weights for policy 0, policy_version 38692 (0.0007) -[2026-06-07 02:29:37,841][324535] Updated weights for policy 0, policy_version 38703 (0.0007) -[2026-06-07 02:29:38,598][324535] Updated weights for policy 0, policy_version 38713 (0.0007) -[2026-06-07 02:29:38,815][324535] Updated weights for policy 0, policy_version 38723 (0.0007) -[2026-06-07 02:29:39,023][324535] Updated weights for policy 0, policy_version 38733 (0.0007) -[2026-06-07 02:29:39,237][324535] Updated weights for policy 0, policy_version 38743 (0.0007) -[2026-06-07 02:29:39,455][324535] Updated weights for policy 0, policy_version 38753 (0.0007) -[2026-06-07 02:29:39,666][324535] Updated weights for policy 0, policy_version 38763 (0.0011) -[2026-06-07 02:29:39,898][324535] Updated weights for policy 0, policy_version 38773 (0.0019) -[2026-06-07 02:29:40,662][324535] Updated weights for policy 0, policy_version 38783 (0.0018) -[2026-06-07 02:29:40,877][324535] Updated weights for policy 0, policy_version 38793 (0.0020) -[2026-06-07 02:29:41,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 19890176. Throughput: 0: 17445.0. Samples: 19908736. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) -[2026-06-07 02:29:41,011][321787] Avg episode reward: [(0, '1752.906')] -[2026-06-07 02:29:41,081][324535] Updated weights for policy 0, policy_version 38803 (0.0010) -[2026-06-07 02:29:41,276][324535] Updated weights for policy 0, policy_version 38813 (0.0007) -[2026-06-07 02:29:41,492][324535] Updated weights for policy 0, policy_version 38823 (0.0007) -[2026-06-07 02:29:41,697][324535] Updated weights for policy 0, policy_version 38834 (0.0009) -[2026-06-07 02:29:42,428][324535] Updated weights for policy 0, policy_version 38844 (0.0008) -[2026-06-07 02:29:42,632][324535] Updated weights for policy 0, policy_version 38854 (0.0007) -[2026-06-07 02:29:42,844][324535] Updated weights for policy 0, policy_version 38864 (0.0007) -[2026-06-07 02:29:43,046][324535] Updated weights for policy 0, policy_version 38874 (0.0007) -[2026-06-07 02:29:43,243][324535] Updated weights for policy 0, policy_version 38884 (0.0007) -[2026-06-07 02:29:43,452][324535] Updated weights for policy 0, policy_version 38894 (0.0007) -[2026-06-07 02:29:43,663][324535] Updated weights for policy 0, policy_version 38904 (0.0007) -[2026-06-07 02:29:44,422][324535] Updated weights for policy 0, policy_version 38914 (0.0007) -[2026-06-07 02:29:44,612][324535] Updated weights for policy 0, policy_version 38924 (0.0007) -[2026-06-07 02:29:44,821][324535] Updated weights for policy 0, policy_version 38934 (0.0007) -[2026-06-07 02:29:45,034][324535] Updated weights for policy 0, policy_version 38944 (0.0007) -[2026-06-07 02:29:45,238][324535] Updated weights for policy 0, policy_version 38954 (0.0007) -[2026-06-07 02:29:45,431][324535] Updated weights for policy 0, policy_version 38964 (0.0007) -[2026-06-07 02:29:46,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 19988480. Throughput: 0: 17453.5. Samples: 20009984. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) -[2026-06-07 02:29:46,011][321787] Avg episode reward: [(0, '1774.690')] -[2026-06-07 02:29:46,229][324535] Updated weights for policy 0, policy_version 38975 (0.0007) -[2026-06-07 02:29:46,433][324535] Updated weights for policy 0, policy_version 38985 (0.0007) -[2026-06-07 02:29:46,643][324535] Updated weights for policy 0, policy_version 38995 (0.0007) -[2026-06-07 02:29:46,871][324535] Updated weights for policy 0, policy_version 39006 (0.0007) -[2026-06-07 02:29:47,100][324535] Updated weights for policy 0, policy_version 39016 (0.0007) -[2026-06-07 02:29:47,313][324535] Updated weights for policy 0, policy_version 39026 (0.0007) -[2026-06-07 02:29:48,104][324535] Updated weights for policy 0, policy_version 39036 (0.0007) -[2026-06-07 02:29:48,307][324535] Updated weights for policy 0, policy_version 39046 (0.0007) -[2026-06-07 02:29:48,526][324535] Updated weights for policy 0, policy_version 39056 (0.0007) -[2026-06-07 02:29:48,740][324535] Updated weights for policy 0, policy_version 39066 (0.0007) -[2026-06-07 02:29:48,944][324535] Updated weights for policy 0, policy_version 39076 (0.0009) -[2026-06-07 02:29:49,143][324535] Updated weights for policy 0, policy_version 39086 (0.0014) -[2026-06-07 02:29:49,343][324535] Updated weights for policy 0, policy_version 39096 (0.0013) -[2026-06-07 02:29:50,069][324535] Updated weights for policy 0, policy_version 39106 (0.0007) -[2026-06-07 02:29:50,274][324535] Updated weights for policy 0, policy_version 39116 (0.0007) -[2026-06-07 02:29:50,489][324535] Updated weights for policy 0, policy_version 39126 (0.0007) -[2026-06-07 02:29:50,701][324535] Updated weights for policy 0, policy_version 39136 (0.0007) -[2026-06-07 02:29:50,892][324535] Updated weights for policy 0, policy_version 39146 (0.0007) -[2026-06-07 02:29:51,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 20054016. Throughput: 0: 17433.6. Samples: 20067200. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) -[2026-06-07 02:29:51,011][321787] Avg episode reward: [(0, '1766.370')] -[2026-06-07 02:29:51,139][324535] Updated weights for policy 0, policy_version 39157 (0.0007) -[2026-06-07 02:29:51,905][324535] Updated weights for policy 0, policy_version 39167 (0.0007) -[2026-06-07 02:29:52,101][324535] Updated weights for policy 0, policy_version 39177 (0.0007) -[2026-06-07 02:29:52,317][324535] Updated weights for policy 0, policy_version 39187 (0.0007) -[2026-06-07 02:29:52,539][324535] Updated weights for policy 0, policy_version 39198 (0.0007) -[2026-06-07 02:29:52,779][324535] Updated weights for policy 0, policy_version 39209 (0.0007) -[2026-06-07 02:29:52,979][324535] Updated weights for policy 0, policy_version 39219 (0.0007) -[2026-06-07 02:29:53,728][324535] Updated weights for policy 0, policy_version 39229 (0.0007) -[2026-06-07 02:29:53,916][324535] Updated weights for policy 0, policy_version 39239 (0.0007) -[2026-06-07 02:29:54,178][324535] Updated weights for policy 0, policy_version 39250 (0.0007) -[2026-06-07 02:29:54,374][324535] Updated weights for policy 0, policy_version 39260 (0.0007) -[2026-06-07 02:29:54,595][324535] Updated weights for policy 0, policy_version 39270 (0.0007) -[2026-06-07 02:29:54,791][324535] Updated weights for policy 0, policy_version 39280 (0.0007) -[2026-06-07 02:29:55,553][324535] Updated weights for policy 0, policy_version 39291 (0.0007) -[2026-06-07 02:29:55,778][324535] Updated weights for policy 0, policy_version 39301 (0.0007) -[2026-06-07 02:29:55,983][324535] Updated weights for policy 0, policy_version 39311 (0.0007) -[2026-06-07 02:29:56,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 20152320. Throughput: 0: 17419.4. Samples: 20168576. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) -[2026-06-07 02:29:56,011][321787] Avg episode reward: [(0, '1764.239')] -[2026-06-07 02:29:56,169][324535] Updated weights for policy 0, policy_version 39321 (0.0007) -[2026-06-07 02:29:56,367][324535] Updated weights for policy 0, policy_version 39331 (0.0007) -[2026-06-07 02:29:56,615][324535] Updated weights for policy 0, policy_version 39343 (0.0007) -[2026-06-07 02:29:57,372][324535] Updated weights for policy 0, policy_version 39353 (0.0007) -[2026-06-07 02:29:57,567][324535] Updated weights for policy 0, policy_version 39363 (0.0007) -[2026-06-07 02:29:57,783][324535] Updated weights for policy 0, policy_version 39373 (0.0007) -[2026-06-07 02:29:57,989][324535] Updated weights for policy 0, policy_version 39383 (0.0007) -[2026-06-07 02:29:58,252][324535] Updated weights for policy 0, policy_version 39395 (0.0007) -[2026-06-07 02:29:58,458][324535] Updated weights for policy 0, policy_version 39405 (0.0007) -[2026-06-07 02:29:58,659][324535] Updated weights for policy 0, policy_version 39415 (0.0007) -[2026-06-07 02:29:59,397][324535] Updated weights for policy 0, policy_version 39425 (0.0007) -[2026-06-07 02:29:59,585][324535] Updated weights for policy 0, policy_version 39435 (0.0007) -[2026-06-07 02:29:59,829][324535] Updated weights for policy 0, policy_version 39446 (0.0007) -[2026-06-07 02:30:00,047][324535] Updated weights for policy 0, policy_version 39456 (0.0007) -[2026-06-07 02:30:00,263][324535] Updated weights for policy 0, policy_version 39466 (0.0007) -[2026-06-07 02:30:00,478][324535] Updated weights for policy 0, policy_version 39476 (0.0007) -[2026-06-07 02:30:01,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 20250624. Throughput: 0: 17416.5. Samples: 20270464. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) -[2026-06-07 02:30:01,011][321787] Avg episode reward: [(0, '1766.414')] -[2026-06-07 02:30:01,210][324535] Updated weights for policy 0, policy_version 39486 (0.0007) -[2026-06-07 02:30:01,416][324535] Updated weights for policy 0, policy_version 39496 (0.0007) -[2026-06-07 02:30:01,590][324535] Updated weights for policy 0, policy_version 39506 (0.0007) -[2026-06-07 02:30:01,802][324535] Updated weights for policy 0, policy_version 39516 (0.0007) -[2026-06-07 02:30:02,023][324535] Updated weights for policy 0, policy_version 39526 (0.0007) -[2026-06-07 02:30:02,217][324535] Updated weights for policy 0, policy_version 39536 (0.0007) -[2026-06-07 02:30:02,958][324535] Updated weights for policy 0, policy_version 39546 (0.0007) -[2026-06-07 02:30:03,188][324535] Updated weights for policy 0, policy_version 39557 (0.0006) -[2026-06-07 02:30:03,403][324535] Updated weights for policy 0, policy_version 39567 (0.0006) -[2026-06-07 02:30:03,608][324535] Updated weights for policy 0, policy_version 39577 (0.0006) -[2026-06-07 02:30:03,797][324535] Updated weights for policy 0, policy_version 39587 (0.0007) -[2026-06-07 02:30:03,904][324273] Early stopping after 6 epochs (48 sgd steps), loss delta 0.0000004 -[2026-06-07 02:30:04,630][324535] Updated weights for policy 0, policy_version 39597 (0.0007) -[2026-06-07 02:30:04,844][324535] Updated weights for policy 0, policy_version 39607 (0.0007) -[2026-06-07 02:30:05,071][324535] Updated weights for policy 0, policy_version 39617 (0.0007) -[2026-06-07 02:30:05,262][324535] Updated weights for policy 0, policy_version 39627 (0.0007) -[2026-06-07 02:30:05,465][324535] Updated weights for policy 0, policy_version 39637 (0.0007) -[2026-06-07 02:30:05,672][324535] Updated weights for policy 0, policy_version 39647 (0.0007) -[2026-06-07 02:30:06,010][321787] Fps is (10 sec: 19660.8, 60 sec: 18022.4, 300 sec: 17550.3). Total num frames: 20348928. Throughput: 0: 17416.6. Samples: 20327552. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) -[2026-06-07 02:30:06,011][321787] Avg episode reward: [(0, '1781.049')] -[2026-06-07 02:30:06,424][324535] Updated weights for policy 0, policy_version 39657 (0.0007) -[2026-06-07 02:30:06,632][324535] Updated weights for policy 0, policy_version 39667 (0.0008) -[2026-06-07 02:30:06,840][324535] Updated weights for policy 0, policy_version 39677 (0.0007) -[2026-06-07 02:30:07,045][324535] Updated weights for policy 0, policy_version 39687 (0.0007) -[2026-06-07 02:30:07,238][324535] Updated weights for policy 0, policy_version 39697 (0.0007) -[2026-06-07 02:30:07,444][324535] Updated weights for policy 0, policy_version 39707 (0.0007) -[2026-06-07 02:30:07,665][324535] Updated weights for policy 0, policy_version 39717 (0.0007) -[2026-06-07 02:30:08,418][324535] Updated weights for policy 0, policy_version 39727 (0.0007) -[2026-06-07 02:30:08,616][324535] Updated weights for policy 0, policy_version 39737 (0.0007) -[2026-06-07 02:30:08,826][324535] Updated weights for policy 0, policy_version 39747 (0.0007) -[2026-06-07 02:30:09,021][324535] Updated weights for policy 0, policy_version 39757 (0.0007) -[2026-06-07 02:30:09,226][324535] Updated weights for policy 0, policy_version 39767 (0.0007) -[2026-06-07 02:30:09,454][324535] Updated weights for policy 0, policy_version 39777 (0.0007) -[2026-06-07 02:30:10,183][324535] Updated weights for policy 0, policy_version 39787 (0.0007) -[2026-06-07 02:30:10,412][324535] Updated weights for policy 0, policy_version 39797 (0.0007) -[2026-06-07 02:30:10,607][324535] Updated weights for policy 0, policy_version 39807 (0.0007) -[2026-06-07 02:30:10,838][324535] Updated weights for policy 0, policy_version 39818 (0.0007) -[2026-06-07 02:30:11,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 20414464. Throughput: 0: 17467.7. Samples: 20432768. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) -[2026-06-07 02:30:11,011][321787] Avg episode reward: [(0, '1759.854')] -[2026-06-07 02:30:11,046][324535] Updated weights for policy 0, policy_version 39828 (0.0007) -[2026-06-07 02:30:11,260][324535] Updated weights for policy 0, policy_version 39838 (0.0007) -[2026-06-07 02:30:11,459][324535] Updated weights for policy 0, policy_version 39848 (0.0007) -[2026-06-07 02:30:12,219][324535] Updated weights for policy 0, policy_version 39858 (0.0007) -[2026-06-07 02:30:12,414][324535] Updated weights for policy 0, policy_version 39868 (0.0007) -[2026-06-07 02:30:12,611][324535] Updated weights for policy 0, policy_version 39878 (0.0007) -[2026-06-07 02:30:12,806][324535] Updated weights for policy 0, policy_version 39888 (0.0007) -[2026-06-07 02:30:13,049][324535] Updated weights for policy 0, policy_version 39900 (0.0007) -[2026-06-07 02:30:13,286][324535] Updated weights for policy 0, policy_version 39911 (0.0007) -[2026-06-07 02:30:14,035][324535] Updated weights for policy 0, policy_version 39921 (0.0007) -[2026-06-07 02:30:14,237][324535] Updated weights for policy 0, policy_version 39931 (0.0007) -[2026-06-07 02:30:14,429][324535] Updated weights for policy 0, policy_version 39941 (0.0007) -[2026-06-07 02:30:14,633][324535] Updated weights for policy 0, policy_version 39951 (0.0007) -[2026-06-07 02:30:14,840][324535] Updated weights for policy 0, policy_version 39961 (0.0007) -[2026-06-07 02:30:15,055][324535] Updated weights for policy 0, policy_version 39971 (0.0007) -[2026-06-07 02:30:15,837][324535] Updated weights for policy 0, policy_version 39981 (0.0007) -[2026-06-07 02:30:16,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 20512768. Throughput: 0: 17479.1. Samples: 20534528. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) -[2026-06-07 02:30:16,011][321787] Avg episode reward: [(0, '1789.440')] -[2026-06-07 02:30:16,053][324535] Updated weights for policy 0, policy_version 39992 (0.0007) -[2026-06-07 02:30:16,275][324535] Updated weights for policy 0, policy_version 40002 (0.0007) -[2026-06-07 02:30:16,485][324535] Updated weights for policy 0, policy_version 40012 (0.0007) -[2026-06-07 02:30:16,718][324535] Updated weights for policy 0, policy_version 40022 (0.0007) -[2026-06-07 02:30:16,910][324535] Updated weights for policy 0, policy_version 40032 (0.0007) -[2026-06-07 02:30:17,661][324535] Updated weights for policy 0, policy_version 40043 (0.0007) -[2026-06-07 02:30:17,867][324535] Updated weights for policy 0, policy_version 40053 (0.0007) -[2026-06-07 02:30:18,101][324535] Updated weights for policy 0, policy_version 40064 (0.0007) -[2026-06-07 02:30:18,312][324535] Updated weights for policy 0, policy_version 40074 (0.0007) -[2026-06-07 02:30:18,526][324535] Updated weights for policy 0, policy_version 40084 (0.0007) -[2026-06-07 02:30:18,740][324535] Updated weights for policy 0, policy_version 40094 (0.0007) -[2026-06-07 02:30:18,932][324535] Updated weights for policy 0, policy_version 40104 (0.0007) -[2026-06-07 02:30:19,692][324535] Updated weights for policy 0, policy_version 40115 (0.0007) -[2026-06-07 02:30:19,901][324535] Updated weights for policy 0, policy_version 40125 (0.0007) -[2026-06-07 02:30:20,112][324535] Updated weights for policy 0, policy_version 40135 (0.0007) -[2026-06-07 02:30:20,338][324535] Updated weights for policy 0, policy_version 40145 (0.0007) -[2026-06-07 02:30:20,543][324535] Updated weights for policy 0, policy_version 40155 (0.0007) -[2026-06-07 02:30:20,763][324535] Updated weights for policy 0, policy_version 40165 (0.0007) -[2026-06-07 02:30:21,010][321787] Fps is (10 sec: 19660.6, 60 sec: 18022.4, 300 sec: 17550.3). Total num frames: 20611072. Throughput: 0: 17453.5. Samples: 20591232. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) -[2026-06-07 02:30:21,011][321787] Avg episode reward: [(0, '1778.390')] -[2026-06-07 02:30:21,019][324273] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed11/checkpoint_p0/checkpoint_000040168_20611072.pth... -[2026-06-07 02:30:21,493][324535] Updated weights for policy 0, policy_version 40175 (0.0007) -[2026-06-07 02:30:21,707][324535] Updated weights for policy 0, policy_version 40185 (0.0007) -[2026-06-07 02:30:21,908][324535] Updated weights for policy 0, policy_version 40195 (0.0007) -[2026-06-07 02:30:22,119][324535] Updated weights for policy 0, policy_version 40205 (0.0007) -[2026-06-07 02:30:22,324][324535] Updated weights for policy 0, policy_version 40215 (0.0007) -[2026-06-07 02:30:22,512][324535] Updated weights for policy 0, policy_version 40225 (0.0007) -[2026-06-07 02:30:23,271][324535] Updated weights for policy 0, policy_version 40235 (0.0007) -[2026-06-07 02:30:23,509][324535] Updated weights for policy 0, policy_version 40245 (0.0007) -[2026-06-07 02:30:23,698][324535] Updated weights for policy 0, policy_version 40255 (0.0007) -[2026-06-07 02:30:23,924][324535] Updated weights for policy 0, policy_version 40265 (0.0007) -[2026-06-07 02:30:24,127][324535] Updated weights for policy 0, policy_version 40275 (0.0007) -[2026-06-07 02:30:24,323][324535] Updated weights for policy 0, policy_version 40285 (0.0007) -[2026-06-07 02:30:24,527][324535] Updated weights for policy 0, policy_version 40295 (0.0007) -[2026-06-07 02:30:25,285][324535] Updated weights for policy 0, policy_version 40305 (0.0007) -[2026-06-07 02:30:25,509][324535] Updated weights for policy 0, policy_version 40315 (0.0007) -[2026-06-07 02:30:25,711][324535] Updated weights for policy 0, policy_version 40325 (0.0007) -[2026-06-07 02:30:25,945][324535] Updated weights for policy 0, policy_version 40337 (0.0007) -[2026-06-07 02:30:26,011][321787] Fps is (10 sec: 16382.9, 60 sec: 17476.1, 300 sec: 17439.2). Total num frames: 20676608. Throughput: 0: 17424.8. Samples: 20692864. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) -[2026-06-07 02:30:26,011][321787] Avg episode reward: [(0, '1819.313')] -[2026-06-07 02:30:26,162][324535] Updated weights for policy 0, policy_version 40347 (0.0007) -[2026-06-07 02:30:26,363][324535] Updated weights for policy 0, policy_version 40357 (0.0007) -[2026-06-07 02:30:27,100][324535] Updated weights for policy 0, policy_version 40367 (0.0007) -[2026-06-07 02:30:27,312][324535] Updated weights for policy 0, policy_version 40377 (0.0007) -[2026-06-07 02:30:27,570][324535] Updated weights for policy 0, policy_version 40389 (0.0007) -[2026-06-07 02:30:27,779][324535] Updated weights for policy 0, policy_version 40399 (0.0007) -[2026-06-07 02:30:27,983][324535] Updated weights for policy 0, policy_version 40409 (0.0007) -[2026-06-07 02:30:28,198][324535] Updated weights for policy 0, policy_version 40419 (0.0007) -[2026-06-07 02:30:28,966][324535] Updated weights for policy 0, policy_version 40431 (0.0007) -[2026-06-07 02:30:29,172][324535] Updated weights for policy 0, policy_version 40441 (0.0007) -[2026-06-07 02:30:29,383][324535] Updated weights for policy 0, policy_version 40451 (0.0007) -[2026-06-07 02:30:29,586][324535] Updated weights for policy 0, policy_version 40461 (0.0007) -[2026-06-07 02:30:29,793][324535] Updated weights for policy 0, policy_version 40471 (0.0007) -[2026-06-07 02:30:29,993][324535] Updated weights for policy 0, policy_version 40481 (0.0006) -[2026-06-07 02:30:30,730][324535] Updated weights for policy 0, policy_version 40491 (0.0007) -[2026-06-07 02:30:30,945][324535] Updated weights for policy 0, policy_version 40501 (0.0007) -[2026-06-07 02:30:31,010][321787] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 20774912. Throughput: 0: 17450.7. Samples: 20795264. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) -[2026-06-07 02:30:31,011][321787] Avg episode reward: [(0, '1781.570')] -[2026-06-07 02:30:31,146][324535] Updated weights for policy 0, policy_version 40511 (0.0007) -[2026-06-07 02:30:31,352][324535] Updated weights for policy 0, policy_version 40521 (0.0007) -[2026-06-07 02:30:31,572][324535] Updated weights for policy 0, policy_version 40531 (0.0007) -[2026-06-07 02:30:31,799][324535] Updated weights for policy 0, policy_version 40542 (0.0007) -[2026-06-07 02:30:32,000][324535] Updated weights for policy 0, policy_version 40552 (0.0007) -[2026-06-07 02:30:32,728][324535] Updated weights for policy 0, policy_version 40562 (0.0007) -[2026-06-07 02:30:32,946][324535] Updated weights for policy 0, policy_version 40572 (0.0007) -[2026-06-07 02:30:33,166][324535] Updated weights for policy 0, policy_version 40582 (0.0007) -[2026-06-07 02:30:33,368][324535] Updated weights for policy 0, policy_version 40592 (0.0007) -[2026-06-07 02:30:33,569][324535] Updated weights for policy 0, policy_version 40602 (0.0007) -[2026-06-07 02:30:33,774][324535] Updated weights for policy 0, policy_version 40612 (0.0007) -[2026-06-07 02:30:34,545][324535] Updated weights for policy 0, policy_version 40622 (0.0007) -[2026-06-07 02:30:34,746][324535] Updated weights for policy 0, policy_version 40632 (0.0007) -[2026-06-07 02:30:34,976][324535] Updated weights for policy 0, policy_version 40642 (0.0007) -[2026-06-07 02:30:35,193][324535] Updated weights for policy 0, policy_version 40652 (0.0007) -[2026-06-07 02:30:35,404][324535] Updated weights for policy 0, policy_version 40662 (0.0007) -[2026-06-07 02:30:35,607][324535] Updated weights for policy 0, policy_version 40672 (0.0007) -[2026-06-07 02:30:36,010][321787] Fps is (10 sec: 19661.9, 60 sec: 18022.4, 300 sec: 17550.3). Total num frames: 20873216. Throughput: 0: 17422.2. Samples: 20851200. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) -[2026-06-07 02:30:36,012][321787] Avg episode reward: [(0, '1856.125')] -[2026-06-07 02:30:36,347][324535] Updated weights for policy 0, policy_version 40682 (0.0007) -[2026-06-07 02:30:36,543][324535] Updated weights for policy 0, policy_version 40692 (0.0007) -[2026-06-07 02:30:36,764][324535] Updated weights for policy 0, policy_version 40703 (0.0007) -[2026-06-07 02:30:37,001][324535] Updated weights for policy 0, policy_version 40713 (0.0007) -[2026-06-07 02:30:37,201][324535] Updated weights for policy 0, policy_version 40723 (0.0007) -[2026-06-07 02:30:37,395][324535] Updated weights for policy 0, policy_version 40733 (0.0007) -[2026-06-07 02:30:37,626][324535] Updated weights for policy 0, policy_version 40744 (0.0007) -[2026-06-07 02:30:38,361][324535] Updated weights for policy 0, policy_version 40754 (0.0007) -[2026-06-07 02:30:38,538][324535] Updated weights for policy 0, policy_version 40764 (0.0007) -[2026-06-07 02:30:38,753][324535] Updated weights for policy 0, policy_version 40774 (0.0007) -[2026-06-07 02:30:39,005][324535] Updated weights for policy 0, policy_version 40786 (0.0007) -[2026-06-07 02:30:39,232][324535] Updated weights for policy 0, policy_version 40796 (0.0007) -[2026-06-07 02:30:39,453][324535] Updated weights for policy 0, policy_version 40806 (0.0007) -[2026-06-07 02:30:40,232][324535] Updated weights for policy 0, policy_version 40816 (0.0007) -[2026-06-07 02:30:40,449][324535] Updated weights for policy 0, policy_version 40826 (0.0007) -[2026-06-07 02:30:40,675][324535] Updated weights for policy 0, policy_version 40836 (0.0007) -[2026-06-07 02:30:40,866][324535] Updated weights for policy 0, policy_version 40846 (0.0007) -[2026-06-07 02:30:41,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 20938752. Throughput: 0: 17402.3. Samples: 20951680. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) -[2026-06-07 02:30:41,011][321787] Avg episode reward: [(0, '1822.361')] -[2026-06-07 02:30:41,074][324535] Updated weights for policy 0, policy_version 40856 (0.0007) -[2026-06-07 02:30:41,318][324535] Updated weights for policy 0, policy_version 40867 (0.0007) -[2026-06-07 02:30:42,049][324535] Updated weights for policy 0, policy_version 40877 (0.0007) -[2026-06-07 02:30:42,257][324535] Updated weights for policy 0, policy_version 40887 (0.0007) -[2026-06-07 02:30:42,474][324535] Updated weights for policy 0, policy_version 40897 (0.0008) -[2026-06-07 02:30:42,698][324535] Updated weights for policy 0, policy_version 40908 (0.0007) -[2026-06-07 02:30:42,910][324535] Updated weights for policy 0, policy_version 40918 (0.0007) -[2026-06-07 02:30:43,141][324535] Updated weights for policy 0, policy_version 40929 (0.0007) -[2026-06-07 02:30:43,873][324535] Updated weights for policy 0, policy_version 40939 (0.0008) -[2026-06-07 02:30:44,077][324535] Updated weights for policy 0, policy_version 40949 (0.0009) -[2026-06-07 02:30:44,275][324535] Updated weights for policy 0, policy_version 40959 (0.0007) -[2026-06-07 02:30:44,506][324535] Updated weights for policy 0, policy_version 40969 (0.0007) -[2026-06-07 02:30:44,734][324535] Updated weights for policy 0, policy_version 40979 (0.0006) -[2026-06-07 02:30:44,942][324535] Updated weights for policy 0, policy_version 40989 (0.0007) -[2026-06-07 02:30:45,170][324535] Updated weights for policy 0, policy_version 40999 (0.0007) -[2026-06-07 02:30:45,883][324535] Updated weights for policy 0, policy_version 41009 (0.0007) -[2026-06-07 02:30:46,010][321787] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17439.3). Total num frames: 21037056. Throughput: 0: 17430.8. Samples: 21054848. Policy #0 lag: (min: 56.0, avg: 80.0, max: 120.0) -[2026-06-07 02:30:46,011][321787] Avg episode reward: [(0, '1821.578')] -[2026-06-07 02:30:46,119][324535] Updated weights for policy 0, policy_version 41020 (0.0007) -[2026-06-07 02:30:46,327][324535] Updated weights for policy 0, policy_version 41030 (0.0007) -[2026-06-07 02:30:46,517][324535] Updated weights for policy 0, policy_version 41040 (0.0007) -[2026-06-07 02:30:46,731][324535] Updated weights for policy 0, policy_version 41051 (0.0007) -[2026-06-07 02:30:46,948][324535] Updated weights for policy 0, policy_version 41061 (0.0007) -[2026-06-07 02:30:47,722][324535] Updated weights for policy 0, policy_version 41072 (0.0007) -[2026-06-07 02:30:47,918][324535] Updated weights for policy 0, policy_version 41082 (0.0007) -[2026-06-07 02:30:48,110][324535] Updated weights for policy 0, policy_version 41092 (0.0007) -[2026-06-07 02:30:48,306][324535] Updated weights for policy 0, policy_version 41102 (0.0007) -[2026-06-07 02:30:48,528][324535] Updated weights for policy 0, policy_version 41112 (0.0007) -[2026-06-07 02:30:48,743][324535] Updated weights for policy 0, policy_version 41122 (0.0007) -[2026-06-07 02:30:49,507][324535] Updated weights for policy 0, policy_version 41132 (0.0007) -[2026-06-07 02:30:49,738][324535] Updated weights for policy 0, policy_version 41143 (0.0007) -[2026-06-07 02:30:49,936][324535] Updated weights for policy 0, policy_version 41153 (0.0007) -[2026-06-07 02:30:50,151][324535] Updated weights for policy 0, policy_version 41163 (0.0007) -[2026-06-07 02:30:50,344][324535] Updated weights for policy 0, policy_version 41173 (0.0006) -[2026-06-07 02:30:50,568][324535] Updated weights for policy 0, policy_version 41184 (0.0007) -[2026-06-07 02:30:51,010][321787] Fps is (10 sec: 19660.5, 60 sec: 18022.4, 300 sec: 17550.3). Total num frames: 21135360. Throughput: 0: 17407.9. Samples: 21110912. Policy #0 lag: (min: 56.0, avg: 80.0, max: 120.0) -[2026-06-07 02:30:51,011][321787] Avg episode reward: [(0, '1821.316')] -[2026-06-07 02:30:51,309][324535] Updated weights for policy 0, policy_version 41194 (0.0007) -[2026-06-07 02:30:51,518][324535] Updated weights for policy 0, policy_version 41204 (0.0007) -[2026-06-07 02:30:51,731][324535] Updated weights for policy 0, policy_version 41214 (0.0007) -[2026-06-07 02:30:51,927][324535] Updated weights for policy 0, policy_version 41224 (0.0007) -[2026-06-07 02:30:52,155][324535] Updated weights for policy 0, policy_version 41235 (0.0007) -[2026-06-07 02:30:52,350][324535] Updated weights for policy 0, policy_version 41245 (0.0007) -[2026-06-07 02:30:52,559][324535] Updated weights for policy 0, policy_version 41255 (0.0007) -[2026-06-07 02:30:53,314][324535] Updated weights for policy 0, policy_version 41265 (0.0007) -[2026-06-07 02:30:53,517][324535] Updated weights for policy 0, policy_version 41275 (0.0007) -[2026-06-07 02:30:53,732][324535] Updated weights for policy 0, policy_version 41285 (0.0007) -[2026-06-07 02:30:53,944][324535] Updated weights for policy 0, policy_version 41295 (0.0007) -[2026-06-07 02:30:54,173][324535] Updated weights for policy 0, policy_version 41305 (0.0007) -[2026-06-07 02:30:54,383][324535] Updated weights for policy 0, policy_version 41315 (0.0007) -[2026-06-07 02:30:55,115][324535] Updated weights for policy 0, policy_version 41325 (0.0007) -[2026-06-07 02:30:55,335][324535] Updated weights for policy 0, policy_version 41335 (0.0007) -[2026-06-07 02:30:55,560][324535] Updated weights for policy 0, policy_version 41345 (0.0007) -[2026-06-07 02:30:55,770][324535] Updated weights for policy 0, policy_version 41355 (0.0007) -[2026-06-07 02:30:55,954][324535] Updated weights for policy 0, policy_version 41365 (0.0007) -[2026-06-07 02:30:56,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 21200896. Throughput: 0: 17348.3. Samples: 21213440. Policy #0 lag: (min: 56.0, avg: 80.0, max: 120.0) -[2026-06-07 02:30:56,011][321787] Avg episode reward: [(0, '1822.848')] -[2026-06-07 02:30:56,157][324535] Updated weights for policy 0, policy_version 41375 (0.0007) -[2026-06-07 02:30:56,907][324535] Updated weights for policy 0, policy_version 41385 (0.0007) -[2026-06-07 02:30:57,114][324535] Updated weights for policy 0, policy_version 41395 (0.0007) -[2026-06-07 02:30:57,325][324535] Updated weights for policy 0, policy_version 41405 (0.0007) -[2026-06-07 02:30:57,563][324535] Updated weights for policy 0, policy_version 41416 (0.0007) -[2026-06-07 02:30:57,787][324535] Updated weights for policy 0, policy_version 41426 (0.0007) -[2026-06-07 02:30:57,998][324535] Updated weights for policy 0, policy_version 41436 (0.0007) -[2026-06-07 02:30:58,202][324535] Updated weights for policy 0, policy_version 41446 (0.0007) -[2026-06-07 02:30:58,943][324535] Updated weights for policy 0, policy_version 41456 (0.0007) -[2026-06-07 02:30:59,165][324535] Updated weights for policy 0, policy_version 41466 (0.0007) -[2026-06-07 02:30:59,355][324535] Updated weights for policy 0, policy_version 41476 (0.0007) -[2026-06-07 02:30:59,546][324535] Updated weights for policy 0, policy_version 41486 (0.0007) -[2026-06-07 02:30:59,747][324535] Updated weights for policy 0, policy_version 41496 (0.0007) -[2026-06-07 02:30:59,951][324535] Updated weights for policy 0, policy_version 41506 (0.0007) -[2026-06-07 02:31:00,683][324535] Updated weights for policy 0, policy_version 41516 (0.0007) -[2026-06-07 02:31:00,876][324535] Updated weights for policy 0, policy_version 41526 (0.0007) -[2026-06-07 02:31:01,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 21299200. Throughput: 0: 17442.1. Samples: 21319424. Policy #0 lag: (min: 56.0, avg: 80.0, max: 120.0) -[2026-06-07 02:31:01,011][321787] Avg episode reward: [(0, '1852.619')] -[2026-06-07 02:31:01,088][324535] Updated weights for policy 0, policy_version 41536 (0.0007) -[2026-06-07 02:31:01,298][324535] Updated weights for policy 0, policy_version 41546 (0.0007) -[2026-06-07 02:31:01,525][324535] Updated weights for policy 0, policy_version 41556 (0.0007) -[2026-06-07 02:31:01,744][324535] Updated weights for policy 0, policy_version 41566 (0.0007) -[2026-06-07 02:31:02,496][324535] Updated weights for policy 0, policy_version 41577 (0.0007) -[2026-06-07 02:31:02,721][324535] Updated weights for policy 0, policy_version 41587 (0.0007) -[2026-06-07 02:31:02,927][324535] Updated weights for policy 0, policy_version 41597 (0.0007) -[2026-06-07 02:31:03,145][324535] Updated weights for policy 0, policy_version 41607 (0.0007) -[2026-06-07 02:31:03,342][324535] Updated weights for policy 0, policy_version 41617 (0.0007) -[2026-06-07 02:31:03,561][324535] Updated weights for policy 0, policy_version 41627 (0.0007) -[2026-06-07 02:31:03,779][324535] Updated weights for policy 0, policy_version 41637 (0.0007) -[2026-06-07 02:31:04,504][324535] Updated weights for policy 0, policy_version 41647 (0.0007) -[2026-06-07 02:31:04,731][324535] Updated weights for policy 0, policy_version 41657 (0.0007) -[2026-06-07 02:31:04,933][324535] Updated weights for policy 0, policy_version 41667 (0.0007) -[2026-06-07 02:31:05,154][324535] Updated weights for policy 0, policy_version 41677 (0.0007) -[2026-06-07 02:31:05,344][324535] Updated weights for policy 0, policy_version 41687 (0.0007) -[2026-06-07 02:31:05,557][324535] Updated weights for policy 0, policy_version 41697 (0.0007) -[2026-06-07 02:31:06,010][321787] Fps is (10 sec: 19660.6, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 21397504. Throughput: 0: 17388.1. Samples: 21373696. Policy #0 lag: (min: 56.0, avg: 80.0, max: 120.0) -[2026-06-07 02:31:06,011][321787] Avg episode reward: [(0, '1859.759')] -[2026-06-07 02:31:06,319][324535] Updated weights for policy 0, policy_version 41707 (0.0007) -[2026-06-07 02:31:06,529][324535] Updated weights for policy 0, policy_version 41717 (0.0007) -[2026-06-07 02:31:06,739][324535] Updated weights for policy 0, policy_version 41727 (0.0007) -[2026-06-07 02:31:06,957][324535] Updated weights for policy 0, policy_version 41737 (0.0007) -[2026-06-07 02:31:07,172][324535] Updated weights for policy 0, policy_version 41747 (0.0007) -[2026-06-07 02:31:07,383][324535] Updated weights for policy 0, policy_version 41757 (0.0007) -[2026-06-07 02:31:07,571][324535] Updated weights for policy 0, policy_version 41767 (0.0007) -[2026-06-07 02:31:08,341][324535] Updated weights for policy 0, policy_version 41777 (0.0007) -[2026-06-07 02:31:08,543][324535] Updated weights for policy 0, policy_version 41787 (0.0007) -[2026-06-07 02:31:08,747][324535] Updated weights for policy 0, policy_version 41797 (0.0007) -[2026-06-07 02:31:08,947][324535] Updated weights for policy 0, policy_version 41807 (0.0007) -[2026-06-07 02:31:09,156][324535] Updated weights for policy 0, policy_version 41817 (0.0007) -[2026-06-07 02:31:09,357][324535] Updated weights for policy 0, policy_version 41827 (0.0007) -[2026-06-07 02:31:10,119][324535] Updated weights for policy 0, policy_version 41837 (0.0007) -[2026-06-07 02:31:10,348][324535] Updated weights for policy 0, policy_version 41847 (0.0007) -[2026-06-07 02:31:10,560][324535] Updated weights for policy 0, policy_version 41857 (0.0007) -[2026-06-07 02:31:10,782][324535] Updated weights for policy 0, policy_version 41867 (0.0007) -[2026-06-07 02:31:11,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 21463040. Throughput: 0: 17402.6. Samples: 21475968. Policy #0 lag: (min: 56.0, avg: 80.0, max: 120.0) -[2026-06-07 02:31:11,011][321787] Avg episode reward: [(0, '1880.981')] -[2026-06-07 02:31:11,029][324535] Updated weights for policy 0, policy_version 41878 (0.0007) -[2026-06-07 02:31:11,246][324535] Updated weights for policy 0, policy_version 41888 (0.0007) -[2026-06-07 02:31:11,411][324273] Saving new best policy, reward=1880.981! -[2026-06-07 02:31:11,976][324535] Updated weights for policy 0, policy_version 41898 (0.0007) -[2026-06-07 02:31:12,178][324535] Updated weights for policy 0, policy_version 41908 (0.0007) -[2026-06-07 02:31:12,418][324535] Updated weights for policy 0, policy_version 41919 (0.0007) -[2026-06-07 02:31:12,621][324535] Updated weights for policy 0, policy_version 41929 (0.0007) -[2026-06-07 02:31:12,818][324535] Updated weights for policy 0, policy_version 41939 (0.0007) -[2026-06-07 02:31:13,033][324535] Updated weights for policy 0, policy_version 41949 (0.0007) -[2026-06-07 02:31:13,243][324535] Updated weights for policy 0, policy_version 41959 (0.0007) -[2026-06-07 02:31:14,003][324535] Updated weights for policy 0, policy_version 41969 (0.0007) -[2026-06-07 02:31:14,218][324535] Updated weights for policy 0, policy_version 41980 (0.0007) -[2026-06-07 02:31:14,428][324535] Updated weights for policy 0, policy_version 41990 (0.0007) -[2026-06-07 02:31:14,648][324535] Updated weights for policy 0, policy_version 42000 (0.0007) -[2026-06-07 02:31:14,849][324535] Updated weights for policy 0, policy_version 42010 (0.0007) -[2026-06-07 02:31:15,080][324535] Updated weights for policy 0, policy_version 42021 (0.0007) -[2026-06-07 02:31:15,835][324535] Updated weights for policy 0, policy_version 42031 (0.0007) -[2026-06-07 02:31:16,010][321787] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 21561344. Throughput: 0: 17422.2. Samples: 21579264. Policy #0 lag: (min: 31.0, avg: 46.1, max: 95.0) -[2026-06-07 02:31:16,011][321787] Avg episode reward: [(0, '1869.672')] -[2026-06-07 02:31:16,017][324535] Updated weights for policy 0, policy_version 42041 (0.0007) -[2026-06-07 02:31:16,221][324535] Updated weights for policy 0, policy_version 42051 (0.0007) -[2026-06-07 02:31:16,440][324535] Updated weights for policy 0, policy_version 42061 (0.0007) -[2026-06-07 02:31:16,641][324535] Updated weights for policy 0, policy_version 42071 (0.0007) -[2026-06-07 02:31:16,851][324535] Updated weights for policy 0, policy_version 42081 (0.0007) -[2026-06-07 02:31:17,589][324535] Updated weights for policy 0, policy_version 42091 (0.0007) -[2026-06-07 02:31:17,797][324535] Updated weights for policy 0, policy_version 42101 (0.0007) -[2026-06-07 02:31:17,995][324535] Updated weights for policy 0, policy_version 42111 (0.0008) -[2026-06-07 02:31:18,207][324535] Updated weights for policy 0, policy_version 42121 (0.0011) -[2026-06-07 02:31:18,432][324535] Updated weights for policy 0, policy_version 42132 (0.0009) -[2026-06-07 02:31:18,637][324535] Updated weights for policy 0, policy_version 42142 (0.0007) -[2026-06-07 02:31:18,857][324535] Updated weights for policy 0, policy_version 42152 (0.0008) -[2026-06-07 02:31:19,627][324535] Updated weights for policy 0, policy_version 42162 (0.0008) -[2026-06-07 02:31:19,856][324535] Updated weights for policy 0, policy_version 42173 (0.0008) -[2026-06-07 02:31:20,075][324535] Updated weights for policy 0, policy_version 42183 (0.0007) -[2026-06-07 02:31:20,284][324535] Updated weights for policy 0, policy_version 42193 (0.0007) -[2026-06-07 02:31:20,494][324535] Updated weights for policy 0, policy_version 42203 (0.0007) -[2026-06-07 02:31:20,701][324535] Updated weights for policy 0, policy_version 42213 (0.0007) -[2026-06-07 02:31:21,010][321787] Fps is (10 sec: 19660.4, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 21659648. Throughput: 0: 17430.7. Samples: 21635584. Policy #0 lag: (min: 31.0, avg: 46.1, max: 95.0) -[2026-06-07 02:31:21,012][321787] Avg episode reward: [(0, '1936.672')] -[2026-06-07 02:31:21,018][324273] Saving new best policy, reward=1936.672! -[2026-06-07 02:31:21,499][324535] Updated weights for policy 0, policy_version 42223 (0.0007) -[2026-06-07 02:31:21,713][324535] Updated weights for policy 0, policy_version 42233 (0.0007) -[2026-06-07 02:31:21,905][324535] Updated weights for policy 0, policy_version 42243 (0.0007) -[2026-06-07 02:31:22,108][324535] Updated weights for policy 0, policy_version 42253 (0.0011) -[2026-06-07 02:31:22,306][324535] Updated weights for policy 0, policy_version 42263 (0.0011) -[2026-06-07 02:31:22,507][324535] Updated weights for policy 0, policy_version 42273 (0.0011) -[2026-06-07 02:31:23,294][324535] Updated weights for policy 0, policy_version 42284 (0.0009) -[2026-06-07 02:31:23,517][324535] Updated weights for policy 0, policy_version 42295 (0.0007) -[2026-06-07 02:31:23,702][324535] Updated weights for policy 0, policy_version 42305 (0.0007) -[2026-06-07 02:31:23,921][324535] Updated weights for policy 0, policy_version 42315 (0.0007) -[2026-06-07 02:31:24,121][324535] Updated weights for policy 0, policy_version 42325 (0.0007) -[2026-06-07 02:31:24,331][324535] Updated weights for policy 0, policy_version 42335 (0.0007) -[2026-06-07 02:31:25,117][324535] Updated weights for policy 0, policy_version 42345 (0.0007) -[2026-06-07 02:31:25,334][324535] Updated weights for policy 0, policy_version 42355 (0.0007) -[2026-06-07 02:31:25,533][324535] Updated weights for policy 0, policy_version 42365 (0.0007) -[2026-06-07 02:31:25,760][324535] Updated weights for policy 0, policy_version 42376 (0.0007) -[2026-06-07 02:31:25,955][324535] Updated weights for policy 0, policy_version 42386 (0.0007) -[2026-06-07 02:31:26,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.5, 300 sec: 17439.2). Total num frames: 21725184. Throughput: 0: 17467.7. Samples: 21737728. Policy #0 lag: (min: 31.0, avg: 46.1, max: 95.0) -[2026-06-07 02:31:26,011][321787] Avg episode reward: [(0, '2072.006')] -[2026-06-07 02:31:26,170][324535] Updated weights for policy 0, policy_version 42396 (0.0007) -[2026-06-07 02:31:26,385][324535] Updated weights for policy 0, policy_version 42406 (0.0007) -[2026-06-07 02:31:26,418][324273] Saving new best policy, reward=2072.006! -[2026-06-07 02:31:27,153][324535] Updated weights for policy 0, policy_version 42416 (0.0010) -[2026-06-07 02:31:27,363][324535] Updated weights for policy 0, policy_version 42426 (0.0011) -[2026-06-07 02:31:27,556][324535] Updated weights for policy 0, policy_version 42436 (0.0011) -[2026-06-07 02:31:27,766][324535] Updated weights for policy 0, policy_version 42446 (0.0011) -[2026-06-07 02:31:28,018][324535] Updated weights for policy 0, policy_version 42457 (0.0012) -[2026-06-07 02:31:28,238][324535] Updated weights for policy 0, policy_version 42467 (0.0011) -[2026-06-07 02:31:29,001][324535] Updated weights for policy 0, policy_version 42478 (0.0011) -[2026-06-07 02:31:29,195][324535] Updated weights for policy 0, policy_version 42488 (0.0011) -[2026-06-07 02:31:29,399][324535] Updated weights for policy 0, policy_version 42498 (0.0011) -[2026-06-07 02:31:29,598][324535] Updated weights for policy 0, policy_version 42508 (0.0011) -[2026-06-07 02:31:29,806][324535] Updated weights for policy 0, policy_version 42518 (0.0011) -[2026-06-07 02:31:29,997][324535] Updated weights for policy 0, policy_version 42528 (0.0011) -[2026-06-07 02:31:30,767][324535] Updated weights for policy 0, policy_version 42538 (0.0010) -[2026-06-07 02:31:30,971][324535] Updated weights for policy 0, policy_version 42548 (0.0007) -[2026-06-07 02:31:31,010][321787] Fps is (10 sec: 16384.2, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 21823488. Throughput: 0: 17462.0. Samples: 21840640. Policy #0 lag: (min: 31.0, avg: 46.1, max: 95.0) -[2026-06-07 02:31:31,011][321787] Avg episode reward: [(0, '2072.976')] -[2026-06-07 02:31:31,220][324535] Updated weights for policy 0, policy_version 42559 (0.0007) -[2026-06-07 02:31:31,438][324535] Updated weights for policy 0, policy_version 42569 (0.0007) -[2026-06-07 02:31:31,660][324535] Updated weights for policy 0, policy_version 42579 (0.0007) -[2026-06-07 02:31:31,876][324535] Updated weights for policy 0, policy_version 42589 (0.0007) -[2026-06-07 02:31:32,065][324535] Updated weights for policy 0, policy_version 42599 (0.0007) -[2026-06-07 02:31:32,088][324273] Saving new best policy, reward=2072.976! -[2026-06-07 02:31:32,806][324535] Updated weights for policy 0, policy_version 42609 (0.0007) -[2026-06-07 02:31:33,019][324535] Updated weights for policy 0, policy_version 42619 (0.0007) -[2026-06-07 02:31:33,224][324535] Updated weights for policy 0, policy_version 42629 (0.0008) -[2026-06-07 02:31:33,448][324535] Updated weights for policy 0, policy_version 42639 (0.0011) -[2026-06-07 02:31:33,655][324535] Updated weights for policy 0, policy_version 42649 (0.0011) -[2026-06-07 02:31:33,860][324535] Updated weights for policy 0, policy_version 42659 (0.0010) -[2026-06-07 02:31:34,607][324535] Updated weights for policy 0, policy_version 42669 (0.0007) -[2026-06-07 02:31:34,807][324535] Updated weights for policy 0, policy_version 42679 (0.0008) -[2026-06-07 02:31:35,005][324535] Updated weights for policy 0, policy_version 42689 (0.0010) -[2026-06-07 02:31:35,223][324535] Updated weights for policy 0, policy_version 42699 (0.0010) -[2026-06-07 02:31:35,424][324535] Updated weights for policy 0, policy_version 42709 (0.0011) -[2026-06-07 02:31:35,646][324535] Updated weights for policy 0, policy_version 42719 (0.0011) -[2026-06-07 02:31:36,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 21921792. Throughput: 0: 17479.2. Samples: 21897472. Policy #0 lag: (min: 31.0, avg: 46.1, max: 95.0) -[2026-06-07 02:31:36,011][321787] Avg episode reward: [(0, '2126.377')] -[2026-06-07 02:31:36,015][324273] Saving new best policy, reward=2126.377! -[2026-06-07 02:31:36,360][324535] Updated weights for policy 0, policy_version 42729 (0.0011) -[2026-06-07 02:31:36,562][324535] Updated weights for policy 0, policy_version 42739 (0.0012) -[2026-06-07 02:31:36,755][324535] Updated weights for policy 0, policy_version 42749 (0.0011) -[2026-06-07 02:31:36,968][324535] Updated weights for policy 0, policy_version 42759 (0.0009) -[2026-06-07 02:31:37,184][324535] Updated weights for policy 0, policy_version 42770 (0.0007) -[2026-06-07 02:31:37,389][324535] Updated weights for policy 0, policy_version 42780 (0.0010) -[2026-06-07 02:31:37,586][324535] Updated weights for policy 0, policy_version 42790 (0.0011) -[2026-06-07 02:31:38,378][324535] Updated weights for policy 0, policy_version 42800 (0.0008) -[2026-06-07 02:31:38,573][324535] Updated weights for policy 0, policy_version 42810 (0.0007) -[2026-06-07 02:31:38,779][324535] Updated weights for policy 0, policy_version 42820 (0.0007) -[2026-06-07 02:31:38,994][324535] Updated weights for policy 0, policy_version 42830 (0.0007) -[2026-06-07 02:31:39,243][324535] Updated weights for policy 0, policy_version 42841 (0.0007) -[2026-06-07 02:31:39,463][324535] Updated weights for policy 0, policy_version 42851 (0.0007) -[2026-06-07 02:31:40,224][324535] Updated weights for policy 0, policy_version 42861 (0.0007) -[2026-06-07 02:31:40,430][324535] Updated weights for policy 0, policy_version 42871 (0.0007) -[2026-06-07 02:31:40,662][324535] Updated weights for policy 0, policy_version 42882 (0.0007) -[2026-06-07 02:31:40,923][324535] Updated weights for policy 0, policy_version 42893 (0.0007) -[2026-06-07 02:31:41,010][321787] Fps is (10 sec: 16383.9, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 21987328. Throughput: 0: 17464.8. Samples: 21999360. Policy #0 lag: (min: 50.0, avg: 65.2, max: 114.0) -[2026-06-07 02:31:41,011][321787] Avg episode reward: [(0, '2131.319')] -[2026-06-07 02:31:41,097][324535] Updated weights for policy 0, policy_version 42903 (0.0007) -[2026-06-07 02:31:41,307][324535] Updated weights for policy 0, policy_version 42913 (0.0007) -[2026-06-07 02:31:41,444][324273] Saving new best policy, reward=2131.319! -[2026-06-07 02:31:42,062][324535] Updated weights for policy 0, policy_version 42923 (0.0007) -[2026-06-07 02:31:42,294][324535] Updated weights for policy 0, policy_version 42934 (0.0007) -[2026-06-07 02:31:42,506][324535] Updated weights for policy 0, policy_version 42944 (0.0007) -[2026-06-07 02:31:42,727][324535] Updated weights for policy 0, policy_version 42954 (0.0007) -[2026-06-07 02:31:42,938][324535] Updated weights for policy 0, policy_version 42964 (0.0007) -[2026-06-07 02:31:43,143][324535] Updated weights for policy 0, policy_version 42974 (0.0007) -[2026-06-07 02:31:43,346][324535] Updated weights for policy 0, policy_version 42984 (0.0008) -[2026-06-07 02:31:44,104][324535] Updated weights for policy 0, policy_version 42994 (0.0007) -[2026-06-07 02:31:44,307][324535] Updated weights for policy 0, policy_version 43004 (0.0007) -[2026-06-07 02:31:44,509][324535] Updated weights for policy 0, policy_version 43014 (0.0007) -[2026-06-07 02:31:44,744][324535] Updated weights for policy 0, policy_version 43025 (0.0007) -[2026-06-07 02:31:44,943][324535] Updated weights for policy 0, policy_version 43035 (0.0007) -[2026-06-07 02:31:45,136][324535] Updated weights for policy 0, policy_version 43045 (0.0007) -[2026-06-07 02:31:45,938][324535] Updated weights for policy 0, policy_version 43055 (0.0008) -[2026-06-07 02:31:46,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 22085632. Throughput: 0: 17410.9. Samples: 22102912. Policy #0 lag: (min: 50.0, avg: 65.2, max: 114.0) -[2026-06-07 02:31:46,011][321787] Avg episode reward: [(0, '2085.637')] -[2026-06-07 02:31:46,140][324535] Updated weights for policy 0, policy_version 43065 (0.0007) -[2026-06-07 02:31:46,355][324535] Updated weights for policy 0, policy_version 43075 (0.0007) -[2026-06-07 02:31:46,543][324535] Updated weights for policy 0, policy_version 43085 (0.0007) -[2026-06-07 02:31:46,764][324535] Updated weights for policy 0, policy_version 43095 (0.0007) -[2026-06-07 02:31:46,970][324535] Updated weights for policy 0, policy_version 43105 (0.0007) -[2026-06-07 02:31:47,704][324535] Updated weights for policy 0, policy_version 43115 (0.0007) -[2026-06-07 02:31:47,914][324535] Updated weights for policy 0, policy_version 43125 (0.0007) -[2026-06-07 02:31:48,125][324535] Updated weights for policy 0, policy_version 43135 (0.0007) -[2026-06-07 02:31:48,337][324535] Updated weights for policy 0, policy_version 43145 (0.0007) -[2026-06-07 02:31:48,572][324535] Updated weights for policy 0, policy_version 43155 (0.0007) -[2026-06-07 02:31:48,788][324535] Updated weights for policy 0, policy_version 43165 (0.0007) -[2026-06-07 02:31:48,987][324535] Updated weights for policy 0, policy_version 43175 (0.0007) -[2026-06-07 02:31:49,745][324535] Updated weights for policy 0, policy_version 43185 (0.0007) -[2026-06-07 02:31:49,940][324535] Updated weights for policy 0, policy_version 43195 (0.0007) -[2026-06-07 02:31:50,157][324535] Updated weights for policy 0, policy_version 43206 (0.0007) -[2026-06-07 02:31:50,360][324535] Updated weights for policy 0, policy_version 43216 (0.0007) -[2026-06-07 02:31:50,584][324535] Updated weights for policy 0, policy_version 43227 (0.0007) -[2026-06-07 02:31:50,782][324535] Updated weights for policy 0, policy_version 43237 (0.0007) -[2026-06-07 02:31:51,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 22183936. Throughput: 0: 17507.5. Samples: 22161536. Policy #0 lag: (min: 50.0, avg: 65.2, max: 114.0) -[2026-06-07 02:31:51,011][321787] Avg episode reward: [(0, '2129.213')] -[2026-06-07 02:31:51,521][324535] Updated weights for policy 0, policy_version 43247 (0.0007) -[2026-06-07 02:31:51,715][324535] Updated weights for policy 0, policy_version 43257 (0.0007) -[2026-06-07 02:31:51,974][324535] Updated weights for policy 0, policy_version 43268 (0.0007) -[2026-06-07 02:31:52,174][324535] Updated weights for policy 0, policy_version 43278 (0.0007) -[2026-06-07 02:31:52,372][324535] Updated weights for policy 0, policy_version 43288 (0.0007) -[2026-06-07 02:31:52,564][324535] Updated weights for policy 0, policy_version 43298 (0.0007) -[2026-06-07 02:31:53,315][324535] Updated weights for policy 0, policy_version 43308 (0.0008) -[2026-06-07 02:31:53,536][324535] Updated weights for policy 0, policy_version 43318 (0.0010) -[2026-06-07 02:31:53,744][324535] Updated weights for policy 0, policy_version 43328 (0.0011) -[2026-06-07 02:31:53,943][324535] Updated weights for policy 0, policy_version 43338 (0.0010) -[2026-06-07 02:31:54,170][324535] Updated weights for policy 0, policy_version 43349 (0.0008) -[2026-06-07 02:31:54,380][324535] Updated weights for policy 0, policy_version 43359 (0.0007) -[2026-06-07 02:31:55,155][324535] Updated weights for policy 0, policy_version 43369 (0.0007) -[2026-06-07 02:31:55,355][324535] Updated weights for policy 0, policy_version 43379 (0.0007) -[2026-06-07 02:31:55,572][324535] Updated weights for policy 0, policy_version 43389 (0.0007) -[2026-06-07 02:31:55,813][324535] Updated weights for policy 0, policy_version 43400 (0.0007) -[2026-06-07 02:31:56,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 22249472. Throughput: 0: 17550.2. Samples: 22265728. Policy #0 lag: (min: 50.0, avg: 65.2, max: 114.0) -[2026-06-07 02:31:56,011][321787] Avg episode reward: [(0, '2140.409')] -[2026-06-07 02:31:56,016][324535] Updated weights for policy 0, policy_version 43410 (0.0007) -[2026-06-07 02:31:56,221][324535] Updated weights for policy 0, policy_version 43420 (0.0007) -[2026-06-07 02:31:56,448][324535] Updated weights for policy 0, policy_version 43430 (0.0008) -[2026-06-07 02:31:56,476][324273] Saving new best policy, reward=2140.409! -[2026-06-07 02:31:57,205][324535] Updated weights for policy 0, policy_version 43440 (0.0007) -[2026-06-07 02:31:57,412][324535] Updated weights for policy 0, policy_version 43450 (0.0007) -[2026-06-07 02:31:57,607][324535] Updated weights for policy 0, policy_version 43460 (0.0007) -[2026-06-07 02:31:57,786][324535] Updated weights for policy 0, policy_version 43470 (0.0006) -[2026-06-07 02:31:58,011][324535] Updated weights for policy 0, policy_version 43481 (0.0006) -[2026-06-07 02:31:58,238][324535] Updated weights for policy 0, policy_version 43491 (0.0007) -[2026-06-07 02:31:59,012][324535] Updated weights for policy 0, policy_version 43501 (0.0007) -[2026-06-07 02:31:59,228][324535] Updated weights for policy 0, policy_version 43511 (0.0007) -[2026-06-07 02:31:59,438][324535] Updated weights for policy 0, policy_version 43521 (0.0006) -[2026-06-07 02:31:59,660][324535] Updated weights for policy 0, policy_version 43531 (0.0007) -[2026-06-07 02:31:59,846][324535] Updated weights for policy 0, policy_version 43541 (0.0006) -[2026-06-07 02:32:00,057][324535] Updated weights for policy 0, policy_version 43551 (0.0006) -[2026-06-07 02:32:00,825][324535] Updated weights for policy 0, policy_version 43561 (0.0006) -[2026-06-07 02:32:01,010][321787] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 22347776. Throughput: 0: 17561.6. Samples: 22369536. Policy #0 lag: (min: 50.0, avg: 65.2, max: 114.0) -[2026-06-07 02:32:01,011][321787] Avg episode reward: [(0, '2163.624')] -[2026-06-07 02:32:01,048][324535] Updated weights for policy 0, policy_version 43571 (0.0006) -[2026-06-07 02:32:01,266][324535] Updated weights for policy 0, policy_version 43581 (0.0008) -[2026-06-07 02:32:01,471][324535] Updated weights for policy 0, policy_version 43591 (0.0008) -[2026-06-07 02:32:01,657][324535] Updated weights for policy 0, policy_version 43601 (0.0007) -[2026-06-07 02:32:01,853][324535] Updated weights for policy 0, policy_version 43611 (0.0007) -[2026-06-07 02:32:02,063][324535] Updated weights for policy 0, policy_version 43621 (0.0007) -[2026-06-07 02:32:02,134][324273] Saving new best policy, reward=2163.624! -[2026-06-07 02:32:02,865][324535] Updated weights for policy 0, policy_version 43631 (0.0007) -[2026-06-07 02:32:03,095][324535] Updated weights for policy 0, policy_version 43642 (0.0007) -[2026-06-07 02:32:03,295][324535] Updated weights for policy 0, policy_version 43652 (0.0007) -[2026-06-07 02:32:03,512][324535] Updated weights for policy 0, policy_version 43662 (0.0007) -[2026-06-07 02:32:03,716][324535] Updated weights for policy 0, policy_version 43672 (0.0007) -[2026-06-07 02:32:03,919][324535] Updated weights for policy 0, policy_version 43682 (0.0007) -[2026-06-07 02:32:04,698][324535] Updated weights for policy 0, policy_version 43692 (0.0007) -[2026-06-07 02:32:04,930][324535] Updated weights for policy 0, policy_version 43702 (0.0007) -[2026-06-07 02:32:05,171][324535] Updated weights for policy 0, policy_version 43713 (0.0007) -[2026-06-07 02:32:05,383][324535] Updated weights for policy 0, policy_version 43723 (0.0006) -[2026-06-07 02:32:05,592][324535] Updated weights for policy 0, policy_version 43733 (0.0007) -[2026-06-07 02:32:05,796][324535] Updated weights for policy 0, policy_version 43743 (0.0007) -[2026-06-07 02:32:06,010][321787] Fps is (10 sec: 19660.2, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 22446080. Throughput: 0: 17621.3. Samples: 22428544. Policy #0 lag: (min: 50.0, avg: 65.2, max: 114.0) -[2026-06-07 02:32:06,011][321787] Avg episode reward: [(0, '2145.977')] -[2026-06-07 02:32:06,528][324535] Updated weights for policy 0, policy_version 43753 (0.0007) -[2026-06-07 02:32:06,760][324535] Updated weights for policy 0, policy_version 43765 (0.0007) -[2026-06-07 02:32:06,977][324535] Updated weights for policy 0, policy_version 43775 (0.0007) -[2026-06-07 02:32:07,191][324535] Updated weights for policy 0, policy_version 43785 (0.0007) -[2026-06-07 02:32:07,383][324535] Updated weights for policy 0, policy_version 43795 (0.0007) -[2026-06-07 02:32:07,609][324535] Updated weights for policy 0, policy_version 43805 (0.0007) -[2026-06-07 02:32:07,814][324535] Updated weights for policy 0, policy_version 43815 (0.0007) -[2026-06-07 02:32:08,570][324535] Updated weights for policy 0, policy_version 43825 (0.0007) -[2026-06-07 02:32:08,776][324535] Updated weights for policy 0, policy_version 43835 (0.0007) -[2026-06-07 02:32:09,003][324535] Updated weights for policy 0, policy_version 43845 (0.0007) -[2026-06-07 02:32:09,219][324535] Updated weights for policy 0, policy_version 43855 (0.0007) -[2026-06-07 02:32:09,462][324535] Updated weights for policy 0, policy_version 43866 (0.0007) -[2026-06-07 02:32:09,667][324535] Updated weights for policy 0, policy_version 43876 (0.0008) -[2026-06-07 02:32:10,441][324535] Updated weights for policy 0, policy_version 43886 (0.0009) -[2026-06-07 02:32:10,665][324535] Updated weights for policy 0, policy_version 43896 (0.0011) -[2026-06-07 02:32:10,882][324535] Updated weights for policy 0, policy_version 43906 (0.0010) -[2026-06-07 02:32:11,010][321787] Fps is (10 sec: 16383.9, 60 sec: 17476.2, 300 sec: 17439.3). Total num frames: 22511616. Throughput: 0: 17661.1. Samples: 22532480. Policy #0 lag: (min: 22.0, avg: 37.2, max: 86.0) -[2026-06-07 02:32:11,011][321787] Avg episode reward: [(0, '2196.882')] -[2026-06-07 02:32:11,083][324535] Updated weights for policy 0, policy_version 43917 (0.0012) -[2026-06-07 02:32:11,284][324535] Updated weights for policy 0, policy_version 43927 (0.0011) -[2026-06-07 02:32:11,487][324535] Updated weights for policy 0, policy_version 43937 (0.0010) -[2026-06-07 02:32:11,623][324273] Saving new best policy, reward=2196.882! -[2026-06-07 02:32:12,241][324535] Updated weights for policy 0, policy_version 43947 (0.0009) -[2026-06-07 02:32:12,441][324535] Updated weights for policy 0, policy_version 43957 (0.0007) -[2026-06-07 02:32:12,641][324535] Updated weights for policy 0, policy_version 43967 (0.0007) -[2026-06-07 02:32:12,845][324535] Updated weights for policy 0, policy_version 43977 (0.0007) -[2026-06-07 02:32:13,050][324535] Updated weights for policy 0, policy_version 43987 (0.0009) -[2026-06-07 02:32:13,260][324535] Updated weights for policy 0, policy_version 43997 (0.0007) -[2026-06-07 02:32:13,467][324535] Updated weights for policy 0, policy_version 44007 (0.0007) -[2026-06-07 02:32:14,226][324535] Updated weights for policy 0, policy_version 44017 (0.0007) -[2026-06-07 02:32:14,414][324535] Updated weights for policy 0, policy_version 44027 (0.0007) -[2026-06-07 02:32:14,597][324535] Updated weights for policy 0, policy_version 44037 (0.0007) -[2026-06-07 02:32:14,789][324535] Updated weights for policy 0, policy_version 44047 (0.0008) -[2026-06-07 02:32:14,995][324535] Updated weights for policy 0, policy_version 44057 (0.0007) -[2026-06-07 02:32:15,217][324535] Updated weights for policy 0, policy_version 44068 (0.0007) -[2026-06-07 02:32:15,915][324535] Updated weights for policy 0, policy_version 44078 (0.0007) -[2026-06-07 02:32:16,010][321787] Fps is (10 sec: 16384.4, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 22609920. Throughput: 0: 17666.9. Samples: 22635648. Policy #0 lag: (min: 22.0, avg: 37.2, max: 86.0) -[2026-06-07 02:32:16,011][321787] Avg episode reward: [(0, '2211.765')] -[2026-06-07 02:32:16,125][324535] Updated weights for policy 0, policy_version 44088 (0.0007) -[2026-06-07 02:32:16,312][324535] Updated weights for policy 0, policy_version 44098 (0.0007) -[2026-06-07 02:32:16,518][324535] Updated weights for policy 0, policy_version 44108 (0.0007) -[2026-06-07 02:32:16,757][324535] Updated weights for policy 0, policy_version 44120 (0.0007) -[2026-06-07 02:32:17,000][324535] Updated weights for policy 0, policy_version 44132 (0.0007) -[2026-06-07 02:32:17,070][324273] Saving new best policy, reward=2211.765! -[2026-06-07 02:32:17,745][324535] Updated weights for policy 0, policy_version 44142 (0.0007) -[2026-06-07 02:32:17,940][324535] Updated weights for policy 0, policy_version 44152 (0.0007) -[2026-06-07 02:32:18,136][324535] Updated weights for policy 0, policy_version 44163 (0.0008) -[2026-06-07 02:32:18,315][324535] Updated weights for policy 0, policy_version 44173 (0.0007) -[2026-06-07 02:32:18,528][324535] Updated weights for policy 0, policy_version 44183 (0.0007) -[2026-06-07 02:32:18,734][324535] Updated weights for policy 0, policy_version 44193 (0.0007) -[2026-06-07 02:32:19,450][324535] Updated weights for policy 0, policy_version 44203 (0.0007) -[2026-06-07 02:32:19,663][324535] Updated weights for policy 0, policy_version 44213 (0.0007) -[2026-06-07 02:32:19,852][324535] Updated weights for policy 0, policy_version 44223 (0.0007) -[2026-06-07 02:32:20,053][324535] Updated weights for policy 0, policy_version 44233 (0.0007) -[2026-06-07 02:32:20,245][324535] Updated weights for policy 0, policy_version 44243 (0.0007) -[2026-06-07 02:32:20,445][324535] Updated weights for policy 0, policy_version 44253 (0.0007) -[2026-06-07 02:32:20,647][324535] Updated weights for policy 0, policy_version 44263 (0.0007) -[2026-06-07 02:32:21,011][321787] Fps is (10 sec: 19659.9, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 22708224. Throughput: 0: 17712.1. Samples: 22694528. Policy #0 lag: (min: 22.0, avg: 37.2, max: 86.0) -[2026-06-07 02:32:21,013][321787] Avg episode reward: [(0, '2221.010')] -[2026-06-07 02:32:21,020][324273] Saving new best policy, reward=2221.010! -[2026-06-07 02:32:21,396][324535] Updated weights for policy 0, policy_version 44273 (0.0007) -[2026-06-07 02:32:21,603][324535] Updated weights for policy 0, policy_version 44283 (0.0007) -[2026-06-07 02:32:21,807][324535] Updated weights for policy 0, policy_version 44293 (0.0007) -[2026-06-07 02:32:22,021][324535] Updated weights for policy 0, policy_version 44303 (0.0007) -[2026-06-07 02:32:22,246][324535] Updated weights for policy 0, policy_version 44314 (0.0007) -[2026-06-07 02:32:22,462][324535] Updated weights for policy 0, policy_version 44324 (0.0007) -[2026-06-07 02:32:23,193][324535] Updated weights for policy 0, policy_version 44334 (0.0007) -[2026-06-07 02:32:23,397][324535] Updated weights for policy 0, policy_version 44344 (0.0007) -[2026-06-07 02:32:23,608][324535] Updated weights for policy 0, policy_version 44355 (0.0007) -[2026-06-07 02:32:23,829][324535] Updated weights for policy 0, policy_version 44365 (0.0007) -[2026-06-07 02:32:24,043][324535] Updated weights for policy 0, policy_version 44375 (0.0007) -[2026-06-07 02:32:24,255][324535] Updated weights for policy 0, policy_version 44385 (0.0007) -[2026-06-07 02:32:25,017][324535] Updated weights for policy 0, policy_version 44395 (0.0007) -[2026-06-07 02:32:25,214][324535] Updated weights for policy 0, policy_version 44405 (0.0007) -[2026-06-07 02:32:25,425][324535] Updated weights for policy 0, policy_version 44415 (0.0007) -[2026-06-07 02:32:25,629][324535] Updated weights for policy 0, policy_version 44425 (0.0007) -[2026-06-07 02:32:25,812][324535] Updated weights for policy 0, policy_version 44435 (0.0007) -[2026-06-07 02:32:26,009][324535] Updated weights for policy 0, policy_version 44445 (0.0007) -[2026-06-07 02:32:26,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 22773760. Throughput: 0: 17723.8. Samples: 22796928. Policy #0 lag: (min: 22.0, avg: 37.2, max: 86.0) -[2026-06-07 02:32:26,011][321787] Avg episode reward: [(0, '2220.078')] -[2026-06-07 02:32:26,205][324535] Updated weights for policy 0, policy_version 44455 (0.0007) -[2026-06-07 02:32:26,956][324535] Updated weights for policy 0, policy_version 44465 (0.0007) -[2026-06-07 02:32:27,158][324535] Updated weights for policy 0, policy_version 44475 (0.0007) -[2026-06-07 02:32:27,365][324535] Updated weights for policy 0, policy_version 44485 (0.0007) -[2026-06-07 02:32:27,556][324535] Updated weights for policy 0, policy_version 44495 (0.0007) -[2026-06-07 02:32:27,785][324535] Updated weights for policy 0, policy_version 44505 (0.0007) -[2026-06-07 02:32:27,996][324535] Updated weights for policy 0, policy_version 44515 (0.0007) -[2026-06-07 02:32:28,744][324535] Updated weights for policy 0, policy_version 44525 (0.0007) -[2026-06-07 02:32:28,939][324535] Updated weights for policy 0, policy_version 44535 (0.0007) -[2026-06-07 02:32:29,156][324535] Updated weights for policy 0, policy_version 44545 (0.0007) -[2026-06-07 02:32:29,357][324535] Updated weights for policy 0, policy_version 44555 (0.0007) -[2026-06-07 02:32:29,548][324535] Updated weights for policy 0, policy_version 44565 (0.0007) -[2026-06-07 02:32:29,756][324535] Updated weights for policy 0, policy_version 44575 (0.0007) -[2026-06-07 02:32:30,515][324535] Updated weights for policy 0, policy_version 44585 (0.0007) -[2026-06-07 02:32:30,727][324535] Updated weights for policy 0, policy_version 44595 (0.0007) -[2026-06-07 02:32:30,920][324535] Updated weights for policy 0, policy_version 44605 (0.0007) -[2026-06-07 02:32:31,010][321787] Fps is (10 sec: 16384.9, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 22872064. Throughput: 0: 17866.0. Samples: 22906880. Policy #0 lag: (min: 22.0, avg: 37.2, max: 86.0) -[2026-06-07 02:32:31,011][321787] Avg episode reward: [(0, '2224.718')] -[2026-06-07 02:32:31,126][324535] Updated weights for policy 0, policy_version 44615 (0.0007) -[2026-06-07 02:32:31,324][324535] Updated weights for policy 0, policy_version 44625 (0.0007) -[2026-06-07 02:32:31,508][324535] Updated weights for policy 0, policy_version 44635 (0.0007) -[2026-06-07 02:32:31,735][324535] Updated weights for policy 0, policy_version 44645 (0.0007) -[2026-06-07 02:32:31,794][324273] Saving new best policy, reward=2224.718! -[2026-06-07 02:32:32,451][324535] Updated weights for policy 0, policy_version 44655 (0.0007) -[2026-06-07 02:32:32,643][324535] Updated weights for policy 0, policy_version 44665 (0.0007) -[2026-06-07 02:32:32,861][324535] Updated weights for policy 0, policy_version 44675 (0.0007) -[2026-06-07 02:32:33,078][324535] Updated weights for policy 0, policy_version 44685 (0.0007) -[2026-06-07 02:32:33,292][324535] Updated weights for policy 0, policy_version 44695 (0.0007) -[2026-06-07 02:32:33,491][324535] Updated weights for policy 0, policy_version 44705 (0.0007) -[2026-06-07 02:32:34,219][324535] Updated weights for policy 0, policy_version 44715 (0.0007) -[2026-06-07 02:32:34,435][324535] Updated weights for policy 0, policy_version 44726 (0.0007) -[2026-06-07 02:32:34,666][324535] Updated weights for policy 0, policy_version 44737 (0.0007) -[2026-06-07 02:32:34,886][324535] Updated weights for policy 0, policy_version 44747 (0.0007) -[2026-06-07 02:32:35,097][324535] Updated weights for policy 0, policy_version 44757 (0.0007) -[2026-06-07 02:32:35,296][324535] Updated weights for policy 0, policy_version 44767 (0.0007) -[2026-06-07 02:32:36,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 22970368. Throughput: 0: 17706.7. Samples: 22958336. Policy #0 lag: (min: 22.0, avg: 37.2, max: 86.0) -[2026-06-07 02:32:36,011][321787] Avg episode reward: [(0, '2216.582')] -[2026-06-07 02:32:36,069][324535] Updated weights for policy 0, policy_version 44778 (0.0007) -[2026-06-07 02:32:36,271][324535] Updated weights for policy 0, policy_version 44788 (0.0007) -[2026-06-07 02:32:36,491][324535] Updated weights for policy 0, policy_version 44798 (0.0007) -[2026-06-07 02:32:36,699][324535] Updated weights for policy 0, policy_version 44808 (0.0007) -[2026-06-07 02:32:36,912][324535] Updated weights for policy 0, policy_version 44818 (0.0007) -[2026-06-07 02:32:37,110][324535] Updated weights for policy 0, policy_version 44828 (0.0007) -[2026-06-07 02:32:37,311][324535] Updated weights for policy 0, policy_version 44838 (0.0007) -[2026-06-07 02:32:38,062][324535] Updated weights for policy 0, policy_version 44848 (0.0007) -[2026-06-07 02:32:38,279][324535] Updated weights for policy 0, policy_version 44858 (0.0007) -[2026-06-07 02:32:38,489][324535] Updated weights for policy 0, policy_version 44868 (0.0007) -[2026-06-07 02:32:38,690][324535] Updated weights for policy 0, policy_version 44878 (0.0007) -[2026-06-07 02:32:38,904][324535] Updated weights for policy 0, policy_version 44888 (0.0007) -[2026-06-07 02:32:39,106][324535] Updated weights for policy 0, policy_version 44898 (0.0007) -[2026-06-07 02:32:39,824][324535] Updated weights for policy 0, policy_version 44908 (0.0007) -[2026-06-07 02:32:40,045][324535] Updated weights for policy 0, policy_version 44918 (0.0007) -[2026-06-07 02:32:40,261][324535] Updated weights for policy 0, policy_version 44928 (0.0007) -[2026-06-07 02:32:40,471][324535] Updated weights for policy 0, policy_version 44938 (0.0007) -[2026-06-07 02:32:40,662][324535] Updated weights for policy 0, policy_version 44948 (0.0007) -[2026-06-07 02:32:40,866][324535] Updated weights for policy 0, policy_version 44958 (0.0007) -[2026-06-07 02:32:41,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 23035904. Throughput: 0: 17683.9. Samples: 23061504. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) -[2026-06-07 02:32:41,011][321787] Avg episode reward: [(0, '2184.870')] -[2026-06-07 02:32:41,076][324535] Updated weights for policy 0, policy_version 44968 (0.0007) -[2026-06-07 02:32:41,831][324535] Updated weights for policy 0, policy_version 44978 (0.0007) -[2026-06-07 02:32:42,037][324535] Updated weights for policy 0, policy_version 44988 (0.0007) -[2026-06-07 02:32:42,225][324535] Updated weights for policy 0, policy_version 44998 (0.0007) -[2026-06-07 02:32:42,448][324535] Updated weights for policy 0, policy_version 45009 (0.0007) -[2026-06-07 02:32:42,649][324535] Updated weights for policy 0, policy_version 45019 (0.0007) -[2026-06-07 02:32:42,852][324535] Updated weights for policy 0, policy_version 45029 (0.0007) -[2026-06-07 02:32:43,590][324535] Updated weights for policy 0, policy_version 45039 (0.0007) -[2026-06-07 02:32:43,801][324535] Updated weights for policy 0, policy_version 45049 (0.0007) -[2026-06-07 02:32:44,004][324535] Updated weights for policy 0, policy_version 45059 (0.0007) -[2026-06-07 02:32:44,207][324535] Updated weights for policy 0, policy_version 45069 (0.0007) -[2026-06-07 02:32:44,418][324535] Updated weights for policy 0, policy_version 45079 (0.0007) -[2026-06-07 02:32:44,641][324535] Updated weights for policy 0, policy_version 45089 (0.0007) -[2026-06-07 02:32:45,410][324535] Updated weights for policy 0, policy_version 45099 (0.0007) -[2026-06-07 02:32:45,613][324535] Updated weights for policy 0, policy_version 45109 (0.0007) -[2026-06-07 02:32:45,817][324535] Updated weights for policy 0, policy_version 45119 (0.0007) -[2026-06-07 02:32:46,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 23134208. Throughput: 0: 17897.3. Samples: 23174912. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) -[2026-06-07 02:32:46,011][321787] Avg episode reward: [(0, '2212.426')] -[2026-06-07 02:32:46,035][324535] Updated weights for policy 0, policy_version 45129 (0.0007) -[2026-06-07 02:32:46,254][324535] Updated weights for policy 0, policy_version 45139 (0.0007) -[2026-06-07 02:32:46,473][324535] Updated weights for policy 0, policy_version 45149 (0.0007) -[2026-06-07 02:32:46,672][324535] Updated weights for policy 0, policy_version 45159 (0.0007) -[2026-06-07 02:32:47,392][324535] Updated weights for policy 0, policy_version 45169 (0.0007) -[2026-06-07 02:32:47,613][324535] Updated weights for policy 0, policy_version 45179 (0.0007) -[2026-06-07 02:32:47,826][324535] Updated weights for policy 0, policy_version 45189 (0.0007) -[2026-06-07 02:32:48,034][324535] Updated weights for policy 0, policy_version 45199 (0.0007) -[2026-06-07 02:32:48,229][324535] Updated weights for policy 0, policy_version 45209 (0.0007) -[2026-06-07 02:32:48,420][324535] Updated weights for policy 0, policy_version 45219 (0.0007) -[2026-06-07 02:32:49,162][324535] Updated weights for policy 0, policy_version 45229 (0.0007) -[2026-06-07 02:32:49,371][324535] Updated weights for policy 0, policy_version 45239 (0.0007) -[2026-06-07 02:32:49,590][324535] Updated weights for policy 0, policy_version 45250 (0.0007) -[2026-06-07 02:32:49,802][324535] Updated weights for policy 0, policy_version 45260 (0.0007) -[2026-06-07 02:32:50,002][324535] Updated weights for policy 0, policy_version 45270 (0.0007) -[2026-06-07 02:32:50,196][324535] Updated weights for policy 0, policy_version 45280 (0.0007) -[2026-06-07 02:32:50,970][324535] Updated weights for policy 0, policy_version 45291 (0.0007) -[2026-06-07 02:32:51,010][321787] Fps is (10 sec: 19660.7, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 23232512. Throughput: 0: 17612.9. Samples: 23221120. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) -[2026-06-07 02:32:51,011][321787] Avg episode reward: [(0, '2213.173')] -[2026-06-07 02:32:51,171][324535] Updated weights for policy 0, policy_version 45301 (0.0007) -[2026-06-07 02:32:51,385][324535] Updated weights for policy 0, policy_version 45311 (0.0007) -[2026-06-07 02:32:51,576][324535] Updated weights for policy 0, policy_version 45321 (0.0007) -[2026-06-07 02:32:51,787][324535] Updated weights for policy 0, policy_version 45331 (0.0007) -[2026-06-07 02:32:51,993][324535] Updated weights for policy 0, policy_version 45341 (0.0007) -[2026-06-07 02:32:52,193][324535] Updated weights for policy 0, policy_version 45351 (0.0007) -[2026-06-07 02:32:52,927][324535] Updated weights for policy 0, policy_version 45361 (0.0007) -[2026-06-07 02:32:53,144][324535] Updated weights for policy 0, policy_version 45371 (0.0007) -[2026-06-07 02:32:53,354][324535] Updated weights for policy 0, policy_version 45381 (0.0007) -[2026-06-07 02:32:53,603][324535] Updated weights for policy 0, policy_version 45392 (0.0008) -[2026-06-07 02:32:53,793][324535] Updated weights for policy 0, policy_version 45402 (0.0007) -[2026-06-07 02:32:53,998][324535] Updated weights for policy 0, policy_version 45412 (0.0007) -[2026-06-07 02:32:54,748][324535] Updated weights for policy 0, policy_version 45422 (0.0007) -[2026-06-07 02:32:54,981][324535] Updated weights for policy 0, policy_version 45433 (0.0007) -[2026-06-07 02:32:55,173][324535] Updated weights for policy 0, policy_version 45443 (0.0008) -[2026-06-07 02:32:55,370][324535] Updated weights for policy 0, policy_version 45453 (0.0007) -[2026-06-07 02:32:55,571][324535] Updated weights for policy 0, policy_version 45463 (0.0008) -[2026-06-07 02:32:55,784][324535] Updated weights for policy 0, policy_version 45473 (0.0006) -[2026-06-07 02:32:56,010][321787] Fps is (10 sec: 19660.8, 60 sec: 18022.4, 300 sec: 17550.3). Total num frames: 23330816. Throughput: 0: 17669.7. Samples: 23327616. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) -[2026-06-07 02:32:56,011][321787] Avg episode reward: [(0, '2278.868')] -[2026-06-07 02:32:56,015][324273] Saving new best policy, reward=2278.868! -[2026-06-07 02:32:56,540][324535] Updated weights for policy 0, policy_version 45483 (0.0007) -[2026-06-07 02:32:56,729][324535] Updated weights for policy 0, policy_version 45493 (0.0009) -[2026-06-07 02:32:56,923][324535] Updated weights for policy 0, policy_version 45503 (0.0007) -[2026-06-07 02:32:57,169][324535] Updated weights for policy 0, policy_version 45514 (0.0008) -[2026-06-07 02:32:57,371][324535] Updated weights for policy 0, policy_version 45524 (0.0007) -[2026-06-07 02:32:57,605][324535] Updated weights for policy 0, policy_version 45535 (0.0007) -[2026-06-07 02:32:58,368][324535] Updated weights for policy 0, policy_version 45545 (0.0007) -[2026-06-07 02:32:58,575][324535] Updated weights for policy 0, policy_version 45555 (0.0008) -[2026-06-07 02:32:58,783][324535] Updated weights for policy 0, policy_version 45565 (0.0008) -[2026-06-07 02:32:58,980][324535] Updated weights for policy 0, policy_version 45575 (0.0007) -[2026-06-07 02:32:59,207][324535] Updated weights for policy 0, policy_version 45585 (0.0007) -[2026-06-07 02:32:59,412][324535] Updated weights for policy 0, policy_version 45595 (0.0007) -[2026-06-07 02:32:59,629][324535] Updated weights for policy 0, policy_version 45605 (0.0007) -[2026-06-07 02:33:00,373][324535] Updated weights for policy 0, policy_version 45615 (0.0010) -[2026-06-07 02:33:00,568][324535] Updated weights for policy 0, policy_version 45625 (0.0007) -[2026-06-07 02:33:00,805][324535] Updated weights for policy 0, policy_version 45635 (0.0007) -[2026-06-07 02:33:00,989][324535] Updated weights for policy 0, policy_version 45645 (0.0007) -[2026-06-07 02:33:01,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 23396352. Throughput: 0: 17877.3. Samples: 23440128. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) -[2026-06-07 02:33:01,011][321787] Avg episode reward: [(0, '2300.594')] -[2026-06-07 02:33:01,207][324535] Updated weights for policy 0, policy_version 45655 (0.0007) -[2026-06-07 02:33:01,421][324535] Updated weights for policy 0, policy_version 45665 (0.0007) -[2026-06-07 02:33:01,548][324273] Saving new best policy, reward=2300.594! -[2026-06-07 02:33:02,152][324535] Updated weights for policy 0, policy_version 45675 (0.0007) -[2026-06-07 02:33:02,379][324535] Updated weights for policy 0, policy_version 45685 (0.0008) -[2026-06-07 02:33:02,572][324535] Updated weights for policy 0, policy_version 45695 (0.0008) -[2026-06-07 02:33:02,787][324535] Updated weights for policy 0, policy_version 45705 (0.0007) -[2026-06-07 02:33:02,993][324535] Updated weights for policy 0, policy_version 45715 (0.0007) -[2026-06-07 02:33:03,200][324535] Updated weights for policy 0, policy_version 45725 (0.0007) -[2026-06-07 02:33:03,418][324535] Updated weights for policy 0, policy_version 45735 (0.0007) -[2026-06-07 02:33:04,174][324535] Updated weights for policy 0, policy_version 45745 (0.0007) -[2026-06-07 02:33:04,381][324535] Updated weights for policy 0, policy_version 45755 (0.0007) -[2026-06-07 02:33:04,595][324535] Updated weights for policy 0, policy_version 45765 (0.0007) -[2026-06-07 02:33:04,802][324535] Updated weights for policy 0, policy_version 45775 (0.0007) -[2026-06-07 02:33:05,000][324535] Updated weights for policy 0, policy_version 45785 (0.0007) -[2026-06-07 02:33:05,200][324535] Updated weights for policy 0, policy_version 45795 (0.0007) -[2026-06-07 02:33:05,941][324535] Updated weights for policy 0, policy_version 45805 (0.0007) -[2026-06-07 02:33:06,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.4, 300 sec: 17550.3). Total num frames: 23494656. Throughput: 0: 17567.5. Samples: 23485056. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) -[2026-06-07 02:33:06,011][321787] Avg episode reward: [(0, '2319.447')] -[2026-06-07 02:33:06,134][324535] Updated weights for policy 0, policy_version 45815 (0.0007) -[2026-06-07 02:33:06,356][324535] Updated weights for policy 0, policy_version 45825 (0.0007) -[2026-06-07 02:33:06,566][324535] Updated weights for policy 0, policy_version 45835 (0.0007) -[2026-06-07 02:33:06,777][324535] Updated weights for policy 0, policy_version 45845 (0.0007) -[2026-06-07 02:33:06,990][324535] Updated weights for policy 0, policy_version 45855 (0.0007) -[2026-06-07 02:33:07,164][324273] Saving new best policy, reward=2319.447! -[2026-06-07 02:33:07,719][324535] Updated weights for policy 0, policy_version 45865 (0.0007) -[2026-06-07 02:33:07,954][324535] Updated weights for policy 0, policy_version 45875 (0.0007) -[2026-06-07 02:33:08,154][324535] Updated weights for policy 0, policy_version 45885 (0.0007) -[2026-06-07 02:33:08,356][324535] Updated weights for policy 0, policy_version 45895 (0.0007) -[2026-06-07 02:33:08,569][324535] Updated weights for policy 0, policy_version 45905 (0.0007) -[2026-06-07 02:33:08,750][324535] Updated weights for policy 0, policy_version 45915 (0.0007) -[2026-06-07 02:33:08,965][324535] Updated weights for policy 0, policy_version 45925 (0.0007) -[2026-06-07 02:33:09,726][324535] Updated weights for policy 0, policy_version 45935 (0.0007) -[2026-06-07 02:33:09,911][324535] Updated weights for policy 0, policy_version 45945 (0.0007) -[2026-06-07 02:33:10,128][324535] Updated weights for policy 0, policy_version 45955 (0.0007) -[2026-06-07 02:33:10,338][324535] Updated weights for policy 0, policy_version 45965 (0.0008) -[2026-06-07 02:33:10,546][324535] Updated weights for policy 0, policy_version 45975 (0.0009) -[2026-06-07 02:33:10,752][324535] Updated weights for policy 0, policy_version 45985 (0.0007) -[2026-06-07 02:33:11,010][321787] Fps is (10 sec: 19660.8, 60 sec: 18022.4, 300 sec: 17550.3). Total num frames: 23592960. Throughput: 0: 17683.9. Samples: 23592704. Policy #0 lag: (min: 55.0, avg: 70.8, max: 119.0) -[2026-06-07 02:33:11,011][321787] Avg episode reward: [(0, '2331.792')] -[2026-06-07 02:33:11,014][324273] Saving new best policy, reward=2331.792! -[2026-06-07 02:33:11,490][324535] Updated weights for policy 0, policy_version 45995 (0.0007) -[2026-06-07 02:33:11,730][324535] Updated weights for policy 0, policy_version 46006 (0.0007) -[2026-06-07 02:33:11,951][324535] Updated weights for policy 0, policy_version 46016 (0.0007) -[2026-06-07 02:33:12,154][324535] Updated weights for policy 0, policy_version 46026 (0.0007) -[2026-06-07 02:33:12,354][324535] Updated weights for policy 0, policy_version 46036 (0.0007) -[2026-06-07 02:33:12,558][324535] Updated weights for policy 0, policy_version 46046 (0.0007) -[2026-06-07 02:33:12,754][324535] Updated weights for policy 0, policy_version 46056 (0.0007) -[2026-06-07 02:33:13,478][324535] Updated weights for policy 0, policy_version 46066 (0.0007) -[2026-06-07 02:33:13,678][324535] Updated weights for policy 0, policy_version 46076 (0.0007) -[2026-06-07 02:33:13,900][324535] Updated weights for policy 0, policy_version 46086 (0.0007) -[2026-06-07 02:33:14,103][324535] Updated weights for policy 0, policy_version 46096 (0.0007) -[2026-06-07 02:33:14,308][324535] Updated weights for policy 0, policy_version 46106 (0.0007) -[2026-06-07 02:33:14,516][324535] Updated weights for policy 0, policy_version 46116 (0.0007) -[2026-06-07 02:33:15,266][324535] Updated weights for policy 0, policy_version 46126 (0.0007) -[2026-06-07 02:33:15,497][324535] Updated weights for policy 0, policy_version 46137 (0.0007) -[2026-06-07 02:33:15,754][324535] Updated weights for policy 0, policy_version 46148 (0.0007) -[2026-06-07 02:33:15,981][324535] Updated weights for policy 0, policy_version 46158 (0.0007) -[2026-06-07 02:33:16,010][321787] Fps is (10 sec: 16383.9, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 23658496. Throughput: 0: 17740.8. Samples: 23705216. Policy #0 lag: (min: 55.0, avg: 70.8, max: 119.0) -[2026-06-07 02:33:16,011][321787] Avg episode reward: [(0, '2283.011')] -[2026-06-07 02:33:16,172][324535] Updated weights for policy 0, policy_version 46168 (0.0007) -[2026-06-07 02:33:16,408][324535] Updated weights for policy 0, policy_version 46178 (0.0007) -[2026-06-07 02:33:17,157][324535] Updated weights for policy 0, policy_version 46188 (0.0007) -[2026-06-07 02:33:17,348][324535] Updated weights for policy 0, policy_version 46198 (0.0007) -[2026-06-07 02:33:17,559][324535] Updated weights for policy 0, policy_version 46208 (0.0007) -[2026-06-07 02:33:17,750][324535] Updated weights for policy 0, policy_version 46218 (0.0007) -[2026-06-07 02:33:17,966][324535] Updated weights for policy 0, policy_version 46228 (0.0007) -[2026-06-07 02:33:18,186][324535] Updated weights for policy 0, policy_version 46238 (0.0007) -[2026-06-07 02:33:18,398][324535] Updated weights for policy 0, policy_version 46248 (0.0007) -[2026-06-07 02:33:19,145][324535] Updated weights for policy 0, policy_version 46258 (0.0007) -[2026-06-07 02:33:19,352][324535] Updated weights for policy 0, policy_version 46268 (0.0007) -[2026-06-07 02:33:19,569][324535] Updated weights for policy 0, policy_version 46278 (0.0007) -[2026-06-07 02:33:19,779][324535] Updated weights for policy 0, policy_version 46288 (0.0007) -[2026-06-07 02:33:19,992][324535] Updated weights for policy 0, policy_version 46298 (0.0007) -[2026-06-07 02:33:20,207][324535] Updated weights for policy 0, policy_version 46309 (0.0007) -[2026-06-07 02:33:20,998][324535] Updated weights for policy 0, policy_version 46319 (0.0007) -[2026-06-07 02:33:21,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.4, 300 sec: 17550.3). Total num frames: 23756800. Throughput: 0: 17595.7. Samples: 23750144. Policy #0 lag: (min: 55.0, avg: 70.8, max: 119.0) -[2026-06-07 02:33:21,011][321787] Avg episode reward: [(0, '2282.428')] -[2026-06-07 02:33:21,215][324535] Updated weights for policy 0, policy_version 46329 (0.0007) -[2026-06-07 02:33:21,440][324535] Updated weights for policy 0, policy_version 46340 (0.0007) -[2026-06-07 02:33:21,649][324535] Updated weights for policy 0, policy_version 46350 (0.0007) -[2026-06-07 02:33:21,853][324535] Updated weights for policy 0, policy_version 46360 (0.0007) -[2026-06-07 02:33:22,054][324535] Updated weights for policy 0, policy_version 46370 (0.0007) -[2026-06-07 02:33:22,758][324535] Updated weights for policy 0, policy_version 46380 (0.0008) -[2026-06-07 02:33:22,970][324535] Updated weights for policy 0, policy_version 46390 (0.0011) -[2026-06-07 02:33:23,181][324535] Updated weights for policy 0, policy_version 46400 (0.0008) -[2026-06-07 02:33:23,387][324535] Updated weights for policy 0, policy_version 46410 (0.0008) -[2026-06-07 02:33:23,590][324535] Updated weights for policy 0, policy_version 46420 (0.0011) -[2026-06-07 02:33:23,794][324535] Updated weights for policy 0, policy_version 46430 (0.0011) -[2026-06-07 02:33:23,996][324535] Updated weights for policy 0, policy_version 46440 (0.0007) -[2026-06-07 02:33:24,726][324535] Updated weights for policy 0, policy_version 46450 (0.0007) -[2026-06-07 02:33:24,939][324535] Updated weights for policy 0, policy_version 46460 (0.0007) -[2026-06-07 02:33:25,153][324535] Updated weights for policy 0, policy_version 46470 (0.0007) -[2026-06-07 02:33:25,351][324535] Updated weights for policy 0, policy_version 46480 (0.0007) -[2026-06-07 02:33:25,576][324535] Updated weights for policy 0, policy_version 46491 (0.0007) -[2026-06-07 02:33:25,775][324535] Updated weights for policy 0, policy_version 46501 (0.0007) -[2026-06-07 02:33:26,010][321787] Fps is (10 sec: 19660.8, 60 sec: 18022.4, 300 sec: 17550.3). Total num frames: 23855104. Throughput: 0: 17718.1. Samples: 23858816. Policy #0 lag: (min: 55.0, avg: 70.8, max: 119.0) -[2026-06-07 02:33:26,011][321787] Avg episode reward: [(0, '2253.073')] -[2026-06-07 02:33:26,521][324535] Updated weights for policy 0, policy_version 46511 (0.0007) -[2026-06-07 02:33:26,750][324535] Updated weights for policy 0, policy_version 46521 (0.0007) -[2026-06-07 02:33:26,950][324535] Updated weights for policy 0, policy_version 46531 (0.0007) -[2026-06-07 02:33:27,153][324535] Updated weights for policy 0, policy_version 46541 (0.0007) -[2026-06-07 02:33:27,356][324535] Updated weights for policy 0, policy_version 46551 (0.0007) -[2026-06-07 02:33:27,580][324535] Updated weights for policy 0, policy_version 46561 (0.0007) -[2026-06-07 02:33:28,342][324535] Updated weights for policy 0, policy_version 46571 (0.0007) -[2026-06-07 02:33:28,542][324535] Updated weights for policy 0, policy_version 46581 (0.0007) -[2026-06-07 02:33:28,759][324535] Updated weights for policy 0, policy_version 46591 (0.0007) -[2026-06-07 02:33:28,980][324535] Updated weights for policy 0, policy_version 46601 (0.0008) -[2026-06-07 02:33:29,198][324535] Updated weights for policy 0, policy_version 46611 (0.0010) -[2026-06-07 02:33:29,410][324535] Updated weights for policy 0, policy_version 46622 (0.0011) -[2026-06-07 02:33:29,607][324535] Updated weights for policy 0, policy_version 46632 (0.0012) -[2026-06-07 02:33:30,366][324535] Updated weights for policy 0, policy_version 46642 (0.0007) -[2026-06-07 02:33:30,572][324535] Updated weights for policy 0, policy_version 46652 (0.0011) -[2026-06-07 02:33:30,803][324535] Updated weights for policy 0, policy_version 46663 (0.0011) -[2026-06-07 02:33:31,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 23920640. Throughput: 0: 17644.1. Samples: 23968896. Policy #0 lag: (min: 55.0, avg: 70.8, max: 119.0) -[2026-06-07 02:33:31,010][324535] Updated weights for policy 0, policy_version 46673 (0.0011) -[2026-06-07 02:33:31,011][321787] Avg episode reward: [(0, '2222.345')] -[2026-06-07 02:33:31,216][324535] Updated weights for policy 0, policy_version 46683 (0.0011) -[2026-06-07 02:33:31,440][324535] Updated weights for policy 0, policy_version 46693 (0.0011) -[2026-06-07 02:33:32,224][324535] Updated weights for policy 0, policy_version 46703 (0.0009) -[2026-06-07 02:33:32,436][324535] Updated weights for policy 0, policy_version 46713 (0.0010) -[2026-06-07 02:33:32,660][324535] Updated weights for policy 0, policy_version 46723 (0.0011) -[2026-06-07 02:33:32,874][324535] Updated weights for policy 0, policy_version 46733 (0.0010) -[2026-06-07 02:33:33,091][324535] Updated weights for policy 0, policy_version 46743 (0.0010) -[2026-06-07 02:33:33,297][324535] Updated weights for policy 0, policy_version 46753 (0.0007) -[2026-06-07 02:33:34,015][324535] Updated weights for policy 0, policy_version 46763 (0.0007) -[2026-06-07 02:33:34,237][324535] Updated weights for policy 0, policy_version 46773 (0.0007) -[2026-06-07 02:33:34,481][324535] Updated weights for policy 0, policy_version 46784 (0.0007) -[2026-06-07 02:33:34,683][324535] Updated weights for policy 0, policy_version 46794 (0.0007) -[2026-06-07 02:33:34,908][324535] Updated weights for policy 0, policy_version 46804 (0.0007) -[2026-06-07 02:33:35,121][324535] Updated weights for policy 0, policy_version 46814 (0.0007) -[2026-06-07 02:33:35,329][324535] Updated weights for policy 0, policy_version 46824 (0.0007) -[2026-06-07 02:33:36,010][321787] Fps is (10 sec: 16383.9, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 24018944. Throughput: 0: 17644.1. Samples: 24015104. Policy #0 lag: (min: 55.0, avg: 70.8, max: 119.0) -[2026-06-07 02:33:36,011][321787] Avg episode reward: [(0, '2214.816')] -[2026-06-07 02:33:36,076][324535] Updated weights for policy 0, policy_version 46834 (0.0007) -[2026-06-07 02:33:36,280][324535] Updated weights for policy 0, policy_version 46844 (0.0007) -[2026-06-07 02:33:36,480][324535] Updated weights for policy 0, policy_version 46854 (0.0007) -[2026-06-07 02:33:36,686][324535] Updated weights for policy 0, policy_version 46864 (0.0007) -[2026-06-07 02:33:36,909][324535] Updated weights for policy 0, policy_version 46874 (0.0007) -[2026-06-07 02:33:37,129][324535] Updated weights for policy 0, policy_version 46884 (0.0007) -[2026-06-07 02:33:37,849][324535] Updated weights for policy 0, policy_version 46894 (0.0007) -[2026-06-07 02:33:38,062][324535] Updated weights for policy 0, policy_version 46904 (0.0007) -[2026-06-07 02:33:38,271][324535] Updated weights for policy 0, policy_version 46914 (0.0007) -[2026-06-07 02:33:38,488][324535] Updated weights for policy 0, policy_version 46924 (0.0007) -[2026-06-07 02:33:38,697][324535] Updated weights for policy 0, policy_version 46934 (0.0007) -[2026-06-07 02:33:38,910][324535] Updated weights for policy 0, policy_version 46944 (0.0007) -[2026-06-07 02:33:39,589][324535] Updated weights for policy 0, policy_version 46954 (0.0007) -[2026-06-07 02:33:39,798][324535] Updated weights for policy 0, policy_version 46964 (0.0007) -[2026-06-07 02:33:40,007][324535] Updated weights for policy 0, policy_version 46974 (0.0007) -[2026-06-07 02:33:40,205][324535] Updated weights for policy 0, policy_version 46984 (0.0007) -[2026-06-07 02:33:40,410][324535] Updated weights for policy 0, policy_version 46994 (0.0007) -[2026-06-07 02:33:40,621][324535] Updated weights for policy 0, policy_version 47004 (0.0007) -[2026-06-07 02:33:40,836][324535] Updated weights for policy 0, policy_version 47014 (0.0007) -[2026-06-07 02:33:41,010][321787] Fps is (10 sec: 19660.8, 60 sec: 18022.4, 300 sec: 17550.3). Total num frames: 24117248. Throughput: 0: 17661.2. Samples: 24122368. Policy #0 lag: (min: 55.0, avg: 70.8, max: 119.0) -[2026-06-07 02:33:41,011][321787] Avg episode reward: [(0, '2212.183')] -[2026-06-07 02:33:41,572][324535] Updated weights for policy 0, policy_version 47024 (0.0008) -[2026-06-07 02:33:41,771][324535] Updated weights for policy 0, policy_version 47034 (0.0007) -[2026-06-07 02:33:41,987][324535] Updated weights for policy 0, policy_version 47044 (0.0008) -[2026-06-07 02:33:42,213][324535] Updated weights for policy 0, policy_version 47055 (0.0007) -[2026-06-07 02:33:42,433][324535] Updated weights for policy 0, policy_version 47065 (0.0007) -[2026-06-07 02:33:42,660][324535] Updated weights for policy 0, policy_version 47075 (0.0008) -[2026-06-07 02:33:43,373][324535] Updated weights for policy 0, policy_version 47085 (0.0010) -[2026-06-07 02:33:43,582][324535] Updated weights for policy 0, policy_version 47095 (0.0011) -[2026-06-07 02:33:43,803][324535] Updated weights for policy 0, policy_version 47105 (0.0011) -[2026-06-07 02:33:43,999][324535] Updated weights for policy 0, policy_version 47115 (0.0011) -[2026-06-07 02:33:44,220][324535] Updated weights for policy 0, policy_version 47125 (0.0011) -[2026-06-07 02:33:44,406][324535] Updated weights for policy 0, policy_version 47135 (0.0011) -[2026-06-07 02:33:45,151][324535] Updated weights for policy 0, policy_version 47145 (0.0011) -[2026-06-07 02:33:45,362][324535] Updated weights for policy 0, policy_version 47155 (0.0008) -[2026-06-07 02:33:45,551][324535] Updated weights for policy 0, policy_version 47165 (0.0007) -[2026-06-07 02:33:45,768][324535] Updated weights for policy 0, policy_version 47175 (0.0010) -[2026-06-07 02:33:45,972][324535] Updated weights for policy 0, policy_version 47185 (0.0011) -[2026-06-07 02:33:46,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 24182784. Throughput: 0: 17624.2. Samples: 24233216. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) -[2026-06-07 02:33:46,011][321787] Avg episode reward: [(0, '2208.738')] -[2026-06-07 02:33:46,178][324535] Updated weights for policy 0, policy_version 47195 (0.0011) -[2026-06-07 02:33:46,374][324535] Updated weights for policy 0, policy_version 47205 (0.0011) -[2026-06-07 02:33:47,118][324535] Updated weights for policy 0, policy_version 47215 (0.0008) -[2026-06-07 02:33:47,296][324535] Updated weights for policy 0, policy_version 47225 (0.0007) -[2026-06-07 02:33:47,505][324535] Updated weights for policy 0, policy_version 47235 (0.0007) -[2026-06-07 02:33:47,719][324535] Updated weights for policy 0, policy_version 47245 (0.0007) -[2026-06-07 02:33:47,920][324535] Updated weights for policy 0, policy_version 47255 (0.0009) -[2026-06-07 02:33:48,122][324535] Updated weights for policy 0, policy_version 47265 (0.0007) -[2026-06-07 02:33:48,860][324535] Updated weights for policy 0, policy_version 47275 (0.0007) -[2026-06-07 02:33:49,046][324535] Updated weights for policy 0, policy_version 47285 (0.0007) -[2026-06-07 02:33:49,250][324535] Updated weights for policy 0, policy_version 47295 (0.0010) -[2026-06-07 02:33:49,470][324535] Updated weights for policy 0, policy_version 47305 (0.0011) -[2026-06-07 02:33:49,674][324535] Updated weights for policy 0, policy_version 47315 (0.0009) -[2026-06-07 02:33:49,871][324535] Updated weights for policy 0, policy_version 47325 (0.0007) -[2026-06-07 02:33:50,085][324535] Updated weights for policy 0, policy_version 47335 (0.0007) -[2026-06-07 02:33:50,814][324535] Updated weights for policy 0, policy_version 47345 (0.0007) -[2026-06-07 02:33:51,010][321787] Fps is (10 sec: 16383.9, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 24281088. Throughput: 0: 17638.4. Samples: 24278784. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) -[2026-06-07 02:33:51,011][321787] Avg episode reward: [(0, '2161.742')] -[2026-06-07 02:33:51,061][324535] Updated weights for policy 0, policy_version 47356 (0.0009) -[2026-06-07 02:33:51,268][324535] Updated weights for policy 0, policy_version 47366 (0.0011) -[2026-06-07 02:33:51,467][324535] Updated weights for policy 0, policy_version 47376 (0.0011) -[2026-06-07 02:33:51,659][324535] Updated weights for policy 0, policy_version 47386 (0.0007) -[2026-06-07 02:33:51,874][324535] Updated weights for policy 0, policy_version 47396 (0.0007) -[2026-06-07 02:33:52,554][324535] Updated weights for policy 0, policy_version 47406 (0.0007) -[2026-06-07 02:33:52,762][324535] Updated weights for policy 0, policy_version 47416 (0.0007) -[2026-06-07 02:33:52,990][324535] Updated weights for policy 0, policy_version 47426 (0.0010) -[2026-06-07 02:33:53,192][324535] Updated weights for policy 0, policy_version 47436 (0.0011) -[2026-06-07 02:33:53,400][324535] Updated weights for policy 0, policy_version 47446 (0.0009) -[2026-06-07 02:33:53,608][324535] Updated weights for policy 0, policy_version 47456 (0.0007) -[2026-06-07 02:33:54,337][324535] Updated weights for policy 0, policy_version 47466 (0.0007) -[2026-06-07 02:33:54,543][324535] Updated weights for policy 0, policy_version 47476 (0.0006) -[2026-06-07 02:33:54,749][324535] Updated weights for policy 0, policy_version 47486 (0.0007) -[2026-06-07 02:33:54,954][324535] Updated weights for policy 0, policy_version 47496 (0.0008) -[2026-06-07 02:33:55,174][324535] Updated weights for policy 0, policy_version 47506 (0.0007) -[2026-06-07 02:33:55,391][324535] Updated weights for policy 0, policy_version 47516 (0.0006) -[2026-06-07 02:33:55,592][324535] Updated weights for policy 0, policy_version 47526 (0.0007) -[2026-06-07 02:33:56,010][321787] Fps is (10 sec: 19660.9, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 24379392. Throughput: 0: 17780.6. Samples: 24392832. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) -[2026-06-07 02:33:56,011][321787] Avg episode reward: [(0, '2165.673')] -[2026-06-07 02:33:56,323][324535] Updated weights for policy 0, policy_version 47536 (0.0006) -[2026-06-07 02:33:56,534][324535] Updated weights for policy 0, policy_version 47546 (0.0006) -[2026-06-07 02:33:56,755][324535] Updated weights for policy 0, policy_version 47556 (0.0007) -[2026-06-07 02:33:56,955][324535] Updated weights for policy 0, policy_version 47566 (0.0011) -[2026-06-07 02:33:57,173][324535] Updated weights for policy 0, policy_version 47576 (0.0007) -[2026-06-07 02:33:57,373][324535] Updated weights for policy 0, policy_version 47586 (0.0007) -[2026-06-07 02:33:58,095][324535] Updated weights for policy 0, policy_version 47596 (0.0007) -[2026-06-07 02:33:58,302][324535] Updated weights for policy 0, policy_version 47606 (0.0007) -[2026-06-07 02:33:58,528][324535] Updated weights for policy 0, policy_version 47616 (0.0007) -[2026-06-07 02:33:58,750][324535] Updated weights for policy 0, policy_version 47626 (0.0007) -[2026-06-07 02:33:58,990][324535] Updated weights for policy 0, policy_version 47637 (0.0007) -[2026-06-07 02:33:59,211][324535] Updated weights for policy 0, policy_version 47647 (0.0007) -[2026-06-07 02:33:59,938][324535] Updated weights for policy 0, policy_version 47657 (0.0007) -[2026-06-07 02:34:00,134][324535] Updated weights for policy 0, policy_version 47667 (0.0007) -[2026-06-07 02:34:00,316][324535] Updated weights for policy 0, policy_version 47677 (0.0011) -[2026-06-07 02:34:00,561][324535] Updated weights for policy 0, policy_version 47688 (0.0011) -[2026-06-07 02:34:00,783][324535] Updated weights for policy 0, policy_version 47698 (0.0011) -[2026-06-07 02:34:00,971][324535] Updated weights for policy 0, policy_version 47708 (0.0011) -[2026-06-07 02:34:01,010][321787] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 24444928. Throughput: 0: 17550.2. Samples: 24494976. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) -[2026-06-07 02:34:01,011][321787] Avg episode reward: [(0, '2176.429')] -[2026-06-07 02:34:01,186][324535] Updated weights for policy 0, policy_version 47718 (0.0011) -[2026-06-07 02:34:01,937][324535] Updated weights for policy 0, policy_version 47728 (0.0009) -[2026-06-07 02:34:02,127][324535] Updated weights for policy 0, policy_version 47738 (0.0011) -[2026-06-07 02:34:02,327][324535] Updated weights for policy 0, policy_version 47748 (0.0009) -[2026-06-07 02:34:02,530][324535] Updated weights for policy 0, policy_version 47758 (0.0008) -[2026-06-07 02:34:02,717][324535] Updated weights for policy 0, policy_version 47768 (0.0008) -[2026-06-07 02:34:02,943][324535] Updated weights for policy 0, policy_version 47778 (0.0007) -[2026-06-07 02:34:03,683][324535] Updated weights for policy 0, policy_version 47788 (0.0007) -[2026-06-07 02:34:03,876][324535] Updated weights for policy 0, policy_version 47798 (0.0007) -[2026-06-07 02:34:04,096][324535] Updated weights for policy 0, policy_version 47808 (0.0007) -[2026-06-07 02:34:04,314][324535] Updated weights for policy 0, policy_version 47818 (0.0007) -[2026-06-07 02:34:04,514][324535] Updated weights for policy 0, policy_version 47828 (0.0007) -[2026-06-07 02:34:04,723][324535] Updated weights for policy 0, policy_version 47838 (0.0007) -[2026-06-07 02:34:04,914][324535] Updated weights for policy 0, policy_version 47848 (0.0007) -[2026-06-07 02:34:05,643][324535] Updated weights for policy 0, policy_version 47858 (0.0007) -[2026-06-07 02:34:05,852][324535] Updated weights for policy 0, policy_version 47868 (0.0007) -[2026-06-07 02:34:06,010][321787] Fps is (10 sec: 16383.9, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 24543232. Throughput: 0: 17649.8. Samples: 24544384. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) -[2026-06-07 02:34:06,011][321787] Avg episode reward: [(0, '2169.045')] -[2026-06-07 02:34:06,066][324535] Updated weights for policy 0, policy_version 47879 (0.0007) -[2026-06-07 02:34:06,272][324535] Updated weights for policy 0, policy_version 47889 (0.0007) -[2026-06-07 02:34:06,464][324535] Updated weights for policy 0, policy_version 47899 (0.0007) -[2026-06-07 02:34:06,670][324535] Updated weights for policy 0, policy_version 47909 (0.0007) -[2026-06-07 02:34:07,395][324535] Updated weights for policy 0, policy_version 47919 (0.0007) -[2026-06-07 02:34:07,606][324535] Updated weights for policy 0, policy_version 47929 (0.0007) -[2026-06-07 02:34:07,829][324535] Updated weights for policy 0, policy_version 47939 (0.0007) -[2026-06-07 02:34:08,027][324535] Updated weights for policy 0, policy_version 47949 (0.0007) -[2026-06-07 02:34:08,236][324535] Updated weights for policy 0, policy_version 47959 (0.0007) -[2026-06-07 02:34:08,443][324535] Updated weights for policy 0, policy_version 47969 (0.0007) -[2026-06-07 02:34:09,227][324535] Updated weights for policy 0, policy_version 47979 (0.0007) -[2026-06-07 02:34:09,448][324535] Updated weights for policy 0, policy_version 47989 (0.0007) -[2026-06-07 02:34:09,638][324535] Updated weights for policy 0, policy_version 47999 (0.0007) -[2026-06-07 02:34:09,856][324535] Updated weights for policy 0, policy_version 48009 (0.0007) -[2026-06-07 02:34:10,091][324535] Updated weights for policy 0, policy_version 48020 (0.0007) -[2026-06-07 02:34:10,301][324535] Updated weights for policy 0, policy_version 48030 (0.0007) -[2026-06-07 02:34:10,494][324535] Updated weights for policy 0, policy_version 48040 (0.0007) -[2026-06-07 02:34:11,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 24641536. Throughput: 0: 17706.7. Samples: 24655616. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) -[2026-06-07 02:34:11,011][321787] Avg episode reward: [(0, '2169.481')] -[2026-06-07 02:34:11,227][324535] Updated weights for policy 0, policy_version 48050 (0.0007) -[2026-06-07 02:34:11,447][324535] Updated weights for policy 0, policy_version 48060 (0.0007) -[2026-06-07 02:34:11,669][324535] Updated weights for policy 0, policy_version 48070 (0.0007) -[2026-06-07 02:34:11,873][324535] Updated weights for policy 0, policy_version 48080 (0.0007) -[2026-06-07 02:34:12,074][324535] Updated weights for policy 0, policy_version 48090 (0.0007) -[2026-06-07 02:34:12,288][324535] Updated weights for policy 0, policy_version 48100 (0.0007) -[2026-06-07 02:34:13,015][324535] Updated weights for policy 0, policy_version 48110 (0.0007) -[2026-06-07 02:34:13,235][324535] Updated weights for policy 0, policy_version 48120 (0.0007) -[2026-06-07 02:34:13,461][324535] Updated weights for policy 0, policy_version 48130 (0.0007) -[2026-06-07 02:34:13,667][324535] Updated weights for policy 0, policy_version 48140 (0.0007) -[2026-06-07 02:34:13,889][324535] Updated weights for policy 0, policy_version 48150 (0.0007) -[2026-06-07 02:34:14,101][324535] Updated weights for policy 0, policy_version 48160 (0.0007) -[2026-06-07 02:34:14,821][324535] Updated weights for policy 0, policy_version 48170 (0.0007) -[2026-06-07 02:34:15,039][324535] Updated weights for policy 0, policy_version 48180 (0.0007) -[2026-06-07 02:34:15,258][324535] Updated weights for policy 0, policy_version 48191 (0.0007) -[2026-06-07 02:34:15,461][324535] Updated weights for policy 0, policy_version 48201 (0.0007) -[2026-06-07 02:34:15,665][324535] Updated weights for policy 0, policy_version 48211 (0.0007) -[2026-06-07 02:34:15,880][324535] Updated weights for policy 0, policy_version 48221 (0.0007) -[2026-06-07 02:34:16,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 24707072. Throughput: 0: 17516.1. Samples: 24757120. Policy #0 lag: (min: 68.0, avg: 99.9, max: 134.0) -[2026-06-07 02:34:16,011][321787] Avg episode reward: [(0, '2198.382')] -[2026-06-07 02:34:16,099][324535] Updated weights for policy 0, policy_version 48231 (0.0007) -[2026-06-07 02:34:16,835][324535] Updated weights for policy 0, policy_version 48241 (0.0007) -[2026-06-07 02:34:17,041][324535] Updated weights for policy 0, policy_version 48251 (0.0007) -[2026-06-07 02:34:17,255][324535] Updated weights for policy 0, policy_version 48261 (0.0007) -[2026-06-07 02:34:17,469][324535] Updated weights for policy 0, policy_version 48271 (0.0007) -[2026-06-07 02:34:17,673][324535] Updated weights for policy 0, policy_version 48281 (0.0007) -[2026-06-07 02:34:17,882][324535] Updated weights for policy 0, policy_version 48291 (0.0007) -[2026-06-07 02:34:18,619][324535] Updated weights for policy 0, policy_version 48301 (0.0007) -[2026-06-07 02:34:18,827][324535] Updated weights for policy 0, policy_version 48311 (0.0007) -[2026-06-07 02:34:19,021][324535] Updated weights for policy 0, policy_version 48321 (0.0007) -[2026-06-07 02:34:19,218][324535] Updated weights for policy 0, policy_version 48331 (0.0007) -[2026-06-07 02:34:19,425][324535] Updated weights for policy 0, policy_version 48341 (0.0007) -[2026-06-07 02:34:19,629][324535] Updated weights for policy 0, policy_version 48351 (0.0007) -[2026-06-07 02:34:20,386][324535] Updated weights for policy 0, policy_version 48361 (0.0007) -[2026-06-07 02:34:20,592][324535] Updated weights for policy 0, policy_version 48371 (0.0007) -[2026-06-07 02:34:20,794][324535] Updated weights for policy 0, policy_version 48381 (0.0007) -[2026-06-07 02:34:21,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 24805376. Throughput: 0: 17621.3. Samples: 24808064. Policy #0 lag: (min: 68.0, avg: 99.9, max: 134.0) -[2026-06-07 02:34:21,011][321787] Avg episode reward: [(0, '2181.058')] -[2026-06-07 02:34:21,026][324535] Updated weights for policy 0, policy_version 48391 (0.0007) -[2026-06-07 02:34:21,224][324535] Updated weights for policy 0, policy_version 48401 (0.0007) -[2026-06-07 02:34:21,433][324535] Updated weights for policy 0, policy_version 48411 (0.0007) -[2026-06-07 02:34:21,646][324535] Updated weights for policy 0, policy_version 48421 (0.0007) -[2026-06-07 02:34:22,387][324535] Updated weights for policy 0, policy_version 48431 (0.0007) -[2026-06-07 02:34:22,589][324535] Updated weights for policy 0, policy_version 48441 (0.0006) -[2026-06-07 02:34:22,790][324535] Updated weights for policy 0, policy_version 48451 (0.0006) -[2026-06-07 02:34:22,993][324535] Updated weights for policy 0, policy_version 48461 (0.0007) -[2026-06-07 02:34:23,226][324535] Updated weights for policy 0, policy_version 48472 (0.0007) -[2026-06-07 02:34:23,445][324535] Updated weights for policy 0, policy_version 48482 (0.0007) -[2026-06-07 02:34:24,171][324535] Updated weights for policy 0, policy_version 48492 (0.0007) -[2026-06-07 02:34:24,352][324535] Updated weights for policy 0, policy_version 48502 (0.0007) -[2026-06-07 02:34:24,571][324535] Updated weights for policy 0, policy_version 48512 (0.0007) -[2026-06-07 02:34:24,783][324535] Updated weights for policy 0, policy_version 48523 (0.0007) -[2026-06-07 02:34:25,011][324535] Updated weights for policy 0, policy_version 48533 (0.0007) -[2026-06-07 02:34:25,223][324535] Updated weights for policy 0, policy_version 48543 (0.0007) -[2026-06-07 02:34:25,977][324535] Updated weights for policy 0, policy_version 48553 (0.0007) -[2026-06-07 02:34:26,010][321787] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 24903680. Throughput: 0: 17644.1. Samples: 24916352. Policy #0 lag: (min: 68.0, avg: 99.9, max: 134.0) -[2026-06-07 02:34:26,011][321787] Avg episode reward: [(0, '2230.613')] -[2026-06-07 02:34:26,183][324535] Updated weights for policy 0, policy_version 48563 (0.0007) -[2026-06-07 02:34:26,403][324535] Updated weights for policy 0, policy_version 48573 (0.0008) -[2026-06-07 02:34:26,612][324535] Updated weights for policy 0, policy_version 48583 (0.0007) -[2026-06-07 02:34:26,811][324535] Updated weights for policy 0, policy_version 48593 (0.0007) -[2026-06-07 02:34:27,056][324535] Updated weights for policy 0, policy_version 48604 (0.0007) -[2026-06-07 02:34:27,246][324535] Updated weights for policy 0, policy_version 48614 (0.0007) -[2026-06-07 02:34:27,993][324535] Updated weights for policy 0, policy_version 48624 (0.0007) -[2026-06-07 02:34:28,198][324535] Updated weights for policy 0, policy_version 48634 (0.0007) -[2026-06-07 02:34:28,425][324535] Updated weights for policy 0, policy_version 48645 (0.0011) -[2026-06-07 02:34:28,626][324535] Updated weights for policy 0, policy_version 48655 (0.0011) -[2026-06-07 02:34:28,835][324535] Updated weights for policy 0, policy_version 48665 (0.0011) -[2026-06-07 02:34:29,048][324535] Updated weights for policy 0, policy_version 48675 (0.0011) -[2026-06-07 02:34:29,827][324535] Updated weights for policy 0, policy_version 48685 (0.0009) -[2026-06-07 02:34:30,039][324535] Updated weights for policy 0, policy_version 48695 (0.0007) -[2026-06-07 02:34:30,239][324535] Updated weights for policy 0, policy_version 48705 (0.0007) -[2026-06-07 02:34:30,445][324535] Updated weights for policy 0, policy_version 48715 (0.0007) -[2026-06-07 02:34:30,668][324535] Updated weights for policy 0, policy_version 48725 (0.0006) -[2026-06-07 02:34:30,873][324535] Updated weights for policy 0, policy_version 48735 (0.0007) -[2026-06-07 02:34:31,010][321787] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 24969216. Throughput: 0: 17476.3. Samples: 25019648. Policy #0 lag: (min: 68.0, avg: 99.9, max: 134.0) -[2026-06-07 02:34:31,011][321787] Avg episode reward: [(0, '2230.613')] -[2026-06-07 02:34:31,610][324535] Updated weights for policy 0, policy_version 48745 (0.0007) -[2026-06-07 02:34:31,806][324535] Updated weights for policy 0, policy_version 48755 (0.0011) -[2026-06-07 02:34:32,024][324535] Updated weights for policy 0, policy_version 48765 (0.0011) -[2026-06-07 02:34:32,227][324535] Updated weights for policy 0, policy_version 48775 (0.0007) -[2026-06-07 02:34:32,445][324535] Updated weights for policy 0, policy_version 48785 (0.0008) -[2026-06-07 02:34:32,654][324535] Updated weights for policy 0, policy_version 48795 (0.0008) -[2026-06-07 02:34:32,861][324535] Updated weights for policy 0, policy_version 48805 (0.0007) -[2026-06-07 02:34:32,912][324273] Stopping Batcher_0... -[2026-06-07 02:34:32,912][321787] Component Batcher_0 stopped! -[2026-06-07 02:34:32,912][324273] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed11/checkpoint_p0/checkpoint_000048808_25034752.pth... -[2026-06-07 02:34:32,913][321787] Component RolloutWorker_w1 stopped! -[2026-06-07 02:34:32,913][324537] Stopping RolloutWorker_w1... -[2026-06-07 02:34:32,913][324536] Stopping RolloutWorker_w0... -[2026-06-07 02:34:32,913][324537] Loop rollout_proc1_evt_loop terminating... -[2026-06-07 02:34:32,913][324536] Loop rollout_proc0_evt_loop terminating... -[2026-06-07 02:34:32,913][321787] Component RolloutWorker_w0 stopped! -[2026-06-07 02:34:32,913][324273] Loop batcher_evt_loop terminating... -[2026-06-07 02:34:32,935][324273] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed11/checkpoint_p0/checkpoint_000048808_25034752.pth... -[2026-06-07 02:34:32,953][324535] Weights refcount: 2 0 -[2026-06-07 02:34:32,955][324535] Stopping InferenceWorker_p0-w0... -[2026-06-07 02:34:32,955][324535] Loop inference_proc0-0_evt_loop terminating... -[2026-06-07 02:34:32,955][321787] Component InferenceWorker_p0-w0 stopped! -[2026-06-07 02:34:32,957][324273] Stopping LearnerWorker_p0... -[2026-06-07 02:34:32,957][324273] Loop learner_proc0_evt_loop terminating... -[2026-06-07 02:34:32,957][321787] Component LearnerWorker_p0 stopped! -[2026-06-07 02:34:32,957][321787] Waiting for process learner_proc0 to stop... -[2026-06-07 02:34:33,871][321787] Waiting for process inference_proc0-0 to join... -[2026-06-07 02:34:33,873][321787] Waiting for process rollout_proc0 to join... -[2026-06-07 02:34:33,874][321787] Waiting for process rollout_proc1 to join... -[2026-06-07 02:34:33,875][321787] Batcher 0 profile tree view: -batching: 0.7789, releasing_batches: 0.0247 -[2026-06-07 02:34:33,876][321787] InferenceWorker_p0-w0 profile tree view: +[2026-06-07 02:47:39,768][472025] Using optimizer +[2026-06-07 02:47:41,073][472025] No checkpoints found +[2026-06-07 02:47:41,073][472025] Did not load from checkpoint, starting from scratch! +[2026-06-07 02:47:41,073][472025] Initialized policy 0 weights for model version 0 +[2026-06-07 02:47:41,083][472025] LearnerWorker_p0 finished initialization! +[2026-06-07 02:47:41,084][472025] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-07 02:47:42,616][472560] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191] +[2026-06-07 02:47:42,617][472560] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-07 02:47:42,617][472560] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for actor process 0 +[2026-06-07 02:47:42,618][472560] Num visible devices: 1 +[2026-06-07 02:47:42,658][472561] Worker 1 uses CPU cores [192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383] +[2026-06-07 02:47:42,659][472561] Using GPUs [0] for process 1 (actually maps to GPUs [1]) +[2026-06-07 02:47:42,659][472561] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for actor process 1 +[2026-06-07 02:47:42,660][472561] Num visible devices: 1 +[2026-06-07 02:47:42,896][472559] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-07 02:47:42,896][472559] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for inference process 0 +[2026-06-07 02:47:42,898][472559] Num visible devices: 1 +[2026-06-07 02:47:42,914][472559] RunningMeanStd input shape: (3, 84, 84) +[2026-06-07 02:47:42,932][472559] RunningMeanStd input shape: (1,) +[2026-06-07 02:47:42,951][472559] ConvEncoder: input_channels=3 +[2026-06-07 02:47:43,019][472559] Conv encoder output size: 512 +[2026-06-07 02:47:43,036][464927] Inference worker 0-0 is ready! +[2026-06-07 02:47:43,037][464927] All inference workers are ready! Signal rollout workers to start! +[2026-06-07 02:47:43,038][472561] EnvRunner 1-0 uses policy 0 +[2026-06-07 02:47:43,038][472560] EnvRunner 0-0 uses policy 0 +[2026-06-07 02:47:43,117][464927] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2026-06-07 02:47:46,356][472025] Signal inference workers to stop experience collection... +[2026-06-07 02:47:46,360][472559] InferenceWorker_p0-w0: stopping experience collection +[2026-06-07 02:47:47,730][472025] Signal inference workers to resume experience collection... +[2026-06-07 02:47:47,731][472559] InferenceWorker_p0-w0: resuming experience collection +[2026-06-07 02:47:47,935][472559] Updated weights for policy 0, policy_version 73 (0.0063) +[2026-06-07 02:47:48,045][472559] Updated weights for policy 0, policy_version 83 (0.0009) +[2026-06-07 02:47:48,117][464927] Fps is (10 sec: 6553.6, 60 sec: 6553.6, 300 sec: 6553.6). Total num frames: 32768. Throughput: 0: 9625.6. Samples: 48128. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) +[2026-06-07 02:47:48,118][464927] Avg episode reward: [(0, '-7.495')] +[2026-06-07 02:47:48,152][472559] Updated weights for policy 0, policy_version 93 (0.0008) +[2026-06-07 02:47:48,272][472559] Updated weights for policy 0, policy_version 104 (0.0009) +[2026-06-07 02:47:48,387][472559] Updated weights for policy 0, policy_version 114 (0.0009) +[2026-06-07 02:47:48,511][472559] Updated weights for policy 0, policy_version 124 (0.0008) +[2026-06-07 02:47:48,899][472559] Updated weights for policy 0, policy_version 134 (0.0005) +[2026-06-07 02:47:49,030][472559] Updated weights for policy 0, policy_version 146 (0.0008) +[2026-06-07 02:47:49,142][472559] Updated weights for policy 0, policy_version 156 (0.0008) +[2026-06-07 02:47:49,254][472559] Updated weights for policy 0, policy_version 166 (0.0007) +[2026-06-07 02:47:49,369][472559] Updated weights for policy 0, policy_version 176 (0.0007) +[2026-06-07 02:47:49,487][472559] Updated weights for policy 0, policy_version 186 (0.0007) +[2026-06-07 02:47:49,904][472559] Updated weights for policy 0, policy_version 196 (0.0007) +[2026-06-07 02:47:50,043][472559] Updated weights for policy 0, policy_version 206 (0.0004) +[2026-06-07 02:47:50,165][472559] Updated weights for policy 0, policy_version 217 (0.0006) +[2026-06-07 02:47:50,277][472559] Updated weights for policy 0, policy_version 227 (0.0008) +[2026-06-07 02:47:50,396][472559] Updated weights for policy 0, policy_version 237 (0.0008) +[2026-06-07 02:47:50,510][472559] Updated weights for policy 0, policy_version 247 (0.0007) +[2026-06-07 02:47:50,883][472559] Updated weights for policy 0, policy_version 257 (0.0007) +[2026-06-07 02:47:50,999][472559] Updated weights for policy 0, policy_version 267 (0.0004) +[2026-06-07 02:47:51,110][472559] Updated weights for policy 0, policy_version 277 (0.0008) +[2026-06-07 02:47:51,241][472559] Updated weights for policy 0, policy_version 287 (0.0007) +[2026-06-07 02:47:51,347][472559] Updated weights for policy 0, policy_version 297 (0.0008) +[2026-06-07 02:47:51,457][472559] Updated weights for policy 0, policy_version 307 (0.0008) +[2026-06-07 02:47:51,567][472559] Updated weights for policy 0, policy_version 317 (0.0008) +[2026-06-07 02:47:52,006][472559] Updated weights for policy 0, policy_version 327 (0.0005) +[2026-06-07 02:47:52,116][472559] Updated weights for policy 0, policy_version 337 (0.0006) +[2026-06-07 02:47:52,224][472559] Updated weights for policy 0, policy_version 347 (0.0008) +[2026-06-07 02:47:52,337][472559] Updated weights for policy 0, policy_version 357 (0.0009) +[2026-06-07 02:47:52,458][472559] Updated weights for policy 0, policy_version 367 (0.0007) +[2026-06-07 02:47:52,569][472559] Updated weights for policy 0, policy_version 377 (0.0007) +[2026-06-07 02:47:52,971][472559] Updated weights for policy 0, policy_version 387 (0.0007) +[2026-06-07 02:47:53,084][472559] Updated weights for policy 0, policy_version 397 (0.0007) +[2026-06-07 02:47:53,116][464927] Fps is (10 sec: 19661.0, 60 sec: 19661.0, 300 sec: 19661.0). Total num frames: 196608. Throughput: 0: 20352.2. Samples: 203520. Policy #0 lag: (min: 5.0, avg: 21.9, max: 69.0) +[2026-06-07 02:47:53,117][464927] Avg episode reward: [(0, '-5.691')] +[2026-06-07 02:47:53,196][472559] Updated weights for policy 0, policy_version 407 (0.0009) +[2026-06-07 02:47:53,308][472559] Updated weights for policy 0, policy_version 417 (0.0007) +[2026-06-07 02:47:53,417][472559] Updated weights for policy 0, policy_version 427 (0.0008) +[2026-06-07 02:47:53,526][472559] Updated weights for policy 0, policy_version 437 (0.0008) +[2026-06-07 02:47:53,640][472559] Updated weights for policy 0, policy_version 447 (0.0008) +[2026-06-07 02:47:53,655][472025] Saving new best policy, reward=-5.691! +[2026-06-07 02:47:54,088][472559] Updated weights for policy 0, policy_version 457 (0.0005) +[2026-06-07 02:47:54,210][472559] Updated weights for policy 0, policy_version 467 (0.0004) +[2026-06-07 02:47:54,321][472559] Updated weights for policy 0, policy_version 477 (0.0008) +[2026-06-07 02:47:54,447][472559] Updated weights for policy 0, policy_version 487 (0.0007) +[2026-06-07 02:47:54,562][472559] Updated weights for policy 0, policy_version 497 (0.0007) +[2026-06-07 02:47:54,675][472559] Updated weights for policy 0, policy_version 507 (0.0008) +[2026-06-07 02:47:55,092][472559] Updated weights for policy 0, policy_version 517 (0.0006) +[2026-06-07 02:47:55,214][472559] Updated weights for policy 0, policy_version 527 (0.0004) +[2026-06-07 02:47:55,326][472559] Updated weights for policy 0, policy_version 537 (0.0005) +[2026-06-07 02:47:55,446][472559] Updated weights for policy 0, policy_version 548 (0.0008) +[2026-06-07 02:47:55,551][472559] Updated weights for policy 0, policy_version 558 (0.0008) +[2026-06-07 02:47:55,677][472559] Updated weights for policy 0, policy_version 568 (0.0008) +[2026-06-07 02:47:56,060][472559] Updated weights for policy 0, policy_version 578 (0.0004) +[2026-06-07 02:47:56,186][472559] Updated weights for policy 0, policy_version 590 (0.0006) +[2026-06-07 02:47:56,317][472559] Updated weights for policy 0, policy_version 601 (0.0008) +[2026-06-07 02:47:56,436][472559] Updated weights for policy 0, policy_version 611 (0.0009) +[2026-06-07 02:47:56,567][472559] Updated weights for policy 0, policy_version 621 (0.0007) +[2026-06-07 02:47:56,686][472559] Updated weights for policy 0, policy_version 631 (0.0005) +[2026-06-07 02:47:57,116][472559] Updated weights for policy 0, policy_version 643 (0.0009) +[2026-06-07 02:47:57,229][472559] Updated weights for policy 0, policy_version 653 (0.0007) +[2026-06-07 02:47:57,357][472559] Updated weights for policy 0, policy_version 663 (0.0007) +[2026-06-07 02:47:57,486][472559] Updated weights for policy 0, policy_version 675 (0.0008) +[2026-06-07 02:47:57,615][472559] Updated weights for policy 0, policy_version 687 (0.0008) +[2026-06-07 02:47:57,674][464927] Heartbeat connected on Batcher_0 +[2026-06-07 02:47:57,693][464927] Heartbeat connected on RolloutWorker_w0 +[2026-06-07 02:47:57,699][464927] Heartbeat connected on InferenceWorker_p0-w0 +[2026-06-07 02:47:57,703][464927] Heartbeat connected on RolloutWorker_w1 +[2026-06-07 02:47:57,726][472559] Updated weights for policy 0, policy_version 697 (0.0010) +[2026-06-07 02:47:57,799][464927] Heartbeat connected on LearnerWorker_p0 +[2026-06-07 02:47:58,117][464927] Fps is (10 sec: 32767.6, 60 sec: 24029.7, 300 sec: 24029.7). Total num frames: 360448. Throughput: 0: 25941.2. Samples: 389120. Policy #0 lag: (min: 24.0, avg: 52.5, max: 88.0) +[2026-06-07 02:47:58,118][464927] Avg episode reward: [(0, '2.847')] +[2026-06-07 02:47:58,123][472025] Saving new best policy, reward=2.847! +[2026-06-07 02:47:58,261][472559] Updated weights for policy 0, policy_version 709 (0.0008) +[2026-06-07 02:47:58,396][472559] Updated weights for policy 0, policy_version 721 (0.0008) +[2026-06-07 02:47:58,515][472559] Updated weights for policy 0, policy_version 732 (0.0008) +[2026-06-07 02:47:58,637][472559] Updated weights for policy 0, policy_version 742 (0.0009) +[2026-06-07 02:47:58,756][472559] Updated weights for policy 0, policy_version 752 (0.0008) +[2026-06-07 02:47:58,868][472559] Updated weights for policy 0, policy_version 762 (0.0008) +[2026-06-07 02:47:59,365][472559] Updated weights for policy 0, policy_version 774 (0.0008) +[2026-06-07 02:47:59,497][472559] Updated weights for policy 0, policy_version 785 (0.0008) +[2026-06-07 02:47:59,621][472559] Updated weights for policy 0, policy_version 796 (0.0008) +[2026-06-07 02:47:59,775][472559] Updated weights for policy 0, policy_version 810 (0.0008) +[2026-06-07 02:47:59,885][472559] Updated weights for policy 0, policy_version 821 (0.0008) +[2026-06-07 02:48:00,007][472559] Updated weights for policy 0, policy_version 832 (0.0010) +[2026-06-07 02:48:00,603][472559] Updated weights for policy 0, policy_version 846 (0.0009) +[2026-06-07 02:48:00,743][472559] Updated weights for policy 0, policy_version 859 (0.0007) +[2026-06-07 02:48:00,867][472559] Updated weights for policy 0, policy_version 871 (0.0008) +[2026-06-07 02:48:00,991][472559] Updated weights for policy 0, policy_version 882 (0.0008) +[2026-06-07 02:48:01,121][472559] Updated weights for policy 0, policy_version 894 (0.0008) +[2026-06-07 02:48:01,825][472559] Updated weights for policy 0, policy_version 907 (0.0009) +[2026-06-07 02:48:01,957][472559] Updated weights for policy 0, policy_version 920 (0.0008) +[2026-06-07 02:48:02,100][472559] Updated weights for policy 0, policy_version 934 (0.0009) +[2026-06-07 02:48:02,235][472559] Updated weights for policy 0, policy_version 947 (0.0008) +[2026-06-07 02:48:02,374][472559] Updated weights for policy 0, policy_version 960 (0.0009) +[2026-06-07 02:48:03,045][472559] Updated weights for policy 0, policy_version 970 (0.0008) +[2026-06-07 02:48:03,117][464927] Fps is (10 sec: 29490.3, 60 sec: 24575.7, 300 sec: 24575.7). Total num frames: 491520. Throughput: 0: 23410.9. Samples: 468224. Policy #0 lag: (min: 63.0, avg: 80.9, max: 127.0) +[2026-06-07 02:48:03,119][464927] Avg episode reward: [(0, '4.002')] +[2026-06-07 02:48:03,178][472559] Updated weights for policy 0, policy_version 983 (0.0009) +[2026-06-07 02:48:03,311][472559] Updated weights for policy 0, policy_version 995 (0.0008) +[2026-06-07 02:48:03,445][472559] Updated weights for policy 0, policy_version 1007 (0.0008) +[2026-06-07 02:48:03,579][472559] Updated weights for policy 0, policy_version 1019 (0.0008) +[2026-06-07 02:48:03,627][472025] Saving new best policy, reward=4.002! +[2026-06-07 02:48:04,322][472559] Updated weights for policy 0, policy_version 1032 (0.0008) +[2026-06-07 02:48:04,462][472559] Updated weights for policy 0, policy_version 1045 (0.0008) +[2026-06-07 02:48:04,571][472559] Updated weights for policy 0, policy_version 1056 (0.0010) +[2026-06-07 02:48:04,688][472559] Updated weights for policy 0, policy_version 1067 (0.0008) +[2026-06-07 02:48:04,812][472559] Updated weights for policy 0, policy_version 1079 (0.0008) +[2026-06-07 02:48:05,565][472559] Updated weights for policy 0, policy_version 1091 (0.0009) +[2026-06-07 02:48:05,693][472559] Updated weights for policy 0, policy_version 1103 (0.0008) +[2026-06-07 02:48:05,813][472559] Updated weights for policy 0, policy_version 1114 (0.0008) +[2026-06-07 02:48:05,968][472559] Updated weights for policy 0, policy_version 1128 (0.0010) +[2026-06-07 02:48:06,112][472559] Updated weights for policy 0, policy_version 1142 (0.0008) +[2026-06-07 02:48:06,879][472559] Updated weights for policy 0, policy_version 1155 (0.0008) +[2026-06-07 02:48:06,991][472559] Updated weights for policy 0, policy_version 1166 (0.0008) +[2026-06-07 02:48:07,130][472559] Updated weights for policy 0, policy_version 1179 (0.0008) +[2026-06-07 02:48:07,247][472559] Updated weights for policy 0, policy_version 1190 (0.0008) +[2026-06-07 02:48:07,371][472559] Updated weights for policy 0, policy_version 1201 (0.0008) +[2026-06-07 02:48:08,117][464927] Fps is (10 sec: 26213.8, 60 sec: 24903.4, 300 sec: 24903.4). Total num frames: 622592. Throughput: 0: 25179.8. Samples: 629504. Policy #0 lag: (min: 0.0, avg: 22.1, max: 64.0) +[2026-06-07 02:48:08,119][464927] Avg episode reward: [(0, '4.142')] +[2026-06-07 02:48:08,150][472559] Updated weights for policy 0, policy_version 1217 (0.0009) +[2026-06-07 02:48:08,283][472559] Updated weights for policy 0, policy_version 1230 (0.0008) +[2026-06-07 02:48:08,391][472559] Updated weights for policy 0, policy_version 1241 (0.0008) +[2026-06-07 02:48:08,510][472559] Updated weights for policy 0, policy_version 1252 (0.0008) +[2026-06-07 02:48:08,652][472559] Updated weights for policy 0, policy_version 1265 (0.0008) +[2026-06-07 02:48:08,786][472559] Updated weights for policy 0, policy_version 1278 (0.0008) +[2026-06-07 02:48:08,807][472025] Saving new best policy, reward=4.142! +[2026-06-07 02:48:09,478][472559] Updated weights for policy 0, policy_version 1288 (0.0008) +[2026-06-07 02:48:09,609][472559] Updated weights for policy 0, policy_version 1301 (0.0008) +[2026-06-07 02:48:09,744][472559] Updated weights for policy 0, policy_version 1313 (0.0008) +[2026-06-07 02:48:09,862][472559] Updated weights for policy 0, policy_version 1324 (0.0008) +[2026-06-07 02:48:10,051][472559] Updated weights for policy 0, policy_version 1342 (0.0008) +[2026-06-07 02:48:10,787][472559] Updated weights for policy 0, policy_version 1353 (0.0006) +[2026-06-07 02:48:10,956][472559] Updated weights for policy 0, policy_version 1369 (0.0008) +[2026-06-07 02:48:11,086][472559] Updated weights for policy 0, policy_version 1381 (0.0005) +[2026-06-07 02:48:11,188][472559] Updated weights for policy 0, policy_version 1391 (0.0005) +[2026-06-07 02:48:11,309][472559] Updated weights for policy 0, policy_version 1402 (0.0008) +[2026-06-07 02:48:12,035][472559] Updated weights for policy 0, policy_version 1415 (0.0009) +[2026-06-07 02:48:12,139][472559] Updated weights for policy 0, policy_version 1425 (0.0008) +[2026-06-07 02:48:12,272][472559] Updated weights for policy 0, policy_version 1438 (0.0008) +[2026-06-07 02:48:12,425][472559] Updated weights for policy 0, policy_version 1452 (0.0008) +[2026-06-07 02:48:12,564][472559] Updated weights for policy 0, policy_version 1465 (0.0008) +[2026-06-07 02:48:13,117][464927] Fps is (10 sec: 26215.0, 60 sec: 25122.1, 300 sec: 25122.1). Total num frames: 753664. Throughput: 0: 26069.3. Samples: 782080. Policy #0 lag: (min: 63.0, avg: 74.0, max: 127.0) +[2026-06-07 02:48:13,118][464927] Avg episode reward: [(0, '4.670')] +[2026-06-07 02:48:13,124][472025] Saving new best policy, reward=4.670! +[2026-06-07 02:48:13,316][472559] Updated weights for policy 0, policy_version 1477 (0.0008) +[2026-06-07 02:48:13,440][472559] Updated weights for policy 0, policy_version 1489 (0.0009) +[2026-06-07 02:48:13,583][472559] Updated weights for policy 0, policy_version 1503 (0.0008) +[2026-06-07 02:48:13,711][472559] Updated weights for policy 0, policy_version 1514 (0.0008) +[2026-06-07 02:48:13,837][472559] Updated weights for policy 0, policy_version 1526 (0.0008) +[2026-06-07 02:48:14,585][472559] Updated weights for policy 0, policy_version 1541 (0.0008) +[2026-06-07 02:48:14,706][472559] Updated weights for policy 0, policy_version 1552 (0.0008) +[2026-06-07 02:48:14,833][472559] Updated weights for policy 0, policy_version 1565 (0.0008) +[2026-06-07 02:48:14,989][472559] Updated weights for policy 0, policy_version 1579 (0.0008) +[2026-06-07 02:48:15,117][472559] Updated weights for policy 0, policy_version 1591 (0.0008) +[2026-06-07 02:48:15,883][472559] Updated weights for policy 0, policy_version 1605 (0.0008) +[2026-06-07 02:48:15,996][472559] Updated weights for policy 0, policy_version 1616 (0.0008) +[2026-06-07 02:48:16,127][472559] Updated weights for policy 0, policy_version 1628 (0.0008) +[2026-06-07 02:48:16,272][472559] Updated weights for policy 0, policy_version 1642 (0.0008) +[2026-06-07 02:48:16,417][472559] Updated weights for policy 0, policy_version 1655 (0.0008) +[2026-06-07 02:48:17,161][472559] Updated weights for policy 0, policy_version 1665 (0.0008) +[2026-06-07 02:48:17,289][472559] Updated weights for policy 0, policy_version 1678 (0.0008) +[2026-06-07 02:48:17,430][472559] Updated weights for policy 0, policy_version 1691 (0.0008) +[2026-06-07 02:48:17,538][472559] Updated weights for policy 0, policy_version 1701 (0.0008) +[2026-06-07 02:48:17,675][472559] Updated weights for policy 0, policy_version 1713 (0.0008) +[2026-06-07 02:48:17,814][472559] Updated weights for policy 0, policy_version 1726 (0.0009) +[2026-06-07 02:48:18,117][464927] Fps is (10 sec: 26214.3, 60 sec: 25277.9, 300 sec: 25277.9). Total num frames: 884736. Throughput: 0: 24520.9. Samples: 858240. Policy #0 lag: (min: 63.0, avg: 73.4, max: 127.0) +[2026-06-07 02:48:18,120][464927] Avg episode reward: [(0, '5.483')] +[2026-06-07 02:48:18,130][472025] Saving new best policy, reward=5.483! +[2026-06-07 02:48:18,575][472559] Updated weights for policy 0, policy_version 1738 (0.0008) +[2026-06-07 02:48:18,721][472559] Updated weights for policy 0, policy_version 1752 (0.0008) +[2026-06-07 02:48:18,856][472559] Updated weights for policy 0, policy_version 1764 (0.0008) +[2026-06-07 02:48:18,987][472559] Updated weights for policy 0, policy_version 1776 (0.0008) +[2026-06-07 02:48:19,110][472559] Updated weights for policy 0, policy_version 1787 (0.0008) +[2026-06-07 02:48:19,848][472559] Updated weights for policy 0, policy_version 1800 (0.0009) +[2026-06-07 02:48:19,965][472559] Updated weights for policy 0, policy_version 1811 (0.0008) +[2026-06-07 02:48:20,078][472559] Updated weights for policy 0, policy_version 1822 (0.0008) +[2026-06-07 02:48:20,205][472559] Updated weights for policy 0, policy_version 1834 (0.0008) +[2026-06-07 02:48:20,342][472559] Updated weights for policy 0, policy_version 1847 (0.0009) +[2026-06-07 02:48:21,097][472559] Updated weights for policy 0, policy_version 1861 (0.0009) +[2026-06-07 02:48:21,215][472559] Updated weights for policy 0, policy_version 1872 (0.0008) +[2026-06-07 02:48:21,340][472559] Updated weights for policy 0, policy_version 1884 (0.0008) +[2026-06-07 02:48:21,475][472559] Updated weights for policy 0, policy_version 1896 (0.0008) +[2026-06-07 02:48:21,602][472559] Updated weights for policy 0, policy_version 1908 (0.0008) +[2026-06-07 02:48:21,725][472559] Updated weights for policy 0, policy_version 1920 (0.0008) +[2026-06-07 02:48:22,487][472559] Updated weights for policy 0, policy_version 1931 (0.0009) +[2026-06-07 02:48:22,622][472559] Updated weights for policy 0, policy_version 1943 (0.0008) +[2026-06-07 02:48:22,762][472559] Updated weights for policy 0, policy_version 1956 (0.0008) +[2026-06-07 02:48:22,905][472559] Updated weights for policy 0, policy_version 1969 (0.0009) +[2026-06-07 02:48:23,068][472559] Updated weights for policy 0, policy_version 1984 (0.0008) +[2026-06-07 02:48:23,117][464927] Fps is (10 sec: 26213.5, 60 sec: 25395.0, 300 sec: 25395.0). Total num frames: 1015808. Throughput: 0: 25196.6. Samples: 1007872. Policy #0 lag: (min: 63.0, avg: 72.7, max: 127.0) +[2026-06-07 02:48:23,119][464927] Avg episode reward: [(0, '6.007')] +[2026-06-07 02:48:23,132][472025] Saving new best policy, reward=6.007! +[2026-06-07 02:48:23,825][472559] Updated weights for policy 0, policy_version 1998 (0.0009) +[2026-06-07 02:48:23,943][472559] Updated weights for policy 0, policy_version 2010 (0.0008) +[2026-06-07 02:48:24,056][472559] Updated weights for policy 0, policy_version 2021 (0.0008) +[2026-06-07 02:48:24,198][472559] Updated weights for policy 0, policy_version 2034 (0.0008) +[2026-06-07 02:48:24,341][472559] Updated weights for policy 0, policy_version 2047 (0.0008) +[2026-06-07 02:48:25,090][472559] Updated weights for policy 0, policy_version 2059 (0.0009) +[2026-06-07 02:48:25,212][472559] Updated weights for policy 0, policy_version 2070 (0.0008) +[2026-06-07 02:48:25,341][472559] Updated weights for policy 0, policy_version 2083 (0.0008) +[2026-06-07 02:48:25,457][472559] Updated weights for policy 0, policy_version 2094 (0.0009) +[2026-06-07 02:48:25,584][472559] Updated weights for policy 0, policy_version 2106 (0.0009) +[2026-06-07 02:48:26,324][472559] Updated weights for policy 0, policy_version 2117 (0.0009) +[2026-06-07 02:48:26,464][472559] Updated weights for policy 0, policy_version 2130 (0.0008) +[2026-06-07 02:48:26,572][472559] Updated weights for policy 0, policy_version 2140 (0.0008) +[2026-06-07 02:48:26,721][472559] Updated weights for policy 0, policy_version 2154 (0.0009) +[2026-06-07 02:48:26,847][472559] Updated weights for policy 0, policy_version 2166 (0.0009) +[2026-06-07 02:48:27,593][472559] Updated weights for policy 0, policy_version 2177 (0.0009) +[2026-06-07 02:48:27,735][472559] Updated weights for policy 0, policy_version 2190 (0.0008) +[2026-06-07 02:48:27,875][472559] Updated weights for policy 0, policy_version 2203 (0.0008) +[2026-06-07 02:48:27,992][472559] Updated weights for policy 0, policy_version 2214 (0.0009) +[2026-06-07 02:48:28,117][464927] Fps is (10 sec: 22938.6, 60 sec: 24758.1, 300 sec: 24758.1). Total num frames: 1114112. Throughput: 0: 25688.2. Samples: 1155968. Policy #0 lag: (min: 63.0, avg: 72.7, max: 127.0) +[2026-06-07 02:48:28,117][464927] Avg episode reward: [(0, '6.851')] +[2026-06-07 02:48:28,125][472559] Updated weights for policy 0, policy_version 2227 (0.0008) +[2026-06-07 02:48:28,263][472559] Updated weights for policy 0, policy_version 2239 (0.0010) +[2026-06-07 02:48:28,267][472025] Saving new best policy, reward=6.851! +[2026-06-07 02:48:29,016][472559] Updated weights for policy 0, policy_version 2252 (0.0009) +[2026-06-07 02:48:29,136][472559] Updated weights for policy 0, policy_version 2264 (0.0008) +[2026-06-07 02:48:29,260][472559] Updated weights for policy 0, policy_version 2275 (0.0008) +[2026-06-07 02:48:29,387][472559] Updated weights for policy 0, policy_version 2287 (0.0010) +[2026-06-07 02:48:29,525][472559] Updated weights for policy 0, policy_version 2300 (0.0008) +[2026-06-07 02:48:30,285][472559] Updated weights for policy 0, policy_version 2311 (0.0009) +[2026-06-07 02:48:30,388][472559] Updated weights for policy 0, policy_version 2321 (0.0009) +[2026-06-07 02:48:30,548][472559] Updated weights for policy 0, policy_version 2336 (0.0009) +[2026-06-07 02:48:30,713][472559] Updated weights for policy 0, policy_version 2352 (0.0009) +[2026-06-07 02:48:30,844][472559] Updated weights for policy 0, policy_version 2365 (0.0008) +[2026-06-07 02:48:31,625][472559] Updated weights for policy 0, policy_version 2380 (0.0008) +[2026-06-07 02:48:31,762][472559] Updated weights for policy 0, policy_version 2393 (0.0008) +[2026-06-07 02:48:31,905][472559] Updated weights for policy 0, policy_version 2406 (0.0007) +[2026-06-07 02:48:32,035][472559] Updated weights for policy 0, policy_version 2419 (0.0008) +[2026-06-07 02:48:32,169][472559] Updated weights for policy 0, policy_version 2431 (0.0009) +[2026-06-07 02:48:32,880][472559] Updated weights for policy 0, policy_version 2444 (0.0010) +[2026-06-07 02:48:33,007][472559] Updated weights for policy 0, policy_version 2456 (0.0009) +[2026-06-07 02:48:33,117][464927] Fps is (10 sec: 22938.4, 60 sec: 24903.7, 300 sec: 24903.7). Total num frames: 1245184. Throughput: 0: 26277.0. Samples: 1230592. Policy #0 lag: (min: 63.0, avg: 73.0, max: 127.0) +[2026-06-07 02:48:33,118][464927] Avg episode reward: [(0, '8.790')] +[2026-06-07 02:48:33,127][472559] Updated weights for policy 0, policy_version 2467 (0.0008) +[2026-06-07 02:48:33,240][472559] Updated weights for policy 0, policy_version 2478 (0.0008) +[2026-06-07 02:48:33,365][472559] Updated weights for policy 0, policy_version 2490 (0.0009) +[2026-06-07 02:48:33,437][472025] Saving new best policy, reward=8.790! +[2026-06-07 02:48:34,072][472559] Updated weights for policy 0, policy_version 2500 (0.0009) +[2026-06-07 02:48:34,189][472559] Updated weights for policy 0, policy_version 2511 (0.0009) +[2026-06-07 02:48:34,310][472559] Updated weights for policy 0, policy_version 2522 (0.0008) +[2026-06-07 02:48:34,437][472559] Updated weights for policy 0, policy_version 2535 (0.0009) +[2026-06-07 02:48:34,578][472559] Updated weights for policy 0, policy_version 2548 (0.0008) +[2026-06-07 02:48:34,701][472559] Updated weights for policy 0, policy_version 2560 (0.0008) +[2026-06-07 02:48:35,459][472559] Updated weights for policy 0, policy_version 2572 (0.0009) +[2026-06-07 02:48:35,594][472559] Updated weights for policy 0, policy_version 2585 (0.0008) +[2026-06-07 02:48:35,753][472559] Updated weights for policy 0, policy_version 2600 (0.0009) +[2026-06-07 02:48:35,895][472559] Updated weights for policy 0, policy_version 2613 (0.0010) +[2026-06-07 02:48:36,697][472559] Updated weights for policy 0, policy_version 2628 (0.0009) +[2026-06-07 02:48:36,837][472559] Updated weights for policy 0, policy_version 2642 (0.0008) +[2026-06-07 02:48:37,000][472559] Updated weights for policy 0, policy_version 2657 (0.0008) +[2026-06-07 02:48:37,109][472559] Updated weights for policy 0, policy_version 2668 (0.0008) +[2026-06-07 02:48:37,266][472559] Updated weights for policy 0, policy_version 2683 (0.0008) +[2026-06-07 02:48:37,984][472559] Updated weights for policy 0, policy_version 2693 (0.0009) +[2026-06-07 02:48:38,117][464927] Fps is (10 sec: 26214.0, 60 sec: 25022.8, 300 sec: 25022.8). Total num frames: 1376256. Throughput: 0: 26265.5. Samples: 1385472. Policy #0 lag: (min: 63.0, avg: 73.1, max: 127.0) +[2026-06-07 02:48:38,118][464927] Avg episode reward: [(0, '10.817')] +[2026-06-07 02:48:38,146][472559] Updated weights for policy 0, policy_version 2708 (0.0010) +[2026-06-07 02:48:38,287][472559] Updated weights for policy 0, policy_version 2722 (0.0008) +[2026-06-07 02:48:38,410][472559] Updated weights for policy 0, policy_version 2733 (0.0009) +[2026-06-07 02:48:38,571][472559] Updated weights for policy 0, policy_version 2748 (0.0009) +[2026-06-07 02:48:38,608][472025] Saving new best policy, reward=10.817! +[2026-06-07 02:48:39,317][472559] Updated weights for policy 0, policy_version 2761 (0.0009) +[2026-06-07 02:48:39,451][472559] Updated weights for policy 0, policy_version 2774 (0.0009) +[2026-06-07 02:48:39,598][472559] Updated weights for policy 0, policy_version 2789 (0.0009) +[2026-06-07 02:48:39,733][472559] Updated weights for policy 0, policy_version 2802 (0.0009) +[2026-06-07 02:48:39,852][472559] Updated weights for policy 0, policy_version 2813 (0.0009) +[2026-06-07 02:48:40,611][472559] Updated weights for policy 0, policy_version 2827 (0.0009) +[2026-06-07 02:48:40,757][472559] Updated weights for policy 0, policy_version 2841 (0.0009) +[2026-06-07 02:48:40,896][472559] Updated weights for policy 0, policy_version 2855 (0.0009) +[2026-06-07 02:48:41,028][472559] Updated weights for policy 0, policy_version 2867 (0.0009) +[2026-06-07 02:48:41,152][472559] Updated weights for policy 0, policy_version 2879 (0.0009) +[2026-06-07 02:48:41,902][472559] Updated weights for policy 0, policy_version 2891 (0.0009) +[2026-06-07 02:48:42,049][472559] Updated weights for policy 0, policy_version 2904 (0.0008) +[2026-06-07 02:48:42,193][472559] Updated weights for policy 0, policy_version 2918 (0.0009) +[2026-06-07 02:48:42,347][472559] Updated weights for policy 0, policy_version 2933 (0.0009) +[2026-06-07 02:48:43,055][472559] Updated weights for policy 0, policy_version 2945 (0.0005) +[2026-06-07 02:48:43,117][464927] Fps is (10 sec: 26214.4, 60 sec: 25122.1, 300 sec: 25122.1). Total num frames: 1507328. Throughput: 0: 25711.0. Samples: 1546112. Policy #0 lag: (min: 41.0, avg: 64.0, max: 105.0) +[2026-06-07 02:48:43,118][464927] Avg episode reward: [(0, '13.483')] +[2026-06-07 02:48:43,177][472559] Updated weights for policy 0, policy_version 2956 (0.0008) +[2026-06-07 02:48:43,296][472559] Updated weights for policy 0, policy_version 2967 (0.0008) +[2026-06-07 02:48:43,438][472559] Updated weights for policy 0, policy_version 2980 (0.0008) +[2026-06-07 02:48:43,571][472559] Updated weights for policy 0, policy_version 2992 (0.0008) +[2026-06-07 02:48:43,704][472559] Updated weights for policy 0, policy_version 3005 (0.0008) +[2026-06-07 02:48:43,740][472025] Saving new best policy, reward=13.483! +[2026-06-07 02:48:44,423][472559] Updated weights for policy 0, policy_version 3015 (0.0009) +[2026-06-07 02:48:44,558][472559] Updated weights for policy 0, policy_version 3028 (0.0009) +[2026-06-07 02:48:44,683][472559] Updated weights for policy 0, policy_version 3040 (0.0009) +[2026-06-07 02:48:44,796][472559] Updated weights for policy 0, policy_version 3051 (0.0008) +[2026-06-07 02:48:44,957][472559] Updated weights for policy 0, policy_version 3067 (0.0009) +[2026-06-07 02:48:45,698][472559] Updated weights for policy 0, policy_version 3082 (0.0009) +[2026-06-07 02:48:45,822][472559] Updated weights for policy 0, policy_version 3094 (0.0008) +[2026-06-07 02:48:45,946][472559] Updated weights for policy 0, policy_version 3106 (0.0008) +[2026-06-07 02:48:46,072][472559] Updated weights for policy 0, policy_version 3118 (0.0008) +[2026-06-07 02:48:46,199][472559] Updated weights for policy 0, policy_version 3129 (0.0009) +[2026-06-07 02:48:46,867][472559] Updated weights for policy 0, policy_version 3140 (0.0009) +[2026-06-07 02:48:47,023][472559] Updated weights for policy 0, policy_version 3155 (0.0009) +[2026-06-07 02:48:47,164][472559] Updated weights for policy 0, policy_version 3169 (0.0007) +[2026-06-07 02:48:47,288][472559] Updated weights for policy 0, policy_version 3180 (0.0006) +[2026-06-07 02:48:47,418][472559] Updated weights for policy 0, policy_version 3193 (0.0009) +[2026-06-07 02:48:48,117][464927] Fps is (10 sec: 26214.8, 60 sec: 26760.6, 300 sec: 25206.2). Total num frames: 1638400. Throughput: 0: 25566.1. Samples: 1618688. Policy #0 lag: (min: 63.0, avg: 73.9, max: 127.0) +[2026-06-07 02:48:48,117][464927] Avg episode reward: [(0, '15.313')] +[2026-06-07 02:48:48,139][472559] Updated weights for policy 0, policy_version 3206 (0.0009) +[2026-06-07 02:48:48,271][472559] Updated weights for policy 0, policy_version 3219 (0.0009) +[2026-06-07 02:48:48,396][472559] Updated weights for policy 0, policy_version 3231 (0.0009) +[2026-06-07 02:48:48,521][472559] Updated weights for policy 0, policy_version 3243 (0.0009) +[2026-06-07 02:48:48,641][472559] Updated weights for policy 0, policy_version 3254 (0.0009) +[2026-06-07 02:48:48,745][472025] Saving new best policy, reward=15.313! +[2026-06-07 02:48:49,385][472559] Updated weights for policy 0, policy_version 3267 (0.0009) +[2026-06-07 02:48:49,541][472559] Updated weights for policy 0, policy_version 3282 (0.0009) +[2026-06-07 02:48:49,674][472559] Updated weights for policy 0, policy_version 3296 (0.0010) +[2026-06-07 02:48:49,788][472559] Updated weights for policy 0, policy_version 3307 (0.0008) +[2026-06-07 02:48:49,912][472559] Updated weights for policy 0, policy_version 3318 (0.0008) +[2026-06-07 02:48:50,606][472559] Updated weights for policy 0, policy_version 3330 (0.0008) +[2026-06-07 02:48:50,719][472559] Updated weights for policy 0, policy_version 3341 (0.0009) +[2026-06-07 02:48:50,854][472559] Updated weights for policy 0, policy_version 3354 (0.0009) +[2026-06-07 02:48:50,992][472559] Updated weights for policy 0, policy_version 3367 (0.0009) +[2026-06-07 02:48:51,137][472559] Updated weights for policy 0, policy_version 3380 (0.0008) +[2026-06-07 02:48:51,265][472559] Updated weights for policy 0, policy_version 3392 (0.0009) +[2026-06-07 02:48:51,956][472559] Updated weights for policy 0, policy_version 3406 (0.0009) +[2026-06-07 02:48:52,091][472559] Updated weights for policy 0, policy_version 3419 (0.0008) +[2026-06-07 02:48:52,231][472559] Updated weights for policy 0, policy_version 3432 (0.0012) +[2026-06-07 02:48:52,346][472559] Updated weights for policy 0, policy_version 3443 (0.0011) +[2026-06-07 02:48:52,468][472559] Updated weights for policy 0, policy_version 3454 (0.0011) +[2026-06-07 02:48:53,117][464927] Fps is (10 sec: 26214.1, 60 sec: 26214.3, 300 sec: 25278.1). Total num frames: 1769472. Throughput: 0: 25571.7. Samples: 1780224. Policy #0 lag: (min: 63.0, avg: 72.1, max: 127.0) +[2026-06-07 02:48:53,118][464927] Avg episode reward: [(0, '20.471')] +[2026-06-07 02:48:53,124][472559] Updated weights for policy 0, policy_version 3466 (0.0008) +[2026-06-07 02:48:53,255][472559] Updated weights for policy 0, policy_version 3479 (0.0009) +[2026-06-07 02:48:53,386][472559] Updated weights for policy 0, policy_version 3491 (0.0009) +[2026-06-07 02:48:53,529][472559] Updated weights for policy 0, policy_version 3504 (0.0008) +[2026-06-07 02:48:53,679][472559] Updated weights for policy 0, policy_version 3518 (0.0008) +[2026-06-07 02:48:53,704][472025] Saving new best policy, reward=20.471! +[2026-06-07 02:48:54,322][472559] Updated weights for policy 0, policy_version 3528 (0.0008) +[2026-06-07 02:48:54,459][472559] Updated weights for policy 0, policy_version 3541 (0.0008) +[2026-06-07 02:48:54,571][472559] Updated weights for policy 0, policy_version 3552 (0.0008) +[2026-06-07 02:48:54,722][472559] Updated weights for policy 0, policy_version 3565 (0.0008) +[2026-06-07 02:48:54,851][472559] Updated weights for policy 0, policy_version 3577 (0.0008) +[2026-06-07 02:48:55,533][472559] Updated weights for policy 0, policy_version 3590 (0.0009) +[2026-06-07 02:48:55,696][472559] Updated weights for policy 0, policy_version 3605 (0.0009) +[2026-06-07 02:48:55,846][472559] Updated weights for policy 0, policy_version 3619 (0.0008) +[2026-06-07 02:48:55,996][472559] Updated weights for policy 0, policy_version 3634 (0.0009) +[2026-06-07 02:48:56,127][472559] Updated weights for policy 0, policy_version 3646 (0.0008) +[2026-06-07 02:48:56,802][472559] Updated weights for policy 0, policy_version 3658 (0.0008) +[2026-06-07 02:48:56,946][472559] Updated weights for policy 0, policy_version 3672 (0.0009) +[2026-06-07 02:48:57,062][472559] Updated weights for policy 0, policy_version 3683 (0.0009) +[2026-06-07 02:48:57,190][472559] Updated weights for policy 0, policy_version 3695 (0.0009) +[2026-06-07 02:48:57,321][472559] Updated weights for policy 0, policy_version 3707 (0.0009) +[2026-06-07 02:48:57,951][472559] Updated weights for policy 0, policy_version 3718 (0.0006) +[2026-06-07 02:48:58,073][472559] Updated weights for policy 0, policy_version 3729 (0.0005) +[2026-06-07 02:48:58,117][464927] Fps is (10 sec: 26213.0, 60 sec: 25668.1, 300 sec: 25340.4). Total num frames: 1900544. Throughput: 0: 25813.1. Samples: 1943680. Policy #0 lag: (min: 63.0, avg: 73.2, max: 127.0) +[2026-06-07 02:48:58,120][464927] Avg episode reward: [(0, '22.521')] +[2026-06-07 02:48:58,207][472559] Updated weights for policy 0, policy_version 3742 (0.0008) +[2026-06-07 02:48:58,336][472559] Updated weights for policy 0, policy_version 3754 (0.0009) +[2026-06-07 02:48:58,446][472559] Updated weights for policy 0, policy_version 3764 (0.0009) +[2026-06-07 02:48:58,574][472025] Saving new best policy, reward=22.521! +[2026-06-07 02:48:58,576][472559] Updated weights for policy 0, policy_version 3776 (0.0009) +[2026-06-07 02:48:59,212][472559] Updated weights for policy 0, policy_version 3787 (0.0008) +[2026-06-07 02:48:59,346][472559] Updated weights for policy 0, policy_version 3799 (0.0009) +[2026-06-07 02:48:59,476][472559] Updated weights for policy 0, policy_version 3812 (0.0009) +[2026-06-07 02:48:59,647][472559] Updated weights for policy 0, policy_version 3828 (0.0009) +[2026-06-07 02:48:59,775][472559] Updated weights for policy 0, policy_version 3839 (0.0009) +[2026-06-07 02:49:00,383][472559] Updated weights for policy 0, policy_version 3849 (0.0008) +[2026-06-07 02:49:00,515][472559] Updated weights for policy 0, policy_version 3862 (0.0008) +[2026-06-07 02:49:00,649][472559] Updated weights for policy 0, policy_version 3874 (0.0004) +[2026-06-07 02:49:00,788][472559] Updated weights for policy 0, policy_version 3887 (0.0006) +[2026-06-07 02:49:00,915][472559] Updated weights for policy 0, policy_version 3898 (0.0008) +[2026-06-07 02:49:01,542][472559] Updated weights for policy 0, policy_version 3909 (0.0009) +[2026-06-07 02:49:01,656][472559] Updated weights for policy 0, policy_version 3920 (0.0009) +[2026-06-07 02:49:01,770][472559] Updated weights for policy 0, policy_version 3930 (0.0008) +[2026-06-07 02:49:01,894][472559] Updated weights for policy 0, policy_version 3941 (0.0009) +[2026-06-07 02:49:02,028][472559] Updated weights for policy 0, policy_version 3953 (0.0009) +[2026-06-07 02:49:02,135][472559] Updated weights for policy 0, policy_version 3964 (0.0010) +[2026-06-07 02:49:02,717][472559] Updated weights for policy 0, policy_version 3974 (0.0008) +[2026-06-07 02:49:02,847][472559] Updated weights for policy 0, policy_version 3986 (0.0012) +[2026-06-07 02:49:02,978][472559] Updated weights for policy 0, policy_version 3999 (0.0009) +[2026-06-07 02:49:03,117][464927] Fps is (10 sec: 26214.8, 60 sec: 25668.4, 300 sec: 25395.2). Total num frames: 2031616. Throughput: 0: 25810.7. Samples: 2019712. Policy #0 lag: (min: 59.0, avg: 97.9, max: 123.0) +[2026-06-07 02:49:03,117][464927] Avg episode reward: [(0, '33.061')] +[2026-06-07 02:49:03,149][472559] Updated weights for policy 0, policy_version 4015 (0.0009) +[2026-06-07 02:49:03,325][472559] Updated weights for policy 0, policy_version 4031 (0.0009) +[2026-06-07 02:49:03,335][472025] Saving new best policy, reward=33.061! +[2026-06-07 02:49:03,967][472559] Updated weights for policy 0, policy_version 4044 (0.0009) +[2026-06-07 02:49:04,079][472559] Updated weights for policy 0, policy_version 4054 (0.0011) +[2026-06-07 02:49:04,212][472559] Updated weights for policy 0, policy_version 4067 (0.0009) +[2026-06-07 02:49:04,364][472559] Updated weights for policy 0, policy_version 4081 (0.0009) +[2026-06-07 02:49:04,502][472559] Updated weights for policy 0, policy_version 4093 (0.0009) +[2026-06-07 02:49:05,127][472559] Updated weights for policy 0, policy_version 4103 (0.0009) +[2026-06-07 02:49:05,256][472559] Updated weights for policy 0, policy_version 4116 (0.0009) +[2026-06-07 02:49:05,412][472559] Updated weights for policy 0, policy_version 4130 (0.0009) +[2026-06-07 02:49:05,545][472559] Updated weights for policy 0, policy_version 4142 (0.0009) +[2026-06-07 02:49:05,675][472559] Updated weights for policy 0, policy_version 4154 (0.0008) +[2026-06-07 02:49:06,319][472559] Updated weights for policy 0, policy_version 4165 (0.0008) +[2026-06-07 02:49:06,444][472559] Updated weights for policy 0, policy_version 4176 (0.0008) +[2026-06-07 02:49:06,589][472559] Updated weights for policy 0, policy_version 4190 (0.0008) +[2026-06-07 02:49:06,725][472559] Updated weights for policy 0, policy_version 4203 (0.0008) +[2026-06-07 02:49:06,843][472559] Updated weights for policy 0, policy_version 4214 (0.0008) +[2026-06-07 02:49:07,481][472559] Updated weights for policy 0, policy_version 4226 (0.0008) +[2026-06-07 02:49:07,604][472559] Updated weights for policy 0, policy_version 4238 (0.0008) +[2026-06-07 02:49:07,713][472559] Updated weights for policy 0, policy_version 4248 (0.0008) +[2026-06-07 02:49:07,843][472559] Updated weights for policy 0, policy_version 4260 (0.0008) +[2026-06-07 02:49:07,978][472559] Updated weights for policy 0, policy_version 4272 (0.0008) +[2026-06-07 02:49:08,089][472559] Updated weights for policy 0, policy_version 4282 (0.0008) +[2026-06-07 02:49:08,117][464927] Fps is (10 sec: 26215.5, 60 sec: 25668.4, 300 sec: 25443.4). Total num frames: 2162688. Throughput: 0: 26149.2. Samples: 2184576. Policy #0 lag: (min: 63.0, avg: 72.9, max: 127.0) +[2026-06-07 02:49:08,118][464927] Avg episode reward: [(0, '39.507')] +[2026-06-07 02:49:08,149][472025] Saving new best policy, reward=39.507! +[2026-06-07 02:49:08,718][472559] Updated weights for policy 0, policy_version 4294 (0.0008) +[2026-06-07 02:49:08,856][472559] Updated weights for policy 0, policy_version 4307 (0.0008) +[2026-06-07 02:49:08,986][472559] Updated weights for policy 0, policy_version 4319 (0.0009) +[2026-06-07 02:49:09,132][472559] Updated weights for policy 0, policy_version 4333 (0.0009) +[2026-06-07 02:49:09,245][472559] Updated weights for policy 0, policy_version 4343 (0.0008) +[2026-06-07 02:49:09,879][472559] Updated weights for policy 0, policy_version 4354 (0.0009) +[2026-06-07 02:49:09,996][472559] Updated weights for policy 0, policy_version 4364 (0.0008) +[2026-06-07 02:49:10,119][472559] Updated weights for policy 0, policy_version 4376 (0.0008) +[2026-06-07 02:49:10,258][472559] Updated weights for policy 0, policy_version 4389 (0.0008) +[2026-06-07 02:49:10,368][472559] Updated weights for policy 0, policy_version 4399 (0.0008) +[2026-06-07 02:49:10,500][472559] Updated weights for policy 0, policy_version 4411 (0.0008) +[2026-06-07 02:49:11,098][472559] Updated weights for policy 0, policy_version 4421 (0.0008) +[2026-06-07 02:49:11,236][472559] Updated weights for policy 0, policy_version 4434 (0.0008) +[2026-06-07 02:49:11,345][472559] Updated weights for policy 0, policy_version 4444 (0.0008) +[2026-06-07 02:49:11,465][472559] Updated weights for policy 0, policy_version 4455 (0.0008) +[2026-06-07 02:49:11,589][472559] Updated weights for policy 0, policy_version 4466 (0.0008) +[2026-06-07 02:49:11,714][472559] Updated weights for policy 0, policy_version 4478 (0.0008) +[2026-06-07 02:49:12,350][472559] Updated weights for policy 0, policy_version 4489 (0.0008) +[2026-06-07 02:49:12,472][472559] Updated weights for policy 0, policy_version 4500 (0.0008) +[2026-06-07 02:49:12,581][472559] Updated weights for policy 0, policy_version 4510 (0.0008) +[2026-06-07 02:49:12,731][472559] Updated weights for policy 0, policy_version 4524 (0.0008) +[2026-06-07 02:49:12,861][472559] Updated weights for policy 0, policy_version 4535 (0.0008) +[2026-06-07 02:49:13,117][464927] Fps is (10 sec: 29491.0, 60 sec: 26214.4, 300 sec: 25850.3). Total num frames: 2326528. Throughput: 0: 26547.2. Samples: 2350592. Policy #0 lag: (min: 63.0, avg: 73.2, max: 127.0) +[2026-06-07 02:49:13,118][464927] Avg episode reward: [(0, '45.850')] +[2026-06-07 02:49:13,123][472025] Saving new best policy, reward=45.850! +[2026-06-07 02:49:13,473][472559] Updated weights for policy 0, policy_version 4546 (0.0008) +[2026-06-07 02:49:13,600][472559] Updated weights for policy 0, policy_version 4558 (0.0008) +[2026-06-07 02:49:13,724][472559] Updated weights for policy 0, policy_version 4569 (0.0008) +[2026-06-07 02:49:13,861][472559] Updated weights for policy 0, policy_version 4582 (0.0008) +[2026-06-07 02:49:13,962][472559] Updated weights for policy 0, policy_version 4592 (0.0008) +[2026-06-07 02:49:14,080][472559] Updated weights for policy 0, policy_version 4602 (0.0008) +[2026-06-07 02:49:14,692][472559] Updated weights for policy 0, policy_version 4613 (0.0008) +[2026-06-07 02:49:14,810][472559] Updated weights for policy 0, policy_version 4624 (0.0008) +[2026-06-07 02:49:14,923][472559] Updated weights for policy 0, policy_version 4634 (0.0008) +[2026-06-07 02:49:15,048][472559] Updated weights for policy 0, policy_version 4646 (0.0009) +[2026-06-07 02:49:15,178][472559] Updated weights for policy 0, policy_version 4657 (0.0008) +[2026-06-07 02:49:15,304][472559] Updated weights for policy 0, policy_version 4669 (0.0008) +[2026-06-07 02:49:15,915][472559] Updated weights for policy 0, policy_version 4679 (0.0009) +[2026-06-07 02:49:16,039][472559] Updated weights for policy 0, policy_version 4691 (0.0008) +[2026-06-07 02:49:16,159][472559] Updated weights for policy 0, policy_version 4701 (0.0009) +[2026-06-07 02:49:16,276][472559] Updated weights for policy 0, policy_version 4712 (0.0008) +[2026-06-07 02:49:16,414][472559] Updated weights for policy 0, policy_version 4724 (0.0008) +[2026-06-07 02:49:16,533][472559] Updated weights for policy 0, policy_version 4735 (0.0009) +[2026-06-07 02:49:17,119][472559] Updated weights for policy 0, policy_version 4748 (0.0008) +[2026-06-07 02:49:17,234][472559] Updated weights for policy 0, policy_version 4758 (0.0008) +[2026-06-07 02:49:17,368][472559] Updated weights for policy 0, policy_version 4770 (0.0008) +[2026-06-07 02:49:17,474][472559] Updated weights for policy 0, policy_version 4780 (0.0008) +[2026-06-07 02:49:17,586][472559] Updated weights for policy 0, policy_version 4790 (0.0008) +[2026-06-07 02:49:18,117][464927] Fps is (10 sec: 29491.5, 60 sec: 26214.6, 300 sec: 25869.5). Total num frames: 2457600. Throughput: 0: 26860.1. Samples: 2439296. Policy #0 lag: (min: 45.0, avg: 55.3, max: 109.0) +[2026-06-07 02:49:18,118][464927] Avg episode reward: [(0, '51.083')] +[2026-06-07 02:49:18,176][472559] Updated weights for policy 0, policy_version 4802 (0.0008) +[2026-06-07 02:49:18,296][472559] Updated weights for policy 0, policy_version 4813 (0.0009) +[2026-06-07 02:49:18,405][472559] Updated weights for policy 0, policy_version 4823 (0.0008) +[2026-06-07 02:49:18,530][472559] Updated weights for policy 0, policy_version 4834 (0.0009) +[2026-06-07 02:49:18,672][472559] Updated weights for policy 0, policy_version 4848 (0.0009) +[2026-06-07 02:49:18,810][472559] Updated weights for policy 0, policy_version 4860 (0.0008) +[2026-06-07 02:49:18,852][472025] Saving new best policy, reward=51.083! +[2026-06-07 02:49:19,422][472559] Updated weights for policy 0, policy_version 4871 (0.0008) +[2026-06-07 02:49:19,558][472559] Updated weights for policy 0, policy_version 4883 (0.0009) +[2026-06-07 02:49:19,688][472559] Updated weights for policy 0, policy_version 4895 (0.0009) +[2026-06-07 02:49:19,823][472559] Updated weights for policy 0, policy_version 4908 (0.0009) +[2026-06-07 02:49:19,937][472559] Updated weights for policy 0, policy_version 4918 (0.0009) +[2026-06-07 02:49:20,524][472559] Updated weights for policy 0, policy_version 4929 (0.0008) +[2026-06-07 02:49:20,647][472559] Updated weights for policy 0, policy_version 4940 (0.0009) +[2026-06-07 02:49:20,762][472559] Updated weights for policy 0, policy_version 4951 (0.0009) +[2026-06-07 02:49:20,884][472559] Updated weights for policy 0, policy_version 4962 (0.0009) +[2026-06-07 02:49:21,006][472559] Updated weights for policy 0, policy_version 4973 (0.0009) +[2026-06-07 02:49:21,127][472559] Updated weights for policy 0, policy_version 4984 (0.0009) +[2026-06-07 02:49:21,734][472559] Updated weights for policy 0, policy_version 4995 (0.0008) +[2026-06-07 02:49:21,857][472559] Updated weights for policy 0, policy_version 5006 (0.0008) +[2026-06-07 02:49:21,965][472559] Updated weights for policy 0, policy_version 5016 (0.0008) +[2026-06-07 02:49:22,092][472559] Updated weights for policy 0, policy_version 5028 (0.0009) +[2026-06-07 02:49:22,213][472559] Updated weights for policy 0, policy_version 5039 (0.0008) +[2026-06-07 02:49:22,357][472559] Updated weights for policy 0, policy_version 5052 (0.0008) +[2026-06-07 02:49:22,971][472559] Updated weights for policy 0, policy_version 5063 (0.0008) +[2026-06-07 02:49:23,114][472559] Updated weights for policy 0, policy_version 5077 (0.0008) +[2026-06-07 02:49:23,117][464927] Fps is (10 sec: 26213.5, 60 sec: 26214.4, 300 sec: 25886.6). Total num frames: 2588672. Throughput: 0: 27098.9. Samples: 2604928. Policy #0 lag: (min: 63.0, avg: 73.7, max: 127.0) +[2026-06-07 02:49:23,119][464927] Avg episode reward: [(0, '64.255')] +[2026-06-07 02:49:23,230][472559] Updated weights for policy 0, policy_version 5087 (0.0008) +[2026-06-07 02:49:23,358][472559] Updated weights for policy 0, policy_version 5098 (0.0008) +[2026-06-07 02:49:23,472][472559] Updated weights for policy 0, policy_version 5108 (0.0008) +[2026-06-07 02:49:23,573][472559] Updated weights for policy 0, policy_version 5118 (0.0008) +[2026-06-07 02:49:23,591][472025] Saving new best policy, reward=64.255! +[2026-06-07 02:49:24,168][472559] Updated weights for policy 0, policy_version 5129 (0.0008) +[2026-06-07 02:49:24,319][472559] Updated weights for policy 0, policy_version 5143 (0.0008) +[2026-06-07 02:49:24,430][472559] Updated weights for policy 0, policy_version 5153 (0.0008) +[2026-06-07 02:49:24,540][472559] Updated weights for policy 0, policy_version 5163 (0.0008) +[2026-06-07 02:49:24,666][472559] Updated weights for policy 0, policy_version 5174 (0.0009) +[2026-06-07 02:49:25,249][472559] Updated weights for policy 0, policy_version 5185 (0.0008) +[2026-06-07 02:49:25,369][472559] Updated weights for policy 0, policy_version 5197 (0.0008) +[2026-06-07 02:49:25,512][472559] Updated weights for policy 0, policy_version 5210 (0.0008) +[2026-06-07 02:49:25,649][472559] Updated weights for policy 0, policy_version 5222 (0.0009) +[2026-06-07 02:49:25,763][472559] Updated weights for policy 0, policy_version 5233 (0.0008) +[2026-06-07 02:49:25,898][472559] Updated weights for policy 0, policy_version 5245 (0.0008) +[2026-06-07 02:49:26,481][472559] Updated weights for policy 0, policy_version 5255 (0.0009) +[2026-06-07 02:49:26,596][472559] Updated weights for policy 0, policy_version 5266 (0.0008) +[2026-06-07 02:49:26,729][472559] Updated weights for policy 0, policy_version 5278 (0.0009) +[2026-06-07 02:49:26,839][472559] Updated weights for policy 0, policy_version 5288 (0.0009) +[2026-06-07 02:49:26,945][472559] Updated weights for policy 0, policy_version 5298 (0.0008) +[2026-06-07 02:49:27,054][472559] Updated weights for policy 0, policy_version 5308 (0.0008) +[2026-06-07 02:49:27,659][472559] Updated weights for policy 0, policy_version 5320 (0.0008) +[2026-06-07 02:49:27,777][472559] Updated weights for policy 0, policy_version 5331 (0.0009) +[2026-06-07 02:49:27,893][472559] Updated weights for policy 0, policy_version 5341 (0.0009) +[2026-06-07 02:49:28,028][472559] Updated weights for policy 0, policy_version 5353 (0.0008) +[2026-06-07 02:49:28,117][464927] Fps is (10 sec: 26214.3, 60 sec: 26760.5, 300 sec: 25902.3). Total num frames: 2719744. Throughput: 0: 27218.5. Samples: 2770944. Policy #0 lag: (min: 63.0, avg: 73.7, max: 127.0) +[2026-06-07 02:49:28,118][464927] Avg episode reward: [(0, '71.980')] +[2026-06-07 02:49:28,141][472559] Updated weights for policy 0, policy_version 5363 (0.0009) +[2026-06-07 02:49:28,275][472559] Updated weights for policy 0, policy_version 5375 (0.0009) +[2026-06-07 02:49:28,280][472025] Saving new best policy, reward=71.980! +[2026-06-07 02:49:28,861][472559] Updated weights for policy 0, policy_version 5387 (0.0009) +[2026-06-07 02:49:28,992][472559] Updated weights for policy 0, policy_version 5399 (0.0009) +[2026-06-07 02:49:29,095][472559] Updated weights for policy 0, policy_version 5409 (0.0008) +[2026-06-07 02:49:29,212][472559] Updated weights for policy 0, policy_version 5419 (0.0009) +[2026-06-07 02:49:29,329][472559] Updated weights for policy 0, policy_version 5430 (0.0009) +[2026-06-07 02:49:29,443][472559] Updated weights for policy 0, policy_version 5440 (0.0008) +[2026-06-07 02:49:30,032][472559] Updated weights for policy 0, policy_version 5450 (0.0008) +[2026-06-07 02:49:30,149][472559] Updated weights for policy 0, policy_version 5461 (0.0008) +[2026-06-07 02:49:30,295][472559] Updated weights for policy 0, policy_version 5474 (0.0009) +[2026-06-07 02:49:30,417][472559] Updated weights for policy 0, policy_version 5485 (0.0008) +[2026-06-07 02:49:30,546][472559] Updated weights for policy 0, policy_version 5497 (0.0008) +[2026-06-07 02:49:31,119][472559] Updated weights for policy 0, policy_version 5507 (0.0008) +[2026-06-07 02:49:31,235][472559] Updated weights for policy 0, policy_version 5517 (0.0008) +[2026-06-07 02:49:31,370][472559] Updated weights for policy 0, policy_version 5529 (0.0009) +[2026-06-07 02:49:31,492][472559] Updated weights for policy 0, policy_version 5540 (0.0008) +[2026-06-07 02:49:31,599][472559] Updated weights for policy 0, policy_version 5550 (0.0008) +[2026-06-07 02:49:31,726][472559] Updated weights for policy 0, policy_version 5561 (0.0008) +[2026-06-07 02:49:32,315][472559] Updated weights for policy 0, policy_version 5572 (0.0008) +[2026-06-07 02:49:32,450][472559] Updated weights for policy 0, policy_version 5584 (0.0008) +[2026-06-07 02:49:32,583][472559] Updated weights for policy 0, policy_version 5596 (0.0008) +[2026-06-07 02:49:32,717][472559] Updated weights for policy 0, policy_version 5608 (0.0008) +[2026-06-07 02:49:32,831][472559] Updated weights for policy 0, policy_version 5618 (0.0008) +[2026-06-07 02:49:32,942][472559] Updated weights for policy 0, policy_version 5628 (0.0008) +[2026-06-07 02:49:33,117][464927] Fps is (10 sec: 29492.4, 60 sec: 27306.7, 300 sec: 26214.4). Total num frames: 2883584. Throughput: 0: 27619.5. Samples: 2861568. Policy #0 lag: (min: 63.0, avg: 75.1, max: 127.0) +[2026-06-07 02:49:33,117][464927] Avg episode reward: [(0, '87.307')] +[2026-06-07 02:49:33,123][472025] Saving new best policy, reward=87.307! +[2026-06-07 02:49:33,541][472559] Updated weights for policy 0, policy_version 5640 (0.0009) +[2026-06-07 02:49:33,657][472559] Updated weights for policy 0, policy_version 5651 (0.0009) +[2026-06-07 02:49:33,791][472559] Updated weights for policy 0, policy_version 5664 (0.0009) +[2026-06-07 02:49:33,908][472559] Updated weights for policy 0, policy_version 5675 (0.0008) +[2026-06-07 02:49:34,031][472559] Updated weights for policy 0, policy_version 5686 (0.0008) +[2026-06-07 02:49:34,141][472559] Updated weights for policy 0, policy_version 5696 (0.0009) +[2026-06-07 02:49:34,716][472559] Updated weights for policy 0, policy_version 5706 (0.0011) +[2026-06-07 02:49:34,855][472559] Updated weights for policy 0, policy_version 5719 (0.0009) +[2026-06-07 02:49:34,976][472559] Updated weights for policy 0, policy_version 5730 (0.0005) +[2026-06-07 02:49:35,102][472559] Updated weights for policy 0, policy_version 5741 (0.0006) +[2026-06-07 02:49:35,221][472559] Updated weights for policy 0, policy_version 5752 (0.0005) +[2026-06-07 02:49:35,789][472559] Updated weights for policy 0, policy_version 5762 (0.0009) +[2026-06-07 02:49:35,915][472559] Updated weights for policy 0, policy_version 5774 (0.0011) +[2026-06-07 02:49:36,050][472559] Updated weights for policy 0, policy_version 5786 (0.0006) +[2026-06-07 02:49:36,184][472559] Updated weights for policy 0, policy_version 5798 (0.0005) +[2026-06-07 02:49:36,308][472559] Updated weights for policy 0, policy_version 5809 (0.0006) +[2026-06-07 02:49:36,424][472559] Updated weights for policy 0, policy_version 5819 (0.0008) +[2026-06-07 02:49:36,998][472559] Updated weights for policy 0, policy_version 5830 (0.0011) +[2026-06-07 02:49:37,136][472559] Updated weights for policy 0, policy_version 5843 (0.0010) +[2026-06-07 02:49:37,257][472559] Updated weights for policy 0, policy_version 5854 (0.0009) +[2026-06-07 02:49:37,368][472559] Updated weights for policy 0, policy_version 5864 (0.0009) +[2026-06-07 02:49:37,537][472559] Updated weights for policy 0, policy_version 5879 (0.0009) +[2026-06-07 02:49:38,102][472559] Updated weights for policy 0, policy_version 5890 (0.0006) +[2026-06-07 02:49:38,117][464927] Fps is (10 sec: 29491.2, 60 sec: 27306.7, 300 sec: 26214.4). Total num frames: 3014656. Throughput: 0: 27747.6. Samples: 3028864. Policy #0 lag: (min: 63.0, avg: 74.7, max: 127.0) +[2026-06-07 02:49:38,118][464927] Avg episode reward: [(0, '97.125')] +[2026-06-07 02:49:38,237][472559] Updated weights for policy 0, policy_version 5902 (0.0009) +[2026-06-07 02:49:38,360][472559] Updated weights for policy 0, policy_version 5914 (0.0009) +[2026-06-07 02:49:38,494][472559] Updated weights for policy 0, policy_version 5925 (0.0010) +[2026-06-07 02:49:38,614][472559] Updated weights for policy 0, policy_version 5936 (0.0012) +[2026-06-07 02:49:38,729][472559] Updated weights for policy 0, policy_version 5946 (0.0009) +[2026-06-07 02:49:38,787][472025] Saving new best policy, reward=97.125! +[2026-06-07 02:49:39,307][472559] Updated weights for policy 0, policy_version 5957 (0.0008) +[2026-06-07 02:49:39,412][472559] Updated weights for policy 0, policy_version 5967 (0.0008) +[2026-06-07 02:49:39,535][472559] Updated weights for policy 0, policy_version 5978 (0.0008) +[2026-06-07 02:49:39,668][472559] Updated weights for policy 0, policy_version 5990 (0.0008) +[2026-06-07 02:49:39,776][472559] Updated weights for policy 0, policy_version 6000 (0.0008) +[2026-06-07 02:49:39,949][472559] Updated weights for policy 0, policy_version 6016 (0.0009) +[2026-06-07 02:49:40,550][472559] Updated weights for policy 0, policy_version 6027 (0.0005) +[2026-06-07 02:49:40,669][472559] Updated weights for policy 0, policy_version 6038 (0.0005) +[2026-06-07 02:49:40,815][472559] Updated weights for policy 0, policy_version 6051 (0.0005) +[2026-06-07 02:49:40,951][472559] Updated weights for policy 0, policy_version 6063 (0.0006) +[2026-06-07 02:49:41,062][472559] Updated weights for policy 0, policy_version 6073 (0.0005) +[2026-06-07 02:49:41,643][472559] Updated weights for policy 0, policy_version 6085 (0.0005) +[2026-06-07 02:49:41,755][472559] Updated weights for policy 0, policy_version 6095 (0.0005) +[2026-06-07 02:49:41,885][472559] Updated weights for policy 0, policy_version 6107 (0.0005) +[2026-06-07 02:49:42,009][472559] Updated weights for policy 0, policy_version 6118 (0.0005) +[2026-06-07 02:49:42,149][472559] Updated weights for policy 0, policy_version 6130 (0.0005) +[2026-06-07 02:49:42,271][472559] Updated weights for policy 0, policy_version 6141 (0.0005) +[2026-06-07 02:49:42,840][472559] Updated weights for policy 0, policy_version 6152 (0.0007) +[2026-06-07 02:49:42,953][472559] Updated weights for policy 0, policy_version 6162 (0.0008) +[2026-06-07 02:49:43,084][472559] Updated weights for policy 0, policy_version 6174 (0.0008) +[2026-06-07 02:49:43,117][464927] Fps is (10 sec: 26214.2, 60 sec: 27306.7, 300 sec: 26214.4). Total num frames: 3145728. Throughput: 0: 27818.9. Samples: 3195520. Policy #0 lag: (min: 21.0, avg: 64.4, max: 74.0) +[2026-06-07 02:49:43,118][464927] Avg episode reward: [(0, '104.756')] +[2026-06-07 02:49:43,210][472559] Updated weights for policy 0, policy_version 6185 (0.0008) +[2026-06-07 02:49:43,342][472559] Updated weights for policy 0, policy_version 6197 (0.0008) +[2026-06-07 02:49:43,458][472559] Updated weights for policy 0, policy_version 6207 (0.0008) +[2026-06-07 02:49:43,463][472025] Saving new best policy, reward=104.756! +[2026-06-07 02:49:44,053][472559] Updated weights for policy 0, policy_version 6218 (0.0006) +[2026-06-07 02:49:44,158][472559] Updated weights for policy 0, policy_version 6228 (0.0008) +[2026-06-07 02:49:44,274][472559] Updated weights for policy 0, policy_version 6238 (0.0008) +[2026-06-07 02:49:44,400][472559] Updated weights for policy 0, policy_version 6250 (0.0009) +[2026-06-07 02:49:44,529][472559] Updated weights for policy 0, policy_version 6261 (0.0008) +[2026-06-07 02:49:44,653][472559] Updated weights for policy 0, policy_version 6272 (0.0006) +[2026-06-07 02:49:45,244][472559] Updated weights for policy 0, policy_version 6283 (0.0008) +[2026-06-07 02:49:45,359][472559] Updated weights for policy 0, policy_version 6293 (0.0008) +[2026-06-07 02:49:45,468][472559] Updated weights for policy 0, policy_version 6303 (0.0008) +[2026-06-07 02:49:45,585][472559] Updated weights for policy 0, policy_version 6313 (0.0008) +[2026-06-07 02:49:45,717][472559] Updated weights for policy 0, policy_version 6325 (0.0009) +[2026-06-07 02:49:45,841][472559] Updated weights for policy 0, policy_version 6336 (0.0009) +[2026-06-07 02:49:46,401][472559] Updated weights for policy 0, policy_version 6347 (0.0007) +[2026-06-07 02:49:46,532][472559] Updated weights for policy 0, policy_version 6359 (0.0009) +[2026-06-07 02:49:46,686][472559] Updated weights for policy 0, policy_version 6373 (0.0008) +[2026-06-07 02:49:46,816][472559] Updated weights for policy 0, policy_version 6384 (0.0008) +[2026-06-07 02:49:46,937][472559] Updated weights for policy 0, policy_version 6395 (0.0008) +[2026-06-07 02:49:47,541][472559] Updated weights for policy 0, policy_version 6408 (0.0007) +[2026-06-07 02:49:47,677][472559] Updated weights for policy 0, policy_version 6420 (0.0008) +[2026-06-07 02:49:47,807][472559] Updated weights for policy 0, policy_version 6432 (0.0008) +[2026-06-07 02:49:47,944][472559] Updated weights for policy 0, policy_version 6444 (0.0008) +[2026-06-07 02:49:48,078][472559] Updated weights for policy 0, policy_version 6455 (0.0008) +[2026-06-07 02:49:48,117][464927] Fps is (10 sec: 26214.5, 60 sec: 27306.7, 300 sec: 26214.4). Total num frames: 3276800. Throughput: 0: 27997.9. Samples: 3279616. Policy #0 lag: (min: 21.0, avg: 64.4, max: 74.0) +[2026-06-07 02:49:48,118][464927] Avg episode reward: [(0, '104.045')] +[2026-06-07 02:49:48,652][472559] Updated weights for policy 0, policy_version 6466 (0.0008) +[2026-06-07 02:49:48,768][472559] Updated weights for policy 0, policy_version 6477 (0.0008) +[2026-06-07 02:49:48,920][472559] Updated weights for policy 0, policy_version 6491 (0.0008) +[2026-06-07 02:49:49,063][472559] Updated weights for policy 0, policy_version 6503 (0.0009) +[2026-06-07 02:49:49,183][472559] Updated weights for policy 0, policy_version 6514 (0.0008) +[2026-06-07 02:49:49,307][472559] Updated weights for policy 0, policy_version 6525 (0.0007) +[2026-06-07 02:49:49,887][472559] Updated weights for policy 0, policy_version 6536 (0.0008) +[2026-06-07 02:49:50,007][472559] Updated weights for policy 0, policy_version 6547 (0.0008) +[2026-06-07 02:49:50,128][472559] Updated weights for policy 0, policy_version 6558 (0.0008) +[2026-06-07 02:49:50,267][472559] Updated weights for policy 0, policy_version 6570 (0.0008) +[2026-06-07 02:49:50,385][472559] Updated weights for policy 0, policy_version 6581 (0.0008) +[2026-06-07 02:49:50,492][472559] Updated weights for policy 0, policy_version 6591 (0.0008) +[2026-06-07 02:49:51,082][472559] Updated weights for policy 0, policy_version 6603 (0.0008) +[2026-06-07 02:49:51,215][472559] Updated weights for policy 0, policy_version 6615 (0.0008) +[2026-06-07 02:49:51,340][472559] Updated weights for policy 0, policy_version 6626 (0.0008) +[2026-06-07 02:49:51,451][472559] Updated weights for policy 0, policy_version 6636 (0.0008) +[2026-06-07 02:49:51,566][472559] Updated weights for policy 0, policy_version 6646 (0.0009) +[2026-06-07 02:49:51,675][472559] Updated weights for policy 0, policy_version 6656 (0.0008) +[2026-06-07 02:49:52,281][472559] Updated weights for policy 0, policy_version 6668 (0.0005) +[2026-06-07 02:49:52,410][472559] Updated weights for policy 0, policy_version 6680 (0.0005) +[2026-06-07 02:49:52,546][472559] Updated weights for policy 0, policy_version 6692 (0.0005) +[2026-06-07 02:49:52,672][472559] Updated weights for policy 0, policy_version 6703 (0.0004) +[2026-06-07 02:49:52,809][472559] Updated weights for policy 0, policy_version 6715 (0.0005) +[2026-06-07 02:49:53,117][464927] Fps is (10 sec: 29491.4, 60 sec: 27852.9, 300 sec: 26466.5). Total num frames: 3440640. Throughput: 0: 28151.5. Samples: 3451392. Policy #0 lag: (min: 86.0, avg: 116.4, max: 152.0) +[2026-06-07 02:49:53,117][464927] Avg episode reward: [(0, '100.001')] +[2026-06-07 02:49:53,363][472559] Updated weights for policy 0, policy_version 6725 (0.0005) +[2026-06-07 02:49:53,523][472559] Updated weights for policy 0, policy_version 6739 (0.0005) +[2026-06-07 02:49:53,647][472559] Updated weights for policy 0, policy_version 6750 (0.0005) +[2026-06-07 02:49:53,771][472559] Updated weights for policy 0, policy_version 6761 (0.0004) +[2026-06-07 02:49:53,891][472559] Updated weights for policy 0, policy_version 6772 (0.0005) +[2026-06-07 02:49:54,015][472559] Updated weights for policy 0, policy_version 6783 (0.0005) +[2026-06-07 02:49:54,570][472559] Updated weights for policy 0, policy_version 6793 (0.0005) +[2026-06-07 02:49:54,684][472559] Updated weights for policy 0, policy_version 6803 (0.0005) +[2026-06-07 02:49:54,816][472559] Updated weights for policy 0, policy_version 6815 (0.0005) +[2026-06-07 02:49:54,933][472559] Updated weights for policy 0, policy_version 6825 (0.0005) +[2026-06-07 02:49:55,048][472559] Updated weights for policy 0, policy_version 6835 (0.0005) +[2026-06-07 02:49:55,193][472559] Updated weights for policy 0, policy_version 6847 (0.0005) +[2026-06-07 02:49:55,749][472559] Updated weights for policy 0, policy_version 6858 (0.0004) +[2026-06-07 02:49:55,876][472559] Updated weights for policy 0, policy_version 6869 (0.0004) +[2026-06-07 02:49:55,992][472559] Updated weights for policy 0, policy_version 6879 (0.0004) +[2026-06-07 02:49:56,117][472559] Updated weights for policy 0, policy_version 6890 (0.0005) +[2026-06-07 02:49:56,228][472559] Updated weights for policy 0, policy_version 6900 (0.0008) +[2026-06-07 02:49:56,341][472559] Updated weights for policy 0, policy_version 6910 (0.0008) +[2026-06-07 02:49:56,894][472559] Updated weights for policy 0, policy_version 6921 (0.0008) +[2026-06-07 02:49:57,017][472559] Updated weights for policy 0, policy_version 6932 (0.0008) +[2026-06-07 02:49:57,128][472559] Updated weights for policy 0, policy_version 6942 (0.0008) +[2026-06-07 02:49:57,249][472559] Updated weights for policy 0, policy_version 6953 (0.0008) +[2026-06-07 02:49:57,370][472559] Updated weights for policy 0, policy_version 6964 (0.0008) +[2026-06-07 02:49:57,496][472559] Updated weights for policy 0, policy_version 6975 (0.0008) +[2026-06-07 02:49:58,074][472559] Updated weights for policy 0, policy_version 6985 (0.0008) +[2026-06-07 02:49:58,117][464927] Fps is (10 sec: 29491.1, 60 sec: 27853.0, 300 sec: 26457.1). Total num frames: 3571712. Throughput: 0: 28211.2. Samples: 3620096. Policy #0 lag: (min: 63.0, avg: 75.4, max: 127.0) +[2026-06-07 02:49:58,117][464927] Avg episode reward: [(0, '121.440')] +[2026-06-07 02:49:58,186][472559] Updated weights for policy 0, policy_version 6996 (0.0008) +[2026-06-07 02:49:58,301][472559] Updated weights for policy 0, policy_version 7006 (0.0007) +[2026-06-07 02:49:58,432][472559] Updated weights for policy 0, policy_version 7018 (0.0006) +[2026-06-07 02:49:58,566][472559] Updated weights for policy 0, policy_version 7030 (0.0005) +[2026-06-07 02:49:58,681][472025] Saving new best policy, reward=121.440! +[2026-06-07 02:49:59,139][472559] Updated weights for policy 0, policy_version 7041 (0.0005) +[2026-06-07 02:49:59,247][472559] Updated weights for policy 0, policy_version 7051 (0.0008) +[2026-06-07 02:49:59,390][472559] Updated weights for policy 0, policy_version 7064 (0.0008) +[2026-06-07 02:49:59,531][472559] Updated weights for policy 0, policy_version 7076 (0.0008) +[2026-06-07 02:49:59,654][472559] Updated weights for policy 0, policy_version 7087 (0.0008) +[2026-06-07 02:49:59,780][472559] Updated weights for policy 0, policy_version 7098 (0.0009) +[2026-06-07 02:50:00,350][472559] Updated weights for policy 0, policy_version 7108 (0.0008) +[2026-06-07 02:50:00,463][472559] Updated weights for policy 0, policy_version 7118 (0.0008) +[2026-06-07 02:50:00,597][472559] Updated weights for policy 0, policy_version 7130 (0.0008) +[2026-06-07 02:50:00,761][472559] Updated weights for policy 0, policy_version 7144 (0.0008) +[2026-06-07 02:50:00,882][472559] Updated weights for policy 0, policy_version 7155 (0.0008) +[2026-06-07 02:50:00,996][472559] Updated weights for policy 0, policy_version 7165 (0.0008) +[2026-06-07 02:50:01,547][472559] Updated weights for policy 0, policy_version 7175 (0.0008) +[2026-06-07 02:50:01,662][472559] Updated weights for policy 0, policy_version 7185 (0.0008) +[2026-06-07 02:50:01,791][472559] Updated weights for policy 0, policy_version 7197 (0.0008) +[2026-06-07 02:50:01,932][472559] Updated weights for policy 0, policy_version 7209 (0.0008) +[2026-06-07 02:50:02,037][472559] Updated weights for policy 0, policy_version 7219 (0.0008) +[2026-06-07 02:50:02,154][472559] Updated weights for policy 0, policy_version 7229 (0.0008) +[2026-06-07 02:50:02,712][472559] Updated weights for policy 0, policy_version 7239 (0.0008) +[2026-06-07 02:50:02,845][472559] Updated weights for policy 0, policy_version 7251 (0.0008) +[2026-06-07 02:50:02,960][472559] Updated weights for policy 0, policy_version 7261 (0.0008) +[2026-06-07 02:50:03,082][472559] Updated weights for policy 0, policy_version 7272 (0.0009) +[2026-06-07 02:50:03,117][464927] Fps is (10 sec: 26214.3, 60 sec: 27852.8, 300 sec: 26448.5). Total num frames: 3702784. Throughput: 0: 27938.1. Samples: 3696512. Policy #0 lag: (min: 63.0, avg: 74.7, max: 127.0) +[2026-06-07 02:50:03,118][464927] Avg episode reward: [(0, '123.068')] +[2026-06-07 02:50:03,207][472559] Updated weights for policy 0, policy_version 7283 (0.0008) +[2026-06-07 02:50:03,335][472559] Updated weights for policy 0, policy_version 7295 (0.0008) +[2026-06-07 02:50:03,342][472025] Saving new best policy, reward=123.068! +[2026-06-07 02:50:03,892][472559] Updated weights for policy 0, policy_version 7305 (0.0008) +[2026-06-07 02:50:04,000][472559] Updated weights for policy 0, policy_version 7315 (0.0008) +[2026-06-07 02:50:04,127][472559] Updated weights for policy 0, policy_version 7326 (0.0009) +[2026-06-07 02:50:04,248][472559] Updated weights for policy 0, policy_version 7337 (0.0008) +[2026-06-07 02:50:04,369][472559] Updated weights for policy 0, policy_version 7348 (0.0008) +[2026-06-07 02:50:04,498][472559] Updated weights for policy 0, policy_version 7359 (0.0008) +[2026-06-07 02:50:05,076][472559] Updated weights for policy 0, policy_version 7370 (0.0007) +[2026-06-07 02:50:05,219][472559] Updated weights for policy 0, policy_version 7383 (0.0008) +[2026-06-07 02:50:05,342][472559] Updated weights for policy 0, policy_version 7394 (0.0008) +[2026-06-07 02:50:05,466][472559] Updated weights for policy 0, policy_version 7405 (0.0008) +[2026-06-07 02:50:05,587][472559] Updated weights for policy 0, policy_version 7416 (0.0008) +[2026-06-07 02:50:06,161][472559] Updated weights for policy 0, policy_version 7428 (0.0008) +[2026-06-07 02:50:06,280][472559] Updated weights for policy 0, policy_version 7439 (0.0008) +[2026-06-07 02:50:06,425][472559] Updated weights for policy 0, policy_version 7452 (0.0009) +[2026-06-07 02:50:06,539][472559] Updated weights for policy 0, policy_version 7462 (0.0008) +[2026-06-07 02:50:06,665][472559] Updated weights for policy 0, policy_version 7473 (0.0008) +[2026-06-07 02:50:06,796][472559] Updated weights for policy 0, policy_version 7485 (0.0008) +[2026-06-07 02:50:07,370][472559] Updated weights for policy 0, policy_version 7496 (0.0008) +[2026-06-07 02:50:07,489][472559] Updated weights for policy 0, policy_version 7506 (0.0008) +[2026-06-07 02:50:07,622][472559] Updated weights for policy 0, policy_version 7518 (0.0008) +[2026-06-07 02:50:07,748][472559] Updated weights for policy 0, policy_version 7529 (0.0009) +[2026-06-07 02:50:07,882][472559] Updated weights for policy 0, policy_version 7541 (0.0008) +[2026-06-07 02:50:07,995][472559] Updated weights for policy 0, policy_version 7551 (0.0008) +[2026-06-07 02:50:08,117][464927] Fps is (10 sec: 29491.4, 60 sec: 28399.0, 300 sec: 26666.4). Total num frames: 3866624. Throughput: 0: 28123.3. Samples: 3870464. Policy #0 lag: (min: 63.0, avg: 74.7, max: 127.0) +[2026-06-07 02:50:08,117][464927] Avg episode reward: [(0, '124.272')] +[2026-06-07 02:50:08,122][472025] Saving new best policy, reward=124.272! +[2026-06-07 02:50:08,553][472559] Updated weights for policy 0, policy_version 7561 (0.0008) +[2026-06-07 02:50:08,683][472559] Updated weights for policy 0, policy_version 7573 (0.0009) +[2026-06-07 02:50:08,808][472559] Updated weights for policy 0, policy_version 7584 (0.0008) +[2026-06-07 02:50:08,931][472559] Updated weights for policy 0, policy_version 7595 (0.0008) +[2026-06-07 02:50:09,039][472559] Updated weights for policy 0, policy_version 7605 (0.0010) +[2026-06-07 02:50:09,158][472559] Updated weights for policy 0, policy_version 7615 (0.0012) +[2026-06-07 02:50:09,732][472559] Updated weights for policy 0, policy_version 7625 (0.0007) +[2026-06-07 02:50:09,846][472559] Updated weights for policy 0, policy_version 7636 (0.0008) +[2026-06-07 02:50:09,979][472559] Updated weights for policy 0, policy_version 7647 (0.0008) +[2026-06-07 02:50:10,092][472559] Updated weights for policy 0, policy_version 7658 (0.0009) +[2026-06-07 02:50:10,221][472559] Updated weights for policy 0, policy_version 7669 (0.0009) +[2026-06-07 02:50:10,332][472559] Updated weights for policy 0, policy_version 7679 (0.0008) +[2026-06-07 02:50:10,900][472559] Updated weights for policy 0, policy_version 7690 (0.0008) +[2026-06-07 02:50:11,047][472559] Updated weights for policy 0, policy_version 7703 (0.0008) +[2026-06-07 02:50:11,178][472559] Updated weights for policy 0, policy_version 7715 (0.0008) +[2026-06-07 02:50:11,307][472559] Updated weights for policy 0, policy_version 7726 (0.0008) +[2026-06-07 02:50:11,423][472559] Updated weights for policy 0, policy_version 7736 (0.0008) +[2026-06-07 02:50:11,993][472559] Updated weights for policy 0, policy_version 7747 (0.0008) +[2026-06-07 02:50:12,116][472559] Updated weights for policy 0, policy_version 7758 (0.0008) +[2026-06-07 02:50:12,247][472559] Updated weights for policy 0, policy_version 7770 (0.0008) +[2026-06-07 02:50:12,365][472559] Updated weights for policy 0, policy_version 7780 (0.0008) +[2026-06-07 02:50:12,481][472559] Updated weights for policy 0, policy_version 7790 (0.0008) +[2026-06-07 02:50:12,598][472559] Updated weights for policy 0, policy_version 7801 (0.0008) +[2026-06-07 02:50:13,117][464927] Fps is (10 sec: 29490.7, 60 sec: 27852.7, 300 sec: 26651.3). Total num frames: 3997696. Throughput: 0: 28276.5. Samples: 4043392. Policy #0 lag: (min: 63.0, avg: 74.3, max: 127.0) +[2026-06-07 02:50:13,118][464927] Avg episode reward: [(0, '152.600')] +[2026-06-07 02:50:13,161][472559] Updated weights for policy 0, policy_version 7811 (0.0008) +[2026-06-07 02:50:13,270][472559] Updated weights for policy 0, policy_version 7821 (0.0008) +[2026-06-07 02:50:13,380][472559] Updated weights for policy 0, policy_version 7831 (0.0008) +[2026-06-07 02:50:13,506][472559] Updated weights for policy 0, policy_version 7842 (0.0008) +[2026-06-07 02:50:13,619][472559] Updated weights for policy 0, policy_version 7852 (0.0009) +[2026-06-07 02:50:13,732][472559] Updated weights for policy 0, policy_version 7862 (0.0008) +[2026-06-07 02:50:13,837][472025] Saving new best policy, reward=152.600! +[2026-06-07 02:50:13,842][472559] Updated weights for policy 0, policy_version 7872 (0.0008) +[2026-06-07 02:50:14,445][472559] Updated weights for policy 0, policy_version 7885 (0.0010) +[2026-06-07 02:50:14,568][472559] Updated weights for policy 0, policy_version 7896 (0.0010) +[2026-06-07 02:50:14,677][472559] Updated weights for policy 0, policy_version 7906 (0.0010) +[2026-06-07 02:50:14,820][472559] Updated weights for policy 0, policy_version 7918 (0.0010) +[2026-06-07 02:50:14,940][472559] Updated weights for policy 0, policy_version 7929 (0.0008) +[2026-06-07 02:50:15,495][472559] Updated weights for policy 0, policy_version 7939 (0.0008) +[2026-06-07 02:50:15,621][472559] Updated weights for policy 0, policy_version 7950 (0.0008) +[2026-06-07 02:50:15,741][472559] Updated weights for policy 0, policy_version 7961 (0.0008) +[2026-06-07 02:50:15,868][472559] Updated weights for policy 0, policy_version 7972 (0.0008) +[2026-06-07 02:50:16,003][472559] Updated weights for policy 0, policy_version 7984 (0.0008) +[2026-06-07 02:50:16,119][472559] Updated weights for policy 0, policy_version 7994 (0.0008) +[2026-06-07 02:50:16,716][472559] Updated weights for policy 0, policy_version 8008 (0.0008) +[2026-06-07 02:50:16,822][472559] Updated weights for policy 0, policy_version 8018 (0.0010) +[2026-06-07 02:50:16,967][472559] Updated weights for policy 0, policy_version 8031 (0.0008) +[2026-06-07 02:50:17,123][472559] Updated weights for policy 0, policy_version 8045 (0.0008) +[2026-06-07 02:50:17,240][472559] Updated weights for policy 0, policy_version 8056 (0.0008) +[2026-06-07 02:50:17,865][472559] Updated weights for policy 0, policy_version 8071 (0.0008) +[2026-06-07 02:50:17,984][472559] Updated weights for policy 0, policy_version 8082 (0.0008) +[2026-06-07 02:50:18,105][472559] Updated weights for policy 0, policy_version 8093 (0.0008) +[2026-06-07 02:50:18,117][464927] Fps is (10 sec: 26214.3, 60 sec: 27852.8, 300 sec: 26637.2). Total num frames: 4128768. Throughput: 0: 28003.5. Samples: 4121728. Policy #0 lag: (min: 63.0, avg: 74.3, max: 127.0) +[2026-06-07 02:50:18,118][464927] Avg episode reward: [(0, '173.399')] +[2026-06-07 02:50:18,236][472559] Updated weights for policy 0, policy_version 8105 (0.0008) +[2026-06-07 02:50:18,348][472559] Updated weights for policy 0, policy_version 8115 (0.0009) +[2026-06-07 02:50:18,471][472559] Updated weights for policy 0, policy_version 8127 (0.0009) +[2026-06-07 02:50:18,486][472025] Saving new best policy, reward=173.399! +[2026-06-07 02:50:19,058][472559] Updated weights for policy 0, policy_version 8137 (0.0008) +[2026-06-07 02:50:19,168][472559] Updated weights for policy 0, policy_version 8147 (0.0008) +[2026-06-07 02:50:19,280][472559] Updated weights for policy 0, policy_version 8157 (0.0008) +[2026-06-07 02:50:19,400][472559] Updated weights for policy 0, policy_version 8168 (0.0008) +[2026-06-07 02:50:19,512][472559] Updated weights for policy 0, policy_version 8178 (0.0008) +[2026-06-07 02:50:19,639][472559] Updated weights for policy 0, policy_version 8189 (0.0009) +[2026-06-07 02:50:20,209][472559] Updated weights for policy 0, policy_version 8199 (0.0008) +[2026-06-07 02:50:20,355][472559] Updated weights for policy 0, policy_version 8213 (0.0008) +[2026-06-07 02:50:20,478][472559] Updated weights for policy 0, policy_version 8224 (0.0008) +[2026-06-07 02:50:20,602][472559] Updated weights for policy 0, policy_version 8235 (0.0008) +[2026-06-07 02:50:20,718][472559] Updated weights for policy 0, policy_version 8245 (0.0008) +[2026-06-07 02:50:20,834][472559] Updated weights for policy 0, policy_version 8255 (0.0009) +[2026-06-07 02:50:21,373][472559] Updated weights for policy 0, policy_version 8265 (0.0008) +[2026-06-07 02:50:21,488][472559] Updated weights for policy 0, policy_version 8275 (0.0008) +[2026-06-07 02:50:21,618][472559] Updated weights for policy 0, policy_version 8287 (0.0008) +[2026-06-07 02:50:21,747][472559] Updated weights for policy 0, policy_version 8298 (0.0009) +[2026-06-07 02:50:21,883][472559] Updated weights for policy 0, policy_version 8310 (0.0008) +[2026-06-07 02:50:21,998][472559] Updated weights for policy 0, policy_version 8320 (0.0008) +[2026-06-07 02:50:22,547][472559] Updated weights for policy 0, policy_version 8330 (0.0008) +[2026-06-07 02:50:22,659][472559] Updated weights for policy 0, policy_version 8340 (0.0008) +[2026-06-07 02:50:22,770][472559] Updated weights for policy 0, policy_version 8350 (0.0009) +[2026-06-07 02:50:22,897][472559] Updated weights for policy 0, policy_version 8361 (0.0008) +[2026-06-07 02:50:23,037][472559] Updated weights for policy 0, policy_version 8373 (0.0008) +[2026-06-07 02:50:23,117][464927] Fps is (10 sec: 26214.9, 60 sec: 27853.0, 300 sec: 26624.0). Total num frames: 4259840. Throughput: 0: 28015.0. Samples: 4289536. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 02:50:23,117][464927] Avg episode reward: [(0, '183.203')] +[2026-06-07 02:50:23,149][472025] Saving new best policy, reward=183.203! +[2026-06-07 02:50:23,152][472559] Updated weights for policy 0, policy_version 8384 (0.0007) +[2026-06-07 02:50:23,719][472559] Updated weights for policy 0, policy_version 8396 (0.0008) +[2026-06-07 02:50:23,843][472559] Updated weights for policy 0, policy_version 8407 (0.0009) +[2026-06-07 02:50:23,997][472559] Updated weights for policy 0, policy_version 8421 (0.0008) +[2026-06-07 02:50:24,110][472559] Updated weights for policy 0, policy_version 8431 (0.0008) +[2026-06-07 02:50:24,232][472559] Updated weights for policy 0, policy_version 8442 (0.0009) +[2026-06-07 02:50:24,797][472559] Updated weights for policy 0, policy_version 8453 (0.0008) +[2026-06-07 02:50:24,909][472559] Updated weights for policy 0, policy_version 8463 (0.0008) +[2026-06-07 02:50:25,037][472559] Updated weights for policy 0, policy_version 8475 (0.0008) +[2026-06-07 02:50:25,184][472559] Updated weights for policy 0, policy_version 8488 (0.0008) +[2026-06-07 02:50:25,315][472559] Updated weights for policy 0, policy_version 8500 (0.0008) +[2026-06-07 02:50:25,445][472559] Updated weights for policy 0, policy_version 8511 (0.0008) +[2026-06-07 02:50:25,990][472559] Updated weights for policy 0, policy_version 8521 (0.0008) +[2026-06-07 02:50:26,133][472559] Updated weights for policy 0, policy_version 8534 (0.0008) +[2026-06-07 02:50:26,273][472559] Updated weights for policy 0, policy_version 8547 (0.0009) +[2026-06-07 02:50:26,390][472559] Updated weights for policy 0, policy_version 8557 (0.0008) +[2026-06-07 02:50:26,504][472559] Updated weights for policy 0, policy_version 8567 (0.0008) +[2026-06-07 02:50:27,061][472559] Updated weights for policy 0, policy_version 8577 (0.0007) +[2026-06-07 02:50:27,185][472559] Updated weights for policy 0, policy_version 8588 (0.0008) +[2026-06-07 02:50:27,310][472559] Updated weights for policy 0, policy_version 8599 (0.0008) +[2026-06-07 02:50:27,418][472559] Updated weights for policy 0, policy_version 8609 (0.0008) +[2026-06-07 02:50:27,553][472559] Updated weights for policy 0, policy_version 8621 (0.0009) +[2026-06-07 02:50:27,676][472559] Updated weights for policy 0, policy_version 8632 (0.0009) +[2026-06-07 02:50:28,117][464927] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 26810.2). Total num frames: 4423680. Throughput: 0: 28179.9. Samples: 4463616. Policy #0 lag: (min: 52.0, avg: 64.0, max: 116.0) +[2026-06-07 02:50:28,117][464927] Avg episode reward: [(0, '201.182')] +[2026-06-07 02:50:28,123][472025] Saving new best policy, reward=201.182! +[2026-06-07 02:50:28,249][472559] Updated weights for policy 0, policy_version 8642 (0.0008) +[2026-06-07 02:50:28,356][472559] Updated weights for policy 0, policy_version 8652 (0.0008) +[2026-06-07 02:50:28,507][472559] Updated weights for policy 0, policy_version 8666 (0.0009) +[2026-06-07 02:50:28,642][472559] Updated weights for policy 0, policy_version 8678 (0.0009) +[2026-06-07 02:50:28,756][472559] Updated weights for policy 0, policy_version 8688 (0.0009) +[2026-06-07 02:50:28,867][472559] Updated weights for policy 0, policy_version 8698 (0.0009) +[2026-06-07 02:50:29,428][472559] Updated weights for policy 0, policy_version 8710 (0.0008) +[2026-06-07 02:50:29,550][472559] Updated weights for policy 0, policy_version 8721 (0.0008) +[2026-06-07 02:50:29,677][472559] Updated weights for policy 0, policy_version 8732 (0.0008) +[2026-06-07 02:50:29,782][472559] Updated weights for policy 0, policy_version 8742 (0.0008) +[2026-06-07 02:50:29,892][472559] Updated weights for policy 0, policy_version 8752 (0.0009) +[2026-06-07 02:50:30,011][472559] Updated weights for policy 0, policy_version 8762 (0.0008) +[2026-06-07 02:50:30,583][472559] Updated weights for policy 0, policy_version 8773 (0.0009) +[2026-06-07 02:50:30,692][472559] Updated weights for policy 0, policy_version 8783 (0.0009) +[2026-06-07 02:50:30,830][472559] Updated weights for policy 0, policy_version 8795 (0.0009) +[2026-06-07 02:50:30,953][472559] Updated weights for policy 0, policy_version 8806 (0.0009) +[2026-06-07 02:50:31,083][472559] Updated weights for policy 0, policy_version 8817 (0.0008) +[2026-06-07 02:50:31,226][472559] Updated weights for policy 0, policy_version 8830 (0.0009) +[2026-06-07 02:50:31,795][472559] Updated weights for policy 0, policy_version 8841 (0.0008) +[2026-06-07 02:50:31,913][472559] Updated weights for policy 0, policy_version 8852 (0.0009) +[2026-06-07 02:50:32,051][472559] Updated weights for policy 0, policy_version 8864 (0.0009) +[2026-06-07 02:50:32,174][472559] Updated weights for policy 0, policy_version 8875 (0.0008) +[2026-06-07 02:50:32,313][472559] Updated weights for policy 0, policy_version 8888 (0.0005) +[2026-06-07 02:50:32,876][472559] Updated weights for policy 0, policy_version 8898 (0.0008) +[2026-06-07 02:50:32,981][472559] Updated weights for policy 0, policy_version 8908 (0.0008) +[2026-06-07 02:50:33,113][472559] Updated weights for policy 0, policy_version 8920 (0.0008) +[2026-06-07 02:50:33,116][464927] Fps is (10 sec: 29491.2, 60 sec: 27852.8, 300 sec: 26792.7). Total num frames: 4554752. Throughput: 0: 28103.1. Samples: 4544256. Policy #0 lag: (min: 52.0, avg: 64.0, max: 116.0) +[2026-06-07 02:50:33,117][464927] Avg episode reward: [(0, '216.408')] +[2026-06-07 02:50:33,222][472559] Updated weights for policy 0, policy_version 8930 (0.0008) +[2026-06-07 02:50:33,352][472559] Updated weights for policy 0, policy_version 8941 (0.0009) +[2026-06-07 02:50:33,475][472559] Updated weights for policy 0, policy_version 8952 (0.0009) +[2026-06-07 02:50:33,566][472025] Saving new best policy, reward=216.408! +[2026-06-07 02:50:34,033][472559] Updated weights for policy 0, policy_version 8963 (0.0009) +[2026-06-07 02:50:34,152][472559] Updated weights for policy 0, policy_version 8974 (0.0009) +[2026-06-07 02:50:34,268][472559] Updated weights for policy 0, policy_version 8984 (0.0009) +[2026-06-07 02:50:34,389][472559] Updated weights for policy 0, policy_version 8995 (0.0009) +[2026-06-07 02:50:34,502][472559] Updated weights for policy 0, policy_version 9005 (0.0008) +[2026-06-07 02:50:34,646][472559] Updated weights for policy 0, policy_version 9018 (0.0008) +[2026-06-07 02:50:35,202][472559] Updated weights for policy 0, policy_version 9029 (0.0008) +[2026-06-07 02:50:35,331][472559] Updated weights for policy 0, policy_version 9041 (0.0008) +[2026-06-07 02:50:35,481][472559] Updated weights for policy 0, policy_version 9054 (0.0009) +[2026-06-07 02:50:35,593][472559] Updated weights for policy 0, policy_version 9064 (0.0008) +[2026-06-07 02:50:35,727][472559] Updated weights for policy 0, policy_version 9076 (0.0008) +[2026-06-07 02:50:35,861][472559] Updated weights for policy 0, policy_version 9088 (0.0008) +[2026-06-07 02:50:36,426][472559] Updated weights for policy 0, policy_version 9099 (0.0008) +[2026-06-07 02:50:36,546][472559] Updated weights for policy 0, policy_version 9109 (0.0008) +[2026-06-07 02:50:36,653][472559] Updated weights for policy 0, policy_version 9119 (0.0008) +[2026-06-07 02:50:36,772][472559] Updated weights for policy 0, policy_version 9130 (0.0008) +[2026-06-07 02:50:36,911][472559] Updated weights for policy 0, policy_version 9142 (0.0008) +[2026-06-07 02:50:37,023][472559] Updated weights for policy 0, policy_version 9152 (0.0008) +[2026-06-07 02:50:37,606][472559] Updated weights for policy 0, policy_version 9165 (0.0008) +[2026-06-07 02:50:37,734][472559] Updated weights for policy 0, policy_version 9176 (0.0008) +[2026-06-07 02:50:37,854][472559] Updated weights for policy 0, policy_version 9187 (0.0008) +[2026-06-07 02:50:37,979][472559] Updated weights for policy 0, policy_version 9198 (0.0008) +[2026-06-07 02:50:38,116][464927] Fps is (10 sec: 26214.5, 60 sec: 27852.8, 300 sec: 26776.2). Total num frames: 4685824. Throughput: 0: 27938.2. Samples: 4708608. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 02:50:38,117][464927] Avg episode reward: [(0, '214.067')] +[2026-06-07 02:50:38,120][472559] Updated weights for policy 0, policy_version 9210 (0.0008) +[2026-06-07 02:50:38,701][472559] Updated weights for policy 0, policy_version 9221 (0.0007) +[2026-06-07 02:50:38,824][472559] Updated weights for policy 0, policy_version 9232 (0.0005) +[2026-06-07 02:50:38,955][472559] Updated weights for policy 0, policy_version 9243 (0.0005) +[2026-06-07 02:50:39,060][472559] Updated weights for policy 0, policy_version 9253 (0.0005) +[2026-06-07 02:50:39,177][472559] Updated weights for policy 0, policy_version 9263 (0.0005) +[2026-06-07 02:50:39,286][472559] Updated weights for policy 0, policy_version 9273 (0.0005) +[2026-06-07 02:50:39,852][472559] Updated weights for policy 0, policy_version 9285 (0.0007) +[2026-06-07 02:50:39,959][472559] Updated weights for policy 0, policy_version 9295 (0.0008) +[2026-06-07 02:50:40,071][472559] Updated weights for policy 0, policy_version 9305 (0.0008) +[2026-06-07 02:50:40,181][472559] Updated weights for policy 0, policy_version 9315 (0.0008) +[2026-06-07 02:50:40,310][472559] Updated weights for policy 0, policy_version 9326 (0.0008) +[2026-06-07 02:50:40,433][472559] Updated weights for policy 0, policy_version 9337 (0.0008) +[2026-06-07 02:50:40,996][472559] Updated weights for policy 0, policy_version 9348 (0.0008) +[2026-06-07 02:50:41,112][472559] Updated weights for policy 0, policy_version 9359 (0.0008) +[2026-06-07 02:50:41,228][472559] Updated weights for policy 0, policy_version 9369 (0.0009) +[2026-06-07 02:50:41,347][472559] Updated weights for policy 0, policy_version 9380 (0.0008) +[2026-06-07 02:50:41,477][472559] Updated weights for policy 0, policy_version 9392 (0.0009) +[2026-06-07 02:50:41,599][472559] Updated weights for policy 0, policy_version 9402 (0.0008) +[2026-06-07 02:50:42,170][472559] Updated weights for policy 0, policy_version 9413 (0.0009) +[2026-06-07 02:50:42,291][472559] Updated weights for policy 0, policy_version 9424 (0.0008) +[2026-06-07 02:50:42,414][472559] Updated weights for policy 0, policy_version 9435 (0.0009) +[2026-06-07 02:50:42,522][472559] Updated weights for policy 0, policy_version 9445 (0.0008) +[2026-06-07 02:50:42,635][472559] Updated weights for policy 0, policy_version 9455 (0.0008) +[2026-06-07 02:50:42,758][472559] Updated weights for policy 0, policy_version 9466 (0.0009) +[2026-06-07 02:50:43,117][464927] Fps is (10 sec: 29491.1, 60 sec: 28399.0, 300 sec: 26942.6). Total num frames: 4849664. Throughput: 0: 28063.3. Samples: 4882944. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 02:50:43,118][464927] Avg episode reward: [(0, '214.805')] +[2026-06-07 02:50:43,319][472559] Updated weights for policy 0, policy_version 9476 (0.0007) +[2026-06-07 02:50:43,436][472559] Updated weights for policy 0, policy_version 9487 (0.0009) +[2026-06-07 02:50:43,553][472559] Updated weights for policy 0, policy_version 9497 (0.0008) +[2026-06-07 02:50:43,666][472559] Updated weights for policy 0, policy_version 9507 (0.0008) +[2026-06-07 02:50:43,794][472559] Updated weights for policy 0, policy_version 9518 (0.0009) +[2026-06-07 02:50:43,914][472559] Updated weights for policy 0, policy_version 9528 (0.0006) +[2026-06-07 02:50:44,419][472559] Updated weights for policy 0, policy_version 9538 (0.0005) +[2026-06-07 02:50:44,537][472559] Updated weights for policy 0, policy_version 9548 (0.0008) +[2026-06-07 02:50:44,646][472559] Updated weights for policy 0, policy_version 9558 (0.0008) +[2026-06-07 02:50:44,755][472559] Updated weights for policy 0, policy_version 9568 (0.0008) +[2026-06-07 02:50:44,872][472559] Updated weights for policy 0, policy_version 9578 (0.0008) +[2026-06-07 02:50:45,001][472559] Updated weights for policy 0, policy_version 9589 (0.0008) +[2026-06-07 02:50:45,123][472559] Updated weights for policy 0, policy_version 9600 (0.0008) +[2026-06-07 02:50:45,696][472559] Updated weights for policy 0, policy_version 9610 (0.0008) +[2026-06-07 02:50:45,823][472559] Updated weights for policy 0, policy_version 9621 (0.0008) +[2026-06-07 02:50:45,955][472559] Updated weights for policy 0, policy_version 9633 (0.0008) +[2026-06-07 02:50:46,071][472559] Updated weights for policy 0, policy_version 9643 (0.0008) +[2026-06-07 02:50:46,193][472559] Updated weights for policy 0, policy_version 9654 (0.0008) +[2026-06-07 02:50:46,310][472559] Updated weights for policy 0, policy_version 9664 (0.0008) +[2026-06-07 02:50:46,867][472559] Updated weights for policy 0, policy_version 9675 (0.0008) +[2026-06-07 02:50:46,978][472559] Updated weights for policy 0, policy_version 9685 (0.0008) +[2026-06-07 02:50:47,090][472559] Updated weights for policy 0, policy_version 9695 (0.0007) +[2026-06-07 02:50:47,210][472559] Updated weights for policy 0, policy_version 9705 (0.0007) +[2026-06-07 02:50:47,323][472559] Updated weights for policy 0, policy_version 9715 (0.0008) +[2026-06-07 02:50:47,457][472559] Updated weights for policy 0, policy_version 9727 (0.0009) +[2026-06-07 02:50:47,995][472559] Updated weights for policy 0, policy_version 9737 (0.0005) +[2026-06-07 02:50:48,116][472559] Updated weights for policy 0, policy_version 9748 (0.0005) +[2026-06-07 02:50:48,117][464927] Fps is (10 sec: 29491.1, 60 sec: 28398.9, 300 sec: 26922.9). Total num frames: 4980736. Throughput: 0: 28228.3. Samples: 4966784. Policy #0 lag: (min: 43.0, avg: 95.9, max: 107.0) +[2026-06-07 02:50:48,117][464927] Avg episode reward: [(0, '199.851')] +[2026-06-07 02:50:48,231][472559] Updated weights for policy 0, policy_version 9758 (0.0005) +[2026-06-07 02:50:48,358][472559] Updated weights for policy 0, policy_version 9769 (0.0004) +[2026-06-07 02:50:48,471][472559] Updated weights for policy 0, policy_version 9779 (0.0004) +[2026-06-07 02:50:48,591][472559] Updated weights for policy 0, policy_version 9790 (0.0005) +[2026-06-07 02:50:49,122][472559] Updated weights for policy 0, policy_version 9800 (0.0007) +[2026-06-07 02:50:49,242][472559] Updated weights for policy 0, policy_version 9811 (0.0008) +[2026-06-07 02:50:49,359][472559] Updated weights for policy 0, policy_version 9821 (0.0008) +[2026-06-07 02:50:49,481][472559] Updated weights for policy 0, policy_version 9832 (0.0008) +[2026-06-07 02:50:49,595][472559] Updated weights for policy 0, policy_version 9842 (0.0008) +[2026-06-07 02:50:49,710][472559] Updated weights for policy 0, policy_version 9852 (0.0008) +[2026-06-07 02:50:50,271][472559] Updated weights for policy 0, policy_version 9863 (0.0008) +[2026-06-07 02:50:50,382][472559] Updated weights for policy 0, policy_version 9873 (0.0008) +[2026-06-07 02:50:50,494][472559] Updated weights for policy 0, policy_version 9883 (0.0008) +[2026-06-07 02:50:50,608][472559] Updated weights for policy 0, policy_version 9893 (0.0008) +[2026-06-07 02:50:50,721][472559] Updated weights for policy 0, policy_version 9903 (0.0008) +[2026-06-07 02:50:50,843][472559] Updated weights for policy 0, policy_version 9914 (0.0008) +[2026-06-07 02:50:51,422][472559] Updated weights for policy 0, policy_version 9925 (0.0008) +[2026-06-07 02:50:51,542][472559] Updated weights for policy 0, policy_version 9936 (0.0008) +[2026-06-07 02:50:51,657][472559] Updated weights for policy 0, policy_version 9946 (0.0008) +[2026-06-07 02:50:51,776][472559] Updated weights for policy 0, policy_version 9957 (0.0008) +[2026-06-07 02:50:51,898][472559] Updated weights for policy 0, policy_version 9968 (0.0008) +[2026-06-07 02:50:52,022][472559] Updated weights for policy 0, policy_version 9979 (0.0008) +[2026-06-07 02:50:52,607][472559] Updated weights for policy 0, policy_version 9990 (0.0008) +[2026-06-07 02:50:52,720][472559] Updated weights for policy 0, policy_version 10001 (0.0008) +[2026-06-07 02:50:52,846][472559] Updated weights for policy 0, policy_version 10012 (0.0008) +[2026-06-07 02:50:52,970][472559] Updated weights for policy 0, policy_version 10022 (0.0008) +[2026-06-07 02:50:53,099][472559] Updated weights for policy 0, policy_version 10033 (0.0008) +[2026-06-07 02:50:53,117][464927] Fps is (10 sec: 26214.5, 60 sec: 27852.8, 300 sec: 26904.3). Total num frames: 5111808. Throughput: 0: 28051.9. Samples: 5132800. Policy #0 lag: (min: 43.0, avg: 95.9, max: 107.0) +[2026-06-07 02:50:53,117][464927] Avg episode reward: [(0, '218.177')] +[2026-06-07 02:50:53,235][472559] Updated weights for policy 0, policy_version 10044 (0.0008) +[2026-06-07 02:50:53,272][472025] Saving new best policy, reward=218.177! +[2026-06-07 02:50:53,795][472559] Updated weights for policy 0, policy_version 10054 (0.0007) +[2026-06-07 02:50:53,925][472559] Updated weights for policy 0, policy_version 10065 (0.0008) +[2026-06-07 02:50:54,057][472559] Updated weights for policy 0, policy_version 10077 (0.0011) +[2026-06-07 02:50:54,181][472559] Updated weights for policy 0, policy_version 10088 (0.0012) +[2026-06-07 02:50:54,297][472559] Updated weights for policy 0, policy_version 10098 (0.0011) +[2026-06-07 02:50:54,409][472559] Updated weights for policy 0, policy_version 10108 (0.0011) +[2026-06-07 02:50:54,951][472559] Updated weights for policy 0, policy_version 10120 (0.0006) +[2026-06-07 02:50:55,061][472559] Updated weights for policy 0, policy_version 10130 (0.0008) +[2026-06-07 02:50:55,201][472559] Updated weights for policy 0, policy_version 10142 (0.0009) +[2026-06-07 02:50:55,318][472559] Updated weights for policy 0, policy_version 10152 (0.0008) +[2026-06-07 02:50:55,445][472559] Updated weights for policy 0, policy_version 10163 (0.0008) +[2026-06-07 02:50:55,577][472559] Updated weights for policy 0, policy_version 10175 (0.0008) +[2026-06-07 02:50:56,120][472559] Updated weights for policy 0, policy_version 10186 (0.0007) +[2026-06-07 02:50:56,250][472559] Updated weights for policy 0, policy_version 10197 (0.0008) +[2026-06-07 02:50:56,365][472559] Updated weights for policy 0, policy_version 10207 (0.0009) +[2026-06-07 02:50:56,486][472559] Updated weights for policy 0, policy_version 10218 (0.0008) +[2026-06-07 02:50:56,603][472559] Updated weights for policy 0, policy_version 10228 (0.0008) +[2026-06-07 02:50:56,732][472559] Updated weights for policy 0, policy_version 10239 (0.0008) +[2026-06-07 02:50:57,287][472559] Updated weights for policy 0, policy_version 10249 (0.0008) +[2026-06-07 02:50:57,400][472559] Updated weights for policy 0, policy_version 10259 (0.0009) +[2026-06-07 02:50:57,515][472559] Updated weights for policy 0, policy_version 10269 (0.0008) +[2026-06-07 02:50:57,624][472559] Updated weights for policy 0, policy_version 10279 (0.0008) +[2026-06-07 02:50:57,746][472559] Updated weights for policy 0, policy_version 10290 (0.0006) +[2026-06-07 02:50:57,870][472559] Updated weights for policy 0, policy_version 10300 (0.0007) +[2026-06-07 02:50:58,117][464927] Fps is (10 sec: 29491.3, 60 sec: 28399.0, 300 sec: 27054.6). Total num frames: 5275648. Throughput: 0: 28063.4. Samples: 5306240. Policy #0 lag: (min: 53.0, avg: 64.5, max: 117.0) +[2026-06-07 02:50:58,117][464927] Avg episode reward: [(0, '243.004')] +[2026-06-07 02:50:58,122][472025] Saving new best policy, reward=243.004! +[2026-06-07 02:50:58,405][472559] Updated weights for policy 0, policy_version 10311 (0.0005) +[2026-06-07 02:50:58,521][472559] Updated weights for policy 0, policy_version 10321 (0.0006) +[2026-06-07 02:50:58,635][472559] Updated weights for policy 0, policy_version 10331 (0.0005) +[2026-06-07 02:50:58,765][472559] Updated weights for policy 0, policy_version 10342 (0.0011) +[2026-06-07 02:50:58,880][472559] Updated weights for policy 0, policy_version 10353 (0.0011) +[2026-06-07 02:50:58,998][472559] Updated weights for policy 0, policy_version 10363 (0.0012) +[2026-06-07 02:50:59,552][472559] Updated weights for policy 0, policy_version 10374 (0.0008) +[2026-06-07 02:50:59,664][472559] Updated weights for policy 0, policy_version 10384 (0.0005) +[2026-06-07 02:50:59,779][472559] Updated weights for policy 0, policy_version 10394 (0.0006) +[2026-06-07 02:50:59,896][472559] Updated weights for policy 0, policy_version 10404 (0.0006) +[2026-06-07 02:51:00,008][472559] Updated weights for policy 0, policy_version 10414 (0.0011) +[2026-06-07 02:51:00,129][472559] Updated weights for policy 0, policy_version 10425 (0.0010) +[2026-06-07 02:51:00,719][472559] Updated weights for policy 0, policy_version 10435 (0.0009) +[2026-06-07 02:51:00,830][472559] Updated weights for policy 0, policy_version 10445 (0.0009) +[2026-06-07 02:51:00,947][472559] Updated weights for policy 0, policy_version 10456 (0.0009) +[2026-06-07 02:51:01,059][472559] Updated weights for policy 0, policy_version 10466 (0.0009) +[2026-06-07 02:51:01,174][472559] Updated weights for policy 0, policy_version 10476 (0.0008) +[2026-06-07 02:51:01,293][472559] Updated weights for policy 0, policy_version 10486 (0.0009) +[2026-06-07 02:51:01,399][472559] Updated weights for policy 0, policy_version 10496 (0.0008) +[2026-06-07 02:51:01,951][472559] Updated weights for policy 0, policy_version 10507 (0.0006) +[2026-06-07 02:51:02,079][472559] Updated weights for policy 0, policy_version 10518 (0.0008) +[2026-06-07 02:51:02,227][472559] Updated weights for policy 0, policy_version 10531 (0.0009) +[2026-06-07 02:51:02,339][472559] Updated weights for policy 0, policy_version 10541 (0.0008) +[2026-06-07 02:51:02,456][472559] Updated weights for policy 0, policy_version 10551 (0.0007) +[2026-06-07 02:51:03,005][472559] Updated weights for policy 0, policy_version 10561 (0.0007) +[2026-06-07 02:51:03,113][472559] Updated weights for policy 0, policy_version 10571 (0.0008) +[2026-06-07 02:51:03,117][464927] Fps is (10 sec: 29491.2, 60 sec: 28399.0, 300 sec: 27033.6). Total num frames: 5406720. Throughput: 0: 28219.8. Samples: 5391616. Policy #0 lag: (min: 63.0, avg: 75.1, max: 127.0) +[2026-06-07 02:51:03,118][464927] Avg episode reward: [(0, '250.460')] +[2026-06-07 02:51:03,249][472559] Updated weights for policy 0, policy_version 10583 (0.0008) +[2026-06-07 02:51:03,365][472559] Updated weights for policy 0, policy_version 10593 (0.0009) +[2026-06-07 02:51:03,476][472559] Updated weights for policy 0, policy_version 10603 (0.0008) +[2026-06-07 02:51:03,592][472559] Updated weights for policy 0, policy_version 10613 (0.0008) +[2026-06-07 02:51:03,708][472559] Updated weights for policy 0, policy_version 10623 (0.0006) +[2026-06-07 02:51:03,716][472025] Saving new best policy, reward=250.460! +[2026-06-07 02:51:04,246][472559] Updated weights for policy 0, policy_version 10634 (0.0005) +[2026-06-07 02:51:04,367][472559] Updated weights for policy 0, policy_version 10645 (0.0008) +[2026-06-07 02:51:04,486][472559] Updated weights for policy 0, policy_version 10655 (0.0005) +[2026-06-07 02:51:04,616][472559] Updated weights for policy 0, policy_version 10666 (0.0009) +[2026-06-07 02:51:04,718][472559] Updated weights for policy 0, policy_version 10676 (0.0008) +[2026-06-07 02:51:04,845][472559] Updated weights for policy 0, policy_version 10687 (0.0008) +[2026-06-07 02:51:05,397][472559] Updated weights for policy 0, policy_version 10697 (0.0008) +[2026-06-07 02:51:05,521][472559] Updated weights for policy 0, policy_version 10708 (0.0008) +[2026-06-07 02:51:05,636][472559] Updated weights for policy 0, policy_version 10718 (0.0009) +[2026-06-07 02:51:05,781][472559] Updated weights for policy 0, policy_version 10731 (0.0009) +[2026-06-07 02:51:05,905][472559] Updated weights for policy 0, policy_version 10742 (0.0008) +[2026-06-07 02:51:06,013][472559] Updated weights for policy 0, policy_version 10752 (0.0009) +[2026-06-07 02:51:06,578][472559] Updated weights for policy 0, policy_version 10763 (0.0008) +[2026-06-07 02:51:06,727][472559] Updated weights for policy 0, policy_version 10776 (0.0008) +[2026-06-07 02:51:06,851][472559] Updated weights for policy 0, policy_version 10787 (0.0008) +[2026-06-07 02:51:06,977][472559] Updated weights for policy 0, policy_version 10798 (0.0008) +[2026-06-07 02:51:07,092][472559] Updated weights for policy 0, policy_version 10808 (0.0008) +[2026-06-07 02:51:07,627][472559] Updated weights for policy 0, policy_version 10819 (0.0005) +[2026-06-07 02:51:07,761][472559] Updated weights for policy 0, policy_version 10831 (0.0008) +[2026-06-07 02:51:07,867][472559] Updated weights for policy 0, policy_version 10841 (0.0008) +[2026-06-07 02:51:07,992][472559] Updated weights for policy 0, policy_version 10852 (0.0008) +[2026-06-07 02:51:08,117][464927] Fps is (10 sec: 26214.2, 60 sec: 27852.8, 300 sec: 27013.6). Total num frames: 5537792. Throughput: 0: 28182.7. Samples: 5557760. Policy #0 lag: (min: 63.0, avg: 75.1, max: 127.0) +[2026-06-07 02:51:08,118][464927] Avg episode reward: [(0, '278.355')] +[2026-06-07 02:51:08,123][472559] Updated weights for policy 0, policy_version 10863 (0.0008) +[2026-06-07 02:51:08,236][472559] Updated weights for policy 0, policy_version 10873 (0.0008) +[2026-06-07 02:51:08,309][472025] Saving new best policy, reward=278.355! +[2026-06-07 02:51:08,805][472559] Updated weights for policy 0, policy_version 10884 (0.0010) +[2026-06-07 02:51:08,916][472559] Updated weights for policy 0, policy_version 10894 (0.0008) +[2026-06-07 02:51:09,047][472559] Updated weights for policy 0, policy_version 10906 (0.0008) +[2026-06-07 02:51:09,161][472559] Updated weights for policy 0, policy_version 10916 (0.0008) +[2026-06-07 02:51:09,291][472559] Updated weights for policy 0, policy_version 10928 (0.0008) +[2026-06-07 02:51:09,420][472559] Updated weights for policy 0, policy_version 10939 (0.0009) +[2026-06-07 02:51:09,982][472559] Updated weights for policy 0, policy_version 10949 (0.0008) +[2026-06-07 02:51:10,107][472559] Updated weights for policy 0, policy_version 10960 (0.0009) +[2026-06-07 02:51:10,214][472559] Updated weights for policy 0, policy_version 10970 (0.0008) +[2026-06-07 02:51:10,341][472559] Updated weights for policy 0, policy_version 10981 (0.0008) +[2026-06-07 02:51:10,476][472559] Updated weights for policy 0, policy_version 10993 (0.0008) +[2026-06-07 02:51:10,608][472559] Updated weights for policy 0, policy_version 11005 (0.0008) +[2026-06-07 02:51:11,168][472559] Updated weights for policy 0, policy_version 11015 (0.0008) +[2026-06-07 02:51:11,311][472559] Updated weights for policy 0, policy_version 11028 (0.0008) +[2026-06-07 02:51:11,423][472559] Updated weights for policy 0, policy_version 11038 (0.0008) +[2026-06-07 02:51:11,543][472559] Updated weights for policy 0, policy_version 11049 (0.0008) +[2026-06-07 02:51:11,669][472559] Updated weights for policy 0, policy_version 11060 (0.0008) +[2026-06-07 02:51:11,812][472559] Updated weights for policy 0, policy_version 11072 (0.0008) +[2026-06-07 02:51:12,382][472559] Updated weights for policy 0, policy_version 11082 (0.0008) +[2026-06-07 02:51:12,489][472559] Updated weights for policy 0, policy_version 11092 (0.0008) +[2026-06-07 02:51:12,605][472559] Updated weights for policy 0, policy_version 11102 (0.0007) +[2026-06-07 02:51:12,729][472559] Updated weights for policy 0, policy_version 11113 (0.0005) +[2026-06-07 02:51:12,842][472559] Updated weights for policy 0, policy_version 11123 (0.0005) +[2026-06-07 02:51:12,955][472559] Updated weights for policy 0, policy_version 11133 (0.0005) +[2026-06-07 02:51:13,117][464927] Fps is (10 sec: 29491.1, 60 sec: 28399.0, 300 sec: 27150.6). Total num frames: 5701632. Throughput: 0: 28029.2. Samples: 5724928. Policy #0 lag: (min: 63.0, avg: 75.1, max: 127.0) +[2026-06-07 02:51:13,117][464927] Avg episode reward: [(0, '286.739')] +[2026-06-07 02:51:13,123][472025] Saving new best policy, reward=286.739! +[2026-06-07 02:51:13,507][472559] Updated weights for policy 0, policy_version 11145 (0.0007) +[2026-06-07 02:51:13,653][472559] Updated weights for policy 0, policy_version 11158 (0.0009) +[2026-06-07 02:51:13,787][472559] Updated weights for policy 0, policy_version 11170 (0.0008) +[2026-06-07 02:51:13,916][472559] Updated weights for policy 0, policy_version 11181 (0.0008) +[2026-06-07 02:51:14,043][472559] Updated weights for policy 0, policy_version 11192 (0.0008) +[2026-06-07 02:51:14,616][472559] Updated weights for policy 0, policy_version 11204 (0.0008) +[2026-06-07 02:51:14,739][472559] Updated weights for policy 0, policy_version 11215 (0.0008) +[2026-06-07 02:51:14,843][472559] Updated weights for policy 0, policy_version 11225 (0.0008) +[2026-06-07 02:51:14,975][472559] Updated weights for policy 0, policy_version 11236 (0.0008) +[2026-06-07 02:51:15,109][472559] Updated weights for policy 0, policy_version 11248 (0.0008) +[2026-06-07 02:51:15,241][472559] Updated weights for policy 0, policy_version 11259 (0.0008) +[2026-06-07 02:51:15,777][472559] Updated weights for policy 0, policy_version 11269 (0.0008) +[2026-06-07 02:51:15,898][472559] Updated weights for policy 0, policy_version 11280 (0.0009) +[2026-06-07 02:51:16,020][472559] Updated weights for policy 0, policy_version 11291 (0.0008) +[2026-06-07 02:51:16,141][472559] Updated weights for policy 0, policy_version 11301 (0.0008) +[2026-06-07 02:51:16,263][472559] Updated weights for policy 0, policy_version 11312 (0.0009) +[2026-06-07 02:51:16,393][472559] Updated weights for policy 0, policy_version 11323 (0.0008) +[2026-06-07 02:51:16,929][472559] Updated weights for policy 0, policy_version 11333 (0.0008) +[2026-06-07 02:51:17,042][472559] Updated weights for policy 0, policy_version 11343 (0.0008) +[2026-06-07 02:51:17,171][472559] Updated weights for policy 0, policy_version 11355 (0.0008) +[2026-06-07 02:51:17,299][472559] Updated weights for policy 0, policy_version 11366 (0.0009) +[2026-06-07 02:51:17,414][472559] Updated weights for policy 0, policy_version 11376 (0.0008) +[2026-06-07 02:51:17,526][472559] Updated weights for policy 0, policy_version 11386 (0.0008) +[2026-06-07 02:51:18,075][472559] Updated weights for policy 0, policy_version 11396 (0.0008) +[2026-06-07 02:51:18,117][464927] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 27128.9). Total num frames: 5832704. Throughput: 0: 28208.3. Samples: 5813632. Policy #0 lag: (min: 63.0, avg: 75.1, max: 127.0) +[2026-06-07 02:51:18,118][464927] Avg episode reward: [(0, '276.459')] +[2026-06-07 02:51:18,186][472559] Updated weights for policy 0, policy_version 11406 (0.0008) +[2026-06-07 02:51:18,312][472559] Updated weights for policy 0, policy_version 11417 (0.0008) +[2026-06-07 02:51:18,422][472559] Updated weights for policy 0, policy_version 11427 (0.0009) +[2026-06-07 02:51:18,538][472559] Updated weights for policy 0, policy_version 11437 (0.0008) +[2026-06-07 02:51:18,652][472559] Updated weights for policy 0, policy_version 11447 (0.0008) +[2026-06-07 02:51:19,186][472559] Updated weights for policy 0, policy_version 11457 (0.0009) +[2026-06-07 02:51:19,317][472559] Updated weights for policy 0, policy_version 11469 (0.0008) +[2026-06-07 02:51:19,437][472559] Updated weights for policy 0, policy_version 11479 (0.0008) +[2026-06-07 02:51:19,573][472559] Updated weights for policy 0, policy_version 11491 (0.0008) +[2026-06-07 02:51:19,697][472559] Updated weights for policy 0, policy_version 11502 (0.0008) +[2026-06-07 02:51:19,817][472559] Updated weights for policy 0, policy_version 11513 (0.0008) +[2026-06-07 02:51:20,371][472559] Updated weights for policy 0, policy_version 11524 (0.0008) +[2026-06-07 02:51:20,500][472559] Updated weights for policy 0, policy_version 11535 (0.0009) +[2026-06-07 02:51:20,621][472559] Updated weights for policy 0, policy_version 11546 (0.0008) +[2026-06-07 02:51:20,734][472559] Updated weights for policy 0, policy_version 11556 (0.0008) +[2026-06-07 02:51:20,889][472559] Updated weights for policy 0, policy_version 11570 (0.0009) +[2026-06-07 02:51:20,999][472559] Updated weights for policy 0, policy_version 11580 (0.0008) +[2026-06-07 02:51:21,568][472559] Updated weights for policy 0, policy_version 11591 (0.0008) +[2026-06-07 02:51:21,689][472559] Updated weights for policy 0, policy_version 11602 (0.0008) +[2026-06-07 02:51:21,816][472559] Updated weights for policy 0, policy_version 11613 (0.0008) +[2026-06-07 02:51:21,923][472559] Updated weights for policy 0, policy_version 11623 (0.0008) +[2026-06-07 02:51:22,049][472559] Updated weights for policy 0, policy_version 11633 (0.0008) +[2026-06-07 02:51:22,158][472559] Updated weights for policy 0, policy_version 11643 (0.0008) +[2026-06-07 02:51:22,725][472559] Updated weights for policy 0, policy_version 11655 (0.0008) +[2026-06-07 02:51:22,861][472559] Updated weights for policy 0, policy_version 11667 (0.0009) +[2026-06-07 02:51:22,996][472559] Updated weights for policy 0, policy_version 11679 (0.0008) +[2026-06-07 02:51:23,109][472559] Updated weights for policy 0, policy_version 11689 (0.0008) +[2026-06-07 02:51:23,117][464927] Fps is (10 sec: 26214.3, 60 sec: 28398.9, 300 sec: 27108.1). Total num frames: 5963776. Throughput: 0: 28265.2. Samples: 5980544. Policy #0 lag: (min: 43.0, avg: 85.1, max: 107.0) +[2026-06-07 02:51:23,118][464927] Avg episode reward: [(0, '264.324')] +[2026-06-07 02:51:23,220][472559] Updated weights for policy 0, policy_version 11699 (0.0008) +[2026-06-07 02:51:23,336][472559] Updated weights for policy 0, policy_version 11709 (0.0008) +[2026-06-07 02:51:23,869][472559] Updated weights for policy 0, policy_version 11719 (0.0008) +[2026-06-07 02:51:23,997][472559] Updated weights for policy 0, policy_version 11731 (0.0008) +[2026-06-07 02:51:24,122][472559] Updated weights for policy 0, policy_version 11743 (0.0008) +[2026-06-07 02:51:24,257][472559] Updated weights for policy 0, policy_version 11755 (0.0008) +[2026-06-07 02:51:24,387][472559] Updated weights for policy 0, policy_version 11766 (0.0008) +[2026-06-07 02:51:24,940][472559] Updated weights for policy 0, policy_version 11777 (0.0008) +[2026-06-07 02:51:25,056][472559] Updated weights for policy 0, policy_version 11788 (0.0008) +[2026-06-07 02:51:25,173][472559] Updated weights for policy 0, policy_version 11799 (0.0005) +[2026-06-07 02:51:25,303][472559] Updated weights for policy 0, policy_version 11810 (0.0009) +[2026-06-07 02:51:25,420][472559] Updated weights for policy 0, policy_version 11820 (0.0008) +[2026-06-07 02:51:25,548][472559] Updated weights for policy 0, policy_version 11832 (0.0008) +[2026-06-07 02:51:26,123][472559] Updated weights for policy 0, policy_version 11843 (0.0007) +[2026-06-07 02:51:26,248][472559] Updated weights for policy 0, policy_version 11854 (0.0008) +[2026-06-07 02:51:26,370][472559] Updated weights for policy 0, policy_version 11865 (0.0008) +[2026-06-07 02:51:26,503][472559] Updated weights for policy 0, policy_version 11877 (0.0008) +[2026-06-07 02:51:26,617][472559] Updated weights for policy 0, policy_version 11887 (0.0008) +[2026-06-07 02:51:26,727][472559] Updated weights for policy 0, policy_version 11897 (0.0008) +[2026-06-07 02:51:27,296][472559] Updated weights for policy 0, policy_version 11908 (0.0008) +[2026-06-07 02:51:27,408][472559] Updated weights for policy 0, policy_version 11918 (0.0008) +[2026-06-07 02:51:27,521][472559] Updated weights for policy 0, policy_version 11928 (0.0008) +[2026-06-07 02:51:27,627][472559] Updated weights for policy 0, policy_version 11938 (0.0008) +[2026-06-07 02:51:27,767][472559] Updated weights for policy 0, policy_version 11950 (0.0008) +[2026-06-07 02:51:27,889][472559] Updated weights for policy 0, policy_version 11961 (0.0008) +[2026-06-07 02:51:28,117][464927] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 27233.9). Total num frames: 6127616. Throughput: 0: 28114.5. Samples: 6148096. Policy #0 lag: (min: 43.0, avg: 85.1, max: 107.0) +[2026-06-07 02:51:28,118][464927] Avg episode reward: [(0, '290.425')] +[2026-06-07 02:51:28,122][472025] Saving new best policy, reward=290.425! +[2026-06-07 02:51:28,460][472559] Updated weights for policy 0, policy_version 11973 (0.0008) +[2026-06-07 02:51:28,576][472559] Updated weights for policy 0, policy_version 11983 (0.0008) +[2026-06-07 02:51:28,698][472559] Updated weights for policy 0, policy_version 11994 (0.0008) +[2026-06-07 02:51:28,819][472559] Updated weights for policy 0, policy_version 12004 (0.0008) +[2026-06-07 02:51:28,926][472559] Updated weights for policy 0, policy_version 12014 (0.0008) +[2026-06-07 02:51:29,042][472559] Updated weights for policy 0, policy_version 12025 (0.0008) +[2026-06-07 02:51:29,599][472559] Updated weights for policy 0, policy_version 12035 (0.0008) +[2026-06-07 02:51:29,714][472559] Updated weights for policy 0, policy_version 12045 (0.0008) +[2026-06-07 02:51:29,836][472559] Updated weights for policy 0, policy_version 12056 (0.0008) +[2026-06-07 02:51:29,950][472559] Updated weights for policy 0, policy_version 12066 (0.0009) +[2026-06-07 02:51:30,073][472559] Updated weights for policy 0, policy_version 12077 (0.0008) +[2026-06-07 02:51:30,186][472559] Updated weights for policy 0, policy_version 12087 (0.0008) +[2026-06-07 02:51:30,751][472559] Updated weights for policy 0, policy_version 12098 (0.0008) +[2026-06-07 02:51:30,874][472559] Updated weights for policy 0, policy_version 12109 (0.0009) +[2026-06-07 02:51:30,986][472559] Updated weights for policy 0, policy_version 12119 (0.0009) +[2026-06-07 02:51:31,110][472559] Updated weights for policy 0, policy_version 12130 (0.0008) +[2026-06-07 02:51:31,235][472559] Updated weights for policy 0, policy_version 12141 (0.0008) +[2026-06-07 02:51:31,368][472559] Updated weights for policy 0, policy_version 12153 (0.0009) +[2026-06-07 02:51:31,932][472559] Updated weights for policy 0, policy_version 12164 (0.0008) +[2026-06-07 02:51:32,044][472559] Updated weights for policy 0, policy_version 12174 (0.0008) +[2026-06-07 02:51:32,166][472559] Updated weights for policy 0, policy_version 12185 (0.0008) +[2026-06-07 02:51:32,325][472559] Updated weights for policy 0, policy_version 12199 (0.0009) +[2026-06-07 02:51:32,447][472559] Updated weights for policy 0, policy_version 12210 (0.0010) +[2026-06-07 02:51:32,575][472559] Updated weights for policy 0, policy_version 12221 (0.0013) +[2026-06-07 02:51:33,115][472559] Updated weights for policy 0, policy_version 12232 (0.0010) +[2026-06-07 02:51:33,116][464927] Fps is (10 sec: 29491.4, 60 sec: 28398.9, 300 sec: 27211.7). Total num frames: 6258688. Throughput: 0: 28228.3. Samples: 6237056. Policy #0 lag: (min: 62.0, avg: 73.8, max: 126.0) +[2026-06-07 02:51:33,117][464927] Avg episode reward: [(0, '299.218')] +[2026-06-07 02:51:33,227][472559] Updated weights for policy 0, policy_version 12242 (0.0009) +[2026-06-07 02:51:33,357][472559] Updated weights for policy 0, policy_version 12254 (0.0008) +[2026-06-07 02:51:33,495][472559] Updated weights for policy 0, policy_version 12266 (0.0009) +[2026-06-07 02:51:33,608][472559] Updated weights for policy 0, policy_version 12276 (0.0009) +[2026-06-07 02:51:33,730][472559] Updated weights for policy 0, policy_version 12287 (0.0008) +[2026-06-07 02:51:33,742][472025] Saving new best policy, reward=299.218! +[2026-06-07 02:51:34,315][472559] Updated weights for policy 0, policy_version 12300 (0.0008) +[2026-06-07 02:51:34,453][472559] Updated weights for policy 0, policy_version 12312 (0.0009) +[2026-06-07 02:51:34,583][472559] Updated weights for policy 0, policy_version 12324 (0.0009) +[2026-06-07 02:51:34,712][472559] Updated weights for policy 0, policy_version 12335 (0.0009) +[2026-06-07 02:51:34,827][472559] Updated weights for policy 0, policy_version 12345 (0.0009) +[2026-06-07 02:51:35,407][472559] Updated weights for policy 0, policy_version 12357 (0.0008) +[2026-06-07 02:51:35,537][472559] Updated weights for policy 0, policy_version 12369 (0.0010) +[2026-06-07 02:51:35,649][472559] Updated weights for policy 0, policy_version 12379 (0.0008) +[2026-06-07 02:51:35,763][472559] Updated weights for policy 0, policy_version 12389 (0.0009) +[2026-06-07 02:51:35,893][472559] Updated weights for policy 0, policy_version 12401 (0.0009) +[2026-06-07 02:51:36,008][472559] Updated weights for policy 0, policy_version 12411 (0.0008) +[2026-06-07 02:51:36,555][472559] Updated weights for policy 0, policy_version 12422 (0.0008) +[2026-06-07 02:51:36,690][472559] Updated weights for policy 0, policy_version 12434 (0.0008) +[2026-06-07 02:51:36,814][472559] Updated weights for policy 0, policy_version 12445 (0.0009) +[2026-06-07 02:51:36,961][472559] Updated weights for policy 0, policy_version 12458 (0.0009) +[2026-06-07 02:51:37,100][472559] Updated weights for policy 0, policy_version 12470 (0.0009) +[2026-06-07 02:51:37,672][472559] Updated weights for policy 0, policy_version 12481 (0.0009) +[2026-06-07 02:51:37,793][472559] Updated weights for policy 0, policy_version 12492 (0.0006) +[2026-06-07 02:51:37,901][472559] Updated weights for policy 0, policy_version 12502 (0.0010) +[2026-06-07 02:51:38,041][472559] Updated weights for policy 0, policy_version 12514 (0.0006) +[2026-06-07 02:51:38,117][464927] Fps is (10 sec: 26214.5, 60 sec: 28398.9, 300 sec: 27190.5). Total num frames: 6389760. Throughput: 0: 28197.0. Samples: 6401664. Policy #0 lag: (min: 62.0, avg: 73.8, max: 126.0) +[2026-06-07 02:51:38,117][464927] Avg episode reward: [(0, '338.495')] +[2026-06-07 02:51:38,157][472559] Updated weights for policy 0, policy_version 12524 (0.0008) +[2026-06-07 02:51:38,268][472559] Updated weights for policy 0, policy_version 12534 (0.0008) +[2026-06-07 02:51:38,373][472025] Saving new best policy, reward=338.495! +[2026-06-07 02:51:38,376][472559] Updated weights for policy 0, policy_version 12544 (0.0008) +[2026-06-07 02:51:38,945][472559] Updated weights for policy 0, policy_version 12554 (0.0008) +[2026-06-07 02:51:39,055][472559] Updated weights for policy 0, policy_version 12564 (0.0008) +[2026-06-07 02:51:39,176][472559] Updated weights for policy 0, policy_version 12575 (0.0009) +[2026-06-07 02:51:39,296][472559] Updated weights for policy 0, policy_version 12586 (0.0008) +[2026-06-07 02:51:39,417][472559] Updated weights for policy 0, policy_version 12596 (0.0008) +[2026-06-07 02:51:39,533][472559] Updated weights for policy 0, policy_version 12606 (0.0008) +[2026-06-07 02:51:40,078][472559] Updated weights for policy 0, policy_version 12617 (0.0008) +[2026-06-07 02:51:40,213][472559] Updated weights for policy 0, policy_version 12629 (0.0008) +[2026-06-07 02:51:40,324][472559] Updated weights for policy 0, policy_version 12639 (0.0008) +[2026-06-07 02:51:40,439][472559] Updated weights for policy 0, policy_version 12649 (0.0008) +[2026-06-07 02:51:40,559][472559] Updated weights for policy 0, policy_version 12659 (0.0008) +[2026-06-07 02:51:40,699][472559] Updated weights for policy 0, policy_version 12672 (0.0008) +[2026-06-07 02:51:41,250][472559] Updated weights for policy 0, policy_version 12682 (0.0007) +[2026-06-07 02:51:41,363][472559] Updated weights for policy 0, policy_version 12692 (0.0004) +[2026-06-07 02:51:41,506][472559] Updated weights for policy 0, policy_version 12705 (0.0008) +[2026-06-07 02:51:41,628][472559] Updated weights for policy 0, policy_version 12716 (0.0008) +[2026-06-07 02:51:41,758][472559] Updated weights for policy 0, policy_version 12728 (0.0008) +[2026-06-07 02:51:42,321][472559] Updated weights for policy 0, policy_version 12740 (0.0007) +[2026-06-07 02:51:42,429][472559] Updated weights for policy 0, policy_version 12750 (0.0008) +[2026-06-07 02:51:42,563][472559] Updated weights for policy 0, policy_version 12762 (0.0007) +[2026-06-07 02:51:42,672][472559] Updated weights for policy 0, policy_version 12772 (0.0005) +[2026-06-07 02:51:42,802][472559] Updated weights for policy 0, policy_version 12783 (0.0005) +[2026-06-07 02:51:42,928][472559] Updated weights for policy 0, policy_version 12794 (0.0005) +[2026-06-07 02:51:43,117][464927] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 27306.7). Total num frames: 6553600. Throughput: 0: 28088.9. Samples: 6570240. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 02:51:43,117][464927] Avg episode reward: [(0, '316.652')] +[2026-06-07 02:51:43,477][472559] Updated weights for policy 0, policy_version 12804 (0.0006) +[2026-06-07 02:51:43,599][472559] Updated weights for policy 0, policy_version 12815 (0.0009) +[2026-06-07 02:51:43,706][472559] Updated weights for policy 0, policy_version 12825 (0.0008) +[2026-06-07 02:51:43,854][472559] Updated weights for policy 0, policy_version 12838 (0.0008) +[2026-06-07 02:51:43,966][472559] Updated weights for policy 0, policy_version 12848 (0.0008) +[2026-06-07 02:51:44,083][472559] Updated weights for policy 0, policy_version 12858 (0.0008) +[2026-06-07 02:51:44,631][472559] Updated weights for policy 0, policy_version 12868 (0.0008) +[2026-06-07 02:51:44,739][472559] Updated weights for policy 0, policy_version 12878 (0.0008) +[2026-06-07 02:51:44,853][472559] Updated weights for policy 0, policy_version 12888 (0.0009) +[2026-06-07 02:51:44,965][472559] Updated weights for policy 0, policy_version 12898 (0.0008) +[2026-06-07 02:51:45,075][472559] Updated weights for policy 0, policy_version 12908 (0.0008) +[2026-06-07 02:51:45,215][472559] Updated weights for policy 0, policy_version 12920 (0.0009) +[2026-06-07 02:51:45,780][472559] Updated weights for policy 0, policy_version 12932 (0.0009) +[2026-06-07 02:51:45,907][472559] Updated weights for policy 0, policy_version 12943 (0.0009) +[2026-06-07 02:51:46,029][472559] Updated weights for policy 0, policy_version 12954 (0.0009) +[2026-06-07 02:51:46,153][472559] Updated weights for policy 0, policy_version 12965 (0.0008) +[2026-06-07 02:51:46,262][472559] Updated weights for policy 0, policy_version 12975 (0.0008) +[2026-06-07 02:51:46,378][472559] Updated weights for policy 0, policy_version 12985 (0.0008) +[2026-06-07 02:51:46,927][472559] Updated weights for policy 0, policy_version 12995 (0.0008) +[2026-06-07 02:51:47,040][472559] Updated weights for policy 0, policy_version 13005 (0.0008) +[2026-06-07 02:51:47,150][472559] Updated weights for policy 0, policy_version 13015 (0.0005) +[2026-06-07 02:51:47,257][472559] Updated weights for policy 0, policy_version 13025 (0.0005) +[2026-06-07 02:51:47,382][472559] Updated weights for policy 0, policy_version 13036 (0.0006) +[2026-06-07 02:51:47,495][472559] Updated weights for policy 0, policy_version 13046 (0.0005) +[2026-06-07 02:51:47,612][472559] Updated weights for policy 0, policy_version 13056 (0.0005) +[2026-06-07 02:51:48,117][464927] Fps is (10 sec: 29490.9, 60 sec: 28398.9, 300 sec: 27284.4). Total num frames: 6684672. Throughput: 0: 28162.8. Samples: 6658944. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 02:51:48,118][464927] Avg episode reward: [(0, '316.833')] +[2026-06-07 02:51:48,141][472559] Updated weights for policy 0, policy_version 13067 (0.0008) +[2026-06-07 02:51:48,261][472559] Updated weights for policy 0, policy_version 13078 (0.0008) +[2026-06-07 02:51:48,387][472559] Updated weights for policy 0, policy_version 13089 (0.0009) +[2026-06-07 02:51:48,498][472559] Updated weights for policy 0, policy_version 13099 (0.0009) +[2026-06-07 02:51:48,603][472559] Updated weights for policy 0, policy_version 13109 (0.0008) +[2026-06-07 02:51:48,729][472559] Updated weights for policy 0, policy_version 13120 (0.0009) +[2026-06-07 02:51:49,300][472559] Updated weights for policy 0, policy_version 13131 (0.0009) +[2026-06-07 02:51:49,413][472559] Updated weights for policy 0, policy_version 13141 (0.0006) +[2026-06-07 02:51:49,562][472559] Updated weights for policy 0, policy_version 13154 (0.0008) +[2026-06-07 02:51:49,671][472559] Updated weights for policy 0, policy_version 13164 (0.0008) +[2026-06-07 02:51:49,800][472559] Updated weights for policy 0, policy_version 13175 (0.0008) +[2026-06-07 02:51:50,378][472559] Updated weights for policy 0, policy_version 13187 (0.0008) +[2026-06-07 02:51:50,519][472559] Updated weights for policy 0, policy_version 13200 (0.0008) +[2026-06-07 02:51:50,630][472559] Updated weights for policy 0, policy_version 13210 (0.0008) +[2026-06-07 02:51:50,758][472559] Updated weights for policy 0, policy_version 13221 (0.0008) +[2026-06-07 02:51:50,870][472559] Updated weights for policy 0, policy_version 13231 (0.0008) +[2026-06-07 02:51:50,984][472559] Updated weights for policy 0, policy_version 13241 (0.0008) +[2026-06-07 02:51:51,566][472559] Updated weights for policy 0, policy_version 13253 (0.0008) +[2026-06-07 02:51:51,679][472559] Updated weights for policy 0, policy_version 13263 (0.0008) +[2026-06-07 02:51:51,812][472559] Updated weights for policy 0, policy_version 13275 (0.0008) +[2026-06-07 02:51:51,947][472559] Updated weights for policy 0, policy_version 13287 (0.0008) +[2026-06-07 02:51:52,072][472559] Updated weights for policy 0, policy_version 13298 (0.0008) +[2026-06-07 02:51:52,203][472559] Updated weights for policy 0, policy_version 13310 (0.0008) +[2026-06-07 02:51:52,770][472559] Updated weights for policy 0, policy_version 13320 (0.0008) +[2026-06-07 02:51:52,914][472559] Updated weights for policy 0, policy_version 13333 (0.0008) +[2026-06-07 02:51:53,021][472559] Updated weights for policy 0, policy_version 13343 (0.0008) +[2026-06-07 02:51:53,117][464927] Fps is (10 sec: 26214.1, 60 sec: 28398.9, 300 sec: 27263.0). Total num frames: 6815744. Throughput: 0: 28177.0. Samples: 6825728. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 02:51:53,118][464927] Avg episode reward: [(0, '356.700')] +[2026-06-07 02:51:53,160][472559] Updated weights for policy 0, policy_version 13355 (0.0008) +[2026-06-07 02:51:53,278][472559] Updated weights for policy 0, policy_version 13366 (0.0008) +[2026-06-07 02:51:53,394][472025] Saving new best policy, reward=356.700! +[2026-06-07 02:51:53,829][472559] Updated weights for policy 0, policy_version 13377 (0.0008) +[2026-06-07 02:51:53,956][472559] Updated weights for policy 0, policy_version 13388 (0.0008) +[2026-06-07 02:51:54,072][472559] Updated weights for policy 0, policy_version 13399 (0.0008) +[2026-06-07 02:51:54,210][472559] Updated weights for policy 0, policy_version 13410 (0.0008) +[2026-06-07 02:51:54,326][472559] Updated weights for policy 0, policy_version 13420 (0.0008) +[2026-06-07 02:51:54,475][472559] Updated weights for policy 0, policy_version 13433 (0.0008) +[2026-06-07 02:51:55,016][472559] Updated weights for policy 0, policy_version 13443 (0.0008) +[2026-06-07 02:51:55,132][472559] Updated weights for policy 0, policy_version 13453 (0.0008) +[2026-06-07 02:51:55,268][472559] Updated weights for policy 0, policy_version 13465 (0.0008) +[2026-06-07 02:51:55,404][472559] Updated weights for policy 0, policy_version 13477 (0.0008) +[2026-06-07 02:51:55,535][472559] Updated weights for policy 0, policy_version 13489 (0.0008) +[2026-06-07 02:51:55,653][472559] Updated weights for policy 0, policy_version 13499 (0.0008) +[2026-06-07 02:51:56,206][472559] Updated weights for policy 0, policy_version 13509 (0.0009) +[2026-06-07 02:51:56,368][472559] Updated weights for policy 0, policy_version 13524 (0.0008) +[2026-06-07 02:51:56,493][472559] Updated weights for policy 0, policy_version 13535 (0.0007) +[2026-06-07 02:51:56,613][472559] Updated weights for policy 0, policy_version 13546 (0.0008) +[2026-06-07 02:51:56,731][472559] Updated weights for policy 0, policy_version 13557 (0.0008) +[2026-06-07 02:51:56,849][472559] Updated weights for policy 0, policy_version 13568 (0.0009) +[2026-06-07 02:51:57,405][472559] Updated weights for policy 0, policy_version 13578 (0.0008) +[2026-06-07 02:51:57,527][472559] Updated weights for policy 0, policy_version 13589 (0.0008) +[2026-06-07 02:51:57,637][472559] Updated weights for policy 0, policy_version 13599 (0.0008) +[2026-06-07 02:51:57,757][472559] Updated weights for policy 0, policy_version 13610 (0.0008) +[2026-06-07 02:51:57,878][472559] Updated weights for policy 0, policy_version 13621 (0.0010) +[2026-06-07 02:51:57,995][472559] Updated weights for policy 0, policy_version 13631 (0.0008) +[2026-06-07 02:51:58,116][464927] Fps is (10 sec: 29491.6, 60 sec: 28398.9, 300 sec: 27370.9). Total num frames: 6979584. Throughput: 0: 28185.6. Samples: 6993280. Policy #0 lag: (min: 26.0, avg: 42.4, max: 90.0) +[2026-06-07 02:51:58,117][464927] Avg episode reward: [(0, '357.375')] +[2026-06-07 02:51:58,121][472025] Saving new best policy, reward=357.375! +[2026-06-07 02:51:58,554][472559] Updated weights for policy 0, policy_version 13641 (0.0008) +[2026-06-07 02:51:58,671][472559] Updated weights for policy 0, policy_version 13652 (0.0008) +[2026-06-07 02:51:58,801][472559] Updated weights for policy 0, policy_version 13664 (0.0008) +[2026-06-07 02:51:58,921][472559] Updated weights for policy 0, policy_version 13675 (0.0008) +[2026-06-07 02:51:59,048][472559] Updated weights for policy 0, policy_version 13686 (0.0008) +[2026-06-07 02:51:59,160][472559] Updated weights for policy 0, policy_version 13696 (0.0004) +[2026-06-07 02:51:59,711][472559] Updated weights for policy 0, policy_version 13707 (0.0004) +[2026-06-07 02:51:59,823][472559] Updated weights for policy 0, policy_version 13717 (0.0004) +[2026-06-07 02:51:59,939][472559] Updated weights for policy 0, policy_version 13727 (0.0004) +[2026-06-07 02:52:00,057][472559] Updated weights for policy 0, policy_version 13737 (0.0004) +[2026-06-07 02:52:00,179][472559] Updated weights for policy 0, policy_version 13748 (0.0004) +[2026-06-07 02:52:00,754][472559] Updated weights for policy 0, policy_version 13762 (0.0004) +[2026-06-07 02:52:00,881][472559] Updated weights for policy 0, policy_version 13773 (0.0004) +[2026-06-07 02:52:01,008][472559] Updated weights for policy 0, policy_version 13784 (0.0004) +[2026-06-07 02:52:01,120][472559] Updated weights for policy 0, policy_version 13794 (0.0004) +[2026-06-07 02:52:01,251][472559] Updated weights for policy 0, policy_version 13806 (0.0004) +[2026-06-07 02:52:01,368][472559] Updated weights for policy 0, policy_version 13816 (0.0004) +[2026-06-07 02:52:01,930][472559] Updated weights for policy 0, policy_version 13829 (0.0005) +[2026-06-07 02:52:02,059][472559] Updated weights for policy 0, policy_version 13841 (0.0008) +[2026-06-07 02:52:02,169][472559] Updated weights for policy 0, policy_version 13851 (0.0008) +[2026-06-07 02:52:02,306][472559] Updated weights for policy 0, policy_version 13864 (0.0009) +[2026-06-07 02:52:02,446][472559] Updated weights for policy 0, policy_version 13877 (0.0009) +[2026-06-07 02:52:02,570][472559] Updated weights for policy 0, policy_version 13888 (0.0008) +[2026-06-07 02:52:03,117][464927] Fps is (10 sec: 29490.4, 60 sec: 28398.8, 300 sec: 27348.6). Total num frames: 7110656. Throughput: 0: 28239.4. Samples: 7084416. Policy #0 lag: (min: 26.0, avg: 42.4, max: 90.0) +[2026-06-07 02:52:03,118][464927] Avg episode reward: [(0, '358.382')] +[2026-06-07 02:52:03,154][472559] Updated weights for policy 0, policy_version 13900 (0.0008) +[2026-06-07 02:52:03,311][472559] Updated weights for policy 0, policy_version 13915 (0.0009) +[2026-06-07 02:52:03,433][472559] Updated weights for policy 0, policy_version 13926 (0.0008) +[2026-06-07 02:52:03,581][472559] Updated weights for policy 0, policy_version 13939 (0.0008) +[2026-06-07 02:52:03,685][472559] Updated weights for policy 0, policy_version 13949 (0.0004) +[2026-06-07 02:52:03,714][472025] Saving new best policy, reward=358.382! +[2026-06-07 02:52:04,288][472559] Updated weights for policy 0, policy_version 13961 (0.0007) +[2026-06-07 02:52:04,398][472559] Updated weights for policy 0, policy_version 13972 (0.0008) +[2026-06-07 02:52:04,538][472559] Updated weights for policy 0, policy_version 13985 (0.0008) +[2026-06-07 02:52:04,662][472559] Updated weights for policy 0, policy_version 13997 (0.0008) +[2026-06-07 02:52:04,791][472559] Updated weights for policy 0, policy_version 14009 (0.0009) +[2026-06-07 02:52:05,392][472559] Updated weights for policy 0, policy_version 14021 (0.0006) +[2026-06-07 02:52:05,516][472559] Updated weights for policy 0, policy_version 14033 (0.0008) +[2026-06-07 02:52:05,646][472559] Updated weights for policy 0, policy_version 14045 (0.0008) +[2026-06-07 02:52:05,763][472559] Updated weights for policy 0, policy_version 14056 (0.0009) +[2026-06-07 02:52:05,903][472559] Updated weights for policy 0, policy_version 14069 (0.0009) +[2026-06-07 02:52:06,502][472559] Updated weights for policy 0, policy_version 14081 (0.0009) +[2026-06-07 02:52:06,627][472559] Updated weights for policy 0, policy_version 14092 (0.0008) +[2026-06-07 02:52:06,732][472559] Updated weights for policy 0, policy_version 14102 (0.0008) +[2026-06-07 02:52:06,866][472559] Updated weights for policy 0, policy_version 14115 (0.0008) +[2026-06-07 02:52:06,983][472559] Updated weights for policy 0, policy_version 14125 (0.0008) +[2026-06-07 02:52:07,129][472559] Updated weights for policy 0, policy_version 14139 (0.0009) +[2026-06-07 02:52:07,719][472559] Updated weights for policy 0, policy_version 14150 (0.0007) +[2026-06-07 02:52:07,844][472559] Updated weights for policy 0, policy_version 14162 (0.0009) +[2026-06-07 02:52:07,977][472559] Updated weights for policy 0, policy_version 14175 (0.0009) +[2026-06-07 02:52:08,117][464927] Fps is (10 sec: 26214.3, 60 sec: 28398.9, 300 sec: 27327.3). Total num frames: 7241728. Throughput: 0: 28234.0. Samples: 7251072. Policy #0 lag: (min: 63.0, avg: 73.9, max: 127.0) +[2026-06-07 02:52:08,118][464927] Avg episode reward: [(0, '377.721')] +[2026-06-07 02:52:08,118][472559] Updated weights for policy 0, policy_version 14188 (0.0009) +[2026-06-07 02:52:08,262][472559] Updated weights for policy 0, policy_version 14201 (0.0009) +[2026-06-07 02:52:08,335][472025] Saving new best policy, reward=377.721! +[2026-06-07 02:52:08,865][472559] Updated weights for policy 0, policy_version 14212 (0.0008) +[2026-06-07 02:52:08,988][472559] Updated weights for policy 0, policy_version 14223 (0.0008) +[2026-06-07 02:52:09,106][472559] Updated weights for policy 0, policy_version 14233 (0.0008) +[2026-06-07 02:52:09,237][472559] Updated weights for policy 0, policy_version 14245 (0.0008) +[2026-06-07 02:52:09,359][472559] Updated weights for policy 0, policy_version 14256 (0.0008) +[2026-06-07 02:52:09,504][472559] Updated weights for policy 0, policy_version 14269 (0.0008) +[2026-06-07 02:52:10,088][472559] Updated weights for policy 0, policy_version 14280 (0.0008) +[2026-06-07 02:52:10,202][472559] Updated weights for policy 0, policy_version 14290 (0.0008) +[2026-06-07 02:52:10,339][472559] Updated weights for policy 0, policy_version 14302 (0.0009) +[2026-06-07 02:52:10,452][472559] Updated weights for policy 0, policy_version 14312 (0.0008) +[2026-06-07 02:52:10,561][472559] Updated weights for policy 0, policy_version 14322 (0.0008) +[2026-06-07 02:52:10,679][472559] Updated weights for policy 0, policy_version 14332 (0.0008) +[2026-06-07 02:52:11,239][472559] Updated weights for policy 0, policy_version 14342 (0.0008) +[2026-06-07 02:52:11,358][472559] Updated weights for policy 0, policy_version 14353 (0.0009) +[2026-06-07 02:52:11,482][472559] Updated weights for policy 0, policy_version 14364 (0.0008) +[2026-06-07 02:52:11,606][472559] Updated weights for policy 0, policy_version 14375 (0.0008) +[2026-06-07 02:52:11,722][472559] Updated weights for policy 0, policy_version 14385 (0.0009) +[2026-06-07 02:52:11,830][472559] Updated weights for policy 0, policy_version 14395 (0.0008) +[2026-06-07 02:52:12,447][472559] Updated weights for policy 0, policy_version 14407 (0.0008) +[2026-06-07 02:52:12,570][472559] Updated weights for policy 0, policy_version 14418 (0.0008) +[2026-06-07 02:52:12,693][472559] Updated weights for policy 0, policy_version 14429 (0.0008) +[2026-06-07 02:52:12,806][472559] Updated weights for policy 0, policy_version 14439 (0.0008) +[2026-06-07 02:52:12,920][472559] Updated weights for policy 0, policy_version 14449 (0.0009) +[2026-06-07 02:52:13,043][472559] Updated weights for policy 0, policy_version 14460 (0.0008) +[2026-06-07 02:52:13,117][464927] Fps is (10 sec: 29492.2, 60 sec: 28398.9, 300 sec: 27428.0). Total num frames: 7405568. Throughput: 0: 28293.7. Samples: 7421312. Policy #0 lag: (min: 63.0, avg: 73.9, max: 127.0) +[2026-06-07 02:52:13,118][464927] Avg episode reward: [(0, '392.501')] +[2026-06-07 02:52:13,122][472025] Saving new best policy, reward=392.501! +[2026-06-07 02:52:13,614][472559] Updated weights for policy 0, policy_version 14472 (0.0008) +[2026-06-07 02:52:13,724][472559] Updated weights for policy 0, policy_version 14483 (0.0006) +[2026-06-07 02:52:13,843][472559] Updated weights for policy 0, policy_version 14494 (0.0004) +[2026-06-07 02:52:13,975][472559] Updated weights for policy 0, policy_version 14506 (0.0004) +[2026-06-07 02:52:14,108][472559] Updated weights for policy 0, policy_version 14518 (0.0004) +[2026-06-07 02:52:14,674][472559] Updated weights for policy 0, policy_version 14529 (0.0005) +[2026-06-07 02:52:14,785][472559] Updated weights for policy 0, policy_version 14539 (0.0008) +[2026-06-07 02:52:14,927][472559] Updated weights for policy 0, policy_version 14552 (0.0009) +[2026-06-07 02:52:15,055][472559] Updated weights for policy 0, policy_version 14563 (0.0008) +[2026-06-07 02:52:15,167][472559] Updated weights for policy 0, policy_version 14573 (0.0008) +[2026-06-07 02:52:15,282][472559] Updated weights for policy 0, policy_version 14583 (0.0008) +[2026-06-07 02:52:15,853][472559] Updated weights for policy 0, policy_version 14593 (0.0008) +[2026-06-07 02:52:15,972][472559] Updated weights for policy 0, policy_version 14604 (0.0008) +[2026-06-07 02:52:16,083][472559] Updated weights for policy 0, policy_version 14614 (0.0008) +[2026-06-07 02:52:16,251][472559] Updated weights for policy 0, policy_version 14629 (0.0008) +[2026-06-07 02:52:16,372][472559] Updated weights for policy 0, policy_version 14640 (0.0008) +[2026-06-07 02:52:16,487][472559] Updated weights for policy 0, policy_version 14650 (0.0008) +[2026-06-07 02:52:17,020][472559] Updated weights for policy 0, policy_version 14660 (0.0008) +[2026-06-07 02:52:17,150][472559] Updated weights for policy 0, policy_version 14672 (0.0008) +[2026-06-07 02:52:17,282][472559] Updated weights for policy 0, policy_version 14684 (0.0008) +[2026-06-07 02:52:17,401][472559] Updated weights for policy 0, policy_version 14694 (0.0008) +[2026-06-07 02:52:17,525][472559] Updated weights for policy 0, policy_version 14705 (0.0008) +[2026-06-07 02:52:17,653][472559] Updated weights for policy 0, policy_version 14716 (0.0008) +[2026-06-07 02:52:18,116][464927] Fps is (10 sec: 29491.3, 60 sec: 28399.0, 300 sec: 27406.0). Total num frames: 7536640. Throughput: 0: 28362.0. Samples: 7513344. Policy #0 lag: (min: 37.0, avg: 49.1, max: 101.0) +[2026-06-07 02:52:18,117][464927] Avg episode reward: [(0, '378.571')] +[2026-06-07 02:52:18,210][472559] Updated weights for policy 0, policy_version 14727 (0.0008) +[2026-06-07 02:52:18,334][472559] Updated weights for policy 0, policy_version 14738 (0.0008) +[2026-06-07 02:52:18,452][472559] Updated weights for policy 0, policy_version 14749 (0.0009) +[2026-06-07 02:52:18,597][472559] Updated weights for policy 0, policy_version 14762 (0.0008) +[2026-06-07 02:52:18,718][472559] Updated weights for policy 0, policy_version 14773 (0.0008) +[2026-06-07 02:52:18,832][472559] Updated weights for policy 0, policy_version 14783 (0.0008) +[2026-06-07 02:52:19,424][472559] Updated weights for policy 0, policy_version 14797 (0.0005) +[2026-06-07 02:52:19,551][472559] Updated weights for policy 0, policy_version 14809 (0.0004) +[2026-06-07 02:52:19,689][472559] Updated weights for policy 0, policy_version 14822 (0.0006) +[2026-06-07 02:52:19,797][472559] Updated weights for policy 0, policy_version 14832 (0.0006) +[2026-06-07 02:52:19,918][472559] Updated weights for policy 0, policy_version 14843 (0.0005) +[2026-06-07 02:52:20,487][472559] Updated weights for policy 0, policy_version 14853 (0.0006) +[2026-06-07 02:52:20,623][472559] Updated weights for policy 0, policy_version 14866 (0.0009) +[2026-06-07 02:52:20,731][472559] Updated weights for policy 0, policy_version 14876 (0.0009) +[2026-06-07 02:52:20,862][472559] Updated weights for policy 0, policy_version 14887 (0.0008) +[2026-06-07 02:52:20,999][472559] Updated weights for policy 0, policy_version 14899 (0.0008) +[2026-06-07 02:52:21,130][472559] Updated weights for policy 0, policy_version 14911 (0.0005) +[2026-06-07 02:52:21,691][472559] Updated weights for policy 0, policy_version 14922 (0.0007) +[2026-06-07 02:52:21,813][472559] Updated weights for policy 0, policy_version 14933 (0.0009) +[2026-06-07 02:52:21,932][472559] Updated weights for policy 0, policy_version 14944 (0.0008) +[2026-06-07 02:52:22,065][472559] Updated weights for policy 0, policy_version 14956 (0.0008) +[2026-06-07 02:52:22,208][472559] Updated weights for policy 0, policy_version 14968 (0.0008) +[2026-06-07 02:52:22,753][472559] Updated weights for policy 0, policy_version 14978 (0.0008) +[2026-06-07 02:52:22,875][472559] Updated weights for policy 0, policy_version 14989 (0.0004) +[2026-06-07 02:52:23,037][472559] Updated weights for policy 0, policy_version 15004 (0.0005) +[2026-06-07 02:52:23,117][464927] Fps is (10 sec: 26214.4, 60 sec: 28398.9, 300 sec: 27384.7). Total num frames: 7667712. Throughput: 0: 28433.0. Samples: 7681152. Policy #0 lag: (min: 37.0, avg: 49.1, max: 101.0) +[2026-06-07 02:52:23,118][464927] Avg episode reward: [(0, '431.577')] +[2026-06-07 02:52:23,166][472559] Updated weights for policy 0, policy_version 15015 (0.0008) +[2026-06-07 02:52:23,294][472559] Updated weights for policy 0, policy_version 15026 (0.0008) +[2026-06-07 02:52:23,419][472559] Updated weights for policy 0, policy_version 15037 (0.0008) +[2026-06-07 02:52:23,447][472025] Saving new best policy, reward=431.577! +[2026-06-07 02:52:23,973][472559] Updated weights for policy 0, policy_version 15048 (0.0008) +[2026-06-07 02:52:24,093][472559] Updated weights for policy 0, policy_version 15059 (0.0008) +[2026-06-07 02:52:24,209][472559] Updated weights for policy 0, policy_version 15069 (0.0008) +[2026-06-07 02:52:24,340][472559] Updated weights for policy 0, policy_version 15081 (0.0008) +[2026-06-07 02:52:24,458][472559] Updated weights for policy 0, policy_version 15091 (0.0008) +[2026-06-07 02:52:24,585][472559] Updated weights for policy 0, policy_version 15103 (0.0008) +[2026-06-07 02:52:25,157][472559] Updated weights for policy 0, policy_version 15114 (0.0008) +[2026-06-07 02:52:25,280][472559] Updated weights for policy 0, policy_version 15125 (0.0008) +[2026-06-07 02:52:25,402][472559] Updated weights for policy 0, policy_version 15136 (0.0008) +[2026-06-07 02:52:25,535][472559] Updated weights for policy 0, policy_version 15148 (0.0008) +[2026-06-07 02:52:25,652][472559] Updated weights for policy 0, policy_version 15158 (0.0008) +[2026-06-07 02:52:26,210][472559] Updated weights for policy 0, policy_version 15170 (0.0008) +[2026-06-07 02:52:26,330][472559] Updated weights for policy 0, policy_version 15181 (0.0008) +[2026-06-07 02:52:26,464][472559] Updated weights for policy 0, policy_version 15193 (0.0008) +[2026-06-07 02:52:26,578][472559] Updated weights for policy 0, policy_version 15203 (0.0008) +[2026-06-07 02:52:26,698][472559] Updated weights for policy 0, policy_version 15214 (0.0008) +[2026-06-07 02:52:26,847][472559] Updated weights for policy 0, policy_version 15227 (0.0008) +[2026-06-07 02:52:27,415][472559] Updated weights for policy 0, policy_version 15237 (0.0007) +[2026-06-07 02:52:27,525][472559] Updated weights for policy 0, policy_version 15247 (0.0008) +[2026-06-07 02:52:27,640][472559] Updated weights for policy 0, policy_version 15257 (0.0008) +[2026-06-07 02:52:27,770][472559] Updated weights for policy 0, policy_version 15269 (0.0008) +[2026-06-07 02:52:27,883][472559] Updated weights for policy 0, policy_version 15279 (0.0008) +[2026-06-07 02:52:27,998][472559] Updated weights for policy 0, policy_version 15289 (0.0008) +[2026-06-07 02:52:28,117][464927] Fps is (10 sec: 29490.9, 60 sec: 28398.9, 300 sec: 27479.1). Total num frames: 7831552. Throughput: 0: 28404.6. Samples: 7848448. Policy #0 lag: (min: 37.0, avg: 49.1, max: 101.0) +[2026-06-07 02:52:28,118][464927] Avg episode reward: [(0, '420.376')] +[2026-06-07 02:52:28,560][472559] Updated weights for policy 0, policy_version 15300 (0.0008) +[2026-06-07 02:52:28,686][472559] Updated weights for policy 0, policy_version 15312 (0.0008) +[2026-06-07 02:52:28,838][472559] Updated weights for policy 0, policy_version 15325 (0.0008) +[2026-06-07 02:52:28,953][472559] Updated weights for policy 0, policy_version 15335 (0.0008) +[2026-06-07 02:52:29,061][472559] Updated weights for policy 0, policy_version 15345 (0.0008) +[2026-06-07 02:52:29,187][472559] Updated weights for policy 0, policy_version 15356 (0.0008) +[2026-06-07 02:52:29,753][472559] Updated weights for policy 0, policy_version 15366 (0.0008) +[2026-06-07 02:52:29,866][472559] Updated weights for policy 0, policy_version 15376 (0.0008) +[2026-06-07 02:52:29,979][472559] Updated weights for policy 0, policy_version 15386 (0.0008) +[2026-06-07 02:52:30,086][472559] Updated weights for policy 0, policy_version 15396 (0.0008) +[2026-06-07 02:52:30,224][472559] Updated weights for policy 0, policy_version 15408 (0.0008) +[2026-06-07 02:52:30,361][472559] Updated weights for policy 0, policy_version 15420 (0.0008) +[2026-06-07 02:52:30,918][472559] Updated weights for policy 0, policy_version 15430 (0.0007) +[2026-06-07 02:52:31,025][472559] Updated weights for policy 0, policy_version 15440 (0.0008) +[2026-06-07 02:52:31,136][472559] Updated weights for policy 0, policy_version 15450 (0.0008) +[2026-06-07 02:52:31,255][472559] Updated weights for policy 0, policy_version 15460 (0.0008) +[2026-06-07 02:52:31,366][472559] Updated weights for policy 0, policy_version 15470 (0.0008) +[2026-06-07 02:52:31,475][472559] Updated weights for policy 0, policy_version 15480 (0.0008) +[2026-06-07 02:52:32,048][472559] Updated weights for policy 0, policy_version 15491 (0.0008) +[2026-06-07 02:52:32,171][472559] Updated weights for policy 0, policy_version 15502 (0.0008) +[2026-06-07 02:52:32,293][472559] Updated weights for policy 0, policy_version 15513 (0.0008) +[2026-06-07 02:52:32,417][472559] Updated weights for policy 0, policy_version 15524 (0.0008) +[2026-06-07 02:52:32,527][472559] Updated weights for policy 0, policy_version 15534 (0.0008) +[2026-06-07 02:52:32,640][472559] Updated weights for policy 0, policy_version 15544 (0.0008) +[2026-06-07 02:52:33,117][464927] Fps is (10 sec: 29491.4, 60 sec: 28398.9, 300 sec: 27457.3). Total num frames: 7962624. Throughput: 0: 28475.8. Samples: 7940352. Policy #0 lag: (min: 24.0, avg: 57.6, max: 88.0) +[2026-06-07 02:52:33,117][464927] Avg episode reward: [(0, '436.516')] +[2026-06-07 02:52:33,122][472025] Saving new best policy, reward=436.516! +[2026-06-07 02:52:33,233][472559] Updated weights for policy 0, policy_version 15554 (0.0008) +[2026-06-07 02:52:33,343][472559] Updated weights for policy 0, policy_version 15564 (0.0005) +[2026-06-07 02:52:33,458][472559] Updated weights for policy 0, policy_version 15574 (0.0009) +[2026-06-07 02:52:33,566][472559] Updated weights for policy 0, policy_version 15584 (0.0008) +[2026-06-07 02:52:33,685][472559] Updated weights for policy 0, policy_version 15594 (0.0009) +[2026-06-07 02:52:33,794][472559] Updated weights for policy 0, policy_version 15604 (0.0008) +[2026-06-07 02:52:33,913][472559] Updated weights for policy 0, policy_version 15614 (0.0009) +[2026-06-07 02:52:34,450][472559] Updated weights for policy 0, policy_version 15626 (0.0008) +[2026-06-07 02:52:34,558][472559] Updated weights for policy 0, policy_version 15636 (0.0008) +[2026-06-07 02:52:34,681][472559] Updated weights for policy 0, policy_version 15647 (0.0008) +[2026-06-07 02:52:34,810][472559] Updated weights for policy 0, policy_version 15658 (0.0008) +[2026-06-07 02:52:34,930][472559] Updated weights for policy 0, policy_version 15669 (0.0008) +[2026-06-07 02:52:35,486][472559] Updated weights for policy 0, policy_version 15681 (0.0009) +[2026-06-07 02:52:35,621][472559] Updated weights for policy 0, policy_version 15693 (0.0008) +[2026-06-07 02:52:35,732][472559] Updated weights for policy 0, policy_version 15703 (0.0008) +[2026-06-07 02:52:35,846][472559] Updated weights for policy 0, policy_version 15713 (0.0009) +[2026-06-07 02:52:35,966][472559] Updated weights for policy 0, policy_version 15724 (0.0008) +[2026-06-07 02:52:36,101][472559] Updated weights for policy 0, policy_version 15736 (0.0009) +[2026-06-07 02:52:36,646][472559] Updated weights for policy 0, policy_version 15746 (0.0008) +[2026-06-07 02:52:36,790][472559] Updated weights for policy 0, policy_version 15759 (0.0008) +[2026-06-07 02:52:36,924][472559] Updated weights for policy 0, policy_version 15771 (0.0007) +[2026-06-07 02:52:37,049][472559] Updated weights for policy 0, policy_version 15782 (0.0006) +[2026-06-07 02:52:37,177][472559] Updated weights for policy 0, policy_version 15793 (0.0009) +[2026-06-07 02:52:37,300][472559] Updated weights for policy 0, policy_version 15804 (0.0008) +[2026-06-07 02:52:37,850][472559] Updated weights for policy 0, policy_version 15814 (0.0008) +[2026-06-07 02:52:37,981][472559] Updated weights for policy 0, policy_version 15826 (0.0008) +[2026-06-07 02:52:38,094][472559] Updated weights for policy 0, policy_version 15836 (0.0008) +[2026-06-07 02:52:38,117][464927] Fps is (10 sec: 26214.6, 60 sec: 28398.9, 300 sec: 27436.3). Total num frames: 8093696. Throughput: 0: 28464.4. Samples: 8106624. Policy #0 lag: (min: 24.0, avg: 57.6, max: 88.0) +[2026-06-07 02:52:38,117][464927] Avg episode reward: [(0, '426.672')] +[2026-06-07 02:52:38,207][472559] Updated weights for policy 0, policy_version 15846 (0.0008) +[2026-06-07 02:52:38,330][472559] Updated weights for policy 0, policy_version 15857 (0.0008) +[2026-06-07 02:52:38,454][472559] Updated weights for policy 0, policy_version 15868 (0.0008) +[2026-06-07 02:52:39,023][472559] Updated weights for policy 0, policy_version 15879 (0.0008) +[2026-06-07 02:52:39,132][472559] Updated weights for policy 0, policy_version 15889 (0.0008) +[2026-06-07 02:52:39,243][472559] Updated weights for policy 0, policy_version 15899 (0.0008) +[2026-06-07 02:52:39,361][472559] Updated weights for policy 0, policy_version 15910 (0.0008) +[2026-06-07 02:52:39,499][472559] Updated weights for policy 0, policy_version 15922 (0.0008) +[2026-06-07 02:52:39,615][472559] Updated weights for policy 0, policy_version 15932 (0.0008) +[2026-06-07 02:52:40,169][472559] Updated weights for policy 0, policy_version 15942 (0.0007) +[2026-06-07 02:52:40,283][472559] Updated weights for policy 0, policy_version 15952 (0.0008) +[2026-06-07 02:52:40,397][472559] Updated weights for policy 0, policy_version 15962 (0.0008) +[2026-06-07 02:52:40,517][472559] Updated weights for policy 0, policy_version 15973 (0.0008) +[2026-06-07 02:52:40,636][472559] Updated weights for policy 0, policy_version 15983 (0.0008) +[2026-06-07 02:52:40,752][472559] Updated weights for policy 0, policy_version 15993 (0.0008) +[2026-06-07 02:52:41,289][472559] Updated weights for policy 0, policy_version 16003 (0.0008) +[2026-06-07 02:52:41,412][472559] Updated weights for policy 0, policy_version 16014 (0.0008) +[2026-06-07 02:52:41,535][472559] Updated weights for policy 0, policy_version 16025 (0.0008) +[2026-06-07 02:52:41,650][472559] Updated weights for policy 0, policy_version 16035 (0.0008) +[2026-06-07 02:52:41,759][472559] Updated weights for policy 0, policy_version 16045 (0.0008) +[2026-06-07 02:52:41,872][472559] Updated weights for policy 0, policy_version 16055 (0.0008) +[2026-06-07 02:52:42,419][472559] Updated weights for policy 0, policy_version 16065 (0.0008) +[2026-06-07 02:52:42,531][472559] Updated weights for policy 0, policy_version 16075 (0.0008) +[2026-06-07 02:52:42,667][472559] Updated weights for policy 0, policy_version 16087 (0.0008) +[2026-06-07 02:52:42,776][472559] Updated weights for policy 0, policy_version 16097 (0.0008) +[2026-06-07 02:52:42,898][472559] Updated weights for policy 0, policy_version 16108 (0.0009) +[2026-06-07 02:52:43,016][472559] Updated weights for policy 0, policy_version 16118 (0.0009) +[2026-06-07 02:52:43,117][464927] Fps is (10 sec: 26213.8, 60 sec: 27852.7, 300 sec: 27769.5). Total num frames: 8224768. Throughput: 0: 28461.4. Samples: 8274048. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 02:52:43,118][464927] Avg episode reward: [(0, '384.959')] +[2026-06-07 02:52:43,130][472559] Updated weights for policy 0, policy_version 16128 (0.0009) +[2026-06-07 02:52:43,674][472559] Updated weights for policy 0, policy_version 16139 (0.0008) +[2026-06-07 02:52:43,788][472559] Updated weights for policy 0, policy_version 16149 (0.0008) +[2026-06-07 02:52:43,916][472559] Updated weights for policy 0, policy_version 16160 (0.0008) +[2026-06-07 02:52:44,039][472559] Updated weights for policy 0, policy_version 16171 (0.0008) +[2026-06-07 02:52:44,175][472559] Updated weights for policy 0, policy_version 16183 (0.0008) +[2026-06-07 02:52:44,738][472559] Updated weights for policy 0, policy_version 16193 (0.0008) +[2026-06-07 02:52:44,867][472559] Updated weights for policy 0, policy_version 16204 (0.0008) +[2026-06-07 02:52:44,977][472559] Updated weights for policy 0, policy_version 16214 (0.0008) +[2026-06-07 02:52:45,090][472559] Updated weights for policy 0, policy_version 16224 (0.0008) +[2026-06-07 02:52:45,220][472559] Updated weights for policy 0, policy_version 16236 (0.0008) +[2026-06-07 02:52:45,340][472559] Updated weights for policy 0, policy_version 16246 (0.0008) +[2026-06-07 02:52:45,912][472559] Updated weights for policy 0, policy_version 16259 (0.0008) +[2026-06-07 02:52:46,022][472559] Updated weights for policy 0, policy_version 16269 (0.0008) +[2026-06-07 02:52:46,152][472559] Updated weights for policy 0, policy_version 16280 (0.0008) +[2026-06-07 02:52:46,271][472559] Updated weights for policy 0, policy_version 16291 (0.0007) +[2026-06-07 02:52:46,391][472559] Updated weights for policy 0, policy_version 16301 (0.0008) +[2026-06-07 02:52:46,501][472559] Updated weights for policy 0, policy_version 16311 (0.0008) +[2026-06-07 02:52:47,050][472559] Updated weights for policy 0, policy_version 16322 (0.0008) +[2026-06-07 02:52:47,171][472559] Updated weights for policy 0, policy_version 16333 (0.0008) +[2026-06-07 02:52:47,278][472559] Updated weights for policy 0, policy_version 16343 (0.0008) +[2026-06-07 02:52:47,416][472559] Updated weights for policy 0, policy_version 16355 (0.0008) +[2026-06-07 02:52:47,534][472559] Updated weights for policy 0, policy_version 16365 (0.0008) +[2026-06-07 02:52:47,644][472559] Updated weights for policy 0, policy_version 16375 (0.0008) +[2026-06-07 02:52:48,117][464927] Fps is (10 sec: 29491.2, 60 sec: 28399.0, 300 sec: 27769.5). Total num frames: 8388608. Throughput: 0: 28456.1. Samples: 8364928. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 02:52:48,118][464927] Avg episode reward: [(0, '425.653')] +[2026-06-07 02:52:48,199][472559] Updated weights for policy 0, policy_version 16386 (0.0008) +[2026-06-07 02:52:48,333][472559] Updated weights for policy 0, policy_version 16398 (0.0008) +[2026-06-07 02:52:48,450][472559] Updated weights for policy 0, policy_version 16408 (0.0008) +[2026-06-07 02:52:48,568][472559] Updated weights for policy 0, policy_version 16419 (0.0008) +[2026-06-07 02:52:48,714][472559] Updated weights for policy 0, policy_version 16432 (0.0008) +[2026-06-07 02:52:48,851][472559] Updated weights for policy 0, policy_version 16444 (0.0008) +[2026-06-07 02:52:49,400][472559] Updated weights for policy 0, policy_version 16454 (0.0008) +[2026-06-07 02:52:49,553][472559] Updated weights for policy 0, policy_version 16468 (0.0008) +[2026-06-07 02:52:49,662][472559] Updated weights for policy 0, policy_version 16478 (0.0008) +[2026-06-07 02:52:49,776][472559] Updated weights for policy 0, policy_version 16488 (0.0009) +[2026-06-07 02:52:49,898][472559] Updated weights for policy 0, policy_version 16499 (0.0009) +[2026-06-07 02:52:50,026][472559] Updated weights for policy 0, policy_version 16510 (0.0008) +[2026-06-07 02:52:50,581][472559] Updated weights for policy 0, policy_version 16521 (0.0008) +[2026-06-07 02:52:50,700][472559] Updated weights for policy 0, policy_version 16531 (0.0008) +[2026-06-07 02:52:50,821][472559] Updated weights for policy 0, policy_version 16542 (0.0008) +[2026-06-07 02:52:50,945][472559] Updated weights for policy 0, policy_version 16553 (0.0008) +[2026-06-07 02:52:51,062][472559] Updated weights for policy 0, policy_version 16563 (0.0008) +[2026-06-07 02:52:51,187][472559] Updated weights for policy 0, policy_version 16574 (0.0008) +[2026-06-07 02:52:51,742][472559] Updated weights for policy 0, policy_version 16586 (0.0008) +[2026-06-07 02:52:51,867][472559] Updated weights for policy 0, policy_version 16597 (0.0008) +[2026-06-07 02:52:51,990][472559] Updated weights for policy 0, policy_version 16608 (0.0008) +[2026-06-07 02:52:52,103][472559] Updated weights for policy 0, policy_version 16618 (0.0008) +[2026-06-07 02:52:52,221][472559] Updated weights for policy 0, policy_version 16628 (0.0008) +[2026-06-07 02:52:52,348][472559] Updated weights for policy 0, policy_version 16639 (0.0009) +[2026-06-07 02:52:52,896][472559] Updated weights for policy 0, policy_version 16649 (0.0008) +[2026-06-07 02:52:53,019][472559] Updated weights for policy 0, policy_version 16659 (0.0008) +[2026-06-07 02:52:53,117][464927] Fps is (10 sec: 29491.9, 60 sec: 28399.0, 300 sec: 27658.4). Total num frames: 8519680. Throughput: 0: 28450.2. Samples: 8531328. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 02:52:53,117][464927] Avg episode reward: [(0, '469.876')] +[2026-06-07 02:52:53,136][472559] Updated weights for policy 0, policy_version 16670 (0.0008) +[2026-06-07 02:52:53,251][472559] Updated weights for policy 0, policy_version 16680 (0.0008) +[2026-06-07 02:52:53,376][472559] Updated weights for policy 0, policy_version 16691 (0.0008) +[2026-06-07 02:52:53,499][472559] Updated weights for policy 0, policy_version 16701 (0.0009) +[2026-06-07 02:52:53,529][472025] Saving new best policy, reward=469.876! +[2026-06-07 02:52:54,018][472559] Updated weights for policy 0, policy_version 16711 (0.0008) +[2026-06-07 02:52:54,131][472559] Updated weights for policy 0, policy_version 16721 (0.0008) +[2026-06-07 02:52:54,276][472559] Updated weights for policy 0, policy_version 16734 (0.0008) +[2026-06-07 02:52:54,391][472559] Updated weights for policy 0, policy_version 16744 (0.0009) +[2026-06-07 02:52:54,508][472559] Updated weights for policy 0, policy_version 16754 (0.0008) +[2026-06-07 02:52:54,619][472559] Updated weights for policy 0, policy_version 16764 (0.0008) +[2026-06-07 02:52:55,198][472559] Updated weights for policy 0, policy_version 16777 (0.0008) +[2026-06-07 02:52:55,319][472559] Updated weights for policy 0, policy_version 16788 (0.0009) +[2026-06-07 02:52:55,452][472559] Updated weights for policy 0, policy_version 16799 (0.0008) +[2026-06-07 02:52:55,579][472559] Updated weights for policy 0, policy_version 16810 (0.0008) +[2026-06-07 02:52:55,691][472559] Updated weights for policy 0, policy_version 16820 (0.0008) +[2026-06-07 02:52:55,804][472559] Updated weights for policy 0, policy_version 16830 (0.0008) +[2026-06-07 02:52:56,358][472559] Updated weights for policy 0, policy_version 16840 (0.0007) +[2026-06-07 02:52:56,485][472559] Updated weights for policy 0, policy_version 16851 (0.0008) +[2026-06-07 02:52:56,595][472559] Updated weights for policy 0, policy_version 16861 (0.0008) +[2026-06-07 02:52:56,704][472559] Updated weights for policy 0, policy_version 16871 (0.0008) +[2026-06-07 02:52:56,818][472559] Updated weights for policy 0, policy_version 16881 (0.0008) +[2026-06-07 02:52:56,940][472559] Updated weights for policy 0, policy_version 16892 (0.0008) +[2026-06-07 02:52:57,496][472559] Updated weights for policy 0, policy_version 16902 (0.0007) +[2026-06-07 02:52:57,617][472559] Updated weights for policy 0, policy_version 16913 (0.0008) +[2026-06-07 02:52:57,730][472559] Updated weights for policy 0, policy_version 16923 (0.0008) +[2026-06-07 02:52:57,844][472559] Updated weights for policy 0, policy_version 16933 (0.0010) +[2026-06-07 02:52:57,954][472559] Updated weights for policy 0, policy_version 16943 (0.0008) +[2026-06-07 02:52:58,083][472559] Updated weights for policy 0, policy_version 16954 (0.0009) +[2026-06-07 02:52:58,117][464927] Fps is (10 sec: 26214.3, 60 sec: 27852.8, 300 sec: 27658.4). Total num frames: 8650752. Throughput: 0: 28407.5. Samples: 8699648. Policy #0 lag: (min: 14.0, avg: 25.6, max: 78.0) +[2026-06-07 02:52:58,118][464927] Avg episode reward: [(0, '454.870')] +[2026-06-07 02:52:58,635][472559] Updated weights for policy 0, policy_version 16965 (0.0007) +[2026-06-07 02:52:58,741][472559] Updated weights for policy 0, policy_version 16975 (0.0005) +[2026-06-07 02:52:58,873][472559] Updated weights for policy 0, policy_version 16986 (0.0005) +[2026-06-07 02:52:58,999][472559] Updated weights for policy 0, policy_version 16997 (0.0005) +[2026-06-07 02:52:59,131][472559] Updated weights for policy 0, policy_version 17009 (0.0005) +[2026-06-07 02:52:59,271][472559] Updated weights for policy 0, policy_version 17021 (0.0004) +[2026-06-07 02:52:59,790][472559] Updated weights for policy 0, policy_version 17031 (0.0004) +[2026-06-07 02:52:59,901][472559] Updated weights for policy 0, policy_version 17041 (0.0004) +[2026-06-07 02:53:00,017][472559] Updated weights for policy 0, policy_version 17051 (0.0004) +[2026-06-07 02:53:00,139][472559] Updated weights for policy 0, policy_version 17062 (0.0004) +[2026-06-07 02:53:00,260][472559] Updated weights for policy 0, policy_version 17072 (0.0004) +[2026-06-07 02:53:00,383][472559] Updated weights for policy 0, policy_version 17083 (0.0004) +[2026-06-07 02:53:00,913][472559] Updated weights for policy 0, policy_version 17094 (0.0006) +[2026-06-07 02:53:01,031][472559] Updated weights for policy 0, policy_version 17104 (0.0008) +[2026-06-07 02:53:01,150][472559] Updated weights for policy 0, policy_version 17115 (0.0008) +[2026-06-07 02:53:01,276][472559] Updated weights for policy 0, policy_version 17126 (0.0008) +[2026-06-07 02:53:01,406][472559] Updated weights for policy 0, policy_version 17137 (0.0008) +[2026-06-07 02:53:01,531][472559] Updated weights for policy 0, policy_version 17148 (0.0008) +[2026-06-07 02:53:02,105][472559] Updated weights for policy 0, policy_version 17160 (0.0008) +[2026-06-07 02:53:02,224][472559] Updated weights for policy 0, policy_version 17171 (0.0009) +[2026-06-07 02:53:02,334][472559] Updated weights for policy 0, policy_version 17181 (0.0008) +[2026-06-07 02:53:02,460][472559] Updated weights for policy 0, policy_version 17192 (0.0009) +[2026-06-07 02:53:02,581][472559] Updated weights for policy 0, policy_version 17203 (0.0009) +[2026-06-07 02:53:02,694][472559] Updated weights for policy 0, policy_version 17213 (0.0010) +[2026-06-07 02:53:03,117][464927] Fps is (10 sec: 29490.7, 60 sec: 28399.0, 300 sec: 27769.5). Total num frames: 8814592. Throughput: 0: 28384.6. Samples: 8790656. Policy #0 lag: (min: 14.0, avg: 25.6, max: 78.0) +[2026-06-07 02:53:03,118][464927] Avg episode reward: [(0, '464.101')] +[2026-06-07 02:53:03,233][472559] Updated weights for policy 0, policy_version 17224 (0.0008) +[2026-06-07 02:53:03,380][472559] Updated weights for policy 0, policy_version 17237 (0.0007) +[2026-06-07 02:53:03,506][472559] Updated weights for policy 0, policy_version 17248 (0.0009) +[2026-06-07 02:53:03,617][472559] Updated weights for policy 0, policy_version 17258 (0.0009) +[2026-06-07 02:53:03,743][472559] Updated weights for policy 0, policy_version 17269 (0.0010) +[2026-06-07 02:53:03,854][472559] Updated weights for policy 0, policy_version 17279 (0.0009) +[2026-06-07 02:53:04,412][472559] Updated weights for policy 0, policy_version 17290 (0.0006) +[2026-06-07 02:53:04,519][472559] Updated weights for policy 0, policy_version 17300 (0.0007) +[2026-06-07 02:53:04,644][472559] Updated weights for policy 0, policy_version 17311 (0.0008) +[2026-06-07 02:53:04,758][472559] Updated weights for policy 0, policy_version 17321 (0.0009) +[2026-06-07 02:53:04,879][472559] Updated weights for policy 0, policy_version 17332 (0.0009) +[2026-06-07 02:53:05,461][472559] Updated weights for policy 0, policy_version 17345 (0.0008) +[2026-06-07 02:53:05,579][472559] Updated weights for policy 0, policy_version 17355 (0.0008) +[2026-06-07 02:53:05,700][472559] Updated weights for policy 0, policy_version 17366 (0.0008) +[2026-06-07 02:53:05,830][472559] Updated weights for policy 0, policy_version 17378 (0.0008) +[2026-06-07 02:53:05,958][472559] Updated weights for policy 0, policy_version 17389 (0.0008) +[2026-06-07 02:53:06,070][472559] Updated weights for policy 0, policy_version 17399 (0.0008) +[2026-06-07 02:53:06,628][472559] Updated weights for policy 0, policy_version 17410 (0.0008) +[2026-06-07 02:53:06,755][472559] Updated weights for policy 0, policy_version 17421 (0.0008) +[2026-06-07 02:53:06,884][472559] Updated weights for policy 0, policy_version 17433 (0.0008) +[2026-06-07 02:53:07,032][472559] Updated weights for policy 0, policy_version 17446 (0.0008) +[2026-06-07 02:53:07,159][472559] Updated weights for policy 0, policy_version 17457 (0.0007) +[2026-06-07 02:53:07,276][472559] Updated weights for policy 0, policy_version 17467 (0.0004) +[2026-06-07 02:53:07,834][472559] Updated weights for policy 0, policy_version 17477 (0.0005) +[2026-06-07 02:53:07,954][472559] Updated weights for policy 0, policy_version 17488 (0.0009) +[2026-06-07 02:53:08,068][472559] Updated weights for policy 0, policy_version 17498 (0.0008) +[2026-06-07 02:53:08,117][464927] Fps is (10 sec: 29491.0, 60 sec: 28398.9, 300 sec: 27769.5). Total num frames: 8945664. Throughput: 0: 28356.2. Samples: 8957184. Policy #0 lag: (min: 63.0, avg: 75.4, max: 127.0) +[2026-06-07 02:53:08,118][464927] Avg episode reward: [(0, '455.997')] +[2026-06-07 02:53:08,190][472559] Updated weights for policy 0, policy_version 17509 (0.0008) +[2026-06-07 02:53:08,325][472559] Updated weights for policy 0, policy_version 17521 (0.0009) +[2026-06-07 02:53:08,443][472559] Updated weights for policy 0, policy_version 17531 (0.0009) +[2026-06-07 02:53:08,993][472559] Updated weights for policy 0, policy_version 17542 (0.0008) +[2026-06-07 02:53:09,100][472559] Updated weights for policy 0, policy_version 17552 (0.0008) +[2026-06-07 02:53:09,225][472559] Updated weights for policy 0, policy_version 17563 (0.0006) +[2026-06-07 02:53:09,348][472559] Updated weights for policy 0, policy_version 17574 (0.0009) +[2026-06-07 02:53:09,494][472559] Updated weights for policy 0, policy_version 17587 (0.0008) +[2026-06-07 02:53:09,623][472559] Updated weights for policy 0, policy_version 17598 (0.0008) +[2026-06-07 02:53:10,169][472559] Updated weights for policy 0, policy_version 17608 (0.0006) +[2026-06-07 02:53:10,303][472559] Updated weights for policy 0, policy_version 17620 (0.0006) +[2026-06-07 02:53:10,423][472559] Updated weights for policy 0, policy_version 17631 (0.0009) +[2026-06-07 02:53:10,537][472559] Updated weights for policy 0, policy_version 17641 (0.0008) +[2026-06-07 02:53:10,658][472559] Updated weights for policy 0, policy_version 17652 (0.0008) +[2026-06-07 02:53:10,769][472559] Updated weights for policy 0, policy_version 17662 (0.0009) +[2026-06-07 02:53:11,318][472559] Updated weights for policy 0, policy_version 17672 (0.0008) +[2026-06-07 02:53:11,434][472559] Updated weights for policy 0, policy_version 17682 (0.0009) +[2026-06-07 02:53:11,545][472559] Updated weights for policy 0, policy_version 17692 (0.0009) +[2026-06-07 02:53:11,667][472559] Updated weights for policy 0, policy_version 17703 (0.0008) +[2026-06-07 02:53:11,793][472559] Updated weights for policy 0, policy_version 17714 (0.0008) +[2026-06-07 02:53:11,920][472559] Updated weights for policy 0, policy_version 17725 (0.0008) +[2026-06-07 02:53:12,496][472559] Updated weights for policy 0, policy_version 17737 (0.0008) +[2026-06-07 02:53:12,638][472559] Updated weights for policy 0, policy_version 17749 (0.0009) +[2026-06-07 02:53:12,759][472559] Updated weights for policy 0, policy_version 17760 (0.0008) +[2026-06-07 02:53:12,885][472559] Updated weights for policy 0, policy_version 17771 (0.0008) +[2026-06-07 02:53:12,997][472559] Updated weights for policy 0, policy_version 17781 (0.0008) +[2026-06-07 02:53:13,117][464927] Fps is (10 sec: 29491.5, 60 sec: 28398.9, 300 sec: 27880.6). Total num frames: 9109504. Throughput: 0: 28367.7. Samples: 9124992. Policy #0 lag: (min: 63.0, avg: 75.4, max: 127.0) +[2026-06-07 02:53:13,118][464927] Avg episode reward: [(0, '446.453')] +[2026-06-07 02:53:13,571][472559] Updated weights for policy 0, policy_version 17793 (0.0008) +[2026-06-07 02:53:13,685][472559] Updated weights for policy 0, policy_version 17803 (0.0008) +[2026-06-07 02:53:13,791][472559] Updated weights for policy 0, policy_version 17813 (0.0009) +[2026-06-07 02:53:13,908][472559] Updated weights for policy 0, policy_version 17823 (0.0009) +[2026-06-07 02:53:14,021][472559] Updated weights for policy 0, policy_version 17833 (0.0010) +[2026-06-07 02:53:14,139][472559] Updated weights for policy 0, policy_version 17844 (0.0008) +[2026-06-07 02:53:14,261][472559] Updated weights for policy 0, policy_version 17855 (0.0008) +[2026-06-07 02:53:14,829][472559] Updated weights for policy 0, policy_version 17867 (0.0009) +[2026-06-07 02:53:14,946][472559] Updated weights for policy 0, policy_version 17877 (0.0008) +[2026-06-07 02:53:15,055][472559] Updated weights for policy 0, policy_version 17887 (0.0008) +[2026-06-07 02:53:15,192][472559] Updated weights for policy 0, policy_version 17899 (0.0008) +[2026-06-07 02:53:15,318][472559] Updated weights for policy 0, policy_version 17910 (0.0009) +[2026-06-07 02:53:15,436][472559] Updated weights for policy 0, policy_version 17920 (0.0008) +[2026-06-07 02:53:15,981][472559] Updated weights for policy 0, policy_version 17930 (0.0008) +[2026-06-07 02:53:16,115][472559] Updated weights for policy 0, policy_version 17941 (0.0008) +[2026-06-07 02:53:16,230][472559] Updated weights for policy 0, policy_version 17952 (0.0009) +[2026-06-07 02:53:16,367][472559] Updated weights for policy 0, policy_version 17964 (0.0009) +[2026-06-07 02:53:16,493][472559] Updated weights for policy 0, policy_version 17975 (0.0009) +[2026-06-07 02:53:17,035][472559] Updated weights for policy 0, policy_version 17985 (0.0007) +[2026-06-07 02:53:17,157][472559] Updated weights for policy 0, policy_version 17996 (0.0008) +[2026-06-07 02:53:17,304][472559] Updated weights for policy 0, policy_version 18009 (0.0009) +[2026-06-07 02:53:17,431][472559] Updated weights for policy 0, policy_version 18020 (0.0009) +[2026-06-07 02:53:17,555][472559] Updated weights for policy 0, policy_version 18031 (0.0009) +[2026-06-07 02:53:17,680][472559] Updated weights for policy 0, policy_version 18042 (0.0009) +[2026-06-07 02:53:18,117][464927] Fps is (10 sec: 29491.5, 60 sec: 28398.9, 300 sec: 27880.6). Total num frames: 9240576. Throughput: 0: 28339.2. Samples: 9215616. Policy #0 lag: (min: 63.0, avg: 75.4, max: 127.0) +[2026-06-07 02:53:18,117][464927] Avg episode reward: [(0, '479.782')] +[2026-06-07 02:53:18,122][472025] Saving new best policy, reward=479.782! +[2026-06-07 02:53:18,259][472559] Updated weights for policy 0, policy_version 18053 (0.0007) +[2026-06-07 02:53:18,375][472559] Updated weights for policy 0, policy_version 18063 (0.0008) +[2026-06-07 02:53:18,497][472559] Updated weights for policy 0, policy_version 18074 (0.0009) +[2026-06-07 02:53:18,617][472559] Updated weights for policy 0, policy_version 18085 (0.0007) +[2026-06-07 02:53:18,758][472559] Updated weights for policy 0, policy_version 18097 (0.0007) +[2026-06-07 02:53:18,874][472559] Updated weights for policy 0, policy_version 18107 (0.0008) +[2026-06-07 02:53:19,395][472559] Updated weights for policy 0, policy_version 18117 (0.0008) +[2026-06-07 02:53:19,527][472559] Updated weights for policy 0, policy_version 18128 (0.0008) +[2026-06-07 02:53:19,649][472559] Updated weights for policy 0, policy_version 18139 (0.0008) +[2026-06-07 02:53:19,781][472559] Updated weights for policy 0, policy_version 18150 (0.0008) +[2026-06-07 02:53:19,891][472559] Updated weights for policy 0, policy_version 18160 (0.0008) +[2026-06-07 02:53:20,007][472559] Updated weights for policy 0, policy_version 18170 (0.0008) +[2026-06-07 02:53:20,542][472559] Updated weights for policy 0, policy_version 18181 (0.0008) +[2026-06-07 02:53:20,666][472559] Updated weights for policy 0, policy_version 18192 (0.0009) +[2026-06-07 02:53:20,791][472559] Updated weights for policy 0, policy_version 18203 (0.0009) +[2026-06-07 02:53:20,909][472559] Updated weights for policy 0, policy_version 18213 (0.0008) +[2026-06-07 02:53:21,031][472559] Updated weights for policy 0, policy_version 18224 (0.0009) +[2026-06-07 02:53:21,162][472559] Updated weights for policy 0, policy_version 18235 (0.0009) +[2026-06-07 02:53:21,700][472559] Updated weights for policy 0, policy_version 18245 (0.0008) +[2026-06-07 02:53:21,811][472559] Updated weights for policy 0, policy_version 18255 (0.0008) +[2026-06-07 02:53:21,943][472559] Updated weights for policy 0, policy_version 18266 (0.0008) +[2026-06-07 02:53:22,052][472559] Updated weights for policy 0, policy_version 18276 (0.0008) +[2026-06-07 02:53:22,168][472559] Updated weights for policy 0, policy_version 18286 (0.0009) +[2026-06-07 02:53:22,290][472559] Updated weights for policy 0, policy_version 18297 (0.0009) +[2026-06-07 02:53:22,837][472559] Updated weights for policy 0, policy_version 18307 (0.0008) +[2026-06-07 02:53:22,961][472559] Updated weights for policy 0, policy_version 18318 (0.0008) +[2026-06-07 02:53:23,086][472559] Updated weights for policy 0, policy_version 18329 (0.0009) +[2026-06-07 02:53:23,117][464927] Fps is (10 sec: 26214.4, 60 sec: 28398.9, 300 sec: 27991.6). Total num frames: 9371648. Throughput: 0: 28361.9. Samples: 9382912. Policy #0 lag: (min: 14.0, avg: 29.9, max: 78.0) +[2026-06-07 02:53:23,117][464927] Avg episode reward: [(0, '506.905')] +[2026-06-07 02:53:23,196][472559] Updated weights for policy 0, policy_version 18339 (0.0008) +[2026-06-07 02:53:23,312][472559] Updated weights for policy 0, policy_version 18349 (0.0009) +[2026-06-07 02:53:23,435][472559] Updated weights for policy 0, policy_version 18360 (0.0008) +[2026-06-07 02:53:23,524][472025] Saving new best policy, reward=506.905! +[2026-06-07 02:53:24,006][472559] Updated weights for policy 0, policy_version 18371 (0.0008) +[2026-06-07 02:53:24,123][472559] Updated weights for policy 0, policy_version 18382 (0.0009) +[2026-06-07 02:53:24,239][472559] Updated weights for policy 0, policy_version 18392 (0.0008) +[2026-06-07 02:53:24,353][472559] Updated weights for policy 0, policy_version 18402 (0.0009) +[2026-06-07 02:53:24,475][472559] Updated weights for policy 0, policy_version 18413 (0.0008) +[2026-06-07 02:53:24,594][472559] Updated weights for policy 0, policy_version 18423 (0.0009) +[2026-06-07 02:53:25,146][472559] Updated weights for policy 0, policy_version 18433 (0.0008) +[2026-06-07 02:53:25,268][472559] Updated weights for policy 0, policy_version 18444 (0.0008) +[2026-06-07 02:53:25,382][472559] Updated weights for policy 0, policy_version 18454 (0.0008) +[2026-06-07 02:53:25,507][472559] Updated weights for policy 0, policy_version 18465 (0.0009) +[2026-06-07 02:53:25,622][472559] Updated weights for policy 0, policy_version 18475 (0.0008) +[2026-06-07 02:53:25,747][472559] Updated weights for policy 0, policy_version 18486 (0.0008) +[2026-06-07 02:53:25,860][472559] Updated weights for policy 0, policy_version 18496 (0.0009) +[2026-06-07 02:53:26,401][472559] Updated weights for policy 0, policy_version 18507 (0.0007) +[2026-06-07 02:53:26,529][472559] Updated weights for policy 0, policy_version 18518 (0.0008) +[2026-06-07 02:53:26,641][472559] Updated weights for policy 0, policy_version 18528 (0.0008) +[2026-06-07 02:53:26,753][472559] Updated weights for policy 0, policy_version 18538 (0.0008) +[2026-06-07 02:53:26,875][472559] Updated weights for policy 0, policy_version 18549 (0.0008) +[2026-06-07 02:53:26,994][472559] Updated weights for policy 0, policy_version 18559 (0.0008) +[2026-06-07 02:53:27,541][472559] Updated weights for policy 0, policy_version 18569 (0.0008) +[2026-06-07 02:53:27,662][472559] Updated weights for policy 0, policy_version 18580 (0.0008) +[2026-06-07 02:53:27,808][472559] Updated weights for policy 0, policy_version 18593 (0.0008) +[2026-06-07 02:53:27,919][472559] Updated weights for policy 0, policy_version 18603 (0.0008) +[2026-06-07 02:53:28,050][472559] Updated weights for policy 0, policy_version 18614 (0.0009) +[2026-06-07 02:53:28,117][464927] Fps is (10 sec: 26214.3, 60 sec: 27852.8, 300 sec: 27991.6). Total num frames: 9502720. Throughput: 0: 28379.1. Samples: 9551104. Policy #0 lag: (min: 14.0, avg: 29.9, max: 78.0) +[2026-06-07 02:53:28,117][464927] Avg episode reward: [(0, '510.309')] +[2026-06-07 02:53:28,161][472025] Saving new best policy, reward=510.309! +[2026-06-07 02:53:28,608][472559] Updated weights for policy 0, policy_version 18625 (0.0008) +[2026-06-07 02:53:28,740][472559] Updated weights for policy 0, policy_version 18637 (0.0008) +[2026-06-07 02:53:28,873][472559] Updated weights for policy 0, policy_version 18649 (0.0009) +[2026-06-07 02:53:28,998][472559] Updated weights for policy 0, policy_version 18660 (0.0009) +[2026-06-07 02:53:29,111][472559] Updated weights for policy 0, policy_version 18670 (0.0009) +[2026-06-07 02:53:29,226][472559] Updated weights for policy 0, policy_version 18680 (0.0009) +[2026-06-07 02:53:29,784][472559] Updated weights for policy 0, policy_version 18691 (0.0008) +[2026-06-07 02:53:29,903][472559] Updated weights for policy 0, policy_version 18702 (0.0008) +[2026-06-07 02:53:30,032][472559] Updated weights for policy 0, policy_version 18713 (0.0009) +[2026-06-07 02:53:30,149][472559] Updated weights for policy 0, policy_version 18724 (0.0008) +[2026-06-07 02:53:30,269][472559] Updated weights for policy 0, policy_version 18734 (0.0008) +[2026-06-07 02:53:30,386][472559] Updated weights for policy 0, policy_version 18744 (0.0009) +[2026-06-07 02:53:30,943][472559] Updated weights for policy 0, policy_version 18755 (0.0008) +[2026-06-07 02:53:31,057][472559] Updated weights for policy 0, policy_version 18765 (0.0008) +[2026-06-07 02:53:31,183][472559] Updated weights for policy 0, policy_version 18776 (0.0009) +[2026-06-07 02:53:31,301][472559] Updated weights for policy 0, policy_version 18787 (0.0009) +[2026-06-07 02:53:31,430][472559] Updated weights for policy 0, policy_version 18798 (0.0009) +[2026-06-07 02:53:31,541][472559] Updated weights for policy 0, policy_version 18808 (0.0009) +[2026-06-07 02:53:32,119][472559] Updated weights for policy 0, policy_version 18820 (0.0008) +[2026-06-07 02:53:32,258][472559] Updated weights for policy 0, policy_version 18833 (0.0009) +[2026-06-07 02:53:32,399][472559] Updated weights for policy 0, policy_version 18846 (0.0009) +[2026-06-07 02:53:32,524][472559] Updated weights for policy 0, policy_version 18857 (0.0009) +[2026-06-07 02:53:32,656][472559] Updated weights for policy 0, policy_version 18869 (0.0009) +[2026-06-07 02:53:32,782][472559] Updated weights for policy 0, policy_version 18880 (0.0009) +[2026-06-07 02:53:33,117][464927] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 28102.7). Total num frames: 9666560. Throughput: 0: 28364.8. Samples: 9641344. Policy #0 lag: (min: 14.0, avg: 29.9, max: 78.0) +[2026-06-07 02:53:33,117][464927] Avg episode reward: [(0, '549.992')] +[2026-06-07 02:53:33,122][472025] Saving new best policy, reward=549.992! +[2026-06-07 02:53:33,363][472559] Updated weights for policy 0, policy_version 18891 (0.0008) +[2026-06-07 02:53:33,480][472559] Updated weights for policy 0, policy_version 18901 (0.0009) +[2026-06-07 02:53:33,588][472559] Updated weights for policy 0, policy_version 18911 (0.0009) +[2026-06-07 02:53:33,702][472559] Updated weights for policy 0, policy_version 18921 (0.0009) +[2026-06-07 02:53:33,846][472559] Updated weights for policy 0, policy_version 18934 (0.0009) +[2026-06-07 02:53:33,956][472559] Updated weights for policy 0, policy_version 18944 (0.0009) +[2026-06-07 02:53:34,501][472559] Updated weights for policy 0, policy_version 18954 (0.0008) +[2026-06-07 02:53:34,619][472559] Updated weights for policy 0, policy_version 18965 (0.0009) +[2026-06-07 02:53:34,772][472559] Updated weights for policy 0, policy_version 18979 (0.0009) +[2026-06-07 02:53:34,880][472559] Updated weights for policy 0, policy_version 18989 (0.0008) +[2026-06-07 02:53:34,996][472559] Updated weights for policy 0, policy_version 18999 (0.0009) +[2026-06-07 02:53:35,558][472559] Updated weights for policy 0, policy_version 19009 (0.0008) +[2026-06-07 02:53:35,684][472559] Updated weights for policy 0, policy_version 19021 (0.0008) +[2026-06-07 02:53:35,810][472559] Updated weights for policy 0, policy_version 19032 (0.0008) +[2026-06-07 02:53:35,931][472559] Updated weights for policy 0, policy_version 19043 (0.0009) +[2026-06-07 02:53:36,047][472559] Updated weights for policy 0, policy_version 19054 (0.0009) +[2026-06-07 02:53:36,169][472559] Updated weights for policy 0, policy_version 19065 (0.0009) +[2026-06-07 02:53:36,754][472559] Updated weights for policy 0, policy_version 19077 (0.0008) +[2026-06-07 02:53:36,871][472559] Updated weights for policy 0, policy_version 19088 (0.0009) +[2026-06-07 02:53:36,978][472559] Updated weights for policy 0, policy_version 19098 (0.0009) +[2026-06-07 02:53:37,120][472559] Updated weights for policy 0, policy_version 19111 (0.0009) +[2026-06-07 02:53:37,236][472559] Updated weights for policy 0, policy_version 19122 (0.0009) +[2026-06-07 02:53:37,373][472559] Updated weights for policy 0, policy_version 19134 (0.0009) +[2026-06-07 02:53:37,947][472559] Updated weights for policy 0, policy_version 19146 (0.0008) +[2026-06-07 02:53:38,078][472559] Updated weights for policy 0, policy_version 19158 (0.0009) +[2026-06-07 02:53:38,117][464927] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 28102.7). Total num frames: 9797632. Throughput: 0: 28393.2. Samples: 9809024. Policy #0 lag: (min: 17.0, avg: 48.1, max: 81.0) +[2026-06-07 02:53:38,117][464927] Avg episode reward: [(0, '512.124')] +[2026-06-07 02:53:38,198][472559] Updated weights for policy 0, policy_version 19169 (0.0009) +[2026-06-07 02:53:38,322][472559] Updated weights for policy 0, policy_version 19180 (0.0009) +[2026-06-07 02:53:38,450][472559] Updated weights for policy 0, policy_version 19192 (0.0009) +[2026-06-07 02:53:39,012][472559] Updated weights for policy 0, policy_version 19202 (0.0008) +[2026-06-07 02:53:39,118][472559] Updated weights for policy 0, policy_version 19212 (0.0009) +[2026-06-07 02:53:39,238][472559] Updated weights for policy 0, policy_version 19223 (0.0009) +[2026-06-07 02:53:39,347][472559] Updated weights for policy 0, policy_version 19233 (0.0009) +[2026-06-07 02:53:39,456][472559] Updated weights for policy 0, policy_version 19243 (0.0009) +[2026-06-07 02:53:39,587][472559] Updated weights for policy 0, policy_version 19255 (0.0009) +[2026-06-07 02:53:40,157][472559] Updated weights for policy 0, policy_version 19266 (0.0009) +[2026-06-07 02:53:40,274][472559] Updated weights for policy 0, policy_version 19277 (0.0008) +[2026-06-07 02:53:40,402][472559] Updated weights for policy 0, policy_version 19289 (0.0009) +[2026-06-07 02:53:40,517][472559] Updated weights for policy 0, policy_version 19299 (0.0009) +[2026-06-07 02:53:40,647][472559] Updated weights for policy 0, policy_version 19311 (0.0009) +[2026-06-07 02:53:40,765][472559] Updated weights for policy 0, policy_version 19322 (0.0009) +[2026-06-07 02:53:41,327][472559] Updated weights for policy 0, policy_version 19332 (0.0009) +[2026-06-07 02:53:41,454][472559] Updated weights for policy 0, policy_version 19344 (0.0009) +[2026-06-07 02:53:41,583][472559] Updated weights for policy 0, policy_version 19356 (0.0009) +[2026-06-07 02:53:41,723][472559] Updated weights for policy 0, policy_version 19368 (0.0009) +[2026-06-07 02:53:41,843][472559] Updated weights for policy 0, policy_version 19379 (0.0009) +[2026-06-07 02:53:41,970][472559] Updated weights for policy 0, policy_version 19391 (0.0009) +[2026-06-07 02:53:42,543][472559] Updated weights for policy 0, policy_version 19402 (0.0007) +[2026-06-07 02:53:42,654][472559] Updated weights for policy 0, policy_version 19412 (0.0013) +[2026-06-07 02:53:42,773][472559] Updated weights for policy 0, policy_version 19423 (0.0008) +[2026-06-07 02:53:42,900][472559] Updated weights for policy 0, policy_version 19434 (0.0008) +[2026-06-07 02:53:43,015][472559] Updated weights for policy 0, policy_version 19444 (0.0006) +[2026-06-07 02:53:43,117][464927] Fps is (10 sec: 26214.4, 60 sec: 28399.0, 300 sec: 28102.7). Total num frames: 9928704. Throughput: 0: 28396.1. Samples: 9977472. Policy #0 lag: (min: 17.0, avg: 48.1, max: 81.0) +[2026-06-07 02:53:43,118][464927] Avg episode reward: [(0, '502.848')] +[2026-06-07 02:53:43,132][472559] Updated weights for policy 0, policy_version 19454 (0.0008) +[2026-06-07 02:53:43,949][472559] Updated weights for policy 0, policy_version 19467 (0.0010) +[2026-06-07 02:53:44,135][472559] Updated weights for policy 0, policy_version 19483 (0.0006) +[2026-06-07 02:53:44,266][472559] Updated weights for policy 0, policy_version 19495 (0.0006) +[2026-06-07 02:53:44,379][472559] Updated weights for policy 0, policy_version 19505 (0.0008) +[2026-06-07 02:53:44,493][472559] Updated weights for policy 0, policy_version 19515 (0.0005) +[2026-06-07 02:53:45,027][472559] Updated weights for policy 0, policy_version 19526 (0.0006) +[2026-06-07 02:53:45,136][472559] Updated weights for policy 0, policy_version 19536 (0.0008) +[2026-06-07 02:53:45,254][472559] Updated weights for policy 0, policy_version 19546 (0.0008) +[2026-06-07 02:53:45,381][472559] Updated weights for policy 0, policy_version 19557 (0.0008) +[2026-06-07 02:53:45,500][472559] Updated weights for policy 0, policy_version 19568 (0.0008) +[2026-06-07 02:53:45,630][472559] Updated weights for policy 0, policy_version 19579 (0.0008) +[2026-06-07 02:53:46,167][472559] Updated weights for policy 0, policy_version 19589 (0.0008) +[2026-06-07 02:53:46,277][472559] Updated weights for policy 0, policy_version 19599 (0.0008) +[2026-06-07 02:53:46,395][472559] Updated weights for policy 0, policy_version 19609 (0.0008) +[2026-06-07 02:53:46,502][472559] Updated weights for policy 0, policy_version 19619 (0.0008) +[2026-06-07 02:53:46,633][472559] Updated weights for policy 0, policy_version 19630 (0.0009) +[2026-06-07 02:53:46,746][472559] Updated weights for policy 0, policy_version 19640 (0.0008) +[2026-06-07 02:53:47,289][472559] Updated weights for policy 0, policy_version 19651 (0.0008) +[2026-06-07 02:53:47,406][472559] Updated weights for policy 0, policy_version 19662 (0.0008) +[2026-06-07 02:53:47,523][472559] Updated weights for policy 0, policy_version 19672 (0.0008) +[2026-06-07 02:53:47,644][472559] Updated weights for policy 0, policy_version 19683 (0.0008) +[2026-06-07 02:53:47,763][472559] Updated weights for policy 0, policy_version 19693 (0.0008) +[2026-06-07 02:53:47,890][472559] Updated weights for policy 0, policy_version 19704 (0.0008) +[2026-06-07 02:53:48,117][464927] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 28213.8). Total num frames: 10092544. Throughput: 0: 28273.9. Samples: 10062976. Policy #0 lag: (min: 49.0, avg: 80.1, max: 113.0) +[2026-06-07 02:53:48,117][464927] Avg episode reward: [(0, '517.817')] +[2026-06-07 02:53:48,444][472559] Updated weights for policy 0, policy_version 19714 (0.0008) +[2026-06-07 02:53:48,564][472559] Updated weights for policy 0, policy_version 19725 (0.0008) +[2026-06-07 02:53:48,679][472559] Updated weights for policy 0, policy_version 19735 (0.0008) +[2026-06-07 02:53:48,809][472559] Updated weights for policy 0, policy_version 19747 (0.0008) +[2026-06-07 02:53:48,926][472559] Updated weights for policy 0, policy_version 19757 (0.0008) +[2026-06-07 02:53:49,049][472559] Updated weights for policy 0, policy_version 19768 (0.0008) +[2026-06-07 02:53:49,598][472559] Updated weights for policy 0, policy_version 19779 (0.0008) +[2026-06-07 02:53:49,726][472559] Updated weights for policy 0, policy_version 19790 (0.0009) +[2026-06-07 02:53:49,849][472559] Updated weights for policy 0, policy_version 19801 (0.0008) +[2026-06-07 02:53:49,976][472559] Updated weights for policy 0, policy_version 19812 (0.0008) +[2026-06-07 02:53:50,115][472559] Updated weights for policy 0, policy_version 19824 (0.0008) +[2026-06-07 02:53:50,247][472559] Updated weights for policy 0, policy_version 19836 (0.0008) +[2026-06-07 02:53:50,794][472559] Updated weights for policy 0, policy_version 19846 (0.0008) +[2026-06-07 02:53:50,918][472559] Updated weights for policy 0, policy_version 19857 (0.0008) +[2026-06-07 02:53:51,033][472559] Updated weights for policy 0, policy_version 19867 (0.0009) +[2026-06-07 02:53:51,141][472559] Updated weights for policy 0, policy_version 19877 (0.0008) +[2026-06-07 02:53:51,271][472559] Updated weights for policy 0, policy_version 19888 (0.0008) +[2026-06-07 02:53:51,383][472559] Updated weights for policy 0, policy_version 19898 (0.0008) +[2026-06-07 02:53:51,962][472559] Updated weights for policy 0, policy_version 19909 (0.0008) +[2026-06-07 02:53:52,073][472559] Updated weights for policy 0, policy_version 19919 (0.0008) +[2026-06-07 02:53:52,203][472559] Updated weights for policy 0, policy_version 19931 (0.0009) +[2026-06-07 02:53:52,327][472559] Updated weights for policy 0, policy_version 19942 (0.0009) +[2026-06-07 02:53:52,454][472559] Updated weights for policy 0, policy_version 19953 (0.0009) +[2026-06-07 02:53:52,578][472559] Updated weights for policy 0, policy_version 19964 (0.0009) +[2026-06-07 02:53:53,117][464927] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28213.9). Total num frames: 10223616. Throughput: 0: 28322.2. Samples: 10231680. Policy #0 lag: (min: 49.0, avg: 80.1, max: 113.0) +[2026-06-07 02:53:53,117][464927] Avg episode reward: [(0, '530.728')] +[2026-06-07 02:53:53,128][472559] Updated weights for policy 0, policy_version 19975 (0.0008) +[2026-06-07 02:53:53,239][472559] Updated weights for policy 0, policy_version 19985 (0.0008) +[2026-06-07 02:53:53,363][472559] Updated weights for policy 0, policy_version 19996 (0.0009) +[2026-06-07 02:53:53,490][472559] Updated weights for policy 0, policy_version 20007 (0.0009) +[2026-06-07 02:53:53,631][472559] Updated weights for policy 0, policy_version 20020 (0.0009) +[2026-06-07 02:53:53,773][472559] Updated weights for policy 0, policy_version 20032 (0.0009) +[2026-06-07 02:53:54,342][472559] Updated weights for policy 0, policy_version 20043 (0.0008) +[2026-06-07 02:53:54,467][472559] Updated weights for policy 0, policy_version 20054 (0.0008) +[2026-06-07 02:53:54,581][472559] Updated weights for policy 0, policy_version 20064 (0.0009) +[2026-06-07 02:53:54,714][472559] Updated weights for policy 0, policy_version 20076 (0.0009) +[2026-06-07 02:53:54,861][472559] Updated weights for policy 0, policy_version 20089 (0.0009) +[2026-06-07 02:53:55,441][472559] Updated weights for policy 0, policy_version 20100 (0.0008) +[2026-06-07 02:53:55,591][472559] Updated weights for policy 0, policy_version 20114 (0.0008) +[2026-06-07 02:53:55,717][472559] Updated weights for policy 0, policy_version 20125 (0.0008) +[2026-06-07 02:53:55,839][472559] Updated weights for policy 0, policy_version 20136 (0.0009) +[2026-06-07 02:53:55,955][472559] Updated weights for policy 0, policy_version 20146 (0.0008) +[2026-06-07 02:53:56,070][472559] Updated weights for policy 0, policy_version 20156 (0.0008) +[2026-06-07 02:53:56,645][472559] Updated weights for policy 0, policy_version 20168 (0.0007) +[2026-06-07 02:53:56,769][472559] Updated weights for policy 0, policy_version 20179 (0.0006) +[2026-06-07 02:53:56,886][472559] Updated weights for policy 0, policy_version 20189 (0.0008) +[2026-06-07 02:53:57,008][472559] Updated weights for policy 0, policy_version 20200 (0.0009) +[2026-06-07 02:53:57,116][472559] Updated weights for policy 0, policy_version 20210 (0.0008) +[2026-06-07 02:53:57,232][472559] Updated weights for policy 0, policy_version 20220 (0.0008) +[2026-06-07 02:53:57,767][472559] Updated weights for policy 0, policy_version 20230 (0.0007) +[2026-06-07 02:53:57,907][472559] Updated weights for policy 0, policy_version 20242 (0.0008) +[2026-06-07 02:53:58,039][472559] Updated weights for policy 0, policy_version 20254 (0.0008) +[2026-06-07 02:53:58,117][464927] Fps is (10 sec: 26214.4, 60 sec: 28398.9, 300 sec: 28213.8). Total num frames: 10354688. Throughput: 0: 28310.8. Samples: 10398976. Policy #0 lag: (min: 49.0, avg: 80.1, max: 113.0) +[2026-06-07 02:53:58,117][464927] Avg episode reward: [(0, '554.968')] +[2026-06-07 02:53:58,150][472559] Updated weights for policy 0, policy_version 20264 (0.0008) +[2026-06-07 02:53:58,279][472559] Updated weights for policy 0, policy_version 20275 (0.0008) +[2026-06-07 02:53:58,389][472559] Updated weights for policy 0, policy_version 20285 (0.0008) +[2026-06-07 02:53:58,417][472025] Saving new best policy, reward=554.968! +[2026-06-07 02:53:58,949][472559] Updated weights for policy 0, policy_version 20296 (0.0008) +[2026-06-07 02:53:59,059][472559] Updated weights for policy 0, policy_version 20306 (0.0008) +[2026-06-07 02:53:59,173][472559] Updated weights for policy 0, policy_version 20316 (0.0009) +[2026-06-07 02:53:59,298][472559] Updated weights for policy 0, policy_version 20327 (0.0009) +[2026-06-07 02:53:59,422][472559] Updated weights for policy 0, policy_version 20338 (0.0008) +[2026-06-07 02:53:59,534][472559] Updated weights for policy 0, policy_version 20348 (0.0008) +[2026-06-07 02:54:00,083][472559] Updated weights for policy 0, policy_version 20358 (0.0008) +[2026-06-07 02:54:00,198][472559] Updated weights for policy 0, policy_version 20368 (0.0008) +[2026-06-07 02:54:00,310][472559] Updated weights for policy 0, policy_version 20378 (0.0008) +[2026-06-07 02:54:00,422][472559] Updated weights for policy 0, policy_version 20388 (0.0008) +[2026-06-07 02:54:00,535][472559] Updated weights for policy 0, policy_version 20398 (0.0008) +[2026-06-07 02:54:00,664][472559] Updated weights for policy 0, policy_version 20409 (0.0008) +[2026-06-07 02:54:01,234][472559] Updated weights for policy 0, policy_version 20421 (0.0007) +[2026-06-07 02:54:01,373][472559] Updated weights for policy 0, policy_version 20434 (0.0008) +[2026-06-07 02:54:01,504][472559] Updated weights for policy 0, policy_version 20445 (0.0009) +[2026-06-07 02:54:01,620][472559] Updated weights for policy 0, policy_version 20455 (0.0008) +[2026-06-07 02:54:01,740][472559] Updated weights for policy 0, policy_version 20466 (0.0008) +[2026-06-07 02:54:01,859][472559] Updated weights for policy 0, policy_version 20476 (0.0008) +[2026-06-07 02:54:02,417][472559] Updated weights for policy 0, policy_version 20487 (0.0008) +[2026-06-07 02:54:02,529][472559] Updated weights for policy 0, policy_version 20497 (0.0008) +[2026-06-07 02:54:02,665][472559] Updated weights for policy 0, policy_version 20509 (0.0008) +[2026-06-07 02:54:02,808][472559] Updated weights for policy 0, policy_version 20521 (0.0008) +[2026-06-07 02:54:02,921][472559] Updated weights for policy 0, policy_version 20531 (0.0008) +[2026-06-07 02:54:03,043][472559] Updated weights for policy 0, policy_version 20542 (0.0008) +[2026-06-07 02:54:03,117][464927] Fps is (10 sec: 29491.3, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 10518528. Throughput: 0: 28239.7. Samples: 10486400. Policy #0 lag: (min: 32.0, avg: 67.2, max: 96.0) +[2026-06-07 02:54:03,117][464927] Avg episode reward: [(0, '558.160')] +[2026-06-07 02:54:03,122][472025] Saving new best policy, reward=558.160! +[2026-06-07 02:54:03,595][472559] Updated weights for policy 0, policy_version 20552 (0.0008) +[2026-06-07 02:54:03,728][472559] Updated weights for policy 0, policy_version 20564 (0.0008) +[2026-06-07 02:54:03,842][472559] Updated weights for policy 0, policy_version 20574 (0.0008) +[2026-06-07 02:54:03,968][472559] Updated weights for policy 0, policy_version 20585 (0.0008) +[2026-06-07 02:54:04,088][472559] Updated weights for policy 0, policy_version 20595 (0.0008) +[2026-06-07 02:54:04,202][472559] Updated weights for policy 0, policy_version 20605 (0.0008) +[2026-06-07 02:54:04,734][472559] Updated weights for policy 0, policy_version 20615 (0.0008) +[2026-06-07 02:54:04,846][472559] Updated weights for policy 0, policy_version 20625 (0.0008) +[2026-06-07 02:54:04,971][472559] Updated weights for policy 0, policy_version 20636 (0.0008) +[2026-06-07 02:54:05,096][472559] Updated weights for policy 0, policy_version 20647 (0.0005) +[2026-06-07 02:54:05,233][472559] Updated weights for policy 0, policy_version 20659 (0.0009) +[2026-06-07 02:54:05,350][472559] Updated weights for policy 0, policy_version 20669 (0.0006) +[2026-06-07 02:54:05,903][472559] Updated weights for policy 0, policy_version 20680 (0.0008) +[2026-06-07 02:54:06,015][472559] Updated weights for policy 0, policy_version 20690 (0.0009) +[2026-06-07 02:54:06,122][472559] Updated weights for policy 0, policy_version 20700 (0.0007) +[2026-06-07 02:54:06,241][472559] Updated weights for policy 0, policy_version 20711 (0.0008) +[2026-06-07 02:54:06,364][472559] Updated weights for policy 0, policy_version 20722 (0.0009) +[2026-06-07 02:54:06,483][472559] Updated weights for policy 0, policy_version 20732 (0.0009) +[2026-06-07 02:54:07,067][472559] Updated weights for policy 0, policy_version 20744 (0.0007) +[2026-06-07 02:54:07,177][472559] Updated weights for policy 0, policy_version 20754 (0.0009) +[2026-06-07 02:54:07,289][472559] Updated weights for policy 0, policy_version 20764 (0.0009) +[2026-06-07 02:54:07,416][472559] Updated weights for policy 0, policy_version 20775 (0.0008) +[2026-06-07 02:54:07,543][472559] Updated weights for policy 0, policy_version 20786 (0.0009) +[2026-06-07 02:54:07,659][472559] Updated weights for policy 0, policy_version 20796 (0.0008) +[2026-06-07 02:54:08,117][464927] Fps is (10 sec: 29491.2, 60 sec: 28399.0, 300 sec: 28213.8). Total num frames: 10649600. Throughput: 0: 28325.0. Samples: 10657536. Policy #0 lag: (min: 32.0, avg: 67.2, max: 96.0) +[2026-06-07 02:54:08,117][464927] Avg episode reward: [(0, '620.033')] +[2026-06-07 02:54:08,218][472559] Updated weights for policy 0, policy_version 20806 (0.0008) +[2026-06-07 02:54:08,326][472559] Updated weights for policy 0, policy_version 20816 (0.0008) +[2026-06-07 02:54:08,434][472559] Updated weights for policy 0, policy_version 20826 (0.0008) +[2026-06-07 02:54:08,551][472559] Updated weights for policy 0, policy_version 20836 (0.0009) +[2026-06-07 02:54:08,663][472559] Updated weights for policy 0, policy_version 20846 (0.0008) +[2026-06-07 02:54:08,791][472559] Updated weights for policy 0, policy_version 20857 (0.0008) +[2026-06-07 02:54:08,867][472025] Saving new best policy, reward=620.033! +[2026-06-07 02:54:09,345][472559] Updated weights for policy 0, policy_version 20867 (0.0008) +[2026-06-07 02:54:09,455][472559] Updated weights for policy 0, policy_version 20877 (0.0008) +[2026-06-07 02:54:09,583][472559] Updated weights for policy 0, policy_version 20888 (0.0008) +[2026-06-07 02:54:09,697][472559] Updated weights for policy 0, policy_version 20898 (0.0008) +[2026-06-07 02:54:09,832][472559] Updated weights for policy 0, policy_version 20910 (0.0009) +[2026-06-07 02:54:09,948][472559] Updated weights for policy 0, policy_version 20920 (0.0008) +[2026-06-07 02:54:10,499][472559] Updated weights for policy 0, policy_version 20930 (0.0008) +[2026-06-07 02:54:10,610][472559] Updated weights for policy 0, policy_version 20940 (0.0008) +[2026-06-07 02:54:10,732][472559] Updated weights for policy 0, policy_version 20951 (0.0008) +[2026-06-07 02:54:10,842][472559] Updated weights for policy 0, policy_version 20961 (0.0008) +[2026-06-07 02:54:10,989][472559] Updated weights for policy 0, policy_version 20973 (0.0009) +[2026-06-07 02:54:11,099][472559] Updated weights for policy 0, policy_version 20983 (0.0008) +[2026-06-07 02:54:11,642][472559] Updated weights for policy 0, policy_version 20993 (0.0008) +[2026-06-07 02:54:11,763][472559] Updated weights for policy 0, policy_version 21004 (0.0008) +[2026-06-07 02:54:11,897][472559] Updated weights for policy 0, policy_version 21016 (0.0008) +[2026-06-07 02:54:12,025][472559] Updated weights for policy 0, policy_version 21027 (0.0008) +[2026-06-07 02:54:12,142][472559] Updated weights for policy 0, policy_version 21037 (0.0008) +[2026-06-07 02:54:12,261][472559] Updated weights for policy 0, policy_version 21047 (0.0008) +[2026-06-07 02:54:12,814][472559] Updated weights for policy 0, policy_version 21058 (0.0008) +[2026-06-07 02:54:12,937][472559] Updated weights for policy 0, policy_version 21069 (0.0008) +[2026-06-07 02:54:13,061][472559] Updated weights for policy 0, policy_version 21080 (0.0008) +[2026-06-07 02:54:13,117][464927] Fps is (10 sec: 26214.3, 60 sec: 27852.8, 300 sec: 28213.8). Total num frames: 10780672. Throughput: 0: 28316.5. Samples: 10825344. Policy #0 lag: (min: 32.0, avg: 67.2, max: 96.0) +[2026-06-07 02:54:13,117][464927] Avg episode reward: [(0, '633.985')] +[2026-06-07 02:54:13,175][472559] Updated weights for policy 0, policy_version 21090 (0.0009) +[2026-06-07 02:54:13,287][472559] Updated weights for policy 0, policy_version 21100 (0.0009) +[2026-06-07 02:54:13,404][472559] Updated weights for policy 0, policy_version 21110 (0.0008) +[2026-06-07 02:54:13,513][472025] Saving new best policy, reward=633.985! +[2026-06-07 02:54:13,516][472559] Updated weights for policy 0, policy_version 21120 (0.0008) +[2026-06-07 02:54:14,073][472559] Updated weights for policy 0, policy_version 21130 (0.0008) +[2026-06-07 02:54:14,197][472559] Updated weights for policy 0, policy_version 21141 (0.0008) +[2026-06-07 02:54:14,319][472559] Updated weights for policy 0, policy_version 21152 (0.0008) +[2026-06-07 02:54:14,456][472559] Updated weights for policy 0, policy_version 21164 (0.0008) +[2026-06-07 02:54:14,574][472559] Updated weights for policy 0, policy_version 21174 (0.0008) +[2026-06-07 02:54:15,115][472559] Updated weights for policy 0, policy_version 21185 (0.0008) +[2026-06-07 02:54:15,227][472559] Updated weights for policy 0, policy_version 21195 (0.0005) +[2026-06-07 02:54:15,352][472559] Updated weights for policy 0, policy_version 21206 (0.0005) +[2026-06-07 02:54:15,481][472559] Updated weights for policy 0, policy_version 21217 (0.0008) +[2026-06-07 02:54:15,598][472559] Updated weights for policy 0, policy_version 21227 (0.0008) +[2026-06-07 02:54:15,716][472559] Updated weights for policy 0, policy_version 21238 (0.0008) +[2026-06-07 02:54:15,833][472559] Updated weights for policy 0, policy_version 21248 (0.0008) +[2026-06-07 02:54:16,379][472559] Updated weights for policy 0, policy_version 21258 (0.0008) +[2026-06-07 02:54:16,493][472559] Updated weights for policy 0, policy_version 21268 (0.0008) +[2026-06-07 02:54:16,599][472559] Updated weights for policy 0, policy_version 21278 (0.0008) +[2026-06-07 02:54:16,725][472559] Updated weights for policy 0, policy_version 21289 (0.0009) +[2026-06-07 02:54:16,843][472559] Updated weights for policy 0, policy_version 21299 (0.0008) +[2026-06-07 02:54:16,980][472559] Updated weights for policy 0, policy_version 21311 (0.0009) +[2026-06-07 02:54:17,536][472559] Updated weights for policy 0, policy_version 21321 (0.0008) +[2026-06-07 02:54:17,651][472559] Updated weights for policy 0, policy_version 21331 (0.0009) +[2026-06-07 02:54:17,760][472559] Updated weights for policy 0, policy_version 21341 (0.0009) +[2026-06-07 02:54:17,869][472559] Updated weights for policy 0, policy_version 21351 (0.0008) +[2026-06-07 02:54:17,984][472559] Updated weights for policy 0, policy_version 21361 (0.0009) +[2026-06-07 02:54:18,103][472559] Updated weights for policy 0, policy_version 21371 (0.0008) +[2026-06-07 02:54:18,117][464927] Fps is (10 sec: 26214.3, 60 sec: 27852.8, 300 sec: 28213.8). Total num frames: 10911744. Throughput: 0: 28174.2. Samples: 10909184. Policy #0 lag: (min: 32.0, avg: 67.2, max: 96.0) +[2026-06-07 02:54:18,117][464927] Avg episode reward: [(0, '665.655')] +[2026-06-07 02:54:18,162][472025] Saving new best policy, reward=665.655! +[2026-06-07 02:54:18,652][472559] Updated weights for policy 0, policy_version 21381 (0.0009) +[2026-06-07 02:54:18,768][472559] Updated weights for policy 0, policy_version 21391 (0.0008) +[2026-06-07 02:54:18,872][472559] Updated weights for policy 0, policy_version 21401 (0.0008) +[2026-06-07 02:54:18,988][472559] Updated weights for policy 0, policy_version 21411 (0.0009) +[2026-06-07 02:54:19,123][472559] Updated weights for policy 0, policy_version 21423 (0.0008) +[2026-06-07 02:54:19,249][472559] Updated weights for policy 0, policy_version 21434 (0.0008) +[2026-06-07 02:54:19,805][472559] Updated weights for policy 0, policy_version 21444 (0.0008) +[2026-06-07 02:54:19,934][472559] Updated weights for policy 0, policy_version 21455 (0.0008) +[2026-06-07 02:54:20,058][472559] Updated weights for policy 0, policy_version 21466 (0.0008) +[2026-06-07 02:54:20,185][472559] Updated weights for policy 0, policy_version 21477 (0.0009) +[2026-06-07 02:54:20,299][472559] Updated weights for policy 0, policy_version 21487 (0.0008) +[2026-06-07 02:54:20,417][472559] Updated weights for policy 0, policy_version 21497 (0.0008) +[2026-06-07 02:54:20,958][472559] Updated weights for policy 0, policy_version 21507 (0.0008) +[2026-06-07 02:54:21,079][472559] Updated weights for policy 0, policy_version 21518 (0.0008) +[2026-06-07 02:54:21,194][472559] Updated weights for policy 0, policy_version 21528 (0.0008) +[2026-06-07 02:54:21,314][472559] Updated weights for policy 0, policy_version 21539 (0.0008) +[2026-06-07 02:54:21,427][472559] Updated weights for policy 0, policy_version 21549 (0.0008) +[2026-06-07 02:54:21,561][472559] Updated weights for policy 0, policy_version 21560 (0.0008) +[2026-06-07 02:54:22,108][472559] Updated weights for policy 0, policy_version 21571 (0.0008) +[2026-06-07 02:54:22,226][472559] Updated weights for policy 0, policy_version 21581 (0.0008) +[2026-06-07 02:54:22,346][472559] Updated weights for policy 0, policy_version 21592 (0.0008) +[2026-06-07 02:54:22,462][472559] Updated weights for policy 0, policy_version 21602 (0.0008) +[2026-06-07 02:54:22,575][472559] Updated weights for policy 0, policy_version 21612 (0.0008) +[2026-06-07 02:54:22,701][472559] Updated weights for policy 0, policy_version 21623 (0.0009) +[2026-06-07 02:54:23,116][464927] Fps is (10 sec: 29491.6, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 11075584. Throughput: 0: 28327.9. Samples: 11083776. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 02:54:23,117][464927] Avg episode reward: [(0, '689.856')] +[2026-06-07 02:54:23,121][472025] Saving new best policy, reward=689.856! +[2026-06-07 02:54:23,258][472559] Updated weights for policy 0, policy_version 21634 (0.0008) +[2026-06-07 02:54:23,386][472559] Updated weights for policy 0, policy_version 21645 (0.0008) +[2026-06-07 02:54:23,506][472559] Updated weights for policy 0, policy_version 21656 (0.0009) +[2026-06-07 02:54:23,643][472559] Updated weights for policy 0, policy_version 21668 (0.0009) +[2026-06-07 02:54:23,760][472559] Updated weights for policy 0, policy_version 21678 (0.0009) +[2026-06-07 02:54:23,875][472559] Updated weights for policy 0, policy_version 21688 (0.0009) +[2026-06-07 02:54:24,415][472559] Updated weights for policy 0, policy_version 21698 (0.0008) +[2026-06-07 02:54:24,524][472559] Updated weights for policy 0, policy_version 21708 (0.0008) +[2026-06-07 02:54:24,640][472559] Updated weights for policy 0, policy_version 21718 (0.0008) +[2026-06-07 02:54:24,753][472559] Updated weights for policy 0, policy_version 21728 (0.0008) +[2026-06-07 02:54:24,877][472559] Updated weights for policy 0, policy_version 21739 (0.0008) +[2026-06-07 02:54:24,995][472559] Updated weights for policy 0, policy_version 21749 (0.0008) +[2026-06-07 02:54:25,119][472559] Updated weights for policy 0, policy_version 21760 (0.0008) +[2026-06-07 02:54:25,653][472559] Updated weights for policy 0, policy_version 21770 (0.0008) +[2026-06-07 02:54:25,762][472559] Updated weights for policy 0, policy_version 21780 (0.0009) +[2026-06-07 02:54:25,872][472559] Updated weights for policy 0, policy_version 21790 (0.0006) +[2026-06-07 02:54:26,006][472559] Updated weights for policy 0, policy_version 21801 (0.0008) +[2026-06-07 02:54:26,119][472559] Updated weights for policy 0, policy_version 21811 (0.0008) +[2026-06-07 02:54:26,240][472559] Updated weights for policy 0, policy_version 21821 (0.0009) +[2026-06-07 02:54:26,774][472559] Updated weights for policy 0, policy_version 21831 (0.0006) +[2026-06-07 02:54:26,899][472559] Updated weights for policy 0, policy_version 21842 (0.0007) +[2026-06-07 02:54:27,014][472559] Updated weights for policy 0, policy_version 21853 (0.0009) +[2026-06-07 02:54:27,148][472559] Updated weights for policy 0, policy_version 21864 (0.0009) +[2026-06-07 02:54:27,274][472559] Updated weights for policy 0, policy_version 21875 (0.0008) +[2026-06-07 02:54:27,399][472559] Updated weights for policy 0, policy_version 21886 (0.0008) +[2026-06-07 02:54:27,958][472559] Updated weights for policy 0, policy_version 21897 (0.0008) +[2026-06-07 02:54:28,065][472559] Updated weights for policy 0, policy_version 21907 (0.0008) +[2026-06-07 02:54:28,117][464927] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28213.8). Total num frames: 11206656. Throughput: 0: 28290.8. Samples: 11250560. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 02:54:28,117][464927] Avg episode reward: [(0, '678.375')] +[2026-06-07 02:54:28,177][472559] Updated weights for policy 0, policy_version 21917 (0.0007) +[2026-06-07 02:54:28,296][472559] Updated weights for policy 0, policy_version 21927 (0.0008) +[2026-06-07 02:54:28,408][472559] Updated weights for policy 0, policy_version 21937 (0.0009) +[2026-06-07 02:54:28,523][472559] Updated weights for policy 0, policy_version 21947 (0.0008) +[2026-06-07 02:54:29,074][472559] Updated weights for policy 0, policy_version 21957 (0.0008) +[2026-06-07 02:54:29,187][472559] Updated weights for policy 0, policy_version 21967 (0.0008) +[2026-06-07 02:54:29,300][472559] Updated weights for policy 0, policy_version 21977 (0.0008) +[2026-06-07 02:54:29,422][472559] Updated weights for policy 0, policy_version 21988 (0.0008) +[2026-06-07 02:54:29,546][472559] Updated weights for policy 0, policy_version 21998 (0.0008) +[2026-06-07 02:54:29,664][472559] Updated weights for policy 0, policy_version 22008 (0.0008) +[2026-06-07 02:54:30,217][472559] Updated weights for policy 0, policy_version 22019 (0.0008) +[2026-06-07 02:54:30,332][472559] Updated weights for policy 0, policy_version 22029 (0.0009) +[2026-06-07 02:54:30,444][472559] Updated weights for policy 0, policy_version 22039 (0.0008) +[2026-06-07 02:54:30,556][472559] Updated weights for policy 0, policy_version 22049 (0.0008) +[2026-06-07 02:54:30,682][472559] Updated weights for policy 0, policy_version 22060 (0.0008) +[2026-06-07 02:54:30,806][472559] Updated weights for policy 0, policy_version 22071 (0.0009) +[2026-06-07 02:54:31,339][472559] Updated weights for policy 0, policy_version 22081 (0.0008) +[2026-06-07 02:54:31,478][472559] Updated weights for policy 0, policy_version 22093 (0.0008) +[2026-06-07 02:54:31,600][472559] Updated weights for policy 0, policy_version 22104 (0.0008) +[2026-06-07 02:54:31,715][472559] Updated weights for policy 0, policy_version 22114 (0.0008) +[2026-06-07 02:54:31,823][472559] Updated weights for policy 0, policy_version 22124 (0.0008) +[2026-06-07 02:54:31,939][472559] Updated weights for policy 0, policy_version 22134 (0.0008) +[2026-06-07 02:54:32,049][472559] Updated weights for policy 0, policy_version 22144 (0.0008) +[2026-06-07 02:54:32,604][472559] Updated weights for policy 0, policy_version 22154 (0.0008) +[2026-06-07 02:54:32,727][472559] Updated weights for policy 0, policy_version 22165 (0.0008) +[2026-06-07 02:54:32,844][472559] Updated weights for policy 0, policy_version 22175 (0.0008) +[2026-06-07 02:54:32,955][472559] Updated weights for policy 0, policy_version 22185 (0.0008) +[2026-06-07 02:54:33,070][472559] Updated weights for policy 0, policy_version 22196 (0.0008) +[2026-06-07 02:54:33,117][464927] Fps is (10 sec: 26214.2, 60 sec: 27852.8, 300 sec: 28213.8). Total num frames: 11337728. Throughput: 0: 28182.8. Samples: 11331200. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 02:54:33,117][464927] Avg episode reward: [(0, '688.857')] +[2026-06-07 02:54:33,190][472559] Updated weights for policy 0, policy_version 22206 (0.0008) +[2026-06-07 02:54:33,736][472559] Updated weights for policy 0, policy_version 22216 (0.0007) +[2026-06-07 02:54:33,848][472559] Updated weights for policy 0, policy_version 22226 (0.0008) +[2026-06-07 02:54:33,979][472559] Updated weights for policy 0, policy_version 22238 (0.0008) +[2026-06-07 02:54:34,093][472559] Updated weights for policy 0, policy_version 22248 (0.0008) +[2026-06-07 02:54:34,210][472559] Updated weights for policy 0, policy_version 22258 (0.0008) +[2026-06-07 02:54:34,330][472559] Updated weights for policy 0, policy_version 22268 (0.0008) +[2026-06-07 02:54:34,870][472559] Updated weights for policy 0, policy_version 22278 (0.0007) +[2026-06-07 02:54:34,989][472559] Updated weights for policy 0, policy_version 22289 (0.0008) +[2026-06-07 02:54:35,116][472559] Updated weights for policy 0, policy_version 22300 (0.0009) +[2026-06-07 02:54:35,228][472559] Updated weights for policy 0, policy_version 22310 (0.0008) +[2026-06-07 02:54:35,345][472559] Updated weights for policy 0, policy_version 22320 (0.0008) +[2026-06-07 02:54:35,484][472559] Updated weights for policy 0, policy_version 22332 (0.0008) +[2026-06-07 02:54:36,016][472559] Updated weights for policy 0, policy_version 22342 (0.0009) +[2026-06-07 02:54:36,153][472559] Updated weights for policy 0, policy_version 22354 (0.0008) +[2026-06-07 02:54:36,270][472559] Updated weights for policy 0, policy_version 22364 (0.0008) +[2026-06-07 02:54:36,378][472559] Updated weights for policy 0, policy_version 22374 (0.0008) +[2026-06-07 02:54:36,506][472559] Updated weights for policy 0, policy_version 22385 (0.0009) +[2026-06-07 02:54:36,634][472559] Updated weights for policy 0, policy_version 22396 (0.0009) +[2026-06-07 02:54:37,168][472559] Updated weights for policy 0, policy_version 22406 (0.0007) +[2026-06-07 02:54:37,292][472559] Updated weights for policy 0, policy_version 22417 (0.0009) +[2026-06-07 02:54:37,418][472559] Updated weights for policy 0, policy_version 22428 (0.0008) +[2026-06-07 02:54:37,530][472559] Updated weights for policy 0, policy_version 22438 (0.0008) +[2026-06-07 02:54:37,672][472559] Updated weights for policy 0, policy_version 22450 (0.0008) +[2026-06-07 02:54:37,798][472559] Updated weights for policy 0, policy_version 22461 (0.0009) +[2026-06-07 02:54:38,117][464927] Fps is (10 sec: 29491.4, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 11501568. Throughput: 0: 28342.1. Samples: 11507072. Policy #0 lag: (min: 1.0, avg: 41.3, max: 65.0) +[2026-06-07 02:54:38,117][464927] Avg episode reward: [(0, '707.758')] +[2026-06-07 02:54:38,122][472025] Saving new best policy, reward=707.758! +[2026-06-07 02:54:38,348][472559] Updated weights for policy 0, policy_version 22472 (0.0005) +[2026-06-07 02:54:38,460][472559] Updated weights for policy 0, policy_version 22482 (0.0007) +[2026-06-07 02:54:38,582][472559] Updated weights for policy 0, policy_version 22493 (0.0008) +[2026-06-07 02:54:38,698][472559] Updated weights for policy 0, policy_version 22503 (0.0008) +[2026-06-07 02:54:38,807][472559] Updated weights for policy 0, policy_version 22513 (0.0008) +[2026-06-07 02:54:38,930][472559] Updated weights for policy 0, policy_version 22524 (0.0008) +[2026-06-07 02:54:39,472][472559] Updated weights for policy 0, policy_version 22534 (0.0008) +[2026-06-07 02:54:39,600][472559] Updated weights for policy 0, policy_version 22546 (0.0009) +[2026-06-07 02:54:39,756][472559] Updated weights for policy 0, policy_version 22560 (0.0008) +[2026-06-07 02:54:39,867][472559] Updated weights for policy 0, policy_version 22570 (0.0008) +[2026-06-07 02:54:39,996][472559] Updated weights for policy 0, policy_version 22581 (0.0008) +[2026-06-07 02:54:40,109][472559] Updated weights for policy 0, policy_version 22591 (0.0008) +[2026-06-07 02:54:40,683][472559] Updated weights for policy 0, policy_version 22603 (0.0008) +[2026-06-07 02:54:40,808][472559] Updated weights for policy 0, policy_version 22614 (0.0008) +[2026-06-07 02:54:40,941][472559] Updated weights for policy 0, policy_version 22626 (0.0008) +[2026-06-07 02:54:41,067][472559] Updated weights for policy 0, policy_version 22637 (0.0008) +[2026-06-07 02:54:41,181][472559] Updated weights for policy 0, policy_version 22647 (0.0008) +[2026-06-07 02:54:41,729][472559] Updated weights for policy 0, policy_version 22657 (0.0008) +[2026-06-07 02:54:41,861][472559] Updated weights for policy 0, policy_version 22669 (0.0008) +[2026-06-07 02:54:41,982][472559] Updated weights for policy 0, policy_version 22680 (0.0008) +[2026-06-07 02:54:42,103][472559] Updated weights for policy 0, policy_version 22691 (0.0008) +[2026-06-07 02:54:42,231][472559] Updated weights for policy 0, policy_version 22702 (0.0008) +[2026-06-07 02:54:42,357][472559] Updated weights for policy 0, policy_version 22713 (0.0009) +[2026-06-07 02:54:42,903][472559] Updated weights for policy 0, policy_version 22723 (0.0008) +[2026-06-07 02:54:43,031][472559] Updated weights for policy 0, policy_version 22734 (0.0008) +[2026-06-07 02:54:43,117][464927] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 11632640. Throughput: 0: 28333.5. Samples: 11673984. Policy #0 lag: (min: 1.0, avg: 41.3, max: 65.0) +[2026-06-07 02:54:43,118][464927] Avg episode reward: [(0, '691.870')] +[2026-06-07 02:54:43,152][472559] Updated weights for policy 0, policy_version 22745 (0.0008) +[2026-06-07 02:54:43,278][472559] Updated weights for policy 0, policy_version 22756 (0.0008) +[2026-06-07 02:54:43,402][472559] Updated weights for policy 0, policy_version 22767 (0.0008) +[2026-06-07 02:54:43,538][472559] Updated weights for policy 0, policy_version 22779 (0.0008) +[2026-06-07 02:54:44,092][472559] Updated weights for policy 0, policy_version 22789 (0.0008) +[2026-06-07 02:54:44,211][472559] Updated weights for policy 0, policy_version 22800 (0.0009) +[2026-06-07 02:54:44,334][472559] Updated weights for policy 0, policy_version 22811 (0.0008) +[2026-06-07 02:54:44,456][472559] Updated weights for policy 0, policy_version 22821 (0.0008) +[2026-06-07 02:54:44,573][472559] Updated weights for policy 0, policy_version 22832 (0.0009) +[2026-06-07 02:54:44,690][472559] Updated weights for policy 0, policy_version 22842 (0.0008) +[2026-06-07 02:54:45,252][472559] Updated weights for policy 0, policy_version 22852 (0.0008) +[2026-06-07 02:54:45,355][472559] Updated weights for policy 0, policy_version 22862 (0.0008) +[2026-06-07 02:54:45,486][472559] Updated weights for policy 0, policy_version 22873 (0.0009) +[2026-06-07 02:54:45,618][472559] Updated weights for policy 0, policy_version 22885 (0.0008) +[2026-06-07 02:54:45,732][472559] Updated weights for policy 0, policy_version 22895 (0.0008) +[2026-06-07 02:54:45,866][472559] Updated weights for policy 0, policy_version 22907 (0.0008) +[2026-06-07 02:54:46,455][472559] Updated weights for policy 0, policy_version 22919 (0.0008) +[2026-06-07 02:54:46,568][472559] Updated weights for policy 0, policy_version 22929 (0.0008) +[2026-06-07 02:54:46,682][472559] Updated weights for policy 0, policy_version 22939 (0.0009) +[2026-06-07 02:54:46,814][472559] Updated weights for policy 0, policy_version 22951 (0.0009) +[2026-06-07 02:54:46,942][472559] Updated weights for policy 0, policy_version 22962 (0.0008) +[2026-06-07 02:54:47,064][472559] Updated weights for policy 0, policy_version 22973 (0.0008) +[2026-06-07 02:54:47,642][472559] Updated weights for policy 0, policy_version 22985 (0.0008) +[2026-06-07 02:54:47,779][472559] Updated weights for policy 0, policy_version 22998 (0.0008) +[2026-06-07 02:54:47,912][472559] Updated weights for policy 0, policy_version 23009 (0.0006) +[2026-06-07 02:54:48,027][472559] Updated weights for policy 0, policy_version 23019 (0.0008) +[2026-06-07 02:54:48,117][464927] Fps is (10 sec: 26213.9, 60 sec: 27852.7, 300 sec: 28213.8). Total num frames: 11763712. Throughput: 0: 28168.4. Samples: 11753984. Policy #0 lag: (min: 1.0, avg: 41.3, max: 65.0) +[2026-06-07 02:54:48,118][464927] Avg episode reward: [(0, '651.376')] +[2026-06-07 02:54:48,139][472559] Updated weights for policy 0, policy_version 23029 (0.0008) +[2026-06-07 02:54:48,717][472559] Updated weights for policy 0, policy_version 23042 (0.0008) +[2026-06-07 02:54:48,844][472559] Updated weights for policy 0, policy_version 23053 (0.0008) +[2026-06-07 02:54:48,964][472559] Updated weights for policy 0, policy_version 23064 (0.0008) +[2026-06-07 02:54:49,099][472559] Updated weights for policy 0, policy_version 23076 (0.0008) +[2026-06-07 02:54:49,213][472559] Updated weights for policy 0, policy_version 23086 (0.0008) +[2026-06-07 02:54:49,351][472559] Updated weights for policy 0, policy_version 23098 (0.0008) +[2026-06-07 02:54:49,896][472559] Updated weights for policy 0, policy_version 23109 (0.0008) +[2026-06-07 02:54:50,008][472559] Updated weights for policy 0, policy_version 23119 (0.0008) +[2026-06-07 02:54:50,135][472559] Updated weights for policy 0, policy_version 23130 (0.0008) +[2026-06-07 02:54:50,256][472559] Updated weights for policy 0, policy_version 23141 (0.0007) +[2026-06-07 02:54:50,370][472559] Updated weights for policy 0, policy_version 23151 (0.0007) +[2026-06-07 02:54:50,518][472559] Updated weights for policy 0, policy_version 23164 (0.0008) +[2026-06-07 02:54:51,093][472559] Updated weights for policy 0, policy_version 23176 (0.0008) +[2026-06-07 02:54:51,204][472559] Updated weights for policy 0, policy_version 23186 (0.0008) +[2026-06-07 02:54:51,345][472559] Updated weights for policy 0, policy_version 23198 (0.0008) +[2026-06-07 02:54:51,473][472559] Updated weights for policy 0, policy_version 23209 (0.0008) +[2026-06-07 02:54:51,605][472559] Updated weights for policy 0, policy_version 23221 (0.0008) +[2026-06-07 02:54:51,726][472559] Updated weights for policy 0, policy_version 23232 (0.0008) +[2026-06-07 02:54:52,297][472559] Updated weights for policy 0, policy_version 23244 (0.0008) +[2026-06-07 02:54:52,412][472559] Updated weights for policy 0, policy_version 23254 (0.0008) +[2026-06-07 02:54:52,557][472559] Updated weights for policy 0, policy_version 23267 (0.0008) +[2026-06-07 02:54:52,683][472559] Updated weights for policy 0, policy_version 23278 (0.0008) +[2026-06-07 02:54:52,796][472559] Updated weights for policy 0, policy_version 23288 (0.0009) +[2026-06-07 02:54:53,116][464927] Fps is (10 sec: 29491.4, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 11927552. Throughput: 0: 28271.0. Samples: 11929728. Policy #0 lag: (min: 22.0, avg: 34.3, max: 86.0) +[2026-06-07 02:54:53,117][464927] Avg episode reward: [(0, '674.005')] +[2026-06-07 02:54:53,347][472559] Updated weights for policy 0, policy_version 23298 (0.0008) +[2026-06-07 02:54:53,493][472559] Updated weights for policy 0, policy_version 23311 (0.0008) +[2026-06-07 02:54:53,614][472559] Updated weights for policy 0, policy_version 23322 (0.0008) +[2026-06-07 02:54:53,743][472559] Updated weights for policy 0, policy_version 23333 (0.0008) +[2026-06-07 02:54:53,875][472559] Updated weights for policy 0, policy_version 23345 (0.0008) +[2026-06-07 02:54:53,991][472559] Updated weights for policy 0, policy_version 23355 (0.0008) +[2026-06-07 02:54:54,551][472559] Updated weights for policy 0, policy_version 23366 (0.0008) +[2026-06-07 02:54:54,674][472559] Updated weights for policy 0, policy_version 23377 (0.0008) +[2026-06-07 02:54:54,799][472559] Updated weights for policy 0, policy_version 23388 (0.0008) +[2026-06-07 02:54:54,912][472559] Updated weights for policy 0, policy_version 23398 (0.0008) +[2026-06-07 02:54:55,069][472559] Updated weights for policy 0, policy_version 23412 (0.0008) +[2026-06-07 02:54:55,195][472559] Updated weights for policy 0, policy_version 23423 (0.0008) +[2026-06-07 02:54:55,733][472559] Updated weights for policy 0, policy_version 23433 (0.0008) +[2026-06-07 02:54:55,854][472559] Updated weights for policy 0, policy_version 23444 (0.0008) +[2026-06-07 02:54:55,987][472559] Updated weights for policy 0, policy_version 23456 (0.0008) +[2026-06-07 02:54:56,103][472559] Updated weights for policy 0, policy_version 23466 (0.0008) +[2026-06-07 02:54:56,216][472559] Updated weights for policy 0, policy_version 23476 (0.0008) +[2026-06-07 02:54:56,344][472559] Updated weights for policy 0, policy_version 23488 (0.0008) +[2026-06-07 02:54:56,890][472559] Updated weights for policy 0, policy_version 23498 (0.0008) +[2026-06-07 02:54:57,005][472559] Updated weights for policy 0, policy_version 23508 (0.0009) +[2026-06-07 02:54:57,118][472559] Updated weights for policy 0, policy_version 23518 (0.0008) +[2026-06-07 02:54:57,256][472559] Updated weights for policy 0, policy_version 23530 (0.0008) +[2026-06-07 02:54:57,382][472559] Updated weights for policy 0, policy_version 23541 (0.0008) +[2026-06-07 02:54:57,964][472559] Updated weights for policy 0, policy_version 23553 (0.0008) +[2026-06-07 02:54:58,072][472559] Updated weights for policy 0, policy_version 23563 (0.0008) +[2026-06-07 02:54:58,116][464927] Fps is (10 sec: 29491.8, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 12058624. Throughput: 0: 28347.8. Samples: 12100992. Policy #0 lag: (min: 22.0, avg: 34.3, max: 86.0) +[2026-06-07 02:54:58,117][464927] Avg episode reward: [(0, '675.484')] +[2026-06-07 02:54:58,198][472559] Updated weights for policy 0, policy_version 23574 (0.0008) +[2026-06-07 02:54:58,313][472559] Updated weights for policy 0, policy_version 23584 (0.0008) +[2026-06-07 02:54:58,427][472559] Updated weights for policy 0, policy_version 23594 (0.0008) +[2026-06-07 02:54:58,541][472559] Updated weights for policy 0, policy_version 23604 (0.0008) +[2026-06-07 02:54:58,676][472559] Updated weights for policy 0, policy_version 23616 (0.0008) +[2026-06-07 02:54:59,234][472559] Updated weights for policy 0, policy_version 23627 (0.0008) +[2026-06-07 02:54:59,358][472559] Updated weights for policy 0, policy_version 23638 (0.0008) +[2026-06-07 02:54:59,480][472559] Updated weights for policy 0, policy_version 23648 (0.0008) +[2026-06-07 02:54:59,592][472559] Updated weights for policy 0, policy_version 23658 (0.0009) +[2026-06-07 02:54:59,704][472559] Updated weights for policy 0, policy_version 23668 (0.0008) +[2026-06-07 02:54:59,836][472559] Updated weights for policy 0, policy_version 23679 (0.0008) +[2026-06-07 02:55:00,370][472559] Updated weights for policy 0, policy_version 23689 (0.0008) +[2026-06-07 02:55:00,482][472559] Updated weights for policy 0, policy_version 23699 (0.0008) +[2026-06-07 02:55:00,615][472559] Updated weights for policy 0, policy_version 23710 (0.0009) +[2026-06-07 02:55:00,728][472559] Updated weights for policy 0, policy_version 23720 (0.0008) +[2026-06-07 02:55:00,840][472559] Updated weights for policy 0, policy_version 23730 (0.0008) +[2026-06-07 02:55:00,953][472559] Updated weights for policy 0, policy_version 23740 (0.0008) +[2026-06-07 02:55:01,486][472559] Updated weights for policy 0, policy_version 23751 (0.0008) +[2026-06-07 02:55:01,606][472559] Updated weights for policy 0, policy_version 23761 (0.0008) +[2026-06-07 02:55:01,733][472559] Updated weights for policy 0, policy_version 23772 (0.0008) +[2026-06-07 02:55:01,880][472559] Updated weights for policy 0, policy_version 23785 (0.0008) +[2026-06-07 02:55:01,998][472559] Updated weights for policy 0, policy_version 23795 (0.0008) +[2026-06-07 02:55:02,110][472559] Updated weights for policy 0, policy_version 23805 (0.0008) +[2026-06-07 02:55:02,637][472559] Updated weights for policy 0, policy_version 23815 (0.0008) +[2026-06-07 02:55:02,760][472559] Updated weights for policy 0, policy_version 23826 (0.0008) +[2026-06-07 02:55:02,881][472559] Updated weights for policy 0, policy_version 23836 (0.0008) +[2026-06-07 02:55:03,008][472559] Updated weights for policy 0, policy_version 23847 (0.0008) +[2026-06-07 02:55:03,117][464927] Fps is (10 sec: 26214.1, 60 sec: 27852.8, 300 sec: 28213.8). Total num frames: 12189696. Throughput: 0: 28211.2. Samples: 12178688. Policy #0 lag: (min: 22.0, avg: 34.3, max: 86.0) +[2026-06-07 02:55:03,117][464927] Avg episode reward: [(0, '664.951')] +[2026-06-07 02:55:03,137][472559] Updated weights for policy 0, policy_version 23858 (0.0008) +[2026-06-07 02:55:03,254][472559] Updated weights for policy 0, policy_version 23868 (0.0008) +[2026-06-07 02:55:03,790][472559] Updated weights for policy 0, policy_version 23878 (0.0007) +[2026-06-07 02:55:03,931][472559] Updated weights for policy 0, policy_version 23890 (0.0008) +[2026-06-07 02:55:04,065][472559] Updated weights for policy 0, policy_version 23902 (0.0008) +[2026-06-07 02:55:04,183][472559] Updated weights for policy 0, policy_version 23912 (0.0008) +[2026-06-07 02:55:04,303][472559] Updated weights for policy 0, policy_version 23922 (0.0008) +[2026-06-07 02:55:04,439][472559] Updated weights for policy 0, policy_version 23934 (0.0008) +[2026-06-07 02:55:04,977][472559] Updated weights for policy 0, policy_version 23945 (0.0008) +[2026-06-07 02:55:05,091][472559] Updated weights for policy 0, policy_version 23955 (0.0008) +[2026-06-07 02:55:05,208][472559] Updated weights for policy 0, policy_version 23965 (0.0008) +[2026-06-07 02:55:05,336][472559] Updated weights for policy 0, policy_version 23976 (0.0008) +[2026-06-07 02:55:05,455][472559] Updated weights for policy 0, policy_version 23986 (0.0008) +[2026-06-07 02:55:05,580][472559] Updated weights for policy 0, policy_version 23997 (0.0008) +[2026-06-07 02:55:06,147][472559] Updated weights for policy 0, policy_version 24009 (0.0008) +[2026-06-07 02:55:06,269][472559] Updated weights for policy 0, policy_version 24020 (0.0008) +[2026-06-07 02:55:06,390][472559] Updated weights for policy 0, policy_version 24030 (0.0008) +[2026-06-07 02:55:06,507][472559] Updated weights for policy 0, policy_version 24040 (0.0008) +[2026-06-07 02:55:06,628][472559] Updated weights for policy 0, policy_version 24050 (0.0008) +[2026-06-07 02:55:06,744][472559] Updated weights for policy 0, policy_version 24060 (0.0008) +[2026-06-07 02:55:07,297][472559] Updated weights for policy 0, policy_version 24072 (0.0009) +[2026-06-07 02:55:07,423][472559] Updated weights for policy 0, policy_version 24083 (0.0008) +[2026-06-07 02:55:07,541][472559] Updated weights for policy 0, policy_version 24093 (0.0008) +[2026-06-07 02:55:07,665][472559] Updated weights for policy 0, policy_version 24104 (0.0008) +[2026-06-07 02:55:07,797][472559] Updated weights for policy 0, policy_version 24115 (0.0008) +[2026-06-07 02:55:07,914][472559] Updated weights for policy 0, policy_version 24125 (0.0008) +[2026-06-07 02:55:08,117][464927] Fps is (10 sec: 29491.0, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 12353536. Throughput: 0: 28182.7. Samples: 12352000. Policy #0 lag: (min: 26.0, avg: 59.4, max: 90.0) +[2026-06-07 02:55:08,117][464927] Avg episode reward: [(0, '623.121')] +[2026-06-07 02:55:08,449][472559] Updated weights for policy 0, policy_version 24135 (0.0007) +[2026-06-07 02:55:08,560][472559] Updated weights for policy 0, policy_version 24145 (0.0008) +[2026-06-07 02:55:08,671][472559] Updated weights for policy 0, policy_version 24155 (0.0008) +[2026-06-07 02:55:08,789][472559] Updated weights for policy 0, policy_version 24165 (0.0008) +[2026-06-07 02:55:08,904][472559] Updated weights for policy 0, policy_version 24175 (0.0008) +[2026-06-07 02:55:09,032][472559] Updated weights for policy 0, policy_version 24186 (0.0008) +[2026-06-07 02:55:09,578][472559] Updated weights for policy 0, policy_version 24196 (0.0007) +[2026-06-07 02:55:09,692][472559] Updated weights for policy 0, policy_version 24206 (0.0006) +[2026-06-07 02:55:09,820][472559] Updated weights for policy 0, policy_version 24217 (0.0008) +[2026-06-07 02:55:09,928][472559] Updated weights for policy 0, policy_version 24227 (0.0008) +[2026-06-07 02:55:10,051][472559] Updated weights for policy 0, policy_version 24237 (0.0008) +[2026-06-07 02:55:10,170][472559] Updated weights for policy 0, policy_version 24247 (0.0008) +[2026-06-07 02:55:10,703][472559] Updated weights for policy 0, policy_version 24257 (0.0008) +[2026-06-07 02:55:10,816][472559] Updated weights for policy 0, policy_version 24267 (0.0008) +[2026-06-07 02:55:10,929][472559] Updated weights for policy 0, policy_version 24277 (0.0008) +[2026-06-07 02:55:11,041][472559] Updated weights for policy 0, policy_version 24287 (0.0008) +[2026-06-07 02:55:11,169][472559] Updated weights for policy 0, policy_version 24298 (0.0008) +[2026-06-07 02:55:11,305][472559] Updated weights for policy 0, policy_version 24310 (0.0008) +[2026-06-07 02:55:11,425][472559] Updated weights for policy 0, policy_version 24320 (0.0008) +[2026-06-07 02:55:11,964][472559] Updated weights for policy 0, policy_version 24330 (0.0008) +[2026-06-07 02:55:12,078][472559] Updated weights for policy 0, policy_version 24340 (0.0008) +[2026-06-07 02:55:12,201][472559] Updated weights for policy 0, policy_version 24351 (0.0008) +[2026-06-07 02:55:12,338][472559] Updated weights for policy 0, policy_version 24363 (0.0008) +[2026-06-07 02:55:12,467][472559] Updated weights for policy 0, policy_version 24374 (0.0008) +[2026-06-07 02:55:12,578][472559] Updated weights for policy 0, policy_version 24384 (0.0008) +[2026-06-07 02:55:13,113][472559] Updated weights for policy 0, policy_version 24394 (0.0008) +[2026-06-07 02:55:13,117][464927] Fps is (10 sec: 29491.3, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 12484608. Throughput: 0: 28296.6. Samples: 12523904. Policy #0 lag: (min: 26.0, avg: 59.4, max: 90.0) +[2026-06-07 02:55:13,117][464927] Avg episode reward: [(0, '657.511')] +[2026-06-07 02:55:13,233][472559] Updated weights for policy 0, policy_version 24405 (0.0008) +[2026-06-07 02:55:13,370][472559] Updated weights for policy 0, policy_version 24417 (0.0008) +[2026-06-07 02:55:13,483][472559] Updated weights for policy 0, policy_version 24427 (0.0008) +[2026-06-07 02:55:13,605][472559] Updated weights for policy 0, policy_version 24437 (0.0009) +[2026-06-07 02:55:13,722][472559] Updated weights for policy 0, policy_version 24447 (0.0008) +[2026-06-07 02:55:14,258][472559] Updated weights for policy 0, policy_version 24457 (0.0008) +[2026-06-07 02:55:14,391][472559] Updated weights for policy 0, policy_version 24469 (0.0009) +[2026-06-07 02:55:14,522][472559] Updated weights for policy 0, policy_version 24480 (0.0008) +[2026-06-07 02:55:14,633][472559] Updated weights for policy 0, policy_version 24490 (0.0008) +[2026-06-07 02:55:14,775][472559] Updated weights for policy 0, policy_version 24502 (0.0008) +[2026-06-07 02:55:14,884][472559] Updated weights for policy 0, policy_version 24512 (0.0008) +[2026-06-07 02:55:15,433][472559] Updated weights for policy 0, policy_version 24523 (0.0008) +[2026-06-07 02:55:15,561][472559] Updated weights for policy 0, policy_version 24534 (0.0004) +[2026-06-07 02:55:15,687][472559] Updated weights for policy 0, policy_version 24545 (0.0006) +[2026-06-07 02:55:15,815][472559] Updated weights for policy 0, policy_version 24556 (0.0010) +[2026-06-07 02:55:15,930][472559] Updated weights for policy 0, policy_version 24566 (0.0006) +[2026-06-07 02:55:16,464][472559] Updated weights for policy 0, policy_version 24577 (0.0006) +[2026-06-07 02:55:16,581][472559] Updated weights for policy 0, policy_version 24587 (0.0008) +[2026-06-07 02:55:16,692][472559] Updated weights for policy 0, policy_version 24597 (0.0008) +[2026-06-07 02:55:16,805][472559] Updated weights for policy 0, policy_version 24607 (0.0008) +[2026-06-07 02:55:16,919][472559] Updated weights for policy 0, policy_version 24617 (0.0008) +[2026-06-07 02:55:17,043][472559] Updated weights for policy 0, policy_version 24628 (0.0008) +[2026-06-07 02:55:17,172][472559] Updated weights for policy 0, policy_version 24639 (0.0008) +[2026-06-07 02:55:17,731][472559] Updated weights for policy 0, policy_version 24650 (0.0008) +[2026-06-07 02:55:17,841][472559] Updated weights for policy 0, policy_version 24660 (0.0008) +[2026-06-07 02:55:17,956][472559] Updated weights for policy 0, policy_version 24670 (0.0008) +[2026-06-07 02:55:18,070][472559] Updated weights for policy 0, policy_version 24680 (0.0008) +[2026-06-07 02:55:18,117][464927] Fps is (10 sec: 26214.3, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 12615680. Throughput: 0: 28208.3. Samples: 12600576. Policy #0 lag: (min: 26.0, avg: 59.4, max: 90.0) +[2026-06-07 02:55:18,117][464927] Avg episode reward: [(0, '703.875')] +[2026-06-07 02:55:18,190][472559] Updated weights for policy 0, policy_version 24691 (0.0008) +[2026-06-07 02:55:18,307][472559] Updated weights for policy 0, policy_version 24701 (0.0008) +[2026-06-07 02:55:18,846][472559] Updated weights for policy 0, policy_version 24711 (0.0008) +[2026-06-07 02:55:18,959][472559] Updated weights for policy 0, policy_version 24721 (0.0008) +[2026-06-07 02:55:19,084][472559] Updated weights for policy 0, policy_version 24732 (0.0008) +[2026-06-07 02:55:19,194][472559] Updated weights for policy 0, policy_version 24742 (0.0008) +[2026-06-07 02:55:19,305][472559] Updated weights for policy 0, policy_version 24752 (0.0009) +[2026-06-07 02:55:19,426][472559] Updated weights for policy 0, policy_version 24762 (0.0009) +[2026-06-07 02:55:20,000][472559] Updated weights for policy 0, policy_version 24774 (0.0009) +[2026-06-07 02:55:20,125][472559] Updated weights for policy 0, policy_version 24785 (0.0008) +[2026-06-07 02:55:20,258][472559] Updated weights for policy 0, policy_version 24797 (0.0008) +[2026-06-07 02:55:20,389][472559] Updated weights for policy 0, policy_version 24808 (0.0008) +[2026-06-07 02:55:20,501][472559] Updated weights for policy 0, policy_version 24818 (0.0008) +[2026-06-07 02:55:20,621][472559] Updated weights for policy 0, policy_version 24829 (0.0008) +[2026-06-07 02:55:21,159][472559] Updated weights for policy 0, policy_version 24839 (0.0008) +[2026-06-07 02:55:21,294][472559] Updated weights for policy 0, policy_version 24851 (0.0008) +[2026-06-07 02:55:21,413][472559] Updated weights for policy 0, policy_version 24861 (0.0008) +[2026-06-07 02:55:21,519][472559] Updated weights for policy 0, policy_version 24871 (0.0008) +[2026-06-07 02:55:21,637][472559] Updated weights for policy 0, policy_version 24881 (0.0008) +[2026-06-07 02:55:21,749][472559] Updated weights for policy 0, policy_version 24891 (0.0008) +[2026-06-07 02:55:22,304][472559] Updated weights for policy 0, policy_version 24901 (0.0008) +[2026-06-07 02:55:22,415][472559] Updated weights for policy 0, policy_version 24911 (0.0008) +[2026-06-07 02:55:22,522][472559] Updated weights for policy 0, policy_version 24921 (0.0009) +[2026-06-07 02:55:22,632][472559] Updated weights for policy 0, policy_version 24931 (0.0008) +[2026-06-07 02:55:22,773][472559] Updated weights for policy 0, policy_version 24943 (0.0008) +[2026-06-07 02:55:22,907][472559] Updated weights for policy 0, policy_version 24955 (0.0008) +[2026-06-07 02:55:23,117][464927] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 12779520. Throughput: 0: 28177.1. Samples: 12775040. Policy #0 lag: (min: 26.0, avg: 59.4, max: 90.0) +[2026-06-07 02:55:23,117][464927] Avg episode reward: [(0, '709.085')] +[2026-06-07 02:55:23,122][472025] Saving new best policy, reward=709.085! +[2026-06-07 02:55:23,468][472559] Updated weights for policy 0, policy_version 24966 (0.0007) +[2026-06-07 02:55:23,588][472559] Updated weights for policy 0, policy_version 24977 (0.0008) +[2026-06-07 02:55:23,716][472559] Updated weights for policy 0, policy_version 24988 (0.0008) +[2026-06-07 02:55:23,840][472559] Updated weights for policy 0, policy_version 24998 (0.0008) +[2026-06-07 02:55:23,978][472559] Updated weights for policy 0, policy_version 25010 (0.0008) +[2026-06-07 02:55:24,093][472559] Updated weights for policy 0, policy_version 25020 (0.0008) +[2026-06-07 02:55:24,650][472559] Updated weights for policy 0, policy_version 25032 (0.0008) +[2026-06-07 02:55:24,771][472559] Updated weights for policy 0, policy_version 25043 (0.0008) +[2026-06-07 02:55:24,897][472559] Updated weights for policy 0, policy_version 25054 (0.0008) +[2026-06-07 02:55:25,027][472559] Updated weights for policy 0, policy_version 25065 (0.0008) +[2026-06-07 02:55:25,152][472559] Updated weights for policy 0, policy_version 25076 (0.0008) +[2026-06-07 02:55:25,282][472559] Updated weights for policy 0, policy_version 25087 (0.0008) +[2026-06-07 02:55:25,803][472559] Updated weights for policy 0, policy_version 25097 (0.0008) +[2026-06-07 02:55:25,930][472559] Updated weights for policy 0, policy_version 25108 (0.0008) +[2026-06-07 02:55:26,050][472559] Updated weights for policy 0, policy_version 25119 (0.0008) +[2026-06-07 02:55:26,167][472559] Updated weights for policy 0, policy_version 25129 (0.0008) +[2026-06-07 02:55:26,283][472559] Updated weights for policy 0, policy_version 25139 (0.0008) +[2026-06-07 02:55:26,405][472559] Updated weights for policy 0, policy_version 25150 (0.0008) +[2026-06-07 02:55:26,978][472559] Updated weights for policy 0, policy_version 25161 (0.0008) +[2026-06-07 02:55:27,091][472559] Updated weights for policy 0, policy_version 25171 (0.0008) +[2026-06-07 02:55:27,205][472559] Updated weights for policy 0, policy_version 25181 (0.0008) +[2026-06-07 02:55:27,343][472559] Updated weights for policy 0, policy_version 25193 (0.0008) +[2026-06-07 02:55:27,477][472559] Updated weights for policy 0, policy_version 25205 (0.0008) +[2026-06-07 02:55:27,593][472559] Updated weights for policy 0, policy_version 25215 (0.0008) +[2026-06-07 02:55:28,117][464927] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 12910592. Throughput: 0: 28316.4. Samples: 12948224. Policy #0 lag: (min: 30.0, avg: 42.5, max: 94.0) +[2026-06-07 02:55:28,117][464927] Avg episode reward: [(0, '719.145')] +[2026-06-07 02:55:28,138][472559] Updated weights for policy 0, policy_version 25227 (0.0007) +[2026-06-07 02:55:28,252][472559] Updated weights for policy 0, policy_version 25237 (0.0009) +[2026-06-07 02:55:28,368][472559] Updated weights for policy 0, policy_version 25247 (0.0008) +[2026-06-07 02:55:28,500][472559] Updated weights for policy 0, policy_version 25258 (0.0008) +[2026-06-07 02:55:28,622][472559] Updated weights for policy 0, policy_version 25268 (0.0008) +[2026-06-07 02:55:28,738][472559] Updated weights for policy 0, policy_version 25278 (0.0008) +[2026-06-07 02:55:28,754][472025] Saving new best policy, reward=719.145! +[2026-06-07 02:55:29,284][472559] Updated weights for policy 0, policy_version 25289 (0.0008) +[2026-06-07 02:55:29,399][472559] Updated weights for policy 0, policy_version 25299 (0.0008) +[2026-06-07 02:55:29,523][472559] Updated weights for policy 0, policy_version 25310 (0.0008) +[2026-06-07 02:55:29,644][472559] Updated weights for policy 0, policy_version 25321 (0.0008) +[2026-06-07 02:55:29,759][472559] Updated weights for policy 0, policy_version 25331 (0.0008) +[2026-06-07 02:55:29,878][472559] Updated weights for policy 0, policy_version 25341 (0.0008) +[2026-06-07 02:55:30,426][472559] Updated weights for policy 0, policy_version 25351 (0.0008) +[2026-06-07 02:55:30,537][472559] Updated weights for policy 0, policy_version 25361 (0.0009) +[2026-06-07 02:55:30,691][472559] Updated weights for policy 0, policy_version 25375 (0.0008) +[2026-06-07 02:55:30,809][472559] Updated weights for policy 0, policy_version 25385 (0.0008) +[2026-06-07 02:55:30,927][472559] Updated weights for policy 0, policy_version 25395 (0.0008) +[2026-06-07 02:55:31,062][472559] Updated weights for policy 0, policy_version 25407 (0.0008) +[2026-06-07 02:55:31,609][472559] Updated weights for policy 0, policy_version 25417 (0.0007) +[2026-06-07 02:55:31,738][472559] Updated weights for policy 0, policy_version 25428 (0.0009) +[2026-06-07 02:55:31,847][472559] Updated weights for policy 0, policy_version 25438 (0.0008) +[2026-06-07 02:55:31,967][472559] Updated weights for policy 0, policy_version 25449 (0.0008) +[2026-06-07 02:55:32,103][472559] Updated weights for policy 0, policy_version 25461 (0.0008) +[2026-06-07 02:55:32,673][472559] Updated weights for policy 0, policy_version 25473 (0.0008) +[2026-06-07 02:55:32,797][472559] Updated weights for policy 0, policy_version 25484 (0.0008) +[2026-06-07 02:55:32,920][472559] Updated weights for policy 0, policy_version 25495 (0.0008) +[2026-06-07 02:55:33,041][472559] Updated weights for policy 0, policy_version 25506 (0.0008) +[2026-06-07 02:55:33,117][464927] Fps is (10 sec: 26214.4, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 13041664. Throughput: 0: 28239.7. Samples: 13024768. Policy #0 lag: (min: 30.0, avg: 42.5, max: 94.0) +[2026-06-07 02:55:33,117][464927] Avg episode reward: [(0, '738.083')] +[2026-06-07 02:55:33,165][472559] Updated weights for policy 0, policy_version 25517 (0.0008) +[2026-06-07 02:55:33,281][472559] Updated weights for policy 0, policy_version 25527 (0.0008) +[2026-06-07 02:55:33,376][472025] Saving new best policy, reward=738.083! +[2026-06-07 02:55:33,856][472559] Updated weights for policy 0, policy_version 25539 (0.0008) +[2026-06-07 02:55:33,986][472559] Updated weights for policy 0, policy_version 25551 (0.0009) +[2026-06-07 02:55:34,101][472559] Updated weights for policy 0, policy_version 25561 (0.0008) +[2026-06-07 02:55:34,227][472559] Updated weights for policy 0, policy_version 25572 (0.0008) +[2026-06-07 02:55:34,346][472559] Updated weights for policy 0, policy_version 25583 (0.0009) +[2026-06-07 02:55:34,463][472559] Updated weights for policy 0, policy_version 25593 (0.0008) +[2026-06-07 02:55:35,002][472559] Updated weights for policy 0, policy_version 25603 (0.0006) +[2026-06-07 02:55:35,114][472559] Updated weights for policy 0, policy_version 25613 (0.0008) +[2026-06-07 02:55:35,226][472559] Updated weights for policy 0, policy_version 25623 (0.0009) +[2026-06-07 02:55:35,348][472559] Updated weights for policy 0, policy_version 25634 (0.0008) +[2026-06-07 02:55:35,486][472559] Updated weights for policy 0, policy_version 25646 (0.0008) +[2026-06-07 02:55:35,634][472559] Updated weights for policy 0, policy_version 25659 (0.0008) +[2026-06-07 02:55:36,191][472559] Updated weights for policy 0, policy_version 25669 (0.0008) +[2026-06-07 02:55:36,297][472559] Updated weights for policy 0, policy_version 25679 (0.0008) +[2026-06-07 02:55:36,423][472559] Updated weights for policy 0, policy_version 25690 (0.0008) +[2026-06-07 02:55:36,559][472559] Updated weights for policy 0, policy_version 25702 (0.0008) +[2026-06-07 02:55:36,673][472559] Updated weights for policy 0, policy_version 25712 (0.0008) +[2026-06-07 02:55:36,783][472559] Updated weights for policy 0, policy_version 25722 (0.0008) +[2026-06-07 02:55:37,341][472559] Updated weights for policy 0, policy_version 25733 (0.0009) +[2026-06-07 02:55:37,448][472559] Updated weights for policy 0, policy_version 25743 (0.0008) +[2026-06-07 02:55:37,562][472559] Updated weights for policy 0, policy_version 25753 (0.0008) +[2026-06-07 02:55:37,672][472559] Updated weights for policy 0, policy_version 25763 (0.0008) +[2026-06-07 02:55:37,810][472559] Updated weights for policy 0, policy_version 25775 (0.0009) +[2026-06-07 02:55:37,922][472559] Updated weights for policy 0, policy_version 25785 (0.0009) +[2026-06-07 02:55:38,117][464927] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 13205504. Throughput: 0: 28211.1. Samples: 13199232. Policy #0 lag: (min: 30.0, avg: 42.5, max: 94.0) +[2026-06-07 02:55:38,117][464927] Avg episode reward: [(0, '752.351')] +[2026-06-07 02:55:38,123][472025] Saving new best policy, reward=752.351! +[2026-06-07 02:55:38,500][472559] Updated weights for policy 0, policy_version 25797 (0.0009) +[2026-06-07 02:55:38,605][472559] Updated weights for policy 0, policy_version 25807 (0.0009) +[2026-06-07 02:55:38,729][472559] Updated weights for policy 0, policy_version 25818 (0.0008) +[2026-06-07 02:55:38,867][472559] Updated weights for policy 0, policy_version 25830 (0.0008) +[2026-06-07 02:55:38,992][472559] Updated weights for policy 0, policy_version 25841 (0.0008) +[2026-06-07 02:55:39,121][472559] Updated weights for policy 0, policy_version 25853 (0.0008) +[2026-06-07 02:55:39,709][472559] Updated weights for policy 0, policy_version 25866 (0.0008) +[2026-06-07 02:55:39,819][472559] Updated weights for policy 0, policy_version 25876 (0.0008) +[2026-06-07 02:55:39,945][472559] Updated weights for policy 0, policy_version 25887 (0.0008) +[2026-06-07 02:55:40,057][472559] Updated weights for policy 0, policy_version 25897 (0.0008) +[2026-06-07 02:55:40,181][472559] Updated weights for policy 0, policy_version 25908 (0.0008) +[2026-06-07 02:55:40,315][472559] Updated weights for policy 0, policy_version 25920 (0.0008) +[2026-06-07 02:55:40,877][472559] Updated weights for policy 0, policy_version 25930 (0.0008) +[2026-06-07 02:55:40,996][472559] Updated weights for policy 0, policy_version 25941 (0.0008) +[2026-06-07 02:55:41,110][472559] Updated weights for policy 0, policy_version 25951 (0.0008) +[2026-06-07 02:55:41,232][472559] Updated weights for policy 0, policy_version 25962 (0.0008) +[2026-06-07 02:55:41,346][472559] Updated weights for policy 0, policy_version 25972 (0.0008) +[2026-06-07 02:55:41,465][472559] Updated weights for policy 0, policy_version 25983 (0.0008) +[2026-06-07 02:55:42,015][472559] Updated weights for policy 0, policy_version 25993 (0.0008) +[2026-06-07 02:55:42,139][472559] Updated weights for policy 0, policy_version 26004 (0.0008) +[2026-06-07 02:55:42,264][472559] Updated weights for policy 0, policy_version 26015 (0.0008) +[2026-06-07 02:55:42,378][472559] Updated weights for policy 0, policy_version 26025 (0.0008) +[2026-06-07 02:55:42,499][472559] Updated weights for policy 0, policy_version 26036 (0.0008) +[2026-06-07 02:55:42,621][472559] Updated weights for policy 0, policy_version 26046 (0.0009) +[2026-06-07 02:55:43,117][464927] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 13336576. Throughput: 0: 28299.3. Samples: 13374464. Policy #0 lag: (min: 63.0, avg: 74.7, max: 127.0) +[2026-06-07 02:55:43,118][464927] Avg episode reward: [(0, '793.767')] +[2026-06-07 02:55:43,170][472559] Updated weights for policy 0, policy_version 26056 (0.0008) +[2026-06-07 02:55:43,283][472559] Updated weights for policy 0, policy_version 26066 (0.0009) +[2026-06-07 02:55:43,409][472559] Updated weights for policy 0, policy_version 26077 (0.0008) +[2026-06-07 02:55:43,516][472559] Updated weights for policy 0, policy_version 26087 (0.0008) +[2026-06-07 02:55:43,631][472559] Updated weights for policy 0, policy_version 26097 (0.0008) +[2026-06-07 02:55:43,771][472559] Updated weights for policy 0, policy_version 26109 (0.0008) +[2026-06-07 02:55:43,799][472025] Saving new best policy, reward=793.767! +[2026-06-07 02:55:44,338][472559] Updated weights for policy 0, policy_version 26120 (0.0006) +[2026-06-07 02:55:44,446][472559] Updated weights for policy 0, policy_version 26130 (0.0008) +[2026-06-07 02:55:44,559][472559] Updated weights for policy 0, policy_version 26140 (0.0008) +[2026-06-07 02:55:44,706][472559] Updated weights for policy 0, policy_version 26153 (0.0008) +[2026-06-07 02:55:44,830][472559] Updated weights for policy 0, policy_version 26164 (0.0008) +[2026-06-07 02:55:44,948][472559] Updated weights for policy 0, policy_version 26175 (0.0008) +[2026-06-07 02:55:45,513][472559] Updated weights for policy 0, policy_version 26185 (0.0008) +[2026-06-07 02:55:45,623][472559] Updated weights for policy 0, policy_version 26195 (0.0008) +[2026-06-07 02:55:45,746][472559] Updated weights for policy 0, policy_version 26206 (0.0008) +[2026-06-07 02:55:45,864][472559] Updated weights for policy 0, policy_version 26216 (0.0008) +[2026-06-07 02:55:45,978][472559] Updated weights for policy 0, policy_version 26226 (0.0008) +[2026-06-07 02:55:46,102][472559] Updated weights for policy 0, policy_version 26237 (0.0008) +[2026-06-07 02:55:46,650][472559] Updated weights for policy 0, policy_version 26248 (0.0008) +[2026-06-07 02:55:46,764][472559] Updated weights for policy 0, policy_version 26258 (0.0009) +[2026-06-07 02:55:46,887][472559] Updated weights for policy 0, policy_version 26269 (0.0008) +[2026-06-07 02:55:46,995][472559] Updated weights for policy 0, policy_version 26279 (0.0008) +[2026-06-07 02:55:47,121][472559] Updated weights for policy 0, policy_version 26290 (0.0008) +[2026-06-07 02:55:47,235][472559] Updated weights for policy 0, policy_version 26300 (0.0008) +[2026-06-07 02:55:47,794][472559] Updated weights for policy 0, policy_version 26311 (0.0007) +[2026-06-07 02:55:47,899][472559] Updated weights for policy 0, policy_version 26321 (0.0008) +[2026-06-07 02:55:48,023][472559] Updated weights for policy 0, policy_version 26332 (0.0008) +[2026-06-07 02:55:48,117][464927] Fps is (10 sec: 26214.3, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 13467648. Throughput: 0: 28285.1. Samples: 13451520. Policy #0 lag: (min: 63.0, avg: 74.7, max: 127.0) +[2026-06-07 02:55:48,118][464927] Avg episode reward: [(0, '850.461')] +[2026-06-07 02:55:48,159][472559] Updated weights for policy 0, policy_version 26344 (0.0008) +[2026-06-07 02:55:48,274][472559] Updated weights for policy 0, policy_version 26354 (0.0008) +[2026-06-07 02:55:48,397][472559] Updated weights for policy 0, policy_version 26365 (0.0008) +[2026-06-07 02:55:48,426][472025] Saving new best policy, reward=850.461! +[2026-06-07 02:55:48,972][472559] Updated weights for policy 0, policy_version 26377 (0.0007) +[2026-06-07 02:55:49,093][472559] Updated weights for policy 0, policy_version 26388 (0.0008) +[2026-06-07 02:55:49,216][472559] Updated weights for policy 0, policy_version 26399 (0.0008) +[2026-06-07 02:55:49,344][472559] Updated weights for policy 0, policy_version 26410 (0.0004) +[2026-06-07 02:55:49,461][472559] Updated weights for policy 0, policy_version 26420 (0.0004) +[2026-06-07 02:55:49,579][472559] Updated weights for policy 0, policy_version 26430 (0.0004) +[2026-06-07 02:55:50,112][472559] Updated weights for policy 0, policy_version 26441 (0.0004) +[2026-06-07 02:55:50,220][472559] Updated weights for policy 0, policy_version 26451 (0.0004) +[2026-06-07 02:55:50,342][472559] Updated weights for policy 0, policy_version 26462 (0.0004) +[2026-06-07 02:55:50,461][472559] Updated weights for policy 0, policy_version 26472 (0.0004) +[2026-06-07 02:55:50,581][472559] Updated weights for policy 0, policy_version 26482 (0.0004) +[2026-06-07 02:55:50,706][472559] Updated weights for policy 0, policy_version 26493 (0.0004) +[2026-06-07 02:55:51,229][472559] Updated weights for policy 0, policy_version 26503 (0.0004) +[2026-06-07 02:55:51,344][472559] Updated weights for policy 0, policy_version 26513 (0.0004) +[2026-06-07 02:55:51,455][472559] Updated weights for policy 0, policy_version 26523 (0.0004) +[2026-06-07 02:55:51,572][472559] Updated weights for policy 0, policy_version 26533 (0.0004) +[2026-06-07 02:55:51,683][472559] Updated weights for policy 0, policy_version 26543 (0.0004) +[2026-06-07 02:55:51,798][472559] Updated weights for policy 0, policy_version 26553 (0.0004) +[2026-06-07 02:55:52,335][472559] Updated weights for policy 0, policy_version 26564 (0.0004) +[2026-06-07 02:55:52,442][472559] Updated weights for policy 0, policy_version 26574 (0.0004) +[2026-06-07 02:55:52,586][472559] Updated weights for policy 0, policy_version 26586 (0.0004) +[2026-06-07 02:55:52,714][472559] Updated weights for policy 0, policy_version 26597 (0.0004) +[2026-06-07 02:55:52,835][472559] Updated weights for policy 0, policy_version 26608 (0.0004) +[2026-06-07 02:55:52,969][472559] Updated weights for policy 0, policy_version 26619 (0.0004) +[2026-06-07 02:55:53,117][464927] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 13631488. Throughput: 0: 28310.8. Samples: 13625984. Policy #0 lag: (min: 63.0, avg: 74.7, max: 127.0) +[2026-06-07 02:55:53,118][464927] Avg episode reward: [(0, '835.754')] +[2026-06-07 02:55:53,491][472559] Updated weights for policy 0, policy_version 26629 (0.0007) +[2026-06-07 02:55:53,610][472559] Updated weights for policy 0, policy_version 26640 (0.0008) +[2026-06-07 02:55:53,720][472559] Updated weights for policy 0, policy_version 26650 (0.0008) +[2026-06-07 02:55:53,855][472559] Updated weights for policy 0, policy_version 26662 (0.0008) +[2026-06-07 02:55:53,971][472559] Updated weights for policy 0, policy_version 26672 (0.0008) +[2026-06-07 02:55:54,095][472559] Updated weights for policy 0, policy_version 26683 (0.0008) +[2026-06-07 02:55:54,640][472559] Updated weights for policy 0, policy_version 26693 (0.0007) +[2026-06-07 02:55:54,763][472559] Updated weights for policy 0, policy_version 26704 (0.0008) +[2026-06-07 02:55:54,871][472559] Updated weights for policy 0, policy_version 26714 (0.0008) +[2026-06-07 02:55:54,998][472559] Updated weights for policy 0, policy_version 26725 (0.0008) +[2026-06-07 02:55:55,131][472559] Updated weights for policy 0, policy_version 26737 (0.0008) +[2026-06-07 02:55:55,241][472559] Updated weights for policy 0, policy_version 26747 (0.0008) +[2026-06-07 02:55:55,828][472559] Updated weights for policy 0, policy_version 26758 (0.0008) +[2026-06-07 02:55:55,939][472559] Updated weights for policy 0, policy_version 26768 (0.0008) +[2026-06-07 02:55:56,075][472559] Updated weights for policy 0, policy_version 26780 (0.0009) +[2026-06-07 02:55:56,196][472559] Updated weights for policy 0, policy_version 26791 (0.0008) +[2026-06-07 02:55:56,308][472559] Updated weights for policy 0, policy_version 26801 (0.0008) +[2026-06-07 02:55:56,423][472559] Updated weights for policy 0, policy_version 26811 (0.0009) +[2026-06-07 02:55:56,967][472559] Updated weights for policy 0, policy_version 26821 (0.0008) +[2026-06-07 02:55:57,090][472559] Updated weights for policy 0, policy_version 26832 (0.0008) +[2026-06-07 02:55:57,213][472559] Updated weights for policy 0, policy_version 26843 (0.0009) +[2026-06-07 02:55:57,328][472559] Updated weights for policy 0, policy_version 26853 (0.0008) +[2026-06-07 02:55:57,447][472559] Updated weights for policy 0, policy_version 26864 (0.0008) +[2026-06-07 02:55:57,560][472559] Updated weights for policy 0, policy_version 26874 (0.0009) +[2026-06-07 02:55:58,117][464927] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 13762560. Throughput: 0: 28401.7. Samples: 13801984. Policy #0 lag: (min: 63.0, avg: 74.7, max: 127.0) +[2026-06-07 02:55:58,117][464927] Avg episode reward: [(0, '819.438')] +[2026-06-07 02:55:58,139][472559] Updated weights for policy 0, policy_version 26885 (0.0008) +[2026-06-07 02:55:58,277][472559] Updated weights for policy 0, policy_version 26897 (0.0008) +[2026-06-07 02:55:58,402][472559] Updated weights for policy 0, policy_version 26908 (0.0008) +[2026-06-07 02:55:58,529][472559] Updated weights for policy 0, policy_version 26919 (0.0009) +[2026-06-07 02:55:58,650][472559] Updated weights for policy 0, policy_version 26930 (0.0008) +[2026-06-07 02:55:58,769][472559] Updated weights for policy 0, policy_version 26940 (0.0008) +[2026-06-07 02:55:59,326][472559] Updated weights for policy 0, policy_version 26953 (0.0007) +[2026-06-07 02:55:59,445][472559] Updated weights for policy 0, policy_version 26964 (0.0007) +[2026-06-07 02:55:59,557][472559] Updated weights for policy 0, policy_version 26974 (0.0006) +[2026-06-07 02:55:59,666][472559] Updated weights for policy 0, policy_version 26984 (0.0009) +[2026-06-07 02:55:59,789][472559] Updated weights for policy 0, policy_version 26994 (0.0009) +[2026-06-07 02:55:59,900][472559] Updated weights for policy 0, policy_version 27004 (0.0010) +[2026-06-07 02:56:00,469][472559] Updated weights for policy 0, policy_version 27015 (0.0008) +[2026-06-07 02:56:00,594][472559] Updated weights for policy 0, policy_version 27025 (0.0008) +[2026-06-07 02:56:00,729][472559] Updated weights for policy 0, policy_version 27038 (0.0008) +[2026-06-07 02:56:00,853][472559] Updated weights for policy 0, policy_version 27049 (0.0009) +[2026-06-07 02:56:00,977][472559] Updated weights for policy 0, policy_version 27060 (0.0008) +[2026-06-07 02:56:01,106][472559] Updated weights for policy 0, policy_version 27072 (0.0008) +[2026-06-07 02:56:01,670][472559] Updated weights for policy 0, policy_version 27084 (0.0009) +[2026-06-07 02:56:01,790][472559] Updated weights for policy 0, policy_version 27095 (0.0008) +[2026-06-07 02:56:01,899][472559] Updated weights for policy 0, policy_version 27105 (0.0008) +[2026-06-07 02:56:02,014][472559] Updated weights for policy 0, policy_version 27115 (0.0009) +[2026-06-07 02:56:02,137][472559] Updated weights for policy 0, policy_version 27126 (0.0008) +[2026-06-07 02:56:02,246][472559] Updated weights for policy 0, policy_version 27136 (0.0007) +[2026-06-07 02:56:02,826][472559] Updated weights for policy 0, policy_version 27148 (0.0008) +[2026-06-07 02:56:02,962][472559] Updated weights for policy 0, policy_version 27160 (0.0008) +[2026-06-07 02:56:03,086][472559] Updated weights for policy 0, policy_version 27171 (0.0008) +[2026-06-07 02:56:03,117][464927] Fps is (10 sec: 26214.2, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 13893632. Throughput: 0: 28393.2. Samples: 13878272. Policy #0 lag: (min: 10.0, avg: 46.2, max: 74.0) +[2026-06-07 02:56:03,118][464927] Avg episode reward: [(0, '835.009')] +[2026-06-07 02:56:03,224][472559] Updated weights for policy 0, policy_version 27183 (0.0008) +[2026-06-07 02:56:03,341][472559] Updated weights for policy 0, policy_version 27193 (0.0008) +[2026-06-07 02:56:03,892][472559] Updated weights for policy 0, policy_version 27204 (0.0008) +[2026-06-07 02:56:04,022][472559] Updated weights for policy 0, policy_version 27216 (0.0009) +[2026-06-07 02:56:04,136][472559] Updated weights for policy 0, policy_version 27226 (0.0008) +[2026-06-07 02:56:04,271][472559] Updated weights for policy 0, policy_version 27238 (0.0009) +[2026-06-07 02:56:04,381][472559] Updated weights for policy 0, policy_version 27248 (0.0008) +[2026-06-07 02:56:04,501][472559] Updated weights for policy 0, policy_version 27258 (0.0008) +[2026-06-07 02:56:05,065][472559] Updated weights for policy 0, policy_version 27269 (0.0007) +[2026-06-07 02:56:05,176][472559] Updated weights for policy 0, policy_version 27279 (0.0008) +[2026-06-07 02:56:05,300][472559] Updated weights for policy 0, policy_version 27290 (0.0008) +[2026-06-07 02:56:05,424][472559] Updated weights for policy 0, policy_version 27301 (0.0008) +[2026-06-07 02:56:05,558][472559] Updated weights for policy 0, policy_version 27313 (0.0009) +[2026-06-07 02:56:05,681][472559] Updated weights for policy 0, policy_version 27324 (0.0008) +[2026-06-07 02:56:06,242][472559] Updated weights for policy 0, policy_version 27336 (0.0008) +[2026-06-07 02:56:06,381][472559] Updated weights for policy 0, policy_version 27348 (0.0008) +[2026-06-07 02:56:06,493][472559] Updated weights for policy 0, policy_version 27358 (0.0008) +[2026-06-07 02:56:06,615][472559] Updated weights for policy 0, policy_version 27369 (0.0008) +[2026-06-07 02:56:06,732][472559] Updated weights for policy 0, policy_version 27379 (0.0008) +[2026-06-07 02:56:06,859][472559] Updated weights for policy 0, policy_version 27390 (0.0009) +[2026-06-07 02:56:07,442][472559] Updated weights for policy 0, policy_version 27403 (0.0008) +[2026-06-07 02:56:07,575][472559] Updated weights for policy 0, policy_version 27415 (0.0008) +[2026-06-07 02:56:07,703][472559] Updated weights for policy 0, policy_version 27426 (0.0009) +[2026-06-07 02:56:07,849][472559] Updated weights for policy 0, policy_version 27438 (0.0009) +[2026-06-07 02:56:07,959][472559] Updated weights for policy 0, policy_version 27448 (0.0008) +[2026-06-07 02:56:08,117][464927] Fps is (10 sec: 29490.6, 60 sec: 28398.8, 300 sec: 28324.9). Total num frames: 14057472. Throughput: 0: 28350.4. Samples: 14050816. Policy #0 lag: (min: 10.0, avg: 46.2, max: 74.0) +[2026-06-07 02:56:08,118][464927] Avg episode reward: [(0, '861.785')] +[2026-06-07 02:56:08,123][472025] Saving new best policy, reward=861.785! +[2026-06-07 02:56:08,499][472559] Updated weights for policy 0, policy_version 27458 (0.0008) +[2026-06-07 02:56:08,610][472559] Updated weights for policy 0, policy_version 27468 (0.0008) +[2026-06-07 02:56:08,742][472559] Updated weights for policy 0, policy_version 27480 (0.0008) +[2026-06-07 02:56:08,853][472559] Updated weights for policy 0, policy_version 27490 (0.0008) +[2026-06-07 02:56:09,012][472559] Updated weights for policy 0, policy_version 27504 (0.0008) +[2026-06-07 02:56:09,135][472559] Updated weights for policy 0, policy_version 27515 (0.0009) +[2026-06-07 02:56:09,700][472559] Updated weights for policy 0, policy_version 27525 (0.0008) +[2026-06-07 02:56:09,819][472559] Updated weights for policy 0, policy_version 27536 (0.0008) +[2026-06-07 02:56:09,942][472559] Updated weights for policy 0, policy_version 27547 (0.0008) +[2026-06-07 02:56:10,074][472559] Updated weights for policy 0, policy_version 27559 (0.0008) +[2026-06-07 02:56:10,188][472559] Updated weights for policy 0, policy_version 27569 (0.0009) +[2026-06-07 02:56:10,303][472559] Updated weights for policy 0, policy_version 27579 (0.0008) +[2026-06-07 02:56:10,862][472559] Updated weights for policy 0, policy_version 27590 (0.0008) +[2026-06-07 02:56:10,983][472559] Updated weights for policy 0, policy_version 27601 (0.0008) +[2026-06-07 02:56:11,108][472559] Updated weights for policy 0, policy_version 27612 (0.0008) +[2026-06-07 02:56:11,217][472559] Updated weights for policy 0, policy_version 27622 (0.0008) +[2026-06-07 02:56:11,346][472559] Updated weights for policy 0, policy_version 27633 (0.0008) +[2026-06-07 02:56:11,479][472559] Updated weights for policy 0, policy_version 27645 (0.0008) +[2026-06-07 02:56:12,046][472559] Updated weights for policy 0, policy_version 27657 (0.0008) +[2026-06-07 02:56:12,158][472559] Updated weights for policy 0, policy_version 27667 (0.0008) +[2026-06-07 02:56:12,270][472559] Updated weights for policy 0, policy_version 27677 (0.0008) +[2026-06-07 02:56:12,380][472559] Updated weights for policy 0, policy_version 27687 (0.0008) +[2026-06-07 02:56:12,514][472559] Updated weights for policy 0, policy_version 27699 (0.0008) +[2026-06-07 02:56:12,643][472559] Updated weights for policy 0, policy_version 27710 (0.0008) +[2026-06-07 02:56:13,117][464927] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 14188544. Throughput: 0: 28387.5. Samples: 14225664. Policy #0 lag: (min: 10.0, avg: 46.2, max: 74.0) +[2026-06-07 02:56:13,117][464927] Avg episode reward: [(0, '901.107')] +[2026-06-07 02:56:13,219][472559] Updated weights for policy 0, policy_version 27722 (0.0007) +[2026-06-07 02:56:13,339][472559] Updated weights for policy 0, policy_version 27733 (0.0008) +[2026-06-07 02:56:13,458][472559] Updated weights for policy 0, policy_version 27744 (0.0008) +[2026-06-07 02:56:13,576][472559] Updated weights for policy 0, policy_version 27754 (0.0008) +[2026-06-07 02:56:13,691][472559] Updated weights for policy 0, policy_version 27764 (0.0008) +[2026-06-07 02:56:13,812][472559] Updated weights for policy 0, policy_version 27775 (0.0008) +[2026-06-07 02:56:13,819][472025] Saving new best policy, reward=901.107! +[2026-06-07 02:56:14,367][472559] Updated weights for policy 0, policy_version 27786 (0.0007) +[2026-06-07 02:56:14,482][472559] Updated weights for policy 0, policy_version 27796 (0.0008) +[2026-06-07 02:56:14,596][472559] Updated weights for policy 0, policy_version 27806 (0.0008) +[2026-06-07 02:56:14,729][472559] Updated weights for policy 0, policy_version 27818 (0.0008) +[2026-06-07 02:56:14,855][472559] Updated weights for policy 0, policy_version 27829 (0.0008) +[2026-06-07 02:56:15,423][472559] Updated weights for policy 0, policy_version 27841 (0.0008) +[2026-06-07 02:56:15,536][472559] Updated weights for policy 0, policy_version 27851 (0.0008) +[2026-06-07 02:56:15,657][472559] Updated weights for policy 0, policy_version 27862 (0.0008) +[2026-06-07 02:56:15,790][472559] Updated weights for policy 0, policy_version 27874 (0.0008) +[2026-06-07 02:56:15,912][472559] Updated weights for policy 0, policy_version 27885 (0.0008) +[2026-06-07 02:56:16,029][472559] Updated weights for policy 0, policy_version 27895 (0.0008) +[2026-06-07 02:56:16,566][472559] Updated weights for policy 0, policy_version 27905 (0.0008) +[2026-06-07 02:56:16,676][472559] Updated weights for policy 0, policy_version 27915 (0.0008) +[2026-06-07 02:56:16,780][472559] Updated weights for policy 0, policy_version 27925 (0.0008) +[2026-06-07 02:56:16,897][472559] Updated weights for policy 0, policy_version 27935 (0.0010) +[2026-06-07 02:56:17,010][472559] Updated weights for policy 0, policy_version 27945 (0.0008) +[2026-06-07 02:56:17,139][472559] Updated weights for policy 0, policy_version 27956 (0.0008) +[2026-06-07 02:56:17,271][472559] Updated weights for policy 0, policy_version 27968 (0.0008) +[2026-06-07 02:56:17,844][472559] Updated weights for policy 0, policy_version 27979 (0.0008) +[2026-06-07 02:56:17,963][472559] Updated weights for policy 0, policy_version 27989 (0.0008) +[2026-06-07 02:56:18,090][472559] Updated weights for policy 0, policy_version 28000 (0.0009) +[2026-06-07 02:56:18,117][464927] Fps is (10 sec: 26214.9, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 14319616. Throughput: 0: 28407.4. Samples: 14303104. Policy #0 lag: (min: 10.0, avg: 46.2, max: 74.0) +[2026-06-07 02:56:18,118][464927] Avg episode reward: [(0, '955.694')] +[2026-06-07 02:56:18,208][472559] Updated weights for policy 0, policy_version 28010 (0.0008) +[2026-06-07 02:56:18,317][472559] Updated weights for policy 0, policy_version 28020 (0.0008) +[2026-06-07 02:56:18,444][472559] Updated weights for policy 0, policy_version 28031 (0.0009) +[2026-06-07 02:56:18,448][472025] Saving new best policy, reward=955.694! +[2026-06-07 02:56:19,000][472559] Updated weights for policy 0, policy_version 28043 (0.0006) +[2026-06-07 02:56:19,123][472559] Updated weights for policy 0, policy_version 28054 (0.0009) +[2026-06-07 02:56:19,238][472559] Updated weights for policy 0, policy_version 28064 (0.0008) +[2026-06-07 02:56:19,365][472559] Updated weights for policy 0, policy_version 28075 (0.0009) +[2026-06-07 02:56:19,482][472559] Updated weights for policy 0, policy_version 28085 (0.0009) +[2026-06-07 02:56:19,601][472559] Updated weights for policy 0, policy_version 28095 (0.0008) +[2026-06-07 02:56:20,162][472559] Updated weights for policy 0, policy_version 28107 (0.0008) +[2026-06-07 02:56:20,274][472559] Updated weights for policy 0, policy_version 28117 (0.0008) +[2026-06-07 02:56:20,388][472559] Updated weights for policy 0, policy_version 28127 (0.0008) +[2026-06-07 02:56:20,506][472559] Updated weights for policy 0, policy_version 28137 (0.0008) +[2026-06-07 02:56:20,630][472559] Updated weights for policy 0, policy_version 28148 (0.0008) +[2026-06-07 02:56:20,745][472559] Updated weights for policy 0, policy_version 28158 (0.0009) +[2026-06-07 02:56:21,282][472559] Updated weights for policy 0, policy_version 28168 (0.0007) +[2026-06-07 02:56:21,392][472559] Updated weights for policy 0, policy_version 28178 (0.0008) +[2026-06-07 02:56:21,515][472559] Updated weights for policy 0, policy_version 28189 (0.0008) +[2026-06-07 02:56:21,629][472559] Updated weights for policy 0, policy_version 28199 (0.0008) +[2026-06-07 02:56:21,758][472559] Updated weights for policy 0, policy_version 28210 (0.0008) +[2026-06-07 02:56:21,886][472559] Updated weights for policy 0, policy_version 28221 (0.0008) +[2026-06-07 02:56:22,448][472559] Updated weights for policy 0, policy_version 28232 (0.0008) +[2026-06-07 02:56:22,562][472559] Updated weights for policy 0, policy_version 28242 (0.0008) +[2026-06-07 02:56:22,679][472559] Updated weights for policy 0, policy_version 28253 (0.0008) +[2026-06-07 02:56:22,806][472559] Updated weights for policy 0, policy_version 28263 (0.0008) +[2026-06-07 02:56:22,914][472559] Updated weights for policy 0, policy_version 28273 (0.0008) +[2026-06-07 02:56:23,031][472559] Updated weights for policy 0, policy_version 28283 (0.0008) +[2026-06-07 02:56:23,117][464927] Fps is (10 sec: 29491.4, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 14483456. Throughput: 0: 28336.4. Samples: 14474368. Policy #0 lag: (min: 63.0, avg: 75.8, max: 127.0) +[2026-06-07 02:56:23,117][464927] Avg episode reward: [(0, '938.323')] +[2026-06-07 02:56:23,575][472559] Updated weights for policy 0, policy_version 28293 (0.0008) +[2026-06-07 02:56:23,688][472559] Updated weights for policy 0, policy_version 28303 (0.0008) +[2026-06-07 02:56:23,798][472559] Updated weights for policy 0, policy_version 28313 (0.0008) +[2026-06-07 02:56:23,914][472559] Updated weights for policy 0, policy_version 28323 (0.0008) +[2026-06-07 02:56:24,021][472559] Updated weights for policy 0, policy_version 28333 (0.0008) +[2026-06-07 02:56:24,139][472559] Updated weights for policy 0, policy_version 28343 (0.0008) +[2026-06-07 02:56:24,692][472559] Updated weights for policy 0, policy_version 28353 (0.0008) +[2026-06-07 02:56:24,806][472559] Updated weights for policy 0, policy_version 28363 (0.0008) +[2026-06-07 02:56:24,937][472559] Updated weights for policy 0, policy_version 28375 (0.0008) +[2026-06-07 02:56:25,070][472559] Updated weights for policy 0, policy_version 28387 (0.0008) +[2026-06-07 02:56:25,200][472559] Updated weights for policy 0, policy_version 28399 (0.0008) +[2026-06-07 02:56:25,318][472559] Updated weights for policy 0, policy_version 28409 (0.0009) +[2026-06-07 02:56:25,882][472559] Updated weights for policy 0, policy_version 28420 (0.0008) +[2026-06-07 02:56:26,000][472559] Updated weights for policy 0, policy_version 28431 (0.0008) +[2026-06-07 02:56:26,118][472559] Updated weights for policy 0, policy_version 28441 (0.0009) +[2026-06-07 02:56:26,234][472559] Updated weights for policy 0, policy_version 28451 (0.0008) +[2026-06-07 02:56:26,345][472559] Updated weights for policy 0, policy_version 28461 (0.0008) +[2026-06-07 02:56:26,481][472559] Updated weights for policy 0, policy_version 28473 (0.0008) +[2026-06-07 02:56:27,041][472559] Updated weights for policy 0, policy_version 28484 (0.0008) +[2026-06-07 02:56:27,175][472559] Updated weights for policy 0, policy_version 28496 (0.0008) +[2026-06-07 02:56:27,317][472559] Updated weights for policy 0, policy_version 28508 (0.0008) +[2026-06-07 02:56:27,428][472559] Updated weights for policy 0, policy_version 28518 (0.0008) +[2026-06-07 02:56:27,558][472559] Updated weights for policy 0, policy_version 28529 (0.0008) +[2026-06-07 02:56:27,671][472559] Updated weights for policy 0, policy_version 28539 (0.0008) +[2026-06-07 02:56:28,117][464927] Fps is (10 sec: 29491.1, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 14614528. Throughput: 0: 28316.4. Samples: 14648704. Policy #0 lag: (min: 63.0, avg: 75.8, max: 127.0) +[2026-06-07 02:56:28,118][464927] Avg episode reward: [(0, '927.837')] +[2026-06-07 02:56:28,206][472559] Updated weights for policy 0, policy_version 28550 (0.0008) +[2026-06-07 02:56:28,328][472559] Updated weights for policy 0, policy_version 28561 (0.0008) +[2026-06-07 02:56:28,455][472559] Updated weights for policy 0, policy_version 28572 (0.0008) +[2026-06-07 02:56:28,570][472559] Updated weights for policy 0, policy_version 28582 (0.0008) +[2026-06-07 02:56:28,684][472559] Updated weights for policy 0, policy_version 28592 (0.0008) +[2026-06-07 02:56:28,810][472559] Updated weights for policy 0, policy_version 28603 (0.0008) +[2026-06-07 02:56:29,365][472559] Updated weights for policy 0, policy_version 28613 (0.0008) +[2026-06-07 02:56:29,482][472559] Updated weights for policy 0, policy_version 28623 (0.0008) +[2026-06-07 02:56:29,594][472559] Updated weights for policy 0, policy_version 28633 (0.0008) +[2026-06-07 02:56:29,727][472559] Updated weights for policy 0, policy_version 28645 (0.0008) +[2026-06-07 02:56:29,852][472559] Updated weights for policy 0, policy_version 28656 (0.0008) +[2026-06-07 02:56:29,989][472559] Updated weights for policy 0, policy_version 28668 (0.0008) +[2026-06-07 02:56:30,545][472559] Updated weights for policy 0, policy_version 28678 (0.0008) +[2026-06-07 02:56:30,679][472559] Updated weights for policy 0, policy_version 28690 (0.0008) +[2026-06-07 02:56:30,796][472559] Updated weights for policy 0, policy_version 28701 (0.0008) +[2026-06-07 02:56:30,921][472559] Updated weights for policy 0, policy_version 28712 (0.0008) +[2026-06-07 02:56:31,050][472559] Updated weights for policy 0, policy_version 28723 (0.0008) +[2026-06-07 02:56:31,174][472559] Updated weights for policy 0, policy_version 28734 (0.0008) +[2026-06-07 02:56:31,734][472559] Updated weights for policy 0, policy_version 28745 (0.0008) +[2026-06-07 02:56:31,849][472559] Updated weights for policy 0, policy_version 28755 (0.0008) +[2026-06-07 02:56:31,968][472559] Updated weights for policy 0, policy_version 28765 (0.0009) +[2026-06-07 02:56:32,104][472559] Updated weights for policy 0, policy_version 28777 (0.0008) +[2026-06-07 02:56:32,223][472559] Updated weights for policy 0, policy_version 28787 (0.0008) +[2026-06-07 02:56:32,345][472559] Updated weights for policy 0, policy_version 28798 (0.0008) +[2026-06-07 02:56:32,895][472559] Updated weights for policy 0, policy_version 28809 (0.0008) +[2026-06-07 02:56:33,014][472559] Updated weights for policy 0, policy_version 28819 (0.0008) +[2026-06-07 02:56:33,117][464927] Fps is (10 sec: 26214.3, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 14745600. Throughput: 0: 28356.3. Samples: 14727552. Policy #0 lag: (min: 63.0, avg: 75.8, max: 127.0) +[2026-06-07 02:56:33,118][464927] Avg episode reward: [(0, '887.060')] +[2026-06-07 02:56:33,138][472559] Updated weights for policy 0, policy_version 28830 (0.0008) +[2026-06-07 02:56:33,261][472559] Updated weights for policy 0, policy_version 28841 (0.0008) +[2026-06-07 02:56:33,373][472559] Updated weights for policy 0, policy_version 28851 (0.0008) +[2026-06-07 02:56:33,496][472559] Updated weights for policy 0, policy_version 28862 (0.0008) +[2026-06-07 02:56:34,038][472559] Updated weights for policy 0, policy_version 28872 (0.0009) +[2026-06-07 02:56:34,161][472559] Updated weights for policy 0, policy_version 28883 (0.0008) +[2026-06-07 02:56:34,270][472559] Updated weights for policy 0, policy_version 28893 (0.0008) +[2026-06-07 02:56:34,388][472559] Updated weights for policy 0, policy_version 28903 (0.0008) +[2026-06-07 02:56:34,513][472559] Updated weights for policy 0, policy_version 28914 (0.0008) +[2026-06-07 02:56:34,630][472559] Updated weights for policy 0, policy_version 28924 (0.0008) +[2026-06-07 02:56:35,167][472559] Updated weights for policy 0, policy_version 28934 (0.0008) +[2026-06-07 02:56:35,287][472559] Updated weights for policy 0, policy_version 28945 (0.0008) +[2026-06-07 02:56:35,407][472559] Updated weights for policy 0, policy_version 28955 (0.0008) +[2026-06-07 02:56:35,528][472559] Updated weights for policy 0, policy_version 28966 (0.0008) +[2026-06-07 02:56:35,691][472559] Updated weights for policy 0, policy_version 28980 (0.0008) +[2026-06-07 02:56:35,800][472559] Updated weights for policy 0, policy_version 28990 (0.0008) +[2026-06-07 02:56:36,338][472559] Updated weights for policy 0, policy_version 29000 (0.0007) +[2026-06-07 02:56:36,459][472559] Updated weights for policy 0, policy_version 29011 (0.0008) +[2026-06-07 02:56:36,571][472559] Updated weights for policy 0, policy_version 29021 (0.0009) +[2026-06-07 02:56:36,690][472559] Updated weights for policy 0, policy_version 29031 (0.0008) +[2026-06-07 02:56:36,803][472559] Updated weights for policy 0, policy_version 29041 (0.0008) +[2026-06-07 02:56:36,919][472559] Updated weights for policy 0, policy_version 29051 (0.0008) +[2026-06-07 02:56:37,466][472559] Updated weights for policy 0, policy_version 29061 (0.0008) +[2026-06-07 02:56:37,576][472559] Updated weights for policy 0, policy_version 29071 (0.0008) +[2026-06-07 02:56:37,684][472559] Updated weights for policy 0, policy_version 29081 (0.0008) +[2026-06-07 02:56:37,798][472559] Updated weights for policy 0, policy_version 29091 (0.0008) +[2026-06-07 02:56:37,925][472559] Updated weights for policy 0, policy_version 29102 (0.0008) +[2026-06-07 02:56:38,046][472559] Updated weights for policy 0, policy_version 29113 (0.0008) +[2026-06-07 02:56:38,117][464927] Fps is (10 sec: 26214.5, 60 sec: 27852.8, 300 sec: 28213.8). Total num frames: 14876672. Throughput: 0: 28222.6. Samples: 14896000. Policy #0 lag: (min: 63.0, avg: 75.8, max: 127.0) +[2026-06-07 02:56:38,118][464927] Avg episode reward: [(0, '908.680')] +[2026-06-07 02:56:38,617][472559] Updated weights for policy 0, policy_version 29124 (0.0008) +[2026-06-07 02:56:38,740][472559] Updated weights for policy 0, policy_version 29135 (0.0008) +[2026-06-07 02:56:38,852][472559] Updated weights for policy 0, policy_version 29145 (0.0008) +[2026-06-07 02:56:38,962][472559] Updated weights for policy 0, policy_version 29155 (0.0009) +[2026-06-07 02:56:39,088][472559] Updated weights for policy 0, policy_version 29166 (0.0008) +[2026-06-07 02:56:39,208][472559] Updated weights for policy 0, policy_version 29176 (0.0008) +[2026-06-07 02:56:39,741][472559] Updated weights for policy 0, policy_version 29186 (0.0008) +[2026-06-07 02:56:39,858][472559] Updated weights for policy 0, policy_version 29196 (0.0007) +[2026-06-07 02:56:39,970][472559] Updated weights for policy 0, policy_version 29206 (0.0008) +[2026-06-07 02:56:40,090][472559] Updated weights for policy 0, policy_version 29217 (0.0008) +[2026-06-07 02:56:40,207][472559] Updated weights for policy 0, policy_version 29227 (0.0008) +[2026-06-07 02:56:40,314][472559] Updated weights for policy 0, policy_version 29237 (0.0008) +[2026-06-07 02:56:40,432][472559] Updated weights for policy 0, policy_version 29247 (0.0008) +[2026-06-07 02:56:40,981][472559] Updated weights for policy 0, policy_version 29258 (0.0008) +[2026-06-07 02:56:41,094][472559] Updated weights for policy 0, policy_version 29268 (0.0008) +[2026-06-07 02:56:41,219][472559] Updated weights for policy 0, policy_version 29279 (0.0008) +[2026-06-07 02:56:41,344][472559] Updated weights for policy 0, policy_version 29290 (0.0009) +[2026-06-07 02:56:41,462][472559] Updated weights for policy 0, policy_version 29300 (0.0008) +[2026-06-07 02:56:41,593][472559] Updated weights for policy 0, policy_version 29312 (0.0008) +[2026-06-07 02:56:42,149][472559] Updated weights for policy 0, policy_version 29322 (0.0008) +[2026-06-07 02:56:42,278][472559] Updated weights for policy 0, policy_version 29333 (0.0008) +[2026-06-07 02:56:42,399][472559] Updated weights for policy 0, policy_version 29344 (0.0009) +[2026-06-07 02:56:42,509][472559] Updated weights for policy 0, policy_version 29354 (0.0008) +[2026-06-07 02:56:42,628][472559] Updated weights for policy 0, policy_version 29364 (0.0008) +[2026-06-07 02:56:42,751][472559] Updated weights for policy 0, policy_version 29375 (0.0008) +[2026-06-07 02:56:43,117][464927] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 15040512. Throughput: 0: 28202.7. Samples: 15071104. Policy #0 lag: (min: 63.0, avg: 74.9, max: 127.0) +[2026-06-07 02:56:43,118][464927] Avg episode reward: [(0, '928.967')] +[2026-06-07 02:56:43,317][472559] Updated weights for policy 0, policy_version 29386 (0.0008) +[2026-06-07 02:56:43,444][472559] Updated weights for policy 0, policy_version 29397 (0.0008) +[2026-06-07 02:56:43,557][472559] Updated weights for policy 0, policy_version 29407 (0.0008) +[2026-06-07 02:56:43,684][472559] Updated weights for policy 0, policy_version 29418 (0.0009) +[2026-06-07 02:56:43,793][472559] Updated weights for policy 0, policy_version 29428 (0.0008) +[2026-06-07 02:56:43,905][472559] Updated weights for policy 0, policy_version 29438 (0.0008) +[2026-06-07 02:56:44,442][472559] Updated weights for policy 0, policy_version 29448 (0.0008) +[2026-06-07 02:56:44,568][472559] Updated weights for policy 0, policy_version 29459 (0.0009) +[2026-06-07 02:56:44,677][472559] Updated weights for policy 0, policy_version 29469 (0.0008) +[2026-06-07 02:56:44,784][472559] Updated weights for policy 0, policy_version 29479 (0.0008) +[2026-06-07 02:56:44,904][472559] Updated weights for policy 0, policy_version 29489 (0.0008) +[2026-06-07 02:56:45,018][472559] Updated weights for policy 0, policy_version 29499 (0.0008) +[2026-06-07 02:56:45,575][472559] Updated weights for policy 0, policy_version 29510 (0.0008) +[2026-06-07 02:56:45,683][472559] Updated weights for policy 0, policy_version 29520 (0.0008) +[2026-06-07 02:56:45,821][472559] Updated weights for policy 0, policy_version 29532 (0.0008) +[2026-06-07 02:56:45,930][472559] Updated weights for policy 0, policy_version 29542 (0.0009) +[2026-06-07 02:56:46,046][472559] Updated weights for policy 0, policy_version 29552 (0.0008) +[2026-06-07 02:56:46,173][472559] Updated weights for policy 0, policy_version 29563 (0.0008) +[2026-06-07 02:56:46,727][472559] Updated weights for policy 0, policy_version 29573 (0.0008) +[2026-06-07 02:56:46,852][472559] Updated weights for policy 0, policy_version 29584 (0.0008) +[2026-06-07 02:56:46,966][472559] Updated weights for policy 0, policy_version 29594 (0.0009) +[2026-06-07 02:56:47,082][472559] Updated weights for policy 0, policy_version 29604 (0.0008) +[2026-06-07 02:56:47,195][472559] Updated weights for policy 0, policy_version 29614 (0.0008) +[2026-06-07 02:56:47,308][472559] Updated weights for policy 0, policy_version 29624 (0.0008) +[2026-06-07 02:56:47,848][472559] Updated weights for policy 0, policy_version 29634 (0.0008) +[2026-06-07 02:56:47,989][472559] Updated weights for policy 0, policy_version 29646 (0.0009) +[2026-06-07 02:56:48,112][472559] Updated weights for policy 0, policy_version 29657 (0.0009) +[2026-06-07 02:56:48,117][464927] Fps is (10 sec: 29491.1, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 15171584. Throughput: 0: 28296.5. Samples: 15151616. Policy #0 lag: (min: 63.0, avg: 74.9, max: 127.0) +[2026-06-07 02:56:48,118][464927] Avg episode reward: [(0, '918.528')] +[2026-06-07 02:56:48,249][472559] Updated weights for policy 0, policy_version 29669 (0.0009) +[2026-06-07 02:56:48,371][472559] Updated weights for policy 0, policy_version 29680 (0.0008) +[2026-06-07 02:56:48,492][472559] Updated weights for policy 0, policy_version 29690 (0.0008) +[2026-06-07 02:56:49,031][472559] Updated weights for policy 0, policy_version 29700 (0.0008) +[2026-06-07 02:56:49,169][472559] Updated weights for policy 0, policy_version 29713 (0.0008) +[2026-06-07 02:56:49,288][472559] Updated weights for policy 0, policy_version 29723 (0.0009) +[2026-06-07 02:56:49,397][472559] Updated weights for policy 0, policy_version 29733 (0.0008) +[2026-06-07 02:56:49,518][472559] Updated weights for policy 0, policy_version 29743 (0.0007) +[2026-06-07 02:56:49,641][472559] Updated weights for policy 0, policy_version 29754 (0.0007) +[2026-06-07 02:56:50,203][472559] Updated weights for policy 0, policy_version 29765 (0.0009) +[2026-06-07 02:56:50,315][472559] Updated weights for policy 0, policy_version 29775 (0.0008) +[2026-06-07 02:56:50,429][472559] Updated weights for policy 0, policy_version 29785 (0.0008) +[2026-06-07 02:56:50,553][472559] Updated weights for policy 0, policy_version 29796 (0.0008) +[2026-06-07 02:56:50,679][472559] Updated weights for policy 0, policy_version 29807 (0.0008) +[2026-06-07 02:56:50,797][472559] Updated weights for policy 0, policy_version 29817 (0.0008) +[2026-06-07 02:56:51,340][472559] Updated weights for policy 0, policy_version 29827 (0.0008) +[2026-06-07 02:56:51,465][472559] Updated weights for policy 0, policy_version 29838 (0.0008) +[2026-06-07 02:56:51,586][472559] Updated weights for policy 0, policy_version 29849 (0.0009) +[2026-06-07 02:56:51,715][472559] Updated weights for policy 0, policy_version 29860 (0.0008) +[2026-06-07 02:56:51,831][472559] Updated weights for policy 0, policy_version 29870 (0.0008) +[2026-06-07 02:56:51,959][472559] Updated weights for policy 0, policy_version 29882 (0.0008) +[2026-06-07 02:56:52,542][472559] Updated weights for policy 0, policy_version 29894 (0.0008) +[2026-06-07 02:56:52,677][472559] Updated weights for policy 0, policy_version 29906 (0.0008) +[2026-06-07 02:56:52,809][472559] Updated weights for policy 0, policy_version 29918 (0.0008) +[2026-06-07 02:56:52,922][472559] Updated weights for policy 0, policy_version 29928 (0.0008) +[2026-06-07 02:56:53,041][472559] Updated weights for policy 0, policy_version 29938 (0.0008) +[2026-06-07 02:56:53,116][464927] Fps is (10 sec: 26214.4, 60 sec: 27852.8, 300 sec: 28213.8). Total num frames: 15302656. Throughput: 0: 28194.3. Samples: 15319552. Policy #0 lag: (min: 63.0, avg: 74.9, max: 127.0) +[2026-06-07 02:56:53,117][464927] Avg episode reward: [(0, '918.157')] +[2026-06-07 02:56:53,162][472559] Updated weights for policy 0, policy_version 29949 (0.0008) +[2026-06-07 02:56:53,731][472559] Updated weights for policy 0, policy_version 29960 (0.0008) +[2026-06-07 02:56:53,871][472559] Updated weights for policy 0, policy_version 29972 (0.0009) +[2026-06-07 02:56:53,996][472559] Updated weights for policy 0, policy_version 29983 (0.0009) +[2026-06-07 02:56:54,123][472559] Updated weights for policy 0, policy_version 29994 (0.0008) +[2026-06-07 02:56:54,249][472559] Updated weights for policy 0, policy_version 30005 (0.0009) +[2026-06-07 02:56:54,366][472559] Updated weights for policy 0, policy_version 30016 (0.0009) +[2026-06-07 02:56:54,919][472559] Updated weights for policy 0, policy_version 30027 (0.0008) +[2026-06-07 02:56:55,034][472559] Updated weights for policy 0, policy_version 30038 (0.0008) +[2026-06-07 02:56:55,155][472559] Updated weights for policy 0, policy_version 30048 (0.0008) +[2026-06-07 02:56:55,266][472559] Updated weights for policy 0, policy_version 30058 (0.0009) +[2026-06-07 02:56:55,404][472559] Updated weights for policy 0, policy_version 30070 (0.0009) +[2026-06-07 02:56:55,515][472559] Updated weights for policy 0, policy_version 30080 (0.0009) +[2026-06-07 02:56:56,080][472559] Updated weights for policy 0, policy_version 30091 (0.0004) +[2026-06-07 02:56:56,215][472559] Updated weights for policy 0, policy_version 30103 (0.0004) +[2026-06-07 02:56:56,354][472559] Updated weights for policy 0, policy_version 30115 (0.0004) +[2026-06-07 02:56:56,467][472559] Updated weights for policy 0, policy_version 30125 (0.0004) +[2026-06-07 02:56:56,588][472559] Updated weights for policy 0, policy_version 30136 (0.0004) +[2026-06-07 02:56:57,130][472559] Updated weights for policy 0, policy_version 30147 (0.0004) +[2026-06-07 02:56:57,241][472559] Updated weights for policy 0, policy_version 30157 (0.0004) +[2026-06-07 02:56:57,357][472559] Updated weights for policy 0, policy_version 30167 (0.0004) +[2026-06-07 02:56:57,481][472559] Updated weights for policy 0, policy_version 30178 (0.0004) +[2026-06-07 02:56:57,594][472559] Updated weights for policy 0, policy_version 30188 (0.0004) +[2026-06-07 02:56:57,703][472559] Updated weights for policy 0, policy_version 30198 (0.0004) +[2026-06-07 02:56:58,117][464927] Fps is (10 sec: 29490.8, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 15466496. Throughput: 0: 28242.4. Samples: 15496576. Policy #0 lag: (min: 63.0, avg: 74.9, max: 127.0) +[2026-06-07 02:56:58,118][464927] Avg episode reward: [(0, '931.760')] +[2026-06-07 02:56:58,125][472025] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed11/checkpoint_p0/checkpoint_000030208_15466496.pth... +[2026-06-07 02:56:58,248][472559] Updated weights for policy 0, policy_version 30209 (0.0004) +[2026-06-07 02:56:58,357][472559] Updated weights for policy 0, policy_version 30219 (0.0004) +[2026-06-07 02:56:58,483][472559] Updated weights for policy 0, policy_version 30230 (0.0004) +[2026-06-07 02:56:58,595][472559] Updated weights for policy 0, policy_version 30240 (0.0004) +[2026-06-07 02:56:58,715][472559] Updated weights for policy 0, policy_version 30251 (0.0006) +[2026-06-07 02:56:58,823][472559] Updated weights for policy 0, policy_version 30261 (0.0009) +[2026-06-07 02:56:59,394][472559] Updated weights for policy 0, policy_version 30273 (0.0007) +[2026-06-07 02:56:59,515][472559] Updated weights for policy 0, policy_version 30284 (0.0008) +[2026-06-07 02:56:59,639][472559] Updated weights for policy 0, policy_version 30295 (0.0008) +[2026-06-07 02:56:59,766][472559] Updated weights for policy 0, policy_version 30306 (0.0009) +[2026-06-07 02:56:59,900][472559] Updated weights for policy 0, policy_version 30318 (0.0009) +[2026-06-07 02:57:00,025][472559] Updated weights for policy 0, policy_version 30329 (0.0008) +[2026-06-07 02:57:00,576][472559] Updated weights for policy 0, policy_version 30340 (0.0008) +[2026-06-07 02:57:00,679][472559] Updated weights for policy 0, policy_version 30350 (0.0008) +[2026-06-07 02:57:00,797][472559] Updated weights for policy 0, policy_version 30360 (0.0008) +[2026-06-07 02:57:00,918][472559] Updated weights for policy 0, policy_version 30371 (0.0008) +[2026-06-07 02:57:01,036][472559] Updated weights for policy 0, policy_version 30381 (0.0008) +[2026-06-07 02:57:01,162][472559] Updated weights for policy 0, policy_version 30392 (0.0008) +[2026-06-07 02:57:01,714][472559] Updated weights for policy 0, policy_version 30402 (0.0008) +[2026-06-07 02:57:01,818][472559] Updated weights for policy 0, policy_version 30412 (0.0008) +[2026-06-07 02:57:01,945][472559] Updated weights for policy 0, policy_version 30423 (0.0008) +[2026-06-07 02:57:02,063][472559] Updated weights for policy 0, policy_version 30433 (0.0008) +[2026-06-07 02:57:02,187][472559] Updated weights for policy 0, policy_version 30444 (0.0008) +[2026-06-07 02:57:02,312][472559] Updated weights for policy 0, policy_version 30455 (0.0008) +[2026-06-07 02:57:02,860][472559] Updated weights for policy 0, policy_version 30465 (0.0009) +[2026-06-07 02:57:02,965][472559] Updated weights for policy 0, policy_version 30475 (0.0008) +[2026-06-07 02:57:03,082][472559] Updated weights for policy 0, policy_version 30485 (0.0008) +[2026-06-07 02:57:03,117][464927] Fps is (10 sec: 29491.1, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 15597568. Throughput: 0: 28356.3. Samples: 15579136. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 02:57:03,117][464927] Avg episode reward: [(0, '894.821')] +[2026-06-07 02:57:03,217][472559] Updated weights for policy 0, policy_version 30497 (0.0008) +[2026-06-07 02:57:03,335][472559] Updated weights for policy 0, policy_version 30507 (0.0009) +[2026-06-07 02:57:03,458][472559] Updated weights for policy 0, policy_version 30518 (0.0009) +[2026-06-07 02:57:03,568][472559] Updated weights for policy 0, policy_version 30528 (0.0009) +[2026-06-07 02:57:04,127][472559] Updated weights for policy 0, policy_version 30539 (0.0008) +[2026-06-07 02:57:04,255][472559] Updated weights for policy 0, policy_version 30550 (0.0008) +[2026-06-07 02:57:04,389][472559] Updated weights for policy 0, policy_version 30562 (0.0008) +[2026-06-07 02:57:04,535][472559] Updated weights for policy 0, policy_version 30575 (0.0008) +[2026-06-07 02:57:04,642][472559] Updated weights for policy 0, policy_version 30585 (0.0008) +[2026-06-07 02:57:05,216][472559] Updated weights for policy 0, policy_version 30595 (0.0008) +[2026-06-07 02:57:05,330][472559] Updated weights for policy 0, policy_version 30605 (0.0008) +[2026-06-07 02:57:05,463][472559] Updated weights for policy 0, policy_version 30617 (0.0009) +[2026-06-07 02:57:05,574][472559] Updated weights for policy 0, policy_version 30627 (0.0008) +[2026-06-07 02:57:05,709][472559] Updated weights for policy 0, policy_version 30639 (0.0008) +[2026-06-07 02:57:05,848][472559] Updated weights for policy 0, policy_version 30651 (0.0008) +[2026-06-07 02:57:06,393][472559] Updated weights for policy 0, policy_version 30661 (0.0008) +[2026-06-07 02:57:06,518][472559] Updated weights for policy 0, policy_version 30672 (0.0008) +[2026-06-07 02:57:06,646][472559] Updated weights for policy 0, policy_version 30683 (0.0008) +[2026-06-07 02:57:06,776][472559] Updated weights for policy 0, policy_version 30694 (0.0008) +[2026-06-07 02:57:06,903][472559] Updated weights for policy 0, policy_version 30705 (0.0008) +[2026-06-07 02:57:07,015][472559] Updated weights for policy 0, policy_version 30715 (0.0008) +[2026-06-07 02:57:07,563][472559] Updated weights for policy 0, policy_version 30725 (0.0008) +[2026-06-07 02:57:07,700][472559] Updated weights for policy 0, policy_version 30738 (0.0008) +[2026-06-07 02:57:07,831][472559] Updated weights for policy 0, policy_version 30750 (0.0008) +[2026-06-07 02:57:07,954][472559] Updated weights for policy 0, policy_version 30761 (0.0008) +[2026-06-07 02:57:08,089][472559] Updated weights for policy 0, policy_version 30773 (0.0005) +[2026-06-07 02:57:08,117][464927] Fps is (10 sec: 26214.2, 60 sec: 27852.8, 300 sec: 28213.8). Total num frames: 15728640. Throughput: 0: 28287.8. Samples: 15747328. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 02:57:08,118][464927] Avg episode reward: [(0, '906.545')] +[2026-06-07 02:57:08,213][472559] Updated weights for policy 0, policy_version 30784 (0.0005) +[2026-06-07 02:57:08,778][472559] Updated weights for policy 0, policy_version 30794 (0.0006) +[2026-06-07 02:57:08,904][472559] Updated weights for policy 0, policy_version 30805 (0.0008) +[2026-06-07 02:57:09,021][472559] Updated weights for policy 0, policy_version 30815 (0.0008) +[2026-06-07 02:57:09,150][472559] Updated weights for policy 0, policy_version 30826 (0.0009) +[2026-06-07 02:57:09,259][472559] Updated weights for policy 0, policy_version 30836 (0.0008) +[2026-06-07 02:57:09,383][472559] Updated weights for policy 0, policy_version 30847 (0.0008) +[2026-06-07 02:57:09,902][472559] Updated weights for policy 0, policy_version 30857 (0.0008) +[2026-06-07 02:57:10,019][472559] Updated weights for policy 0, policy_version 30867 (0.0009) +[2026-06-07 02:57:10,138][472559] Updated weights for policy 0, policy_version 30877 (0.0008) +[2026-06-07 02:57:10,247][472559] Updated weights for policy 0, policy_version 30887 (0.0008) +[2026-06-07 02:57:10,364][472559] Updated weights for policy 0, policy_version 30897 (0.0008) +[2026-06-07 02:57:10,475][472559] Updated weights for policy 0, policy_version 30907 (0.0008) +[2026-06-07 02:57:11,019][472559] Updated weights for policy 0, policy_version 30917 (0.0008) +[2026-06-07 02:57:11,130][472559] Updated weights for policy 0, policy_version 30927 (0.0008) +[2026-06-07 02:57:11,244][472559] Updated weights for policy 0, policy_version 30937 (0.0008) +[2026-06-07 02:57:11,366][472559] Updated weights for policy 0, policy_version 30948 (0.0008) +[2026-06-07 02:57:11,489][472559] Updated weights for policy 0, policy_version 30959 (0.0008) +[2026-06-07 02:57:11,597][472559] Updated weights for policy 0, policy_version 30969 (0.0008) +[2026-06-07 02:57:12,165][472559] Updated weights for policy 0, policy_version 30980 (0.0007) +[2026-06-07 02:57:12,286][472559] Updated weights for policy 0, policy_version 30991 (0.0008) +[2026-06-07 02:57:12,417][472559] Updated weights for policy 0, policy_version 31003 (0.0008) +[2026-06-07 02:57:12,533][472559] Updated weights for policy 0, policy_version 31013 (0.0008) +[2026-06-07 02:57:12,652][472559] Updated weights for policy 0, policy_version 31023 (0.0008) +[2026-06-07 02:57:12,766][472559] Updated weights for policy 0, policy_version 31033 (0.0008) +[2026-06-07 02:57:13,117][464927] Fps is (10 sec: 29491.1, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 15892480. Throughput: 0: 28302.2. Samples: 15922304. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 02:57:13,117][464927] Avg episode reward: [(0, '929.457')] +[2026-06-07 02:57:13,319][472559] Updated weights for policy 0, policy_version 31043 (0.0008) +[2026-06-07 02:57:13,449][472559] Updated weights for policy 0, policy_version 31054 (0.0008) +[2026-06-07 02:57:13,558][472559] Updated weights for policy 0, policy_version 31064 (0.0008) +[2026-06-07 02:57:13,681][472559] Updated weights for policy 0, policy_version 31075 (0.0008) +[2026-06-07 02:57:13,820][472559] Updated weights for policy 0, policy_version 31087 (0.0008) +[2026-06-07 02:57:13,943][472559] Updated weights for policy 0, policy_version 31098 (0.0008) +[2026-06-07 02:57:14,487][472559] Updated weights for policy 0, policy_version 31108 (0.0008) +[2026-06-07 02:57:14,594][472559] Updated weights for policy 0, policy_version 31118 (0.0008) +[2026-06-07 02:57:14,725][472559] Updated weights for policy 0, policy_version 31129 (0.0009) +[2026-06-07 02:57:14,846][472559] Updated weights for policy 0, policy_version 31140 (0.0008) +[2026-06-07 02:57:14,965][472559] Updated weights for policy 0, policy_version 31150 (0.0008) +[2026-06-07 02:57:15,079][472559] Updated weights for policy 0, policy_version 31160 (0.0008) +[2026-06-07 02:57:15,621][472559] Updated weights for policy 0, policy_version 31170 (0.0008) +[2026-06-07 02:57:15,763][472559] Updated weights for policy 0, policy_version 31183 (0.0009) +[2026-06-07 02:57:15,881][472559] Updated weights for policy 0, policy_version 31193 (0.0008) +[2026-06-07 02:57:16,001][472559] Updated weights for policy 0, policy_version 31204 (0.0008) +[2026-06-07 02:57:16,117][472559] Updated weights for policy 0, policy_version 31214 (0.0008) +[2026-06-07 02:57:16,249][472559] Updated weights for policy 0, policy_version 31226 (0.0008) +[2026-06-07 02:57:16,824][472559] Updated weights for policy 0, policy_version 31237 (0.0008) +[2026-06-07 02:57:16,937][472559] Updated weights for policy 0, policy_version 31247 (0.0008) +[2026-06-07 02:57:17,050][472559] Updated weights for policy 0, policy_version 31257 (0.0009) +[2026-06-07 02:57:17,172][472559] Updated weights for policy 0, policy_version 31268 (0.0008) +[2026-06-07 02:57:17,285][472559] Updated weights for policy 0, policy_version 31278 (0.0008) +[2026-06-07 02:57:17,402][472559] Updated weights for policy 0, policy_version 31288 (0.0009) +[2026-06-07 02:57:17,953][472559] Updated weights for policy 0, policy_version 31298 (0.0008) +[2026-06-07 02:57:18,078][472559] Updated weights for policy 0, policy_version 31309 (0.0008) +[2026-06-07 02:57:18,117][464927] Fps is (10 sec: 29491.8, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 16023552. Throughput: 0: 28424.5. Samples: 16006656. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 02:57:18,118][464927] Avg episode reward: [(0, '962.474')] +[2026-06-07 02:57:18,189][472559] Updated weights for policy 0, policy_version 31319 (0.0009) +[2026-06-07 02:57:18,316][472559] Updated weights for policy 0, policy_version 31330 (0.0008) +[2026-06-07 02:57:18,438][472559] Updated weights for policy 0, policy_version 31341 (0.0009) +[2026-06-07 02:57:18,563][472559] Updated weights for policy 0, policy_version 31352 (0.0009) +[2026-06-07 02:57:18,652][472025] Saving new best policy, reward=962.474! +[2026-06-07 02:57:19,114][472559] Updated weights for policy 0, policy_version 31362 (0.0008) +[2026-06-07 02:57:19,249][472559] Updated weights for policy 0, policy_version 31374 (0.0008) +[2026-06-07 02:57:19,360][472559] Updated weights for policy 0, policy_version 31384 (0.0008) +[2026-06-07 02:57:19,477][472559] Updated weights for policy 0, policy_version 31394 (0.0008) +[2026-06-07 02:57:19,600][472559] Updated weights for policy 0, policy_version 31405 (0.0009) +[2026-06-07 02:57:19,706][472559] Updated weights for policy 0, policy_version 31415 (0.0009) +[2026-06-07 02:57:20,249][472559] Updated weights for policy 0, policy_version 31425 (0.0008) +[2026-06-07 02:57:20,364][472559] Updated weights for policy 0, policy_version 31435 (0.0008) +[2026-06-07 02:57:20,476][472559] Updated weights for policy 0, policy_version 31445 (0.0008) +[2026-06-07 02:57:20,605][472559] Updated weights for policy 0, policy_version 31456 (0.0008) +[2026-06-07 02:57:20,727][472559] Updated weights for policy 0, policy_version 31467 (0.0008) +[2026-06-07 02:57:20,857][472559] Updated weights for policy 0, policy_version 31478 (0.0009) +[2026-06-07 02:57:21,405][472559] Updated weights for policy 0, policy_version 31489 (0.0008) +[2026-06-07 02:57:21,513][472559] Updated weights for policy 0, policy_version 31499 (0.0008) +[2026-06-07 02:57:21,639][472559] Updated weights for policy 0, policy_version 31510 (0.0008) +[2026-06-07 02:57:21,754][472559] Updated weights for policy 0, policy_version 31520 (0.0008) +[2026-06-07 02:57:21,869][472559] Updated weights for policy 0, policy_version 31530 (0.0008) +[2026-06-07 02:57:21,985][472559] Updated weights for policy 0, policy_version 31540 (0.0006) +[2026-06-07 02:57:22,110][472559] Updated weights for policy 0, policy_version 31551 (0.0008) +[2026-06-07 02:57:22,668][472559] Updated weights for policy 0, policy_version 31561 (0.0008) +[2026-06-07 02:57:22,795][472559] Updated weights for policy 0, policy_version 31572 (0.0008) +[2026-06-07 02:57:22,914][472559] Updated weights for policy 0, policy_version 31582 (0.0008) +[2026-06-07 02:57:23,021][472559] Updated weights for policy 0, policy_version 31592 (0.0008) +[2026-06-07 02:57:23,117][464927] Fps is (10 sec: 26214.5, 60 sec: 27852.8, 300 sec: 28213.8). Total num frames: 16154624. Throughput: 0: 28424.6. Samples: 16175104. Policy #0 lag: (min: 63.0, avg: 75.5, max: 127.0) +[2026-06-07 02:57:23,117][464927] Avg episode reward: [(0, '978.247')] +[2026-06-07 02:57:23,150][472559] Updated weights for policy 0, policy_version 31603 (0.0008) +[2026-06-07 02:57:23,258][472559] Updated weights for policy 0, policy_version 31613 (0.0009) +[2026-06-07 02:57:23,291][472025] Saving new best policy, reward=978.247! +[2026-06-07 02:57:23,802][472559] Updated weights for policy 0, policy_version 31623 (0.0008) +[2026-06-07 02:57:23,926][472559] Updated weights for policy 0, policy_version 31634 (0.0008) +[2026-06-07 02:57:24,047][472559] Updated weights for policy 0, policy_version 31645 (0.0009) +[2026-06-07 02:57:24,162][472559] Updated weights for policy 0, policy_version 31655 (0.0009) +[2026-06-07 02:57:24,268][472559] Updated weights for policy 0, policy_version 31665 (0.0009) +[2026-06-07 02:57:24,392][472559] Updated weights for policy 0, policy_version 31675 (0.0008) +[2026-06-07 02:57:24,925][472559] Updated weights for policy 0, policy_version 31685 (0.0008) +[2026-06-07 02:57:25,035][472559] Updated weights for policy 0, policy_version 31695 (0.0008) +[2026-06-07 02:57:25,146][472559] Updated weights for policy 0, policy_version 31705 (0.0008) +[2026-06-07 02:57:25,270][472559] Updated weights for policy 0, policy_version 31716 (0.0008) +[2026-06-07 02:57:25,380][472559] Updated weights for policy 0, policy_version 31726 (0.0008) +[2026-06-07 02:57:25,489][472559] Updated weights for policy 0, policy_version 31736 (0.0008) +[2026-06-07 02:57:26,049][472559] Updated weights for policy 0, policy_version 31746 (0.0008) +[2026-06-07 02:57:26,168][472559] Updated weights for policy 0, policy_version 31757 (0.0008) +[2026-06-07 02:57:26,284][472559] Updated weights for policy 0, policy_version 31767 (0.0008) +[2026-06-07 02:57:26,401][472559] Updated weights for policy 0, policy_version 31777 (0.0008) +[2026-06-07 02:57:26,512][472559] Updated weights for policy 0, policy_version 31787 (0.0008) +[2026-06-07 02:57:26,652][472559] Updated weights for policy 0, policy_version 31799 (0.0009) +[2026-06-07 02:57:27,180][472559] Updated weights for policy 0, policy_version 31809 (0.0008) +[2026-06-07 02:57:27,296][472559] Updated weights for policy 0, policy_version 31819 (0.0008) +[2026-06-07 02:57:27,401][472559] Updated weights for policy 0, policy_version 31829 (0.0008) +[2026-06-07 02:57:27,522][472559] Updated weights for policy 0, policy_version 31840 (0.0008) +[2026-06-07 02:57:27,650][472559] Updated weights for policy 0, policy_version 31851 (0.0008) +[2026-06-07 02:57:27,763][472559] Updated weights for policy 0, policy_version 31861 (0.0008) +[2026-06-07 02:57:27,879][472559] Updated weights for policy 0, policy_version 31871 (0.0009) +[2026-06-07 02:57:28,117][464927] Fps is (10 sec: 29490.8, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 16318464. Throughput: 0: 28350.4. Samples: 16346880. Policy #0 lag: (min: 63.0, avg: 75.5, max: 127.0) +[2026-06-07 02:57:28,118][464927] Avg episode reward: [(0, '1004.724')] +[2026-06-07 02:57:28,124][472025] Saving new best policy, reward=1004.724! +[2026-06-07 02:57:28,432][472559] Updated weights for policy 0, policy_version 31881 (0.0008) +[2026-06-07 02:57:28,554][472559] Updated weights for policy 0, policy_version 31892 (0.0009) +[2026-06-07 02:57:28,668][472559] Updated weights for policy 0, policy_version 31902 (0.0008) +[2026-06-07 02:57:28,795][472559] Updated weights for policy 0, policy_version 31913 (0.0008) +[2026-06-07 02:57:28,911][472559] Updated weights for policy 0, policy_version 31923 (0.0008) +[2026-06-07 02:57:29,033][472559] Updated weights for policy 0, policy_version 31934 (0.0006) +[2026-06-07 02:57:29,613][472559] Updated weights for policy 0, policy_version 31945 (0.0008) +[2026-06-07 02:57:29,721][472559] Updated weights for policy 0, policy_version 31955 (0.0007) +[2026-06-07 02:57:29,836][472559] Updated weights for policy 0, policy_version 31965 (0.0008) +[2026-06-07 02:57:29,952][472559] Updated weights for policy 0, policy_version 31975 (0.0009) +[2026-06-07 02:57:30,078][472559] Updated weights for policy 0, policy_version 31986 (0.0008) +[2026-06-07 02:57:30,210][472559] Updated weights for policy 0, policy_version 31997 (0.0008) +[2026-06-07 02:57:30,736][472559] Updated weights for policy 0, policy_version 32007 (0.0008) +[2026-06-07 02:57:30,844][472559] Updated weights for policy 0, policy_version 32017 (0.0008) +[2026-06-07 02:57:30,965][472559] Updated weights for policy 0, policy_version 32028 (0.0008) +[2026-06-07 02:57:31,084][472559] Updated weights for policy 0, policy_version 32038 (0.0009) +[2026-06-07 02:57:31,224][472559] Updated weights for policy 0, policy_version 32050 (0.0009) +[2026-06-07 02:57:31,347][472559] Updated weights for policy 0, policy_version 32061 (0.0008) +[2026-06-07 02:57:31,892][472559] Updated weights for policy 0, policy_version 32071 (0.0008) +[2026-06-07 02:57:32,008][472559] Updated weights for policy 0, policy_version 32081 (0.0008) +[2026-06-07 02:57:32,123][472559] Updated weights for policy 0, policy_version 32092 (0.0008) +[2026-06-07 02:57:32,255][472559] Updated weights for policy 0, policy_version 32103 (0.0008) +[2026-06-07 02:57:32,369][472559] Updated weights for policy 0, policy_version 32113 (0.0008) +[2026-06-07 02:57:32,483][472559] Updated weights for policy 0, policy_version 32123 (0.0008) +[2026-06-07 02:57:33,021][472559] Updated weights for policy 0, policy_version 32133 (0.0008) +[2026-06-07 02:57:33,117][464927] Fps is (10 sec: 29490.9, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 16449536. Throughput: 0: 28487.1. Samples: 16433536. Policy #0 lag: (min: 63.0, avg: 75.5, max: 127.0) +[2026-06-07 02:57:33,118][464927] Avg episode reward: [(0, '983.790')] +[2026-06-07 02:57:33,137][472559] Updated weights for policy 0, policy_version 32143 (0.0009) +[2026-06-07 02:57:33,246][472559] Updated weights for policy 0, policy_version 32153 (0.0009) +[2026-06-07 02:57:33,361][472559] Updated weights for policy 0, policy_version 32163 (0.0008) +[2026-06-07 02:57:33,481][472559] Updated weights for policy 0, policy_version 32174 (0.0008) +[2026-06-07 02:57:33,600][472559] Updated weights for policy 0, policy_version 32184 (0.0009) +[2026-06-07 02:57:34,165][472559] Updated weights for policy 0, policy_version 32195 (0.0008) +[2026-06-07 02:57:34,278][472559] Updated weights for policy 0, policy_version 32205 (0.0008) +[2026-06-07 02:57:34,390][472559] Updated weights for policy 0, policy_version 32215 (0.0008) +[2026-06-07 02:57:34,504][472559] Updated weights for policy 0, policy_version 32225 (0.0009) +[2026-06-07 02:57:34,627][472559] Updated weights for policy 0, policy_version 32236 (0.0008) +[2026-06-07 02:57:34,756][472559] Updated weights for policy 0, policy_version 32247 (0.0009) +[2026-06-07 02:57:35,305][472559] Updated weights for policy 0, policy_version 32257 (0.0009) +[2026-06-07 02:57:35,418][472559] Updated weights for policy 0, policy_version 32267 (0.0008) +[2026-06-07 02:57:35,564][472559] Updated weights for policy 0, policy_version 32280 (0.0008) +[2026-06-07 02:57:35,680][472559] Updated weights for policy 0, policy_version 32290 (0.0008) +[2026-06-07 02:57:35,817][472559] Updated weights for policy 0, policy_version 32302 (0.0009) +[2026-06-07 02:57:35,934][472559] Updated weights for policy 0, policy_version 32312 (0.0008) +[2026-06-07 02:57:36,472][472559] Updated weights for policy 0, policy_version 32322 (0.0008) +[2026-06-07 02:57:36,588][472559] Updated weights for policy 0, policy_version 32332 (0.0008) +[2026-06-07 02:57:36,706][472559] Updated weights for policy 0, policy_version 32343 (0.0008) +[2026-06-07 02:57:36,835][472559] Updated weights for policy 0, policy_version 32354 (0.0008) +[2026-06-07 02:57:36,950][472559] Updated weights for policy 0, policy_version 32364 (0.0008) +[2026-06-07 02:57:37,084][472559] Updated weights for policy 0, policy_version 32376 (0.0008) +[2026-06-07 02:57:37,629][472559] Updated weights for policy 0, policy_version 32387 (0.0009) +[2026-06-07 02:57:37,754][472559] Updated weights for policy 0, policy_version 32398 (0.0008) +[2026-06-07 02:57:37,878][472559] Updated weights for policy 0, policy_version 32409 (0.0008) +[2026-06-07 02:57:38,011][472559] Updated weights for policy 0, policy_version 32421 (0.0008) +[2026-06-07 02:57:38,117][464927] Fps is (10 sec: 26214.6, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 16580608. Throughput: 0: 28481.3. Samples: 16601216. Policy #0 lag: (min: 63.0, avg: 75.5, max: 127.0) +[2026-06-07 02:57:38,118][464927] Avg episode reward: [(0, '967.107')] +[2026-06-07 02:57:38,128][472559] Updated weights for policy 0, policy_version 32431 (0.0009) +[2026-06-07 02:57:38,256][472559] Updated weights for policy 0, policy_version 32442 (0.0008) +[2026-06-07 02:57:38,794][472559] Updated weights for policy 0, policy_version 32452 (0.0008) +[2026-06-07 02:57:38,912][472559] Updated weights for policy 0, policy_version 32463 (0.0008) +[2026-06-07 02:57:39,022][472559] Updated weights for policy 0, policy_version 32473 (0.0008) +[2026-06-07 02:57:39,139][472559] Updated weights for policy 0, policy_version 32483 (0.0008) +[2026-06-07 02:57:39,253][472559] Updated weights for policy 0, policy_version 32493 (0.0007) +[2026-06-07 02:57:39,392][472559] Updated weights for policy 0, policy_version 32505 (0.0008) +[2026-06-07 02:57:39,956][472559] Updated weights for policy 0, policy_version 32516 (0.0008) +[2026-06-07 02:57:40,063][472559] Updated weights for policy 0, policy_version 32526 (0.0008) +[2026-06-07 02:57:40,197][472559] Updated weights for policy 0, policy_version 32537 (0.0008) +[2026-06-07 02:57:40,314][472559] Updated weights for policy 0, policy_version 32547 (0.0008) +[2026-06-07 02:57:40,460][472559] Updated weights for policy 0, policy_version 32560 (0.0008) +[2026-06-07 02:57:40,574][472559] Updated weights for policy 0, policy_version 32570 (0.0008) +[2026-06-07 02:57:41,147][472559] Updated weights for policy 0, policy_version 32582 (0.0008) +[2026-06-07 02:57:41,248][472559] Updated weights for policy 0, policy_version 32592 (0.0008) +[2026-06-07 02:57:41,378][472559] Updated weights for policy 0, policy_version 32603 (0.0008) +[2026-06-07 02:57:41,491][472559] Updated weights for policy 0, policy_version 32613 (0.0009) +[2026-06-07 02:57:41,640][472559] Updated weights for policy 0, policy_version 32626 (0.0009) +[2026-06-07 02:57:41,765][472559] Updated weights for policy 0, policy_version 32637 (0.0008) +[2026-06-07 02:57:42,327][472559] Updated weights for policy 0, policy_version 32647 (0.0008) +[2026-06-07 02:57:42,447][472559] Updated weights for policy 0, policy_version 32658 (0.0008) +[2026-06-07 02:57:42,558][472559] Updated weights for policy 0, policy_version 32668 (0.0008) +[2026-06-07 02:57:42,683][472559] Updated weights for policy 0, policy_version 32679 (0.0008) +[2026-06-07 02:57:42,803][472559] Updated weights for policy 0, policy_version 32690 (0.0008) +[2026-06-07 02:57:42,917][472559] Updated weights for policy 0, policy_version 32700 (0.0008) +[2026-06-07 02:57:43,117][464927] Fps is (10 sec: 29491.4, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 16744448. Throughput: 0: 28327.9. Samples: 16771328. Policy #0 lag: (min: 63.0, avg: 75.5, max: 127.0) +[2026-06-07 02:57:43,118][464927] Avg episode reward: [(0, '1035.886')] +[2026-06-07 02:57:43,122][472025] Saving new best policy, reward=1035.886! +[2026-06-07 02:57:43,484][472559] Updated weights for policy 0, policy_version 32711 (0.0007) +[2026-06-07 02:57:43,602][472559] Updated weights for policy 0, policy_version 32722 (0.0008) +[2026-06-07 02:57:43,732][472559] Updated weights for policy 0, policy_version 32733 (0.0009) +[2026-06-07 02:57:43,851][472559] Updated weights for policy 0, policy_version 32743 (0.0008) +[2026-06-07 02:57:43,964][472559] Updated weights for policy 0, policy_version 32753 (0.0008) +[2026-06-07 02:57:44,100][472559] Updated weights for policy 0, policy_version 32765 (0.0008) +[2026-06-07 02:57:44,635][472559] Updated weights for policy 0, policy_version 32775 (0.0008) +[2026-06-07 02:57:44,749][472559] Updated weights for policy 0, policy_version 32785 (0.0008) +[2026-06-07 02:57:44,869][472559] Updated weights for policy 0, policy_version 32796 (0.0008) +[2026-06-07 02:57:44,983][472559] Updated weights for policy 0, policy_version 32806 (0.0008) +[2026-06-07 02:57:45,110][472559] Updated weights for policy 0, policy_version 32817 (0.0008) +[2026-06-07 02:57:45,220][472559] Updated weights for policy 0, policy_version 32827 (0.0008) +[2026-06-07 02:57:45,808][472559] Updated weights for policy 0, policy_version 32838 (0.0008) +[2026-06-07 02:57:45,930][472559] Updated weights for policy 0, policy_version 32849 (0.0008) +[2026-06-07 02:57:46,060][472559] Updated weights for policy 0, policy_version 32861 (0.0008) +[2026-06-07 02:57:46,174][472559] Updated weights for policy 0, policy_version 32871 (0.0008) +[2026-06-07 02:57:46,314][472559] Updated weights for policy 0, policy_version 32883 (0.0008) +[2026-06-07 02:57:46,440][472559] Updated weights for policy 0, policy_version 32894 (0.0008) +[2026-06-07 02:57:47,007][472559] Updated weights for policy 0, policy_version 32904 (0.0008) +[2026-06-07 02:57:47,119][472559] Updated weights for policy 0, policy_version 32914 (0.0008) +[2026-06-07 02:57:47,232][472559] Updated weights for policy 0, policy_version 32924 (0.0008) +[2026-06-07 02:57:47,344][472559] Updated weights for policy 0, policy_version 32934 (0.0008) +[2026-06-07 02:57:47,476][472559] Updated weights for policy 0, policy_version 32945 (0.0008) +[2026-06-07 02:57:47,598][472559] Updated weights for policy 0, policy_version 32956 (0.0009) +[2026-06-07 02:57:48,117][464927] Fps is (10 sec: 29491.5, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 16875520. Throughput: 0: 28467.2. Samples: 16860160. Policy #0 lag: (min: 63.0, avg: 75.2, max: 127.0) +[2026-06-07 02:57:48,117][464927] Avg episode reward: [(0, '1154.212')] +[2026-06-07 02:57:48,159][472559] Updated weights for policy 0, policy_version 32966 (0.0008) +[2026-06-07 02:57:48,296][472559] Updated weights for policy 0, policy_version 32978 (0.0008) +[2026-06-07 02:57:48,408][472559] Updated weights for policy 0, policy_version 32988 (0.0008) +[2026-06-07 02:57:48,523][472559] Updated weights for policy 0, policy_version 32998 (0.0008) +[2026-06-07 02:57:48,639][472559] Updated weights for policy 0, policy_version 33008 (0.0009) +[2026-06-07 02:57:48,773][472559] Updated weights for policy 0, policy_version 33019 (0.0008) +[2026-06-07 02:57:48,824][472025] Saving new best policy, reward=1154.212! +[2026-06-07 02:57:49,338][472559] Updated weights for policy 0, policy_version 33030 (0.0008) +[2026-06-07 02:57:49,464][472559] Updated weights for policy 0, policy_version 33041 (0.0008) +[2026-06-07 02:57:49,600][472559] Updated weights for policy 0, policy_version 33053 (0.0009) +[2026-06-07 02:57:49,723][472559] Updated weights for policy 0, policy_version 33064 (0.0008) +[2026-06-07 02:57:49,837][472559] Updated weights for policy 0, policy_version 33074 (0.0008) +[2026-06-07 02:57:49,949][472559] Updated weights for policy 0, policy_version 33084 (0.0009) +[2026-06-07 02:57:50,508][472559] Updated weights for policy 0, policy_version 33094 (0.0008) +[2026-06-07 02:57:50,618][472559] Updated weights for policy 0, policy_version 33105 (0.0008) +[2026-06-07 02:57:50,736][472559] Updated weights for policy 0, policy_version 33115 (0.0008) +[2026-06-07 02:57:50,852][472559] Updated weights for policy 0, policy_version 33125 (0.0009) +[2026-06-07 02:57:50,963][472559] Updated weights for policy 0, policy_version 33135 (0.0008) +[2026-06-07 02:57:51,093][472559] Updated weights for policy 0, policy_version 33146 (0.0008) +[2026-06-07 02:57:51,646][472559] Updated weights for policy 0, policy_version 33156 (0.0008) +[2026-06-07 02:57:51,760][472559] Updated weights for policy 0, policy_version 33166 (0.0009) +[2026-06-07 02:57:51,875][472559] Updated weights for policy 0, policy_version 33176 (0.0008) +[2026-06-07 02:57:51,991][472559] Updated weights for policy 0, policy_version 33186 (0.0008) +[2026-06-07 02:57:52,114][472559] Updated weights for policy 0, policy_version 33197 (0.0008) +[2026-06-07 02:57:52,227][472559] Updated weights for policy 0, policy_version 33207 (0.0008) +[2026-06-07 02:57:52,782][472559] Updated weights for policy 0, policy_version 33218 (0.0008) +[2026-06-07 02:57:52,914][472559] Updated weights for policy 0, policy_version 33230 (0.0008) +[2026-06-07 02:57:53,025][472559] Updated weights for policy 0, policy_version 33240 (0.0008) +[2026-06-07 02:57:53,117][464927] Fps is (10 sec: 26214.3, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 17006592. Throughput: 0: 28484.4. Samples: 17029120. Policy #0 lag: (min: 63.0, avg: 75.2, max: 127.0) +[2026-06-07 02:57:53,117][464927] Avg episode reward: [(0, '1134.144')] +[2026-06-07 02:57:53,141][472559] Updated weights for policy 0, policy_version 33250 (0.0008) +[2026-06-07 02:57:53,255][472559] Updated weights for policy 0, policy_version 33260 (0.0008) +[2026-06-07 02:57:53,379][472559] Updated weights for policy 0, policy_version 33271 (0.0008) +[2026-06-07 02:57:53,925][472559] Updated weights for policy 0, policy_version 33281 (0.0008) +[2026-06-07 02:57:54,049][472559] Updated weights for policy 0, policy_version 33292 (0.0008) +[2026-06-07 02:57:54,173][472559] Updated weights for policy 0, policy_version 33303 (0.0006) +[2026-06-07 02:57:54,282][472559] Updated weights for policy 0, policy_version 33313 (0.0008) +[2026-06-07 02:57:54,396][472559] Updated weights for policy 0, policy_version 33323 (0.0008) +[2026-06-07 02:57:54,523][472559] Updated weights for policy 0, policy_version 33334 (0.0008) +[2026-06-07 02:57:54,633][472559] Updated weights for policy 0, policy_version 33344 (0.0008) +[2026-06-07 02:57:55,194][472559] Updated weights for policy 0, policy_version 33356 (0.0008) +[2026-06-07 02:57:55,314][472559] Updated weights for policy 0, policy_version 33366 (0.0008) +[2026-06-07 02:57:55,427][472559] Updated weights for policy 0, policy_version 33376 (0.0008) +[2026-06-07 02:57:55,549][472559] Updated weights for policy 0, policy_version 33387 (0.0008) +[2026-06-07 02:57:55,678][472559] Updated weights for policy 0, policy_version 33398 (0.0008) +[2026-06-07 02:57:55,792][472559] Updated weights for policy 0, policy_version 33408 (0.0009) +[2026-06-07 02:57:56,340][472559] Updated weights for policy 0, policy_version 33419 (0.0007) +[2026-06-07 02:57:56,473][472559] Updated weights for policy 0, policy_version 33431 (0.0008) +[2026-06-07 02:57:56,589][472559] Updated weights for policy 0, policy_version 33441 (0.0008) +[2026-06-07 02:57:56,704][472559] Updated weights for policy 0, policy_version 33451 (0.0008) +[2026-06-07 02:57:56,828][472559] Updated weights for policy 0, policy_version 33462 (0.0008) +[2026-06-07 02:57:56,938][472559] Updated weights for policy 0, policy_version 33472 (0.0008) +[2026-06-07 02:57:57,488][472559] Updated weights for policy 0, policy_version 33482 (0.0008) +[2026-06-07 02:57:57,613][472559] Updated weights for policy 0, policy_version 33493 (0.0008) +[2026-06-07 02:57:57,724][472559] Updated weights for policy 0, policy_version 33503 (0.0008) +[2026-06-07 02:57:57,855][472559] Updated weights for policy 0, policy_version 33514 (0.0008) +[2026-06-07 02:57:57,967][472559] Updated weights for policy 0, policy_version 33524 (0.0009) +[2026-06-07 02:57:58,101][472559] Updated weights for policy 0, policy_version 33536 (0.0009) +[2026-06-07 02:57:58,117][464927] Fps is (10 sec: 29491.0, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 17170432. Throughput: 0: 28299.3. Samples: 17195776. Policy #0 lag: (min: 63.0, avg: 75.2, max: 127.0) +[2026-06-07 02:57:58,117][464927] Avg episode reward: [(0, '1139.162')] +[2026-06-07 02:57:58,661][472559] Updated weights for policy 0, policy_version 33546 (0.0008) +[2026-06-07 02:57:58,789][472559] Updated weights for policy 0, policy_version 33557 (0.0008) +[2026-06-07 02:57:58,924][472559] Updated weights for policy 0, policy_version 33569 (0.0008) +[2026-06-07 02:57:59,043][472559] Updated weights for policy 0, policy_version 33579 (0.0008) +[2026-06-07 02:57:59,157][472559] Updated weights for policy 0, policy_version 33589 (0.0008) +[2026-06-07 02:57:59,268][472559] Updated weights for policy 0, policy_version 33599 (0.0008) +[2026-06-07 02:57:59,789][472559] Updated weights for policy 0, policy_version 33609 (0.0008) +[2026-06-07 02:57:59,910][472559] Updated weights for policy 0, policy_version 33620 (0.0008) +[2026-06-07 02:58:00,035][472559] Updated weights for policy 0, policy_version 33631 (0.0008) +[2026-06-07 02:58:00,148][472559] Updated weights for policy 0, policy_version 33641 (0.0008) +[2026-06-07 02:58:00,264][472559] Updated weights for policy 0, policy_version 33651 (0.0008) +[2026-06-07 02:58:00,393][472559] Updated weights for policy 0, policy_version 33662 (0.0008) +[2026-06-07 02:58:00,934][472559] Updated weights for policy 0, policy_version 33672 (0.0008) +[2026-06-07 02:58:01,059][472559] Updated weights for policy 0, policy_version 33683 (0.0008) +[2026-06-07 02:58:01,171][472559] Updated weights for policy 0, policy_version 33693 (0.0008) +[2026-06-07 02:58:01,284][472559] Updated weights for policy 0, policy_version 33703 (0.0008) +[2026-06-07 02:58:01,419][472559] Updated weights for policy 0, policy_version 33715 (0.0008) +[2026-06-07 02:58:01,530][472559] Updated weights for policy 0, policy_version 33725 (0.0008) +[2026-06-07 02:58:02,076][472559] Updated weights for policy 0, policy_version 33735 (0.0007) +[2026-06-07 02:58:02,188][472559] Updated weights for policy 0, policy_version 33745 (0.0008) +[2026-06-07 02:58:02,316][472559] Updated weights for policy 0, policy_version 33756 (0.0009) +[2026-06-07 02:58:02,447][472559] Updated weights for policy 0, policy_version 33768 (0.0008) +[2026-06-07 02:58:02,559][472559] Updated weights for policy 0, policy_version 33778 (0.0008) +[2026-06-07 02:58:02,673][472559] Updated weights for policy 0, policy_version 33788 (0.0008) +[2026-06-07 02:58:03,117][464927] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 17301504. Throughput: 0: 28455.9. Samples: 17287168. Policy #0 lag: (min: 63.0, avg: 75.2, max: 127.0) +[2026-06-07 02:58:03,118][464927] Avg episode reward: [(0, '1124.124')] +[2026-06-07 02:58:03,211][472559] Updated weights for policy 0, policy_version 33798 (0.0008) +[2026-06-07 02:58:03,323][472559] Updated weights for policy 0, policy_version 33808 (0.0008) +[2026-06-07 02:58:03,438][472559] Updated weights for policy 0, policy_version 33818 (0.0008) +[2026-06-07 02:58:03,555][472559] Updated weights for policy 0, policy_version 33829 (0.0008) +[2026-06-07 02:58:03,677][472559] Updated weights for policy 0, policy_version 33839 (0.0008) +[2026-06-07 02:58:03,811][472559] Updated weights for policy 0, policy_version 33851 (0.0008) +[2026-06-07 02:58:04,361][472559] Updated weights for policy 0, policy_version 33861 (0.0007) +[2026-06-07 02:58:04,502][472559] Updated weights for policy 0, policy_version 33873 (0.0006) +[2026-06-07 02:58:04,654][472559] Updated weights for policy 0, policy_version 33887 (0.0008) +[2026-06-07 02:58:04,777][472559] Updated weights for policy 0, policy_version 33898 (0.0008) +[2026-06-07 02:58:04,893][472559] Updated weights for policy 0, policy_version 33908 (0.0008) +[2026-06-07 02:58:05,017][472559] Updated weights for policy 0, policy_version 33919 (0.0009) +[2026-06-07 02:58:05,577][472559] Updated weights for policy 0, policy_version 33930 (0.0008) +[2026-06-07 02:58:05,684][472559] Updated weights for policy 0, policy_version 33940 (0.0008) +[2026-06-07 02:58:05,798][472559] Updated weights for policy 0, policy_version 33950 (0.0008) +[2026-06-07 02:58:05,912][472559] Updated weights for policy 0, policy_version 33960 (0.0008) +[2026-06-07 02:58:06,021][472559] Updated weights for policy 0, policy_version 33970 (0.0008) +[2026-06-07 02:58:06,148][472559] Updated weights for policy 0, policy_version 33981 (0.0008) +[2026-06-07 02:58:06,713][472559] Updated weights for policy 0, policy_version 33991 (0.0008) +[2026-06-07 02:58:06,817][472559] Updated weights for policy 0, policy_version 34001 (0.0008) +[2026-06-07 02:58:06,937][472559] Updated weights for policy 0, policy_version 34012 (0.0008) +[2026-06-07 02:58:07,076][472559] Updated weights for policy 0, policy_version 34024 (0.0008) +[2026-06-07 02:58:07,186][472559] Updated weights for policy 0, policy_version 34034 (0.0008) +[2026-06-07 02:58:07,311][472559] Updated weights for policy 0, policy_version 34045 (0.0008) +[2026-06-07 02:58:07,868][472559] Updated weights for policy 0, policy_version 34055 (0.0008) +[2026-06-07 02:58:07,989][472559] Updated weights for policy 0, policy_version 34066 (0.0008) +[2026-06-07 02:58:08,102][472559] Updated weights for policy 0, policy_version 34076 (0.0008) +[2026-06-07 02:58:08,117][464927] Fps is (10 sec: 26214.5, 60 sec: 28399.0, 300 sec: 28213.8). Total num frames: 17432576. Throughput: 0: 28421.6. Samples: 17454080. Policy #0 lag: (min: 63.0, avg: 74.7, max: 127.0) +[2026-06-07 02:58:08,119][464927] Avg episode reward: [(0, '1142.974')] +[2026-06-07 02:58:08,216][472559] Updated weights for policy 0, policy_version 34086 (0.0008) +[2026-06-07 02:58:08,340][472559] Updated weights for policy 0, policy_version 34097 (0.0008) +[2026-06-07 02:58:08,461][472559] Updated weights for policy 0, policy_version 34108 (0.0008) +[2026-06-07 02:58:09,012][472559] Updated weights for policy 0, policy_version 34118 (0.0007) +[2026-06-07 02:58:09,145][472559] Updated weights for policy 0, policy_version 34130 (0.0008) +[2026-06-07 02:58:09,260][472559] Updated weights for policy 0, policy_version 34140 (0.0008) +[2026-06-07 02:58:09,370][472559] Updated weights for policy 0, policy_version 34150 (0.0008) +[2026-06-07 02:58:09,496][472559] Updated weights for policy 0, policy_version 34161 (0.0009) +[2026-06-07 02:58:09,620][472559] Updated weights for policy 0, policy_version 34172 (0.0008) +[2026-06-07 02:58:10,173][472559] Updated weights for policy 0, policy_version 34182 (0.0008) +[2026-06-07 02:58:10,286][472559] Updated weights for policy 0, policy_version 34192 (0.0009) +[2026-06-07 02:58:10,419][472559] Updated weights for policy 0, policy_version 34204 (0.0008) +[2026-06-07 02:58:10,535][472559] Updated weights for policy 0, policy_version 34214 (0.0009) +[2026-06-07 02:58:10,643][472559] Updated weights for policy 0, policy_version 34224 (0.0008) +[2026-06-07 02:58:10,756][472559] Updated weights for policy 0, policy_version 34234 (0.0008) +[2026-06-07 02:58:11,314][472559] Updated weights for policy 0, policy_version 34245 (0.0008) +[2026-06-07 02:58:11,433][472559] Updated weights for policy 0, policy_version 34256 (0.0008) +[2026-06-07 02:58:11,561][472559] Updated weights for policy 0, policy_version 34267 (0.0008) +[2026-06-07 02:58:11,670][472559] Updated weights for policy 0, policy_version 34277 (0.0008) +[2026-06-07 02:58:11,790][472559] Updated weights for policy 0, policy_version 34287 (0.0008) +[2026-06-07 02:58:11,908][472559] Updated weights for policy 0, policy_version 34298 (0.0008) +[2026-06-07 02:58:12,477][472559] Updated weights for policy 0, policy_version 34308 (0.0008) +[2026-06-07 02:58:12,589][472559] Updated weights for policy 0, policy_version 34318 (0.0008) +[2026-06-07 02:58:12,707][472559] Updated weights for policy 0, policy_version 34328 (0.0008) +[2026-06-07 02:58:12,848][472559] Updated weights for policy 0, policy_version 34341 (0.0008) +[2026-06-07 02:58:12,970][472559] Updated weights for policy 0, policy_version 34351 (0.0008) +[2026-06-07 02:58:13,083][472559] Updated weights for policy 0, policy_version 34361 (0.0009) +[2026-06-07 02:58:13,117][464927] Fps is (10 sec: 26214.4, 60 sec: 27852.8, 300 sec: 28213.8). Total num frames: 17563648. Throughput: 0: 28336.5. Samples: 17622016. Policy #0 lag: (min: 63.0, avg: 74.7, max: 127.0) +[2026-06-07 02:58:13,117][464927] Avg episode reward: [(0, '1144.899')] +[2026-06-07 02:58:13,637][472559] Updated weights for policy 0, policy_version 34371 (0.0008) +[2026-06-07 02:58:13,782][472559] Updated weights for policy 0, policy_version 34384 (0.0008) +[2026-06-07 02:58:13,893][472559] Updated weights for policy 0, policy_version 34394 (0.0008) +[2026-06-07 02:58:14,009][472559] Updated weights for policy 0, policy_version 34404 (0.0008) +[2026-06-07 02:58:14,121][472559] Updated weights for policy 0, policy_version 34414 (0.0008) +[2026-06-07 02:58:14,240][472559] Updated weights for policy 0, policy_version 34424 (0.0008) +[2026-06-07 02:58:14,808][472559] Updated weights for policy 0, policy_version 34435 (0.0008) +[2026-06-07 02:58:14,925][472559] Updated weights for policy 0, policy_version 34446 (0.0008) +[2026-06-07 02:58:15,038][472559] Updated weights for policy 0, policy_version 34456 (0.0008) +[2026-06-07 02:58:15,163][472559] Updated weights for policy 0, policy_version 34467 (0.0008) +[2026-06-07 02:58:15,299][472559] Updated weights for policy 0, policy_version 34479 (0.0008) +[2026-06-07 02:58:15,438][472559] Updated weights for policy 0, policy_version 34491 (0.0008) +[2026-06-07 02:58:15,975][472559] Updated weights for policy 0, policy_version 34501 (0.0008) +[2026-06-07 02:58:16,084][472559] Updated weights for policy 0, policy_version 34511 (0.0008) +[2026-06-07 02:58:16,239][472559] Updated weights for policy 0, policy_version 34525 (0.0008) +[2026-06-07 02:58:16,364][472559] Updated weights for policy 0, policy_version 34536 (0.0008) +[2026-06-07 02:58:16,496][472559] Updated weights for policy 0, policy_version 34547 (0.0008) +[2026-06-07 02:58:16,602][472559] Updated weights for policy 0, policy_version 34557 (0.0008) +[2026-06-07 02:58:17,147][472559] Updated weights for policy 0, policy_version 34567 (0.0008) +[2026-06-07 02:58:17,260][472559] Updated weights for policy 0, policy_version 34578 (0.0008) +[2026-06-07 02:58:17,394][472559] Updated weights for policy 0, policy_version 34590 (0.0008) +[2026-06-07 02:58:17,509][472559] Updated weights for policy 0, policy_version 34600 (0.0008) +[2026-06-07 02:58:17,641][472559] Updated weights for policy 0, policy_version 34612 (0.0008) +[2026-06-07 02:58:17,770][472559] Updated weights for policy 0, policy_version 34623 (0.0008) +[2026-06-07 02:58:18,117][464927] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 17727488. Throughput: 0: 28435.9. Samples: 17713152. Policy #0 lag: (min: 63.0, avg: 74.7, max: 127.0) +[2026-06-07 02:58:18,117][464927] Avg episode reward: [(0, '1113.899')] +[2026-06-07 02:58:18,320][472559] Updated weights for policy 0, policy_version 34633 (0.0008) +[2026-06-07 02:58:18,443][472559] Updated weights for policy 0, policy_version 34644 (0.0008) +[2026-06-07 02:58:18,585][472559] Updated weights for policy 0, policy_version 34657 (0.0008) +[2026-06-07 02:58:18,715][472559] Updated weights for policy 0, policy_version 34668 (0.0008) +[2026-06-07 02:58:18,834][472559] Updated weights for policy 0, policy_version 34678 (0.0008) +[2026-06-07 02:58:19,388][472559] Updated weights for policy 0, policy_version 34689 (0.0008) +[2026-06-07 02:58:19,512][472559] Updated weights for policy 0, policy_version 34700 (0.0008) +[2026-06-07 02:58:19,644][472559] Updated weights for policy 0, policy_version 34712 (0.0008) +[2026-06-07 02:58:19,771][472559] Updated weights for policy 0, policy_version 34723 (0.0009) +[2026-06-07 02:58:19,885][472559] Updated weights for policy 0, policy_version 34733 (0.0008) +[2026-06-07 02:58:20,009][472559] Updated weights for policy 0, policy_version 34744 (0.0008) +[2026-06-07 02:58:20,560][472559] Updated weights for policy 0, policy_version 34754 (0.0008) +[2026-06-07 02:58:20,706][472559] Updated weights for policy 0, policy_version 34767 (0.0008) +[2026-06-07 02:58:20,817][472559] Updated weights for policy 0, policy_version 34777 (0.0008) +[2026-06-07 02:58:20,962][472559] Updated weights for policy 0, policy_version 34790 (0.0008) +[2026-06-07 02:58:21,091][472559] Updated weights for policy 0, policy_version 34801 (0.0008) +[2026-06-07 02:58:21,208][472559] Updated weights for policy 0, policy_version 34811 (0.0008) +[2026-06-07 02:58:21,782][472559] Updated weights for policy 0, policy_version 34822 (0.0009) +[2026-06-07 02:58:21,915][472559] Updated weights for policy 0, policy_version 34834 (0.0008) +[2026-06-07 02:58:22,039][472559] Updated weights for policy 0, policy_version 34845 (0.0008) +[2026-06-07 02:58:22,161][472559] Updated weights for policy 0, policy_version 34856 (0.0008) +[2026-06-07 02:58:22,286][472559] Updated weights for policy 0, policy_version 34867 (0.0008) +[2026-06-07 02:58:22,413][472559] Updated weights for policy 0, policy_version 34878 (0.0009) +[2026-06-07 02:58:22,971][472559] Updated weights for policy 0, policy_version 34889 (0.0007) +[2026-06-07 02:58:23,082][472559] Updated weights for policy 0, policy_version 34899 (0.0008) +[2026-06-07 02:58:23,117][464927] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 17858560. Throughput: 0: 28433.2. Samples: 17880704. Policy #0 lag: (min: 63.0, avg: 74.7, max: 127.0) +[2026-06-07 02:58:23,118][464927] Avg episode reward: [(0, '1118.006')] +[2026-06-07 02:58:23,197][472559] Updated weights for policy 0, policy_version 34909 (0.0008) +[2026-06-07 02:58:23,317][472559] Updated weights for policy 0, policy_version 34919 (0.0008) +[2026-06-07 02:58:23,441][472559] Updated weights for policy 0, policy_version 34930 (0.0008) +[2026-06-07 02:58:23,570][472559] Updated weights for policy 0, policy_version 34941 (0.0008) +[2026-06-07 02:58:24,117][472559] Updated weights for policy 0, policy_version 34952 (0.0008) +[2026-06-07 02:58:24,227][472559] Updated weights for policy 0, policy_version 34962 (0.0008) +[2026-06-07 02:58:24,362][472559] Updated weights for policy 0, policy_version 34974 (0.0008) +[2026-06-07 02:58:24,490][472559] Updated weights for policy 0, policy_version 34985 (0.0008) +[2026-06-07 02:58:24,626][472559] Updated weights for policy 0, policy_version 34997 (0.0008) +[2026-06-07 02:58:24,742][472559] Updated weights for policy 0, policy_version 35007 (0.0008) +[2026-06-07 02:58:25,287][472559] Updated weights for policy 0, policy_version 35017 (0.0008) +[2026-06-07 02:58:25,397][472559] Updated weights for policy 0, policy_version 35027 (0.0008) +[2026-06-07 02:58:25,506][472559] Updated weights for policy 0, policy_version 35037 (0.0008) +[2026-06-07 02:58:25,623][472559] Updated weights for policy 0, policy_version 35047 (0.0008) +[2026-06-07 02:58:25,734][472559] Updated weights for policy 0, policy_version 35057 (0.0008) +[2026-06-07 02:58:25,851][472559] Updated weights for policy 0, policy_version 35067 (0.0009) +[2026-06-07 02:58:26,396][472559] Updated weights for policy 0, policy_version 35077 (0.0008) +[2026-06-07 02:58:26,527][472559] Updated weights for policy 0, policy_version 35089 (0.0008) +[2026-06-07 02:58:26,633][472559] Updated weights for policy 0, policy_version 35099 (0.0008) +[2026-06-07 02:58:26,752][472559] Updated weights for policy 0, policy_version 35109 (0.0008) +[2026-06-07 02:58:26,891][472559] Updated weights for policy 0, policy_version 35121 (0.0008) +[2026-06-07 02:58:27,040][472559] Updated weights for policy 0, policy_version 35135 (0.0008) +[2026-06-07 02:58:27,608][472559] Updated weights for policy 0, policy_version 35145 (0.0008) +[2026-06-07 02:58:27,717][472559] Updated weights for policy 0, policy_version 35155 (0.0008) +[2026-06-07 02:58:27,842][472559] Updated weights for policy 0, policy_version 35166 (0.0008) +[2026-06-07 02:58:27,953][472559] Updated weights for policy 0, policy_version 35176 (0.0008) +[2026-06-07 02:58:28,068][472559] Updated weights for policy 0, policy_version 35186 (0.0009) +[2026-06-07 02:58:28,117][464927] Fps is (10 sec: 26214.2, 60 sec: 27852.8, 300 sec: 28213.8). Total num frames: 17989632. Throughput: 0: 28364.7. Samples: 18047744. Policy #0 lag: (min: 63.0, avg: 74.7, max: 127.0) +[2026-06-07 02:58:28,117][464927] Avg episode reward: [(0, '1137.166')] +[2026-06-07 02:58:28,184][472559] Updated weights for policy 0, policy_version 35196 (0.0009) +[2026-06-07 02:58:28,731][472559] Updated weights for policy 0, policy_version 35206 (0.0008) +[2026-06-07 02:58:28,843][472559] Updated weights for policy 0, policy_version 35216 (0.0008) +[2026-06-07 02:58:28,955][472559] Updated weights for policy 0, policy_version 35226 (0.0008) +[2026-06-07 02:58:29,069][472559] Updated weights for policy 0, policy_version 35236 (0.0008) +[2026-06-07 02:58:29,186][472559] Updated weights for policy 0, policy_version 35246 (0.0008) +[2026-06-07 02:58:29,322][472559] Updated weights for policy 0, policy_version 35258 (0.0009) +[2026-06-07 02:58:29,874][472559] Updated weights for policy 0, policy_version 35268 (0.0008) +[2026-06-07 02:58:29,980][472559] Updated weights for policy 0, policy_version 35278 (0.0008) +[2026-06-07 02:58:30,086][472559] Updated weights for policy 0, policy_version 35288 (0.0008) +[2026-06-07 02:58:30,209][472559] Updated weights for policy 0, policy_version 35299 (0.0008) +[2026-06-07 02:58:30,327][472559] Updated weights for policy 0, policy_version 35309 (0.0007) +[2026-06-07 02:58:30,456][472559] Updated weights for policy 0, policy_version 35320 (0.0004) +[2026-06-07 02:58:30,547][472025] Early stopping after 8 epochs (64 sgd steps), loss delta 0.0000005 +[2026-06-07 02:58:30,989][472559] Updated weights for policy 0, policy_version 35331 (0.0004) +[2026-06-07 02:58:31,098][472559] Updated weights for policy 0, policy_version 35341 (0.0004) +[2026-06-07 02:58:31,222][472559] Updated weights for policy 0, policy_version 35352 (0.0005) +[2026-06-07 02:58:31,338][472559] Updated weights for policy 0, policy_version 35362 (0.0008) +[2026-06-07 02:58:31,453][472559] Updated weights for policy 0, policy_version 35372 (0.0008) +[2026-06-07 02:58:31,585][472559] Updated weights for policy 0, policy_version 35384 (0.0008) +[2026-06-07 02:58:32,126][472559] Updated weights for policy 0, policy_version 35395 (0.0008) +[2026-06-07 02:58:32,259][472559] Updated weights for policy 0, policy_version 35407 (0.0008) +[2026-06-07 02:58:32,386][472559] Updated weights for policy 0, policy_version 35418 (0.0008) +[2026-06-07 02:58:32,497][472559] Updated weights for policy 0, policy_version 35428 (0.0008) +[2026-06-07 02:58:32,645][472559] Updated weights for policy 0, policy_version 35441 (0.0008) +[2026-06-07 02:58:32,771][472559] Updated weights for policy 0, policy_version 35452 (0.0008) +[2026-06-07 02:58:33,117][464927] Fps is (10 sec: 29491.3, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 18153472. Throughput: 0: 28407.5. Samples: 18138496. Policy #0 lag: (min: 55.0, avg: 66.6, max: 119.0) +[2026-06-07 02:58:33,117][464927] Avg episode reward: [(0, '1151.302')] +[2026-06-07 02:58:33,318][472559] Updated weights for policy 0, policy_version 35463 (0.0008) +[2026-06-07 02:58:33,424][472559] Updated weights for policy 0, policy_version 35473 (0.0008) +[2026-06-07 02:58:33,558][472559] Updated weights for policy 0, policy_version 35485 (0.0008) +[2026-06-07 02:58:33,672][472559] Updated weights for policy 0, policy_version 35495 (0.0008) +[2026-06-07 02:58:33,784][472559] Updated weights for policy 0, policy_version 35505 (0.0008) +[2026-06-07 02:58:33,909][472559] Updated weights for policy 0, policy_version 35516 (0.0009) +[2026-06-07 02:58:34,466][472559] Updated weights for policy 0, policy_version 35527 (0.0008) +[2026-06-07 02:58:34,598][472559] Updated weights for policy 0, policy_version 35539 (0.0009) +[2026-06-07 02:58:34,725][472559] Updated weights for policy 0, policy_version 35550 (0.0008) +[2026-06-07 02:58:34,857][472559] Updated weights for policy 0, policy_version 35562 (0.0008) +[2026-06-07 02:58:34,985][472559] Updated weights for policy 0, policy_version 35573 (0.0008) +[2026-06-07 02:58:35,106][472559] Updated weights for policy 0, policy_version 35584 (0.0008) +[2026-06-07 02:58:35,685][472559] Updated weights for policy 0, policy_version 35596 (0.0008) +[2026-06-07 02:58:35,830][472559] Updated weights for policy 0, policy_version 35609 (0.0008) +[2026-06-07 02:58:35,951][472559] Updated weights for policy 0, policy_version 35620 (0.0008) +[2026-06-07 02:58:36,086][472559] Updated weights for policy 0, policy_version 35632 (0.0008) +[2026-06-07 02:58:36,197][472559] Updated weights for policy 0, policy_version 35642 (0.0008) +[2026-06-07 02:58:36,751][472559] Updated weights for policy 0, policy_version 35652 (0.0008) +[2026-06-07 02:58:36,882][472559] Updated weights for policy 0, policy_version 35664 (0.0008) +[2026-06-07 02:58:37,003][472559] Updated weights for policy 0, policy_version 35675 (0.0008) +[2026-06-07 02:58:37,130][472559] Updated weights for policy 0, policy_version 35686 (0.0008) +[2026-06-07 02:58:37,256][472559] Updated weights for policy 0, policy_version 35697 (0.0008) +[2026-06-07 02:58:37,376][472559] Updated weights for policy 0, policy_version 35708 (0.0008) +[2026-06-07 02:58:37,939][472559] Updated weights for policy 0, policy_version 35719 (0.0008) +[2026-06-07 02:58:38,052][472559] Updated weights for policy 0, policy_version 35729 (0.0008) +[2026-06-07 02:58:38,117][464927] Fps is (10 sec: 29491.5, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 18284544. Throughput: 0: 28344.9. Samples: 18304640. Policy #0 lag: (min: 55.0, avg: 66.6, max: 119.0) +[2026-06-07 02:58:38,118][464927] Avg episode reward: [(0, '1132.825')] +[2026-06-07 02:58:38,157][472559] Updated weights for policy 0, policy_version 35739 (0.0008) +[2026-06-07 02:58:38,285][472559] Updated weights for policy 0, policy_version 35750 (0.0008) +[2026-06-07 02:58:38,419][472559] Updated weights for policy 0, policy_version 35762 (0.0008) +[2026-06-07 02:58:38,548][472559] Updated weights for policy 0, policy_version 35773 (0.0008) +[2026-06-07 02:58:39,093][472559] Updated weights for policy 0, policy_version 35783 (0.0008) +[2026-06-07 02:58:39,218][472559] Updated weights for policy 0, policy_version 35794 (0.0008) +[2026-06-07 02:58:39,354][472559] Updated weights for policy 0, policy_version 35806 (0.0008) +[2026-06-07 02:58:39,466][472559] Updated weights for policy 0, policy_version 35816 (0.0008) +[2026-06-07 02:58:39,594][472559] Updated weights for policy 0, policy_version 35827 (0.0008) +[2026-06-07 02:58:39,711][472559] Updated weights for policy 0, policy_version 35837 (0.0008) +[2026-06-07 02:58:40,252][472559] Updated weights for policy 0, policy_version 35847 (0.0007) +[2026-06-07 02:58:40,372][472559] Updated weights for policy 0, policy_version 35858 (0.0008) +[2026-06-07 02:58:40,490][472559] Updated weights for policy 0, policy_version 35868 (0.0008) +[2026-06-07 02:58:40,603][472559] Updated weights for policy 0, policy_version 35878 (0.0008) +[2026-06-07 02:58:40,718][472559] Updated weights for policy 0, policy_version 35888 (0.0008) +[2026-06-07 02:58:40,838][472559] Updated weights for policy 0, policy_version 35899 (0.0008) +[2026-06-07 02:58:41,415][472559] Updated weights for policy 0, policy_version 35910 (0.0008) +[2026-06-07 02:58:41,538][472559] Updated weights for policy 0, policy_version 35921 (0.0008) +[2026-06-07 02:58:41,653][472559] Updated weights for policy 0, policy_version 35931 (0.0008) +[2026-06-07 02:58:41,783][472559] Updated weights for policy 0, policy_version 35943 (0.0008) +[2026-06-07 02:58:41,910][472559] Updated weights for policy 0, policy_version 35954 (0.0009) +[2026-06-07 02:58:42,026][472559] Updated weights for policy 0, policy_version 35964 (0.0008) +[2026-06-07 02:58:42,583][472559] Updated weights for policy 0, policy_version 35975 (0.0007) +[2026-06-07 02:58:42,716][472559] Updated weights for policy 0, policy_version 35987 (0.0008) +[2026-06-07 02:58:42,837][472559] Updated weights for policy 0, policy_version 35998 (0.0008) +[2026-06-07 02:58:42,963][472559] Updated weights for policy 0, policy_version 36009 (0.0008) +[2026-06-07 02:58:43,079][472559] Updated weights for policy 0, policy_version 36019 (0.0008) +[2026-06-07 02:58:43,117][464927] Fps is (10 sec: 26214.3, 60 sec: 27852.8, 300 sec: 28213.8). Total num frames: 18415616. Throughput: 0: 28342.1. Samples: 18471168. Policy #0 lag: (min: 55.0, avg: 66.6, max: 119.0) +[2026-06-07 02:58:43,117][464927] Avg episode reward: [(0, '1091.766')] +[2026-06-07 02:58:43,213][472559] Updated weights for policy 0, policy_version 36031 (0.0008) +[2026-06-07 02:58:43,763][472559] Updated weights for policy 0, policy_version 36042 (0.0008) +[2026-06-07 02:58:43,896][472559] Updated weights for policy 0, policy_version 36054 (0.0008) +[2026-06-07 02:58:44,022][472559] Updated weights for policy 0, policy_version 36065 (0.0008) +[2026-06-07 02:58:44,154][472559] Updated weights for policy 0, policy_version 36077 (0.0008) +[2026-06-07 02:58:44,269][472559] Updated weights for policy 0, policy_version 36087 (0.0008) +[2026-06-07 02:58:44,817][472559] Updated weights for policy 0, policy_version 36097 (0.0008) +[2026-06-07 02:58:44,930][472559] Updated weights for policy 0, policy_version 36107 (0.0008) +[2026-06-07 02:58:45,039][472559] Updated weights for policy 0, policy_version 36117 (0.0008) +[2026-06-07 02:58:45,164][472559] Updated weights for policy 0, policy_version 36128 (0.0008) +[2026-06-07 02:58:45,285][472559] Updated weights for policy 0, policy_version 36139 (0.0008) +[2026-06-07 02:58:45,396][472559] Updated weights for policy 0, policy_version 36149 (0.0008) +[2026-06-07 02:58:45,513][472559] Updated weights for policy 0, policy_version 36159 (0.0008) +[2026-06-07 02:58:46,055][472559] Updated weights for policy 0, policy_version 36169 (0.0007) +[2026-06-07 02:58:46,175][472559] Updated weights for policy 0, policy_version 36180 (0.0008) +[2026-06-07 02:58:46,298][472559] Updated weights for policy 0, policy_version 36191 (0.0008) +[2026-06-07 02:58:46,412][472559] Updated weights for policy 0, policy_version 36201 (0.0008) +[2026-06-07 02:58:46,524][472559] Updated weights for policy 0, policy_version 36211 (0.0008) +[2026-06-07 02:58:46,648][472559] Updated weights for policy 0, policy_version 36222 (0.0008) +[2026-06-07 02:58:47,229][472559] Updated weights for policy 0, policy_version 36233 (0.0008) +[2026-06-07 02:58:47,336][472559] Updated weights for policy 0, policy_version 36243 (0.0008) +[2026-06-07 02:58:47,449][472559] Updated weights for policy 0, policy_version 36253 (0.0008) +[2026-06-07 02:58:47,572][472559] Updated weights for policy 0, policy_version 36264 (0.0008) +[2026-06-07 02:58:47,688][472559] Updated weights for policy 0, policy_version 36274 (0.0008) +[2026-06-07 02:58:47,799][472559] Updated weights for policy 0, policy_version 36284 (0.0008) +[2026-06-07 02:58:48,117][464927] Fps is (10 sec: 29491.3, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 18579456. Throughput: 0: 28325.0. Samples: 18561792. Policy #0 lag: (min: 55.0, avg: 66.6, max: 119.0) +[2026-06-07 02:58:48,118][464927] Avg episode reward: [(0, '1152.350')] +[2026-06-07 02:58:48,380][472559] Updated weights for policy 0, policy_version 36296 (0.0008) +[2026-06-07 02:58:48,504][472559] Updated weights for policy 0, policy_version 36307 (0.0007) +[2026-06-07 02:58:48,606][472559] Updated weights for policy 0, policy_version 36317 (0.0007) +[2026-06-07 02:58:48,724][472559] Updated weights for policy 0, policy_version 36327 (0.0008) +[2026-06-07 02:58:48,844][472559] Updated weights for policy 0, policy_version 36338 (0.0008) +[2026-06-07 02:58:48,967][472559] Updated weights for policy 0, policy_version 36349 (0.0008) +[2026-06-07 02:58:49,533][472559] Updated weights for policy 0, policy_version 36360 (0.0007) +[2026-06-07 02:58:49,643][472559] Updated weights for policy 0, policy_version 36370 (0.0009) +[2026-06-07 02:58:49,755][472559] Updated weights for policy 0, policy_version 36380 (0.0009) +[2026-06-07 02:58:49,868][472559] Updated weights for policy 0, policy_version 36390 (0.0008) +[2026-06-07 02:58:49,990][472559] Updated weights for policy 0, policy_version 36401 (0.0009) +[2026-06-07 02:58:50,145][472559] Updated weights for policy 0, policy_version 36415 (0.0009) +[2026-06-07 02:58:50,701][472559] Updated weights for policy 0, policy_version 36425 (0.0008) +[2026-06-07 02:58:50,832][472559] Updated weights for policy 0, policy_version 36437 (0.0008) +[2026-06-07 02:58:50,960][472559] Updated weights for policy 0, policy_version 36448 (0.0008) +[2026-06-07 02:58:51,072][472559] Updated weights for policy 0, policy_version 36458 (0.0008) +[2026-06-07 02:58:51,201][472559] Updated weights for policy 0, policy_version 36469 (0.0008) +[2026-06-07 02:58:51,323][472559] Updated weights for policy 0, policy_version 36480 (0.0008) +[2026-06-07 02:58:51,901][472559] Updated weights for policy 0, policy_version 36493 (0.0008) +[2026-06-07 02:58:52,024][472559] Updated weights for policy 0, policy_version 36504 (0.0008) +[2026-06-07 02:58:52,151][472559] Updated weights for policy 0, policy_version 36515 (0.0009) +[2026-06-07 02:58:52,264][472559] Updated weights for policy 0, policy_version 36525 (0.0008) +[2026-06-07 02:58:52,413][472559] Updated weights for policy 0, policy_version 36538 (0.0008) +[2026-06-07 02:58:52,972][472559] Updated weights for policy 0, policy_version 36548 (0.0008) +[2026-06-07 02:58:53,086][472559] Updated weights for policy 0, policy_version 36558 (0.0008) +[2026-06-07 02:58:53,116][464927] Fps is (10 sec: 29491.5, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 18710528. Throughput: 0: 28322.2. Samples: 18728576. Policy #0 lag: (min: 55.0, avg: 66.6, max: 119.0) +[2026-06-07 02:58:53,117][464927] Avg episode reward: [(0, '1199.508')] +[2026-06-07 02:58:53,194][472559] Updated weights for policy 0, policy_version 36568 (0.0008) +[2026-06-07 02:58:53,325][472559] Updated weights for policy 0, policy_version 36579 (0.0008) +[2026-06-07 02:58:53,438][472559] Updated weights for policy 0, policy_version 36589 (0.0008) +[2026-06-07 02:58:53,576][472559] Updated weights for policy 0, policy_version 36601 (0.0009) +[2026-06-07 02:58:53,649][472025] Saving new best policy, reward=1199.508! +[2026-06-07 02:58:54,122][472559] Updated weights for policy 0, policy_version 36612 (0.0007) +[2026-06-07 02:58:54,265][472559] Updated weights for policy 0, policy_version 36625 (0.0008) +[2026-06-07 02:58:54,390][472559] Updated weights for policy 0, policy_version 36636 (0.0008) +[2026-06-07 02:58:54,500][472559] Updated weights for policy 0, policy_version 36646 (0.0009) +[2026-06-07 02:58:54,635][472559] Updated weights for policy 0, policy_version 36657 (0.0008) +[2026-06-07 02:58:54,755][472559] Updated weights for policy 0, policy_version 36668 (0.0008) +[2026-06-07 02:58:55,306][472559] Updated weights for policy 0, policy_version 36679 (0.0007) +[2026-06-07 02:58:55,422][472559] Updated weights for policy 0, policy_version 36689 (0.0008) +[2026-06-07 02:58:55,535][472559] Updated weights for policy 0, policy_version 36699 (0.0008) +[2026-06-07 02:58:55,682][472559] Updated weights for policy 0, policy_version 36712 (0.0008) +[2026-06-07 02:58:55,810][472559] Updated weights for policy 0, policy_version 36723 (0.0008) +[2026-06-07 02:58:55,934][472559] Updated weights for policy 0, policy_version 36734 (0.0008) +[2026-06-07 02:58:56,506][472559] Updated weights for policy 0, policy_version 36745 (0.0008) +[2026-06-07 02:58:56,632][472559] Updated weights for policy 0, policy_version 36757 (0.0008) +[2026-06-07 02:58:56,772][472559] Updated weights for policy 0, policy_version 36769 (0.0008) +[2026-06-07 02:58:56,894][472559] Updated weights for policy 0, policy_version 36780 (0.0008) +[2026-06-07 02:58:57,007][472559] Updated weights for policy 0, policy_version 36790 (0.0008) +[2026-06-07 02:58:57,563][472559] Updated weights for policy 0, policy_version 36801 (0.0008) +[2026-06-07 02:58:57,675][472559] Updated weights for policy 0, policy_version 36811 (0.0008) +[2026-06-07 02:58:57,810][472559] Updated weights for policy 0, policy_version 36823 (0.0008) +[2026-06-07 02:58:57,923][472559] Updated weights for policy 0, policy_version 36833 (0.0008) +[2026-06-07 02:58:58,047][472559] Updated weights for policy 0, policy_version 36844 (0.0008) +[2026-06-07 02:58:58,117][464927] Fps is (10 sec: 26214.3, 60 sec: 27852.8, 300 sec: 28213.8). Total num frames: 18841600. Throughput: 0: 28310.7. Samples: 18896000. Policy #0 lag: (min: 75.0, avg: 106.2, max: 132.0) +[2026-06-07 02:58:58,118][464927] Avg episode reward: [(0, '1218.181')] +[2026-06-07 02:58:58,175][472559] Updated weights for policy 0, policy_version 36855 (0.0008) +[2026-06-07 02:58:58,271][472025] Saving new best policy, reward=1218.181! +[2026-06-07 02:58:58,754][472559] Updated weights for policy 0, policy_version 36866 (0.0008) +[2026-06-07 02:58:58,863][472559] Updated weights for policy 0, policy_version 36876 (0.0008) +[2026-06-07 02:58:58,976][472559] Updated weights for policy 0, policy_version 36886 (0.0008) +[2026-06-07 02:58:59,099][472559] Updated weights for policy 0, policy_version 36897 (0.0008) +[2026-06-07 02:58:59,219][472559] Updated weights for policy 0, policy_version 36908 (0.0008) +[2026-06-07 02:58:59,334][472559] Updated weights for policy 0, policy_version 36918 (0.0008) +[2026-06-07 02:58:59,884][472559] Updated weights for policy 0, policy_version 36929 (0.0008) +[2026-06-07 02:58:59,998][472559] Updated weights for policy 0, policy_version 36939 (0.0008) +[2026-06-07 02:59:00,114][472559] Updated weights for policy 0, policy_version 36950 (0.0008) +[2026-06-07 02:59:00,230][472559] Updated weights for policy 0, policy_version 36960 (0.0008) +[2026-06-07 02:59:00,346][472559] Updated weights for policy 0, policy_version 36970 (0.0008) +[2026-06-07 02:59:00,472][472559] Updated weights for policy 0, policy_version 36981 (0.0008) +[2026-06-07 02:59:00,585][472559] Updated weights for policy 0, policy_version 36991 (0.0009) +[2026-06-07 02:59:01,140][472559] Updated weights for policy 0, policy_version 37002 (0.0008) +[2026-06-07 02:59:01,262][472559] Updated weights for policy 0, policy_version 37013 (0.0008) +[2026-06-07 02:59:01,379][472559] Updated weights for policy 0, policy_version 37023 (0.0008) +[2026-06-07 02:59:01,501][472559] Updated weights for policy 0, policy_version 37034 (0.0008) +[2026-06-07 02:59:01,609][472559] Updated weights for policy 0, policy_version 37044 (0.0008) +[2026-06-07 02:59:01,747][472559] Updated weights for policy 0, policy_version 37056 (0.0009) +[2026-06-07 02:59:02,299][472559] Updated weights for policy 0, policy_version 37066 (0.0008) +[2026-06-07 02:59:02,417][472559] Updated weights for policy 0, policy_version 37077 (0.0008) +[2026-06-07 02:59:02,538][472559] Updated weights for policy 0, policy_version 37088 (0.0008) +[2026-06-07 02:59:02,659][472559] Updated weights for policy 0, policy_version 37099 (0.0008) +[2026-06-07 02:59:02,791][472559] Updated weights for policy 0, policy_version 37110 (0.0008) +[2026-06-07 02:59:03,117][464927] Fps is (10 sec: 29491.0, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 19005440. Throughput: 0: 28296.6. Samples: 18986496. Policy #0 lag: (min: 75.0, avg: 106.2, max: 132.0) +[2026-06-07 02:59:03,118][464927] Avg episode reward: [(0, '1220.523')] +[2026-06-07 02:59:03,123][472025] Saving new best policy, reward=1220.523! +[2026-06-07 02:59:03,344][472559] Updated weights for policy 0, policy_version 37121 (0.0008) +[2026-06-07 02:59:03,451][472559] Updated weights for policy 0, policy_version 37131 (0.0008) +[2026-06-07 02:59:03,563][472559] Updated weights for policy 0, policy_version 37141 (0.0008) +[2026-06-07 02:59:03,675][472559] Updated weights for policy 0, policy_version 37151 (0.0008) +[2026-06-07 02:59:03,797][472559] Updated weights for policy 0, policy_version 37162 (0.0008) +[2026-06-07 02:59:03,908][472559] Updated weights for policy 0, policy_version 37172 (0.0008) +[2026-06-07 02:59:04,037][472559] Updated weights for policy 0, policy_version 37183 (0.0008) +[2026-06-07 02:59:04,588][472559] Updated weights for policy 0, policy_version 37193 (0.0008) +[2026-06-07 02:59:04,736][472559] Updated weights for policy 0, policy_version 37206 (0.0008) +[2026-06-07 02:59:04,859][472559] Updated weights for policy 0, policy_version 37217 (0.0008) +[2026-06-07 02:59:04,983][472559] Updated weights for policy 0, policy_version 37228 (0.0008) +[2026-06-07 02:59:05,109][472559] Updated weights for policy 0, policy_version 37239 (0.0008) +[2026-06-07 02:59:05,669][472559] Updated weights for policy 0, policy_version 37250 (0.0008) +[2026-06-07 02:59:05,790][472559] Updated weights for policy 0, policy_version 37261 (0.0008) +[2026-06-07 02:59:05,907][472559] Updated weights for policy 0, policy_version 37272 (0.0008) +[2026-06-07 02:59:06,022][472559] Updated weights for policy 0, policy_version 37282 (0.0008) +[2026-06-07 02:59:06,158][472559] Updated weights for policy 0, policy_version 37294 (0.0008) +[2026-06-07 02:59:06,282][472559] Updated weights for policy 0, policy_version 37305 (0.0008) +[2026-06-07 02:59:06,829][472559] Updated weights for policy 0, policy_version 37315 (0.0008) +[2026-06-07 02:59:06,939][472559] Updated weights for policy 0, policy_version 37325 (0.0008) +[2026-06-07 02:59:07,063][472559] Updated weights for policy 0, policy_version 37336 (0.0008) +[2026-06-07 02:59:07,198][472559] Updated weights for policy 0, policy_version 37348 (0.0008) +[2026-06-07 02:59:07,321][472559] Updated weights for policy 0, policy_version 37359 (0.0008) +[2026-06-07 02:59:07,447][472559] Updated weights for policy 0, policy_version 37370 (0.0008) +[2026-06-07 02:59:08,000][472559] Updated weights for policy 0, policy_version 37380 (0.0008) +[2026-06-07 02:59:08,100][472559] Updated weights for policy 0, policy_version 37390 (0.0008) +[2026-06-07 02:59:08,117][464927] Fps is (10 sec: 29491.3, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 19136512. Throughput: 0: 28270.9. Samples: 19152896. Policy #0 lag: (min: 75.0, avg: 106.2, max: 132.0) +[2026-06-07 02:59:08,117][464927] Avg episode reward: [(0, '1222.173')] +[2026-06-07 02:59:08,218][472559] Updated weights for policy 0, policy_version 37400 (0.0008) +[2026-06-07 02:59:08,334][472559] Updated weights for policy 0, policy_version 37410 (0.0008) +[2026-06-07 02:59:08,461][472559] Updated weights for policy 0, policy_version 37421 (0.0008) +[2026-06-07 02:59:08,577][472559] Updated weights for policy 0, policy_version 37431 (0.0008) +[2026-06-07 02:59:08,671][472025] Saving new best policy, reward=1222.173! +[2026-06-07 02:59:09,111][472559] Updated weights for policy 0, policy_version 37442 (0.0008) +[2026-06-07 02:59:09,229][472559] Updated weights for policy 0, policy_version 37453 (0.0008) +[2026-06-07 02:59:09,368][472559] Updated weights for policy 0, policy_version 37465 (0.0008) +[2026-06-07 02:59:09,491][472559] Updated weights for policy 0, policy_version 37476 (0.0008) +[2026-06-07 02:59:09,604][472559] Updated weights for policy 0, policy_version 37486 (0.0008) +[2026-06-07 02:59:09,712][472559] Updated weights for policy 0, policy_version 37496 (0.0008) +[2026-06-07 02:59:10,273][472559] Updated weights for policy 0, policy_version 37507 (0.0008) +[2026-06-07 02:59:10,397][472559] Updated weights for policy 0, policy_version 37518 (0.0008) +[2026-06-07 02:59:10,520][472559] Updated weights for policy 0, policy_version 37529 (0.0008) +[2026-06-07 02:59:10,648][472559] Updated weights for policy 0, policy_version 37540 (0.0008) +[2026-06-07 02:59:10,764][472559] Updated weights for policy 0, policy_version 37551 (0.0008) +[2026-06-07 02:59:10,893][472559] Updated weights for policy 0, policy_version 37562 (0.0008) +[2026-06-07 02:59:11,446][472559] Updated weights for policy 0, policy_version 37572 (0.0008) +[2026-06-07 02:59:11,551][472559] Updated weights for policy 0, policy_version 37582 (0.0008) +[2026-06-07 02:59:11,682][472559] Updated weights for policy 0, policy_version 37593 (0.0008) +[2026-06-07 02:59:11,796][472559] Updated weights for policy 0, policy_version 37603 (0.0009) +[2026-06-07 02:59:11,919][472559] Updated weights for policy 0, policy_version 37614 (0.0008) +[2026-06-07 02:59:12,032][472559] Updated weights for policy 0, policy_version 37624 (0.0008) +[2026-06-07 02:59:12,610][472559] Updated weights for policy 0, policy_version 37637 (0.0008) +[2026-06-07 02:59:12,718][472559] Updated weights for policy 0, policy_version 37647 (0.0008) +[2026-06-07 02:59:12,846][472559] Updated weights for policy 0, policy_version 37658 (0.0008) +[2026-06-07 02:59:12,962][472559] Updated weights for policy 0, policy_version 37668 (0.0009) +[2026-06-07 02:59:13,092][472559] Updated weights for policy 0, policy_version 37680 (0.0009) +[2026-06-07 02:59:13,117][464927] Fps is (10 sec: 26214.4, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 19267584. Throughput: 0: 28256.8. Samples: 19319296. Policy #0 lag: (min: 75.0, avg: 106.2, max: 132.0) +[2026-06-07 02:59:13,117][464927] Avg episode reward: [(0, '1236.108')] +[2026-06-07 02:59:13,224][472559] Updated weights for policy 0, policy_version 37693 (0.0009) +[2026-06-07 02:59:13,255][472025] Saving new best policy, reward=1236.108! +[2026-06-07 02:59:13,796][472559] Updated weights for policy 0, policy_version 37703 (0.0008) +[2026-06-07 02:59:13,899][472559] Updated weights for policy 0, policy_version 37713 (0.0008) +[2026-06-07 02:59:14,046][472559] Updated weights for policy 0, policy_version 37727 (0.0008) +[2026-06-07 02:59:14,199][472559] Updated weights for policy 0, policy_version 37741 (0.0009) +[2026-06-07 02:59:14,321][472559] Updated weights for policy 0, policy_version 37752 (0.0008) +[2026-06-07 02:59:14,908][472559] Updated weights for policy 0, policy_version 37763 (0.0008) +[2026-06-07 02:59:15,050][472559] Updated weights for policy 0, policy_version 37777 (0.0008) +[2026-06-07 02:59:15,186][472559] Updated weights for policy 0, policy_version 37789 (0.0008) +[2026-06-07 02:59:15,324][472559] Updated weights for policy 0, policy_version 37802 (0.0008) +[2026-06-07 02:59:15,443][472559] Updated weights for policy 0, policy_version 37813 (0.0008) +[2026-06-07 02:59:16,029][472559] Updated weights for policy 0, policy_version 37825 (0.0008) +[2026-06-07 02:59:16,144][472559] Updated weights for policy 0, policy_version 37836 (0.0008) +[2026-06-07 02:59:16,265][472559] Updated weights for policy 0, policy_version 37847 (0.0008) +[2026-06-07 02:59:16,384][472559] Updated weights for policy 0, policy_version 37858 (0.0008) +[2026-06-07 02:59:16,512][472559] Updated weights for policy 0, policy_version 37870 (0.0008) +[2026-06-07 02:59:16,653][472559] Updated weights for policy 0, policy_version 37883 (0.0008) +[2026-06-07 02:59:17,226][472559] Updated weights for policy 0, policy_version 37893 (0.0008) +[2026-06-07 02:59:17,354][472559] Updated weights for policy 0, policy_version 37905 (0.0008) +[2026-06-07 02:59:17,470][472559] Updated weights for policy 0, policy_version 37916 (0.0009) +[2026-06-07 02:59:17,596][472559] Updated weights for policy 0, policy_version 37928 (0.0008) +[2026-06-07 02:59:17,772][472559] Updated weights for policy 0, policy_version 37944 (0.0009) +[2026-06-07 02:59:18,117][464927] Fps is (10 sec: 29491.3, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 19431424. Throughput: 0: 28265.2. Samples: 19410432. Policy #0 lag: (min: 75.0, avg: 106.2, max: 132.0) +[2026-06-07 02:59:18,118][464927] Avg episode reward: [(0, '1281.815')] +[2026-06-07 02:59:18,124][472025] Saving new best policy, reward=1281.815! +[2026-06-07 02:59:18,368][472559] Updated weights for policy 0, policy_version 37957 (0.0008) +[2026-06-07 02:59:18,483][472559] Updated weights for policy 0, policy_version 37968 (0.0008) +[2026-06-07 02:59:18,613][472559] Updated weights for policy 0, policy_version 37980 (0.0008) +[2026-06-07 02:59:18,738][472559] Updated weights for policy 0, policy_version 37991 (0.0008) +[2026-06-07 02:59:18,851][472559] Updated weights for policy 0, policy_version 38001 (0.0008) +[2026-06-07 02:59:18,964][472559] Updated weights for policy 0, policy_version 38011 (0.0008) +[2026-06-07 02:59:19,515][472559] Updated weights for policy 0, policy_version 38021 (0.0008) +[2026-06-07 02:59:19,626][472559] Updated weights for policy 0, policy_version 38031 (0.0008) +[2026-06-07 02:59:19,735][472559] Updated weights for policy 0, policy_version 38041 (0.0008) +[2026-06-07 02:59:19,862][472559] Updated weights for policy 0, policy_version 38053 (0.0008) +[2026-06-07 02:59:19,994][472559] Updated weights for policy 0, policy_version 38065 (0.0008) +[2026-06-07 02:59:20,105][472559] Updated weights for policy 0, policy_version 38075 (0.0008) +[2026-06-07 02:59:20,679][472559] Updated weights for policy 0, policy_version 38086 (0.0008) +[2026-06-07 02:59:20,799][472559] Updated weights for policy 0, policy_version 38098 (0.0008) +[2026-06-07 02:59:20,959][472559] Updated weights for policy 0, policy_version 38113 (0.0008) +[2026-06-07 02:59:21,078][472559] Updated weights for policy 0, policy_version 38124 (0.0008) +[2026-06-07 02:59:21,221][472559] Updated weights for policy 0, policy_version 38137 (0.0008) +[2026-06-07 02:59:21,791][472559] Updated weights for policy 0, policy_version 38147 (0.0008) +[2026-06-07 02:59:21,901][472559] Updated weights for policy 0, policy_version 38157 (0.0008) +[2026-06-07 02:59:22,033][472559] Updated weights for policy 0, policy_version 38169 (0.0008) +[2026-06-07 02:59:22,143][472559] Updated weights for policy 0, policy_version 38179 (0.0008) +[2026-06-07 02:59:22,269][472559] Updated weights for policy 0, policy_version 38190 (0.0008) +[2026-06-07 02:59:22,393][472559] Updated weights for policy 0, policy_version 38201 (0.0008) +[2026-06-07 02:59:22,952][472559] Updated weights for policy 0, policy_version 38211 (0.0008) +[2026-06-07 02:59:23,081][472559] Updated weights for policy 0, policy_version 38223 (0.0008) +[2026-06-07 02:59:23,117][464927] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 19562496. Throughput: 0: 28265.3. Samples: 19576576. Policy #0 lag: (min: 63.0, avg: 74.3, max: 127.0) +[2026-06-07 02:59:23,118][464927] Avg episode reward: [(0, '1232.607')] +[2026-06-07 02:59:23,219][472559] Updated weights for policy 0, policy_version 38235 (0.0008) +[2026-06-07 02:59:23,329][472559] Updated weights for policy 0, policy_version 38245 (0.0008) +[2026-06-07 02:59:23,468][472559] Updated weights for policy 0, policy_version 38257 (0.0009) +[2026-06-07 02:59:23,606][472559] Updated weights for policy 0, policy_version 38269 (0.0008) +[2026-06-07 02:59:24,161][472559] Updated weights for policy 0, policy_version 38280 (0.0008) +[2026-06-07 02:59:24,282][472559] Updated weights for policy 0, policy_version 38291 (0.0008) +[2026-06-07 02:59:24,398][472559] Updated weights for policy 0, policy_version 38301 (0.0009) +[2026-06-07 02:59:24,529][472559] Updated weights for policy 0, policy_version 38313 (0.0008) +[2026-06-07 02:59:24,659][472559] Updated weights for policy 0, policy_version 38324 (0.0008) +[2026-06-07 02:59:24,782][472559] Updated weights for policy 0, policy_version 38335 (0.0008) +[2026-06-07 02:59:25,332][472559] Updated weights for policy 0, policy_version 38345 (0.0008) +[2026-06-07 02:59:25,455][472559] Updated weights for policy 0, policy_version 38356 (0.0009) +[2026-06-07 02:59:25,613][472559] Updated weights for policy 0, policy_version 38371 (0.0008) +[2026-06-07 02:59:25,752][472559] Updated weights for policy 0, policy_version 38384 (0.0009) +[2026-06-07 02:59:25,869][472559] Updated weights for policy 0, policy_version 38395 (0.0008) +[2026-06-07 02:59:26,433][472559] Updated weights for policy 0, policy_version 38405 (0.0008) +[2026-06-07 02:59:26,540][472559] Updated weights for policy 0, policy_version 38415 (0.0008) +[2026-06-07 02:59:26,662][472559] Updated weights for policy 0, policy_version 38426 (0.0008) +[2026-06-07 02:59:26,792][472559] Updated weights for policy 0, policy_version 38437 (0.0009) +[2026-06-07 02:59:26,908][472559] Updated weights for policy 0, policy_version 38447 (0.0008) +[2026-06-07 02:59:27,042][472559] Updated weights for policy 0, policy_version 38459 (0.0008) +[2026-06-07 02:59:27,587][472559] Updated weights for policy 0, policy_version 38469 (0.0008) +[2026-06-07 02:59:27,712][472559] Updated weights for policy 0, policy_version 38480 (0.0008) +[2026-06-07 02:59:27,847][472559] Updated weights for policy 0, policy_version 38492 (0.0009) +[2026-06-07 02:59:27,969][472559] Updated weights for policy 0, policy_version 38503 (0.0008) +[2026-06-07 02:59:28,094][472559] Updated weights for policy 0, policy_version 38514 (0.0008) +[2026-06-07 02:59:28,117][464927] Fps is (10 sec: 26214.3, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 19693568. Throughput: 0: 28256.7. Samples: 19742720. Policy #0 lag: (min: 63.0, avg: 74.3, max: 127.0) +[2026-06-07 02:59:28,117][464927] Avg episode reward: [(0, '1288.716')] +[2026-06-07 02:59:28,219][472559] Updated weights for policy 0, policy_version 38525 (0.0008) +[2026-06-07 02:59:28,248][472025] Saving new best policy, reward=1288.716! +[2026-06-07 02:59:28,766][472559] Updated weights for policy 0, policy_version 38535 (0.0008) +[2026-06-07 02:59:28,885][472559] Updated weights for policy 0, policy_version 38546 (0.0008) +[2026-06-07 02:59:29,026][472559] Updated weights for policy 0, policy_version 38558 (0.0008) +[2026-06-07 02:59:29,161][472559] Updated weights for policy 0, policy_version 38570 (0.0008) +[2026-06-07 02:59:29,283][472559] Updated weights for policy 0, policy_version 38581 (0.0008) +[2026-06-07 02:59:29,398][472559] Updated weights for policy 0, policy_version 38591 (0.0008) +[2026-06-07 02:59:29,960][472559] Updated weights for policy 0, policy_version 38602 (0.0007) +[2026-06-07 02:59:30,067][472559] Updated weights for policy 0, policy_version 38612 (0.0008) +[2026-06-07 02:59:30,187][472559] Updated weights for policy 0, policy_version 38623 (0.0008) +[2026-06-07 02:59:30,305][472559] Updated weights for policy 0, policy_version 38633 (0.0008) +[2026-06-07 02:59:30,439][472559] Updated weights for policy 0, policy_version 38645 (0.0008) +[2026-06-07 02:59:30,563][472559] Updated weights for policy 0, policy_version 38656 (0.0008) +[2026-06-07 02:59:31,126][472559] Updated weights for policy 0, policy_version 38667 (0.0008) +[2026-06-07 02:59:31,253][472559] Updated weights for policy 0, policy_version 38678 (0.0008) +[2026-06-07 02:59:31,363][472559] Updated weights for policy 0, policy_version 38688 (0.0008) +[2026-06-07 02:59:31,476][472559] Updated weights for policy 0, policy_version 38698 (0.0008) +[2026-06-07 02:59:31,601][472559] Updated weights for policy 0, policy_version 38709 (0.0008) +[2026-06-07 02:59:31,723][472559] Updated weights for policy 0, policy_version 38719 (0.0009) +[2026-06-07 02:59:32,269][472559] Updated weights for policy 0, policy_version 38730 (0.0008) +[2026-06-07 02:59:32,379][472559] Updated weights for policy 0, policy_version 38740 (0.0008) +[2026-06-07 02:59:32,491][472559] Updated weights for policy 0, policy_version 38750 (0.0008) +[2026-06-07 02:59:32,620][472559] Updated weights for policy 0, policy_version 38761 (0.0008) +[2026-06-07 02:59:32,738][472559] Updated weights for policy 0, policy_version 38771 (0.0008) +[2026-06-07 02:59:32,874][472559] Updated weights for policy 0, policy_version 38783 (0.0008) +[2026-06-07 02:59:33,117][464927] Fps is (10 sec: 29491.1, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 19857408. Throughput: 0: 28265.2. Samples: 19833728. Policy #0 lag: (min: 63.0, avg: 74.3, max: 127.0) +[2026-06-07 02:59:33,117][464927] Avg episode reward: [(0, '1230.187')] +[2026-06-07 02:59:33,432][472559] Updated weights for policy 0, policy_version 38794 (0.0008) +[2026-06-07 02:59:33,560][472559] Updated weights for policy 0, policy_version 38806 (0.0008) +[2026-06-07 02:59:33,677][472559] Updated weights for policy 0, policy_version 38816 (0.0008) +[2026-06-07 02:59:33,813][472559] Updated weights for policy 0, policy_version 38828 (0.0008) +[2026-06-07 02:59:33,929][472559] Updated weights for policy 0, policy_version 38838 (0.0008) +[2026-06-07 02:59:34,472][472559] Updated weights for policy 0, policy_version 38849 (0.0009) +[2026-06-07 02:59:34,584][472559] Updated weights for policy 0, policy_version 38859 (0.0008) +[2026-06-07 02:59:34,712][472559] Updated weights for policy 0, policy_version 38871 (0.0008) +[2026-06-07 02:59:34,826][472559] Updated weights for policy 0, policy_version 38882 (0.0008) +[2026-06-07 02:59:34,994][472559] Updated weights for policy 0, policy_version 38897 (0.0008) +[2026-06-07 02:59:35,117][472559] Updated weights for policy 0, policy_version 38908 (0.0008) +[2026-06-07 02:59:35,700][472559] Updated weights for policy 0, policy_version 38921 (0.0008) +[2026-06-07 02:59:35,818][472559] Updated weights for policy 0, policy_version 38932 (0.0008) +[2026-06-07 02:59:35,947][472559] Updated weights for policy 0, policy_version 38944 (0.0008) +[2026-06-07 02:59:36,068][472559] Updated weights for policy 0, policy_version 38955 (0.0008) +[2026-06-07 02:59:36,181][472559] Updated weights for policy 0, policy_version 38965 (0.0008) +[2026-06-07 02:59:36,295][472559] Updated weights for policy 0, policy_version 38975 (0.0008) +[2026-06-07 02:59:36,861][472559] Updated weights for policy 0, policy_version 38985 (0.0008) +[2026-06-07 02:59:36,983][472559] Updated weights for policy 0, policy_version 38996 (0.0008) +[2026-06-07 02:59:37,098][472559] Updated weights for policy 0, policy_version 39006 (0.0008) +[2026-06-07 02:59:37,212][472559] Updated weights for policy 0, policy_version 39016 (0.0008) +[2026-06-07 02:59:37,339][472559] Updated weights for policy 0, policy_version 39027 (0.0008) +[2026-06-07 02:59:37,454][472559] Updated weights for policy 0, policy_version 39037 (0.0008) +[2026-06-07 02:59:38,001][472559] Updated weights for policy 0, policy_version 39048 (0.0008) +[2026-06-07 02:59:38,116][464927] Fps is (10 sec: 29491.4, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 19988480. Throughput: 0: 28239.6. Samples: 19999360. Policy #0 lag: (min: 63.0, avg: 74.3, max: 127.0) +[2026-06-07 02:59:38,117][464927] Avg episode reward: [(0, '1224.244')] +[2026-06-07 02:59:38,127][472559] Updated weights for policy 0, policy_version 39059 (0.0008) +[2026-06-07 02:59:38,244][472559] Updated weights for policy 0, policy_version 39069 (0.0008) +[2026-06-07 02:59:38,361][472559] Updated weights for policy 0, policy_version 39079 (0.0008) +[2026-06-07 02:59:38,498][472559] Updated weights for policy 0, policy_version 39091 (0.0008) +[2026-06-07 02:59:38,634][472559] Updated weights for policy 0, policy_version 39103 (0.0008) +[2026-06-07 02:59:39,166][472559] Updated weights for policy 0, policy_version 39113 (0.0008) +[2026-06-07 02:59:39,301][472559] Updated weights for policy 0, policy_version 39125 (0.0008) +[2026-06-07 02:59:39,440][472559] Updated weights for policy 0, policy_version 39137 (0.0008) +[2026-06-07 02:59:39,562][472559] Updated weights for policy 0, policy_version 39148 (0.0009) +[2026-06-07 02:59:39,679][472559] Updated weights for policy 0, policy_version 39158 (0.0009) +[2026-06-07 02:59:40,232][472559] Updated weights for policy 0, policy_version 39169 (0.0008) +[2026-06-07 02:59:40,381][472559] Updated weights for policy 0, policy_version 39182 (0.0008) +[2026-06-07 02:59:40,503][472559] Updated weights for policy 0, policy_version 39193 (0.0008) +[2026-06-07 02:59:40,618][472559] Updated weights for policy 0, policy_version 39203 (0.0009) +[2026-06-07 02:59:40,744][472559] Updated weights for policy 0, policy_version 39214 (0.0008) +[2026-06-07 02:59:40,870][472559] Updated weights for policy 0, policy_version 39225 (0.0009) +[2026-06-07 02:59:41,422][472559] Updated weights for policy 0, policy_version 39236 (0.0008) +[2026-06-07 02:59:41,536][472559] Updated weights for policy 0, policy_version 39246 (0.0008) +[2026-06-07 02:59:41,654][472559] Updated weights for policy 0, policy_version 39256 (0.0008) +[2026-06-07 02:59:41,762][472559] Updated weights for policy 0, policy_version 39266 (0.0008) +[2026-06-07 02:59:41,890][472559] Updated weights for policy 0, policy_version 39277 (0.0008) +[2026-06-07 02:59:42,019][472559] Updated weights for policy 0, policy_version 39288 (0.0009) +[2026-06-07 02:59:42,559][472559] Updated weights for policy 0, policy_version 39298 (0.0008) +[2026-06-07 02:59:42,667][472559] Updated weights for policy 0, policy_version 39308 (0.0007) +[2026-06-07 02:59:42,794][472559] Updated weights for policy 0, policy_version 39319 (0.0008) +[2026-06-07 02:59:42,915][472559] Updated weights for policy 0, policy_version 39330 (0.0008) +[2026-06-07 02:59:43,039][472559] Updated weights for policy 0, policy_version 39341 (0.0008) +[2026-06-07 02:59:43,117][464927] Fps is (10 sec: 26214.2, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 20119552. Throughput: 0: 28211.2. Samples: 20165504. Policy #0 lag: (min: 63.0, avg: 74.3, max: 127.0) +[2026-06-07 02:59:43,118][464927] Avg episode reward: [(0, '1230.305')] +[2026-06-07 02:59:43,168][472559] Updated weights for policy 0, policy_version 39352 (0.0008) +[2026-06-07 02:59:43,703][472559] Updated weights for policy 0, policy_version 39362 (0.0008) +[2026-06-07 02:59:43,825][472559] Updated weights for policy 0, policy_version 39373 (0.0008) +[2026-06-07 02:59:43,947][472559] Updated weights for policy 0, policy_version 39384 (0.0008) +[2026-06-07 02:59:44,069][472559] Updated weights for policy 0, policy_version 39394 (0.0008) +[2026-06-07 02:59:44,192][472559] Updated weights for policy 0, policy_version 39405 (0.0009) +[2026-06-07 02:59:44,303][472559] Updated weights for policy 0, policy_version 39415 (0.0008) +[2026-06-07 02:59:44,846][472559] Updated weights for policy 0, policy_version 39426 (0.0008) +[2026-06-07 02:59:44,976][472559] Updated weights for policy 0, policy_version 39437 (0.0008) +[2026-06-07 02:59:45,105][472559] Updated weights for policy 0, policy_version 39448 (0.0008) +[2026-06-07 02:59:45,219][472559] Updated weights for policy 0, policy_version 39458 (0.0008) +[2026-06-07 02:59:45,329][472559] Updated weights for policy 0, policy_version 39468 (0.0008) +[2026-06-07 02:59:45,444][472559] Updated weights for policy 0, policy_version 39478 (0.0008) +[2026-06-07 02:59:45,554][472559] Updated weights for policy 0, policy_version 39488 (0.0008) +[2026-06-07 02:59:46,120][472559] Updated weights for policy 0, policy_version 39500 (0.0008) +[2026-06-07 02:59:46,248][472559] Updated weights for policy 0, policy_version 39511 (0.0008) +[2026-06-07 02:59:46,357][472559] Updated weights for policy 0, policy_version 39521 (0.0008) +[2026-06-07 02:59:46,466][472559] Updated weights for policy 0, policy_version 39531 (0.0008) +[2026-06-07 02:59:46,578][472559] Updated weights for policy 0, policy_version 39541 (0.0008) +[2026-06-07 02:59:46,711][472559] Updated weights for policy 0, policy_version 39552 (0.0008) +[2026-06-07 02:59:47,261][472559] Updated weights for policy 0, policy_version 39563 (0.0008) +[2026-06-07 02:59:47,372][472559] Updated weights for policy 0, policy_version 39573 (0.0008) +[2026-06-07 02:59:47,492][472559] Updated weights for policy 0, policy_version 39583 (0.0008) +[2026-06-07 02:59:47,627][472559] Updated weights for policy 0, policy_version 39595 (0.0008) +[2026-06-07 02:59:47,740][472559] Updated weights for policy 0, policy_version 39605 (0.0008) +[2026-06-07 02:59:47,857][472559] Updated weights for policy 0, policy_version 39615 (0.0008) +[2026-06-07 02:59:48,117][464927] Fps is (10 sec: 29491.0, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 20283392. Throughput: 0: 28205.5. Samples: 20255744. Policy #0 lag: (min: 63.0, avg: 75.5, max: 127.0) +[2026-06-07 02:59:48,117][464927] Avg episode reward: [(0, '1248.950')] +[2026-06-07 02:59:48,404][472559] Updated weights for policy 0, policy_version 39626 (0.0008) +[2026-06-07 02:59:48,532][472559] Updated weights for policy 0, policy_version 39637 (0.0008) +[2026-06-07 02:59:48,656][472559] Updated weights for policy 0, policy_version 39648 (0.0008) +[2026-06-07 02:59:48,767][472559] Updated weights for policy 0, policy_version 39658 (0.0008) +[2026-06-07 02:59:48,896][472559] Updated weights for policy 0, policy_version 39669 (0.0009) +[2026-06-07 02:59:49,013][472559] Updated weights for policy 0, policy_version 39679 (0.0008) +[2026-06-07 02:59:49,549][472559] Updated weights for policy 0, policy_version 39689 (0.0008) +[2026-06-07 02:59:49,662][472559] Updated weights for policy 0, policy_version 39699 (0.0008) +[2026-06-07 02:59:49,798][472559] Updated weights for policy 0, policy_version 39711 (0.0008) +[2026-06-07 02:59:49,917][472559] Updated weights for policy 0, policy_version 39721 (0.0008) +[2026-06-07 02:59:50,027][472559] Updated weights for policy 0, policy_version 39731 (0.0008) +[2026-06-07 02:59:50,151][472559] Updated weights for policy 0, policy_version 39742 (0.0008) +[2026-06-07 02:59:50,707][472559] Updated weights for policy 0, policy_version 39752 (0.0005) +[2026-06-07 02:59:50,825][472559] Updated weights for policy 0, policy_version 39762 (0.0005) +[2026-06-07 02:59:50,952][472559] Updated weights for policy 0, policy_version 39773 (0.0005) +[2026-06-07 02:59:51,079][472559] Updated weights for policy 0, policy_version 39784 (0.0005) +[2026-06-07 02:59:51,191][472559] Updated weights for policy 0, policy_version 39794 (0.0005) +[2026-06-07 02:59:51,341][472559] Updated weights for policy 0, policy_version 39807 (0.0004) +[2026-06-07 02:59:51,872][472559] Updated weights for policy 0, policy_version 39817 (0.0004) +[2026-06-07 02:59:51,994][472559] Updated weights for policy 0, policy_version 39828 (0.0004) +[2026-06-07 02:59:52,124][472559] Updated weights for policy 0, policy_version 39839 (0.0005) +[2026-06-07 02:59:52,239][472559] Updated weights for policy 0, policy_version 39849 (0.0004) +[2026-06-07 02:59:52,357][472559] Updated weights for policy 0, policy_version 39859 (0.0004) +[2026-06-07 02:59:52,492][472559] Updated weights for policy 0, policy_version 39871 (0.0005) +[2026-06-07 02:59:53,015][472559] Updated weights for policy 0, policy_version 39881 (0.0007) +[2026-06-07 02:59:53,117][464927] Fps is (10 sec: 29491.5, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 20414464. Throughput: 0: 28225.4. Samples: 20423040. Policy #0 lag: (min: 63.0, avg: 75.5, max: 127.0) +[2026-06-07 02:59:53,117][464927] Avg episode reward: [(0, '1337.725')] +[2026-06-07 02:59:53,141][472559] Updated weights for policy 0, policy_version 39892 (0.0008) +[2026-06-07 02:59:53,255][472559] Updated weights for policy 0, policy_version 39902 (0.0008) +[2026-06-07 02:59:53,377][472559] Updated weights for policy 0, policy_version 39913 (0.0006) +[2026-06-07 02:59:53,495][472559] Updated weights for policy 0, policy_version 39923 (0.0008) +[2026-06-07 02:59:53,632][472559] Updated weights for policy 0, policy_version 39935 (0.0008) +[2026-06-07 02:59:53,639][472025] Saving new best policy, reward=1337.725! +[2026-06-07 02:59:54,171][472559] Updated weights for policy 0, policy_version 39945 (0.0008) +[2026-06-07 02:59:54,296][472559] Updated weights for policy 0, policy_version 39956 (0.0008) +[2026-06-07 02:59:54,401][472559] Updated weights for policy 0, policy_version 39966 (0.0008) +[2026-06-07 02:59:54,529][472559] Updated weights for policy 0, policy_version 39977 (0.0008) +[2026-06-07 02:59:54,640][472559] Updated weights for policy 0, policy_version 39987 (0.0008) +[2026-06-07 02:59:54,765][472559] Updated weights for policy 0, policy_version 39998 (0.0008) +[2026-06-07 02:59:55,321][472559] Updated weights for policy 0, policy_version 40008 (0.0008) +[2026-06-07 02:59:55,440][472559] Updated weights for policy 0, policy_version 40019 (0.0008) +[2026-06-07 02:59:55,561][472559] Updated weights for policy 0, policy_version 40030 (0.0008) +[2026-06-07 02:59:55,689][472559] Updated weights for policy 0, policy_version 40041 (0.0008) +[2026-06-07 02:59:55,800][472559] Updated weights for policy 0, policy_version 40051 (0.0008) +[2026-06-07 02:59:55,936][472559] Updated weights for policy 0, policy_version 40063 (0.0009) +[2026-06-07 02:59:56,491][472559] Updated weights for policy 0, policy_version 40073 (0.0008) +[2026-06-07 02:59:56,615][472559] Updated weights for policy 0, policy_version 40084 (0.0009) +[2026-06-07 02:59:56,729][472559] Updated weights for policy 0, policy_version 40094 (0.0008) +[2026-06-07 02:59:56,839][472559] Updated weights for policy 0, policy_version 40104 (0.0009) +[2026-06-07 02:59:56,968][472559] Updated weights for policy 0, policy_version 40115 (0.0008) +[2026-06-07 02:59:57,098][472559] Updated weights for policy 0, policy_version 40126 (0.0009) +[2026-06-07 02:59:57,639][472559] Updated weights for policy 0, policy_version 40137 (0.0008) +[2026-06-07 02:59:57,749][472559] Updated weights for policy 0, policy_version 40147 (0.0008) +[2026-06-07 02:59:57,868][472559] Updated weights for policy 0, policy_version 40158 (0.0009) +[2026-06-07 02:59:57,983][472559] Updated weights for policy 0, policy_version 40168 (0.0008) +[2026-06-07 02:59:58,103][472559] Updated weights for policy 0, policy_version 40178 (0.0008) +[2026-06-07 02:59:58,116][464927] Fps is (10 sec: 26214.5, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 20545536. Throughput: 0: 28248.2. Samples: 20590464. Policy #0 lag: (min: 63.0, avg: 75.5, max: 127.0) +[2026-06-07 02:59:58,117][464927] Avg episode reward: [(0, '1415.740')] +[2026-06-07 02:59:58,219][472559] Updated weights for policy 0, policy_version 40188 (0.0008) +[2026-06-07 02:59:58,258][472025] Saving new best policy, reward=1415.740! +[2026-06-07 02:59:58,773][472559] Updated weights for policy 0, policy_version 40198 (0.0008) +[2026-06-07 02:59:58,887][472559] Updated weights for policy 0, policy_version 40208 (0.0008) +[2026-06-07 02:59:59,000][472559] Updated weights for policy 0, policy_version 40218 (0.0008) +[2026-06-07 02:59:59,136][472559] Updated weights for policy 0, policy_version 40230 (0.0008) +[2026-06-07 02:59:59,249][472559] Updated weights for policy 0, policy_version 40240 (0.0008) +[2026-06-07 02:59:59,377][472559] Updated weights for policy 0, policy_version 40251 (0.0008) +[2026-06-07 02:59:59,911][472559] Updated weights for policy 0, policy_version 40261 (0.0008) +[2026-06-07 03:00:00,025][472559] Updated weights for policy 0, policy_version 40271 (0.0008) +[2026-06-07 03:00:00,129][472559] Updated weights for policy 0, policy_version 40281 (0.0008) +[2026-06-07 03:00:00,249][472559] Updated weights for policy 0, policy_version 40291 (0.0008) +[2026-06-07 03:00:00,357][472559] Updated weights for policy 0, policy_version 40301 (0.0008) +[2026-06-07 03:00:00,472][472559] Updated weights for policy 0, policy_version 40311 (0.0008) +[2026-06-07 03:00:01,027][472559] Updated weights for policy 0, policy_version 40321 (0.0008) +[2026-06-07 03:00:01,142][472559] Updated weights for policy 0, policy_version 40331 (0.0008) +[2026-06-07 03:00:01,247][472559] Updated weights for policy 0, policy_version 40341 (0.0008) +[2026-06-07 03:00:01,367][472559] Updated weights for policy 0, policy_version 40351 (0.0008) +[2026-06-07 03:00:01,483][472559] Updated weights for policy 0, policy_version 40361 (0.0009) +[2026-06-07 03:00:01,593][472559] Updated weights for policy 0, policy_version 40371 (0.0008) +[2026-06-07 03:00:01,707][472559] Updated weights for policy 0, policy_version 40381 (0.0009) +[2026-06-07 03:00:02,272][472559] Updated weights for policy 0, policy_version 40392 (0.0008) +[2026-06-07 03:00:02,412][472559] Updated weights for policy 0, policy_version 40404 (0.0008) +[2026-06-07 03:00:02,541][472559] Updated weights for policy 0, policy_version 40416 (0.0008) +[2026-06-07 03:00:02,657][472559] Updated weights for policy 0, policy_version 40426 (0.0009) +[2026-06-07 03:00:02,771][472559] Updated weights for policy 0, policy_version 40436 (0.0008) +[2026-06-07 03:00:02,894][472559] Updated weights for policy 0, policy_version 40446 (0.0008) +[2026-06-07 03:00:03,117][464927] Fps is (10 sec: 29491.0, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 20709376. Throughput: 0: 28248.1. Samples: 20681600. Policy #0 lag: (min: 63.0, avg: 75.5, max: 127.0) +[2026-06-07 03:00:03,117][464927] Avg episode reward: [(0, '1444.134')] +[2026-06-07 03:00:03,122][472025] Saving new best policy, reward=1444.134! +[2026-06-07 03:00:03,438][472559] Updated weights for policy 0, policy_version 40456 (0.0009) +[2026-06-07 03:00:03,548][472559] Updated weights for policy 0, policy_version 40466 (0.0010) +[2026-06-07 03:00:03,683][472559] Updated weights for policy 0, policy_version 40478 (0.0012) +[2026-06-07 03:00:03,797][472559] Updated weights for policy 0, policy_version 40488 (0.0010) +[2026-06-07 03:00:03,929][472559] Updated weights for policy 0, policy_version 40499 (0.0010) +[2026-06-07 03:00:04,064][472559] Updated weights for policy 0, policy_version 40512 (0.0009) +[2026-06-07 03:00:04,608][472559] Updated weights for policy 0, policy_version 40522 (0.0004) +[2026-06-07 03:00:04,722][472559] Updated weights for policy 0, policy_version 40532 (0.0004) +[2026-06-07 03:00:04,834][472559] Updated weights for policy 0, policy_version 40542 (0.0004) +[2026-06-07 03:00:04,963][472559] Updated weights for policy 0, policy_version 40553 (0.0004) +[2026-06-07 03:00:05,080][472559] Updated weights for policy 0, policy_version 40563 (0.0004) +[2026-06-07 03:00:05,209][472559] Updated weights for policy 0, policy_version 40574 (0.0004) +[2026-06-07 03:00:05,720][472559] Updated weights for policy 0, policy_version 40584 (0.0004) +[2026-06-07 03:00:05,854][472559] Updated weights for policy 0, policy_version 40596 (0.0004) +[2026-06-07 03:00:05,966][472559] Updated weights for policy 0, policy_version 40606 (0.0004) +[2026-06-07 03:00:06,100][472559] Updated weights for policy 0, policy_version 40617 (0.0004) +[2026-06-07 03:00:06,215][472559] Updated weights for policy 0, policy_version 40627 (0.0004) +[2026-06-07 03:00:06,329][472559] Updated weights for policy 0, policy_version 40637 (0.0004) +[2026-06-07 03:00:06,861][472559] Updated weights for policy 0, policy_version 40648 (0.0004) +[2026-06-07 03:00:06,981][472559] Updated weights for policy 0, policy_version 40659 (0.0004) +[2026-06-07 03:00:07,105][472559] Updated weights for policy 0, policy_version 40670 (0.0004) +[2026-06-07 03:00:07,230][472559] Updated weights for policy 0, policy_version 40681 (0.0004) +[2026-06-07 03:00:07,361][472559] Updated weights for policy 0, policy_version 40692 (0.0004) +[2026-06-07 03:00:07,476][472559] Updated weights for policy 0, policy_version 40702 (0.0004) +[2026-06-07 03:00:07,998][472559] Updated weights for policy 0, policy_version 40712 (0.0004) +[2026-06-07 03:00:08,104][472559] Updated weights for policy 0, policy_version 40722 (0.0004) +[2026-06-07 03:00:08,117][464927] Fps is (10 sec: 29491.0, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 20840448. Throughput: 0: 28282.3. Samples: 20849280. Policy #0 lag: (min: 63.0, avg: 75.5, max: 127.0) +[2026-06-07 03:00:08,117][464927] Avg episode reward: [(0, '1478.953')] +[2026-06-07 03:00:08,223][472559] Updated weights for policy 0, policy_version 40732 (0.0004) +[2026-06-07 03:00:08,353][472559] Updated weights for policy 0, policy_version 40743 (0.0004) +[2026-06-07 03:00:08,460][472559] Updated weights for policy 0, policy_version 40753 (0.0004) +[2026-06-07 03:00:08,578][472559] Updated weights for policy 0, policy_version 40763 (0.0004) +[2026-06-07 03:00:08,636][472025] Saving new best policy, reward=1478.953! +[2026-06-07 03:00:09,093][472559] Updated weights for policy 0, policy_version 40773 (0.0005) +[2026-06-07 03:00:09,211][472559] Updated weights for policy 0, policy_version 40783 (0.0006) +[2026-06-07 03:00:09,317][472559] Updated weights for policy 0, policy_version 40793 (0.0008) +[2026-06-07 03:00:09,427][472559] Updated weights for policy 0, policy_version 40803 (0.0008) +[2026-06-07 03:00:09,550][472559] Updated weights for policy 0, policy_version 40814 (0.0008) +[2026-06-07 03:00:09,692][472559] Updated weights for policy 0, policy_version 40826 (0.0009) +[2026-06-07 03:00:10,226][472559] Updated weights for policy 0, policy_version 40836 (0.0007) +[2026-06-07 03:00:10,347][472559] Updated weights for policy 0, policy_version 40847 (0.0008) +[2026-06-07 03:00:10,474][472559] Updated weights for policy 0, policy_version 40858 (0.0008) +[2026-06-07 03:00:10,587][472559] Updated weights for policy 0, policy_version 40868 (0.0008) +[2026-06-07 03:00:10,704][472559] Updated weights for policy 0, policy_version 40878 (0.0008) +[2026-06-07 03:00:10,838][472559] Updated weights for policy 0, policy_version 40890 (0.0008) +[2026-06-07 03:00:11,403][472559] Updated weights for policy 0, policy_version 40901 (0.0008) +[2026-06-07 03:00:11,521][472559] Updated weights for policy 0, policy_version 40911 (0.0008) +[2026-06-07 03:00:11,644][472559] Updated weights for policy 0, policy_version 40922 (0.0008) +[2026-06-07 03:00:11,764][472559] Updated weights for policy 0, policy_version 40933 (0.0008) +[2026-06-07 03:00:11,906][472559] Updated weights for policy 0, policy_version 40945 (0.0008) +[2026-06-07 03:00:12,030][472559] Updated weights for policy 0, policy_version 40956 (0.0008) +[2026-06-07 03:00:12,579][472559] Updated weights for policy 0, policy_version 40967 (0.0008) +[2026-06-07 03:00:12,713][472559] Updated weights for policy 0, policy_version 40979 (0.0008) +[2026-06-07 03:00:12,836][472559] Updated weights for policy 0, policy_version 40990 (0.0008) +[2026-06-07 03:00:12,959][472559] Updated weights for policy 0, policy_version 41001 (0.0008) +[2026-06-07 03:00:13,081][472559] Updated weights for policy 0, policy_version 41012 (0.0008) +[2026-06-07 03:00:13,117][464927] Fps is (10 sec: 26214.5, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 20971520. Throughput: 0: 28305.1. Samples: 21016448. Policy #0 lag: (min: 63.0, avg: 75.5, max: 127.0) +[2026-06-07 03:00:13,118][464927] Avg episode reward: [(0, '1460.635')] +[2026-06-07 03:00:13,216][472559] Updated weights for policy 0, policy_version 41023 (0.0008) +[2026-06-07 03:00:13,751][472559] Updated weights for policy 0, policy_version 41033 (0.0008) +[2026-06-07 03:00:13,872][472559] Updated weights for policy 0, policy_version 41044 (0.0008) +[2026-06-07 03:00:13,984][472559] Updated weights for policy 0, policy_version 41054 (0.0008) +[2026-06-07 03:00:14,097][472559] Updated weights for policy 0, policy_version 41064 (0.0008) +[2026-06-07 03:00:14,226][472559] Updated weights for policy 0, policy_version 41075 (0.0008) +[2026-06-07 03:00:14,352][472559] Updated weights for policy 0, policy_version 41086 (0.0008) +[2026-06-07 03:00:14,911][472559] Updated weights for policy 0, policy_version 41097 (0.0008) +[2026-06-07 03:00:15,026][472559] Updated weights for policy 0, policy_version 41107 (0.0008) +[2026-06-07 03:00:15,149][472559] Updated weights for policy 0, policy_version 41118 (0.0008) +[2026-06-07 03:00:15,261][472559] Updated weights for policy 0, policy_version 41128 (0.0008) +[2026-06-07 03:00:15,396][472559] Updated weights for policy 0, policy_version 41140 (0.0008) +[2026-06-07 03:00:15,514][472559] Updated weights for policy 0, policy_version 41151 (0.0008) +[2026-06-07 03:00:16,071][472559] Updated weights for policy 0, policy_version 41161 (0.0008) +[2026-06-07 03:00:16,176][472559] Updated weights for policy 0, policy_version 41171 (0.0008) +[2026-06-07 03:00:16,292][472559] Updated weights for policy 0, policy_version 41181 (0.0008) +[2026-06-07 03:00:16,406][472559] Updated weights for policy 0, policy_version 41191 (0.0009) +[2026-06-07 03:00:16,521][472559] Updated weights for policy 0, policy_version 41201 (0.0008) +[2026-06-07 03:00:16,653][472559] Updated weights for policy 0, policy_version 41213 (0.0008) +[2026-06-07 03:00:17,210][472559] Updated weights for policy 0, policy_version 41224 (0.0008) +[2026-06-07 03:00:17,343][472559] Updated weights for policy 0, policy_version 41236 (0.0008) +[2026-06-07 03:00:17,465][472559] Updated weights for policy 0, policy_version 41247 (0.0008) +[2026-06-07 03:00:17,579][472559] Updated weights for policy 0, policy_version 41257 (0.0008) +[2026-06-07 03:00:17,693][472559] Updated weights for policy 0, policy_version 41267 (0.0008) +[2026-06-07 03:00:17,803][472559] Updated weights for policy 0, policy_version 41277 (0.0009) +[2026-06-07 03:00:18,116][464927] Fps is (10 sec: 29491.4, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 21135360. Throughput: 0: 28305.1. Samples: 21107456. Policy #0 lag: (min: 56.0, avg: 80.6, max: 120.0) +[2026-06-07 03:00:18,117][464927] Avg episode reward: [(0, '1439.654')] +[2026-06-07 03:00:18,350][472559] Updated weights for policy 0, policy_version 41287 (0.0008) +[2026-06-07 03:00:18,460][472559] Updated weights for policy 0, policy_version 41297 (0.0007) +[2026-06-07 03:00:18,577][472559] Updated weights for policy 0, policy_version 41307 (0.0008) +[2026-06-07 03:00:18,704][472559] Updated weights for policy 0, policy_version 41319 (0.0008) +[2026-06-07 03:00:18,822][472559] Updated weights for policy 0, policy_version 41329 (0.0008) +[2026-06-07 03:00:18,940][472559] Updated weights for policy 0, policy_version 41339 (0.0006) +[2026-06-07 03:00:19,488][472559] Updated weights for policy 0, policy_version 41349 (0.0008) +[2026-06-07 03:00:19,618][472559] Updated weights for policy 0, policy_version 41361 (0.0008) +[2026-06-07 03:00:19,738][472559] Updated weights for policy 0, policy_version 41371 (0.0009) +[2026-06-07 03:00:19,868][472559] Updated weights for policy 0, policy_version 41383 (0.0008) +[2026-06-07 03:00:19,993][472559] Updated weights for policy 0, policy_version 41394 (0.0008) +[2026-06-07 03:00:20,117][472559] Updated weights for policy 0, policy_version 41405 (0.0008) +[2026-06-07 03:00:20,689][472559] Updated weights for policy 0, policy_version 41417 (0.0008) +[2026-06-07 03:00:20,809][472559] Updated weights for policy 0, policy_version 41428 (0.0008) +[2026-06-07 03:00:20,936][472559] Updated weights for policy 0, policy_version 41439 (0.0008) +[2026-06-07 03:00:21,067][472559] Updated weights for policy 0, policy_version 41451 (0.0008) +[2026-06-07 03:00:21,183][472559] Updated weights for policy 0, policy_version 41461 (0.0008) +[2026-06-07 03:00:21,301][472559] Updated weights for policy 0, policy_version 41471 (0.0008) +[2026-06-07 03:00:21,859][472559] Updated weights for policy 0, policy_version 41482 (0.0008) +[2026-06-07 03:00:21,983][472559] Updated weights for policy 0, policy_version 41493 (0.0008) +[2026-06-07 03:00:22,110][472559] Updated weights for policy 0, policy_version 41504 (0.0009) +[2026-06-07 03:00:22,231][472559] Updated weights for policy 0, policy_version 41515 (0.0009) +[2026-06-07 03:00:22,377][472559] Updated weights for policy 0, policy_version 41528 (0.0008) +[2026-06-07 03:00:22,919][472559] Updated weights for policy 0, policy_version 41538 (0.0008) +[2026-06-07 03:00:23,040][472559] Updated weights for policy 0, policy_version 41549 (0.0006) +[2026-06-07 03:00:23,117][464927] Fps is (10 sec: 29491.0, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 21266432. Throughput: 0: 28359.0. Samples: 21275520. Policy #0 lag: (min: 56.0, avg: 80.6, max: 120.0) +[2026-06-07 03:00:23,118][464927] Avg episode reward: [(0, '1458.081')] +[2026-06-07 03:00:23,166][472559] Updated weights for policy 0, policy_version 41560 (0.0008) +[2026-06-07 03:00:23,277][472559] Updated weights for policy 0, policy_version 41570 (0.0009) +[2026-06-07 03:00:23,405][472559] Updated weights for policy 0, policy_version 41581 (0.0009) +[2026-06-07 03:00:23,530][472559] Updated weights for policy 0, policy_version 41592 (0.0008) +[2026-06-07 03:00:24,072][472559] Updated weights for policy 0, policy_version 41602 (0.0008) +[2026-06-07 03:00:24,199][472559] Updated weights for policy 0, policy_version 41613 (0.0009) +[2026-06-07 03:00:24,312][472559] Updated weights for policy 0, policy_version 41624 (0.0008) +[2026-06-07 03:00:24,449][472559] Updated weights for policy 0, policy_version 41635 (0.0008) +[2026-06-07 03:00:24,558][472559] Updated weights for policy 0, policy_version 41645 (0.0008) +[2026-06-07 03:00:24,674][472559] Updated weights for policy 0, policy_version 41655 (0.0009) +[2026-06-07 03:00:25,235][472559] Updated weights for policy 0, policy_version 41667 (0.0008) +[2026-06-07 03:00:25,353][472559] Updated weights for policy 0, policy_version 41678 (0.0008) +[2026-06-07 03:00:25,482][472559] Updated weights for policy 0, policy_version 41689 (0.0009) +[2026-06-07 03:00:25,594][472559] Updated weights for policy 0, policy_version 41699 (0.0008) +[2026-06-07 03:00:25,707][472559] Updated weights for policy 0, policy_version 41709 (0.0008) +[2026-06-07 03:00:25,822][472559] Updated weights for policy 0, policy_version 41719 (0.0008) +[2026-06-07 03:00:26,379][472559] Updated weights for policy 0, policy_version 41730 (0.0008) +[2026-06-07 03:00:26,487][472559] Updated weights for policy 0, policy_version 41740 (0.0008) +[2026-06-07 03:00:26,598][472559] Updated weights for policy 0, policy_version 41750 (0.0008) +[2026-06-07 03:00:26,712][472559] Updated weights for policy 0, policy_version 41760 (0.0009) +[2026-06-07 03:00:26,846][472559] Updated weights for policy 0, policy_version 41772 (0.0008) +[2026-06-07 03:00:26,973][472559] Updated weights for policy 0, policy_version 41783 (0.0008) +[2026-06-07 03:00:27,523][472559] Updated weights for policy 0, policy_version 41793 (0.0008) +[2026-06-07 03:00:27,629][472559] Updated weights for policy 0, policy_version 41803 (0.0008) +[2026-06-07 03:00:27,746][472559] Updated weights for policy 0, policy_version 41813 (0.0008) +[2026-06-07 03:00:27,853][472559] Updated weights for policy 0, policy_version 41823 (0.0008) +[2026-06-07 03:00:27,987][472559] Updated weights for policy 0, policy_version 41834 (0.0004) +[2026-06-07 03:00:28,099][472559] Updated weights for policy 0, policy_version 41844 (0.0004) +[2026-06-07 03:00:28,117][464927] Fps is (10 sec: 26214.1, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 21397504. Throughput: 0: 28393.3. Samples: 21443200. Policy #0 lag: (min: 56.0, avg: 80.6, max: 120.0) +[2026-06-07 03:00:28,118][464927] Avg episode reward: [(0, '1478.267')] +[2026-06-07 03:00:28,227][472559] Updated weights for policy 0, policy_version 41855 (0.0005) +[2026-06-07 03:00:28,761][472559] Updated weights for policy 0, policy_version 41865 (0.0007) +[2026-06-07 03:00:28,886][472559] Updated weights for policy 0, policy_version 41876 (0.0008) +[2026-06-07 03:00:28,996][472559] Updated weights for policy 0, policy_version 41886 (0.0008) +[2026-06-07 03:00:29,115][472559] Updated weights for policy 0, policy_version 41896 (0.0009) +[2026-06-07 03:00:29,242][472559] Updated weights for policy 0, policy_version 41907 (0.0008) +[2026-06-07 03:00:29,362][472559] Updated weights for policy 0, policy_version 41918 (0.0008) +[2026-06-07 03:00:29,912][472559] Updated weights for policy 0, policy_version 41928 (0.0008) +[2026-06-07 03:00:30,039][472559] Updated weights for policy 0, policy_version 41939 (0.0008) +[2026-06-07 03:00:30,170][472559] Updated weights for policy 0, policy_version 41951 (0.0008) +[2026-06-07 03:00:30,294][472559] Updated weights for policy 0, policy_version 41962 (0.0008) +[2026-06-07 03:00:30,417][472559] Updated weights for policy 0, policy_version 41973 (0.0008) +[2026-06-07 03:00:30,540][472559] Updated weights for policy 0, policy_version 41984 (0.0008) +[2026-06-07 03:00:31,109][472559] Updated weights for policy 0, policy_version 41995 (0.0008) +[2026-06-07 03:00:31,218][472559] Updated weights for policy 0, policy_version 42005 (0.0008) +[2026-06-07 03:00:31,331][472559] Updated weights for policy 0, policy_version 42015 (0.0008) +[2026-06-07 03:00:31,441][472559] Updated weights for policy 0, policy_version 42025 (0.0008) +[2026-06-07 03:00:31,565][472559] Updated weights for policy 0, policy_version 42036 (0.0008) +[2026-06-07 03:00:31,698][472559] Updated weights for policy 0, policy_version 42048 (0.0008) +[2026-06-07 03:00:32,261][472559] Updated weights for policy 0, policy_version 42060 (0.0008) +[2026-06-07 03:00:32,372][472559] Updated weights for policy 0, policy_version 42070 (0.0006) +[2026-06-07 03:00:32,486][472559] Updated weights for policy 0, policy_version 42080 (0.0008) +[2026-06-07 03:00:32,608][472559] Updated weights for policy 0, policy_version 42091 (0.0008) +[2026-06-07 03:00:32,742][472559] Updated weights for policy 0, policy_version 42103 (0.0008) +[2026-06-07 03:00:33,117][464927] Fps is (10 sec: 29491.5, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 21561344. Throughput: 0: 28398.9. Samples: 21533696. Policy #0 lag: (min: 56.0, avg: 80.6, max: 120.0) +[2026-06-07 03:00:33,117][464927] Avg episode reward: [(0, '1541.540')] +[2026-06-07 03:00:33,121][472025] Saving new best policy, reward=1541.540! +[2026-06-07 03:00:33,295][472559] Updated weights for policy 0, policy_version 42113 (0.0008) +[2026-06-07 03:00:33,404][472559] Updated weights for policy 0, policy_version 42123 (0.0008) +[2026-06-07 03:00:33,541][472559] Updated weights for policy 0, policy_version 42135 (0.0008) +[2026-06-07 03:00:33,664][472559] Updated weights for policy 0, policy_version 42146 (0.0008) +[2026-06-07 03:00:33,782][472559] Updated weights for policy 0, policy_version 42156 (0.0008) +[2026-06-07 03:00:33,889][472559] Updated weights for policy 0, policy_version 42166 (0.0008) +[2026-06-07 03:00:34,441][472559] Updated weights for policy 0, policy_version 42177 (0.0009) +[2026-06-07 03:00:34,565][472559] Updated weights for policy 0, policy_version 42188 (0.0008) +[2026-06-07 03:00:34,683][472559] Updated weights for policy 0, policy_version 42199 (0.0010) +[2026-06-07 03:00:34,800][472559] Updated weights for policy 0, policy_version 42209 (0.0008) +[2026-06-07 03:00:34,929][472559] Updated weights for policy 0, policy_version 42220 (0.0008) +[2026-06-07 03:00:35,072][472559] Updated weights for policy 0, policy_version 42232 (0.0009) +[2026-06-07 03:00:35,605][472559] Updated weights for policy 0, policy_version 42242 (0.0009) +[2026-06-07 03:00:35,717][472559] Updated weights for policy 0, policy_version 42252 (0.0008) +[2026-06-07 03:00:35,842][472559] Updated weights for policy 0, policy_version 42263 (0.0008) +[2026-06-07 03:00:35,962][472559] Updated weights for policy 0, policy_version 42274 (0.0008) +[2026-06-07 03:00:36,089][472559] Updated weights for policy 0, policy_version 42285 (0.0008) +[2026-06-07 03:00:36,238][472559] Updated weights for policy 0, policy_version 42298 (0.0008) +[2026-06-07 03:00:36,788][472559] Updated weights for policy 0, policy_version 42308 (0.0008) +[2026-06-07 03:00:36,908][472559] Updated weights for policy 0, policy_version 42319 (0.0008) +[2026-06-07 03:00:37,021][472559] Updated weights for policy 0, policy_version 42329 (0.0008) +[2026-06-07 03:00:37,150][472559] Updated weights for policy 0, policy_version 42341 (0.0008) +[2026-06-07 03:00:37,277][472559] Updated weights for policy 0, policy_version 42352 (0.0008) +[2026-06-07 03:00:37,404][472559] Updated weights for policy 0, policy_version 42363 (0.0008) +[2026-06-07 03:00:37,983][472559] Updated weights for policy 0, policy_version 42375 (0.0008) +[2026-06-07 03:00:38,097][472559] Updated weights for policy 0, policy_version 42385 (0.0008) +[2026-06-07 03:00:38,116][464927] Fps is (10 sec: 29491.5, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 21692416. Throughput: 0: 28387.6. Samples: 21700480. Policy #0 lag: (min: 56.0, avg: 80.6, max: 120.0) +[2026-06-07 03:00:38,117][464927] Avg episode reward: [(0, '1568.687')] +[2026-06-07 03:00:38,203][472559] Updated weights for policy 0, policy_version 42395 (0.0008) +[2026-06-07 03:00:38,335][472559] Updated weights for policy 0, policy_version 42406 (0.0008) +[2026-06-07 03:00:38,440][472559] Updated weights for policy 0, policy_version 42416 (0.0008) +[2026-06-07 03:00:38,568][472559] Updated weights for policy 0, policy_version 42427 (0.0008) +[2026-06-07 03:00:38,619][472025] Saving new best policy, reward=1568.687! +[2026-06-07 03:00:39,128][472559] Updated weights for policy 0, policy_version 42437 (0.0008) +[2026-06-07 03:00:39,248][472559] Updated weights for policy 0, policy_version 42448 (0.0008) +[2026-06-07 03:00:39,359][472559] Updated weights for policy 0, policy_version 42458 (0.0008) +[2026-06-07 03:00:39,494][472559] Updated weights for policy 0, policy_version 42470 (0.0008) +[2026-06-07 03:00:39,620][472559] Updated weights for policy 0, policy_version 42481 (0.0008) +[2026-06-07 03:00:39,753][472559] Updated weights for policy 0, policy_version 42493 (0.0008) +[2026-06-07 03:00:40,303][472559] Updated weights for policy 0, policy_version 42503 (0.0008) +[2026-06-07 03:00:40,423][472559] Updated weights for policy 0, policy_version 42514 (0.0009) +[2026-06-07 03:00:40,547][472559] Updated weights for policy 0, policy_version 42525 (0.0008) +[2026-06-07 03:00:40,687][472559] Updated weights for policy 0, policy_version 42537 (0.0008) +[2026-06-07 03:00:40,822][472559] Updated weights for policy 0, policy_version 42549 (0.0008) +[2026-06-07 03:00:41,410][472559] Updated weights for policy 0, policy_version 42561 (0.0008) +[2026-06-07 03:00:41,535][472559] Updated weights for policy 0, policy_version 42572 (0.0008) +[2026-06-07 03:00:41,668][472559] Updated weights for policy 0, policy_version 42584 (0.0008) +[2026-06-07 03:00:41,790][472559] Updated weights for policy 0, policy_version 42595 (0.0009) +[2026-06-07 03:00:41,924][472559] Updated weights for policy 0, policy_version 42607 (0.0008) +[2026-06-07 03:00:42,044][472559] Updated weights for policy 0, policy_version 42617 (0.0008) +[2026-06-07 03:00:42,584][472559] Updated weights for policy 0, policy_version 42627 (0.0008) +[2026-06-07 03:00:42,720][472559] Updated weights for policy 0, policy_version 42639 (0.0008) +[2026-06-07 03:00:42,831][472559] Updated weights for policy 0, policy_version 42649 (0.0008) +[2026-06-07 03:00:42,957][472559] Updated weights for policy 0, policy_version 42660 (0.0008) +[2026-06-07 03:00:43,068][472559] Updated weights for policy 0, policy_version 42670 (0.0009) +[2026-06-07 03:00:43,116][464927] Fps is (10 sec: 26214.5, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 21823488. Throughput: 0: 28401.8. Samples: 21868544. Policy #0 lag: (min: 31.0, avg: 43.7, max: 95.0) +[2026-06-07 03:00:43,117][464927] Avg episode reward: [(0, '1531.513')] +[2026-06-07 03:00:43,192][472559] Updated weights for policy 0, policy_version 42681 (0.0009) +[2026-06-07 03:00:43,745][472559] Updated weights for policy 0, policy_version 42691 (0.0008) +[2026-06-07 03:00:43,853][472559] Updated weights for policy 0, policy_version 42701 (0.0008) +[2026-06-07 03:00:43,976][472559] Updated weights for policy 0, policy_version 42712 (0.0008) +[2026-06-07 03:00:44,113][472559] Updated weights for policy 0, policy_version 42724 (0.0008) +[2026-06-07 03:00:44,259][472559] Updated weights for policy 0, policy_version 42737 (0.0008) +[2026-06-07 03:00:44,375][472559] Updated weights for policy 0, policy_version 42747 (0.0008) +[2026-06-07 03:00:44,925][472559] Updated weights for policy 0, policy_version 42757 (0.0008) +[2026-06-07 03:00:45,049][472559] Updated weights for policy 0, policy_version 42768 (0.0008) +[2026-06-07 03:00:45,159][472559] Updated weights for policy 0, policy_version 42778 (0.0008) +[2026-06-07 03:00:45,288][472559] Updated weights for policy 0, policy_version 42789 (0.0008) +[2026-06-07 03:00:45,415][472559] Updated weights for policy 0, policy_version 42800 (0.0008) +[2026-06-07 03:00:45,532][472559] Updated weights for policy 0, policy_version 42810 (0.0008) +[2026-06-07 03:00:46,076][472559] Updated weights for policy 0, policy_version 42820 (0.0007) +[2026-06-07 03:00:46,205][472559] Updated weights for policy 0, policy_version 42831 (0.0008) +[2026-06-07 03:00:46,323][472559] Updated weights for policy 0, policy_version 42842 (0.0009) +[2026-06-07 03:00:46,442][472559] Updated weights for policy 0, policy_version 42852 (0.0008) +[2026-06-07 03:00:46,566][472559] Updated weights for policy 0, policy_version 42863 (0.0008) +[2026-06-07 03:00:46,682][472559] Updated weights for policy 0, policy_version 42873 (0.0009) +[2026-06-07 03:00:47,240][472559] Updated weights for policy 0, policy_version 42884 (0.0008) +[2026-06-07 03:00:47,343][472559] Updated weights for policy 0, policy_version 42894 (0.0009) +[2026-06-07 03:00:47,458][472559] Updated weights for policy 0, policy_version 42904 (0.0009) +[2026-06-07 03:00:47,571][472559] Updated weights for policy 0, policy_version 42914 (0.0009) +[2026-06-07 03:00:47,686][472559] Updated weights for policy 0, policy_version 42924 (0.0008) +[2026-06-07 03:00:47,801][472559] Updated weights for policy 0, policy_version 42934 (0.0008) +[2026-06-07 03:00:47,916][472559] Updated weights for policy 0, policy_version 42944 (0.0008) +[2026-06-07 03:00:48,117][464927] Fps is (10 sec: 29491.0, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 21987328. Throughput: 0: 28401.8. Samples: 21959680. Policy #0 lag: (min: 31.0, avg: 43.7, max: 95.0) +[2026-06-07 03:00:48,117][464927] Avg episode reward: [(0, '1497.216')] +[2026-06-07 03:00:48,490][472559] Updated weights for policy 0, policy_version 42955 (0.0008) +[2026-06-07 03:00:48,616][472559] Updated weights for policy 0, policy_version 42966 (0.0009) +[2026-06-07 03:00:48,723][472559] Updated weights for policy 0, policy_version 42976 (0.0008) +[2026-06-07 03:00:48,853][472559] Updated weights for policy 0, policy_version 42987 (0.0008) +[2026-06-07 03:00:48,967][472559] Updated weights for policy 0, policy_version 42997 (0.0009) +[2026-06-07 03:00:49,083][472559] Updated weights for policy 0, policy_version 43007 (0.0009) +[2026-06-07 03:00:49,636][472559] Updated weights for policy 0, policy_version 43017 (0.0008) +[2026-06-07 03:00:49,745][472559] Updated weights for policy 0, policy_version 43027 (0.0008) +[2026-06-07 03:00:49,880][472559] Updated weights for policy 0, policy_version 43038 (0.0009) +[2026-06-07 03:00:49,990][472559] Updated weights for policy 0, policy_version 43048 (0.0008) +[2026-06-07 03:00:50,106][472559] Updated weights for policy 0, policy_version 43058 (0.0008) +[2026-06-07 03:00:50,221][472559] Updated weights for policy 0, policy_version 43068 (0.0009) +[2026-06-07 03:00:50,775][472559] Updated weights for policy 0, policy_version 43079 (0.0008) +[2026-06-07 03:00:50,884][472559] Updated weights for policy 0, policy_version 43089 (0.0008) +[2026-06-07 03:00:51,001][472559] Updated weights for policy 0, policy_version 43099 (0.0008) +[2026-06-07 03:00:51,111][472559] Updated weights for policy 0, policy_version 43109 (0.0008) +[2026-06-07 03:00:51,237][472559] Updated weights for policy 0, policy_version 43120 (0.0009) +[2026-06-07 03:00:51,352][472559] Updated weights for policy 0, policy_version 43130 (0.0008) +[2026-06-07 03:00:51,885][472559] Updated weights for policy 0, policy_version 43140 (0.0008) +[2026-06-07 03:00:51,995][472559] Updated weights for policy 0, policy_version 43150 (0.0008) +[2026-06-07 03:00:52,105][472559] Updated weights for policy 0, policy_version 43160 (0.0008) +[2026-06-07 03:00:52,222][472559] Updated weights for policy 0, policy_version 43170 (0.0008) +[2026-06-07 03:00:52,348][472559] Updated weights for policy 0, policy_version 43181 (0.0008) +[2026-06-07 03:00:52,461][472559] Updated weights for policy 0, policy_version 43191 (0.0008) +[2026-06-07 03:00:53,003][472559] Updated weights for policy 0, policy_version 43201 (0.0008) +[2026-06-07 03:00:53,115][472559] Updated weights for policy 0, policy_version 43211 (0.0008) +[2026-06-07 03:00:53,117][464927] Fps is (10 sec: 29491.0, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 22118400. Throughput: 0: 28379.0. Samples: 22126336. Policy #0 lag: (min: 31.0, avg: 43.7, max: 95.0) +[2026-06-07 03:00:53,117][464927] Avg episode reward: [(0, '1610.382')] +[2026-06-07 03:00:53,225][472559] Updated weights for policy 0, policy_version 43221 (0.0008) +[2026-06-07 03:00:53,340][472559] Updated weights for policy 0, policy_version 43231 (0.0008) +[2026-06-07 03:00:53,459][472559] Updated weights for policy 0, policy_version 43242 (0.0008) +[2026-06-07 03:00:53,587][472559] Updated weights for policy 0, policy_version 43253 (0.0008) +[2026-06-07 03:00:53,699][472559] Updated weights for policy 0, policy_version 43263 (0.0008) +[2026-06-07 03:00:53,707][472025] Saving new best policy, reward=1610.382! +[2026-06-07 03:00:54,263][472559] Updated weights for policy 0, policy_version 43273 (0.0008) +[2026-06-07 03:00:54,410][472559] Updated weights for policy 0, policy_version 43286 (0.0009) +[2026-06-07 03:00:54,523][472559] Updated weights for policy 0, policy_version 43296 (0.0008) +[2026-06-07 03:00:54,637][472559] Updated weights for policy 0, policy_version 43306 (0.0008) +[2026-06-07 03:00:54,749][472559] Updated weights for policy 0, policy_version 43316 (0.0008) +[2026-06-07 03:00:54,790][472025] Early stopping after 7 epochs (56 sgd steps), loss delta 0.0000000 +[2026-06-07 03:00:55,322][472559] Updated weights for policy 0, policy_version 43326 (0.0008) +[2026-06-07 03:00:55,433][472559] Updated weights for policy 0, policy_version 43336 (0.0009) +[2026-06-07 03:00:55,551][472559] Updated weights for policy 0, policy_version 43346 (0.0008) +[2026-06-07 03:00:55,657][472559] Updated weights for policy 0, policy_version 43356 (0.0008) +[2026-06-07 03:00:55,772][472559] Updated weights for policy 0, policy_version 43366 (0.0008) +[2026-06-07 03:00:55,890][472559] Updated weights for policy 0, policy_version 43376 (0.0008) +[2026-06-07 03:00:56,450][472559] Updated weights for policy 0, policy_version 43387 (0.0007) +[2026-06-07 03:00:56,588][472559] Updated weights for policy 0, policy_version 43399 (0.0007) +[2026-06-07 03:00:56,696][472559] Updated weights for policy 0, policy_version 43409 (0.0008) +[2026-06-07 03:00:56,812][472559] Updated weights for policy 0, policy_version 43419 (0.0009) +[2026-06-07 03:00:56,950][472559] Updated weights for policy 0, policy_version 43431 (0.0008) +[2026-06-07 03:00:57,066][472559] Updated weights for policy 0, policy_version 43441 (0.0008) +[2026-06-07 03:00:57,604][472559] Updated weights for policy 0, policy_version 43451 (0.0008) +[2026-06-07 03:00:57,750][472559] Updated weights for policy 0, policy_version 43464 (0.0008) +[2026-06-07 03:00:57,875][472559] Updated weights for policy 0, policy_version 43475 (0.0009) +[2026-06-07 03:00:57,993][472559] Updated weights for policy 0, policy_version 43486 (0.0008) +[2026-06-07 03:00:58,117][464927] Fps is (10 sec: 26214.4, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 22249472. Throughput: 0: 28396.1. Samples: 22294272. Policy #0 lag: (min: 31.0, avg: 43.7, max: 95.0) +[2026-06-07 03:00:58,117][464927] Avg episode reward: [(0, '1554.232')] +[2026-06-07 03:00:58,135][472559] Updated weights for policy 0, policy_version 43498 (0.0008) +[2026-06-07 03:00:58,249][472559] Updated weights for policy 0, policy_version 43508 (0.0008) +[2026-06-07 03:00:58,797][472559] Updated weights for policy 0, policy_version 43518 (0.0012) +[2026-06-07 03:00:58,914][472559] Updated weights for policy 0, policy_version 43528 (0.0008) +[2026-06-07 03:00:59,025][472559] Updated weights for policy 0, policy_version 43538 (0.0008) +[2026-06-07 03:00:59,150][472559] Updated weights for policy 0, policy_version 43549 (0.0009) +[2026-06-07 03:00:59,262][472559] Updated weights for policy 0, policy_version 43559 (0.0008) +[2026-06-07 03:00:59,376][472559] Updated weights for policy 0, policy_version 43569 (0.0008) +[2026-06-07 03:00:59,931][472559] Updated weights for policy 0, policy_version 43580 (0.0007) +[2026-06-07 03:01:00,039][472559] Updated weights for policy 0, policy_version 43590 (0.0008) +[2026-06-07 03:01:00,149][472559] Updated weights for policy 0, policy_version 43600 (0.0009) +[2026-06-07 03:01:00,263][472559] Updated weights for policy 0, policy_version 43610 (0.0008) +[2026-06-07 03:01:00,397][472559] Updated weights for policy 0, policy_version 43621 (0.0008) +[2026-06-07 03:01:00,504][472559] Updated weights for policy 0, policy_version 43631 (0.0008) +[2026-06-07 03:01:01,053][472559] Updated weights for policy 0, policy_version 43642 (0.0008) +[2026-06-07 03:01:01,175][472559] Updated weights for policy 0, policy_version 43653 (0.0009) +[2026-06-07 03:01:01,282][472559] Updated weights for policy 0, policy_version 43663 (0.0009) +[2026-06-07 03:01:01,405][472559] Updated weights for policy 0, policy_version 43674 (0.0009) +[2026-06-07 03:01:01,530][472559] Updated weights for policy 0, policy_version 43684 (0.0009) +[2026-06-07 03:01:01,658][472559] Updated weights for policy 0, policy_version 43696 (0.0008) +[2026-06-07 03:01:02,215][472559] Updated weights for policy 0, policy_version 43706 (0.0009) +[2026-06-07 03:01:02,332][472559] Updated weights for policy 0, policy_version 43716 (0.0009) +[2026-06-07 03:01:02,446][472559] Updated weights for policy 0, policy_version 43726 (0.0008) +[2026-06-07 03:01:02,556][472559] Updated weights for policy 0, policy_version 43736 (0.0009) +[2026-06-07 03:01:02,672][472559] Updated weights for policy 0, policy_version 43746 (0.0008) +[2026-06-07 03:01:02,796][472559] Updated weights for policy 0, policy_version 43757 (0.0009) +[2026-06-07 03:01:02,917][472559] Updated weights for policy 0, policy_version 43767 (0.0008) +[2026-06-07 03:01:03,116][464927] Fps is (10 sec: 29491.3, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 22413312. Throughput: 0: 28370.5. Samples: 22384128. Policy #0 lag: (min: 31.0, avg: 43.7, max: 95.0) +[2026-06-07 03:01:03,117][464927] Avg episode reward: [(0, '1589.477')] +[2026-06-07 03:01:03,441][472559] Updated weights for policy 0, policy_version 43777 (0.0008) +[2026-06-07 03:01:03,566][472559] Updated weights for policy 0, policy_version 43788 (0.0009) +[2026-06-07 03:01:03,696][472559] Updated weights for policy 0, policy_version 43799 (0.0008) +[2026-06-07 03:01:03,817][472559] Updated weights for policy 0, policy_version 43810 (0.0008) +[2026-06-07 03:01:03,936][472559] Updated weights for policy 0, policy_version 43820 (0.0009) +[2026-06-07 03:01:04,059][472559] Updated weights for policy 0, policy_version 43831 (0.0008) +[2026-06-07 03:01:04,607][472559] Updated weights for policy 0, policy_version 43841 (0.0006) +[2026-06-07 03:01:04,723][472559] Updated weights for policy 0, policy_version 43851 (0.0004) +[2026-06-07 03:01:04,873][472559] Updated weights for policy 0, policy_version 43864 (0.0004) +[2026-06-07 03:01:04,988][472559] Updated weights for policy 0, policy_version 43874 (0.0004) +[2026-06-07 03:01:05,103][472559] Updated weights for policy 0, policy_version 43884 (0.0004) +[2026-06-07 03:01:05,217][472559] Updated weights for policy 0, policy_version 43894 (0.0007) +[2026-06-07 03:01:05,780][472559] Updated weights for policy 0, policy_version 43906 (0.0008) +[2026-06-07 03:01:05,896][472559] Updated weights for policy 0, policy_version 43916 (0.0008) +[2026-06-07 03:01:06,021][472559] Updated weights for policy 0, policy_version 43927 (0.0008) +[2026-06-07 03:01:06,135][472559] Updated weights for policy 0, policy_version 43937 (0.0008) +[2026-06-07 03:01:06,253][472559] Updated weights for policy 0, policy_version 43947 (0.0008) +[2026-06-07 03:01:06,364][472559] Updated weights for policy 0, policy_version 43957 (0.0008) +[2026-06-07 03:01:06,907][472559] Updated weights for policy 0, policy_version 43967 (0.0008) +[2026-06-07 03:01:07,030][472559] Updated weights for policy 0, policy_version 43978 (0.0009) +[2026-06-07 03:01:07,142][472559] Updated weights for policy 0, policy_version 43988 (0.0009) +[2026-06-07 03:01:07,278][472559] Updated weights for policy 0, policy_version 44000 (0.0008) +[2026-06-07 03:01:07,396][472559] Updated weights for policy 0, policy_version 44010 (0.0008) +[2026-06-07 03:01:07,511][472559] Updated weights for policy 0, policy_version 44020 (0.0008) +[2026-06-07 03:01:08,058][472559] Updated weights for policy 0, policy_version 44030 (0.0008) +[2026-06-07 03:01:08,117][464927] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 22544384. Throughput: 0: 28362.0. Samples: 22551808. Policy #0 lag: (min: 31.0, avg: 43.7, max: 95.0) +[2026-06-07 03:01:08,117][464927] Avg episode reward: [(0, '1591.471')] +[2026-06-07 03:01:08,200][472559] Updated weights for policy 0, policy_version 44043 (0.0009) +[2026-06-07 03:01:08,315][472559] Updated weights for policy 0, policy_version 44053 (0.0009) +[2026-06-07 03:01:08,435][472559] Updated weights for policy 0, policy_version 44064 (0.0008) +[2026-06-07 03:01:08,552][472559] Updated weights for policy 0, policy_version 44074 (0.0009) +[2026-06-07 03:01:08,681][472559] Updated weights for policy 0, policy_version 44085 (0.0008) +[2026-06-07 03:01:09,227][472559] Updated weights for policy 0, policy_version 44095 (0.0008) +[2026-06-07 03:01:09,335][472559] Updated weights for policy 0, policy_version 44105 (0.0009) +[2026-06-07 03:01:09,447][472559] Updated weights for policy 0, policy_version 44115 (0.0009) +[2026-06-07 03:01:09,564][472559] Updated weights for policy 0, policy_version 44125 (0.0009) +[2026-06-07 03:01:09,685][472559] Updated weights for policy 0, policy_version 44136 (0.0008) +[2026-06-07 03:01:09,801][472559] Updated weights for policy 0, policy_version 44146 (0.0008) +[2026-06-07 03:01:10,367][472559] Updated weights for policy 0, policy_version 44157 (0.0008) +[2026-06-07 03:01:10,492][472559] Updated weights for policy 0, policy_version 44168 (0.0009) +[2026-06-07 03:01:10,618][472559] Updated weights for policy 0, policy_version 44179 (0.0008) +[2026-06-07 03:01:10,732][472559] Updated weights for policy 0, policy_version 44189 (0.0008) +[2026-06-07 03:01:10,844][472559] Updated weights for policy 0, policy_version 44199 (0.0008) +[2026-06-07 03:01:10,960][472559] Updated weights for policy 0, policy_version 44209 (0.0008) +[2026-06-07 03:01:11,538][472559] Updated weights for policy 0, policy_version 44221 (0.0008) +[2026-06-07 03:01:11,660][472559] Updated weights for policy 0, policy_version 44232 (0.0009) +[2026-06-07 03:01:11,770][472559] Updated weights for policy 0, policy_version 44242 (0.0008) +[2026-06-07 03:01:11,889][472559] Updated weights for policy 0, policy_version 44252 (0.0008) +[2026-06-07 03:01:12,025][472559] Updated weights for policy 0, policy_version 44264 (0.0009) +[2026-06-07 03:01:12,141][472559] Updated weights for policy 0, policy_version 44274 (0.0009) +[2026-06-07 03:01:12,701][472559] Updated weights for policy 0, policy_version 44285 (0.0008) +[2026-06-07 03:01:12,812][472559] Updated weights for policy 0, policy_version 44295 (0.0008) +[2026-06-07 03:01:12,932][472559] Updated weights for policy 0, policy_version 44306 (0.0008) +[2026-06-07 03:01:13,048][472559] Updated weights for policy 0, policy_version 44316 (0.0006) +[2026-06-07 03:01:13,117][464927] Fps is (10 sec: 26214.3, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 22675456. Throughput: 0: 28353.5. Samples: 22719104. Policy #0 lag: (min: 58.0, avg: 83.3, max: 122.0) +[2026-06-07 03:01:13,117][464927] Avg episode reward: [(0, '1538.552')] +[2026-06-07 03:01:13,161][472559] Updated weights for policy 0, policy_version 44326 (0.0008) +[2026-06-07 03:01:13,294][472559] Updated weights for policy 0, policy_version 44338 (0.0008) +[2026-06-07 03:01:13,849][472559] Updated weights for policy 0, policy_version 44348 (0.0008) +[2026-06-07 03:01:13,974][472559] Updated weights for policy 0, policy_version 44359 (0.0007) +[2026-06-07 03:01:14,081][472559] Updated weights for policy 0, policy_version 44369 (0.0006) +[2026-06-07 03:01:14,213][472559] Updated weights for policy 0, policy_version 44380 (0.0008) +[2026-06-07 03:01:14,330][472559] Updated weights for policy 0, policy_version 44390 (0.0008) +[2026-06-07 03:01:14,461][472559] Updated weights for policy 0, policy_version 44401 (0.0009) +[2026-06-07 03:01:14,990][472559] Updated weights for policy 0, policy_version 44412 (0.0007) +[2026-06-07 03:01:15,112][472559] Updated weights for policy 0, policy_version 44423 (0.0008) +[2026-06-07 03:01:15,228][472559] Updated weights for policy 0, policy_version 44433 (0.0008) +[2026-06-07 03:01:15,341][472559] Updated weights for policy 0, policy_version 44443 (0.0008) +[2026-06-07 03:01:15,457][472559] Updated weights for policy 0, policy_version 44453 (0.0008) +[2026-06-07 03:01:15,574][472559] Updated weights for policy 0, policy_version 44463 (0.0009) +[2026-06-07 03:01:16,150][472559] Updated weights for policy 0, policy_version 44475 (0.0008) +[2026-06-07 03:01:16,256][472559] Updated weights for policy 0, policy_version 44485 (0.0008) +[2026-06-07 03:01:16,377][472559] Updated weights for policy 0, policy_version 44496 (0.0009) +[2026-06-07 03:01:16,494][472559] Updated weights for policy 0, policy_version 44506 (0.0009) +[2026-06-07 03:01:16,610][472559] Updated weights for policy 0, policy_version 44516 (0.0009) +[2026-06-07 03:01:16,719][472559] Updated weights for policy 0, policy_version 44526 (0.0009) +[2026-06-07 03:01:17,290][472559] Updated weights for policy 0, policy_version 44537 (0.0008) +[2026-06-07 03:01:17,417][472559] Updated weights for policy 0, policy_version 44548 (0.0008) +[2026-06-07 03:01:17,524][472559] Updated weights for policy 0, policy_version 44558 (0.0008) +[2026-06-07 03:01:17,651][472559] Updated weights for policy 0, policy_version 44569 (0.0009) +[2026-06-07 03:01:17,767][472559] Updated weights for policy 0, policy_version 44579 (0.0008) +[2026-06-07 03:01:17,894][472559] Updated weights for policy 0, policy_version 44590 (0.0007) +[2026-06-07 03:01:18,010][472559] Updated weights for policy 0, policy_version 44600 (0.0005) +[2026-06-07 03:01:18,117][464927] Fps is (10 sec: 29491.1, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 22839296. Throughput: 0: 28333.5. Samples: 22808704. Policy #0 lag: (min: 58.0, avg: 83.3, max: 122.0) +[2026-06-07 03:01:18,117][464927] Avg episode reward: [(0, '1468.810')] +[2026-06-07 03:01:18,549][472559] Updated weights for policy 0, policy_version 44610 (0.0007) +[2026-06-07 03:01:18,666][472559] Updated weights for policy 0, policy_version 44620 (0.0006) +[2026-06-07 03:01:18,782][472559] Updated weights for policy 0, policy_version 44630 (0.0009) +[2026-06-07 03:01:18,920][472559] Updated weights for policy 0, policy_version 44642 (0.0009) +[2026-06-07 03:01:19,039][472559] Updated weights for policy 0, policy_version 44652 (0.0009) +[2026-06-07 03:01:19,160][472559] Updated weights for policy 0, policy_version 44663 (0.0008) +[2026-06-07 03:01:19,694][472559] Updated weights for policy 0, policy_version 44673 (0.0008) +[2026-06-07 03:01:19,807][472559] Updated weights for policy 0, policy_version 44683 (0.0009) +[2026-06-07 03:01:19,920][472559] Updated weights for policy 0, policy_version 44693 (0.0009) +[2026-06-07 03:01:20,056][472559] Updated weights for policy 0, policy_version 44705 (0.0009) +[2026-06-07 03:01:20,172][472559] Updated weights for policy 0, policy_version 44715 (0.0007) +[2026-06-07 03:01:20,308][472559] Updated weights for policy 0, policy_version 44727 (0.0006) +[2026-06-07 03:01:20,842][472559] Updated weights for policy 0, policy_version 44737 (0.0008) +[2026-06-07 03:01:20,959][472559] Updated weights for policy 0, policy_version 44747 (0.0005) +[2026-06-07 03:01:21,096][472559] Updated weights for policy 0, policy_version 44759 (0.0007) +[2026-06-07 03:01:21,209][472559] Updated weights for policy 0, policy_version 44769 (0.0008) +[2026-06-07 03:01:21,330][472559] Updated weights for policy 0, policy_version 44780 (0.0009) +[2026-06-07 03:01:21,454][472559] Updated weights for policy 0, policy_version 44791 (0.0008) +[2026-06-07 03:01:22,006][472559] Updated weights for policy 0, policy_version 44801 (0.0008) +[2026-06-07 03:01:22,122][472559] Updated weights for policy 0, policy_version 44811 (0.0008) +[2026-06-07 03:01:22,226][472559] Updated weights for policy 0, policy_version 44821 (0.0009) +[2026-06-07 03:01:22,364][472559] Updated weights for policy 0, policy_version 44833 (0.0009) +[2026-06-07 03:01:22,478][472559] Updated weights for policy 0, policy_version 44843 (0.0010) +[2026-06-07 03:01:22,591][472559] Updated weights for policy 0, policy_version 44853 (0.0008) +[2026-06-07 03:01:23,117][464927] Fps is (10 sec: 29491.2, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 22970368. Throughput: 0: 28387.5. Samples: 22977920. Policy #0 lag: (min: 58.0, avg: 83.3, max: 122.0) +[2026-06-07 03:01:23,117][464927] Avg episode reward: [(0, '1569.034')] +[2026-06-07 03:01:23,146][472559] Updated weights for policy 0, policy_version 44863 (0.0009) +[2026-06-07 03:01:23,266][472559] Updated weights for policy 0, policy_version 44874 (0.0008) +[2026-06-07 03:01:23,378][472559] Updated weights for policy 0, policy_version 44884 (0.0008) +[2026-06-07 03:01:23,493][472559] Updated weights for policy 0, policy_version 44894 (0.0008) +[2026-06-07 03:01:23,615][472559] Updated weights for policy 0, policy_version 44905 (0.0008) +[2026-06-07 03:01:23,731][472559] Updated weights for policy 0, policy_version 44915 (0.0008) +[2026-06-07 03:01:24,280][472559] Updated weights for policy 0, policy_version 44925 (0.0007) +[2026-06-07 03:01:24,404][472559] Updated weights for policy 0, policy_version 44936 (0.0004) +[2026-06-07 03:01:24,538][472559] Updated weights for policy 0, policy_version 44948 (0.0004) +[2026-06-07 03:01:24,655][472559] Updated weights for policy 0, policy_version 44958 (0.0004) +[2026-06-07 03:01:24,773][472559] Updated weights for policy 0, policy_version 44968 (0.0004) +[2026-06-07 03:01:24,888][472559] Updated weights for policy 0, policy_version 44978 (0.0004) +[2026-06-07 03:01:25,408][472559] Updated weights for policy 0, policy_version 44988 (0.0004) +[2026-06-07 03:01:25,518][472559] Updated weights for policy 0, policy_version 44998 (0.0004) +[2026-06-07 03:01:25,648][472559] Updated weights for policy 0, policy_version 45009 (0.0004) +[2026-06-07 03:01:25,758][472559] Updated weights for policy 0, policy_version 45019 (0.0004) +[2026-06-07 03:01:25,874][472559] Updated weights for policy 0, policy_version 45029 (0.0004) +[2026-06-07 03:01:25,984][472559] Updated weights for policy 0, policy_version 45039 (0.0004) +[2026-06-07 03:01:26,497][472559] Updated weights for policy 0, policy_version 45049 (0.0004) +[2026-06-07 03:01:26,622][472559] Updated weights for policy 0, policy_version 45060 (0.0004) +[2026-06-07 03:01:26,734][472559] Updated weights for policy 0, policy_version 45070 (0.0004) +[2026-06-07 03:01:26,850][472559] Updated weights for policy 0, policy_version 45080 (0.0004) +[2026-06-07 03:01:26,965][472559] Updated weights for policy 0, policy_version 45090 (0.0004) +[2026-06-07 03:01:27,091][472559] Updated weights for policy 0, policy_version 45101 (0.0004) +[2026-06-07 03:01:27,218][472559] Updated weights for policy 0, policy_version 45112 (0.0004) +[2026-06-07 03:01:27,743][472559] Updated weights for policy 0, policy_version 45123 (0.0004) +[2026-06-07 03:01:27,858][472559] Updated weights for policy 0, policy_version 45133 (0.0004) +[2026-06-07 03:01:27,975][472559] Updated weights for policy 0, policy_version 45143 (0.0004) +[2026-06-07 03:01:28,100][472559] Updated weights for policy 0, policy_version 45154 (0.0004) +[2026-06-07 03:01:28,117][464927] Fps is (10 sec: 26214.5, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 23101440. Throughput: 0: 28387.5. Samples: 23145984. Policy #0 lag: (min: 58.0, avg: 83.3, max: 122.0) +[2026-06-07 03:01:28,117][464927] Avg episode reward: [(0, '1597.694')] +[2026-06-07 03:01:28,219][472559] Updated weights for policy 0, policy_version 45164 (0.0004) +[2026-06-07 03:01:28,341][472559] Updated weights for policy 0, policy_version 45175 (0.0004) +[2026-06-07 03:01:28,879][472559] Updated weights for policy 0, policy_version 45186 (0.0004) +[2026-06-07 03:01:28,989][472559] Updated weights for policy 0, policy_version 45196 (0.0006) +[2026-06-07 03:01:29,101][472559] Updated weights for policy 0, policy_version 45206 (0.0009) +[2026-06-07 03:01:29,219][472559] Updated weights for policy 0, policy_version 45216 (0.0008) +[2026-06-07 03:01:29,340][472559] Updated weights for policy 0, policy_version 45226 (0.0007) +[2026-06-07 03:01:29,463][472559] Updated weights for policy 0, policy_version 45237 (0.0008) +[2026-06-07 03:01:29,986][472559] Updated weights for policy 0, policy_version 45247 (0.0008) +[2026-06-07 03:01:30,111][472559] Updated weights for policy 0, policy_version 45258 (0.0009) +[2026-06-07 03:01:30,227][472559] Updated weights for policy 0, policy_version 45268 (0.0008) +[2026-06-07 03:01:30,364][472559] Updated weights for policy 0, policy_version 45280 (0.0009) +[2026-06-07 03:01:30,480][472559] Updated weights for policy 0, policy_version 45290 (0.0008) +[2026-06-07 03:01:30,599][472559] Updated weights for policy 0, policy_version 45300 (0.0008) +[2026-06-07 03:01:31,145][472559] Updated weights for policy 0, policy_version 45310 (0.0008) +[2026-06-07 03:01:31,266][472559] Updated weights for policy 0, policy_version 45321 (0.0008) +[2026-06-07 03:01:31,375][472559] Updated weights for policy 0, policy_version 45331 (0.0008) +[2026-06-07 03:01:31,504][472559] Updated weights for policy 0, policy_version 45342 (0.0008) +[2026-06-07 03:01:31,618][472559] Updated weights for policy 0, policy_version 45352 (0.0008) +[2026-06-07 03:01:31,742][472559] Updated weights for policy 0, policy_version 45363 (0.0008) +[2026-06-07 03:01:32,281][472559] Updated weights for policy 0, policy_version 45373 (0.0008) +[2026-06-07 03:01:32,394][472559] Updated weights for policy 0, policy_version 45383 (0.0008) +[2026-06-07 03:01:32,507][472559] Updated weights for policy 0, policy_version 45393 (0.0008) +[2026-06-07 03:01:32,631][472559] Updated weights for policy 0, policy_version 45404 (0.0008) +[2026-06-07 03:01:32,758][472559] Updated weights for policy 0, policy_version 45415 (0.0009) +[2026-06-07 03:01:32,873][472559] Updated weights for policy 0, policy_version 45425 (0.0008) +[2026-06-07 03:01:33,117][464927] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 23265280. Throughput: 0: 28390.4. Samples: 23237248. Policy #0 lag: (min: 58.0, avg: 83.3, max: 122.0) +[2026-06-07 03:01:33,117][464927] Avg episode reward: [(0, '1614.950')] +[2026-06-07 03:01:33,122][472025] Saving new best policy, reward=1614.950! +[2026-06-07 03:01:33,416][472559] Updated weights for policy 0, policy_version 45435 (0.0008) +[2026-06-07 03:01:33,561][472559] Updated weights for policy 0, policy_version 45448 (0.0008) +[2026-06-07 03:01:33,674][472559] Updated weights for policy 0, policy_version 45458 (0.0008) +[2026-06-07 03:01:33,788][472559] Updated weights for policy 0, policy_version 45468 (0.0008) +[2026-06-07 03:01:33,902][472559] Updated weights for policy 0, policy_version 45478 (0.0008) +[2026-06-07 03:01:34,017][472559] Updated weights for policy 0, policy_version 45488 (0.0008) +[2026-06-07 03:01:34,576][472559] Updated weights for policy 0, policy_version 45498 (0.0009) +[2026-06-07 03:01:34,692][472559] Updated weights for policy 0, policy_version 45509 (0.0008) +[2026-06-07 03:01:34,809][472559] Updated weights for policy 0, policy_version 45519 (0.0008) +[2026-06-07 03:01:34,930][472559] Updated weights for policy 0, policy_version 45530 (0.0008) +[2026-06-07 03:01:35,050][472559] Updated weights for policy 0, policy_version 45540 (0.0009) +[2026-06-07 03:01:35,160][472559] Updated weights for policy 0, policy_version 45550 (0.0009) +[2026-06-07 03:01:35,729][472559] Updated weights for policy 0, policy_version 45561 (0.0008) +[2026-06-07 03:01:35,845][472559] Updated weights for policy 0, policy_version 45571 (0.0008) +[2026-06-07 03:01:35,952][472559] Updated weights for policy 0, policy_version 45581 (0.0008) +[2026-06-07 03:01:36,102][472559] Updated weights for policy 0, policy_version 45594 (0.0009) +[2026-06-07 03:01:36,238][472559] Updated weights for policy 0, policy_version 45606 (0.0008) +[2026-06-07 03:01:36,343][472559] Updated weights for policy 0, policy_version 45616 (0.0004) +[2026-06-07 03:01:36,897][472559] Updated weights for policy 0, policy_version 45626 (0.0006) +[2026-06-07 03:01:37,008][472559] Updated weights for policy 0, policy_version 45636 (0.0008) +[2026-06-07 03:01:37,139][472559] Updated weights for policy 0, policy_version 45648 (0.0009) +[2026-06-07 03:01:37,263][472559] Updated weights for policy 0, policy_version 45659 (0.0008) +[2026-06-07 03:01:37,383][472559] Updated weights for policy 0, policy_version 45670 (0.0009) +[2026-06-07 03:01:37,514][472559] Updated weights for policy 0, policy_version 45681 (0.0008) +[2026-06-07 03:01:38,092][472559] Updated weights for policy 0, policy_version 45693 (0.0007) +[2026-06-07 03:01:38,117][464927] Fps is (10 sec: 29491.0, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 23396352. Throughput: 0: 28398.9. Samples: 23404288. Policy #0 lag: (min: 58.0, avg: 83.3, max: 122.0) +[2026-06-07 03:01:38,118][464927] Avg episode reward: [(0, '1630.060')] +[2026-06-07 03:01:38,229][472559] Updated weights for policy 0, policy_version 45705 (0.0009) +[2026-06-07 03:01:38,347][472559] Updated weights for policy 0, policy_version 45716 (0.0008) +[2026-06-07 03:01:38,473][472559] Updated weights for policy 0, policy_version 45727 (0.0008) +[2026-06-07 03:01:38,600][472559] Updated weights for policy 0, policy_version 45738 (0.0008) +[2026-06-07 03:01:38,721][472559] Updated weights for policy 0, policy_version 45749 (0.0008) +[2026-06-07 03:01:38,750][472025] Saving new best policy, reward=1630.060! +[2026-06-07 03:01:39,282][472559] Updated weights for policy 0, policy_version 45761 (0.0008) +[2026-06-07 03:01:39,417][472559] Updated weights for policy 0, policy_version 45773 (0.0008) +[2026-06-07 03:01:39,535][472559] Updated weights for policy 0, policy_version 45784 (0.0008) +[2026-06-07 03:01:39,662][472559] Updated weights for policy 0, policy_version 45795 (0.0009) +[2026-06-07 03:01:39,786][472559] Updated weights for policy 0, policy_version 45806 (0.0009) +[2026-06-07 03:01:40,374][472559] Updated weights for policy 0, policy_version 45818 (0.0008) +[2026-06-07 03:01:40,504][472559] Updated weights for policy 0, policy_version 45830 (0.0008) +[2026-06-07 03:01:40,622][472559] Updated weights for policy 0, policy_version 45840 (0.0008) +[2026-06-07 03:01:40,753][472559] Updated weights for policy 0, policy_version 45852 (0.0008) +[2026-06-07 03:01:40,876][472559] Updated weights for policy 0, policy_version 45863 (0.0009) +[2026-06-07 03:01:40,987][472559] Updated weights for policy 0, policy_version 45873 (0.0008) +[2026-06-07 03:01:41,530][472559] Updated weights for policy 0, policy_version 45883 (0.0008) +[2026-06-07 03:01:41,650][472559] Updated weights for policy 0, policy_version 45894 (0.0007) +[2026-06-07 03:01:41,785][472559] Updated weights for policy 0, policy_version 45906 (0.0008) +[2026-06-07 03:01:41,914][472559] Updated weights for policy 0, policy_version 45918 (0.0008) +[2026-06-07 03:01:42,026][472559] Updated weights for policy 0, policy_version 45928 (0.0008) +[2026-06-07 03:01:42,141][472559] Updated weights for policy 0, policy_version 45938 (0.0008) +[2026-06-07 03:01:42,728][472559] Updated weights for policy 0, policy_version 45950 (0.0009) +[2026-06-07 03:01:42,844][472559] Updated weights for policy 0, policy_version 45961 (0.0008) +[2026-06-07 03:01:42,978][472559] Updated weights for policy 0, policy_version 45973 (0.0008) +[2026-06-07 03:01:43,112][472559] Updated weights for policy 0, policy_version 45984 (0.0008) +[2026-06-07 03:01:43,117][464927] Fps is (10 sec: 26214.4, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 23527424. Throughput: 0: 28390.4. Samples: 23571840. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 03:01:43,117][464927] Avg episode reward: [(0, '1704.195')] +[2026-06-07 03:01:43,231][472559] Updated weights for policy 0, policy_version 45994 (0.0009) +[2026-06-07 03:01:43,361][472559] Updated weights for policy 0, policy_version 46005 (0.0008) +[2026-06-07 03:01:43,387][472025] Saving new best policy, reward=1704.195! +[2026-06-07 03:01:43,888][472559] Updated weights for policy 0, policy_version 46015 (0.0008) +[2026-06-07 03:01:43,999][472559] Updated weights for policy 0, policy_version 46025 (0.0008) +[2026-06-07 03:01:44,115][472559] Updated weights for policy 0, policy_version 46036 (0.0009) +[2026-06-07 03:01:44,244][472559] Updated weights for policy 0, policy_version 46047 (0.0008) +[2026-06-07 03:01:44,354][472559] Updated weights for policy 0, policy_version 46057 (0.0008) +[2026-06-07 03:01:44,473][472559] Updated weights for policy 0, policy_version 46068 (0.0008) +[2026-06-07 03:01:45,032][472559] Updated weights for policy 0, policy_version 46078 (0.0008) +[2026-06-07 03:01:45,148][472559] Updated weights for policy 0, policy_version 46089 (0.0008) +[2026-06-07 03:01:45,258][472559] Updated weights for policy 0, policy_version 46099 (0.0008) +[2026-06-07 03:01:45,400][472559] Updated weights for policy 0, policy_version 46112 (0.0007) +[2026-06-07 03:01:45,518][472559] Updated weights for policy 0, policy_version 46122 (0.0007) +[2026-06-07 03:01:45,632][472559] Updated weights for policy 0, policy_version 46132 (0.0008) +[2026-06-07 03:01:46,198][472559] Updated weights for policy 0, policy_version 46142 (0.0007) +[2026-06-07 03:01:46,313][472559] Updated weights for policy 0, policy_version 46152 (0.0009) +[2026-06-07 03:01:46,436][472559] Updated weights for policy 0, policy_version 46163 (0.0008) +[2026-06-07 03:01:46,571][472559] Updated weights for policy 0, policy_version 46175 (0.0009) +[2026-06-07 03:01:46,695][472559] Updated weights for policy 0, policy_version 46185 (0.0008) +[2026-06-07 03:01:46,820][472559] Updated weights for policy 0, policy_version 46196 (0.0009) +[2026-06-07 03:01:47,350][472559] Updated weights for policy 0, policy_version 46206 (0.0007) +[2026-06-07 03:01:47,465][472559] Updated weights for policy 0, policy_version 46216 (0.0008) +[2026-06-07 03:01:47,577][472559] Updated weights for policy 0, policy_version 46226 (0.0008) +[2026-06-07 03:01:47,692][472559] Updated weights for policy 0, policy_version 46236 (0.0009) +[2026-06-07 03:01:47,820][472559] Updated weights for policy 0, policy_version 46247 (0.0008) +[2026-06-07 03:01:47,930][472559] Updated weights for policy 0, policy_version 46257 (0.0008) +[2026-06-07 03:01:48,117][464927] Fps is (10 sec: 29491.0, 60 sec: 28398.9, 300 sec: 28435.9). Total num frames: 23691264. Throughput: 0: 28376.1. Samples: 23661056. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 03:01:48,118][464927] Avg episode reward: [(0, '1701.746')] +[2026-06-07 03:01:48,488][472559] Updated weights for policy 0, policy_version 46268 (0.0008) +[2026-06-07 03:01:48,613][472559] Updated weights for policy 0, policy_version 46279 (0.0009) +[2026-06-07 03:01:48,725][472559] Updated weights for policy 0, policy_version 46289 (0.0008) +[2026-06-07 03:01:48,843][472559] Updated weights for policy 0, policy_version 46299 (0.0009) +[2026-06-07 03:01:48,952][472559] Updated weights for policy 0, policy_version 46309 (0.0008) +[2026-06-07 03:01:49,069][472559] Updated weights for policy 0, policy_version 46319 (0.0008) +[2026-06-07 03:01:49,600][472559] Updated weights for policy 0, policy_version 46329 (0.0008) +[2026-06-07 03:01:49,712][472559] Updated weights for policy 0, policy_version 46339 (0.0008) +[2026-06-07 03:01:49,832][472559] Updated weights for policy 0, policy_version 46350 (0.0008) +[2026-06-07 03:01:49,972][472559] Updated weights for policy 0, policy_version 46363 (0.0008) +[2026-06-07 03:01:50,103][472559] Updated weights for policy 0, policy_version 46374 (0.0008) +[2026-06-07 03:01:50,235][472559] Updated weights for policy 0, policy_version 46386 (0.0008) +[2026-06-07 03:01:50,780][472559] Updated weights for policy 0, policy_version 46396 (0.0008) +[2026-06-07 03:01:50,897][472559] Updated weights for policy 0, policy_version 46407 (0.0008) +[2026-06-07 03:01:51,019][472559] Updated weights for policy 0, policy_version 46417 (0.0009) +[2026-06-07 03:01:51,139][472559] Updated weights for policy 0, policy_version 46428 (0.0008) +[2026-06-07 03:01:51,253][472559] Updated weights for policy 0, policy_version 46438 (0.0008) +[2026-06-07 03:01:51,375][472559] Updated weights for policy 0, policy_version 46449 (0.0008) +[2026-06-07 03:01:51,919][472559] Updated weights for policy 0, policy_version 46459 (0.0008) +[2026-06-07 03:01:52,045][472559] Updated weights for policy 0, policy_version 46470 (0.0008) +[2026-06-07 03:01:52,169][472559] Updated weights for policy 0, policy_version 46481 (0.0008) +[2026-06-07 03:01:52,300][472559] Updated weights for policy 0, policy_version 46493 (0.0008) +[2026-06-07 03:01:52,438][472559] Updated weights for policy 0, policy_version 46505 (0.0008) +[2026-06-07 03:01:52,551][472559] Updated weights for policy 0, policy_version 46515 (0.0008) +[2026-06-07 03:01:53,113][472559] Updated weights for policy 0, policy_version 46526 (0.0008) +[2026-06-07 03:01:53,117][464927] Fps is (10 sec: 29491.1, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 23822336. Throughput: 0: 28359.1. Samples: 23827968. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 03:01:53,118][464927] Avg episode reward: [(0, '1677.433')] +[2026-06-07 03:01:53,233][472559] Updated weights for policy 0, policy_version 46537 (0.0008) +[2026-06-07 03:01:53,342][472559] Updated weights for policy 0, policy_version 46547 (0.0008) +[2026-06-07 03:01:53,476][472559] Updated weights for policy 0, policy_version 46559 (0.0008) +[2026-06-07 03:01:53,628][472559] Updated weights for policy 0, policy_version 46572 (0.0008) +[2026-06-07 03:01:53,742][472559] Updated weights for policy 0, policy_version 46582 (0.0008) +[2026-06-07 03:01:54,297][472559] Updated weights for policy 0, policy_version 46592 (0.0008) +[2026-06-07 03:01:54,410][472559] Updated weights for policy 0, policy_version 46602 (0.0008) +[2026-06-07 03:01:54,529][472559] Updated weights for policy 0, policy_version 46613 (0.0008) +[2026-06-07 03:01:54,658][472559] Updated weights for policy 0, policy_version 46624 (0.0009) +[2026-06-07 03:01:54,767][472559] Updated weights for policy 0, policy_version 46634 (0.0008) +[2026-06-07 03:01:54,910][472559] Updated weights for policy 0, policy_version 46646 (0.0009) +[2026-06-07 03:01:55,449][472559] Updated weights for policy 0, policy_version 46656 (0.0008) +[2026-06-07 03:01:55,584][472559] Updated weights for policy 0, policy_version 46668 (0.0009) +[2026-06-07 03:01:55,706][472559] Updated weights for policy 0, policy_version 46679 (0.0009) +[2026-06-07 03:01:55,823][472559] Updated weights for policy 0, policy_version 46689 (0.0008) +[2026-06-07 03:01:55,942][472559] Updated weights for policy 0, policy_version 46699 (0.0008) +[2026-06-07 03:01:56,077][472559] Updated weights for policy 0, policy_version 46711 (0.0008) +[2026-06-07 03:01:56,613][472559] Updated weights for policy 0, policy_version 46721 (0.0008) +[2026-06-07 03:01:56,754][472559] Updated weights for policy 0, policy_version 46733 (0.0008) +[2026-06-07 03:01:56,866][472559] Updated weights for policy 0, policy_version 46743 (0.0009) +[2026-06-07 03:01:56,982][472559] Updated weights for policy 0, policy_version 46753 (0.0008) +[2026-06-07 03:01:57,118][472559] Updated weights for policy 0, policy_version 46765 (0.0009) +[2026-06-07 03:01:57,242][472559] Updated weights for policy 0, policy_version 46776 (0.0008) +[2026-06-07 03:01:57,796][472559] Updated weights for policy 0, policy_version 46787 (0.0008) +[2026-06-07 03:01:57,920][472559] Updated weights for policy 0, policy_version 46798 (0.0009) +[2026-06-07 03:01:58,033][472559] Updated weights for policy 0, policy_version 46808 (0.0009) +[2026-06-07 03:01:58,117][464927] Fps is (10 sec: 26214.2, 60 sec: 28398.8, 300 sec: 28324.9). Total num frames: 23953408. Throughput: 0: 28353.3. Samples: 23995008. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 03:01:58,118][464927] Avg episode reward: [(0, '1683.444')] +[2026-06-07 03:01:58,147][472559] Updated weights for policy 0, policy_version 46818 (0.0008) +[2026-06-07 03:01:58,262][472559] Updated weights for policy 0, policy_version 46828 (0.0008) +[2026-06-07 03:01:58,393][472559] Updated weights for policy 0, policy_version 46839 (0.0008) +[2026-06-07 03:01:58,928][472559] Updated weights for policy 0, policy_version 46850 (0.0008) +[2026-06-07 03:01:59,037][472559] Updated weights for policy 0, policy_version 46860 (0.0008) +[2026-06-07 03:01:59,161][472559] Updated weights for policy 0, policy_version 46871 (0.0008) +[2026-06-07 03:01:59,277][472559] Updated weights for policy 0, policy_version 46881 (0.0009) +[2026-06-07 03:01:59,402][472559] Updated weights for policy 0, policy_version 46892 (0.0008) +[2026-06-07 03:01:59,515][472559] Updated weights for policy 0, policy_version 46902 (0.0009) +[2026-06-07 03:02:00,059][472559] Updated weights for policy 0, policy_version 46912 (0.0008) +[2026-06-07 03:02:00,173][472559] Updated weights for policy 0, policy_version 46922 (0.0008) +[2026-06-07 03:02:00,281][472559] Updated weights for policy 0, policy_version 46932 (0.0008) +[2026-06-07 03:02:00,409][472559] Updated weights for policy 0, policy_version 46943 (0.0008) +[2026-06-07 03:02:00,541][472559] Updated weights for policy 0, policy_version 46954 (0.0008) +[2026-06-07 03:02:00,651][472559] Updated weights for policy 0, policy_version 46964 (0.0008) +[2026-06-07 03:02:01,206][472559] Updated weights for policy 0, policy_version 46975 (0.0007) +[2026-06-07 03:02:01,321][472559] Updated weights for policy 0, policy_version 46985 (0.0008) +[2026-06-07 03:02:01,431][472559] Updated weights for policy 0, policy_version 46995 (0.0008) +[2026-06-07 03:02:01,548][472559] Updated weights for policy 0, policy_version 47005 (0.0009) +[2026-06-07 03:02:01,661][472559] Updated weights for policy 0, policy_version 47015 (0.0008) +[2026-06-07 03:02:01,771][472559] Updated weights for policy 0, policy_version 47025 (0.0008) +[2026-06-07 03:02:02,318][472559] Updated weights for policy 0, policy_version 47035 (0.0009) +[2026-06-07 03:02:02,426][472559] Updated weights for policy 0, policy_version 47045 (0.0008) +[2026-06-07 03:02:02,550][472559] Updated weights for policy 0, policy_version 47056 (0.0008) +[2026-06-07 03:02:02,676][472559] Updated weights for policy 0, policy_version 47067 (0.0008) +[2026-06-07 03:02:02,789][472559] Updated weights for policy 0, policy_version 47077 (0.0009) +[2026-06-07 03:02:02,905][472559] Updated weights for policy 0, policy_version 47087 (0.0008) +[2026-06-07 03:02:03,117][464927] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 24117248. Throughput: 0: 28319.3. Samples: 24083072. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 03:02:03,117][464927] Avg episode reward: [(0, '1708.601')] +[2026-06-07 03:02:03,122][472025] Saving new best policy, reward=1708.601! +[2026-06-07 03:02:03,444][472559] Updated weights for policy 0, policy_version 47097 (0.0008) +[2026-06-07 03:02:03,555][472559] Updated weights for policy 0, policy_version 47107 (0.0008) +[2026-06-07 03:02:03,672][472559] Updated weights for policy 0, policy_version 47117 (0.0008) +[2026-06-07 03:02:03,782][472559] Updated weights for policy 0, policy_version 47127 (0.0008) +[2026-06-07 03:02:03,909][472559] Updated weights for policy 0, policy_version 47138 (0.0008) +[2026-06-07 03:02:04,022][472559] Updated weights for policy 0, policy_version 47148 (0.0007) +[2026-06-07 03:02:04,136][472559] Updated weights for policy 0, policy_version 47158 (0.0008) +[2026-06-07 03:02:04,686][472559] Updated weights for policy 0, policy_version 47168 (0.0008) +[2026-06-07 03:02:04,801][472559] Updated weights for policy 0, policy_version 47178 (0.0009) +[2026-06-07 03:02:04,906][472559] Updated weights for policy 0, policy_version 47188 (0.0008) +[2026-06-07 03:02:05,034][472559] Updated weights for policy 0, policy_version 47199 (0.0008) +[2026-06-07 03:02:05,154][472559] Updated weights for policy 0, policy_version 47209 (0.0008) +[2026-06-07 03:02:05,279][472559] Updated weights for policy 0, policy_version 47220 (0.0009) +[2026-06-07 03:02:05,830][472559] Updated weights for policy 0, policy_version 47231 (0.0008) +[2026-06-07 03:02:05,945][472559] Updated weights for policy 0, policy_version 47241 (0.0008) +[2026-06-07 03:02:06,058][472559] Updated weights for policy 0, policy_version 47251 (0.0009) +[2026-06-07 03:02:06,169][472559] Updated weights for policy 0, policy_version 47261 (0.0008) +[2026-06-07 03:02:06,296][472559] Updated weights for policy 0, policy_version 47272 (0.0008) +[2026-06-07 03:02:06,414][472559] Updated weights for policy 0, policy_version 47283 (0.0008) +[2026-06-07 03:02:06,977][472559] Updated weights for policy 0, policy_version 47293 (0.0008) +[2026-06-07 03:02:07,093][472559] Updated weights for policy 0, policy_version 47303 (0.0008) +[2026-06-07 03:02:07,205][472559] Updated weights for policy 0, policy_version 47313 (0.0008) +[2026-06-07 03:02:07,319][472559] Updated weights for policy 0, policy_version 47323 (0.0008) +[2026-06-07 03:02:07,449][472559] Updated weights for policy 0, policy_version 47334 (0.0009) +[2026-06-07 03:02:07,573][472559] Updated weights for policy 0, policy_version 47345 (0.0008) +[2026-06-07 03:02:08,117][464927] Fps is (10 sec: 29491.6, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 24248320. Throughput: 0: 28305.0. Samples: 24251648. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 03:02:08,117][464927] Avg episode reward: [(0, '1787.299')] +[2026-06-07 03:02:08,127][472559] Updated weights for policy 0, policy_version 47356 (0.0007) +[2026-06-07 03:02:08,257][472559] Updated weights for policy 0, policy_version 47368 (0.0008) +[2026-06-07 03:02:08,380][472559] Updated weights for policy 0, policy_version 47379 (0.0009) +[2026-06-07 03:02:08,492][472559] Updated weights for policy 0, policy_version 47389 (0.0008) +[2026-06-07 03:02:08,607][472559] Updated weights for policy 0, policy_version 47399 (0.0008) +[2026-06-07 03:02:08,749][472559] Updated weights for policy 0, policy_version 47411 (0.0009) +[2026-06-07 03:02:08,802][472025] Saving new best policy, reward=1787.299! +[2026-06-07 03:02:09,296][472559] Updated weights for policy 0, policy_version 47421 (0.0008) +[2026-06-07 03:02:09,433][472559] Updated weights for policy 0, policy_version 47433 (0.0008) +[2026-06-07 03:02:09,558][472559] Updated weights for policy 0, policy_version 47444 (0.0008) +[2026-06-07 03:02:09,673][472559] Updated weights for policy 0, policy_version 47454 (0.0008) +[2026-06-07 03:02:09,801][472559] Updated weights for policy 0, policy_version 47465 (0.0008) +[2026-06-07 03:02:09,907][472559] Updated weights for policy 0, policy_version 47475 (0.0008) +[2026-06-07 03:02:10,468][472559] Updated weights for policy 0, policy_version 47486 (0.0008) +[2026-06-07 03:02:10,579][472559] Updated weights for policy 0, policy_version 47496 (0.0008) +[2026-06-07 03:02:10,702][472559] Updated weights for policy 0, policy_version 47507 (0.0008) +[2026-06-07 03:02:10,816][472559] Updated weights for policy 0, policy_version 47517 (0.0009) +[2026-06-07 03:02:10,931][472559] Updated weights for policy 0, policy_version 47527 (0.0008) +[2026-06-07 03:02:11,046][472559] Updated weights for policy 0, policy_version 47537 (0.0008) +[2026-06-07 03:02:11,586][472559] Updated weights for policy 0, policy_version 47547 (0.0008) +[2026-06-07 03:02:11,698][472559] Updated weights for policy 0, policy_version 47557 (0.0008) +[2026-06-07 03:02:11,808][472559] Updated weights for policy 0, policy_version 47567 (0.0008) +[2026-06-07 03:02:11,924][472559] Updated weights for policy 0, policy_version 47577 (0.0008) +[2026-06-07 03:02:12,038][472559] Updated weights for policy 0, policy_version 47587 (0.0008) +[2026-06-07 03:02:12,154][472559] Updated weights for policy 0, policy_version 47597 (0.0008) +[2026-06-07 03:02:12,277][472559] Updated weights for policy 0, policy_version 47608 (0.0008) +[2026-06-07 03:02:12,821][472559] Updated weights for policy 0, policy_version 47618 (0.0008) +[2026-06-07 03:02:12,964][472559] Updated weights for policy 0, policy_version 47630 (0.0008) +[2026-06-07 03:02:13,097][472559] Updated weights for policy 0, policy_version 47641 (0.0008) +[2026-06-07 03:02:13,117][464927] Fps is (10 sec: 26214.6, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 24379392. Throughput: 0: 28299.4. Samples: 24419456. Policy #0 lag: (min: 63.0, avg: 75.7, max: 127.0) +[2026-06-07 03:02:13,117][464927] Avg episode reward: [(0, '1822.479')] +[2026-06-07 03:02:13,224][472559] Updated weights for policy 0, policy_version 47653 (0.0008) +[2026-06-07 03:02:13,344][472559] Updated weights for policy 0, policy_version 47663 (0.0008) +[2026-06-07 03:02:13,443][472025] Saving new best policy, reward=1822.479! +[2026-06-07 03:02:13,876][472559] Updated weights for policy 0, policy_version 47673 (0.0008) +[2026-06-07 03:02:13,990][472559] Updated weights for policy 0, policy_version 47683 (0.0008) +[2026-06-07 03:02:14,111][472559] Updated weights for policy 0, policy_version 47694 (0.0008) +[2026-06-07 03:02:14,235][472559] Updated weights for policy 0, policy_version 47705 (0.0008) +[2026-06-07 03:02:14,366][472559] Updated weights for policy 0, policy_version 47716 (0.0008) +[2026-06-07 03:02:14,490][472559] Updated weights for policy 0, policy_version 47727 (0.0008) +[2026-06-07 03:02:15,056][472559] Updated weights for policy 0, policy_version 47738 (0.0008) +[2026-06-07 03:02:15,168][472559] Updated weights for policy 0, policy_version 47748 (0.0008) +[2026-06-07 03:02:15,281][472559] Updated weights for policy 0, policy_version 47758 (0.0009) +[2026-06-07 03:02:15,406][472559] Updated weights for policy 0, policy_version 47769 (0.0008) +[2026-06-07 03:02:15,543][472559] Updated weights for policy 0, policy_version 47781 (0.0008) +[2026-06-07 03:02:15,657][472559] Updated weights for policy 0, policy_version 47791 (0.0008) +[2026-06-07 03:02:16,210][472559] Updated weights for policy 0, policy_version 47801 (0.0008) +[2026-06-07 03:02:16,321][472559] Updated weights for policy 0, policy_version 47811 (0.0007) +[2026-06-07 03:02:16,432][472559] Updated weights for policy 0, policy_version 47821 (0.0008) +[2026-06-07 03:02:16,570][472559] Updated weights for policy 0, policy_version 47833 (0.0008) +[2026-06-07 03:02:16,685][472559] Updated weights for policy 0, policy_version 47843 (0.0008) +[2026-06-07 03:02:16,801][472559] Updated weights for policy 0, policy_version 47853 (0.0008) +[2026-06-07 03:02:16,925][472559] Updated weights for policy 0, policy_version 47864 (0.0008) +[2026-06-07 03:02:17,482][472559] Updated weights for policy 0, policy_version 47876 (0.0007) +[2026-06-07 03:02:17,622][472559] Updated weights for policy 0, policy_version 47888 (0.0008) +[2026-06-07 03:02:17,731][472559] Updated weights for policy 0, policy_version 47898 (0.0008) +[2026-06-07 03:02:17,858][472559] Updated weights for policy 0, policy_version 47909 (0.0008) +[2026-06-07 03:02:17,983][472559] Updated weights for policy 0, policy_version 47920 (0.0008) +[2026-06-07 03:02:18,117][464927] Fps is (10 sec: 29491.4, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 24543232. Throughput: 0: 28191.3. Samples: 24505856. Policy #0 lag: (min: 63.0, avg: 75.7, max: 127.0) +[2026-06-07 03:02:18,117][464927] Avg episode reward: [(0, '1848.628')] +[2026-06-07 03:02:18,122][472025] Saving new best policy, reward=1848.628! +[2026-06-07 03:02:18,540][472559] Updated weights for policy 0, policy_version 47931 (0.0008) +[2026-06-07 03:02:18,662][472559] Updated weights for policy 0, policy_version 47941 (0.0008) +[2026-06-07 03:02:18,796][472559] Updated weights for policy 0, policy_version 47953 (0.0008) +[2026-06-07 03:02:18,910][472559] Updated weights for policy 0, policy_version 47963 (0.0009) +[2026-06-07 03:02:19,025][472559] Updated weights for policy 0, policy_version 47973 (0.0008) +[2026-06-07 03:02:19,137][472559] Updated weights for policy 0, policy_version 47983 (0.0009) +[2026-06-07 03:02:19,668][472559] Updated weights for policy 0, policy_version 47993 (0.0008) +[2026-06-07 03:02:19,791][472559] Updated weights for policy 0, policy_version 48004 (0.0008) +[2026-06-07 03:02:19,935][472559] Updated weights for policy 0, policy_version 48017 (0.0008) +[2026-06-07 03:02:20,045][472559] Updated weights for policy 0, policy_version 48027 (0.0007) +[2026-06-07 03:02:20,169][472559] Updated weights for policy 0, policy_version 48038 (0.0004) +[2026-06-07 03:02:20,295][472559] Updated weights for policy 0, policy_version 48049 (0.0004) +[2026-06-07 03:02:20,855][472559] Updated weights for policy 0, policy_version 48060 (0.0008) +[2026-06-07 03:02:20,993][472559] Updated weights for policy 0, policy_version 48073 (0.0008) +[2026-06-07 03:02:21,112][472559] Updated weights for policy 0, policy_version 48084 (0.0009) +[2026-06-07 03:02:21,229][472559] Updated weights for policy 0, policy_version 48095 (0.0008) +[2026-06-07 03:02:21,360][472559] Updated weights for policy 0, policy_version 48107 (0.0008) +[2026-06-07 03:02:21,965][472559] Updated weights for policy 0, policy_version 48121 (0.0008) +[2026-06-07 03:02:22,076][472559] Updated weights for policy 0, policy_version 48131 (0.0006) +[2026-06-07 03:02:22,224][472559] Updated weights for policy 0, policy_version 48145 (0.0008) +[2026-06-07 03:02:22,368][472559] Updated weights for policy 0, policy_version 48158 (0.0008) +[2026-06-07 03:02:22,516][472559] Updated weights for policy 0, policy_version 48172 (0.0008) +[2026-06-07 03:02:22,650][472559] Updated weights for policy 0, policy_version 48184 (0.0008) +[2026-06-07 03:02:23,116][464927] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 24674304. Throughput: 0: 28305.1. Samples: 24678016. Policy #0 lag: (min: 63.0, avg: 75.7, max: 127.0) +[2026-06-07 03:02:23,117][464927] Avg episode reward: [(0, '1846.116')] +[2026-06-07 03:02:23,231][472559] Updated weights for policy 0, policy_version 48196 (0.0008) +[2026-06-07 03:02:23,340][472559] Updated weights for policy 0, policy_version 48206 (0.0008) +[2026-06-07 03:02:23,465][472559] Updated weights for policy 0, policy_version 48218 (0.0009) +[2026-06-07 03:02:23,587][472559] Updated weights for policy 0, policy_version 48229 (0.0008) +[2026-06-07 03:02:23,742][472559] Updated weights for policy 0, policy_version 48243 (0.0008) +[2026-06-07 03:02:24,338][472559] Updated weights for policy 0, policy_version 48255 (0.0008) +[2026-06-07 03:02:24,453][472559] Updated weights for policy 0, policy_version 48266 (0.0008) +[2026-06-07 03:02:24,561][472559] Updated weights for policy 0, policy_version 48276 (0.0008) +[2026-06-07 03:02:24,685][472559] Updated weights for policy 0, policy_version 48288 (0.0008) +[2026-06-07 03:02:24,819][472559] Updated weights for policy 0, policy_version 48300 (0.0008) +[2026-06-07 03:02:24,946][472559] Updated weights for policy 0, policy_version 48312 (0.0008) +[2026-06-07 03:02:25,534][472559] Updated weights for policy 0, policy_version 48323 (0.0008) +[2026-06-07 03:02:25,667][472559] Updated weights for policy 0, policy_version 48336 (0.0008) +[2026-06-07 03:02:25,785][472559] Updated weights for policy 0, policy_version 48347 (0.0008) +[2026-06-07 03:02:25,898][472559] Updated weights for policy 0, policy_version 48357 (0.0008) +[2026-06-07 03:02:26,026][472559] Updated weights for policy 0, policy_version 48368 (0.0008) +[2026-06-07 03:02:26,587][472559] Updated weights for policy 0, policy_version 48378 (0.0008) +[2026-06-07 03:02:26,704][472559] Updated weights for policy 0, policy_version 48389 (0.0008) +[2026-06-07 03:02:26,835][472559] Updated weights for policy 0, policy_version 48401 (0.0009) +[2026-06-07 03:02:26,956][472559] Updated weights for policy 0, policy_version 48412 (0.0008) +[2026-06-07 03:02:27,086][472559] Updated weights for policy 0, policy_version 48424 (0.0008) +[2026-06-07 03:02:27,225][472559] Updated weights for policy 0, policy_version 48437 (0.0008) +[2026-06-07 03:02:27,805][472559] Updated weights for policy 0, policy_version 48448 (0.0008) +[2026-06-07 03:02:27,969][472559] Updated weights for policy 0, policy_version 48464 (0.0008) +[2026-06-07 03:02:28,117][464927] Fps is (10 sec: 26214.3, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 24805376. Throughput: 0: 28293.7. Samples: 24845056. Policy #0 lag: (min: 63.0, avg: 75.7, max: 127.0) +[2026-06-07 03:02:28,117][464927] Avg episode reward: [(0, '1844.196')] +[2026-06-07 03:02:28,121][472559] Updated weights for policy 0, policy_version 48478 (0.0008) +[2026-06-07 03:02:28,263][472559] Updated weights for policy 0, policy_version 48491 (0.0009) +[2026-06-07 03:02:28,384][472559] Updated weights for policy 0, policy_version 48502 (0.0008) +[2026-06-07 03:02:28,982][472559] Updated weights for policy 0, policy_version 48515 (0.0006) +[2026-06-07 03:02:29,116][472559] Updated weights for policy 0, policy_version 48528 (0.0008) +[2026-06-07 03:02:29,239][472559] Updated weights for policy 0, policy_version 48539 (0.0008) +[2026-06-07 03:02:29,368][472559] Updated weights for policy 0, policy_version 48551 (0.0008) +[2026-06-07 03:02:29,511][472559] Updated weights for policy 0, policy_version 48564 (0.0009) +[2026-06-07 03:02:30,105][472559] Updated weights for policy 0, policy_version 48576 (0.0008) +[2026-06-07 03:02:30,232][472559] Updated weights for policy 0, policy_version 48588 (0.0008) +[2026-06-07 03:02:30,351][472559] Updated weights for policy 0, policy_version 48599 (0.0008) +[2026-06-07 03:02:30,469][472559] Updated weights for policy 0, policy_version 48610 (0.0008) +[2026-06-07 03:02:30,586][472559] Updated weights for policy 0, policy_version 48621 (0.0008) +[2026-06-07 03:02:30,706][472559] Updated weights for policy 0, policy_version 48632 (0.0008) +[2026-06-07 03:02:31,279][472559] Updated weights for policy 0, policy_version 48642 (0.0008) +[2026-06-07 03:02:31,400][472559] Updated weights for policy 0, policy_version 48653 (0.0008) +[2026-06-07 03:02:31,540][472559] Updated weights for policy 0, policy_version 48666 (0.0009) +[2026-06-07 03:02:31,667][472559] Updated weights for policy 0, policy_version 48678 (0.0008) +[2026-06-07 03:02:31,808][472559] Updated weights for policy 0, policy_version 48691 (0.0008) +[2026-06-07 03:02:32,419][472559] Updated weights for policy 0, policy_version 48704 (0.0008) +[2026-06-07 03:02:32,526][472559] Updated weights for policy 0, policy_version 48714 (0.0008) +[2026-06-07 03:02:32,658][472559] Updated weights for policy 0, policy_version 48726 (0.0009) +[2026-06-07 03:02:32,791][472559] Updated weights for policy 0, policy_version 48738 (0.0008) +[2026-06-07 03:02:32,913][472559] Updated weights for policy 0, policy_version 48749 (0.0009) +[2026-06-07 03:02:33,117][464927] Fps is (10 sec: 29491.0, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 24969216. Throughput: 0: 28273.8. Samples: 24933376. Policy #0 lag: (min: 63.0, avg: 75.7, max: 127.0) +[2026-06-07 03:02:33,117][464927] Avg episode reward: [(0, '1857.820')] +[2026-06-07 03:02:33,122][472025] Saving new best policy, reward=1857.820! +[2026-06-07 03:02:33,514][472559] Updated weights for policy 0, policy_version 48762 (0.0008) +[2026-06-07 03:02:33,642][472559] Updated weights for policy 0, policy_version 48774 (0.0008) +[2026-06-07 03:02:33,764][472559] Updated weights for policy 0, policy_version 48786 (0.0008) +[2026-06-07 03:02:33,917][472559] Updated weights for policy 0, policy_version 48800 (0.0009) +[2026-06-07 03:02:34,033][472559] Updated weights for policy 0, policy_version 48811 (0.0008) +[2026-06-07 03:02:34,176][472559] Updated weights for policy 0, policy_version 48824 (0.0008) +[2026-06-07 03:02:34,765][472559] Updated weights for policy 0, policy_version 48836 (0.0008) +[2026-06-07 03:02:34,803][472025] Early stopping after 2 epochs (16 sgd steps), loss delta 0.0000000 +[2026-06-07 03:02:34,804][472025] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed11/checkpoint_p0/checkpoint_000048840_25034752.pth... +[2026-06-07 03:02:34,805][472561] Stopping RolloutWorker_w1... +[2026-06-07 03:02:34,805][472561] Loop rollout_proc1_evt_loop terminating... +[2026-06-07 03:02:34,805][464927] Component RolloutWorker_w1 stopped! +[2026-06-07 03:02:34,806][472025] Stopping Batcher_0... +[2026-06-07 03:02:34,806][464927] Component Batcher_0 stopped! +[2026-06-07 03:02:34,807][472025] Loop batcher_evt_loop terminating... +[2026-06-07 03:02:34,807][464927] Component RolloutWorker_w0 stopped! +[2026-06-07 03:02:34,807][472560] Stopping RolloutWorker_w0... +[2026-06-07 03:02:34,808][472560] Loop rollout_proc0_evt_loop terminating... +[2026-06-07 03:02:34,824][472025] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed11/checkpoint_p0/checkpoint_000048840_25034752.pth... +[2026-06-07 03:02:34,845][472025] Stopping LearnerWorker_p0... +[2026-06-07 03:02:34,845][472025] Loop learner_proc0_evt_loop terminating... +[2026-06-07 03:02:34,845][464927] Component LearnerWorker_p0 stopped! +[2026-06-07 03:02:34,854][472559] Weights refcount: 2 0 +[2026-06-07 03:02:34,855][472559] Stopping InferenceWorker_p0-w0... +[2026-06-07 03:02:34,855][472559] Loop inference_proc0-0_evt_loop terminating... +[2026-06-07 03:02:34,855][464927] Component InferenceWorker_p0-w0 stopped! +[2026-06-07 03:02:34,856][464927] Waiting for process learner_proc0 to stop... +[2026-06-07 03:02:35,951][464927] Waiting for process inference_proc0-0 to join... +[2026-06-07 03:02:35,952][464927] Waiting for process rollout_proc0 to join... +[2026-06-07 03:02:35,953][464927] Waiting for process rollout_proc1 to join... +[2026-06-07 03:02:35,953][464927] Batcher 0 profile tree view: +batching: 0.8595, releasing_batches: 0.0355 +[2026-06-07 03:02:35,954][464927] InferenceWorker_p0-w0 profile tree view: wait_policy: 0.0000 - wait_policy_total: 857.7459 -update_model: 39.6057 + wait_policy_total: 492.3487 +update_model: 41.2255 weight_update: 0.0008 -one_step: 0.0013 - handle_policy_step: 520.6647 - deserialize: 12.4711, stack: 0.3927, obs_to_device_normalize: 51.2568, forward: 134.5967, prepare_outputs: 295.8082, send_messages: 9.0461 -[2026-06-07 02:34:33,877][321787] Learner 0 profile tree view: -misc: 0.0040, prepare_batch: 83.5327 -train: 1015.1894 - epoch_init: 0.0506, minibatch_init: 2.7521, losses_postprocess: 233.1378, kl_divergence: 22.1585, after_optimizer: 550.9691 - calculate_losses: 50.3438 - losses_init: 0.0875, forward_head: 13.7333, bptt_initial: 0.4962, bptt: 0.4892, tail: 11.5138, advantages_returns: 3.7937, losses: 16.2712 - update: 151.7123 - clip: 21.9761 -[2026-06-07 02:34:33,878][321787] RolloutWorker_w0 profile tree view: -wait_for_trajectories: 0.0441, enqueue_policy_requests: 14.8260, wait_policy_outputs_event: 4.0288, process_policy_outputs: 13.3614, env_step: 1001.3818, finalize_trajectories: 0.1630, complete_rollouts: 0.0986 -post_env_step: 22.6488 - process_env_step: 6.2121 -[2026-06-07 02:34:33,878][321787] RolloutWorker_w1 profile tree view: -wait_for_trajectories: 0.0416, enqueue_policy_requests: 14.6100, wait_policy_outputs_event: 3.6698, process_policy_outputs: 12.4170, env_step: 1064.5981, finalize_trajectories: 0.1555, complete_rollouts: 0.0950 -post_env_step: 22.5729 - process_env_step: 6.2076 -[2026-06-07 02:34:33,880][321787] Loop Runner_EvtLoop terminating... -[2026-06-07 02:34:33,880][321787] Runner profile tree view: -main_loop: 1448.3802 -[2026-06-07 02:34:33,881][321787] Collected {0: 25034752}, FPS: 17284.7 +one_step: 0.0016 + handle_policy_step: 336.6886 + deserialize: 4.4723, stack: 0.3235, obs_to_device_normalize: 48.6279, forward: 127.1322, prepare_outputs: 132.5428, send_messages: 9.2429 +[2026-06-07 03:02:35,954][464927] Learner 0 profile tree view: +misc: 0.0046, prepare_batch: 25.8785 +train: 550.2423 + epoch_init: 0.0580, minibatch_init: 2.5765, losses_postprocess: 150.8735, kl_divergence: 22.5554, after_optimizer: 196.3834 + calculate_losses: 40.0390 + losses_init: 0.0797, forward_head: 12.4902, bptt_initial: 0.4070, bptt: 0.4298, tail: 9.1865, advantages_returns: 2.9951, losses: 11.3037 + update: 134.6389 + clip: 13.3527 +[2026-06-07 03:02:35,955][464927] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.0328, enqueue_policy_requests: 115.8669, process_policy_outputs: 7.3896, env_step: 530.0606, finalize_trajectories: 0.1032, complete_rollouts: 0.0765 +post_env_step: 16.0570 + process_env_step: 4.6373 +[2026-06-07 03:02:35,955][464927] RolloutWorker_w1 profile tree view: +wait_for_trajectories: 0.0336, enqueue_policy_requests: 114.4408, process_policy_outputs: 7.2068, env_step: 527.0913, finalize_trajectories: 0.1020, complete_rollouts: 0.0794 +post_env_step: 15.7131 + process_env_step: 4.5256 +[2026-06-07 03:02:35,956][464927] Loop Runner_EvtLoop terminating... +[2026-06-07 03:02:35,957][464927] Runner profile tree view: +main_loop: 898.2615 +[2026-06-07 03:02:35,957][464927] Collected {0: 25034752}, FPS: 27870.2