diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/checkpoint_p0/best_000048504_24838144_reward_1932.382.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/checkpoint_p0/best_000048504_24838144_reward_1932.382.pth new file mode 100644 index 0000000000000000000000000000000000000000..6550336ab617ecde833a48136d0cb247433ccded --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/checkpoint_p0/best_000048504_24838144_reward_1932.382.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9fd257b124d7f69f6f5106e89f9bb9ad101dd916d8cb992f35c3a43c03808d0 +size 20560697 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/checkpoint_p0/checkpoint_000030200_15466496.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/checkpoint_p0/checkpoint_000030200_15466496.pth new file mode 100644 index 0000000000000000000000000000000000000000..f35e84740e93e44ea3823dde08695226e4ac9b93 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/checkpoint_p0/checkpoint_000030200_15466496.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cf0fa8bc1fd3b670b865981503196d404fb0b365f119c94bb773a8fcdf4059a +size 20561057 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/checkpoint_p0/checkpoint_000048840_25034752.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/checkpoint_p0/checkpoint_000048840_25034752.pth index acba8e3cb5fa7e8462b7da4910298c5d94a08070..a3a9ddb0f5d5f41fcef3cd9f66b4607c8247180b 100644 --- a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/checkpoint_p0/checkpoint_000048840_25034752.pth +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/checkpoint_p0/checkpoint_000048840_25034752.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8b8829700ad8905097657dc053fb6eb78e2b2d8e293f53895e660f8e35f18a4d +oid sha256:ba3b0390aff39030d376fcd947b113ec46ef1b6996ad5cbe566eca4ad5f34afd size 20561057 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/episode_metrics.jsonl b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/episode_metrics.jsonl index b1167513d76ce5536e4f9aa96a3be96e3c79beab..9ebc6c0622e0549084f5d3753822de2d9699831f 100644 --- a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/episode_metrics.jsonl +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/episode_metrics.jsonl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:113038c531917fafee0bc3d94523e8d25f40e4719d9deb460b6dd6c3359952ba -size 26868657 +oid sha256:109bb59dcf11c9167296765777980172294b72471e37b45d0881debc0986aaad +size 20648110 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/git.diff b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/git.diff index d0a0248311155ef2db8d42aec19fc1abcda13cd5..466fb8b9b61c2e47b54ca5d7f5f930e28515b107 100644 --- a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/git.diff +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/git.diff @@ -24,3 +24,10 @@ index 18376d9..646fe8f 100644 return "train" +diff --git a/starVLA b/starVLA +index ab3380d..9d8c567 160000 +--- a/starVLA ++++ b/starVLA +@@ -1 +1 @@ +-Subproject commit ab3380dfbd1de9649c15d154cc41b97788674537 ++Subproject commit 9d8c567188a3aa2a825296016cf17f3977101d8f diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/sf_log.txt b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/sf_log.txt index aa7b7fb43912a1c56d16bb349e30c04c09c0b4f9..00816b1d8da72efae388d276d212b00d281aaa49 100644 --- a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/sf_log.txt +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs1:obs30:stride1:seed13/sf_log.txt @@ -1,30 +1,30 @@ -[2026-06-06 15:37:33,651][29404] Saving configuration to results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed13/config.json... -[2026-06-06 15:37:33,768][29404] Using GPUs [0] for process 0 (actually maps to GPUs [3]) -[2026-06-06 15:37:33,770][29404] Rollout worker 0 uses device cuda:0 -[2026-06-06 15:37:33,770][29404] Using GPUs [0] for process 1 (actually maps to GPUs [3]) -[2026-06-06 15:37:33,771][29404] Rollout worker 1 uses device cuda:0 -[2026-06-06 15:37:35,123][29404] Using GPUs [0] for process 0 (actually maps to GPUs [3]) -[2026-06-06 15:37:35,124][29404] InferenceWorker_p0-w0: min num requests: 1 -[2026-06-06 15:37:35,131][29404] Using GPUs [0] for process 0 (actually maps to GPUs [3]) -[2026-06-06 15:37:35,138][29404] Using GPUs [0] for process 1 (actually maps to GPUs [3]) -[2026-06-06 15:37:35,139][29404] Starting all processes... -[2026-06-06 15:37:35,140][29404] Starting process learner_proc0 -[2026-06-06 15:37:38,127][29404] Starting all processes... -[2026-06-06 15:37:38,137][29404] Starting process inference_proc0-0 -[2026-06-06 15:37:38,138][29404] Starting process rollout_proc0 -[2026-06-06 15:37:38,138][29404] Starting process rollout_proc1 -[2026-06-06 15:37:38,670][31861] Using GPUs [0] for process 0 (actually maps to GPUs [3]) -[2026-06-06 15:37:38,670][31861] Set environment var CUDA_VISIBLE_DEVICES to '3' (GPU indices [0]) for learning process 0 -[2026-06-06 15:37:38,670][31861] Num visible devices: 1 -[2026-06-06 15:37:38,672][31861] Setting fixed seed 13 -[2026-06-06 15:37:38,674][31861] Using GPUs [0] for process 0 (actually maps to GPUs [3]) -[2026-06-06 15:37:38,675][31861] Initializing actor-critic model on device cuda:0 -[2026-06-06 15:37:38,675][31861] RunningMeanStd input shape: (3, 84, 84) -[2026-06-06 15:37:38,687][31861] RunningMeanStd input shape: (1,) -[2026-06-06 15:37:38,709][31861] ConvEncoder: input_channels=3 -[2026-06-06 15:37:38,898][31861] Conv encoder output size: 512 -[2026-06-06 15:37:38,901][31861] Created Actor Critic model with architecture: -[2026-06-06 15:37:38,901][31861] ActorCriticSharedWeights( +[2026-06-07 02:47:36,011][464932] Saving configuration to results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed13/config.json... +[2026-06-07 02:47:36,077][464932] Using GPUs [0] for process 0 (actually maps to GPUs [3]) +[2026-06-07 02:47:36,077][464932] Rollout worker 0 uses device cuda:0 +[2026-06-07 02:47:36,078][464932] Using GPUs [0] for process 1 (actually maps to GPUs [3]) +[2026-06-07 02:47:36,078][464932] Rollout worker 1 uses device cuda:0 +[2026-06-07 02:47:37,904][464932] Using GPUs [0] for process 0 (actually maps to GPUs [3]) +[2026-06-07 02:47:37,904][464932] InferenceWorker_p0-w0: min num requests: 1 +[2026-06-07 02:47:37,909][464932] Using GPUs [0] for process 0 (actually maps to GPUs [3]) +[2026-06-07 02:47:37,914][464932] Using GPUs [0] for process 1 (actually maps to GPUs [3]) +[2026-06-07 02:47:37,915][464932] Starting all processes... +[2026-06-07 02:47:37,915][464932] Starting process learner_proc0 +[2026-06-07 02:47:39,018][464932] Starting all processes... +[2026-06-07 02:47:39,022][464932] Starting process inference_proc0-0 +[2026-06-07 02:47:39,022][464932] Starting process rollout_proc0 +[2026-06-07 02:47:39,022][464932] Starting process rollout_proc1 +[2026-06-07 02:47:40,073][472028] Using GPUs [0] for process 0 (actually maps to GPUs [3]) +[2026-06-07 02:47:40,074][472028] Set environment var CUDA_VISIBLE_DEVICES to '3' (GPU indices [0]) for learning process 0 +[2026-06-07 02:47:40,074][472028] Num visible devices: 1 +[2026-06-07 02:47:40,074][472028] Setting fixed seed 13 +[2026-06-07 02:47:40,075][472028] Using GPUs [0] for process 0 (actually maps to GPUs [3]) +[2026-06-07 02:47:40,075][472028] Initializing actor-critic model on device cuda:0 +[2026-06-07 02:47:40,076][472028] RunningMeanStd input shape: (3, 84, 84) +[2026-06-07 02:47:40,083][472028] RunningMeanStd input shape: (1,) +[2026-06-07 02:47:40,090][472028] ConvEncoder: input_channels=3 +[2026-06-07 02:47:40,152][472028] Conv encoder output size: 512 +[2026-06-07 02:47:40,154][472028] Created Actor Critic model with architecture: +[2026-06-07 02:47:40,154][472028] ActorCriticSharedWeights( (obs_normalizer): ObservationNormalizer( (running_mean_std): RunningMeanStdDictInPlace( (running_mean_std): ModuleDict( @@ -65,6305 +65,4955 @@ (distribution_linear): Linear(in_features=512, out_features=2, bias=True) ) ) -[2026-06-06 15:37:38,917][31861] Using optimizer -[2026-06-06 15:37:40,721][31861] No checkpoints found -[2026-06-06 15:37:40,722][31861] Did not load from checkpoint, starting from scratch! -[2026-06-06 15:37:40,722][31861] Initialized policy 0 weights for model version 0 -[2026-06-06 15:37:40,725][31861] LearnerWorker_p0 finished initialization! -[2026-06-06 15:37:40,726][31861] Using GPUs [0] for process 0 (actually maps to GPUs [3]) -[2026-06-06 15:37:42,083][29404] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2026-06-06 15:37:44,043][32149] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127] -[2026-06-06 15:37:44,044][32149] Using GPUs [0] for process 0 (actually maps to GPUs [3]) -[2026-06-06 15:37:44,044][32149] Set environment var CUDA_VISIBLE_DEVICES to '3' (GPU indices [0]) for actor process 0 -[2026-06-06 15:37:44,044][32149] Num visible devices: 1 -[2026-06-06 15:37:44,129][32147] Using GPUs [0] for process 0 (actually maps to GPUs [3]) -[2026-06-06 15:37:44,130][32147] Set environment var CUDA_VISIBLE_DEVICES to '3' (GPU indices [0]) for inference process 0 -[2026-06-06 15:37:44,130][32147] Num visible devices: 1 -[2026-06-06 15:37:44,158][32147] RunningMeanStd input shape: (3, 84, 84) -[2026-06-06 15:37:44,167][32147] RunningMeanStd input shape: (1,) -[2026-06-06 15:37:44,189][32147] ConvEncoder: input_channels=3 -[2026-06-06 15:37:44,297][32148] Worker 1 uses CPU cores [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255] -[2026-06-06 15:37:44,298][32148] Using GPUs [0] for process 1 (actually maps to GPUs [3]) -[2026-06-06 15:37:44,298][32148] Set environment var CUDA_VISIBLE_DEVICES to '3' (GPU indices [0]) for actor process 1 -[2026-06-06 15:37:44,299][32148] Num visible devices: 1 -[2026-06-06 15:37:44,337][32147] Conv encoder output size: 512 -[2026-06-06 15:37:44,362][29404] Inference worker 0-0 is ready! -[2026-06-06 15:37:44,363][29404] All inference workers are ready! Signal rollout workers to start! -[2026-06-06 15:37:44,365][32148] EnvRunner 1-0 uses policy 0 -[2026-06-06 15:37:44,365][32149] EnvRunner 0-0 uses policy 0 -[2026-06-06 15:37:47,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2026-06-06 15:37:50,570][31861] Signal inference workers to stop experience collection... -[2026-06-06 15:37:50,588][32147] InferenceWorker_p0-w0: stopping experience collection -[2026-06-06 15:37:52,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 6720.1. Samples: 67200. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2026-06-06 15:37:52,084][29404] Avg episode reward: [(0, '-6.905')] -[2026-06-06 15:37:52,376][31861] Signal inference workers to resume experience collection... -[2026-06-06 15:37:52,378][32147] InferenceWorker_p0-w0: resuming experience collection -[2026-06-06 15:37:52,682][31861] EvtLoop [learner_proc0_evt_loop, process=learner_proc0] unhandled exception in slot='on_new_training_batch' connected to emitter=Emitter(object_id='Batcher_0', signal_name='training_batches_available'), args=(1,) -Traceback (most recent call last): - File "/venv/latency/lib/python3.10/site-packages/signal_slot/signal_slot.py", line 355, in _process_signal - slot_callable(*args) - File "/workspace/latency-sensitive-bench/sample-factory/sample_factory/algo/learning/learner_worker.py", line 150, in on_new_training_batch - stats = self.learner.train(self.batcher.training_batches[batch_idx]) - File "/workspace/latency-sensitive-bench/sample-factory/sample_factory/algo/learning/learner.py", line 1036, in train - buff, experience_size, num_invalids = self._prepare_batch(batch) - File "/workspace/latency-sensitive-bench/sample-factory/sample_factory/algo/learning/learner.py", line 1006, in _prepare_batch - d[k] = v.reshape((dataset_size,) + tuple(v.shape[2:])) -torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 2.58 GiB. GPU 0 has a total capacity of 23.52 GiB of which 2.37 GiB is free. Process 1844093 has 4.12 GiB memory in use. Process 1844387 has 4.12 GiB memory in use. Process 1846244 has 672.00 MiB memory in use. Process 1846424 has 632.00 MiB memory in use. Process 2213744 has 1.74 GiB memory in use. Process 2218740 has 7.01 GiB memory in use. Process 2219479 has 542.00 MiB memory in use. Process 2219483 has 1.16 GiB memory in use. Process 2219481 has 1.16 GiB memory in use. Of the allocated memory 3.95 GiB is allocated by PyTorch, and 2.59 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) -[2026-06-06 15:37:52,684][31861] Unhandled exception CUDA out of memory. Tried to allocate 2.58 GiB. GPU 0 has a total capacity of 23.52 GiB of which 2.37 GiB is free. Process 1844093 has 4.12 GiB memory in use. Process 1844387 has 4.12 GiB memory in use. Process 1846244 has 672.00 MiB memory in use. Process 1846424 has 632.00 MiB memory in use. Process 2213744 has 1.74 GiB memory in use. Process 2218740 has 7.01 GiB memory in use. Process 2219479 has 542.00 MiB memory in use. Process 2219483 has 1.16 GiB memory in use. Process 2219481 has 1.16 GiB memory in use. Of the allocated memory 3.95 GiB is allocated by PyTorch, and 2.59 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) in evt loop learner_proc0_evt_loop -[2026-06-06 15:37:55,109][29404] Heartbeat connected on Batcher_0 -[2026-06-06 15:37:55,124][29404] Heartbeat connected on InferenceWorker_p0-w0 -[2026-06-06 15:37:55,132][29404] Heartbeat connected on RolloutWorker_w0 -[2026-06-06 15:37:55,139][29404] Heartbeat connected on RolloutWorker_w1 -[2026-06-06 15:37:57,083][29404] Fps is (10 sec: 3276.8, 60 sec: 2184.6, 300 sec: 2184.6). Total num frames: 32768. Throughput: 0: 6775.6. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:37:57,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:38:02,083][29404] Fps is (10 sec: 3276.8, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 32768. Throughput: 0: 5081.6. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:38:02,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:38:07,083][29404] Fps is (10 sec: 0.0, 60 sec: 1310.7, 300 sec: 1310.7). Total num frames: 32768. Throughput: 0: 4065.3. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:38:07,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:38:12,083][29404] Fps is (10 sec: 0.0, 60 sec: 1092.3, 300 sec: 1092.3). Total num frames: 32768. Throughput: 0: 3387.7. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:38:12,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:38:17,085][29404] Fps is (10 sec: 0.0, 60 sec: 936.2, 300 sec: 936.2). Total num frames: 32768. Throughput: 0: 2903.8. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:38:17,086][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:38:22,083][29404] Fps is (10 sec: 0.0, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 32768. Throughput: 0: 2540.8. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:38:22,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:38:27,083][29404] Fps is (10 sec: 0.0, 60 sec: 728.2, 300 sec: 728.2). Total num frames: 32768. Throughput: 0: 2258.5. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:38:27,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:38:32,083][29404] Fps is (10 sec: 0.0, 60 sec: 655.4, 300 sec: 655.4). Total num frames: 32768. Throughput: 0: 2258.5. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:38:32,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:38:37,083][29404] Fps is (10 sec: 0.0, 60 sec: 595.8, 300 sec: 595.8). Total num frames: 32768. Throughput: 0: 765.2. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:38:37,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:38:42,083][29404] Fps is (10 sec: 0.0, 60 sec: 546.1, 300 sec: 546.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:38:42,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:38:47,083][29404] Fps is (10 sec: 0.0, 60 sec: 546.1, 300 sec: 504.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:38:47,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:38:52,083][29404] Fps is (10 sec: 0.0, 60 sec: 546.1, 300 sec: 468.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:38:52,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:38:57,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 436.9). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:38:57,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:39:02,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 409.6). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:39:02,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:39:07,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 385.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:39:07,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:39:12,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 364.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:39:12,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:39:17,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 344.9). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:39:17,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:39:22,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 327.7). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:39:22,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:39:27,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 312.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:39:27,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:39:32,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 297.9). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:39:32,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:39:37,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 284.9). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:39:37,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:39:42,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 273.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:39:42,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:39:47,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 262.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:39:47,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:39:52,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 252.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:39:52,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:39:57,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 242.7). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:39:57,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:40:02,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 234.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:40:02,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:40:07,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 226.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:40:07,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:40:12,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 218.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:40:12,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:40:17,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 211.4). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:40:17,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:40:22,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 204.8). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:40:22,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:40:27,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 198.6). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:40:27,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:40:27,091][29404] Components not started: LearnerWorker_p0, wait_time=184.1 seconds -[2026-06-06 15:40:32,084][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 192.8). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:40:32,086][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:40:37,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 187.2). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:40:37,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:40:42,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 182.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:40:42,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:40:47,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 177.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:40:47,086][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:40:52,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 172.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:40:52,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:40:57,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 168.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:40:57,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:41:02,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 163.8). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:41:02,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:41:07,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 159.8). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:41:07,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:41:12,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 156.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:41:12,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:41:17,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 152.4). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:41:17,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:41:22,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 148.9). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:41:22,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:41:27,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 145.6). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:41:27,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:41:32,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 142.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:41:32,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:41:37,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 139.4). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:41:37,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:41:42,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 136.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:41:42,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:41:47,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 133.7). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:41:47,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:41:52,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 131.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:41:52,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:41:57,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 128.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:41:57,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:42:02,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 126.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:42:02,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:42:07,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 123.7). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:42:07,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:42:12,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 121.4). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:42:12,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:42:17,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 119.2). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:42:17,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:42:22,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 117.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:42:22,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:42:27,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 115.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:42:27,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:42:32,084][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 113.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:42:32,086][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:42:37,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 111.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:42:37,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:42:42,084][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 111.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:42:42,086][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:42:47,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 111.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:42:47,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:42:52,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:42:52,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:42:57,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:42:57,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:43:02,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:43:02,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:43:07,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:43:07,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:43:12,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:43:12,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:43:17,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:43:17,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:43:22,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:43:22,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:43:27,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:43:27,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:43:27,093][29404] Components not started: LearnerWorker_p0, wait_time=364.1 seconds -[2026-06-06 15:43:32,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:43:32,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:43:37,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:43:37,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:43:42,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:43:42,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:43:47,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:43:47,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:43:52,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:43:52,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:43:57,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:43:57,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:44:02,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:44:02,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:44:07,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:44:07,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:44:12,084][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:44:12,086][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:44:17,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:44:17,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:44:22,084][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:44:22,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:44:27,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:44:27,086][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:44:32,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:44:32,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:44:37,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:44:37,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:44:42,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:44:42,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:44:47,084][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:44:47,086][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:44:52,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:44:52,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:44:57,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:44:57,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:45:02,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:45:02,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:45:07,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:45:07,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:45:12,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:45:12,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:45:17,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:45:17,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:45:22,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:45:22,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:45:27,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:45:27,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:45:32,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:45:32,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:45:37,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:45:37,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:45:42,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:45:42,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:45:47,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:45:47,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:45:52,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:45:52,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:45:57,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:45:57,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:46:02,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:46:02,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:46:07,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:46:07,085][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:46:12,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:46:12,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:46:17,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:46:17,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:46:22,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:46:22,086][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:46:27,083][29404] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 101632. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-06 15:46:27,084][29404] Avg episode reward: [(0, '-6.652')] -[2026-06-06 15:46:27,091][29404] Components not started: LearnerWorker_p0, wait_time=544.1 seconds -[2026-06-06 15:46:27,092][29404] Components take too long to start: LearnerWorker_p0. Aborting the experiment! - - - -[2026-06-06 15:46:27,094][31861] Stopping Batcher_0... -[2026-06-06 15:46:27,094][29404] Component Batcher_0 stopped! -[2026-06-06 15:46:27,095][31861] Loop batcher_evt_loop terminating... -[2026-06-06 15:46:27,095][29404] Waiting for ['LearnerWorker_p0', 'InferenceWorker_p0-w0', 'RolloutWorker_w0', 'RolloutWorker_w1'] to stop... -[2026-06-06 15:46:27,095][29404] Component RolloutWorker_w0 stopped! -[2026-06-06 15:46:27,096][29404] Waiting for ['LearnerWorker_p0', 'InferenceWorker_p0-w0', 'RolloutWorker_w1'] to stop... -[2026-06-06 15:46:27,095][32148] Stopping RolloutWorker_w1... -[2026-06-06 15:46:27,096][29404] Component RolloutWorker_w1 stopped! -[2026-06-06 15:46:27,095][32149] Stopping RolloutWorker_w0... -[2026-06-06 15:46:27,097][32148] Loop rollout_proc1_evt_loop terminating... -[2026-06-06 15:46:27,097][29404] Waiting for ['LearnerWorker_p0', 'InferenceWorker_p0-w0'] to stop... -[2026-06-06 15:46:27,097][32149] Loop rollout_proc0_evt_loop terminating... -[2026-06-06 15:46:27,171][32147] Weights refcount: 2 0 -[2026-06-06 15:46:27,174][32147] Stopping InferenceWorker_p0-w0... -[2026-06-06 15:46:27,175][32147] Loop inference_proc0-0_evt_loop terminating... -[2026-06-06 15:46:27,174][29404] Component InferenceWorker_p0-w0 stopped! -[2026-06-06 15:46:27,176][29404] Waiting for ['LearnerWorker_p0'] to stop... -[2026-06-07 01:46:39,401][309677] Saving configuration to results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed13/config.json... -[2026-06-07 01:46:39,466][309677] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-07 01:46:39,467][309677] Rollout worker 0 uses device cuda:0 -[2026-06-07 01:46:39,468][309677] Using GPUs [0] for process 1 (actually maps to GPUs [1]) -[2026-06-07 01:46:39,468][309677] Rollout worker 1 uses device cuda:0 -[2026-06-07 01:46:41,299][309677] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-07 01:46:41,301][309677] InferenceWorker_p0-w0: min num requests: 1 -[2026-06-07 01:46:41,308][309677] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-07 01:46:41,313][309677] Using GPUs [0] for process 1 (actually maps to GPUs [1]) -[2026-06-07 01:46:41,316][309677] Starting all processes... -[2026-06-07 01:46:41,316][309677] Starting process learner_proc0 -[2026-06-07 01:46:42,789][309677] Starting all processes... -[2026-06-07 01:46:42,794][309677] Starting process inference_proc0-0 -[2026-06-07 01:46:42,795][309677] Starting process rollout_proc0 -[2026-06-07 01:46:42,797][309677] Starting process rollout_proc1 -[2026-06-07 01:46:43,422][314624] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-07 01:46:43,422][314624] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for learning process 0 -[2026-06-07 01:46:43,423][314624] Num visible devices: 1 -[2026-06-07 01:46:43,424][314624] Setting fixed seed 13 -[2026-06-07 01:46:43,425][314624] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-07 01:46:43,426][314624] Initializing actor-critic model on device cuda:0 -[2026-06-07 01:46:43,426][314624] RunningMeanStd input shape: (3, 84, 84) -[2026-06-07 01:46:43,436][314624] RunningMeanStd input shape: (1,) -[2026-06-07 01:46:43,451][314624] ConvEncoder: input_channels=3 -[2026-06-07 01:46:43,600][314624] Conv encoder output size: 512 -[2026-06-07 01:46:43,602][314624] Created Actor Critic model with architecture: -[2026-06-07 01:46:43,603][314624] ActorCriticSharedWeights( - (obs_normalizer): ObservationNormalizer( - (running_mean_std): RunningMeanStdDictInPlace( - (running_mean_std): ModuleDict( - (obs): RunningMeanStdInPlace() - ) - ) - ) - (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) - (encoder): MultiInputEncoder( - (encoders): ModuleDict( - (obs): ConvEncoder( - (enc): RecursiveScriptModule( - original_name=ConvEncoderImpl - (conv_head): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Conv2d) - (1): RecursiveScriptModule(original_name=ELU) - (2): RecursiveScriptModule(original_name=Conv2d) - (3): RecursiveScriptModule(original_name=ELU) - (4): RecursiveScriptModule(original_name=Conv2d) - (5): RecursiveScriptModule(original_name=ELU) - ) - (mlp_layers): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Linear) - (1): RecursiveScriptModule(original_name=ELU) - ) - ) - ) - ) - ) - (core): ModelCoreIdentity() - (decoder): MlpDecoder( - (mlp): Identity() - ) - (critic_linear): Linear(in_features=512, out_features=1, bias=True) - (action_parameterization): ActionParameterizationDefault( - (distribution_linear): Linear(in_features=512, out_features=2, bias=True) - ) -) -[2026-06-07 01:46:43,614][314624] Using optimizer -[2026-06-07 01:46:44,758][314624] No checkpoints found -[2026-06-07 01:46:44,758][314624] Did not load from checkpoint, starting from scratch! -[2026-06-07 01:46:44,759][314624] Initialized policy 0 weights for model version 0 -[2026-06-07 01:46:44,763][314624] LearnerWorker_p0 finished initialization! -[2026-06-07 01:46:44,764][314624] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-07 01:46:45,083][309677] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2026-06-07 01:46:45,382][315155] Worker 1 uses CPU cores [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255] -[2026-06-07 01:46:45,382][315155] Using GPUs [0] for process 1 (actually maps to GPUs [1]) -[2026-06-07 01:46:45,383][315155] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for actor process 1 -[2026-06-07 01:46:45,384][315155] Num visible devices: 1 -[2026-06-07 01:46:46,671][315154] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-07 01:46:46,671][315154] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for inference process 0 -[2026-06-07 01:46:46,672][315154] Num visible devices: 1 -[2026-06-07 01:46:46,691][315154] RunningMeanStd input shape: (3, 84, 84) -[2026-06-07 01:46:46,698][315154] RunningMeanStd input shape: (1,) -[2026-06-07 01:46:46,701][315156] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127] -[2026-06-07 01:46:46,702][315156] Using GPUs [0] for process 0 (actually maps to GPUs [1]) -[2026-06-07 01:46:46,702][315156] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for actor process 0 -[2026-06-07 01:46:46,702][315156] Num visible devices: 1 -[2026-06-07 01:46:46,716][315154] ConvEncoder: input_channels=3 -[2026-06-07 01:46:46,804][315154] Conv encoder output size: 512 -[2026-06-07 01:46:46,847][309677] Inference worker 0-0 is ready! -[2026-06-07 01:46:46,848][309677] All inference workers are ready! Signal rollout workers to start! -[2026-06-07 01:46:46,849][315156] EnvRunner 0-0 uses policy 0 -[2026-06-07 01:46:46,849][315155] EnvRunner 1-0 uses policy 0 -[2026-06-07 01:46:50,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 6937.6. Samples: 34688. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2026-06-07 01:46:50,084][309677] Avg episode reward: [(0, '-7.036')] -[2026-06-07 01:46:51,833][314624] Signal inference workers to stop experience collection... -[2026-06-07 01:46:51,852][315154] InferenceWorker_p0-w0: stopping experience collection -[2026-06-07 01:46:53,859][314624] Signal inference workers to resume experience collection... -[2026-06-07 01:46:53,859][315154] InferenceWorker_p0-w0: resuming experience collection -[2026-06-07 01:46:54,111][314624] EvtLoop [learner_proc0_evt_loop, process=learner_proc0] unhandled exception in slot='on_new_training_batch' connected to emitter=Emitter(object_id='Batcher_0', signal_name='training_batches_available'), args=(1,) -Traceback (most recent call last): - File "/venv/latency/lib/python3.10/site-packages/signal_slot/signal_slot.py", line 355, in _process_signal - slot_callable(*args) - File "/workspace/latency-sensitive-bench/sample-factory/sample_factory/algo/learning/learner_worker.py", line 150, in on_new_training_batch - stats = self.learner.train(self.batcher.training_batches[batch_idx]) - File "/workspace/latency-sensitive-bench/sample-factory/sample_factory/algo/learning/learner.py", line 1036, in train - buff, experience_size, num_invalids = self._prepare_batch(batch) - File "/workspace/latency-sensitive-bench/sample-factory/sample_factory/algo/learning/learner.py", line 1006, in _prepare_batch - d[k] = v.reshape((dataset_size,) + tuple(v.shape[2:])) -torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 2.58 GiB. GPU 0 has a total capacity of 23.52 GiB of which 315.69 MiB is free. Process 1048759 has 1.74 GiB memory in use. Process 1048756 has 1.74 GiB memory in use. Process 1056287 has 7.01 GiB memory in use. Process 1056348 has 6.98 GiB memory in use. Process 1057238 has 1.16 GiB memory in use. Process 1057236 has 548.00 MiB memory in use. Process 1057240 has 1.16 GiB memory in use. Process 1057311 has 526.00 MiB memory in use. Process 1057315 has 1.16 GiB memory in use. Process 1057313 has 1.16 GiB memory in use. Of the allocated memory 3.95 GiB is allocated by PyTorch, and 2.59 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) -[2026-06-07 01:46:54,112][314624] Unhandled exception CUDA out of memory. Tried to allocate 2.58 GiB. GPU 0 has a total capacity of 23.52 GiB of which 315.69 MiB is free. Process 1048759 has 1.74 GiB memory in use. Process 1048756 has 1.74 GiB memory in use. Process 1056287 has 7.01 GiB memory in use. Process 1056348 has 6.98 GiB memory in use. Process 1057238 has 1.16 GiB memory in use. Process 1057236 has 548.00 MiB memory in use. Process 1057240 has 1.16 GiB memory in use. Process 1057311 has 526.00 MiB memory in use. Process 1057315 has 1.16 GiB memory in use. Process 1057313 has 1.16 GiB memory in use. Of the allocated memory 3.95 GiB is allocated by PyTorch, and 2.59 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) in evt loop learner_proc0_evt_loop -[2026-06-07 01:46:55,083][309677] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 3276.8). Total num frames: 32768. Throughput: 0: 6822.4. Samples: 68224. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:46:55,084][309677] Avg episode reward: [(0, '-6.689')] -[2026-06-07 01:47:00,082][309677] Fps is (10 sec: 3276.8, 60 sec: 2184.5, 300 sec: 2184.5). Total num frames: 32768. Throughput: 0: 6715.7. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:47:00,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:47:01,287][309677] Heartbeat connected on Batcher_0 -[2026-06-07 01:47:01,300][309677] Heartbeat connected on InferenceWorker_p0-w0 -[2026-06-07 01:47:01,308][309677] Heartbeat connected on RolloutWorker_w0 -[2026-06-07 01:47:01,314][309677] Heartbeat connected on RolloutWorker_w1 -[2026-06-07 01:47:05,082][309677] Fps is (10 sec: 0.0, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 32768. Throughput: 0: 5036.8. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:47:05,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:47:10,083][309677] Fps is (10 sec: 0.0, 60 sec: 1310.7, 300 sec: 1310.7). Total num frames: 32768. Throughput: 0: 4029.4. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:47:10,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:47:15,082][309677] Fps is (10 sec: 0.0, 60 sec: 1092.3, 300 sec: 1092.3). Total num frames: 32768. Throughput: 0: 3357.9. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:47:15,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:47:20,082][309677] Fps is (10 sec: 0.0, 60 sec: 936.2, 300 sec: 936.2). Total num frames: 32768. Throughput: 0: 2878.2. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:47:20,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:47:25,082][309677] Fps is (10 sec: 0.0, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 32768. Throughput: 0: 2518.4. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:47:25,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:47:30,082][309677] Fps is (10 sec: 0.0, 60 sec: 728.2, 300 sec: 728.2). Total num frames: 32768. Throughput: 0: 2238.6. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:47:30,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:47:35,082][309677] Fps is (10 sec: 0.0, 60 sec: 655.4, 300 sec: 655.4). Total num frames: 32768. Throughput: 0: 1467.7. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:47:35,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:47:40,082][309677] Fps is (10 sec: 0.0, 60 sec: 595.8, 300 sec: 595.8). Total num frames: 32768. Throughput: 0: 722.5. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:47:40,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:47:45,082][309677] Fps is (10 sec: 0.0, 60 sec: 546.1, 300 sec: 546.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:47:45,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:47:50,082][309677] Fps is (10 sec: 0.0, 60 sec: 546.1, 300 sec: 504.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:47:50,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:47:55,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 468.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:47:55,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:48:00,083][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 436.9). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:48:00,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:48:05,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 409.6). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:48:05,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:48:10,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 385.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:48:10,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:48:15,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 364.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:48:15,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:48:20,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 344.9). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:48:20,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:48:25,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 327.7). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:48:25,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:48:30,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 312.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:48:30,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:48:35,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 297.9). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:48:35,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:48:40,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 284.9). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:48:40,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:48:45,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 273.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:48:45,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:48:50,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 262.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:48:50,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:48:55,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 252.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:48:55,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:49:00,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 242.7). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:49:00,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:49:05,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 234.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:49:05,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:49:10,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 226.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:49:10,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:49:15,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 218.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:49:15,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:49:20,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 211.4). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:49:20,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:49:25,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 204.8). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:49:25,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:49:30,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 198.6). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:49:30,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:49:35,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 192.8). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:49:35,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:49:35,091][309677] Components not started: LearnerWorker_p0, wait_time=182.9 seconds -[2026-06-07 01:49:40,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 187.2). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:49:40,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:49:45,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 182.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:49:45,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:49:50,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 177.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:49:50,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:49:55,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 172.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:49:55,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:50:00,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 168.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:50:00,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:50:05,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 163.8). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:50:05,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:50:10,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 159.8). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:50:10,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:50:15,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 156.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:50:15,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:50:20,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 152.4). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:50:20,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:50:25,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 148.9). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:50:25,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:50:30,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 145.6). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:50:30,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:50:35,083][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 142.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:50:35,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:50:40,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 139.4). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:50:40,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:50:45,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 136.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:50:45,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:50:50,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 133.7). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:50:50,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:50:55,084][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 131.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:50:55,087][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:51:00,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 128.5). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:51:00,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:51:05,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 126.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:51:05,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:51:10,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 123.7). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:51:10,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:51:15,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 121.4). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:51:15,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:51:20,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 119.2). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:51:20,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:51:25,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 117.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:51:25,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:51:30,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 115.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:51:30,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:51:35,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 113.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:51:35,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:51:40,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 111.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:51:40,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:51:45,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 111.1). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:51:45,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:51:50,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:51:50,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:51:55,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:51:55,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:52:00,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:52:00,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:52:05,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:52:05,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:52:10,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:52:10,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:52:15,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:52:15,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:52:20,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:52:20,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:52:25,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:52:25,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:52:30,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:52:30,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:52:35,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:52:35,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:52:35,086][309677] Components not started: LearnerWorker_p0, wait_time=362.9 seconds -[2026-06-07 01:52:40,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:52:40,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:52:45,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:52:45,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:52:50,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:52:50,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:52:55,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:52:55,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:53:00,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:53:00,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:53:05,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:53:05,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:53:10,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:53:10,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:53:15,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:53:15,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:53:20,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:53:20,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:53:25,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:53:25,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:53:30,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:53:30,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:53:35,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:53:35,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:53:40,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:53:40,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:53:45,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:53:45,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:53:50,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:53:50,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:53:55,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:53:55,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:54:00,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:54:00,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:54:05,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:54:05,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:54:10,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:54:10,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:54:15,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:54:15,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:54:20,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:54:20,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:54:25,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:54:25,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:54:30,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:54:30,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:54:35,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:54:35,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:54:40,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:54:40,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:54:45,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:54:45,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:54:50,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:54:50,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:54:55,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:54:55,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:55:00,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:55:00,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:55:05,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:55:05,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:55:10,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:55:10,084][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:55:15,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:55:15,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:55:20,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:55:20,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:55:25,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:55:25,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:55:30,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:55:30,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:55:35,082][309677] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 32768. Throughput: 0: 0.0. Samples: 100736. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 01:55:35,083][309677] Avg episode reward: [(0, '-6.952')] -[2026-06-07 01:55:35,090][309677] Components not started: LearnerWorker_p0, wait_time=542.9 seconds -[2026-06-07 01:55:35,091][309677] Components take too long to start: LearnerWorker_p0. Aborting the experiment! - - - -[2026-06-07 01:55:35,093][314624] Stopping Batcher_0... -[2026-06-07 01:55:35,093][314624] Loop batcher_evt_loop terminating... -[2026-06-07 01:55:35,093][309677] Component Batcher_0 stopped! -[2026-06-07 01:55:35,093][309677] Waiting for ['LearnerWorker_p0', 'InferenceWorker_p0-w0', 'RolloutWorker_w0', 'RolloutWorker_w1'] to stop... -[2026-06-07 01:55:35,093][315156] Stopping RolloutWorker_w0... -[2026-06-07 01:55:35,093][315155] Stopping RolloutWorker_w1... -[2026-06-07 01:55:35,094][315156] Loop rollout_proc0_evt_loop terminating... -[2026-06-07 01:55:35,095][315155] Loop rollout_proc1_evt_loop terminating... -[2026-06-07 01:55:35,094][309677] Component RolloutWorker_w0 stopped! -[2026-06-07 01:55:35,095][309677] Waiting for ['LearnerWorker_p0', 'InferenceWorker_p0-w0', 'RolloutWorker_w1'] to stop... -[2026-06-07 01:55:35,095][309677] Component RolloutWorker_w1 stopped! -[2026-06-07 01:55:35,096][309677] Waiting for ['LearnerWorker_p0', 'InferenceWorker_p0-w0'] to stop... -[2026-06-07 01:55:35,210][315154] Weights refcount: 2 0 -[2026-06-07 01:55:35,212][315154] Stopping InferenceWorker_p0-w0... -[2026-06-07 01:55:35,213][315154] Loop inference_proc0-0_evt_loop terminating... -[2026-06-07 01:55:35,213][309677] Component InferenceWorker_p0-w0 stopped! -[2026-06-07 01:55:35,213][309677] Waiting for ['LearnerWorker_p0'] to stop... -[2026-06-07 02:10:24,579][321791] Saving configuration to results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed13/config.json... -[2026-06-07 02:10:24,645][321791] Using GPUs [0] for process 0 (actually maps to GPUs [3]) -[2026-06-07 02:10:24,646][321791] Rollout worker 0 uses device cuda:0 -[2026-06-07 02:10:24,646][321791] Using GPUs [0] for process 1 (actually maps to GPUs [3]) -[2026-06-07 02:10:24,647][321791] Rollout worker 1 uses device cuda:0 -[2026-06-07 02:10:25,662][321791] Using GPUs [0] for process 0 (actually maps to GPUs [3]) -[2026-06-07 02:10:25,663][321791] InferenceWorker_p0-w0: min num requests: 1 -[2026-06-07 02:10:25,669][321791] Using GPUs [0] for process 0 (actually maps to GPUs [3]) -[2026-06-07 02:10:25,674][321791] Using GPUs [0] for process 1 (actually maps to GPUs [3]) -[2026-06-07 02:10:25,675][321791] Starting all processes... -[2026-06-07 02:10:25,675][321791] Starting process learner_proc0 -[2026-06-07 02:10:27,155][321791] Starting all processes... -[2026-06-07 02:10:27,160][321791] Starting process inference_proc0-0 -[2026-06-07 02:10:27,160][321791] Starting process rollout_proc0 -[2026-06-07 02:10:27,161][321791] Starting process rollout_proc1 -[2026-06-07 02:10:27,773][324276] Using GPUs [0] for process 0 (actually maps to GPUs [3]) -[2026-06-07 02:10:27,773][324276] Set environment var CUDA_VISIBLE_DEVICES to '3' (GPU indices [0]) for learning process 0 -[2026-06-07 02:10:27,773][324276] Num visible devices: 1 -[2026-06-07 02:10:27,774][324276] Setting fixed seed 13 -[2026-06-07 02:10:27,775][324276] Using GPUs [0] for process 0 (actually maps to GPUs [3]) -[2026-06-07 02:10:27,775][324276] Initializing actor-critic model on device cuda:0 -[2026-06-07 02:10:27,776][324276] RunningMeanStd input shape: (3, 84, 84) -[2026-06-07 02:10:27,781][324276] RunningMeanStd input shape: (1,) -[2026-06-07 02:10:27,791][324276] ConvEncoder: input_channels=3 -[2026-06-07 02:10:27,916][324276] Conv encoder output size: 512 -[2026-06-07 02:10:27,918][324276] Created Actor Critic model with architecture: -[2026-06-07 02:10:27,918][324276] ActorCriticSharedWeights( - (obs_normalizer): ObservationNormalizer( - (running_mean_std): RunningMeanStdDictInPlace( - (running_mean_std): ModuleDict( - (obs): RunningMeanStdInPlace() - ) - ) - ) - (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) - (encoder): MultiInputEncoder( - (encoders): ModuleDict( - (obs): ConvEncoder( - (enc): RecursiveScriptModule( - original_name=ConvEncoderImpl - (conv_head): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Conv2d) - (1): RecursiveScriptModule(original_name=ELU) - (2): RecursiveScriptModule(original_name=Conv2d) - (3): RecursiveScriptModule(original_name=ELU) - (4): RecursiveScriptModule(original_name=Conv2d) - (5): RecursiveScriptModule(original_name=ELU) - ) - (mlp_layers): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Linear) - (1): RecursiveScriptModule(original_name=ELU) - ) - ) - ) - ) - ) - (core): ModelCoreIdentity() - (decoder): MlpDecoder( - (mlp): Identity() - ) - (critic_linear): Linear(in_features=512, out_features=1, bias=True) - (action_parameterization): ActionParameterizationDefault( - (distribution_linear): Linear(in_features=512, out_features=2, bias=True) - ) -) -[2026-06-07 02:10:27,923][324276] Using optimizer -[2026-06-07 02:10:28,855][324276] No checkpoints found -[2026-06-07 02:10:28,856][324276] Did not load from checkpoint, starting from scratch! -[2026-06-07 02:10:28,856][324276] Initialized policy 0 weights for model version 0 -[2026-06-07 02:10:28,862][324276] LearnerWorker_p0 finished initialization! -[2026-06-07 02:10:28,862][324276] Using GPUs [0] for process 0 (actually maps to GPUs [3]) -[2026-06-07 02:10:30,382][324563] Using GPUs [0] for process 0 (actually maps to GPUs [3]) -[2026-06-07 02:10:30,382][324563] Set environment var CUDA_VISIBLE_DEVICES to '3' (GPU indices [0]) for inference process 0 -[2026-06-07 02:10:30,383][324563] Num visible devices: 1 -[2026-06-07 02:10:30,384][324563] RunningMeanStd input shape: (3, 84, 84) -[2026-06-07 02:10:30,387][324564] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127] -[2026-06-07 02:10:30,387][324564] Using GPUs [0] for process 0 (actually maps to GPUs [3]) -[2026-06-07 02:10:30,387][324564] Set environment var CUDA_VISIBLE_DEVICES to '3' (GPU indices [0]) for actor process 0 -[2026-06-07 02:10:30,388][324564] Num visible devices: 1 -[2026-06-07 02:10:30,391][324563] RunningMeanStd input shape: (1,) -[2026-06-07 02:10:30,400][324567] Worker 1 uses CPU cores [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255] -[2026-06-07 02:10:30,400][324567] Using GPUs [0] for process 1 (actually maps to GPUs [3]) -[2026-06-07 02:10:30,401][324567] Set environment var CUDA_VISIBLE_DEVICES to '3' (GPU indices [0]) for actor process 1 -[2026-06-07 02:10:30,401][324567] Num visible devices: 1 -[2026-06-07 02:10:30,403][324563] ConvEncoder: input_channels=3 -[2026-06-07 02:10:30,488][324563] Conv encoder output size: 512 -[2026-06-07 02:10:30,493][321791] Inference worker 0-0 is ready! -[2026-06-07 02:10:30,493][321791] All inference workers are ready! Signal rollout workers to start! -[2026-06-07 02:10:30,494][324567] EnvRunner 1-0 uses policy 0 -[2026-06-07 02:10:30,494][324564] EnvRunner 0-0 uses policy 0 -[2026-06-07 02:10:30,744][321791] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2026-06-07 02:10:33,237][324276] Signal inference workers to stop experience collection... -[2026-06-07 02:10:33,242][324563] InferenceWorker_p0-w0: stopping experience collection -[2026-06-07 02:10:34,493][324276] Signal inference workers to resume experience collection... -[2026-06-07 02:10:34,494][324563] InferenceWorker_p0-w0: resuming experience collection -[2026-06-07 02:10:34,856][324563] Updated weights for policy 0, policy_version 73 (0.0076) -[2026-06-07 02:10:35,058][324563] Updated weights for policy 0, policy_version 83 (0.0011) -[2026-06-07 02:10:35,253][324563] Updated weights for policy 0, policy_version 93 (0.0011) -[2026-06-07 02:10:35,465][324563] Updated weights for policy 0, policy_version 103 (0.0006) -[2026-06-07 02:10:35,691][324563] Updated weights for policy 0, policy_version 113 (0.0009) -[2026-06-07 02:10:35,744][321791] Fps is (10 sec: 6553.7, 60 sec: 6553.7, 300 sec: 6553.7). Total num frames: 32768. Throughput: 0: 14208.3. Samples: 71040. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) -[2026-06-07 02:10:35,745][321791] Avg episode reward: [(0, '-6.656')] -[2026-06-07 02:10:35,886][324563] Updated weights for policy 0, policy_version 123 (0.0009) -[2026-06-07 02:10:36,338][324563] Updated weights for policy 0, policy_version 133 (0.0010) -[2026-06-07 02:10:36,523][324563] Updated weights for policy 0, policy_version 143 (0.0010) -[2026-06-07 02:10:36,734][324563] Updated weights for policy 0, policy_version 153 (0.0008) -[2026-06-07 02:10:36,952][324563] Updated weights for policy 0, policy_version 163 (0.0010) -[2026-06-07 02:10:37,168][324563] Updated weights for policy 0, policy_version 173 (0.0007) -[2026-06-07 02:10:37,359][324563] Updated weights for policy 0, policy_version 183 (0.0009) -[2026-06-07 02:10:37,947][324563] Updated weights for policy 0, policy_version 193 (0.0007) -[2026-06-07 02:10:38,189][324563] Updated weights for policy 0, policy_version 203 (0.0006) -[2026-06-07 02:10:38,399][324563] Updated weights for policy 0, policy_version 213 (0.0008) -[2026-06-07 02:10:38,609][324563] Updated weights for policy 0, policy_version 224 (0.0008) -[2026-06-07 02:10:38,832][324563] Updated weights for policy 0, policy_version 234 (0.0007) -[2026-06-07 02:10:39,046][324563] Updated weights for policy 0, policy_version 244 (0.0008) -[2026-06-07 02:10:39,261][324563] Updated weights for policy 0, policy_version 255 (0.0009) -[2026-06-07 02:10:39,855][324563] Updated weights for policy 0, policy_version 271 (0.0008) -[2026-06-07 02:10:40,081][324563] Updated weights for policy 0, policy_version 281 (0.0006) -[2026-06-07 02:10:40,302][324563] Updated weights for policy 0, policy_version 291 (0.0006) -[2026-06-07 02:10:40,570][324563] Updated weights for policy 0, policy_version 304 (0.0007) -[2026-06-07 02:10:40,745][321791] Fps is (10 sec: 13106.5, 60 sec: 13106.5, 300 sec: 13106.5). Total num frames: 131072. Throughput: 0: 17035.9. Samples: 170368. Policy #0 lag: (min: 63.0, avg: 85.4, max: 127.0) -[2026-06-07 02:10:40,747][321791] Avg episode reward: [(0, '-5.255')] -[2026-06-07 02:10:40,776][324563] Updated weights for policy 0, policy_version 314 (0.0007) -[2026-06-07 02:10:40,907][324276] Saving new best policy, reward=-5.255! -[2026-06-07 02:10:41,272][324563] Updated weights for policy 0, policy_version 324 (0.0007) -[2026-06-07 02:10:41,489][324563] Updated weights for policy 0, policy_version 334 (0.0006) -[2026-06-07 02:10:41,670][324563] Updated weights for policy 0, policy_version 344 (0.0006) -[2026-06-07 02:10:41,869][324563] Updated weights for policy 0, policy_version 354 (0.0007) -[2026-06-07 02:10:42,091][324563] Updated weights for policy 0, policy_version 364 (0.0007) -[2026-06-07 02:10:42,293][324563] Updated weights for policy 0, policy_version 374 (0.0007) -[2026-06-07 02:10:42,489][324563] Updated weights for policy 0, policy_version 384 (0.0006) -[2026-06-07 02:10:43,020][324563] Updated weights for policy 0, policy_version 396 (0.0007) -[2026-06-07 02:10:43,230][324563] Updated weights for policy 0, policy_version 406 (0.0007) -[2026-06-07 02:10:43,458][324563] Updated weights for policy 0, policy_version 416 (0.0008) -[2026-06-07 02:10:43,651][324563] Updated weights for policy 0, policy_version 426 (0.0007) -[2026-06-07 02:10:43,849][324563] Updated weights for policy 0, policy_version 436 (0.0006) -[2026-06-07 02:10:44,076][324563] Updated weights for policy 0, policy_version 446 (0.0006) -[2026-06-07 02:10:44,570][324563] Updated weights for policy 0, policy_version 456 (0.0006) -[2026-06-07 02:10:44,764][324563] Updated weights for policy 0, policy_version 466 (0.0006) -[2026-06-07 02:10:44,957][324563] Updated weights for policy 0, policy_version 476 (0.0006) -[2026-06-07 02:10:45,175][324563] Updated weights for policy 0, policy_version 486 (0.0007) -[2026-06-07 02:10:45,389][324563] Updated weights for policy 0, policy_version 496 (0.0006) -[2026-06-07 02:10:45,579][324563] Updated weights for policy 0, policy_version 506 (0.0006) -[2026-06-07 02:10:45,650][321791] Heartbeat connected on Batcher_0 -[2026-06-07 02:10:45,674][321791] Heartbeat connected on RolloutWorker_w1 -[2026-06-07 02:10:45,678][321791] Heartbeat connected on InferenceWorker_p0-w0 -[2026-06-07 02:10:45,713][321791] Heartbeat connected on LearnerWorker_p0 -[2026-06-07 02:10:45,727][321791] Heartbeat connected on RolloutWorker_w0 -[2026-06-07 02:10:45,744][321791] Fps is (10 sec: 22936.8, 60 sec: 17476.0, 300 sec: 17476.0). Total num frames: 262144. Throughput: 0: 19097.3. Samples: 286464. Policy #0 lag: (min: 63.0, avg: 89.2, max: 127.0) -[2026-06-07 02:10:45,745][321791] Avg episode reward: [(0, '-2.789')] -[2026-06-07 02:10:45,756][324276] Saving new best policy, reward=-2.789! -[2026-06-07 02:10:46,132][324563] Updated weights for policy 0, policy_version 516 (0.0008) -[2026-06-07 02:10:46,342][324563] Updated weights for policy 0, policy_version 526 (0.0007) -[2026-06-07 02:10:46,545][324563] Updated weights for policy 0, policy_version 536 (0.0007) -[2026-06-07 02:10:46,741][324563] Updated weights for policy 0, policy_version 546 (0.0007) -[2026-06-07 02:10:46,941][324563] Updated weights for policy 0, policy_version 556 (0.0007) -[2026-06-07 02:10:47,168][324563] Updated weights for policy 0, policy_version 566 (0.0010) -[2026-06-07 02:10:47,372][324563] Updated weights for policy 0, policy_version 576 (0.0006) -[2026-06-07 02:10:47,845][324563] Updated weights for policy 0, policy_version 586 (0.0006) -[2026-06-07 02:10:48,065][324563] Updated weights for policy 0, policy_version 597 (0.0007) -[2026-06-07 02:10:48,283][324563] Updated weights for policy 0, policy_version 607 (0.0006) -[2026-06-07 02:10:48,499][324563] Updated weights for policy 0, policy_version 617 (0.0007) -[2026-06-07 02:10:48,723][324563] Updated weights for policy 0, policy_version 627 (0.0006) -[2026-06-07 02:10:48,915][324563] Updated weights for policy 0, policy_version 637 (0.0006) -[2026-06-07 02:10:49,534][324563] Updated weights for policy 0, policy_version 647 (0.0006) -[2026-06-07 02:10:49,747][324563] Updated weights for policy 0, policy_version 657 (0.0007) -[2026-06-07 02:10:49,999][324563] Updated weights for policy 0, policy_version 668 (0.0008) -[2026-06-07 02:10:50,199][324563] Updated weights for policy 0, policy_version 678 (0.0006) -[2026-06-07 02:10:50,412][324563] Updated weights for policy 0, policy_version 688 (0.0007) -[2026-06-07 02:10:50,638][324563] Updated weights for policy 0, policy_version 698 (0.0006) -[2026-06-07 02:10:50,744][321791] Fps is (10 sec: 22938.7, 60 sec: 18022.4, 300 sec: 18022.4). Total num frames: 360448. Throughput: 0: 17177.6. Samples: 343552. Policy #0 lag: (min: 43.0, avg: 84.6, max: 107.0) -[2026-06-07 02:10:50,745][321791] Avg episode reward: [(0, '3.089')] -[2026-06-07 02:10:50,758][324276] Saving new best policy, reward=3.089! -[2026-06-07 02:10:51,216][324563] Updated weights for policy 0, policy_version 708 (0.0007) -[2026-06-07 02:10:51,480][324563] Updated weights for policy 0, policy_version 721 (0.0006) -[2026-06-07 02:10:51,678][324563] Updated weights for policy 0, policy_version 731 (0.0007) -[2026-06-07 02:10:51,877][324563] Updated weights for policy 0, policy_version 741 (0.0007) -[2026-06-07 02:10:52,085][324563] Updated weights for policy 0, policy_version 751 (0.0007) -[2026-06-07 02:10:52,295][324563] Updated weights for policy 0, policy_version 761 (0.0006) -[2026-06-07 02:10:53,076][324563] Updated weights for policy 0, policy_version 772 (0.0007) -[2026-06-07 02:10:53,281][324563] Updated weights for policy 0, policy_version 782 (0.0006) -[2026-06-07 02:10:53,498][324563] Updated weights for policy 0, policy_version 793 (0.0006) -[2026-06-07 02:10:53,730][324563] Updated weights for policy 0, policy_version 805 (0.0007) -[2026-06-07 02:10:53,946][324563] Updated weights for policy 0, policy_version 815 (0.0007) -[2026-06-07 02:10:54,138][324563] Updated weights for policy 0, policy_version 825 (0.0007) -[2026-06-07 02:10:54,878][324563] Updated weights for policy 0, policy_version 836 (0.0007) -[2026-06-07 02:10:55,074][324563] Updated weights for policy 0, policy_version 847 (0.0006) -[2026-06-07 02:10:55,291][324563] Updated weights for policy 0, policy_version 858 (0.0006) -[2026-06-07 02:10:55,499][324563] Updated weights for policy 0, policy_version 868 (0.0007) -[2026-06-07 02:10:55,728][324563] Updated weights for policy 0, policy_version 878 (0.0006) -[2026-06-07 02:10:55,745][321791] Fps is (10 sec: 16383.7, 60 sec: 17039.1, 300 sec: 17039.1). Total num frames: 425984. Throughput: 0: 18088.6. Samples: 452224. Policy #0 lag: (min: 4.0, avg: 27.4, max: 68.0) -[2026-06-07 02:10:55,747][321791] Avg episode reward: [(0, '3.982')] -[2026-06-07 02:10:55,959][324563] Updated weights for policy 0, policy_version 889 (0.0006) -[2026-06-07 02:10:56,096][324276] Saving new best policy, reward=3.982! -[2026-06-07 02:10:56,761][324563] Updated weights for policy 0, policy_version 900 (0.0007) -[2026-06-07 02:10:56,975][324563] Updated weights for policy 0, policy_version 911 (0.0007) -[2026-06-07 02:10:57,255][324563] Updated weights for policy 0, policy_version 924 (0.0006) -[2026-06-07 02:10:57,493][324563] Updated weights for policy 0, policy_version 937 (0.0006) -[2026-06-07 02:10:57,705][324563] Updated weights for policy 0, policy_version 948 (0.0006) -[2026-06-07 02:10:57,899][324563] Updated weights for policy 0, policy_version 958 (0.0007) -[2026-06-07 02:10:58,689][324563] Updated weights for policy 0, policy_version 969 (0.0007) -[2026-06-07 02:10:58,877][324563] Updated weights for policy 0, policy_version 979 (0.0007) -[2026-06-07 02:10:59,129][324563] Updated weights for policy 0, policy_version 991 (0.0007) -[2026-06-07 02:10:59,331][324563] Updated weights for policy 0, policy_version 1001 (0.0006) -[2026-06-07 02:10:59,548][324563] Updated weights for policy 0, policy_version 1011 (0.0006) -[2026-06-07 02:10:59,774][324563] Updated weights for policy 0, policy_version 1022 (0.0007) -[2026-06-07 02:11:00,698][324563] Updated weights for policy 0, policy_version 1032 (0.0007) -[2026-06-07 02:11:00,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.2, 300 sec: 17476.2). Total num frames: 524288. Throughput: 0: 18624.0. Samples: 558720. Policy #0 lag: (min: 63.0, avg: 81.5, max: 127.0) -[2026-06-07 02:11:00,745][321791] Avg episode reward: [(0, '4.082')] -[2026-06-07 02:11:00,901][324563] Updated weights for policy 0, policy_version 1042 (0.0006) -[2026-06-07 02:11:01,171][324563] Updated weights for policy 0, policy_version 1055 (0.0007) -[2026-06-07 02:11:01,425][324563] Updated weights for policy 0, policy_version 1067 (0.0007) -[2026-06-07 02:11:01,622][324563] Updated weights for policy 0, policy_version 1077 (0.0006) -[2026-06-07 02:11:01,833][324563] Updated weights for policy 0, policy_version 1087 (0.0007) -[2026-06-07 02:11:01,844][324276] Saving new best policy, reward=4.082! -[2026-06-07 02:11:02,682][324563] Updated weights for policy 0, policy_version 1099 (0.0007) -[2026-06-07 02:11:02,910][324563] Updated weights for policy 0, policy_version 1109 (0.0007) -[2026-06-07 02:11:03,133][324563] Updated weights for policy 0, policy_version 1119 (0.0007) -[2026-06-07 02:11:03,368][324563] Updated weights for policy 0, policy_version 1129 (0.0007) -[2026-06-07 02:11:03,579][324563] Updated weights for policy 0, policy_version 1139 (0.0007) -[2026-06-07 02:11:03,807][324563] Updated weights for policy 0, policy_version 1151 (0.0006) -[2026-06-07 02:11:04,717][324563] Updated weights for policy 0, policy_version 1161 (0.0006) -[2026-06-07 02:11:04,926][324563] Updated weights for policy 0, policy_version 1171 (0.0007) -[2026-06-07 02:11:05,133][324563] Updated weights for policy 0, policy_version 1182 (0.0006) -[2026-06-07 02:11:05,372][324563] Updated weights for policy 0, policy_version 1193 (0.0007) -[2026-06-07 02:11:05,566][324563] Updated weights for policy 0, policy_version 1203 (0.0007) -[2026-06-07 02:11:05,744][321791] Fps is (10 sec: 16384.9, 60 sec: 16852.2, 300 sec: 16852.2). Total num frames: 589824. Throughput: 0: 17174.0. Samples: 601088. Policy #0 lag: (min: 5.0, avg: 17.1, max: 69.0) -[2026-06-07 02:11:05,745][321791] Avg episode reward: [(0, '4.071')] -[2026-06-07 02:11:05,761][324563] Updated weights for policy 0, policy_version 1213 (0.0007) -[2026-06-07 02:11:06,656][324563] Updated weights for policy 0, policy_version 1223 (0.0007) -[2026-06-07 02:11:06,876][324563] Updated weights for policy 0, policy_version 1233 (0.0007) -[2026-06-07 02:11:07,069][324563] Updated weights for policy 0, policy_version 1243 (0.0007) -[2026-06-07 02:11:07,287][324563] Updated weights for policy 0, policy_version 1253 (0.0006) -[2026-06-07 02:11:07,479][324563] Updated weights for policy 0, policy_version 1263 (0.0006) -[2026-06-07 02:11:07,724][324563] Updated weights for policy 0, policy_version 1274 (0.0007) -[2026-06-07 02:11:08,661][324563] Updated weights for policy 0, policy_version 1284 (0.0007) -[2026-06-07 02:11:08,876][324563] Updated weights for policy 0, policy_version 1294 (0.0006) -[2026-06-07 02:11:09,103][324563] Updated weights for policy 0, policy_version 1305 (0.0007) -[2026-06-07 02:11:09,320][324563] Updated weights for policy 0, policy_version 1315 (0.0006) -[2026-06-07 02:11:09,526][324563] Updated weights for policy 0, policy_version 1325 (0.0006) -[2026-06-07 02:11:09,758][324563] Updated weights for policy 0, policy_version 1335 (0.0007) -[2026-06-07 02:11:10,654][324563] Updated weights for policy 0, policy_version 1345 (0.0007) -[2026-06-07 02:11:10,744][321791] Fps is (10 sec: 16383.6, 60 sec: 17203.1, 300 sec: 17203.1). Total num frames: 688128. Throughput: 0: 17471.9. Samples: 698880. Policy #0 lag: (min: 63.0, avg: 76.4, max: 127.0) -[2026-06-07 02:11:10,746][321791] Avg episode reward: [(0, '4.142')] -[2026-06-07 02:11:10,870][324563] Updated weights for policy 0, policy_version 1355 (0.0007) -[2026-06-07 02:11:11,074][324563] Updated weights for policy 0, policy_version 1365 (0.0007) -[2026-06-07 02:11:11,292][324563] Updated weights for policy 0, policy_version 1375 (0.0007) -[2026-06-07 02:11:11,556][324563] Updated weights for policy 0, policy_version 1387 (0.0007) -[2026-06-07 02:11:11,791][324563] Updated weights for policy 0, policy_version 1398 (0.0008) -[2026-06-07 02:11:11,991][324276] Saving new best policy, reward=4.142! -[2026-06-07 02:11:11,993][324563] Updated weights for policy 0, policy_version 1408 (0.0007) -[2026-06-07 02:11:12,896][324563] Updated weights for policy 0, policy_version 1418 (0.0006) -[2026-06-07 02:11:13,109][324563] Updated weights for policy 0, policy_version 1428 (0.0007) -[2026-06-07 02:11:13,307][324563] Updated weights for policy 0, policy_version 1438 (0.0007) -[2026-06-07 02:11:13,570][324563] Updated weights for policy 0, policy_version 1450 (0.0007) -[2026-06-07 02:11:13,773][324563] Updated weights for policy 0, policy_version 1460 (0.0006) -[2026-06-07 02:11:13,981][324563] Updated weights for policy 0, policy_version 1470 (0.0007) -[2026-06-07 02:11:14,901][324563] Updated weights for policy 0, policy_version 1480 (0.0006) -[2026-06-07 02:11:15,126][324563] Updated weights for policy 0, policy_version 1490 (0.0006) -[2026-06-07 02:11:15,360][324563] Updated weights for policy 0, policy_version 1501 (0.0007) -[2026-06-07 02:11:15,551][324563] Updated weights for policy 0, policy_version 1511 (0.0006) -[2026-06-07 02:11:15,744][321791] Fps is (10 sec: 16383.8, 60 sec: 16748.1, 300 sec: 16748.1). Total num frames: 753664. Throughput: 0: 17675.4. Samples: 795392. Policy #0 lag: (min: 63.0, avg: 76.4, max: 127.0) -[2026-06-07 02:11:15,745][321791] Avg episode reward: [(0, '4.543')] -[2026-06-07 02:11:15,793][324563] Updated weights for policy 0, policy_version 1522 (0.0006) -[2026-06-07 02:11:15,992][324563] Updated weights for policy 0, policy_version 1532 (0.0007) -[2026-06-07 02:11:16,061][324276] Saving new best policy, reward=4.543! -[2026-06-07 02:11:16,879][324563] Updated weights for policy 0, policy_version 1542 (0.0006) -[2026-06-07 02:11:17,057][324563] Updated weights for policy 0, policy_version 1552 (0.0006) -[2026-06-07 02:11:17,278][324563] Updated weights for policy 0, policy_version 1562 (0.0006) -[2026-06-07 02:11:17,487][324563] Updated weights for policy 0, policy_version 1572 (0.0006) -[2026-06-07 02:11:17,714][324563] Updated weights for policy 0, policy_version 1582 (0.0007) -[2026-06-07 02:11:17,936][324563] Updated weights for policy 0, policy_version 1593 (0.0006) -[2026-06-07 02:11:18,874][324563] Updated weights for policy 0, policy_version 1604 (0.0006) -[2026-06-07 02:11:19,093][324563] Updated weights for policy 0, policy_version 1614 (0.0006) -[2026-06-07 02:11:19,312][324563] Updated weights for policy 0, policy_version 1625 (0.0006) -[2026-06-07 02:11:19,524][324563] Updated weights for policy 0, policy_version 1635 (0.0006) -[2026-06-07 02:11:19,748][324563] Updated weights for policy 0, policy_version 1645 (0.0006) -[2026-06-07 02:11:19,994][324563] Updated weights for policy 0, policy_version 1656 (0.0006) -[2026-06-07 02:11:20,744][321791] Fps is (10 sec: 16384.8, 60 sec: 17039.4, 300 sec: 17039.4). Total num frames: 851968. Throughput: 0: 17231.7. Samples: 846464. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) -[2026-06-07 02:11:20,745][321791] Avg episode reward: [(0, '5.305')] -[2026-06-07 02:11:20,752][324276] Saving new best policy, reward=5.305! -[2026-06-07 02:11:20,938][324563] Updated weights for policy 0, policy_version 1667 (0.0006) -[2026-06-07 02:11:21,178][324563] Updated weights for policy 0, policy_version 1678 (0.0008) -[2026-06-07 02:11:21,388][324563] Updated weights for policy 0, policy_version 1688 (0.0011) -[2026-06-07 02:11:21,571][324563] Updated weights for policy 0, policy_version 1698 (0.0011) -[2026-06-07 02:11:21,786][324563] Updated weights for policy 0, policy_version 1708 (0.0011) -[2026-06-07 02:11:22,023][324563] Updated weights for policy 0, policy_version 1719 (0.0011) -[2026-06-07 02:11:23,001][324563] Updated weights for policy 0, policy_version 1729 (0.0010) -[2026-06-07 02:11:23,223][324563] Updated weights for policy 0, policy_version 1739 (0.0006) -[2026-06-07 02:11:23,443][324563] Updated weights for policy 0, policy_version 1749 (0.0006) -[2026-06-07 02:11:23,698][324563] Updated weights for policy 0, policy_version 1761 (0.0008) -[2026-06-07 02:11:23,924][324563] Updated weights for policy 0, policy_version 1771 (0.0009) -[2026-06-07 02:11:24,136][324563] Updated weights for policy 0, policy_version 1782 (0.0007) -[2026-06-07 02:11:24,338][324563] Updated weights for policy 0, policy_version 1792 (0.0007) -[2026-06-07 02:11:25,304][324563] Updated weights for policy 0, policy_version 1802 (0.0007) -[2026-06-07 02:11:25,486][324563] Updated weights for policy 0, policy_version 1812 (0.0007) -[2026-06-07 02:11:25,681][324563] Updated weights for policy 0, policy_version 1822 (0.0007) -[2026-06-07 02:11:25,744][321791] Fps is (10 sec: 16383.9, 60 sec: 16681.9, 300 sec: 16681.9). Total num frames: 917504. Throughput: 0: 17029.9. Samples: 936704. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) -[2026-06-07 02:11:25,746][321791] Avg episode reward: [(0, '5.320')] -[2026-06-07 02:11:25,890][324563] Updated weights for policy 0, policy_version 1832 (0.0007) -[2026-06-07 02:11:26,140][324563] Updated weights for policy 0, policy_version 1843 (0.0006) -[2026-06-07 02:11:26,354][324563] Updated weights for policy 0, policy_version 1853 (0.0008) -[2026-06-07 02:11:26,420][324276] Saving new best policy, reward=5.320! -[2026-06-07 02:11:27,309][324563] Updated weights for policy 0, policy_version 1863 (0.0007) -[2026-06-07 02:11:27,529][324563] Updated weights for policy 0, policy_version 1873 (0.0007) -[2026-06-07 02:11:27,748][324563] Updated weights for policy 0, policy_version 1883 (0.0007) -[2026-06-07 02:11:27,954][324563] Updated weights for policy 0, policy_version 1893 (0.0007) -[2026-06-07 02:11:28,185][324563] Updated weights for policy 0, policy_version 1903 (0.0007) -[2026-06-07 02:11:28,385][324563] Updated weights for policy 0, policy_version 1914 (0.0007) -[2026-06-07 02:11:29,287][324563] Updated weights for policy 0, policy_version 1925 (0.0007) -[2026-06-07 02:11:29,507][324563] Updated weights for policy 0, policy_version 1936 (0.0007) -[2026-06-07 02:11:29,783][324563] Updated weights for policy 0, policy_version 1949 (0.0007) -[2026-06-07 02:11:29,992][324563] Updated weights for policy 0, policy_version 1959 (0.0006) -[2026-06-07 02:11:30,206][324563] Updated weights for policy 0, policy_version 1969 (0.0007) -[2026-06-07 02:11:30,419][324563] Updated weights for policy 0, policy_version 1979 (0.0007) -[2026-06-07 02:11:30,744][321791] Fps is (10 sec: 16384.0, 60 sec: 16930.2, 300 sec: 16930.2). Total num frames: 1015808. Throughput: 0: 16523.5. Samples: 1030016. Policy #0 lag: (min: 63.0, avg: 76.0, max: 127.0) -[2026-06-07 02:11:30,745][321791] Avg episode reward: [(0, '5.774')] -[2026-06-07 02:11:30,749][324276] Saving new best policy, reward=5.774! -[2026-06-07 02:11:31,347][324563] Updated weights for policy 0, policy_version 1990 (0.0007) -[2026-06-07 02:11:31,550][324563] Updated weights for policy 0, policy_version 2001 (0.0006) -[2026-06-07 02:11:31,763][324563] Updated weights for policy 0, policy_version 2011 (0.0007) -[2026-06-07 02:11:31,997][324563] Updated weights for policy 0, policy_version 2022 (0.0007) -[2026-06-07 02:11:32,214][324563] Updated weights for policy 0, policy_version 2033 (0.0011) -[2026-06-07 02:11:32,447][324563] Updated weights for policy 0, policy_version 2043 (0.0011) -[2026-06-07 02:11:33,420][324563] Updated weights for policy 0, policy_version 2053 (0.0009) -[2026-06-07 02:11:33,594][324563] Updated weights for policy 0, policy_version 2063 (0.0006) -[2026-06-07 02:11:33,810][324563] Updated weights for policy 0, policy_version 2073 (0.0007) -[2026-06-07 02:11:34,073][324563] Updated weights for policy 0, policy_version 2085 (0.0007) -[2026-06-07 02:11:34,312][324563] Updated weights for policy 0, policy_version 2096 (0.0009) -[2026-06-07 02:11:34,517][324563] Updated weights for policy 0, policy_version 2106 (0.0011) -[2026-06-07 02:11:35,511][324563] Updated weights for policy 0, policy_version 2117 (0.0009) -[2026-06-07 02:11:35,727][324563] Updated weights for policy 0, policy_version 2127 (0.0007) -[2026-06-07 02:11:35,744][321791] Fps is (10 sec: 16384.3, 60 sec: 17476.3, 300 sec: 16636.1). Total num frames: 1081344. Throughput: 0: 16503.5. Samples: 1086208. Policy #0 lag: (min: 63.0, avg: 75.7, max: 127.0) -[2026-06-07 02:11:35,745][321791] Avg episode reward: [(0, '6.105')] -[2026-06-07 02:11:35,953][324563] Updated weights for policy 0, policy_version 2138 (0.0006) -[2026-06-07 02:11:36,149][324563] Updated weights for policy 0, policy_version 2148 (0.0008) -[2026-06-07 02:11:36,349][324563] Updated weights for policy 0, policy_version 2158 (0.0007) -[2026-06-07 02:11:36,548][324563] Updated weights for policy 0, policy_version 2168 (0.0006) -[2026-06-07 02:11:36,687][324276] Saving new best policy, reward=6.105! -[2026-06-07 02:11:37,454][324563] Updated weights for policy 0, policy_version 2178 (0.0007) -[2026-06-07 02:11:37,662][324563] Updated weights for policy 0, policy_version 2188 (0.0007) -[2026-06-07 02:11:37,928][324563] Updated weights for policy 0, policy_version 2200 (0.0007) -[2026-06-07 02:11:38,150][324563] Updated weights for policy 0, policy_version 2210 (0.0007) -[2026-06-07 02:11:38,367][324563] Updated weights for policy 0, policy_version 2220 (0.0009) -[2026-06-07 02:11:38,578][324563] Updated weights for policy 0, policy_version 2230 (0.0010) -[2026-06-07 02:11:39,486][324563] Updated weights for policy 0, policy_version 2241 (0.0010) -[2026-06-07 02:11:39,704][324563] Updated weights for policy 0, policy_version 2252 (0.0007) -[2026-06-07 02:11:39,910][324563] Updated weights for policy 0, policy_version 2262 (0.0007) -[2026-06-07 02:11:40,098][324563] Updated weights for policy 0, policy_version 2272 (0.0007) -[2026-06-07 02:11:40,326][324563] Updated weights for policy 0, policy_version 2283 (0.0007) -[2026-06-07 02:11:40,518][324563] Updated weights for policy 0, policy_version 2293 (0.0006) -[2026-06-07 02:11:40,721][324563] Updated weights for policy 0, policy_version 2304 (0.0007) -[2026-06-07 02:11:40,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.5, 300 sec: 16852.1). Total num frames: 1179648. Throughput: 0: 16230.6. Samples: 1182592. Policy #0 lag: (min: 63.0, avg: 76.0, max: 127.0) -[2026-06-07 02:11:40,745][321791] Avg episode reward: [(0, '6.398')] -[2026-06-07 02:11:40,751][324276] Saving new best policy, reward=6.398! -[2026-06-07 02:11:41,666][324563] Updated weights for policy 0, policy_version 2314 (0.0007) -[2026-06-07 02:11:41,852][324563] Updated weights for policy 0, policy_version 2324 (0.0007) -[2026-06-07 02:11:42,078][324563] Updated weights for policy 0, policy_version 2334 (0.0007) -[2026-06-07 02:11:42,295][324563] Updated weights for policy 0, policy_version 2345 (0.0008) -[2026-06-07 02:11:42,546][324563] Updated weights for policy 0, policy_version 2358 (0.0007) -[2026-06-07 02:11:43,562][324563] Updated weights for policy 0, policy_version 2370 (0.0007) -[2026-06-07 02:11:43,796][324563] Updated weights for policy 0, policy_version 2381 (0.0006) -[2026-06-07 02:11:43,989][324563] Updated weights for policy 0, policy_version 2391 (0.0006) -[2026-06-07 02:11:44,220][324563] Updated weights for policy 0, policy_version 2402 (0.0007) -[2026-06-07 02:11:44,451][324563] Updated weights for policy 0, policy_version 2413 (0.0007) -[2026-06-07 02:11:44,665][324563] Updated weights for policy 0, policy_version 2424 (0.0007) -[2026-06-07 02:11:45,622][324563] Updated weights for policy 0, policy_version 2435 (0.0008) -[2026-06-07 02:11:45,745][321791] Fps is (10 sec: 16383.1, 60 sec: 16383.9, 300 sec: 16602.4). Total num frames: 1245184. Throughput: 0: 16056.7. Samples: 1281280. Policy #0 lag: (min: 63.0, avg: 75.2, max: 127.0) -[2026-06-07 02:11:45,746][321791] Avg episode reward: [(0, '7.363')] -[2026-06-07 02:11:45,832][324563] Updated weights for policy 0, policy_version 2445 (0.0007) -[2026-06-07 02:11:46,059][324563] Updated weights for policy 0, policy_version 2456 (0.0006) -[2026-06-07 02:11:46,280][324563] Updated weights for policy 0, policy_version 2467 (0.0006) -[2026-06-07 02:11:46,519][324563] Updated weights for policy 0, policy_version 2479 (0.0006) -[2026-06-07 02:11:46,759][324563] Updated weights for policy 0, policy_version 2490 (0.0006) -[2026-06-07 02:11:46,861][324276] Saving new best policy, reward=7.363! -[2026-06-07 02:11:47,736][324563] Updated weights for policy 0, policy_version 2501 (0.0007) -[2026-06-07 02:11:47,927][324563] Updated weights for policy 0, policy_version 2511 (0.0006) -[2026-06-07 02:11:48,122][324563] Updated weights for policy 0, policy_version 2521 (0.0006) -[2026-06-07 02:11:48,358][324563] Updated weights for policy 0, policy_version 2531 (0.0007) -[2026-06-07 02:11:48,579][324563] Updated weights for policy 0, policy_version 2542 (0.0007) -[2026-06-07 02:11:48,798][324563] Updated weights for policy 0, policy_version 2553 (0.0007) -[2026-06-07 02:11:49,793][324563] Updated weights for policy 0, policy_version 2565 (0.0006) -[2026-06-07 02:11:50,011][324563] Updated weights for policy 0, policy_version 2576 (0.0006) -[2026-06-07 02:11:50,254][324563] Updated weights for policy 0, policy_version 2587 (0.0007) -[2026-06-07 02:11:50,484][324563] Updated weights for policy 0, policy_version 2597 (0.0006) -[2026-06-07 02:11:50,744][321791] Fps is (10 sec: 13106.7, 60 sec: 15837.8, 300 sec: 16384.0). Total num frames: 1310720. Throughput: 0: 16034.0. Samples: 1322624. Policy #0 lag: (min: 63.0, avg: 75.2, max: 127.0) -[2026-06-07 02:11:50,746][321791] Avg episode reward: [(0, '8.139')] -[2026-06-07 02:11:50,781][324563] Updated weights for policy 0, policy_version 2611 (0.0008) -[2026-06-07 02:11:51,047][324276] Saving new best policy, reward=8.139! -[2026-06-07 02:11:51,877][324563] Updated weights for policy 0, policy_version 2625 (0.0012) -[2026-06-07 02:11:52,109][324563] Updated weights for policy 0, policy_version 2636 (0.0011) -[2026-06-07 02:11:52,291][324563] Updated weights for policy 0, policy_version 2646 (0.0011) -[2026-06-07 02:11:52,531][324563] Updated weights for policy 0, policy_version 2657 (0.0011) -[2026-06-07 02:11:52,725][324563] Updated weights for policy 0, policy_version 2668 (0.0012) -[2026-06-07 02:11:52,917][324563] Updated weights for policy 0, policy_version 2678 (0.0011) -[2026-06-07 02:11:53,912][324563] Updated weights for policy 0, policy_version 2689 (0.0011) -[2026-06-07 02:11:54,165][324563] Updated weights for policy 0, policy_version 2701 (0.0011) -[2026-06-07 02:11:54,364][324563] Updated weights for policy 0, policy_version 2711 (0.0011) -[2026-06-07 02:11:54,578][324563] Updated weights for policy 0, policy_version 2722 (0.0011) -[2026-06-07 02:11:54,799][324563] Updated weights for policy 0, policy_version 2733 (0.0006) -[2026-06-07 02:11:55,024][324563] Updated weights for policy 0, policy_version 2744 (0.0006) -[2026-06-07 02:11:55,744][321791] Fps is (10 sec: 16385.0, 60 sec: 16384.2, 300 sec: 16576.8). Total num frames: 1409024. Throughput: 0: 15997.3. Samples: 1418752. Policy #0 lag: (min: 63.0, avg: 75.5, max: 127.0) -[2026-06-07 02:11:55,745][321791] Avg episode reward: [(0, '9.643')] -[2026-06-07 02:11:55,750][324276] Saving new best policy, reward=9.643! -[2026-06-07 02:11:56,039][324563] Updated weights for policy 0, policy_version 2757 (0.0006) -[2026-06-07 02:11:56,262][324563] Updated weights for policy 0, policy_version 2768 (0.0006) -[2026-06-07 02:11:56,484][324563] Updated weights for policy 0, policy_version 2779 (0.0006) -[2026-06-07 02:11:56,722][324563] Updated weights for policy 0, policy_version 2790 (0.0006) -[2026-06-07 02:11:56,957][324563] Updated weights for policy 0, policy_version 2801 (0.0006) -[2026-06-07 02:11:57,196][324563] Updated weights for policy 0, policy_version 2812 (0.0010) -[2026-06-07 02:11:58,144][324563] Updated weights for policy 0, policy_version 2823 (0.0008) -[2026-06-07 02:11:58,359][324563] Updated weights for policy 0, policy_version 2833 (0.0006) -[2026-06-07 02:11:58,556][324563] Updated weights for policy 0, policy_version 2843 (0.0006) -[2026-06-07 02:11:58,807][324563] Updated weights for policy 0, policy_version 2855 (0.0007) -[2026-06-07 02:11:59,018][324563] Updated weights for policy 0, policy_version 2865 (0.0006) -[2026-06-07 02:11:59,275][324563] Updated weights for policy 0, policy_version 2877 (0.0007) -[2026-06-07 02:12:00,252][324563] Updated weights for policy 0, policy_version 2889 (0.0007) -[2026-06-07 02:12:00,488][324563] Updated weights for policy 0, policy_version 2900 (0.0006) -[2026-06-07 02:12:00,704][324563] Updated weights for policy 0, policy_version 2910 (0.0006) -[2026-06-07 02:12:00,745][321791] Fps is (10 sec: 16384.1, 60 sec: 15837.8, 300 sec: 16384.0). Total num frames: 1474560. Throughput: 0: 16019.9. Samples: 1516288. Policy #0 lag: (min: 53.0, avg: 64.7, max: 117.0) -[2026-06-07 02:12:00,748][321791] Avg episode reward: [(0, '10.410')] -[2026-06-07 02:12:00,887][324563] Updated weights for policy 0, policy_version 2920 (0.0007) -[2026-06-07 02:12:01,156][324563] Updated weights for policy 0, policy_version 2933 (0.0007) -[2026-06-07 02:12:01,365][324563] Updated weights for policy 0, policy_version 2943 (0.0006) -[2026-06-07 02:12:01,370][324276] Saving new best policy, reward=10.410! -[2026-06-07 02:12:02,347][324563] Updated weights for policy 0, policy_version 2954 (0.0007) -[2026-06-07 02:12:02,598][324563] Updated weights for policy 0, policy_version 2966 (0.0006) -[2026-06-07 02:12:02,766][324563] Updated weights for policy 0, policy_version 2976 (0.0006) -[2026-06-07 02:12:03,004][324563] Updated weights for policy 0, policy_version 2986 (0.0006) -[2026-06-07 02:12:03,261][324563] Updated weights for policy 0, policy_version 2998 (0.0006) -[2026-06-07 02:12:04,242][324563] Updated weights for policy 0, policy_version 3010 (0.0006) -[2026-06-07 02:12:04,441][324563] Updated weights for policy 0, policy_version 3020 (0.0009) -[2026-06-07 02:12:04,670][324563] Updated weights for policy 0, policy_version 3031 (0.0011) -[2026-06-07 02:12:04,910][324563] Updated weights for policy 0, policy_version 3042 (0.0011) -[2026-06-07 02:12:05,111][324563] Updated weights for policy 0, policy_version 3052 (0.0012) -[2026-06-07 02:12:05,352][324563] Updated weights for policy 0, policy_version 3063 (0.0011) -[2026-06-07 02:12:05,744][321791] Fps is (10 sec: 16383.9, 60 sec: 16384.0, 300 sec: 16556.5). Total num frames: 1572864. Throughput: 0: 15829.3. Samples: 1558784. Policy #0 lag: (min: 63.0, avg: 75.6, max: 127.0) -[2026-06-07 02:12:05,745][321791] Avg episode reward: [(0, '9.880')] -[2026-06-07 02:12:06,286][324563] Updated weights for policy 0, policy_version 3074 (0.0010) -[2026-06-07 02:12:06,506][324563] Updated weights for policy 0, policy_version 3084 (0.0006) -[2026-06-07 02:12:06,744][324563] Updated weights for policy 0, policy_version 3096 (0.0006) -[2026-06-07 02:12:06,929][324563] Updated weights for policy 0, policy_version 3106 (0.0006) -[2026-06-07 02:12:07,152][324563] Updated weights for policy 0, policy_version 3117 (0.0006) -[2026-06-07 02:12:07,413][324563] Updated weights for policy 0, policy_version 3128 (0.0006) -[2026-06-07 02:12:08,372][324563] Updated weights for policy 0, policy_version 3139 (0.0008) -[2026-06-07 02:12:08,567][324563] Updated weights for policy 0, policy_version 3149 (0.0010) -[2026-06-07 02:12:08,766][324563] Updated weights for policy 0, policy_version 3159 (0.0009) -[2026-06-07 02:12:08,983][324563] Updated weights for policy 0, policy_version 3170 (0.0008) -[2026-06-07 02:12:09,213][324563] Updated weights for policy 0, policy_version 3181 (0.0009) -[2026-06-07 02:12:09,475][324563] Updated weights for policy 0, policy_version 3193 (0.0007) -[2026-06-07 02:12:10,391][324563] Updated weights for policy 0, policy_version 3205 (0.0007) -[2026-06-07 02:12:10,714][324563] Updated weights for policy 0, policy_version 3220 (0.0006) -[2026-06-07 02:12:10,744][321791] Fps is (10 sec: 16384.4, 60 sec: 15838.0, 300 sec: 16384.0). Total num frames: 1638400. Throughput: 0: 15965.9. Samples: 1655168. Policy #0 lag: (min: 18.0, avg: 51.0, max: 82.0) -[2026-06-07 02:12:10,745][321791] Avg episode reward: [(0, '9.506')] -[2026-06-07 02:12:10,962][324563] Updated weights for policy 0, policy_version 3233 (0.0006) -[2026-06-07 02:12:11,169][324563] Updated weights for policy 0, policy_version 3243 (0.0006) -[2026-06-07 02:12:11,378][324563] Updated weights for policy 0, policy_version 3253 (0.0006) -[2026-06-07 02:12:11,609][324563] Updated weights for policy 0, policy_version 3264 (0.0007) -[2026-06-07 02:12:12,572][324563] Updated weights for policy 0, policy_version 3274 (0.0007) -[2026-06-07 02:12:12,788][324563] Updated weights for policy 0, policy_version 3285 (0.0006) -[2026-06-07 02:12:13,011][324563] Updated weights for policy 0, policy_version 3296 (0.0006) -[2026-06-07 02:12:13,218][324563] Updated weights for policy 0, policy_version 3306 (0.0007) -[2026-06-07 02:12:13,446][324563] Updated weights for policy 0, policy_version 3317 (0.0007) -[2026-06-07 02:12:13,657][324563] Updated weights for policy 0, policy_version 3328 (0.0007) -[2026-06-07 02:12:14,615][324563] Updated weights for policy 0, policy_version 3338 (0.0007) -[2026-06-07 02:12:14,820][324563] Updated weights for policy 0, policy_version 3348 (0.0007) -[2026-06-07 02:12:15,105][324563] Updated weights for policy 0, policy_version 3362 (0.0007) -[2026-06-07 02:12:15,306][324563] Updated weights for policy 0, policy_version 3373 (0.0007) -[2026-06-07 02:12:15,578][324563] Updated weights for policy 0, policy_version 3386 (0.0007) -[2026-06-07 02:12:15,744][321791] Fps is (10 sec: 16384.1, 60 sec: 16384.0, 300 sec: 16540.1). Total num frames: 1736704. Throughput: 0: 16045.5. Samples: 1752064. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) -[2026-06-07 02:12:15,745][321791] Avg episode reward: [(0, '10.616')] -[2026-06-07 02:12:15,750][324276] Saving new best policy, reward=10.616! -[2026-06-07 02:12:16,522][324563] Updated weights for policy 0, policy_version 3396 (0.0007) -[2026-06-07 02:12:16,722][324563] Updated weights for policy 0, policy_version 3406 (0.0007) -[2026-06-07 02:12:16,961][324563] Updated weights for policy 0, policy_version 3417 (0.0006) -[2026-06-07 02:12:17,237][324563] Updated weights for policy 0, policy_version 3430 (0.0006) -[2026-06-07 02:12:17,468][324563] Updated weights for policy 0, policy_version 3441 (0.0006) -[2026-06-07 02:12:17,683][324563] Updated weights for policy 0, policy_version 3451 (0.0006) -[2026-06-07 02:12:18,597][324563] Updated weights for policy 0, policy_version 3463 (0.0007) -[2026-06-07 02:12:18,856][324563] Updated weights for policy 0, policy_version 3475 (0.0007) -[2026-06-07 02:12:19,108][324563] Updated weights for policy 0, policy_version 3487 (0.0006) -[2026-06-07 02:12:19,285][324563] Updated weights for policy 0, policy_version 3497 (0.0007) -[2026-06-07 02:12:19,505][324563] Updated weights for policy 0, policy_version 3508 (0.0008) -[2026-06-07 02:12:19,724][324563] Updated weights for policy 0, policy_version 3519 (0.0008) -[2026-06-07 02:12:20,671][324563] Updated weights for policy 0, policy_version 3530 (0.0011) -[2026-06-07 02:12:20,744][321791] Fps is (10 sec: 16384.1, 60 sec: 15837.9, 300 sec: 16384.0). Total num frames: 1802240. Throughput: 0: 16028.5. Samples: 1807488. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) -[2026-06-07 02:12:20,745][321791] Avg episode reward: [(0, '12.338')] -[2026-06-07 02:12:20,885][324563] Updated weights for policy 0, policy_version 3540 (0.0010) -[2026-06-07 02:12:21,086][324563] Updated weights for policy 0, policy_version 3550 (0.0011) -[2026-06-07 02:12:21,303][324563] Updated weights for policy 0, policy_version 3561 (0.0011) -[2026-06-07 02:12:21,550][324563] Updated weights for policy 0, policy_version 3572 (0.0010) -[2026-06-07 02:12:21,750][324563] Updated weights for policy 0, policy_version 3582 (0.0011) -[2026-06-07 02:12:21,784][324276] Saving new best policy, reward=12.338! -[2026-06-07 02:12:22,678][324563] Updated weights for policy 0, policy_version 3592 (0.0007) -[2026-06-07 02:12:22,918][324563] Updated weights for policy 0, policy_version 3603 (0.0006) -[2026-06-07 02:12:23,154][324563] Updated weights for policy 0, policy_version 3614 (0.0006) -[2026-06-07 02:12:23,355][324563] Updated weights for policy 0, policy_version 3625 (0.0006) -[2026-06-07 02:12:23,624][324563] Updated weights for policy 0, policy_version 3637 (0.0006) -[2026-06-07 02:12:23,871][324563] Updated weights for policy 0, policy_version 3648 (0.0006) -[2026-06-07 02:12:24,795][324563] Updated weights for policy 0, policy_version 3658 (0.0007) -[2026-06-07 02:12:24,982][324563] Updated weights for policy 0, policy_version 3668 (0.0007) -[2026-06-07 02:12:25,151][324563] Updated weights for policy 0, policy_version 3678 (0.0006) -[2026-06-07 02:12:25,379][324563] Updated weights for policy 0, policy_version 3690 (0.0006) -[2026-06-07 02:12:25,585][324563] Updated weights for policy 0, policy_version 3701 (0.0007) -[2026-06-07 02:12:25,744][321791] Fps is (10 sec: 13107.2, 60 sec: 15837.9, 300 sec: 16241.5). Total num frames: 1867776. Throughput: 0: 16071.1. Samples: 1905792. Policy #0 lag: (min: 11.0, avg: 40.4, max: 75.0) -[2026-06-07 02:12:25,745][321791] Avg episode reward: [(0, '13.671')] -[2026-06-07 02:12:25,791][324563] Updated weights for policy 0, policy_version 3711 (0.0007) -[2026-06-07 02:12:25,807][324276] Saving new best policy, reward=13.671! -[2026-06-07 02:12:26,742][324563] Updated weights for policy 0, policy_version 3721 (0.0006) -[2026-06-07 02:12:26,961][324563] Updated weights for policy 0, policy_version 3732 (0.0006) -[2026-06-07 02:12:27,180][324563] Updated weights for policy 0, policy_version 3743 (0.0007) -[2026-06-07 02:12:27,431][324563] Updated weights for policy 0, policy_version 3755 (0.0007) -[2026-06-07 02:12:27,640][324563] Updated weights for policy 0, policy_version 3765 (0.0006) -[2026-06-07 02:12:27,835][324563] Updated weights for policy 0, policy_version 3775 (0.0006) -[2026-06-07 02:12:28,847][324563] Updated weights for policy 0, policy_version 3787 (0.0006) -[2026-06-07 02:12:29,033][324563] Updated weights for policy 0, policy_version 3797 (0.0007) -[2026-06-07 02:12:29,227][324563] Updated weights for policy 0, policy_version 3807 (0.0007) -[2026-06-07 02:12:29,452][324563] Updated weights for policy 0, policy_version 3817 (0.0007) -[2026-06-07 02:12:29,693][324563] Updated weights for policy 0, policy_version 3829 (0.0006) -[2026-06-07 02:12:29,893][324563] Updated weights for policy 0, policy_version 3839 (0.0006) -[2026-06-07 02:12:30,744][321791] Fps is (10 sec: 16384.0, 60 sec: 15837.9, 300 sec: 16384.0). Total num frames: 1966080. Throughput: 0: 16091.2. Samples: 2005376. Policy #0 lag: (min: 41.0, avg: 72.4, max: 105.0) -[2026-06-07 02:12:30,745][321791] Avg episode reward: [(0, '16.579')] -[2026-06-07 02:12:30,772][324563] Updated weights for policy 0, policy_version 3849 (0.0006) -[2026-06-07 02:12:30,989][324563] Updated weights for policy 0, policy_version 3860 (0.0007) -[2026-06-07 02:12:31,213][324563] Updated weights for policy 0, policy_version 3872 (0.0006) -[2026-06-07 02:12:31,438][324563] Updated weights for policy 0, policy_version 3883 (0.0006) -[2026-06-07 02:12:31,665][324563] Updated weights for policy 0, policy_version 3894 (0.0010) -[2026-06-07 02:12:31,863][324276] Saving new best policy, reward=16.579! -[2026-06-07 02:12:32,599][324563] Updated weights for policy 0, policy_version 3905 (0.0009) -[2026-06-07 02:12:32,905][324563] Updated weights for policy 0, policy_version 3919 (0.0008) -[2026-06-07 02:12:33,133][324563] Updated weights for policy 0, policy_version 3930 (0.0006) -[2026-06-07 02:12:33,342][324563] Updated weights for policy 0, policy_version 3940 (0.0006) -[2026-06-07 02:12:33,567][324563] Updated weights for policy 0, policy_version 3951 (0.0006) -[2026-06-07 02:12:33,781][324563] Updated weights for policy 0, policy_version 3961 (0.0006) -[2026-06-07 02:12:34,703][324563] Updated weights for policy 0, policy_version 3972 (0.0007) -[2026-06-07 02:12:34,925][324563] Updated weights for policy 0, policy_version 3982 (0.0006) -[2026-06-07 02:12:35,137][324563] Updated weights for policy 0, policy_version 3992 (0.0006) -[2026-06-07 02:12:35,360][324563] Updated weights for policy 0, policy_version 4003 (0.0007) -[2026-06-07 02:12:35,555][324563] Updated weights for policy 0, policy_version 4014 (0.0006) -[2026-06-07 02:12:35,745][321791] Fps is (10 sec: 16382.2, 60 sec: 15837.6, 300 sec: 16252.8). Total num frames: 2031616. Throughput: 0: 16102.1. Samples: 2047232. Policy #0 lag: (min: 63.0, avg: 75.6, max: 127.0) -[2026-06-07 02:12:35,747][321791] Avg episode reward: [(0, '17.146')] -[2026-06-07 02:12:35,771][324563] Updated weights for policy 0, policy_version 4025 (0.0006) -[2026-06-07 02:12:35,909][324276] Saving new best policy, reward=17.146! -[2026-06-07 02:12:36,660][324563] Updated weights for policy 0, policy_version 4036 (0.0006) -[2026-06-07 02:12:36,931][324563] Updated weights for policy 0, policy_version 4048 (0.0006) -[2026-06-07 02:12:37,152][324563] Updated weights for policy 0, policy_version 4059 (0.0006) -[2026-06-07 02:12:37,373][324563] Updated weights for policy 0, policy_version 4069 (0.0006) -[2026-06-07 02:12:37,619][324563] Updated weights for policy 0, policy_version 4081 (0.0007) -[2026-06-07 02:12:37,810][324563] Updated weights for policy 0, policy_version 4091 (0.0007) -[2026-06-07 02:12:38,693][324563] Updated weights for policy 0, policy_version 4102 (0.0007) -[2026-06-07 02:12:38,905][324563] Updated weights for policy 0, policy_version 4112 (0.0006) -[2026-06-07 02:12:39,089][324563] Updated weights for policy 0, policy_version 4122 (0.0006) -[2026-06-07 02:12:39,274][324563] Updated weights for policy 0, policy_version 4132 (0.0006) -[2026-06-07 02:12:39,514][324563] Updated weights for policy 0, policy_version 4144 (0.0006) -[2026-06-07 02:12:39,728][324563] Updated weights for policy 0, policy_version 4155 (0.0006) -[2026-06-07 02:12:40,679][324563] Updated weights for policy 0, policy_version 4167 (0.0006) -[2026-06-07 02:12:40,744][321791] Fps is (10 sec: 16384.0, 60 sec: 15837.9, 300 sec: 16384.0). Total num frames: 2129920. Throughput: 0: 16170.7. Samples: 2146432. Policy #0 lag: (min: 50.0, avg: 91.4, max: 114.0) -[2026-06-07 02:12:40,745][321791] Avg episode reward: [(0, '21.655')] -[2026-06-07 02:12:40,888][324563] Updated weights for policy 0, policy_version 4177 (0.0006) -[2026-06-07 02:12:41,100][324563] Updated weights for policy 0, policy_version 4188 (0.0007) -[2026-06-07 02:12:41,310][324563] Updated weights for policy 0, policy_version 4198 (0.0006) -[2026-06-07 02:12:41,536][324563] Updated weights for policy 0, policy_version 4209 (0.0009) -[2026-06-07 02:12:41,771][324563] Updated weights for policy 0, policy_version 4221 (0.0006) -[2026-06-07 02:12:41,816][324276] Saving new best policy, reward=21.655! -[2026-06-07 02:12:42,601][324563] Updated weights for policy 0, policy_version 4231 (0.0010) -[2026-06-07 02:12:42,801][324563] Updated weights for policy 0, policy_version 4242 (0.0011) -[2026-06-07 02:12:43,031][324563] Updated weights for policy 0, policy_version 4253 (0.0009) -[2026-06-07 02:12:43,239][324563] Updated weights for policy 0, policy_version 4263 (0.0006) -[2026-06-07 02:12:43,435][324563] Updated weights for policy 0, policy_version 4273 (0.0006) -[2026-06-07 02:12:43,628][324563] Updated weights for policy 0, policy_version 4283 (0.0006) -[2026-06-07 02:12:44,537][324563] Updated weights for policy 0, policy_version 4295 (0.0006) -[2026-06-07 02:12:44,735][324563] Updated weights for policy 0, policy_version 4305 (0.0006) -[2026-06-07 02:12:44,935][324563] Updated weights for policy 0, policy_version 4315 (0.0007) -[2026-06-07 02:12:45,171][324563] Updated weights for policy 0, policy_version 4326 (0.0006) -[2026-06-07 02:12:45,362][324563] Updated weights for policy 0, policy_version 4336 (0.0006) -[2026-06-07 02:12:45,591][324563] Updated weights for policy 0, policy_version 4348 (0.0007) -[2026-06-07 02:12:45,744][321791] Fps is (10 sec: 19663.1, 60 sec: 16384.2, 300 sec: 16505.4). Total num frames: 2228224. Throughput: 0: 16230.5. Samples: 2246656. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) -[2026-06-07 02:12:45,745][321791] Avg episode reward: [(0, '25.333')] -[2026-06-07 02:12:45,750][324276] Saving new best policy, reward=25.333! -[2026-06-07 02:12:46,469][324563] Updated weights for policy 0, policy_version 4358 (0.0006) -[2026-06-07 02:12:46,662][324563] Updated weights for policy 0, policy_version 4368 (0.0011) -[2026-06-07 02:12:46,945][324563] Updated weights for policy 0, policy_version 4381 (0.0011) -[2026-06-07 02:12:47,139][324563] Updated weights for policy 0, policy_version 4391 (0.0008) -[2026-06-07 02:12:47,335][324563] Updated weights for policy 0, policy_version 4402 (0.0006) -[2026-06-07 02:12:47,563][324563] Updated weights for policy 0, policy_version 4412 (0.0006) -[2026-06-07 02:12:48,411][324563] Updated weights for policy 0, policy_version 4422 (0.0007) -[2026-06-07 02:12:48,717][324563] Updated weights for policy 0, policy_version 4435 (0.0007) -[2026-06-07 02:12:48,927][324563] Updated weights for policy 0, policy_version 4446 (0.0007) -[2026-06-07 02:12:49,182][324563] Updated weights for policy 0, policy_version 4458 (0.0006) -[2026-06-07 02:12:49,390][324563] Updated weights for policy 0, policy_version 4469 (0.0006) -[2026-06-07 02:12:49,575][324563] Updated weights for policy 0, policy_version 4479 (0.0006) -[2026-06-07 02:12:50,389][324563] Updated weights for policy 0, policy_version 4489 (0.0011) -[2026-06-07 02:12:50,599][324563] Updated weights for policy 0, policy_version 4499 (0.0011) -[2026-06-07 02:12:50,744][321791] Fps is (10 sec: 16383.9, 60 sec: 16384.1, 300 sec: 16384.0). Total num frames: 2293760. Throughput: 0: 16560.3. Samples: 2304000. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) -[2026-06-07 02:12:50,745][321791] Avg episode reward: [(0, '29.924')] -[2026-06-07 02:12:50,798][324563] Updated weights for policy 0, policy_version 4509 (0.0007) -[2026-06-07 02:12:51,009][324563] Updated weights for policy 0, policy_version 4520 (0.0006) -[2026-06-07 02:12:51,199][324563] Updated weights for policy 0, policy_version 4530 (0.0006) -[2026-06-07 02:12:51,411][324563] Updated weights for policy 0, policy_version 4540 (0.0007) -[2026-06-07 02:12:51,485][324276] Saving new best policy, reward=29.924! -[2026-06-07 02:12:52,230][324563] Updated weights for policy 0, policy_version 4550 (0.0007) -[2026-06-07 02:12:52,470][324563] Updated weights for policy 0, policy_version 4562 (0.0010) -[2026-06-07 02:12:52,688][324563] Updated weights for policy 0, policy_version 4572 (0.0009) -[2026-06-07 02:12:52,928][324563] Updated weights for policy 0, policy_version 4583 (0.0007) -[2026-06-07 02:12:53,103][324563] Updated weights for policy 0, policy_version 4593 (0.0006) -[2026-06-07 02:12:53,345][324563] Updated weights for policy 0, policy_version 4604 (0.0007) -[2026-06-07 02:12:54,177][324563] Updated weights for policy 0, policy_version 4614 (0.0007) -[2026-06-07 02:12:54,389][324563] Updated weights for policy 0, policy_version 4624 (0.0010) -[2026-06-07 02:12:54,644][324563] Updated weights for policy 0, policy_version 4637 (0.0010) -[2026-06-07 02:12:54,855][324563] Updated weights for policy 0, policy_version 4647 (0.0006) -[2026-06-07 02:12:55,038][324563] Updated weights for policy 0, policy_version 4657 (0.0008) -[2026-06-07 02:12:55,246][324563] Updated weights for policy 0, policy_version 4667 (0.0008) -[2026-06-07 02:12:55,744][321791] Fps is (10 sec: 16384.0, 60 sec: 16384.0, 300 sec: 16497.0). Total num frames: 2392064. Throughput: 0: 16631.5. Samples: 2403584. Policy #0 lag: (min: 56.0, avg: 96.4, max: 120.0) -[2026-06-07 02:12:55,745][321791] Avg episode reward: [(0, '31.476')] -[2026-06-07 02:12:55,750][324276] Saving new best policy, reward=31.476! -[2026-06-07 02:12:56,119][324563] Updated weights for policy 0, policy_version 4677 (0.0007) -[2026-06-07 02:12:56,358][324563] Updated weights for policy 0, policy_version 4688 (0.0008) -[2026-06-07 02:12:56,511][324276] Early stopping after 3 epochs (24 sgd steps), loss delta 0.0000007 -[2026-06-07 02:12:57,449][324563] Updated weights for policy 0, policy_version 4698 (0.0008) -[2026-06-07 02:12:57,679][324563] Updated weights for policy 0, policy_version 4709 (0.0006) -[2026-06-07 02:12:57,881][324563] Updated weights for policy 0, policy_version 4719 (0.0006) -[2026-06-07 02:12:58,097][324563] Updated weights for policy 0, policy_version 4729 (0.0007) -[2026-06-07 02:12:58,302][324563] Updated weights for policy 0, policy_version 4740 (0.0006) -[2026-06-07 02:12:58,516][324563] Updated weights for policy 0, policy_version 4751 (0.0006) -[2026-06-07 02:12:59,354][324563] Updated weights for policy 0, policy_version 4761 (0.0007) -[2026-06-07 02:12:59,571][324563] Updated weights for policy 0, policy_version 4771 (0.0006) -[2026-06-07 02:12:59,765][324563] Updated weights for policy 0, policy_version 4781 (0.0006) -[2026-06-07 02:12:59,979][324563] Updated weights for policy 0, policy_version 4791 (0.0006) -[2026-06-07 02:13:00,177][324563] Updated weights for policy 0, policy_version 4801 (0.0006) -[2026-06-07 02:13:00,403][324563] Updated weights for policy 0, policy_version 4812 (0.0006) -[2026-06-07 02:13:00,603][324563] Updated weights for policy 0, policy_version 4822 (0.0010) -[2026-06-07 02:13:00,744][321791] Fps is (10 sec: 19660.9, 60 sec: 16930.2, 300 sec: 16602.5). Total num frames: 2490368. Throughput: 0: 16876.1. Samples: 2511488. Policy #0 lag: (min: 2.0, avg: 51.0, max: 66.0) -[2026-06-07 02:13:00,745][321791] Avg episode reward: [(0, '34.385')] -[2026-06-07 02:13:00,750][324276] Saving new best policy, reward=34.385! -[2026-06-07 02:13:01,516][324563] Updated weights for policy 0, policy_version 4835 (0.0007) -[2026-06-07 02:13:01,723][324563] Updated weights for policy 0, policy_version 4845 (0.0006) -[2026-06-07 02:13:01,931][324563] Updated weights for policy 0, policy_version 4855 (0.0006) -[2026-06-07 02:13:02,147][324563] Updated weights for policy 0, policy_version 4865 (0.0006) -[2026-06-07 02:13:02,364][324563] Updated weights for policy 0, policy_version 4876 (0.0007) -[2026-06-07 02:13:02,609][324563] Updated weights for policy 0, policy_version 4888 (0.0006) -[2026-06-07 02:13:03,490][324563] Updated weights for policy 0, policy_version 4900 (0.0007) -[2026-06-07 02:13:03,684][324563] Updated weights for policy 0, policy_version 4910 (0.0009) -[2026-06-07 02:13:03,869][324563] Updated weights for policy 0, policy_version 4920 (0.0011) -[2026-06-07 02:13:04,113][324563] Updated weights for policy 0, policy_version 4931 (0.0011) -[2026-06-07 02:13:04,336][324563] Updated weights for policy 0, policy_version 4941 (0.0010) -[2026-06-07 02:13:04,533][324563] Updated weights for policy 0, policy_version 4952 (0.0010) -[2026-06-07 02:13:05,447][324563] Updated weights for policy 0, policy_version 4963 (0.0011) -[2026-06-07 02:13:05,632][324563] Updated weights for policy 0, policy_version 4973 (0.0010) -[2026-06-07 02:13:05,744][321791] Fps is (10 sec: 16384.0, 60 sec: 16384.0, 300 sec: 16489.7). Total num frames: 2555904. Throughput: 0: 16910.2. Samples: 2568448. Policy #0 lag: (min: 2.0, avg: 51.0, max: 66.0) -[2026-06-07 02:13:05,745][321791] Avg episode reward: [(0, '32.130')] -[2026-06-07 02:13:05,860][324563] Updated weights for policy 0, policy_version 4983 (0.0011) -[2026-06-07 02:13:06,047][324563] Updated weights for policy 0, policy_version 4993 (0.0011) -[2026-06-07 02:13:06,236][324563] Updated weights for policy 0, policy_version 5003 (0.0009) -[2026-06-07 02:13:06,441][324563] Updated weights for policy 0, policy_version 5013 (0.0007) -[2026-06-07 02:13:07,295][324563] Updated weights for policy 0, policy_version 5023 (0.0007) -[2026-06-07 02:13:07,494][324563] Updated weights for policy 0, policy_version 5033 (0.0009) -[2026-06-07 02:13:07,712][324563] Updated weights for policy 0, policy_version 5043 (0.0011) -[2026-06-07 02:13:07,921][324563] Updated weights for policy 0, policy_version 5053 (0.0011) -[2026-06-07 02:13:08,147][324563] Updated weights for policy 0, policy_version 5064 (0.0009) -[2026-06-07 02:13:08,376][324563] Updated weights for policy 0, policy_version 5075 (0.0006) -[2026-06-07 02:13:09,198][324563] Updated weights for policy 0, policy_version 5085 (0.0009) -[2026-06-07 02:13:09,416][324563] Updated weights for policy 0, policy_version 5096 (0.0011) -[2026-06-07 02:13:09,642][324563] Updated weights for policy 0, policy_version 5106 (0.0011) -[2026-06-07 02:13:09,895][324563] Updated weights for policy 0, policy_version 5118 (0.0011) -[2026-06-07 02:13:10,123][324563] Updated weights for policy 0, policy_version 5128 (0.0011) -[2026-06-07 02:13:10,348][324563] Updated weights for policy 0, policy_version 5139 (0.0009) -[2026-06-07 02:13:10,744][321791] Fps is (10 sec: 16383.8, 60 sec: 16930.1, 300 sec: 16588.8). Total num frames: 2654208. Throughput: 0: 17009.7. Samples: 2671232. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) -[2026-06-07 02:13:10,745][321791] Avg episode reward: [(0, '37.386')] -[2026-06-07 02:13:10,752][324276] Saving new best policy, reward=37.386! -[2026-06-07 02:13:11,156][324563] Updated weights for policy 0, policy_version 5149 (0.0006) -[2026-06-07 02:13:11,327][324563] Updated weights for policy 0, policy_version 5159 (0.0007) -[2026-06-07 02:13:11,552][324563] Updated weights for policy 0, policy_version 5170 (0.0007) -[2026-06-07 02:13:11,783][324563] Updated weights for policy 0, policy_version 5181 (0.0006) -[2026-06-07 02:13:12,009][324563] Updated weights for policy 0, policy_version 5192 (0.0006) -[2026-06-07 02:13:12,204][324563] Updated weights for policy 0, policy_version 5202 (0.0006) -[2026-06-07 02:13:13,061][324563] Updated weights for policy 0, policy_version 5213 (0.0008) -[2026-06-07 02:13:13,269][324563] Updated weights for policy 0, policy_version 5223 (0.0010) -[2026-06-07 02:13:13,523][324563] Updated weights for policy 0, policy_version 5235 (0.0008) -[2026-06-07 02:13:13,720][324563] Updated weights for policy 0, policy_version 5245 (0.0010) -[2026-06-07 02:13:13,942][324563] Updated weights for policy 0, policy_version 5256 (0.0008) -[2026-06-07 02:13:14,161][324563] Updated weights for policy 0, policy_version 5267 (0.0007) -[2026-06-07 02:13:14,990][324563] Updated weights for policy 0, policy_version 5277 (0.0007) -[2026-06-07 02:13:15,229][324563] Updated weights for policy 0, policy_version 5289 (0.0007) -[2026-06-07 02:13:15,453][324563] Updated weights for policy 0, policy_version 5299 (0.0009) -[2026-06-07 02:13:15,674][324563] Updated weights for policy 0, policy_version 5310 (0.0010) -[2026-06-07 02:13:15,744][321791] Fps is (10 sec: 16384.0, 60 sec: 16384.0, 300 sec: 16483.3). Total num frames: 2719744. Throughput: 0: 17046.8. Samples: 2772480. Policy #0 lag: (min: 60.0, avg: 73.4, max: 124.0) -[2026-06-07 02:13:15,745][321791] Avg episode reward: [(0, '45.594')] -[2026-06-07 02:13:15,867][324563] Updated weights for policy 0, policy_version 5320 (0.0007) -[2026-06-07 02:13:16,093][324563] Updated weights for policy 0, policy_version 5331 (0.0006) -[2026-06-07 02:13:16,174][324276] Saving new best policy, reward=45.594! -[2026-06-07 02:13:16,931][324563] Updated weights for policy 0, policy_version 5341 (0.0006) -[2026-06-07 02:13:17,132][324563] Updated weights for policy 0, policy_version 5351 (0.0007) -[2026-06-07 02:13:17,321][324563] Updated weights for policy 0, policy_version 5361 (0.0012) -[2026-06-07 02:13:17,525][324563] Updated weights for policy 0, policy_version 5371 (0.0011) -[2026-06-07 02:13:17,749][324563] Updated weights for policy 0, policy_version 5381 (0.0011) -[2026-06-07 02:13:17,941][324563] Updated weights for policy 0, policy_version 5391 (0.0011) -[2026-06-07 02:13:18,740][324563] Updated weights for policy 0, policy_version 5401 (0.0011) -[2026-06-07 02:13:18,976][324563] Updated weights for policy 0, policy_version 5411 (0.0007) -[2026-06-07 02:13:19,185][324563] Updated weights for policy 0, policy_version 5421 (0.0011) -[2026-06-07 02:13:19,385][324563] Updated weights for policy 0, policy_version 5431 (0.0011) -[2026-06-07 02:13:19,617][324563] Updated weights for policy 0, policy_version 5441 (0.0011) -[2026-06-07 02:13:19,832][324563] Updated weights for policy 0, policy_version 5451 (0.0011) -[2026-06-07 02:13:20,061][324563] Updated weights for policy 0, policy_version 5461 (0.0011) -[2026-06-07 02:13:20,744][321791] Fps is (10 sec: 16384.1, 60 sec: 16930.1, 300 sec: 16576.8). Total num frames: 2818048. Throughput: 0: 17354.4. Samples: 2828160. Policy #0 lag: (min: 60.0, avg: 73.4, max: 124.0) -[2026-06-07 02:13:20,745][321791] Avg episode reward: [(0, '51.743')] -[2026-06-07 02:13:20,831][324563] Updated weights for policy 0, policy_version 5471 (0.0008) -[2026-06-07 02:13:21,037][324563] Updated weights for policy 0, policy_version 5481 (0.0006) -[2026-06-07 02:13:21,227][324563] Updated weights for policy 0, policy_version 5491 (0.0006) -[2026-06-07 02:13:21,428][324563] Updated weights for policy 0, policy_version 5501 (0.0006) -[2026-06-07 02:13:21,634][324563] Updated weights for policy 0, policy_version 5511 (0.0006) -[2026-06-07 02:13:21,829][324563] Updated weights for policy 0, policy_version 5521 (0.0006) -[2026-06-07 02:13:21,964][324276] Saving new best policy, reward=51.743! -[2026-06-07 02:13:22,652][324563] Updated weights for policy 0, policy_version 5531 (0.0007) -[2026-06-07 02:13:22,859][324563] Updated weights for policy 0, policy_version 5541 (0.0006) -[2026-06-07 02:13:23,080][324563] Updated weights for policy 0, policy_version 5552 (0.0006) -[2026-06-07 02:13:23,313][324563] Updated weights for policy 0, policy_version 5562 (0.0006) -[2026-06-07 02:13:23,512][324563] Updated weights for policy 0, policy_version 5572 (0.0006) -[2026-06-07 02:13:23,745][324563] Updated weights for policy 0, policy_version 5583 (0.0006) -[2026-06-07 02:13:24,605][324563] Updated weights for policy 0, policy_version 5594 (0.0006) -[2026-06-07 02:13:24,808][324563] Updated weights for policy 0, policy_version 5604 (0.0006) -[2026-06-07 02:13:25,005][324563] Updated weights for policy 0, policy_version 5614 (0.0007) -[2026-06-07 02:13:25,216][324563] Updated weights for policy 0, policy_version 5624 (0.0006) -[2026-06-07 02:13:25,429][324563] Updated weights for policy 0, policy_version 5634 (0.0006) -[2026-06-07 02:13:25,663][324563] Updated weights for policy 0, policy_version 5645 (0.0007) -[2026-06-07 02:13:25,744][321791] Fps is (10 sec: 16384.0, 60 sec: 16930.1, 300 sec: 16477.6). Total num frames: 2883584. Throughput: 0: 17442.1. Samples: 2931328. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) -[2026-06-07 02:13:25,745][321791] Avg episode reward: [(0, '60.733')] -[2026-06-07 02:13:25,872][324563] Updated weights for policy 0, policy_version 5655 (0.0006) -[2026-06-07 02:13:25,882][324276] Saving new best policy, reward=60.733! -[2026-06-07 02:13:26,696][324563] Updated weights for policy 0, policy_version 5665 (0.0006) -[2026-06-07 02:13:26,914][324563] Updated weights for policy 0, policy_version 5676 (0.0006) -[2026-06-07 02:13:27,129][324563] Updated weights for policy 0, policy_version 5687 (0.0007) -[2026-06-07 02:13:27,332][324563] Updated weights for policy 0, policy_version 5697 (0.0006) -[2026-06-07 02:13:27,533][324563] Updated weights for policy 0, policy_version 5707 (0.0006) -[2026-06-07 02:13:27,733][324563] Updated weights for policy 0, policy_version 5717 (0.0007) -[2026-06-07 02:13:28,499][324563] Updated weights for policy 0, policy_version 5727 (0.0006) -[2026-06-07 02:13:28,708][324563] Updated weights for policy 0, policy_version 5737 (0.0006) -[2026-06-07 02:13:28,936][324563] Updated weights for policy 0, policy_version 5748 (0.0006) -[2026-06-07 02:13:29,164][324563] Updated weights for policy 0, policy_version 5759 (0.0007) -[2026-06-07 02:13:29,380][324563] Updated weights for policy 0, policy_version 5769 (0.0006) -[2026-06-07 02:13:29,585][324563] Updated weights for policy 0, policy_version 5779 (0.0006) -[2026-06-07 02:13:30,345][324563] Updated weights for policy 0, policy_version 5789 (0.0006) -[2026-06-07 02:13:30,609][324563] Updated weights for policy 0, policy_version 5800 (0.0006) -[2026-06-07 02:13:30,744][321791] Fps is (10 sec: 16384.0, 60 sec: 16930.1, 300 sec: 16566.1). Total num frames: 2981888. Throughput: 0: 17473.4. Samples: 3032960. Policy #0 lag: (min: 63.0, avg: 76.9, max: 127.0) -[2026-06-07 02:13:30,745][321791] Avg episode reward: [(0, '63.845')] -[2026-06-07 02:13:30,793][324563] Updated weights for policy 0, policy_version 5810 (0.0007) -[2026-06-07 02:13:31,010][324563] Updated weights for policy 0, policy_version 5821 (0.0006) -[2026-06-07 02:13:31,195][324563] Updated weights for policy 0, policy_version 5831 (0.0007) -[2026-06-07 02:13:31,409][324563] Updated weights for policy 0, policy_version 5841 (0.0007) -[2026-06-07 02:13:31,547][324276] Saving new best policy, reward=63.845! -[2026-06-07 02:13:32,216][324563] Updated weights for policy 0, policy_version 5851 (0.0007) -[2026-06-07 02:13:32,425][324563] Updated weights for policy 0, policy_version 5861 (0.0007) -[2026-06-07 02:13:32,639][324563] Updated weights for policy 0, policy_version 5871 (0.0006) -[2026-06-07 02:13:32,847][324563] Updated weights for policy 0, policy_version 5881 (0.0006) -[2026-06-07 02:13:33,070][324563] Updated weights for policy 0, policy_version 5891 (0.0007) -[2026-06-07 02:13:33,266][324563] Updated weights for policy 0, policy_version 5901 (0.0007) -[2026-06-07 02:13:33,493][324563] Updated weights for policy 0, policy_version 5911 (0.0006) -[2026-06-07 02:13:34,265][324563] Updated weights for policy 0, policy_version 5921 (0.0006) -[2026-06-07 02:13:34,485][324563] Updated weights for policy 0, policy_version 5931 (0.0007) -[2026-06-07 02:13:34,699][324563] Updated weights for policy 0, policy_version 5941 (0.0008) -[2026-06-07 02:13:34,912][324563] Updated weights for policy 0, policy_version 5951 (0.0011) -[2026-06-07 02:13:35,109][324563] Updated weights for policy 0, policy_version 5961 (0.0007) -[2026-06-07 02:13:35,290][324563] Updated weights for policy 0, policy_version 5971 (0.0009) -[2026-06-07 02:13:35,744][321791] Fps is (10 sec: 19660.7, 60 sec: 17476.6, 300 sec: 16649.7). Total num frames: 3080192. Throughput: 0: 17171.9. Samples: 3076736. Policy #0 lag: (min: 63.0, avg: 76.9, max: 127.0) -[2026-06-07 02:13:35,745][321791] Avg episode reward: [(0, '63.597')] -[2026-06-07 02:13:36,081][324563] Updated weights for policy 0, policy_version 5981 (0.0010) -[2026-06-07 02:13:36,301][324563] Updated weights for policy 0, policy_version 5992 (0.0009) -[2026-06-07 02:13:36,507][324563] Updated weights for policy 0, policy_version 6002 (0.0008) -[2026-06-07 02:13:36,711][324563] Updated weights for policy 0, policy_version 6012 (0.0006) -[2026-06-07 02:13:36,923][324563] Updated weights for policy 0, policy_version 6022 (0.0006) -[2026-06-07 02:13:37,112][324563] Updated weights for policy 0, policy_version 6032 (0.0007) -[2026-06-07 02:13:37,978][324563] Updated weights for policy 0, policy_version 6044 (0.0006) -[2026-06-07 02:13:38,229][324563] Updated weights for policy 0, policy_version 6055 (0.0006) -[2026-06-07 02:13:38,455][324563] Updated weights for policy 0, policy_version 6066 (0.0006) -[2026-06-07 02:13:38,655][324563] Updated weights for policy 0, policy_version 6076 (0.0006) -[2026-06-07 02:13:38,903][324563] Updated weights for policy 0, policy_version 6087 (0.0006) -[2026-06-07 02:13:39,094][324563] Updated weights for policy 0, policy_version 6097 (0.0007) -[2026-06-07 02:13:39,847][324563] Updated weights for policy 0, policy_version 6107 (0.0007) -[2026-06-07 02:13:40,045][324563] Updated weights for policy 0, policy_version 6117 (0.0007) -[2026-06-07 02:13:40,271][324563] Updated weights for policy 0, policy_version 6127 (0.0007) -[2026-06-07 02:13:40,498][324563] Updated weights for policy 0, policy_version 6138 (0.0006) -[2026-06-07 02:13:40,728][324563] Updated weights for policy 0, policy_version 6149 (0.0006) -[2026-06-07 02:13:40,744][321791] Fps is (10 sec: 16383.8, 60 sec: 16930.1, 300 sec: 16556.5). Total num frames: 3145728. Throughput: 0: 17405.1. Samples: 3186816. Policy #0 lag: (min: 54.0, avg: 68.7, max: 118.0) -[2026-06-07 02:13:40,745][321791] Avg episode reward: [(0, '63.794')] -[2026-06-07 02:13:40,938][324563] Updated weights for policy 0, policy_version 6159 (0.0006) -[2026-06-07 02:13:41,727][324563] Updated weights for policy 0, policy_version 6169 (0.0007) -[2026-06-07 02:13:41,919][324563] Updated weights for policy 0, policy_version 6179 (0.0006) -[2026-06-07 02:13:42,146][324563] Updated weights for policy 0, policy_version 6189 (0.0006) -[2026-06-07 02:13:42,336][324563] Updated weights for policy 0, policy_version 6199 (0.0006) -[2026-06-07 02:13:42,573][324563] Updated weights for policy 0, policy_version 6210 (0.0006) -[2026-06-07 02:13:42,825][324563] Updated weights for policy 0, policy_version 6221 (0.0006) -[2026-06-07 02:13:43,041][324563] Updated weights for policy 0, policy_version 6231 (0.0006) -[2026-06-07 02:13:43,863][324563] Updated weights for policy 0, policy_version 6242 (0.0007) -[2026-06-07 02:13:44,069][324563] Updated weights for policy 0, policy_version 6252 (0.0006) -[2026-06-07 02:13:44,288][324563] Updated weights for policy 0, policy_version 6263 (0.0007) -[2026-06-07 02:13:44,505][324563] Updated weights for policy 0, policy_version 6274 (0.0007) -[2026-06-07 02:13:44,711][324563] Updated weights for policy 0, policy_version 6284 (0.0006) -[2026-06-07 02:13:44,912][324563] Updated weights for policy 0, policy_version 6294 (0.0007) -[2026-06-07 02:13:45,665][324563] Updated weights for policy 0, policy_version 6304 (0.0007) -[2026-06-07 02:13:45,744][321791] Fps is (10 sec: 16383.8, 60 sec: 16930.1, 300 sec: 16636.1). Total num frames: 3244032. Throughput: 0: 17348.2. Samples: 3292160. Policy #0 lag: (min: 36.0, avg: 50.5, max: 100.0) -[2026-06-07 02:13:45,746][321791] Avg episode reward: [(0, '76.906')] -[2026-06-07 02:13:45,885][324563] Updated weights for policy 0, policy_version 6315 (0.0007) -[2026-06-07 02:13:46,122][324563] Updated weights for policy 0, policy_version 6326 (0.0007) -[2026-06-07 02:13:46,359][324563] Updated weights for policy 0, policy_version 6337 (0.0007) -[2026-06-07 02:13:46,589][324563] Updated weights for policy 0, policy_version 6348 (0.0006) -[2026-06-07 02:13:46,793][324563] Updated weights for policy 0, policy_version 6358 (0.0007) -[2026-06-07 02:13:46,840][324276] Saving new best policy, reward=76.906! -[2026-06-07 02:13:47,640][324563] Updated weights for policy 0, policy_version 6368 (0.0007) -[2026-06-07 02:13:47,851][324563] Updated weights for policy 0, policy_version 6378 (0.0007) -[2026-06-07 02:13:48,083][324563] Updated weights for policy 0, policy_version 6388 (0.0007) -[2026-06-07 02:13:48,295][324563] Updated weights for policy 0, policy_version 6398 (0.0007) -[2026-06-07 02:13:48,500][324563] Updated weights for policy 0, policy_version 6409 (0.0006) -[2026-06-07 02:13:48,717][324563] Updated weights for policy 0, policy_version 6419 (0.0006) -[2026-06-07 02:13:49,506][324563] Updated weights for policy 0, policy_version 6429 (0.0006) -[2026-06-07 02:13:49,756][324563] Updated weights for policy 0, policy_version 6441 (0.0007) -[2026-06-07 02:13:49,954][324563] Updated weights for policy 0, policy_version 6451 (0.0007) -[2026-06-07 02:13:50,168][324563] Updated weights for policy 0, policy_version 6461 (0.0007) -[2026-06-07 02:13:50,375][324563] Updated weights for policy 0, policy_version 6471 (0.0006) -[2026-06-07 02:13:50,582][324563] Updated weights for policy 0, policy_version 6481 (0.0007) -[2026-06-07 02:13:50,744][321791] Fps is (10 sec: 19661.2, 60 sec: 17476.3, 300 sec: 16711.7). Total num frames: 3342336. Throughput: 0: 17049.6. Samples: 3335680. Policy #0 lag: (min: 36.0, avg: 50.5, max: 100.0) -[2026-06-07 02:13:50,745][321791] Avg episode reward: [(0, '88.595')] -[2026-06-07 02:13:50,750][324276] Saving new best policy, reward=88.595! -[2026-06-07 02:13:51,380][324563] Updated weights for policy 0, policy_version 6491 (0.0006) -[2026-06-07 02:13:51,575][324563] Updated weights for policy 0, policy_version 6501 (0.0007) -[2026-06-07 02:13:51,779][324563] Updated weights for policy 0, policy_version 6511 (0.0006) -[2026-06-07 02:13:51,969][324563] Updated weights for policy 0, policy_version 6521 (0.0006) -[2026-06-07 02:13:52,187][324563] Updated weights for policy 0, policy_version 6531 (0.0006) -[2026-06-07 02:13:52,395][324563] Updated weights for policy 0, policy_version 6541 (0.0006) -[2026-06-07 02:13:52,617][324563] Updated weights for policy 0, policy_version 6551 (0.0006) -[2026-06-07 02:13:53,406][324563] Updated weights for policy 0, policy_version 6561 (0.0006) -[2026-06-07 02:13:53,620][324563] Updated weights for policy 0, policy_version 6571 (0.0006) -[2026-06-07 02:13:53,853][324563] Updated weights for policy 0, policy_version 6582 (0.0006) -[2026-06-07 02:13:54,057][324563] Updated weights for policy 0, policy_version 6592 (0.0006) -[2026-06-07 02:13:54,269][324563] Updated weights for policy 0, policy_version 6602 (0.0006) -[2026-06-07 02:13:54,480][324563] Updated weights for policy 0, policy_version 6612 (0.0006) -[2026-06-07 02:13:55,254][324563] Updated weights for policy 0, policy_version 6622 (0.0008) -[2026-06-07 02:13:55,477][324563] Updated weights for policy 0, policy_version 6632 (0.0011) -[2026-06-07 02:13:55,702][324563] Updated weights for policy 0, policy_version 6643 (0.0010) -[2026-06-07 02:13:55,744][321791] Fps is (10 sec: 16384.1, 60 sec: 16930.1, 300 sec: 16623.8). Total num frames: 3407872. Throughput: 0: 17021.2. Samples: 3437184. Policy #0 lag: (min: 38.0, avg: 51.8, max: 102.0) -[2026-06-07 02:13:55,745][321791] Avg episode reward: [(0, '76.996')] -[2026-06-07 02:13:55,957][324563] Updated weights for policy 0, policy_version 6654 (0.0010) -[2026-06-07 02:13:56,156][324563] Updated weights for policy 0, policy_version 6664 (0.0008) -[2026-06-07 02:13:56,383][324563] Updated weights for policy 0, policy_version 6674 (0.0006) -[2026-06-07 02:13:57,166][324563] Updated weights for policy 0, policy_version 6684 (0.0006) -[2026-06-07 02:13:57,386][324563] Updated weights for policy 0, policy_version 6694 (0.0006) -[2026-06-07 02:13:57,592][324563] Updated weights for policy 0, policy_version 6705 (0.0006) -[2026-06-07 02:13:57,782][324563] Updated weights for policy 0, policy_version 6715 (0.0007) -[2026-06-07 02:13:58,000][324563] Updated weights for policy 0, policy_version 6726 (0.0006) -[2026-06-07 02:13:58,212][324563] Updated weights for policy 0, policy_version 6736 (0.0006) -[2026-06-07 02:13:58,999][324563] Updated weights for policy 0, policy_version 6746 (0.0007) -[2026-06-07 02:13:59,230][324563] Updated weights for policy 0, policy_version 6756 (0.0007) -[2026-06-07 02:13:59,421][324563] Updated weights for policy 0, policy_version 6766 (0.0006) -[2026-06-07 02:13:59,619][324563] Updated weights for policy 0, policy_version 6776 (0.0007) -[2026-06-07 02:13:59,821][324563] Updated weights for policy 0, policy_version 6786 (0.0006) -[2026-06-07 02:14:00,032][324563] Updated weights for policy 0, policy_version 6796 (0.0006) -[2026-06-07 02:14:00,235][324563] Updated weights for policy 0, policy_version 6806 (0.0006) -[2026-06-07 02:14:00,744][321791] Fps is (10 sec: 16383.9, 60 sec: 16930.1, 300 sec: 16696.1). Total num frames: 3506176. Throughput: 0: 17174.7. Samples: 3545344. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) -[2026-06-07 02:14:00,745][321791] Avg episode reward: [(0, '81.727')] -[2026-06-07 02:14:01,025][324563] Updated weights for policy 0, policy_version 6816 (0.0006) -[2026-06-07 02:14:01,212][324563] Updated weights for policy 0, policy_version 6826 (0.0006) -[2026-06-07 02:14:01,438][324563] Updated weights for policy 0, policy_version 6837 (0.0007) -[2026-06-07 02:14:01,643][324563] Updated weights for policy 0, policy_version 6847 (0.0006) -[2026-06-07 02:14:01,845][324563] Updated weights for policy 0, policy_version 6857 (0.0006) -[2026-06-07 02:14:02,052][324563] Updated weights for policy 0, policy_version 6867 (0.0007) -[2026-06-07 02:14:02,812][324563] Updated weights for policy 0, policy_version 6877 (0.0006) -[2026-06-07 02:14:03,028][324563] Updated weights for policy 0, policy_version 6887 (0.0006) -[2026-06-07 02:14:03,247][324563] Updated weights for policy 0, policy_version 6897 (0.0007) -[2026-06-07 02:14:03,473][324563] Updated weights for policy 0, policy_version 6908 (0.0006) -[2026-06-07 02:14:03,693][324563] Updated weights for policy 0, policy_version 6918 (0.0006) -[2026-06-07 02:14:03,888][324563] Updated weights for policy 0, policy_version 6928 (0.0006) -[2026-06-07 02:14:04,702][324563] Updated weights for policy 0, policy_version 6939 (0.0006) -[2026-06-07 02:14:04,942][324563] Updated weights for policy 0, policy_version 6950 (0.0006) -[2026-06-07 02:14:05,151][324563] Updated weights for policy 0, policy_version 6960 (0.0006) -[2026-06-07 02:14:05,380][324563] Updated weights for policy 0, policy_version 6970 (0.0006) -[2026-06-07 02:14:05,583][324563] Updated weights for policy 0, policy_version 6980 (0.0007) -[2026-06-07 02:14:05,744][321791] Fps is (10 sec: 16384.1, 60 sec: 16930.1, 300 sec: 16612.6). Total num frames: 3571712. Throughput: 0: 17055.3. Samples: 3595648. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) -[2026-06-07 02:14:05,745][321791] Avg episode reward: [(0, '92.975')] -[2026-06-07 02:14:05,779][324563] Updated weights for policy 0, policy_version 6990 (0.0007) -[2026-06-07 02:14:05,987][324276] Saving new best policy, reward=92.975! -[2026-06-07 02:14:06,563][324563] Updated weights for policy 0, policy_version 7001 (0.0007) -[2026-06-07 02:14:06,763][324563] Updated weights for policy 0, policy_version 7011 (0.0006) -[2026-06-07 02:14:06,982][324563] Updated weights for policy 0, policy_version 7021 (0.0007) -[2026-06-07 02:14:07,206][324563] Updated weights for policy 0, policy_version 7032 (0.0006) -[2026-06-07 02:14:07,429][324563] Updated weights for policy 0, policy_version 7042 (0.0006) -[2026-06-07 02:14:07,617][324563] Updated weights for policy 0, policy_version 7052 (0.0006) -[2026-06-07 02:14:07,837][324563] Updated weights for policy 0, policy_version 7062 (0.0006) -[2026-06-07 02:14:08,587][324563] Updated weights for policy 0, policy_version 7072 (0.0006) -[2026-06-07 02:14:08,818][324563] Updated weights for policy 0, policy_version 7083 (0.0006) -[2026-06-07 02:14:09,036][324563] Updated weights for policy 0, policy_version 7093 (0.0007) -[2026-06-07 02:14:09,238][324563] Updated weights for policy 0, policy_version 7103 (0.0006) -[2026-06-07 02:14:09,434][324563] Updated weights for policy 0, policy_version 7113 (0.0006) -[2026-06-07 02:14:09,645][324563] Updated weights for policy 0, policy_version 7124 (0.0007) -[2026-06-07 02:14:10,455][324563] Updated weights for policy 0, policy_version 7134 (0.0007) -[2026-06-07 02:14:10,656][324563] Updated weights for policy 0, policy_version 7144 (0.0007) -[2026-06-07 02:14:10,744][321791] Fps is (10 sec: 16384.1, 60 sec: 16930.2, 300 sec: 16681.9). Total num frames: 3670016. Throughput: 0: 17004.1. Samples: 3696512. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) -[2026-06-07 02:14:10,745][321791] Avg episode reward: [(0, '89.082')] -[2026-06-07 02:14:10,862][324563] Updated weights for policy 0, policy_version 7154 (0.0007) -[2026-06-07 02:14:11,098][324563] Updated weights for policy 0, policy_version 7165 (0.0007) -[2026-06-07 02:14:11,308][324563] Updated weights for policy 0, policy_version 7175 (0.0007) -[2026-06-07 02:14:11,507][324563] Updated weights for policy 0, policy_version 7185 (0.0007) -[2026-06-07 02:14:12,321][324563] Updated weights for policy 0, policy_version 7195 (0.0007) -[2026-06-07 02:14:12,518][324563] Updated weights for policy 0, policy_version 7205 (0.0007) -[2026-06-07 02:14:12,704][324563] Updated weights for policy 0, policy_version 7215 (0.0007) -[2026-06-07 02:14:12,883][324563] Updated weights for policy 0, policy_version 7225 (0.0007) -[2026-06-07 02:14:13,129][324563] Updated weights for policy 0, policy_version 7237 (0.0007) -[2026-06-07 02:14:13,363][324563] Updated weights for policy 0, policy_version 7248 (0.0011) -[2026-06-07 02:14:14,154][324563] Updated weights for policy 0, policy_version 7258 (0.0010) -[2026-06-07 02:14:14,363][324563] Updated weights for policy 0, policy_version 7268 (0.0010) -[2026-06-07 02:14:14,566][324563] Updated weights for policy 0, policy_version 7278 (0.0010) -[2026-06-07 02:14:14,753][324563] Updated weights for policy 0, policy_version 7288 (0.0009) -[2026-06-07 02:14:14,964][324563] Updated weights for policy 0, policy_version 7298 (0.0006) -[2026-06-07 02:14:15,182][324563] Updated weights for policy 0, policy_version 7308 (0.0006) -[2026-06-07 02:14:15,403][324563] Updated weights for policy 0, policy_version 7318 (0.0006) -[2026-06-07 02:14:15,744][321791] Fps is (10 sec: 19660.9, 60 sec: 17476.3, 300 sec: 16748.1). Total num frames: 3768320. Throughput: 0: 17052.5. Samples: 3800320. Policy #0 lag: (min: 63.0, avg: 77.0, max: 127.0) -[2026-06-07 02:14:15,745][321791] Avg episode reward: [(0, '97.443')] -[2026-06-07 02:14:15,751][324276] Saving new best policy, reward=97.443! -[2026-06-07 02:14:16,186][324563] Updated weights for policy 0, policy_version 7328 (0.0009) -[2026-06-07 02:14:16,412][324563] Updated weights for policy 0, policy_version 7339 (0.0011) -[2026-06-07 02:14:16,605][324563] Updated weights for policy 0, policy_version 7349 (0.0010) -[2026-06-07 02:14:16,814][324563] Updated weights for policy 0, policy_version 7359 (0.0011) -[2026-06-07 02:14:17,023][324563] Updated weights for policy 0, policy_version 7369 (0.0011) -[2026-06-07 02:14:17,259][324563] Updated weights for policy 0, policy_version 7380 (0.0011) -[2026-06-07 02:14:18,006][324563] Updated weights for policy 0, policy_version 7390 (0.0008) -[2026-06-07 02:14:18,232][324563] Updated weights for policy 0, policy_version 7401 (0.0006) -[2026-06-07 02:14:18,442][324563] Updated weights for policy 0, policy_version 7411 (0.0007) -[2026-06-07 02:14:18,644][324563] Updated weights for policy 0, policy_version 7421 (0.0006) -[2026-06-07 02:14:18,887][324563] Updated weights for policy 0, policy_version 7433 (0.0006) -[2026-06-07 02:14:19,111][324563] Updated weights for policy 0, policy_version 7444 (0.0006) -[2026-06-07 02:14:19,888][324563] Updated weights for policy 0, policy_version 7454 (0.0007) -[2026-06-07 02:14:20,090][324563] Updated weights for policy 0, policy_version 7464 (0.0006) -[2026-06-07 02:14:20,297][324563] Updated weights for policy 0, policy_version 7474 (0.0006) -[2026-06-07 02:14:20,543][324563] Updated weights for policy 0, policy_version 7485 (0.0006) -[2026-06-07 02:14:20,727][324563] Updated weights for policy 0, policy_version 7495 (0.0007) -[2026-06-07 02:14:20,744][321791] Fps is (10 sec: 16383.9, 60 sec: 16930.1, 300 sec: 16668.9). Total num frames: 3833856. Throughput: 0: 17328.4. Samples: 3856512. Policy #0 lag: (min: 63.0, avg: 77.0, max: 127.0) -[2026-06-07 02:14:20,745][321791] Avg episode reward: [(0, '81.574')] -[2026-06-07 02:14:20,928][324563] Updated weights for policy 0, policy_version 7505 (0.0007) -[2026-06-07 02:14:21,705][324563] Updated weights for policy 0, policy_version 7515 (0.0006) -[2026-06-07 02:14:21,956][324563] Updated weights for policy 0, policy_version 7527 (0.0006) -[2026-06-07 02:14:22,152][324563] Updated weights for policy 0, policy_version 7537 (0.0007) -[2026-06-07 02:14:22,395][324563] Updated weights for policy 0, policy_version 7548 (0.0006) -[2026-06-07 02:14:22,576][324563] Updated weights for policy 0, policy_version 7558 (0.0006) -[2026-06-07 02:14:22,761][324563] Updated weights for policy 0, policy_version 7568 (0.0006) -[2026-06-07 02:14:23,573][324563] Updated weights for policy 0, policy_version 7579 (0.0007) -[2026-06-07 02:14:23,793][324563] Updated weights for policy 0, policy_version 7589 (0.0006) -[2026-06-07 02:14:23,987][324563] Updated weights for policy 0, policy_version 7599 (0.0007) -[2026-06-07 02:14:24,205][324563] Updated weights for policy 0, policy_version 7609 (0.0006) -[2026-06-07 02:14:24,413][324563] Updated weights for policy 0, policy_version 7619 (0.0007) -[2026-06-07 02:14:24,623][324563] Updated weights for policy 0, policy_version 7629 (0.0006) -[2026-06-07 02:14:24,835][324563] Updated weights for policy 0, policy_version 7639 (0.0007) -[2026-06-07 02:14:25,588][324563] Updated weights for policy 0, policy_version 7650 (0.0007) -[2026-06-07 02:14:25,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 16732.6). Total num frames: 3932160. Throughput: 0: 17163.5. Samples: 3959168. Policy #0 lag: (min: 12.0, avg: 26.1, max: 76.0) -[2026-06-07 02:14:25,745][321791] Avg episode reward: [(0, '104.546')] -[2026-06-07 02:14:25,801][324563] Updated weights for policy 0, policy_version 7660 (0.0006) -[2026-06-07 02:14:26,022][324563] Updated weights for policy 0, policy_version 7670 (0.0006) -[2026-06-07 02:14:26,217][324563] Updated weights for policy 0, policy_version 7680 (0.0006) -[2026-06-07 02:14:26,416][324563] Updated weights for policy 0, policy_version 7690 (0.0007) -[2026-06-07 02:14:26,635][324563] Updated weights for policy 0, policy_version 7700 (0.0008) -[2026-06-07 02:14:26,710][324276] Saving new best policy, reward=104.546! -[2026-06-07 02:14:27,479][324563] Updated weights for policy 0, policy_version 7711 (0.0007) -[2026-06-07 02:14:27,670][324563] Updated weights for policy 0, policy_version 7721 (0.0006) -[2026-06-07 02:14:27,890][324563] Updated weights for policy 0, policy_version 7731 (0.0007) -[2026-06-07 02:14:28,088][324563] Updated weights for policy 0, policy_version 7741 (0.0006) -[2026-06-07 02:14:28,299][324563] Updated weights for policy 0, policy_version 7751 (0.0007) -[2026-06-07 02:14:28,480][324563] Updated weights for policy 0, policy_version 7761 (0.0006) -[2026-06-07 02:14:29,255][324563] Updated weights for policy 0, policy_version 7771 (0.0007) -[2026-06-07 02:14:29,496][324563] Updated weights for policy 0, policy_version 7782 (0.0006) -[2026-06-07 02:14:29,688][324563] Updated weights for policy 0, policy_version 7792 (0.0006) -[2026-06-07 02:14:29,900][324563] Updated weights for policy 0, policy_version 7802 (0.0007) -[2026-06-07 02:14:30,113][324563] Updated weights for policy 0, policy_version 7812 (0.0006) -[2026-06-07 02:14:30,311][324563] Updated weights for policy 0, policy_version 7822 (0.0006) -[2026-06-07 02:14:30,505][324563] Updated weights for policy 0, policy_version 7832 (0.0007) -[2026-06-07 02:14:30,744][321791] Fps is (10 sec: 19660.9, 60 sec: 17476.3, 300 sec: 16793.6). Total num frames: 4030464. Throughput: 0: 17044.0. Samples: 4059136. Policy #0 lag: (min: 12.0, avg: 26.1, max: 76.0) -[2026-06-07 02:14:30,745][321791] Avg episode reward: [(0, '112.538')] -[2026-06-07 02:14:30,751][324276] Saving new best policy, reward=112.538! -[2026-06-07 02:14:31,338][324563] Updated weights for policy 0, policy_version 7842 (0.0006) -[2026-06-07 02:14:31,570][324563] Updated weights for policy 0, policy_version 7853 (0.0006) -[2026-06-07 02:14:31,775][324563] Updated weights for policy 0, policy_version 7863 (0.0006) -[2026-06-07 02:14:31,998][324563] Updated weights for policy 0, policy_version 7874 (0.0007) -[2026-06-07 02:14:32,196][324563] Updated weights for policy 0, policy_version 7884 (0.0007) -[2026-06-07 02:14:32,394][324563] Updated weights for policy 0, policy_version 7894 (0.0007) -[2026-06-07 02:14:33,169][324563] Updated weights for policy 0, policy_version 7904 (0.0007) -[2026-06-07 02:14:33,401][324563] Updated weights for policy 0, policy_version 7914 (0.0007) -[2026-06-07 02:14:33,599][324563] Updated weights for policy 0, policy_version 7924 (0.0006) -[2026-06-07 02:14:33,808][324563] Updated weights for policy 0, policy_version 7934 (0.0006) -[2026-06-07 02:14:34,036][324563] Updated weights for policy 0, policy_version 7945 (0.0006) -[2026-06-07 02:14:34,245][324563] Updated weights for policy 0, policy_version 7955 (0.0006) -[2026-06-07 02:14:34,968][324563] Updated weights for policy 0, policy_version 7965 (0.0006) -[2026-06-07 02:14:35,193][324563] Updated weights for policy 0, policy_version 7975 (0.0006) -[2026-06-07 02:14:35,416][324563] Updated weights for policy 0, policy_version 7986 (0.0006) -[2026-06-07 02:14:35,666][324563] Updated weights for policy 0, policy_version 7998 (0.0007) -[2026-06-07 02:14:35,744][321791] Fps is (10 sec: 16384.0, 60 sec: 16930.1, 300 sec: 16718.4). Total num frames: 4096000. Throughput: 0: 17379.6. Samples: 4117760. Policy #0 lag: (min: 63.0, avg: 76.0, max: 127.0) -[2026-06-07 02:14:35,745][321791] Avg episode reward: [(0, '109.880')] -[2026-06-07 02:14:35,878][324563] Updated weights for policy 0, policy_version 8008 (0.0007) -[2026-06-07 02:14:36,073][324563] Updated weights for policy 0, policy_version 8018 (0.0006) -[2026-06-07 02:14:36,826][324563] Updated weights for policy 0, policy_version 8028 (0.0006) -[2026-06-07 02:14:37,063][324563] Updated weights for policy 0, policy_version 8039 (0.0006) -[2026-06-07 02:14:37,279][324563] Updated weights for policy 0, policy_version 8049 (0.0006) -[2026-06-07 02:14:37,512][324563] Updated weights for policy 0, policy_version 8061 (0.0006) -[2026-06-07 02:14:37,760][324563] Updated weights for policy 0, policy_version 8072 (0.0006) -[2026-06-07 02:14:37,959][324563] Updated weights for policy 0, policy_version 8082 (0.0007) -[2026-06-07 02:14:38,753][324563] Updated weights for policy 0, policy_version 8092 (0.0007) -[2026-06-07 02:14:38,984][324563] Updated weights for policy 0, policy_version 8103 (0.0007) -[2026-06-07 02:14:39,190][324563] Updated weights for policy 0, policy_version 8113 (0.0006) -[2026-06-07 02:14:39,410][324563] Updated weights for policy 0, policy_version 8123 (0.0006) -[2026-06-07 02:14:39,615][324563] Updated weights for policy 0, policy_version 8133 (0.0006) -[2026-06-07 02:14:39,820][324563] Updated weights for policy 0, policy_version 8143 (0.0006) -[2026-06-07 02:14:40,585][324563] Updated weights for policy 0, policy_version 8153 (0.0006) -[2026-06-07 02:14:40,744][321791] Fps is (10 sec: 16383.9, 60 sec: 17476.3, 300 sec: 16777.2). Total num frames: 4194304. Throughput: 0: 17362.5. Samples: 4218496. Policy #0 lag: (min: 63.0, avg: 76.0, max: 127.0) -[2026-06-07 02:14:40,745][321791] Avg episode reward: [(0, '123.425')] -[2026-06-07 02:14:40,782][324563] Updated weights for policy 0, policy_version 8163 (0.0006) -[2026-06-07 02:14:40,956][324563] Updated weights for policy 0, policy_version 8173 (0.0006) -[2026-06-07 02:14:41,169][324563] Updated weights for policy 0, policy_version 8183 (0.0006) -[2026-06-07 02:14:41,384][324563] Updated weights for policy 0, policy_version 8193 (0.0006) -[2026-06-07 02:14:41,603][324563] Updated weights for policy 0, policy_version 8203 (0.0006) -[2026-06-07 02:14:41,808][324563] Updated weights for policy 0, policy_version 8213 (0.0006) -[2026-06-07 02:14:41,878][324276] Saving new best policy, reward=123.425! -[2026-06-07 02:14:42,616][324563] Updated weights for policy 0, policy_version 8223 (0.0008) -[2026-06-07 02:14:42,842][324563] Updated weights for policy 0, policy_version 8234 (0.0007) -[2026-06-07 02:14:43,065][324563] Updated weights for policy 0, policy_version 8244 (0.0006) -[2026-06-07 02:14:43,286][324563] Updated weights for policy 0, policy_version 8254 (0.0006) -[2026-06-07 02:14:43,498][324563] Updated weights for policy 0, policy_version 8264 (0.0006) -[2026-06-07 02:14:43,685][324563] Updated weights for policy 0, policy_version 8274 (0.0006) -[2026-06-07 02:14:44,440][324563] Updated weights for policy 0, policy_version 8285 (0.0009) -[2026-06-07 02:14:44,649][324563] Updated weights for policy 0, policy_version 8295 (0.0010) -[2026-06-07 02:14:44,870][324563] Updated weights for policy 0, policy_version 8305 (0.0010) -[2026-06-07 02:14:45,087][324563] Updated weights for policy 0, policy_version 8315 (0.0011) -[2026-06-07 02:14:45,291][324563] Updated weights for policy 0, policy_version 8325 (0.0011) -[2026-06-07 02:14:45,499][324563] Updated weights for policy 0, policy_version 8335 (0.0011) -[2026-06-07 02:14:45,744][321791] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 16833.8). Total num frames: 4292608. Throughput: 0: 17203.2. Samples: 4319488. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) -[2026-06-07 02:14:45,745][321791] Avg episode reward: [(0, '127.794')] -[2026-06-07 02:14:45,749][324276] Saving new best policy, reward=127.794! -[2026-06-07 02:14:46,287][324563] Updated weights for policy 0, policy_version 8345 (0.0009) -[2026-06-07 02:14:46,484][324563] Updated weights for policy 0, policy_version 8355 (0.0006) -[2026-06-07 02:14:46,700][324563] Updated weights for policy 0, policy_version 8365 (0.0006) -[2026-06-07 02:14:46,928][324563] Updated weights for policy 0, policy_version 8376 (0.0007) -[2026-06-07 02:14:47,122][324563] Updated weights for policy 0, policy_version 8386 (0.0007) -[2026-06-07 02:14:47,332][324563] Updated weights for policy 0, policy_version 8396 (0.0007) -[2026-06-07 02:14:47,536][324563] Updated weights for policy 0, policy_version 8406 (0.0006) -[2026-06-07 02:14:48,313][324563] Updated weights for policy 0, policy_version 8416 (0.0007) -[2026-06-07 02:14:48,527][324563] Updated weights for policy 0, policy_version 8426 (0.0006) -[2026-06-07 02:14:48,720][324563] Updated weights for policy 0, policy_version 8436 (0.0006) -[2026-06-07 02:14:48,941][324563] Updated weights for policy 0, policy_version 8447 (0.0007) -[2026-06-07 02:14:49,150][324563] Updated weights for policy 0, policy_version 8457 (0.0006) -[2026-06-07 02:14:49,346][324563] Updated weights for policy 0, policy_version 8467 (0.0006) -[2026-06-07 02:14:50,127][324563] Updated weights for policy 0, policy_version 8477 (0.0006) -[2026-06-07 02:14:50,329][324563] Updated weights for policy 0, policy_version 8487 (0.0006) -[2026-06-07 02:14:50,529][324563] Updated weights for policy 0, policy_version 8497 (0.0006) -[2026-06-07 02:14:50,744][321791] Fps is (10 sec: 16384.0, 60 sec: 16930.1, 300 sec: 16762.1). Total num frames: 4358144. Throughput: 0: 17376.7. Samples: 4377600. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) -[2026-06-07 02:14:50,745][321791] Avg episode reward: [(0, '136.924')] -[2026-06-07 02:14:50,793][324563] Updated weights for policy 0, policy_version 8509 (0.0007) -[2026-06-07 02:14:51,031][324563] Updated weights for policy 0, policy_version 8520 (0.0007) -[2026-06-07 02:14:51,240][324563] Updated weights for policy 0, policy_version 8530 (0.0007) -[2026-06-07 02:14:51,352][324276] Saving new best policy, reward=136.924! -[2026-06-07 02:14:51,978][324563] Updated weights for policy 0, policy_version 8540 (0.0006) -[2026-06-07 02:14:52,176][324563] Updated weights for policy 0, policy_version 8550 (0.0006) -[2026-06-07 02:14:52,370][324563] Updated weights for policy 0, policy_version 8560 (0.0006) -[2026-06-07 02:14:52,591][324563] Updated weights for policy 0, policy_version 8571 (0.0007) -[2026-06-07 02:14:52,790][324563] Updated weights for policy 0, policy_version 8581 (0.0006) -[2026-06-07 02:14:52,993][324563] Updated weights for policy 0, policy_version 8591 (0.0007) -[2026-06-07 02:14:53,764][324563] Updated weights for policy 0, policy_version 8601 (0.0007) -[2026-06-07 02:14:53,982][324563] Updated weights for policy 0, policy_version 8611 (0.0007) -[2026-06-07 02:14:54,195][324563] Updated weights for policy 0, policy_version 8621 (0.0007) -[2026-06-07 02:14:54,398][324563] Updated weights for policy 0, policy_version 8631 (0.0007) -[2026-06-07 02:14:54,604][324563] Updated weights for policy 0, policy_version 8641 (0.0007) -[2026-06-07 02:14:54,811][324563] Updated weights for policy 0, policy_version 8651 (0.0007) -[2026-06-07 02:14:55,011][324563] Updated weights for policy 0, policy_version 8661 (0.0006) -[2026-06-07 02:14:55,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 16816.8). Total num frames: 4456448. Throughput: 0: 17416.5. Samples: 4480256. Policy #0 lag: (min: 22.0, avg: 36.9, max: 86.0) -[2026-06-07 02:14:55,745][321791] Avg episode reward: [(0, '142.391')] -[2026-06-07 02:14:55,746][324563] Updated weights for policy 0, policy_version 8671 (0.0007) -[2026-06-07 02:14:55,963][324563] Updated weights for policy 0, policy_version 8681 (0.0006) -[2026-06-07 02:14:56,177][324563] Updated weights for policy 0, policy_version 8691 (0.0007) -[2026-06-07 02:14:56,383][324563] Updated weights for policy 0, policy_version 8701 (0.0006) -[2026-06-07 02:14:56,574][324563] Updated weights for policy 0, policy_version 8711 (0.0006) -[2026-06-07 02:14:56,772][324563] Updated weights for policy 0, policy_version 8721 (0.0006) -[2026-06-07 02:14:56,906][324276] Saving new best policy, reward=142.391! -[2026-06-07 02:14:57,548][324563] Updated weights for policy 0, policy_version 8731 (0.0006) -[2026-06-07 02:14:57,761][324563] Updated weights for policy 0, policy_version 8741 (0.0007) -[2026-06-07 02:14:57,959][324563] Updated weights for policy 0, policy_version 8751 (0.0006) -[2026-06-07 02:14:58,151][324563] Updated weights for policy 0, policy_version 8761 (0.0006) -[2026-06-07 02:14:58,357][324563] Updated weights for policy 0, policy_version 8771 (0.0006) -[2026-06-07 02:14:58,571][324563] Updated weights for policy 0, policy_version 8782 (0.0007) -[2026-06-07 02:14:58,770][324563] Updated weights for policy 0, policy_version 8792 (0.0006) -[2026-06-07 02:14:59,582][324563] Updated weights for policy 0, policy_version 8803 (0.0007) -[2026-06-07 02:14:59,804][324563] Updated weights for policy 0, policy_version 8813 (0.0006) -[2026-06-07 02:15:00,008][324563] Updated weights for policy 0, policy_version 8823 (0.0007) -[2026-06-07 02:15:00,224][324563] Updated weights for policy 0, policy_version 8834 (0.0006) -[2026-06-07 02:15:00,434][324563] Updated weights for policy 0, policy_version 8844 (0.0006) -[2026-06-07 02:15:00,633][324563] Updated weights for policy 0, policy_version 8854 (0.0007) -[2026-06-07 02:15:00,744][321791] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 16869.5). Total num frames: 4554752. Throughput: 0: 17408.0. Samples: 4583680. Policy #0 lag: (min: 66.0, avg: 82.5, max: 126.0) -[2026-06-07 02:15:00,745][321791] Avg episode reward: [(0, '146.197')] -[2026-06-07 02:15:00,749][324276] Saving new best policy, reward=146.197! -[2026-06-07 02:15:01,413][324563] Updated weights for policy 0, policy_version 8864 (0.0007) -[2026-06-07 02:15:01,615][324563] Updated weights for policy 0, policy_version 8874 (0.0007) -[2026-06-07 02:15:01,809][324563] Updated weights for policy 0, policy_version 8884 (0.0007) -[2026-06-07 02:15:01,995][324563] Updated weights for policy 0, policy_version 8894 (0.0006) -[2026-06-07 02:15:02,207][324563] Updated weights for policy 0, policy_version 8904 (0.0007) -[2026-06-07 02:15:02,411][324563] Updated weights for policy 0, policy_version 8914 (0.0007) -[2026-06-07 02:15:03,188][324563] Updated weights for policy 0, policy_version 8924 (0.0007) -[2026-06-07 02:15:03,377][324563] Updated weights for policy 0, policy_version 8934 (0.0006) -[2026-06-07 02:15:03,579][324563] Updated weights for policy 0, policy_version 8944 (0.0007) -[2026-06-07 02:15:03,796][324563] Updated weights for policy 0, policy_version 8954 (0.0006) -[2026-06-07 02:15:03,992][324563] Updated weights for policy 0, policy_version 8964 (0.0006) -[2026-06-07 02:15:04,219][324563] Updated weights for policy 0, policy_version 8975 (0.0006) -[2026-06-07 02:15:04,979][324563] Updated weights for policy 0, policy_version 8985 (0.0006) -[2026-06-07 02:15:05,174][324563] Updated weights for policy 0, policy_version 8995 (0.0006) -[2026-06-07 02:15:05,385][324563] Updated weights for policy 0, policy_version 9005 (0.0006) -[2026-06-07 02:15:05,625][324563] Updated weights for policy 0, policy_version 9016 (0.0006) -[2026-06-07 02:15:05,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 16801.1). Total num frames: 4620288. Throughput: 0: 17439.3. Samples: 4641280. Policy #0 lag: (min: 66.0, avg: 82.5, max: 126.0) -[2026-06-07 02:15:05,745][321791] Avg episode reward: [(0, '143.445')] -[2026-06-07 02:15:05,846][324563] Updated weights for policy 0, policy_version 9026 (0.0007) -[2026-06-07 02:15:06,065][324563] Updated weights for policy 0, policy_version 9037 (0.0006) -[2026-06-07 02:15:06,293][324563] Updated weights for policy 0, policy_version 9047 (0.0006) -[2026-06-07 02:15:07,030][324563] Updated weights for policy 0, policy_version 9057 (0.0007) -[2026-06-07 02:15:07,228][324563] Updated weights for policy 0, policy_version 9067 (0.0011) -[2026-06-07 02:15:07,426][324563] Updated weights for policy 0, policy_version 9077 (0.0011) -[2026-06-07 02:15:07,634][324563] Updated weights for policy 0, policy_version 9087 (0.0011) -[2026-06-07 02:15:07,851][324563] Updated weights for policy 0, policy_version 9098 (0.0010) -[2026-06-07 02:15:08,067][324563] Updated weights for policy 0, policy_version 9108 (0.0010) -[2026-06-07 02:15:08,839][324563] Updated weights for policy 0, policy_version 9118 (0.0009) -[2026-06-07 02:15:09,047][324563] Updated weights for policy 0, policy_version 9129 (0.0007) -[2026-06-07 02:15:09,250][324563] Updated weights for policy 0, policy_version 9139 (0.0006) -[2026-06-07 02:15:09,462][324563] Updated weights for policy 0, policy_version 9149 (0.0008) -[2026-06-07 02:15:09,652][324563] Updated weights for policy 0, policy_version 9159 (0.0008) -[2026-06-07 02:15:09,874][324563] Updated weights for policy 0, policy_version 9169 (0.0008) -[2026-06-07 02:15:10,616][324563] Updated weights for policy 0, policy_version 9179 (0.0007) -[2026-06-07 02:15:10,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 16852.1). Total num frames: 4718592. Throughput: 0: 17422.2. Samples: 4743168. Policy #0 lag: (min: 115.0, avg: 129.9, max: 179.0) -[2026-06-07 02:15:10,745][321791] Avg episode reward: [(0, '165.212')] -[2026-06-07 02:15:10,833][324563] Updated weights for policy 0, policy_version 9189 (0.0006) -[2026-06-07 02:15:11,038][324563] Updated weights for policy 0, policy_version 9199 (0.0006) -[2026-06-07 02:15:11,228][324563] Updated weights for policy 0, policy_version 9209 (0.0006) -[2026-06-07 02:15:11,436][324563] Updated weights for policy 0, policy_version 9220 (0.0007) -[2026-06-07 02:15:11,642][324563] Updated weights for policy 0, policy_version 9230 (0.0006) -[2026-06-07 02:15:11,840][324276] Saving new best policy, reward=165.212! -[2026-06-07 02:15:11,842][324563] Updated weights for policy 0, policy_version 9240 (0.0006) -[2026-06-07 02:15:12,618][324563] Updated weights for policy 0, policy_version 9250 (0.0006) -[2026-06-07 02:15:12,814][324563] Updated weights for policy 0, policy_version 9260 (0.0006) -[2026-06-07 02:15:13,039][324563] Updated weights for policy 0, policy_version 9271 (0.0006) -[2026-06-07 02:15:13,239][324563] Updated weights for policy 0, policy_version 9281 (0.0006) -[2026-06-07 02:15:13,448][324563] Updated weights for policy 0, policy_version 9291 (0.0006) -[2026-06-07 02:15:13,643][324563] Updated weights for policy 0, policy_version 9301 (0.0007) -[2026-06-07 02:15:14,386][324563] Updated weights for policy 0, policy_version 9311 (0.0006) -[2026-06-07 02:15:14,586][324563] Updated weights for policy 0, policy_version 9321 (0.0007) -[2026-06-07 02:15:14,790][324563] Updated weights for policy 0, policy_version 9331 (0.0007) -[2026-06-07 02:15:14,996][324563] Updated weights for policy 0, policy_version 9341 (0.0006) -[2026-06-07 02:15:15,219][324563] Updated weights for policy 0, policy_version 9351 (0.0006) -[2026-06-07 02:15:15,451][324563] Updated weights for policy 0, policy_version 9362 (0.0006) -[2026-06-07 02:15:15,744][321791] Fps is (10 sec: 19660.9, 60 sec: 17476.3, 300 sec: 16901.4). Total num frames: 4816896. Throughput: 0: 17453.5. Samples: 4844544. Policy #0 lag: (min: 115.0, avg: 129.9, max: 179.0) -[2026-06-07 02:15:15,745][321791] Avg episode reward: [(0, '182.414')] -[2026-06-07 02:15:15,749][324276] Saving new best policy, reward=182.414! -[2026-06-07 02:15:16,251][324563] Updated weights for policy 0, policy_version 9374 (0.0006) -[2026-06-07 02:15:16,444][324563] Updated weights for policy 0, policy_version 9384 (0.0006) -[2026-06-07 02:15:16,660][324563] Updated weights for policy 0, policy_version 9394 (0.0006) -[2026-06-07 02:15:16,862][324563] Updated weights for policy 0, policy_version 9404 (0.0006) -[2026-06-07 02:15:17,059][324563] Updated weights for policy 0, policy_version 9414 (0.0006) -[2026-06-07 02:15:17,268][324563] Updated weights for policy 0, policy_version 9424 (0.0006) -[2026-06-07 02:15:18,011][324563] Updated weights for policy 0, policy_version 9434 (0.0007) -[2026-06-07 02:15:18,221][324563] Updated weights for policy 0, policy_version 9444 (0.0007) -[2026-06-07 02:15:18,403][324563] Updated weights for policy 0, policy_version 9454 (0.0009) -[2026-06-07 02:15:18,609][324563] Updated weights for policy 0, policy_version 9464 (0.0007) -[2026-06-07 02:15:18,834][324563] Updated weights for policy 0, policy_version 9474 (0.0006) -[2026-06-07 02:15:19,018][324563] Updated weights for policy 0, policy_version 9484 (0.0006) -[2026-06-07 02:15:19,213][324563] Updated weights for policy 0, policy_version 9494 (0.0006) -[2026-06-07 02:15:19,999][324563] Updated weights for policy 0, policy_version 9504 (0.0006) -[2026-06-07 02:15:20,179][324563] Updated weights for policy 0, policy_version 9514 (0.0006) -[2026-06-07 02:15:20,382][324563] Updated weights for policy 0, policy_version 9524 (0.0006) -[2026-06-07 02:15:20,575][324563] Updated weights for policy 0, policy_version 9534 (0.0006) -[2026-06-07 02:15:20,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 16836.0). Total num frames: 4882432. Throughput: 0: 17427.9. Samples: 4902016. Policy #0 lag: (min: 92.0, avg: 119.4, max: 160.0) -[2026-06-07 02:15:20,745][321791] Avg episode reward: [(0, '180.383')] -[2026-06-07 02:15:20,773][324563] Updated weights for policy 0, policy_version 9544 (0.0006) -[2026-06-07 02:15:20,991][324563] Updated weights for policy 0, policy_version 9554 (0.0006) -[2026-06-07 02:15:21,775][324563] Updated weights for policy 0, policy_version 9565 (0.0006) -[2026-06-07 02:15:21,972][324563] Updated weights for policy 0, policy_version 9575 (0.0007) -[2026-06-07 02:15:22,170][324563] Updated weights for policy 0, policy_version 9585 (0.0011) -[2026-06-07 02:15:22,381][324563] Updated weights for policy 0, policy_version 9595 (0.0011) -[2026-06-07 02:15:22,596][324563] Updated weights for policy 0, policy_version 9605 (0.0009) -[2026-06-07 02:15:22,795][324563] Updated weights for policy 0, policy_version 9615 (0.0007) -[2026-06-07 02:15:23,534][324563] Updated weights for policy 0, policy_version 9625 (0.0008) -[2026-06-07 02:15:23,735][324563] Updated weights for policy 0, policy_version 9635 (0.0009) -[2026-06-07 02:15:23,930][324563] Updated weights for policy 0, policy_version 9645 (0.0007) -[2026-06-07 02:15:24,127][324563] Updated weights for policy 0, policy_version 9655 (0.0006) -[2026-06-07 02:15:24,317][324563] Updated weights for policy 0, policy_version 9665 (0.0006) -[2026-06-07 02:15:24,532][324563] Updated weights for policy 0, policy_version 9675 (0.0007) -[2026-06-07 02:15:24,749][324563] Updated weights for policy 0, policy_version 9685 (0.0006) -[2026-06-07 02:15:25,487][324563] Updated weights for policy 0, policy_version 9695 (0.0006) -[2026-06-07 02:15:25,736][324563] Updated weights for policy 0, policy_version 9706 (0.0006) -[2026-06-07 02:15:25,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 16883.9). Total num frames: 4980736. Throughput: 0: 17467.7. Samples: 5004544. Policy #0 lag: (min: 92.0, avg: 119.4, max: 160.0) -[2026-06-07 02:15:25,745][321791] Avg episode reward: [(0, '183.897')] -[2026-06-07 02:15:25,944][324563] Updated weights for policy 0, policy_version 9716 (0.0006) -[2026-06-07 02:15:26,150][324563] Updated weights for policy 0, policy_version 9726 (0.0007) -[2026-06-07 02:15:26,373][324563] Updated weights for policy 0, policy_version 9737 (0.0010) -[2026-06-07 02:15:26,579][324563] Updated weights for policy 0, policy_version 9747 (0.0011) -[2026-06-07 02:15:26,691][324276] Saving new best policy, reward=183.897! -[2026-06-07 02:15:27,351][324563] Updated weights for policy 0, policy_version 9757 (0.0011) -[2026-06-07 02:15:27,551][324563] Updated weights for policy 0, policy_version 9767 (0.0011) -[2026-06-07 02:15:27,760][324563] Updated weights for policy 0, policy_version 9777 (0.0011) -[2026-06-07 02:15:27,967][324563] Updated weights for policy 0, policy_version 9787 (0.0009) -[2026-06-07 02:15:28,181][324563] Updated weights for policy 0, policy_version 9797 (0.0006) -[2026-06-07 02:15:28,413][324563] Updated weights for policy 0, policy_version 9808 (0.0007) -[2026-06-07 02:15:29,165][324563] Updated weights for policy 0, policy_version 9818 (0.0007) -[2026-06-07 02:15:29,356][324563] Updated weights for policy 0, policy_version 9828 (0.0007) -[2026-06-07 02:15:29,593][324563] Updated weights for policy 0, policy_version 9839 (0.0006) -[2026-06-07 02:15:29,821][324563] Updated weights for policy 0, policy_version 9850 (0.0007) -[2026-06-07 02:15:30,019][324563] Updated weights for policy 0, policy_version 9860 (0.0007) -[2026-06-07 02:15:30,220][324563] Updated weights for policy 0, policy_version 9870 (0.0006) -[2026-06-07 02:15:30,399][324563] Updated weights for policy 0, policy_version 9880 (0.0007) -[2026-06-07 02:15:30,744][321791] Fps is (10 sec: 19660.8, 60 sec: 17476.2, 300 sec: 17106.0). Total num frames: 5079040. Throughput: 0: 17510.4. Samples: 5107456. Policy #0 lag: (min: 12.0, avg: 27.3, max: 76.0) -[2026-06-07 02:15:30,745][321791] Avg episode reward: [(0, '211.612')] -[2026-06-07 02:15:30,752][324276] Saving new best policy, reward=211.612! -[2026-06-07 02:15:31,164][324563] Updated weights for policy 0, policy_version 9890 (0.0007) -[2026-06-07 02:15:31,363][324563] Updated weights for policy 0, policy_version 9900 (0.0009) -[2026-06-07 02:15:31,562][324563] Updated weights for policy 0, policy_version 9910 (0.0009) -[2026-06-07 02:15:31,778][324563] Updated weights for policy 0, policy_version 9920 (0.0007) -[2026-06-07 02:15:31,980][324563] Updated weights for policy 0, policy_version 9930 (0.0007) -[2026-06-07 02:15:32,172][324563] Updated weights for policy 0, policy_version 9940 (0.0007) -[2026-06-07 02:15:32,926][324563] Updated weights for policy 0, policy_version 9950 (0.0007) -[2026-06-07 02:15:33,121][324563] Updated weights for policy 0, policy_version 9960 (0.0006) -[2026-06-07 02:15:33,342][324563] Updated weights for policy 0, policy_version 9970 (0.0006) -[2026-06-07 02:15:33,531][324563] Updated weights for policy 0, policy_version 9980 (0.0007) -[2026-06-07 02:15:33,750][324563] Updated weights for policy 0, policy_version 9990 (0.0006) -[2026-06-07 02:15:33,959][324563] Updated weights for policy 0, policy_version 10000 (0.0007) -[2026-06-07 02:15:34,724][324563] Updated weights for policy 0, policy_version 10011 (0.0007) -[2026-06-07 02:15:34,929][324563] Updated weights for policy 0, policy_version 10021 (0.0006) -[2026-06-07 02:15:35,127][324563] Updated weights for policy 0, policy_version 10031 (0.0007) -[2026-06-07 02:15:35,369][324563] Updated weights for policy 0, policy_version 10041 (0.0006) -[2026-06-07 02:15:35,591][324563] Updated weights for policy 0, policy_version 10051 (0.0006) -[2026-06-07 02:15:35,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 16995.0). Total num frames: 5144576. Throughput: 0: 17453.5. Samples: 5163008. Policy #0 lag: (min: 12.0, avg: 27.3, max: 76.0) -[2026-06-07 02:15:35,745][321791] Avg episode reward: [(0, '217.062')] -[2026-06-07 02:15:35,791][324563] Updated weights for policy 0, policy_version 10061 (0.0007) -[2026-06-07 02:15:35,989][324563] Updated weights for policy 0, policy_version 10071 (0.0007) -[2026-06-07 02:15:36,010][324276] Saving new best policy, reward=217.062! -[2026-06-07 02:15:36,700][324563] Updated weights for policy 0, policy_version 10081 (0.0010) -[2026-06-07 02:15:36,921][324563] Updated weights for policy 0, policy_version 10091 (0.0010) -[2026-06-07 02:15:37,118][324563] Updated weights for policy 0, policy_version 10101 (0.0006) -[2026-06-07 02:15:37,336][324563] Updated weights for policy 0, policy_version 10111 (0.0006) -[2026-06-07 02:15:37,570][324563] Updated weights for policy 0, policy_version 10122 (0.0006) -[2026-06-07 02:15:37,775][324563] Updated weights for policy 0, policy_version 10132 (0.0006) -[2026-06-07 02:15:38,525][324563] Updated weights for policy 0, policy_version 10142 (0.0007) -[2026-06-07 02:15:38,721][324563] Updated weights for policy 0, policy_version 10152 (0.0007) -[2026-06-07 02:15:38,928][324563] Updated weights for policy 0, policy_version 10162 (0.0007) -[2026-06-07 02:15:39,168][324563] Updated weights for policy 0, policy_version 10173 (0.0007) -[2026-06-07 02:15:39,378][324563] Updated weights for policy 0, policy_version 10183 (0.0007) -[2026-06-07 02:15:39,583][324563] Updated weights for policy 0, policy_version 10193 (0.0007) -[2026-06-07 02:15:40,318][324563] Updated weights for policy 0, policy_version 10203 (0.0007) -[2026-06-07 02:15:40,528][324563] Updated weights for policy 0, policy_version 10213 (0.0007) -[2026-06-07 02:15:40,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 16883.9). Total num frames: 5242880. Throughput: 0: 17430.7. Samples: 5264640. Policy #0 lag: (min: 63.0, avg: 78.0, max: 127.0) -[2026-06-07 02:15:40,745][321791] Avg episode reward: [(0, '215.634')] -[2026-06-07 02:15:40,752][324563] Updated weights for policy 0, policy_version 10223 (0.0006) -[2026-06-07 02:15:40,955][324563] Updated weights for policy 0, policy_version 10233 (0.0006) -[2026-06-07 02:15:41,165][324563] Updated weights for policy 0, policy_version 10243 (0.0007) -[2026-06-07 02:15:41,403][324563] Updated weights for policy 0, policy_version 10255 (0.0007) -[2026-06-07 02:15:42,123][324563] Updated weights for policy 0, policy_version 10265 (0.0007) -[2026-06-07 02:15:42,329][324563] Updated weights for policy 0, policy_version 10275 (0.0007) -[2026-06-07 02:15:42,551][324563] Updated weights for policy 0, policy_version 10286 (0.0007) -[2026-06-07 02:15:42,748][324563] Updated weights for policy 0, policy_version 10296 (0.0006) -[2026-06-07 02:15:42,954][324563] Updated weights for policy 0, policy_version 10306 (0.0006) -[2026-06-07 02:15:43,171][324563] Updated weights for policy 0, policy_version 10317 (0.0006) -[2026-06-07 02:15:43,382][324563] Updated weights for policy 0, policy_version 10327 (0.0006) -[2026-06-07 02:15:44,128][324563] Updated weights for policy 0, policy_version 10337 (0.0007) -[2026-06-07 02:15:44,316][324563] Updated weights for policy 0, policy_version 10347 (0.0007) -[2026-06-07 02:15:44,510][324563] Updated weights for policy 0, policy_version 10357 (0.0006) -[2026-06-07 02:15:44,711][324563] Updated weights for policy 0, policy_version 10367 (0.0006) -[2026-06-07 02:15:44,911][324563] Updated weights for policy 0, policy_version 10377 (0.0007) -[2026-06-07 02:15:45,121][324563] Updated weights for policy 0, policy_version 10387 (0.0007) -[2026-06-07 02:15:45,744][321791] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 16883.9). Total num frames: 5341184. Throughput: 0: 17581.5. Samples: 5374848. Policy #0 lag: (min: 63.0, avg: 78.0, max: 127.0) -[2026-06-07 02:15:45,745][321791] Avg episode reward: [(0, '206.084')] -[2026-06-07 02:15:45,849][324563] Updated weights for policy 0, policy_version 10397 (0.0007) -[2026-06-07 02:15:46,060][324563] Updated weights for policy 0, policy_version 10408 (0.0007) -[2026-06-07 02:15:46,259][324563] Updated weights for policy 0, policy_version 10418 (0.0007) -[2026-06-07 02:15:46,455][324563] Updated weights for policy 0, policy_version 10428 (0.0006) -[2026-06-07 02:15:46,673][324563] Updated weights for policy 0, policy_version 10438 (0.0006) -[2026-06-07 02:15:46,858][324563] Updated weights for policy 0, policy_version 10448 (0.0006) -[2026-06-07 02:15:47,646][324563] Updated weights for policy 0, policy_version 10458 (0.0007) -[2026-06-07 02:15:47,856][324563] Updated weights for policy 0, policy_version 10468 (0.0009) -[2026-06-07 02:15:48,089][324563] Updated weights for policy 0, policy_version 10479 (0.0011) -[2026-06-07 02:15:48,289][324563] Updated weights for policy 0, policy_version 10490 (0.0011) -[2026-06-07 02:15:48,488][324563] Updated weights for policy 0, policy_version 10500 (0.0011) -[2026-06-07 02:15:48,716][324563] Updated weights for policy 0, policy_version 10511 (0.0011) -[2026-06-07 02:15:49,459][324563] Updated weights for policy 0, policy_version 10521 (0.0011) -[2026-06-07 02:15:49,684][324563] Updated weights for policy 0, policy_version 10531 (0.0008) -[2026-06-07 02:15:49,922][324563] Updated weights for policy 0, policy_version 10542 (0.0007) -[2026-06-07 02:15:50,132][324563] Updated weights for policy 0, policy_version 10552 (0.0007) -[2026-06-07 02:15:50,322][324563] Updated weights for policy 0, policy_version 10562 (0.0006) -[2026-06-07 02:15:50,544][324563] Updated weights for policy 0, policy_version 10572 (0.0007) -[2026-06-07 02:15:50,723][324563] Updated weights for policy 0, policy_version 10582 (0.0007) -[2026-06-07 02:15:50,744][321791] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 16883.9). Total num frames: 5406720. Throughput: 0: 17393.8. Samples: 5424000. Policy #0 lag: (min: 63.0, avg: 77.2, max: 127.0) -[2026-06-07 02:15:50,745][321791] Avg episode reward: [(0, '215.261')] -[2026-06-07 02:15:51,461][324563] Updated weights for policy 0, policy_version 10592 (0.0006) -[2026-06-07 02:15:51,659][324563] Updated weights for policy 0, policy_version 10602 (0.0006) -[2026-06-07 02:15:51,853][324563] Updated weights for policy 0, policy_version 10612 (0.0007) -[2026-06-07 02:15:52,070][324563] Updated weights for policy 0, policy_version 10622 (0.0006) -[2026-06-07 02:15:52,255][324563] Updated weights for policy 0, policy_version 10632 (0.0006) -[2026-06-07 02:15:52,448][324563] Updated weights for policy 0, policy_version 10642 (0.0006) -[2026-06-07 02:15:53,194][324563] Updated weights for policy 0, policy_version 10652 (0.0007) -[2026-06-07 02:15:53,401][324563] Updated weights for policy 0, policy_version 10662 (0.0006) -[2026-06-07 02:15:53,622][324563] Updated weights for policy 0, policy_version 10672 (0.0006) -[2026-06-07 02:15:53,821][324563] Updated weights for policy 0, policy_version 10682 (0.0006) -[2026-06-07 02:15:54,036][324563] Updated weights for policy 0, policy_version 10693 (0.0007) -[2026-06-07 02:15:54,232][324563] Updated weights for policy 0, policy_version 10703 (0.0006) -[2026-06-07 02:15:54,960][324563] Updated weights for policy 0, policy_version 10713 (0.0007) -[2026-06-07 02:15:55,211][324563] Updated weights for policy 0, policy_version 10724 (0.0012) -[2026-06-07 02:15:55,431][324563] Updated weights for policy 0, policy_version 10735 (0.0008) -[2026-06-07 02:15:55,633][324563] Updated weights for policy 0, policy_version 10745 (0.0006) -[2026-06-07 02:15:55,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 16883.9). Total num frames: 5505024. Throughput: 0: 17453.5. Samples: 5528576. Policy #0 lag: (min: 63.0, avg: 77.2, max: 127.0) -[2026-06-07 02:15:55,745][321791] Avg episode reward: [(0, '223.223')] -[2026-06-07 02:15:55,846][324563] Updated weights for policy 0, policy_version 10756 (0.0006) -[2026-06-07 02:15:56,051][324563] Updated weights for policy 0, policy_version 10766 (0.0006) -[2026-06-07 02:15:56,256][324276] Saving new best policy, reward=223.223! -[2026-06-07 02:15:56,257][324563] Updated weights for policy 0, policy_version 10776 (0.0006) -[2026-06-07 02:15:57,002][324563] Updated weights for policy 0, policy_version 10786 (0.0008) -[2026-06-07 02:15:57,192][324563] Updated weights for policy 0, policy_version 10796 (0.0011) -[2026-06-07 02:15:57,418][324563] Updated weights for policy 0, policy_version 10807 (0.0010) -[2026-06-07 02:15:57,639][324563] Updated weights for policy 0, policy_version 10818 (0.0009) -[2026-06-07 02:15:57,844][324563] Updated weights for policy 0, policy_version 10828 (0.0007) -[2026-06-07 02:15:58,059][324563] Updated weights for policy 0, policy_version 10838 (0.0011) -[2026-06-07 02:15:58,866][324563] Updated weights for policy 0, policy_version 10849 (0.0007) -[2026-06-07 02:15:59,080][324563] Updated weights for policy 0, policy_version 10860 (0.0006) -[2026-06-07 02:15:59,278][324563] Updated weights for policy 0, policy_version 10870 (0.0006) -[2026-06-07 02:15:59,520][324563] Updated weights for policy 0, policy_version 10881 (0.0007) -[2026-06-07 02:15:59,728][324563] Updated weights for policy 0, policy_version 10891 (0.0007) -[2026-06-07 02:15:59,941][324563] Updated weights for policy 0, policy_version 10902 (0.0009) -[2026-06-07 02:16:00,710][324563] Updated weights for policy 0, policy_version 10913 (0.0007) -[2026-06-07 02:16:00,744][321791] Fps is (10 sec: 19660.7, 60 sec: 17476.3, 300 sec: 16994.9). Total num frames: 5603328. Throughput: 0: 17686.7. Samples: 5640448. Policy #0 lag: (min: 52.0, avg: 66.3, max: 116.0) -[2026-06-07 02:16:00,745][321791] Avg episode reward: [(0, '244.918')] -[2026-06-07 02:16:00,919][324563] Updated weights for policy 0, policy_version 10923 (0.0007) -[2026-06-07 02:16:01,116][324563] Updated weights for policy 0, policy_version 10933 (0.0007) -[2026-06-07 02:16:01,320][324563] Updated weights for policy 0, policy_version 10943 (0.0007) -[2026-06-07 02:16:01,527][324563] Updated weights for policy 0, policy_version 10954 (0.0007) -[2026-06-07 02:16:01,734][324563] Updated weights for policy 0, policy_version 10964 (0.0007) -[2026-06-07 02:16:01,802][324276] Saving new best policy, reward=244.918! -[2026-06-07 02:16:02,507][324563] Updated weights for policy 0, policy_version 10974 (0.0008) -[2026-06-07 02:16:02,711][324563] Updated weights for policy 0, policy_version 10984 (0.0007) -[2026-06-07 02:16:02,923][324563] Updated weights for policy 0, policy_version 10994 (0.0008) -[2026-06-07 02:16:03,143][324563] Updated weights for policy 0, policy_version 11004 (0.0008) -[2026-06-07 02:16:03,339][324563] Updated weights for policy 0, policy_version 11014 (0.0006) -[2026-06-07 02:16:03,544][324563] Updated weights for policy 0, policy_version 11024 (0.0007) -[2026-06-07 02:16:04,290][324563] Updated weights for policy 0, policy_version 11034 (0.0007) -[2026-06-07 02:16:04,527][324563] Updated weights for policy 0, policy_version 11045 (0.0006) -[2026-06-07 02:16:04,748][324563] Updated weights for policy 0, policy_version 11055 (0.0006) -[2026-06-07 02:16:04,943][324563] Updated weights for policy 0, policy_version 11065 (0.0007) -[2026-06-07 02:16:05,140][324563] Updated weights for policy 0, policy_version 11075 (0.0007) -[2026-06-07 02:16:05,362][324563] Updated weights for policy 0, policy_version 11086 (0.0007) -[2026-06-07 02:16:05,557][324563] Updated weights for policy 0, policy_version 11096 (0.0006) -[2026-06-07 02:16:05,744][321791] Fps is (10 sec: 19660.5, 60 sec: 18022.4, 300 sec: 16994.9). Total num frames: 5701632. Throughput: 0: 17393.7. Samples: 5684736. Policy #0 lag: (min: 52.0, avg: 66.3, max: 116.0) -[2026-06-07 02:16:05,745][321791] Avg episode reward: [(0, '242.720')] -[2026-06-07 02:16:06,313][324563] Updated weights for policy 0, policy_version 11106 (0.0006) -[2026-06-07 02:16:06,511][324563] Updated weights for policy 0, policy_version 11117 (0.0006) -[2026-06-07 02:16:06,734][324563] Updated weights for policy 0, policy_version 11128 (0.0007) -[2026-06-07 02:16:06,940][324563] Updated weights for policy 0, policy_version 11139 (0.0006) -[2026-06-07 02:16:07,139][324563] Updated weights for policy 0, policy_version 11149 (0.0006) -[2026-06-07 02:16:07,308][324563] Updated weights for policy 0, policy_version 11159 (0.0006) -[2026-06-07 02:16:08,088][324563] Updated weights for policy 0, policy_version 11170 (0.0006) -[2026-06-07 02:16:08,287][324563] Updated weights for policy 0, policy_version 11180 (0.0006) -[2026-06-07 02:16:08,501][324563] Updated weights for policy 0, policy_version 11190 (0.0007) -[2026-06-07 02:16:08,705][324563] Updated weights for policy 0, policy_version 11200 (0.0006) -[2026-06-07 02:16:08,945][324563] Updated weights for policy 0, policy_version 11211 (0.0007) -[2026-06-07 02:16:09,166][324563] Updated weights for policy 0, policy_version 11221 (0.0006) -[2026-06-07 02:16:09,921][324563] Updated weights for policy 0, policy_version 11232 (0.0007) -[2026-06-07 02:16:10,148][324563] Updated weights for policy 0, policy_version 11242 (0.0006) -[2026-06-07 02:16:10,353][324563] Updated weights for policy 0, policy_version 11252 (0.0006) -[2026-06-07 02:16:10,570][324563] Updated weights for policy 0, policy_version 11262 (0.0007) -[2026-06-07 02:16:10,744][321791] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 16994.9). Total num frames: 5767168. Throughput: 0: 17609.9. Samples: 5796992. Policy #0 lag: (min: 23.0, avg: 37.5, max: 87.0) -[2026-06-07 02:16:10,745][321791] Avg episode reward: [(0, '247.130')] -[2026-06-07 02:16:10,787][324563] Updated weights for policy 0, policy_version 11272 (0.0006) -[2026-06-07 02:16:11,009][324563] Updated weights for policy 0, policy_version 11282 (0.0007) -[2026-06-07 02:16:11,119][324276] Saving new best policy, reward=247.130! -[2026-06-07 02:16:11,772][324563] Updated weights for policy 0, policy_version 11293 (0.0006) -[2026-06-07 02:16:11,981][324563] Updated weights for policy 0, policy_version 11303 (0.0007) -[2026-06-07 02:16:12,169][324563] Updated weights for policy 0, policy_version 11313 (0.0007) -[2026-06-07 02:16:12,375][324563] Updated weights for policy 0, policy_version 11323 (0.0007) -[2026-06-07 02:16:12,588][324563] Updated weights for policy 0, policy_version 11333 (0.0006) -[2026-06-07 02:16:12,787][324563] Updated weights for policy 0, policy_version 11343 (0.0007) -[2026-06-07 02:16:13,509][324563] Updated weights for policy 0, policy_version 11353 (0.0007) -[2026-06-07 02:16:13,725][324563] Updated weights for policy 0, policy_version 11363 (0.0006) -[2026-06-07 02:16:13,931][324563] Updated weights for policy 0, policy_version 11373 (0.0006) -[2026-06-07 02:16:14,136][324563] Updated weights for policy 0, policy_version 11383 (0.0006) -[2026-06-07 02:16:14,367][324563] Updated weights for policy 0, policy_version 11393 (0.0006) -[2026-06-07 02:16:14,581][324563] Updated weights for policy 0, policy_version 11403 (0.0006) -[2026-06-07 02:16:14,782][324563] Updated weights for policy 0, policy_version 11413 (0.0006) -[2026-06-07 02:16:15,583][324563] Updated weights for policy 0, policy_version 11424 (0.0006) -[2026-06-07 02:16:15,744][321791] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 16994.9). Total num frames: 5865472. Throughput: 0: 17703.8. Samples: 5904128. Policy #0 lag: (min: 23.0, avg: 37.5, max: 87.0) -[2026-06-07 02:16:15,745][321791] Avg episode reward: [(0, '252.349')] -[2026-06-07 02:16:15,798][324563] Updated weights for policy 0, policy_version 11435 (0.0006) -[2026-06-07 02:16:16,024][324563] Updated weights for policy 0, policy_version 11445 (0.0006) -[2026-06-07 02:16:16,217][324563] Updated weights for policy 0, policy_version 11455 (0.0006) -[2026-06-07 02:16:16,431][324563] Updated weights for policy 0, policy_version 11465 (0.0006) -[2026-06-07 02:16:16,685][324563] Updated weights for policy 0, policy_version 11477 (0.0007) -[2026-06-07 02:16:16,733][324276] Saving new best policy, reward=252.349! -[2026-06-07 02:16:17,460][324563] Updated weights for policy 0, policy_version 11487 (0.0007) -[2026-06-07 02:16:17,676][324563] Updated weights for policy 0, policy_version 11498 (0.0006) -[2026-06-07 02:16:17,873][324563] Updated weights for policy 0, policy_version 11508 (0.0010) -[2026-06-07 02:16:18,087][324563] Updated weights for policy 0, policy_version 11518 (0.0009) -[2026-06-07 02:16:18,294][324563] Updated weights for policy 0, policy_version 11528 (0.0006) -[2026-06-07 02:16:18,533][324563] Updated weights for policy 0, policy_version 11538 (0.0006) -[2026-06-07 02:16:19,293][324563] Updated weights for policy 0, policy_version 11548 (0.0006) -[2026-06-07 02:16:19,487][324563] Updated weights for policy 0, policy_version 11558 (0.0007) -[2026-06-07 02:16:19,702][324563] Updated weights for policy 0, policy_version 11568 (0.0006) -[2026-06-07 02:16:19,919][324563] Updated weights for policy 0, policy_version 11579 (0.0006) -[2026-06-07 02:16:20,146][324563] Updated weights for policy 0, policy_version 11590 (0.0006) -[2026-06-07 02:16:20,357][324563] Updated weights for policy 0, policy_version 11600 (0.0006) -[2026-06-07 02:16:20,744][321791] Fps is (10 sec: 19660.2, 60 sec: 18022.3, 300 sec: 17106.0). Total num frames: 5963776. Throughput: 0: 17467.6. Samples: 5949056. Policy #0 lag: (min: 63.0, avg: 78.6, max: 127.0) -[2026-06-07 02:16:20,746][321791] Avg episode reward: [(0, '275.287')] -[2026-06-07 02:16:20,756][324276] Saving new best policy, reward=275.287! -[2026-06-07 02:16:21,091][324563] Updated weights for policy 0, policy_version 11610 (0.0006) -[2026-06-07 02:16:21,294][324563] Updated weights for policy 0, policy_version 11620 (0.0006) -[2026-06-07 02:16:21,481][324563] Updated weights for policy 0, policy_version 11630 (0.0006) -[2026-06-07 02:16:21,684][324563] Updated weights for policy 0, policy_version 11640 (0.0006) -[2026-06-07 02:16:21,893][324563] Updated weights for policy 0, policy_version 11650 (0.0006) -[2026-06-07 02:16:22,090][324563] Updated weights for policy 0, policy_version 11660 (0.0007) -[2026-06-07 02:16:22,312][324563] Updated weights for policy 0, policy_version 11670 (0.0006) -[2026-06-07 02:16:23,046][324563] Updated weights for policy 0, policy_version 11680 (0.0008) -[2026-06-07 02:16:23,248][324563] Updated weights for policy 0, policy_version 11690 (0.0008) -[2026-06-07 02:16:23,462][324563] Updated weights for policy 0, policy_version 11700 (0.0007) -[2026-06-07 02:16:23,671][324563] Updated weights for policy 0, policy_version 11710 (0.0007) -[2026-06-07 02:16:23,888][324563] Updated weights for policy 0, policy_version 11720 (0.0007) -[2026-06-07 02:16:24,075][324563] Updated weights for policy 0, policy_version 11730 (0.0006) -[2026-06-07 02:16:24,834][324563] Updated weights for policy 0, policy_version 11740 (0.0007) -[2026-06-07 02:16:25,048][324563] Updated weights for policy 0, policy_version 11750 (0.0007) -[2026-06-07 02:16:25,243][324563] Updated weights for policy 0, policy_version 11760 (0.0007) -[2026-06-07 02:16:25,459][324563] Updated weights for policy 0, policy_version 11770 (0.0006) -[2026-06-07 02:16:25,659][324563] Updated weights for policy 0, policy_version 11780 (0.0006) -[2026-06-07 02:16:25,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 16994.9). Total num frames: 6029312. Throughput: 0: 17675.4. Samples: 6060032. Policy #0 lag: (min: 63.0, avg: 78.6, max: 127.0) -[2026-06-07 02:16:25,745][321791] Avg episode reward: [(0, '265.913')] -[2026-06-07 02:16:25,899][324563] Updated weights for policy 0, policy_version 11791 (0.0006) -[2026-06-07 02:16:26,693][324563] Updated weights for policy 0, policy_version 11802 (0.0006) -[2026-06-07 02:16:26,914][324563] Updated weights for policy 0, policy_version 11812 (0.0006) -[2026-06-07 02:16:27,099][324563] Updated weights for policy 0, policy_version 11822 (0.0007) -[2026-06-07 02:16:27,294][324563] Updated weights for policy 0, policy_version 11832 (0.0007) -[2026-06-07 02:16:27,509][324563] Updated weights for policy 0, policy_version 11843 (0.0006) -[2026-06-07 02:16:27,719][324563] Updated weights for policy 0, policy_version 11853 (0.0006) -[2026-06-07 02:16:27,941][324563] Updated weights for policy 0, policy_version 11863 (0.0007) -[2026-06-07 02:16:28,686][324563] Updated weights for policy 0, policy_version 11873 (0.0006) -[2026-06-07 02:16:28,923][324563] Updated weights for policy 0, policy_version 11884 (0.0006) -[2026-06-07 02:16:29,143][324563] Updated weights for policy 0, policy_version 11895 (0.0006) -[2026-06-07 02:16:29,362][324563] Updated weights for policy 0, policy_version 11905 (0.0007) -[2026-06-07 02:16:29,585][324563] Updated weights for policy 0, policy_version 11916 (0.0007) -[2026-06-07 02:16:29,824][324563] Updated weights for policy 0, policy_version 11927 (0.0006) -[2026-06-07 02:16:30,570][324563] Updated weights for policy 0, policy_version 11937 (0.0006) -[2026-06-07 02:16:30,744][321791] Fps is (10 sec: 16384.5, 60 sec: 17476.3, 300 sec: 17106.0). Total num frames: 6127616. Throughput: 0: 17590.0. Samples: 6166400. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:16:30,745][321791] Avg episode reward: [(0, '291.197')] -[2026-06-07 02:16:30,778][324563] Updated weights for policy 0, policy_version 11947 (0.0006) -[2026-06-07 02:16:30,986][324563] Updated weights for policy 0, policy_version 11957 (0.0007) -[2026-06-07 02:16:31,209][324563] Updated weights for policy 0, policy_version 11967 (0.0007) -[2026-06-07 02:16:31,412][324563] Updated weights for policy 0, policy_version 11977 (0.0006) -[2026-06-07 02:16:31,625][324563] Updated weights for policy 0, policy_version 11987 (0.0006) -[2026-06-07 02:16:31,728][324276] Saving new best policy, reward=291.197! -[2026-06-07 02:16:32,387][324563] Updated weights for policy 0, policy_version 11997 (0.0007) -[2026-06-07 02:16:32,598][324563] Updated weights for policy 0, policy_version 12007 (0.0007) -[2026-06-07 02:16:32,815][324563] Updated weights for policy 0, policy_version 12017 (0.0007) -[2026-06-07 02:16:33,023][324563] Updated weights for policy 0, policy_version 12027 (0.0007) -[2026-06-07 02:16:33,230][324563] Updated weights for policy 0, policy_version 12037 (0.0006) -[2026-06-07 02:16:33,460][324563] Updated weights for policy 0, policy_version 12047 (0.0007) -[2026-06-07 02:16:34,200][324563] Updated weights for policy 0, policy_version 12057 (0.0007) -[2026-06-07 02:16:34,443][324563] Updated weights for policy 0, policy_version 12069 (0.0006) -[2026-06-07 02:16:34,645][324563] Updated weights for policy 0, policy_version 12079 (0.0006) -[2026-06-07 02:16:34,880][324563] Updated weights for policy 0, policy_version 12090 (0.0007) -[2026-06-07 02:16:35,076][324563] Updated weights for policy 0, policy_version 12100 (0.0007) -[2026-06-07 02:16:35,306][324563] Updated weights for policy 0, policy_version 12111 (0.0006) -[2026-06-07 02:16:35,744][321791] Fps is (10 sec: 19660.8, 60 sec: 18022.4, 300 sec: 17106.0). Total num frames: 6225920. Throughput: 0: 17496.2. Samples: 6211328. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:16:35,745][321791] Avg episode reward: [(0, '293.146')] -[2026-06-07 02:16:35,751][324276] Saving new best policy, reward=293.146! -[2026-06-07 02:16:36,080][324563] Updated weights for policy 0, policy_version 12121 (0.0006) -[2026-06-07 02:16:36,295][324563] Updated weights for policy 0, policy_version 12132 (0.0007) -[2026-06-07 02:16:36,490][324563] Updated weights for policy 0, policy_version 12142 (0.0007) -[2026-06-07 02:16:36,726][324563] Updated weights for policy 0, policy_version 12153 (0.0007) -[2026-06-07 02:16:36,950][324563] Updated weights for policy 0, policy_version 12164 (0.0006) -[2026-06-07 02:16:37,184][324563] Updated weights for policy 0, policy_version 12175 (0.0007) -[2026-06-07 02:16:37,977][324563] Updated weights for policy 0, policy_version 12186 (0.0006) -[2026-06-07 02:16:38,180][324563] Updated weights for policy 0, policy_version 12196 (0.0006) -[2026-06-07 02:16:38,389][324563] Updated weights for policy 0, policy_version 12206 (0.0006) -[2026-06-07 02:16:38,645][324563] Updated weights for policy 0, policy_version 12217 (0.0006) -[2026-06-07 02:16:38,867][324563] Updated weights for policy 0, policy_version 12228 (0.0007) -[2026-06-07 02:16:39,086][324563] Updated weights for policy 0, policy_version 12239 (0.0006) -[2026-06-07 02:16:39,853][324563] Updated weights for policy 0, policy_version 12250 (0.0007) -[2026-06-07 02:16:40,087][324563] Updated weights for policy 0, policy_version 12261 (0.0007) -[2026-06-07 02:16:40,298][324563] Updated weights for policy 0, policy_version 12271 (0.0006) -[2026-06-07 02:16:40,497][324563] Updated weights for policy 0, policy_version 12281 (0.0007) -[2026-06-07 02:16:40,735][324563] Updated weights for policy 0, policy_version 12293 (0.0007) -[2026-06-07 02:16:40,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17106.0). Total num frames: 6291456. Throughput: 0: 17601.4. Samples: 6320640. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:16:40,745][321791] Avg episode reward: [(0, '287.471')] -[2026-06-07 02:16:40,968][324563] Updated weights for policy 0, policy_version 12304 (0.0006) -[2026-06-07 02:16:41,749][324563] Updated weights for policy 0, policy_version 12314 (0.0006) -[2026-06-07 02:16:41,954][324563] Updated weights for policy 0, policy_version 12324 (0.0006) -[2026-06-07 02:16:42,176][324563] Updated weights for policy 0, policy_version 12335 (0.0006) -[2026-06-07 02:16:42,430][324563] Updated weights for policy 0, policy_version 12346 (0.0007) -[2026-06-07 02:16:42,672][324563] Updated weights for policy 0, policy_version 12358 (0.0007) -[2026-06-07 02:16:42,876][324563] Updated weights for policy 0, policy_version 12368 (0.0006) -[2026-06-07 02:16:43,653][324563] Updated weights for policy 0, policy_version 12379 (0.0006) -[2026-06-07 02:16:43,878][324563] Updated weights for policy 0, policy_version 12389 (0.0006) -[2026-06-07 02:16:44,076][324563] Updated weights for policy 0, policy_version 12399 (0.0006) -[2026-06-07 02:16:44,292][324563] Updated weights for policy 0, policy_version 12409 (0.0006) -[2026-06-07 02:16:44,526][324563] Updated weights for policy 0, policy_version 12420 (0.0008) -[2026-06-07 02:16:44,751][324563] Updated weights for policy 0, policy_version 12430 (0.0006) -[2026-06-07 02:16:45,564][324563] Updated weights for policy 0, policy_version 12442 (0.0006) -[2026-06-07 02:16:45,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17217.1). Total num frames: 6389760. Throughput: 0: 17510.4. Samples: 6428416. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) -[2026-06-07 02:16:45,745][321791] Avg episode reward: [(0, '299.786')] -[2026-06-07 02:16:45,788][324563] Updated weights for policy 0, policy_version 12452 (0.0006) -[2026-06-07 02:16:46,044][324563] Updated weights for policy 0, policy_version 12464 (0.0006) -[2026-06-07 02:16:46,275][324563] Updated weights for policy 0, policy_version 12475 (0.0007) -[2026-06-07 02:16:46,481][324563] Updated weights for policy 0, policy_version 12485 (0.0006) -[2026-06-07 02:16:46,693][324563] Updated weights for policy 0, policy_version 12496 (0.0006) -[2026-06-07 02:16:46,850][324276] Saving new best policy, reward=299.786! -[2026-06-07 02:16:47,497][324563] Updated weights for policy 0, policy_version 12506 (0.0006) -[2026-06-07 02:16:47,724][324563] Updated weights for policy 0, policy_version 12517 (0.0007) -[2026-06-07 02:16:47,945][324563] Updated weights for policy 0, policy_version 12527 (0.0006) -[2026-06-07 02:16:48,158][324563] Updated weights for policy 0, policy_version 12537 (0.0006) -[2026-06-07 02:16:48,357][324563] Updated weights for policy 0, policy_version 12547 (0.0007) -[2026-06-07 02:16:48,596][324563] Updated weights for policy 0, policy_version 12558 (0.0006) -[2026-06-07 02:16:49,378][324563] Updated weights for policy 0, policy_version 12570 (0.0007) -[2026-06-07 02:16:49,630][324563] Updated weights for policy 0, policy_version 12581 (0.0006) -[2026-06-07 02:16:49,821][324563] Updated weights for policy 0, policy_version 12591 (0.0006) -[2026-06-07 02:16:50,022][324563] Updated weights for policy 0, policy_version 12601 (0.0007) -[2026-06-07 02:16:50,242][324563] Updated weights for policy 0, policy_version 12612 (0.0006) -[2026-06-07 02:16:50,446][324563] Updated weights for policy 0, policy_version 12622 (0.0006) -[2026-06-07 02:16:50,667][324563] Updated weights for policy 0, policy_version 12632 (0.0006) -[2026-06-07 02:16:50,744][321791] Fps is (10 sec: 19660.5, 60 sec: 18022.4, 300 sec: 17217.1). Total num frames: 6488064. Throughput: 0: 17507.5. Samples: 6472576. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) -[2026-06-07 02:16:50,745][321791] Avg episode reward: [(0, '288.802')] -[2026-06-07 02:16:51,436][324563] Updated weights for policy 0, policy_version 12642 (0.0007) -[2026-06-07 02:16:51,646][324563] Updated weights for policy 0, policy_version 12652 (0.0006) -[2026-06-07 02:16:51,842][324563] Updated weights for policy 0, policy_version 12662 (0.0006) -[2026-06-07 02:16:52,052][324563] Updated weights for policy 0, policy_version 12672 (0.0007) -[2026-06-07 02:16:52,252][324563] Updated weights for policy 0, policy_version 12682 (0.0007) -[2026-06-07 02:16:52,459][324563] Updated weights for policy 0, policy_version 12692 (0.0006) -[2026-06-07 02:16:53,206][324563] Updated weights for policy 0, policy_version 12702 (0.0006) -[2026-06-07 02:16:53,424][324563] Updated weights for policy 0, policy_version 12712 (0.0007) -[2026-06-07 02:16:53,638][324563] Updated weights for policy 0, policy_version 12722 (0.0007) -[2026-06-07 02:16:53,847][324563] Updated weights for policy 0, policy_version 12732 (0.0007) -[2026-06-07 02:16:54,062][324563] Updated weights for policy 0, policy_version 12742 (0.0006) -[2026-06-07 02:16:54,283][324563] Updated weights for policy 0, policy_version 12753 (0.0007) -[2026-06-07 02:16:55,027][324563] Updated weights for policy 0, policy_version 12763 (0.0006) -[2026-06-07 02:16:55,220][324563] Updated weights for policy 0, policy_version 12773 (0.0006) -[2026-06-07 02:16:55,437][324563] Updated weights for policy 0, policy_version 12783 (0.0007) -[2026-06-07 02:16:55,651][324563] Updated weights for policy 0, policy_version 12793 (0.0006) -[2026-06-07 02:16:55,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17217.1). Total num frames: 6553600. Throughput: 0: 17319.8. Samples: 6576384. Policy #0 lag: (min: 55.0, avg: 68.9, max: 119.0) -[2026-06-07 02:16:55,745][321791] Avg episode reward: [(0, '298.736')] -[2026-06-07 02:16:55,845][324563] Updated weights for policy 0, policy_version 12803 (0.0006) -[2026-06-07 02:16:56,050][324563] Updated weights for policy 0, policy_version 12814 (0.0006) -[2026-06-07 02:16:56,260][324563] Updated weights for policy 0, policy_version 12824 (0.0006) -[2026-06-07 02:16:57,025][324563] Updated weights for policy 0, policy_version 12834 (0.0006) -[2026-06-07 02:16:57,234][324563] Updated weights for policy 0, policy_version 12844 (0.0006) -[2026-06-07 02:16:57,452][324563] Updated weights for policy 0, policy_version 12854 (0.0009) -[2026-06-07 02:16:57,642][324563] Updated weights for policy 0, policy_version 12864 (0.0007) -[2026-06-07 02:16:57,857][324563] Updated weights for policy 0, policy_version 12874 (0.0007) -[2026-06-07 02:16:58,104][324563] Updated weights for policy 0, policy_version 12884 (0.0006) -[2026-06-07 02:16:58,837][324563] Updated weights for policy 0, policy_version 12894 (0.0007) -[2026-06-07 02:16:59,027][324563] Updated weights for policy 0, policy_version 12904 (0.0007) -[2026-06-07 02:16:59,237][324563] Updated weights for policy 0, policy_version 12914 (0.0006) -[2026-06-07 02:16:59,461][324563] Updated weights for policy 0, policy_version 12924 (0.0006) -[2026-06-07 02:16:59,663][324563] Updated weights for policy 0, policy_version 12934 (0.0007) -[2026-06-07 02:16:59,857][324563] Updated weights for policy 0, policy_version 12944 (0.0006) -[2026-06-07 02:17:00,598][324563] Updated weights for policy 0, policy_version 12954 (0.0007) -[2026-06-07 02:17:00,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.2, 300 sec: 17217.1). Total num frames: 6651904. Throughput: 0: 17447.8. Samples: 6689280. Policy #0 lag: (min: 55.0, avg: 68.9, max: 119.0) -[2026-06-07 02:17:00,745][321791] Avg episode reward: [(0, '288.154')] -[2026-06-07 02:17:00,788][324563] Updated weights for policy 0, policy_version 12964 (0.0007) -[2026-06-07 02:17:00,986][324563] Updated weights for policy 0, policy_version 12975 (0.0006) -[2026-06-07 02:17:01,238][324563] Updated weights for policy 0, policy_version 12985 (0.0006) -[2026-06-07 02:17:01,428][324563] Updated weights for policy 0, policy_version 12995 (0.0007) -[2026-06-07 02:17:01,616][324563] Updated weights for policy 0, policy_version 13005 (0.0006) -[2026-06-07 02:17:01,821][324563] Updated weights for policy 0, policy_version 13015 (0.0006) -[2026-06-07 02:17:02,557][324563] Updated weights for policy 0, policy_version 13025 (0.0007) -[2026-06-07 02:17:02,783][324563] Updated weights for policy 0, policy_version 13035 (0.0007) -[2026-06-07 02:17:03,005][324563] Updated weights for policy 0, policy_version 13045 (0.0009) -[2026-06-07 02:17:03,205][324563] Updated weights for policy 0, policy_version 13055 (0.0007) -[2026-06-07 02:17:03,445][324563] Updated weights for policy 0, policy_version 13066 (0.0007) -[2026-06-07 02:17:03,662][324563] Updated weights for policy 0, policy_version 13076 (0.0007) -[2026-06-07 02:17:04,429][324563] Updated weights for policy 0, policy_version 13086 (0.0006) -[2026-06-07 02:17:04,619][324563] Updated weights for policy 0, policy_version 13096 (0.0006) -[2026-06-07 02:17:04,825][324563] Updated weights for policy 0, policy_version 13106 (0.0007) -[2026-06-07 02:17:05,045][324563] Updated weights for policy 0, policy_version 13116 (0.0007) -[2026-06-07 02:17:05,266][324563] Updated weights for policy 0, policy_version 13127 (0.0007) -[2026-06-07 02:17:05,474][324563] Updated weights for policy 0, policy_version 13137 (0.0007) -[2026-06-07 02:17:05,744][321791] Fps is (10 sec: 19660.6, 60 sec: 17476.3, 300 sec: 17328.2). Total num frames: 6750208. Throughput: 0: 17430.8. Samples: 6733440. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) -[2026-06-07 02:17:05,745][321791] Avg episode reward: [(0, '274.757')] -[2026-06-07 02:17:06,208][324563] Updated weights for policy 0, policy_version 13147 (0.0007) -[2026-06-07 02:17:06,419][324563] Updated weights for policy 0, policy_version 13157 (0.0007) -[2026-06-07 02:17:06,625][324563] Updated weights for policy 0, policy_version 13167 (0.0007) -[2026-06-07 02:17:06,833][324563] Updated weights for policy 0, policy_version 13177 (0.0010) -[2026-06-07 02:17:07,053][324563] Updated weights for policy 0, policy_version 13187 (0.0007) -[2026-06-07 02:17:07,302][324563] Updated weights for policy 0, policy_version 13199 (0.0006) -[2026-06-07 02:17:08,038][324563] Updated weights for policy 0, policy_version 13209 (0.0009) -[2026-06-07 02:17:08,243][324563] Updated weights for policy 0, policy_version 13219 (0.0006) -[2026-06-07 02:17:08,447][324563] Updated weights for policy 0, policy_version 13229 (0.0006) -[2026-06-07 02:17:08,664][324563] Updated weights for policy 0, policy_version 13239 (0.0008) -[2026-06-07 02:17:08,885][324563] Updated weights for policy 0, policy_version 13249 (0.0007) -[2026-06-07 02:17:09,100][324563] Updated weights for policy 0, policy_version 13259 (0.0006) -[2026-06-07 02:17:09,305][324563] Updated weights for policy 0, policy_version 13269 (0.0007) -[2026-06-07 02:17:10,063][324563] Updated weights for policy 0, policy_version 13279 (0.0008) -[2026-06-07 02:17:10,263][324563] Updated weights for policy 0, policy_version 13289 (0.0006) -[2026-06-07 02:17:10,475][324563] Updated weights for policy 0, policy_version 13299 (0.0006) -[2026-06-07 02:17:10,666][324563] Updated weights for policy 0, policy_version 13309 (0.0006) -[2026-06-07 02:17:10,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.2, 300 sec: 17217.1). Total num frames: 6815744. Throughput: 0: 17319.8. Samples: 6839424. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) -[2026-06-07 02:17:10,745][321791] Avg episode reward: [(0, '306.291')] -[2026-06-07 02:17:10,871][324563] Updated weights for policy 0, policy_version 13319 (0.0006) -[2026-06-07 02:17:11,058][324563] Updated weights for policy 0, policy_version 13329 (0.0006) -[2026-06-07 02:17:11,223][324276] Saving new best policy, reward=306.291! -[2026-06-07 02:17:11,854][324563] Updated weights for policy 0, policy_version 13339 (0.0007) -[2026-06-07 02:17:12,054][324563] Updated weights for policy 0, policy_version 13349 (0.0006) -[2026-06-07 02:17:12,253][324563] Updated weights for policy 0, policy_version 13359 (0.0007) -[2026-06-07 02:17:12,499][324563] Updated weights for policy 0, policy_version 13370 (0.0006) -[2026-06-07 02:17:12,712][324563] Updated weights for policy 0, policy_version 13380 (0.0007) -[2026-06-07 02:17:12,909][324563] Updated weights for policy 0, policy_version 13390 (0.0006) -[2026-06-07 02:17:13,123][324563] Updated weights for policy 0, policy_version 13400 (0.0007) -[2026-06-07 02:17:13,914][324563] Updated weights for policy 0, policy_version 13411 (0.0007) -[2026-06-07 02:17:14,095][324563] Updated weights for policy 0, policy_version 13421 (0.0006) -[2026-06-07 02:17:14,302][324563] Updated weights for policy 0, policy_version 13431 (0.0008) -[2026-06-07 02:17:14,510][324563] Updated weights for policy 0, policy_version 13441 (0.0009) -[2026-06-07 02:17:14,717][324563] Updated weights for policy 0, policy_version 13451 (0.0010) -[2026-06-07 02:17:14,927][324563] Updated weights for policy 0, policy_version 13461 (0.0011) -[2026-06-07 02:17:15,663][324563] Updated weights for policy 0, policy_version 13471 (0.0008) -[2026-06-07 02:17:15,744][321791] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17328.2). Total num frames: 6914048. Throughput: 0: 17433.6. Samples: 6950912. Policy #0 lag: (min: 31.0, avg: 46.1, max: 95.0) -[2026-06-07 02:17:15,745][321791] Avg episode reward: [(0, '323.791')] -[2026-06-07 02:17:15,877][324563] Updated weights for policy 0, policy_version 13482 (0.0006) -[2026-06-07 02:17:16,076][324563] Updated weights for policy 0, policy_version 13492 (0.0006) -[2026-06-07 02:17:16,296][324563] Updated weights for policy 0, policy_version 13503 (0.0006) -[2026-06-07 02:17:16,502][324563] Updated weights for policy 0, policy_version 13513 (0.0006) -[2026-06-07 02:17:16,701][324563] Updated weights for policy 0, policy_version 13523 (0.0006) -[2026-06-07 02:17:16,805][324276] Saving new best policy, reward=323.791! -[2026-06-07 02:17:17,481][324563] Updated weights for policy 0, policy_version 13535 (0.0006) -[2026-06-07 02:17:17,665][324563] Updated weights for policy 0, policy_version 13545 (0.0006) -[2026-06-07 02:17:17,870][324563] Updated weights for policy 0, policy_version 13555 (0.0006) -[2026-06-07 02:17:18,079][324563] Updated weights for policy 0, policy_version 13565 (0.0006) -[2026-06-07 02:17:18,287][324563] Updated weights for policy 0, policy_version 13575 (0.0006) -[2026-06-07 02:17:18,507][324563] Updated weights for policy 0, policy_version 13585 (0.0006) -[2026-06-07 02:17:19,233][324563] Updated weights for policy 0, policy_version 13595 (0.0006) -[2026-06-07 02:17:19,441][324563] Updated weights for policy 0, policy_version 13605 (0.0006) -[2026-06-07 02:17:19,657][324563] Updated weights for policy 0, policy_version 13615 (0.0007) -[2026-06-07 02:17:19,851][324563] Updated weights for policy 0, policy_version 13625 (0.0007) -[2026-06-07 02:17:20,053][324563] Updated weights for policy 0, policy_version 13635 (0.0006) -[2026-06-07 02:17:20,250][324563] Updated weights for policy 0, policy_version 13645 (0.0007) -[2026-06-07 02:17:20,447][324563] Updated weights for policy 0, policy_version 13655 (0.0006) -[2026-06-07 02:17:20,744][321791] Fps is (10 sec: 19660.6, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 7012352. Throughput: 0: 17439.2. Samples: 6996096. Policy #0 lag: (min: 31.0, avg: 46.1, max: 95.0) -[2026-06-07 02:17:20,746][321791] Avg episode reward: [(0, '320.708')] -[2026-06-07 02:17:21,176][324563] Updated weights for policy 0, policy_version 13665 (0.0007) -[2026-06-07 02:17:21,371][324563] Updated weights for policy 0, policy_version 13675 (0.0007) -[2026-06-07 02:17:21,589][324563] Updated weights for policy 0, policy_version 13686 (0.0006) -[2026-06-07 02:17:21,824][324563] Updated weights for policy 0, policy_version 13697 (0.0006) -[2026-06-07 02:17:22,028][324563] Updated weights for policy 0, policy_version 13707 (0.0006) -[2026-06-07 02:17:22,237][324563] Updated weights for policy 0, policy_version 13717 (0.0006) -[2026-06-07 02:17:22,968][324563] Updated weights for policy 0, policy_version 13727 (0.0006) -[2026-06-07 02:17:23,174][324563] Updated weights for policy 0, policy_version 13737 (0.0006) -[2026-06-07 02:17:23,378][324563] Updated weights for policy 0, policy_version 13747 (0.0007) -[2026-06-07 02:17:23,600][324563] Updated weights for policy 0, policy_version 13757 (0.0006) -[2026-06-07 02:17:23,806][324563] Updated weights for policy 0, policy_version 13767 (0.0006) -[2026-06-07 02:17:24,012][324563] Updated weights for policy 0, policy_version 13777 (0.0007) -[2026-06-07 02:17:24,764][324563] Updated weights for policy 0, policy_version 13787 (0.0006) -[2026-06-07 02:17:24,981][324563] Updated weights for policy 0, policy_version 13797 (0.0007) -[2026-06-07 02:17:25,213][324563] Updated weights for policy 0, policy_version 13807 (0.0006) -[2026-06-07 02:17:25,412][324563] Updated weights for policy 0, policy_version 13817 (0.0006) -[2026-06-07 02:17:25,627][324563] Updated weights for policy 0, policy_version 13827 (0.0006) -[2026-06-07 02:17:25,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17328.2). Total num frames: 7077888. Throughput: 0: 17527.5. Samples: 7109376. Policy #0 lag: (min: 31.0, avg: 46.1, max: 95.0) -[2026-06-07 02:17:25,745][321791] Avg episode reward: [(0, '311.962')] -[2026-06-07 02:17:25,864][324563] Updated weights for policy 0, policy_version 13837 (0.0007) -[2026-06-07 02:17:26,055][324563] Updated weights for policy 0, policy_version 13847 (0.0006) -[2026-06-07 02:17:26,780][324563] Updated weights for policy 0, policy_version 13857 (0.0007) -[2026-06-07 02:17:27,032][324563] Updated weights for policy 0, policy_version 13868 (0.0007) -[2026-06-07 02:17:27,246][324563] Updated weights for policy 0, policy_version 13879 (0.0006) -[2026-06-07 02:17:27,454][324563] Updated weights for policy 0, policy_version 13889 (0.0007) -[2026-06-07 02:17:27,673][324563] Updated weights for policy 0, policy_version 13899 (0.0006) -[2026-06-07 02:17:27,868][324563] Updated weights for policy 0, policy_version 13909 (0.0007) -[2026-06-07 02:17:28,608][324563] Updated weights for policy 0, policy_version 13919 (0.0006) -[2026-06-07 02:17:28,809][324563] Updated weights for policy 0, policy_version 13929 (0.0006) -[2026-06-07 02:17:29,029][324563] Updated weights for policy 0, policy_version 13939 (0.0006) -[2026-06-07 02:17:29,248][324563] Updated weights for policy 0, policy_version 13950 (0.0011) -[2026-06-07 02:17:29,444][324563] Updated weights for policy 0, policy_version 13960 (0.0008) -[2026-06-07 02:17:29,661][324563] Updated weights for policy 0, policy_version 13970 (0.0006) -[2026-06-07 02:17:30,383][324563] Updated weights for policy 0, policy_version 13980 (0.0006) -[2026-06-07 02:17:30,628][324563] Updated weights for policy 0, policy_version 13991 (0.0007) -[2026-06-07 02:17:30,744][321791] Fps is (10 sec: 16384.4, 60 sec: 17476.3, 300 sec: 17439.3). Total num frames: 7176192. Throughput: 0: 17427.9. Samples: 7212672. Policy #0 lag: (min: 36.0, avg: 51.1, max: 100.0) -[2026-06-07 02:17:30,745][321791] Avg episode reward: [(0, '325.035')] -[2026-06-07 02:17:30,830][324563] Updated weights for policy 0, policy_version 14001 (0.0006) -[2026-06-07 02:17:31,035][324563] Updated weights for policy 0, policy_version 14011 (0.0006) -[2026-06-07 02:17:31,225][324563] Updated weights for policy 0, policy_version 14021 (0.0006) -[2026-06-07 02:17:31,416][324563] Updated weights for policy 0, policy_version 14031 (0.0007) -[2026-06-07 02:17:31,591][324276] Saving new best policy, reward=325.035! -[2026-06-07 02:17:32,172][324563] Updated weights for policy 0, policy_version 14041 (0.0007) -[2026-06-07 02:17:32,383][324563] Updated weights for policy 0, policy_version 14051 (0.0008) -[2026-06-07 02:17:32,579][324563] Updated weights for policy 0, policy_version 14061 (0.0007) -[2026-06-07 02:17:32,815][324563] Updated weights for policy 0, policy_version 14071 (0.0007) -[2026-06-07 02:17:32,997][324563] Updated weights for policy 0, policy_version 14081 (0.0006) -[2026-06-07 02:17:33,246][324563] Updated weights for policy 0, policy_version 14092 (0.0007) -[2026-06-07 02:17:33,472][324563] Updated weights for policy 0, policy_version 14102 (0.0006) -[2026-06-07 02:17:34,228][324563] Updated weights for policy 0, policy_version 14112 (0.0006) -[2026-06-07 02:17:34,472][324563] Updated weights for policy 0, policy_version 14123 (0.0006) -[2026-06-07 02:17:34,667][324563] Updated weights for policy 0, policy_version 14133 (0.0006) -[2026-06-07 02:17:34,863][324563] Updated weights for policy 0, policy_version 14143 (0.0006) -[2026-06-07 02:17:35,076][324563] Updated weights for policy 0, policy_version 14153 (0.0006) -[2026-06-07 02:17:35,281][324563] Updated weights for policy 0, policy_version 14163 (0.0006) -[2026-06-07 02:17:35,744][321791] Fps is (10 sec: 19660.5, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 7274496. Throughput: 0: 17439.3. Samples: 7257344. Policy #0 lag: (min: 36.0, avg: 51.1, max: 100.0) -[2026-06-07 02:17:35,745][321791] Avg episode reward: [(0, '323.232')] -[2026-06-07 02:17:36,047][324563] Updated weights for policy 0, policy_version 14173 (0.0007) -[2026-06-07 02:17:36,264][324563] Updated weights for policy 0, policy_version 14183 (0.0006) -[2026-06-07 02:17:36,482][324563] Updated weights for policy 0, policy_version 14193 (0.0006) -[2026-06-07 02:17:36,684][324563] Updated weights for policy 0, policy_version 14203 (0.0006) -[2026-06-07 02:17:36,897][324563] Updated weights for policy 0, policy_version 14213 (0.0007) -[2026-06-07 02:17:37,104][324563] Updated weights for policy 0, policy_version 14223 (0.0007) -[2026-06-07 02:17:37,858][324563] Updated weights for policy 0, policy_version 14233 (0.0006) -[2026-06-07 02:17:38,076][324563] Updated weights for policy 0, policy_version 14243 (0.0006) -[2026-06-07 02:17:38,293][324563] Updated weights for policy 0, policy_version 14253 (0.0006) -[2026-06-07 02:17:38,502][324563] Updated weights for policy 0, policy_version 14263 (0.0006) -[2026-06-07 02:17:38,709][324563] Updated weights for policy 0, policy_version 14274 (0.0007) -[2026-06-07 02:17:38,918][324563] Updated weights for policy 0, policy_version 14284 (0.0006) -[2026-06-07 02:17:39,123][324563] Updated weights for policy 0, policy_version 14294 (0.0007) -[2026-06-07 02:17:39,865][324563] Updated weights for policy 0, policy_version 14304 (0.0007) -[2026-06-07 02:17:40,094][324563] Updated weights for policy 0, policy_version 14315 (0.0006) -[2026-06-07 02:17:40,301][324563] Updated weights for policy 0, policy_version 14325 (0.0006) -[2026-06-07 02:17:40,516][324563] Updated weights for policy 0, policy_version 14335 (0.0009) -[2026-06-07 02:17:40,713][324563] Updated weights for policy 0, policy_version 14345 (0.0011) -[2026-06-07 02:17:40,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17328.2). Total num frames: 7340032. Throughput: 0: 17632.7. Samples: 7369856. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) -[2026-06-07 02:17:40,745][321791] Avg episode reward: [(0, '305.559')] -[2026-06-07 02:17:40,905][324563] Updated weights for policy 0, policy_version 14355 (0.0008) -[2026-06-07 02:17:41,662][324563] Updated weights for policy 0, policy_version 14365 (0.0006) -[2026-06-07 02:17:41,890][324563] Updated weights for policy 0, policy_version 14375 (0.0007) -[2026-06-07 02:17:42,098][324563] Updated weights for policy 0, policy_version 14385 (0.0007) -[2026-06-07 02:17:42,296][324563] Updated weights for policy 0, policy_version 14395 (0.0007) -[2026-06-07 02:17:42,502][324563] Updated weights for policy 0, policy_version 14405 (0.0006) -[2026-06-07 02:17:42,712][324563] Updated weights for policy 0, policy_version 14415 (0.0006) -[2026-06-07 02:17:43,464][324563] Updated weights for policy 0, policy_version 14425 (0.0006) -[2026-06-07 02:17:43,666][324563] Updated weights for policy 0, policy_version 14435 (0.0006) -[2026-06-07 02:17:43,866][324563] Updated weights for policy 0, policy_version 14445 (0.0006) -[2026-06-07 02:17:44,067][324563] Updated weights for policy 0, policy_version 14455 (0.0006) -[2026-06-07 02:17:44,274][324563] Updated weights for policy 0, policy_version 14465 (0.0007) -[2026-06-07 02:17:44,483][324563] Updated weights for policy 0, policy_version 14475 (0.0006) -[2026-06-07 02:17:44,679][324563] Updated weights for policy 0, policy_version 14485 (0.0006) -[2026-06-07 02:17:45,450][324563] Updated weights for policy 0, policy_version 14495 (0.0007) -[2026-06-07 02:17:45,665][324563] Updated weights for policy 0, policy_version 14506 (0.0006) -[2026-06-07 02:17:45,744][321791] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 7438336. Throughput: 0: 17493.4. Samples: 7476480. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) -[2026-06-07 02:17:45,745][321791] Avg episode reward: [(0, '338.013')] -[2026-06-07 02:17:45,883][324563] Updated weights for policy 0, policy_version 14516 (0.0007) -[2026-06-07 02:17:46,088][324563] Updated weights for policy 0, policy_version 14526 (0.0006) -[2026-06-07 02:17:46,334][324563] Updated weights for policy 0, policy_version 14537 (0.0006) -[2026-06-07 02:17:46,530][324563] Updated weights for policy 0, policy_version 14547 (0.0007) -[2026-06-07 02:17:46,622][324276] Saving new best policy, reward=338.013! -[2026-06-07 02:17:47,323][324563] Updated weights for policy 0, policy_version 14559 (0.0007) -[2026-06-07 02:17:47,554][324563] Updated weights for policy 0, policy_version 14570 (0.0007) -[2026-06-07 02:17:47,751][324563] Updated weights for policy 0, policy_version 14580 (0.0006) -[2026-06-07 02:17:47,951][324563] Updated weights for policy 0, policy_version 14590 (0.0006) -[2026-06-07 02:17:48,159][324563] Updated weights for policy 0, policy_version 14600 (0.0006) -[2026-06-07 02:17:48,373][324563] Updated weights for policy 0, policy_version 14610 (0.0007) -[2026-06-07 02:17:49,142][324563] Updated weights for policy 0, policy_version 14620 (0.0006) -[2026-06-07 02:17:49,349][324563] Updated weights for policy 0, policy_version 14630 (0.0006) -[2026-06-07 02:17:49,578][324563] Updated weights for policy 0, policy_version 14640 (0.0006) -[2026-06-07 02:17:49,771][324563] Updated weights for policy 0, policy_version 14650 (0.0006) -[2026-06-07 02:17:49,977][324563] Updated weights for policy 0, policy_version 14660 (0.0007) -[2026-06-07 02:17:50,178][324563] Updated weights for policy 0, policy_version 14670 (0.0006) -[2026-06-07 02:17:50,382][324563] Updated weights for policy 0, policy_version 14680 (0.0006) -[2026-06-07 02:17:50,744][321791] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 7536640. Throughput: 0: 17507.6. Samples: 7521280. Policy #0 lag: (min: 63.0, avg: 77.5, max: 127.0) -[2026-06-07 02:17:50,745][321791] Avg episode reward: [(0, '355.181')] -[2026-06-07 02:17:50,749][324276] Saving new best policy, reward=355.181! -[2026-06-07 02:17:51,157][324563] Updated weights for policy 0, policy_version 14691 (0.0007) -[2026-06-07 02:17:51,386][324563] Updated weights for policy 0, policy_version 14701 (0.0006) -[2026-06-07 02:17:51,599][324563] Updated weights for policy 0, policy_version 14711 (0.0007) -[2026-06-07 02:17:51,829][324563] Updated weights for policy 0, policy_version 14722 (0.0007) -[2026-06-07 02:17:52,039][324563] Updated weights for policy 0, policy_version 14732 (0.0007) -[2026-06-07 02:17:52,242][324563] Updated weights for policy 0, policy_version 14742 (0.0006) -[2026-06-07 02:17:53,008][324563] Updated weights for policy 0, policy_version 14752 (0.0007) -[2026-06-07 02:17:53,222][324563] Updated weights for policy 0, policy_version 14762 (0.0007) -[2026-06-07 02:17:53,439][324563] Updated weights for policy 0, policy_version 14772 (0.0007) -[2026-06-07 02:17:53,648][324563] Updated weights for policy 0, policy_version 14782 (0.0007) -[2026-06-07 02:17:53,845][324563] Updated weights for policy 0, policy_version 14792 (0.0007) -[2026-06-07 02:17:54,045][324563] Updated weights for policy 0, policy_version 14802 (0.0007) -[2026-06-07 02:17:54,813][324563] Updated weights for policy 0, policy_version 14812 (0.0006) -[2026-06-07 02:17:55,013][324563] Updated weights for policy 0, policy_version 14822 (0.0006) -[2026-06-07 02:17:55,210][324563] Updated weights for policy 0, policy_version 14832 (0.0007) -[2026-06-07 02:17:55,418][324563] Updated weights for policy 0, policy_version 14842 (0.0006) -[2026-06-07 02:17:55,613][324563] Updated weights for policy 0, policy_version 14852 (0.0007) -[2026-06-07 02:17:55,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17328.2). Total num frames: 7602176. Throughput: 0: 17709.6. Samples: 7636352. Policy #0 lag: (min: 63.0, avg: 77.5, max: 127.0) -[2026-06-07 02:17:55,745][321791] Avg episode reward: [(0, '371.185')] -[2026-06-07 02:17:55,832][324563] Updated weights for policy 0, policy_version 14862 (0.0006) -[2026-06-07 02:17:56,013][324276] Saving new best policy, reward=371.185! -[2026-06-07 02:17:56,015][324563] Updated weights for policy 0, policy_version 14872 (0.0006) -[2026-06-07 02:17:56,758][324563] Updated weights for policy 0, policy_version 14882 (0.0007) -[2026-06-07 02:17:56,959][324563] Updated weights for policy 0, policy_version 14892 (0.0006) -[2026-06-07 02:17:57,141][324563] Updated weights for policy 0, policy_version 14902 (0.0007) -[2026-06-07 02:17:57,367][324563] Updated weights for policy 0, policy_version 14913 (0.0006) -[2026-06-07 02:17:57,556][324563] Updated weights for policy 0, policy_version 14923 (0.0006) -[2026-06-07 02:17:57,772][324563] Updated weights for policy 0, policy_version 14933 (0.0006) -[2026-06-07 02:17:58,541][324563] Updated weights for policy 0, policy_version 14943 (0.0008) -[2026-06-07 02:17:58,738][324563] Updated weights for policy 0, policy_version 14953 (0.0007) -[2026-06-07 02:17:58,934][324563] Updated weights for policy 0, policy_version 14963 (0.0007) -[2026-06-07 02:17:59,132][324563] Updated weights for policy 0, policy_version 14973 (0.0006) -[2026-06-07 02:17:59,348][324563] Updated weights for policy 0, policy_version 14983 (0.0006) -[2026-06-07 02:17:59,569][324563] Updated weights for policy 0, policy_version 14993 (0.0006) -[2026-06-07 02:18:00,355][324563] Updated weights for policy 0, policy_version 15003 (0.0007) -[2026-06-07 02:18:00,562][324563] Updated weights for policy 0, policy_version 15013 (0.0007) -[2026-06-07 02:18:00,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 7700480. Throughput: 0: 17615.6. Samples: 7743616. Policy #0 lag: (min: 63.0, avg: 77.5, max: 127.0) -[2026-06-07 02:18:00,745][321791] Avg episode reward: [(0, '373.934')] -[2026-06-07 02:18:00,774][324563] Updated weights for policy 0, policy_version 15023 (0.0006) -[2026-06-07 02:18:00,969][324563] Updated weights for policy 0, policy_version 15033 (0.0006) -[2026-06-07 02:18:01,188][324563] Updated weights for policy 0, policy_version 15043 (0.0006) -[2026-06-07 02:18:01,413][324563] Updated weights for policy 0, policy_version 15053 (0.0006) -[2026-06-07 02:18:01,623][324563] Updated weights for policy 0, policy_version 15063 (0.0007) -[2026-06-07 02:18:01,644][324276] Saving new best policy, reward=373.934! -[2026-06-07 02:18:02,394][324563] Updated weights for policy 0, policy_version 15073 (0.0006) -[2026-06-07 02:18:02,607][324563] Updated weights for policy 0, policy_version 15083 (0.0006) -[2026-06-07 02:18:02,814][324563] Updated weights for policy 0, policy_version 15093 (0.0006) -[2026-06-07 02:18:03,025][324563] Updated weights for policy 0, policy_version 15103 (0.0007) -[2026-06-07 02:18:03,225][324563] Updated weights for policy 0, policy_version 15113 (0.0007) -[2026-06-07 02:18:03,421][324563] Updated weights for policy 0, policy_version 15123 (0.0007) -[2026-06-07 02:18:04,213][324563] Updated weights for policy 0, policy_version 15133 (0.0007) -[2026-06-07 02:18:04,455][324563] Updated weights for policy 0, policy_version 15144 (0.0007) -[2026-06-07 02:18:04,665][324563] Updated weights for policy 0, policy_version 15154 (0.0007) -[2026-06-07 02:18:04,901][324563] Updated weights for policy 0, policy_version 15166 (0.0006) -[2026-06-07 02:18:05,148][324563] Updated weights for policy 0, policy_version 15176 (0.0007) -[2026-06-07 02:18:05,364][324563] Updated weights for policy 0, policy_version 15186 (0.0007) -[2026-06-07 02:18:05,744][321791] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 7798784. Throughput: 0: 17601.5. Samples: 7788160. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) -[2026-06-07 02:18:05,745][321791] Avg episode reward: [(0, '409.877')] -[2026-06-07 02:18:05,749][324276] Saving new best policy, reward=409.877! -[2026-06-07 02:18:06,108][324563] Updated weights for policy 0, policy_version 15197 (0.0007) -[2026-06-07 02:18:06,311][324563] Updated weights for policy 0, policy_version 15207 (0.0007) -[2026-06-07 02:18:06,549][324563] Updated weights for policy 0, policy_version 15217 (0.0007) -[2026-06-07 02:18:06,754][324563] Updated weights for policy 0, policy_version 15227 (0.0007) -[2026-06-07 02:18:06,955][324563] Updated weights for policy 0, policy_version 15237 (0.0006) -[2026-06-07 02:18:07,193][324563] Updated weights for policy 0, policy_version 15248 (0.0006) -[2026-06-07 02:18:07,933][324563] Updated weights for policy 0, policy_version 15258 (0.0006) -[2026-06-07 02:18:08,119][324563] Updated weights for policy 0, policy_version 15268 (0.0007) -[2026-06-07 02:18:08,320][324563] Updated weights for policy 0, policy_version 15278 (0.0007) -[2026-06-07 02:18:08,521][324563] Updated weights for policy 0, policy_version 15288 (0.0006) -[2026-06-07 02:18:08,766][324563] Updated weights for policy 0, policy_version 15299 (0.0006) -[2026-06-07 02:18:08,951][324563] Updated weights for policy 0, policy_version 15309 (0.0006) -[2026-06-07 02:18:09,157][324563] Updated weights for policy 0, policy_version 15319 (0.0006) -[2026-06-07 02:18:09,915][324563] Updated weights for policy 0, policy_version 15329 (0.0006) -[2026-06-07 02:18:10,112][324563] Updated weights for policy 0, policy_version 15339 (0.0007) -[2026-06-07 02:18:10,318][324563] Updated weights for policy 0, policy_version 15349 (0.0006) -[2026-06-07 02:18:10,530][324563] Updated weights for policy 0, policy_version 15359 (0.0006) -[2026-06-07 02:18:10,744][321791] Fps is (10 sec: 16383.9, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 7864320. Throughput: 0: 17584.3. Samples: 7900672. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) -[2026-06-07 02:18:10,745][321791] Avg episode reward: [(0, '401.654')] -[2026-06-07 02:18:10,753][324563] Updated weights for policy 0, policy_version 15369 (0.0006) -[2026-06-07 02:18:10,964][324563] Updated weights for policy 0, policy_version 15379 (0.0006) -[2026-06-07 02:18:11,725][324563] Updated weights for policy 0, policy_version 15389 (0.0006) -[2026-06-07 02:18:11,904][324563] Updated weights for policy 0, policy_version 15399 (0.0006) -[2026-06-07 02:18:12,106][324563] Updated weights for policy 0, policy_version 15409 (0.0006) -[2026-06-07 02:18:12,328][324563] Updated weights for policy 0, policy_version 15419 (0.0007) -[2026-06-07 02:18:12,531][324563] Updated weights for policy 0, policy_version 15429 (0.0006) -[2026-06-07 02:18:12,737][324563] Updated weights for policy 0, policy_version 15439 (0.0007) -[2026-06-07 02:18:13,509][324563] Updated weights for policy 0, policy_version 15449 (0.0006) -[2026-06-07 02:18:13,703][324563] Updated weights for policy 0, policy_version 15459 (0.0007) -[2026-06-07 02:18:13,901][324563] Updated weights for policy 0, policy_version 15469 (0.0006) -[2026-06-07 02:18:14,114][324563] Updated weights for policy 0, policy_version 15479 (0.0007) -[2026-06-07 02:18:14,344][324563] Updated weights for policy 0, policy_version 15489 (0.0006) -[2026-06-07 02:18:14,532][324563] Updated weights for policy 0, policy_version 15499 (0.0006) -[2026-06-07 02:18:14,747][324563] Updated weights for policy 0, policy_version 15509 (0.0006) -[2026-06-07 02:18:15,488][324563] Updated weights for policy 0, policy_version 15520 (0.0008) -[2026-06-07 02:18:15,702][324563] Updated weights for policy 0, policy_version 15530 (0.0008) -[2026-06-07 02:18:15,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 7962624. Throughput: 0: 17664.0. Samples: 8007552. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) -[2026-06-07 02:18:15,745][321791] Avg episode reward: [(0, '424.601')] -[2026-06-07 02:18:15,903][324563] Updated weights for policy 0, policy_version 15540 (0.0006) -[2026-06-07 02:18:16,103][324563] Updated weights for policy 0, policy_version 15550 (0.0006) -[2026-06-07 02:18:16,292][324563] Updated weights for policy 0, policy_version 15560 (0.0006) -[2026-06-07 02:18:16,495][324563] Updated weights for policy 0, policy_version 15570 (0.0007) -[2026-06-07 02:18:16,616][324276] Saving new best policy, reward=424.601! -[2026-06-07 02:18:17,309][324563] Updated weights for policy 0, policy_version 15581 (0.0006) -[2026-06-07 02:18:17,513][324563] Updated weights for policy 0, policy_version 15591 (0.0006) -[2026-06-07 02:18:17,748][324563] Updated weights for policy 0, policy_version 15602 (0.0006) -[2026-06-07 02:18:17,989][324563] Updated weights for policy 0, policy_version 15612 (0.0006) -[2026-06-07 02:18:18,179][324563] Updated weights for policy 0, policy_version 15622 (0.0006) -[2026-06-07 02:18:18,380][324563] Updated weights for policy 0, policy_version 15632 (0.0006) -[2026-06-07 02:18:19,107][324563] Updated weights for policy 0, policy_version 15642 (0.0006) -[2026-06-07 02:18:19,311][324563] Updated weights for policy 0, policy_version 15652 (0.0006) -[2026-06-07 02:18:19,528][324563] Updated weights for policy 0, policy_version 15662 (0.0006) -[2026-06-07 02:18:19,713][324563] Updated weights for policy 0, policy_version 15672 (0.0006) -[2026-06-07 02:18:19,915][324563] Updated weights for policy 0, policy_version 15682 (0.0007) -[2026-06-07 02:18:20,109][324563] Updated weights for policy 0, policy_version 15692 (0.0007) -[2026-06-07 02:18:20,301][324563] Updated weights for policy 0, policy_version 15702 (0.0006) -[2026-06-07 02:18:20,744][321791] Fps is (10 sec: 19660.9, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 8060928. Throughput: 0: 17692.5. Samples: 8053504. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) -[2026-06-07 02:18:20,745][321791] Avg episode reward: [(0, '419.750')] -[2026-06-07 02:18:21,054][324563] Updated weights for policy 0, policy_version 15712 (0.0006) -[2026-06-07 02:18:21,250][324563] Updated weights for policy 0, policy_version 15722 (0.0006) -[2026-06-07 02:18:21,468][324563] Updated weights for policy 0, policy_version 15733 (0.0007) -[2026-06-07 02:18:21,661][324563] Updated weights for policy 0, policy_version 15743 (0.0007) -[2026-06-07 02:18:21,868][324563] Updated weights for policy 0, policy_version 15753 (0.0006) -[2026-06-07 02:18:22,085][324563] Updated weights for policy 0, policy_version 15763 (0.0007) -[2026-06-07 02:18:22,854][324563] Updated weights for policy 0, policy_version 15773 (0.0009) -[2026-06-07 02:18:23,058][324563] Updated weights for policy 0, policy_version 15783 (0.0007) -[2026-06-07 02:18:23,267][324563] Updated weights for policy 0, policy_version 15793 (0.0006) -[2026-06-07 02:18:23,477][324563] Updated weights for policy 0, policy_version 15803 (0.0007) -[2026-06-07 02:18:23,668][324563] Updated weights for policy 0, policy_version 15813 (0.0007) -[2026-06-07 02:18:23,876][324563] Updated weights for policy 0, policy_version 15823 (0.0006) -[2026-06-07 02:18:24,661][324563] Updated weights for policy 0, policy_version 15833 (0.0006) -[2026-06-07 02:18:24,856][324563] Updated weights for policy 0, policy_version 15843 (0.0006) -[2026-06-07 02:18:25,070][324563] Updated weights for policy 0, policy_version 15853 (0.0007) -[2026-06-07 02:18:25,265][324563] Updated weights for policy 0, policy_version 15863 (0.0006) -[2026-06-07 02:18:25,456][324563] Updated weights for policy 0, policy_version 15873 (0.0007) -[2026-06-07 02:18:25,672][324563] Updated weights for policy 0, policy_version 15884 (0.0006) -[2026-06-07 02:18:25,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 8126464. Throughput: 0: 17777.8. Samples: 8169856. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) -[2026-06-07 02:18:25,745][321791] Avg episode reward: [(0, '427.119')] -[2026-06-07 02:18:25,882][324563] Updated weights for policy 0, policy_version 15894 (0.0007) -[2026-06-07 02:18:25,911][324276] Saving new best policy, reward=427.119! -[2026-06-07 02:18:26,681][324563] Updated weights for policy 0, policy_version 15904 (0.0006) -[2026-06-07 02:18:26,896][324563] Updated weights for policy 0, policy_version 15914 (0.0007) -[2026-06-07 02:18:27,101][324563] Updated weights for policy 0, policy_version 15924 (0.0006) -[2026-06-07 02:18:27,315][324563] Updated weights for policy 0, policy_version 15934 (0.0007) -[2026-06-07 02:18:27,507][324563] Updated weights for policy 0, policy_version 15944 (0.0006) -[2026-06-07 02:18:27,686][324563] Updated weights for policy 0, policy_version 15954 (0.0006) -[2026-06-07 02:18:28,480][324563] Updated weights for policy 0, policy_version 15964 (0.0006) -[2026-06-07 02:18:28,677][324563] Updated weights for policy 0, policy_version 15974 (0.0006) -[2026-06-07 02:18:28,895][324563] Updated weights for policy 0, policy_version 15984 (0.0006) -[2026-06-07 02:18:29,101][324563] Updated weights for policy 0, policy_version 15994 (0.0006) -[2026-06-07 02:18:29,305][324563] Updated weights for policy 0, policy_version 16004 (0.0006) -[2026-06-07 02:18:29,514][324563] Updated weights for policy 0, policy_version 16014 (0.0006) -[2026-06-07 02:18:29,715][324563] Updated weights for policy 0, policy_version 16024 (0.0006) -[2026-06-07 02:18:30,503][324563] Updated weights for policy 0, policy_version 16035 (0.0006) -[2026-06-07 02:18:30,709][324563] Updated weights for policy 0, policy_version 16045 (0.0006) -[2026-06-07 02:18:30,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 8224768. Throughput: 0: 17755.0. Samples: 8275456. Policy #0 lag: (min: 63.0, avg: 78.0, max: 127.0) -[2026-06-07 02:18:30,745][321791] Avg episode reward: [(0, '445.621')] -[2026-06-07 02:18:30,913][324563] Updated weights for policy 0, policy_version 16055 (0.0006) -[2026-06-07 02:18:31,120][324563] Updated weights for policy 0, policy_version 16065 (0.0007) -[2026-06-07 02:18:31,341][324563] Updated weights for policy 0, policy_version 16075 (0.0006) -[2026-06-07 02:18:31,567][324563] Updated weights for policy 0, policy_version 16086 (0.0006) -[2026-06-07 02:18:31,593][324276] Saving new best policy, reward=445.621! -[2026-06-07 02:18:32,347][324563] Updated weights for policy 0, policy_version 16096 (0.0006) -[2026-06-07 02:18:32,558][324563] Updated weights for policy 0, policy_version 16106 (0.0007) -[2026-06-07 02:18:32,758][324563] Updated weights for policy 0, policy_version 16116 (0.0006) -[2026-06-07 02:18:32,953][324563] Updated weights for policy 0, policy_version 16126 (0.0006) -[2026-06-07 02:18:33,187][324563] Updated weights for policy 0, policy_version 16136 (0.0007) -[2026-06-07 02:18:33,404][324563] Updated weights for policy 0, policy_version 16146 (0.0007) -[2026-06-07 02:18:34,146][324563] Updated weights for policy 0, policy_version 16156 (0.0006) -[2026-06-07 02:18:34,351][324563] Updated weights for policy 0, policy_version 16166 (0.0006) -[2026-06-07 02:18:34,535][324563] Updated weights for policy 0, policy_version 16176 (0.0010) -[2026-06-07 02:18:34,742][324563] Updated weights for policy 0, policy_version 16186 (0.0007) -[2026-06-07 02:18:34,938][324563] Updated weights for policy 0, policy_version 16196 (0.0006) -[2026-06-07 02:18:35,155][324563] Updated weights for policy 0, policy_version 16206 (0.0006) -[2026-06-07 02:18:35,350][324563] Updated weights for policy 0, policy_version 16216 (0.0006) -[2026-06-07 02:18:35,744][321791] Fps is (10 sec: 19660.7, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 8323072. Throughput: 0: 17774.9. Samples: 8321152. Policy #0 lag: (min: 63.0, avg: 78.0, max: 127.0) -[2026-06-07 02:18:35,745][321791] Avg episode reward: [(0, '448.459')] -[2026-06-07 02:18:35,750][324276] Saving new best policy, reward=448.459! -[2026-06-07 02:18:36,111][324563] Updated weights for policy 0, policy_version 16226 (0.0006) -[2026-06-07 02:18:36,327][324563] Updated weights for policy 0, policy_version 16236 (0.0006) -[2026-06-07 02:18:36,505][324563] Updated weights for policy 0, policy_version 16246 (0.0006) -[2026-06-07 02:18:36,729][324563] Updated weights for policy 0, policy_version 16256 (0.0006) -[2026-06-07 02:18:36,939][324563] Updated weights for policy 0, policy_version 16266 (0.0006) -[2026-06-07 02:18:37,147][324563] Updated weights for policy 0, policy_version 16276 (0.0007) -[2026-06-07 02:18:37,903][324563] Updated weights for policy 0, policy_version 16286 (0.0006) -[2026-06-07 02:18:38,108][324563] Updated weights for policy 0, policy_version 16296 (0.0006) -[2026-06-07 02:18:38,315][324563] Updated weights for policy 0, policy_version 16306 (0.0006) -[2026-06-07 02:18:38,534][324563] Updated weights for policy 0, policy_version 16316 (0.0007) -[2026-06-07 02:18:38,737][324563] Updated weights for policy 0, policy_version 16326 (0.0006) -[2026-06-07 02:18:38,955][324563] Updated weights for policy 0, policy_version 16336 (0.0007) -[2026-06-07 02:18:39,674][324563] Updated weights for policy 0, policy_version 16346 (0.0006) -[2026-06-07 02:18:39,922][324563] Updated weights for policy 0, policy_version 16357 (0.0006) -[2026-06-07 02:18:40,115][324563] Updated weights for policy 0, policy_version 16367 (0.0007) -[2026-06-07 02:18:40,320][324563] Updated weights for policy 0, policy_version 16377 (0.0006) -[2026-06-07 02:18:40,554][324563] Updated weights for policy 0, policy_version 16387 (0.0007) -[2026-06-07 02:18:40,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 8388608. Throughput: 0: 17763.5. Samples: 8435712. Policy #0 lag: (min: 63.0, avg: 78.0, max: 127.0) -[2026-06-07 02:18:40,745][321791] Avg episode reward: [(0, '436.987')] -[2026-06-07 02:18:40,772][324563] Updated weights for policy 0, policy_version 16397 (0.0007) -[2026-06-07 02:18:40,970][324563] Updated weights for policy 0, policy_version 16407 (0.0007) -[2026-06-07 02:18:41,731][324563] Updated weights for policy 0, policy_version 16418 (0.0007) -[2026-06-07 02:18:41,920][324563] Updated weights for policy 0, policy_version 16428 (0.0006) -[2026-06-07 02:18:42,128][324563] Updated weights for policy 0, policy_version 16438 (0.0007) -[2026-06-07 02:18:42,314][324563] Updated weights for policy 0, policy_version 16448 (0.0006) -[2026-06-07 02:18:42,536][324563] Updated weights for policy 0, policy_version 16458 (0.0007) -[2026-06-07 02:18:42,734][324563] Updated weights for policy 0, policy_version 16468 (0.0006) -[2026-06-07 02:18:43,497][324563] Updated weights for policy 0, policy_version 16478 (0.0007) -[2026-06-07 02:18:43,727][324563] Updated weights for policy 0, policy_version 16488 (0.0006) -[2026-06-07 02:18:43,933][324563] Updated weights for policy 0, policy_version 16498 (0.0006) -[2026-06-07 02:18:44,142][324563] Updated weights for policy 0, policy_version 16508 (0.0006) -[2026-06-07 02:18:44,343][324563] Updated weights for policy 0, policy_version 16518 (0.0006) -[2026-06-07 02:18:44,548][324563] Updated weights for policy 0, policy_version 16528 (0.0006) -[2026-06-07 02:18:45,272][324563] Updated weights for policy 0, policy_version 16538 (0.0006) -[2026-06-07 02:18:45,493][324563] Updated weights for policy 0, policy_version 16549 (0.0006) -[2026-06-07 02:18:45,681][324563] Updated weights for policy 0, policy_version 16559 (0.0007) -[2026-06-07 02:18:45,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 8486912. Throughput: 0: 17635.6. Samples: 8537216. Policy #0 lag: (min: 85.0, avg: 102.7, max: 152.0) -[2026-06-07 02:18:45,745][321791] Avg episode reward: [(0, '453.140')] -[2026-06-07 02:18:45,912][324563] Updated weights for policy 0, policy_version 16569 (0.0006) -[2026-06-07 02:18:46,113][324563] Updated weights for policy 0, policy_version 16579 (0.0008) -[2026-06-07 02:18:46,316][324563] Updated weights for policy 0, policy_version 16589 (0.0007) -[2026-06-07 02:18:46,517][324563] Updated weights for policy 0, policy_version 16599 (0.0007) -[2026-06-07 02:18:46,531][324276] Saving new best policy, reward=453.140! -[2026-06-07 02:18:47,259][324563] Updated weights for policy 0, policy_version 16609 (0.0007) -[2026-06-07 02:18:47,488][324563] Updated weights for policy 0, policy_version 16620 (0.0006) -[2026-06-07 02:18:47,686][324563] Updated weights for policy 0, policy_version 16630 (0.0007) -[2026-06-07 02:18:47,899][324563] Updated weights for policy 0, policy_version 16640 (0.0007) -[2026-06-07 02:18:48,121][324563] Updated weights for policy 0, policy_version 16650 (0.0007) -[2026-06-07 02:18:48,334][324563] Updated weights for policy 0, policy_version 16660 (0.0007) -[2026-06-07 02:18:49,039][324563] Updated weights for policy 0, policy_version 16670 (0.0006) -[2026-06-07 02:18:49,247][324563] Updated weights for policy 0, policy_version 16680 (0.0006) -[2026-06-07 02:18:49,464][324563] Updated weights for policy 0, policy_version 16690 (0.0007) -[2026-06-07 02:18:49,675][324563] Updated weights for policy 0, policy_version 16700 (0.0007) -[2026-06-07 02:18:49,887][324563] Updated weights for policy 0, policy_version 16710 (0.0006) -[2026-06-07 02:18:50,101][324563] Updated weights for policy 0, policy_version 16720 (0.0006) -[2026-06-07 02:18:50,744][321791] Fps is (10 sec: 19660.9, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 8585216. Throughput: 0: 17720.9. Samples: 8585600. Policy #0 lag: (min: 85.0, avg: 102.7, max: 152.0) -[2026-06-07 02:18:50,745][321791] Avg episode reward: [(0, '458.879')] -[2026-06-07 02:18:50,807][324563] Updated weights for policy 0, policy_version 16730 (0.0007) -[2026-06-07 02:18:51,007][324563] Updated weights for policy 0, policy_version 16740 (0.0006) -[2026-06-07 02:18:51,218][324563] Updated weights for policy 0, policy_version 16750 (0.0006) -[2026-06-07 02:18:51,427][324563] Updated weights for policy 0, policy_version 16760 (0.0007) -[2026-06-07 02:18:51,614][324563] Updated weights for policy 0, policy_version 16770 (0.0006) -[2026-06-07 02:18:51,830][324563] Updated weights for policy 0, policy_version 16780 (0.0006) -[2026-06-07 02:18:52,024][324563] Updated weights for policy 0, policy_version 16790 (0.0007) -[2026-06-07 02:18:52,064][324276] Saving new best policy, reward=458.879! -[2026-06-07 02:18:52,774][324563] Updated weights for policy 0, policy_version 16800 (0.0009) -[2026-06-07 02:18:52,990][324563] Updated weights for policy 0, policy_version 16810 (0.0007) -[2026-06-07 02:18:53,191][324563] Updated weights for policy 0, policy_version 16820 (0.0007) -[2026-06-07 02:18:53,415][324563] Updated weights for policy 0, policy_version 16831 (0.0006) -[2026-06-07 02:18:53,617][324563] Updated weights for policy 0, policy_version 16841 (0.0006) -[2026-06-07 02:18:53,826][324563] Updated weights for policy 0, policy_version 16851 (0.0006) -[2026-06-07 02:18:54,550][324563] Updated weights for policy 0, policy_version 16861 (0.0007) -[2026-06-07 02:18:54,784][324563] Updated weights for policy 0, policy_version 16872 (0.0007) -[2026-06-07 02:18:55,028][324563] Updated weights for policy 0, policy_version 16883 (0.0006) -[2026-06-07 02:18:55,236][324563] Updated weights for policy 0, policy_version 16893 (0.0006) -[2026-06-07 02:18:55,438][324563] Updated weights for policy 0, policy_version 16903 (0.0006) -[2026-06-07 02:18:55,650][324563] Updated weights for policy 0, policy_version 16913 (0.0006) -[2026-06-07 02:18:55,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17439.2). Total num frames: 8650752. Throughput: 0: 17703.8. Samples: 8697344. Policy #0 lag: (min: 85.0, avg: 102.7, max: 152.0) -[2026-06-07 02:18:55,745][321791] Avg episode reward: [(0, '440.000')] -[2026-06-07 02:18:56,386][324563] Updated weights for policy 0, policy_version 16923 (0.0007) -[2026-06-07 02:18:56,586][324563] Updated weights for policy 0, policy_version 16933 (0.0006) -[2026-06-07 02:18:56,784][324563] Updated weights for policy 0, policy_version 16943 (0.0007) -[2026-06-07 02:18:56,978][324563] Updated weights for policy 0, policy_version 16953 (0.0006) -[2026-06-07 02:18:57,198][324563] Updated weights for policy 0, policy_version 16963 (0.0006) -[2026-06-07 02:18:57,422][324563] Updated weights for policy 0, policy_version 16973 (0.0007) -[2026-06-07 02:18:57,643][324563] Updated weights for policy 0, policy_version 16983 (0.0006) -[2026-06-07 02:18:58,380][324563] Updated weights for policy 0, policy_version 16993 (0.0006) -[2026-06-07 02:18:58,576][324563] Updated weights for policy 0, policy_version 17003 (0.0006) -[2026-06-07 02:18:58,792][324563] Updated weights for policy 0, policy_version 17013 (0.0006) -[2026-06-07 02:18:59,004][324563] Updated weights for policy 0, policy_version 17023 (0.0006) -[2026-06-07 02:18:59,211][324563] Updated weights for policy 0, policy_version 17033 (0.0006) -[2026-06-07 02:18:59,402][324563] Updated weights for policy 0, policy_version 17043 (0.0006) -[2026-06-07 02:19:00,144][324563] Updated weights for policy 0, policy_version 17053 (0.0006) -[2026-06-07 02:19:00,374][324563] Updated weights for policy 0, policy_version 17064 (0.0006) -[2026-06-07 02:19:00,604][324563] Updated weights for policy 0, policy_version 17074 (0.0006) -[2026-06-07 02:19:00,744][321791] Fps is (10 sec: 16383.8, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 8749056. Throughput: 0: 17598.5. Samples: 8799488. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) -[2026-06-07 02:19:00,745][321791] Avg episode reward: [(0, '465.862')] -[2026-06-07 02:19:00,831][324563] Updated weights for policy 0, policy_version 17084 (0.0006) -[2026-06-07 02:19:01,029][324563] Updated weights for policy 0, policy_version 17094 (0.0006) -[2026-06-07 02:19:01,229][324563] Updated weights for policy 0, policy_version 17104 (0.0007) -[2026-06-07 02:19:01,373][324276] Saving new best policy, reward=465.862! -[2026-06-07 02:19:01,939][324563] Updated weights for policy 0, policy_version 17114 (0.0006) -[2026-06-07 02:19:02,144][324563] Updated weights for policy 0, policy_version 17124 (0.0007) -[2026-06-07 02:19:02,367][324563] Updated weights for policy 0, policy_version 17134 (0.0007) -[2026-06-07 02:19:02,588][324563] Updated weights for policy 0, policy_version 17144 (0.0006) -[2026-06-07 02:19:02,794][324563] Updated weights for policy 0, policy_version 17154 (0.0007) -[2026-06-07 02:19:03,007][324563] Updated weights for policy 0, policy_version 17164 (0.0006) -[2026-06-07 02:19:03,204][324563] Updated weights for policy 0, policy_version 17174 (0.0006) -[2026-06-07 02:19:03,926][324563] Updated weights for policy 0, policy_version 17184 (0.0006) -[2026-06-07 02:19:04,121][324563] Updated weights for policy 0, policy_version 17194 (0.0006) -[2026-06-07 02:19:04,326][324563] Updated weights for policy 0, policy_version 17204 (0.0006) -[2026-06-07 02:19:04,541][324563] Updated weights for policy 0, policy_version 17214 (0.0006) -[2026-06-07 02:19:04,727][324563] Updated weights for policy 0, policy_version 17224 (0.0006) -[2026-06-07 02:19:04,950][324563] Updated weights for policy 0, policy_version 17234 (0.0006) -[2026-06-07 02:19:05,689][324563] Updated weights for policy 0, policy_version 17244 (0.0006) -[2026-06-07 02:19:05,744][321791] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 8847360. Throughput: 0: 17729.4. Samples: 8851328. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) -[2026-06-07 02:19:05,745][321791] Avg episode reward: [(0, '413.258')] -[2026-06-07 02:19:05,892][324563] Updated weights for policy 0, policy_version 17254 (0.0007) -[2026-06-07 02:19:06,091][324563] Updated weights for policy 0, policy_version 17264 (0.0006) -[2026-06-07 02:19:06,286][324563] Updated weights for policy 0, policy_version 17274 (0.0007) -[2026-06-07 02:19:06,515][324563] Updated weights for policy 0, policy_version 17284 (0.0007) -[2026-06-07 02:19:06,729][324563] Updated weights for policy 0, policy_version 17294 (0.0009) -[2026-06-07 02:19:06,927][324563] Updated weights for policy 0, policy_version 17304 (0.0006) -[2026-06-07 02:19:07,682][324563] Updated weights for policy 0, policy_version 17314 (0.0006) -[2026-06-07 02:19:07,884][324563] Updated weights for policy 0, policy_version 17324 (0.0007) -[2026-06-07 02:19:08,090][324563] Updated weights for policy 0, policy_version 17334 (0.0007) -[2026-06-07 02:19:08,301][324563] Updated weights for policy 0, policy_version 17344 (0.0007) -[2026-06-07 02:19:08,511][324563] Updated weights for policy 0, policy_version 17354 (0.0007) -[2026-06-07 02:19:08,740][324563] Updated weights for policy 0, policy_version 17364 (0.0007) -[2026-06-07 02:19:09,492][324563] Updated weights for policy 0, policy_version 17374 (0.0007) -[2026-06-07 02:19:09,700][324563] Updated weights for policy 0, policy_version 17384 (0.0007) -[2026-06-07 02:19:09,924][324563] Updated weights for policy 0, policy_version 17394 (0.0007) -[2026-06-07 02:19:10,122][324563] Updated weights for policy 0, policy_version 17404 (0.0006) -[2026-06-07 02:19:10,309][324563] Updated weights for policy 0, policy_version 17414 (0.0007) -[2026-06-07 02:19:10,532][324563] Updated weights for policy 0, policy_version 17424 (0.0006) -[2026-06-07 02:19:10,744][321791] Fps is (10 sec: 19660.9, 60 sec: 18022.4, 300 sec: 17550.3). Total num frames: 8945664. Throughput: 0: 17504.7. Samples: 8957568. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) -[2026-06-07 02:19:10,745][321791] Avg episode reward: [(0, '382.455')] -[2026-06-07 02:19:11,276][324563] Updated weights for policy 0, policy_version 17434 (0.0006) -[2026-06-07 02:19:11,474][324563] Updated weights for policy 0, policy_version 17444 (0.0010) -[2026-06-07 02:19:11,696][324563] Updated weights for policy 0, policy_version 17454 (0.0011) -[2026-06-07 02:19:11,911][324563] Updated weights for policy 0, policy_version 17464 (0.0010) -[2026-06-07 02:19:12,115][324563] Updated weights for policy 0, policy_version 17474 (0.0009) -[2026-06-07 02:19:12,325][324563] Updated weights for policy 0, policy_version 17484 (0.0011) -[2026-06-07 02:19:12,527][324563] Updated weights for policy 0, policy_version 17494 (0.0010) -[2026-06-07 02:19:13,267][324563] Updated weights for policy 0, policy_version 17504 (0.0007) -[2026-06-07 02:19:13,463][324563] Updated weights for policy 0, policy_version 17514 (0.0006) -[2026-06-07 02:19:13,676][324563] Updated weights for policy 0, policy_version 17524 (0.0007) -[2026-06-07 02:19:13,884][324563] Updated weights for policy 0, policy_version 17534 (0.0007) -[2026-06-07 02:19:14,092][324563] Updated weights for policy 0, policy_version 17544 (0.0006) -[2026-06-07 02:19:14,293][324563] Updated weights for policy 0, policy_version 17554 (0.0006) -[2026-06-07 02:19:15,083][324563] Updated weights for policy 0, policy_version 17565 (0.0006) -[2026-06-07 02:19:15,294][324563] Updated weights for policy 0, policy_version 17575 (0.0007) -[2026-06-07 02:19:15,500][324563] Updated weights for policy 0, policy_version 17585 (0.0006) -[2026-06-07 02:19:15,737][324563] Updated weights for policy 0, policy_version 17595 (0.0007) -[2026-06-07 02:19:15,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 9011200. Throughput: 0: 17427.9. Samples: 9059712. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) -[2026-06-07 02:19:15,745][321791] Avg episode reward: [(0, '401.971')] -[2026-06-07 02:19:15,949][324563] Updated weights for policy 0, policy_version 17606 (0.0006) -[2026-06-07 02:19:16,151][324563] Updated weights for policy 0, policy_version 17616 (0.0007) -[2026-06-07 02:19:16,882][324563] Updated weights for policy 0, policy_version 17626 (0.0006) -[2026-06-07 02:19:17,100][324563] Updated weights for policy 0, policy_version 17636 (0.0006) -[2026-06-07 02:19:17,309][324563] Updated weights for policy 0, policy_version 17646 (0.0007) -[2026-06-07 02:19:17,498][324563] Updated weights for policy 0, policy_version 17656 (0.0006) -[2026-06-07 02:19:17,700][324563] Updated weights for policy 0, policy_version 17666 (0.0006) -[2026-06-07 02:19:17,918][324563] Updated weights for policy 0, policy_version 17676 (0.0006) -[2026-06-07 02:19:18,182][324563] Updated weights for policy 0, policy_version 17686 (0.0006) -[2026-06-07 02:19:18,923][324563] Updated weights for policy 0, policy_version 17696 (0.0007) -[2026-06-07 02:19:19,133][324563] Updated weights for policy 0, policy_version 17706 (0.0006) -[2026-06-07 02:19:19,348][324563] Updated weights for policy 0, policy_version 17716 (0.0006) -[2026-06-07 02:19:19,575][324563] Updated weights for policy 0, policy_version 17726 (0.0006) -[2026-06-07 02:19:19,770][324563] Updated weights for policy 0, policy_version 17736 (0.0007) -[2026-06-07 02:19:19,982][324563] Updated weights for policy 0, policy_version 17746 (0.0006) -[2026-06-07 02:19:20,708][324563] Updated weights for policy 0, policy_version 17756 (0.0007) -[2026-06-07 02:19:20,744][321791] Fps is (10 sec: 16383.8, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 9109504. Throughput: 0: 17590.0. Samples: 9112704. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) -[2026-06-07 02:19:20,745][321791] Avg episode reward: [(0, '391.048')] -[2026-06-07 02:19:20,907][324563] Updated weights for policy 0, policy_version 17766 (0.0006) -[2026-06-07 02:19:21,120][324563] Updated weights for policy 0, policy_version 17776 (0.0006) -[2026-06-07 02:19:21,328][324563] Updated weights for policy 0, policy_version 17786 (0.0006) -[2026-06-07 02:19:21,541][324563] Updated weights for policy 0, policy_version 17796 (0.0006) -[2026-06-07 02:19:21,743][324563] Updated weights for policy 0, policy_version 17806 (0.0006) -[2026-06-07 02:19:21,956][324563] Updated weights for policy 0, policy_version 17816 (0.0006) -[2026-06-07 02:19:22,713][324563] Updated weights for policy 0, policy_version 17827 (0.0007) -[2026-06-07 02:19:22,931][324563] Updated weights for policy 0, policy_version 17837 (0.0006) -[2026-06-07 02:19:23,149][324563] Updated weights for policy 0, policy_version 17847 (0.0007) -[2026-06-07 02:19:23,343][324563] Updated weights for policy 0, policy_version 17857 (0.0007) -[2026-06-07 02:19:23,543][324563] Updated weights for policy 0, policy_version 17867 (0.0006) -[2026-06-07 02:19:23,761][324563] Updated weights for policy 0, policy_version 17877 (0.0007) -[2026-06-07 02:19:24,474][324563] Updated weights for policy 0, policy_version 17887 (0.0006) -[2026-06-07 02:19:24,679][324563] Updated weights for policy 0, policy_version 17897 (0.0006) -[2026-06-07 02:19:24,892][324563] Updated weights for policy 0, policy_version 17907 (0.0006) -[2026-06-07 02:19:25,082][324563] Updated weights for policy 0, policy_version 17917 (0.0006) -[2026-06-07 02:19:25,293][324563] Updated weights for policy 0, policy_version 17927 (0.0006) -[2026-06-07 02:19:25,497][324563] Updated weights for policy 0, policy_version 17937 (0.0006) -[2026-06-07 02:19:25,744][321791] Fps is (10 sec: 19660.6, 60 sec: 18022.4, 300 sec: 17550.3). Total num frames: 9207808. Throughput: 0: 17408.0. Samples: 9219072. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) -[2026-06-07 02:19:25,745][321791] Avg episode reward: [(0, '376.235')] -[2026-06-07 02:19:26,240][324563] Updated weights for policy 0, policy_version 17947 (0.0006) -[2026-06-07 02:19:26,442][324563] Updated weights for policy 0, policy_version 17957 (0.0006) -[2026-06-07 02:19:26,678][324563] Updated weights for policy 0, policy_version 17968 (0.0006) -[2026-06-07 02:19:26,898][324563] Updated weights for policy 0, policy_version 17978 (0.0006) -[2026-06-07 02:19:27,094][324563] Updated weights for policy 0, policy_version 17988 (0.0006) -[2026-06-07 02:19:27,291][324563] Updated weights for policy 0, policy_version 17998 (0.0007) -[2026-06-07 02:19:27,497][324563] Updated weights for policy 0, policy_version 18008 (0.0007) -[2026-06-07 02:19:28,260][324563] Updated weights for policy 0, policy_version 18018 (0.0006) -[2026-06-07 02:19:28,488][324563] Updated weights for policy 0, policy_version 18029 (0.0006) -[2026-06-07 02:19:28,704][324563] Updated weights for policy 0, policy_version 18039 (0.0006) -[2026-06-07 02:19:28,908][324563] Updated weights for policy 0, policy_version 18049 (0.0006) -[2026-06-07 02:19:29,098][324563] Updated weights for policy 0, policy_version 18059 (0.0006) -[2026-06-07 02:19:29,287][324563] Updated weights for policy 0, policy_version 18069 (0.0006) -[2026-06-07 02:19:30,048][324563] Updated weights for policy 0, policy_version 18079 (0.0006) -[2026-06-07 02:19:30,258][324563] Updated weights for policy 0, policy_version 18089 (0.0006) -[2026-06-07 02:19:30,477][324563] Updated weights for policy 0, policy_version 18099 (0.0007) -[2026-06-07 02:19:30,715][324563] Updated weights for policy 0, policy_version 18110 (0.0006) -[2026-06-07 02:19:30,744][321791] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 9273344. Throughput: 0: 17453.5. Samples: 9322624. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) -[2026-06-07 02:19:30,745][321791] Avg episode reward: [(0, '384.647')] -[2026-06-07 02:19:30,937][324563] Updated weights for policy 0, policy_version 18120 (0.0007) -[2026-06-07 02:19:31,152][324563] Updated weights for policy 0, policy_version 18130 (0.0006) -[2026-06-07 02:19:31,868][324563] Updated weights for policy 0, policy_version 18140 (0.0007) -[2026-06-07 02:19:32,076][324563] Updated weights for policy 0, policy_version 18150 (0.0007) -[2026-06-07 02:19:32,279][324563] Updated weights for policy 0, policy_version 18160 (0.0007) -[2026-06-07 02:19:32,478][324563] Updated weights for policy 0, policy_version 18170 (0.0006) -[2026-06-07 02:19:32,692][324563] Updated weights for policy 0, policy_version 18180 (0.0007) -[2026-06-07 02:19:32,903][324563] Updated weights for policy 0, policy_version 18190 (0.0007) -[2026-06-07 02:19:33,089][324563] Updated weights for policy 0, policy_version 18200 (0.0006) -[2026-06-07 02:19:33,869][324563] Updated weights for policy 0, policy_version 18210 (0.0006) -[2026-06-07 02:19:34,066][324563] Updated weights for policy 0, policy_version 18220 (0.0006) -[2026-06-07 02:19:34,275][324563] Updated weights for policy 0, policy_version 18230 (0.0007) -[2026-06-07 02:19:34,473][324563] Updated weights for policy 0, policy_version 18240 (0.0006) -[2026-06-07 02:19:34,694][324563] Updated weights for policy 0, policy_version 18250 (0.0007) -[2026-06-07 02:19:34,884][324563] Updated weights for policy 0, policy_version 18260 (0.0006) -[2026-06-07 02:19:35,621][324563] Updated weights for policy 0, policy_version 18270 (0.0007) -[2026-06-07 02:19:35,744][321791] Fps is (10 sec: 16383.9, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 9371648. Throughput: 0: 17618.4. Samples: 9378432. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) -[2026-06-07 02:19:35,746][321791] Avg episode reward: [(0, '386.888')] -[2026-06-07 02:19:35,851][324563] Updated weights for policy 0, policy_version 18280 (0.0007) -[2026-06-07 02:19:36,064][324563] Updated weights for policy 0, policy_version 18290 (0.0006) -[2026-06-07 02:19:36,269][324563] Updated weights for policy 0, policy_version 18300 (0.0007) -[2026-06-07 02:19:36,494][324563] Updated weights for policy 0, policy_version 18311 (0.0006) -[2026-06-07 02:19:36,701][324563] Updated weights for policy 0, policy_version 18321 (0.0006) -[2026-06-07 02:19:37,454][324563] Updated weights for policy 0, policy_version 18331 (0.0010) -[2026-06-07 02:19:37,669][324563] Updated weights for policy 0, policy_version 18341 (0.0006) -[2026-06-07 02:19:37,879][324563] Updated weights for policy 0, policy_version 18351 (0.0007) -[2026-06-07 02:19:38,102][324563] Updated weights for policy 0, policy_version 18361 (0.0006) -[2026-06-07 02:19:38,306][324563] Updated weights for policy 0, policy_version 18371 (0.0006) -[2026-06-07 02:19:38,499][324563] Updated weights for policy 0, policy_version 18381 (0.0007) -[2026-06-07 02:19:38,702][324563] Updated weights for policy 0, policy_version 18391 (0.0006) -[2026-06-07 02:19:39,442][324563] Updated weights for policy 0, policy_version 18401 (0.0006) -[2026-06-07 02:19:39,661][324563] Updated weights for policy 0, policy_version 18411 (0.0006) -[2026-06-07 02:19:39,868][324563] Updated weights for policy 0, policy_version 18421 (0.0006) -[2026-06-07 02:19:40,080][324563] Updated weights for policy 0, policy_version 18431 (0.0007) -[2026-06-07 02:19:40,287][324563] Updated weights for policy 0, policy_version 18441 (0.0006) -[2026-06-07 02:19:40,494][324563] Updated weights for policy 0, policy_version 18451 (0.0007) -[2026-06-07 02:19:40,744][321791] Fps is (10 sec: 19660.5, 60 sec: 18022.4, 300 sec: 17550.3). Total num frames: 9469952. Throughput: 0: 17419.3. Samples: 9481216. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:19:40,746][321791] Avg episode reward: [(0, '373.447')] -[2026-06-07 02:19:41,260][324563] Updated weights for policy 0, policy_version 18461 (0.0007) -[2026-06-07 02:19:41,461][324563] Updated weights for policy 0, policy_version 18471 (0.0007) -[2026-06-07 02:19:41,661][324563] Updated weights for policy 0, policy_version 18481 (0.0009) -[2026-06-07 02:19:41,869][324563] Updated weights for policy 0, policy_version 18491 (0.0010) -[2026-06-07 02:19:42,048][324563] Updated weights for policy 0, policy_version 18501 (0.0011) -[2026-06-07 02:19:42,257][324563] Updated weights for policy 0, policy_version 18511 (0.0011) -[2026-06-07 02:19:43,022][324563] Updated weights for policy 0, policy_version 18521 (0.0011) -[2026-06-07 02:19:43,219][324563] Updated weights for policy 0, policy_version 18531 (0.0006) -[2026-06-07 02:19:43,428][324563] Updated weights for policy 0, policy_version 18541 (0.0008) -[2026-06-07 02:19:43,630][324563] Updated weights for policy 0, policy_version 18551 (0.0011) -[2026-06-07 02:19:43,833][324563] Updated weights for policy 0, policy_version 18561 (0.0011) -[2026-06-07 02:19:44,046][324563] Updated weights for policy 0, policy_version 18571 (0.0011) -[2026-06-07 02:19:44,277][324563] Updated weights for policy 0, policy_version 18581 (0.0011) -[2026-06-07 02:19:45,023][324563] Updated weights for policy 0, policy_version 18591 (0.0006) -[2026-06-07 02:19:45,250][324563] Updated weights for policy 0, policy_version 18601 (0.0007) -[2026-06-07 02:19:45,437][324563] Updated weights for policy 0, policy_version 18611 (0.0006) -[2026-06-07 02:19:45,656][324563] Updated weights for policy 0, policy_version 18621 (0.0006) -[2026-06-07 02:19:45,744][321791] Fps is (10 sec: 16384.3, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 9535488. Throughput: 0: 17447.9. Samples: 9584640. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:19:45,745][321791] Avg episode reward: [(0, '410.918')] -[2026-06-07 02:19:45,876][324563] Updated weights for policy 0, policy_version 18631 (0.0006) -[2026-06-07 02:19:46,102][324563] Updated weights for policy 0, policy_version 18641 (0.0006) -[2026-06-07 02:19:46,838][324563] Updated weights for policy 0, policy_version 18651 (0.0007) -[2026-06-07 02:19:47,039][324563] Updated weights for policy 0, policy_version 18661 (0.0010) -[2026-06-07 02:19:47,248][324563] Updated weights for policy 0, policy_version 18672 (0.0008) -[2026-06-07 02:19:47,462][324563] Updated weights for policy 0, policy_version 18682 (0.0006) -[2026-06-07 02:19:47,684][324563] Updated weights for policy 0, policy_version 18692 (0.0006) -[2026-06-07 02:19:47,883][324563] Updated weights for policy 0, policy_version 18702 (0.0006) -[2026-06-07 02:19:48,098][324563] Updated weights for policy 0, policy_version 18712 (0.0006) -[2026-06-07 02:19:48,834][324563] Updated weights for policy 0, policy_version 18722 (0.0007) -[2026-06-07 02:19:49,052][324563] Updated weights for policy 0, policy_version 18732 (0.0007) -[2026-06-07 02:19:49,247][324563] Updated weights for policy 0, policy_version 18742 (0.0007) -[2026-06-07 02:19:49,447][324563] Updated weights for policy 0, policy_version 18752 (0.0006) -[2026-06-07 02:19:49,676][324563] Updated weights for policy 0, policy_version 18762 (0.0008) -[2026-06-07 02:19:49,882][324563] Updated weights for policy 0, policy_version 18772 (0.0007) -[2026-06-07 02:19:50,620][324563] Updated weights for policy 0, policy_version 18782 (0.0006) -[2026-06-07 02:19:50,744][321791] Fps is (10 sec: 16384.3, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 9633792. Throughput: 0: 17564.4. Samples: 9641728. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:19:50,745][321791] Avg episode reward: [(0, '427.376')] -[2026-06-07 02:19:50,858][324563] Updated weights for policy 0, policy_version 18793 (0.0007) -[2026-06-07 02:19:51,087][324563] Updated weights for policy 0, policy_version 18803 (0.0006) -[2026-06-07 02:19:51,303][324563] Updated weights for policy 0, policy_version 18813 (0.0007) -[2026-06-07 02:19:51,521][324563] Updated weights for policy 0, policy_version 18824 (0.0006) -[2026-06-07 02:19:51,741][324563] Updated weights for policy 0, policy_version 18834 (0.0006) -[2026-06-07 02:19:52,456][324563] Updated weights for policy 0, policy_version 18844 (0.0006) -[2026-06-07 02:19:52,664][324563] Updated weights for policy 0, policy_version 18854 (0.0007) -[2026-06-07 02:19:52,893][324563] Updated weights for policy 0, policy_version 18864 (0.0006) -[2026-06-07 02:19:53,098][324563] Updated weights for policy 0, policy_version 18874 (0.0007) -[2026-06-07 02:19:53,315][324563] Updated weights for policy 0, policy_version 18884 (0.0007) -[2026-06-07 02:19:53,523][324563] Updated weights for policy 0, policy_version 18894 (0.0006) -[2026-06-07 02:19:53,748][324563] Updated weights for policy 0, policy_version 18904 (0.0007) -[2026-06-07 02:19:54,484][324563] Updated weights for policy 0, policy_version 18914 (0.0006) -[2026-06-07 02:19:54,695][324563] Updated weights for policy 0, policy_version 18924 (0.0007) -[2026-06-07 02:19:54,906][324563] Updated weights for policy 0, policy_version 18934 (0.0006) -[2026-06-07 02:19:55,126][324563] Updated weights for policy 0, policy_version 18944 (0.0007) -[2026-06-07 02:19:55,350][324563] Updated weights for policy 0, policy_version 18954 (0.0006) -[2026-06-07 02:19:55,546][324563] Updated weights for policy 0, policy_version 18964 (0.0006) -[2026-06-07 02:19:55,744][321791] Fps is (10 sec: 19660.6, 60 sec: 18022.4, 300 sec: 17550.3). Total num frames: 9732096. Throughput: 0: 17456.3. Samples: 9743104. Policy #0 lag: (min: 63.0, avg: 78.6, max: 127.0) -[2026-06-07 02:19:55,745][321791] Avg episode reward: [(0, '452.498')] -[2026-06-07 02:19:56,302][324563] Updated weights for policy 0, policy_version 18974 (0.0007) -[2026-06-07 02:19:56,488][324563] Updated weights for policy 0, policy_version 18984 (0.0007) -[2026-06-07 02:19:56,694][324563] Updated weights for policy 0, policy_version 18994 (0.0007) -[2026-06-07 02:19:56,884][324563] Updated weights for policy 0, policy_version 19004 (0.0007) -[2026-06-07 02:19:57,081][324563] Updated weights for policy 0, policy_version 19014 (0.0007) -[2026-06-07 02:19:57,295][324563] Updated weights for policy 0, policy_version 19024 (0.0006) -[2026-06-07 02:19:58,046][324563] Updated weights for policy 0, policy_version 19034 (0.0007) -[2026-06-07 02:19:58,263][324563] Updated weights for policy 0, policy_version 19044 (0.0007) -[2026-06-07 02:19:58,472][324563] Updated weights for policy 0, policy_version 19054 (0.0007) -[2026-06-07 02:19:58,666][324563] Updated weights for policy 0, policy_version 19064 (0.0007) -[2026-06-07 02:19:58,858][324563] Updated weights for policy 0, policy_version 19074 (0.0007) -[2026-06-07 02:19:59,083][324563] Updated weights for policy 0, policy_version 19084 (0.0007) -[2026-06-07 02:19:59,281][324563] Updated weights for policy 0, policy_version 19094 (0.0007) -[2026-06-07 02:20:00,031][324563] Updated weights for policy 0, policy_version 19105 (0.0007) -[2026-06-07 02:20:00,242][324563] Updated weights for policy 0, policy_version 19115 (0.0007) -[2026-06-07 02:20:00,465][324563] Updated weights for policy 0, policy_version 19126 (0.0007) -[2026-06-07 02:20:00,677][324563] Updated weights for policy 0, policy_version 19136 (0.0008) -[2026-06-07 02:20:00,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 9797632. Throughput: 0: 17445.0. Samples: 9844736. Policy #0 lag: (min: 63.0, avg: 78.6, max: 127.0) -[2026-06-07 02:20:00,745][321791] Avg episode reward: [(0, '467.453')] -[2026-06-07 02:20:00,875][324563] Updated weights for policy 0, policy_version 19146 (0.0011) -[2026-06-07 02:20:01,095][324563] Updated weights for policy 0, policy_version 19156 (0.0011) -[2026-06-07 02:20:01,173][324276] Saving new best policy, reward=467.453! -[2026-06-07 02:20:01,885][324563] Updated weights for policy 0, policy_version 19167 (0.0011) -[2026-06-07 02:20:02,086][324563] Updated weights for policy 0, policy_version 19177 (0.0009) -[2026-06-07 02:20:02,271][324563] Updated weights for policy 0, policy_version 19187 (0.0007) -[2026-06-07 02:20:02,465][324563] Updated weights for policy 0, policy_version 19197 (0.0006) -[2026-06-07 02:20:02,700][324563] Updated weights for policy 0, policy_version 19207 (0.0006) -[2026-06-07 02:20:02,899][324563] Updated weights for policy 0, policy_version 19217 (0.0007) -[2026-06-07 02:20:03,640][324563] Updated weights for policy 0, policy_version 19227 (0.0006) -[2026-06-07 02:20:03,839][324563] Updated weights for policy 0, policy_version 19237 (0.0007) -[2026-06-07 02:20:04,044][324563] Updated weights for policy 0, policy_version 19247 (0.0006) -[2026-06-07 02:20:04,261][324563] Updated weights for policy 0, policy_version 19257 (0.0006) -[2026-06-07 02:20:04,481][324563] Updated weights for policy 0, policy_version 19267 (0.0006) -[2026-06-07 02:20:04,686][324563] Updated weights for policy 0, policy_version 19277 (0.0006) -[2026-06-07 02:20:04,895][324563] Updated weights for policy 0, policy_version 19287 (0.0006) -[2026-06-07 02:20:05,642][324563] Updated weights for policy 0, policy_version 19297 (0.0006) -[2026-06-07 02:20:05,744][321791] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 9895936. Throughput: 0: 17578.7. Samples: 9903744. Policy #0 lag: (min: 63.0, avg: 78.6, max: 127.0) -[2026-06-07 02:20:05,745][321791] Avg episode reward: [(0, '455.189')] -[2026-06-07 02:20:05,840][324563] Updated weights for policy 0, policy_version 19307 (0.0006) -[2026-06-07 02:20:06,049][324563] Updated weights for policy 0, policy_version 19317 (0.0007) -[2026-06-07 02:20:06,252][324563] Updated weights for policy 0, policy_version 19327 (0.0006) -[2026-06-07 02:20:06,479][324563] Updated weights for policy 0, policy_version 19337 (0.0007) -[2026-06-07 02:20:06,683][324563] Updated weights for policy 0, policy_version 19347 (0.0006) -[2026-06-07 02:20:07,420][324563] Updated weights for policy 0, policy_version 19357 (0.0006) -[2026-06-07 02:20:07,619][324563] Updated weights for policy 0, policy_version 19367 (0.0006) -[2026-06-07 02:20:07,832][324563] Updated weights for policy 0, policy_version 19377 (0.0006) -[2026-06-07 02:20:08,045][324563] Updated weights for policy 0, policy_version 19387 (0.0007) -[2026-06-07 02:20:08,244][324563] Updated weights for policy 0, policy_version 19397 (0.0006) -[2026-06-07 02:20:08,438][324563] Updated weights for policy 0, policy_version 19407 (0.0007) -[2026-06-07 02:20:09,208][324563] Updated weights for policy 0, policy_version 19417 (0.0007) -[2026-06-07 02:20:09,426][324563] Updated weights for policy 0, policy_version 19427 (0.0007) -[2026-06-07 02:20:09,632][324563] Updated weights for policy 0, policy_version 19437 (0.0006) -[2026-06-07 02:20:09,822][324563] Updated weights for policy 0, policy_version 19447 (0.0006) -[2026-06-07 02:20:10,051][324563] Updated weights for policy 0, policy_version 19457 (0.0006) -[2026-06-07 02:20:10,280][324563] Updated weights for policy 0, policy_version 19468 (0.0007) -[2026-06-07 02:20:10,503][324563] Updated weights for policy 0, policy_version 19478 (0.0007) -[2026-06-07 02:20:10,744][321791] Fps is (10 sec: 19660.6, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 9994240. Throughput: 0: 17490.5. Samples: 10006144. Policy #0 lag: (min: 33.0, avg: 75.7, max: 97.0) -[2026-06-07 02:20:10,745][321791] Avg episode reward: [(0, '490.757')] -[2026-06-07 02:20:10,752][324276] Saving new best policy, reward=490.757! -[2026-06-07 02:20:11,264][324563] Updated weights for policy 0, policy_version 19489 (0.0006) -[2026-06-07 02:20:11,491][324563] Updated weights for policy 0, policy_version 19499 (0.0007) -[2026-06-07 02:20:11,715][324563] Updated weights for policy 0, policy_version 19510 (0.0006) -[2026-06-07 02:20:11,928][324563] Updated weights for policy 0, policy_version 19520 (0.0006) -[2026-06-07 02:20:12,111][324563] Updated weights for policy 0, policy_version 19530 (0.0006) -[2026-06-07 02:20:12,308][324563] Updated weights for policy 0, policy_version 19540 (0.0006) -[2026-06-07 02:20:13,052][324563] Updated weights for policy 0, policy_version 19550 (0.0007) -[2026-06-07 02:20:13,271][324563] Updated weights for policy 0, policy_version 19560 (0.0006) -[2026-06-07 02:20:13,488][324563] Updated weights for policy 0, policy_version 19570 (0.0007) -[2026-06-07 02:20:13,725][324563] Updated weights for policy 0, policy_version 19581 (0.0007) -[2026-06-07 02:20:13,936][324563] Updated weights for policy 0, policy_version 19591 (0.0006) -[2026-06-07 02:20:14,145][324563] Updated weights for policy 0, policy_version 19601 (0.0007) -[2026-06-07 02:20:14,888][324563] Updated weights for policy 0, policy_version 19611 (0.0006) -[2026-06-07 02:20:15,102][324563] Updated weights for policy 0, policy_version 19621 (0.0006) -[2026-06-07 02:20:15,322][324563] Updated weights for policy 0, policy_version 19631 (0.0007) -[2026-06-07 02:20:15,553][324563] Updated weights for policy 0, policy_version 19642 (0.0007) -[2026-06-07 02:20:15,744][321791] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 10059776. Throughput: 0: 17484.8. Samples: 10109440. Policy #0 lag: (min: 33.0, avg: 75.7, max: 97.0) -[2026-06-07 02:20:15,745][321791] Avg episode reward: [(0, '497.814')] -[2026-06-07 02:20:15,772][324563] Updated weights for policy 0, policy_version 19652 (0.0006) -[2026-06-07 02:20:15,978][324563] Updated weights for policy 0, policy_version 19662 (0.0006) -[2026-06-07 02:20:16,175][324276] Saving new best policy, reward=497.814! -[2026-06-07 02:20:16,177][324563] Updated weights for policy 0, policy_version 19672 (0.0006) -[2026-06-07 02:20:16,885][324563] Updated weights for policy 0, policy_version 19682 (0.0006) -[2026-06-07 02:20:17,095][324563] Updated weights for policy 0, policy_version 19692 (0.0007) -[2026-06-07 02:20:17,310][324563] Updated weights for policy 0, policy_version 19702 (0.0007) -[2026-06-07 02:20:17,516][324563] Updated weights for policy 0, policy_version 19712 (0.0007) -[2026-06-07 02:20:17,736][324563] Updated weights for policy 0, policy_version 19722 (0.0006) -[2026-06-07 02:20:17,929][324563] Updated weights for policy 0, policy_version 19732 (0.0006) -[2026-06-07 02:20:18,689][324563] Updated weights for policy 0, policy_version 19742 (0.0006) -[2026-06-07 02:20:18,897][324563] Updated weights for policy 0, policy_version 19752 (0.0007) -[2026-06-07 02:20:19,119][324563] Updated weights for policy 0, policy_version 19762 (0.0007) -[2026-06-07 02:20:19,326][324563] Updated weights for policy 0, policy_version 19772 (0.0006) -[2026-06-07 02:20:19,513][324563] Updated weights for policy 0, policy_version 19782 (0.0007) -[2026-06-07 02:20:19,729][324563] Updated weights for policy 0, policy_version 19792 (0.0007) -[2026-06-07 02:20:20,487][324563] Updated weights for policy 0, policy_version 19802 (0.0006) -[2026-06-07 02:20:20,684][324563] Updated weights for policy 0, policy_version 19812 (0.0006) -[2026-06-07 02:20:20,744][321791] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 10158080. Throughput: 0: 17524.7. Samples: 10167040. Policy #0 lag: (min: 33.0, avg: 75.7, max: 97.0) -[2026-06-07 02:20:20,745][321791] Avg episode reward: [(0, '540.383')] -[2026-06-07 02:20:20,899][324563] Updated weights for policy 0, policy_version 19822 (0.0007) -[2026-06-07 02:20:21,147][324563] Updated weights for policy 0, policy_version 19833 (0.0006) -[2026-06-07 02:20:21,362][324563] Updated weights for policy 0, policy_version 19843 (0.0006) -[2026-06-07 02:20:21,570][324563] Updated weights for policy 0, policy_version 19853 (0.0006) -[2026-06-07 02:20:21,756][324563] Updated weights for policy 0, policy_version 19863 (0.0007) -[2026-06-07 02:20:21,780][324276] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed13/checkpoint_p0/checkpoint_000019864_10190848.pth... -[2026-06-07 02:20:21,802][324276] Saving new best policy, reward=540.383! -[2026-06-07 02:20:22,526][324563] Updated weights for policy 0, policy_version 19873 (0.0006) -[2026-06-07 02:20:22,722][324563] Updated weights for policy 0, policy_version 19883 (0.0006) -[2026-06-07 02:20:22,933][324563] Updated weights for policy 0, policy_version 19893 (0.0006) -[2026-06-07 02:20:23,126][324563] Updated weights for policy 0, policy_version 19903 (0.0006) -[2026-06-07 02:20:23,343][324563] Updated weights for policy 0, policy_version 19913 (0.0006) -[2026-06-07 02:20:23,564][324563] Updated weights for policy 0, policy_version 19923 (0.0006) -[2026-06-07 02:20:24,356][324563] Updated weights for policy 0, policy_version 19933 (0.0006) -[2026-06-07 02:20:24,567][324563] Updated weights for policy 0, policy_version 19944 (0.0007) -[2026-06-07 02:20:24,789][324563] Updated weights for policy 0, policy_version 19954 (0.0006) -[2026-06-07 02:20:24,995][324563] Updated weights for policy 0, policy_version 19964 (0.0006) -[2026-06-07 02:20:25,207][324563] Updated weights for policy 0, policy_version 19974 (0.0006) -[2026-06-07 02:20:25,424][324563] Updated weights for policy 0, policy_version 19984 (0.0006) -[2026-06-07 02:20:25,744][321791] Fps is (10 sec: 19660.6, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 10256384. Throughput: 0: 17533.2. Samples: 10270208. Policy #0 lag: (min: 44.0, avg: 58.8, max: 108.0) -[2026-06-07 02:20:25,745][321791] Avg episode reward: [(0, '546.376')] -[2026-06-07 02:20:25,750][324276] Saving new best policy, reward=546.376! -[2026-06-07 02:20:26,181][324563] Updated weights for policy 0, policy_version 19994 (0.0006) -[2026-06-07 02:20:26,375][324563] Updated weights for policy 0, policy_version 20004 (0.0006) -[2026-06-07 02:20:26,575][324563] Updated weights for policy 0, policy_version 20014 (0.0007) -[2026-06-07 02:20:26,778][324563] Updated weights for policy 0, policy_version 20024 (0.0007) -[2026-06-07 02:20:27,002][324563] Updated weights for policy 0, policy_version 20034 (0.0006) -[2026-06-07 02:20:27,219][324563] Updated weights for policy 0, policy_version 20044 (0.0006) -[2026-06-07 02:20:27,420][324563] Updated weights for policy 0, policy_version 20054 (0.0006) -[2026-06-07 02:20:28,166][324563] Updated weights for policy 0, policy_version 20064 (0.0007) -[2026-06-07 02:20:28,381][324563] Updated weights for policy 0, policy_version 20074 (0.0006) -[2026-06-07 02:20:28,612][324563] Updated weights for policy 0, policy_version 20085 (0.0006) -[2026-06-07 02:20:28,820][324563] Updated weights for policy 0, policy_version 20095 (0.0006) -[2026-06-07 02:20:29,018][324563] Updated weights for policy 0, policy_version 20105 (0.0006) -[2026-06-07 02:20:29,236][324563] Updated weights for policy 0, policy_version 20115 (0.0006) -[2026-06-07 02:20:29,980][324563] Updated weights for policy 0, policy_version 20125 (0.0007) -[2026-06-07 02:20:30,193][324563] Updated weights for policy 0, policy_version 20135 (0.0007) -[2026-06-07 02:20:30,414][324563] Updated weights for policy 0, policy_version 20146 (0.0006) -[2026-06-07 02:20:30,625][324563] Updated weights for policy 0, policy_version 20156 (0.0006) -[2026-06-07 02:20:30,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 10321920. Throughput: 0: 17516.1. Samples: 10372864. Policy #0 lag: (min: 44.0, avg: 58.8, max: 108.0) -[2026-06-07 02:20:30,745][321791] Avg episode reward: [(0, '520.277')] -[2026-06-07 02:20:30,812][324563] Updated weights for policy 0, policy_version 20166 (0.0006) -[2026-06-07 02:20:31,048][324563] Updated weights for policy 0, policy_version 20177 (0.0006) -[2026-06-07 02:20:31,802][324563] Updated weights for policy 0, policy_version 20187 (0.0007) -[2026-06-07 02:20:32,015][324563] Updated weights for policy 0, policy_version 20197 (0.0007) -[2026-06-07 02:20:32,234][324563] Updated weights for policy 0, policy_version 20208 (0.0006) -[2026-06-07 02:20:32,426][324563] Updated weights for policy 0, policy_version 20218 (0.0006) -[2026-06-07 02:20:32,631][324563] Updated weights for policy 0, policy_version 20228 (0.0006) -[2026-06-07 02:20:32,831][324563] Updated weights for policy 0, policy_version 20238 (0.0007) -[2026-06-07 02:20:33,039][324563] Updated weights for policy 0, policy_version 20248 (0.0006) -[2026-06-07 02:20:33,848][324563] Updated weights for policy 0, policy_version 20260 (0.0007) -[2026-06-07 02:20:34,054][324563] Updated weights for policy 0, policy_version 20270 (0.0007) -[2026-06-07 02:20:34,266][324563] Updated weights for policy 0, policy_version 20280 (0.0006) -[2026-06-07 02:20:34,481][324563] Updated weights for policy 0, policy_version 20290 (0.0006) -[2026-06-07 02:20:34,681][324563] Updated weights for policy 0, policy_version 20300 (0.0007) -[2026-06-07 02:20:34,890][324563] Updated weights for policy 0, policy_version 20310 (0.0007) -[2026-06-07 02:20:35,622][324563] Updated weights for policy 0, policy_version 20320 (0.0006) -[2026-06-07 02:20:35,744][321791] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 10420224. Throughput: 0: 17553.1. Samples: 10431616. Policy #0 lag: (min: 44.0, avg: 58.8, max: 108.0) -[2026-06-07 02:20:35,745][321791] Avg episode reward: [(0, '554.400')] -[2026-06-07 02:20:35,803][324563] Updated weights for policy 0, policy_version 20330 (0.0007) -[2026-06-07 02:20:36,013][324563] Updated weights for policy 0, policy_version 20340 (0.0007) -[2026-06-07 02:20:36,237][324563] Updated weights for policy 0, policy_version 20350 (0.0006) -[2026-06-07 02:20:36,455][324563] Updated weights for policy 0, policy_version 20360 (0.0007) -[2026-06-07 02:20:36,675][324563] Updated weights for policy 0, policy_version 20371 (0.0006) -[2026-06-07 02:20:36,756][324276] Saving new best policy, reward=554.400! -[2026-06-07 02:20:37,413][324563] Updated weights for policy 0, policy_version 20381 (0.0006) -[2026-06-07 02:20:37,625][324563] Updated weights for policy 0, policy_version 20391 (0.0006) -[2026-06-07 02:20:37,840][324563] Updated weights for policy 0, policy_version 20401 (0.0006) -[2026-06-07 02:20:38,052][324563] Updated weights for policy 0, policy_version 20411 (0.0007) -[2026-06-07 02:20:38,264][324563] Updated weights for policy 0, policy_version 20421 (0.0007) -[2026-06-07 02:20:38,480][324563] Updated weights for policy 0, policy_version 20431 (0.0006) -[2026-06-07 02:20:39,210][324563] Updated weights for policy 0, policy_version 20441 (0.0007) -[2026-06-07 02:20:39,413][324563] Updated weights for policy 0, policy_version 20451 (0.0006) -[2026-06-07 02:20:39,627][324563] Updated weights for policy 0, policy_version 20461 (0.0006) -[2026-06-07 02:20:39,832][324563] Updated weights for policy 0, policy_version 20471 (0.0006) -[2026-06-07 02:20:40,035][324563] Updated weights for policy 0, policy_version 20481 (0.0006) -[2026-06-07 02:20:40,279][324563] Updated weights for policy 0, policy_version 20492 (0.0010) -[2026-06-07 02:20:40,483][324563] Updated weights for policy 0, policy_version 20502 (0.0009) -[2026-06-07 02:20:40,744][321791] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 10518528. Throughput: 0: 17575.8. Samples: 10534016. Policy #0 lag: (min: 6.0, avg: 21.7, max: 70.0) -[2026-06-07 02:20:40,745][321791] Avg episode reward: [(0, '544.046')] -[2026-06-07 02:20:41,221][324563] Updated weights for policy 0, policy_version 20512 (0.0007) -[2026-06-07 02:20:41,445][324563] Updated weights for policy 0, policy_version 20522 (0.0006) -[2026-06-07 02:20:41,631][324563] Updated weights for policy 0, policy_version 20532 (0.0006) -[2026-06-07 02:20:41,841][324563] Updated weights for policy 0, policy_version 20542 (0.0006) -[2026-06-07 02:20:42,042][324563] Updated weights for policy 0, policy_version 20552 (0.0006) -[2026-06-07 02:20:42,254][324563] Updated weights for policy 0, policy_version 20562 (0.0006) -[2026-06-07 02:20:43,000][324563] Updated weights for policy 0, policy_version 20572 (0.0007) -[2026-06-07 02:20:43,212][324563] Updated weights for policy 0, policy_version 20582 (0.0006) -[2026-06-07 02:20:43,407][324563] Updated weights for policy 0, policy_version 20592 (0.0006) -[2026-06-07 02:20:43,642][324563] Updated weights for policy 0, policy_version 20603 (0.0007) -[2026-06-07 02:20:43,852][324563] Updated weights for policy 0, policy_version 20613 (0.0006) -[2026-06-07 02:20:44,081][324563] Updated weights for policy 0, policy_version 20623 (0.0006) -[2026-06-07 02:20:44,838][324563] Updated weights for policy 0, policy_version 20633 (0.0006) -[2026-06-07 02:20:45,024][324563] Updated weights for policy 0, policy_version 20643 (0.0006) -[2026-06-07 02:20:45,228][324563] Updated weights for policy 0, policy_version 20653 (0.0006) -[2026-06-07 02:20:45,453][324563] Updated weights for policy 0, policy_version 20663 (0.0007) -[2026-06-07 02:20:45,678][324563] Updated weights for policy 0, policy_version 20673 (0.0008) -[2026-06-07 02:20:45,744][321791] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 10584064. Throughput: 0: 17607.1. Samples: 10637056. Policy #0 lag: (min: 6.0, avg: 21.7, max: 70.0) -[2026-06-07 02:20:45,745][321791] Avg episode reward: [(0, '561.073')] -[2026-06-07 02:20:45,879][324563] Updated weights for policy 0, policy_version 20683 (0.0009) -[2026-06-07 02:20:46,073][324563] Updated weights for policy 0, policy_version 20693 (0.0011) -[2026-06-07 02:20:46,141][324276] Saving new best policy, reward=561.073! -[2026-06-07 02:20:46,795][324563] Updated weights for policy 0, policy_version 20703 (0.0009) -[2026-06-07 02:20:46,971][324563] Updated weights for policy 0, policy_version 20713 (0.0006) -[2026-06-07 02:20:47,207][324563] Updated weights for policy 0, policy_version 20724 (0.0007) -[2026-06-07 02:20:47,413][324563] Updated weights for policy 0, policy_version 20734 (0.0006) -[2026-06-07 02:20:47,622][324563] Updated weights for policy 0, policy_version 20744 (0.0006) -[2026-06-07 02:20:47,816][324563] Updated weights for policy 0, policy_version 20754 (0.0006) -[2026-06-07 02:20:48,563][324563] Updated weights for policy 0, policy_version 20764 (0.0006) -[2026-06-07 02:20:48,760][324563] Updated weights for policy 0, policy_version 20774 (0.0006) -[2026-06-07 02:20:48,961][324563] Updated weights for policy 0, policy_version 20784 (0.0007) -[2026-06-07 02:20:49,165][324563] Updated weights for policy 0, policy_version 20794 (0.0006) -[2026-06-07 02:20:49,366][324563] Updated weights for policy 0, policy_version 20804 (0.0007) -[2026-06-07 02:20:49,558][324563] Updated weights for policy 0, policy_version 20814 (0.0006) -[2026-06-07 02:20:49,765][324563] Updated weights for policy 0, policy_version 20824 (0.0007) -[2026-06-07 02:20:50,513][324563] Updated weights for policy 0, policy_version 20835 (0.0006) -[2026-06-07 02:20:50,712][324563] Updated weights for policy 0, policy_version 20845 (0.0006) -[2026-06-07 02:20:50,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 10682368. Throughput: 0: 17567.3. Samples: 10694272. Policy #0 lag: (min: 6.0, avg: 21.7, max: 70.0) -[2026-06-07 02:20:50,745][321791] Avg episode reward: [(0, '569.282')] -[2026-06-07 02:20:50,923][324563] Updated weights for policy 0, policy_version 20855 (0.0006) -[2026-06-07 02:20:51,122][324563] Updated weights for policy 0, policy_version 20865 (0.0007) -[2026-06-07 02:20:51,351][324563] Updated weights for policy 0, policy_version 20876 (0.0006) -[2026-06-07 02:20:51,551][324563] Updated weights for policy 0, policy_version 20886 (0.0006) -[2026-06-07 02:20:51,581][324276] Saving new best policy, reward=569.282! -[2026-06-07 02:20:52,267][324563] Updated weights for policy 0, policy_version 20896 (0.0006) -[2026-06-07 02:20:52,493][324563] Updated weights for policy 0, policy_version 20907 (0.0006) -[2026-06-07 02:20:52,687][324563] Updated weights for policy 0, policy_version 20917 (0.0007) -[2026-06-07 02:20:52,887][324563] Updated weights for policy 0, policy_version 20927 (0.0007) -[2026-06-07 02:20:53,110][324563] Updated weights for policy 0, policy_version 20937 (0.0006) -[2026-06-07 02:20:53,320][324563] Updated weights for policy 0, policy_version 20947 (0.0007) -[2026-06-07 02:20:54,062][324563] Updated weights for policy 0, policy_version 20957 (0.0007) -[2026-06-07 02:20:54,266][324563] Updated weights for policy 0, policy_version 20967 (0.0007) -[2026-06-07 02:20:54,502][324563] Updated weights for policy 0, policy_version 20978 (0.0007) -[2026-06-07 02:20:54,706][324563] Updated weights for policy 0, policy_version 20988 (0.0007) -[2026-06-07 02:20:54,904][324563] Updated weights for policy 0, policy_version 20998 (0.0006) -[2026-06-07 02:20:55,112][324563] Updated weights for policy 0, policy_version 21008 (0.0006) -[2026-06-07 02:20:55,744][321791] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 10780672. Throughput: 0: 17564.5. Samples: 10796544. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:20:55,745][321791] Avg episode reward: [(0, '619.246')] -[2026-06-07 02:20:55,853][324563] Updated weights for policy 0, policy_version 21018 (0.0007) -[2026-06-07 02:20:56,055][324563] Updated weights for policy 0, policy_version 21028 (0.0006) -[2026-06-07 02:20:56,296][324563] Updated weights for policy 0, policy_version 21039 (0.0006) -[2026-06-07 02:20:56,499][324563] Updated weights for policy 0, policy_version 21049 (0.0006) -[2026-06-07 02:20:56,699][324563] Updated weights for policy 0, policy_version 21059 (0.0006) -[2026-06-07 02:20:56,921][324563] Updated weights for policy 0, policy_version 21069 (0.0007) -[2026-06-07 02:20:57,122][324563] Updated weights for policy 0, policy_version 21079 (0.0007) -[2026-06-07 02:20:57,133][324276] Saving new best policy, reward=619.246! -[2026-06-07 02:20:57,886][324563] Updated weights for policy 0, policy_version 21090 (0.0006) -[2026-06-07 02:20:58,107][324563] Updated weights for policy 0, policy_version 21100 (0.0006) -[2026-06-07 02:20:58,346][324563] Updated weights for policy 0, policy_version 21111 (0.0007) -[2026-06-07 02:20:58,546][324563] Updated weights for policy 0, policy_version 21121 (0.0007) -[2026-06-07 02:20:58,751][324563] Updated weights for policy 0, policy_version 21131 (0.0006) -[2026-06-07 02:20:58,949][324563] Updated weights for policy 0, policy_version 21141 (0.0006) -[2026-06-07 02:20:59,681][324563] Updated weights for policy 0, policy_version 21151 (0.0006) -[2026-06-07 02:20:59,864][324563] Updated weights for policy 0, policy_version 21161 (0.0006) -[2026-06-07 02:21:00,097][324563] Updated weights for policy 0, policy_version 21172 (0.0006) -[2026-06-07 02:21:00,308][324563] Updated weights for policy 0, policy_version 21182 (0.0007) -[2026-06-07 02:21:00,511][324563] Updated weights for policy 0, policy_version 21192 (0.0006) -[2026-06-07 02:21:00,726][324563] Updated weights for policy 0, policy_version 21202 (0.0006) -[2026-06-07 02:21:00,744][321791] Fps is (10 sec: 16383.8, 60 sec: 17476.2, 300 sec: 17439.2). Total num frames: 10846208. Throughput: 0: 17541.6. Samples: 10898816. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:21:00,745][321791] Avg episode reward: [(0, '655.486')] -[2026-06-07 02:21:00,833][324276] Saving new best policy, reward=655.486! -[2026-06-07 02:21:01,435][324563] Updated weights for policy 0, policy_version 21212 (0.0007) -[2026-06-07 02:21:01,640][324563] Updated weights for policy 0, policy_version 21222 (0.0006) -[2026-06-07 02:21:01,833][324563] Updated weights for policy 0, policy_version 21233 (0.0006) -[2026-06-07 02:21:02,037][324563] Updated weights for policy 0, policy_version 21243 (0.0006) -[2026-06-07 02:21:02,273][324563] Updated weights for policy 0, policy_version 21253 (0.0007) -[2026-06-07 02:21:02,504][324563] Updated weights for policy 0, policy_version 21264 (0.0006) -[2026-06-07 02:21:03,259][324563] Updated weights for policy 0, policy_version 21274 (0.0007) -[2026-06-07 02:21:03,449][324563] Updated weights for policy 0, policy_version 21284 (0.0006) -[2026-06-07 02:21:03,653][324563] Updated weights for policy 0, policy_version 21294 (0.0006) -[2026-06-07 02:21:03,838][324563] Updated weights for policy 0, policy_version 21304 (0.0006) -[2026-06-07 02:21:04,071][324563] Updated weights for policy 0, policy_version 21315 (0.0007) -[2026-06-07 02:21:04,305][324563] Updated weights for policy 0, policy_version 21325 (0.0006) -[2026-06-07 02:21:04,524][324563] Updated weights for policy 0, policy_version 21335 (0.0006) -[2026-06-07 02:21:05,251][324563] Updated weights for policy 0, policy_version 21345 (0.0008) -[2026-06-07 02:21:05,454][324563] Updated weights for policy 0, policy_version 21355 (0.0007) -[2026-06-07 02:21:05,665][324563] Updated weights for policy 0, policy_version 21365 (0.0007) -[2026-06-07 02:21:05,744][321791] Fps is (10 sec: 16383.9, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 10944512. Throughput: 0: 17544.5. Samples: 10956544. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:21:05,745][321791] Avg episode reward: [(0, '673.045')] -[2026-06-07 02:21:05,870][324563] Updated weights for policy 0, policy_version 21375 (0.0007) -[2026-06-07 02:21:06,084][324563] Updated weights for policy 0, policy_version 21385 (0.0007) -[2026-06-07 02:21:06,311][324563] Updated weights for policy 0, policy_version 21395 (0.0007) -[2026-06-07 02:21:06,410][324276] Saving new best policy, reward=673.045! -[2026-06-07 02:21:07,031][324563] Updated weights for policy 0, policy_version 21405 (0.0007) -[2026-06-07 02:21:07,240][324563] Updated weights for policy 0, policy_version 21415 (0.0006) -[2026-06-07 02:21:07,455][324563] Updated weights for policy 0, policy_version 21425 (0.0007) -[2026-06-07 02:21:07,667][324563] Updated weights for policy 0, policy_version 21435 (0.0007) -[2026-06-07 02:21:07,905][324563] Updated weights for policy 0, policy_version 21446 (0.0011) -[2026-06-07 02:21:08,117][324563] Updated weights for policy 0, policy_version 21456 (0.0010) -[2026-06-07 02:21:08,844][324563] Updated weights for policy 0, policy_version 21466 (0.0007) -[2026-06-07 02:21:09,068][324563] Updated weights for policy 0, policy_version 21476 (0.0007) -[2026-06-07 02:21:09,286][324563] Updated weights for policy 0, policy_version 21486 (0.0006) -[2026-06-07 02:21:09,494][324563] Updated weights for policy 0, policy_version 21496 (0.0007) -[2026-06-07 02:21:09,696][324563] Updated weights for policy 0, policy_version 21506 (0.0010) -[2026-06-07 02:21:09,903][324563] Updated weights for policy 0, policy_version 21516 (0.0011) -[2026-06-07 02:21:10,124][324563] Updated weights for policy 0, policy_version 21526 (0.0011) -[2026-06-07 02:21:10,744][321791] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 11042816. Throughput: 0: 17513.2. Samples: 11058304. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:21:10,745][321791] Avg episode reward: [(0, '694.241')] -[2026-06-07 02:21:10,848][324563] Updated weights for policy 0, policy_version 21536 (0.0007) -[2026-06-07 02:21:11,095][324563] Updated weights for policy 0, policy_version 21547 (0.0006) -[2026-06-07 02:21:11,323][324563] Updated weights for policy 0, policy_version 21558 (0.0007) -[2026-06-07 02:21:11,526][324563] Updated weights for policy 0, policy_version 21568 (0.0007) -[2026-06-07 02:21:11,727][324563] Updated weights for policy 0, policy_version 21578 (0.0008) -[2026-06-07 02:21:11,932][324563] Updated weights for policy 0, policy_version 21588 (0.0007) -[2026-06-07 02:21:12,007][324276] Saving new best policy, reward=694.241! -[2026-06-07 02:21:12,682][324563] Updated weights for policy 0, policy_version 21598 (0.0007) -[2026-06-07 02:21:12,898][324563] Updated weights for policy 0, policy_version 21608 (0.0007) -[2026-06-07 02:21:13,103][324563] Updated weights for policy 0, policy_version 21618 (0.0007) -[2026-06-07 02:21:13,304][324563] Updated weights for policy 0, policy_version 21628 (0.0007) -[2026-06-07 02:21:13,510][324563] Updated weights for policy 0, policy_version 21638 (0.0006) -[2026-06-07 02:21:13,712][324563] Updated weights for policy 0, policy_version 21648 (0.0006) -[2026-06-07 02:21:14,439][324563] Updated weights for policy 0, policy_version 21658 (0.0007) -[2026-06-07 02:21:14,649][324563] Updated weights for policy 0, policy_version 21668 (0.0006) -[2026-06-07 02:21:14,859][324563] Updated weights for policy 0, policy_version 21678 (0.0006) -[2026-06-07 02:21:15,073][324563] Updated weights for policy 0, policy_version 21688 (0.0008) -[2026-06-07 02:21:15,268][324563] Updated weights for policy 0, policy_version 21698 (0.0010) -[2026-06-07 02:21:15,490][324563] Updated weights for policy 0, policy_version 21708 (0.0011) -[2026-06-07 02:21:15,677][324563] Updated weights for policy 0, policy_version 21718 (0.0008) -[2026-06-07 02:21:15,744][321791] Fps is (10 sec: 19660.7, 60 sec: 18022.4, 300 sec: 17550.3). Total num frames: 11141120. Throughput: 0: 17501.8. Samples: 11160448. Policy #0 lag: (min: 21.0, avg: 37.9, max: 85.0) -[2026-06-07 02:21:15,745][321791] Avg episode reward: [(0, '714.857')] -[2026-06-07 02:21:15,752][324276] Saving new best policy, reward=714.857! -[2026-06-07 02:21:16,395][324563] Updated weights for policy 0, policy_version 21728 (0.0007) -[2026-06-07 02:21:16,587][324563] Updated weights for policy 0, policy_version 21738 (0.0006) -[2026-06-07 02:21:16,782][324563] Updated weights for policy 0, policy_version 21748 (0.0006) -[2026-06-07 02:21:16,998][324563] Updated weights for policy 0, policy_version 21758 (0.0006) -[2026-06-07 02:21:17,212][324563] Updated weights for policy 0, policy_version 21768 (0.0006) -[2026-06-07 02:21:17,406][324563] Updated weights for policy 0, policy_version 21778 (0.0006) -[2026-06-07 02:21:18,130][324563] Updated weights for policy 0, policy_version 21788 (0.0006) -[2026-06-07 02:21:18,338][324563] Updated weights for policy 0, policy_version 21798 (0.0007) -[2026-06-07 02:21:18,563][324563] Updated weights for policy 0, policy_version 21809 (0.0006) -[2026-06-07 02:21:18,771][324563] Updated weights for policy 0, policy_version 21819 (0.0006) -[2026-06-07 02:21:18,980][324563] Updated weights for policy 0, policy_version 21829 (0.0007) -[2026-06-07 02:21:19,193][324563] Updated weights for policy 0, policy_version 21839 (0.0007) -[2026-06-07 02:21:19,883][324563] Updated weights for policy 0, policy_version 21849 (0.0007) -[2026-06-07 02:21:20,114][324563] Updated weights for policy 0, policy_version 21860 (0.0007) -[2026-06-07 02:21:20,316][324563] Updated weights for policy 0, policy_version 21870 (0.0007) -[2026-06-07 02:21:20,505][324563] Updated weights for policy 0, policy_version 21880 (0.0008) -[2026-06-07 02:21:20,714][324563] Updated weights for policy 0, policy_version 21890 (0.0011) -[2026-06-07 02:21:20,744][321791] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 11206656. Throughput: 0: 17462.0. Samples: 11217408. Policy #0 lag: (min: 21.0, avg: 37.9, max: 85.0) -[2026-06-07 02:21:20,745][321791] Avg episode reward: [(0, '709.010')] -[2026-06-07 02:21:20,909][324563] Updated weights for policy 0, policy_version 21900 (0.0011) -[2026-06-07 02:21:21,104][324563] Updated weights for policy 0, policy_version 21910 (0.0009) -[2026-06-07 02:21:21,838][324563] Updated weights for policy 0, policy_version 21920 (0.0006) -[2026-06-07 02:21:22,051][324563] Updated weights for policy 0, policy_version 21930 (0.0007) -[2026-06-07 02:21:22,249][324563] Updated weights for policy 0, policy_version 21940 (0.0007) -[2026-06-07 02:21:22,461][324563] Updated weights for policy 0, policy_version 21950 (0.0006) -[2026-06-07 02:21:22,674][324563] Updated weights for policy 0, policy_version 21960 (0.0006) -[2026-06-07 02:21:22,873][324563] Updated weights for policy 0, policy_version 21970 (0.0007) -[2026-06-07 02:21:22,998][324276] Early stopping after 8 epochs (64 sgd steps), loss delta 0.0000006 -[2026-06-07 02:21:23,598][324563] Updated weights for policy 0, policy_version 21980 (0.0006) -[2026-06-07 02:21:23,796][324563] Updated weights for policy 0, policy_version 21990 (0.0007) -[2026-06-07 02:21:24,029][324563] Updated weights for policy 0, policy_version 22001 (0.0006) -[2026-06-07 02:21:24,225][324563] Updated weights for policy 0, policy_version 22011 (0.0006) -[2026-06-07 02:21:24,451][324563] Updated weights for policy 0, policy_version 22021 (0.0006) -[2026-06-07 02:21:24,662][324563] Updated weights for policy 0, policy_version 22031 (0.0006) -[2026-06-07 02:21:25,392][324563] Updated weights for policy 0, policy_version 22041 (0.0007) -[2026-06-07 02:21:25,619][324563] Updated weights for policy 0, policy_version 22051 (0.0006) -[2026-06-07 02:21:25,744][321791] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 11304960. Throughput: 0: 17476.3. Samples: 11320448. Policy #0 lag: (min: 21.0, avg: 37.9, max: 85.0) -[2026-06-07 02:21:25,745][321791] Avg episode reward: [(0, '715.530')] -[2026-06-07 02:21:25,819][324563] Updated weights for policy 0, policy_version 22061 (0.0006) -[2026-06-07 02:21:26,003][324563] Updated weights for policy 0, policy_version 22071 (0.0006) -[2026-06-07 02:21:26,199][324563] Updated weights for policy 0, policy_version 22081 (0.0006) -[2026-06-07 02:21:26,415][324563] Updated weights for policy 0, policy_version 22091 (0.0006) -[2026-06-07 02:21:26,617][324563] Updated weights for policy 0, policy_version 22101 (0.0006) -[2026-06-07 02:21:26,679][324276] Saving new best policy, reward=715.530! -[2026-06-07 02:21:27,385][324563] Updated weights for policy 0, policy_version 22111 (0.0006) -[2026-06-07 02:21:27,595][324563] Updated weights for policy 0, policy_version 22121 (0.0007) -[2026-06-07 02:21:27,830][324563] Updated weights for policy 0, policy_version 22131 (0.0007) -[2026-06-07 02:21:28,037][324563] Updated weights for policy 0, policy_version 22141 (0.0007) -[2026-06-07 02:21:28,238][324563] Updated weights for policy 0, policy_version 22151 (0.0007) -[2026-06-07 02:21:28,415][324563] Updated weights for policy 0, policy_version 22161 (0.0006) -[2026-06-07 02:21:29,168][324563] Updated weights for policy 0, policy_version 22171 (0.0008) -[2026-06-07 02:21:29,370][324563] Updated weights for policy 0, policy_version 22181 (0.0010) -[2026-06-07 02:21:29,570][324563] Updated weights for policy 0, policy_version 22191 (0.0011) -[2026-06-07 02:21:29,768][324563] Updated weights for policy 0, policy_version 22201 (0.0011) -[2026-06-07 02:21:29,984][324563] Updated weights for policy 0, policy_version 22211 (0.0009) -[2026-06-07 02:21:30,201][324563] Updated weights for policy 0, policy_version 22221 (0.0007) -[2026-06-07 02:21:30,410][324563] Updated weights for policy 0, policy_version 22231 (0.0006) -[2026-06-07 02:21:30,744][321791] Fps is (10 sec: 19660.8, 60 sec: 18022.4, 300 sec: 17550.3). Total num frames: 11403264. Throughput: 0: 17504.7. Samples: 11424768. Policy #0 lag: (min: 24.0, avg: 54.5, max: 88.0) -[2026-06-07 02:21:30,745][321791] Avg episode reward: [(0, '702.351')] -[2026-06-07 02:21:31,140][324563] Updated weights for policy 0, policy_version 22241 (0.0006) -[2026-06-07 02:21:31,345][324563] Updated weights for policy 0, policy_version 22251 (0.0006) -[2026-06-07 02:21:31,550][324563] Updated weights for policy 0, policy_version 22261 (0.0008) -[2026-06-07 02:21:31,728][324563] Updated weights for policy 0, policy_version 22271 (0.0008) -[2026-06-07 02:21:31,935][324563] Updated weights for policy 0, policy_version 22281 (0.0007) -[2026-06-07 02:21:32,147][324563] Updated weights for policy 0, policy_version 22291 (0.0006) -[2026-06-07 02:21:32,912][324563] Updated weights for policy 0, policy_version 22301 (0.0006) -[2026-06-07 02:21:33,109][324563] Updated weights for policy 0, policy_version 22311 (0.0008) -[2026-06-07 02:21:33,320][324563] Updated weights for policy 0, policy_version 22321 (0.0007) -[2026-06-07 02:21:33,522][324563] Updated weights for policy 0, policy_version 22331 (0.0006) -[2026-06-07 02:21:33,728][324563] Updated weights for policy 0, policy_version 22341 (0.0006) -[2026-06-07 02:21:33,945][324563] Updated weights for policy 0, policy_version 22351 (0.0006) -[2026-06-07 02:21:34,676][324563] Updated weights for policy 0, policy_version 22361 (0.0007) -[2026-06-07 02:21:34,921][324563] Updated weights for policy 0, policy_version 22372 (0.0007) -[2026-06-07 02:21:35,111][324563] Updated weights for policy 0, policy_version 22382 (0.0006) -[2026-06-07 02:21:35,320][324563] Updated weights for policy 0, policy_version 22392 (0.0006) -[2026-06-07 02:21:35,521][324563] Updated weights for policy 0, policy_version 22402 (0.0006) -[2026-06-07 02:21:35,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 11468800. Throughput: 0: 17487.7. Samples: 11481216. Policy #0 lag: (min: 24.0, avg: 54.5, max: 88.0) -[2026-06-07 02:21:35,745][321791] Avg episode reward: [(0, '730.598')] -[2026-06-07 02:21:35,750][324563] Updated weights for policy 0, policy_version 22413 (0.0006) -[2026-06-07 02:21:35,969][324563] Updated weights for policy 0, policy_version 22423 (0.0006) -[2026-06-07 02:21:35,985][324276] Saving new best policy, reward=730.598! -[2026-06-07 02:21:36,726][324563] Updated weights for policy 0, policy_version 22433 (0.0006) -[2026-06-07 02:21:36,946][324563] Updated weights for policy 0, policy_version 22443 (0.0006) -[2026-06-07 02:21:37,170][324563] Updated weights for policy 0, policy_version 22454 (0.0007) -[2026-06-07 02:21:37,401][324563] Updated weights for policy 0, policy_version 22465 (0.0007) -[2026-06-07 02:21:37,609][324563] Updated weights for policy 0, policy_version 22475 (0.0007) -[2026-06-07 02:21:37,819][324563] Updated weights for policy 0, policy_version 22485 (0.0006) -[2026-06-07 02:21:38,489][324563] Updated weights for policy 0, policy_version 22495 (0.0006) -[2026-06-07 02:21:38,687][324563] Updated weights for policy 0, policy_version 22505 (0.0007) -[2026-06-07 02:21:38,914][324563] Updated weights for policy 0, policy_version 22515 (0.0007) -[2026-06-07 02:21:39,111][324563] Updated weights for policy 0, policy_version 22525 (0.0007) -[2026-06-07 02:21:39,320][324563] Updated weights for policy 0, policy_version 22535 (0.0006) -[2026-06-07 02:21:39,526][324563] Updated weights for policy 0, policy_version 22545 (0.0007) -[2026-06-07 02:21:40,262][324563] Updated weights for policy 0, policy_version 22555 (0.0006) -[2026-06-07 02:21:40,469][324563] Updated weights for policy 0, policy_version 22565 (0.0006) -[2026-06-07 02:21:40,676][324563] Updated weights for policy 0, policy_version 22575 (0.0006) -[2026-06-07 02:21:40,744][321791] Fps is (10 sec: 16383.9, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 11567104. Throughput: 0: 17499.0. Samples: 11584000. Policy #0 lag: (min: 24.0, avg: 54.5, max: 88.0) -[2026-06-07 02:21:40,745][321791] Avg episode reward: [(0, '757.955')] -[2026-06-07 02:21:40,869][324563] Updated weights for policy 0, policy_version 22585 (0.0007) -[2026-06-07 02:21:41,077][324563] Updated weights for policy 0, policy_version 22595 (0.0010) -[2026-06-07 02:21:41,291][324563] Updated weights for policy 0, policy_version 22605 (0.0009) -[2026-06-07 02:21:41,490][324563] Updated weights for policy 0, policy_version 22615 (0.0009) -[2026-06-07 02:21:41,499][324276] Saving new best policy, reward=757.955! -[2026-06-07 02:21:42,259][324563] Updated weights for policy 0, policy_version 22625 (0.0009) -[2026-06-07 02:21:42,484][324563] Updated weights for policy 0, policy_version 22636 (0.0010) -[2026-06-07 02:21:42,706][324563] Updated weights for policy 0, policy_version 22646 (0.0008) -[2026-06-07 02:21:42,893][324563] Updated weights for policy 0, policy_version 22656 (0.0006) -[2026-06-07 02:21:43,104][324563] Updated weights for policy 0, policy_version 22666 (0.0008) -[2026-06-07 02:21:43,321][324563] Updated weights for policy 0, policy_version 22677 (0.0009) -[2026-06-07 02:21:44,052][324563] Updated weights for policy 0, policy_version 22687 (0.0010) -[2026-06-07 02:21:44,270][324563] Updated weights for policy 0, policy_version 22697 (0.0009) -[2026-06-07 02:21:44,473][324563] Updated weights for policy 0, policy_version 22707 (0.0010) -[2026-06-07 02:21:44,679][324563] Updated weights for policy 0, policy_version 22717 (0.0011) -[2026-06-07 02:21:44,893][324563] Updated weights for policy 0, policy_version 22727 (0.0011) -[2026-06-07 02:21:45,100][324563] Updated weights for policy 0, policy_version 22737 (0.0011) -[2026-06-07 02:21:45,744][321791] Fps is (10 sec: 19660.8, 60 sec: 18022.4, 300 sec: 17550.3). Total num frames: 11665408. Throughput: 0: 17684.0. Samples: 11694592. Policy #0 lag: (min: 63.0, avg: 79.2, max: 127.0) -[2026-06-07 02:21:45,745][321791] Avg episode reward: [(0, '742.118')] -[2026-06-07 02:21:45,850][324563] Updated weights for policy 0, policy_version 22747 (0.0008) -[2026-06-07 02:21:46,053][324563] Updated weights for policy 0, policy_version 22757 (0.0006) -[2026-06-07 02:21:46,278][324563] Updated weights for policy 0, policy_version 22767 (0.0006) -[2026-06-07 02:21:46,504][324563] Updated weights for policy 0, policy_version 22777 (0.0006) -[2026-06-07 02:21:46,730][324563] Updated weights for policy 0, policy_version 22788 (0.0006) -[2026-06-07 02:21:46,944][324563] Updated weights for policy 0, policy_version 22799 (0.0006) -[2026-06-07 02:21:47,688][324563] Updated weights for policy 0, policy_version 22809 (0.0006) -[2026-06-07 02:21:47,900][324563] Updated weights for policy 0, policy_version 22819 (0.0006) -[2026-06-07 02:21:48,108][324563] Updated weights for policy 0, policy_version 22829 (0.0006) -[2026-06-07 02:21:48,333][324563] Updated weights for policy 0, policy_version 22840 (0.0006) -[2026-06-07 02:21:48,550][324563] Updated weights for policy 0, policy_version 22850 (0.0006) -[2026-06-07 02:21:48,735][324563] Updated weights for policy 0, policy_version 22860 (0.0006) -[2026-06-07 02:21:48,992][324563] Updated weights for policy 0, policy_version 22872 (0.0006) -[2026-06-07 02:21:49,740][324563] Updated weights for policy 0, policy_version 22882 (0.0007) -[2026-06-07 02:21:49,962][324563] Updated weights for policy 0, policy_version 22892 (0.0007) -[2026-06-07 02:21:50,174][324563] Updated weights for policy 0, policy_version 22902 (0.0006) -[2026-06-07 02:21:50,384][324563] Updated weights for policy 0, policy_version 22912 (0.0006) -[2026-06-07 02:21:50,567][324563] Updated weights for policy 0, policy_version 22922 (0.0006) -[2026-06-07 02:21:50,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 11730944. Throughput: 0: 17510.4. Samples: 11744512. Policy #0 lag: (min: 63.0, avg: 79.2, max: 127.0) -[2026-06-07 02:21:50,745][321791] Avg episode reward: [(0, '693.705')] -[2026-06-07 02:21:50,764][324563] Updated weights for policy 0, policy_version 22932 (0.0006) -[2026-06-07 02:21:51,516][324563] Updated weights for policy 0, policy_version 22942 (0.0007) -[2026-06-07 02:21:51,713][324563] Updated weights for policy 0, policy_version 22952 (0.0006) -[2026-06-07 02:21:51,932][324563] Updated weights for policy 0, policy_version 22963 (0.0006) -[2026-06-07 02:21:52,141][324563] Updated weights for policy 0, policy_version 22973 (0.0006) -[2026-06-07 02:21:52,343][324563] Updated weights for policy 0, policy_version 22983 (0.0006) -[2026-06-07 02:21:52,567][324563] Updated weights for policy 0, policy_version 22993 (0.0007) -[2026-06-07 02:21:53,309][324563] Updated weights for policy 0, policy_version 23003 (0.0007) -[2026-06-07 02:21:53,555][324563] Updated weights for policy 0, policy_version 23015 (0.0006) -[2026-06-07 02:21:53,747][324563] Updated weights for policy 0, policy_version 23025 (0.0006) -[2026-06-07 02:21:53,952][324563] Updated weights for policy 0, policy_version 23035 (0.0006) -[2026-06-07 02:21:54,158][324563] Updated weights for policy 0, policy_version 23045 (0.0006) -[2026-06-07 02:21:54,373][324563] Updated weights for policy 0, policy_version 23055 (0.0007) -[2026-06-07 02:21:55,131][324563] Updated weights for policy 0, policy_version 23065 (0.0007) -[2026-06-07 02:21:55,354][324563] Updated weights for policy 0, policy_version 23075 (0.0007) -[2026-06-07 02:21:55,561][324563] Updated weights for policy 0, policy_version 23085 (0.0006) -[2026-06-07 02:21:55,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 11829248. Throughput: 0: 17538.9. Samples: 11847552. Policy #0 lag: (min: 63.0, avg: 79.2, max: 127.0) -[2026-06-07 02:21:55,745][321791] Avg episode reward: [(0, '713.183')] -[2026-06-07 02:21:55,780][324563] Updated weights for policy 0, policy_version 23095 (0.0006) -[2026-06-07 02:21:55,997][324563] Updated weights for policy 0, policy_version 23105 (0.0007) -[2026-06-07 02:21:56,216][324563] Updated weights for policy 0, policy_version 23115 (0.0007) -[2026-06-07 02:21:56,417][324563] Updated weights for policy 0, policy_version 23125 (0.0006) -[2026-06-07 02:21:57,148][324563] Updated weights for policy 0, policy_version 23135 (0.0006) -[2026-06-07 02:21:57,333][324563] Updated weights for policy 0, policy_version 23145 (0.0006) -[2026-06-07 02:21:57,564][324563] Updated weights for policy 0, policy_version 23155 (0.0006) -[2026-06-07 02:21:57,760][324563] Updated weights for policy 0, policy_version 23165 (0.0006) -[2026-06-07 02:21:57,969][324563] Updated weights for policy 0, policy_version 23175 (0.0006) -[2026-06-07 02:21:58,160][324563] Updated weights for policy 0, policy_version 23185 (0.0006) -[2026-06-07 02:21:58,881][324563] Updated weights for policy 0, policy_version 23195 (0.0006) -[2026-06-07 02:21:59,134][324563] Updated weights for policy 0, policy_version 23207 (0.0007) -[2026-06-07 02:21:59,327][324563] Updated weights for policy 0, policy_version 23217 (0.0006) -[2026-06-07 02:21:59,528][324563] Updated weights for policy 0, policy_version 23227 (0.0007) -[2026-06-07 02:21:59,756][324563] Updated weights for policy 0, policy_version 23237 (0.0007) -[2026-06-07 02:21:59,950][324563] Updated weights for policy 0, policy_version 23247 (0.0006) -[2026-06-07 02:22:00,702][324563] Updated weights for policy 0, policy_version 23257 (0.0007) -[2026-06-07 02:22:00,744][321791] Fps is (10 sec: 19660.9, 60 sec: 18022.4, 300 sec: 17550.3). Total num frames: 11927552. Throughput: 0: 17806.3. Samples: 11961728. Policy #0 lag: (min: 63.0, avg: 79.2, max: 127.0) -[2026-06-07 02:22:00,745][321791] Avg episode reward: [(0, '710.364')] -[2026-06-07 02:22:00,921][324563] Updated weights for policy 0, policy_version 23267 (0.0006) -[2026-06-07 02:22:01,126][324563] Updated weights for policy 0, policy_version 23277 (0.0006) -[2026-06-07 02:22:01,320][324563] Updated weights for policy 0, policy_version 23287 (0.0006) -[2026-06-07 02:22:01,515][324563] Updated weights for policy 0, policy_version 23297 (0.0006) -[2026-06-07 02:22:01,718][324563] Updated weights for policy 0, policy_version 23307 (0.0006) -[2026-06-07 02:22:01,914][324563] Updated weights for policy 0, policy_version 23317 (0.0006) -[2026-06-07 02:22:02,648][324563] Updated weights for policy 0, policy_version 23327 (0.0007) -[2026-06-07 02:22:02,855][324563] Updated weights for policy 0, policy_version 23337 (0.0006) -[2026-06-07 02:22:03,044][324563] Updated weights for policy 0, policy_version 23347 (0.0007) -[2026-06-07 02:22:03,254][324563] Updated weights for policy 0, policy_version 23357 (0.0006) -[2026-06-07 02:22:03,469][324563] Updated weights for policy 0, policy_version 23368 (0.0006) -[2026-06-07 02:22:03,703][324563] Updated weights for policy 0, policy_version 23379 (0.0006) -[2026-06-07 02:22:04,442][324563] Updated weights for policy 0, policy_version 23389 (0.0006) -[2026-06-07 02:22:04,651][324563] Updated weights for policy 0, policy_version 23399 (0.0006) -[2026-06-07 02:22:04,858][324563] Updated weights for policy 0, policy_version 23409 (0.0006) -[2026-06-07 02:22:05,063][324563] Updated weights for policy 0, policy_version 23419 (0.0006) -[2026-06-07 02:22:05,289][324563] Updated weights for policy 0, policy_version 23429 (0.0006) -[2026-06-07 02:22:05,495][324563] Updated weights for policy 0, policy_version 23439 (0.0007) -[2026-06-07 02:22:05,744][321791] Fps is (10 sec: 19660.8, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 12025856. Throughput: 0: 17592.9. Samples: 12009088. Policy #0 lag: (min: 63.0, avg: 78.6, max: 127.0) -[2026-06-07 02:22:05,745][321791] Avg episode reward: [(0, '745.262')] -[2026-06-07 02:22:06,228][324563] Updated weights for policy 0, policy_version 23449 (0.0006) -[2026-06-07 02:22:06,427][324563] Updated weights for policy 0, policy_version 23459 (0.0006) -[2026-06-07 02:22:06,639][324563] Updated weights for policy 0, policy_version 23469 (0.0010) -[2026-06-07 02:22:06,857][324563] Updated weights for policy 0, policy_version 23479 (0.0011) -[2026-06-07 02:22:07,066][324563] Updated weights for policy 0, policy_version 23489 (0.0011) -[2026-06-07 02:22:07,259][324563] Updated weights for policy 0, policy_version 23499 (0.0011) -[2026-06-07 02:22:07,468][324563] Updated weights for policy 0, policy_version 23509 (0.0009) -[2026-06-07 02:22:08,223][324563] Updated weights for policy 0, policy_version 23520 (0.0007) -[2026-06-07 02:22:08,433][324563] Updated weights for policy 0, policy_version 23530 (0.0007) -[2026-06-07 02:22:08,637][324563] Updated weights for policy 0, policy_version 23540 (0.0006) -[2026-06-07 02:22:08,847][324563] Updated weights for policy 0, policy_version 23550 (0.0007) -[2026-06-07 02:22:09,029][324563] Updated weights for policy 0, policy_version 23560 (0.0010) -[2026-06-07 02:22:09,294][324563] Updated weights for policy 0, policy_version 23572 (0.0011) -[2026-06-07 02:22:10,061][324563] Updated weights for policy 0, policy_version 23582 (0.0011) -[2026-06-07 02:22:10,262][324563] Updated weights for policy 0, policy_version 23592 (0.0011) -[2026-06-07 02:22:10,461][324563] Updated weights for policy 0, policy_version 23602 (0.0007) -[2026-06-07 02:22:10,692][324563] Updated weights for policy 0, policy_version 23613 (0.0006) -[2026-06-07 02:22:10,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 12091392. Throughput: 0: 17615.6. Samples: 12113152. Policy #0 lag: (min: 63.0, avg: 78.6, max: 127.0) -[2026-06-07 02:22:10,745][321791] Avg episode reward: [(0, '736.213')] -[2026-06-07 02:22:10,920][324563] Updated weights for policy 0, policy_version 23624 (0.0006) -[2026-06-07 02:22:11,130][324563] Updated weights for policy 0, policy_version 23634 (0.0006) -[2026-06-07 02:22:11,860][324563] Updated weights for policy 0, policy_version 23644 (0.0006) -[2026-06-07 02:22:12,107][324563] Updated weights for policy 0, policy_version 23655 (0.0006) -[2026-06-07 02:22:12,310][324563] Updated weights for policy 0, policy_version 23665 (0.0006) -[2026-06-07 02:22:12,516][324563] Updated weights for policy 0, policy_version 23675 (0.0006) -[2026-06-07 02:22:12,736][324563] Updated weights for policy 0, policy_version 23685 (0.0006) -[2026-06-07 02:22:12,953][324563] Updated weights for policy 0, policy_version 23695 (0.0006) -[2026-06-07 02:22:13,685][324563] Updated weights for policy 0, policy_version 23705 (0.0006) -[2026-06-07 02:22:13,902][324563] Updated weights for policy 0, policy_version 23716 (0.0006) -[2026-06-07 02:22:14,113][324563] Updated weights for policy 0, policy_version 23726 (0.0006) -[2026-06-07 02:22:14,335][324563] Updated weights for policy 0, policy_version 23736 (0.0006) -[2026-06-07 02:22:14,541][324563] Updated weights for policy 0, policy_version 23746 (0.0006) -[2026-06-07 02:22:14,746][324563] Updated weights for policy 0, policy_version 23756 (0.0006) -[2026-06-07 02:22:14,978][324563] Updated weights for policy 0, policy_version 23767 (0.0006) -[2026-06-07 02:22:15,744][321791] Fps is (10 sec: 16383.9, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 12189696. Throughput: 0: 17780.6. Samples: 12224896. Policy #0 lag: (min: 63.0, avg: 78.6, max: 127.0) -[2026-06-07 02:22:15,745][321791] Avg episode reward: [(0, '721.833')] -[2026-06-07 02:22:15,748][324563] Updated weights for policy 0, policy_version 23777 (0.0007) -[2026-06-07 02:22:15,961][324563] Updated weights for policy 0, policy_version 23787 (0.0006) -[2026-06-07 02:22:16,162][324563] Updated weights for policy 0, policy_version 23797 (0.0006) -[2026-06-07 02:22:16,362][324563] Updated weights for policy 0, policy_version 23807 (0.0006) -[2026-06-07 02:22:16,576][324563] Updated weights for policy 0, policy_version 23818 (0.0007) -[2026-06-07 02:22:16,779][324563] Updated weights for policy 0, policy_version 23828 (0.0006) -[2026-06-07 02:22:17,558][324563] Updated weights for policy 0, policy_version 23839 (0.0006) -[2026-06-07 02:22:17,789][324563] Updated weights for policy 0, policy_version 23850 (0.0007) -[2026-06-07 02:22:17,981][324563] Updated weights for policy 0, policy_version 23860 (0.0006) -[2026-06-07 02:22:18,193][324563] Updated weights for policy 0, policy_version 23870 (0.0006) -[2026-06-07 02:22:18,390][324563] Updated weights for policy 0, policy_version 23880 (0.0006) -[2026-06-07 02:22:18,612][324563] Updated weights for policy 0, policy_version 23890 (0.0007) -[2026-06-07 02:22:19,370][324563] Updated weights for policy 0, policy_version 23900 (0.0007) -[2026-06-07 02:22:19,574][324563] Updated weights for policy 0, policy_version 23910 (0.0006) -[2026-06-07 02:22:19,770][324563] Updated weights for policy 0, policy_version 23920 (0.0006) -[2026-06-07 02:22:19,973][324563] Updated weights for policy 0, policy_version 23930 (0.0006) -[2026-06-07 02:22:20,178][324563] Updated weights for policy 0, policy_version 23940 (0.0007) -[2026-06-07 02:22:20,373][324563] Updated weights for policy 0, policy_version 23950 (0.0007) -[2026-06-07 02:22:20,578][324563] Updated weights for policy 0, policy_version 23960 (0.0007) -[2026-06-07 02:22:20,744][321791] Fps is (10 sec: 19660.6, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 12288000. Throughput: 0: 17513.2. Samples: 12269312. Policy #0 lag: (min: 63.0, avg: 78.6, max: 127.0) -[2026-06-07 02:22:20,745][321791] Avg episode reward: [(0, '749.537')] -[2026-06-07 02:22:21,301][324563] Updated weights for policy 0, policy_version 23970 (0.0007) -[2026-06-07 02:22:21,523][324563] Updated weights for policy 0, policy_version 23981 (0.0007) -[2026-06-07 02:22:21,729][324563] Updated weights for policy 0, policy_version 23991 (0.0006) -[2026-06-07 02:22:21,949][324563] Updated weights for policy 0, policy_version 24002 (0.0007) -[2026-06-07 02:22:22,148][324563] Updated weights for policy 0, policy_version 24012 (0.0006) -[2026-06-07 02:22:22,353][324563] Updated weights for policy 0, policy_version 24022 (0.0009) -[2026-06-07 02:22:23,138][324563] Updated weights for policy 0, policy_version 24032 (0.0008) -[2026-06-07 02:22:23,402][324563] Updated weights for policy 0, policy_version 24044 (0.0006) -[2026-06-07 02:22:23,611][324563] Updated weights for policy 0, policy_version 24054 (0.0007) -[2026-06-07 02:22:23,818][324563] Updated weights for policy 0, policy_version 24064 (0.0006) -[2026-06-07 02:22:24,012][324563] Updated weights for policy 0, policy_version 24074 (0.0006) -[2026-06-07 02:22:24,261][324563] Updated weights for policy 0, policy_version 24085 (0.0006) -[2026-06-07 02:22:25,033][324563] Updated weights for policy 0, policy_version 24095 (0.0006) -[2026-06-07 02:22:25,241][324563] Updated weights for policy 0, policy_version 24105 (0.0006) -[2026-06-07 02:22:25,450][324563] Updated weights for policy 0, policy_version 24115 (0.0006) -[2026-06-07 02:22:25,646][324563] Updated weights for policy 0, policy_version 24125 (0.0007) -[2026-06-07 02:22:25,744][321791] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 12353536. Throughput: 0: 17632.7. Samples: 12377472. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) -[2026-06-07 02:22:25,745][321791] Avg episode reward: [(0, '753.838')] -[2026-06-07 02:22:25,846][324563] Updated weights for policy 0, policy_version 24135 (0.0006) -[2026-06-07 02:22:26,055][324563] Updated weights for policy 0, policy_version 24145 (0.0006) -[2026-06-07 02:22:26,781][324563] Updated weights for policy 0, policy_version 24155 (0.0006) -[2026-06-07 02:22:26,962][324563] Updated weights for policy 0, policy_version 24165 (0.0006) -[2026-06-07 02:22:27,204][324563] Updated weights for policy 0, policy_version 24176 (0.0007) -[2026-06-07 02:22:27,392][324563] Updated weights for policy 0, policy_version 24186 (0.0007) -[2026-06-07 02:22:27,604][324563] Updated weights for policy 0, policy_version 24196 (0.0006) -[2026-06-07 02:22:27,811][324563] Updated weights for policy 0, policy_version 24206 (0.0007) -[2026-06-07 02:22:28,003][324563] Updated weights for policy 0, policy_version 24216 (0.0006) -[2026-06-07 02:22:28,774][324563] Updated weights for policy 0, policy_version 24226 (0.0007) -[2026-06-07 02:22:28,976][324563] Updated weights for policy 0, policy_version 24236 (0.0006) -[2026-06-07 02:22:29,165][324563] Updated weights for policy 0, policy_version 24246 (0.0006) -[2026-06-07 02:22:29,369][324563] Updated weights for policy 0, policy_version 24256 (0.0007) -[2026-06-07 02:22:29,574][324563] Updated weights for policy 0, policy_version 24266 (0.0006) -[2026-06-07 02:22:29,797][324563] Updated weights for policy 0, policy_version 24277 (0.0006) -[2026-06-07 02:22:30,526][324563] Updated weights for policy 0, policy_version 24288 (0.0006) -[2026-06-07 02:22:30,735][324563] Updated weights for policy 0, policy_version 24298 (0.0007) -[2026-06-07 02:22:30,744][321791] Fps is (10 sec: 16383.8, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 12451840. Throughput: 0: 17618.4. Samples: 12487424. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) -[2026-06-07 02:22:30,745][321791] Avg episode reward: [(0, '720.055')] -[2026-06-07 02:22:30,936][324563] Updated weights for policy 0, policy_version 24308 (0.0007) -[2026-06-07 02:22:31,145][324563] Updated weights for policy 0, policy_version 24318 (0.0007) -[2026-06-07 02:22:31,338][324563] Updated weights for policy 0, policy_version 24328 (0.0006) -[2026-06-07 02:22:31,550][324563] Updated weights for policy 0, policy_version 24338 (0.0006) -[2026-06-07 02:22:32,293][324563] Updated weights for policy 0, policy_version 24348 (0.0007) -[2026-06-07 02:22:32,515][324563] Updated weights for policy 0, policy_version 24358 (0.0006) -[2026-06-07 02:22:32,747][324563] Updated weights for policy 0, policy_version 24369 (0.0006) -[2026-06-07 02:22:32,954][324563] Updated weights for policy 0, policy_version 24379 (0.0007) -[2026-06-07 02:22:33,142][324563] Updated weights for policy 0, policy_version 24389 (0.0006) -[2026-06-07 02:22:33,361][324563] Updated weights for policy 0, policy_version 24399 (0.0007) -[2026-06-07 02:22:34,098][324563] Updated weights for policy 0, policy_version 24409 (0.0006) -[2026-06-07 02:22:34,309][324563] Updated weights for policy 0, policy_version 24419 (0.0006) -[2026-06-07 02:22:34,493][324563] Updated weights for policy 0, policy_version 24429 (0.0007) -[2026-06-07 02:22:34,697][324563] Updated weights for policy 0, policy_version 24439 (0.0006) -[2026-06-07 02:22:34,931][324563] Updated weights for policy 0, policy_version 24449 (0.0007) -[2026-06-07 02:22:35,160][324563] Updated weights for policy 0, policy_version 24459 (0.0006) -[2026-06-07 02:22:35,369][324563] Updated weights for policy 0, policy_version 24469 (0.0006) -[2026-06-07 02:22:35,744][321791] Fps is (10 sec: 19660.6, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 12550144. Throughput: 0: 17513.2. Samples: 12532608. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) -[2026-06-07 02:22:35,745][321791] Avg episode reward: [(0, '730.794')] -[2026-06-07 02:22:36,133][324563] Updated weights for policy 0, policy_version 24480 (0.0007) -[2026-06-07 02:22:36,334][324563] Updated weights for policy 0, policy_version 24490 (0.0006) -[2026-06-07 02:22:36,553][324563] Updated weights for policy 0, policy_version 24500 (0.0007) -[2026-06-07 02:22:36,757][324563] Updated weights for policy 0, policy_version 24510 (0.0007) -[2026-06-07 02:22:36,982][324563] Updated weights for policy 0, policy_version 24522 (0.0006) -[2026-06-07 02:22:37,190][324563] Updated weights for policy 0, policy_version 24532 (0.0006) -[2026-06-07 02:22:37,957][324563] Updated weights for policy 0, policy_version 24542 (0.0006) -[2026-06-07 02:22:38,163][324563] Updated weights for policy 0, policy_version 24552 (0.0006) -[2026-06-07 02:22:38,384][324563] Updated weights for policy 0, policy_version 24563 (0.0006) -[2026-06-07 02:22:38,587][324563] Updated weights for policy 0, policy_version 24573 (0.0006) -[2026-06-07 02:22:38,820][324563] Updated weights for policy 0, policy_version 24584 (0.0006) -[2026-06-07 02:22:39,052][324563] Updated weights for policy 0, policy_version 24594 (0.0006) -[2026-06-07 02:22:39,787][324563] Updated weights for policy 0, policy_version 24604 (0.0007) -[2026-06-07 02:22:39,997][324563] Updated weights for policy 0, policy_version 24614 (0.0006) -[2026-06-07 02:22:40,204][324563] Updated weights for policy 0, policy_version 24624 (0.0006) -[2026-06-07 02:22:40,422][324563] Updated weights for policy 0, policy_version 24634 (0.0006) -[2026-06-07 02:22:40,630][324563] Updated weights for policy 0, policy_version 24644 (0.0007) -[2026-06-07 02:22:40,744][321791] Fps is (10 sec: 16383.8, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 12615680. Throughput: 0: 17735.0. Samples: 12645632. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) -[2026-06-07 02:22:40,745][321791] Avg episode reward: [(0, '759.495')] -[2026-06-07 02:22:40,831][324563] Updated weights for policy 0, policy_version 24654 (0.0007) -[2026-06-07 02:22:41,048][324276] Saving new best policy, reward=759.495! -[2026-06-07 02:22:41,051][324563] Updated weights for policy 0, policy_version 24664 (0.0007) -[2026-06-07 02:22:41,780][324563] Updated weights for policy 0, policy_version 24674 (0.0006) -[2026-06-07 02:22:42,015][324563] Updated weights for policy 0, policy_version 24685 (0.0007) -[2026-06-07 02:22:42,208][324563] Updated weights for policy 0, policy_version 24695 (0.0009) -[2026-06-07 02:22:42,428][324563] Updated weights for policy 0, policy_version 24706 (0.0011) -[2026-06-07 02:22:42,639][324563] Updated weights for policy 0, policy_version 24716 (0.0011) -[2026-06-07 02:22:42,863][324563] Updated weights for policy 0, policy_version 24727 (0.0011) -[2026-06-07 02:22:43,623][324563] Updated weights for policy 0, policy_version 24737 (0.0010) -[2026-06-07 02:22:43,820][324563] Updated weights for policy 0, policy_version 24747 (0.0007) -[2026-06-07 02:22:44,046][324563] Updated weights for policy 0, policy_version 24758 (0.0006) -[2026-06-07 02:22:44,255][324563] Updated weights for policy 0, policy_version 24768 (0.0007) -[2026-06-07 02:22:44,477][324563] Updated weights for policy 0, policy_version 24779 (0.0006) -[2026-06-07 02:22:44,655][324563] Updated weights for policy 0, policy_version 24789 (0.0007) -[2026-06-07 02:22:45,396][324563] Updated weights for policy 0, policy_version 24799 (0.0006) -[2026-06-07 02:22:45,601][324563] Updated weights for policy 0, policy_version 24809 (0.0008) -[2026-06-07 02:22:45,744][321791] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 12713984. Throughput: 0: 17547.4. Samples: 12751360. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) -[2026-06-07 02:22:45,745][321791] Avg episode reward: [(0, '800.269')] -[2026-06-07 02:22:45,816][324563] Updated weights for policy 0, policy_version 24820 (0.0011) -[2026-06-07 02:22:46,017][324563] Updated weights for policy 0, policy_version 24830 (0.0011) -[2026-06-07 02:22:46,225][324563] Updated weights for policy 0, policy_version 24840 (0.0011) -[2026-06-07 02:22:46,421][324563] Updated weights for policy 0, policy_version 24850 (0.0009) -[2026-06-07 02:22:46,535][324276] Saving new best policy, reward=800.269! -[2026-06-07 02:22:47,190][324563] Updated weights for policy 0, policy_version 24860 (0.0010) -[2026-06-07 02:22:47,409][324563] Updated weights for policy 0, policy_version 24870 (0.0011) -[2026-06-07 02:22:47,607][324563] Updated weights for policy 0, policy_version 24880 (0.0011) -[2026-06-07 02:22:47,791][324563] Updated weights for policy 0, policy_version 24890 (0.0011) -[2026-06-07 02:22:48,001][324563] Updated weights for policy 0, policy_version 24900 (0.0011) -[2026-06-07 02:22:48,220][324563] Updated weights for policy 0, policy_version 24911 (0.0010) -[2026-06-07 02:22:48,995][324563] Updated weights for policy 0, policy_version 24921 (0.0008) -[2026-06-07 02:22:49,192][324563] Updated weights for policy 0, policy_version 24931 (0.0010) -[2026-06-07 02:22:49,376][324563] Updated weights for policy 0, policy_version 24941 (0.0011) -[2026-06-07 02:22:49,602][324563] Updated weights for policy 0, policy_version 24951 (0.0011) -[2026-06-07 02:22:49,828][324563] Updated weights for policy 0, policy_version 24961 (0.0010) -[2026-06-07 02:22:50,031][324563] Updated weights for policy 0, policy_version 24971 (0.0007) -[2026-06-07 02:22:50,227][324563] Updated weights for policy 0, policy_version 24981 (0.0006) -[2026-06-07 02:22:50,744][321791] Fps is (10 sec: 19661.4, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 12812288. Throughput: 0: 17555.9. Samples: 12799104. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) -[2026-06-07 02:22:50,745][321791] Avg episode reward: [(0, '792.523')] -[2026-06-07 02:22:50,988][324563] Updated weights for policy 0, policy_version 24991 (0.0006) -[2026-06-07 02:22:51,198][324563] Updated weights for policy 0, policy_version 25001 (0.0006) -[2026-06-07 02:22:51,403][324563] Updated weights for policy 0, policy_version 25011 (0.0006) -[2026-06-07 02:22:51,627][324563] Updated weights for policy 0, policy_version 25021 (0.0006) -[2026-06-07 02:22:51,823][324563] Updated weights for policy 0, policy_version 25031 (0.0007) -[2026-06-07 02:22:52,044][324563] Updated weights for policy 0, policy_version 25042 (0.0006) -[2026-06-07 02:22:52,785][324563] Updated weights for policy 0, policy_version 25052 (0.0007) -[2026-06-07 02:22:53,004][324563] Updated weights for policy 0, policy_version 25062 (0.0006) -[2026-06-07 02:22:53,203][324563] Updated weights for policy 0, policy_version 25072 (0.0006) -[2026-06-07 02:22:53,411][324563] Updated weights for policy 0, policy_version 25082 (0.0006) -[2026-06-07 02:22:53,632][324563] Updated weights for policy 0, policy_version 25092 (0.0006) -[2026-06-07 02:22:53,835][324563] Updated weights for policy 0, policy_version 25102 (0.0006) -[2026-06-07 02:22:54,031][324563] Updated weights for policy 0, policy_version 25112 (0.0006) -[2026-06-07 02:22:54,764][324563] Updated weights for policy 0, policy_version 25122 (0.0007) -[2026-06-07 02:22:54,987][324563] Updated weights for policy 0, policy_version 25132 (0.0007) -[2026-06-07 02:22:55,193][324563] Updated weights for policy 0, policy_version 25142 (0.0007) -[2026-06-07 02:22:55,394][324563] Updated weights for policy 0, policy_version 25152 (0.0011) -[2026-06-07 02:22:55,602][324563] Updated weights for policy 0, policy_version 25162 (0.0010) -[2026-06-07 02:22:55,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 12877824. Throughput: 0: 17769.3. Samples: 12912768. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) -[2026-06-07 02:22:55,745][321791] Avg episode reward: [(0, '766.869')] -[2026-06-07 02:22:55,813][324563] Updated weights for policy 0, policy_version 25172 (0.0011) -[2026-06-07 02:22:56,558][324563] Updated weights for policy 0, policy_version 25183 (0.0008) -[2026-06-07 02:22:56,773][324563] Updated weights for policy 0, policy_version 25193 (0.0006) -[2026-06-07 02:22:56,974][324563] Updated weights for policy 0, policy_version 25203 (0.0011) -[2026-06-07 02:22:57,164][324563] Updated weights for policy 0, policy_version 25213 (0.0011) -[2026-06-07 02:22:57,373][324563] Updated weights for policy 0, policy_version 25223 (0.0011) -[2026-06-07 02:22:57,584][324563] Updated weights for policy 0, policy_version 25233 (0.0011) -[2026-06-07 02:22:58,328][324563] Updated weights for policy 0, policy_version 25243 (0.0007) -[2026-06-07 02:22:58,535][324563] Updated weights for policy 0, policy_version 25253 (0.0008) -[2026-06-07 02:22:58,710][324563] Updated weights for policy 0, policy_version 25263 (0.0007) -[2026-06-07 02:22:58,931][324563] Updated weights for policy 0, policy_version 25274 (0.0007) -[2026-06-07 02:22:59,123][324563] Updated weights for policy 0, policy_version 25284 (0.0007) -[2026-06-07 02:22:59,330][324563] Updated weights for policy 0, policy_version 25294 (0.0007) -[2026-06-07 02:22:59,527][324563] Updated weights for policy 0, policy_version 25304 (0.0006) -[2026-06-07 02:23:00,265][324563] Updated weights for policy 0, policy_version 25314 (0.0011) -[2026-06-07 02:23:00,495][324563] Updated weights for policy 0, policy_version 25325 (0.0011) -[2026-06-07 02:23:00,679][324563] Updated weights for policy 0, policy_version 25335 (0.0008) -[2026-06-07 02:23:00,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 12976128. Throughput: 0: 17558.7. Samples: 13015040. Policy #0 lag: (min: 53.0, avg: 68.3, max: 117.0) -[2026-06-07 02:23:00,745][321791] Avg episode reward: [(0, '788.387')] -[2026-06-07 02:23:00,894][324563] Updated weights for policy 0, policy_version 25345 (0.0007) -[2026-06-07 02:23:01,093][324563] Updated weights for policy 0, policy_version 25355 (0.0009) -[2026-06-07 02:23:01,304][324563] Updated weights for policy 0, policy_version 25365 (0.0011) -[2026-06-07 02:23:02,077][324563] Updated weights for policy 0, policy_version 25375 (0.0011) -[2026-06-07 02:23:02,286][324563] Updated weights for policy 0, policy_version 25385 (0.0011) -[2026-06-07 02:23:02,483][324563] Updated weights for policy 0, policy_version 25395 (0.0011) -[2026-06-07 02:23:02,676][324563] Updated weights for policy 0, policy_version 25405 (0.0008) -[2026-06-07 02:23:02,874][324563] Updated weights for policy 0, policy_version 25415 (0.0007) -[2026-06-07 02:23:03,059][324563] Updated weights for policy 0, policy_version 25425 (0.0010) -[2026-06-07 02:23:03,861][324563] Updated weights for policy 0, policy_version 25436 (0.0009) -[2026-06-07 02:23:04,050][324563] Updated weights for policy 0, policy_version 25446 (0.0011) -[2026-06-07 02:23:04,274][324563] Updated weights for policy 0, policy_version 25457 (0.0011) -[2026-06-07 02:23:04,498][324563] Updated weights for policy 0, policy_version 25467 (0.0011) -[2026-06-07 02:23:04,681][324563] Updated weights for policy 0, policy_version 25477 (0.0009) -[2026-06-07 02:23:04,892][324563] Updated weights for policy 0, policy_version 25487 (0.0008) -[2026-06-07 02:23:05,610][324563] Updated weights for policy 0, policy_version 25497 (0.0006) -[2026-06-07 02:23:05,744][321791] Fps is (10 sec: 19660.7, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 13074432. Throughput: 0: 17794.9. Samples: 13070080. Policy #0 lag: (min: 53.0, avg: 68.3, max: 117.0) -[2026-06-07 02:23:05,745][321791] Avg episode reward: [(0, '790.517')] -[2026-06-07 02:23:05,834][324563] Updated weights for policy 0, policy_version 25508 (0.0007) -[2026-06-07 02:23:06,045][324563] Updated weights for policy 0, policy_version 25518 (0.0007) -[2026-06-07 02:23:06,256][324563] Updated weights for policy 0, policy_version 25528 (0.0006) -[2026-06-07 02:23:06,449][324563] Updated weights for policy 0, policy_version 25538 (0.0007) -[2026-06-07 02:23:06,662][324563] Updated weights for policy 0, policy_version 25548 (0.0007) -[2026-06-07 02:23:06,847][324563] Updated weights for policy 0, policy_version 25558 (0.0007) -[2026-06-07 02:23:07,576][324563] Updated weights for policy 0, policy_version 25568 (0.0006) -[2026-06-07 02:23:07,783][324563] Updated weights for policy 0, policy_version 25578 (0.0006) -[2026-06-07 02:23:07,982][324563] Updated weights for policy 0, policy_version 25588 (0.0007) -[2026-06-07 02:23:08,194][324563] Updated weights for policy 0, policy_version 25598 (0.0006) -[2026-06-07 02:23:08,403][324563] Updated weights for policy 0, policy_version 25608 (0.0007) -[2026-06-07 02:23:08,603][324563] Updated weights for policy 0, policy_version 25618 (0.0006) -[2026-06-07 02:23:09,362][324563] Updated weights for policy 0, policy_version 25628 (0.0006) -[2026-06-07 02:23:09,545][324563] Updated weights for policy 0, policy_version 25638 (0.0006) -[2026-06-07 02:23:09,800][324563] Updated weights for policy 0, policy_version 25650 (0.0006) -[2026-06-07 02:23:10,009][324563] Updated weights for policy 0, policy_version 25660 (0.0007) -[2026-06-07 02:23:10,210][324563] Updated weights for policy 0, policy_version 25670 (0.0006) -[2026-06-07 02:23:10,443][324563] Updated weights for policy 0, policy_version 25681 (0.0007) -[2026-06-07 02:23:10,744][321791] Fps is (10 sec: 19660.6, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 13172736. Throughput: 0: 17791.9. Samples: 13178112. Policy #0 lag: (min: 53.0, avg: 68.3, max: 117.0) -[2026-06-07 02:23:10,745][321791] Avg episode reward: [(0, '777.565')] -[2026-06-07 02:23:11,236][324563] Updated weights for policy 0, policy_version 25691 (0.0007) -[2026-06-07 02:23:11,436][324563] Updated weights for policy 0, policy_version 25701 (0.0006) -[2026-06-07 02:23:11,675][324563] Updated weights for policy 0, policy_version 25712 (0.0006) -[2026-06-07 02:23:11,875][324563] Updated weights for policy 0, policy_version 25722 (0.0006) -[2026-06-07 02:23:12,076][324563] Updated weights for policy 0, policy_version 25732 (0.0007) -[2026-06-07 02:23:12,280][324563] Updated weights for policy 0, policy_version 25742 (0.0006) -[2026-06-07 02:23:12,488][324563] Updated weights for policy 0, policy_version 25752 (0.0006) -[2026-06-07 02:23:13,245][324563] Updated weights for policy 0, policy_version 25763 (0.0007) -[2026-06-07 02:23:13,433][324563] Updated weights for policy 0, policy_version 25773 (0.0007) -[2026-06-07 02:23:13,636][324563] Updated weights for policy 0, policy_version 25783 (0.0006) -[2026-06-07 02:23:13,823][324563] Updated weights for policy 0, policy_version 25793 (0.0006) -[2026-06-07 02:23:14,049][324563] Updated weights for policy 0, policy_version 25803 (0.0007) -[2026-06-07 02:23:14,251][324563] Updated weights for policy 0, policy_version 25813 (0.0006) -[2026-06-07 02:23:14,994][324563] Updated weights for policy 0, policy_version 25823 (0.0007) -[2026-06-07 02:23:15,207][324563] Updated weights for policy 0, policy_version 25833 (0.0006) -[2026-06-07 02:23:15,390][324563] Updated weights for policy 0, policy_version 25843 (0.0006) -[2026-06-07 02:23:15,591][324563] Updated weights for policy 0, policy_version 25853 (0.0006) -[2026-06-07 02:23:15,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 13238272. Throughput: 0: 17649.9. Samples: 13281664. Policy #0 lag: (min: 53.0, avg: 68.3, max: 117.0) -[2026-06-07 02:23:15,745][321791] Avg episode reward: [(0, '783.930')] -[2026-06-07 02:23:15,802][324563] Updated weights for policy 0, policy_version 25863 (0.0006) -[2026-06-07 02:23:16,019][324563] Updated weights for policy 0, policy_version 25873 (0.0007) -[2026-06-07 02:23:16,746][324563] Updated weights for policy 0, policy_version 25883 (0.0007) -[2026-06-07 02:23:16,989][324563] Updated weights for policy 0, policy_version 25893 (0.0006) -[2026-06-07 02:23:17,191][324563] Updated weights for policy 0, policy_version 25903 (0.0006) -[2026-06-07 02:23:17,406][324563] Updated weights for policy 0, policy_version 25913 (0.0006) -[2026-06-07 02:23:17,632][324563] Updated weights for policy 0, policy_version 25923 (0.0006) -[2026-06-07 02:23:17,840][324563] Updated weights for policy 0, policy_version 25933 (0.0007) -[2026-06-07 02:23:18,054][324563] Updated weights for policy 0, policy_version 25943 (0.0006) -[2026-06-07 02:23:18,773][324563] Updated weights for policy 0, policy_version 25953 (0.0007) -[2026-06-07 02:23:18,972][324563] Updated weights for policy 0, policy_version 25963 (0.0006) -[2026-06-07 02:23:19,190][324563] Updated weights for policy 0, policy_version 25973 (0.0007) -[2026-06-07 02:23:19,400][324563] Updated weights for policy 0, policy_version 25983 (0.0007) -[2026-06-07 02:23:19,607][324563] Updated weights for policy 0, policy_version 25993 (0.0006) -[2026-06-07 02:23:19,803][324563] Updated weights for policy 0, policy_version 26003 (0.0006) -[2026-06-07 02:23:20,527][324563] Updated weights for policy 0, policy_version 26013 (0.0006) -[2026-06-07 02:23:20,735][324563] Updated weights for policy 0, policy_version 26023 (0.0006) -[2026-06-07 02:23:20,744][321791] Fps is (10 sec: 16383.5, 60 sec: 17476.2, 300 sec: 17661.4). Total num frames: 13336576. Throughput: 0: 17931.2. Samples: 13339520. Policy #0 lag: (min: 44.0, avg: 59.2, max: 108.0) -[2026-06-07 02:23:20,746][321791] Avg episode reward: [(0, '794.011')] -[2026-06-07 02:23:20,938][324563] Updated weights for policy 0, policy_version 26033 (0.0007) -[2026-06-07 02:23:21,135][324563] Updated weights for policy 0, policy_version 26043 (0.0006) -[2026-06-07 02:23:21,334][324563] Updated weights for policy 0, policy_version 26053 (0.0006) -[2026-06-07 02:23:21,564][324563] Updated weights for policy 0, policy_version 26063 (0.0007) -[2026-06-07 02:23:22,297][324563] Updated weights for policy 0, policy_version 26073 (0.0006) -[2026-06-07 02:23:22,484][324563] Updated weights for policy 0, policy_version 26083 (0.0007) -[2026-06-07 02:23:22,684][324563] Updated weights for policy 0, policy_version 26093 (0.0007) -[2026-06-07 02:23:22,876][324563] Updated weights for policy 0, policy_version 26103 (0.0006) -[2026-06-07 02:23:23,087][324563] Updated weights for policy 0, policy_version 26113 (0.0007) -[2026-06-07 02:23:23,311][324563] Updated weights for policy 0, policy_version 26123 (0.0007) -[2026-06-07 02:23:23,510][324563] Updated weights for policy 0, policy_version 26133 (0.0007) -[2026-06-07 02:23:24,242][324563] Updated weights for policy 0, policy_version 26143 (0.0007) -[2026-06-07 02:23:24,463][324563] Updated weights for policy 0, policy_version 26153 (0.0006) -[2026-06-07 02:23:24,684][324563] Updated weights for policy 0, policy_version 26163 (0.0007) -[2026-06-07 02:23:24,877][324563] Updated weights for policy 0, policy_version 26173 (0.0006) -[2026-06-07 02:23:25,074][324563] Updated weights for policy 0, policy_version 26183 (0.0007) -[2026-06-07 02:23:25,267][324563] Updated weights for policy 0, policy_version 26193 (0.0007) -[2026-06-07 02:23:25,744][321791] Fps is (10 sec: 19660.8, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 13434880. Throughput: 0: 17678.4. Samples: 13441152. Policy #0 lag: (min: 44.0, avg: 59.2, max: 108.0) -[2026-06-07 02:23:25,745][321791] Avg episode reward: [(0, '794.150')] -[2026-06-07 02:23:26,061][324563] Updated weights for policy 0, policy_version 26203 (0.0006) -[2026-06-07 02:23:26,277][324563] Updated weights for policy 0, policy_version 26213 (0.0006) -[2026-06-07 02:23:26,492][324563] Updated weights for policy 0, policy_version 26223 (0.0007) -[2026-06-07 02:23:26,693][324563] Updated weights for policy 0, policy_version 26233 (0.0007) -[2026-06-07 02:23:26,901][324563] Updated weights for policy 0, policy_version 26243 (0.0006) -[2026-06-07 02:23:27,096][324563] Updated weights for policy 0, policy_version 26253 (0.0006) -[2026-06-07 02:23:27,305][324563] Updated weights for policy 0, policy_version 26263 (0.0006) -[2026-06-07 02:23:28,031][324563] Updated weights for policy 0, policy_version 26273 (0.0006) -[2026-06-07 02:23:28,236][324563] Updated weights for policy 0, policy_version 26283 (0.0007) -[2026-06-07 02:23:28,433][324563] Updated weights for policy 0, policy_version 26293 (0.0006) -[2026-06-07 02:23:28,629][324563] Updated weights for policy 0, policy_version 26303 (0.0006) -[2026-06-07 02:23:28,827][324563] Updated weights for policy 0, policy_version 26313 (0.0006) -[2026-06-07 02:23:29,044][324563] Updated weights for policy 0, policy_version 26323 (0.0006) -[2026-06-07 02:23:29,817][324563] Updated weights for policy 0, policy_version 26333 (0.0006) -[2026-06-07 02:23:30,018][324563] Updated weights for policy 0, policy_version 26343 (0.0007) -[2026-06-07 02:23:30,222][324563] Updated weights for policy 0, policy_version 26353 (0.0006) -[2026-06-07 02:23:30,441][324563] Updated weights for policy 0, policy_version 26363 (0.0007) -[2026-06-07 02:23:30,645][324563] Updated weights for policy 0, policy_version 26373 (0.0007) -[2026-06-07 02:23:30,744][321791] Fps is (10 sec: 16384.5, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 13500416. Throughput: 0: 17641.2. Samples: 13545216. Policy #0 lag: (min: 44.0, avg: 59.2, max: 108.0) -[2026-06-07 02:23:30,745][321791] Avg episode reward: [(0, '769.834')] -[2026-06-07 02:23:30,860][324563] Updated weights for policy 0, policy_version 26383 (0.0006) -[2026-06-07 02:23:31,604][324563] Updated weights for policy 0, policy_version 26393 (0.0007) -[2026-06-07 02:23:31,796][324563] Updated weights for policy 0, policy_version 26403 (0.0007) -[2026-06-07 02:23:31,991][324563] Updated weights for policy 0, policy_version 26413 (0.0006) -[2026-06-07 02:23:32,188][324563] Updated weights for policy 0, policy_version 26423 (0.0006) -[2026-06-07 02:23:32,397][324563] Updated weights for policy 0, policy_version 26433 (0.0006) -[2026-06-07 02:23:32,584][324563] Updated weights for policy 0, policy_version 26443 (0.0006) -[2026-06-07 02:23:32,790][324563] Updated weights for policy 0, policy_version 26453 (0.0007) -[2026-06-07 02:23:33,550][324563] Updated weights for policy 0, policy_version 26463 (0.0006) -[2026-06-07 02:23:33,748][324563] Updated weights for policy 0, policy_version 26473 (0.0006) -[2026-06-07 02:23:33,943][324563] Updated weights for policy 0, policy_version 26483 (0.0006) -[2026-06-07 02:23:34,146][324563] Updated weights for policy 0, policy_version 26493 (0.0006) -[2026-06-07 02:23:34,362][324563] Updated weights for policy 0, policy_version 26504 (0.0006) -[2026-06-07 02:23:34,574][324563] Updated weights for policy 0, policy_version 26514 (0.0006) -[2026-06-07 02:23:35,315][324563] Updated weights for policy 0, policy_version 26524 (0.0007) -[2026-06-07 02:23:35,518][324563] Updated weights for policy 0, policy_version 26534 (0.0006) -[2026-06-07 02:23:35,742][324563] Updated weights for policy 0, policy_version 26544 (0.0007) -[2026-06-07 02:23:35,744][321791] Fps is (10 sec: 16383.8, 60 sec: 17476.2, 300 sec: 17661.4). Total num frames: 13598720. Throughput: 0: 17871.6. Samples: 13603328. Policy #0 lag: (min: 44.0, avg: 59.2, max: 108.0) -[2026-06-07 02:23:35,745][321791] Avg episode reward: [(0, '794.224')] -[2026-06-07 02:23:35,945][324563] Updated weights for policy 0, policy_version 26554 (0.0007) -[2026-06-07 02:23:36,181][324563] Updated weights for policy 0, policy_version 26565 (0.0006) -[2026-06-07 02:23:36,392][324563] Updated weights for policy 0, policy_version 26575 (0.0006) -[2026-06-07 02:23:37,137][324563] Updated weights for policy 0, policy_version 26586 (0.0007) -[2026-06-07 02:23:37,349][324563] Updated weights for policy 0, policy_version 26596 (0.0011) -[2026-06-07 02:23:37,553][324563] Updated weights for policy 0, policy_version 26606 (0.0011) -[2026-06-07 02:23:37,744][324563] Updated weights for policy 0, policy_version 26616 (0.0011) -[2026-06-07 02:23:37,965][324563] Updated weights for policy 0, policy_version 26626 (0.0006) -[2026-06-07 02:23:38,173][324563] Updated weights for policy 0, policy_version 26636 (0.0006) -[2026-06-07 02:23:38,395][324563] Updated weights for policy 0, policy_version 26646 (0.0006) -[2026-06-07 02:23:39,162][324563] Updated weights for policy 0, policy_version 26656 (0.0006) -[2026-06-07 02:23:39,357][324563] Updated weights for policy 0, policy_version 26666 (0.0006) -[2026-06-07 02:23:39,560][324563] Updated weights for policy 0, policy_version 26676 (0.0006) -[2026-06-07 02:23:39,770][324563] Updated weights for policy 0, policy_version 26686 (0.0007) -[2026-06-07 02:23:39,999][324563] Updated weights for policy 0, policy_version 26696 (0.0007) -[2026-06-07 02:23:40,185][324563] Updated weights for policy 0, policy_version 26706 (0.0007) -[2026-06-07 02:23:40,745][321791] Fps is (10 sec: 19659.7, 60 sec: 18022.3, 300 sec: 17661.4). Total num frames: 13697024. Throughput: 0: 17638.1. Samples: 13706496. Policy #0 lag: (min: 50.0, avg: 74.8, max: 114.0) -[2026-06-07 02:23:40,746][321791] Avg episode reward: [(0, '785.368')] -[2026-06-07 02:23:40,913][324563] Updated weights for policy 0, policy_version 26716 (0.0007) -[2026-06-07 02:23:41,122][324563] Updated weights for policy 0, policy_version 26726 (0.0006) -[2026-06-07 02:23:41,317][324563] Updated weights for policy 0, policy_version 26736 (0.0008) -[2026-06-07 02:23:41,530][324563] Updated weights for policy 0, policy_version 26746 (0.0007) -[2026-06-07 02:23:41,756][324563] Updated weights for policy 0, policy_version 26756 (0.0007) -[2026-06-07 02:23:41,943][324563] Updated weights for policy 0, policy_version 26766 (0.0006) -[2026-06-07 02:23:42,147][324563] Updated weights for policy 0, policy_version 26776 (0.0006) -[2026-06-07 02:23:42,897][324563] Updated weights for policy 0, policy_version 26786 (0.0007) -[2026-06-07 02:23:43,104][324563] Updated weights for policy 0, policy_version 26796 (0.0007) -[2026-06-07 02:23:43,306][324563] Updated weights for policy 0, policy_version 26806 (0.0007) -[2026-06-07 02:23:43,503][324563] Updated weights for policy 0, policy_version 26816 (0.0008) -[2026-06-07 02:23:43,716][324563] Updated weights for policy 0, policy_version 26826 (0.0008) -[2026-06-07 02:23:43,930][324563] Updated weights for policy 0, policy_version 26836 (0.0007) -[2026-06-07 02:23:44,678][324563] Updated weights for policy 0, policy_version 26846 (0.0008) -[2026-06-07 02:23:44,876][324563] Updated weights for policy 0, policy_version 26856 (0.0007) -[2026-06-07 02:23:45,062][324563] Updated weights for policy 0, policy_version 26866 (0.0007) -[2026-06-07 02:23:45,281][324563] Updated weights for policy 0, policy_version 26876 (0.0007) -[2026-06-07 02:23:45,495][324563] Updated weights for policy 0, policy_version 26886 (0.0006) -[2026-06-07 02:23:45,691][324563] Updated weights for policy 0, policy_version 26896 (0.0007) -[2026-06-07 02:23:45,744][321791] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 13762560. Throughput: 0: 17621.4. Samples: 13808000. Policy #0 lag: (min: 50.0, avg: 74.8, max: 114.0) -[2026-06-07 02:23:45,745][321791] Avg episode reward: [(0, '797.345')] -[2026-06-07 02:23:46,452][324563] Updated weights for policy 0, policy_version 26906 (0.0007) -[2026-06-07 02:23:46,676][324563] Updated weights for policy 0, policy_version 26917 (0.0007) -[2026-06-07 02:23:46,894][324563] Updated weights for policy 0, policy_version 26928 (0.0007) -[2026-06-07 02:23:47,100][324563] Updated weights for policy 0, policy_version 26938 (0.0007) -[2026-06-07 02:23:47,316][324563] Updated weights for policy 0, policy_version 26948 (0.0007) -[2026-06-07 02:23:47,532][324563] Updated weights for policy 0, policy_version 26958 (0.0007) -[2026-06-07 02:23:47,739][324563] Updated weights for policy 0, policy_version 26968 (0.0007) -[2026-06-07 02:23:48,509][324563] Updated weights for policy 0, policy_version 26978 (0.0007) -[2026-06-07 02:23:48,729][324563] Updated weights for policy 0, policy_version 26989 (0.0007) -[2026-06-07 02:23:48,928][324563] Updated weights for policy 0, policy_version 26999 (0.0007) -[2026-06-07 02:23:49,137][324563] Updated weights for policy 0, policy_version 27009 (0.0008) -[2026-06-07 02:23:49,367][324563] Updated weights for policy 0, policy_version 27019 (0.0007) -[2026-06-07 02:23:49,573][324563] Updated weights for policy 0, policy_version 27029 (0.0007) -[2026-06-07 02:23:50,311][324563] Updated weights for policy 0, policy_version 27039 (0.0007) -[2026-06-07 02:23:50,521][324563] Updated weights for policy 0, policy_version 27049 (0.0007) -[2026-06-07 02:23:50,738][324563] Updated weights for policy 0, policy_version 27059 (0.0007) -[2026-06-07 02:23:50,744][321791] Fps is (10 sec: 16385.0, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 13860864. Throughput: 0: 17689.6. Samples: 13866112. Policy #0 lag: (min: 50.0, avg: 74.8, max: 114.0) -[2026-06-07 02:23:50,745][321791] Avg episode reward: [(0, '832.826')] -[2026-06-07 02:23:50,951][324563] Updated weights for policy 0, policy_version 27069 (0.0007) -[2026-06-07 02:23:51,148][324563] Updated weights for policy 0, policy_version 27079 (0.0007) -[2026-06-07 02:23:51,356][324563] Updated weights for policy 0, policy_version 27089 (0.0007) -[2026-06-07 02:23:51,504][324276] Saving new best policy, reward=832.826! -[2026-06-07 02:23:52,084][324563] Updated weights for policy 0, policy_version 27099 (0.0007) -[2026-06-07 02:23:52,291][324563] Updated weights for policy 0, policy_version 27110 (0.0007) -[2026-06-07 02:23:52,528][324563] Updated weights for policy 0, policy_version 27121 (0.0007) -[2026-06-07 02:23:52,753][324563] Updated weights for policy 0, policy_version 27132 (0.0007) -[2026-06-07 02:23:52,942][324563] Updated weights for policy 0, policy_version 27142 (0.0007) -[2026-06-07 02:23:53,132][324563] Updated weights for policy 0, policy_version 27152 (0.0007) -[2026-06-07 02:23:53,887][324563] Updated weights for policy 0, policy_version 27162 (0.0006) -[2026-06-07 02:23:54,107][324563] Updated weights for policy 0, policy_version 27172 (0.0007) -[2026-06-07 02:23:54,295][324563] Updated weights for policy 0, policy_version 27182 (0.0006) -[2026-06-07 02:23:54,514][324563] Updated weights for policy 0, policy_version 27192 (0.0007) -[2026-06-07 02:23:54,716][324563] Updated weights for policy 0, policy_version 27203 (0.0007) -[2026-06-07 02:23:54,944][324563] Updated weights for policy 0, policy_version 27213 (0.0007) -[2026-06-07 02:23:55,153][324563] Updated weights for policy 0, policy_version 27224 (0.0007) -[2026-06-07 02:23:55,744][321791] Fps is (10 sec: 19660.8, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 13959168. Throughput: 0: 17561.7. Samples: 13968384. Policy #0 lag: (min: 50.0, avg: 74.8, max: 114.0) -[2026-06-07 02:23:55,745][321791] Avg episode reward: [(0, '839.919')] -[2026-06-07 02:23:55,915][324563] Updated weights for policy 0, policy_version 27234 (0.0008) -[2026-06-07 02:23:56,132][324563] Updated weights for policy 0, policy_version 27245 (0.0007) -[2026-06-07 02:23:56,356][324563] Updated weights for policy 0, policy_version 27255 (0.0006) -[2026-06-07 02:23:56,555][324563] Updated weights for policy 0, policy_version 27265 (0.0006) -[2026-06-07 02:23:56,768][324563] Updated weights for policy 0, policy_version 27275 (0.0006) -[2026-06-07 02:23:56,996][324563] Updated weights for policy 0, policy_version 27286 (0.0006) -[2026-06-07 02:23:57,041][324276] Saving new best policy, reward=839.919! -[2026-06-07 02:23:57,772][324563] Updated weights for policy 0, policy_version 27296 (0.0006) -[2026-06-07 02:23:57,973][324563] Updated weights for policy 0, policy_version 27306 (0.0006) -[2026-06-07 02:23:58,190][324563] Updated weights for policy 0, policy_version 27316 (0.0006) -[2026-06-07 02:23:58,394][324563] Updated weights for policy 0, policy_version 27326 (0.0007) -[2026-06-07 02:23:58,612][324563] Updated weights for policy 0, policy_version 27336 (0.0007) -[2026-06-07 02:23:58,827][324563] Updated weights for policy 0, policy_version 27346 (0.0007) -[2026-06-07 02:23:59,524][324563] Updated weights for policy 0, policy_version 27356 (0.0006) -[2026-06-07 02:23:59,728][324563] Updated weights for policy 0, policy_version 27366 (0.0006) -[2026-06-07 02:23:59,928][324563] Updated weights for policy 0, policy_version 27376 (0.0006) -[2026-06-07 02:24:00,129][324563] Updated weights for policy 0, policy_version 27386 (0.0006) -[2026-06-07 02:24:00,362][324563] Updated weights for policy 0, policy_version 27397 (0.0006) -[2026-06-07 02:24:00,569][324563] Updated weights for policy 0, policy_version 27407 (0.0007) -[2026-06-07 02:24:00,744][321791] Fps is (10 sec: 19660.9, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 14057472. Throughput: 0: 17524.6. Samples: 14070272. Policy #0 lag: (min: 63.0, avg: 77.7, max: 127.0) -[2026-06-07 02:24:00,746][321791] Avg episode reward: [(0, '850.219')] -[2026-06-07 02:24:00,752][324276] Saving new best policy, reward=850.219! -[2026-06-07 02:24:01,323][324563] Updated weights for policy 0, policy_version 27417 (0.0007) -[2026-06-07 02:24:01,517][324563] Updated weights for policy 0, policy_version 27427 (0.0007) -[2026-06-07 02:24:01,737][324563] Updated weights for policy 0, policy_version 27437 (0.0008) -[2026-06-07 02:24:01,936][324563] Updated weights for policy 0, policy_version 27447 (0.0009) -[2026-06-07 02:24:02,169][324563] Updated weights for policy 0, policy_version 27458 (0.0008) -[2026-06-07 02:24:02,350][324563] Updated weights for policy 0, policy_version 27468 (0.0011) -[2026-06-07 02:24:02,538][324563] Updated weights for policy 0, policy_version 27478 (0.0011) -[2026-06-07 02:24:03,303][324563] Updated weights for policy 0, policy_version 27488 (0.0008) -[2026-06-07 02:24:03,506][324563] Updated weights for policy 0, policy_version 27498 (0.0006) -[2026-06-07 02:24:03,703][324563] Updated weights for policy 0, policy_version 27508 (0.0006) -[2026-06-07 02:24:03,902][324563] Updated weights for policy 0, policy_version 27518 (0.0006) -[2026-06-07 02:24:04,104][324563] Updated weights for policy 0, policy_version 27528 (0.0007) -[2026-06-07 02:24:04,311][324563] Updated weights for policy 0, policy_version 27538 (0.0006) -[2026-06-07 02:24:05,066][324563] Updated weights for policy 0, policy_version 27548 (0.0008) -[2026-06-07 02:24:05,274][324563] Updated weights for policy 0, policy_version 27558 (0.0010) -[2026-06-07 02:24:05,473][324563] Updated weights for policy 0, policy_version 27568 (0.0009) -[2026-06-07 02:24:05,682][324563] Updated weights for policy 0, policy_version 27578 (0.0011) -[2026-06-07 02:24:05,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 14123008. Throughput: 0: 17539.0. Samples: 14128768. Policy #0 lag: (min: 63.0, avg: 77.7, max: 127.0) -[2026-06-07 02:24:05,745][321791] Avg episode reward: [(0, '870.592')] -[2026-06-07 02:24:05,887][324563] Updated weights for policy 0, policy_version 27588 (0.0007) -[2026-06-07 02:24:06,082][324563] Updated weights for policy 0, policy_version 27598 (0.0006) -[2026-06-07 02:24:06,290][324276] Saving new best policy, reward=870.592! -[2026-06-07 02:24:06,293][324563] Updated weights for policy 0, policy_version 27608 (0.0006) -[2026-06-07 02:24:07,057][324563] Updated weights for policy 0, policy_version 27618 (0.0006) -[2026-06-07 02:24:07,238][324563] Updated weights for policy 0, policy_version 27628 (0.0006) -[2026-06-07 02:24:07,424][324563] Updated weights for policy 0, policy_version 27638 (0.0006) -[2026-06-07 02:24:07,635][324563] Updated weights for policy 0, policy_version 27648 (0.0006) -[2026-06-07 02:24:07,861][324563] Updated weights for policy 0, policy_version 27658 (0.0006) -[2026-06-07 02:24:08,074][324563] Updated weights for policy 0, policy_version 27668 (0.0006) -[2026-06-07 02:24:08,798][324563] Updated weights for policy 0, policy_version 27678 (0.0007) -[2026-06-07 02:24:09,006][324563] Updated weights for policy 0, policy_version 27688 (0.0007) -[2026-06-07 02:24:09,232][324563] Updated weights for policy 0, policy_version 27699 (0.0006) -[2026-06-07 02:24:09,460][324563] Updated weights for policy 0, policy_version 27709 (0.0006) -[2026-06-07 02:24:09,688][324563] Updated weights for policy 0, policy_version 27719 (0.0006) -[2026-06-07 02:24:09,917][324563] Updated weights for policy 0, policy_version 27729 (0.0007) -[2026-06-07 02:24:10,635][324563] Updated weights for policy 0, policy_version 27739 (0.0006) -[2026-06-07 02:24:10,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 14221312. Throughput: 0: 17558.7. Samples: 14231296. Policy #0 lag: (min: 63.0, avg: 77.7, max: 127.0) -[2026-06-07 02:24:10,745][321791] Avg episode reward: [(0, '828.474')] -[2026-06-07 02:24:10,847][324563] Updated weights for policy 0, policy_version 27749 (0.0006) -[2026-06-07 02:24:11,047][324563] Updated weights for policy 0, policy_version 27759 (0.0006) -[2026-06-07 02:24:11,270][324563] Updated weights for policy 0, policy_version 27769 (0.0007) -[2026-06-07 02:24:11,467][324563] Updated weights for policy 0, policy_version 27779 (0.0007) -[2026-06-07 02:24:11,670][324563] Updated weights for policy 0, policy_version 27789 (0.0006) -[2026-06-07 02:24:11,900][324563] Updated weights for policy 0, policy_version 27799 (0.0007) -[2026-06-07 02:24:12,656][324563] Updated weights for policy 0, policy_version 27809 (0.0006) -[2026-06-07 02:24:12,881][324563] Updated weights for policy 0, policy_version 27820 (0.0006) -[2026-06-07 02:24:13,096][324563] Updated weights for policy 0, policy_version 27830 (0.0007) -[2026-06-07 02:24:13,321][324563] Updated weights for policy 0, policy_version 27840 (0.0006) -[2026-06-07 02:24:13,533][324563] Updated weights for policy 0, policy_version 27850 (0.0006) -[2026-06-07 02:24:13,731][324563] Updated weights for policy 0, policy_version 27860 (0.0007) -[2026-06-07 02:24:14,429][324563] Updated weights for policy 0, policy_version 27870 (0.0006) -[2026-06-07 02:24:14,640][324563] Updated weights for policy 0, policy_version 27880 (0.0007) -[2026-06-07 02:24:14,863][324563] Updated weights for policy 0, policy_version 27891 (0.0006) -[2026-06-07 02:24:15,078][324563] Updated weights for policy 0, policy_version 27901 (0.0007) -[2026-06-07 02:24:15,282][324563] Updated weights for policy 0, policy_version 27911 (0.0006) -[2026-06-07 02:24:15,485][324563] Updated weights for policy 0, policy_version 27921 (0.0006) -[2026-06-07 02:24:15,744][321791] Fps is (10 sec: 19660.9, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 14319616. Throughput: 0: 17507.6. Samples: 14333056. Policy #0 lag: (min: 63.0, avg: 77.7, max: 127.0) -[2026-06-07 02:24:15,745][321791] Avg episode reward: [(0, '856.699')] -[2026-06-07 02:24:16,253][324563] Updated weights for policy 0, policy_version 27931 (0.0007) -[2026-06-07 02:24:16,468][324563] Updated weights for policy 0, policy_version 27941 (0.0007) -[2026-06-07 02:24:16,669][324563] Updated weights for policy 0, policy_version 27951 (0.0006) -[2026-06-07 02:24:16,864][324563] Updated weights for policy 0, policy_version 27961 (0.0006) -[2026-06-07 02:24:17,075][324563] Updated weights for policy 0, policy_version 27971 (0.0006) -[2026-06-07 02:24:17,295][324563] Updated weights for policy 0, policy_version 27981 (0.0007) -[2026-06-07 02:24:17,532][324563] Updated weights for policy 0, policy_version 27992 (0.0007) -[2026-06-07 02:24:18,268][324563] Updated weights for policy 0, policy_version 28002 (0.0006) -[2026-06-07 02:24:18,461][324563] Updated weights for policy 0, policy_version 28012 (0.0007) -[2026-06-07 02:24:18,690][324563] Updated weights for policy 0, policy_version 28023 (0.0007) -[2026-06-07 02:24:18,884][324563] Updated weights for policy 0, policy_version 28033 (0.0006) -[2026-06-07 02:24:19,103][324563] Updated weights for policy 0, policy_version 28043 (0.0007) -[2026-06-07 02:24:19,315][324563] Updated weights for policy 0, policy_version 28053 (0.0006) -[2026-06-07 02:24:20,079][324563] Updated weights for policy 0, policy_version 28063 (0.0007) -[2026-06-07 02:24:20,301][324563] Updated weights for policy 0, policy_version 28074 (0.0007) -[2026-06-07 02:24:20,502][324563] Updated weights for policy 0, policy_version 28084 (0.0006) -[2026-06-07 02:24:20,696][324563] Updated weights for policy 0, policy_version 28094 (0.0007) -[2026-06-07 02:24:20,744][321791] Fps is (10 sec: 16383.8, 60 sec: 17476.4, 300 sec: 17550.3). Total num frames: 14385152. Throughput: 0: 17536.0. Samples: 14392448. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:24:20,745][321791] Avg episode reward: [(0, '854.988')] -[2026-06-07 02:24:20,921][324563] Updated weights for policy 0, policy_version 28104 (0.0006) -[2026-06-07 02:24:21,165][324563] Updated weights for policy 0, policy_version 28115 (0.0007) -[2026-06-07 02:24:21,880][324563] Updated weights for policy 0, policy_version 28125 (0.0007) -[2026-06-07 02:24:22,076][324563] Updated weights for policy 0, policy_version 28135 (0.0007) -[2026-06-07 02:24:22,278][324563] Updated weights for policy 0, policy_version 28145 (0.0006) -[2026-06-07 02:24:22,495][324563] Updated weights for policy 0, policy_version 28155 (0.0006) -[2026-06-07 02:24:22,720][324563] Updated weights for policy 0, policy_version 28165 (0.0006) -[2026-06-07 02:24:22,967][324563] Updated weights for policy 0, policy_version 28176 (0.0007) -[2026-06-07 02:24:23,692][324563] Updated weights for policy 0, policy_version 28186 (0.0006) -[2026-06-07 02:24:23,917][324563] Updated weights for policy 0, policy_version 28196 (0.0007) -[2026-06-07 02:24:24,114][324563] Updated weights for policy 0, policy_version 28206 (0.0006) -[2026-06-07 02:24:24,340][324563] Updated weights for policy 0, policy_version 28216 (0.0006) -[2026-06-07 02:24:24,558][324563] Updated weights for policy 0, policy_version 28226 (0.0006) -[2026-06-07 02:24:24,785][324563] Updated weights for policy 0, policy_version 28237 (0.0008) -[2026-06-07 02:24:25,006][324563] Updated weights for policy 0, policy_version 28247 (0.0008) -[2026-06-07 02:24:25,700][324563] Updated weights for policy 0, policy_version 28257 (0.0007) -[2026-06-07 02:24:25,744][321791] Fps is (10 sec: 16383.8, 60 sec: 17476.2, 300 sec: 17661.4). Total num frames: 14483456. Throughput: 0: 17499.2. Samples: 14493952. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:24:25,745][321791] Avg episode reward: [(0, '870.014')] -[2026-06-07 02:24:25,949][324563] Updated weights for policy 0, policy_version 28268 (0.0006) -[2026-06-07 02:24:26,160][324563] Updated weights for policy 0, policy_version 28278 (0.0007) -[2026-06-07 02:24:26,366][324563] Updated weights for policy 0, policy_version 28288 (0.0007) -[2026-06-07 02:24:26,575][324563] Updated weights for policy 0, policy_version 28298 (0.0007) -[2026-06-07 02:24:26,794][324563] Updated weights for policy 0, policy_version 28309 (0.0007) -[2026-06-07 02:24:27,577][324563] Updated weights for policy 0, policy_version 28319 (0.0007) -[2026-06-07 02:24:27,812][324563] Updated weights for policy 0, policy_version 28330 (0.0006) -[2026-06-07 02:24:28,026][324563] Updated weights for policy 0, policy_version 28340 (0.0006) -[2026-06-07 02:24:28,227][324563] Updated weights for policy 0, policy_version 28350 (0.0007) -[2026-06-07 02:24:28,433][324563] Updated weights for policy 0, policy_version 28360 (0.0006) -[2026-06-07 02:24:28,664][324563] Updated weights for policy 0, policy_version 28371 (0.0007) -[2026-06-07 02:24:29,415][324563] Updated weights for policy 0, policy_version 28381 (0.0006) -[2026-06-07 02:24:29,625][324563] Updated weights for policy 0, policy_version 28391 (0.0006) -[2026-06-07 02:24:29,835][324563] Updated weights for policy 0, policy_version 28401 (0.0007) -[2026-06-07 02:24:30,039][324563] Updated weights for policy 0, policy_version 28411 (0.0006) -[2026-06-07 02:24:30,232][324563] Updated weights for policy 0, policy_version 28421 (0.0006) -[2026-06-07 02:24:30,432][324563] Updated weights for policy 0, policy_version 28431 (0.0007) -[2026-06-07 02:24:30,744][321791] Fps is (10 sec: 19661.1, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 14581760. Throughput: 0: 17555.9. Samples: 14598016. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:24:30,745][321791] Avg episode reward: [(0, '871.539')] -[2026-06-07 02:24:30,751][324276] Saving new best policy, reward=871.539! -[2026-06-07 02:24:31,186][324563] Updated weights for policy 0, policy_version 28441 (0.0006) -[2026-06-07 02:24:31,387][324563] Updated weights for policy 0, policy_version 28452 (0.0006) -[2026-06-07 02:24:31,600][324563] Updated weights for policy 0, policy_version 28462 (0.0007) -[2026-06-07 02:24:31,802][324563] Updated weights for policy 0, policy_version 28472 (0.0007) -[2026-06-07 02:24:31,981][324563] Updated weights for policy 0, policy_version 28482 (0.0006) -[2026-06-07 02:24:32,205][324563] Updated weights for policy 0, policy_version 28493 (0.0006) -[2026-06-07 02:24:32,420][324563] Updated weights for policy 0, policy_version 28503 (0.0006) -[2026-06-07 02:24:33,164][324563] Updated weights for policy 0, policy_version 28513 (0.0006) -[2026-06-07 02:24:33,374][324563] Updated weights for policy 0, policy_version 28523 (0.0006) -[2026-06-07 02:24:33,612][324563] Updated weights for policy 0, policy_version 28534 (0.0007) -[2026-06-07 02:24:33,806][324563] Updated weights for policy 0, policy_version 28544 (0.0006) -[2026-06-07 02:24:34,005][324563] Updated weights for policy 0, policy_version 28554 (0.0006) -[2026-06-07 02:24:34,232][324563] Updated weights for policy 0, policy_version 28565 (0.0006) -[2026-06-07 02:24:34,998][324563] Updated weights for policy 0, policy_version 28575 (0.0006) -[2026-06-07 02:24:35,217][324563] Updated weights for policy 0, policy_version 28585 (0.0006) -[2026-06-07 02:24:35,400][324563] Updated weights for policy 0, policy_version 28595 (0.0006) -[2026-06-07 02:24:35,604][324563] Updated weights for policy 0, policy_version 28605 (0.0006) -[2026-06-07 02:24:35,744][321791] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 14647296. Throughput: 0: 17555.9. Samples: 14656128. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) -[2026-06-07 02:24:35,745][321791] Avg episode reward: [(0, '876.080')] -[2026-06-07 02:24:35,806][324563] Updated weights for policy 0, policy_version 28615 (0.0006) -[2026-06-07 02:24:36,014][324563] Updated weights for policy 0, policy_version 28625 (0.0006) -[2026-06-07 02:24:36,153][324276] Saving new best policy, reward=876.080! -[2026-06-07 02:24:36,766][324563] Updated weights for policy 0, policy_version 28635 (0.0007) -[2026-06-07 02:24:36,950][324563] Updated weights for policy 0, policy_version 28645 (0.0011) -[2026-06-07 02:24:37,147][324563] Updated weights for policy 0, policy_version 28655 (0.0011) -[2026-06-07 02:24:37,381][324563] Updated weights for policy 0, policy_version 28665 (0.0011) -[2026-06-07 02:24:37,588][324563] Updated weights for policy 0, policy_version 28675 (0.0010) -[2026-06-07 02:24:37,805][324563] Updated weights for policy 0, policy_version 28685 (0.0006) -[2026-06-07 02:24:38,025][324563] Updated weights for policy 0, policy_version 28695 (0.0006) -[2026-06-07 02:24:38,717][324563] Updated weights for policy 0, policy_version 28705 (0.0007) -[2026-06-07 02:24:38,939][324563] Updated weights for policy 0, policy_version 28715 (0.0006) -[2026-06-07 02:24:39,155][324563] Updated weights for policy 0, policy_version 28725 (0.0006) -[2026-06-07 02:24:39,353][324563] Updated weights for policy 0, policy_version 28735 (0.0006) -[2026-06-07 02:24:39,555][324563] Updated weights for policy 0, policy_version 28745 (0.0006) -[2026-06-07 02:24:39,762][324563] Updated weights for policy 0, policy_version 28755 (0.0006) -[2026-06-07 02:24:40,518][324563] Updated weights for policy 0, policy_version 28765 (0.0007) -[2026-06-07 02:24:40,712][324563] Updated weights for policy 0, policy_version 28775 (0.0006) -[2026-06-07 02:24:40,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.5, 300 sec: 17661.4). Total num frames: 14745600. Throughput: 0: 17575.8. Samples: 14759296. Policy #0 lag: (min: 31.0, avg: 46.8, max: 95.0) -[2026-06-07 02:24:40,745][321791] Avg episode reward: [(0, '895.186')] -[2026-06-07 02:24:40,921][324563] Updated weights for policy 0, policy_version 28785 (0.0008) -[2026-06-07 02:24:41,121][324563] Updated weights for policy 0, policy_version 28795 (0.0011) -[2026-06-07 02:24:41,333][324563] Updated weights for policy 0, policy_version 28805 (0.0009) -[2026-06-07 02:24:41,545][324563] Updated weights for policy 0, policy_version 28815 (0.0011) -[2026-06-07 02:24:41,719][324276] Saving new best policy, reward=895.186! -[2026-06-07 02:24:42,298][324563] Updated weights for policy 0, policy_version 28825 (0.0011) -[2026-06-07 02:24:42,498][324563] Updated weights for policy 0, policy_version 28835 (0.0006) -[2026-06-07 02:24:42,705][324563] Updated weights for policy 0, policy_version 28845 (0.0006) -[2026-06-07 02:24:42,905][324563] Updated weights for policy 0, policy_version 28855 (0.0007) -[2026-06-07 02:24:43,109][324563] Updated weights for policy 0, policy_version 28865 (0.0006) -[2026-06-07 02:24:43,323][324563] Updated weights for policy 0, policy_version 28875 (0.0007) -[2026-06-07 02:24:43,540][324563] Updated weights for policy 0, policy_version 28885 (0.0006) -[2026-06-07 02:24:44,277][324563] Updated weights for policy 0, policy_version 28895 (0.0006) -[2026-06-07 02:24:44,486][324563] Updated weights for policy 0, policy_version 28905 (0.0006) -[2026-06-07 02:24:44,678][324563] Updated weights for policy 0, policy_version 28915 (0.0006) -[2026-06-07 02:24:44,862][324563] Updated weights for policy 0, policy_version 28925 (0.0006) -[2026-06-07 02:24:45,063][324563] Updated weights for policy 0, policy_version 28935 (0.0006) -[2026-06-07 02:24:45,252][324563] Updated weights for policy 0, policy_version 28945 (0.0006) -[2026-06-07 02:24:45,744][321791] Fps is (10 sec: 19660.7, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 14843904. Throughput: 0: 17701.0. Samples: 14866816. Policy #0 lag: (min: 31.0, avg: 46.8, max: 95.0) -[2026-06-07 02:24:45,745][321791] Avg episode reward: [(0, '890.433')] -[2026-06-07 02:24:46,035][324563] Updated weights for policy 0, policy_version 28955 (0.0007) -[2026-06-07 02:24:46,232][324563] Updated weights for policy 0, policy_version 28965 (0.0007) -[2026-06-07 02:24:46,435][324563] Updated weights for policy 0, policy_version 28975 (0.0006) -[2026-06-07 02:24:46,632][324563] Updated weights for policy 0, policy_version 28985 (0.0006) -[2026-06-07 02:24:46,852][324563] Updated weights for policy 0, policy_version 28996 (0.0006) -[2026-06-07 02:24:47,042][324563] Updated weights for policy 0, policy_version 29006 (0.0006) -[2026-06-07 02:24:47,276][324563] Updated weights for policy 0, policy_version 29016 (0.0007) -[2026-06-07 02:24:48,042][324563] Updated weights for policy 0, policy_version 29026 (0.0007) -[2026-06-07 02:24:48,245][324563] Updated weights for policy 0, policy_version 29036 (0.0009) -[2026-06-07 02:24:48,475][324563] Updated weights for policy 0, policy_version 29047 (0.0011) -[2026-06-07 02:24:48,729][324563] Updated weights for policy 0, policy_version 29059 (0.0010) -[2026-06-07 02:24:48,930][324563] Updated weights for policy 0, policy_version 29069 (0.0009) -[2026-06-07 02:24:49,127][324563] Updated weights for policy 0, policy_version 29079 (0.0006) -[2026-06-07 02:24:49,887][324563] Updated weights for policy 0, policy_version 29090 (0.0006) -[2026-06-07 02:24:50,100][324563] Updated weights for policy 0, policy_version 29100 (0.0006) -[2026-06-07 02:24:50,301][324563] Updated weights for policy 0, policy_version 29110 (0.0006) -[2026-06-07 02:24:50,494][324563] Updated weights for policy 0, policy_version 29120 (0.0006) -[2026-06-07 02:24:50,703][324563] Updated weights for policy 0, policy_version 29130 (0.0006) -[2026-06-07 02:24:50,744][321791] Fps is (10 sec: 16383.7, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 14909440. Throughput: 0: 17649.7. Samples: 14923008. Policy #0 lag: (min: 31.0, avg: 46.8, max: 95.0) -[2026-06-07 02:24:50,745][321791] Avg episode reward: [(0, '927.271')] -[2026-06-07 02:24:50,920][324563] Updated weights for policy 0, policy_version 29140 (0.0007) -[2026-06-07 02:24:51,005][324276] Saving new best policy, reward=927.271! -[2026-06-07 02:24:51,704][324563] Updated weights for policy 0, policy_version 29150 (0.0006) -[2026-06-07 02:24:51,907][324563] Updated weights for policy 0, policy_version 29160 (0.0006) -[2026-06-07 02:24:52,107][324563] Updated weights for policy 0, policy_version 29170 (0.0007) -[2026-06-07 02:24:52,347][324563] Updated weights for policy 0, policy_version 29180 (0.0007) -[2026-06-07 02:24:52,551][324563] Updated weights for policy 0, policy_version 29190 (0.0007) -[2026-06-07 02:24:52,790][324563] Updated weights for policy 0, policy_version 29201 (0.0007) -[2026-06-07 02:24:53,528][324563] Updated weights for policy 0, policy_version 29211 (0.0006) -[2026-06-07 02:24:53,736][324563] Updated weights for policy 0, policy_version 29221 (0.0006) -[2026-06-07 02:24:53,973][324563] Updated weights for policy 0, policy_version 29232 (0.0006) -[2026-06-07 02:24:54,182][324563] Updated weights for policy 0, policy_version 29242 (0.0006) -[2026-06-07 02:24:54,371][324563] Updated weights for policy 0, policy_version 29252 (0.0006) -[2026-06-07 02:24:54,570][324563] Updated weights for policy 0, policy_version 29262 (0.0006) -[2026-06-07 02:24:54,770][324563] Updated weights for policy 0, policy_version 29272 (0.0006) -[2026-06-07 02:24:55,524][324563] Updated weights for policy 0, policy_version 29282 (0.0007) -[2026-06-07 02:24:55,732][324563] Updated weights for policy 0, policy_version 29292 (0.0007) -[2026-06-07 02:24:55,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 15007744. Throughput: 0: 17661.2. Samples: 15026048. Policy #0 lag: (min: 31.0, avg: 46.8, max: 95.0) -[2026-06-07 02:24:55,745][321791] Avg episode reward: [(0, '931.170')] -[2026-06-07 02:24:55,923][324563] Updated weights for policy 0, policy_version 29302 (0.0006) -[2026-06-07 02:24:56,141][324563] Updated weights for policy 0, policy_version 29313 (0.0006) -[2026-06-07 02:24:56,345][324563] Updated weights for policy 0, policy_version 29323 (0.0006) -[2026-06-07 02:24:56,549][324563] Updated weights for policy 0, policy_version 29333 (0.0006) -[2026-06-07 02:24:56,602][324276] Saving new best policy, reward=931.170! -[2026-06-07 02:24:57,338][324563] Updated weights for policy 0, policy_version 29343 (0.0008) -[2026-06-07 02:24:57,537][324563] Updated weights for policy 0, policy_version 29353 (0.0006) -[2026-06-07 02:24:57,774][324563] Updated weights for policy 0, policy_version 29364 (0.0006) -[2026-06-07 02:24:57,977][324563] Updated weights for policy 0, policy_version 29374 (0.0006) -[2026-06-07 02:24:58,171][324563] Updated weights for policy 0, policy_version 29384 (0.0006) -[2026-06-07 02:24:58,383][324563] Updated weights for policy 0, policy_version 29394 (0.0007) -[2026-06-07 02:24:59,132][324563] Updated weights for policy 0, policy_version 29404 (0.0006) -[2026-06-07 02:24:59,348][324563] Updated weights for policy 0, policy_version 29415 (0.0007) -[2026-06-07 02:24:59,572][324563] Updated weights for policy 0, policy_version 29425 (0.0006) -[2026-06-07 02:24:59,767][324563] Updated weights for policy 0, policy_version 29435 (0.0006) -[2026-06-07 02:24:59,968][324563] Updated weights for policy 0, policy_version 29445 (0.0007) -[2026-06-07 02:25:00,171][324563] Updated weights for policy 0, policy_version 29455 (0.0006) -[2026-06-07 02:25:00,744][321791] Fps is (10 sec: 19661.2, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 15106048. Throughput: 0: 17814.8. Samples: 15134720. Policy #0 lag: (min: 10.0, avg: 50.0, max: 74.0) -[2026-06-07 02:25:00,745][321791] Avg episode reward: [(0, '936.201')] -[2026-06-07 02:25:00,751][324276] Saving new best policy, reward=936.201! -[2026-06-07 02:25:00,893][324563] Updated weights for policy 0, policy_version 29465 (0.0007) -[2026-06-07 02:25:01,114][324563] Updated weights for policy 0, policy_version 29475 (0.0007) -[2026-06-07 02:25:01,292][324563] Updated weights for policy 0, policy_version 29485 (0.0008) -[2026-06-07 02:25:01,504][324563] Updated weights for policy 0, policy_version 29495 (0.0007) -[2026-06-07 02:25:01,726][324563] Updated weights for policy 0, policy_version 29505 (0.0006) -[2026-06-07 02:25:01,950][324563] Updated weights for policy 0, policy_version 29515 (0.0007) -[2026-06-07 02:25:02,139][324563] Updated weights for policy 0, policy_version 29525 (0.0006) -[2026-06-07 02:25:02,847][324563] Updated weights for policy 0, policy_version 29535 (0.0006) -[2026-06-07 02:25:03,054][324563] Updated weights for policy 0, policy_version 29545 (0.0007) -[2026-06-07 02:25:03,249][324563] Updated weights for policy 0, policy_version 29555 (0.0007) -[2026-06-07 02:25:03,442][324563] Updated weights for policy 0, policy_version 29565 (0.0006) -[2026-06-07 02:25:03,632][324563] Updated weights for policy 0, policy_version 29575 (0.0006) -[2026-06-07 02:25:03,846][324563] Updated weights for policy 0, policy_version 29585 (0.0006) -[2026-06-07 02:25:04,614][324563] Updated weights for policy 0, policy_version 29595 (0.0007) -[2026-06-07 02:25:04,826][324563] Updated weights for policy 0, policy_version 29605 (0.0008) -[2026-06-07 02:25:05,031][324563] Updated weights for policy 0, policy_version 29615 (0.0011) -[2026-06-07 02:25:05,233][324563] Updated weights for policy 0, policy_version 29625 (0.0011) -[2026-06-07 02:25:05,434][324563] Updated weights for policy 0, policy_version 29635 (0.0006) -[2026-06-07 02:25:05,643][324563] Updated weights for policy 0, policy_version 29645 (0.0007) -[2026-06-07 02:25:05,744][321791] Fps is (10 sec: 16383.9, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 15171584. Throughput: 0: 17669.7. Samples: 15187584. Policy #0 lag: (min: 10.0, avg: 50.0, max: 74.0) -[2026-06-07 02:25:05,745][321791] Avg episode reward: [(0, '926.702')] -[2026-06-07 02:25:05,887][324563] Updated weights for policy 0, policy_version 29655 (0.0007) -[2026-06-07 02:25:06,596][324563] Updated weights for policy 0, policy_version 29665 (0.0007) -[2026-06-07 02:25:06,803][324563] Updated weights for policy 0, policy_version 29675 (0.0006) -[2026-06-07 02:25:06,995][324563] Updated weights for policy 0, policy_version 29685 (0.0007) -[2026-06-07 02:25:07,194][324563] Updated weights for policy 0, policy_version 29695 (0.0007) -[2026-06-07 02:25:07,406][324563] Updated weights for policy 0, policy_version 29705 (0.0009) -[2026-06-07 02:25:07,625][324563] Updated weights for policy 0, policy_version 29715 (0.0008) -[2026-06-07 02:25:08,364][324563] Updated weights for policy 0, policy_version 29725 (0.0006) -[2026-06-07 02:25:08,573][324563] Updated weights for policy 0, policy_version 29735 (0.0006) -[2026-06-07 02:25:08,808][324563] Updated weights for policy 0, policy_version 29746 (0.0007) -[2026-06-07 02:25:09,005][324563] Updated weights for policy 0, policy_version 29756 (0.0006) -[2026-06-07 02:25:09,204][324563] Updated weights for policy 0, policy_version 29766 (0.0007) -[2026-06-07 02:25:09,401][324563] Updated weights for policy 0, policy_version 29776 (0.0006) -[2026-06-07 02:25:10,161][324563] Updated weights for policy 0, policy_version 29786 (0.0006) -[2026-06-07 02:25:10,382][324563] Updated weights for policy 0, policy_version 29796 (0.0006) -[2026-06-07 02:25:10,589][324563] Updated weights for policy 0, policy_version 29806 (0.0007) -[2026-06-07 02:25:10,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 15269888. Throughput: 0: 17683.9. Samples: 15289728. Policy #0 lag: (min: 10.0, avg: 50.0, max: 74.0) -[2026-06-07 02:25:10,745][321791] Avg episode reward: [(0, '978.034')] -[2026-06-07 02:25:10,789][324563] Updated weights for policy 0, policy_version 29816 (0.0006) -[2026-06-07 02:25:11,021][324563] Updated weights for policy 0, policy_version 29826 (0.0006) -[2026-06-07 02:25:11,223][324563] Updated weights for policy 0, policy_version 29836 (0.0006) -[2026-06-07 02:25:11,423][324563] Updated weights for policy 0, policy_version 29846 (0.0006) -[2026-06-07 02:25:11,457][324276] Saving new best policy, reward=978.034! -[2026-06-07 02:25:12,155][324563] Updated weights for policy 0, policy_version 29856 (0.0007) -[2026-06-07 02:25:12,364][324563] Updated weights for policy 0, policy_version 29866 (0.0006) -[2026-06-07 02:25:12,562][324563] Updated weights for policy 0, policy_version 29877 (0.0006) -[2026-06-07 02:25:12,787][324563] Updated weights for policy 0, policy_version 29888 (0.0006) -[2026-06-07 02:25:12,997][324563] Updated weights for policy 0, policy_version 29898 (0.0007) -[2026-06-07 02:25:13,233][324563] Updated weights for policy 0, policy_version 29908 (0.0007) -[2026-06-07 02:25:13,977][324563] Updated weights for policy 0, policy_version 29918 (0.0010) -[2026-06-07 02:25:14,191][324563] Updated weights for policy 0, policy_version 29928 (0.0011) -[2026-06-07 02:25:14,414][324563] Updated weights for policy 0, policy_version 29939 (0.0010) -[2026-06-07 02:25:14,624][324563] Updated weights for policy 0, policy_version 29949 (0.0011) -[2026-06-07 02:25:14,844][324563] Updated weights for policy 0, policy_version 29960 (0.0011) -[2026-06-07 02:25:15,056][324563] Updated weights for policy 0, policy_version 29970 (0.0008) -[2026-06-07 02:25:15,744][321791] Fps is (10 sec: 19660.8, 60 sec: 17476.2, 300 sec: 17661.4). Total num frames: 15368192. Throughput: 0: 17863.1. Samples: 15401856. Policy #0 lag: (min: 10.0, avg: 50.0, max: 74.0) -[2026-06-07 02:25:15,745][321791] Avg episode reward: [(0, '1003.133')] -[2026-06-07 02:25:15,817][324563] Updated weights for policy 0, policy_version 29980 (0.0008) -[2026-06-07 02:25:16,021][324563] Updated weights for policy 0, policy_version 29990 (0.0011) -[2026-06-07 02:25:16,240][324563] Updated weights for policy 0, policy_version 30000 (0.0011) -[2026-06-07 02:25:16,473][324563] Updated weights for policy 0, policy_version 30011 (0.0008) -[2026-06-07 02:25:16,702][324563] Updated weights for policy 0, policy_version 30022 (0.0007) -[2026-06-07 02:25:16,906][324563] Updated weights for policy 0, policy_version 30032 (0.0007) -[2026-06-07 02:25:17,071][324276] Saving new best policy, reward=1003.133! -[2026-06-07 02:25:17,650][324563] Updated weights for policy 0, policy_version 30042 (0.0007) -[2026-06-07 02:25:17,849][324563] Updated weights for policy 0, policy_version 30052 (0.0007) -[2026-06-07 02:25:18,085][324563] Updated weights for policy 0, policy_version 30063 (0.0007) -[2026-06-07 02:25:18,285][324563] Updated weights for policy 0, policy_version 30073 (0.0007) -[2026-06-07 02:25:18,490][324563] Updated weights for policy 0, policy_version 30083 (0.0007) -[2026-06-07 02:25:18,707][324563] Updated weights for policy 0, policy_version 30093 (0.0007) -[2026-06-07 02:25:18,901][324563] Updated weights for policy 0, policy_version 30103 (0.0006) -[2026-06-07 02:25:19,658][324563] Updated weights for policy 0, policy_version 30114 (0.0006) -[2026-06-07 02:25:19,841][324563] Updated weights for policy 0, policy_version 30124 (0.0006) -[2026-06-07 02:25:20,058][324563] Updated weights for policy 0, policy_version 30134 (0.0008) -[2026-06-07 02:25:20,261][324563] Updated weights for policy 0, policy_version 30144 (0.0007) -[2026-06-07 02:25:20,487][324563] Updated weights for policy 0, policy_version 30154 (0.0007) -[2026-06-07 02:25:20,668][324563] Updated weights for policy 0, policy_version 30164 (0.0007) -[2026-06-07 02:25:20,744][321791] Fps is (10 sec: 16383.6, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 15433728. Throughput: 0: 17655.3. Samples: 15450624. Policy #0 lag: (min: 37.0, avg: 52.7, max: 101.0) -[2026-06-07 02:25:20,746][321791] Avg episode reward: [(0, '1004.757')] -[2026-06-07 02:25:20,752][324276] Saving new best policy, reward=1004.757! -[2026-06-07 02:25:21,442][324563] Updated weights for policy 0, policy_version 30174 (0.0007) -[2026-06-07 02:25:21,661][324563] Updated weights for policy 0, policy_version 30184 (0.0007) -[2026-06-07 02:25:21,859][324563] Updated weights for policy 0, policy_version 30194 (0.0006) -[2026-06-07 02:25:22,048][324563] Updated weights for policy 0, policy_version 30204 (0.0006) -[2026-06-07 02:25:22,263][324563] Updated weights for policy 0, policy_version 30214 (0.0006) -[2026-06-07 02:25:22,466][324563] Updated weights for policy 0, policy_version 30224 (0.0007) -[2026-06-07 02:25:23,222][324563] Updated weights for policy 0, policy_version 30234 (0.0006) -[2026-06-07 02:25:23,427][324563] Updated weights for policy 0, policy_version 30244 (0.0011) -[2026-06-07 02:25:23,651][324563] Updated weights for policy 0, policy_version 30254 (0.0010) -[2026-06-07 02:25:23,850][324563] Updated weights for policy 0, policy_version 30264 (0.0011) -[2026-06-07 02:25:24,059][324563] Updated weights for policy 0, policy_version 30274 (0.0011) -[2026-06-07 02:25:24,242][324563] Updated weights for policy 0, policy_version 30284 (0.0011) -[2026-06-07 02:25:24,449][324563] Updated weights for policy 0, policy_version 30294 (0.0010) -[2026-06-07 02:25:25,194][324563] Updated weights for policy 0, policy_version 30304 (0.0007) -[2026-06-07 02:25:25,404][324563] Updated weights for policy 0, policy_version 30314 (0.0006) -[2026-06-07 02:25:25,619][324563] Updated weights for policy 0, policy_version 30324 (0.0006) -[2026-06-07 02:25:25,744][321791] Fps is (10 sec: 16383.7, 60 sec: 17476.2, 300 sec: 17661.4). Total num frames: 15532032. Throughput: 0: 17681.0. Samples: 15554944. Policy #0 lag: (min: 37.0, avg: 52.7, max: 101.0) -[2026-06-07 02:25:25,746][321791] Avg episode reward: [(0, '1030.636')] -[2026-06-07 02:25:25,819][324563] Updated weights for policy 0, policy_version 30334 (0.0006) -[2026-06-07 02:25:26,023][324563] Updated weights for policy 0, policy_version 30344 (0.0006) -[2026-06-07 02:25:26,213][324563] Updated weights for policy 0, policy_version 30354 (0.0006) -[2026-06-07 02:25:26,337][324276] Saving new best policy, reward=1030.636! -[2026-06-07 02:25:27,000][324563] Updated weights for policy 0, policy_version 30364 (0.0006) -[2026-06-07 02:25:27,208][324563] Updated weights for policy 0, policy_version 30374 (0.0007) -[2026-06-07 02:25:27,406][324563] Updated weights for policy 0, policy_version 30384 (0.0007) -[2026-06-07 02:25:27,618][324563] Updated weights for policy 0, policy_version 30395 (0.0006) -[2026-06-07 02:25:27,855][324563] Updated weights for policy 0, policy_version 30405 (0.0006) -[2026-06-07 02:25:28,070][324563] Updated weights for policy 0, policy_version 30416 (0.0006) -[2026-06-07 02:25:28,840][324563] Updated weights for policy 0, policy_version 30426 (0.0006) -[2026-06-07 02:25:29,046][324563] Updated weights for policy 0, policy_version 30436 (0.0006) -[2026-06-07 02:25:29,229][324563] Updated weights for policy 0, policy_version 30446 (0.0006) -[2026-06-07 02:25:29,432][324563] Updated weights for policy 0, policy_version 30456 (0.0006) -[2026-06-07 02:25:29,645][324563] Updated weights for policy 0, policy_version 30466 (0.0006) -[2026-06-07 02:25:29,842][324563] Updated weights for policy 0, policy_version 30476 (0.0007) -[2026-06-07 02:25:30,050][324563] Updated weights for policy 0, policy_version 30486 (0.0010) -[2026-06-07 02:25:30,744][321791] Fps is (10 sec: 19661.1, 60 sec: 17476.2, 300 sec: 17661.4). Total num frames: 15630336. Throughput: 0: 17902.9. Samples: 15672448. Policy #0 lag: (min: 37.0, avg: 52.7, max: 101.0) -[2026-06-07 02:25:30,745][321791] Avg episode reward: [(0, '1095.433')] -[2026-06-07 02:25:30,854][324563] Updated weights for policy 0, policy_version 30496 (0.0006) -[2026-06-07 02:25:31,055][324563] Updated weights for policy 0, policy_version 30506 (0.0007) -[2026-06-07 02:25:31,250][324563] Updated weights for policy 0, policy_version 30516 (0.0006) -[2026-06-07 02:25:31,479][324563] Updated weights for policy 0, policy_version 30526 (0.0007) -[2026-06-07 02:25:31,699][324563] Updated weights for policy 0, policy_version 30536 (0.0006) -[2026-06-07 02:25:31,893][324563] Updated weights for policy 0, policy_version 30546 (0.0007) -[2026-06-07 02:25:32,015][324276] Saving new best policy, reward=1095.433! -[2026-06-07 02:25:32,627][324563] Updated weights for policy 0, policy_version 30556 (0.0006) -[2026-06-07 02:25:32,826][324563] Updated weights for policy 0, policy_version 30566 (0.0006) -[2026-06-07 02:25:33,036][324563] Updated weights for policy 0, policy_version 30576 (0.0010) -[2026-06-07 02:25:33,256][324563] Updated weights for policy 0, policy_version 30586 (0.0011) -[2026-06-07 02:25:33,483][324563] Updated weights for policy 0, policy_version 30596 (0.0010) -[2026-06-07 02:25:33,695][324563] Updated weights for policy 0, policy_version 30606 (0.0006) -[2026-06-07 02:25:33,907][324563] Updated weights for policy 0, policy_version 30616 (0.0007) -[2026-06-07 02:25:34,664][324563] Updated weights for policy 0, policy_version 30626 (0.0011) -[2026-06-07 02:25:34,862][324563] Updated weights for policy 0, policy_version 30636 (0.0011) -[2026-06-07 02:25:35,095][324563] Updated weights for policy 0, policy_version 30647 (0.0011) -[2026-06-07 02:25:35,291][324563] Updated weights for policy 0, policy_version 30657 (0.0009) -[2026-06-07 02:25:35,529][324563] Updated weights for policy 0, policy_version 30668 (0.0007) -[2026-06-07 02:25:35,734][324563] Updated weights for policy 0, policy_version 30678 (0.0006) -[2026-06-07 02:25:35,744][321791] Fps is (10 sec: 16384.3, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 15695872. Throughput: 0: 17684.0. Samples: 15718784. Policy #0 lag: (min: 37.0, avg: 52.7, max: 101.0) -[2026-06-07 02:25:35,745][321791] Avg episode reward: [(0, '1110.031')] -[2026-06-07 02:25:35,769][324276] Saving new best policy, reward=1110.031! -[2026-06-07 02:25:36,495][324563] Updated weights for policy 0, policy_version 30688 (0.0010) -[2026-06-07 02:25:36,703][324563] Updated weights for policy 0, policy_version 30698 (0.0011) -[2026-06-07 02:25:36,903][324563] Updated weights for policy 0, policy_version 30708 (0.0011) -[2026-06-07 02:25:37,111][324563] Updated weights for policy 0, policy_version 30718 (0.0011) -[2026-06-07 02:25:37,308][324563] Updated weights for policy 0, policy_version 30728 (0.0013) -[2026-06-07 02:25:37,523][324563] Updated weights for policy 0, policy_version 30739 (0.0011) -[2026-06-07 02:25:38,281][324563] Updated weights for policy 0, policy_version 30749 (0.0009) -[2026-06-07 02:25:38,498][324563] Updated weights for policy 0, policy_version 30759 (0.0006) -[2026-06-07 02:25:38,679][324563] Updated weights for policy 0, policy_version 30769 (0.0007) -[2026-06-07 02:25:38,888][324563] Updated weights for policy 0, policy_version 30779 (0.0007) -[2026-06-07 02:25:39,074][324563] Updated weights for policy 0, policy_version 30789 (0.0008) -[2026-06-07 02:25:39,277][324563] Updated weights for policy 0, policy_version 30799 (0.0011) -[2026-06-07 02:25:40,050][324563] Updated weights for policy 0, policy_version 30809 (0.0011) -[2026-06-07 02:25:40,241][324563] Updated weights for policy 0, policy_version 30819 (0.0011) -[2026-06-07 02:25:40,514][324563] Updated weights for policy 0, policy_version 30832 (0.0011) -[2026-06-07 02:25:40,717][324563] Updated weights for policy 0, policy_version 30842 (0.0011) -[2026-06-07 02:25:40,744][321791] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 15794176. Throughput: 0: 17686.8. Samples: 15821952. Policy #0 lag: (min: 37.0, avg: 52.7, max: 101.0) -[2026-06-07 02:25:40,745][321791] Avg episode reward: [(0, '1119.827')] -[2026-06-07 02:25:40,923][324563] Updated weights for policy 0, policy_version 30853 (0.0011) -[2026-06-07 02:25:41,137][324563] Updated weights for policy 0, policy_version 30863 (0.0011) -[2026-06-07 02:25:41,325][324276] Saving new best policy, reward=1119.827! -[2026-06-07 02:25:41,918][324563] Updated weights for policy 0, policy_version 30873 (0.0011) -[2026-06-07 02:25:42,119][324563] Updated weights for policy 0, policy_version 30883 (0.0010) -[2026-06-07 02:25:42,335][324563] Updated weights for policy 0, policy_version 30894 (0.0011) -[2026-06-07 02:25:42,536][324563] Updated weights for policy 0, policy_version 30904 (0.0010) -[2026-06-07 02:25:42,744][324563] Updated weights for policy 0, policy_version 30914 (0.0011) -[2026-06-07 02:25:42,948][324563] Updated weights for policy 0, policy_version 30924 (0.0011) -[2026-06-07 02:25:43,161][324563] Updated weights for policy 0, policy_version 30934 (0.0011) -[2026-06-07 02:25:43,966][324563] Updated weights for policy 0, policy_version 30944 (0.0011) -[2026-06-07 02:25:44,174][324563] Updated weights for policy 0, policy_version 30954 (0.0011) -[2026-06-07 02:25:44,379][324563] Updated weights for policy 0, policy_version 30964 (0.0011) -[2026-06-07 02:25:44,604][324563] Updated weights for policy 0, policy_version 30974 (0.0011) -[2026-06-07 02:25:44,798][324563] Updated weights for policy 0, policy_version 30984 (0.0011) -[2026-06-07 02:25:45,035][324563] Updated weights for policy 0, policy_version 30995 (0.0011) -[2026-06-07 02:25:45,744][321791] Fps is (10 sec: 19660.9, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 15892480. Throughput: 0: 17814.8. Samples: 15936384. Policy #0 lag: (min: 61.0, avg: 75.5, max: 125.0) -[2026-06-07 02:25:45,745][321791] Avg episode reward: [(0, '1091.685')] -[2026-06-07 02:25:45,815][324563] Updated weights for policy 0, policy_version 31005 (0.0011) -[2026-06-07 02:25:46,015][324563] Updated weights for policy 0, policy_version 31015 (0.0011) -[2026-06-07 02:25:46,247][324563] Updated weights for policy 0, policy_version 31026 (0.0011) -[2026-06-07 02:25:46,483][324563] Updated weights for policy 0, policy_version 31037 (0.0011) -[2026-06-07 02:25:46,678][324563] Updated weights for policy 0, policy_version 31047 (0.0011) -[2026-06-07 02:25:46,871][324563] Updated weights for policy 0, policy_version 31057 (0.0011) -[2026-06-07 02:25:47,619][324563] Updated weights for policy 0, policy_version 31067 (0.0011) -[2026-06-07 02:25:47,822][324563] Updated weights for policy 0, policy_version 31077 (0.0011) -[2026-06-07 02:25:48,040][324563] Updated weights for policy 0, policy_version 31087 (0.0011) -[2026-06-07 02:25:48,241][324563] Updated weights for policy 0, policy_version 31097 (0.0011) -[2026-06-07 02:25:48,447][324563] Updated weights for policy 0, policy_version 31107 (0.0011) -[2026-06-07 02:25:48,667][324563] Updated weights for policy 0, policy_version 31118 (0.0010) -[2026-06-07 02:25:48,854][324563] Updated weights for policy 0, policy_version 31128 (0.0011) -[2026-06-07 02:25:49,621][324563] Updated weights for policy 0, policy_version 31138 (0.0007) -[2026-06-07 02:25:49,816][324563] Updated weights for policy 0, policy_version 31148 (0.0006) -[2026-06-07 02:25:50,010][324563] Updated weights for policy 0, policy_version 31158 (0.0007) -[2026-06-07 02:25:50,222][324563] Updated weights for policy 0, policy_version 31168 (0.0007) -[2026-06-07 02:25:50,437][324563] Updated weights for policy 0, policy_version 31178 (0.0006) -[2026-06-07 02:25:50,676][324563] Updated weights for policy 0, policy_version 31188 (0.0007) -[2026-06-07 02:25:50,744][321791] Fps is (10 sec: 19660.7, 60 sec: 18022.5, 300 sec: 17661.4). Total num frames: 15990784. Throughput: 0: 17703.8. Samples: 15984256. Policy #0 lag: (min: 61.0, avg: 75.5, max: 125.0) -[2026-06-07 02:25:50,745][321791] Avg episode reward: [(0, '1095.691')] -[2026-06-07 02:25:51,411][324563] Updated weights for policy 0, policy_version 31198 (0.0009) -[2026-06-07 02:25:51,629][324563] Updated weights for policy 0, policy_version 31208 (0.0010) -[2026-06-07 02:25:51,834][324563] Updated weights for policy 0, policy_version 31218 (0.0006) -[2026-06-07 02:25:52,016][324563] Updated weights for policy 0, policy_version 31228 (0.0006) -[2026-06-07 02:25:52,219][324563] Updated weights for policy 0, policy_version 31238 (0.0006) -[2026-06-07 02:25:52,465][324563] Updated weights for policy 0, policy_version 31250 (0.0006) -[2026-06-07 02:25:53,226][324563] Updated weights for policy 0, policy_version 31260 (0.0006) -[2026-06-07 02:25:53,411][324563] Updated weights for policy 0, policy_version 31270 (0.0006) -[2026-06-07 02:25:53,603][324563] Updated weights for policy 0, policy_version 31280 (0.0006) -[2026-06-07 02:25:53,805][324563] Updated weights for policy 0, policy_version 31290 (0.0006) -[2026-06-07 02:25:54,040][324563] Updated weights for policy 0, policy_version 31301 (0.0006) -[2026-06-07 02:25:54,255][324563] Updated weights for policy 0, policy_version 31311 (0.0007) -[2026-06-07 02:25:55,005][324563] Updated weights for policy 0, policy_version 31321 (0.0006) -[2026-06-07 02:25:55,216][324563] Updated weights for policy 0, policy_version 31331 (0.0006) -[2026-06-07 02:25:55,425][324563] Updated weights for policy 0, policy_version 31342 (0.0006) -[2026-06-07 02:25:55,673][324563] Updated weights for policy 0, policy_version 31353 (0.0006) -[2026-06-07 02:25:55,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 16056320. Throughput: 0: 17769.3. Samples: 16089344. Policy #0 lag: (min: 61.0, avg: 75.5, max: 125.0) -[2026-06-07 02:25:55,745][321791] Avg episode reward: [(0, '1126.253')] -[2026-06-07 02:25:55,865][324563] Updated weights for policy 0, policy_version 31363 (0.0007) -[2026-06-07 02:25:56,083][324563] Updated weights for policy 0, policy_version 31373 (0.0006) -[2026-06-07 02:25:56,283][324563] Updated weights for policy 0, policy_version 31383 (0.0006) -[2026-06-07 02:25:56,293][324276] Saving new best policy, reward=1126.253! -[2026-06-07 02:25:57,031][324563] Updated weights for policy 0, policy_version 31393 (0.0006) -[2026-06-07 02:25:57,255][324563] Updated weights for policy 0, policy_version 31403 (0.0006) -[2026-06-07 02:25:57,464][324563] Updated weights for policy 0, policy_version 31413 (0.0006) -[2026-06-07 02:25:57,671][324563] Updated weights for policy 0, policy_version 31423 (0.0007) -[2026-06-07 02:25:57,870][324563] Updated weights for policy 0, policy_version 31433 (0.0006) -[2026-06-07 02:25:58,063][324563] Updated weights for policy 0, policy_version 31443 (0.0006) -[2026-06-07 02:25:58,787][324563] Updated weights for policy 0, policy_version 31453 (0.0006) -[2026-06-07 02:25:59,002][324563] Updated weights for policy 0, policy_version 31463 (0.0007) -[2026-06-07 02:25:59,206][324563] Updated weights for policy 0, policy_version 31473 (0.0006) -[2026-06-07 02:25:59,429][324563] Updated weights for policy 0, policy_version 31484 (0.0006) -[2026-06-07 02:25:59,639][324563] Updated weights for policy 0, policy_version 31495 (0.0006) -[2026-06-07 02:25:59,841][324563] Updated weights for policy 0, policy_version 31505 (0.0006) -[2026-06-07 02:26:00,560][324563] Updated weights for policy 0, policy_version 31515 (0.0007) -[2026-06-07 02:26:00,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 16154624. Throughput: 0: 17811.9. Samples: 16203392. Policy #0 lag: (min: 61.0, avg: 75.5, max: 125.0) -[2026-06-07 02:26:00,745][321791] Avg episode reward: [(0, '1101.412')] -[2026-06-07 02:26:00,784][324563] Updated weights for policy 0, policy_version 31525 (0.0006) -[2026-06-07 02:26:00,998][324563] Updated weights for policy 0, policy_version 31535 (0.0007) -[2026-06-07 02:26:01,218][324563] Updated weights for policy 0, policy_version 31545 (0.0006) -[2026-06-07 02:26:01,419][324563] Updated weights for policy 0, policy_version 31555 (0.0006) -[2026-06-07 02:26:01,623][324563] Updated weights for policy 0, policy_version 31565 (0.0009) -[2026-06-07 02:26:01,823][324563] Updated weights for policy 0, policy_version 31575 (0.0010) -[2026-06-07 02:26:02,537][324563] Updated weights for policy 0, policy_version 31585 (0.0007) -[2026-06-07 02:26:02,755][324563] Updated weights for policy 0, policy_version 31595 (0.0006) -[2026-06-07 02:26:02,946][324563] Updated weights for policy 0, policy_version 31605 (0.0006) -[2026-06-07 02:26:03,163][324563] Updated weights for policy 0, policy_version 31615 (0.0006) -[2026-06-07 02:26:03,382][324563] Updated weights for policy 0, policy_version 31625 (0.0006) -[2026-06-07 02:26:03,595][324563] Updated weights for policy 0, policy_version 31635 (0.0007) -[2026-06-07 02:26:04,299][324563] Updated weights for policy 0, policy_version 31645 (0.0006) -[2026-06-07 02:26:04,511][324563] Updated weights for policy 0, policy_version 31655 (0.0006) -[2026-06-07 02:26:04,728][324563] Updated weights for policy 0, policy_version 31665 (0.0006) -[2026-06-07 02:26:04,928][324563] Updated weights for policy 0, policy_version 31675 (0.0006) -[2026-06-07 02:26:05,134][324563] Updated weights for policy 0, policy_version 31685 (0.0006) -[2026-06-07 02:26:05,341][324563] Updated weights for policy 0, policy_version 31695 (0.0006) -[2026-06-07 02:26:05,744][321791] Fps is (10 sec: 19660.7, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 16252928. Throughput: 0: 17732.4. Samples: 16248576. Policy #0 lag: (min: 61.0, avg: 76.7, max: 125.0) -[2026-06-07 02:26:05,745][321791] Avg episode reward: [(0, '1085.757')] -[2026-06-07 02:26:06,077][324563] Updated weights for policy 0, policy_version 31705 (0.0006) -[2026-06-07 02:26:06,279][324563] Updated weights for policy 0, policy_version 31715 (0.0007) -[2026-06-07 02:26:06,480][324563] Updated weights for policy 0, policy_version 31725 (0.0006) -[2026-06-07 02:26:06,706][324563] Updated weights for policy 0, policy_version 31735 (0.0006) -[2026-06-07 02:26:06,913][324563] Updated weights for policy 0, policy_version 31745 (0.0006) -[2026-06-07 02:26:07,094][324563] Updated weights for policy 0, policy_version 31755 (0.0006) -[2026-06-07 02:26:07,295][324563] Updated weights for policy 0, policy_version 31765 (0.0006) -[2026-06-07 02:26:08,027][324563] Updated weights for policy 0, policy_version 31775 (0.0007) -[2026-06-07 02:26:08,215][324563] Updated weights for policy 0, policy_version 31785 (0.0006) -[2026-06-07 02:26:08,433][324563] Updated weights for policy 0, policy_version 31795 (0.0006) -[2026-06-07 02:26:08,653][324563] Updated weights for policy 0, policy_version 31805 (0.0006) -[2026-06-07 02:26:08,856][324563] Updated weights for policy 0, policy_version 31815 (0.0007) -[2026-06-07 02:26:09,082][324563] Updated weights for policy 0, policy_version 31825 (0.0007) -[2026-06-07 02:26:09,815][324563] Updated weights for policy 0, policy_version 31835 (0.0006) -[2026-06-07 02:26:10,001][324563] Updated weights for policy 0, policy_version 31845 (0.0006) -[2026-06-07 02:26:10,214][324563] Updated weights for policy 0, policy_version 31855 (0.0006) -[2026-06-07 02:26:10,418][324563] Updated weights for policy 0, policy_version 31865 (0.0007) -[2026-06-07 02:26:10,627][324563] Updated weights for policy 0, policy_version 31875 (0.0006) -[2026-06-07 02:26:10,744][321791] Fps is (10 sec: 16383.8, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 16318464. Throughput: 0: 17885.9. Samples: 16359808. Policy #0 lag: (min: 61.0, avg: 76.7, max: 125.0) -[2026-06-07 02:26:10,745][321791] Avg episode reward: [(0, '1131.747')] -[2026-06-07 02:26:10,851][324563] Updated weights for policy 0, policy_version 31885 (0.0007) -[2026-06-07 02:26:11,058][324563] Updated weights for policy 0, policy_version 31895 (0.0006) -[2026-06-07 02:26:11,084][324276] Saving new best policy, reward=1131.747! -[2026-06-07 02:26:11,809][324563] Updated weights for policy 0, policy_version 31905 (0.0006) -[2026-06-07 02:26:12,001][324563] Updated weights for policy 0, policy_version 31915 (0.0007) -[2026-06-07 02:26:12,201][324563] Updated weights for policy 0, policy_version 31925 (0.0006) -[2026-06-07 02:26:12,425][324563] Updated weights for policy 0, policy_version 31936 (0.0006) -[2026-06-07 02:26:12,638][324563] Updated weights for policy 0, policy_version 31946 (0.0006) -[2026-06-07 02:26:12,861][324563] Updated weights for policy 0, policy_version 31956 (0.0006) -[2026-06-07 02:26:13,565][324563] Updated weights for policy 0, policy_version 31966 (0.0006) -[2026-06-07 02:26:13,764][324563] Updated weights for policy 0, policy_version 31976 (0.0006) -[2026-06-07 02:26:13,973][324563] Updated weights for policy 0, policy_version 31986 (0.0007) -[2026-06-07 02:26:14,168][324563] Updated weights for policy 0, policy_version 31996 (0.0007) -[2026-06-07 02:26:14,368][324563] Updated weights for policy 0, policy_version 32006 (0.0006) -[2026-06-07 02:26:14,557][324563] Updated weights for policy 0, policy_version 32016 (0.0006) -[2026-06-07 02:26:15,278][324563] Updated weights for policy 0, policy_version 32026 (0.0006) -[2026-06-07 02:26:15,506][324563] Updated weights for policy 0, policy_version 32037 (0.0007) -[2026-06-07 02:26:15,718][324563] Updated weights for policy 0, policy_version 32047 (0.0006) -[2026-06-07 02:26:15,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 16416768. Throughput: 0: 17595.8. Samples: 16464256. Policy #0 lag: (min: 61.0, avg: 76.7, max: 125.0) -[2026-06-07 02:26:15,745][321791] Avg episode reward: [(0, '1153.651')] -[2026-06-07 02:26:15,924][324563] Updated weights for policy 0, policy_version 32057 (0.0006) -[2026-06-07 02:26:16,131][324563] Updated weights for policy 0, policy_version 32068 (0.0007) -[2026-06-07 02:26:16,328][324563] Updated weights for policy 0, policy_version 32078 (0.0006) -[2026-06-07 02:26:16,538][324276] Saving new best policy, reward=1153.651! -[2026-06-07 02:26:16,540][324563] Updated weights for policy 0, policy_version 32088 (0.0006) -[2026-06-07 02:26:17,260][324563] Updated weights for policy 0, policy_version 32098 (0.0007) -[2026-06-07 02:26:17,454][324563] Updated weights for policy 0, policy_version 32108 (0.0006) -[2026-06-07 02:26:17,708][324563] Updated weights for policy 0, policy_version 32119 (0.0006) -[2026-06-07 02:26:17,906][324563] Updated weights for policy 0, policy_version 32129 (0.0006) -[2026-06-07 02:26:18,129][324563] Updated weights for policy 0, policy_version 32139 (0.0007) -[2026-06-07 02:26:18,334][324563] Updated weights for policy 0, policy_version 32149 (0.0006) -[2026-06-07 02:26:19,087][324563] Updated weights for policy 0, policy_version 32160 (0.0007) -[2026-06-07 02:26:19,295][324563] Updated weights for policy 0, policy_version 32170 (0.0006) -[2026-06-07 02:26:19,498][324563] Updated weights for policy 0, policy_version 32180 (0.0006) -[2026-06-07 02:26:19,698][324563] Updated weights for policy 0, policy_version 32190 (0.0007) -[2026-06-07 02:26:19,927][324563] Updated weights for policy 0, policy_version 32200 (0.0006) -[2026-06-07 02:26:20,139][324563] Updated weights for policy 0, policy_version 32210 (0.0007) -[2026-06-07 02:26:20,744][321791] Fps is (10 sec: 19660.8, 60 sec: 18022.5, 300 sec: 17661.4). Total num frames: 16515072. Throughput: 0: 17641.2. Samples: 16512640. Policy #0 lag: (min: 61.0, avg: 76.7, max: 125.0) -[2026-06-07 02:26:20,745][321791] Avg episode reward: [(0, '1107.162')] -[2026-06-07 02:26:20,878][324563] Updated weights for policy 0, policy_version 32220 (0.0007) -[2026-06-07 02:26:21,078][324563] Updated weights for policy 0, policy_version 32230 (0.0007) -[2026-06-07 02:26:21,290][324563] Updated weights for policy 0, policy_version 32240 (0.0008) -[2026-06-07 02:26:21,504][324563] Updated weights for policy 0, policy_version 32250 (0.0011) -[2026-06-07 02:26:21,716][324563] Updated weights for policy 0, policy_version 32260 (0.0011) -[2026-06-07 02:26:21,933][324563] Updated weights for policy 0, policy_version 32271 (0.0009) -[2026-06-07 02:26:22,663][324563] Updated weights for policy 0, policy_version 32281 (0.0010) -[2026-06-07 02:26:22,881][324563] Updated weights for policy 0, policy_version 32291 (0.0006) -[2026-06-07 02:26:23,110][324563] Updated weights for policy 0, policy_version 32301 (0.0006) -[2026-06-07 02:26:23,315][324563] Updated weights for policy 0, policy_version 32311 (0.0007) -[2026-06-07 02:26:23,549][324563] Updated weights for policy 0, policy_version 32321 (0.0006) -[2026-06-07 02:26:23,743][324563] Updated weights for policy 0, policy_version 32331 (0.0006) -[2026-06-07 02:26:23,973][324563] Updated weights for policy 0, policy_version 32341 (0.0007) -[2026-06-07 02:26:24,679][324563] Updated weights for policy 0, policy_version 32351 (0.0007) -[2026-06-07 02:26:24,880][324563] Updated weights for policy 0, policy_version 32361 (0.0007) -[2026-06-07 02:26:25,086][324563] Updated weights for policy 0, policy_version 32371 (0.0006) -[2026-06-07 02:26:25,283][324563] Updated weights for policy 0, policy_version 32381 (0.0006) -[2026-06-07 02:26:25,506][324563] Updated weights for policy 0, policy_version 32391 (0.0007) -[2026-06-07 02:26:25,713][324563] Updated weights for policy 0, policy_version 32401 (0.0006) -[2026-06-07 02:26:25,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 16580608. Throughput: 0: 17814.8. Samples: 16623616. Policy #0 lag: (min: 61.0, avg: 76.7, max: 125.0) -[2026-06-07 02:26:25,745][321791] Avg episode reward: [(0, '1132.727')] -[2026-06-07 02:26:26,477][324563] Updated weights for policy 0, policy_version 32413 (0.0007) -[2026-06-07 02:26:26,677][324563] Updated weights for policy 0, policy_version 32423 (0.0006) -[2026-06-07 02:26:26,897][324563] Updated weights for policy 0, policy_version 32433 (0.0007) -[2026-06-07 02:26:27,107][324563] Updated weights for policy 0, policy_version 32443 (0.0007) -[2026-06-07 02:26:27,342][324563] Updated weights for policy 0, policy_version 32454 (0.0010) -[2026-06-07 02:26:27,539][324563] Updated weights for policy 0, policy_version 32464 (0.0007) -[2026-06-07 02:26:28,253][324563] Updated weights for policy 0, policy_version 32474 (0.0007) -[2026-06-07 02:26:28,483][324563] Updated weights for policy 0, policy_version 32485 (0.0011) -[2026-06-07 02:26:28,676][324563] Updated weights for policy 0, policy_version 32495 (0.0011) -[2026-06-07 02:26:28,900][324563] Updated weights for policy 0, policy_version 32505 (0.0011) -[2026-06-07 02:26:29,092][324563] Updated weights for policy 0, policy_version 32515 (0.0009) -[2026-06-07 02:26:29,273][324563] Updated weights for policy 0, policy_version 32525 (0.0007) -[2026-06-07 02:26:29,481][324563] Updated weights for policy 0, policy_version 32535 (0.0007) -[2026-06-07 02:26:30,208][324563] Updated weights for policy 0, policy_version 32545 (0.0008) -[2026-06-07 02:26:30,416][324563] Updated weights for policy 0, policy_version 32555 (0.0009) -[2026-06-07 02:26:30,634][324563] Updated weights for policy 0, policy_version 32565 (0.0011) -[2026-06-07 02:26:30,744][321791] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 16678912. Throughput: 0: 17530.3. Samples: 16725248. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) -[2026-06-07 02:26:30,745][321791] Avg episode reward: [(0, '1121.225')] -[2026-06-07 02:26:30,828][324563] Updated weights for policy 0, policy_version 32575 (0.0011) -[2026-06-07 02:26:31,020][324563] Updated weights for policy 0, policy_version 32585 (0.0011) -[2026-06-07 02:26:31,242][324563] Updated weights for policy 0, policy_version 32595 (0.0011) -[2026-06-07 02:26:31,982][324563] Updated weights for policy 0, policy_version 32605 (0.0010) -[2026-06-07 02:26:32,178][324563] Updated weights for policy 0, policy_version 32615 (0.0010) -[2026-06-07 02:26:32,388][324563] Updated weights for policy 0, policy_version 32625 (0.0011) -[2026-06-07 02:26:32,597][324563] Updated weights for policy 0, policy_version 32635 (0.0010) -[2026-06-07 02:26:32,795][324563] Updated weights for policy 0, policy_version 32645 (0.0011) -[2026-06-07 02:26:33,028][324563] Updated weights for policy 0, policy_version 32655 (0.0010) -[2026-06-07 02:26:33,737][324563] Updated weights for policy 0, policy_version 32665 (0.0010) -[2026-06-07 02:26:33,962][324563] Updated weights for policy 0, policy_version 32676 (0.0006) -[2026-06-07 02:26:34,186][324563] Updated weights for policy 0, policy_version 32686 (0.0006) -[2026-06-07 02:26:34,397][324563] Updated weights for policy 0, policy_version 32696 (0.0006) -[2026-06-07 02:26:34,586][324563] Updated weights for policy 0, policy_version 32706 (0.0006) -[2026-06-07 02:26:34,796][324563] Updated weights for policy 0, policy_version 32716 (0.0006) -[2026-06-07 02:26:35,004][324563] Updated weights for policy 0, policy_version 32726 (0.0006) -[2026-06-07 02:26:35,718][324563] Updated weights for policy 0, policy_version 32736 (0.0006) -[2026-06-07 02:26:35,744][321791] Fps is (10 sec: 19660.7, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 16777216. Throughput: 0: 17666.8. Samples: 16779264. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) -[2026-06-07 02:26:35,745][321791] Avg episode reward: [(0, '1167.785')] -[2026-06-07 02:26:35,940][324563] Updated weights for policy 0, policy_version 32746 (0.0006) -[2026-06-07 02:26:36,158][324563] Updated weights for policy 0, policy_version 32756 (0.0006) -[2026-06-07 02:26:36,363][324563] Updated weights for policy 0, policy_version 32766 (0.0006) -[2026-06-07 02:26:36,571][324563] Updated weights for policy 0, policy_version 32776 (0.0011) -[2026-06-07 02:26:36,795][324563] Updated weights for policy 0, policy_version 32786 (0.0007) -[2026-06-07 02:26:36,912][324276] Saving new best policy, reward=1167.785! -[2026-06-07 02:26:37,523][324563] Updated weights for policy 0, policy_version 32796 (0.0006) -[2026-06-07 02:26:37,744][324563] Updated weights for policy 0, policy_version 32806 (0.0008) -[2026-06-07 02:26:37,948][324563] Updated weights for policy 0, policy_version 32816 (0.0011) -[2026-06-07 02:26:38,136][324563] Updated weights for policy 0, policy_version 32826 (0.0007) -[2026-06-07 02:26:38,349][324563] Updated weights for policy 0, policy_version 32836 (0.0010) -[2026-06-07 02:26:38,562][324563] Updated weights for policy 0, policy_version 32846 (0.0011) -[2026-06-07 02:26:38,752][324563] Updated weights for policy 0, policy_version 32856 (0.0012) -[2026-06-07 02:26:39,481][324563] Updated weights for policy 0, policy_version 32866 (0.0011) -[2026-06-07 02:26:39,695][324563] Updated weights for policy 0, policy_version 32876 (0.0011) -[2026-06-07 02:26:39,915][324563] Updated weights for policy 0, policy_version 32886 (0.0012) -[2026-06-07 02:26:40,144][324563] Updated weights for policy 0, policy_version 32897 (0.0011) -[2026-06-07 02:26:40,354][324563] Updated weights for policy 0, policy_version 32907 (0.0009) -[2026-06-07 02:26:40,536][324563] Updated weights for policy 0, policy_version 32917 (0.0007) -[2026-06-07 02:26:40,744][321791] Fps is (10 sec: 19660.7, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 16875520. Throughput: 0: 17712.3. Samples: 16886400. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) -[2026-06-07 02:26:40,745][321791] Avg episode reward: [(0, '1168.637')] -[2026-06-07 02:26:40,752][324276] Saving new best policy, reward=1168.637! -[2026-06-07 02:26:41,273][324563] Updated weights for policy 0, policy_version 32927 (0.0006) -[2026-06-07 02:26:41,485][324563] Updated weights for policy 0, policy_version 32937 (0.0006) -[2026-06-07 02:26:41,685][324563] Updated weights for policy 0, policy_version 32947 (0.0006) -[2026-06-07 02:26:41,917][324563] Updated weights for policy 0, policy_version 32958 (0.0006) -[2026-06-07 02:26:42,125][324563] Updated weights for policy 0, policy_version 32968 (0.0006) -[2026-06-07 02:26:42,346][324563] Updated weights for policy 0, policy_version 32978 (0.0006) -[2026-06-07 02:26:43,103][324563] Updated weights for policy 0, policy_version 32988 (0.0007) -[2026-06-07 02:26:43,338][324563] Updated weights for policy 0, policy_version 32999 (0.0007) -[2026-06-07 02:26:43,561][324563] Updated weights for policy 0, policy_version 33009 (0.0007) -[2026-06-07 02:26:43,790][324563] Updated weights for policy 0, policy_version 33020 (0.0007) -[2026-06-07 02:26:43,994][324563] Updated weights for policy 0, policy_version 33031 (0.0006) -[2026-06-07 02:26:44,204][324563] Updated weights for policy 0, policy_version 33041 (0.0006) -[2026-06-07 02:26:44,901][324563] Updated weights for policy 0, policy_version 33051 (0.0007) -[2026-06-07 02:26:45,101][324563] Updated weights for policy 0, policy_version 33061 (0.0006) -[2026-06-07 02:26:45,296][324563] Updated weights for policy 0, policy_version 33071 (0.0007) -[2026-06-07 02:26:45,502][324563] Updated weights for policy 0, policy_version 33081 (0.0006) -[2026-06-07 02:26:45,702][324563] Updated weights for policy 0, policy_version 33091 (0.0006) -[2026-06-07 02:26:45,744][321791] Fps is (10 sec: 16383.9, 60 sec: 17476.2, 300 sec: 17661.4). Total num frames: 16941056. Throughput: 0: 17450.6. Samples: 16988672. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) -[2026-06-07 02:26:45,745][321791] Avg episode reward: [(0, '1182.683')] -[2026-06-07 02:26:45,901][324563] Updated weights for policy 0, policy_version 33101 (0.0009) -[2026-06-07 02:26:46,103][324563] Updated weights for policy 0, policy_version 33111 (0.0011) -[2026-06-07 02:26:46,117][324276] Saving new best policy, reward=1182.683! -[2026-06-07 02:26:46,868][324563] Updated weights for policy 0, policy_version 33122 (0.0007) -[2026-06-07 02:26:47,047][324563] Updated weights for policy 0, policy_version 33132 (0.0006) -[2026-06-07 02:26:47,243][324563] Updated weights for policy 0, policy_version 33142 (0.0006) -[2026-06-07 02:26:47,443][324563] Updated weights for policy 0, policy_version 33152 (0.0006) -[2026-06-07 02:26:47,673][324563] Updated weights for policy 0, policy_version 33162 (0.0006) -[2026-06-07 02:26:47,893][324563] Updated weights for policy 0, policy_version 33172 (0.0006) -[2026-06-07 02:26:48,571][324563] Updated weights for policy 0, policy_version 33182 (0.0007) -[2026-06-07 02:26:48,796][324563] Updated weights for policy 0, policy_version 33193 (0.0007) -[2026-06-07 02:26:48,988][324563] Updated weights for policy 0, policy_version 33203 (0.0006) -[2026-06-07 02:26:49,193][324563] Updated weights for policy 0, policy_version 33213 (0.0006) -[2026-06-07 02:26:49,387][324563] Updated weights for policy 0, policy_version 33223 (0.0007) -[2026-06-07 02:26:49,631][324563] Updated weights for policy 0, policy_version 33234 (0.0007) -[2026-06-07 02:26:50,359][324563] Updated weights for policy 0, policy_version 33245 (0.0006) -[2026-06-07 02:26:50,556][324563] Updated weights for policy 0, policy_version 33255 (0.0006) -[2026-06-07 02:26:50,744][321791] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 17039360. Throughput: 0: 17718.1. Samples: 17045888. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) -[2026-06-07 02:26:50,745][321791] Avg episode reward: [(0, '1198.929')] -[2026-06-07 02:26:50,770][324563] Updated weights for policy 0, policy_version 33265 (0.0007) -[2026-06-07 02:26:50,961][324563] Updated weights for policy 0, policy_version 33275 (0.0007) -[2026-06-07 02:26:51,168][324563] Updated weights for policy 0, policy_version 33286 (0.0007) -[2026-06-07 02:26:51,365][324563] Updated weights for policy 0, policy_version 33296 (0.0006) -[2026-06-07 02:26:51,528][324276] Saving new best policy, reward=1198.929! -[2026-06-07 02:26:52,131][324563] Updated weights for policy 0, policy_version 33307 (0.0006) -[2026-06-07 02:26:52,328][324563] Updated weights for policy 0, policy_version 33317 (0.0006) -[2026-06-07 02:26:52,521][324563] Updated weights for policy 0, policy_version 33327 (0.0006) -[2026-06-07 02:26:52,715][324563] Updated weights for policy 0, policy_version 33337 (0.0006) -[2026-06-07 02:26:52,914][324563] Updated weights for policy 0, policy_version 33347 (0.0006) -[2026-06-07 02:26:53,129][324563] Updated weights for policy 0, policy_version 33357 (0.0007) -[2026-06-07 02:26:53,336][324563] Updated weights for policy 0, policy_version 33367 (0.0006) -[2026-06-07 02:26:54,116][324563] Updated weights for policy 0, policy_version 33378 (0.0007) -[2026-06-07 02:26:54,317][324563] Updated weights for policy 0, policy_version 33388 (0.0006) -[2026-06-07 02:26:54,513][324563] Updated weights for policy 0, policy_version 33398 (0.0006) -[2026-06-07 02:26:54,737][324563] Updated weights for policy 0, policy_version 33408 (0.0007) -[2026-06-07 02:26:54,955][324563] Updated weights for policy 0, policy_version 33419 (0.0007) -[2026-06-07 02:26:55,174][324563] Updated weights for policy 0, policy_version 33429 (0.0006) -[2026-06-07 02:26:55,744][321791] Fps is (10 sec: 19660.9, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 17137664. Throughput: 0: 17538.8. Samples: 17149056. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) -[2026-06-07 02:26:55,745][321791] Avg episode reward: [(0, '1221.359')] -[2026-06-07 02:26:55,887][324563] Updated weights for policy 0, policy_version 33439 (0.0006) -[2026-06-07 02:26:56,103][324563] Updated weights for policy 0, policy_version 33450 (0.0006) -[2026-06-07 02:26:56,319][324563] Updated weights for policy 0, policy_version 33460 (0.0007) -[2026-06-07 02:26:56,524][324563] Updated weights for policy 0, policy_version 33470 (0.0009) -[2026-06-07 02:26:56,733][324563] Updated weights for policy 0, policy_version 33480 (0.0011) -[2026-06-07 02:26:56,943][324563] Updated weights for policy 0, policy_version 33490 (0.0007) -[2026-06-07 02:26:57,063][324276] Saving new best policy, reward=1221.359! -[2026-06-07 02:26:57,702][324563] Updated weights for policy 0, policy_version 33500 (0.0009) -[2026-06-07 02:26:57,921][324563] Updated weights for policy 0, policy_version 33511 (0.0006) -[2026-06-07 02:26:58,126][324563] Updated weights for policy 0, policy_version 33521 (0.0006) -[2026-06-07 02:26:58,309][324563] Updated weights for policy 0, policy_version 33531 (0.0006) -[2026-06-07 02:26:58,531][324563] Updated weights for policy 0, policy_version 33541 (0.0007) -[2026-06-07 02:26:58,737][324563] Updated weights for policy 0, policy_version 33551 (0.0007) -[2026-06-07 02:26:59,454][324563] Updated weights for policy 0, policy_version 33562 (0.0006) -[2026-06-07 02:26:59,672][324563] Updated weights for policy 0, policy_version 33572 (0.0006) -[2026-06-07 02:26:59,888][324563] Updated weights for policy 0, policy_version 33583 (0.0006) -[2026-06-07 02:27:00,126][324563] Updated weights for policy 0, policy_version 33594 (0.0006) -[2026-06-07 02:27:00,320][324563] Updated weights for policy 0, policy_version 33604 (0.0006) -[2026-06-07 02:27:00,536][324563] Updated weights for policy 0, policy_version 33614 (0.0007) -[2026-06-07 02:27:00,739][324563] Updated weights for policy 0, policy_version 33624 (0.0007) -[2026-06-07 02:27:00,744][321791] Fps is (10 sec: 19660.8, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 17235968. Throughput: 0: 17470.6. Samples: 17250432. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) -[2026-06-07 02:27:00,745][321791] Avg episode reward: [(0, '1239.583')] -[2026-06-07 02:27:00,751][324276] Saving new best policy, reward=1239.583! -[2026-06-07 02:27:01,487][324563] Updated weights for policy 0, policy_version 33634 (0.0007) -[2026-06-07 02:27:01,701][324563] Updated weights for policy 0, policy_version 33644 (0.0007) -[2026-06-07 02:27:01,913][324563] Updated weights for policy 0, policy_version 33655 (0.0006) -[2026-06-07 02:27:02,123][324563] Updated weights for policy 0, policy_version 33666 (0.0006) -[2026-06-07 02:27:02,334][324563] Updated weights for policy 0, policy_version 33676 (0.0006) -[2026-06-07 02:27:02,539][324563] Updated weights for policy 0, policy_version 33686 (0.0007) -[2026-06-07 02:27:03,275][324563] Updated weights for policy 0, policy_version 33697 (0.0008) -[2026-06-07 02:27:03,495][324563] Updated weights for policy 0, policy_version 33707 (0.0011) -[2026-06-07 02:27:03,714][324563] Updated weights for policy 0, policy_version 33718 (0.0009) -[2026-06-07 02:27:03,923][324563] Updated weights for policy 0, policy_version 33728 (0.0007) -[2026-06-07 02:27:04,104][324563] Updated weights for policy 0, policy_version 33738 (0.0007) -[2026-06-07 02:27:04,318][324563] Updated weights for policy 0, policy_version 33748 (0.0007) -[2026-06-07 02:27:05,080][324563] Updated weights for policy 0, policy_version 33758 (0.0006) -[2026-06-07 02:27:05,282][324563] Updated weights for policy 0, policy_version 33768 (0.0007) -[2026-06-07 02:27:05,485][324563] Updated weights for policy 0, policy_version 33778 (0.0006) -[2026-06-07 02:27:05,693][324563] Updated weights for policy 0, policy_version 33788 (0.0007) -[2026-06-07 02:27:05,744][321791] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 17301504. Throughput: 0: 17658.3. Samples: 17307264. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) -[2026-06-07 02:27:05,745][321791] Avg episode reward: [(0, '1249.565')] -[2026-06-07 02:27:05,904][324563] Updated weights for policy 0, policy_version 33798 (0.0006) -[2026-06-07 02:27:06,118][324563] Updated weights for policy 0, policy_version 33808 (0.0007) -[2026-06-07 02:27:06,268][324276] Saving new best policy, reward=1249.565! -[2026-06-07 02:27:06,872][324563] Updated weights for policy 0, policy_version 33818 (0.0006) -[2026-06-07 02:27:07,076][324563] Updated weights for policy 0, policy_version 33828 (0.0007) -[2026-06-07 02:27:07,296][324563] Updated weights for policy 0, policy_version 33838 (0.0007) -[2026-06-07 02:27:07,507][324563] Updated weights for policy 0, policy_version 33848 (0.0006) -[2026-06-07 02:27:07,725][324563] Updated weights for policy 0, policy_version 33858 (0.0006) -[2026-06-07 02:27:07,951][324563] Updated weights for policy 0, policy_version 33868 (0.0006) -[2026-06-07 02:27:08,154][324563] Updated weights for policy 0, policy_version 33878 (0.0007) -[2026-06-07 02:27:08,899][324563] Updated weights for policy 0, policy_version 33888 (0.0007) -[2026-06-07 02:27:09,115][324563] Updated weights for policy 0, policy_version 33898 (0.0006) -[2026-06-07 02:27:09,299][324563] Updated weights for policy 0, policy_version 33908 (0.0007) -[2026-06-07 02:27:09,520][324563] Updated weights for policy 0, policy_version 33919 (0.0006) -[2026-06-07 02:27:09,727][324563] Updated weights for policy 0, policy_version 33929 (0.0007) -[2026-06-07 02:27:09,926][324563] Updated weights for policy 0, policy_version 33939 (0.0006) -[2026-06-07 02:27:10,702][324563] Updated weights for policy 0, policy_version 33949 (0.0006) -[2026-06-07 02:27:10,744][321791] Fps is (10 sec: 16383.9, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 17399808. Throughput: 0: 17484.8. Samples: 17410432. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) -[2026-06-07 02:27:10,745][321791] Avg episode reward: [(0, '1249.706')] -[2026-06-07 02:27:10,913][324563] Updated weights for policy 0, policy_version 33960 (0.0007) -[2026-06-07 02:27:11,124][324563] Updated weights for policy 0, policy_version 33970 (0.0007) -[2026-06-07 02:27:11,356][324563] Updated weights for policy 0, policy_version 33981 (0.0007) -[2026-06-07 02:27:11,550][324563] Updated weights for policy 0, policy_version 33991 (0.0007) -[2026-06-07 02:27:11,788][324563] Updated weights for policy 0, policy_version 34002 (0.0007) -[2026-06-07 02:27:11,907][324276] Saving new best policy, reward=1249.706! -[2026-06-07 02:27:12,520][324563] Updated weights for policy 0, policy_version 34012 (0.0007) -[2026-06-07 02:27:12,731][324563] Updated weights for policy 0, policy_version 34022 (0.0007) -[2026-06-07 02:27:12,952][324563] Updated weights for policy 0, policy_version 34033 (0.0008) -[2026-06-07 02:27:13,164][324563] Updated weights for policy 0, policy_version 34043 (0.0012) -[2026-06-07 02:27:13,358][324563] Updated weights for policy 0, policy_version 34053 (0.0012) -[2026-06-07 02:27:13,586][324563] Updated weights for policy 0, policy_version 34063 (0.0013) -[2026-06-07 02:27:14,358][324563] Updated weights for policy 0, policy_version 34073 (0.0011) -[2026-06-07 02:27:14,533][324563] Updated weights for policy 0, policy_version 34083 (0.0007) -[2026-06-07 02:27:14,753][324563] Updated weights for policy 0, policy_version 34094 (0.0007) -[2026-06-07 02:27:14,962][324563] Updated weights for policy 0, policy_version 34104 (0.0007) -[2026-06-07 02:27:15,177][324563] Updated weights for policy 0, policy_version 34114 (0.0007) -[2026-06-07 02:27:15,372][324563] Updated weights for policy 0, policy_version 34124 (0.0007) -[2026-06-07 02:27:15,575][324563] Updated weights for policy 0, policy_version 34135 (0.0007) -[2026-06-07 02:27:15,744][321791] Fps is (10 sec: 19660.6, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 17498112. Throughput: 0: 17524.6. Samples: 17513856. Policy #0 lag: (min: 3.0, avg: 26.6, max: 67.0) -[2026-06-07 02:27:15,746][321791] Avg episode reward: [(0, '1276.476')] -[2026-06-07 02:27:15,750][324276] Saving new best policy, reward=1276.476! -[2026-06-07 02:27:16,311][324563] Updated weights for policy 0, policy_version 34145 (0.0006) -[2026-06-07 02:27:16,535][324563] Updated weights for policy 0, policy_version 34155 (0.0006) -[2026-06-07 02:27:16,739][324563] Updated weights for policy 0, policy_version 34165 (0.0007) -[2026-06-07 02:27:16,952][324563] Updated weights for policy 0, policy_version 34175 (0.0006) -[2026-06-07 02:27:17,145][324563] Updated weights for policy 0, policy_version 34185 (0.0007) -[2026-06-07 02:27:17,382][324563] Updated weights for policy 0, policy_version 34195 (0.0008) -[2026-06-07 02:27:18,161][324563] Updated weights for policy 0, policy_version 34206 (0.0007) -[2026-06-07 02:27:18,387][324563] Updated weights for policy 0, policy_version 34217 (0.0007) -[2026-06-07 02:27:18,575][324563] Updated weights for policy 0, policy_version 34227 (0.0007) -[2026-06-07 02:27:18,785][324563] Updated weights for policy 0, policy_version 34237 (0.0008) -[2026-06-07 02:27:18,983][324563] Updated weights for policy 0, policy_version 34247 (0.0007) -[2026-06-07 02:27:19,183][324563] Updated weights for policy 0, policy_version 34257 (0.0006) -[2026-06-07 02:27:19,910][324563] Updated weights for policy 0, policy_version 34267 (0.0006) -[2026-06-07 02:27:20,131][324563] Updated weights for policy 0, policy_version 34277 (0.0006) -[2026-06-07 02:27:20,350][324563] Updated weights for policy 0, policy_version 34288 (0.0006) -[2026-06-07 02:27:20,547][324563] Updated weights for policy 0, policy_version 34298 (0.0007) -[2026-06-07 02:27:20,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 17563648. Throughput: 0: 17590.1. Samples: 17570816. Policy #0 lag: (min: 3.0, avg: 26.6, max: 67.0) -[2026-06-07 02:27:20,745][321791] Avg episode reward: [(0, '1278.612')] -[2026-06-07 02:27:20,760][324563] Updated weights for policy 0, policy_version 34308 (0.0006) -[2026-06-07 02:27:20,965][324563] Updated weights for policy 0, policy_version 34318 (0.0006) -[2026-06-07 02:27:21,144][324276] Saving new best policy, reward=1278.612! -[2026-06-07 02:27:21,147][324563] Updated weights for policy 0, policy_version 34328 (0.0006) -[2026-06-07 02:27:21,912][324563] Updated weights for policy 0, policy_version 34339 (0.0007) -[2026-06-07 02:27:22,099][324563] Updated weights for policy 0, policy_version 34349 (0.0007) -[2026-06-07 02:27:22,299][324563] Updated weights for policy 0, policy_version 34359 (0.0006) -[2026-06-07 02:27:22,494][324563] Updated weights for policy 0, policy_version 34369 (0.0007) -[2026-06-07 02:27:22,721][324563] Updated weights for policy 0, policy_version 34380 (0.0006) -[2026-06-07 02:27:22,925][324563] Updated weights for policy 0, policy_version 34390 (0.0007) -[2026-06-07 02:27:23,684][324563] Updated weights for policy 0, policy_version 34400 (0.0006) -[2026-06-07 02:27:23,902][324563] Updated weights for policy 0, policy_version 34410 (0.0007) -[2026-06-07 02:27:24,139][324563] Updated weights for policy 0, policy_version 34421 (0.0007) -[2026-06-07 02:27:24,356][324563] Updated weights for policy 0, policy_version 34431 (0.0007) -[2026-06-07 02:27:24,558][324563] Updated weights for policy 0, policy_version 34441 (0.0006) -[2026-06-07 02:27:24,763][324563] Updated weights for policy 0, policy_version 34451 (0.0006) -[2026-06-07 02:27:25,513][324563] Updated weights for policy 0, policy_version 34461 (0.0006) -[2026-06-07 02:27:25,721][324563] Updated weights for policy 0, policy_version 34471 (0.0006) -[2026-06-07 02:27:25,744][321791] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 17661952. Throughput: 0: 17507.5. Samples: 17674240. Policy #0 lag: (min: 3.0, avg: 26.6, max: 67.0) -[2026-06-07 02:27:25,745][321791] Avg episode reward: [(0, '1295.620')] -[2026-06-07 02:27:25,923][324563] Updated weights for policy 0, policy_version 34481 (0.0006) -[2026-06-07 02:27:26,133][324563] Updated weights for policy 0, policy_version 34491 (0.0006) -[2026-06-07 02:27:26,333][324563] Updated weights for policy 0, policy_version 34501 (0.0006) -[2026-06-07 02:27:26,554][324563] Updated weights for policy 0, policy_version 34512 (0.0006) -[2026-06-07 02:27:26,714][324276] Saving new best policy, reward=1295.620! -[2026-06-07 02:27:27,339][324563] Updated weights for policy 0, policy_version 34522 (0.0006) -[2026-06-07 02:27:27,532][324563] Updated weights for policy 0, policy_version 34532 (0.0006) -[2026-06-07 02:27:27,740][324563] Updated weights for policy 0, policy_version 34542 (0.0006) -[2026-06-07 02:27:27,946][324563] Updated weights for policy 0, policy_version 34553 (0.0007) -[2026-06-07 02:27:28,165][324563] Updated weights for policy 0, policy_version 34563 (0.0006) -[2026-06-07 02:27:28,391][324563] Updated weights for policy 0, policy_version 34573 (0.0007) -[2026-06-07 02:27:28,584][324563] Updated weights for policy 0, policy_version 34583 (0.0006) -[2026-06-07 02:27:29,311][324563] Updated weights for policy 0, policy_version 34593 (0.0006) -[2026-06-07 02:27:29,522][324563] Updated weights for policy 0, policy_version 34603 (0.0006) -[2026-06-07 02:27:29,728][324563] Updated weights for policy 0, policy_version 34613 (0.0010) -[2026-06-07 02:27:29,941][324563] Updated weights for policy 0, policy_version 34623 (0.0009) -[2026-06-07 02:27:30,143][324563] Updated weights for policy 0, policy_version 34633 (0.0007) -[2026-06-07 02:27:30,379][324563] Updated weights for policy 0, policy_version 34644 (0.0006) -[2026-06-07 02:27:30,744][321791] Fps is (10 sec: 19660.8, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 17760256. Throughput: 0: 17567.3. Samples: 17779200. Policy #0 lag: (min: 3.0, avg: 26.6, max: 67.0) -[2026-06-07 02:27:30,745][321791] Avg episode reward: [(0, '1290.866')] -[2026-06-07 02:27:31,115][324563] Updated weights for policy 0, policy_version 34654 (0.0006) -[2026-06-07 02:27:31,326][324563] Updated weights for policy 0, policy_version 34664 (0.0006) -[2026-06-07 02:27:31,515][324563] Updated weights for policy 0, policy_version 34674 (0.0006) -[2026-06-07 02:27:31,704][324563] Updated weights for policy 0, policy_version 34684 (0.0006) -[2026-06-07 02:27:31,933][324563] Updated weights for policy 0, policy_version 34695 (0.0006) -[2026-06-07 02:27:32,126][324563] Updated weights for policy 0, policy_version 34705 (0.0007) -[2026-06-07 02:27:32,872][324563] Updated weights for policy 0, policy_version 34715 (0.0006) -[2026-06-07 02:27:33,072][324563] Updated weights for policy 0, policy_version 34725 (0.0006) -[2026-06-07 02:27:33,294][324563] Updated weights for policy 0, policy_version 34735 (0.0007) -[2026-06-07 02:27:33,492][324563] Updated weights for policy 0, policy_version 34745 (0.0006) -[2026-06-07 02:27:33,730][324563] Updated weights for policy 0, policy_version 34757 (0.0006) -[2026-06-07 02:27:33,947][324563] Updated weights for policy 0, policy_version 34767 (0.0008) -[2026-06-07 02:27:34,687][324563] Updated weights for policy 0, policy_version 34777 (0.0008) -[2026-06-07 02:27:34,893][324563] Updated weights for policy 0, policy_version 34787 (0.0007) -[2026-06-07 02:27:35,117][324563] Updated weights for policy 0, policy_version 34797 (0.0006) -[2026-06-07 02:27:35,337][324563] Updated weights for policy 0, policy_version 34808 (0.0006) -[2026-06-07 02:27:35,526][324563] Updated weights for policy 0, policy_version 34818 (0.0007) -[2026-06-07 02:27:35,723][324563] Updated weights for policy 0, policy_version 34828 (0.0006) -[2026-06-07 02:27:35,744][321791] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 17825792. Throughput: 0: 17561.6. Samples: 17836160. Policy #0 lag: (min: 3.0, avg: 26.6, max: 67.0) -[2026-06-07 02:27:35,745][321791] Avg episode reward: [(0, '1335.562')] -[2026-06-07 02:27:35,914][324563] Updated weights for policy 0, policy_version 34838 (0.0006) -[2026-06-07 02:27:35,955][324276] Saving new best policy, reward=1335.562! -[2026-06-07 02:27:36,714][324563] Updated weights for policy 0, policy_version 34849 (0.0006) -[2026-06-07 02:27:36,932][324563] Updated weights for policy 0, policy_version 34859 (0.0006) -[2026-06-07 02:27:37,141][324563] Updated weights for policy 0, policy_version 34870 (0.0006) -[2026-06-07 02:27:37,359][324563] Updated weights for policy 0, policy_version 34881 (0.0006) -[2026-06-07 02:27:37,581][324563] Updated weights for policy 0, policy_version 34891 (0.0006) -[2026-06-07 02:27:37,821][324563] Updated weights for policy 0, policy_version 34902 (0.0007) -[2026-06-07 02:27:38,567][324563] Updated weights for policy 0, policy_version 34912 (0.0007) -[2026-06-07 02:27:38,765][324563] Updated weights for policy 0, policy_version 34922 (0.0006) -[2026-06-07 02:27:38,963][324563] Updated weights for policy 0, policy_version 34932 (0.0006) -[2026-06-07 02:27:39,147][324563] Updated weights for policy 0, policy_version 34942 (0.0007) -[2026-06-07 02:27:39,356][324563] Updated weights for policy 0, policy_version 34952 (0.0007) -[2026-06-07 02:27:39,567][324563] Updated weights for policy 0, policy_version 34962 (0.0007) -[2026-06-07 02:27:40,365][324563] Updated weights for policy 0, policy_version 34973 (0.0007) -[2026-06-07 02:27:40,571][324563] Updated weights for policy 0, policy_version 34983 (0.0006) -[2026-06-07 02:27:40,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 17924096. Throughput: 0: 17573.0. Samples: 17939840. Policy #0 lag: (min: 8.0, avg: 29.7, max: 72.0) -[2026-06-07 02:27:40,745][321791] Avg episode reward: [(0, '1342.201')] -[2026-06-07 02:27:40,786][324563] Updated weights for policy 0, policy_version 34993 (0.0007) -[2026-06-07 02:27:40,980][324563] Updated weights for policy 0, policy_version 35003 (0.0006) -[2026-06-07 02:27:41,182][324563] Updated weights for policy 0, policy_version 35013 (0.0006) -[2026-06-07 02:27:41,393][324563] Updated weights for policy 0, policy_version 35024 (0.0006) -[2026-06-07 02:27:41,552][324276] Saving new best policy, reward=1342.201! -[2026-06-07 02:27:42,155][324563] Updated weights for policy 0, policy_version 35034 (0.0007) -[2026-06-07 02:27:42,361][324563] Updated weights for policy 0, policy_version 35044 (0.0006) -[2026-06-07 02:27:42,576][324563] Updated weights for policy 0, policy_version 35054 (0.0006) -[2026-06-07 02:27:42,779][324563] Updated weights for policy 0, policy_version 35064 (0.0006) -[2026-06-07 02:27:43,033][324563] Updated weights for policy 0, policy_version 35076 (0.0007) -[2026-06-07 02:27:43,229][324563] Updated weights for policy 0, policy_version 35086 (0.0007) -[2026-06-07 02:27:43,425][324563] Updated weights for policy 0, policy_version 35096 (0.0007) -[2026-06-07 02:27:44,211][324563] Updated weights for policy 0, policy_version 35107 (0.0006) -[2026-06-07 02:27:44,417][324563] Updated weights for policy 0, policy_version 35117 (0.0006) -[2026-06-07 02:27:44,622][324563] Updated weights for policy 0, policy_version 35127 (0.0007) -[2026-06-07 02:27:44,839][324563] Updated weights for policy 0, policy_version 35137 (0.0006) -[2026-06-07 02:27:45,047][324563] Updated weights for policy 0, policy_version 35147 (0.0006) -[2026-06-07 02:27:45,260][324563] Updated weights for policy 0, policy_version 35157 (0.0007) -[2026-06-07 02:27:45,744][321791] Fps is (10 sec: 19660.9, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 18022400. Throughput: 0: 17766.4. Samples: 18049920. Policy #0 lag: (min: 8.0, avg: 29.7, max: 72.0) -[2026-06-07 02:27:45,745][321791] Avg episode reward: [(0, '1323.075')] -[2026-06-07 02:27:46,040][324563] Updated weights for policy 0, policy_version 35168 (0.0006) -[2026-06-07 02:27:46,236][324563] Updated weights for policy 0, policy_version 35178 (0.0006) -[2026-06-07 02:27:46,453][324563] Updated weights for policy 0, policy_version 35188 (0.0007) -[2026-06-07 02:27:46,660][324563] Updated weights for policy 0, policy_version 35198 (0.0006) -[2026-06-07 02:27:46,887][324563] Updated weights for policy 0, policy_version 35208 (0.0007) -[2026-06-07 02:27:47,081][324563] Updated weights for policy 0, policy_version 35218 (0.0007) -[2026-06-07 02:27:47,819][324563] Updated weights for policy 0, policy_version 35228 (0.0006) -[2026-06-07 02:27:48,025][324563] Updated weights for policy 0, policy_version 35238 (0.0006) -[2026-06-07 02:27:48,242][324563] Updated weights for policy 0, policy_version 35248 (0.0006) -[2026-06-07 02:27:48,451][324563] Updated weights for policy 0, policy_version 35258 (0.0006) -[2026-06-07 02:27:48,656][324563] Updated weights for policy 0, policy_version 35268 (0.0006) -[2026-06-07 02:27:48,865][324563] Updated weights for policy 0, policy_version 35278 (0.0006) -[2026-06-07 02:27:49,056][324563] Updated weights for policy 0, policy_version 35288 (0.0006) -[2026-06-07 02:27:49,837][324563] Updated weights for policy 0, policy_version 35298 (0.0006) -[2026-06-07 02:27:50,070][324563] Updated weights for policy 0, policy_version 35309 (0.0007) -[2026-06-07 02:27:50,278][324563] Updated weights for policy 0, policy_version 35319 (0.0007) -[2026-06-07 02:27:50,469][324563] Updated weights for policy 0, policy_version 35329 (0.0007) -[2026-06-07 02:27:50,671][324563] Updated weights for policy 0, policy_version 35339 (0.0007) -[2026-06-07 02:27:50,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 18087936. Throughput: 0: 17678.2. Samples: 18102784. Policy #0 lag: (min: 8.0, avg: 29.7, max: 72.0) -[2026-06-07 02:27:50,745][321791] Avg episode reward: [(0, '1364.606')] -[2026-06-07 02:27:50,897][324563] Updated weights for policy 0, policy_version 35349 (0.0007) -[2026-06-07 02:27:50,956][324276] Saving new best policy, reward=1364.606! -[2026-06-07 02:27:51,659][324563] Updated weights for policy 0, policy_version 35359 (0.0007) -[2026-06-07 02:27:51,859][324563] Updated weights for policy 0, policy_version 35369 (0.0007) -[2026-06-07 02:27:52,075][324563] Updated weights for policy 0, policy_version 35379 (0.0006) -[2026-06-07 02:27:52,262][324563] Updated weights for policy 0, policy_version 35389 (0.0007) -[2026-06-07 02:27:52,468][324563] Updated weights for policy 0, policy_version 35399 (0.0006) -[2026-06-07 02:27:52,714][324563] Updated weights for policy 0, policy_version 35410 (0.0007) -[2026-06-07 02:27:53,458][324563] Updated weights for policy 0, policy_version 35420 (0.0006) -[2026-06-07 02:27:53,667][324563] Updated weights for policy 0, policy_version 35430 (0.0006) -[2026-06-07 02:27:53,872][324563] Updated weights for policy 0, policy_version 35440 (0.0007) -[2026-06-07 02:27:54,095][324563] Updated weights for policy 0, policy_version 35450 (0.0007) -[2026-06-07 02:27:54,310][324563] Updated weights for policy 0, policy_version 35460 (0.0006) -[2026-06-07 02:27:54,527][324563] Updated weights for policy 0, policy_version 35470 (0.0007) -[2026-06-07 02:27:54,721][324563] Updated weights for policy 0, policy_version 35480 (0.0006) -[2026-06-07 02:27:55,468][324563] Updated weights for policy 0, policy_version 35490 (0.0010) -[2026-06-07 02:27:55,678][324563] Updated weights for policy 0, policy_version 35500 (0.0011) -[2026-06-07 02:27:55,744][321791] Fps is (10 sec: 16383.9, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 18186240. Throughput: 0: 17712.4. Samples: 18207488. Policy #0 lag: (min: 8.0, avg: 29.7, max: 72.0) -[2026-06-07 02:27:55,745][321791] Avg episode reward: [(0, '1378.863')] -[2026-06-07 02:27:55,896][324563] Updated weights for policy 0, policy_version 35510 (0.0010) -[2026-06-07 02:27:56,110][324563] Updated weights for policy 0, policy_version 35520 (0.0010) -[2026-06-07 02:27:56,319][324563] Updated weights for policy 0, policy_version 35530 (0.0011) -[2026-06-07 02:27:56,504][324563] Updated weights for policy 0, policy_version 35540 (0.0011) -[2026-06-07 02:27:56,579][324276] Saving new best policy, reward=1378.863! -[2026-06-07 02:27:57,286][324563] Updated weights for policy 0, policy_version 35551 (0.0009) -[2026-06-07 02:27:57,498][324563] Updated weights for policy 0, policy_version 35561 (0.0007) -[2026-06-07 02:27:57,687][324563] Updated weights for policy 0, policy_version 35571 (0.0007) -[2026-06-07 02:27:57,905][324563] Updated weights for policy 0, policy_version 35581 (0.0007) -[2026-06-07 02:27:58,107][324563] Updated weights for policy 0, policy_version 35591 (0.0007) -[2026-06-07 02:27:58,331][324563] Updated weights for policy 0, policy_version 35601 (0.0007) -[2026-06-07 02:27:59,065][324563] Updated weights for policy 0, policy_version 35611 (0.0007) -[2026-06-07 02:27:59,269][324563] Updated weights for policy 0, policy_version 35621 (0.0007) -[2026-06-07 02:27:59,470][324563] Updated weights for policy 0, policy_version 35631 (0.0007) -[2026-06-07 02:27:59,691][324563] Updated weights for policy 0, policy_version 35642 (0.0007) -[2026-06-07 02:27:59,900][324563] Updated weights for policy 0, policy_version 35652 (0.0008) -[2026-06-07 02:28:00,122][324563] Updated weights for policy 0, policy_version 35662 (0.0008) -[2026-06-07 02:28:00,335][324563] Updated weights for policy 0, policy_version 35672 (0.0008) -[2026-06-07 02:28:00,744][321791] Fps is (10 sec: 19660.7, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 18284544. Throughput: 0: 17840.4. Samples: 18316672. Policy #0 lag: (min: 8.0, avg: 29.7, max: 72.0) -[2026-06-07 02:28:00,745][321791] Avg episode reward: [(0, '1388.191')] -[2026-06-07 02:28:00,751][324276] Saving new best policy, reward=1388.191! -[2026-06-07 02:28:01,112][324563] Updated weights for policy 0, policy_version 35682 (0.0010) -[2026-06-07 02:28:01,317][324563] Updated weights for policy 0, policy_version 35692 (0.0011) -[2026-06-07 02:28:01,535][324563] Updated weights for policy 0, policy_version 35702 (0.0011) -[2026-06-07 02:28:01,753][324563] Updated weights for policy 0, policy_version 35712 (0.0011) -[2026-06-07 02:28:01,979][324563] Updated weights for policy 0, policy_version 35722 (0.0011) -[2026-06-07 02:28:02,184][324563] Updated weights for policy 0, policy_version 35732 (0.0011) -[2026-06-07 02:28:02,906][324563] Updated weights for policy 0, policy_version 35742 (0.0009) -[2026-06-07 02:28:03,135][324563] Updated weights for policy 0, policy_version 35752 (0.0011) -[2026-06-07 02:28:03,331][324563] Updated weights for policy 0, policy_version 35762 (0.0011) -[2026-06-07 02:28:03,533][324563] Updated weights for policy 0, policy_version 35772 (0.0008) -[2026-06-07 02:28:03,742][324563] Updated weights for policy 0, policy_version 35782 (0.0006) -[2026-06-07 02:28:03,962][324563] Updated weights for policy 0, policy_version 35792 (0.0006) -[2026-06-07 02:28:04,686][324563] Updated weights for policy 0, policy_version 35802 (0.0007) -[2026-06-07 02:28:04,893][324563] Updated weights for policy 0, policy_version 35812 (0.0006) -[2026-06-07 02:28:05,103][324563] Updated weights for policy 0, policy_version 35822 (0.0007) -[2026-06-07 02:28:05,309][324563] Updated weights for policy 0, policy_version 35832 (0.0006) -[2026-06-07 02:28:05,551][324563] Updated weights for policy 0, policy_version 35843 (0.0007) -[2026-06-07 02:28:05,744][321791] Fps is (10 sec: 16383.8, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 18350080. Throughput: 0: 17757.8. Samples: 18369920. Policy #0 lag: (min: 63.0, avg: 79.0, max: 127.0) -[2026-06-07 02:28:05,745][321791] Avg episode reward: [(0, '1433.563')] -[2026-06-07 02:28:05,759][324563] Updated weights for policy 0, policy_version 35853 (0.0007) -[2026-06-07 02:28:05,982][324563] Updated weights for policy 0, policy_version 35863 (0.0006) -[2026-06-07 02:28:05,997][324276] Saving new best policy, reward=1433.563! -[2026-06-07 02:28:06,726][324563] Updated weights for policy 0, policy_version 35873 (0.0006) -[2026-06-07 02:28:06,935][324563] Updated weights for policy 0, policy_version 35883 (0.0007) -[2026-06-07 02:28:07,136][324563] Updated weights for policy 0, policy_version 35893 (0.0006) -[2026-06-07 02:28:07,325][324563] Updated weights for policy 0, policy_version 35903 (0.0006) -[2026-06-07 02:28:07,551][324563] Updated weights for policy 0, policy_version 35913 (0.0006) -[2026-06-07 02:28:07,774][324563] Updated weights for policy 0, policy_version 35923 (0.0007) -[2026-06-07 02:28:08,484][324563] Updated weights for policy 0, policy_version 35933 (0.0007) -[2026-06-07 02:28:08,692][324563] Updated weights for policy 0, policy_version 35943 (0.0006) -[2026-06-07 02:28:08,908][324563] Updated weights for policy 0, policy_version 35953 (0.0006) -[2026-06-07 02:28:09,123][324563] Updated weights for policy 0, policy_version 35963 (0.0006) -[2026-06-07 02:28:09,315][324563] Updated weights for policy 0, policy_version 35973 (0.0007) -[2026-06-07 02:28:09,525][324563] Updated weights for policy 0, policy_version 35983 (0.0007) -[2026-06-07 02:28:10,274][324563] Updated weights for policy 0, policy_version 35994 (0.0007) -[2026-06-07 02:28:10,484][324563] Updated weights for policy 0, policy_version 36004 (0.0007) -[2026-06-07 02:28:10,701][324563] Updated weights for policy 0, policy_version 36014 (0.0007) -[2026-06-07 02:28:10,744][321791] Fps is (10 sec: 16383.6, 60 sec: 17476.2, 300 sec: 17661.4). Total num frames: 18448384. Throughput: 0: 17757.8. Samples: 18473344. Policy #0 lag: (min: 63.0, avg: 79.0, max: 127.0) -[2026-06-07 02:28:10,745][321791] Avg episode reward: [(0, '1423.414')] -[2026-06-07 02:28:10,908][324563] Updated weights for policy 0, policy_version 36024 (0.0006) -[2026-06-07 02:28:11,130][324563] Updated weights for policy 0, policy_version 36034 (0.0008) -[2026-06-07 02:28:11,308][324563] Updated weights for policy 0, policy_version 36044 (0.0007) -[2026-06-07 02:28:11,523][324563] Updated weights for policy 0, policy_version 36054 (0.0007) -[2026-06-07 02:28:12,292][324563] Updated weights for policy 0, policy_version 36064 (0.0007) -[2026-06-07 02:28:12,507][324563] Updated weights for policy 0, policy_version 36074 (0.0007) -[2026-06-07 02:28:12,707][324563] Updated weights for policy 0, policy_version 36084 (0.0006) -[2026-06-07 02:28:12,899][324563] Updated weights for policy 0, policy_version 36094 (0.0007) -[2026-06-07 02:28:13,109][324563] Updated weights for policy 0, policy_version 36104 (0.0006) -[2026-06-07 02:28:13,342][324563] Updated weights for policy 0, policy_version 36114 (0.0007) -[2026-06-07 02:28:14,113][324563] Updated weights for policy 0, policy_version 36124 (0.0006) -[2026-06-07 02:28:14,302][324563] Updated weights for policy 0, policy_version 36134 (0.0006) -[2026-06-07 02:28:14,518][324563] Updated weights for policy 0, policy_version 36144 (0.0006) -[2026-06-07 02:28:14,728][324563] Updated weights for policy 0, policy_version 36154 (0.0006) -[2026-06-07 02:28:14,971][324563] Updated weights for policy 0, policy_version 36165 (0.0007) -[2026-06-07 02:28:15,192][324563] Updated weights for policy 0, policy_version 36175 (0.0007) -[2026-06-07 02:28:15,744][321791] Fps is (10 sec: 19661.1, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 18546688. Throughput: 0: 17814.8. Samples: 18580864. Policy #0 lag: (min: 63.0, avg: 79.0, max: 127.0) -[2026-06-07 02:28:15,745][321791] Avg episode reward: [(0, '1380.358')] -[2026-06-07 02:28:15,933][324563] Updated weights for policy 0, policy_version 36185 (0.0007) -[2026-06-07 02:28:16,157][324563] Updated weights for policy 0, policy_version 36195 (0.0006) -[2026-06-07 02:28:16,357][324563] Updated weights for policy 0, policy_version 36205 (0.0006) -[2026-06-07 02:28:16,582][324563] Updated weights for policy 0, policy_version 36215 (0.0007) -[2026-06-07 02:28:16,799][324563] Updated weights for policy 0, policy_version 36225 (0.0006) -[2026-06-07 02:28:17,010][324563] Updated weights for policy 0, policy_version 36236 (0.0007) -[2026-06-07 02:28:17,224][324563] Updated weights for policy 0, policy_version 36246 (0.0007) -[2026-06-07 02:28:17,928][324563] Updated weights for policy 0, policy_version 36256 (0.0007) -[2026-06-07 02:28:18,161][324563] Updated weights for policy 0, policy_version 36267 (0.0007) -[2026-06-07 02:28:18,375][324563] Updated weights for policy 0, policy_version 36277 (0.0006) -[2026-06-07 02:28:18,600][324563] Updated weights for policy 0, policy_version 36288 (0.0006) -[2026-06-07 02:28:18,828][324563] Updated weights for policy 0, policy_version 36299 (0.0006) -[2026-06-07 02:28:19,026][324563] Updated weights for policy 0, policy_version 36309 (0.0007) -[2026-06-07 02:28:19,798][324563] Updated weights for policy 0, policy_version 36319 (0.0007) -[2026-06-07 02:28:19,997][324563] Updated weights for policy 0, policy_version 36329 (0.0006) -[2026-06-07 02:28:20,203][324563] Updated weights for policy 0, policy_version 36339 (0.0006) -[2026-06-07 02:28:20,412][324563] Updated weights for policy 0, policy_version 36349 (0.0006) -[2026-06-07 02:28:20,621][324563] Updated weights for policy 0, policy_version 36359 (0.0006) -[2026-06-07 02:28:20,744][321791] Fps is (10 sec: 16384.5, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 18612224. Throughput: 0: 17757.9. Samples: 18635264. Policy #0 lag: (min: 63.0, avg: 79.0, max: 127.0) -[2026-06-07 02:28:20,745][321791] Avg episode reward: [(0, '1362.765')] -[2026-06-07 02:28:20,821][324563] Updated weights for policy 0, policy_version 36369 (0.0006) -[2026-06-07 02:28:21,546][324563] Updated weights for policy 0, policy_version 36379 (0.0007) -[2026-06-07 02:28:21,741][324563] Updated weights for policy 0, policy_version 36389 (0.0006) -[2026-06-07 02:28:21,961][324563] Updated weights for policy 0, policy_version 36399 (0.0006) -[2026-06-07 02:28:22,189][324563] Updated weights for policy 0, policy_version 36409 (0.0007) -[2026-06-07 02:28:22,398][324563] Updated weights for policy 0, policy_version 36419 (0.0006) -[2026-06-07 02:28:22,605][324563] Updated weights for policy 0, policy_version 36429 (0.0007) -[2026-06-07 02:28:22,809][324563] Updated weights for policy 0, policy_version 36439 (0.0006) -[2026-06-07 02:28:23,567][324563] Updated weights for policy 0, policy_version 36450 (0.0006) -[2026-06-07 02:28:23,773][324563] Updated weights for policy 0, policy_version 36460 (0.0007) -[2026-06-07 02:28:23,995][324563] Updated weights for policy 0, policy_version 36470 (0.0006) -[2026-06-07 02:28:24,206][324563] Updated weights for policy 0, policy_version 36480 (0.0010) -[2026-06-07 02:28:24,398][324563] Updated weights for policy 0, policy_version 36490 (0.0008) -[2026-06-07 02:28:24,592][324563] Updated weights for policy 0, policy_version 36500 (0.0009) -[2026-06-07 02:28:25,328][324563] Updated weights for policy 0, policy_version 36510 (0.0006) -[2026-06-07 02:28:25,544][324563] Updated weights for policy 0, policy_version 36520 (0.0006) -[2026-06-07 02:28:25,731][324563] Updated weights for policy 0, policy_version 36530 (0.0006) -[2026-06-07 02:28:25,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 18710528. Throughput: 0: 17769.2. Samples: 18739456. Policy #0 lag: (min: 63.0, avg: 79.0, max: 127.0) -[2026-06-07 02:28:25,745][321791] Avg episode reward: [(0, '1361.636')] -[2026-06-07 02:28:25,946][324563] Updated weights for policy 0, policy_version 36540 (0.0006) -[2026-06-07 02:28:26,153][324563] Updated weights for policy 0, policy_version 36550 (0.0006) -[2026-06-07 02:28:26,383][324563] Updated weights for policy 0, policy_version 36560 (0.0006) -[2026-06-07 02:28:27,120][324563] Updated weights for policy 0, policy_version 36570 (0.0006) -[2026-06-07 02:28:27,332][324563] Updated weights for policy 0, policy_version 36580 (0.0006) -[2026-06-07 02:28:27,566][324563] Updated weights for policy 0, policy_version 36591 (0.0006) -[2026-06-07 02:28:27,786][324563] Updated weights for policy 0, policy_version 36602 (0.0006) -[2026-06-07 02:28:28,012][324563] Updated weights for policy 0, policy_version 36612 (0.0006) -[2026-06-07 02:28:28,216][324563] Updated weights for policy 0, policy_version 36622 (0.0006) -[2026-06-07 02:28:28,446][324563] Updated weights for policy 0, policy_version 36632 (0.0007) -[2026-06-07 02:28:29,157][324563] Updated weights for policy 0, policy_version 36642 (0.0007) -[2026-06-07 02:28:29,352][324563] Updated weights for policy 0, policy_version 36652 (0.0006) -[2026-06-07 02:28:29,559][324563] Updated weights for policy 0, policy_version 36662 (0.0006) -[2026-06-07 02:28:29,787][324563] Updated weights for policy 0, policy_version 36672 (0.0006) -[2026-06-07 02:28:30,015][324563] Updated weights for policy 0, policy_version 36682 (0.0007) -[2026-06-07 02:28:30,219][324563] Updated weights for policy 0, policy_version 36692 (0.0006) -[2026-06-07 02:28:30,744][321791] Fps is (10 sec: 19660.9, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 18808832. Throughput: 0: 17738.0. Samples: 18848128. Policy #0 lag: (min: 62.0, avg: 77.6, max: 126.0) -[2026-06-07 02:28:30,745][321791] Avg episode reward: [(0, '1348.505')] -[2026-06-07 02:28:30,981][324563] Updated weights for policy 0, policy_version 36702 (0.0006) -[2026-06-07 02:28:31,182][324563] Updated weights for policy 0, policy_version 36712 (0.0006) -[2026-06-07 02:28:31,400][324563] Updated weights for policy 0, policy_version 36722 (0.0006) -[2026-06-07 02:28:31,591][324563] Updated weights for policy 0, policy_version 36732 (0.0006) -[2026-06-07 02:28:31,808][324563] Updated weights for policy 0, policy_version 36742 (0.0007) -[2026-06-07 02:28:32,028][324563] Updated weights for policy 0, policy_version 36752 (0.0007) -[2026-06-07 02:28:32,737][324563] Updated weights for policy 0, policy_version 36762 (0.0006) -[2026-06-07 02:28:32,947][324563] Updated weights for policy 0, policy_version 36772 (0.0007) -[2026-06-07 02:28:33,171][324563] Updated weights for policy 0, policy_version 36782 (0.0007) -[2026-06-07 02:28:33,395][324563] Updated weights for policy 0, policy_version 36793 (0.0006) -[2026-06-07 02:28:33,637][324563] Updated weights for policy 0, policy_version 36804 (0.0007) -[2026-06-07 02:28:33,847][324563] Updated weights for policy 0, policy_version 36814 (0.0007) -[2026-06-07 02:28:34,040][324563] Updated weights for policy 0, policy_version 36824 (0.0006) -[2026-06-07 02:28:34,793][324563] Updated weights for policy 0, policy_version 36834 (0.0006) -[2026-06-07 02:28:35,025][324563] Updated weights for policy 0, policy_version 36845 (0.0007) -[2026-06-07 02:28:35,237][324563] Updated weights for policy 0, policy_version 36855 (0.0008) -[2026-06-07 02:28:35,441][324563] Updated weights for policy 0, policy_version 36865 (0.0007) -[2026-06-07 02:28:35,647][324563] Updated weights for policy 0, policy_version 36875 (0.0006) -[2026-06-07 02:28:35,744][321791] Fps is (10 sec: 16383.9, 60 sec: 17476.3, 300 sec: 17550.4). Total num frames: 18874368. Throughput: 0: 17715.2. Samples: 18899968. Policy #0 lag: (min: 62.0, avg: 77.6, max: 126.0) -[2026-06-07 02:28:35,745][321791] Avg episode reward: [(0, '1341.921')] -[2026-06-07 02:28:35,841][324563] Updated weights for policy 0, policy_version 36885 (0.0006) -[2026-06-07 02:28:36,583][324563] Updated weights for policy 0, policy_version 36895 (0.0006) -[2026-06-07 02:28:36,794][324563] Updated weights for policy 0, policy_version 36905 (0.0007) -[2026-06-07 02:28:37,017][324563] Updated weights for policy 0, policy_version 36915 (0.0007) -[2026-06-07 02:28:37,235][324563] Updated weights for policy 0, policy_version 36926 (0.0006) -[2026-06-07 02:28:37,451][324563] Updated weights for policy 0, policy_version 36936 (0.0007) -[2026-06-07 02:28:37,663][324563] Updated weights for policy 0, policy_version 36946 (0.0006) -[2026-06-07 02:28:38,383][324563] Updated weights for policy 0, policy_version 36956 (0.0006) -[2026-06-07 02:28:38,590][324563] Updated weights for policy 0, policy_version 36966 (0.0007) -[2026-06-07 02:28:38,799][324563] Updated weights for policy 0, policy_version 36976 (0.0006) -[2026-06-07 02:28:39,020][324563] Updated weights for policy 0, policy_version 36986 (0.0007) -[2026-06-07 02:28:39,236][324563] Updated weights for policy 0, policy_version 36996 (0.0006) -[2026-06-07 02:28:39,433][324563] Updated weights for policy 0, policy_version 37006 (0.0007) -[2026-06-07 02:28:39,660][324563] Updated weights for policy 0, policy_version 37016 (0.0006) -[2026-06-07 02:28:40,400][324563] Updated weights for policy 0, policy_version 37026 (0.0011) -[2026-06-07 02:28:40,614][324563] Updated weights for policy 0, policy_version 37036 (0.0011) -[2026-06-07 02:28:40,744][321791] Fps is (10 sec: 16383.9, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 18972672. Throughput: 0: 17655.5. Samples: 19001984. Policy #0 lag: (min: 62.0, avg: 77.6, max: 126.0) -[2026-06-07 02:28:40,745][321791] Avg episode reward: [(0, '1309.999')] -[2026-06-07 02:28:40,807][324563] Updated weights for policy 0, policy_version 37046 (0.0011) -[2026-06-07 02:28:41,003][324563] Updated weights for policy 0, policy_version 37056 (0.0011) -[2026-06-07 02:28:41,223][324563] Updated weights for policy 0, policy_version 37066 (0.0011) -[2026-06-07 02:28:41,444][324563] Updated weights for policy 0, policy_version 37076 (0.0006) -[2026-06-07 02:28:42,187][324563] Updated weights for policy 0, policy_version 37086 (0.0007) -[2026-06-07 02:28:42,396][324563] Updated weights for policy 0, policy_version 37096 (0.0006) -[2026-06-07 02:28:42,619][324563] Updated weights for policy 0, policy_version 37106 (0.0007) -[2026-06-07 02:28:42,821][324563] Updated weights for policy 0, policy_version 37116 (0.0006) -[2026-06-07 02:28:43,028][324563] Updated weights for policy 0, policy_version 37126 (0.0006) -[2026-06-07 02:28:43,257][324563] Updated weights for policy 0, policy_version 37136 (0.0006) -[2026-06-07 02:28:44,007][324563] Updated weights for policy 0, policy_version 37146 (0.0006) -[2026-06-07 02:28:44,207][324563] Updated weights for policy 0, policy_version 37156 (0.0006) -[2026-06-07 02:28:44,408][324563] Updated weights for policy 0, policy_version 37166 (0.0006) -[2026-06-07 02:28:44,623][324563] Updated weights for policy 0, policy_version 37176 (0.0006) -[2026-06-07 02:28:44,851][324563] Updated weights for policy 0, policy_version 37186 (0.0006) -[2026-06-07 02:28:45,061][324563] Updated weights for policy 0, policy_version 37196 (0.0006) -[2026-06-07 02:28:45,263][324563] Updated weights for policy 0, policy_version 37206 (0.0006) -[2026-06-07 02:28:45,744][321791] Fps is (10 sec: 19660.6, 60 sec: 17476.2, 300 sec: 17661.4). Total num frames: 19070976. Throughput: 0: 17652.6. Samples: 19111040. Policy #0 lag: (min: 62.0, avg: 77.6, max: 126.0) -[2026-06-07 02:28:45,746][321791] Avg episode reward: [(0, '1286.681')] -[2026-06-07 02:28:45,989][324563] Updated weights for policy 0, policy_version 37216 (0.0006) -[2026-06-07 02:28:46,213][324563] Updated weights for policy 0, policy_version 37226 (0.0007) -[2026-06-07 02:28:46,431][324563] Updated weights for policy 0, policy_version 37236 (0.0006) -[2026-06-07 02:28:46,631][324563] Updated weights for policy 0, policy_version 37246 (0.0006) -[2026-06-07 02:28:46,824][324563] Updated weights for policy 0, policy_version 37256 (0.0006) -[2026-06-07 02:28:47,049][324563] Updated weights for policy 0, policy_version 37266 (0.0006) -[2026-06-07 02:28:47,751][324563] Updated weights for policy 0, policy_version 37276 (0.0008) -[2026-06-07 02:28:47,953][324563] Updated weights for policy 0, policy_version 37286 (0.0011) -[2026-06-07 02:28:48,168][324563] Updated weights for policy 0, policy_version 37296 (0.0011) -[2026-06-07 02:28:48,375][324563] Updated weights for policy 0, policy_version 37306 (0.0011) -[2026-06-07 02:28:48,587][324563] Updated weights for policy 0, policy_version 37316 (0.0009) -[2026-06-07 02:28:48,813][324563] Updated weights for policy 0, policy_version 37326 (0.0011) -[2026-06-07 02:28:49,007][324563] Updated weights for policy 0, policy_version 37336 (0.0011) -[2026-06-07 02:28:49,754][324563] Updated weights for policy 0, policy_version 37346 (0.0010) -[2026-06-07 02:28:49,975][324563] Updated weights for policy 0, policy_version 37356 (0.0010) -[2026-06-07 02:28:50,194][324563] Updated weights for policy 0, policy_version 37366 (0.0007) -[2026-06-07 02:28:50,377][324563] Updated weights for policy 0, policy_version 37376 (0.0007) -[2026-06-07 02:28:50,584][324563] Updated weights for policy 0, policy_version 37386 (0.0006) -[2026-06-07 02:28:50,744][321791] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 19136512. Throughput: 0: 17590.1. Samples: 19161472. Policy #0 lag: (min: 62.0, avg: 77.6, max: 126.0) -[2026-06-07 02:28:50,745][321791] Avg episode reward: [(0, '1285.778')] -[2026-06-07 02:28:50,784][324563] Updated weights for policy 0, policy_version 37396 (0.0007) -[2026-06-07 02:28:51,422][324276] KL-divergence is very high: 102.6736 -[2026-06-07 02:28:51,547][324563] Updated weights for policy 0, policy_version 37406 (0.0006) -[2026-06-07 02:28:51,600][324276] KL-divergence is very high: 102.7042 -[2026-06-07 02:28:51,784][324563] Updated weights for policy 0, policy_version 37416 (0.0006) -[2026-06-07 02:28:51,787][324276] KL-divergence is very high: 102.6192 -[2026-06-07 02:28:51,961][324276] KL-divergence is very high: 102.7928 -[2026-06-07 02:28:52,012][324563] Updated weights for policy 0, policy_version 37427 (0.0007) -[2026-06-07 02:28:52,121][324276] KL-divergence is very high: 102.7924 -[2026-06-07 02:28:52,261][324563] Updated weights for policy 0, policy_version 37438 (0.0006) -[2026-06-07 02:28:52,302][324276] KL-divergence is very high: 102.7180 -[2026-06-07 02:28:52,475][324563] Updated weights for policy 0, policy_version 37448 (0.0007) -[2026-06-07 02:28:52,481][324276] KL-divergence is very high: 102.8099 -[2026-06-07 02:28:52,644][324276] KL-divergence is very high: 102.8698 -[2026-06-07 02:28:52,691][324563] Updated weights for policy 0, policy_version 37458 (0.0007) -[2026-06-07 02:28:53,401][324563] Updated weights for policy 0, policy_version 37468 (0.0007) -[2026-06-07 02:28:53,621][324563] Updated weights for policy 0, policy_version 37478 (0.0007) -[2026-06-07 02:28:53,808][324563] Updated weights for policy 0, policy_version 37488 (0.0006) -[2026-06-07 02:28:54,009][324563] Updated weights for policy 0, policy_version 37498 (0.0007) -[2026-06-07 02:28:54,202][324563] Updated weights for policy 0, policy_version 37508 (0.0006) -[2026-06-07 02:28:54,419][324563] Updated weights for policy 0, policy_version 37518 (0.0007) -[2026-06-07 02:28:54,637][324563] Updated weights for policy 0, policy_version 37528 (0.0007) -[2026-06-07 02:28:55,344][324563] Updated weights for policy 0, policy_version 37538 (0.0006) -[2026-06-07 02:28:55,549][324563] Updated weights for policy 0, policy_version 37548 (0.0007) -[2026-06-07 02:28:55,744][321791] Fps is (10 sec: 16383.8, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 19234816. Throughput: 0: 17598.6. Samples: 19265280. Policy #0 lag: (min: 127.0, avg: 139.8, max: 189.0) -[2026-06-07 02:28:55,745][321791] Avg episode reward: [(0, '1326.501')] -[2026-06-07 02:28:55,788][324563] Updated weights for policy 0, policy_version 37559 (0.0007) -[2026-06-07 02:28:56,000][324563] Updated weights for policy 0, policy_version 37569 (0.0006) -[2026-06-07 02:28:56,205][324563] Updated weights for policy 0, policy_version 37579 (0.0006) -[2026-06-07 02:28:56,431][324563] Updated weights for policy 0, policy_version 37590 (0.0006) -[2026-06-07 02:28:57,159][324563] Updated weights for policy 0, policy_version 37600 (0.0006) -[2026-06-07 02:28:57,365][324563] Updated weights for policy 0, policy_version 37610 (0.0006) -[2026-06-07 02:28:57,562][324563] Updated weights for policy 0, policy_version 37620 (0.0006) -[2026-06-07 02:28:57,761][324563] Updated weights for policy 0, policy_version 37630 (0.0006) -[2026-06-07 02:28:57,980][324563] Updated weights for policy 0, policy_version 37640 (0.0007) -[2026-06-07 02:28:58,185][324563] Updated weights for policy 0, policy_version 37650 (0.0006) -[2026-06-07 02:28:58,935][324563] Updated weights for policy 0, policy_version 37660 (0.0006) -[2026-06-07 02:28:59,139][324563] Updated weights for policy 0, policy_version 37670 (0.0006) -[2026-06-07 02:28:59,327][324563] Updated weights for policy 0, policy_version 37680 (0.0006) -[2026-06-07 02:28:59,551][324563] Updated weights for policy 0, policy_version 37690 (0.0007) -[2026-06-07 02:28:59,772][324563] Updated weights for policy 0, policy_version 37700 (0.0007) -[2026-06-07 02:28:59,955][324563] Updated weights for policy 0, policy_version 37710 (0.0006) -[2026-06-07 02:29:00,143][324563] Updated weights for policy 0, policy_version 37720 (0.0007) -[2026-06-07 02:29:00,744][321791] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 19333120. Throughput: 0: 17729.4. Samples: 19378688. Policy #0 lag: (min: 127.0, avg: 139.8, max: 189.0) -[2026-06-07 02:29:00,745][321791] Avg episode reward: [(0, '1327.905')] -[2026-06-07 02:29:00,894][324563] Updated weights for policy 0, policy_version 37730 (0.0006) -[2026-06-07 02:29:01,117][324563] Updated weights for policy 0, policy_version 37740 (0.0007) -[2026-06-07 02:29:01,314][324563] Updated weights for policy 0, policy_version 37750 (0.0006) -[2026-06-07 02:29:01,509][324563] Updated weights for policy 0, policy_version 37760 (0.0006) -[2026-06-07 02:29:01,715][324563] Updated weights for policy 0, policy_version 37770 (0.0006) -[2026-06-07 02:29:01,940][324563] Updated weights for policy 0, policy_version 37781 (0.0006) -[2026-06-07 02:29:02,662][324563] Updated weights for policy 0, policy_version 37791 (0.0006) -[2026-06-07 02:29:02,889][324563] Updated weights for policy 0, policy_version 37801 (0.0007) -[2026-06-07 02:29:03,095][324563] Updated weights for policy 0, policy_version 37811 (0.0006) -[2026-06-07 02:29:03,321][324563] Updated weights for policy 0, policy_version 37821 (0.0007) -[2026-06-07 02:29:03,527][324563] Updated weights for policy 0, policy_version 37831 (0.0007) -[2026-06-07 02:29:03,752][324563] Updated weights for policy 0, policy_version 37841 (0.0007) -[2026-06-07 02:29:04,470][324563] Updated weights for policy 0, policy_version 37851 (0.0006) -[2026-06-07 02:29:04,663][324563] Updated weights for policy 0, policy_version 37861 (0.0006) -[2026-06-07 02:29:04,892][324563] Updated weights for policy 0, policy_version 37871 (0.0006) -[2026-06-07 02:29:05,088][324563] Updated weights for policy 0, policy_version 37881 (0.0006) -[2026-06-07 02:29:05,305][324563] Updated weights for policy 0, policy_version 37891 (0.0006) -[2026-06-07 02:29:05,521][324563] Updated weights for policy 0, policy_version 37901 (0.0007) -[2026-06-07 02:29:05,721][324563] Updated weights for policy 0, policy_version 37911 (0.0006) -[2026-06-07 02:29:05,744][321791] Fps is (10 sec: 19661.3, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 19431424. Throughput: 0: 17573.0. Samples: 19426048. Policy #0 lag: (min: 127.0, avg: 139.8, max: 189.0) -[2026-06-07 02:29:05,745][321791] Avg episode reward: [(0, '1309.226')] -[2026-06-07 02:29:06,444][324563] Updated weights for policy 0, policy_version 37921 (0.0007) -[2026-06-07 02:29:06,649][324563] Updated weights for policy 0, policy_version 37931 (0.0006) -[2026-06-07 02:29:06,866][324563] Updated weights for policy 0, policy_version 37941 (0.0006) -[2026-06-07 02:29:07,055][324563] Updated weights for policy 0, policy_version 37951 (0.0006) -[2026-06-07 02:29:07,272][324563] Updated weights for policy 0, policy_version 37961 (0.0006) -[2026-06-07 02:29:07,513][324563] Updated weights for policy 0, policy_version 37972 (0.0006) -[2026-06-07 02:29:08,259][324563] Updated weights for policy 0, policy_version 37982 (0.0007) -[2026-06-07 02:29:08,464][324563] Updated weights for policy 0, policy_version 37992 (0.0007) -[2026-06-07 02:29:08,665][324563] Updated weights for policy 0, policy_version 38002 (0.0007) -[2026-06-07 02:29:08,869][324563] Updated weights for policy 0, policy_version 38012 (0.0006) -[2026-06-07 02:29:09,072][324563] Updated weights for policy 0, policy_version 38022 (0.0007) -[2026-06-07 02:29:09,291][324563] Updated weights for policy 0, policy_version 38032 (0.0006) -[2026-06-07 02:29:10,031][324563] Updated weights for policy 0, policy_version 38042 (0.0006) -[2026-06-07 02:29:10,252][324563] Updated weights for policy 0, policy_version 38052 (0.0007) -[2026-06-07 02:29:10,449][324563] Updated weights for policy 0, policy_version 38062 (0.0007) -[2026-06-07 02:29:10,656][324563] Updated weights for policy 0, policy_version 38072 (0.0006) -[2026-06-07 02:29:10,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 19496960. Throughput: 0: 17547.4. Samples: 19529088. Policy #0 lag: (min: 127.0, avg: 139.8, max: 189.0) -[2026-06-07 02:29:10,745][321791] Avg episode reward: [(0, '1315.540')] -[2026-06-07 02:29:10,869][324563] Updated weights for policy 0, policy_version 38082 (0.0006) -[2026-06-07 02:29:11,069][324563] Updated weights for policy 0, policy_version 38092 (0.0006) -[2026-06-07 02:29:11,282][324563] Updated weights for policy 0, policy_version 38102 (0.0006) -[2026-06-07 02:29:12,039][324563] Updated weights for policy 0, policy_version 38112 (0.0007) -[2026-06-07 02:29:12,263][324563] Updated weights for policy 0, policy_version 38123 (0.0007) -[2026-06-07 02:29:12,468][324563] Updated weights for policy 0, policy_version 38133 (0.0006) -[2026-06-07 02:29:12,664][324563] Updated weights for policy 0, policy_version 38143 (0.0006) -[2026-06-07 02:29:12,872][324563] Updated weights for policy 0, policy_version 38153 (0.0006) -[2026-06-07 02:29:13,072][324563] Updated weights for policy 0, policy_version 38163 (0.0007) -[2026-06-07 02:29:13,826][324563] Updated weights for policy 0, policy_version 38173 (0.0007) -[2026-06-07 02:29:14,049][324563] Updated weights for policy 0, policy_version 38183 (0.0007) -[2026-06-07 02:29:14,262][324563] Updated weights for policy 0, policy_version 38193 (0.0006) -[2026-06-07 02:29:14,459][324563] Updated weights for policy 0, policy_version 38203 (0.0006) -[2026-06-07 02:29:14,664][324563] Updated weights for policy 0, policy_version 38213 (0.0007) -[2026-06-07 02:29:14,853][324563] Updated weights for policy 0, policy_version 38223 (0.0007) -[2026-06-07 02:29:15,606][324563] Updated weights for policy 0, policy_version 38233 (0.0007) -[2026-06-07 02:29:15,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 19595264. Throughput: 0: 17675.4. Samples: 19643520. Policy #0 lag: (min: 127.0, avg: 139.8, max: 189.0) -[2026-06-07 02:29:15,745][321791] Avg episode reward: [(0, '1332.229')] -[2026-06-07 02:29:15,825][324563] Updated weights for policy 0, policy_version 38243 (0.0006) -[2026-06-07 02:29:16,046][324563] Updated weights for policy 0, policy_version 38253 (0.0007) -[2026-06-07 02:29:16,266][324563] Updated weights for policy 0, policy_version 38264 (0.0007) -[2026-06-07 02:29:16,500][324563] Updated weights for policy 0, policy_version 38274 (0.0006) -[2026-06-07 02:29:16,700][324563] Updated weights for policy 0, policy_version 38284 (0.0009) -[2026-06-07 02:29:16,910][324563] Updated weights for policy 0, policy_version 38294 (0.0011) -[2026-06-07 02:29:17,627][324563] Updated weights for policy 0, policy_version 38304 (0.0007) -[2026-06-07 02:29:17,846][324563] Updated weights for policy 0, policy_version 38314 (0.0007) -[2026-06-07 02:29:18,055][324563] Updated weights for policy 0, policy_version 38324 (0.0006) -[2026-06-07 02:29:18,261][324563] Updated weights for policy 0, policy_version 38334 (0.0007) -[2026-06-07 02:29:18,451][324563] Updated weights for policy 0, policy_version 38344 (0.0007) -[2026-06-07 02:29:18,674][324563] Updated weights for policy 0, policy_version 38354 (0.0006) -[2026-06-07 02:29:19,413][324563] Updated weights for policy 0, policy_version 38364 (0.0007) -[2026-06-07 02:29:19,604][324563] Updated weights for policy 0, policy_version 38374 (0.0009) -[2026-06-07 02:29:19,806][324563] Updated weights for policy 0, policy_version 38384 (0.0008) -[2026-06-07 02:29:20,017][324563] Updated weights for policy 0, policy_version 38394 (0.0010) -[2026-06-07 02:29:20,245][324563] Updated weights for policy 0, policy_version 38404 (0.0011) -[2026-06-07 02:29:20,456][324563] Updated weights for policy 0, policy_version 38414 (0.0011) -[2026-06-07 02:29:20,683][324563] Updated weights for policy 0, policy_version 38424 (0.0010) -[2026-06-07 02:29:20,744][321791] Fps is (10 sec: 19660.6, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 19693568. Throughput: 0: 17501.8. Samples: 19687552. Policy #0 lag: (min: 85.0, avg: 108.1, max: 154.0) -[2026-06-07 02:29:20,745][321791] Avg episode reward: [(0, '1305.649')] -[2026-06-07 02:29:21,421][324563] Updated weights for policy 0, policy_version 38434 (0.0007) -[2026-06-07 02:29:21,634][324563] Updated weights for policy 0, policy_version 38444 (0.0006) -[2026-06-07 02:29:21,858][324563] Updated weights for policy 0, policy_version 38455 (0.0006) -[2026-06-07 02:29:22,058][324563] Updated weights for policy 0, policy_version 38465 (0.0006) -[2026-06-07 02:29:22,282][324563] Updated weights for policy 0, policy_version 38476 (0.0007) -[2026-06-07 02:29:22,483][324563] Updated weights for policy 0, policy_version 38486 (0.0007) -[2026-06-07 02:29:23,225][324563] Updated weights for policy 0, policy_version 38496 (0.0006) -[2026-06-07 02:29:23,459][324563] Updated weights for policy 0, policy_version 38507 (0.0007) -[2026-06-07 02:29:23,716][324563] Updated weights for policy 0, policy_version 38518 (0.0006) -[2026-06-07 02:29:23,910][324563] Updated weights for policy 0, policy_version 38528 (0.0007) -[2026-06-07 02:29:24,130][324563] Updated weights for policy 0, policy_version 38538 (0.0007) -[2026-06-07 02:29:24,359][324563] Updated weights for policy 0, policy_version 38548 (0.0006) -[2026-06-07 02:29:25,105][324563] Updated weights for policy 0, policy_version 38559 (0.0007) -[2026-06-07 02:29:25,333][324563] Updated weights for policy 0, policy_version 38570 (0.0006) -[2026-06-07 02:29:25,526][324563] Updated weights for policy 0, policy_version 38580 (0.0007) -[2026-06-07 02:29:25,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 19759104. Throughput: 0: 17567.3. Samples: 19792512. Policy #0 lag: (min: 85.0, avg: 108.1, max: 154.0) -[2026-06-07 02:29:25,745][321791] Avg episode reward: [(0, '1289.300')] -[2026-06-07 02:29:25,766][324563] Updated weights for policy 0, policy_version 38591 (0.0007) -[2026-06-07 02:29:25,979][324563] Updated weights for policy 0, policy_version 38601 (0.0007) -[2026-06-07 02:29:26,203][324563] Updated weights for policy 0, policy_version 38611 (0.0006) -[2026-06-07 02:29:26,954][324563] Updated weights for policy 0, policy_version 38621 (0.0006) -[2026-06-07 02:29:27,179][324563] Updated weights for policy 0, policy_version 38631 (0.0006) -[2026-06-07 02:29:27,395][324563] Updated weights for policy 0, policy_version 38641 (0.0007) -[2026-06-07 02:29:27,613][324563] Updated weights for policy 0, policy_version 38651 (0.0006) -[2026-06-07 02:29:27,815][324563] Updated weights for policy 0, policy_version 38661 (0.0006) -[2026-06-07 02:29:28,030][324563] Updated weights for policy 0, policy_version 38671 (0.0007) -[2026-06-07 02:29:28,768][324563] Updated weights for policy 0, policy_version 38681 (0.0007) -[2026-06-07 02:29:28,982][324563] Updated weights for policy 0, policy_version 38691 (0.0006) -[2026-06-07 02:29:29,189][324563] Updated weights for policy 0, policy_version 38701 (0.0007) -[2026-06-07 02:29:29,395][324563] Updated weights for policy 0, policy_version 38711 (0.0006) -[2026-06-07 02:29:29,595][324563] Updated weights for policy 0, policy_version 38721 (0.0007) -[2026-06-07 02:29:29,796][324563] Updated weights for policy 0, policy_version 38731 (0.0006) -[2026-06-07 02:29:29,993][324563] Updated weights for policy 0, policy_version 38741 (0.0006) -[2026-06-07 02:29:30,733][324563] Updated weights for policy 0, policy_version 38752 (0.0007) -[2026-06-07 02:29:30,744][321791] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 19857408. Throughput: 0: 17672.6. Samples: 19906304. Policy #0 lag: (min: 85.0, avg: 108.1, max: 154.0) -[2026-06-07 02:29:30,745][321791] Avg episode reward: [(0, '1281.462')] -[2026-06-07 02:29:30,960][324563] Updated weights for policy 0, policy_version 38762 (0.0006) -[2026-06-07 02:29:31,166][324563] Updated weights for policy 0, policy_version 38772 (0.0007) -[2026-06-07 02:29:31,375][324563] Updated weights for policy 0, policy_version 38782 (0.0008) -[2026-06-07 02:29:31,571][324563] Updated weights for policy 0, policy_version 38792 (0.0008) -[2026-06-07 02:29:31,791][324563] Updated weights for policy 0, policy_version 38802 (0.0008) -[2026-06-07 02:29:32,547][324563] Updated weights for policy 0, policy_version 38812 (0.0007) -[2026-06-07 02:29:32,762][324563] Updated weights for policy 0, policy_version 38822 (0.0006) -[2026-06-07 02:29:32,961][324563] Updated weights for policy 0, policy_version 38832 (0.0006) -[2026-06-07 02:29:33,172][324563] Updated weights for policy 0, policy_version 38842 (0.0007) -[2026-06-07 02:29:33,372][324563] Updated weights for policy 0, policy_version 38852 (0.0007) -[2026-06-07 02:29:33,576][324563] Updated weights for policy 0, policy_version 38862 (0.0007) -[2026-06-07 02:29:33,801][324563] Updated weights for policy 0, policy_version 38872 (0.0006) -[2026-06-07 02:29:34,533][324563] Updated weights for policy 0, policy_version 38883 (0.0006) -[2026-06-07 02:29:34,726][324563] Updated weights for policy 0, policy_version 38893 (0.0006) -[2026-06-07 02:29:34,923][324563] Updated weights for policy 0, policy_version 38903 (0.0006) -[2026-06-07 02:29:35,129][324563] Updated weights for policy 0, policy_version 38913 (0.0007) -[2026-06-07 02:29:35,343][324563] Updated weights for policy 0, policy_version 38923 (0.0006) -[2026-06-07 02:29:35,531][324563] Updated weights for policy 0, policy_version 38933 (0.0006) -[2026-06-07 02:29:35,744][321791] Fps is (10 sec: 19660.5, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 19955712. Throughput: 0: 17564.4. Samples: 19951872. Policy #0 lag: (min: 85.0, avg: 108.1, max: 154.0) -[2026-06-07 02:29:35,745][321791] Avg episode reward: [(0, '1310.473')] -[2026-06-07 02:29:36,264][324563] Updated weights for policy 0, policy_version 38943 (0.0006) -[2026-06-07 02:29:36,478][324563] Updated weights for policy 0, policy_version 38953 (0.0006) -[2026-06-07 02:29:36,670][324563] Updated weights for policy 0, policy_version 38963 (0.0006) -[2026-06-07 02:29:36,876][324563] Updated weights for policy 0, policy_version 38973 (0.0006) -[2026-06-07 02:29:37,083][324563] Updated weights for policy 0, policy_version 38983 (0.0006) -[2026-06-07 02:29:37,321][324563] Updated weights for policy 0, policy_version 38993 (0.0006) -[2026-06-07 02:29:38,022][324563] Updated weights for policy 0, policy_version 39003 (0.0006) -[2026-06-07 02:29:38,236][324563] Updated weights for policy 0, policy_version 39013 (0.0006) -[2026-06-07 02:29:38,444][324563] Updated weights for policy 0, policy_version 39023 (0.0007) -[2026-06-07 02:29:38,653][324563] Updated weights for policy 0, policy_version 39033 (0.0007) -[2026-06-07 02:29:38,848][324563] Updated weights for policy 0, policy_version 39043 (0.0006) -[2026-06-07 02:29:39,039][324563] Updated weights for policy 0, policy_version 39053 (0.0006) -[2026-06-07 02:29:39,256][324563] Updated weights for policy 0, policy_version 39063 (0.0007) -[2026-06-07 02:29:39,975][324563] Updated weights for policy 0, policy_version 39073 (0.0006) -[2026-06-07 02:29:40,204][324563] Updated weights for policy 0, policy_version 39083 (0.0006) -[2026-06-07 02:29:40,394][324563] Updated weights for policy 0, policy_version 39093 (0.0007) -[2026-06-07 02:29:40,630][324563] Updated weights for policy 0, policy_version 39103 (0.0006) -[2026-06-07 02:29:40,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 20021248. Throughput: 0: 17698.2. Samples: 20061696. Policy #0 lag: (min: 85.0, avg: 108.1, max: 154.0) -[2026-06-07 02:29:40,745][321791] Avg episode reward: [(0, '1275.778')] -[2026-06-07 02:29:40,852][324563] Updated weights for policy 0, policy_version 39113 (0.0006) -[2026-06-07 02:29:41,073][324563] Updated weights for policy 0, policy_version 39123 (0.0006) -[2026-06-07 02:29:41,765][324563] Updated weights for policy 0, policy_version 39133 (0.0006) -[2026-06-07 02:29:41,956][324563] Updated weights for policy 0, policy_version 39143 (0.0006) -[2026-06-07 02:29:42,158][324563] Updated weights for policy 0, policy_version 39153 (0.0006) -[2026-06-07 02:29:42,354][324563] Updated weights for policy 0, policy_version 39163 (0.0006) -[2026-06-07 02:29:42,561][324563] Updated weights for policy 0, policy_version 39173 (0.0006) -[2026-06-07 02:29:42,781][324563] Updated weights for policy 0, policy_version 39183 (0.0007) -[2026-06-07 02:29:43,518][324563] Updated weights for policy 0, policy_version 39193 (0.0006) -[2026-06-07 02:29:43,704][324563] Updated weights for policy 0, policy_version 39203 (0.0006) -[2026-06-07 02:29:43,944][324563] Updated weights for policy 0, policy_version 39214 (0.0007) -[2026-06-07 02:29:44,170][324563] Updated weights for policy 0, policy_version 39224 (0.0007) -[2026-06-07 02:29:44,379][324563] Updated weights for policy 0, policy_version 39234 (0.0007) -[2026-06-07 02:29:44,592][324563] Updated weights for policy 0, policy_version 39244 (0.0006) -[2026-06-07 02:29:44,815][324563] Updated weights for policy 0, policy_version 39254 (0.0006) -[2026-06-07 02:29:45,540][324563] Updated weights for policy 0, policy_version 39264 (0.0006) -[2026-06-07 02:29:45,744][321791] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 20119552. Throughput: 0: 17547.4. Samples: 20168320. Policy #0 lag: (min: 85.0, avg: 108.1, max: 154.0) -[2026-06-07 02:29:45,745][321791] Avg episode reward: [(0, '1318.861')] -[2026-06-07 02:29:45,747][324563] Updated weights for policy 0, policy_version 39274 (0.0006) -[2026-06-07 02:29:45,956][324563] Updated weights for policy 0, policy_version 39284 (0.0007) -[2026-06-07 02:29:46,168][324563] Updated weights for policy 0, policy_version 39294 (0.0006) -[2026-06-07 02:29:46,396][324563] Updated weights for policy 0, policy_version 39304 (0.0007) -[2026-06-07 02:29:46,617][324563] Updated weights for policy 0, policy_version 39314 (0.0007) -[2026-06-07 02:29:47,324][324563] Updated weights for policy 0, policy_version 39324 (0.0006) -[2026-06-07 02:29:47,542][324563] Updated weights for policy 0, policy_version 39334 (0.0006) -[2026-06-07 02:29:47,753][324563] Updated weights for policy 0, policy_version 39344 (0.0007) -[2026-06-07 02:29:47,949][324563] Updated weights for policy 0, policy_version 39354 (0.0006) -[2026-06-07 02:29:48,176][324563] Updated weights for policy 0, policy_version 39364 (0.0007) -[2026-06-07 02:29:48,381][324563] Updated weights for policy 0, policy_version 39374 (0.0007) -[2026-06-07 02:29:48,605][324563] Updated weights for policy 0, policy_version 39384 (0.0006) -[2026-06-07 02:29:49,299][324563] Updated weights for policy 0, policy_version 39394 (0.0006) -[2026-06-07 02:29:49,502][324563] Updated weights for policy 0, policy_version 39404 (0.0006) -[2026-06-07 02:29:49,714][324563] Updated weights for policy 0, policy_version 39414 (0.0007) -[2026-06-07 02:29:49,925][324563] Updated weights for policy 0, policy_version 39424 (0.0006) -[2026-06-07 02:29:50,130][324563] Updated weights for policy 0, policy_version 39434 (0.0006) -[2026-06-07 02:29:50,316][324563] Updated weights for policy 0, policy_version 39444 (0.0006) -[2026-06-07 02:29:50,744][321791] Fps is (10 sec: 19660.6, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 20217856. Throughput: 0: 17490.4. Samples: 20213120. Policy #0 lag: (min: 88.0, avg: 111.2, max: 153.0) -[2026-06-07 02:29:50,745][321791] Avg episode reward: [(0, '1333.453')] -[2026-06-07 02:29:51,051][324563] Updated weights for policy 0, policy_version 39454 (0.0006) -[2026-06-07 02:29:51,245][324563] Updated weights for policy 0, policy_version 39464 (0.0006) -[2026-06-07 02:29:51,451][324563] Updated weights for policy 0, policy_version 39474 (0.0006) -[2026-06-07 02:29:51,659][324563] Updated weights for policy 0, policy_version 39484 (0.0006) -[2026-06-07 02:29:51,878][324563] Updated weights for policy 0, policy_version 39494 (0.0007) -[2026-06-07 02:29:52,083][324563] Updated weights for policy 0, policy_version 39504 (0.0006) -[2026-06-07 02:29:52,816][324563] Updated weights for policy 0, policy_version 39514 (0.0006) -[2026-06-07 02:29:53,021][324563] Updated weights for policy 0, policy_version 39524 (0.0006) -[2026-06-07 02:29:53,257][324563] Updated weights for policy 0, policy_version 39534 (0.0006) -[2026-06-07 02:29:53,464][324563] Updated weights for policy 0, policy_version 39544 (0.0006) -[2026-06-07 02:29:53,656][324563] Updated weights for policy 0, policy_version 39554 (0.0006) -[2026-06-07 02:29:53,856][324563] Updated weights for policy 0, policy_version 39564 (0.0006) -[2026-06-07 02:29:54,059][324563] Updated weights for policy 0, policy_version 39574 (0.0006) -[2026-06-07 02:29:54,812][324563] Updated weights for policy 0, policy_version 39584 (0.0006) -[2026-06-07 02:29:55,029][324563] Updated weights for policy 0, policy_version 39594 (0.0006) -[2026-06-07 02:29:55,234][324563] Updated weights for policy 0, policy_version 39604 (0.0009) -[2026-06-07 02:29:55,436][324563] Updated weights for policy 0, policy_version 39614 (0.0006) -[2026-06-07 02:29:55,643][324563] Updated weights for policy 0, policy_version 39624 (0.0006) -[2026-06-07 02:29:55,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 20283392. Throughput: 0: 17726.6. Samples: 20326784. Policy #0 lag: (min: 88.0, avg: 111.2, max: 153.0) -[2026-06-07 02:29:55,745][321791] Avg episode reward: [(0, '1342.622')] -[2026-06-07 02:29:55,849][324563] Updated weights for policy 0, policy_version 39634 (0.0007) -[2026-06-07 02:29:56,566][324563] Updated weights for policy 0, policy_version 39644 (0.0007) -[2026-06-07 02:29:56,775][324563] Updated weights for policy 0, policy_version 39654 (0.0007) -[2026-06-07 02:29:56,952][324563] Updated weights for policy 0, policy_version 39664 (0.0006) -[2026-06-07 02:29:57,146][324563] Updated weights for policy 0, policy_version 39674 (0.0006) -[2026-06-07 02:29:57,366][324563] Updated weights for policy 0, policy_version 39684 (0.0006) -[2026-06-07 02:29:57,569][324563] Updated weights for policy 0, policy_version 39694 (0.0006) -[2026-06-07 02:29:58,330][324563] Updated weights for policy 0, policy_version 39705 (0.0007) -[2026-06-07 02:29:58,549][324563] Updated weights for policy 0, policy_version 39716 (0.0006) -[2026-06-07 02:29:58,770][324563] Updated weights for policy 0, policy_version 39726 (0.0006) -[2026-06-07 02:29:58,977][324563] Updated weights for policy 0, policy_version 39736 (0.0006) -[2026-06-07 02:29:59,211][324563] Updated weights for policy 0, policy_version 39746 (0.0007) -[2026-06-07 02:29:59,410][324563] Updated weights for policy 0, policy_version 39756 (0.0006) -[2026-06-07 02:29:59,654][324563] Updated weights for policy 0, policy_version 39767 (0.0007) -[2026-06-07 02:30:00,371][324563] Updated weights for policy 0, policy_version 39777 (0.0006) -[2026-06-07 02:30:00,589][324563] Updated weights for policy 0, policy_version 39787 (0.0006) -[2026-06-07 02:30:00,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.2, 300 sec: 17661.4). Total num frames: 20381696. Throughput: 0: 17439.2. Samples: 20428288. Policy #0 lag: (min: 88.0, avg: 111.2, max: 153.0) -[2026-06-07 02:30:00,745][321791] Avg episode reward: [(0, '1339.458')] -[2026-06-07 02:30:00,857][324563] Updated weights for policy 0, policy_version 39798 (0.0007) -[2026-06-07 02:30:01,052][324563] Updated weights for policy 0, policy_version 39808 (0.0007) -[2026-06-07 02:30:01,266][324563] Updated weights for policy 0, policy_version 39818 (0.0009) -[2026-06-07 02:30:01,476][324563] Updated weights for policy 0, policy_version 39828 (0.0007) -[2026-06-07 02:30:02,161][324563] Updated weights for policy 0, policy_version 39838 (0.0007) -[2026-06-07 02:30:02,365][324563] Updated weights for policy 0, policy_version 39848 (0.0007) -[2026-06-07 02:30:02,561][324563] Updated weights for policy 0, policy_version 39858 (0.0006) -[2026-06-07 02:30:02,765][324563] Updated weights for policy 0, policy_version 39868 (0.0006) -[2026-06-07 02:30:02,959][324563] Updated weights for policy 0, policy_version 39878 (0.0007) -[2026-06-07 02:30:03,167][324563] Updated weights for policy 0, policy_version 39888 (0.0007) -[2026-06-07 02:30:03,927][324563] Updated weights for policy 0, policy_version 39898 (0.0007) -[2026-06-07 02:30:04,123][324563] Updated weights for policy 0, policy_version 39908 (0.0007) -[2026-06-07 02:30:04,313][324563] Updated weights for policy 0, policy_version 39918 (0.0006) -[2026-06-07 02:30:04,507][324563] Updated weights for policy 0, policy_version 39928 (0.0006) -[2026-06-07 02:30:04,709][324563] Updated weights for policy 0, policy_version 39938 (0.0006) -[2026-06-07 02:30:04,929][324563] Updated weights for policy 0, policy_version 39948 (0.0006) -[2026-06-07 02:30:05,127][324563] Updated weights for policy 0, policy_version 39958 (0.0006) -[2026-06-07 02:30:05,744][321791] Fps is (10 sec: 19660.8, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 20480000. Throughput: 0: 17561.6. Samples: 20477824. Policy #0 lag: (min: 88.0, avg: 111.2, max: 153.0) -[2026-06-07 02:30:05,745][321791] Avg episode reward: [(0, '1410.114')] -[2026-06-07 02:30:05,854][324563] Updated weights for policy 0, policy_version 39968 (0.0006) -[2026-06-07 02:30:06,105][324563] Updated weights for policy 0, policy_version 39979 (0.0006) -[2026-06-07 02:30:06,304][324563] Updated weights for policy 0, policy_version 39989 (0.0006) -[2026-06-07 02:30:06,502][324563] Updated weights for policy 0, policy_version 39999 (0.0007) -[2026-06-07 02:30:06,730][324563] Updated weights for policy 0, policy_version 40009 (0.0006) -[2026-06-07 02:30:06,941][324563] Updated weights for policy 0, policy_version 40019 (0.0006) -[2026-06-07 02:30:07,664][324563] Updated weights for policy 0, policy_version 40029 (0.0007) -[2026-06-07 02:30:07,884][324563] Updated weights for policy 0, policy_version 40039 (0.0007) -[2026-06-07 02:30:08,094][324563] Updated weights for policy 0, policy_version 40049 (0.0006) -[2026-06-07 02:30:08,304][324563] Updated weights for policy 0, policy_version 40059 (0.0006) -[2026-06-07 02:30:08,509][324563] Updated weights for policy 0, policy_version 40069 (0.0007) -[2026-06-07 02:30:08,736][324563] Updated weights for policy 0, policy_version 40080 (0.0006) -[2026-06-07 02:30:09,504][324563] Updated weights for policy 0, policy_version 40091 (0.0007) -[2026-06-07 02:30:09,704][324563] Updated weights for policy 0, policy_version 40101 (0.0007) -[2026-06-07 02:30:09,930][324563] Updated weights for policy 0, policy_version 40111 (0.0006) -[2026-06-07 02:30:10,136][324563] Updated weights for policy 0, policy_version 40121 (0.0006) -[2026-06-07 02:30:10,336][324563] Updated weights for policy 0, policy_version 40131 (0.0006) -[2026-06-07 02:30:10,539][324563] Updated weights for policy 0, policy_version 40141 (0.0006) -[2026-06-07 02:30:10,744][321791] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 20545536. Throughput: 0: 17712.3. Samples: 20589568. Policy #0 lag: (min: 88.0, avg: 111.2, max: 153.0) -[2026-06-07 02:30:10,745][321791] Avg episode reward: [(0, '1378.601')] -[2026-06-07 02:30:10,753][324563] Updated weights for policy 0, policy_version 40151 (0.0006) -[2026-06-07 02:30:11,508][324563] Updated weights for policy 0, policy_version 40161 (0.0006) -[2026-06-07 02:30:11,708][324563] Updated weights for policy 0, policy_version 40171 (0.0006) -[2026-06-07 02:30:11,931][324563] Updated weights for policy 0, policy_version 40182 (0.0006) -[2026-06-07 02:30:12,189][324563] Updated weights for policy 0, policy_version 40193 (0.0006) -[2026-06-07 02:30:12,400][324563] Updated weights for policy 0, policy_version 40203 (0.0006) -[2026-06-07 02:30:12,613][324563] Updated weights for policy 0, policy_version 40213 (0.0007) -[2026-06-07 02:30:13,350][324563] Updated weights for policy 0, policy_version 40224 (0.0007) -[2026-06-07 02:30:13,536][324563] Updated weights for policy 0, policy_version 40234 (0.0006) -[2026-06-07 02:30:13,760][324563] Updated weights for policy 0, policy_version 40244 (0.0007) -[2026-06-07 02:30:13,956][324563] Updated weights for policy 0, policy_version 40254 (0.0007) -[2026-06-07 02:30:14,155][324563] Updated weights for policy 0, policy_version 40264 (0.0006) -[2026-06-07 02:30:14,344][324563] Updated weights for policy 0, policy_version 40274 (0.0006) -[2026-06-07 02:30:15,111][324563] Updated weights for policy 0, policy_version 40284 (0.0006) -[2026-06-07 02:30:15,309][324563] Updated weights for policy 0, policy_version 40294 (0.0006) -[2026-06-07 02:30:15,503][324563] Updated weights for policy 0, policy_version 40304 (0.0006) -[2026-06-07 02:30:15,720][324563] Updated weights for policy 0, policy_version 40314 (0.0006) -[2026-06-07 02:30:15,744][321791] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 20643840. Throughput: 0: 17470.6. Samples: 20692480. Policy #0 lag: (min: 63.0, avg: 78.7, max: 127.0) -[2026-06-07 02:30:15,745][321791] Avg episode reward: [(0, '1433.048')] -[2026-06-07 02:30:15,940][324563] Updated weights for policy 0, policy_version 40324 (0.0007) -[2026-06-07 02:30:16,132][324563] Updated weights for policy 0, policy_version 40334 (0.0006) -[2026-06-07 02:30:16,343][324563] Updated weights for policy 0, policy_version 40344 (0.0006) -[2026-06-07 02:30:17,068][324563] Updated weights for policy 0, policy_version 40354 (0.0006) -[2026-06-07 02:30:17,290][324563] Updated weights for policy 0, policy_version 40364 (0.0006) -[2026-06-07 02:30:17,525][324563] Updated weights for policy 0, policy_version 40375 (0.0006) -[2026-06-07 02:30:17,732][324563] Updated weights for policy 0, policy_version 40385 (0.0007) -[2026-06-07 02:30:17,954][324563] Updated weights for policy 0, policy_version 40395 (0.0007) -[2026-06-07 02:30:18,164][324563] Updated weights for policy 0, policy_version 40405 (0.0007) -[2026-06-07 02:30:18,921][324563] Updated weights for policy 0, policy_version 40415 (0.0006) -[2026-06-07 02:30:19,125][324563] Updated weights for policy 0, policy_version 40425 (0.0012) -[2026-06-07 02:30:19,344][324563] Updated weights for policy 0, policy_version 40435 (0.0011) -[2026-06-07 02:30:19,553][324563] Updated weights for policy 0, policy_version 40445 (0.0011) -[2026-06-07 02:30:19,732][324563] Updated weights for policy 0, policy_version 40455 (0.0011) -[2026-06-07 02:30:19,972][324563] Updated weights for policy 0, policy_version 40466 (0.0009) -[2026-06-07 02:30:20,722][324563] Updated weights for policy 0, policy_version 40476 (0.0011) -[2026-06-07 02:30:20,744][321791] Fps is (10 sec: 19660.4, 60 sec: 17476.2, 300 sec: 17661.4). Total num frames: 20742144. Throughput: 0: 17666.8. Samples: 20746880. Policy #0 lag: (min: 63.0, avg: 78.7, max: 127.0) -[2026-06-07 02:30:20,745][321791] Avg episode reward: [(0, '1453.737')] -[2026-06-07 02:30:20,916][324563] Updated weights for policy 0, policy_version 40486 (0.0007) -[2026-06-07 02:30:21,109][324563] Updated weights for policy 0, policy_version 40496 (0.0006) -[2026-06-07 02:30:21,313][324563] Updated weights for policy 0, policy_version 40506 (0.0006) -[2026-06-07 02:30:21,523][324563] Updated weights for policy 0, policy_version 40516 (0.0006) -[2026-06-07 02:30:21,712][324563] Updated weights for policy 0, policy_version 40526 (0.0006) -[2026-06-07 02:30:21,899][324276] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed13/checkpoint_p0/checkpoint_000040536_20774912.pth... -[2026-06-07 02:30:21,899][324563] Updated weights for policy 0, policy_version 40536 (0.0006) -[2026-06-07 02:30:21,917][324276] Saving new best policy, reward=1453.737! -[2026-06-07 02:30:22,657][324563] Updated weights for policy 0, policy_version 40546 (0.0006) -[2026-06-07 02:30:22,861][324563] Updated weights for policy 0, policy_version 40556 (0.0006) -[2026-06-07 02:30:23,073][324563] Updated weights for policy 0, policy_version 40566 (0.0006) -[2026-06-07 02:30:23,270][324563] Updated weights for policy 0, policy_version 40576 (0.0006) -[2026-06-07 02:30:23,469][324563] Updated weights for policy 0, policy_version 40586 (0.0006) -[2026-06-07 02:30:23,709][324563] Updated weights for policy 0, policy_version 40596 (0.0006) -[2026-06-07 02:30:24,454][324563] Updated weights for policy 0, policy_version 40606 (0.0007) -[2026-06-07 02:30:24,665][324563] Updated weights for policy 0, policy_version 40616 (0.0006) -[2026-06-07 02:30:24,859][324563] Updated weights for policy 0, policy_version 40626 (0.0006) -[2026-06-07 02:30:25,086][324563] Updated weights for policy 0, policy_version 40636 (0.0006) -[2026-06-07 02:30:25,297][324563] Updated weights for policy 0, policy_version 40646 (0.0006) -[2026-06-07 02:30:25,524][324563] Updated weights for policy 0, policy_version 40656 (0.0006) -[2026-06-07 02:30:25,744][321791] Fps is (10 sec: 19660.6, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 20840448. Throughput: 0: 17627.0. Samples: 20854912. Policy #0 lag: (min: 63.0, avg: 78.7, max: 127.0) -[2026-06-07 02:30:25,745][321791] Avg episode reward: [(0, '1453.737')] -[2026-06-07 02:30:26,248][324563] Updated weights for policy 0, policy_version 40666 (0.0007) -[2026-06-07 02:30:26,460][324563] Updated weights for policy 0, policy_version 40676 (0.0007) -[2026-06-07 02:30:26,684][324563] Updated weights for policy 0, policy_version 40686 (0.0006) -[2026-06-07 02:30:26,900][324563] Updated weights for policy 0, policy_version 40696 (0.0006) -[2026-06-07 02:30:27,099][324563] Updated weights for policy 0, policy_version 40706 (0.0006) -[2026-06-07 02:30:27,325][324563] Updated weights for policy 0, policy_version 40716 (0.0006) -[2026-06-07 02:30:27,562][324563] Updated weights for policy 0, policy_version 40727 (0.0006) -[2026-06-07 02:30:28,289][324563] Updated weights for policy 0, policy_version 40738 (0.0006) -[2026-06-07 02:30:28,511][324563] Updated weights for policy 0, policy_version 40748 (0.0006) -[2026-06-07 02:30:28,720][324563] Updated weights for policy 0, policy_version 40758 (0.0006) -[2026-06-07 02:30:28,927][324563] Updated weights for policy 0, policy_version 40768 (0.0007) -[2026-06-07 02:30:29,146][324563] Updated weights for policy 0, policy_version 40778 (0.0006) -[2026-06-07 02:30:29,344][324563] Updated weights for policy 0, policy_version 40788 (0.0006) -[2026-06-07 02:30:30,102][324563] Updated weights for policy 0, policy_version 40798 (0.0007) -[2026-06-07 02:30:30,303][324563] Updated weights for policy 0, policy_version 40808 (0.0007) -[2026-06-07 02:30:30,501][324563] Updated weights for policy 0, policy_version 40818 (0.0006) -[2026-06-07 02:30:30,712][324563] Updated weights for policy 0, policy_version 40828 (0.0006) -[2026-06-07 02:30:30,744][321791] Fps is (10 sec: 16384.3, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 20905984. Throughput: 0: 17550.2. Samples: 20958080. Policy #0 lag: (min: 63.0, avg: 78.7, max: 127.0) -[2026-06-07 02:30:30,745][321791] Avg episode reward: [(0, '1475.455')] -[2026-06-07 02:30:30,942][324563] Updated weights for policy 0, policy_version 40838 (0.0007) -[2026-06-07 02:30:31,141][324563] Updated weights for policy 0, policy_version 40848 (0.0006) -[2026-06-07 02:30:31,316][324276] Saving new best policy, reward=1475.455! -[2026-06-07 02:30:31,875][324563] Updated weights for policy 0, policy_version 40858 (0.0006) -[2026-06-07 02:30:32,096][324563] Updated weights for policy 0, policy_version 40868 (0.0006) -[2026-06-07 02:30:32,314][324563] Updated weights for policy 0, policy_version 40879 (0.0006) -[2026-06-07 02:30:32,514][324563] Updated weights for policy 0, policy_version 40889 (0.0006) -[2026-06-07 02:30:32,737][324563] Updated weights for policy 0, policy_version 40899 (0.0007) -[2026-06-07 02:30:32,962][324563] Updated weights for policy 0, policy_version 40909 (0.0006) -[2026-06-07 02:30:33,201][324563] Updated weights for policy 0, policy_version 40920 (0.0006) -[2026-06-07 02:30:33,927][324563] Updated weights for policy 0, policy_version 40930 (0.0007) -[2026-06-07 02:30:34,148][324563] Updated weights for policy 0, policy_version 40940 (0.0007) -[2026-06-07 02:30:34,370][324563] Updated weights for policy 0, policy_version 40950 (0.0007) -[2026-06-07 02:30:34,581][324563] Updated weights for policy 0, policy_version 40960 (0.0007) -[2026-06-07 02:30:34,771][324563] Updated weights for policy 0, policy_version 40970 (0.0006) -[2026-06-07 02:30:34,999][324563] Updated weights for policy 0, policy_version 40980 (0.0006) -[2026-06-07 02:30:35,726][324563] Updated weights for policy 0, policy_version 40990 (0.0006) -[2026-06-07 02:30:35,744][321791] Fps is (10 sec: 16384.1, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 21004288. Throughput: 0: 17757.9. Samples: 21012224. Policy #0 lag: (min: 63.0, avg: 78.7, max: 127.0) -[2026-06-07 02:30:35,745][321791] Avg episode reward: [(0, '1469.471')] -[2026-06-07 02:30:35,912][324563] Updated weights for policy 0, policy_version 41000 (0.0007) -[2026-06-07 02:30:36,108][324563] Updated weights for policy 0, policy_version 41010 (0.0006) -[2026-06-07 02:30:36,318][324563] Updated weights for policy 0, policy_version 41020 (0.0006) -[2026-06-07 02:30:36,535][324563] Updated weights for policy 0, policy_version 41030 (0.0006) -[2026-06-07 02:30:36,756][324563] Updated weights for policy 0, policy_version 41040 (0.0006) -[2026-06-07 02:30:37,489][324563] Updated weights for policy 0, policy_version 41050 (0.0006) -[2026-06-07 02:30:37,704][324563] Updated weights for policy 0, policy_version 41060 (0.0006) -[2026-06-07 02:30:37,905][324563] Updated weights for policy 0, policy_version 41070 (0.0006) -[2026-06-07 02:30:38,101][324563] Updated weights for policy 0, policy_version 41080 (0.0006) -[2026-06-07 02:30:38,310][324563] Updated weights for policy 0, policy_version 41091 (0.0006) -[2026-06-07 02:30:38,540][324563] Updated weights for policy 0, policy_version 41101 (0.0007) -[2026-06-07 02:30:38,732][324563] Updated weights for policy 0, policy_version 41111 (0.0007) -[2026-06-07 02:30:39,510][324563] Updated weights for policy 0, policy_version 41121 (0.0007) -[2026-06-07 02:30:39,723][324563] Updated weights for policy 0, policy_version 41131 (0.0007) -[2026-06-07 02:30:39,957][324563] Updated weights for policy 0, policy_version 41142 (0.0007) -[2026-06-07 02:30:40,161][324563] Updated weights for policy 0, policy_version 41152 (0.0007) -[2026-06-07 02:30:40,387][324563] Updated weights for policy 0, policy_version 41162 (0.0007) -[2026-06-07 02:30:40,602][324563] Updated weights for policy 0, policy_version 41172 (0.0007) -[2026-06-07 02:30:40,744][321791] Fps is (10 sec: 19660.7, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 21102592. Throughput: 0: 17595.7. Samples: 21118592. Policy #0 lag: (min: 63.0, avg: 78.7, max: 127.0) -[2026-06-07 02:30:40,745][321791] Avg episode reward: [(0, '1459.944')] -[2026-06-07 02:30:41,298][324563] Updated weights for policy 0, policy_version 41182 (0.0006) -[2026-06-07 02:30:41,512][324563] Updated weights for policy 0, policy_version 41192 (0.0006) -[2026-06-07 02:30:41,721][324563] Updated weights for policy 0, policy_version 41202 (0.0007) -[2026-06-07 02:30:41,922][324563] Updated weights for policy 0, policy_version 41212 (0.0007) -[2026-06-07 02:30:42,161][324563] Updated weights for policy 0, policy_version 41222 (0.0006) -[2026-06-07 02:30:42,388][324563] Updated weights for policy 0, policy_version 41233 (0.0006) -[2026-06-07 02:30:43,115][324563] Updated weights for policy 0, policy_version 41243 (0.0007) -[2026-06-07 02:30:43,323][324563] Updated weights for policy 0, policy_version 41253 (0.0007) -[2026-06-07 02:30:43,531][324563] Updated weights for policy 0, policy_version 41263 (0.0007) -[2026-06-07 02:30:43,739][324563] Updated weights for policy 0, policy_version 41273 (0.0006) -[2026-06-07 02:30:43,947][324563] Updated weights for policy 0, policy_version 41283 (0.0006) -[2026-06-07 02:30:44,171][324563] Updated weights for policy 0, policy_version 41294 (0.0006) -[2026-06-07 02:30:44,371][324563] Updated weights for policy 0, policy_version 41304 (0.0006) -[2026-06-07 02:30:45,102][324563] Updated weights for policy 0, policy_version 41314 (0.0006) -[2026-06-07 02:30:45,291][324563] Updated weights for policy 0, policy_version 41324 (0.0006) -[2026-06-07 02:30:45,495][324563] Updated weights for policy 0, policy_version 41334 (0.0006) -[2026-06-07 02:30:45,695][324563] Updated weights for policy 0, policy_version 41344 (0.0006) -[2026-06-07 02:30:45,744][321791] Fps is (10 sec: 16383.8, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 21168128. Throughput: 0: 17590.0. Samples: 21219840. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) -[2026-06-07 02:30:45,746][321791] Avg episode reward: [(0, '1473.187')] -[2026-06-07 02:30:45,906][324563] Updated weights for policy 0, policy_version 41354 (0.0007) -[2026-06-07 02:30:46,132][324563] Updated weights for policy 0, policy_version 41364 (0.0006) -[2026-06-07 02:30:46,866][324563] Updated weights for policy 0, policy_version 41374 (0.0006) -[2026-06-07 02:30:47,065][324563] Updated weights for policy 0, policy_version 41384 (0.0006) -[2026-06-07 02:30:47,268][324563] Updated weights for policy 0, policy_version 41394 (0.0007) -[2026-06-07 02:30:47,468][324563] Updated weights for policy 0, policy_version 41404 (0.0007) -[2026-06-07 02:30:47,694][324563] Updated weights for policy 0, policy_version 41414 (0.0007) -[2026-06-07 02:30:47,911][324563] Updated weights for policy 0, policy_version 41424 (0.0006) -[2026-06-07 02:30:48,654][324563] Updated weights for policy 0, policy_version 41434 (0.0007) -[2026-06-07 02:30:48,869][324563] Updated weights for policy 0, policy_version 41444 (0.0006) -[2026-06-07 02:30:49,079][324563] Updated weights for policy 0, policy_version 41454 (0.0007) -[2026-06-07 02:30:49,271][324563] Updated weights for policy 0, policy_version 41464 (0.0006) -[2026-06-07 02:30:49,492][324563] Updated weights for policy 0, policy_version 41474 (0.0006) -[2026-06-07 02:30:49,694][324563] Updated weights for policy 0, policy_version 41484 (0.0006) -[2026-06-07 02:30:49,881][324563] Updated weights for policy 0, policy_version 41494 (0.0007) -[2026-06-07 02:30:50,623][324563] Updated weights for policy 0, policy_version 41504 (0.0007) -[2026-06-07 02:30:50,744][321791] Fps is (10 sec: 16383.9, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 21266432. Throughput: 0: 17757.8. Samples: 21276928. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) -[2026-06-07 02:30:50,745][321791] Avg episode reward: [(0, '1466.241')] -[2026-06-07 02:30:50,831][324563] Updated weights for policy 0, policy_version 41514 (0.0007) -[2026-06-07 02:30:51,043][324563] Updated weights for policy 0, policy_version 41524 (0.0007) -[2026-06-07 02:30:51,258][324563] Updated weights for policy 0, policy_version 41534 (0.0007) -[2026-06-07 02:30:51,454][324563] Updated weights for policy 0, policy_version 41544 (0.0007) -[2026-06-07 02:30:51,655][324563] Updated weights for policy 0, policy_version 41554 (0.0007) -[2026-06-07 02:30:52,384][324563] Updated weights for policy 0, policy_version 41564 (0.0007) -[2026-06-07 02:30:52,570][324563] Updated weights for policy 0, policy_version 41574 (0.0007) -[2026-06-07 02:30:52,777][324563] Updated weights for policy 0, policy_version 41584 (0.0006) -[2026-06-07 02:30:53,026][324563] Updated weights for policy 0, policy_version 41595 (0.0006) -[2026-06-07 02:30:53,211][324563] Updated weights for policy 0, policy_version 41605 (0.0007) -[2026-06-07 02:30:53,413][324563] Updated weights for policy 0, policy_version 41615 (0.0007) -[2026-06-07 02:30:54,138][324563] Updated weights for policy 0, policy_version 41625 (0.0007) -[2026-06-07 02:30:54,351][324563] Updated weights for policy 0, policy_version 41635 (0.0006) -[2026-06-07 02:30:54,602][324563] Updated weights for policy 0, policy_version 41646 (0.0006) -[2026-06-07 02:30:54,802][324563] Updated weights for policy 0, policy_version 41656 (0.0007) -[2026-06-07 02:30:55,017][324563] Updated weights for policy 0, policy_version 41666 (0.0007) -[2026-06-07 02:30:55,231][324563] Updated weights for policy 0, policy_version 41676 (0.0006) -[2026-06-07 02:30:55,444][324563] Updated weights for policy 0, policy_version 41686 (0.0007) -[2026-06-07 02:30:55,744][321791] Fps is (10 sec: 19661.1, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 21364736. Throughput: 0: 17533.2. Samples: 21378560. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) -[2026-06-07 02:30:55,745][321791] Avg episode reward: [(0, '1458.880')] -[2026-06-07 02:30:56,185][324563] Updated weights for policy 0, policy_version 41697 (0.0007) -[2026-06-07 02:30:56,379][324563] Updated weights for policy 0, policy_version 41707 (0.0007) -[2026-06-07 02:30:56,576][324563] Updated weights for policy 0, policy_version 41717 (0.0007) -[2026-06-07 02:30:56,802][324563] Updated weights for policy 0, policy_version 41728 (0.0006) -[2026-06-07 02:30:57,013][324563] Updated weights for policy 0, policy_version 41738 (0.0006) -[2026-06-07 02:30:57,211][324563] Updated weights for policy 0, policy_version 41748 (0.0006) -[2026-06-07 02:30:57,946][324563] Updated weights for policy 0, policy_version 41758 (0.0006) -[2026-06-07 02:30:58,153][324563] Updated weights for policy 0, policy_version 41768 (0.0006) -[2026-06-07 02:30:58,370][324563] Updated weights for policy 0, policy_version 41778 (0.0007) -[2026-06-07 02:30:58,581][324563] Updated weights for policy 0, policy_version 41788 (0.0006) -[2026-06-07 02:30:58,784][324563] Updated weights for policy 0, policy_version 41798 (0.0006) -[2026-06-07 02:30:58,992][324563] Updated weights for policy 0, policy_version 41808 (0.0006) -[2026-06-07 02:30:59,751][324563] Updated weights for policy 0, policy_version 41818 (0.0006) -[2026-06-07 02:30:59,949][324563] Updated weights for policy 0, policy_version 41828 (0.0007) -[2026-06-07 02:31:00,149][324563] Updated weights for policy 0, policy_version 41838 (0.0007) -[2026-06-07 02:31:00,389][324563] Updated weights for policy 0, policy_version 41849 (0.0006) -[2026-06-07 02:31:00,616][324563] Updated weights for policy 0, policy_version 41859 (0.0006) -[2026-06-07 02:31:00,744][321791] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 21430272. Throughput: 0: 17510.4. Samples: 21480448. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) -[2026-06-07 02:31:00,745][321791] Avg episode reward: [(0, '1497.058')] -[2026-06-07 02:31:00,819][324563] Updated weights for policy 0, policy_version 41869 (0.0006) -[2026-06-07 02:31:01,022][324563] Updated weights for policy 0, policy_version 41879 (0.0006) -[2026-06-07 02:31:01,031][324276] Saving new best policy, reward=1497.058! -[2026-06-07 02:31:01,755][324563] Updated weights for policy 0, policy_version 41889 (0.0006) -[2026-06-07 02:31:01,967][324563] Updated weights for policy 0, policy_version 41899 (0.0006) -[2026-06-07 02:31:02,177][324563] Updated weights for policy 0, policy_version 41909 (0.0006) -[2026-06-07 02:31:02,414][324563] Updated weights for policy 0, policy_version 41919 (0.0007) -[2026-06-07 02:31:02,592][324563] Updated weights for policy 0, policy_version 41929 (0.0006) -[2026-06-07 02:31:02,790][324563] Updated weights for policy 0, policy_version 41939 (0.0006) -[2026-06-07 02:31:03,555][324563] Updated weights for policy 0, policy_version 41949 (0.0006) -[2026-06-07 02:31:03,762][324563] Updated weights for policy 0, policy_version 41959 (0.0006) -[2026-06-07 02:31:03,965][324563] Updated weights for policy 0, policy_version 41969 (0.0006) -[2026-06-07 02:31:04,171][324563] Updated weights for policy 0, policy_version 41979 (0.0007) -[2026-06-07 02:31:04,382][324563] Updated weights for policy 0, policy_version 41989 (0.0006) -[2026-06-07 02:31:04,591][324563] Updated weights for policy 0, policy_version 41999 (0.0007) -[2026-06-07 02:31:05,325][324563] Updated weights for policy 0, policy_version 42009 (0.0006) -[2026-06-07 02:31:05,515][324563] Updated weights for policy 0, policy_version 42019 (0.0006) -[2026-06-07 02:31:05,730][324563] Updated weights for policy 0, policy_version 42029 (0.0007) -[2026-06-07 02:31:05,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 21528576. Throughput: 0: 17584.4. Samples: 21538176. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) -[2026-06-07 02:31:05,745][321791] Avg episode reward: [(0, '1503.107')] -[2026-06-07 02:31:05,932][324563] Updated weights for policy 0, policy_version 42039 (0.0007) -[2026-06-07 02:31:06,130][324563] Updated weights for policy 0, policy_version 42049 (0.0007) -[2026-06-07 02:31:06,352][324563] Updated weights for policy 0, policy_version 42059 (0.0006) -[2026-06-07 02:31:06,549][324563] Updated weights for policy 0, policy_version 42069 (0.0006) -[2026-06-07 02:31:06,611][324276] Saving new best policy, reward=1503.107! -[2026-06-07 02:31:07,277][324563] Updated weights for policy 0, policy_version 42079 (0.0007) -[2026-06-07 02:31:07,476][324563] Updated weights for policy 0, policy_version 42089 (0.0007) -[2026-06-07 02:31:07,678][324563] Updated weights for policy 0, policy_version 42099 (0.0007) -[2026-06-07 02:31:07,873][324563] Updated weights for policy 0, policy_version 42109 (0.0007) -[2026-06-07 02:31:08,084][324563] Updated weights for policy 0, policy_version 42119 (0.0006) -[2026-06-07 02:31:08,265][324563] Updated weights for policy 0, policy_version 42129 (0.0006) -[2026-06-07 02:31:08,998][324563] Updated weights for policy 0, policy_version 42139 (0.0008) -[2026-06-07 02:31:09,232][324563] Updated weights for policy 0, policy_version 42150 (0.0011) -[2026-06-07 02:31:09,445][324563] Updated weights for policy 0, policy_version 42160 (0.0007) -[2026-06-07 02:31:09,644][324563] Updated weights for policy 0, policy_version 42170 (0.0006) -[2026-06-07 02:31:09,836][324563] Updated weights for policy 0, policy_version 42180 (0.0006) -[2026-06-07 02:31:10,042][324563] Updated weights for policy 0, policy_version 42190 (0.0006) -[2026-06-07 02:31:10,744][321791] Fps is (10 sec: 19660.8, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 21626880. Throughput: 0: 17476.3. Samples: 21641344. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) -[2026-06-07 02:31:10,745][321791] Avg episode reward: [(0, '1498.225')] -[2026-06-07 02:31:10,779][324563] Updated weights for policy 0, policy_version 42201 (0.0006) -[2026-06-07 02:31:11,018][324563] Updated weights for policy 0, policy_version 42211 (0.0006) -[2026-06-07 02:31:11,215][324563] Updated weights for policy 0, policy_version 42221 (0.0010) -[2026-06-07 02:31:11,442][324563] Updated weights for policy 0, policy_version 42231 (0.0007) -[2026-06-07 02:31:11,669][324563] Updated weights for policy 0, policy_version 42241 (0.0007) -[2026-06-07 02:31:11,865][324563] Updated weights for policy 0, policy_version 42251 (0.0006) -[2026-06-07 02:31:12,077][324563] Updated weights for policy 0, policy_version 42261 (0.0006) -[2026-06-07 02:31:12,766][324563] Updated weights for policy 0, policy_version 42271 (0.0007) -[2026-06-07 02:31:12,967][324563] Updated weights for policy 0, policy_version 42281 (0.0006) -[2026-06-07 02:31:13,193][324563] Updated weights for policy 0, policy_version 42292 (0.0006) -[2026-06-07 02:31:13,394][324563] Updated weights for policy 0, policy_version 42302 (0.0006) -[2026-06-07 02:31:13,593][324563] Updated weights for policy 0, policy_version 42312 (0.0008) -[2026-06-07 02:31:13,795][324563] Updated weights for policy 0, policy_version 42322 (0.0011) -[2026-06-07 02:31:14,539][324563] Updated weights for policy 0, policy_version 42333 (0.0009) -[2026-06-07 02:31:14,765][324563] Updated weights for policy 0, policy_version 42343 (0.0007) -[2026-06-07 02:31:14,971][324563] Updated weights for policy 0, policy_version 42353 (0.0007) -[2026-06-07 02:31:15,205][324563] Updated weights for policy 0, policy_version 42364 (0.0006) -[2026-06-07 02:31:15,418][324563] Updated weights for policy 0, policy_version 42374 (0.0006) -[2026-06-07 02:31:15,627][324563] Updated weights for policy 0, policy_version 42384 (0.0006) -[2026-06-07 02:31:15,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 21692416. Throughput: 0: 17436.5. Samples: 21742720. Policy #0 lag: (min: 63.0, avg: 79.4, max: 127.0) -[2026-06-07 02:31:15,745][321791] Avg episode reward: [(0, '1517.839')] -[2026-06-07 02:31:15,800][324276] Saving new best policy, reward=1517.839! -[2026-06-07 02:31:16,388][324563] Updated weights for policy 0, policy_version 42394 (0.0007) -[2026-06-07 02:31:16,607][324563] Updated weights for policy 0, policy_version 42404 (0.0006) -[2026-06-07 02:31:16,817][324563] Updated weights for policy 0, policy_version 42414 (0.0006) -[2026-06-07 02:31:17,045][324563] Updated weights for policy 0, policy_version 42425 (0.0007) -[2026-06-07 02:31:17,247][324563] Updated weights for policy 0, policy_version 42435 (0.0007) -[2026-06-07 02:31:17,458][324563] Updated weights for policy 0, policy_version 42445 (0.0006) -[2026-06-07 02:31:17,656][324563] Updated weights for policy 0, policy_version 42455 (0.0006) -[2026-06-07 02:31:18,404][324563] Updated weights for policy 0, policy_version 42466 (0.0007) -[2026-06-07 02:31:18,601][324563] Updated weights for policy 0, policy_version 42476 (0.0006) -[2026-06-07 02:31:18,802][324563] Updated weights for policy 0, policy_version 42486 (0.0007) -[2026-06-07 02:31:19,008][324563] Updated weights for policy 0, policy_version 42496 (0.0006) -[2026-06-07 02:31:19,208][324563] Updated weights for policy 0, policy_version 42506 (0.0007) -[2026-06-07 02:31:19,408][324563] Updated weights for policy 0, policy_version 42516 (0.0007) -[2026-06-07 02:31:20,121][324563] Updated weights for policy 0, policy_version 42526 (0.0006) -[2026-06-07 02:31:20,326][324563] Updated weights for policy 0, policy_version 42536 (0.0006) -[2026-06-07 02:31:20,515][324563] Updated weights for policy 0, policy_version 42546 (0.0006) -[2026-06-07 02:31:20,724][324563] Updated weights for policy 0, policy_version 42556 (0.0006) -[2026-06-07 02:31:20,744][321791] Fps is (10 sec: 16383.7, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 21790720. Throughput: 0: 17513.2. Samples: 21800320. Policy #0 lag: (min: 63.0, avg: 79.4, max: 127.0) -[2026-06-07 02:31:20,745][321791] Avg episode reward: [(0, '1564.140')] -[2026-06-07 02:31:20,923][324563] Updated weights for policy 0, policy_version 42566 (0.0006) -[2026-06-07 02:31:21,144][324563] Updated weights for policy 0, policy_version 42577 (0.0007) -[2026-06-07 02:31:21,271][324276] Saving new best policy, reward=1564.140! -[2026-06-07 02:31:21,864][324563] Updated weights for policy 0, policy_version 42587 (0.0006) -[2026-06-07 02:31:22,052][324563] Updated weights for policy 0, policy_version 42597 (0.0007) -[2026-06-07 02:31:22,254][324563] Updated weights for policy 0, policy_version 42607 (0.0007) -[2026-06-07 02:31:22,470][324563] Updated weights for policy 0, policy_version 42617 (0.0006) -[2026-06-07 02:31:22,683][324563] Updated weights for policy 0, policy_version 42627 (0.0006) -[2026-06-07 02:31:22,896][324563] Updated weights for policy 0, policy_version 42637 (0.0006) -[2026-06-07 02:31:23,122][324563] Updated weights for policy 0, policy_version 42647 (0.0006) -[2026-06-07 02:31:23,851][324563] Updated weights for policy 0, policy_version 42657 (0.0007) -[2026-06-07 02:31:24,055][324563] Updated weights for policy 0, policy_version 42667 (0.0006) -[2026-06-07 02:31:24,275][324563] Updated weights for policy 0, policy_version 42677 (0.0007) -[2026-06-07 02:31:24,480][324563] Updated weights for policy 0, policy_version 42687 (0.0006) -[2026-06-07 02:31:24,684][324563] Updated weights for policy 0, policy_version 42698 (0.0006) -[2026-06-07 02:31:24,881][324563] Updated weights for policy 0, policy_version 42708 (0.0007) -[2026-06-07 02:31:25,601][324563] Updated weights for policy 0, policy_version 42718 (0.0006) -[2026-06-07 02:31:25,744][321791] Fps is (10 sec: 19660.5, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 21889024. Throughput: 0: 17450.6. Samples: 21903872. Policy #0 lag: (min: 63.0, avg: 79.4, max: 127.0) -[2026-06-07 02:31:25,745][321791] Avg episode reward: [(0, '1582.230')] -[2026-06-07 02:31:25,795][324563] Updated weights for policy 0, policy_version 42728 (0.0006) -[2026-06-07 02:31:26,008][324563] Updated weights for policy 0, policy_version 42738 (0.0006) -[2026-06-07 02:31:26,188][324563] Updated weights for policy 0, policy_version 42748 (0.0006) -[2026-06-07 02:31:26,390][324563] Updated weights for policy 0, policy_version 42758 (0.0006) -[2026-06-07 02:31:26,589][324563] Updated weights for policy 0, policy_version 42768 (0.0007) -[2026-06-07 02:31:26,742][324276] Saving new best policy, reward=1582.230! -[2026-06-07 02:31:27,330][324563] Updated weights for policy 0, policy_version 42778 (0.0006) -[2026-06-07 02:31:27,571][324563] Updated weights for policy 0, policy_version 42789 (0.0007) -[2026-06-07 02:31:27,785][324563] Updated weights for policy 0, policy_version 42799 (0.0006) -[2026-06-07 02:31:27,988][324563] Updated weights for policy 0, policy_version 42809 (0.0007) -[2026-06-07 02:31:28,181][324563] Updated weights for policy 0, policy_version 42819 (0.0006) -[2026-06-07 02:31:28,378][324563] Updated weights for policy 0, policy_version 42829 (0.0006) -[2026-06-07 02:31:28,578][324563] Updated weights for policy 0, policy_version 42839 (0.0006) -[2026-06-07 02:31:29,257][324563] Updated weights for policy 0, policy_version 42849 (0.0007) -[2026-06-07 02:31:29,453][324563] Updated weights for policy 0, policy_version 42859 (0.0006) -[2026-06-07 02:31:29,645][324563] Updated weights for policy 0, policy_version 42869 (0.0007) -[2026-06-07 02:31:29,863][324563] Updated weights for policy 0, policy_version 42879 (0.0007) -[2026-06-07 02:31:30,062][324563] Updated weights for policy 0, policy_version 42889 (0.0006) -[2026-06-07 02:31:30,238][324563] Updated weights for policy 0, policy_version 42899 (0.0006) -[2026-06-07 02:31:30,744][321791] Fps is (10 sec: 19660.7, 60 sec: 18022.3, 300 sec: 17661.4). Total num frames: 21987328. Throughput: 0: 17555.9. Samples: 22009856. Policy #0 lag: (min: 63.0, avg: 79.4, max: 127.0) -[2026-06-07 02:31:30,745][321791] Avg episode reward: [(0, '1609.791')] -[2026-06-07 02:31:30,752][324276] Saving new best policy, reward=1609.791! -[2026-06-07 02:31:30,959][324563] Updated weights for policy 0, policy_version 42909 (0.0006) -[2026-06-07 02:31:31,172][324563] Updated weights for policy 0, policy_version 42919 (0.0006) -[2026-06-07 02:31:31,380][324563] Updated weights for policy 0, policy_version 42929 (0.0006) -[2026-06-07 02:31:31,577][324563] Updated weights for policy 0, policy_version 42939 (0.0006) -[2026-06-07 02:31:31,787][324563] Updated weights for policy 0, policy_version 42949 (0.0007) -[2026-06-07 02:31:32,005][324563] Updated weights for policy 0, policy_version 42959 (0.0006) -[2026-06-07 02:31:32,719][324563] Updated weights for policy 0, policy_version 42969 (0.0007) -[2026-06-07 02:31:32,924][324563] Updated weights for policy 0, policy_version 42979 (0.0006) -[2026-06-07 02:31:33,140][324563] Updated weights for policy 0, policy_version 42990 (0.0006) -[2026-06-07 02:31:33,341][324563] Updated weights for policy 0, policy_version 43000 (0.0006) -[2026-06-07 02:31:33,554][324563] Updated weights for policy 0, policy_version 43010 (0.0007) -[2026-06-07 02:31:33,781][324563] Updated weights for policy 0, policy_version 43020 (0.0007) -[2026-06-07 02:31:34,006][324563] Updated weights for policy 0, policy_version 43030 (0.0006) -[2026-06-07 02:31:34,731][324563] Updated weights for policy 0, policy_version 43040 (0.0007) -[2026-06-07 02:31:34,931][324563] Updated weights for policy 0, policy_version 43050 (0.0007) -[2026-06-07 02:31:35,112][324563] Updated weights for policy 0, policy_version 43060 (0.0009) -[2026-06-07 02:31:35,339][324563] Updated weights for policy 0, policy_version 43070 (0.0011) -[2026-06-07 02:31:35,569][324563] Updated weights for policy 0, policy_version 43081 (0.0011) -[2026-06-07 02:31:35,744][321791] Fps is (10 sec: 16383.4, 60 sec: 17476.1, 300 sec: 17550.3). Total num frames: 22052864. Throughput: 0: 17459.1. Samples: 22062592. Policy #0 lag: (min: 63.0, avg: 79.4, max: 127.0) -[2026-06-07 02:31:35,745][321791] Avg episode reward: [(0, '1629.741')] -[2026-06-07 02:31:35,773][324563] Updated weights for policy 0, policy_version 43091 (0.0011) -[2026-06-07 02:31:35,881][324276] Saving new best policy, reward=1629.741! -[2026-06-07 02:31:36,525][324563] Updated weights for policy 0, policy_version 43101 (0.0009) -[2026-06-07 02:31:36,712][324563] Updated weights for policy 0, policy_version 43111 (0.0007) -[2026-06-07 02:31:36,941][324563] Updated weights for policy 0, policy_version 43121 (0.0006) -[2026-06-07 02:31:37,148][324563] Updated weights for policy 0, policy_version 43131 (0.0007) -[2026-06-07 02:31:37,359][324563] Updated weights for policy 0, policy_version 43141 (0.0006) -[2026-06-07 02:31:37,563][324563] Updated weights for policy 0, policy_version 43152 (0.0006) -[2026-06-07 02:31:38,332][324563] Updated weights for policy 0, policy_version 43162 (0.0006) -[2026-06-07 02:31:38,545][324563] Updated weights for policy 0, policy_version 43172 (0.0009) -[2026-06-07 02:31:38,767][324563] Updated weights for policy 0, policy_version 43183 (0.0011) -[2026-06-07 02:31:38,983][324563] Updated weights for policy 0, policy_version 43194 (0.0010) -[2026-06-07 02:31:39,179][324563] Updated weights for policy 0, policy_version 43204 (0.0007) -[2026-06-07 02:31:39,390][324563] Updated weights for policy 0, policy_version 43214 (0.0011) -[2026-06-07 02:31:39,587][324563] Updated weights for policy 0, policy_version 43224 (0.0011) -[2026-06-07 02:31:40,360][324563] Updated weights for policy 0, policy_version 43235 (0.0006) -[2026-06-07 02:31:40,551][324563] Updated weights for policy 0, policy_version 43245 (0.0011) -[2026-06-07 02:31:40,744][321791] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 22151168. Throughput: 0: 17507.5. Samples: 22166400. Policy #0 lag: (min: 63.0, avg: 79.4, max: 127.0) -[2026-06-07 02:31:40,745][321791] Avg episode reward: [(0, '1629.741')] -[2026-06-07 02:31:40,751][324563] Updated weights for policy 0, policy_version 43255 (0.0011) -[2026-06-07 02:31:40,950][324563] Updated weights for policy 0, policy_version 43265 (0.0011) -[2026-06-07 02:31:41,154][324563] Updated weights for policy 0, policy_version 43275 (0.0007) -[2026-06-07 02:31:41,374][324563] Updated weights for policy 0, policy_version 43285 (0.0007) -[2026-06-07 02:31:42,132][324563] Updated weights for policy 0, policy_version 43295 (0.0007) -[2026-06-07 02:31:42,358][324563] Updated weights for policy 0, policy_version 43305 (0.0007) -[2026-06-07 02:31:42,566][324563] Updated weights for policy 0, policy_version 43315 (0.0007) -[2026-06-07 02:31:42,759][324563] Updated weights for policy 0, policy_version 43325 (0.0011) -[2026-06-07 02:31:42,951][324563] Updated weights for policy 0, policy_version 43335 (0.0011) -[2026-06-07 02:31:43,160][324563] Updated weights for policy 0, policy_version 43345 (0.0011) -[2026-06-07 02:31:43,884][324563] Updated weights for policy 0, policy_version 43355 (0.0010) -[2026-06-07 02:31:44,101][324563] Updated weights for policy 0, policy_version 43365 (0.0007) -[2026-06-07 02:31:44,292][324563] Updated weights for policy 0, policy_version 43375 (0.0006) -[2026-06-07 02:31:44,491][324563] Updated weights for policy 0, policy_version 43385 (0.0006) -[2026-06-07 02:31:44,727][324563] Updated weights for policy 0, policy_version 43397 (0.0006) -[2026-06-07 02:31:44,928][324563] Updated weights for policy 0, policy_version 43407 (0.0006) -[2026-06-07 02:31:45,707][324563] Updated weights for policy 0, policy_version 43417 (0.0007) -[2026-06-07 02:31:45,744][321791] Fps is (10 sec: 19661.6, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 22249472. Throughput: 0: 17791.9. Samples: 22281088. Policy #0 lag: (min: 52.0, avg: 67.0, max: 116.0) -[2026-06-07 02:31:45,745][321791] Avg episode reward: [(0, '1651.339')] -[2026-06-07 02:31:45,918][324563] Updated weights for policy 0, policy_version 43427 (0.0006) -[2026-06-07 02:31:46,108][324563] Updated weights for policy 0, policy_version 43437 (0.0006) -[2026-06-07 02:31:46,322][324563] Updated weights for policy 0, policy_version 43447 (0.0006) -[2026-06-07 02:31:46,520][324563] Updated weights for policy 0, policy_version 43457 (0.0007) -[2026-06-07 02:31:46,726][324563] Updated weights for policy 0, policy_version 43467 (0.0006) -[2026-06-07 02:31:46,948][324563] Updated weights for policy 0, policy_version 43477 (0.0006) -[2026-06-07 02:31:47,015][324276] Saving new best policy, reward=1651.339! -[2026-06-07 02:31:47,722][324563] Updated weights for policy 0, policy_version 43487 (0.0007) -[2026-06-07 02:31:47,943][324563] Updated weights for policy 0, policy_version 43497 (0.0007) -[2026-06-07 02:31:48,166][324563] Updated weights for policy 0, policy_version 43507 (0.0006) -[2026-06-07 02:31:48,375][324563] Updated weights for policy 0, policy_version 43517 (0.0006) -[2026-06-07 02:31:48,591][324563] Updated weights for policy 0, policy_version 43527 (0.0006) -[2026-06-07 02:31:48,833][324563] Updated weights for policy 0, policy_version 43538 (0.0007) -[2026-06-07 02:31:49,563][324563] Updated weights for policy 0, policy_version 43548 (0.0006) -[2026-06-07 02:31:49,797][324563] Updated weights for policy 0, policy_version 43559 (0.0006) -[2026-06-07 02:31:49,998][324563] Updated weights for policy 0, policy_version 43569 (0.0006) -[2026-06-07 02:31:50,207][324563] Updated weights for policy 0, policy_version 43579 (0.0006) -[2026-06-07 02:31:50,432][324563] Updated weights for policy 0, policy_version 43589 (0.0006) -[2026-06-07 02:31:50,638][324563] Updated weights for policy 0, policy_version 43599 (0.0007) -[2026-06-07 02:31:50,744][321791] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 22315008. Throughput: 0: 17553.1. Samples: 22328064. Policy #0 lag: (min: 52.0, avg: 67.0, max: 116.0) -[2026-06-07 02:31:50,745][321791] Avg episode reward: [(0, '1628.808')] -[2026-06-07 02:31:51,402][324563] Updated weights for policy 0, policy_version 43610 (0.0006) -[2026-06-07 02:31:51,607][324563] Updated weights for policy 0, policy_version 43621 (0.0006) -[2026-06-07 02:31:51,826][324563] Updated weights for policy 0, policy_version 43631 (0.0007) -[2026-06-07 02:31:52,015][324563] Updated weights for policy 0, policy_version 43641 (0.0007) -[2026-06-07 02:31:52,228][324563] Updated weights for policy 0, policy_version 43651 (0.0007) -[2026-06-07 02:31:52,428][324563] Updated weights for policy 0, policy_version 43661 (0.0006) -[2026-06-07 02:31:52,646][324563] Updated weights for policy 0, policy_version 43671 (0.0006) -[2026-06-07 02:31:53,386][324563] Updated weights for policy 0, policy_version 43681 (0.0006) -[2026-06-07 02:31:53,587][324563] Updated weights for policy 0, policy_version 43691 (0.0006) -[2026-06-07 02:31:53,794][324563] Updated weights for policy 0, policy_version 43701 (0.0007) -[2026-06-07 02:31:54,000][324563] Updated weights for policy 0, policy_version 43711 (0.0006) -[2026-06-07 02:31:54,211][324563] Updated weights for policy 0, policy_version 43721 (0.0007) -[2026-06-07 02:31:54,416][324563] Updated weights for policy 0, policy_version 43731 (0.0007) -[2026-06-07 02:31:55,175][324563] Updated weights for policy 0, policy_version 43741 (0.0007) -[2026-06-07 02:31:55,381][324563] Updated weights for policy 0, policy_version 43751 (0.0007) -[2026-06-07 02:31:55,592][324563] Updated weights for policy 0, policy_version 43761 (0.0006) -[2026-06-07 02:31:55,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 22413312. Throughput: 0: 17547.3. Samples: 22430976. Policy #0 lag: (min: 52.0, avg: 67.0, max: 116.0) -[2026-06-07 02:31:55,745][321791] Avg episode reward: [(0, '1653.273')] -[2026-06-07 02:31:55,797][324563] Updated weights for policy 0, policy_version 43771 (0.0006) -[2026-06-07 02:31:55,996][324563] Updated weights for policy 0, policy_version 43781 (0.0006) -[2026-06-07 02:31:56,224][324563] Updated weights for policy 0, policy_version 43792 (0.0006) -[2026-06-07 02:31:56,402][324276] Saving new best policy, reward=1653.273! -[2026-06-07 02:31:56,990][324563] Updated weights for policy 0, policy_version 43802 (0.0007) -[2026-06-07 02:31:57,201][324563] Updated weights for policy 0, policy_version 43812 (0.0007) -[2026-06-07 02:31:57,399][324563] Updated weights for policy 0, policy_version 43822 (0.0006) -[2026-06-07 02:31:57,598][324563] Updated weights for policy 0, policy_version 43832 (0.0007) -[2026-06-07 02:31:57,807][324563] Updated weights for policy 0, policy_version 43842 (0.0006) -[2026-06-07 02:31:57,992][324563] Updated weights for policy 0, policy_version 43852 (0.0007) -[2026-06-07 02:31:58,200][324563] Updated weights for policy 0, policy_version 43862 (0.0006) -[2026-06-07 02:31:58,962][324563] Updated weights for policy 0, policy_version 43872 (0.0007) -[2026-06-07 02:31:59,185][324563] Updated weights for policy 0, policy_version 43882 (0.0007) -[2026-06-07 02:31:59,429][324563] Updated weights for policy 0, policy_version 43893 (0.0007) -[2026-06-07 02:31:59,648][324563] Updated weights for policy 0, policy_version 43903 (0.0007) -[2026-06-07 02:31:59,877][324563] Updated weights for policy 0, policy_version 43913 (0.0007) -[2026-06-07 02:32:00,086][324563] Updated weights for policy 0, policy_version 43923 (0.0007) -[2026-06-07 02:32:00,744][321791] Fps is (10 sec: 19660.7, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 22511616. Throughput: 0: 17826.1. Samples: 22544896. Policy #0 lag: (min: 52.0, avg: 67.0, max: 116.0) -[2026-06-07 02:32:00,745][321791] Avg episode reward: [(0, '1670.504')] -[2026-06-07 02:32:00,794][324563] Updated weights for policy 0, policy_version 43933 (0.0006) -[2026-06-07 02:32:00,995][324563] Updated weights for policy 0, policy_version 43943 (0.0007) -[2026-06-07 02:32:01,210][324563] Updated weights for policy 0, policy_version 43953 (0.0007) -[2026-06-07 02:32:01,419][324563] Updated weights for policy 0, policy_version 43963 (0.0007) -[2026-06-07 02:32:01,627][324563] Updated weights for policy 0, policy_version 43973 (0.0007) -[2026-06-07 02:32:01,842][324563] Updated weights for policy 0, policy_version 43983 (0.0007) -[2026-06-07 02:32:02,029][324276] Saving new best policy, reward=1670.504! -[2026-06-07 02:32:02,582][324563] Updated weights for policy 0, policy_version 43993 (0.0006) -[2026-06-07 02:32:02,793][324563] Updated weights for policy 0, policy_version 44003 (0.0007) -[2026-06-07 02:32:03,015][324563] Updated weights for policy 0, policy_version 44013 (0.0007) -[2026-06-07 02:32:03,207][324563] Updated weights for policy 0, policy_version 44023 (0.0007) -[2026-06-07 02:32:03,417][324563] Updated weights for policy 0, policy_version 44033 (0.0007) -[2026-06-07 02:32:03,652][324563] Updated weights for policy 0, policy_version 44044 (0.0007) -[2026-06-07 02:32:03,854][324563] Updated weights for policy 0, policy_version 44054 (0.0007) -[2026-06-07 02:32:04,596][324563] Updated weights for policy 0, policy_version 44064 (0.0007) -[2026-06-07 02:32:04,794][324563] Updated weights for policy 0, policy_version 44074 (0.0006) -[2026-06-07 02:32:05,007][324563] Updated weights for policy 0, policy_version 44084 (0.0006) -[2026-06-07 02:32:05,219][324563] Updated weights for policy 0, policy_version 44094 (0.0007) -[2026-06-07 02:32:05,443][324563] Updated weights for policy 0, policy_version 44104 (0.0006) -[2026-06-07 02:32:05,666][324563] Updated weights for policy 0, policy_version 44114 (0.0006) -[2026-06-07 02:32:05,744][321791] Fps is (10 sec: 16384.1, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 22577152. Throughput: 0: 17610.0. Samples: 22592768. Policy #0 lag: (min: 52.0, avg: 67.0, max: 116.0) -[2026-06-07 02:32:05,745][321791] Avg episode reward: [(0, '1670.504')] -[2026-06-07 02:32:06,422][324563] Updated weights for policy 0, policy_version 44124 (0.0006) -[2026-06-07 02:32:06,623][324563] Updated weights for policy 0, policy_version 44134 (0.0006) -[2026-06-07 02:32:06,801][324563] Updated weights for policy 0, policy_version 44144 (0.0006) -[2026-06-07 02:32:07,007][324563] Updated weights for policy 0, policy_version 44154 (0.0006) -[2026-06-07 02:32:07,203][324563] Updated weights for policy 0, policy_version 44164 (0.0006) -[2026-06-07 02:32:07,399][324563] Updated weights for policy 0, policy_version 44174 (0.0006) -[2026-06-07 02:32:07,601][324563] Updated weights for policy 0, policy_version 44184 (0.0006) -[2026-06-07 02:32:08,322][324563] Updated weights for policy 0, policy_version 44194 (0.0006) -[2026-06-07 02:32:08,568][324563] Updated weights for policy 0, policy_version 44205 (0.0006) -[2026-06-07 02:32:08,787][324563] Updated weights for policy 0, policy_version 44215 (0.0006) -[2026-06-07 02:32:08,991][324563] Updated weights for policy 0, policy_version 44225 (0.0007) -[2026-06-07 02:32:09,189][324563] Updated weights for policy 0, policy_version 44235 (0.0007) -[2026-06-07 02:32:09,412][324563] Updated weights for policy 0, policy_version 44245 (0.0007) -[2026-06-07 02:32:10,127][324563] Updated weights for policy 0, policy_version 44255 (0.0007) -[2026-06-07 02:32:10,345][324563] Updated weights for policy 0, policy_version 44265 (0.0010) -[2026-06-07 02:32:10,556][324563] Updated weights for policy 0, policy_version 44275 (0.0010) -[2026-06-07 02:32:10,744][321791] Fps is (10 sec: 16383.9, 60 sec: 17476.2, 300 sec: 17550.3). Total num frames: 22675456. Throughput: 0: 17627.0. Samples: 22697088. Policy #0 lag: (min: 52.0, avg: 67.0, max: 116.0) -[2026-06-07 02:32:10,745][321791] Avg episode reward: [(0, '1716.514')] -[2026-06-07 02:32:10,779][324563] Updated weights for policy 0, policy_version 44285 (0.0009) -[2026-06-07 02:32:10,990][324563] Updated weights for policy 0, policy_version 44296 (0.0007) -[2026-06-07 02:32:11,175][324563] Updated weights for policy 0, policy_version 44306 (0.0006) -[2026-06-07 02:32:11,304][324276] Saving new best policy, reward=1716.514! -[2026-06-07 02:32:11,924][324563] Updated weights for policy 0, policy_version 44316 (0.0007) -[2026-06-07 02:32:12,134][324563] Updated weights for policy 0, policy_version 44326 (0.0007) -[2026-06-07 02:32:12,323][324563] Updated weights for policy 0, policy_version 44336 (0.0007) -[2026-06-07 02:32:12,533][324563] Updated weights for policy 0, policy_version 44346 (0.0007) -[2026-06-07 02:32:12,722][324563] Updated weights for policy 0, policy_version 44356 (0.0007) -[2026-06-07 02:32:12,933][324563] Updated weights for policy 0, policy_version 44366 (0.0006) -[2026-06-07 02:32:13,142][324563] Updated weights for policy 0, policy_version 44376 (0.0007) -[2026-06-07 02:32:13,927][324563] Updated weights for policy 0, policy_version 44386 (0.0008) -[2026-06-07 02:32:14,139][324563] Updated weights for policy 0, policy_version 44396 (0.0011) -[2026-06-07 02:32:14,334][324563] Updated weights for policy 0, policy_version 44406 (0.0011) -[2026-06-07 02:32:14,519][324563] Updated weights for policy 0, policy_version 44416 (0.0011) -[2026-06-07 02:32:14,731][324563] Updated weights for policy 0, policy_version 44428 (0.0008) -[2026-06-07 02:32:14,952][324563] Updated weights for policy 0, policy_version 44438 (0.0007) -[2026-06-07 02:32:15,674][324563] Updated weights for policy 0, policy_version 44448 (0.0006) -[2026-06-07 02:32:15,744][321791] Fps is (10 sec: 19660.6, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 22773760. Throughput: 0: 17843.2. Samples: 22812800. Policy #0 lag: (min: 29.0, avg: 44.1, max: 93.0) -[2026-06-07 02:32:15,745][321791] Avg episode reward: [(0, '1715.430')] -[2026-06-07 02:32:15,866][324563] Updated weights for policy 0, policy_version 44458 (0.0007) -[2026-06-07 02:32:16,063][324563] Updated weights for policy 0, policy_version 44468 (0.0007) -[2026-06-07 02:32:16,261][324563] Updated weights for policy 0, policy_version 44479 (0.0006) -[2026-06-07 02:32:16,463][324563] Updated weights for policy 0, policy_version 44489 (0.0007) -[2026-06-07 02:32:16,671][324563] Updated weights for policy 0, policy_version 44499 (0.0007) -[2026-06-07 02:32:17,426][324563] Updated weights for policy 0, policy_version 44509 (0.0009) -[2026-06-07 02:32:17,613][324563] Updated weights for policy 0, policy_version 44519 (0.0008) -[2026-06-07 02:32:17,804][324563] Updated weights for policy 0, policy_version 44529 (0.0007) -[2026-06-07 02:32:18,015][324563] Updated weights for policy 0, policy_version 44540 (0.0009) -[2026-06-07 02:32:18,197][324563] Updated weights for policy 0, policy_version 44550 (0.0008) -[2026-06-07 02:32:18,388][324563] Updated weights for policy 0, policy_version 44560 (0.0006) -[2026-06-07 02:32:19,156][324563] Updated weights for policy 0, policy_version 44570 (0.0007) -[2026-06-07 02:32:19,367][324563] Updated weights for policy 0, policy_version 44580 (0.0007) -[2026-06-07 02:32:19,561][324563] Updated weights for policy 0, policy_version 44590 (0.0007) -[2026-06-07 02:32:19,760][324563] Updated weights for policy 0, policy_version 44600 (0.0006) -[2026-06-07 02:32:19,953][324563] Updated weights for policy 0, policy_version 44610 (0.0008) -[2026-06-07 02:32:20,162][324563] Updated weights for policy 0, policy_version 44621 (0.0007) -[2026-06-07 02:32:20,348][324563] Updated weights for policy 0, policy_version 44631 (0.0008) -[2026-06-07 02:32:20,744][321791] Fps is (10 sec: 19661.0, 60 sec: 18022.5, 300 sec: 17661.4). Total num frames: 22872064. Throughput: 0: 17701.2. Samples: 22859136. Policy #0 lag: (min: 29.0, avg: 44.1, max: 93.0) -[2026-06-07 02:32:20,745][321791] Avg episode reward: [(0, '1739.196')] -[2026-06-07 02:32:20,749][324276] Saving new best policy, reward=1739.196! -[2026-06-07 02:32:21,094][324563] Updated weights for policy 0, policy_version 44641 (0.0007) -[2026-06-07 02:32:21,290][324563] Updated weights for policy 0, policy_version 44651 (0.0011) -[2026-06-07 02:32:21,484][324563] Updated weights for policy 0, policy_version 44661 (0.0011) -[2026-06-07 02:32:21,693][324563] Updated weights for policy 0, policy_version 44671 (0.0013) -[2026-06-07 02:32:21,906][324563] Updated weights for policy 0, policy_version 44681 (0.0012) -[2026-06-07 02:32:22,102][324563] Updated weights for policy 0, policy_version 44691 (0.0013) -[2026-06-07 02:32:22,837][324563] Updated weights for policy 0, policy_version 44701 (0.0009) -[2026-06-07 02:32:23,054][324563] Updated weights for policy 0, policy_version 44711 (0.0006) -[2026-06-07 02:32:23,241][324563] Updated weights for policy 0, policy_version 44721 (0.0009) -[2026-06-07 02:32:23,480][324563] Updated weights for policy 0, policy_version 44733 (0.0012) -[2026-06-07 02:32:23,678][324563] Updated weights for policy 0, policy_version 44743 (0.0008) -[2026-06-07 02:32:23,870][324563] Updated weights for policy 0, policy_version 44753 (0.0011) -[2026-06-07 02:32:24,671][324563] Updated weights for policy 0, policy_version 44763 (0.0011) -[2026-06-07 02:32:24,862][324563] Updated weights for policy 0, policy_version 44773 (0.0010) -[2026-06-07 02:32:25,072][324563] Updated weights for policy 0, policy_version 44783 (0.0011) -[2026-06-07 02:32:25,271][324563] Updated weights for policy 0, policy_version 44793 (0.0012) -[2026-06-07 02:32:25,466][324563] Updated weights for policy 0, policy_version 44803 (0.0009) -[2026-06-07 02:32:25,667][324563] Updated weights for policy 0, policy_version 44813 (0.0006) -[2026-06-07 02:32:25,744][321791] Fps is (10 sec: 16384.3, 60 sec: 17476.3, 300 sec: 17550.3). Total num frames: 22937600. Throughput: 0: 18045.2. Samples: 22978432. Policy #0 lag: (min: 29.0, avg: 44.1, max: 93.0) -[2026-06-07 02:32:25,745][321791] Avg episode reward: [(0, '1778.707')] -[2026-06-07 02:32:25,852][324563] Updated weights for policy 0, policy_version 44823 (0.0007) -[2026-06-07 02:32:25,864][324276] Saving new best policy, reward=1778.707! -[2026-06-07 02:32:26,624][324563] Updated weights for policy 0, policy_version 44833 (0.0007) -[2026-06-07 02:32:26,825][324563] Updated weights for policy 0, policy_version 44843 (0.0006) -[2026-06-07 02:32:26,997][324563] Updated weights for policy 0, policy_version 44853 (0.0006) -[2026-06-07 02:32:27,205][324563] Updated weights for policy 0, policy_version 44863 (0.0006) -[2026-06-07 02:32:27,392][324563] Updated weights for policy 0, policy_version 44873 (0.0006) -[2026-06-07 02:32:27,582][324563] Updated weights for policy 0, policy_version 44883 (0.0007) -[2026-06-07 02:32:28,334][324563] Updated weights for policy 0, policy_version 44893 (0.0007) -[2026-06-07 02:32:28,534][324563] Updated weights for policy 0, policy_version 44903 (0.0006) -[2026-06-07 02:32:28,766][324563] Updated weights for policy 0, policy_version 44913 (0.0007) -[2026-06-07 02:32:28,966][324563] Updated weights for policy 0, policy_version 44923 (0.0006) -[2026-06-07 02:32:29,187][324563] Updated weights for policy 0, policy_version 44933 (0.0007) -[2026-06-07 02:32:29,376][324563] Updated weights for policy 0, policy_version 44943 (0.0007) -[2026-06-07 02:32:30,120][324563] Updated weights for policy 0, policy_version 44953 (0.0006) -[2026-06-07 02:32:30,326][324563] Updated weights for policy 0, policy_version 44964 (0.0006) -[2026-06-07 02:32:30,520][324563] Updated weights for policy 0, policy_version 44974 (0.0007) -[2026-06-07 02:32:30,726][324563] Updated weights for policy 0, policy_version 44984 (0.0007) -[2026-06-07 02:32:30,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 23035904. Throughput: 0: 17831.9. Samples: 23083520. Policy #0 lag: (min: 29.0, avg: 44.1, max: 93.0) -[2026-06-07 02:32:30,745][321791] Avg episode reward: [(0, '1781.249')] -[2026-06-07 02:32:30,927][324563] Updated weights for policy 0, policy_version 44994 (0.0006) -[2026-06-07 02:32:31,172][324563] Updated weights for policy 0, policy_version 45005 (0.0007) -[2026-06-07 02:32:31,388][324563] Updated weights for policy 0, policy_version 45015 (0.0006) -[2026-06-07 02:32:31,401][324276] Saving new best policy, reward=1781.249! -[2026-06-07 02:32:32,117][324563] Updated weights for policy 0, policy_version 45025 (0.0006) -[2026-06-07 02:32:32,304][324563] Updated weights for policy 0, policy_version 45035 (0.0006) -[2026-06-07 02:32:32,486][324563] Updated weights for policy 0, policy_version 45045 (0.0007) -[2026-06-07 02:32:32,688][324563] Updated weights for policy 0, policy_version 45055 (0.0007) -[2026-06-07 02:32:32,943][324563] Updated weights for policy 0, policy_version 45067 (0.0006) -[2026-06-07 02:32:33,166][324563] Updated weights for policy 0, policy_version 45077 (0.0006) -[2026-06-07 02:32:33,910][324563] Updated weights for policy 0, policy_version 45087 (0.0006) -[2026-06-07 02:32:34,094][324563] Updated weights for policy 0, policy_version 45097 (0.0007) -[2026-06-07 02:32:34,333][324563] Updated weights for policy 0, policy_version 45108 (0.0007) -[2026-06-07 02:32:34,538][324563] Updated weights for policy 0, policy_version 45118 (0.0006) -[2026-06-07 02:32:34,732][324563] Updated weights for policy 0, policy_version 45128 (0.0007) -[2026-06-07 02:32:34,925][324563] Updated weights for policy 0, policy_version 45138 (0.0007) -[2026-06-07 02:32:35,690][324563] Updated weights for policy 0, policy_version 45148 (0.0007) -[2026-06-07 02:32:35,744][321791] Fps is (10 sec: 19660.8, 60 sec: 18022.5, 300 sec: 17661.4). Total num frames: 23134208. Throughput: 0: 17996.8. Samples: 23137920. Policy #0 lag: (min: 29.0, avg: 44.1, max: 93.0) -[2026-06-07 02:32:35,745][321791] Avg episode reward: [(0, '1776.615')] -[2026-06-07 02:32:35,897][324563] Updated weights for policy 0, policy_version 45158 (0.0007) -[2026-06-07 02:32:36,118][324563] Updated weights for policy 0, policy_version 45168 (0.0006) -[2026-06-07 02:32:36,325][324563] Updated weights for policy 0, policy_version 45178 (0.0007) -[2026-06-07 02:32:36,543][324563] Updated weights for policy 0, policy_version 45188 (0.0007) -[2026-06-07 02:32:36,734][324563] Updated weights for policy 0, policy_version 45198 (0.0006) -[2026-06-07 02:32:36,928][324563] Updated weights for policy 0, policy_version 45208 (0.0006) -[2026-06-07 02:32:37,654][324563] Updated weights for policy 0, policy_version 45218 (0.0006) -[2026-06-07 02:32:37,864][324563] Updated weights for policy 0, policy_version 45228 (0.0006) -[2026-06-07 02:32:38,089][324563] Updated weights for policy 0, policy_version 45239 (0.0007) -[2026-06-07 02:32:38,296][324563] Updated weights for policy 0, policy_version 45249 (0.0006) -[2026-06-07 02:32:38,501][324563] Updated weights for policy 0, policy_version 45259 (0.0006) -[2026-06-07 02:32:38,694][324563] Updated weights for policy 0, policy_version 45270 (0.0006) -[2026-06-07 02:32:39,495][324563] Updated weights for policy 0, policy_version 45281 (0.0007) -[2026-06-07 02:32:39,701][324563] Updated weights for policy 0, policy_version 45291 (0.0007) -[2026-06-07 02:32:39,902][324563] Updated weights for policy 0, policy_version 45301 (0.0006) -[2026-06-07 02:32:40,086][324563] Updated weights for policy 0, policy_version 45311 (0.0006) -[2026-06-07 02:32:40,331][324563] Updated weights for policy 0, policy_version 45322 (0.0007) -[2026-06-07 02:32:40,537][324563] Updated weights for policy 0, policy_version 45332 (0.0006) -[2026-06-07 02:32:40,744][321791] Fps is (10 sec: 19660.8, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 23232512. Throughput: 0: 18144.8. Samples: 23247488. Policy #0 lag: (min: 63.0, avg: 76.9, max: 127.0) -[2026-06-07 02:32:40,745][321791] Avg episode reward: [(0, '1755.679')] -[2026-06-07 02:32:41,232][324563] Updated weights for policy 0, policy_version 45342 (0.0006) -[2026-06-07 02:32:41,473][324563] Updated weights for policy 0, policy_version 45353 (0.0007) -[2026-06-07 02:32:41,682][324563] Updated weights for policy 0, policy_version 45363 (0.0006) -[2026-06-07 02:32:41,885][324563] Updated weights for policy 0, policy_version 45373 (0.0006) -[2026-06-07 02:32:42,116][324563] Updated weights for policy 0, policy_version 45383 (0.0006) -[2026-06-07 02:32:42,312][324563] Updated weights for policy 0, policy_version 45393 (0.0007) -[2026-06-07 02:32:43,068][324563] Updated weights for policy 0, policy_version 45403 (0.0007) -[2026-06-07 02:32:43,274][324563] Updated weights for policy 0, policy_version 45413 (0.0006) -[2026-06-07 02:32:43,492][324563] Updated weights for policy 0, policy_version 45423 (0.0007) -[2026-06-07 02:32:43,693][324563] Updated weights for policy 0, policy_version 45433 (0.0006) -[2026-06-07 02:32:43,924][324563] Updated weights for policy 0, policy_version 45444 (0.0006) -[2026-06-07 02:32:44,130][324563] Updated weights for policy 0, policy_version 45454 (0.0006) -[2026-06-07 02:32:44,326][324563] Updated weights for policy 0, policy_version 45464 (0.0006) -[2026-06-07 02:32:45,084][324563] Updated weights for policy 0, policy_version 45474 (0.0007) -[2026-06-07 02:32:45,291][324563] Updated weights for policy 0, policy_version 45484 (0.0006) -[2026-06-07 02:32:45,504][324563] Updated weights for policy 0, policy_version 45494 (0.0006) -[2026-06-07 02:32:45,724][324563] Updated weights for policy 0, policy_version 45504 (0.0006) -[2026-06-07 02:32:45,744][321791] Fps is (10 sec: 16383.8, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 23298048. Throughput: 0: 17931.3. Samples: 23351808. Policy #0 lag: (min: 63.0, avg: 76.9, max: 127.0) -[2026-06-07 02:32:45,746][321791] Avg episode reward: [(0, '1745.650')] -[2026-06-07 02:32:45,966][324563] Updated weights for policy 0, policy_version 45515 (0.0006) -[2026-06-07 02:32:46,177][324563] Updated weights for policy 0, policy_version 45525 (0.0006) -[2026-06-07 02:32:46,899][324563] Updated weights for policy 0, policy_version 45535 (0.0007) -[2026-06-07 02:32:47,117][324563] Updated weights for policy 0, policy_version 45545 (0.0007) -[2026-06-07 02:32:47,327][324563] Updated weights for policy 0, policy_version 45555 (0.0006) -[2026-06-07 02:32:47,534][324563] Updated weights for policy 0, policy_version 45565 (0.0006) -[2026-06-07 02:32:47,754][324563] Updated weights for policy 0, policy_version 45575 (0.0007) -[2026-06-07 02:32:47,965][324563] Updated weights for policy 0, policy_version 45585 (0.0006) -[2026-06-07 02:32:48,715][324563] Updated weights for policy 0, policy_version 45595 (0.0007) -[2026-06-07 02:32:48,903][324563] Updated weights for policy 0, policy_version 45605 (0.0007) -[2026-06-07 02:32:49,094][324563] Updated weights for policy 0, policy_version 45615 (0.0006) -[2026-06-07 02:32:49,318][324563] Updated weights for policy 0, policy_version 45625 (0.0006) -[2026-06-07 02:32:49,527][324563] Updated weights for policy 0, policy_version 45635 (0.0007) -[2026-06-07 02:32:49,761][324563] Updated weights for policy 0, policy_version 45646 (0.0006) -[2026-06-07 02:32:49,973][324563] Updated weights for policy 0, policy_version 45656 (0.0006) -[2026-06-07 02:32:50,710][324563] Updated weights for policy 0, policy_version 45666 (0.0006) -[2026-06-07 02:32:50,744][321791] Fps is (10 sec: 16383.5, 60 sec: 18022.3, 300 sec: 17661.4). Total num frames: 23396352. Throughput: 0: 18138.9. Samples: 23409024. Policy #0 lag: (min: 63.0, avg: 76.9, max: 127.0) -[2026-06-07 02:32:50,745][321791] Avg episode reward: [(0, '1799.731')] -[2026-06-07 02:32:50,920][324563] Updated weights for policy 0, policy_version 45676 (0.0006) -[2026-06-07 02:32:51,146][324563] Updated weights for policy 0, policy_version 45687 (0.0007) -[2026-06-07 02:32:51,364][324563] Updated weights for policy 0, policy_version 45697 (0.0006) -[2026-06-07 02:32:51,581][324563] Updated weights for policy 0, policy_version 45708 (0.0006) -[2026-06-07 02:32:51,776][324563] Updated weights for policy 0, policy_version 45718 (0.0007) -[2026-06-07 02:32:51,817][324276] Saving new best policy, reward=1799.731! -[2026-06-07 02:32:52,512][324563] Updated weights for policy 0, policy_version 45728 (0.0007) -[2026-06-07 02:32:52,737][324563] Updated weights for policy 0, policy_version 45739 (0.0006) -[2026-06-07 02:32:52,923][324563] Updated weights for policy 0, policy_version 45749 (0.0006) -[2026-06-07 02:32:53,126][324563] Updated weights for policy 0, policy_version 45759 (0.0006) -[2026-06-07 02:32:53,320][324563] Updated weights for policy 0, policy_version 45769 (0.0006) -[2026-06-07 02:32:53,531][324563] Updated weights for policy 0, policy_version 45779 (0.0006) -[2026-06-07 02:32:54,296][324563] Updated weights for policy 0, policy_version 45789 (0.0006) -[2026-06-07 02:32:54,487][324563] Updated weights for policy 0, policy_version 45799 (0.0006) -[2026-06-07 02:32:54,691][324563] Updated weights for policy 0, policy_version 45809 (0.0006) -[2026-06-07 02:32:54,902][324563] Updated weights for policy 0, policy_version 45819 (0.0006) -[2026-06-07 02:32:55,109][324563] Updated weights for policy 0, policy_version 45829 (0.0006) -[2026-06-07 02:32:55,307][324563] Updated weights for policy 0, policy_version 45839 (0.0006) -[2026-06-07 02:32:55,744][321791] Fps is (10 sec: 19661.1, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 23494656. Throughput: 0: 18164.7. Samples: 23514496. Policy #0 lag: (min: 63.0, avg: 76.9, max: 127.0) -[2026-06-07 02:32:55,745][321791] Avg episode reward: [(0, '1832.149')] -[2026-06-07 02:32:55,749][324276] Saving new best policy, reward=1832.149! -[2026-06-07 02:32:56,056][324563] Updated weights for policy 0, policy_version 45849 (0.0006) -[2026-06-07 02:32:56,256][324563] Updated weights for policy 0, policy_version 45859 (0.0006) -[2026-06-07 02:32:56,439][324563] Updated weights for policy 0, policy_version 45869 (0.0007) -[2026-06-07 02:32:56,628][324563] Updated weights for policy 0, policy_version 45879 (0.0007) -[2026-06-07 02:32:56,827][324563] Updated weights for policy 0, policy_version 45889 (0.0006) -[2026-06-07 02:32:57,016][324563] Updated weights for policy 0, policy_version 45899 (0.0006) -[2026-06-07 02:32:57,231][324563] Updated weights for policy 0, policy_version 45909 (0.0006) -[2026-06-07 02:32:57,997][324563] Updated weights for policy 0, policy_version 45919 (0.0007) -[2026-06-07 02:32:58,192][324563] Updated weights for policy 0, policy_version 45929 (0.0006) -[2026-06-07 02:32:58,409][324563] Updated weights for policy 0, policy_version 45939 (0.0007) -[2026-06-07 02:32:58,624][324563] Updated weights for policy 0, policy_version 45950 (0.0007) -[2026-06-07 02:32:58,839][324563] Updated weights for policy 0, policy_version 45960 (0.0006) -[2026-06-07 02:32:59,046][324563] Updated weights for policy 0, policy_version 45970 (0.0007) -[2026-06-07 02:32:59,777][324563] Updated weights for policy 0, policy_version 45980 (0.0006) -[2026-06-07 02:32:59,982][324563] Updated weights for policy 0, policy_version 45990 (0.0007) -[2026-06-07 02:33:00,201][324563] Updated weights for policy 0, policy_version 46000 (0.0006) -[2026-06-07 02:33:00,428][324563] Updated weights for policy 0, policy_version 46010 (0.0006) -[2026-06-07 02:33:00,619][324563] Updated weights for policy 0, policy_version 46020 (0.0006) -[2026-06-07 02:33:00,744][321791] Fps is (10 sec: 16384.5, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 23560192. Throughput: 0: 17900.2. Samples: 23618304. Policy #0 lag: (min: 63.0, avg: 76.9, max: 127.0) -[2026-06-07 02:33:00,745][321791] Avg episode reward: [(0, '1832.149')] -[2026-06-07 02:33:00,823][324563] Updated weights for policy 0, policy_version 46030 (0.0007) -[2026-06-07 02:33:01,021][324563] Updated weights for policy 0, policy_version 46040 (0.0006) -[2026-06-07 02:33:01,759][324563] Updated weights for policy 0, policy_version 46050 (0.0006) -[2026-06-07 02:33:01,965][324563] Updated weights for policy 0, policy_version 46060 (0.0007) -[2026-06-07 02:33:02,177][324563] Updated weights for policy 0, policy_version 46070 (0.0006) -[2026-06-07 02:33:02,379][324563] Updated weights for policy 0, policy_version 46080 (0.0007) -[2026-06-07 02:33:02,573][324563] Updated weights for policy 0, policy_version 46090 (0.0006) -[2026-06-07 02:33:02,769][324563] Updated weights for policy 0, policy_version 46100 (0.0006) -[2026-06-07 02:33:03,511][324563] Updated weights for policy 0, policy_version 46110 (0.0007) -[2026-06-07 02:33:03,754][324563] Updated weights for policy 0, policy_version 46121 (0.0007) -[2026-06-07 02:33:03,960][324563] Updated weights for policy 0, policy_version 46131 (0.0007) -[2026-06-07 02:33:04,166][324563] Updated weights for policy 0, policy_version 46141 (0.0007) -[2026-06-07 02:33:04,388][324563] Updated weights for policy 0, policy_version 46151 (0.0006) -[2026-06-07 02:33:04,598][324563] Updated weights for policy 0, policy_version 46161 (0.0007) -[2026-06-07 02:33:05,318][324563] Updated weights for policy 0, policy_version 46171 (0.0007) -[2026-06-07 02:33:05,542][324563] Updated weights for policy 0, policy_version 46181 (0.0007) -[2026-06-07 02:33:05,741][324563] Updated weights for policy 0, policy_version 46191 (0.0007) -[2026-06-07 02:33:05,744][321791] Fps is (10 sec: 16383.8, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 23658496. Throughput: 0: 18156.0. Samples: 23676160. Policy #0 lag: (min: 63.0, avg: 76.9, max: 127.0) -[2026-06-07 02:33:05,745][321791] Avg episode reward: [(0, '1884.272')] -[2026-06-07 02:33:05,964][324563] Updated weights for policy 0, policy_version 46201 (0.0006) -[2026-06-07 02:33:06,150][324563] Updated weights for policy 0, policy_version 46211 (0.0006) -[2026-06-07 02:33:06,359][324563] Updated weights for policy 0, policy_version 46221 (0.0006) -[2026-06-07 02:33:06,552][324563] Updated weights for policy 0, policy_version 46231 (0.0006) -[2026-06-07 02:33:06,577][324276] Saving new best policy, reward=1884.272! -[2026-06-07 02:33:07,319][324563] Updated weights for policy 0, policy_version 46242 (0.0006) -[2026-06-07 02:33:07,518][324563] Updated weights for policy 0, policy_version 46252 (0.0006) -[2026-06-07 02:33:07,720][324563] Updated weights for policy 0, policy_version 46262 (0.0007) -[2026-06-07 02:33:07,934][324563] Updated weights for policy 0, policy_version 46272 (0.0006) -[2026-06-07 02:33:08,122][324563] Updated weights for policy 0, policy_version 46282 (0.0006) -[2026-06-07 02:33:08,329][324563] Updated weights for policy 0, policy_version 46292 (0.0006) -[2026-06-07 02:33:09,100][324563] Updated weights for policy 0, policy_version 46303 (0.0006) -[2026-06-07 02:33:09,292][324563] Updated weights for policy 0, policy_version 46313 (0.0007) -[2026-06-07 02:33:09,487][324563] Updated weights for policy 0, policy_version 46323 (0.0006) -[2026-06-07 02:33:09,703][324563] Updated weights for policy 0, policy_version 46333 (0.0006) -[2026-06-07 02:33:09,929][324563] Updated weights for policy 0, policy_version 46344 (0.0006) -[2026-06-07 02:33:10,164][324563] Updated weights for policy 0, policy_version 46354 (0.0006) -[2026-06-07 02:33:10,744][321791] Fps is (10 sec: 19660.8, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 23756800. Throughput: 0: 17769.2. Samples: 23778048. Policy #0 lag: (min: 63.0, avg: 76.9, max: 127.0) -[2026-06-07 02:33:10,745][321791] Avg episode reward: [(0, '1912.884')] -[2026-06-07 02:33:10,898][324563] Updated weights for policy 0, policy_version 46364 (0.0006) -[2026-06-07 02:33:11,113][324563] Updated weights for policy 0, policy_version 46374 (0.0006) -[2026-06-07 02:33:11,315][324563] Updated weights for policy 0, policy_version 46384 (0.0006) -[2026-06-07 02:33:11,532][324563] Updated weights for policy 0, policy_version 46394 (0.0006) -[2026-06-07 02:33:11,725][324563] Updated weights for policy 0, policy_version 46404 (0.0007) -[2026-06-07 02:33:11,950][324563] Updated weights for policy 0, policy_version 46414 (0.0007) -[2026-06-07 02:33:12,146][324276] Saving new best policy, reward=1912.884! -[2026-06-07 02:33:12,146][324563] Updated weights for policy 0, policy_version 46424 (0.0007) -[2026-06-07 02:33:12,861][324563] Updated weights for policy 0, policy_version 46434 (0.0006) -[2026-06-07 02:33:13,066][324563] Updated weights for policy 0, policy_version 46444 (0.0006) -[2026-06-07 02:33:13,292][324563] Updated weights for policy 0, policy_version 46454 (0.0006) -[2026-06-07 02:33:13,516][324563] Updated weights for policy 0, policy_version 46464 (0.0006) -[2026-06-07 02:33:13,717][324563] Updated weights for policy 0, policy_version 46474 (0.0006) -[2026-06-07 02:33:13,917][324563] Updated weights for policy 0, policy_version 46484 (0.0006) -[2026-06-07 02:33:14,631][324563] Updated weights for policy 0, policy_version 46494 (0.0006) -[2026-06-07 02:33:14,861][324563] Updated weights for policy 0, policy_version 46504 (0.0006) -[2026-06-07 02:33:15,089][324563] Updated weights for policy 0, policy_version 46514 (0.0006) -[2026-06-07 02:33:15,299][324563] Updated weights for policy 0, policy_version 46524 (0.0007) -[2026-06-07 02:33:15,491][324563] Updated weights for policy 0, policy_version 46534 (0.0006) -[2026-06-07 02:33:15,705][324563] Updated weights for policy 0, policy_version 46544 (0.0006) -[2026-06-07 02:33:15,744][321791] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 23822336. Throughput: 0: 17720.9. Samples: 23880960. Policy #0 lag: (min: 49.0, avg: 71.2, max: 112.0) -[2026-06-07 02:33:15,745][321791] Avg episode reward: [(0, '1927.826')] -[2026-06-07 02:33:15,850][324276] Saving new best policy, reward=1927.826! -[2026-06-07 02:33:16,438][324563] Updated weights for policy 0, policy_version 46554 (0.0006) -[2026-06-07 02:33:16,661][324563] Updated weights for policy 0, policy_version 46564 (0.0006) -[2026-06-07 02:33:16,868][324563] Updated weights for policy 0, policy_version 46574 (0.0007) -[2026-06-07 02:33:17,060][324563] Updated weights for policy 0, policy_version 46584 (0.0006) -[2026-06-07 02:33:17,267][324563] Updated weights for policy 0, policy_version 46594 (0.0006) -[2026-06-07 02:33:17,467][324563] Updated weights for policy 0, policy_version 46604 (0.0007) -[2026-06-07 02:33:17,675][324563] Updated weights for policy 0, policy_version 46614 (0.0006) -[2026-06-07 02:33:18,400][324563] Updated weights for policy 0, policy_version 46624 (0.0006) -[2026-06-07 02:33:18,604][324563] Updated weights for policy 0, policy_version 46634 (0.0006) -[2026-06-07 02:33:18,850][324563] Updated weights for policy 0, policy_version 46644 (0.0007) -[2026-06-07 02:33:19,038][324563] Updated weights for policy 0, policy_version 46654 (0.0006) -[2026-06-07 02:33:19,248][324563] Updated weights for policy 0, policy_version 46664 (0.0006) -[2026-06-07 02:33:19,453][324563] Updated weights for policy 0, policy_version 46674 (0.0006) -[2026-06-07 02:33:20,193][324563] Updated weights for policy 0, policy_version 46684 (0.0006) -[2026-06-07 02:33:20,407][324563] Updated weights for policy 0, policy_version 46694 (0.0006) -[2026-06-07 02:33:20,611][324563] Updated weights for policy 0, policy_version 46704 (0.0006) -[2026-06-07 02:33:20,744][321791] Fps is (10 sec: 16383.7, 60 sec: 17476.2, 300 sec: 17661.4). Total num frames: 23920640. Throughput: 0: 17800.5. Samples: 23938944. Policy #0 lag: (min: 49.0, avg: 71.2, max: 112.0) -[2026-06-07 02:33:20,745][321791] Avg episode reward: [(0, '1983.661')] -[2026-06-07 02:33:20,816][324563] Updated weights for policy 0, policy_version 46714 (0.0007) -[2026-06-07 02:33:21,026][324563] Updated weights for policy 0, policy_version 46724 (0.0006) -[2026-06-07 02:33:21,221][324563] Updated weights for policy 0, policy_version 46734 (0.0006) -[2026-06-07 02:33:21,420][324276] Saving new best policy, reward=1983.661! -[2026-06-07 02:33:21,421][324563] Updated weights for policy 0, policy_version 46744 (0.0006) -[2026-06-07 02:33:22,200][324563] Updated weights for policy 0, policy_version 46754 (0.0007) -[2026-06-07 02:33:22,390][324563] Updated weights for policy 0, policy_version 46764 (0.0006) -[2026-06-07 02:33:22,586][324563] Updated weights for policy 0, policy_version 46774 (0.0006) -[2026-06-07 02:33:22,834][324563] Updated weights for policy 0, policy_version 46786 (0.0007) -[2026-06-07 02:33:23,056][324563] Updated weights for policy 0, policy_version 46796 (0.0006) -[2026-06-07 02:33:23,264][324563] Updated weights for policy 0, policy_version 46806 (0.0006) -[2026-06-07 02:33:24,013][324563] Updated weights for policy 0, policy_version 46816 (0.0006) -[2026-06-07 02:33:24,222][324563] Updated weights for policy 0, policy_version 46826 (0.0006) -[2026-06-07 02:33:24,436][324563] Updated weights for policy 0, policy_version 46837 (0.0006) -[2026-06-07 02:33:24,639][324563] Updated weights for policy 0, policy_version 46847 (0.0006) -[2026-06-07 02:33:24,864][324563] Updated weights for policy 0, policy_version 46857 (0.0007) -[2026-06-07 02:33:25,095][324563] Updated weights for policy 0, policy_version 46868 (0.0007) -[2026-06-07 02:33:25,744][321791] Fps is (10 sec: 19660.8, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 24018944. Throughput: 0: 17658.3. Samples: 24042112. Policy #0 lag: (min: 49.0, avg: 71.2, max: 112.0) -[2026-06-07 02:33:25,745][321791] Avg episode reward: [(0, '1996.025')] -[2026-06-07 02:33:25,825][324563] Updated weights for policy 0, policy_version 46878 (0.0006) -[2026-06-07 02:33:26,034][324563] Updated weights for policy 0, policy_version 46888 (0.0007) -[2026-06-07 02:33:26,227][324563] Updated weights for policy 0, policy_version 46898 (0.0006) -[2026-06-07 02:33:26,439][324563] Updated weights for policy 0, policy_version 46908 (0.0007) -[2026-06-07 02:33:26,642][324563] Updated weights for policy 0, policy_version 46918 (0.0006) -[2026-06-07 02:33:26,853][324563] Updated weights for policy 0, policy_version 46928 (0.0006) -[2026-06-07 02:33:27,015][324276] Saving new best policy, reward=1996.025! -[2026-06-07 02:33:27,603][324563] Updated weights for policy 0, policy_version 46938 (0.0006) -[2026-06-07 02:33:27,821][324563] Updated weights for policy 0, policy_version 46948 (0.0007) -[2026-06-07 02:33:28,037][324563] Updated weights for policy 0, policy_version 46958 (0.0006) -[2026-06-07 02:33:28,240][324563] Updated weights for policy 0, policy_version 46968 (0.0007) -[2026-06-07 02:33:28,453][324563] Updated weights for policy 0, policy_version 46978 (0.0006) -[2026-06-07 02:33:28,660][324563] Updated weights for policy 0, policy_version 46988 (0.0006) -[2026-06-07 02:33:28,878][324563] Updated weights for policy 0, policy_version 46998 (0.0006) -[2026-06-07 02:33:29,587][324563] Updated weights for policy 0, policy_version 47008 (0.0007) -[2026-06-07 02:33:29,774][324563] Updated weights for policy 0, policy_version 47018 (0.0006) -[2026-06-07 02:33:29,980][324563] Updated weights for policy 0, policy_version 47028 (0.0006) -[2026-06-07 02:33:30,205][324563] Updated weights for policy 0, policy_version 47038 (0.0007) -[2026-06-07 02:33:30,412][324563] Updated weights for policy 0, policy_version 47048 (0.0006) -[2026-06-07 02:33:30,623][324563] Updated weights for policy 0, policy_version 47058 (0.0007) -[2026-06-07 02:33:30,744][321791] Fps is (10 sec: 19660.9, 60 sec: 18022.4, 300 sec: 17772.5). Total num frames: 24117248. Throughput: 0: 17649.8. Samples: 24146048. Policy #0 lag: (min: 49.0, avg: 71.2, max: 112.0) -[2026-06-07 02:33:30,745][321791] Avg episode reward: [(0, '1996.025')] -[2026-06-07 02:33:31,382][324563] Updated weights for policy 0, policy_version 47069 (0.0007) -[2026-06-07 02:33:31,579][324563] Updated weights for policy 0, policy_version 47079 (0.0006) -[2026-06-07 02:33:31,794][324563] Updated weights for policy 0, policy_version 47089 (0.0006) -[2026-06-07 02:33:31,985][324563] Updated weights for policy 0, policy_version 47099 (0.0006) -[2026-06-07 02:33:32,184][324563] Updated weights for policy 0, policy_version 47109 (0.0007) -[2026-06-07 02:33:32,403][324563] Updated weights for policy 0, policy_version 47119 (0.0006) -[2026-06-07 02:33:33,133][324563] Updated weights for policy 0, policy_version 47129 (0.0006) -[2026-06-07 02:33:33,329][324563] Updated weights for policy 0, policy_version 47139 (0.0007) -[2026-06-07 02:33:33,533][324563] Updated weights for policy 0, policy_version 47149 (0.0006) -[2026-06-07 02:33:33,739][324563] Updated weights for policy 0, policy_version 47159 (0.0006) -[2026-06-07 02:33:33,936][324563] Updated weights for policy 0, policy_version 47169 (0.0006) -[2026-06-07 02:33:34,141][324563] Updated weights for policy 0, policy_version 47179 (0.0006) -[2026-06-07 02:33:34,364][324563] Updated weights for policy 0, policy_version 47189 (0.0006) -[2026-06-07 02:33:35,087][324563] Updated weights for policy 0, policy_version 47199 (0.0010) -[2026-06-07 02:33:35,308][324563] Updated weights for policy 0, policy_version 47209 (0.0011) -[2026-06-07 02:33:35,497][324563] Updated weights for policy 0, policy_version 47219 (0.0010) -[2026-06-07 02:33:35,701][324563] Updated weights for policy 0, policy_version 47229 (0.0007) -[2026-06-07 02:33:35,744][321791] Fps is (10 sec: 16383.8, 60 sec: 17476.2, 300 sec: 17661.4). Total num frames: 24182784. Throughput: 0: 17669.8. Samples: 24204160. Policy #0 lag: (min: 49.0, avg: 71.2, max: 112.0) -[2026-06-07 02:33:35,746][321791] Avg episode reward: [(0, '2008.336')] -[2026-06-07 02:33:35,919][324563] Updated weights for policy 0, policy_version 47239 (0.0006) -[2026-06-07 02:33:36,148][324563] Updated weights for policy 0, policy_version 47250 (0.0008) -[2026-06-07 02:33:36,262][324276] Saving new best policy, reward=2008.336! -[2026-06-07 02:33:36,884][324563] Updated weights for policy 0, policy_version 47260 (0.0009) -[2026-06-07 02:33:37,092][324563] Updated weights for policy 0, policy_version 47270 (0.0006) -[2026-06-07 02:33:37,310][324563] Updated weights for policy 0, policy_version 47280 (0.0006) -[2026-06-07 02:33:37,509][324563] Updated weights for policy 0, policy_version 47290 (0.0006) -[2026-06-07 02:33:37,701][324563] Updated weights for policy 0, policy_version 47300 (0.0007) -[2026-06-07 02:33:37,902][324563] Updated weights for policy 0, policy_version 47310 (0.0006) -[2026-06-07 02:33:38,112][324563] Updated weights for policy 0, policy_version 47320 (0.0006) -[2026-06-07 02:33:38,835][324563] Updated weights for policy 0, policy_version 47330 (0.0006) -[2026-06-07 02:33:39,045][324563] Updated weights for policy 0, policy_version 47340 (0.0007) -[2026-06-07 02:33:39,263][324563] Updated weights for policy 0, policy_version 47350 (0.0006) -[2026-06-07 02:33:39,465][324563] Updated weights for policy 0, policy_version 47360 (0.0006) -[2026-06-07 02:33:39,678][324563] Updated weights for policy 0, policy_version 47370 (0.0006) -[2026-06-07 02:33:39,879][324563] Updated weights for policy 0, policy_version 47380 (0.0006) -[2026-06-07 02:33:40,594][324563] Updated weights for policy 0, policy_version 47390 (0.0007) -[2026-06-07 02:33:40,744][321791] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 24281088. Throughput: 0: 17629.9. Samples: 24307840. Policy #0 lag: (min: 49.0, avg: 71.2, max: 112.0) -[2026-06-07 02:33:40,745][321791] Avg episode reward: [(0, '2006.074')] -[2026-06-07 02:33:40,794][324563] Updated weights for policy 0, policy_version 47400 (0.0006) -[2026-06-07 02:33:41,025][324563] Updated weights for policy 0, policy_version 47410 (0.0006) -[2026-06-07 02:33:41,222][324563] Updated weights for policy 0, policy_version 47420 (0.0006) -[2026-06-07 02:33:41,447][324563] Updated weights for policy 0, policy_version 47430 (0.0006) -[2026-06-07 02:33:41,669][324563] Updated weights for policy 0, policy_version 47440 (0.0006) -[2026-06-07 02:33:42,340][324563] Updated weights for policy 0, policy_version 47450 (0.0010) -[2026-06-07 02:33:42,551][324563] Updated weights for policy 0, policy_version 47460 (0.0011) -[2026-06-07 02:33:42,759][324563] Updated weights for policy 0, policy_version 47470 (0.0011) -[2026-06-07 02:33:42,947][324563] Updated weights for policy 0, policy_version 47480 (0.0008) -[2026-06-07 02:33:43,141][324563] Updated weights for policy 0, policy_version 47490 (0.0008) -[2026-06-07 02:33:43,364][324563] Updated weights for policy 0, policy_version 47500 (0.0009) -[2026-06-07 02:33:43,580][324563] Updated weights for policy 0, policy_version 47510 (0.0011) -[2026-06-07 02:33:44,301][324563] Updated weights for policy 0, policy_version 47520 (0.0010) -[2026-06-07 02:33:44,518][324563] Updated weights for policy 0, policy_version 47530 (0.0008) -[2026-06-07 02:33:44,724][324563] Updated weights for policy 0, policy_version 47540 (0.0007) -[2026-06-07 02:33:44,936][324563] Updated weights for policy 0, policy_version 47550 (0.0007) -[2026-06-07 02:33:45,140][324563] Updated weights for policy 0, policy_version 47560 (0.0007) -[2026-06-07 02:33:45,355][324563] Updated weights for policy 0, policy_version 47570 (0.0007) -[2026-06-07 02:33:45,744][321791] Fps is (10 sec: 19660.9, 60 sec: 18022.4, 300 sec: 17772.5). Total num frames: 24379392. Throughput: 0: 17629.8. Samples: 24411648. Policy #0 lag: (min: 107.0, avg: 123.6, max: 171.0) -[2026-06-07 02:33:45,745][321791] Avg episode reward: [(0, '1973.600')] -[2026-06-07 02:33:46,088][324563] Updated weights for policy 0, policy_version 47580 (0.0008) -[2026-06-07 02:33:46,305][324563] Updated weights for policy 0, policy_version 47590 (0.0007) -[2026-06-07 02:33:46,512][324563] Updated weights for policy 0, policy_version 47600 (0.0007) -[2026-06-07 02:33:46,714][324563] Updated weights for policy 0, policy_version 47610 (0.0007) -[2026-06-07 02:33:46,915][324563] Updated weights for policy 0, policy_version 47620 (0.0007) -[2026-06-07 02:33:47,102][324563] Updated weights for policy 0, policy_version 47630 (0.0007) -[2026-06-07 02:33:47,292][324563] Updated weights for policy 0, policy_version 47640 (0.0007) -[2026-06-07 02:33:47,987][324563] Updated weights for policy 0, policy_version 47650 (0.0006) -[2026-06-07 02:33:48,203][324563] Updated weights for policy 0, policy_version 47660 (0.0006) -[2026-06-07 02:33:48,427][324563] Updated weights for policy 0, policy_version 47671 (0.0007) -[2026-06-07 02:33:48,632][324563] Updated weights for policy 0, policy_version 47681 (0.0011) -[2026-06-07 02:33:48,848][324563] Updated weights for policy 0, policy_version 47691 (0.0011) -[2026-06-07 02:33:49,041][324563] Updated weights for policy 0, policy_version 47701 (0.0011) -[2026-06-07 02:33:49,760][324563] Updated weights for policy 0, policy_version 47711 (0.0009) -[2026-06-07 02:33:49,982][324563] Updated weights for policy 0, policy_version 47721 (0.0007) -[2026-06-07 02:33:50,191][324563] Updated weights for policy 0, policy_version 47731 (0.0007) -[2026-06-07 02:33:50,392][324563] Updated weights for policy 0, policy_version 47741 (0.0007) -[2026-06-07 02:33:50,622][324563] Updated weights for policy 0, policy_version 47752 (0.0006) -[2026-06-07 02:33:50,744][321791] Fps is (10 sec: 16383.9, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 24444928. Throughput: 0: 17612.8. Samples: 24468736. Policy #0 lag: (min: 107.0, avg: 123.6, max: 171.0) -[2026-06-07 02:33:50,745][321791] Avg episode reward: [(0, '1987.994')] -[2026-06-07 02:33:50,824][324563] Updated weights for policy 0, policy_version 47762 (0.0007) -[2026-06-07 02:33:51,598][324563] Updated weights for policy 0, policy_version 47772 (0.0007) -[2026-06-07 02:33:51,805][324563] Updated weights for policy 0, policy_version 47782 (0.0007) -[2026-06-07 02:33:51,990][324563] Updated weights for policy 0, policy_version 47792 (0.0007) -[2026-06-07 02:33:52,226][324563] Updated weights for policy 0, policy_version 47803 (0.0006) -[2026-06-07 02:33:52,411][324563] Updated weights for policy 0, policy_version 47813 (0.0006) -[2026-06-07 02:33:52,627][324563] Updated weights for policy 0, policy_version 47823 (0.0007) -[2026-06-07 02:33:53,405][324563] Updated weights for policy 0, policy_version 47833 (0.0007) -[2026-06-07 02:33:53,583][324563] Updated weights for policy 0, policy_version 47843 (0.0006) -[2026-06-07 02:33:53,825][324563] Updated weights for policy 0, policy_version 47854 (0.0007) -[2026-06-07 02:33:54,043][324563] Updated weights for policy 0, policy_version 47864 (0.0007) -[2026-06-07 02:33:54,284][324563] Updated weights for policy 0, policy_version 47875 (0.0006) -[2026-06-07 02:33:54,494][324563] Updated weights for policy 0, policy_version 47885 (0.0008) -[2026-06-07 02:33:54,711][324563] Updated weights for policy 0, policy_version 47895 (0.0011) -[2026-06-07 02:33:55,451][324563] Updated weights for policy 0, policy_version 47905 (0.0011) -[2026-06-07 02:33:55,667][324563] Updated weights for policy 0, policy_version 47915 (0.0011) -[2026-06-07 02:33:55,744][321791] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 24543232. Throughput: 0: 17652.6. Samples: 24572416. Policy #0 lag: (min: 107.0, avg: 123.6, max: 171.0) -[2026-06-07 02:33:55,745][321791] Avg episode reward: [(0, '1981.830')] -[2026-06-07 02:33:55,869][324563] Updated weights for policy 0, policy_version 47925 (0.0011) -[2026-06-07 02:33:56,081][324563] Updated weights for policy 0, policy_version 47935 (0.0008) -[2026-06-07 02:33:56,272][324563] Updated weights for policy 0, policy_version 47945 (0.0007) -[2026-06-07 02:33:56,514][324563] Updated weights for policy 0, policy_version 47955 (0.0006) -[2026-06-07 02:33:57,221][324563] Updated weights for policy 0, policy_version 47965 (0.0007) -[2026-06-07 02:33:57,416][324563] Updated weights for policy 0, policy_version 47975 (0.0007) -[2026-06-07 02:33:57,632][324563] Updated weights for policy 0, policy_version 47985 (0.0007) -[2026-06-07 02:33:57,858][324563] Updated weights for policy 0, policy_version 47996 (0.0007) -[2026-06-07 02:33:58,081][324563] Updated weights for policy 0, policy_version 48006 (0.0006) -[2026-06-07 02:33:58,260][324563] Updated weights for policy 0, policy_version 48016 (0.0008) -[2026-06-07 02:33:58,980][324563] Updated weights for policy 0, policy_version 48026 (0.0007) -[2026-06-07 02:33:59,189][324563] Updated weights for policy 0, policy_version 48036 (0.0006) -[2026-06-07 02:33:59,401][324563] Updated weights for policy 0, policy_version 48046 (0.0006) -[2026-06-07 02:33:59,599][324563] Updated weights for policy 0, policy_version 48056 (0.0006) -[2026-06-07 02:33:59,790][324563] Updated weights for policy 0, policy_version 48066 (0.0007) -[2026-06-07 02:33:59,993][324563] Updated weights for policy 0, policy_version 48076 (0.0007) -[2026-06-07 02:34:00,192][324563] Updated weights for policy 0, policy_version 48086 (0.0007) -[2026-06-07 02:34:00,744][321791] Fps is (10 sec: 19660.9, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 24641536. Throughput: 0: 17820.4. Samples: 24682880. Policy #0 lag: (min: 107.0, avg: 123.6, max: 171.0) -[2026-06-07 02:34:00,745][321791] Avg episode reward: [(0, '2020.643')] -[2026-06-07 02:34:00,914][324563] Updated weights for policy 0, policy_version 48096 (0.0010) -[2026-06-07 02:34:01,118][324563] Updated weights for policy 0, policy_version 48106 (0.0009) -[2026-06-07 02:34:01,302][324563] Updated weights for policy 0, policy_version 48116 (0.0007) -[2026-06-07 02:34:01,530][324563] Updated weights for policy 0, policy_version 48127 (0.0007) -[2026-06-07 02:34:01,747][324563] Updated weights for policy 0, policy_version 48137 (0.0006) -[2026-06-07 02:34:01,970][324563] Updated weights for policy 0, policy_version 48147 (0.0007) -[2026-06-07 02:34:02,067][324276] Saving new best policy, reward=2020.643! -[2026-06-07 02:34:02,677][324563] Updated weights for policy 0, policy_version 48157 (0.0010) -[2026-06-07 02:34:02,900][324563] Updated weights for policy 0, policy_version 48167 (0.0011) -[2026-06-07 02:34:03,104][324563] Updated weights for policy 0, policy_version 48177 (0.0012) -[2026-06-07 02:34:03,298][324563] Updated weights for policy 0, policy_version 48187 (0.0011) -[2026-06-07 02:34:03,496][324563] Updated weights for policy 0, policy_version 48197 (0.0011) -[2026-06-07 02:34:03,701][324563] Updated weights for policy 0, policy_version 48207 (0.0011) -[2026-06-07 02:34:04,439][324563] Updated weights for policy 0, policy_version 48217 (0.0011) -[2026-06-07 02:34:04,625][324563] Updated weights for policy 0, policy_version 48227 (0.0011) -[2026-06-07 02:34:04,850][324563] Updated weights for policy 0, policy_version 48237 (0.0011) -[2026-06-07 02:34:05,049][324563] Updated weights for policy 0, policy_version 48247 (0.0011) -[2026-06-07 02:34:05,250][324563] Updated weights for policy 0, policy_version 48257 (0.0011) -[2026-06-07 02:34:05,454][324563] Updated weights for policy 0, policy_version 48267 (0.0011) -[2026-06-07 02:34:05,663][324563] Updated weights for policy 0, policy_version 48277 (0.0011) -[2026-06-07 02:34:05,744][321791] Fps is (10 sec: 19660.6, 60 sec: 18022.4, 300 sec: 17772.5). Total num frames: 24739840. Throughput: 0: 17635.6. Samples: 24732544. Policy #0 lag: (min: 107.0, avg: 123.6, max: 171.0) -[2026-06-07 02:34:05,745][321791] Avg episode reward: [(0, '2010.866')] -[2026-06-07 02:34:06,390][324563] Updated weights for policy 0, policy_version 48287 (0.0008) -[2026-06-07 02:34:06,573][324563] Updated weights for policy 0, policy_version 48297 (0.0006) -[2026-06-07 02:34:06,757][324563] Updated weights for policy 0, policy_version 48307 (0.0007) -[2026-06-07 02:34:06,960][324563] Updated weights for policy 0, policy_version 48317 (0.0007) -[2026-06-07 02:34:07,177][324563] Updated weights for policy 0, policy_version 48327 (0.0006) -[2026-06-07 02:34:07,363][324563] Updated weights for policy 0, policy_version 48337 (0.0007) -[2026-06-07 02:34:08,071][324563] Updated weights for policy 0, policy_version 48347 (0.0008) -[2026-06-07 02:34:08,276][324563] Updated weights for policy 0, policy_version 48357 (0.0007) -[2026-06-07 02:34:08,491][324563] Updated weights for policy 0, policy_version 48367 (0.0007) -[2026-06-07 02:34:08,687][324563] Updated weights for policy 0, policy_version 48377 (0.0007) -[2026-06-07 02:34:08,904][324563] Updated weights for policy 0, policy_version 48387 (0.0007) -[2026-06-07 02:34:09,135][324563] Updated weights for policy 0, policy_version 48398 (0.0007) -[2026-06-07 02:34:09,339][324563] Updated weights for policy 0, policy_version 48408 (0.0008) -[2026-06-07 02:34:10,093][324563] Updated weights for policy 0, policy_version 48418 (0.0007) -[2026-06-07 02:34:10,273][324563] Updated weights for policy 0, policy_version 48428 (0.0007) -[2026-06-07 02:34:10,482][324563] Updated weights for policy 0, policy_version 48438 (0.0007) -[2026-06-07 02:34:10,681][324563] Updated weights for policy 0, policy_version 48448 (0.0007) -[2026-06-07 02:34:10,744][321791] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17661.4). Total num frames: 24805376. Throughput: 0: 17732.3. Samples: 24840064. Policy #0 lag: (min: 107.0, avg: 123.6, max: 171.0) -[2026-06-07 02:34:10,745][321791] Avg episode reward: [(0, '2017.461')] -[2026-06-07 02:34:10,895][324563] Updated weights for policy 0, policy_version 48458 (0.0007) -[2026-06-07 02:34:11,097][324563] Updated weights for policy 0, policy_version 48468 (0.0007) -[2026-06-07 02:34:11,816][324563] Updated weights for policy 0, policy_version 48478 (0.0006) -[2026-06-07 02:34:12,017][324563] Updated weights for policy 0, policy_version 48488 (0.0006) -[2026-06-07 02:34:12,249][324563] Updated weights for policy 0, policy_version 48499 (0.0006) -[2026-06-07 02:34:12,472][324563] Updated weights for policy 0, policy_version 48510 (0.0008) -[2026-06-07 02:34:12,652][324563] Updated weights for policy 0, policy_version 48520 (0.0010) -[2026-06-07 02:34:12,875][324563] Updated weights for policy 0, policy_version 48531 (0.0012) -[2026-06-07 02:34:13,581][324563] Updated weights for policy 0, policy_version 48541 (0.0007) -[2026-06-07 02:34:13,800][324563] Updated weights for policy 0, policy_version 48551 (0.0008) -[2026-06-07 02:34:14,002][324563] Updated weights for policy 0, policy_version 48561 (0.0007) -[2026-06-07 02:34:14,213][324563] Updated weights for policy 0, policy_version 48571 (0.0008) -[2026-06-07 02:34:14,420][324563] Updated weights for policy 0, policy_version 48581 (0.0009) -[2026-06-07 02:34:14,636][324563] Updated weights for policy 0, policy_version 48591 (0.0008) -[2026-06-07 02:34:15,387][324563] Updated weights for policy 0, policy_version 48601 (0.0008) -[2026-06-07 02:34:15,594][324563] Updated weights for policy 0, policy_version 48611 (0.0007) -[2026-06-07 02:34:15,744][321791] Fps is (10 sec: 16384.1, 60 sec: 18022.4, 300 sec: 17661.4). Total num frames: 24903680. Throughput: 0: 17874.5. Samples: 24950400. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) -[2026-06-07 02:34:15,745][321791] Avg episode reward: [(0, '2006.299')] -[2026-06-07 02:34:15,805][324563] Updated weights for policy 0, policy_version 48621 (0.0007) -[2026-06-07 02:34:16,011][324563] Updated weights for policy 0, policy_version 48631 (0.0006) -[2026-06-07 02:34:16,238][324563] Updated weights for policy 0, policy_version 48642 (0.0007) -[2026-06-07 02:34:16,453][324563] Updated weights for policy 0, policy_version 48652 (0.0006) -[2026-06-07 02:34:16,673][324563] Updated weights for policy 0, policy_version 48662 (0.0006) -[2026-06-07 02:34:17,371][324563] Updated weights for policy 0, policy_version 48672 (0.0006) -[2026-06-07 02:34:17,583][324563] Updated weights for policy 0, policy_version 48682 (0.0006) -[2026-06-07 02:34:17,788][324563] Updated weights for policy 0, policy_version 48692 (0.0006) -[2026-06-07 02:34:18,007][324563] Updated weights for policy 0, policy_version 48702 (0.0006) -[2026-06-07 02:34:18,226][324563] Updated weights for policy 0, policy_version 48712 (0.0006) -[2026-06-07 02:34:18,471][324563] Updated weights for policy 0, policy_version 48724 (0.0006) -[2026-06-07 02:34:19,204][324563] Updated weights for policy 0, policy_version 48734 (0.0008) -[2026-06-07 02:34:19,424][324563] Updated weights for policy 0, policy_version 48744 (0.0007) -[2026-06-07 02:34:19,629][324563] Updated weights for policy 0, policy_version 48754 (0.0011) -[2026-06-07 02:34:19,860][324563] Updated weights for policy 0, policy_version 48765 (0.0011) -[2026-06-07 02:34:20,057][324563] Updated weights for policy 0, policy_version 48775 (0.0011) -[2026-06-07 02:34:20,249][324563] Updated weights for policy 0, policy_version 48785 (0.0011) -[2026-06-07 02:34:20,744][321791] Fps is (10 sec: 19660.4, 60 sec: 18022.4, 300 sec: 17772.5). Total num frames: 25001984. Throughput: 0: 17561.6. Samples: 24994432. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) -[2026-06-07 02:34:20,745][321791] Avg episode reward: [(0, '2053.241')] -[2026-06-07 02:34:20,752][324276] Saving new best policy, reward=2053.241! -[2026-06-07 02:34:20,953][324563] Updated weights for policy 0, policy_version 48795 (0.0008) -[2026-06-07 02:34:21,172][324563] Updated weights for policy 0, policy_version 48806 (0.0011) -[2026-06-07 02:34:21,383][324563] Updated weights for policy 0, policy_version 48816 (0.0011) -[2026-06-07 02:34:21,585][324563] Updated weights for policy 0, policy_version 48826 (0.0011) -[2026-06-07 02:34:21,779][324563] Updated weights for policy 0, policy_version 48836 (0.0011) -[2026-06-07 02:34:21,842][324276] Early stopping after 6 epochs (48 sgd steps), loss delta 0.0000000 -[2026-06-07 02:34:21,843][324276] Stopping Batcher_0... -[2026-06-07 02:34:21,843][321791] Component Batcher_0 stopped! -[2026-06-07 02:34:21,843][324564] Stopping RolloutWorker_w0... -[2026-06-07 02:34:21,843][324276] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed13/checkpoint_p0/checkpoint_000048840_25034752.pth... -[2026-06-07 02:34:21,844][324564] Loop rollout_proc0_evt_loop terminating... -[2026-06-07 02:34:21,844][321791] Component RolloutWorker_w0 stopped! -[2026-06-07 02:34:21,843][324276] Loop batcher_evt_loop terminating... -[2026-06-07 02:34:21,846][321791] Component RolloutWorker_w1 stopped! -[2026-06-07 02:34:21,846][324567] Stopping RolloutWorker_w1... -[2026-06-07 02:34:21,846][324567] Loop rollout_proc1_evt_loop terminating... -[2026-06-07 02:34:21,868][324276] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed13/checkpoint_p0/checkpoint_000048840_25034752.pth... -[2026-06-07 02:34:21,890][324563] Weights refcount: 2 0 -[2026-06-07 02:34:21,891][324563] Stopping InferenceWorker_p0-w0... -[2026-06-07 02:34:21,891][324563] Loop inference_proc0-0_evt_loop terminating... -[2026-06-07 02:34:21,891][321791] Component InferenceWorker_p0-w0 stopped! -[2026-06-07 02:34:21,892][324276] Stopping LearnerWorker_p0... -[2026-06-07 02:34:21,892][324276] Loop learner_proc0_evt_loop terminating... -[2026-06-07 02:34:21,892][321791] Component LearnerWorker_p0 stopped! -[2026-06-07 02:34:21,893][321791] Waiting for process learner_proc0 to stop... -[2026-06-07 02:34:22,714][321791] Waiting for process inference_proc0-0 to join... -[2026-06-07 02:34:22,715][321791] Waiting for process rollout_proc0 to join... -[2026-06-07 02:34:22,716][321791] Waiting for process rollout_proc1 to join... -[2026-06-07 02:34:22,717][321791] Batcher 0 profile tree view: -batching: 0.8762, releasing_batches: 0.0230 -[2026-06-07 02:34:22,717][321791] InferenceWorker_p0-w0 profile tree view: +[2026-06-07 02:47:40,162][472028] Using optimizer +[2026-06-07 02:47:41,265][472028] No checkpoints found +[2026-06-07 02:47:41,266][472028] Did not load from checkpoint, starting from scratch! +[2026-06-07 02:47:41,266][472028] Initialized policy 0 weights for model version 0 +[2026-06-07 02:47:41,273][472028] LearnerWorker_p0 finished initialization! +[2026-06-07 02:47:41,273][472028] Using GPUs [0] for process 0 (actually maps to GPUs [3]) +[2026-06-07 02:47:43,103][464932] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2026-06-07 02:47:43,963][472575] Worker 1 uses CPU cores [192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383] +[2026-06-07 02:47:43,964][472575] Using GPUs [0] for process 1 (actually maps to GPUs [3]) +[2026-06-07 02:47:43,964][472575] Set environment var CUDA_VISIBLE_DEVICES to '3' (GPU indices [0]) for actor process 1 +[2026-06-07 02:47:43,967][472575] Num visible devices: 1 +[2026-06-07 02:47:44,354][472573] Using GPUs [0] for process 0 (actually maps to GPUs [3]) +[2026-06-07 02:47:44,354][472573] Set environment var CUDA_VISIBLE_DEVICES to '3' (GPU indices [0]) for inference process 0 +[2026-06-07 02:47:44,354][472573] Num visible devices: 1 +[2026-06-07 02:47:44,376][472573] RunningMeanStd input shape: (3, 84, 84) +[2026-06-07 02:47:44,413][472573] RunningMeanStd input shape: (1,) +[2026-06-07 02:47:44,433][472573] ConvEncoder: input_channels=3 +[2026-06-07 02:47:44,501][472573] Conv encoder output size: 512 +[2026-06-07 02:47:44,523][464932] Inference worker 0-0 is ready! +[2026-06-07 02:47:44,524][464932] All inference workers are ready! Signal rollout workers to start! +[2026-06-07 02:47:44,526][472575] EnvRunner 1-0 uses policy 0 +[2026-06-07 02:47:44,775][472574] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191] +[2026-06-07 02:47:44,777][472574] Using GPUs [0] for process 0 (actually maps to GPUs [3]) +[2026-06-07 02:47:44,777][472574] Set environment var CUDA_VISIBLE_DEVICES to '3' (GPU indices [0]) for actor process 0 +[2026-06-07 02:47:44,777][472574] Num visible devices: 1 +[2026-06-07 02:47:44,780][472574] EnvRunner 0-0 uses policy 0 +[2026-06-07 02:47:47,243][472028] Signal inference workers to stop experience collection... +[2026-06-07 02:47:47,247][472573] InferenceWorker_p0-w0: stopping experience collection +[2026-06-07 02:47:48,103][464932] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 13542.1. Samples: 67712. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2026-06-07 02:47:48,104][464932] Avg episode reward: [(0, '-7.200')] +[2026-06-07 02:47:48,620][472028] Signal inference workers to resume experience collection... +[2026-06-07 02:47:48,621][472573] InferenceWorker_p0-w0: resuming experience collection +[2026-06-07 02:47:48,912][472573] Updated weights for policy 0, policy_version 87 (0.0028) +[2026-06-07 02:47:49,022][472573] Updated weights for policy 0, policy_version 97 (0.0004) +[2026-06-07 02:47:49,133][472573] Updated weights for policy 0, policy_version 107 (0.0004) +[2026-06-07 02:47:49,262][472573] Updated weights for policy 0, policy_version 119 (0.0006) +[2026-06-07 02:47:49,747][472573] Updated weights for policy 0, policy_version 129 (0.0008) +[2026-06-07 02:47:49,884][472573] Updated weights for policy 0, policy_version 142 (0.0006) +[2026-06-07 02:47:49,997][472573] Updated weights for policy 0, policy_version 152 (0.0008) +[2026-06-07 02:47:50,122][472573] Updated weights for policy 0, policy_version 162 (0.0008) +[2026-06-07 02:47:50,248][472573] Updated weights for policy 0, policy_version 172 (0.0005) +[2026-06-07 02:47:50,370][472573] Updated weights for policy 0, policy_version 182 (0.0004) +[2026-06-07 02:47:50,823][472573] Updated weights for policy 0, policy_version 195 (0.0007) +[2026-06-07 02:47:50,954][472573] Updated weights for policy 0, policy_version 205 (0.0004) +[2026-06-07 02:47:51,067][472573] Updated weights for policy 0, policy_version 215 (0.0006) +[2026-06-07 02:47:51,174][472573] Updated weights for policy 0, policy_version 225 (0.0008) +[2026-06-07 02:47:51,290][472573] Updated weights for policy 0, policy_version 235 (0.0008) +[2026-06-07 02:47:51,415][472573] Updated weights for policy 0, policy_version 245 (0.0007) +[2026-06-07 02:47:51,523][472573] Updated weights for policy 0, policy_version 255 (0.0007) +[2026-06-07 02:47:51,912][472573] Updated weights for policy 0, policy_version 266 (0.0007) +[2026-06-07 02:47:52,025][472573] Updated weights for policy 0, policy_version 276 (0.0008) +[2026-06-07 02:47:52,146][472573] Updated weights for policy 0, policy_version 286 (0.0008) +[2026-06-07 02:47:52,269][472573] Updated weights for policy 0, policy_version 296 (0.0007) +[2026-06-07 02:47:52,383][472573] Updated weights for policy 0, policy_version 306 (0.0006) +[2026-06-07 02:47:52,522][472573] Updated weights for policy 0, policy_version 319 (0.0008) +[2026-06-07 02:47:52,982][472573] Updated weights for policy 0, policy_version 329 (0.0004) +[2026-06-07 02:47:53,094][472573] Updated weights for policy 0, policy_version 339 (0.0004) +[2026-06-07 02:47:53,103][464932] Fps is (10 sec: 16383.8, 60 sec: 16383.8, 300 sec: 16383.8). Total num frames: 163840. Throughput: 0: 11955.0. Samples: 119552. Policy #0 lag: (min: 63.0, avg: 80.7, max: 127.0) +[2026-06-07 02:47:53,105][464932] Avg episode reward: [(0, '-4.716')] +[2026-06-07 02:47:53,221][472573] Updated weights for policy 0, policy_version 351 (0.0008) +[2026-06-07 02:47:53,339][472573] Updated weights for policy 0, policy_version 361 (0.0008) +[2026-06-07 02:47:53,453][472573] Updated weights for policy 0, policy_version 371 (0.0007) +[2026-06-07 02:47:53,561][472573] Updated weights for policy 0, policy_version 381 (0.0006) +[2026-06-07 02:47:53,596][472028] Saving new best policy, reward=-4.716! +[2026-06-07 02:47:53,960][472573] Updated weights for policy 0, policy_version 392 (0.0008) +[2026-06-07 02:47:54,081][472573] Updated weights for policy 0, policy_version 402 (0.0008) +[2026-06-07 02:47:54,199][472573] Updated weights for policy 0, policy_version 412 (0.0007) +[2026-06-07 02:47:54,309][472573] Updated weights for policy 0, policy_version 422 (0.0007) +[2026-06-07 02:47:54,417][472573] Updated weights for policy 0, policy_version 432 (0.0006) +[2026-06-07 02:47:54,550][472573] Updated weights for policy 0, policy_version 445 (0.0008) +[2026-06-07 02:47:55,049][472573] Updated weights for policy 0, policy_version 455 (0.0005) +[2026-06-07 02:47:55,159][472573] Updated weights for policy 0, policy_version 465 (0.0006) +[2026-06-07 02:47:55,269][472573] Updated weights for policy 0, policy_version 475 (0.0008) +[2026-06-07 02:47:55,385][472573] Updated weights for policy 0, policy_version 485 (0.0008) +[2026-06-07 02:47:55,505][472573] Updated weights for policy 0, policy_version 495 (0.0008) +[2026-06-07 02:47:55,617][472573] Updated weights for policy 0, policy_version 505 (0.0008) +[2026-06-07 02:47:56,084][472573] Updated weights for policy 0, policy_version 515 (0.0008) +[2026-06-07 02:47:56,197][472573] Updated weights for policy 0, policy_version 525 (0.0007) +[2026-06-07 02:47:56,311][472573] Updated weights for policy 0, policy_version 535 (0.0007) +[2026-06-07 02:47:56,428][472573] Updated weights for policy 0, policy_version 546 (0.0008) +[2026-06-07 02:47:56,538][472573] Updated weights for policy 0, policy_version 556 (0.0008) +[2026-06-07 02:47:56,666][472573] Updated weights for policy 0, policy_version 568 (0.0008) +[2026-06-07 02:47:57,151][472573] Updated weights for policy 0, policy_version 578 (0.0008) +[2026-06-07 02:47:57,266][472573] Updated weights for policy 0, policy_version 588 (0.0009) +[2026-06-07 02:47:57,408][472573] Updated weights for policy 0, policy_version 602 (0.0008) +[2026-06-07 02:47:57,522][472573] Updated weights for policy 0, policy_version 612 (0.0008) +[2026-06-07 02:47:57,644][472573] Updated weights for policy 0, policy_version 622 (0.0008) +[2026-06-07 02:47:57,765][472573] Updated weights for policy 0, policy_version 632 (0.0008) +[2026-06-07 02:47:57,893][464932] Heartbeat connected on Batcher_0 +[2026-06-07 02:47:57,897][464932] Heartbeat connected on LearnerWorker_p0 +[2026-06-07 02:47:57,906][464932] Heartbeat connected on InferenceWorker_p0-w0 +[2026-06-07 02:47:57,910][464932] Heartbeat connected on RolloutWorker_w0 +[2026-06-07 02:47:57,917][464932] Heartbeat connected on RolloutWorker_w1 +[2026-06-07 02:47:58,103][464932] Fps is (10 sec: 32768.1, 60 sec: 21845.3, 300 sec: 21845.3). Total num frames: 327680. Throughput: 0: 20505.5. Samples: 307584. Policy #0 lag: (min: 43.0, avg: 67.6, max: 107.0) +[2026-06-07 02:47:58,104][464932] Avg episode reward: [(0, '3.746')] +[2026-06-07 02:47:58,110][472028] Saving new best policy, reward=3.746! +[2026-06-07 02:47:58,304][472573] Updated weights for policy 0, policy_version 642 (0.0008) +[2026-06-07 02:47:58,418][472573] Updated weights for policy 0, policy_version 652 (0.0007) +[2026-06-07 02:47:58,545][472573] Updated weights for policy 0, policy_version 664 (0.0009) +[2026-06-07 02:47:58,672][472573] Updated weights for policy 0, policy_version 675 (0.0009) +[2026-06-07 02:47:58,784][472573] Updated weights for policy 0, policy_version 685 (0.0008) +[2026-06-07 02:47:58,925][472573] Updated weights for policy 0, policy_version 698 (0.0008) +[2026-06-07 02:47:59,528][472573] Updated weights for policy 0, policy_version 708 (0.0008) +[2026-06-07 02:47:59,636][472573] Updated weights for policy 0, policy_version 718 (0.0008) +[2026-06-07 02:47:59,766][472573] Updated weights for policy 0, policy_version 730 (0.0008) +[2026-06-07 02:47:59,882][472573] Updated weights for policy 0, policy_version 740 (0.0008) +[2026-06-07 02:48:00,022][472573] Updated weights for policy 0, policy_version 753 (0.0008) +[2026-06-07 02:48:00,134][472573] Updated weights for policy 0, policy_version 764 (0.0008) +[2026-06-07 02:48:00,816][472573] Updated weights for policy 0, policy_version 777 (0.0008) +[2026-06-07 02:48:00,939][472573] Updated weights for policy 0, policy_version 789 (0.0006) +[2026-06-07 02:48:01,064][472573] Updated weights for policy 0, policy_version 801 (0.0008) +[2026-06-07 02:48:01,181][472573] Updated weights for policy 0, policy_version 811 (0.0006) +[2026-06-07 02:48:01,294][472573] Updated weights for policy 0, policy_version 822 (0.0006) +[2026-06-07 02:48:01,961][472573] Updated weights for policy 0, policy_version 834 (0.0005) +[2026-06-07 02:48:02,079][472573] Updated weights for policy 0, policy_version 845 (0.0009) +[2026-06-07 02:48:02,201][472573] Updated weights for policy 0, policy_version 857 (0.0009) +[2026-06-07 02:48:02,312][472573] Updated weights for policy 0, policy_version 868 (0.0008) +[2026-06-07 02:48:02,426][472573] Updated weights for policy 0, policy_version 879 (0.0009) +[2026-06-07 02:48:02,559][472573] Updated weights for policy 0, policy_version 892 (0.0008) +[2026-06-07 02:48:03,103][464932] Fps is (10 sec: 29491.6, 60 sec: 22937.6, 300 sec: 22937.6). Total num frames: 458752. Throughput: 0: 23635.2. Samples: 472704. Policy #0 lag: (min: 34.0, avg: 61.5, max: 98.0) +[2026-06-07 02:48:03,104][464932] Avg episode reward: [(0, '4.054')] +[2026-06-07 02:48:03,111][472028] Saving new best policy, reward=4.054! +[2026-06-07 02:48:03,282][472573] Updated weights for policy 0, policy_version 903 (0.0009) +[2026-06-07 02:48:03,425][472573] Updated weights for policy 0, policy_version 917 (0.0008) +[2026-06-07 02:48:03,535][472573] Updated weights for policy 0, policy_version 927 (0.0008) +[2026-06-07 02:48:03,678][472573] Updated weights for policy 0, policy_version 940 (0.0008) +[2026-06-07 02:48:03,821][472573] Updated weights for policy 0, policy_version 954 (0.0008) +[2026-06-07 02:48:04,523][472573] Updated weights for policy 0, policy_version 965 (0.0009) +[2026-06-07 02:48:04,662][472573] Updated weights for policy 0, policy_version 978 (0.0009) +[2026-06-07 02:48:04,779][472573] Updated weights for policy 0, policy_version 990 (0.0009) +[2026-06-07 02:48:04,909][472573] Updated weights for policy 0, policy_version 1002 (0.0010) +[2026-06-07 02:48:05,043][472573] Updated weights for policy 0, policy_version 1015 (0.0008) +[2026-06-07 02:48:05,761][472573] Updated weights for policy 0, policy_version 1027 (0.0009) +[2026-06-07 02:48:05,887][472573] Updated weights for policy 0, policy_version 1039 (0.0008) +[2026-06-07 02:48:06,003][472573] Updated weights for policy 0, policy_version 1050 (0.0008) +[2026-06-07 02:48:06,136][472573] Updated weights for policy 0, policy_version 1062 (0.0010) +[2026-06-07 02:48:06,255][472573] Updated weights for policy 0, policy_version 1074 (0.0008) +[2026-06-07 02:48:06,395][472573] Updated weights for policy 0, policy_version 1087 (0.0008) +[2026-06-07 02:48:07,125][472573] Updated weights for policy 0, policy_version 1101 (0.0008) +[2026-06-07 02:48:07,249][472573] Updated weights for policy 0, policy_version 1112 (0.0007) +[2026-06-07 02:48:07,386][472573] Updated weights for policy 0, policy_version 1125 (0.0008) +[2026-06-07 02:48:07,502][472573] Updated weights for policy 0, policy_version 1136 (0.0008) +[2026-06-07 02:48:07,635][472573] Updated weights for policy 0, policy_version 1149 (0.0009) +[2026-06-07 02:48:08,103][464932] Fps is (10 sec: 26214.7, 60 sec: 23593.0, 300 sec: 23593.0). Total num frames: 589824. Throughput: 0: 22241.3. Samples: 556032. Policy #0 lag: (min: 63.0, avg: 73.6, max: 127.0) +[2026-06-07 02:48:08,104][464932] Avg episode reward: [(0, '4.355')] +[2026-06-07 02:48:08,110][472028] Saving new best policy, reward=4.355! +[2026-06-07 02:48:08,323][472573] Updated weights for policy 0, policy_version 1159 (0.0008) +[2026-06-07 02:48:08,453][472573] Updated weights for policy 0, policy_version 1171 (0.0008) +[2026-06-07 02:48:08,563][472573] Updated weights for policy 0, policy_version 1181 (0.0008) +[2026-06-07 02:48:08,678][472573] Updated weights for policy 0, policy_version 1193 (0.0009) +[2026-06-07 02:48:08,804][472573] Updated weights for policy 0, policy_version 1205 (0.0009) +[2026-06-07 02:48:09,553][472573] Updated weights for policy 0, policy_version 1217 (0.0009) +[2026-06-07 02:48:09,672][472573] Updated weights for policy 0, policy_version 1228 (0.0008) +[2026-06-07 02:48:09,793][472573] Updated weights for policy 0, policy_version 1239 (0.0008) +[2026-06-07 02:48:09,929][472573] Updated weights for policy 0, policy_version 1252 (0.0008) +[2026-06-07 02:48:10,040][472573] Updated weights for policy 0, policy_version 1262 (0.0009) +[2026-06-07 02:48:10,184][472573] Updated weights for policy 0, policy_version 1276 (0.0008) +[2026-06-07 02:48:10,922][472573] Updated weights for policy 0, policy_version 1288 (0.0006) +[2026-06-07 02:48:11,057][472573] Updated weights for policy 0, policy_version 1301 (0.0008) +[2026-06-07 02:48:11,187][472573] Updated weights for policy 0, policy_version 1314 (0.0008) +[2026-06-07 02:48:11,298][472573] Updated weights for policy 0, policy_version 1325 (0.0008) +[2026-06-07 02:48:11,410][472573] Updated weights for policy 0, policy_version 1335 (0.0008) +[2026-06-07 02:48:12,136][472573] Updated weights for policy 0, policy_version 1347 (0.0008) +[2026-06-07 02:48:12,260][472573] Updated weights for policy 0, policy_version 1359 (0.0008) +[2026-06-07 02:48:12,389][472573] Updated weights for policy 0, policy_version 1370 (0.0008) +[2026-06-07 02:48:12,518][472573] Updated weights for policy 0, policy_version 1382 (0.0008) +[2026-06-07 02:48:12,654][472573] Updated weights for policy 0, policy_version 1395 (0.0009) +[2026-06-07 02:48:12,777][472573] Updated weights for policy 0, policy_version 1406 (0.0009) +[2026-06-07 02:48:13,105][464932] Fps is (10 sec: 26212.0, 60 sec: 24029.1, 300 sec: 24029.1). Total num frames: 720896. Throughput: 0: 23478.7. Samples: 704384. Policy #0 lag: (min: 59.0, avg: 90.4, max: 123.0) +[2026-06-07 02:48:13,109][464932] Avg episode reward: [(0, '5.166')] +[2026-06-07 02:48:13,121][472028] Saving new best policy, reward=5.166! +[2026-06-07 02:48:13,480][472573] Updated weights for policy 0, policy_version 1416 (0.0009) +[2026-06-07 02:48:13,585][472573] Updated weights for policy 0, policy_version 1426 (0.0008) +[2026-06-07 02:48:13,715][472573] Updated weights for policy 0, policy_version 1438 (0.0008) +[2026-06-07 02:48:13,822][472573] Updated weights for policy 0, policy_version 1448 (0.0008) +[2026-06-07 02:48:13,942][472573] Updated weights for policy 0, policy_version 1459 (0.0008) +[2026-06-07 02:48:14,080][472573] Updated weights for policy 0, policy_version 1472 (0.0008) +[2026-06-07 02:48:14,788][472573] Updated weights for policy 0, policy_version 1482 (0.0008) +[2026-06-07 02:48:14,917][472573] Updated weights for policy 0, policy_version 1494 (0.0008) +[2026-06-07 02:48:15,054][472573] Updated weights for policy 0, policy_version 1508 (0.0008) +[2026-06-07 02:48:15,173][472573] Updated weights for policy 0, policy_version 1519 (0.0008) +[2026-06-07 02:48:15,296][472573] Updated weights for policy 0, policy_version 1531 (0.0008) +[2026-06-07 02:48:16,043][472573] Updated weights for policy 0, policy_version 1541 (0.0008) +[2026-06-07 02:48:16,172][472573] Updated weights for policy 0, policy_version 1553 (0.0008) +[2026-06-07 02:48:16,273][472573] Updated weights for policy 0, policy_version 1563 (0.0008) +[2026-06-07 02:48:16,375][472573] Updated weights for policy 0, policy_version 1573 (0.0008) +[2026-06-07 02:48:16,506][472573] Updated weights for policy 0, policy_version 1585 (0.0008) +[2026-06-07 02:48:16,625][472573] Updated weights for policy 0, policy_version 1596 (0.0008) +[2026-06-07 02:48:17,365][472573] Updated weights for policy 0, policy_version 1607 (0.0008) +[2026-06-07 02:48:17,491][472573] Updated weights for policy 0, policy_version 1619 (0.0008) +[2026-06-07 02:48:17,609][472573] Updated weights for policy 0, policy_version 1630 (0.0007) +[2026-06-07 02:48:17,773][472573] Updated weights for policy 0, policy_version 1645 (0.0009) +[2026-06-07 02:48:17,892][472573] Updated weights for policy 0, policy_version 1656 (0.0009) +[2026-06-07 02:48:18,104][464932] Fps is (10 sec: 26213.7, 60 sec: 24341.8, 300 sec: 24341.8). Total num frames: 851968. Throughput: 0: 24638.0. Samples: 862336. Policy #0 lag: (min: 2.0, avg: 33.7, max: 66.0) +[2026-06-07 02:48:18,106][464932] Avg episode reward: [(0, '5.801')] +[2026-06-07 02:48:18,115][472028] Saving new best policy, reward=5.801! +[2026-06-07 02:48:18,650][472573] Updated weights for policy 0, policy_version 1668 (0.0009) +[2026-06-07 02:48:18,758][472573] Updated weights for policy 0, policy_version 1679 (0.0009) +[2026-06-07 02:48:18,923][472573] Updated weights for policy 0, policy_version 1695 (0.0009) +[2026-06-07 02:48:19,044][472573] Updated weights for policy 0, policy_version 1706 (0.0008) +[2026-06-07 02:48:19,196][472573] Updated weights for policy 0, policy_version 1720 (0.0009) +[2026-06-07 02:48:19,904][472573] Updated weights for policy 0, policy_version 1731 (0.0009) +[2026-06-07 02:48:20,021][472573] Updated weights for policy 0, policy_version 1741 (0.0008) +[2026-06-07 02:48:20,143][472573] Updated weights for policy 0, policy_version 1753 (0.0008) +[2026-06-07 02:48:20,266][472573] Updated weights for policy 0, policy_version 1764 (0.0009) +[2026-06-07 02:48:20,422][472573] Updated weights for policy 0, policy_version 1779 (0.0012) +[2026-06-07 02:48:20,542][472573] Updated weights for policy 0, policy_version 1791 (0.0008) +[2026-06-07 02:48:21,318][472573] Updated weights for policy 0, policy_version 1803 (0.0011) +[2026-06-07 02:48:21,425][472573] Updated weights for policy 0, policy_version 1813 (0.0011) +[2026-06-07 02:48:21,553][472573] Updated weights for policy 0, policy_version 1825 (0.0012) +[2026-06-07 02:48:21,674][472573] Updated weights for policy 0, policy_version 1836 (0.0011) +[2026-06-07 02:48:21,798][472573] Updated weights for policy 0, policy_version 1848 (0.0011) +[2026-06-07 02:48:22,545][472573] Updated weights for policy 0, policy_version 1859 (0.0009) +[2026-06-07 02:48:22,684][472573] Updated weights for policy 0, policy_version 1872 (0.0008) +[2026-06-07 02:48:22,807][472573] Updated weights for policy 0, policy_version 1884 (0.0008) +[2026-06-07 02:48:22,943][472573] Updated weights for policy 0, policy_version 1897 (0.0008) +[2026-06-07 02:48:23,053][472573] Updated weights for policy 0, policy_version 1907 (0.0008) +[2026-06-07 02:48:23,103][464932] Fps is (10 sec: 22939.5, 60 sec: 23756.7, 300 sec: 23756.7). Total num frames: 950272. Throughput: 0: 23372.7. Samples: 934912. Policy #0 lag: (min: 63.0, avg: 71.9, max: 127.0) +[2026-06-07 02:48:23,105][464932] Avg episode reward: [(0, '7.334')] +[2026-06-07 02:48:23,179][472573] Updated weights for policy 0, policy_version 1919 (0.0011) +[2026-06-07 02:48:23,185][472028] Saving new best policy, reward=7.334! +[2026-06-07 02:48:23,923][472573] Updated weights for policy 0, policy_version 1929 (0.0009) +[2026-06-07 02:48:24,052][472573] Updated weights for policy 0, policy_version 1941 (0.0008) +[2026-06-07 02:48:24,168][472573] Updated weights for policy 0, policy_version 1952 (0.0008) +[2026-06-07 02:48:24,307][472573] Updated weights for policy 0, policy_version 1965 (0.0008) +[2026-06-07 02:48:24,426][472573] Updated weights for policy 0, policy_version 1976 (0.0008) +[2026-06-07 02:48:25,172][472573] Updated weights for policy 0, policy_version 1990 (0.0009) +[2026-06-07 02:48:25,314][472573] Updated weights for policy 0, policy_version 2003 (0.0009) +[2026-06-07 02:48:25,454][472573] Updated weights for policy 0, policy_version 2017 (0.0009) +[2026-06-07 02:48:25,581][472573] Updated weights for policy 0, policy_version 2029 (0.0009) +[2026-06-07 02:48:25,709][472573] Updated weights for policy 0, policy_version 2041 (0.0009) +[2026-06-07 02:48:26,502][472573] Updated weights for policy 0, policy_version 2054 (0.0009) +[2026-06-07 02:48:26,618][472573] Updated weights for policy 0, policy_version 2065 (0.0008) +[2026-06-07 02:48:26,724][472573] Updated weights for policy 0, policy_version 2075 (0.0008) +[2026-06-07 02:48:26,864][472573] Updated weights for policy 0, policy_version 2088 (0.0009) +[2026-06-07 02:48:26,985][472573] Updated weights for policy 0, policy_version 2100 (0.0009) +[2026-06-07 02:48:27,112][472573] Updated weights for policy 0, policy_version 2112 (0.0009) +[2026-06-07 02:48:27,874][472573] Updated weights for policy 0, policy_version 2123 (0.0008) +[2026-06-07 02:48:28,017][472573] Updated weights for policy 0, policy_version 2137 (0.0009) +[2026-06-07 02:48:28,103][464932] Fps is (10 sec: 22938.5, 60 sec: 24030.0, 300 sec: 24030.0). Total num frames: 1081344. Throughput: 0: 24266.0. Samples: 1091968. Policy #0 lag: (min: 17.0, avg: 59.5, max: 81.0) +[2026-06-07 02:48:28,104][464932] Avg episode reward: [(0, '7.977')] +[2026-06-07 02:48:28,174][472573] Updated weights for policy 0, policy_version 2153 (0.0009) +[2026-06-07 02:48:28,336][472573] Updated weights for policy 0, policy_version 2168 (0.0008) +[2026-06-07 02:48:28,414][472028] Saving new best policy, reward=7.977! +[2026-06-07 02:48:29,084][472573] Updated weights for policy 0, policy_version 2180 (0.0009) +[2026-06-07 02:48:29,198][472573] Updated weights for policy 0, policy_version 2192 (0.0008) +[2026-06-07 02:48:29,328][472573] Updated weights for policy 0, policy_version 2204 (0.0009) +[2026-06-07 02:48:29,431][472573] Updated weights for policy 0, policy_version 2214 (0.0009) +[2026-06-07 02:48:29,577][472573] Updated weights for policy 0, policy_version 2228 (0.0009) +[2026-06-07 02:48:29,706][472573] Updated weights for policy 0, policy_version 2240 (0.0008) +[2026-06-07 02:48:30,429][472573] Updated weights for policy 0, policy_version 2250 (0.0009) +[2026-06-07 02:48:30,554][472573] Updated weights for policy 0, policy_version 2262 (0.0008) +[2026-06-07 02:48:30,694][472573] Updated weights for policy 0, policy_version 2275 (0.0009) +[2026-06-07 02:48:30,816][472573] Updated weights for policy 0, policy_version 2287 (0.0009) +[2026-06-07 02:48:30,949][472573] Updated weights for policy 0, policy_version 2299 (0.0009) +[2026-06-07 02:48:31,699][472573] Updated weights for policy 0, policy_version 2315 (0.0009) +[2026-06-07 02:48:31,864][472573] Updated weights for policy 0, policy_version 2331 (0.0009) +[2026-06-07 02:48:32,032][472573] Updated weights for policy 0, policy_version 2348 (0.0008) +[2026-06-07 02:48:32,182][472573] Updated weights for policy 0, policy_version 2363 (0.0008) +[2026-06-07 02:48:32,971][472573] Updated weights for policy 0, policy_version 2375 (0.0009) +[2026-06-07 02:48:33,097][472573] Updated weights for policy 0, policy_version 2387 (0.0008) +[2026-06-07 02:48:33,103][464932] Fps is (10 sec: 26214.9, 60 sec: 24248.4, 300 sec: 24248.4). Total num frames: 1212416. Throughput: 0: 26066.6. Samples: 1240704. Policy #0 lag: (min: 21.0, avg: 30.8, max: 85.0) +[2026-06-07 02:48:33,105][464932] Avg episode reward: [(0, '10.063')] +[2026-06-07 02:48:33,218][472573] Updated weights for policy 0, policy_version 2399 (0.0008) +[2026-06-07 02:48:33,328][472573] Updated weights for policy 0, policy_version 2409 (0.0008) +[2026-06-07 02:48:33,470][472573] Updated weights for policy 0, policy_version 2423 (0.0008) +[2026-06-07 02:48:33,567][472028] Saving new best policy, reward=10.063! +[2026-06-07 02:48:34,188][472573] Updated weights for policy 0, policy_version 2434 (0.0009) +[2026-06-07 02:48:34,310][472573] Updated weights for policy 0, policy_version 2446 (0.0008) +[2026-06-07 02:48:34,450][472573] Updated weights for policy 0, policy_version 2459 (0.0008) +[2026-06-07 02:48:34,574][472573] Updated weights for policy 0, policy_version 2470 (0.0008) +[2026-06-07 02:48:34,718][472573] Updated weights for policy 0, policy_version 2484 (0.0008) +[2026-06-07 02:48:34,851][472573] Updated weights for policy 0, policy_version 2496 (0.0009) +[2026-06-07 02:48:35,569][472573] Updated weights for policy 0, policy_version 2510 (0.0008) +[2026-06-07 02:48:35,713][472573] Updated weights for policy 0, policy_version 2523 (0.0009) +[2026-06-07 02:48:35,843][472573] Updated weights for policy 0, policy_version 2535 (0.0008) +[2026-06-07 02:48:35,971][472573] Updated weights for policy 0, policy_version 2547 (0.0008) +[2026-06-07 02:48:36,754][472573] Updated weights for policy 0, policy_version 2561 (0.0009) +[2026-06-07 02:48:36,886][472573] Updated weights for policy 0, policy_version 2573 (0.0007) +[2026-06-07 02:48:37,028][472573] Updated weights for policy 0, policy_version 2587 (0.0009) +[2026-06-07 02:48:37,162][472573] Updated weights for policy 0, policy_version 2599 (0.0008) +[2026-06-07 02:48:37,285][472573] Updated weights for policy 0, policy_version 2611 (0.0008) +[2026-06-07 02:48:37,412][472573] Updated weights for policy 0, policy_version 2623 (0.0009) +[2026-06-07 02:48:38,104][464932] Fps is (10 sec: 26213.4, 60 sec: 24427.0, 300 sec: 24427.0). Total num frames: 1343488. Throughput: 0: 26700.8. Samples: 1321088. Policy #0 lag: (min: 25.0, avg: 69.7, max: 89.0) +[2026-06-07 02:48:38,106][464932] Avg episode reward: [(0, '11.708')] +[2026-06-07 02:48:38,113][472573] Updated weights for policy 0, policy_version 2634 (0.0008) +[2026-06-07 02:48:38,260][472573] Updated weights for policy 0, policy_version 2648 (0.0008) +[2026-06-07 02:48:38,397][472573] Updated weights for policy 0, policy_version 2661 (0.0008) +[2026-06-07 02:48:38,512][472573] Updated weights for policy 0, policy_version 2671 (0.0008) +[2026-06-07 02:48:38,633][472573] Updated weights for policy 0, policy_version 2682 (0.0009) +[2026-06-07 02:48:38,690][472028] Saving new best policy, reward=11.708! +[2026-06-07 02:48:39,345][472573] Updated weights for policy 0, policy_version 2694 (0.0009) +[2026-06-07 02:48:39,480][472573] Updated weights for policy 0, policy_version 2706 (0.0008) +[2026-06-07 02:48:39,595][472573] Updated weights for policy 0, policy_version 2717 (0.0008) +[2026-06-07 02:48:39,725][472573] Updated weights for policy 0, policy_version 2728 (0.0008) +[2026-06-07 02:48:39,848][472573] Updated weights for policy 0, policy_version 2739 (0.0008) +[2026-06-07 02:48:40,546][472573] Updated weights for policy 0, policy_version 2753 (0.0009) +[2026-06-07 02:48:40,677][472573] Updated weights for policy 0, policy_version 2765 (0.0008) +[2026-06-07 02:48:40,846][472573] Updated weights for policy 0, policy_version 2781 (0.0008) +[2026-06-07 02:48:40,959][472573] Updated weights for policy 0, policy_version 2792 (0.0008) +[2026-06-07 02:48:41,084][472573] Updated weights for policy 0, policy_version 2803 (0.0008) +[2026-06-07 02:48:41,218][472573] Updated weights for policy 0, policy_version 2816 (0.0008) +[2026-06-07 02:48:41,929][472573] Updated weights for policy 0, policy_version 2827 (0.0009) +[2026-06-07 02:48:42,067][472573] Updated weights for policy 0, policy_version 2840 (0.0008) +[2026-06-07 02:48:42,195][472573] Updated weights for policy 0, policy_version 2852 (0.0008) +[2026-06-07 02:48:42,348][472573] Updated weights for policy 0, policy_version 2866 (0.0009) +[2026-06-07 02:48:42,492][472573] Updated weights for policy 0, policy_version 2879 (0.0008) +[2026-06-07 02:48:43,103][464932] Fps is (10 sec: 26214.6, 60 sec: 24576.1, 300 sec: 24576.1). Total num frames: 1474560. Throughput: 0: 25927.2. Samples: 1474304. Policy #0 lag: (min: 30.0, avg: 39.8, max: 94.0) +[2026-06-07 02:48:43,104][464932] Avg episode reward: [(0, '14.627')] +[2026-06-07 02:48:43,225][472573] Updated weights for policy 0, policy_version 2892 (0.0008) +[2026-06-07 02:48:43,345][472573] Updated weights for policy 0, policy_version 2903 (0.0009) +[2026-06-07 02:48:43,487][472573] Updated weights for policy 0, policy_version 2916 (0.0008) +[2026-06-07 02:48:43,615][472573] Updated weights for policy 0, policy_version 2928 (0.0008) +[2026-06-07 02:48:43,732][472573] Updated weights for policy 0, policy_version 2939 (0.0008) +[2026-06-07 02:48:43,784][472028] Saving new best policy, reward=14.627! +[2026-06-07 02:48:44,440][472573] Updated weights for policy 0, policy_version 2952 (0.0008) +[2026-06-07 02:48:44,558][472573] Updated weights for policy 0, policy_version 2963 (0.0010) +[2026-06-07 02:48:44,710][472573] Updated weights for policy 0, policy_version 2977 (0.0009) +[2026-06-07 02:48:44,853][472573] Updated weights for policy 0, policy_version 2990 (0.0008) +[2026-06-07 02:48:44,960][472573] Updated weights for policy 0, policy_version 3000 (0.0008) +[2026-06-07 02:48:45,615][472573] Updated weights for policy 0, policy_version 3011 (0.0009) +[2026-06-07 02:48:45,740][472573] Updated weights for policy 0, policy_version 3023 (0.0008) +[2026-06-07 02:48:45,895][472573] Updated weights for policy 0, policy_version 3037 (0.0009) +[2026-06-07 02:48:46,034][472573] Updated weights for policy 0, policy_version 3051 (0.0009) +[2026-06-07 02:48:46,170][472573] Updated weights for policy 0, policy_version 3063 (0.0009) +[2026-06-07 02:48:46,856][472573] Updated weights for policy 0, policy_version 3076 (0.0009) +[2026-06-07 02:48:46,981][472573] Updated weights for policy 0, policy_version 3088 (0.0009) +[2026-06-07 02:48:47,108][472573] Updated weights for policy 0, policy_version 3100 (0.0009) +[2026-06-07 02:48:47,227][472573] Updated weights for policy 0, policy_version 3111 (0.0009) +[2026-06-07 02:48:47,344][472573] Updated weights for policy 0, policy_version 3122 (0.0009) +[2026-06-07 02:48:47,490][472573] Updated weights for policy 0, policy_version 3136 (0.0009) +[2026-06-07 02:48:48,104][464932] Fps is (10 sec: 26214.4, 60 sec: 26760.5, 300 sec: 24702.0). Total num frames: 1605632. Throughput: 0: 25656.8. Samples: 1627264. Policy #0 lag: (min: 63.0, avg: 74.4, max: 127.0) +[2026-06-07 02:48:48,106][464932] Avg episode reward: [(0, '18.148')] +[2026-06-07 02:48:48,202][472573] Updated weights for policy 0, policy_version 3147 (0.0008) +[2026-06-07 02:48:48,330][472573] Updated weights for policy 0, policy_version 3159 (0.0008) +[2026-06-07 02:48:48,462][472573] Updated weights for policy 0, policy_version 3171 (0.0008) +[2026-06-07 02:48:48,591][472573] Updated weights for policy 0, policy_version 3183 (0.0009) +[2026-06-07 02:48:48,708][472573] Updated weights for policy 0, policy_version 3194 (0.0008) +[2026-06-07 02:48:48,769][472028] Saving new best policy, reward=18.148! +[2026-06-07 02:48:49,428][472573] Updated weights for policy 0, policy_version 3207 (0.0008) +[2026-06-07 02:48:49,585][472573] Updated weights for policy 0, policy_version 3221 (0.0008) +[2026-06-07 02:48:49,740][472573] Updated weights for policy 0, policy_version 3236 (0.0009) +[2026-06-07 02:48:49,864][472573] Updated weights for policy 0, policy_version 3247 (0.0008) +[2026-06-07 02:48:49,987][472573] Updated weights for policy 0, policy_version 3259 (0.0008) +[2026-06-07 02:48:50,683][472573] Updated weights for policy 0, policy_version 3272 (0.0008) +[2026-06-07 02:48:50,811][472573] Updated weights for policy 0, policy_version 3284 (0.0008) +[2026-06-07 02:48:50,945][472573] Updated weights for policy 0, policy_version 3296 (0.0008) +[2026-06-07 02:48:51,074][472573] Updated weights for policy 0, policy_version 3308 (0.0008) +[2026-06-07 02:48:51,198][472573] Updated weights for policy 0, policy_version 3320 (0.0008) +[2026-06-07 02:48:51,890][472573] Updated weights for policy 0, policy_version 3333 (0.0009) +[2026-06-07 02:48:52,011][472573] Updated weights for policy 0, policy_version 3344 (0.0008) +[2026-06-07 02:48:52,150][472573] Updated weights for policy 0, policy_version 3357 (0.0008) +[2026-06-07 02:48:52,276][472573] Updated weights for policy 0, policy_version 3369 (0.0008) +[2026-06-07 02:48:52,395][472573] Updated weights for policy 0, policy_version 3380 (0.0008) +[2026-06-07 02:48:52,522][472573] Updated weights for policy 0, policy_version 3392 (0.0008) +[2026-06-07 02:48:53,103][464932] Fps is (10 sec: 26214.1, 60 sec: 26214.5, 300 sec: 24810.1). Total num frames: 1736704. Throughput: 0: 25753.6. Samples: 1714944. Policy #0 lag: (min: 63.0, avg: 74.2, max: 127.0) +[2026-06-07 02:48:53,105][464932] Avg episode reward: [(0, '18.540')] +[2026-06-07 02:48:53,183][472573] Updated weights for policy 0, policy_version 3403 (0.0008) +[2026-06-07 02:48:53,306][472573] Updated weights for policy 0, policy_version 3414 (0.0008) +[2026-06-07 02:48:53,440][472573] Updated weights for policy 0, policy_version 3426 (0.0008) +[2026-06-07 02:48:53,562][472573] Updated weights for policy 0, policy_version 3438 (0.0008) +[2026-06-07 02:48:53,688][472573] Updated weights for policy 0, policy_version 3450 (0.0008) +[2026-06-07 02:48:53,751][472028] Saving new best policy, reward=18.540! +[2026-06-07 02:48:54,335][472573] Updated weights for policy 0, policy_version 3462 (0.0007) +[2026-06-07 02:48:54,457][472573] Updated weights for policy 0, policy_version 3473 (0.0008) +[2026-06-07 02:48:54,608][472573] Updated weights for policy 0, policy_version 3487 (0.0008) +[2026-06-07 02:48:54,763][472573] Updated weights for policy 0, policy_version 3501 (0.0008) +[2026-06-07 02:48:54,883][472573] Updated weights for policy 0, policy_version 3513 (0.0009) +[2026-06-07 02:48:55,577][472573] Updated weights for policy 0, policy_version 3526 (0.0009) +[2026-06-07 02:48:55,696][472573] Updated weights for policy 0, policy_version 3537 (0.0008) +[2026-06-07 02:48:55,819][472573] Updated weights for policy 0, policy_version 3549 (0.0008) +[2026-06-07 02:48:55,937][472573] Updated weights for policy 0, policy_version 3560 (0.0010) +[2026-06-07 02:48:56,079][472573] Updated weights for policy 0, policy_version 3573 (0.0009) +[2026-06-07 02:48:56,791][472573] Updated weights for policy 0, policy_version 3588 (0.0009) +[2026-06-07 02:48:56,905][472573] Updated weights for policy 0, policy_version 3598 (0.0008) +[2026-06-07 02:48:57,025][472573] Updated weights for policy 0, policy_version 3609 (0.0008) +[2026-06-07 02:48:57,132][472573] Updated weights for policy 0, policy_version 3619 (0.0008) +[2026-06-07 02:48:57,268][472573] Updated weights for policy 0, policy_version 3632 (0.0008) +[2026-06-07 02:48:57,394][472573] Updated weights for policy 0, policy_version 3643 (0.0009) +[2026-06-07 02:48:58,046][472573] Updated weights for policy 0, policy_version 3654 (0.0009) +[2026-06-07 02:48:58,103][464932] Fps is (10 sec: 26215.1, 60 sec: 25668.3, 300 sec: 24903.7). Total num frames: 1867776. Throughput: 0: 25927.7. Samples: 1871104. Policy #0 lag: (min: 63.0, avg: 73.7, max: 127.0) +[2026-06-07 02:48:58,104][464932] Avg episode reward: [(0, '24.172')] +[2026-06-07 02:48:58,160][472573] Updated weights for policy 0, policy_version 3665 (0.0008) +[2026-06-07 02:48:58,318][472573] Updated weights for policy 0, policy_version 3679 (0.0008) +[2026-06-07 02:48:58,479][472573] Updated weights for policy 0, policy_version 3695 (0.0008) +[2026-06-07 02:48:58,608][472573] Updated weights for policy 0, policy_version 3707 (0.0008) +[2026-06-07 02:48:58,660][472028] Saving new best policy, reward=24.172! +[2026-06-07 02:48:59,303][472573] Updated weights for policy 0, policy_version 3718 (0.0008) +[2026-06-07 02:48:59,429][472573] Updated weights for policy 0, policy_version 3729 (0.0008) +[2026-06-07 02:48:59,529][472573] Updated weights for policy 0, policy_version 3739 (0.0008) +[2026-06-07 02:48:59,651][472573] Updated weights for policy 0, policy_version 3750 (0.0008) +[2026-06-07 02:48:59,785][472573] Updated weights for policy 0, policy_version 3762 (0.0008) +[2026-06-07 02:48:59,918][472573] Updated weights for policy 0, policy_version 3774 (0.0008) +[2026-06-07 02:49:00,583][472573] Updated weights for policy 0, policy_version 3786 (0.0009) +[2026-06-07 02:49:00,700][472573] Updated weights for policy 0, policy_version 3797 (0.0008) +[2026-06-07 02:49:00,846][472573] Updated weights for policy 0, policy_version 3810 (0.0009) +[2026-06-07 02:49:00,982][472573] Updated weights for policy 0, policy_version 3823 (0.0009) +[2026-06-07 02:49:01,091][472573] Updated weights for policy 0, policy_version 3833 (0.0008) +[2026-06-07 02:49:01,785][472573] Updated weights for policy 0, policy_version 3847 (0.0008) +[2026-06-07 02:49:01,937][472573] Updated weights for policy 0, policy_version 3861 (0.0008) +[2026-06-07 02:49:02,073][472573] Updated weights for policy 0, policy_version 3874 (0.0008) +[2026-06-07 02:49:02,210][472573] Updated weights for policy 0, policy_version 3887 (0.0008) +[2026-06-07 02:49:02,345][472573] Updated weights for policy 0, policy_version 3899 (0.0008) +[2026-06-07 02:49:03,022][472573] Updated weights for policy 0, policy_version 3912 (0.0008) +[2026-06-07 02:49:03,103][464932] Fps is (10 sec: 26214.1, 60 sec: 25668.3, 300 sec: 24985.6). Total num frames: 1998848. Throughput: 0: 25944.3. Samples: 2029824. Policy #0 lag: (min: 54.0, avg: 98.8, max: 111.0) +[2026-06-07 02:49:03,104][464932] Avg episode reward: [(0, '27.381')] +[2026-06-07 02:49:03,153][472573] Updated weights for policy 0, policy_version 3925 (0.0008) +[2026-06-07 02:49:03,268][472573] Updated weights for policy 0, policy_version 3935 (0.0009) +[2026-06-07 02:49:03,419][472573] Updated weights for policy 0, policy_version 3949 (0.0008) +[2026-06-07 02:49:03,559][472573] Updated weights for policy 0, policy_version 3962 (0.0008) +[2026-06-07 02:49:03,619][472028] Saving new best policy, reward=27.381! +[2026-06-07 02:49:04,231][472573] Updated weights for policy 0, policy_version 3973 (0.0008) +[2026-06-07 02:49:04,357][472573] Updated weights for policy 0, policy_version 3985 (0.0008) +[2026-06-07 02:49:04,494][472573] Updated weights for policy 0, policy_version 3997 (0.0010) +[2026-06-07 02:49:04,631][472573] Updated weights for policy 0, policy_version 4010 (0.0010) +[2026-06-07 02:49:04,762][472573] Updated weights for policy 0, policy_version 4022 (0.0006) +[2026-06-07 02:49:05,339][472573] Updated weights for policy 0, policy_version 4033 (0.0005) +[2026-06-07 02:49:05,469][472573] Updated weights for policy 0, policy_version 4045 (0.0008) +[2026-06-07 02:49:05,602][472573] Updated weights for policy 0, policy_version 4058 (0.0009) +[2026-06-07 02:49:05,746][472573] Updated weights for policy 0, policy_version 4071 (0.0006) +[2026-06-07 02:49:05,874][472573] Updated weights for policy 0, policy_version 4083 (0.0005) +[2026-06-07 02:49:05,997][472573] Updated weights for policy 0, policy_version 4094 (0.0007) +[2026-06-07 02:49:06,621][472573] Updated weights for policy 0, policy_version 4105 (0.0006) +[2026-06-07 02:49:06,735][472573] Updated weights for policy 0, policy_version 4116 (0.0009) +[2026-06-07 02:49:06,873][472573] Updated weights for policy 0, policy_version 4129 (0.0008) +[2026-06-07 02:49:07,014][472573] Updated weights for policy 0, policy_version 4142 (0.0008) +[2026-06-07 02:49:07,138][472573] Updated weights for policy 0, policy_version 4153 (0.0008) +[2026-06-07 02:49:07,767][472573] Updated weights for policy 0, policy_version 4164 (0.0008) +[2026-06-07 02:49:07,910][472573] Updated weights for policy 0, policy_version 4177 (0.0008) +[2026-06-07 02:49:08,017][472573] Updated weights for policy 0, policy_version 4187 (0.0008) +[2026-06-07 02:49:08,103][464932] Fps is (10 sec: 26214.4, 60 sec: 25668.3, 300 sec: 25057.9). Total num frames: 2129920. Throughput: 0: 26234.4. Samples: 2115456. Policy #0 lag: (min: 51.0, avg: 68.5, max: 112.0) +[2026-06-07 02:49:08,104][464932] Avg episode reward: [(0, '31.106')] +[2026-06-07 02:49:08,152][472573] Updated weights for policy 0, policy_version 4199 (0.0008) +[2026-06-07 02:49:08,282][472573] Updated weights for policy 0, policy_version 4211 (0.0008) +[2026-06-07 02:49:08,389][472573] Updated weights for policy 0, policy_version 4221 (0.0008) +[2026-06-07 02:49:08,416][472028] Saving new best policy, reward=31.106! +[2026-06-07 02:49:09,052][472573] Updated weights for policy 0, policy_version 4233 (0.0009) +[2026-06-07 02:49:09,188][472573] Updated weights for policy 0, policy_version 4246 (0.0009) +[2026-06-07 02:49:09,309][472573] Updated weights for policy 0, policy_version 4257 (0.0009) +[2026-06-07 02:49:09,428][472573] Updated weights for policy 0, policy_version 4268 (0.0009) +[2026-06-07 02:49:09,546][472573] Updated weights for policy 0, policy_version 4279 (0.0009) +[2026-06-07 02:49:10,159][472573] Updated weights for policy 0, policy_version 4292 (0.0008) +[2026-06-07 02:49:10,299][472573] Updated weights for policy 0, policy_version 4305 (0.0009) +[2026-06-07 02:49:10,449][472573] Updated weights for policy 0, policy_version 4319 (0.0009) +[2026-06-07 02:49:10,572][472573] Updated weights for policy 0, policy_version 4330 (0.0009) +[2026-06-07 02:49:10,712][472573] Updated weights for policy 0, policy_version 4343 (0.0009) +[2026-06-07 02:49:11,331][472573] Updated weights for policy 0, policy_version 4353 (0.0008) +[2026-06-07 02:49:11,451][472573] Updated weights for policy 0, policy_version 4364 (0.0009) +[2026-06-07 02:49:11,575][472573] Updated weights for policy 0, policy_version 4375 (0.0010) +[2026-06-07 02:49:11,689][472573] Updated weights for policy 0, policy_version 4385 (0.0008) +[2026-06-07 02:49:11,817][472573] Updated weights for policy 0, policy_version 4397 (0.0009) +[2026-06-07 02:49:11,964][472573] Updated weights for policy 0, policy_version 4411 (0.0009) +[2026-06-07 02:49:12,557][472573] Updated weights for policy 0, policy_version 4423 (0.0008) +[2026-06-07 02:49:12,682][472573] Updated weights for policy 0, policy_version 4435 (0.0009) +[2026-06-07 02:49:12,789][472573] Updated weights for policy 0, policy_version 4445 (0.0008) +[2026-06-07 02:49:12,907][472573] Updated weights for policy 0, policy_version 4456 (0.0008) +[2026-06-07 02:49:13,034][472573] Updated weights for policy 0, policy_version 4468 (0.0008) +[2026-06-07 02:49:13,103][464932] Fps is (10 sec: 26213.7, 60 sec: 25668.5, 300 sec: 25122.0). Total num frames: 2260992. Throughput: 0: 26396.2. Samples: 2279808. Policy #0 lag: (min: 63.0, avg: 73.2, max: 127.0) +[2026-06-07 02:49:13,105][464932] Avg episode reward: [(0, '38.576')] +[2026-06-07 02:49:13,157][472573] Updated weights for policy 0, policy_version 4479 (0.0009) +[2026-06-07 02:49:13,162][472028] Saving new best policy, reward=38.576! +[2026-06-07 02:49:13,819][472573] Updated weights for policy 0, policy_version 4491 (0.0004) +[2026-06-07 02:49:13,937][472573] Updated weights for policy 0, policy_version 4501 (0.0004) +[2026-06-07 02:49:14,043][472573] Updated weights for policy 0, policy_version 4511 (0.0003) +[2026-06-07 02:49:14,184][472573] Updated weights for policy 0, policy_version 4524 (0.0006) +[2026-06-07 02:49:14,334][472573] Updated weights for policy 0, policy_version 4537 (0.0008) +[2026-06-07 02:49:14,923][472573] Updated weights for policy 0, policy_version 4550 (0.0007) +[2026-06-07 02:49:15,053][472573] Updated weights for policy 0, policy_version 4562 (0.0011) +[2026-06-07 02:49:15,191][472573] Updated weights for policy 0, policy_version 4575 (0.0008) +[2026-06-07 02:49:15,303][472573] Updated weights for policy 0, policy_version 4585 (0.0008) +[2026-06-07 02:49:15,428][472573] Updated weights for policy 0, policy_version 4597 (0.0008) +[2026-06-07 02:49:15,546][472573] Updated weights for policy 0, policy_version 4607 (0.0008) +[2026-06-07 02:49:16,171][472573] Updated weights for policy 0, policy_version 4618 (0.0008) +[2026-06-07 02:49:16,297][472573] Updated weights for policy 0, policy_version 4630 (0.0008) +[2026-06-07 02:49:16,403][472573] Updated weights for policy 0, policy_version 4640 (0.0008) +[2026-06-07 02:49:16,533][472573] Updated weights for policy 0, policy_version 4652 (0.0008) +[2026-06-07 02:49:16,643][472573] Updated weights for policy 0, policy_version 4662 (0.0008) +[2026-06-07 02:49:16,750][472573] Updated weights for policy 0, policy_version 4672 (0.0008) +[2026-06-07 02:49:17,383][472573] Updated weights for policy 0, policy_version 4685 (0.0009) +[2026-06-07 02:49:17,520][472573] Updated weights for policy 0, policy_version 4698 (0.0008) +[2026-06-07 02:49:17,660][472573] Updated weights for policy 0, policy_version 4711 (0.0008) +[2026-06-07 02:49:17,784][472573] Updated weights for policy 0, policy_version 4722 (0.0008) +[2026-06-07 02:49:17,913][472573] Updated weights for policy 0, policy_version 4734 (0.0008) +[2026-06-07 02:49:18,103][464932] Fps is (10 sec: 29491.5, 60 sec: 26214.6, 300 sec: 25524.6). Total num frames: 2424832. Throughput: 0: 26723.6. Samples: 2443264. Policy #0 lag: (min: 118.0, avg: 124.3, max: 161.0) +[2026-06-07 02:49:18,104][464932] Avg episode reward: [(0, '49.894')] +[2026-06-07 02:49:18,108][472028] Saving new best policy, reward=49.894! +[2026-06-07 02:49:18,518][472573] Updated weights for policy 0, policy_version 4744 (0.0008) +[2026-06-07 02:49:18,648][472573] Updated weights for policy 0, policy_version 4756 (0.0008) +[2026-06-07 02:49:18,773][472573] Updated weights for policy 0, policy_version 4767 (0.0008) +[2026-06-07 02:49:18,928][472573] Updated weights for policy 0, policy_version 4782 (0.0008) +[2026-06-07 02:49:19,070][472573] Updated weights for policy 0, policy_version 4795 (0.0008) +[2026-06-07 02:49:19,683][472573] Updated weights for policy 0, policy_version 4805 (0.0008) +[2026-06-07 02:49:19,838][472573] Updated weights for policy 0, policy_version 4819 (0.0008) +[2026-06-07 02:49:19,976][472573] Updated weights for policy 0, policy_version 4832 (0.0008) +[2026-06-07 02:49:20,118][472573] Updated weights for policy 0, policy_version 4845 (0.0008) +[2026-06-07 02:49:20,252][472573] Updated weights for policy 0, policy_version 4857 (0.0008) +[2026-06-07 02:49:20,874][472573] Updated weights for policy 0, policy_version 4867 (0.0008) +[2026-06-07 02:49:21,016][472573] Updated weights for policy 0, policy_version 4880 (0.0008) +[2026-06-07 02:49:21,124][472573] Updated weights for policy 0, policy_version 4890 (0.0008) +[2026-06-07 02:49:21,263][472573] Updated weights for policy 0, policy_version 4902 (0.0008) +[2026-06-07 02:49:21,402][472573] Updated weights for policy 0, policy_version 4915 (0.0007) +[2026-06-07 02:49:21,534][472573] Updated weights for policy 0, policy_version 4927 (0.0013) +[2026-06-07 02:49:22,125][472573] Updated weights for policy 0, policy_version 4939 (0.0009) +[2026-06-07 02:49:22,244][472573] Updated weights for policy 0, policy_version 4950 (0.0008) +[2026-06-07 02:49:22,378][472573] Updated weights for policy 0, policy_version 4962 (0.0008) +[2026-06-07 02:49:22,507][472573] Updated weights for policy 0, policy_version 4974 (0.0008) +[2026-06-07 02:49:22,639][472573] Updated weights for policy 0, policy_version 4986 (0.0008) +[2026-06-07 02:49:23,103][464932] Fps is (10 sec: 29491.8, 60 sec: 26760.5, 300 sec: 25559.0). Total num frames: 2555904. Throughput: 0: 26746.4. Samples: 2524672. Policy #0 lag: (min: 28.0, avg: 39.2, max: 92.0) +[2026-06-07 02:49:23,105][464932] Avg episode reward: [(0, '48.780')] +[2026-06-07 02:49:23,258][472573] Updated weights for policy 0, policy_version 4998 (0.0008) +[2026-06-07 02:49:23,392][472573] Updated weights for policy 0, policy_version 5010 (0.0008) +[2026-06-07 02:49:23,510][472573] Updated weights for policy 0, policy_version 5021 (0.0008) +[2026-06-07 02:49:23,642][472573] Updated weights for policy 0, policy_version 5033 (0.0008) +[2026-06-07 02:49:23,760][472573] Updated weights for policy 0, policy_version 5044 (0.0008) +[2026-06-07 02:49:23,888][472573] Updated weights for policy 0, policy_version 5056 (0.0008) +[2026-06-07 02:49:24,539][472573] Updated weights for policy 0, policy_version 5069 (0.0008) +[2026-06-07 02:49:24,672][472573] Updated weights for policy 0, policy_version 5081 (0.0008) +[2026-06-07 02:49:24,789][472573] Updated weights for policy 0, policy_version 5092 (0.0008) +[2026-06-07 02:49:24,901][472573] Updated weights for policy 0, policy_version 5102 (0.0008) +[2026-06-07 02:49:25,043][472573] Updated weights for policy 0, policy_version 5115 (0.0008) +[2026-06-07 02:49:25,651][472573] Updated weights for policy 0, policy_version 5128 (0.0008) +[2026-06-07 02:49:25,800][472573] Updated weights for policy 0, policy_version 5142 (0.0008) +[2026-06-07 02:49:25,924][472573] Updated weights for policy 0, policy_version 5154 (0.0008) +[2026-06-07 02:49:26,066][472573] Updated weights for policy 0, policy_version 5167 (0.0008) +[2026-06-07 02:49:26,196][472573] Updated weights for policy 0, policy_version 5179 (0.0008) +[2026-06-07 02:49:26,799][472573] Updated weights for policy 0, policy_version 5190 (0.0008) +[2026-06-07 02:49:26,917][472573] Updated weights for policy 0, policy_version 5201 (0.0008) +[2026-06-07 02:49:27,045][472573] Updated weights for policy 0, policy_version 5212 (0.0009) +[2026-06-07 02:49:27,180][472573] Updated weights for policy 0, policy_version 5225 (0.0008) +[2026-06-07 02:49:27,309][472573] Updated weights for policy 0, policy_version 5236 (0.0008) +[2026-06-07 02:49:27,932][472573] Updated weights for policy 0, policy_version 5249 (0.0008) +[2026-06-07 02:49:28,075][472573] Updated weights for policy 0, policy_version 5262 (0.0008) +[2026-06-07 02:49:28,103][464932] Fps is (10 sec: 26213.4, 60 sec: 26760.4, 300 sec: 25590.2). Total num frames: 2686976. Throughput: 0: 27073.2. Samples: 2692608. Policy #0 lag: (min: 63.0, avg: 72.5, max: 127.0) +[2026-06-07 02:49:28,105][464932] Avg episode reward: [(0, '54.459')] +[2026-06-07 02:49:28,192][472573] Updated weights for policy 0, policy_version 5273 (0.0008) +[2026-06-07 02:49:28,314][472573] Updated weights for policy 0, policy_version 5284 (0.0008) +[2026-06-07 02:49:28,420][472573] Updated weights for policy 0, policy_version 5294 (0.0008) +[2026-06-07 02:49:28,554][472573] Updated weights for policy 0, policy_version 5306 (0.0008) +[2026-06-07 02:49:28,612][472028] Saving new best policy, reward=54.459! +[2026-06-07 02:49:29,170][472573] Updated weights for policy 0, policy_version 5317 (0.0008) +[2026-06-07 02:49:29,308][472573] Updated weights for policy 0, policy_version 5330 (0.0008) +[2026-06-07 02:49:29,445][472573] Updated weights for policy 0, policy_version 5343 (0.0008) +[2026-06-07 02:49:29,556][472573] Updated weights for policy 0, policy_version 5353 (0.0008) +[2026-06-07 02:49:29,675][472573] Updated weights for policy 0, policy_version 5364 (0.0008) +[2026-06-07 02:49:29,790][472573] Updated weights for policy 0, policy_version 5374 (0.0008) +[2026-06-07 02:49:30,395][472573] Updated weights for policy 0, policy_version 5386 (0.0008) +[2026-06-07 02:49:30,523][472573] Updated weights for policy 0, policy_version 5398 (0.0008) +[2026-06-07 02:49:30,643][472573] Updated weights for policy 0, policy_version 5409 (0.0008) +[2026-06-07 02:49:30,787][472573] Updated weights for policy 0, policy_version 5422 (0.0008) +[2026-06-07 02:49:30,907][472573] Updated weights for policy 0, policy_version 5433 (0.0008) +[2026-06-07 02:49:31,540][472573] Updated weights for policy 0, policy_version 5445 (0.0008) +[2026-06-07 02:49:31,654][472573] Updated weights for policy 0, policy_version 5455 (0.0008) +[2026-06-07 02:49:31,785][472573] Updated weights for policy 0, policy_version 5467 (0.0008) +[2026-06-07 02:49:31,891][472573] Updated weights for policy 0, policy_version 5477 (0.0008) +[2026-06-07 02:49:32,012][472573] Updated weights for policy 0, policy_version 5488 (0.0008) +[2026-06-07 02:49:32,133][472573] Updated weights for policy 0, policy_version 5499 (0.0008) +[2026-06-07 02:49:32,731][472573] Updated weights for policy 0, policy_version 5512 (0.0008) +[2026-06-07 02:49:32,846][472573] Updated weights for policy 0, policy_version 5523 (0.0008) +[2026-06-07 02:49:32,983][472573] Updated weights for policy 0, policy_version 5535 (0.0008) +[2026-06-07 02:49:33,098][472573] Updated weights for policy 0, policy_version 5546 (0.0008) +[2026-06-07 02:49:33,103][464932] Fps is (10 sec: 26214.8, 60 sec: 26760.5, 300 sec: 25618.6). Total num frames: 2818048. Throughput: 0: 27420.6. Samples: 2861184. Policy #0 lag: (min: 63.0, avg: 72.5, max: 127.0) +[2026-06-07 02:49:33,104][464932] Avg episode reward: [(0, '67.413')] +[2026-06-07 02:49:33,207][472573] Updated weights for policy 0, policy_version 5556 (0.0008) +[2026-06-07 02:49:33,318][472573] Updated weights for policy 0, policy_version 5566 (0.0008) +[2026-06-07 02:49:33,342][472028] Saving new best policy, reward=67.413! +[2026-06-07 02:49:33,914][472573] Updated weights for policy 0, policy_version 5577 (0.0007) +[2026-06-07 02:49:34,037][472573] Updated weights for policy 0, policy_version 5588 (0.0008) +[2026-06-07 02:49:34,158][472573] Updated weights for policy 0, policy_version 5599 (0.0008) +[2026-06-07 02:49:34,292][472573] Updated weights for policy 0, policy_version 5611 (0.0008) +[2026-06-07 02:49:34,425][472573] Updated weights for policy 0, policy_version 5623 (0.0008) +[2026-06-07 02:49:35,040][472573] Updated weights for policy 0, policy_version 5634 (0.0007) +[2026-06-07 02:49:35,171][472573] Updated weights for policy 0, policy_version 5646 (0.0008) +[2026-06-07 02:49:35,292][472573] Updated weights for policy 0, policy_version 5657 (0.0008) +[2026-06-07 02:49:35,416][472573] Updated weights for policy 0, policy_version 5668 (0.0008) +[2026-06-07 02:49:35,529][472573] Updated weights for policy 0, policy_version 5678 (0.0008) +[2026-06-07 02:49:35,637][472573] Updated weights for policy 0, policy_version 5688 (0.0008) +[2026-06-07 02:49:36,222][472573] Updated weights for policy 0, policy_version 5699 (0.0009) +[2026-06-07 02:49:36,328][472573] Updated weights for policy 0, policy_version 5709 (0.0008) +[2026-06-07 02:49:36,438][472573] Updated weights for policy 0, policy_version 5719 (0.0008) +[2026-06-07 02:49:36,577][472573] Updated weights for policy 0, policy_version 5732 (0.0008) +[2026-06-07 02:49:36,687][472573] Updated weights for policy 0, policy_version 5742 (0.0008) +[2026-06-07 02:49:36,812][472573] Updated weights for policy 0, policy_version 5753 (0.0008) +[2026-06-07 02:49:37,420][472573] Updated weights for policy 0, policy_version 5765 (0.0007) +[2026-06-07 02:49:37,537][472573] Updated weights for policy 0, policy_version 5776 (0.0008) +[2026-06-07 02:49:37,667][472573] Updated weights for policy 0, policy_version 5788 (0.0008) +[2026-06-07 02:49:37,805][472573] Updated weights for policy 0, policy_version 5800 (0.0009) +[2026-06-07 02:49:37,932][472573] Updated weights for policy 0, policy_version 5812 (0.0008) +[2026-06-07 02:49:38,058][472573] Updated weights for policy 0, policy_version 5824 (0.0007) +[2026-06-07 02:49:38,103][464932] Fps is (10 sec: 29492.1, 60 sec: 27306.8, 300 sec: 25929.5). Total num frames: 2981888. Throughput: 0: 27218.5. Samples: 2939776. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 02:49:38,104][464932] Avg episode reward: [(0, '67.030')] +[2026-06-07 02:49:38,662][472573] Updated weights for policy 0, policy_version 5835 (0.0008) +[2026-06-07 02:49:38,783][472573] Updated weights for policy 0, policy_version 5846 (0.0008) +[2026-06-07 02:49:38,949][472573] Updated weights for policy 0, policy_version 5861 (0.0008) +[2026-06-07 02:49:39,061][472573] Updated weights for policy 0, policy_version 5872 (0.0008) +[2026-06-07 02:49:39,186][472573] Updated weights for policy 0, policy_version 5883 (0.0008) +[2026-06-07 02:49:39,763][472573] Updated weights for policy 0, policy_version 5894 (0.0008) +[2026-06-07 02:49:39,900][472573] Updated weights for policy 0, policy_version 5907 (0.0008) +[2026-06-07 02:49:40,048][472573] Updated weights for policy 0, policy_version 5920 (0.0008) +[2026-06-07 02:49:40,181][472573] Updated weights for policy 0, policy_version 5932 (0.0008) +[2026-06-07 02:49:40,297][472573] Updated weights for policy 0, policy_version 5943 (0.0008) +[2026-06-07 02:49:40,852][472573] Updated weights for policy 0, policy_version 5953 (0.0008) +[2026-06-07 02:49:41,001][472573] Updated weights for policy 0, policy_version 5966 (0.0008) +[2026-06-07 02:49:41,139][472573] Updated weights for policy 0, policy_version 5979 (0.0008) +[2026-06-07 02:49:41,248][472573] Updated weights for policy 0, policy_version 5989 (0.0008) +[2026-06-07 02:49:41,368][472573] Updated weights for policy 0, policy_version 6000 (0.0008) +[2026-06-07 02:49:41,483][472573] Updated weights for policy 0, policy_version 6010 (0.0008) +[2026-06-07 02:49:42,063][472573] Updated weights for policy 0, policy_version 6020 (0.0008) +[2026-06-07 02:49:42,175][472573] Updated weights for policy 0, policy_version 6030 (0.0008) +[2026-06-07 02:49:42,283][472573] Updated weights for policy 0, policy_version 6040 (0.0008) +[2026-06-07 02:49:42,431][472573] Updated weights for policy 0, policy_version 6053 (0.0008) +[2026-06-07 02:49:42,544][472573] Updated weights for policy 0, policy_version 6064 (0.0008) +[2026-06-07 02:49:42,668][472573] Updated weights for policy 0, policy_version 6075 (0.0008) +[2026-06-07 02:49:43,103][464932] Fps is (10 sec: 29491.6, 60 sec: 27306.7, 300 sec: 25941.4). Total num frames: 3112960. Throughput: 0: 27454.6. Samples: 3106560. Policy #0 lag: (min: 63.0, avg: 75.9, max: 127.0) +[2026-06-07 02:49:43,104][464932] Avg episode reward: [(0, '99.958')] +[2026-06-07 02:49:43,107][472028] Saving new best policy, reward=99.958! +[2026-06-07 02:49:43,281][472573] Updated weights for policy 0, policy_version 6088 (0.0007) +[2026-06-07 02:49:43,414][472573] Updated weights for policy 0, policy_version 6100 (0.0008) +[2026-06-07 02:49:43,571][472573] Updated weights for policy 0, policy_version 6115 (0.0008) +[2026-06-07 02:49:43,706][472573] Updated weights for policy 0, policy_version 6126 (0.0008) +[2026-06-07 02:49:43,825][472573] Updated weights for policy 0, policy_version 6138 (0.0008) +[2026-06-07 02:49:44,372][472573] Updated weights for policy 0, policy_version 6148 (0.0008) +[2026-06-07 02:49:44,487][472573] Updated weights for policy 0, policy_version 6159 (0.0008) +[2026-06-07 02:49:44,620][472573] Updated weights for policy 0, policy_version 6171 (0.0008) +[2026-06-07 02:49:44,769][472573] Updated weights for policy 0, policy_version 6185 (0.0008) +[2026-06-07 02:49:44,885][472573] Updated weights for policy 0, policy_version 6195 (0.0008) +[2026-06-07 02:49:44,996][472573] Updated weights for policy 0, policy_version 6205 (0.0008) +[2026-06-07 02:49:45,585][472573] Updated weights for policy 0, policy_version 6216 (0.0008) +[2026-06-07 02:49:45,712][472573] Updated weights for policy 0, policy_version 6228 (0.0008) +[2026-06-07 02:49:45,824][472573] Updated weights for policy 0, policy_version 6239 (0.0008) +[2026-06-07 02:49:45,952][472573] Updated weights for policy 0, policy_version 6250 (0.0008) +[2026-06-07 02:49:46,090][472573] Updated weights for policy 0, policy_version 6263 (0.0008) +[2026-06-07 02:49:46,685][472573] Updated weights for policy 0, policy_version 6274 (0.0008) +[2026-06-07 02:49:46,798][472573] Updated weights for policy 0, policy_version 6284 (0.0008) +[2026-06-07 02:49:46,921][472573] Updated weights for policy 0, policy_version 6296 (0.0008) +[2026-06-07 02:49:47,041][472573] Updated weights for policy 0, policy_version 6307 (0.0008) +[2026-06-07 02:49:47,191][472573] Updated weights for policy 0, policy_version 6320 (0.0008) +[2026-06-07 02:49:47,312][472573] Updated weights for policy 0, policy_version 6331 (0.0008) +[2026-06-07 02:49:47,900][472573] Updated weights for policy 0, policy_version 6343 (0.0007) +[2026-06-07 02:49:48,021][472573] Updated weights for policy 0, policy_version 6354 (0.0008) +[2026-06-07 02:49:48,103][464932] Fps is (10 sec: 26214.2, 60 sec: 27306.8, 300 sec: 25952.3). Total num frames: 3244032. Throughput: 0: 27770.3. Samples: 3279488. Policy #0 lag: (min: 36.0, avg: 46.7, max: 100.0) +[2026-06-07 02:49:48,104][464932] Avg episode reward: [(0, '109.483')] +[2026-06-07 02:49:48,162][472573] Updated weights for policy 0, policy_version 6367 (0.0008) +[2026-06-07 02:49:48,280][472573] Updated weights for policy 0, policy_version 6378 (0.0008) +[2026-06-07 02:49:48,447][472573] Updated weights for policy 0, policy_version 6393 (0.0008) +[2026-06-07 02:49:48,515][472028] Saving new best policy, reward=109.483! +[2026-06-07 02:49:49,054][472573] Updated weights for policy 0, policy_version 6404 (0.0008) +[2026-06-07 02:49:49,163][472573] Updated weights for policy 0, policy_version 6414 (0.0008) +[2026-06-07 02:49:49,280][472573] Updated weights for policy 0, policy_version 6425 (0.0008) +[2026-06-07 02:49:49,402][472573] Updated weights for policy 0, policy_version 6436 (0.0008) +[2026-06-07 02:49:49,520][472573] Updated weights for policy 0, policy_version 6447 (0.0008) +[2026-06-07 02:49:49,635][472573] Updated weights for policy 0, policy_version 6458 (0.0008) +[2026-06-07 02:49:50,200][472573] Updated weights for policy 0, policy_version 6468 (0.0008) +[2026-06-07 02:49:50,328][472573] Updated weights for policy 0, policy_version 6479 (0.0009) +[2026-06-07 02:49:50,456][472573] Updated weights for policy 0, policy_version 6491 (0.0008) +[2026-06-07 02:49:50,589][472573] Updated weights for policy 0, policy_version 6503 (0.0008) +[2026-06-07 02:49:50,707][472573] Updated weights for policy 0, policy_version 6514 (0.0008) +[2026-06-07 02:49:50,842][472573] Updated weights for policy 0, policy_version 6526 (0.0008) +[2026-06-07 02:49:51,439][472573] Updated weights for policy 0, policy_version 6539 (0.0007) +[2026-06-07 02:49:51,585][472573] Updated weights for policy 0, policy_version 6552 (0.0008) +[2026-06-07 02:49:51,693][472573] Updated weights for policy 0, policy_version 6562 (0.0008) +[2026-06-07 02:49:51,816][472573] Updated weights for policy 0, policy_version 6573 (0.0009) +[2026-06-07 02:49:51,949][472573] Updated weights for policy 0, policy_version 6585 (0.0009) +[2026-06-07 02:49:52,497][472573] Updated weights for policy 0, policy_version 6595 (0.0007) +[2026-06-07 02:49:52,631][472573] Updated weights for policy 0, policy_version 6606 (0.0008) +[2026-06-07 02:49:52,749][472573] Updated weights for policy 0, policy_version 6617 (0.0008) +[2026-06-07 02:49:52,880][472573] Updated weights for policy 0, policy_version 6629 (0.0009) +[2026-06-07 02:49:53,023][472573] Updated weights for policy 0, policy_version 6642 (0.0008) +[2026-06-07 02:49:53,103][464932] Fps is (10 sec: 26214.0, 60 sec: 27306.7, 300 sec: 25962.3). Total num frames: 3375104. Throughput: 0: 27758.9. Samples: 3364608. Policy #0 lag: (min: 36.0, avg: 46.7, max: 100.0) +[2026-06-07 02:49:53,104][464932] Avg episode reward: [(0, '117.582')] +[2026-06-07 02:49:53,152][472573] Updated weights for policy 0, policy_version 6654 (0.0008) +[2026-06-07 02:49:53,171][472028] Saving new best policy, reward=117.582! +[2026-06-07 02:49:53,745][472573] Updated weights for policy 0, policy_version 6666 (0.0008) +[2026-06-07 02:49:53,878][472573] Updated weights for policy 0, policy_version 6678 (0.0008) +[2026-06-07 02:49:54,005][472573] Updated weights for policy 0, policy_version 6689 (0.0008) +[2026-06-07 02:49:54,116][472573] Updated weights for policy 0, policy_version 6699 (0.0008) +[2026-06-07 02:49:54,244][472573] Updated weights for policy 0, policy_version 6710 (0.0008) +[2026-06-07 02:49:54,354][472573] Updated weights for policy 0, policy_version 6720 (0.0008) +[2026-06-07 02:49:54,910][472573] Updated weights for policy 0, policy_version 6730 (0.0008) +[2026-06-07 02:49:55,033][472573] Updated weights for policy 0, policy_version 6741 (0.0008) +[2026-06-07 02:49:55,155][472573] Updated weights for policy 0, policy_version 6752 (0.0008) +[2026-06-07 02:49:55,295][472573] Updated weights for policy 0, policy_version 6765 (0.0009) +[2026-06-07 02:49:55,407][472573] Updated weights for policy 0, policy_version 6775 (0.0008) +[2026-06-07 02:49:55,977][472573] Updated weights for policy 0, policy_version 6786 (0.0008) +[2026-06-07 02:49:56,085][472573] Updated weights for policy 0, policy_version 6796 (0.0008) +[2026-06-07 02:49:56,194][472573] Updated weights for policy 0, policy_version 6806 (0.0008) +[2026-06-07 02:49:56,309][472573] Updated weights for policy 0, policy_version 6817 (0.0008) +[2026-06-07 02:49:56,438][472573] Updated weights for policy 0, policy_version 6828 (0.0009) +[2026-06-07 02:49:56,566][472573] Updated weights for policy 0, policy_version 6840 (0.0008) +[2026-06-07 02:49:57,173][472573] Updated weights for policy 0, policy_version 6851 (0.0009) +[2026-06-07 02:49:57,287][472573] Updated weights for policy 0, policy_version 6862 (0.0008) +[2026-06-07 02:49:57,426][472573] Updated weights for policy 0, policy_version 6874 (0.0008) +[2026-06-07 02:49:57,537][472573] Updated weights for policy 0, policy_version 6884 (0.0008) +[2026-06-07 02:49:57,657][472573] Updated weights for policy 0, policy_version 6895 (0.0008) +[2026-06-07 02:49:57,781][472573] Updated weights for policy 0, policy_version 6906 (0.0008) +[2026-06-07 02:49:58,103][464932] Fps is (10 sec: 29491.3, 60 sec: 27852.8, 300 sec: 26214.4). Total num frames: 3538944. Throughput: 0: 27804.7. Samples: 3531008. Policy #0 lag: (min: 112.0, avg: 122.7, max: 176.0) +[2026-06-07 02:49:58,104][464932] Avg episode reward: [(0, '133.320')] +[2026-06-07 02:49:58,110][472028] Saving new best policy, reward=133.320! +[2026-06-07 02:49:58,370][472573] Updated weights for policy 0, policy_version 6917 (0.0008) +[2026-06-07 02:49:58,502][472573] Updated weights for policy 0, policy_version 6929 (0.0008) +[2026-06-07 02:49:58,618][472573] Updated weights for policy 0, policy_version 6939 (0.0008) +[2026-06-07 02:49:58,738][472573] Updated weights for policy 0, policy_version 6950 (0.0008) +[2026-06-07 02:49:58,867][472573] Updated weights for policy 0, policy_version 6962 (0.0009) +[2026-06-07 02:49:58,991][472573] Updated weights for policy 0, policy_version 6973 (0.0009) +[2026-06-07 02:49:59,553][472573] Updated weights for policy 0, policy_version 6983 (0.0008) +[2026-06-07 02:49:59,675][472573] Updated weights for policy 0, policy_version 6994 (0.0008) +[2026-06-07 02:49:59,789][472573] Updated weights for policy 0, policy_version 7005 (0.0008) +[2026-06-07 02:49:59,932][472573] Updated weights for policy 0, policy_version 7018 (0.0008) +[2026-06-07 02:50:00,054][472573] Updated weights for policy 0, policy_version 7029 (0.0008) +[2026-06-07 02:50:00,162][472573] Updated weights for policy 0, policy_version 7039 (0.0008) +[2026-06-07 02:50:00,755][472573] Updated weights for policy 0, policy_version 7050 (0.0008) +[2026-06-07 02:50:00,876][472573] Updated weights for policy 0, policy_version 7061 (0.0008) +[2026-06-07 02:50:00,984][472573] Updated weights for policy 0, policy_version 7071 (0.0008) +[2026-06-07 02:50:01,095][472573] Updated weights for policy 0, policy_version 7081 (0.0008) +[2026-06-07 02:50:01,201][472573] Updated weights for policy 0, policy_version 7091 (0.0008) +[2026-06-07 02:50:01,334][472573] Updated weights for policy 0, policy_version 7103 (0.0008) +[2026-06-07 02:50:01,918][472573] Updated weights for policy 0, policy_version 7114 (0.0008) +[2026-06-07 02:50:02,030][472573] Updated weights for policy 0, policy_version 7124 (0.0008) +[2026-06-07 02:50:02,143][472573] Updated weights for policy 0, policy_version 7135 (0.0008) +[2026-06-07 02:50:02,257][472573] Updated weights for policy 0, policy_version 7145 (0.0008) +[2026-06-07 02:50:02,395][472573] Updated weights for policy 0, policy_version 7158 (0.0008) +[2026-06-07 02:50:02,965][472573] Updated weights for policy 0, policy_version 7169 (0.0009) +[2026-06-07 02:50:03,084][472573] Updated weights for policy 0, policy_version 7180 (0.0008) +[2026-06-07 02:50:03,103][464932] Fps is (10 sec: 29491.3, 60 sec: 27852.9, 300 sec: 26214.4). Total num frames: 3670016. Throughput: 0: 27884.0. Samples: 3698048. Policy #0 lag: (min: 63.0, avg: 74.5, max: 127.0) +[2026-06-07 02:50:03,104][464932] Avg episode reward: [(0, '124.174')] +[2026-06-07 02:50:03,203][472573] Updated weights for policy 0, policy_version 7191 (0.0008) +[2026-06-07 02:50:03,324][472573] Updated weights for policy 0, policy_version 7202 (0.0008) +[2026-06-07 02:50:03,456][472573] Updated weights for policy 0, policy_version 7214 (0.0008) +[2026-06-07 02:50:03,565][472573] Updated weights for policy 0, policy_version 7224 (0.0008) +[2026-06-07 02:50:04,149][472573] Updated weights for policy 0, policy_version 7235 (0.0008) +[2026-06-07 02:50:04,269][472573] Updated weights for policy 0, policy_version 7246 (0.0008) +[2026-06-07 02:50:04,402][472573] Updated weights for policy 0, policy_version 7258 (0.0008) +[2026-06-07 02:50:04,520][472573] Updated weights for policy 0, policy_version 7269 (0.0008) +[2026-06-07 02:50:04,649][472573] Updated weights for policy 0, policy_version 7280 (0.0009) +[2026-06-07 02:50:04,754][472573] Updated weights for policy 0, policy_version 7290 (0.0008) +[2026-06-07 02:50:05,332][472573] Updated weights for policy 0, policy_version 7301 (0.0008) +[2026-06-07 02:50:05,445][472573] Updated weights for policy 0, policy_version 7311 (0.0008) +[2026-06-07 02:50:05,562][472573] Updated weights for policy 0, policy_version 7322 (0.0008) +[2026-06-07 02:50:05,684][472573] Updated weights for policy 0, policy_version 7333 (0.0008) +[2026-06-07 02:50:05,806][472573] Updated weights for policy 0, policy_version 7344 (0.0008) +[2026-06-07 02:50:05,923][472573] Updated weights for policy 0, policy_version 7355 (0.0008) +[2026-06-07 02:50:06,503][472573] Updated weights for policy 0, policy_version 7366 (0.0008) +[2026-06-07 02:50:06,612][472573] Updated weights for policy 0, policy_version 7376 (0.0008) +[2026-06-07 02:50:06,742][472573] Updated weights for policy 0, policy_version 7388 (0.0008) +[2026-06-07 02:50:06,851][472573] Updated weights for policy 0, policy_version 7398 (0.0008) +[2026-06-07 02:50:06,955][472573] Updated weights for policy 0, policy_version 7408 (0.0008) +[2026-06-07 02:50:07,070][472573] Updated weights for policy 0, policy_version 7418 (0.0008) +[2026-06-07 02:50:07,622][472573] Updated weights for policy 0, policy_version 7428 (0.0007) +[2026-06-07 02:50:07,745][472573] Updated weights for policy 0, policy_version 7439 (0.0005) +[2026-06-07 02:50:07,869][472573] Updated weights for policy 0, policy_version 7450 (0.0005) +[2026-06-07 02:50:07,998][472573] Updated weights for policy 0, policy_version 7462 (0.0005) +[2026-06-07 02:50:08,103][464932] Fps is (10 sec: 26214.1, 60 sec: 27852.7, 300 sec: 26214.4). Total num frames: 3801088. Throughput: 0: 28077.5. Samples: 3788160. Policy #0 lag: (min: 63.0, avg: 74.5, max: 127.0) +[2026-06-07 02:50:08,105][464932] Avg episode reward: [(0, '122.055')] +[2026-06-07 02:50:08,125][472573] Updated weights for policy 0, policy_version 7473 (0.0006) +[2026-06-07 02:50:08,246][472573] Updated weights for policy 0, policy_version 7484 (0.0006) +[2026-06-07 02:50:08,826][472573] Updated weights for policy 0, policy_version 7496 (0.0008) +[2026-06-07 02:50:08,952][472573] Updated weights for policy 0, policy_version 7507 (0.0005) +[2026-06-07 02:50:09,082][472573] Updated weights for policy 0, policy_version 7518 (0.0010) +[2026-06-07 02:50:09,203][472573] Updated weights for policy 0, policy_version 7529 (0.0012) +[2026-06-07 02:50:09,311][472573] Updated weights for policy 0, policy_version 7539 (0.0008) +[2026-06-07 02:50:09,432][472573] Updated weights for policy 0, policy_version 7550 (0.0009) +[2026-06-07 02:50:09,997][472573] Updated weights for policy 0, policy_version 7561 (0.0007) +[2026-06-07 02:50:10,116][472573] Updated weights for policy 0, policy_version 7572 (0.0007) +[2026-06-07 02:50:10,258][472573] Updated weights for policy 0, policy_version 7586 (0.0009) +[2026-06-07 02:50:10,372][472573] Updated weights for policy 0, policy_version 7596 (0.0009) +[2026-06-07 02:50:10,504][472573] Updated weights for policy 0, policy_version 7608 (0.0010) +[2026-06-07 02:50:11,061][472573] Updated weights for policy 0, policy_version 7618 (0.0008) +[2026-06-07 02:50:11,170][472573] Updated weights for policy 0, policy_version 7628 (0.0008) +[2026-06-07 02:50:11,280][472573] Updated weights for policy 0, policy_version 7638 (0.0008) +[2026-06-07 02:50:11,406][472573] Updated weights for policy 0, policy_version 7650 (0.0009) +[2026-06-07 02:50:11,527][472573] Updated weights for policy 0, policy_version 7661 (0.0009) +[2026-06-07 02:50:11,649][472573] Updated weights for policy 0, policy_version 7672 (0.0009) +[2026-06-07 02:50:12,204][472573] Updated weights for policy 0, policy_version 7682 (0.0008) +[2026-06-07 02:50:12,314][472573] Updated weights for policy 0, policy_version 7692 (0.0008) +[2026-06-07 02:50:12,436][472573] Updated weights for policy 0, policy_version 7703 (0.0008) +[2026-06-07 02:50:12,561][472573] Updated weights for policy 0, policy_version 7714 (0.0008) +[2026-06-07 02:50:12,672][472573] Updated weights for policy 0, policy_version 7725 (0.0006) +[2026-06-07 02:50:12,804][472573] Updated weights for policy 0, policy_version 7736 (0.0008) +[2026-06-07 02:50:13,103][464932] Fps is (10 sec: 29490.8, 60 sec: 28399.0, 300 sec: 26432.8). Total num frames: 3964928. Throughput: 0: 28037.8. Samples: 3954304. Policy #0 lag: (min: 71.0, avg: 89.3, max: 139.0) +[2026-06-07 02:50:13,105][464932] Avg episode reward: [(0, '155.737')] +[2026-06-07 02:50:13,112][472028] Saving new best policy, reward=155.737! +[2026-06-07 02:50:13,347][472573] Updated weights for policy 0, policy_version 7746 (0.0008) +[2026-06-07 02:50:13,467][472573] Updated weights for policy 0, policy_version 7757 (0.0008) +[2026-06-07 02:50:13,576][472573] Updated weights for policy 0, policy_version 7767 (0.0008) +[2026-06-07 02:50:13,716][472573] Updated weights for policy 0, policy_version 7780 (0.0008) +[2026-06-07 02:50:13,850][472573] Updated weights for policy 0, policy_version 7792 (0.0008) +[2026-06-07 02:50:13,963][472573] Updated weights for policy 0, policy_version 7802 (0.0008) +[2026-06-07 02:50:14,527][472573] Updated weights for policy 0, policy_version 7812 (0.0009) +[2026-06-07 02:50:14,645][472573] Updated weights for policy 0, policy_version 7823 (0.0008) +[2026-06-07 02:50:14,765][472573] Updated weights for policy 0, policy_version 7834 (0.0008) +[2026-06-07 02:50:14,887][472573] Updated weights for policy 0, policy_version 7845 (0.0009) +[2026-06-07 02:50:15,000][472573] Updated weights for policy 0, policy_version 7855 (0.0008) +[2026-06-07 02:50:15,119][472573] Updated weights for policy 0, policy_version 7866 (0.0009) +[2026-06-07 02:50:15,715][472573] Updated weights for policy 0, policy_version 7878 (0.0008) +[2026-06-07 02:50:15,826][472573] Updated weights for policy 0, policy_version 7888 (0.0008) +[2026-06-07 02:50:15,951][472573] Updated weights for policy 0, policy_version 7899 (0.0008) +[2026-06-07 02:50:16,074][472573] Updated weights for policy 0, policy_version 7910 (0.0008) +[2026-06-07 02:50:16,175][472573] Updated weights for policy 0, policy_version 7920 (0.0008) +[2026-06-07 02:50:16,288][472573] Updated weights for policy 0, policy_version 7930 (0.0008) +[2026-06-07 02:50:16,861][472573] Updated weights for policy 0, policy_version 7941 (0.0007) +[2026-06-07 02:50:16,971][472573] Updated weights for policy 0, policy_version 7951 (0.0006) +[2026-06-07 02:50:17,089][472573] Updated weights for policy 0, policy_version 7962 (0.0008) +[2026-06-07 02:50:17,224][472573] Updated weights for policy 0, policy_version 7974 (0.0008) +[2026-06-07 02:50:17,344][472573] Updated weights for policy 0, policy_version 7985 (0.0008) +[2026-06-07 02:50:17,462][472573] Updated weights for policy 0, policy_version 7996 (0.0008) +[2026-06-07 02:50:18,054][472573] Updated weights for policy 0, policy_version 8007 (0.0008) +[2026-06-07 02:50:18,103][464932] Fps is (10 sec: 29491.5, 60 sec: 27852.8, 300 sec: 26425.8). Total num frames: 4096000. Throughput: 0: 27986.5. Samples: 4120576. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 02:50:18,104][464932] Avg episode reward: [(0, '181.825')] +[2026-06-07 02:50:18,187][472573] Updated weights for policy 0, policy_version 8019 (0.0008) +[2026-06-07 02:50:18,309][472573] Updated weights for policy 0, policy_version 8030 (0.0009) +[2026-06-07 02:50:18,419][472573] Updated weights for policy 0, policy_version 8040 (0.0008) +[2026-06-07 02:50:18,531][472573] Updated weights for policy 0, policy_version 8051 (0.0008) +[2026-06-07 02:50:18,663][472573] Updated weights for policy 0, policy_version 8063 (0.0008) +[2026-06-07 02:50:18,671][472028] Saving new best policy, reward=181.825! +[2026-06-07 02:50:19,242][472573] Updated weights for policy 0, policy_version 8073 (0.0008) +[2026-06-07 02:50:19,355][472573] Updated weights for policy 0, policy_version 8084 (0.0008) +[2026-06-07 02:50:19,494][472573] Updated weights for policy 0, policy_version 8096 (0.0008) +[2026-06-07 02:50:19,611][472573] Updated weights for policy 0, policy_version 8107 (0.0008) +[2026-06-07 02:50:19,739][472573] Updated weights for policy 0, policy_version 8118 (0.0008) +[2026-06-07 02:50:19,849][472573] Updated weights for policy 0, policy_version 8128 (0.0008) +[2026-06-07 02:50:20,398][472573] Updated weights for policy 0, policy_version 8138 (0.0008) +[2026-06-07 02:50:20,528][472573] Updated weights for policy 0, policy_version 8150 (0.0008) +[2026-06-07 02:50:20,646][472573] Updated weights for policy 0, policy_version 8161 (0.0008) +[2026-06-07 02:50:20,748][472573] Updated weights for policy 0, policy_version 8171 (0.0008) +[2026-06-07 02:50:20,887][472573] Updated weights for policy 0, policy_version 8183 (0.0008) +[2026-06-07 02:50:21,491][472573] Updated weights for policy 0, policy_version 8195 (0.0008) +[2026-06-07 02:50:21,627][472573] Updated weights for policy 0, policy_version 8207 (0.0009) +[2026-06-07 02:50:21,758][472573] Updated weights for policy 0, policy_version 8219 (0.0008) +[2026-06-07 02:50:21,913][472573] Updated weights for policy 0, policy_version 8233 (0.0008) +[2026-06-07 02:50:22,043][472573] Updated weights for policy 0, policy_version 8245 (0.0008) +[2026-06-07 02:50:22,624][472573] Updated weights for policy 0, policy_version 8257 (0.0008) +[2026-06-07 02:50:22,739][472573] Updated weights for policy 0, policy_version 8267 (0.0008) +[2026-06-07 02:50:22,872][472573] Updated weights for policy 0, policy_version 8279 (0.0008) +[2026-06-07 02:50:23,005][472573] Updated weights for policy 0, policy_version 8291 (0.0008) +[2026-06-07 02:50:23,105][464932] Fps is (10 sec: 26209.8, 60 sec: 27852.0, 300 sec: 26418.9). Total num frames: 4227072. Throughput: 0: 28241.3. Samples: 4210688. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 02:50:23,106][464932] Avg episode reward: [(0, '187.998')] +[2026-06-07 02:50:23,122][472573] Updated weights for policy 0, policy_version 8302 (0.0008) +[2026-06-07 02:50:23,253][472573] Updated weights for policy 0, policy_version 8314 (0.0008) +[2026-06-07 02:50:23,314][472028] Saving new best policy, reward=187.998! +[2026-06-07 02:50:23,825][472573] Updated weights for policy 0, policy_version 8324 (0.0008) +[2026-06-07 02:50:23,952][472573] Updated weights for policy 0, policy_version 8335 (0.0008) +[2026-06-07 02:50:24,075][472573] Updated weights for policy 0, policy_version 8346 (0.0008) +[2026-06-07 02:50:24,206][472573] Updated weights for policy 0, policy_version 8358 (0.0008) +[2026-06-07 02:50:24,328][472573] Updated weights for policy 0, policy_version 8369 (0.0008) +[2026-06-07 02:50:24,450][472573] Updated weights for policy 0, policy_version 8380 (0.0008) +[2026-06-07 02:50:25,023][472573] Updated weights for policy 0, policy_version 8391 (0.0008) +[2026-06-07 02:50:25,153][472573] Updated weights for policy 0, policy_version 8403 (0.0008) +[2026-06-07 02:50:25,303][472573] Updated weights for policy 0, policy_version 8417 (0.0008) +[2026-06-07 02:50:25,426][472573] Updated weights for policy 0, policy_version 8428 (0.0008) +[2026-06-07 02:50:25,562][472573] Updated weights for policy 0, policy_version 8440 (0.0008) +[2026-06-07 02:50:26,145][472573] Updated weights for policy 0, policy_version 8452 (0.0008) +[2026-06-07 02:50:26,293][472573] Updated weights for policy 0, policy_version 8466 (0.0008) +[2026-06-07 02:50:26,418][472573] Updated weights for policy 0, policy_version 8477 (0.0008) +[2026-06-07 02:50:26,534][472573] Updated weights for policy 0, policy_version 8488 (0.0008) +[2026-06-07 02:50:26,660][472573] Updated weights for policy 0, policy_version 8499 (0.0008) +[2026-06-07 02:50:26,784][472573] Updated weights for policy 0, policy_version 8510 (0.0008) +[2026-06-07 02:50:27,388][472573] Updated weights for policy 0, policy_version 8522 (0.0008) +[2026-06-07 02:50:27,508][472573] Updated weights for policy 0, policy_version 8533 (0.0008) +[2026-06-07 02:50:27,630][472573] Updated weights for policy 0, policy_version 8544 (0.0008) +[2026-06-07 02:50:27,748][472573] Updated weights for policy 0, policy_version 8555 (0.0008) +[2026-06-07 02:50:27,866][472573] Updated weights for policy 0, policy_version 8565 (0.0008) +[2026-06-07 02:50:27,974][472573] Updated weights for policy 0, policy_version 8575 (0.0010) +[2026-06-07 02:50:28,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28399.1, 300 sec: 26611.6). Total num frames: 4390912. Throughput: 0: 28248.1. Samples: 4377728. Policy #0 lag: (min: 44.0, avg: 55.7, max: 108.0) +[2026-06-07 02:50:28,104][464932] Avg episode reward: [(0, '191.180')] +[2026-06-07 02:50:28,110][472028] Saving new best policy, reward=191.180! +[2026-06-07 02:50:28,553][472573] Updated weights for policy 0, policy_version 8588 (0.0008) +[2026-06-07 02:50:28,660][472573] Updated weights for policy 0, policy_version 8598 (0.0008) +[2026-06-07 02:50:28,788][472573] Updated weights for policy 0, policy_version 8610 (0.0008) +[2026-06-07 02:50:28,914][472573] Updated weights for policy 0, policy_version 8621 (0.0008) +[2026-06-07 02:50:29,023][472573] Updated weights for policy 0, policy_version 8631 (0.0008) +[2026-06-07 02:50:29,586][472573] Updated weights for policy 0, policy_version 8641 (0.0008) +[2026-06-07 02:50:29,702][472573] Updated weights for policy 0, policy_version 8651 (0.0007) +[2026-06-07 02:50:29,824][472573] Updated weights for policy 0, policy_version 8662 (0.0008) +[2026-06-07 02:50:29,947][472573] Updated weights for policy 0, policy_version 8673 (0.0008) +[2026-06-07 02:50:30,102][472573] Updated weights for policy 0, policy_version 8687 (0.0008) +[2026-06-07 02:50:30,210][472573] Updated weights for policy 0, policy_version 8697 (0.0008) +[2026-06-07 02:50:30,785][472573] Updated weights for policy 0, policy_version 8708 (0.0008) +[2026-06-07 02:50:30,912][472573] Updated weights for policy 0, policy_version 8719 (0.0007) +[2026-06-07 02:50:31,028][472573] Updated weights for policy 0, policy_version 8730 (0.0008) +[2026-06-07 02:50:31,143][472573] Updated weights for policy 0, policy_version 8740 (0.0009) +[2026-06-07 02:50:31,255][472573] Updated weights for policy 0, policy_version 8750 (0.0008) +[2026-06-07 02:50:31,374][472573] Updated weights for policy 0, policy_version 8760 (0.0008) +[2026-06-07 02:50:31,923][472573] Updated weights for policy 0, policy_version 8770 (0.0008) +[2026-06-07 02:50:32,040][472573] Updated weights for policy 0, policy_version 8780 (0.0008) +[2026-06-07 02:50:32,163][472573] Updated weights for policy 0, policy_version 8791 (0.0008) +[2026-06-07 02:50:32,283][472573] Updated weights for policy 0, policy_version 8802 (0.0008) +[2026-06-07 02:50:32,394][472573] Updated weights for policy 0, policy_version 8812 (0.0008) +[2026-06-07 02:50:32,501][472573] Updated weights for policy 0, policy_version 8822 (0.0008) +[2026-06-07 02:50:33,095][472573] Updated weights for policy 0, policy_version 8834 (0.0008) +[2026-06-07 02:50:33,104][464932] Fps is (10 sec: 29495.5, 60 sec: 28398.7, 300 sec: 26599.9). Total num frames: 4521984. Throughput: 0: 28114.3. Samples: 4544640. Policy #0 lag: (min: 63.0, avg: 75.5, max: 127.0) +[2026-06-07 02:50:33,105][464932] Avg episode reward: [(0, '184.306')] +[2026-06-07 02:50:33,224][472573] Updated weights for policy 0, policy_version 8845 (0.0008) +[2026-06-07 02:50:33,359][472573] Updated weights for policy 0, policy_version 8857 (0.0008) +[2026-06-07 02:50:33,471][472573] Updated weights for policy 0, policy_version 8867 (0.0008) +[2026-06-07 02:50:33,590][472573] Updated weights for policy 0, policy_version 8878 (0.0008) +[2026-06-07 02:50:33,700][472573] Updated weights for policy 0, policy_version 8888 (0.0009) +[2026-06-07 02:50:34,238][472573] Updated weights for policy 0, policy_version 8898 (0.0008) +[2026-06-07 02:50:34,360][472573] Updated weights for policy 0, policy_version 8909 (0.0008) +[2026-06-07 02:50:34,467][472573] Updated weights for policy 0, policy_version 8919 (0.0008) +[2026-06-07 02:50:34,582][472573] Updated weights for policy 0, policy_version 8929 (0.0008) +[2026-06-07 02:50:34,704][472573] Updated weights for policy 0, policy_version 8940 (0.0008) +[2026-06-07 02:50:34,819][472573] Updated weights for policy 0, policy_version 8950 (0.0008) +[2026-06-07 02:50:35,402][472573] Updated weights for policy 0, policy_version 8962 (0.0008) +[2026-06-07 02:50:35,518][472573] Updated weights for policy 0, policy_version 8972 (0.0008) +[2026-06-07 02:50:35,655][472573] Updated weights for policy 0, policy_version 8984 (0.0008) +[2026-06-07 02:50:35,768][472573] Updated weights for policy 0, policy_version 8994 (0.0008) +[2026-06-07 02:50:35,876][472573] Updated weights for policy 0, policy_version 9004 (0.0008) +[2026-06-07 02:50:35,991][472573] Updated weights for policy 0, policy_version 9014 (0.0008) +[2026-06-07 02:50:36,551][472573] Updated weights for policy 0, policy_version 9025 (0.0008) +[2026-06-07 02:50:36,665][472573] Updated weights for policy 0, policy_version 9035 (0.0008) +[2026-06-07 02:50:36,791][472573] Updated weights for policy 0, policy_version 9047 (0.0008) +[2026-06-07 02:50:36,918][472573] Updated weights for policy 0, policy_version 9058 (0.0008) +[2026-06-07 02:50:37,041][472573] Updated weights for policy 0, policy_version 9069 (0.0008) +[2026-06-07 02:50:37,153][472573] Updated weights for policy 0, policy_version 9079 (0.0008) +[2026-06-07 02:50:37,719][472573] Updated weights for policy 0, policy_version 9090 (0.0008) +[2026-06-07 02:50:37,840][472573] Updated weights for policy 0, policy_version 9100 (0.0008) +[2026-06-07 02:50:37,959][472573] Updated weights for policy 0, policy_version 9111 (0.0008) +[2026-06-07 02:50:38,085][472573] Updated weights for policy 0, policy_version 9122 (0.0008) +[2026-06-07 02:50:38,103][464932] Fps is (10 sec: 26214.4, 60 sec: 27852.8, 300 sec: 26588.9). Total num frames: 4653056. Throughput: 0: 28251.0. Samples: 4635904. Policy #0 lag: (min: 63.0, avg: 75.5, max: 127.0) +[2026-06-07 02:50:38,104][464932] Avg episode reward: [(0, '200.554')] +[2026-06-07 02:50:38,193][472573] Updated weights for policy 0, policy_version 9132 (0.0008) +[2026-06-07 02:50:38,309][472573] Updated weights for policy 0, policy_version 9142 (0.0008) +[2026-06-07 02:50:38,411][472028] Saving new best policy, reward=200.554! +[2026-06-07 02:50:38,888][472573] Updated weights for policy 0, policy_version 9153 (0.0008) +[2026-06-07 02:50:39,022][472573] Updated weights for policy 0, policy_version 9165 (0.0008) +[2026-06-07 02:50:39,150][472573] Updated weights for policy 0, policy_version 9177 (0.0008) +[2026-06-07 02:50:39,304][472573] Updated weights for policy 0, policy_version 9191 (0.0008) +[2026-06-07 02:50:39,421][472573] Updated weights for policy 0, policy_version 9201 (0.0008) +[2026-06-07 02:50:39,537][472573] Updated weights for policy 0, policy_version 9211 (0.0008) +[2026-06-07 02:50:40,137][472573] Updated weights for policy 0, policy_version 9223 (0.0008) +[2026-06-07 02:50:40,271][472573] Updated weights for policy 0, policy_version 9235 (0.0008) +[2026-06-07 02:50:40,392][472573] Updated weights for policy 0, policy_version 9246 (0.0008) +[2026-06-07 02:50:40,499][472573] Updated weights for policy 0, policy_version 9256 (0.0008) +[2026-06-07 02:50:40,626][472573] Updated weights for policy 0, policy_version 9267 (0.0008) +[2026-06-07 02:50:40,738][472573] Updated weights for policy 0, policy_version 9277 (0.0008) +[2026-06-07 02:50:41,323][472573] Updated weights for policy 0, policy_version 9289 (0.0008) +[2026-06-07 02:50:41,436][472573] Updated weights for policy 0, policy_version 9299 (0.0008) +[2026-06-07 02:50:41,561][472573] Updated weights for policy 0, policy_version 9310 (0.0008) +[2026-06-07 02:50:41,688][472573] Updated weights for policy 0, policy_version 9322 (0.0008) +[2026-06-07 02:50:41,819][472573] Updated weights for policy 0, policy_version 9334 (0.0008) +[2026-06-07 02:50:42,403][472573] Updated weights for policy 0, policy_version 9345 (0.0008) +[2026-06-07 02:50:42,520][472573] Updated weights for policy 0, policy_version 9356 (0.0008) +[2026-06-07 02:50:42,669][472573] Updated weights for policy 0, policy_version 9369 (0.0008) +[2026-06-07 02:50:42,776][472573] Updated weights for policy 0, policy_version 9379 (0.0008) +[2026-06-07 02:50:42,898][472573] Updated weights for policy 0, policy_version 9390 (0.0008) +[2026-06-07 02:50:43,040][472573] Updated weights for policy 0, policy_version 9403 (0.0008) +[2026-06-07 02:50:43,103][464932] Fps is (10 sec: 29492.3, 60 sec: 28398.9, 300 sec: 26760.5). Total num frames: 4816896. Throughput: 0: 28239.6. Samples: 4801792. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 02:50:43,104][464932] Avg episode reward: [(0, '214.312')] +[2026-06-07 02:50:43,109][472028] Saving new best policy, reward=214.312! +[2026-06-07 02:50:43,588][472573] Updated weights for policy 0, policy_version 9413 (0.0008) +[2026-06-07 02:50:43,699][472573] Updated weights for policy 0, policy_version 9423 (0.0008) +[2026-06-07 02:50:43,827][472573] Updated weights for policy 0, policy_version 9435 (0.0008) +[2026-06-07 02:50:43,948][472573] Updated weights for policy 0, policy_version 9446 (0.0008) +[2026-06-07 02:50:44,080][472573] Updated weights for policy 0, policy_version 9458 (0.0008) +[2026-06-07 02:50:44,188][472573] Updated weights for policy 0, policy_version 9468 (0.0008) +[2026-06-07 02:50:44,766][472573] Updated weights for policy 0, policy_version 9479 (0.0008) +[2026-06-07 02:50:44,901][472573] Updated weights for policy 0, policy_version 9491 (0.0008) +[2026-06-07 02:50:45,019][472573] Updated weights for policy 0, policy_version 9502 (0.0008) +[2026-06-07 02:50:45,164][472573] Updated weights for policy 0, policy_version 9515 (0.0008) +[2026-06-07 02:50:45,310][472573] Updated weights for policy 0, policy_version 9528 (0.0008) +[2026-06-07 02:50:45,900][472573] Updated weights for policy 0, policy_version 9538 (0.0008) +[2026-06-07 02:50:46,034][472573] Updated weights for policy 0, policy_version 9551 (0.0008) +[2026-06-07 02:50:46,148][472573] Updated weights for policy 0, policy_version 9561 (0.0008) +[2026-06-07 02:50:46,269][472573] Updated weights for policy 0, policy_version 9572 (0.0008) +[2026-06-07 02:50:46,388][472573] Updated weights for policy 0, policy_version 9583 (0.0008) +[2026-06-07 02:50:46,498][472573] Updated weights for policy 0, policy_version 9593 (0.0008) +[2026-06-07 02:50:47,069][472573] Updated weights for policy 0, policy_version 9604 (0.0008) +[2026-06-07 02:50:47,186][472573] Updated weights for policy 0, policy_version 9615 (0.0008) +[2026-06-07 02:50:47,337][472573] Updated weights for policy 0, policy_version 9629 (0.0008) +[2026-06-07 02:50:47,456][472573] Updated weights for policy 0, policy_version 9640 (0.0008) +[2026-06-07 02:50:47,569][472573] Updated weights for policy 0, policy_version 9650 (0.0009) +[2026-06-07 02:50:47,700][472573] Updated weights for policy 0, policy_version 9662 (0.0008) +[2026-06-07 02:50:48,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28399.0, 300 sec: 26745.8). Total num frames: 4947968. Throughput: 0: 28191.3. Samples: 4966656. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 02:50:48,104][464932] Avg episode reward: [(0, '207.640')] +[2026-06-07 02:50:48,290][472573] Updated weights for policy 0, policy_version 9673 (0.0008) +[2026-06-07 02:50:48,401][472573] Updated weights for policy 0, policy_version 9683 (0.0008) +[2026-06-07 02:50:48,517][472573] Updated weights for policy 0, policy_version 9694 (0.0008) +[2026-06-07 02:50:48,645][472573] Updated weights for policy 0, policy_version 9706 (0.0008) +[2026-06-07 02:50:48,763][472573] Updated weights for policy 0, policy_version 9717 (0.0008) +[2026-06-07 02:50:48,878][472573] Updated weights for policy 0, policy_version 9727 (0.0008) +[2026-06-07 02:50:49,445][472573] Updated weights for policy 0, policy_version 9737 (0.0008) +[2026-06-07 02:50:49,569][472573] Updated weights for policy 0, policy_version 9748 (0.0008) +[2026-06-07 02:50:49,704][472573] Updated weights for policy 0, policy_version 9760 (0.0008) +[2026-06-07 02:50:49,811][472573] Updated weights for policy 0, policy_version 9770 (0.0008) +[2026-06-07 02:50:49,956][472573] Updated weights for policy 0, policy_version 9783 (0.0008) +[2026-06-07 02:50:50,531][472573] Updated weights for policy 0, policy_version 9794 (0.0009) +[2026-06-07 02:50:50,656][472573] Updated weights for policy 0, policy_version 9805 (0.0008) +[2026-06-07 02:50:50,783][472573] Updated weights for policy 0, policy_version 9817 (0.0008) +[2026-06-07 02:50:50,900][472573] Updated weights for policy 0, policy_version 9828 (0.0008) +[2026-06-07 02:50:51,020][472573] Updated weights for policy 0, policy_version 9839 (0.0008) +[2026-06-07 02:50:51,149][472573] Updated weights for policy 0, policy_version 9851 (0.0010) +[2026-06-07 02:50:51,721][472573] Updated weights for policy 0, policy_version 9862 (0.0007) +[2026-06-07 02:50:51,855][472573] Updated weights for policy 0, policy_version 9874 (0.0008) +[2026-06-07 02:50:51,988][472573] Updated weights for policy 0, policy_version 9886 (0.0008) +[2026-06-07 02:50:52,111][472573] Updated weights for policy 0, policy_version 9897 (0.0008) +[2026-06-07 02:50:52,233][472573] Updated weights for policy 0, policy_version 9908 (0.0008) +[2026-06-07 02:50:52,347][472573] Updated weights for policy 0, policy_version 9918 (0.0008) +[2026-06-07 02:50:52,911][472573] Updated weights for policy 0, policy_version 9928 (0.0007) +[2026-06-07 02:50:53,023][472573] Updated weights for policy 0, policy_version 9938 (0.0008) +[2026-06-07 02:50:53,103][464932] Fps is (10 sec: 26214.5, 60 sec: 28399.0, 300 sec: 26731.8). Total num frames: 5079040. Throughput: 0: 28205.6. Samples: 5057408. Policy #0 lag: (min: 12.0, avg: 23.6, max: 76.0) +[2026-06-07 02:50:53,104][464932] Avg episode reward: [(0, '208.938')] +[2026-06-07 02:50:53,142][472573] Updated weights for policy 0, policy_version 9949 (0.0008) +[2026-06-07 02:50:53,271][472573] Updated weights for policy 0, policy_version 9961 (0.0008) +[2026-06-07 02:50:53,395][472573] Updated weights for policy 0, policy_version 9972 (0.0008) +[2026-06-07 02:50:53,517][472573] Updated weights for policy 0, policy_version 9983 (0.0008) +[2026-06-07 02:50:54,078][472573] Updated weights for policy 0, policy_version 9993 (0.0008) +[2026-06-07 02:50:54,228][472573] Updated weights for policy 0, policy_version 10007 (0.0008) +[2026-06-07 02:50:54,336][472573] Updated weights for policy 0, policy_version 10017 (0.0009) +[2026-06-07 02:50:54,460][472573] Updated weights for policy 0, policy_version 10028 (0.0008) +[2026-06-07 02:50:54,572][472573] Updated weights for policy 0, policy_version 10038 (0.0008) +[2026-06-07 02:50:54,685][472573] Updated weights for policy 0, policy_version 10048 (0.0008) +[2026-06-07 02:50:55,224][472573] Updated weights for policy 0, policy_version 10059 (0.0008) +[2026-06-07 02:50:55,352][472573] Updated weights for policy 0, policy_version 10071 (0.0009) +[2026-06-07 02:50:55,474][472573] Updated weights for policy 0, policy_version 10081 (0.0008) +[2026-06-07 02:50:55,596][472573] Updated weights for policy 0, policy_version 10093 (0.0009) +[2026-06-07 02:50:55,744][472573] Updated weights for policy 0, policy_version 10106 (0.0008) +[2026-06-07 02:50:56,334][472573] Updated weights for policy 0, policy_version 10117 (0.0008) +[2026-06-07 02:50:56,457][472573] Updated weights for policy 0, policy_version 10128 (0.0008) +[2026-06-07 02:50:56,567][472573] Updated weights for policy 0, policy_version 10138 (0.0008) +[2026-06-07 02:50:56,711][472573] Updated weights for policy 0, policy_version 10151 (0.0008) +[2026-06-07 02:50:56,831][472573] Updated weights for policy 0, policy_version 10162 (0.0008) +[2026-06-07 02:50:56,957][472573] Updated weights for policy 0, policy_version 10173 (0.0009) +[2026-06-07 02:50:57,516][472573] Updated weights for policy 0, policy_version 10184 (0.0008) +[2026-06-07 02:50:57,627][472573] Updated weights for policy 0, policy_version 10194 (0.0008) +[2026-06-07 02:50:57,737][472573] Updated weights for policy 0, policy_version 10204 (0.0008) +[2026-06-07 02:50:57,859][472573] Updated weights for policy 0, policy_version 10215 (0.0008) +[2026-06-07 02:50:57,993][472573] Updated weights for policy 0, policy_version 10227 (0.0008) +[2026-06-07 02:50:58,102][472573] Updated weights for policy 0, policy_version 10237 (0.0008) +[2026-06-07 02:50:58,103][464932] Fps is (10 sec: 26214.3, 60 sec: 27852.8, 300 sec: 26718.5). Total num frames: 5210112. Throughput: 0: 28208.4. Samples: 5223680. Policy #0 lag: (min: 12.0, avg: 23.6, max: 76.0) +[2026-06-07 02:50:58,104][464932] Avg episode reward: [(0, '248.546')] +[2026-06-07 02:50:58,140][472028] Saving new best policy, reward=248.546! +[2026-06-07 02:50:58,677][472573] Updated weights for policy 0, policy_version 10248 (0.0008) +[2026-06-07 02:50:58,809][472573] Updated weights for policy 0, policy_version 10260 (0.0008) +[2026-06-07 02:50:58,931][472573] Updated weights for policy 0, policy_version 10271 (0.0008) +[2026-06-07 02:50:59,067][472573] Updated weights for policy 0, policy_version 10283 (0.0008) +[2026-06-07 02:50:59,185][472573] Updated weights for policy 0, policy_version 10294 (0.0008) +[2026-06-07 02:50:59,753][472573] Updated weights for policy 0, policy_version 10305 (0.0008) +[2026-06-07 02:50:59,865][472573] Updated weights for policy 0, policy_version 10315 (0.0007) +[2026-06-07 02:50:59,984][472573] Updated weights for policy 0, policy_version 10326 (0.0008) +[2026-06-07 02:51:00,110][472573] Updated weights for policy 0, policy_version 10337 (0.0008) +[2026-06-07 02:51:00,227][472573] Updated weights for policy 0, policy_version 10348 (0.0008) +[2026-06-07 02:51:00,346][472573] Updated weights for policy 0, policy_version 10359 (0.0009) +[2026-06-07 02:51:00,891][472573] Updated weights for policy 0, policy_version 10369 (0.0009) +[2026-06-07 02:51:01,018][472573] Updated weights for policy 0, policy_version 10380 (0.0009) +[2026-06-07 02:51:01,153][472573] Updated weights for policy 0, policy_version 10392 (0.0008) +[2026-06-07 02:51:01,281][472573] Updated weights for policy 0, policy_version 10404 (0.0009) +[2026-06-07 02:51:01,408][472573] Updated weights for policy 0, policy_version 10415 (0.0006) +[2026-06-07 02:51:01,534][472573] Updated weights for policy 0, policy_version 10426 (0.0004) +[2026-06-07 02:51:02,091][472573] Updated weights for policy 0, policy_version 10437 (0.0007) +[2026-06-07 02:51:02,200][472573] Updated weights for policy 0, policy_version 10447 (0.0008) +[2026-06-07 02:51:02,313][472573] Updated weights for policy 0, policy_version 10457 (0.0008) +[2026-06-07 02:51:02,422][472573] Updated weights for policy 0, policy_version 10467 (0.0008) +[2026-06-07 02:51:02,563][472573] Updated weights for policy 0, policy_version 10480 (0.0008) +[2026-06-07 02:51:02,691][472573] Updated weights for policy 0, policy_version 10491 (0.0008) +[2026-06-07 02:51:03,103][464932] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 26869.8). Total num frames: 5373952. Throughput: 0: 28202.7. Samples: 5389696. Policy #0 lag: (min: 63.0, avg: 75.5, max: 127.0) +[2026-06-07 02:51:03,104][464932] Avg episode reward: [(0, '254.886')] +[2026-06-07 02:51:03,109][472028] Saving new best policy, reward=254.886! +[2026-06-07 02:51:03,278][472573] Updated weights for policy 0, policy_version 10502 (0.0007) +[2026-06-07 02:51:03,394][472573] Updated weights for policy 0, policy_version 10512 (0.0008) +[2026-06-07 02:51:03,504][472573] Updated weights for policy 0, policy_version 10522 (0.0008) +[2026-06-07 02:51:03,614][472573] Updated weights for policy 0, policy_version 10532 (0.0008) +[2026-06-07 02:51:03,720][472573] Updated weights for policy 0, policy_version 10542 (0.0008) +[2026-06-07 02:51:03,839][472573] Updated weights for policy 0, policy_version 10552 (0.0008) +[2026-06-07 02:51:04,390][472573] Updated weights for policy 0, policy_version 10562 (0.0008) +[2026-06-07 02:51:04,496][472573] Updated weights for policy 0, policy_version 10572 (0.0008) +[2026-06-07 02:51:04,625][472573] Updated weights for policy 0, policy_version 10583 (0.0008) +[2026-06-07 02:51:04,734][472573] Updated weights for policy 0, policy_version 10593 (0.0008) +[2026-06-07 02:51:04,847][472573] Updated weights for policy 0, policy_version 10603 (0.0008) +[2026-06-07 02:51:04,978][472573] Updated weights for policy 0, policy_version 10615 (0.0008) +[2026-06-07 02:51:05,544][472573] Updated weights for policy 0, policy_version 10626 (0.0008) +[2026-06-07 02:51:05,670][472573] Updated weights for policy 0, policy_version 10637 (0.0008) +[2026-06-07 02:51:05,794][472573] Updated weights for policy 0, policy_version 10649 (0.0008) +[2026-06-07 02:51:05,905][472573] Updated weights for policy 0, policy_version 10659 (0.0008) +[2026-06-07 02:51:06,037][472573] Updated weights for policy 0, policy_version 10671 (0.0008) +[2026-06-07 02:51:06,163][472573] Updated weights for policy 0, policy_version 10682 (0.0008) +[2026-06-07 02:51:06,743][472573] Updated weights for policy 0, policy_version 10693 (0.0008) +[2026-06-07 02:51:06,862][472573] Updated weights for policy 0, policy_version 10704 (0.0008) +[2026-06-07 02:51:06,992][472573] Updated weights for policy 0, policy_version 10715 (0.0008) +[2026-06-07 02:51:07,115][472573] Updated weights for policy 0, policy_version 10726 (0.0008) +[2026-06-07 02:51:07,224][472573] Updated weights for policy 0, policy_version 10736 (0.0008) +[2026-06-07 02:51:07,346][472573] Updated weights for policy 0, policy_version 10747 (0.0008) +[2026-06-07 02:51:07,902][472573] Updated weights for policy 0, policy_version 10758 (0.0007) +[2026-06-07 02:51:08,022][472573] Updated weights for policy 0, policy_version 10769 (0.0008) +[2026-06-07 02:51:08,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28399.0, 300 sec: 26853.8). Total num frames: 5505024. Throughput: 0: 28206.7. Samples: 5479936. Policy #0 lag: (min: 23.0, avg: 34.7, max: 87.0) +[2026-06-07 02:51:08,104][464932] Avg episode reward: [(0, '232.746')] +[2026-06-07 02:51:08,148][472573] Updated weights for policy 0, policy_version 10780 (0.0008) +[2026-06-07 02:51:08,256][472573] Updated weights for policy 0, policy_version 10790 (0.0008) +[2026-06-07 02:51:08,382][472573] Updated weights for policy 0, policy_version 10801 (0.0008) +[2026-06-07 02:51:08,526][472573] Updated weights for policy 0, policy_version 10814 (0.0008) +[2026-06-07 02:51:09,092][472573] Updated weights for policy 0, policy_version 10825 (0.0009) +[2026-06-07 02:51:09,218][472573] Updated weights for policy 0, policy_version 10836 (0.0008) +[2026-06-07 02:51:09,338][472573] Updated weights for policy 0, policy_version 10847 (0.0008) +[2026-06-07 02:51:09,460][472573] Updated weights for policy 0, policy_version 10858 (0.0008) +[2026-06-07 02:51:09,570][472573] Updated weights for policy 0, policy_version 10868 (0.0008) +[2026-06-07 02:51:09,688][472573] Updated weights for policy 0, policy_version 10879 (0.0008) +[2026-06-07 02:51:10,272][472573] Updated weights for policy 0, policy_version 10890 (0.0008) +[2026-06-07 02:51:10,384][472573] Updated weights for policy 0, policy_version 10900 (0.0008) +[2026-06-07 02:51:10,502][472573] Updated weights for policy 0, policy_version 10911 (0.0008) +[2026-06-07 02:51:10,609][472573] Updated weights for policy 0, policy_version 10921 (0.0008) +[2026-06-07 02:51:10,752][472573] Updated weights for policy 0, policy_version 10933 (0.0008) +[2026-06-07 02:51:10,869][472573] Updated weights for policy 0, policy_version 10943 (0.0008) +[2026-06-07 02:51:11,456][472573] Updated weights for policy 0, policy_version 10954 (0.0008) +[2026-06-07 02:51:11,596][472573] Updated weights for policy 0, policy_version 10967 (0.0010) +[2026-06-07 02:51:11,736][472573] Updated weights for policy 0, policy_version 10979 (0.0008) +[2026-06-07 02:51:11,856][472573] Updated weights for policy 0, policy_version 10990 (0.0008) +[2026-06-07 02:51:11,981][472573] Updated weights for policy 0, policy_version 11001 (0.0009) +[2026-06-07 02:51:12,547][472573] Updated weights for policy 0, policy_version 11012 (0.0007) +[2026-06-07 02:51:12,670][472573] Updated weights for policy 0, policy_version 11024 (0.0007) +[2026-06-07 02:51:12,782][472573] Updated weights for policy 0, policy_version 11034 (0.0005) +[2026-06-07 02:51:12,911][472573] Updated weights for policy 0, policy_version 11045 (0.0009) +[2026-06-07 02:51:13,017][472573] Updated weights for policy 0, policy_version 11055 (0.0008) +[2026-06-07 02:51:13,103][464932] Fps is (10 sec: 26214.2, 60 sec: 27852.8, 300 sec: 26838.6). Total num frames: 5636096. Throughput: 0: 28205.5. Samples: 5646976. Policy #0 lag: (min: 23.0, avg: 34.7, max: 87.0) +[2026-06-07 02:51:13,104][464932] Avg episode reward: [(0, '223.531')] +[2026-06-07 02:51:13,134][472573] Updated weights for policy 0, policy_version 11065 (0.0009) +[2026-06-07 02:51:13,695][472573] Updated weights for policy 0, policy_version 11075 (0.0008) +[2026-06-07 02:51:13,817][472573] Updated weights for policy 0, policy_version 11086 (0.0008) +[2026-06-07 02:51:13,942][472573] Updated weights for policy 0, policy_version 11097 (0.0009) +[2026-06-07 02:51:14,052][472573] Updated weights for policy 0, policy_version 11107 (0.0008) +[2026-06-07 02:51:14,177][472573] Updated weights for policy 0, policy_version 11118 (0.0008) +[2026-06-07 02:51:14,299][472573] Updated weights for policy 0, policy_version 11129 (0.0009) +[2026-06-07 02:51:14,861][472573] Updated weights for policy 0, policy_version 11140 (0.0008) +[2026-06-07 02:51:14,995][472573] Updated weights for policy 0, policy_version 11152 (0.0008) +[2026-06-07 02:51:15,111][472573] Updated weights for policy 0, policy_version 11163 (0.0008) +[2026-06-07 02:51:15,225][472573] Updated weights for policy 0, policy_version 11173 (0.0008) +[2026-06-07 02:51:15,357][472573] Updated weights for policy 0, policy_version 11185 (0.0008) +[2026-06-07 02:51:15,480][472573] Updated weights for policy 0, policy_version 11196 (0.0008) +[2026-06-07 02:51:16,051][472573] Updated weights for policy 0, policy_version 11207 (0.0008) +[2026-06-07 02:51:16,167][472573] Updated weights for policy 0, policy_version 11218 (0.0008) +[2026-06-07 02:51:16,308][472573] Updated weights for policy 0, policy_version 11230 (0.0008) +[2026-06-07 02:51:16,416][472573] Updated weights for policy 0, policy_version 11240 (0.0008) +[2026-06-07 02:51:16,535][472573] Updated weights for policy 0, policy_version 11251 (0.0008) +[2026-06-07 02:51:16,674][472573] Updated weights for policy 0, policy_version 11264 (0.0009) +[2026-06-07 02:51:17,251][472573] Updated weights for policy 0, policy_version 11275 (0.0008) +[2026-06-07 02:51:17,368][472573] Updated weights for policy 0, policy_version 11286 (0.0008) +[2026-06-07 02:51:17,489][472573] Updated weights for policy 0, policy_version 11296 (0.0008) +[2026-06-07 02:51:17,596][472573] Updated weights for policy 0, policy_version 11306 (0.0008) +[2026-06-07 02:51:17,714][472573] Updated weights for policy 0, policy_version 11317 (0.0008) +[2026-06-07 02:51:17,835][472573] Updated weights for policy 0, policy_version 11328 (0.0009) +[2026-06-07 02:51:18,103][464932] Fps is (10 sec: 29490.8, 60 sec: 28398.9, 300 sec: 26976.4). Total num frames: 5799936. Throughput: 0: 28194.3. Samples: 5813376. Policy #0 lag: (min: 23.0, avg: 34.9, max: 87.0) +[2026-06-07 02:51:18,106][464932] Avg episode reward: [(0, '239.942')] +[2026-06-07 02:51:18,405][472573] Updated weights for policy 0, policy_version 11338 (0.0007) +[2026-06-07 02:51:18,523][472573] Updated weights for policy 0, policy_version 11349 (0.0008) +[2026-06-07 02:51:18,663][472573] Updated weights for policy 0, policy_version 11362 (0.0008) +[2026-06-07 02:51:18,783][472573] Updated weights for policy 0, policy_version 11373 (0.0009) +[2026-06-07 02:51:18,934][472573] Updated weights for policy 0, policy_version 11386 (0.0008) +[2026-06-07 02:51:19,505][472573] Updated weights for policy 0, policy_version 11398 (0.0008) +[2026-06-07 02:51:19,627][472573] Updated weights for policy 0, policy_version 11409 (0.0010) +[2026-06-07 02:51:19,751][472573] Updated weights for policy 0, policy_version 11420 (0.0008) +[2026-06-07 02:51:19,875][472573] Updated weights for policy 0, policy_version 11431 (0.0008) +[2026-06-07 02:51:20,006][472573] Updated weights for policy 0, policy_version 11443 (0.0008) +[2026-06-07 02:51:20,137][472573] Updated weights for policy 0, policy_version 11455 (0.0008) +[2026-06-07 02:51:20,719][472573] Updated weights for policy 0, policy_version 11467 (0.0008) +[2026-06-07 02:51:20,835][472573] Updated weights for policy 0, policy_version 11477 (0.0008) +[2026-06-07 02:51:20,960][472573] Updated weights for policy 0, policy_version 11488 (0.0008) +[2026-06-07 02:51:21,093][472573] Updated weights for policy 0, policy_version 11500 (0.0008) +[2026-06-07 02:51:21,207][472573] Updated weights for policy 0, policy_version 11510 (0.0008) +[2026-06-07 02:51:21,319][472573] Updated weights for policy 0, policy_version 11520 (0.0008) +[2026-06-07 02:51:21,883][472573] Updated weights for policy 0, policy_version 11531 (0.0005) +[2026-06-07 02:51:22,002][472573] Updated weights for policy 0, policy_version 11542 (0.0009) +[2026-06-07 02:51:22,137][472573] Updated weights for policy 0, policy_version 11554 (0.0008) +[2026-06-07 02:51:22,245][472573] Updated weights for policy 0, policy_version 11564 (0.0008) +[2026-06-07 02:51:22,361][472573] Updated weights for policy 0, policy_version 11574 (0.0008) +[2026-06-07 02:51:22,468][472573] Updated weights for policy 0, policy_version 11584 (0.0008) +[2026-06-07 02:51:23,035][472573] Updated weights for policy 0, policy_version 11596 (0.0009) +[2026-06-07 02:51:23,103][464932] Fps is (10 sec: 29491.3, 60 sec: 28399.8, 300 sec: 26959.1). Total num frames: 5931008. Throughput: 0: 28157.2. Samples: 5902976. Policy #0 lag: (min: 23.0, avg: 34.9, max: 87.0) +[2026-06-07 02:51:23,104][464932] Avg episode reward: [(0, '251.195')] +[2026-06-07 02:51:23,157][472573] Updated weights for policy 0, policy_version 11607 (0.0009) +[2026-06-07 02:51:23,283][472573] Updated weights for policy 0, policy_version 11618 (0.0008) +[2026-06-07 02:51:23,409][472573] Updated weights for policy 0, policy_version 11629 (0.0009) +[2026-06-07 02:51:23,522][472573] Updated weights for policy 0, policy_version 11639 (0.0008) +[2026-06-07 02:51:24,103][472573] Updated weights for policy 0, policy_version 11650 (0.0009) +[2026-06-07 02:51:24,232][472573] Updated weights for policy 0, policy_version 11662 (0.0009) +[2026-06-07 02:51:24,349][472573] Updated weights for policy 0, policy_version 11672 (0.0008) +[2026-06-07 02:51:24,500][472573] Updated weights for policy 0, policy_version 11686 (0.0008) +[2026-06-07 02:51:24,612][472573] Updated weights for policy 0, policy_version 11696 (0.0008) +[2026-06-07 02:51:24,759][472573] Updated weights for policy 0, policy_version 11709 (0.0008) +[2026-06-07 02:51:25,321][472573] Updated weights for policy 0, policy_version 11719 (0.0007) +[2026-06-07 02:51:25,465][472573] Updated weights for policy 0, policy_version 11732 (0.0008) +[2026-06-07 02:51:25,586][472573] Updated weights for policy 0, policy_version 11743 (0.0008) +[2026-06-07 02:51:25,692][472573] Updated weights for policy 0, policy_version 11753 (0.0008) +[2026-06-07 02:51:25,815][472573] Updated weights for policy 0, policy_version 11764 (0.0008) +[2026-06-07 02:51:25,945][472573] Updated weights for policy 0, policy_version 11775 (0.0010) +[2026-06-07 02:51:26,538][472573] Updated weights for policy 0, policy_version 11787 (0.0007) +[2026-06-07 02:51:26,658][472573] Updated weights for policy 0, policy_version 11798 (0.0008) +[2026-06-07 02:51:26,798][472573] Updated weights for policy 0, policy_version 11810 (0.0008) +[2026-06-07 02:51:26,902][472573] Updated weights for policy 0, policy_version 11820 (0.0008) +[2026-06-07 02:51:27,029][472573] Updated weights for policy 0, policy_version 11831 (0.0008) +[2026-06-07 02:51:27,600][472573] Updated weights for policy 0, policy_version 11842 (0.0008) +[2026-06-07 02:51:27,709][472573] Updated weights for policy 0, policy_version 11852 (0.0008) +[2026-06-07 02:51:27,818][472573] Updated weights for policy 0, policy_version 11862 (0.0008) +[2026-06-07 02:51:27,941][472573] Updated weights for policy 0, policy_version 11873 (0.0008) +[2026-06-07 02:51:28,059][472573] Updated weights for policy 0, policy_version 11884 (0.0008) +[2026-06-07 02:51:28,103][464932] Fps is (10 sec: 26214.7, 60 sec: 27852.8, 300 sec: 26942.6). Total num frames: 6062080. Throughput: 0: 28219.7. Samples: 6071680. Policy #0 lag: (min: 63.0, avg: 74.4, max: 127.0) +[2026-06-07 02:51:28,104][464932] Avg episode reward: [(0, '292.513')] +[2026-06-07 02:51:28,189][472573] Updated weights for policy 0, policy_version 11895 (0.0008) +[2026-06-07 02:51:28,282][472028] Saving new best policy, reward=292.513! +[2026-06-07 02:51:28,769][472573] Updated weights for policy 0, policy_version 11906 (0.0008) +[2026-06-07 02:51:28,875][472573] Updated weights for policy 0, policy_version 11916 (0.0008) +[2026-06-07 02:51:29,000][472573] Updated weights for policy 0, policy_version 11927 (0.0008) +[2026-06-07 02:51:29,109][472573] Updated weights for policy 0, policy_version 11937 (0.0008) +[2026-06-07 02:51:29,247][472573] Updated weights for policy 0, policy_version 11949 (0.0008) +[2026-06-07 02:51:29,366][472573] Updated weights for policy 0, policy_version 11960 (0.0008) +[2026-06-07 02:51:29,933][472573] Updated weights for policy 0, policy_version 11972 (0.0008) +[2026-06-07 02:51:30,055][472573] Updated weights for policy 0, policy_version 11983 (0.0008) +[2026-06-07 02:51:30,197][472573] Updated weights for policy 0, policy_version 11996 (0.0008) +[2026-06-07 02:51:30,330][472573] Updated weights for policy 0, policy_version 12008 (0.0008) +[2026-06-07 02:51:30,458][472573] Updated weights for policy 0, policy_version 12019 (0.0008) +[2026-06-07 02:51:30,567][472573] Updated weights for policy 0, policy_version 12029 (0.0008) +[2026-06-07 02:51:31,124][472573] Updated weights for policy 0, policy_version 12040 (0.0007) +[2026-06-07 02:51:31,255][472573] Updated weights for policy 0, policy_version 12052 (0.0008) +[2026-06-07 02:51:31,385][472573] Updated weights for policy 0, policy_version 12064 (0.0008) +[2026-06-07 02:51:31,506][472573] Updated weights for policy 0, policy_version 12075 (0.0008) +[2026-06-07 02:51:31,617][472573] Updated weights for policy 0, policy_version 12085 (0.0008) +[2026-06-07 02:51:31,743][472573] Updated weights for policy 0, policy_version 12096 (0.0008) +[2026-06-07 02:51:32,312][472573] Updated weights for policy 0, policy_version 12107 (0.0008) +[2026-06-07 02:51:32,422][472573] Updated weights for policy 0, policy_version 12117 (0.0008) +[2026-06-07 02:51:32,534][472573] Updated weights for policy 0, policy_version 12127 (0.0010) +[2026-06-07 02:51:32,654][472573] Updated weights for policy 0, policy_version 12138 (0.0012) +[2026-06-07 02:51:32,778][472573] Updated weights for policy 0, policy_version 12149 (0.0004) +[2026-06-07 02:51:32,900][472573] Updated weights for policy 0, policy_version 12160 (0.0004) +[2026-06-07 02:51:33,103][464932] Fps is (10 sec: 29491.3, 60 sec: 28399.1, 300 sec: 27069.2). Total num frames: 6225920. Throughput: 0: 28248.2. Samples: 6237824. Policy #0 lag: (min: 63.0, avg: 74.4, max: 127.0) +[2026-06-07 02:51:33,104][464932] Avg episode reward: [(0, '301.495')] +[2026-06-07 02:51:33,109][472028] Saving new best policy, reward=301.495! +[2026-06-07 02:51:33,458][472573] Updated weights for policy 0, policy_version 12171 (0.0004) +[2026-06-07 02:51:33,591][472573] Updated weights for policy 0, policy_version 12183 (0.0008) +[2026-06-07 02:51:33,700][472573] Updated weights for policy 0, policy_version 12193 (0.0008) +[2026-06-07 02:51:33,806][472573] Updated weights for policy 0, policy_version 12203 (0.0008) +[2026-06-07 02:51:33,926][472573] Updated weights for policy 0, policy_version 12213 (0.0008) +[2026-06-07 02:51:34,048][472573] Updated weights for policy 0, policy_version 12224 (0.0008) +[2026-06-07 02:51:34,584][472573] Updated weights for policy 0, policy_version 12234 (0.0008) +[2026-06-07 02:51:34,690][472573] Updated weights for policy 0, policy_version 12244 (0.0008) +[2026-06-07 02:51:34,804][472573] Updated weights for policy 0, policy_version 12254 (0.0008) +[2026-06-07 02:51:34,915][472573] Updated weights for policy 0, policy_version 12264 (0.0008) +[2026-06-07 02:51:35,064][472573] Updated weights for policy 0, policy_version 12277 (0.0008) +[2026-06-07 02:51:35,185][472573] Updated weights for policy 0, policy_version 12288 (0.0008) +[2026-06-07 02:51:35,741][472573] Updated weights for policy 0, policy_version 12298 (0.0008) +[2026-06-07 02:51:35,854][472573] Updated weights for policy 0, policy_version 12308 (0.0008) +[2026-06-07 02:51:35,970][472573] Updated weights for policy 0, policy_version 12319 (0.0008) +[2026-06-07 02:51:36,096][472573] Updated weights for policy 0, policy_version 12330 (0.0008) +[2026-06-07 02:51:36,244][472573] Updated weights for policy 0, policy_version 12343 (0.0008) +[2026-06-07 02:51:36,784][472573] Updated weights for policy 0, policy_version 12353 (0.0008) +[2026-06-07 02:51:36,917][472573] Updated weights for policy 0, policy_version 12365 (0.0008) +[2026-06-07 02:51:37,036][472573] Updated weights for policy 0, policy_version 12376 (0.0008) +[2026-06-07 02:51:37,145][472573] Updated weights for policy 0, policy_version 12386 (0.0008) +[2026-06-07 02:51:37,259][472573] Updated weights for policy 0, policy_version 12396 (0.0008) +[2026-06-07 02:51:37,387][472573] Updated weights for policy 0, policy_version 12407 (0.0008) +[2026-06-07 02:51:37,925][472573] Updated weights for policy 0, policy_version 12417 (0.0008) +[2026-06-07 02:51:38,043][472573] Updated weights for policy 0, policy_version 12427 (0.0008) +[2026-06-07 02:51:38,103][464932] Fps is (10 sec: 29491.1, 60 sec: 28398.9, 300 sec: 27051.0). Total num frames: 6356992. Throughput: 0: 28199.8. Samples: 6326400. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 02:51:38,104][464932] Avg episode reward: [(0, '315.699')] +[2026-06-07 02:51:38,158][472573] Updated weights for policy 0, policy_version 12437 (0.0008) +[2026-06-07 02:51:38,274][472573] Updated weights for policy 0, policy_version 12447 (0.0008) +[2026-06-07 02:51:38,409][472573] Updated weights for policy 0, policy_version 12459 (0.0008) +[2026-06-07 02:51:38,544][472573] Updated weights for policy 0, policy_version 12471 (0.0008) +[2026-06-07 02:51:38,639][472028] Saving new best policy, reward=315.699! +[2026-06-07 02:51:39,105][472573] Updated weights for policy 0, policy_version 12482 (0.0008) +[2026-06-07 02:51:39,237][472573] Updated weights for policy 0, policy_version 12494 (0.0008) +[2026-06-07 02:51:39,352][472573] Updated weights for policy 0, policy_version 12504 (0.0008) +[2026-06-07 02:51:39,462][472573] Updated weights for policy 0, policy_version 12514 (0.0008) +[2026-06-07 02:51:39,577][472573] Updated weights for policy 0, policy_version 12524 (0.0008) +[2026-06-07 02:51:39,689][472573] Updated weights for policy 0, policy_version 12534 (0.0009) +[2026-06-07 02:51:39,798][472573] Updated weights for policy 0, policy_version 12544 (0.0008) +[2026-06-07 02:51:40,372][472573] Updated weights for policy 0, policy_version 12557 (0.0008) +[2026-06-07 02:51:40,481][472573] Updated weights for policy 0, policy_version 12567 (0.0008) +[2026-06-07 02:51:40,594][472573] Updated weights for policy 0, policy_version 12577 (0.0008) +[2026-06-07 02:51:40,730][472573] Updated weights for policy 0, policy_version 12589 (0.0009) +[2026-06-07 02:51:40,849][472573] Updated weights for policy 0, policy_version 12600 (0.0008) +[2026-06-07 02:51:41,396][472573] Updated weights for policy 0, policy_version 12610 (0.0007) +[2026-06-07 02:51:41,510][472573] Updated weights for policy 0, policy_version 12620 (0.0007) +[2026-06-07 02:51:41,622][472573] Updated weights for policy 0, policy_version 12630 (0.0008) +[2026-06-07 02:51:41,753][472573] Updated weights for policy 0, policy_version 12642 (0.0008) +[2026-06-07 02:51:41,899][472573] Updated weights for policy 0, policy_version 12655 (0.0008) +[2026-06-07 02:51:42,015][472573] Updated weights for policy 0, policy_version 12665 (0.0008) +[2026-06-07 02:51:42,576][472573] Updated weights for policy 0, policy_version 12677 (0.0008) +[2026-06-07 02:51:42,692][472573] Updated weights for policy 0, policy_version 12688 (0.0008) +[2026-06-07 02:51:42,829][472573] Updated weights for policy 0, policy_version 12700 (0.0008) +[2026-06-07 02:51:42,959][472573] Updated weights for policy 0, policy_version 12712 (0.0008) +[2026-06-07 02:51:43,078][472573] Updated weights for policy 0, policy_version 12722 (0.0008) +[2026-06-07 02:51:43,103][464932] Fps is (10 sec: 26214.1, 60 sec: 27852.8, 300 sec: 27033.6). Total num frames: 6488064. Throughput: 0: 28253.8. Samples: 6495104. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 02:51:43,104][464932] Avg episode reward: [(0, '327.179')] +[2026-06-07 02:51:43,187][472573] Updated weights for policy 0, policy_version 12732 (0.0008) +[2026-06-07 02:51:43,228][472028] Saving new best policy, reward=327.179! +[2026-06-07 02:51:43,731][472573] Updated weights for policy 0, policy_version 12742 (0.0008) +[2026-06-07 02:51:43,852][472573] Updated weights for policy 0, policy_version 12753 (0.0008) +[2026-06-07 02:51:43,969][472573] Updated weights for policy 0, policy_version 12764 (0.0008) +[2026-06-07 02:51:44,083][472573] Updated weights for policy 0, policy_version 12774 (0.0008) +[2026-06-07 02:51:44,194][472573] Updated weights for policy 0, policy_version 12784 (0.0008) +[2026-06-07 02:51:44,323][472573] Updated weights for policy 0, policy_version 12795 (0.0008) +[2026-06-07 02:51:44,910][472573] Updated weights for policy 0, policy_version 12808 (0.0008) +[2026-06-07 02:51:45,034][472573] Updated weights for policy 0, policy_version 12819 (0.0009) +[2026-06-07 02:51:45,142][472573] Updated weights for policy 0, policy_version 12829 (0.0008) +[2026-06-07 02:51:45,277][472573] Updated weights for policy 0, policy_version 12841 (0.0008) +[2026-06-07 02:51:45,426][472573] Updated weights for policy 0, policy_version 12854 (0.0008) +[2026-06-07 02:51:46,001][472573] Updated weights for policy 0, policy_version 12866 (0.0008) +[2026-06-07 02:51:46,128][472573] Updated weights for policy 0, policy_version 12877 (0.0009) +[2026-06-07 02:51:46,239][472573] Updated weights for policy 0, policy_version 12887 (0.0008) +[2026-06-07 02:51:46,353][472573] Updated weights for policy 0, policy_version 12897 (0.0008) +[2026-06-07 02:51:46,459][472573] Updated weights for policy 0, policy_version 12907 (0.0008) +[2026-06-07 02:51:46,585][472573] Updated weights for policy 0, policy_version 12918 (0.0008) +[2026-06-07 02:51:47,154][472573] Updated weights for policy 0, policy_version 12929 (0.0008) +[2026-06-07 02:51:47,282][472573] Updated weights for policy 0, policy_version 12941 (0.0008) +[2026-06-07 02:51:47,403][472573] Updated weights for policy 0, policy_version 12952 (0.0008) +[2026-06-07 02:51:47,548][472573] Updated weights for policy 0, policy_version 12965 (0.0008) +[2026-06-07 02:51:47,680][472573] Updated weights for policy 0, policy_version 12977 (0.0008) +[2026-06-07 02:51:47,819][472573] Updated weights for policy 0, policy_version 12989 (0.0008) +[2026-06-07 02:51:48,103][464932] Fps is (10 sec: 29491.4, 60 sec: 28398.9, 300 sec: 27150.6). Total num frames: 6651904. Throughput: 0: 28282.3. Samples: 6662400. Policy #0 lag: (min: 54.0, avg: 64.7, max: 118.0) +[2026-06-07 02:51:48,104][464932] Avg episode reward: [(0, '349.684')] +[2026-06-07 02:51:48,109][472028] Saving new best policy, reward=349.684! +[2026-06-07 02:51:48,405][472573] Updated weights for policy 0, policy_version 13001 (0.0008) +[2026-06-07 02:51:48,521][472573] Updated weights for policy 0, policy_version 13012 (0.0008) +[2026-06-07 02:51:48,658][472573] Updated weights for policy 0, policy_version 13024 (0.0008) +[2026-06-07 02:51:48,783][472573] Updated weights for policy 0, policy_version 13035 (0.0008) +[2026-06-07 02:51:48,900][472573] Updated weights for policy 0, policy_version 13046 (0.0008) +[2026-06-07 02:51:49,491][472573] Updated weights for policy 0, policy_version 13059 (0.0008) +[2026-06-07 02:51:49,639][472573] Updated weights for policy 0, policy_version 13072 (0.0008) +[2026-06-07 02:51:49,752][472573] Updated weights for policy 0, policy_version 13082 (0.0008) +[2026-06-07 02:51:49,874][472573] Updated weights for policy 0, policy_version 13093 (0.0008) +[2026-06-07 02:51:50,017][472573] Updated weights for policy 0, policy_version 13106 (0.0008) +[2026-06-07 02:51:50,150][472573] Updated weights for policy 0, policy_version 13118 (0.0008) +[2026-06-07 02:51:50,709][472573] Updated weights for policy 0, policy_version 13128 (0.0007) +[2026-06-07 02:51:50,852][472573] Updated weights for policy 0, policy_version 13141 (0.0008) +[2026-06-07 02:51:50,982][472573] Updated weights for policy 0, policy_version 13153 (0.0008) +[2026-06-07 02:51:51,114][472573] Updated weights for policy 0, policy_version 13165 (0.0009) +[2026-06-07 02:51:51,233][472573] Updated weights for policy 0, policy_version 13176 (0.0008) +[2026-06-07 02:51:51,785][472573] Updated weights for policy 0, policy_version 13187 (0.0008) +[2026-06-07 02:51:51,896][472573] Updated weights for policy 0, policy_version 13197 (0.0008) +[2026-06-07 02:51:52,028][472573] Updated weights for policy 0, policy_version 13209 (0.0008) +[2026-06-07 02:51:52,151][472573] Updated weights for policy 0, policy_version 13220 (0.0008) +[2026-06-07 02:51:52,276][472573] Updated weights for policy 0, policy_version 13231 (0.0008) +[2026-06-07 02:51:52,399][472573] Updated weights for policy 0, policy_version 13242 (0.0008) +[2026-06-07 02:51:52,982][472573] Updated weights for policy 0, policy_version 13254 (0.0008) +[2026-06-07 02:51:53,103][464932] Fps is (10 sec: 29491.4, 60 sec: 28398.9, 300 sec: 27131.9). Total num frames: 6782976. Throughput: 0: 28273.8. Samples: 6752256. Policy #0 lag: (min: 54.0, avg: 64.7, max: 118.0) +[2026-06-07 02:51:53,104][464932] Avg episode reward: [(0, '387.431')] +[2026-06-07 02:51:53,106][472573] Updated weights for policy 0, policy_version 13265 (0.0009) +[2026-06-07 02:51:53,240][472573] Updated weights for policy 0, policy_version 13277 (0.0008) +[2026-06-07 02:51:53,370][472573] Updated weights for policy 0, policy_version 13289 (0.0008) +[2026-06-07 02:51:53,494][472573] Updated weights for policy 0, policy_version 13300 (0.0009) +[2026-06-07 02:51:53,621][472573] Updated weights for policy 0, policy_version 13311 (0.0009) +[2026-06-07 02:51:53,625][472028] Saving new best policy, reward=387.431! +[2026-06-07 02:51:54,164][472573] Updated weights for policy 0, policy_version 13322 (0.0007) +[2026-06-07 02:51:54,296][472573] Updated weights for policy 0, policy_version 13334 (0.0009) +[2026-06-07 02:51:54,414][472573] Updated weights for policy 0, policy_version 13344 (0.0008) +[2026-06-07 02:51:54,530][472573] Updated weights for policy 0, policy_version 13355 (0.0008) +[2026-06-07 02:51:54,643][472573] Updated weights for policy 0, policy_version 13365 (0.0008) +[2026-06-07 02:51:54,769][472573] Updated weights for policy 0, policy_version 13376 (0.0009) +[2026-06-07 02:51:55,339][472573] Updated weights for policy 0, policy_version 13387 (0.0008) +[2026-06-07 02:51:55,469][472573] Updated weights for policy 0, policy_version 13399 (0.0008) +[2026-06-07 02:51:55,588][472573] Updated weights for policy 0, policy_version 13409 (0.0009) +[2026-06-07 02:51:55,728][472573] Updated weights for policy 0, policy_version 13422 (0.0008) +[2026-06-07 02:51:55,852][472573] Updated weights for policy 0, policy_version 13433 (0.0008) +[2026-06-07 02:51:56,417][472573] Updated weights for policy 0, policy_version 13445 (0.0007) +[2026-06-07 02:51:56,531][472573] Updated weights for policy 0, policy_version 13456 (0.0009) +[2026-06-07 02:51:56,640][472573] Updated weights for policy 0, policy_version 13466 (0.0008) +[2026-06-07 02:51:56,762][472573] Updated weights for policy 0, policy_version 13478 (0.0008) +[2026-06-07 02:51:56,903][472573] Updated weights for policy 0, policy_version 13491 (0.0008) +[2026-06-07 02:51:57,033][472573] Updated weights for policy 0, policy_version 13503 (0.0010) +[2026-06-07 02:51:57,647][472573] Updated weights for policy 0, policy_version 13516 (0.0008) +[2026-06-07 02:51:57,769][472573] Updated weights for policy 0, policy_version 13528 (0.0009) +[2026-06-07 02:51:57,892][472573] Updated weights for policy 0, policy_version 13539 (0.0008) +[2026-06-07 02:51:58,021][472573] Updated weights for policy 0, policy_version 13551 (0.0008) +[2026-06-07 02:51:58,103][464932] Fps is (10 sec: 26214.2, 60 sec: 28398.9, 300 sec: 27113.9). Total num frames: 6914048. Throughput: 0: 28293.7. Samples: 6920192. Policy #0 lag: (min: 54.0, avg: 64.7, max: 118.0) +[2026-06-07 02:51:58,104][464932] Avg episode reward: [(0, '379.137')] +[2026-06-07 02:51:58,140][472573] Updated weights for policy 0, policy_version 13562 (0.0008) +[2026-06-07 02:51:58,761][472573] Updated weights for policy 0, policy_version 13575 (0.0008) +[2026-06-07 02:51:58,906][472573] Updated weights for policy 0, policy_version 13588 (0.0008) +[2026-06-07 02:51:59,041][472573] Updated weights for policy 0, policy_version 13600 (0.0008) +[2026-06-07 02:51:59,150][472573] Updated weights for policy 0, policy_version 13610 (0.0008) +[2026-06-07 02:51:59,274][472573] Updated weights for policy 0, policy_version 13621 (0.0008) +[2026-06-07 02:51:59,395][472573] Updated weights for policy 0, policy_version 13632 (0.0008) +[2026-06-07 02:51:59,943][472573] Updated weights for policy 0, policy_version 13642 (0.0007) +[2026-06-07 02:52:00,057][472573] Updated weights for policy 0, policy_version 13652 (0.0009) +[2026-06-07 02:52:00,164][472573] Updated weights for policy 0, policy_version 13662 (0.0008) +[2026-06-07 02:52:00,289][472573] Updated weights for policy 0, policy_version 13673 (0.0008) +[2026-06-07 02:52:00,401][472573] Updated weights for policy 0, policy_version 13683 (0.0008) +[2026-06-07 02:52:00,525][472573] Updated weights for policy 0, policy_version 13694 (0.0008) +[2026-06-07 02:52:01,079][472573] Updated weights for policy 0, policy_version 13704 (0.0008) +[2026-06-07 02:52:01,188][472573] Updated weights for policy 0, policy_version 13714 (0.0008) +[2026-06-07 02:52:01,315][472573] Updated weights for policy 0, policy_version 13725 (0.0008) +[2026-06-07 02:52:01,439][472573] Updated weights for policy 0, policy_version 13736 (0.0008) +[2026-06-07 02:52:01,555][472573] Updated weights for policy 0, policy_version 13746 (0.0008) +[2026-06-07 02:52:01,673][472573] Updated weights for policy 0, policy_version 13757 (0.0008) +[2026-06-07 02:52:02,242][472573] Updated weights for policy 0, policy_version 13767 (0.0006) +[2026-06-07 02:52:02,368][472573] Updated weights for policy 0, policy_version 13778 (0.0007) +[2026-06-07 02:52:02,491][472573] Updated weights for policy 0, policy_version 13789 (0.0008) +[2026-06-07 02:52:02,625][472573] Updated weights for policy 0, policy_version 13801 (0.0008) +[2026-06-07 02:52:02,741][472573] Updated weights for policy 0, policy_version 13811 (0.0008) +[2026-06-07 02:52:02,877][472573] Updated weights for policy 0, policy_version 13823 (0.0008) +[2026-06-07 02:52:03,103][464932] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 27222.7). Total num frames: 7077888. Throughput: 0: 28350.7. Samples: 7089152. Policy #0 lag: (min: 63.0, avg: 73.4, max: 127.0) +[2026-06-07 02:52:03,104][464932] Avg episode reward: [(0, '405.825')] +[2026-06-07 02:52:03,109][472028] Saving new best policy, reward=405.825! +[2026-06-07 02:52:03,433][472573] Updated weights for policy 0, policy_version 13835 (0.0008) +[2026-06-07 02:52:03,560][472573] Updated weights for policy 0, policy_version 13846 (0.0008) +[2026-06-07 02:52:03,682][472573] Updated weights for policy 0, policy_version 13857 (0.0008) +[2026-06-07 02:52:03,826][472573] Updated weights for policy 0, policy_version 13870 (0.0008) +[2026-06-07 02:52:03,970][472573] Updated weights for policy 0, policy_version 13883 (0.0008) +[2026-06-07 02:52:04,533][472573] Updated weights for policy 0, policy_version 13894 (0.0008) +[2026-06-07 02:52:04,644][472573] Updated weights for policy 0, policy_version 13904 (0.0008) +[2026-06-07 02:52:04,766][472573] Updated weights for policy 0, policy_version 13915 (0.0008) +[2026-06-07 02:52:04,914][472573] Updated weights for policy 0, policy_version 13928 (0.0008) +[2026-06-07 02:52:05,036][472573] Updated weights for policy 0, policy_version 13939 (0.0008) +[2026-06-07 02:52:05,152][472573] Updated weights for policy 0, policy_version 13949 (0.0008) +[2026-06-07 02:52:05,683][472573] Updated weights for policy 0, policy_version 13959 (0.0008) +[2026-06-07 02:52:05,793][472573] Updated weights for policy 0, policy_version 13969 (0.0008) +[2026-06-07 02:52:05,915][472573] Updated weights for policy 0, policy_version 13980 (0.0005) +[2026-06-07 02:52:06,043][472573] Updated weights for policy 0, policy_version 13991 (0.0005) +[2026-06-07 02:52:06,184][472573] Updated weights for policy 0, policy_version 14004 (0.0005) +[2026-06-07 02:52:06,313][472573] Updated weights for policy 0, policy_version 14015 (0.0005) +[2026-06-07 02:52:06,853][472573] Updated weights for policy 0, policy_version 14026 (0.0005) +[2026-06-07 02:52:06,968][472573] Updated weights for policy 0, policy_version 14036 (0.0006) +[2026-06-07 02:52:07,091][472573] Updated weights for policy 0, policy_version 14047 (0.0008) +[2026-06-07 02:52:07,220][472573] Updated weights for policy 0, policy_version 14058 (0.0008) +[2026-06-07 02:52:07,333][472573] Updated weights for policy 0, policy_version 14069 (0.0008) +[2026-06-07 02:52:07,446][472573] Updated weights for policy 0, policy_version 14079 (0.0008) +[2026-06-07 02:52:08,009][472573] Updated weights for policy 0, policy_version 14090 (0.0008) +[2026-06-07 02:52:08,103][464932] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 27203.6). Total num frames: 7208960. Throughput: 0: 28330.7. Samples: 7177856. Policy #0 lag: (min: 63.0, avg: 73.4, max: 127.0) +[2026-06-07 02:52:08,104][464932] Avg episode reward: [(0, '403.837')] +[2026-06-07 02:52:08,121][472573] Updated weights for policy 0, policy_version 14100 (0.0008) +[2026-06-07 02:52:08,233][472573] Updated weights for policy 0, policy_version 14110 (0.0008) +[2026-06-07 02:52:08,367][472573] Updated weights for policy 0, policy_version 14122 (0.0009) +[2026-06-07 02:52:08,482][472573] Updated weights for policy 0, policy_version 14132 (0.0008) +[2026-06-07 02:52:08,606][472573] Updated weights for policy 0, policy_version 14143 (0.0008) +[2026-06-07 02:52:09,168][472573] Updated weights for policy 0, policy_version 14153 (0.0008) +[2026-06-07 02:52:09,282][472573] Updated weights for policy 0, policy_version 14163 (0.0008) +[2026-06-07 02:52:09,414][472573] Updated weights for policy 0, policy_version 14175 (0.0008) +[2026-06-07 02:52:09,539][472573] Updated weights for policy 0, policy_version 14186 (0.0008) +[2026-06-07 02:52:09,652][472573] Updated weights for policy 0, policy_version 14196 (0.0008) +[2026-06-07 02:52:09,791][472573] Updated weights for policy 0, policy_version 14208 (0.0008) +[2026-06-07 02:52:10,331][472573] Updated weights for policy 0, policy_version 14218 (0.0008) +[2026-06-07 02:52:10,467][472573] Updated weights for policy 0, policy_version 14230 (0.0008) +[2026-06-07 02:52:10,590][472573] Updated weights for policy 0, policy_version 14241 (0.0008) +[2026-06-07 02:52:10,715][472573] Updated weights for policy 0, policy_version 14252 (0.0008) +[2026-06-07 02:52:10,831][472573] Updated weights for policy 0, policy_version 14262 (0.0008) +[2026-06-07 02:52:10,937][472573] Updated weights for policy 0, policy_version 14272 (0.0008) +[2026-06-07 02:52:11,498][472573] Updated weights for policy 0, policy_version 14283 (0.0008) +[2026-06-07 02:52:11,623][472573] Updated weights for policy 0, policy_version 14294 (0.0008) +[2026-06-07 02:52:11,737][472573] Updated weights for policy 0, policy_version 14304 (0.0009) +[2026-06-07 02:52:11,873][472573] Updated weights for policy 0, policy_version 14316 (0.0008) +[2026-06-07 02:52:11,988][472573] Updated weights for policy 0, policy_version 14326 (0.0008) +[2026-06-07 02:52:12,546][472573] Updated weights for policy 0, policy_version 14337 (0.0008) +[2026-06-07 02:52:12,669][472573] Updated weights for policy 0, policy_version 14348 (0.0008) +[2026-06-07 02:52:12,781][472573] Updated weights for policy 0, policy_version 14358 (0.0008) +[2026-06-07 02:52:12,893][472573] Updated weights for policy 0, policy_version 14368 (0.0008) +[2026-06-07 02:52:13,017][472573] Updated weights for policy 0, policy_version 14379 (0.0008) +[2026-06-07 02:52:13,103][464932] Fps is (10 sec: 26214.3, 60 sec: 28398.9, 300 sec: 27185.3). Total num frames: 7340032. Throughput: 0: 28353.4. Samples: 7347584. Policy #0 lag: (min: 57.0, avg: 83.8, max: 121.0) +[2026-06-07 02:52:13,104][464932] Avg episode reward: [(0, '386.397')] +[2026-06-07 02:52:13,134][472573] Updated weights for policy 0, policy_version 14389 (0.0005) +[2026-06-07 02:52:13,251][472573] Updated weights for policy 0, policy_version 14399 (0.0008) +[2026-06-07 02:52:13,785][472573] Updated weights for policy 0, policy_version 14409 (0.0008) +[2026-06-07 02:52:13,897][472573] Updated weights for policy 0, policy_version 14419 (0.0008) +[2026-06-07 02:52:14,040][472573] Updated weights for policy 0, policy_version 14432 (0.0008) +[2026-06-07 02:52:14,151][472573] Updated weights for policy 0, policy_version 14442 (0.0008) +[2026-06-07 02:52:14,264][472573] Updated weights for policy 0, policy_version 14452 (0.0008) +[2026-06-07 02:52:14,392][472573] Updated weights for policy 0, policy_version 14463 (0.0008) +[2026-06-07 02:52:14,931][472573] Updated weights for policy 0, policy_version 14473 (0.0008) +[2026-06-07 02:52:15,058][472573] Updated weights for policy 0, policy_version 14484 (0.0008) +[2026-06-07 02:52:15,177][472573] Updated weights for policy 0, policy_version 14495 (0.0008) +[2026-06-07 02:52:15,298][472573] Updated weights for policy 0, policy_version 14506 (0.0008) +[2026-06-07 02:52:15,421][472573] Updated weights for policy 0, policy_version 14517 (0.0008) +[2026-06-07 02:52:15,535][472573] Updated weights for policy 0, policy_version 14527 (0.0008) +[2026-06-07 02:52:16,092][472573] Updated weights for policy 0, policy_version 14538 (0.0007) +[2026-06-07 02:52:16,236][472573] Updated weights for policy 0, policy_version 14551 (0.0008) +[2026-06-07 02:52:16,355][472573] Updated weights for policy 0, policy_version 14562 (0.0008) +[2026-06-07 02:52:16,471][472573] Updated weights for policy 0, policy_version 14572 (0.0008) +[2026-06-07 02:52:16,604][472573] Updated weights for policy 0, policy_version 14584 (0.0008) +[2026-06-07 02:52:17,147][472573] Updated weights for policy 0, policy_version 14594 (0.0007) +[2026-06-07 02:52:17,272][472573] Updated weights for policy 0, policy_version 14605 (0.0004) +[2026-06-07 02:52:17,379][472573] Updated weights for policy 0, policy_version 14615 (0.0004) +[2026-06-07 02:52:17,497][472573] Updated weights for policy 0, policy_version 14625 (0.0004) +[2026-06-07 02:52:17,610][472573] Updated weights for policy 0, policy_version 14635 (0.0004) +[2026-06-07 02:52:17,715][472573] Updated weights for policy 0, policy_version 14645 (0.0004) +[2026-06-07 02:52:17,833][472573] Updated weights for policy 0, policy_version 14655 (0.0004) +[2026-06-07 02:52:18,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28399.0, 300 sec: 27286.8). Total num frames: 7503872. Throughput: 0: 28376.2. Samples: 7514752. Policy #0 lag: (min: 57.0, avg: 83.8, max: 121.0) +[2026-06-07 02:52:18,104][464932] Avg episode reward: [(0, '386.042')] +[2026-06-07 02:52:18,354][472573] Updated weights for policy 0, policy_version 14665 (0.0007) +[2026-06-07 02:52:18,474][472573] Updated weights for policy 0, policy_version 14676 (0.0008) +[2026-06-07 02:52:18,600][472573] Updated weights for policy 0, policy_version 14687 (0.0009) +[2026-06-07 02:52:18,719][472573] Updated weights for policy 0, policy_version 14698 (0.0008) +[2026-06-07 02:52:18,845][472573] Updated weights for policy 0, policy_version 14709 (0.0008) +[2026-06-07 02:52:18,956][472573] Updated weights for policy 0, policy_version 14719 (0.0008) +[2026-06-07 02:52:19,512][472573] Updated weights for policy 0, policy_version 14731 (0.0008) +[2026-06-07 02:52:19,660][472573] Updated weights for policy 0, policy_version 14745 (0.0008) +[2026-06-07 02:52:19,776][472573] Updated weights for policy 0, policy_version 14756 (0.0008) +[2026-06-07 02:52:19,917][472573] Updated weights for policy 0, policy_version 14769 (0.0008) +[2026-06-07 02:52:20,033][472573] Updated weights for policy 0, policy_version 14780 (0.0008) +[2026-06-07 02:52:20,619][472573] Updated weights for policy 0, policy_version 14793 (0.0008) +[2026-06-07 02:52:20,737][472573] Updated weights for policy 0, policy_version 14804 (0.0007) +[2026-06-07 02:52:20,848][472573] Updated weights for policy 0, policy_version 14814 (0.0004) +[2026-06-07 02:52:20,961][472573] Updated weights for policy 0, policy_version 14824 (0.0005) +[2026-06-07 02:52:21,083][472573] Updated weights for policy 0, policy_version 14834 (0.0008) +[2026-06-07 02:52:21,203][472573] Updated weights for policy 0, policy_version 14845 (0.0008) +[2026-06-07 02:52:21,761][472573] Updated weights for policy 0, policy_version 14855 (0.0008) +[2026-06-07 02:52:21,886][472573] Updated weights for policy 0, policy_version 14866 (0.0008) +[2026-06-07 02:52:22,018][472573] Updated weights for policy 0, policy_version 14878 (0.0008) +[2026-06-07 02:52:22,131][472573] Updated weights for policy 0, policy_version 14888 (0.0008) +[2026-06-07 02:52:22,242][472573] Updated weights for policy 0, policy_version 14898 (0.0008) +[2026-06-07 02:52:22,350][472573] Updated weights for policy 0, policy_version 14908 (0.0008) +[2026-06-07 02:52:22,916][472573] Updated weights for policy 0, policy_version 14919 (0.0010) +[2026-06-07 02:52:23,049][472573] Updated weights for policy 0, policy_version 14930 (0.0011) +[2026-06-07 02:52:23,103][464932] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 27267.7). Total num frames: 7634944. Throughput: 0: 28401.8. Samples: 7604480. Policy #0 lag: (min: 62.0, avg: 73.0, max: 126.0) +[2026-06-07 02:52:23,104][464932] Avg episode reward: [(0, '409.188')] +[2026-06-07 02:52:23,181][472573] Updated weights for policy 0, policy_version 14942 (0.0011) +[2026-06-07 02:52:23,300][472573] Updated weights for policy 0, policy_version 14952 (0.0011) +[2026-06-07 02:52:23,427][472573] Updated weights for policy 0, policy_version 14963 (0.0011) +[2026-06-07 02:52:23,546][472573] Updated weights for policy 0, policy_version 14973 (0.0010) +[2026-06-07 02:52:23,574][472028] Saving new best policy, reward=409.188! +[2026-06-07 02:52:24,115][472573] Updated weights for policy 0, policy_version 14985 (0.0010) +[2026-06-07 02:52:24,231][472573] Updated weights for policy 0, policy_version 14995 (0.0012) +[2026-06-07 02:52:24,373][472573] Updated weights for policy 0, policy_version 15007 (0.0010) +[2026-06-07 02:52:24,500][472573] Updated weights for policy 0, policy_version 15018 (0.0011) +[2026-06-07 02:52:24,621][472573] Updated weights for policy 0, policy_version 15028 (0.0011) +[2026-06-07 02:52:24,735][472573] Updated weights for policy 0, policy_version 15038 (0.0012) +[2026-06-07 02:52:25,237][472573] Updated weights for policy 0, policy_version 15048 (0.0009) +[2026-06-07 02:52:25,359][472573] Updated weights for policy 0, policy_version 15058 (0.0010) +[2026-06-07 02:52:25,470][472573] Updated weights for policy 0, policy_version 15068 (0.0012) +[2026-06-07 02:52:25,584][472573] Updated weights for policy 0, policy_version 15078 (0.0010) +[2026-06-07 02:52:25,704][472573] Updated weights for policy 0, policy_version 15088 (0.0012) +[2026-06-07 02:52:25,832][472573] Updated weights for policy 0, policy_version 15099 (0.0010) +[2026-06-07 02:52:26,362][472573] Updated weights for policy 0, policy_version 15110 (0.0010) +[2026-06-07 02:52:26,486][472573] Updated weights for policy 0, policy_version 15121 (0.0013) +[2026-06-07 02:52:26,601][472573] Updated weights for policy 0, policy_version 15131 (0.0011) +[2026-06-07 02:52:26,720][472573] Updated weights for policy 0, policy_version 15141 (0.0010) +[2026-06-07 02:52:26,845][472573] Updated weights for policy 0, policy_version 15152 (0.0012) +[2026-06-07 02:52:26,969][472573] Updated weights for policy 0, policy_version 15163 (0.0013) +[2026-06-07 02:52:27,551][472573] Updated weights for policy 0, policy_version 15176 (0.0008) +[2026-06-07 02:52:27,661][472573] Updated weights for policy 0, policy_version 15186 (0.0005) +[2026-06-07 02:52:27,784][472573] Updated weights for policy 0, policy_version 15197 (0.0005) +[2026-06-07 02:52:27,912][472573] Updated weights for policy 0, policy_version 15209 (0.0011) +[2026-06-07 02:52:28,046][472573] Updated weights for policy 0, policy_version 15222 (0.0011) +[2026-06-07 02:52:28,103][464932] Fps is (10 sec: 26214.6, 60 sec: 28399.0, 300 sec: 27249.2). Total num frames: 7766016. Throughput: 0: 28339.3. Samples: 7770368. Policy #0 lag: (min: 62.0, avg: 73.0, max: 126.0) +[2026-06-07 02:52:28,104][464932] Avg episode reward: [(0, '455.373')] +[2026-06-07 02:52:28,154][472028] Saving new best policy, reward=455.373! +[2026-06-07 02:52:28,622][472573] Updated weights for policy 0, policy_version 15233 (0.0011) +[2026-06-07 02:52:28,752][472573] Updated weights for policy 0, policy_version 15245 (0.0005) +[2026-06-07 02:52:28,857][472573] Updated weights for policy 0, policy_version 15255 (0.0004) +[2026-06-07 02:52:28,990][472573] Updated weights for policy 0, policy_version 15267 (0.0007) +[2026-06-07 02:52:29,111][472573] Updated weights for policy 0, policy_version 15278 (0.0008) +[2026-06-07 02:52:29,218][472573] Updated weights for policy 0, policy_version 15288 (0.0008) +[2026-06-07 02:52:29,798][472573] Updated weights for policy 0, policy_version 15299 (0.0008) +[2026-06-07 02:52:29,937][472573] Updated weights for policy 0, policy_version 15312 (0.0008) +[2026-06-07 02:52:30,043][472573] Updated weights for policy 0, policy_version 15322 (0.0008) +[2026-06-07 02:52:30,170][472573] Updated weights for policy 0, policy_version 15333 (0.0008) +[2026-06-07 02:52:30,278][472573] Updated weights for policy 0, policy_version 15343 (0.0008) +[2026-06-07 02:52:30,384][472573] Updated weights for policy 0, policy_version 15353 (0.0008) +[2026-06-07 02:52:30,974][472573] Updated weights for policy 0, policy_version 15365 (0.0008) +[2026-06-07 02:52:31,090][472573] Updated weights for policy 0, policy_version 15376 (0.0008) +[2026-06-07 02:52:31,201][472573] Updated weights for policy 0, policy_version 15386 (0.0008) +[2026-06-07 02:52:31,351][472573] Updated weights for policy 0, policy_version 15400 (0.0008) +[2026-06-07 02:52:31,500][472573] Updated weights for policy 0, policy_version 15414 (0.0008) +[2026-06-07 02:52:32,077][472573] Updated weights for policy 0, policy_version 15425 (0.0008) +[2026-06-07 02:52:32,197][472573] Updated weights for policy 0, policy_version 15436 (0.0008) +[2026-06-07 02:52:32,315][472573] Updated weights for policy 0, policy_version 15447 (0.0008) +[2026-06-07 02:52:32,436][472573] Updated weights for policy 0, policy_version 15458 (0.0008) +[2026-06-07 02:52:32,565][472573] Updated weights for policy 0, policy_version 15470 (0.0008) +[2026-06-07 02:52:32,670][472573] Updated weights for policy 0, policy_version 15480 (0.0008) +[2026-06-07 02:52:33,103][464932] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 27344.3). Total num frames: 7929856. Throughput: 0: 28336.4. Samples: 7937536. Policy #0 lag: (min: 62.0, avg: 73.0, max: 126.0) +[2026-06-07 02:52:33,104][464932] Avg episode reward: [(0, '495.865')] +[2026-06-07 02:52:33,108][472028] Saving new best policy, reward=495.865! +[2026-06-07 02:52:33,245][472573] Updated weights for policy 0, policy_version 15490 (0.0008) +[2026-06-07 02:52:33,380][472573] Updated weights for policy 0, policy_version 15502 (0.0008) +[2026-06-07 02:52:33,485][472573] Updated weights for policy 0, policy_version 15512 (0.0008) +[2026-06-07 02:52:33,615][472573] Updated weights for policy 0, policy_version 15523 (0.0008) +[2026-06-07 02:52:33,730][472573] Updated weights for policy 0, policy_version 15533 (0.0008) +[2026-06-07 02:52:33,851][472573] Updated weights for policy 0, policy_version 15544 (0.0008) +[2026-06-07 02:52:34,390][472573] Updated weights for policy 0, policy_version 15554 (0.0008) +[2026-06-07 02:52:34,508][472573] Updated weights for policy 0, policy_version 15564 (0.0008) +[2026-06-07 02:52:34,618][472573] Updated weights for policy 0, policy_version 15574 (0.0008) +[2026-06-07 02:52:34,725][472573] Updated weights for policy 0, policy_version 15584 (0.0008) +[2026-06-07 02:52:34,839][472573] Updated weights for policy 0, policy_version 15594 (0.0008) +[2026-06-07 02:52:34,951][472573] Updated weights for policy 0, policy_version 15604 (0.0008) +[2026-06-07 02:52:35,072][472573] Updated weights for policy 0, policy_version 15615 (0.0008) +[2026-06-07 02:52:35,626][472573] Updated weights for policy 0, policy_version 15625 (0.0008) +[2026-06-07 02:52:35,738][472573] Updated weights for policy 0, policy_version 15635 (0.0008) +[2026-06-07 02:52:35,853][472573] Updated weights for policy 0, policy_version 15645 (0.0008) +[2026-06-07 02:52:35,959][472573] Updated weights for policy 0, policy_version 15655 (0.0008) +[2026-06-07 02:52:36,075][472573] Updated weights for policy 0, policy_version 15665 (0.0008) +[2026-06-07 02:52:36,219][472573] Updated weights for policy 0, policy_version 15678 (0.0008) +[2026-06-07 02:52:36,762][472573] Updated weights for policy 0, policy_version 15688 (0.0008) +[2026-06-07 02:52:36,879][472573] Updated weights for policy 0, policy_version 15698 (0.0008) +[2026-06-07 02:52:37,001][472573] Updated weights for policy 0, policy_version 15709 (0.0008) +[2026-06-07 02:52:37,137][472573] Updated weights for policy 0, policy_version 15721 (0.0008) +[2026-06-07 02:52:37,265][472573] Updated weights for policy 0, policy_version 15733 (0.0008) +[2026-06-07 02:52:37,380][472573] Updated weights for policy 0, policy_version 15743 (0.0008) +[2026-06-07 02:52:37,921][472573] Updated weights for policy 0, policy_version 15753 (0.0008) +[2026-06-07 02:52:38,031][472573] Updated weights for policy 0, policy_version 15763 (0.0009) +[2026-06-07 02:52:38,103][464932] Fps is (10 sec: 29490.9, 60 sec: 28398.9, 300 sec: 27325.2). Total num frames: 8060928. Throughput: 0: 28384.7. Samples: 8029568. Policy #0 lag: (min: 63.0, avg: 74.3, max: 127.0) +[2026-06-07 02:52:38,104][464932] Avg episode reward: [(0, '507.621')] +[2026-06-07 02:52:38,165][472573] Updated weights for policy 0, policy_version 15775 (0.0008) +[2026-06-07 02:52:38,283][472573] Updated weights for policy 0, policy_version 15785 (0.0008) +[2026-06-07 02:52:38,415][472573] Updated weights for policy 0, policy_version 15797 (0.0009) +[2026-06-07 02:52:38,543][472028] Saving new best policy, reward=507.621! +[2026-06-07 02:52:38,547][472573] Updated weights for policy 0, policy_version 15808 (0.0011) +[2026-06-07 02:52:39,100][472573] Updated weights for policy 0, policy_version 15819 (0.0008) +[2026-06-07 02:52:39,221][472573] Updated weights for policy 0, policy_version 15830 (0.0008) +[2026-06-07 02:52:39,325][472573] Updated weights for policy 0, policy_version 15840 (0.0008) +[2026-06-07 02:52:39,440][472573] Updated weights for policy 0, policy_version 15850 (0.0008) +[2026-06-07 02:52:39,556][472573] Updated weights for policy 0, policy_version 15861 (0.0009) +[2026-06-07 02:52:40,155][472573] Updated weights for policy 0, policy_version 15873 (0.0008) +[2026-06-07 02:52:40,274][472573] Updated weights for policy 0, policy_version 15884 (0.0008) +[2026-06-07 02:52:40,379][472573] Updated weights for policy 0, policy_version 15894 (0.0009) +[2026-06-07 02:52:40,519][472573] Updated weights for policy 0, policy_version 15907 (0.0008) +[2026-06-07 02:52:40,630][472573] Updated weights for policy 0, policy_version 15917 (0.0008) +[2026-06-07 02:52:40,762][472573] Updated weights for policy 0, policy_version 15929 (0.0009) +[2026-06-07 02:52:41,335][472573] Updated weights for policy 0, policy_version 15940 (0.0008) +[2026-06-07 02:52:41,479][472573] Updated weights for policy 0, policy_version 15953 (0.0009) +[2026-06-07 02:52:41,591][472573] Updated weights for policy 0, policy_version 15963 (0.0009) +[2026-06-07 02:52:41,703][472573] Updated weights for policy 0, policy_version 15973 (0.0008) +[2026-06-07 02:52:41,843][472573] Updated weights for policy 0, policy_version 15986 (0.0008) +[2026-06-07 02:52:41,968][472573] Updated weights for policy 0, policy_version 15997 (0.0009) +[2026-06-07 02:52:42,549][472573] Updated weights for policy 0, policy_version 16009 (0.0008) +[2026-06-07 02:52:42,667][472573] Updated weights for policy 0, policy_version 16020 (0.0008) +[2026-06-07 02:52:42,778][472573] Updated weights for policy 0, policy_version 16030 (0.0009) +[2026-06-07 02:52:42,896][472573] Updated weights for policy 0, policy_version 16041 (0.0008) +[2026-06-07 02:52:43,038][472573] Updated weights for policy 0, policy_version 16054 (0.0008) +[2026-06-07 02:52:43,103][464932] Fps is (10 sec: 26214.5, 60 sec: 28399.0, 300 sec: 27769.5). Total num frames: 8192000. Throughput: 0: 28362.1. Samples: 8196480. Policy #0 lag: (min: 63.0, avg: 74.3, max: 127.0) +[2026-06-07 02:52:43,104][464932] Avg episode reward: [(0, '492.161')] +[2026-06-07 02:52:43,605][472573] Updated weights for policy 0, policy_version 16065 (0.0009) +[2026-06-07 02:52:43,730][472573] Updated weights for policy 0, policy_version 16076 (0.0008) +[2026-06-07 02:52:43,869][472573] Updated weights for policy 0, policy_version 16089 (0.0008) +[2026-06-07 02:52:43,991][472573] Updated weights for policy 0, policy_version 16100 (0.0008) +[2026-06-07 02:52:44,105][472573] Updated weights for policy 0, policy_version 16110 (0.0008) +[2026-06-07 02:52:44,231][472573] Updated weights for policy 0, policy_version 16121 (0.0008) +[2026-06-07 02:52:44,798][472573] Updated weights for policy 0, policy_version 16131 (0.0008) +[2026-06-07 02:52:44,926][472573] Updated weights for policy 0, policy_version 16142 (0.0008) +[2026-06-07 02:52:45,044][472573] Updated weights for policy 0, policy_version 16153 (0.0008) +[2026-06-07 02:52:45,157][472573] Updated weights for policy 0, policy_version 16163 (0.0008) +[2026-06-07 02:52:45,267][472573] Updated weights for policy 0, policy_version 16173 (0.0008) +[2026-06-07 02:52:45,399][472573] Updated weights for policy 0, policy_version 16185 (0.0008) +[2026-06-07 02:52:45,964][472573] Updated weights for policy 0, policy_version 16196 (0.0007) +[2026-06-07 02:52:46,091][472573] Updated weights for policy 0, policy_version 16208 (0.0008) +[2026-06-07 02:52:46,218][472573] Updated weights for policy 0, policy_version 16219 (0.0008) +[2026-06-07 02:52:46,377][472573] Updated weights for policy 0, policy_version 16233 (0.0008) +[2026-06-07 02:52:46,497][472573] Updated weights for policy 0, policy_version 16244 (0.0008) +[2026-06-07 02:52:46,615][472573] Updated weights for policy 0, policy_version 16254 (0.0008) +[2026-06-07 02:52:47,171][472573] Updated weights for policy 0, policy_version 16265 (0.0008) +[2026-06-07 02:52:47,277][472573] Updated weights for policy 0, policy_version 16275 (0.0008) +[2026-06-07 02:52:47,399][472573] Updated weights for policy 0, policy_version 16286 (0.0008) +[2026-06-07 02:52:47,515][472573] Updated weights for policy 0, policy_version 16296 (0.0009) +[2026-06-07 02:52:47,627][472573] Updated weights for policy 0, policy_version 16306 (0.0008) +[2026-06-07 02:52:47,748][472573] Updated weights for policy 0, policy_version 16317 (0.0008) +[2026-06-07 02:52:48,103][464932] Fps is (10 sec: 29491.5, 60 sec: 28398.9, 300 sec: 27769.5). Total num frames: 8355840. Throughput: 0: 28327.9. Samples: 8363904. Policy #0 lag: (min: 38.0, avg: 50.4, max: 102.0) +[2026-06-07 02:52:48,104][464932] Avg episode reward: [(0, '455.894')] +[2026-06-07 02:52:48,333][472573] Updated weights for policy 0, policy_version 16328 (0.0008) +[2026-06-07 02:52:48,440][472573] Updated weights for policy 0, policy_version 16338 (0.0009) +[2026-06-07 02:52:48,578][472573] Updated weights for policy 0, policy_version 16350 (0.0008) +[2026-06-07 02:52:48,687][472573] Updated weights for policy 0, policy_version 16360 (0.0008) +[2026-06-07 02:52:48,819][472573] Updated weights for policy 0, policy_version 16372 (0.0009) +[2026-06-07 02:52:48,936][472573] Updated weights for policy 0, policy_version 16382 (0.0009) +[2026-06-07 02:52:49,478][472573] Updated weights for policy 0, policy_version 16392 (0.0008) +[2026-06-07 02:52:49,612][472573] Updated weights for policy 0, policy_version 16404 (0.0008) +[2026-06-07 02:52:49,724][472573] Updated weights for policy 0, policy_version 16414 (0.0008) +[2026-06-07 02:52:49,843][472573] Updated weights for policy 0, policy_version 16425 (0.0009) +[2026-06-07 02:52:49,966][472573] Updated weights for policy 0, policy_version 16436 (0.0009) +[2026-06-07 02:52:50,082][472573] Updated weights for policy 0, policy_version 16447 (0.0009) +[2026-06-07 02:52:50,631][472573] Updated weights for policy 0, policy_version 16457 (0.0008) +[2026-06-07 02:52:50,762][472573] Updated weights for policy 0, policy_version 16469 (0.0009) +[2026-06-07 02:52:50,874][472573] Updated weights for policy 0, policy_version 16479 (0.0009) +[2026-06-07 02:52:51,001][472573] Updated weights for policy 0, policy_version 16491 (0.0008) +[2026-06-07 02:52:51,122][472573] Updated weights for policy 0, policy_version 16502 (0.0009) +[2026-06-07 02:52:51,689][472573] Updated weights for policy 0, policy_version 16513 (0.0008) +[2026-06-07 02:52:51,802][472573] Updated weights for policy 0, policy_version 16523 (0.0008) +[2026-06-07 02:52:51,928][472573] Updated weights for policy 0, policy_version 16535 (0.0008) +[2026-06-07 02:52:52,046][472573] Updated weights for policy 0, policy_version 16546 (0.0008) +[2026-06-07 02:52:52,172][472573] Updated weights for policy 0, policy_version 16557 (0.0008) +[2026-06-07 02:52:52,287][472573] Updated weights for policy 0, policy_version 16567 (0.0009) +[2026-06-07 02:52:52,874][472573] Updated weights for policy 0, policy_version 16578 (0.0008) +[2026-06-07 02:52:53,009][472573] Updated weights for policy 0, policy_version 16590 (0.0008) +[2026-06-07 02:52:53,103][464932] Fps is (10 sec: 29490.9, 60 sec: 28398.9, 300 sec: 27658.4). Total num frames: 8486912. Throughput: 0: 28379.0. Samples: 8454912. Policy #0 lag: (min: 38.0, avg: 50.4, max: 102.0) +[2026-06-07 02:52:53,104][464932] Avg episode reward: [(0, '451.142')] +[2026-06-07 02:52:53,120][472573] Updated weights for policy 0, policy_version 16600 (0.0008) +[2026-06-07 02:52:53,236][472573] Updated weights for policy 0, policy_version 16611 (0.0008) +[2026-06-07 02:52:53,350][472573] Updated weights for policy 0, policy_version 16621 (0.0008) +[2026-06-07 02:52:53,471][472573] Updated weights for policy 0, policy_version 16632 (0.0008) +[2026-06-07 02:52:54,044][472573] Updated weights for policy 0, policy_version 16642 (0.0007) +[2026-06-07 02:52:54,154][472573] Updated weights for policy 0, policy_version 16652 (0.0007) +[2026-06-07 02:52:54,268][472573] Updated weights for policy 0, policy_version 16663 (0.0009) +[2026-06-07 02:52:54,382][472573] Updated weights for policy 0, policy_version 16673 (0.0008) +[2026-06-07 02:52:54,501][472573] Updated weights for policy 0, policy_version 16684 (0.0008) +[2026-06-07 02:52:54,633][472573] Updated weights for policy 0, policy_version 16696 (0.0008) +[2026-06-07 02:52:55,180][472573] Updated weights for policy 0, policy_version 16706 (0.0008) +[2026-06-07 02:52:55,290][472573] Updated weights for policy 0, policy_version 16716 (0.0007) +[2026-06-07 02:52:55,421][472573] Updated weights for policy 0, policy_version 16727 (0.0008) +[2026-06-07 02:52:55,526][472573] Updated weights for policy 0, policy_version 16737 (0.0008) +[2026-06-07 02:52:55,644][472573] Updated weights for policy 0, policy_version 16748 (0.0008) +[2026-06-07 02:52:55,752][472573] Updated weights for policy 0, policy_version 16758 (0.0008) +[2026-06-07 02:52:55,863][472573] Updated weights for policy 0, policy_version 16768 (0.0008) +[2026-06-07 02:52:56,440][472573] Updated weights for policy 0, policy_version 16780 (0.0007) +[2026-06-07 02:52:56,568][472573] Updated weights for policy 0, policy_version 16791 (0.0008) +[2026-06-07 02:52:56,699][472573] Updated weights for policy 0, policy_version 16802 (0.0009) +[2026-06-07 02:52:56,813][472573] Updated weights for policy 0, policy_version 16813 (0.0008) +[2026-06-07 02:52:56,946][472573] Updated weights for policy 0, policy_version 16825 (0.0008) +[2026-06-07 02:52:57,522][472573] Updated weights for policy 0, policy_version 16835 (0.0008) +[2026-06-07 02:52:57,629][472573] Updated weights for policy 0, policy_version 16845 (0.0008) +[2026-06-07 02:52:57,750][472573] Updated weights for policy 0, policy_version 16856 (0.0008) +[2026-06-07 02:52:57,873][472573] Updated weights for policy 0, policy_version 16867 (0.0008) +[2026-06-07 02:52:57,998][472573] Updated weights for policy 0, policy_version 16878 (0.0008) +[2026-06-07 02:52:58,103][464932] Fps is (10 sec: 26214.2, 60 sec: 28399.0, 300 sec: 27658.4). Total num frames: 8617984. Throughput: 0: 28347.7. Samples: 8623232. Policy #0 lag: (min: 38.0, avg: 50.4, max: 102.0) +[2026-06-07 02:52:58,104][464932] Avg episode reward: [(0, '479.202')] +[2026-06-07 02:52:58,105][472573] Updated weights for policy 0, policy_version 16888 (0.0008) +[2026-06-07 02:52:58,688][472573] Updated weights for policy 0, policy_version 16899 (0.0008) +[2026-06-07 02:52:58,811][472573] Updated weights for policy 0, policy_version 16910 (0.0008) +[2026-06-07 02:52:58,941][472573] Updated weights for policy 0, policy_version 16922 (0.0008) +[2026-06-07 02:52:59,070][472573] Updated weights for policy 0, policy_version 16933 (0.0008) +[2026-06-07 02:52:59,181][472573] Updated weights for policy 0, policy_version 16943 (0.0009) +[2026-06-07 02:52:59,308][472573] Updated weights for policy 0, policy_version 16954 (0.0009) +[2026-06-07 02:52:59,849][472573] Updated weights for policy 0, policy_version 16964 (0.0008) +[2026-06-07 02:52:59,984][472573] Updated weights for policy 0, policy_version 16976 (0.0011) +[2026-06-07 02:53:00,105][472573] Updated weights for policy 0, policy_version 16987 (0.0008) +[2026-06-07 02:53:00,216][472573] Updated weights for policy 0, policy_version 16997 (0.0008) +[2026-06-07 02:53:00,352][472573] Updated weights for policy 0, policy_version 17009 (0.0008) +[2026-06-07 02:53:00,477][472573] Updated weights for policy 0, policy_version 17020 (0.0008) +[2026-06-07 02:53:01,033][472573] Updated weights for policy 0, policy_version 17030 (0.0008) +[2026-06-07 02:53:01,143][472573] Updated weights for policy 0, policy_version 17040 (0.0009) +[2026-06-07 02:53:01,252][472573] Updated weights for policy 0, policy_version 17050 (0.0008) +[2026-06-07 02:53:01,397][472573] Updated weights for policy 0, policy_version 17063 (0.0009) +[2026-06-07 02:53:01,514][472573] Updated weights for policy 0, policy_version 17073 (0.0008) +[2026-06-07 02:53:01,648][472573] Updated weights for policy 0, policy_version 17085 (0.0008) +[2026-06-07 02:53:02,198][472573] Updated weights for policy 0, policy_version 17096 (0.0008) +[2026-06-07 02:53:02,322][472573] Updated weights for policy 0, policy_version 17107 (0.0008) +[2026-06-07 02:53:02,429][472573] Updated weights for policy 0, policy_version 17117 (0.0009) +[2026-06-07 02:53:02,554][472573] Updated weights for policy 0, policy_version 17128 (0.0006) +[2026-06-07 02:53:02,676][472573] Updated weights for policy 0, policy_version 17139 (0.0005) +[2026-06-07 02:53:02,792][472573] Updated weights for policy 0, policy_version 17149 (0.0008) +[2026-06-07 02:53:03,103][464932] Fps is (10 sec: 29491.1, 60 sec: 28398.9, 300 sec: 27769.5). Total num frames: 8781824. Throughput: 0: 28339.2. Samples: 8790016. Policy #0 lag: (min: 63.0, avg: 74.4, max: 127.0) +[2026-06-07 02:53:03,104][464932] Avg episode reward: [(0, '479.337')] +[2026-06-07 02:53:03,336][472573] Updated weights for policy 0, policy_version 17159 (0.0008) +[2026-06-07 02:53:03,445][472573] Updated weights for policy 0, policy_version 17169 (0.0010) +[2026-06-07 02:53:03,560][472573] Updated weights for policy 0, policy_version 17179 (0.0008) +[2026-06-07 02:53:03,683][472573] Updated weights for policy 0, policy_version 17191 (0.0008) +[2026-06-07 02:53:03,820][472573] Updated weights for policy 0, policy_version 17203 (0.0009) +[2026-06-07 02:53:03,944][472573] Updated weights for policy 0, policy_version 17214 (0.0009) +[2026-06-07 02:53:04,524][472573] Updated weights for policy 0, policy_version 17226 (0.0007) +[2026-06-07 02:53:04,645][472573] Updated weights for policy 0, policy_version 17237 (0.0008) +[2026-06-07 02:53:04,782][472573] Updated weights for policy 0, policy_version 17250 (0.0008) +[2026-06-07 02:53:04,897][472573] Updated weights for policy 0, policy_version 17260 (0.0008) +[2026-06-07 02:53:05,014][472573] Updated weights for policy 0, policy_version 17270 (0.0008) +[2026-06-07 02:53:05,586][472573] Updated weights for policy 0, policy_version 17281 (0.0008) +[2026-06-07 02:53:05,706][472573] Updated weights for policy 0, policy_version 17291 (0.0009) +[2026-06-07 02:53:05,821][472573] Updated weights for policy 0, policy_version 17301 (0.0008) +[2026-06-07 02:53:05,939][472573] Updated weights for policy 0, policy_version 17312 (0.0008) +[2026-06-07 02:53:06,085][472573] Updated weights for policy 0, policy_version 17325 (0.0009) +[2026-06-07 02:53:06,195][472573] Updated weights for policy 0, policy_version 17335 (0.0008) +[2026-06-07 02:53:06,741][472573] Updated weights for policy 0, policy_version 17345 (0.0009) +[2026-06-07 02:53:06,861][472573] Updated weights for policy 0, policy_version 17356 (0.0008) +[2026-06-07 02:53:06,981][472573] Updated weights for policy 0, policy_version 17367 (0.0009) +[2026-06-07 02:53:07,126][472573] Updated weights for policy 0, policy_version 17380 (0.0008) +[2026-06-07 02:53:07,244][472573] Updated weights for policy 0, policy_version 17390 (0.0008) +[2026-06-07 02:53:07,362][472573] Updated weights for policy 0, policy_version 17401 (0.0009) +[2026-06-07 02:53:07,902][472573] Updated weights for policy 0, policy_version 17411 (0.0008) +[2026-06-07 02:53:08,027][472573] Updated weights for policy 0, policy_version 17422 (0.0008) +[2026-06-07 02:53:08,103][464932] Fps is (10 sec: 29491.4, 60 sec: 28399.0, 300 sec: 27769.6). Total num frames: 8912896. Throughput: 0: 28367.7. Samples: 8881024. Policy #0 lag: (min: 63.0, avg: 74.4, max: 127.0) +[2026-06-07 02:53:08,104][464932] Avg episode reward: [(0, '513.220')] +[2026-06-07 02:53:08,149][472573] Updated weights for policy 0, policy_version 17433 (0.0008) +[2026-06-07 02:53:08,264][472573] Updated weights for policy 0, policy_version 17443 (0.0008) +[2026-06-07 02:53:08,384][472573] Updated weights for policy 0, policy_version 17454 (0.0008) +[2026-06-07 02:53:08,504][472573] Updated weights for policy 0, policy_version 17465 (0.0008) +[2026-06-07 02:53:08,578][472028] Saving new best policy, reward=513.220! +[2026-06-07 02:53:09,078][472573] Updated weights for policy 0, policy_version 17477 (0.0008) +[2026-06-07 02:53:09,222][472573] Updated weights for policy 0, policy_version 17491 (0.0009) +[2026-06-07 02:53:09,367][472573] Updated weights for policy 0, policy_version 17505 (0.0008) +[2026-06-07 02:53:09,515][472573] Updated weights for policy 0, policy_version 17519 (0.0009) +[2026-06-07 02:53:09,678][472573] Updated weights for policy 0, policy_version 17534 (0.0008) +[2026-06-07 02:53:10,270][472573] Updated weights for policy 0, policy_version 17545 (0.0008) +[2026-06-07 02:53:10,420][472573] Updated weights for policy 0, policy_version 17560 (0.0008) +[2026-06-07 02:53:10,541][472573] Updated weights for policy 0, policy_version 17571 (0.0008) +[2026-06-07 02:53:10,647][472573] Updated weights for policy 0, policy_version 17581 (0.0009) +[2026-06-07 02:53:10,775][472573] Updated weights for policy 0, policy_version 17592 (0.0009) +[2026-06-07 02:53:11,338][472573] Updated weights for policy 0, policy_version 17602 (0.0008) +[2026-06-07 02:53:11,473][472573] Updated weights for policy 0, policy_version 17614 (0.0008) +[2026-06-07 02:53:11,584][472573] Updated weights for policy 0, policy_version 17625 (0.0008) +[2026-06-07 02:53:11,728][472573] Updated weights for policy 0, policy_version 17638 (0.0008) +[2026-06-07 02:53:11,853][472573] Updated weights for policy 0, policy_version 17649 (0.0008) +[2026-06-07 02:53:11,976][472573] Updated weights for policy 0, policy_version 17660 (0.0008) +[2026-06-07 02:53:12,556][472573] Updated weights for policy 0, policy_version 17671 (0.0008) +[2026-06-07 02:53:12,678][472573] Updated weights for policy 0, policy_version 17682 (0.0008) +[2026-06-07 02:53:12,804][472573] Updated weights for policy 0, policy_version 17693 (0.0008) +[2026-06-07 02:53:12,914][472573] Updated weights for policy 0, policy_version 17703 (0.0008) +[2026-06-07 02:53:13,047][472573] Updated weights for policy 0, policy_version 17715 (0.0008) +[2026-06-07 02:53:13,103][464932] Fps is (10 sec: 26214.5, 60 sec: 28399.0, 300 sec: 27769.5). Total num frames: 9043968. Throughput: 0: 28387.5. Samples: 9047808. Policy #0 lag: (min: 63.0, avg: 74.4, max: 127.0) +[2026-06-07 02:53:13,104][464932] Avg episode reward: [(0, '513.421')] +[2026-06-07 02:53:13,163][472573] Updated weights for policy 0, policy_version 17725 (0.0008) +[2026-06-07 02:53:13,203][472028] Saving new best policy, reward=513.421! +[2026-06-07 02:53:13,710][472573] Updated weights for policy 0, policy_version 17735 (0.0007) +[2026-06-07 02:53:13,829][472573] Updated weights for policy 0, policy_version 17746 (0.0008) +[2026-06-07 02:53:13,940][472573] Updated weights for policy 0, policy_version 17756 (0.0008) +[2026-06-07 02:53:14,048][472573] Updated weights for policy 0, policy_version 17766 (0.0008) +[2026-06-07 02:53:14,169][472573] Updated weights for policy 0, policy_version 17776 (0.0008) +[2026-06-07 02:53:14,281][472573] Updated weights for policy 0, policy_version 17786 (0.0009) +[2026-06-07 02:53:14,851][472573] Updated weights for policy 0, policy_version 17798 (0.0008) +[2026-06-07 02:53:14,962][472573] Updated weights for policy 0, policy_version 17808 (0.0009) +[2026-06-07 02:53:15,076][472573] Updated weights for policy 0, policy_version 17818 (0.0008) +[2026-06-07 02:53:15,203][472573] Updated weights for policy 0, policy_version 17829 (0.0008) +[2026-06-07 02:53:15,325][472573] Updated weights for policy 0, policy_version 17840 (0.0008) +[2026-06-07 02:53:15,437][472573] Updated weights for policy 0, policy_version 17850 (0.0008) +[2026-06-07 02:53:15,962][472573] Updated weights for policy 0, policy_version 17860 (0.0007) +[2026-06-07 02:53:16,086][472573] Updated weights for policy 0, policy_version 17871 (0.0008) +[2026-06-07 02:53:16,203][472573] Updated weights for policy 0, policy_version 17882 (0.0008) +[2026-06-07 02:53:16,318][472573] Updated weights for policy 0, policy_version 17892 (0.0008) +[2026-06-07 02:53:16,431][472573] Updated weights for policy 0, policy_version 17902 (0.0008) +[2026-06-07 02:53:16,559][472573] Updated weights for policy 0, policy_version 17913 (0.0008) +[2026-06-07 02:53:17,097][472573] Updated weights for policy 0, policy_version 17923 (0.0007) +[2026-06-07 02:53:17,229][472573] Updated weights for policy 0, policy_version 17935 (0.0004) +[2026-06-07 02:53:17,360][472573] Updated weights for policy 0, policy_version 17947 (0.0008) +[2026-06-07 02:53:17,485][472573] Updated weights for policy 0, policy_version 17958 (0.0008) +[2026-06-07 02:53:17,599][472573] Updated weights for policy 0, policy_version 17968 (0.0008) +[2026-06-07 02:53:17,719][472573] Updated weights for policy 0, policy_version 17979 (0.0008) +[2026-06-07 02:53:18,103][464932] Fps is (10 sec: 29491.0, 60 sec: 28398.9, 300 sec: 27991.7). Total num frames: 9207808. Throughput: 0: 28407.4. Samples: 9215872. Policy #0 lag: (min: 63.0, avg: 74.2, max: 127.0) +[2026-06-07 02:53:18,105][464932] Avg episode reward: [(0, '518.574')] +[2026-06-07 02:53:18,109][472028] Saving new best policy, reward=518.574! +[2026-06-07 02:53:18,288][472573] Updated weights for policy 0, policy_version 17990 (0.0008) +[2026-06-07 02:53:18,403][472573] Updated weights for policy 0, policy_version 18000 (0.0008) +[2026-06-07 02:53:18,525][472573] Updated weights for policy 0, policy_version 18011 (0.0008) +[2026-06-07 02:53:18,638][472573] Updated weights for policy 0, policy_version 18021 (0.0008) +[2026-06-07 02:53:18,748][472573] Updated weights for policy 0, policy_version 18031 (0.0008) +[2026-06-07 02:53:18,858][472573] Updated weights for policy 0, policy_version 18041 (0.0008) +[2026-06-07 02:53:19,413][472573] Updated weights for policy 0, policy_version 18052 (0.0009) +[2026-06-07 02:53:19,545][472573] Updated weights for policy 0, policy_version 18064 (0.0008) +[2026-06-07 02:53:19,672][472573] Updated weights for policy 0, policy_version 18076 (0.0008) +[2026-06-07 02:53:19,797][472573] Updated weights for policy 0, policy_version 18087 (0.0008) +[2026-06-07 02:53:19,926][472573] Updated weights for policy 0, policy_version 18098 (0.0008) +[2026-06-07 02:53:20,075][472573] Updated weights for policy 0, policy_version 18112 (0.0009) +[2026-06-07 02:53:20,648][472573] Updated weights for policy 0, policy_version 18123 (0.0008) +[2026-06-07 02:53:20,784][472573] Updated weights for policy 0, policy_version 18135 (0.0008) +[2026-06-07 02:53:20,941][472573] Updated weights for policy 0, policy_version 18149 (0.0008) +[2026-06-07 02:53:21,049][472573] Updated weights for policy 0, policy_version 18159 (0.0008) +[2026-06-07 02:53:21,165][472573] Updated weights for policy 0, policy_version 18169 (0.0008) +[2026-06-07 02:53:21,730][472573] Updated weights for policy 0, policy_version 18181 (0.0007) +[2026-06-07 02:53:21,851][472573] Updated weights for policy 0, policy_version 18192 (0.0008) +[2026-06-07 02:53:21,970][472573] Updated weights for policy 0, policy_version 18203 (0.0008) +[2026-06-07 02:53:22,084][472573] Updated weights for policy 0, policy_version 18213 (0.0008) +[2026-06-07 02:53:22,208][472573] Updated weights for policy 0, policy_version 18224 (0.0008) +[2026-06-07 02:53:22,320][472573] Updated weights for policy 0, policy_version 18234 (0.0008) +[2026-06-07 02:53:22,875][472573] Updated weights for policy 0, policy_version 18245 (0.0008) +[2026-06-07 02:53:22,984][472573] Updated weights for policy 0, policy_version 18255 (0.0008) +[2026-06-07 02:53:23,103][472573] Updated weights for policy 0, policy_version 18266 (0.0008) +[2026-06-07 02:53:23,103][464932] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 27991.6). Total num frames: 9338880. Throughput: 0: 28396.1. Samples: 9307392. Policy #0 lag: (min: 63.0, avg: 74.2, max: 127.0) +[2026-06-07 02:53:23,104][464932] Avg episode reward: [(0, '576.281')] +[2026-06-07 02:53:23,211][472573] Updated weights for policy 0, policy_version 18276 (0.0008) +[2026-06-07 02:53:23,333][472573] Updated weights for policy 0, policy_version 18287 (0.0009) +[2026-06-07 02:53:23,473][472573] Updated weights for policy 0, policy_version 18300 (0.0008) +[2026-06-07 02:53:23,513][472028] Saving new best policy, reward=576.281! +[2026-06-07 02:53:24,040][472573] Updated weights for policy 0, policy_version 18310 (0.0009) +[2026-06-07 02:53:24,147][472573] Updated weights for policy 0, policy_version 18320 (0.0009) +[2026-06-07 02:53:24,273][472573] Updated weights for policy 0, policy_version 18332 (0.0008) +[2026-06-07 02:53:24,396][472573] Updated weights for policy 0, policy_version 18344 (0.0008) +[2026-06-07 02:53:24,531][472573] Updated weights for policy 0, policy_version 18356 (0.0008) +[2026-06-07 02:53:24,658][472573] Updated weights for policy 0, policy_version 18367 (0.0009) +[2026-06-07 02:53:25,250][472573] Updated weights for policy 0, policy_version 18380 (0.0008) +[2026-06-07 02:53:25,366][472573] Updated weights for policy 0, policy_version 18391 (0.0008) +[2026-06-07 02:53:25,486][472573] Updated weights for policy 0, policy_version 18402 (0.0008) +[2026-06-07 02:53:25,595][472573] Updated weights for policy 0, policy_version 18412 (0.0008) +[2026-06-07 02:53:25,729][472573] Updated weights for policy 0, policy_version 18423 (0.0008) +[2026-06-07 02:53:26,277][472573] Updated weights for policy 0, policy_version 18433 (0.0008) +[2026-06-07 02:53:26,388][472573] Updated weights for policy 0, policy_version 18443 (0.0008) +[2026-06-07 02:53:26,520][472573] Updated weights for policy 0, policy_version 18455 (0.0008) +[2026-06-07 02:53:26,633][472573] Updated weights for policy 0, policy_version 18465 (0.0008) +[2026-06-07 02:53:26,761][472573] Updated weights for policy 0, policy_version 18476 (0.0008) +[2026-06-07 02:53:26,885][472573] Updated weights for policy 0, policy_version 18487 (0.0008) +[2026-06-07 02:53:27,449][472573] Updated weights for policy 0, policy_version 18497 (0.0008) +[2026-06-07 02:53:27,594][472573] Updated weights for policy 0, policy_version 18510 (0.0007) +[2026-06-07 02:53:27,730][472573] Updated weights for policy 0, policy_version 18523 (0.0008) +[2026-06-07 02:53:27,860][472573] Updated weights for policy 0, policy_version 18535 (0.0009) +[2026-06-07 02:53:27,984][472573] Updated weights for policy 0, policy_version 18545 (0.0008) +[2026-06-07 02:53:28,103][464932] Fps is (10 sec: 26214.2, 60 sec: 28398.9, 300 sec: 27991.6). Total num frames: 9469952. Throughput: 0: 28413.0. Samples: 9475072. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 02:53:28,104][464932] Avg episode reward: [(0, '579.924')] +[2026-06-07 02:53:28,107][472573] Updated weights for policy 0, policy_version 18555 (0.0008) +[2026-06-07 02:53:28,154][472028] Saving new best policy, reward=579.924! +[2026-06-07 02:53:28,659][472573] Updated weights for policy 0, policy_version 18565 (0.0008) +[2026-06-07 02:53:28,766][472573] Updated weights for policy 0, policy_version 18575 (0.0008) +[2026-06-07 02:53:28,878][472573] Updated weights for policy 0, policy_version 18585 (0.0008) +[2026-06-07 02:53:28,998][472573] Updated weights for policy 0, policy_version 18596 (0.0008) +[2026-06-07 02:53:29,144][472573] Updated weights for policy 0, policy_version 18609 (0.0008) +[2026-06-07 02:53:29,265][472573] Updated weights for policy 0, policy_version 18620 (0.0008) +[2026-06-07 02:53:29,833][472573] Updated weights for policy 0, policy_version 18631 (0.0005) +[2026-06-07 02:53:29,949][472573] Updated weights for policy 0, policy_version 18641 (0.0006) +[2026-06-07 02:53:30,075][472573] Updated weights for policy 0, policy_version 18653 (0.0008) +[2026-06-07 02:53:30,195][472573] Updated weights for policy 0, policy_version 18663 (0.0008) +[2026-06-07 02:53:30,312][472573] Updated weights for policy 0, policy_version 18673 (0.0008) +[2026-06-07 02:53:30,433][472573] Updated weights for policy 0, policy_version 18684 (0.0008) +[2026-06-07 02:53:30,985][472573] Updated weights for policy 0, policy_version 18695 (0.0009) +[2026-06-07 02:53:31,099][472573] Updated weights for policy 0, policy_version 18705 (0.0008) +[2026-06-07 02:53:31,210][472573] Updated weights for policy 0, policy_version 18715 (0.0008) +[2026-06-07 02:53:31,330][472573] Updated weights for policy 0, policy_version 18726 (0.0008) +[2026-06-07 02:53:31,462][472573] Updated weights for policy 0, policy_version 18737 (0.0008) +[2026-06-07 02:53:31,597][472573] Updated weights for policy 0, policy_version 18749 (0.0008) +[2026-06-07 02:53:32,177][472573] Updated weights for policy 0, policy_version 18762 (0.0008) +[2026-06-07 02:53:32,288][472573] Updated weights for policy 0, policy_version 18772 (0.0008) +[2026-06-07 02:53:32,408][472573] Updated weights for policy 0, policy_version 18782 (0.0008) +[2026-06-07 02:53:32,559][472573] Updated weights for policy 0, policy_version 18796 (0.0008) +[2026-06-07 02:53:32,681][472573] Updated weights for policy 0, policy_version 18807 (0.0008) +[2026-06-07 02:53:33,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28102.8). Total num frames: 9633792. Throughput: 0: 28413.1. Samples: 9642496. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 02:53:33,104][464932] Avg episode reward: [(0, '651.208')] +[2026-06-07 02:53:33,109][472028] Saving new best policy, reward=651.208! +[2026-06-07 02:53:33,232][472573] Updated weights for policy 0, policy_version 18818 (0.0008) +[2026-06-07 02:53:33,346][472573] Updated weights for policy 0, policy_version 18828 (0.0007) +[2026-06-07 02:53:33,453][472573] Updated weights for policy 0, policy_version 18838 (0.0008) +[2026-06-07 02:53:33,573][472573] Updated weights for policy 0, policy_version 18849 (0.0008) +[2026-06-07 02:53:33,686][472573] Updated weights for policy 0, policy_version 18859 (0.0008) +[2026-06-07 02:53:33,792][472573] Updated weights for policy 0, policy_version 18869 (0.0008) +[2026-06-07 02:53:33,916][472573] Updated weights for policy 0, policy_version 18879 (0.0008) +[2026-06-07 02:53:34,463][472573] Updated weights for policy 0, policy_version 18889 (0.0008) +[2026-06-07 02:53:34,584][472573] Updated weights for policy 0, policy_version 18900 (0.0008) +[2026-06-07 02:53:34,693][472573] Updated weights for policy 0, policy_version 18910 (0.0008) +[2026-06-07 02:53:34,824][472573] Updated weights for policy 0, policy_version 18921 (0.0008) +[2026-06-07 02:53:34,946][472573] Updated weights for policy 0, policy_version 18932 (0.0008) +[2026-06-07 02:53:35,060][472573] Updated weights for policy 0, policy_version 18942 (0.0008) +[2026-06-07 02:53:35,609][472573] Updated weights for policy 0, policy_version 18952 (0.0008) +[2026-06-07 02:53:35,729][472573] Updated weights for policy 0, policy_version 18962 (0.0009) +[2026-06-07 02:53:35,847][472573] Updated weights for policy 0, policy_version 18973 (0.0008) +[2026-06-07 02:53:35,972][472573] Updated weights for policy 0, policy_version 18985 (0.0008) +[2026-06-07 02:53:36,102][472573] Updated weights for policy 0, policy_version 18997 (0.0008) +[2026-06-07 02:53:36,221][472573] Updated weights for policy 0, policy_version 19007 (0.0008) +[2026-06-07 02:53:36,797][472573] Updated weights for policy 0, policy_version 19020 (0.0007) +[2026-06-07 02:53:36,941][472573] Updated weights for policy 0, policy_version 19033 (0.0008) +[2026-06-07 02:53:37,084][472573] Updated weights for policy 0, policy_version 19046 (0.0008) +[2026-06-07 02:53:37,215][472573] Updated weights for policy 0, policy_version 19058 (0.0009) +[2026-06-07 02:53:37,347][472573] Updated weights for policy 0, policy_version 19070 (0.0009) +[2026-06-07 02:53:37,912][472573] Updated weights for policy 0, policy_version 19081 (0.0008) +[2026-06-07 02:53:38,031][472573] Updated weights for policy 0, policy_version 19092 (0.0008) +[2026-06-07 02:53:38,103][464932] Fps is (10 sec: 29491.5, 60 sec: 28399.0, 300 sec: 28102.7). Total num frames: 9764864. Throughput: 0: 28410.3. Samples: 9733376. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 02:53:38,104][464932] Avg episode reward: [(0, '687.547')] +[2026-06-07 02:53:38,158][472573] Updated weights for policy 0, policy_version 19103 (0.0008) +[2026-06-07 02:53:38,263][472573] Updated weights for policy 0, policy_version 19113 (0.0008) +[2026-06-07 02:53:38,382][472573] Updated weights for policy 0, policy_version 19124 (0.0009) +[2026-06-07 02:53:38,508][472028] Saving new best policy, reward=687.547! +[2026-06-07 02:53:38,510][472573] Updated weights for policy 0, policy_version 19136 (0.0008) +[2026-06-07 02:53:39,081][472573] Updated weights for policy 0, policy_version 19147 (0.0008) +[2026-06-07 02:53:39,200][472573] Updated weights for policy 0, policy_version 19158 (0.0008) +[2026-06-07 02:53:39,323][472573] Updated weights for policy 0, policy_version 19169 (0.0008) +[2026-06-07 02:53:39,447][472573] Updated weights for policy 0, policy_version 19181 (0.0008) +[2026-06-07 02:53:39,584][472573] Updated weights for policy 0, policy_version 19193 (0.0008) +[2026-06-07 02:53:40,150][472573] Updated weights for policy 0, policy_version 19203 (0.0008) +[2026-06-07 02:53:40,307][472573] Updated weights for policy 0, policy_version 19218 (0.0008) +[2026-06-07 02:53:40,417][472573] Updated weights for policy 0, policy_version 19228 (0.0008) +[2026-06-07 02:53:40,529][472573] Updated weights for policy 0, policy_version 19238 (0.0009) +[2026-06-07 02:53:40,679][472573] Updated weights for policy 0, policy_version 19252 (0.0008) +[2026-06-07 02:53:41,272][472573] Updated weights for policy 0, policy_version 19265 (0.0008) +[2026-06-07 02:53:41,383][472573] Updated weights for policy 0, policy_version 19275 (0.0008) +[2026-06-07 02:53:41,533][472573] Updated weights for policy 0, policy_version 19288 (0.0008) +[2026-06-07 02:53:41,661][472573] Updated weights for policy 0, policy_version 19300 (0.0008) +[2026-06-07 02:53:41,771][472573] Updated weights for policy 0, policy_version 19310 (0.0008) +[2026-06-07 02:53:41,882][472573] Updated weights for policy 0, policy_version 19320 (0.0008) +[2026-06-07 02:53:42,428][472573] Updated weights for policy 0, policy_version 19331 (0.0007) +[2026-06-07 02:53:42,541][472573] Updated weights for policy 0, policy_version 19341 (0.0008) +[2026-06-07 02:53:42,654][472573] Updated weights for policy 0, policy_version 19351 (0.0008) +[2026-06-07 02:53:42,764][472573] Updated weights for policy 0, policy_version 19361 (0.0008) +[2026-06-07 02:53:42,876][472573] Updated weights for policy 0, policy_version 19371 (0.0008) +[2026-06-07 02:53:43,010][472573] Updated weights for policy 0, policy_version 19383 (0.0009) +[2026-06-07 02:53:43,104][464932] Fps is (10 sec: 29490.5, 60 sec: 28944.9, 300 sec: 28213.8). Total num frames: 9928704. Throughput: 0: 28418.7. Samples: 9902080. Policy #0 lag: (min: 12.0, avg: 23.1, max: 76.0) +[2026-06-07 02:53:43,105][464932] Avg episode reward: [(0, '705.098')] +[2026-06-07 02:53:43,112][472028] Saving new best policy, reward=705.098! +[2026-06-07 02:53:43,731][472573] Updated weights for policy 0, policy_version 19394 (0.0007) +[2026-06-07 02:53:43,934][472573] Updated weights for policy 0, policy_version 19414 (0.0005) +[2026-06-07 02:53:44,082][472573] Updated weights for policy 0, policy_version 19428 (0.0005) +[2026-06-07 02:53:44,216][472573] Updated weights for policy 0, policy_version 19440 (0.0004) +[2026-06-07 02:53:44,356][472573] Updated weights for policy 0, policy_version 19452 (0.0007) +[2026-06-07 02:53:44,951][472573] Updated weights for policy 0, policy_version 19462 (0.0008) +[2026-06-07 02:53:45,066][472573] Updated weights for policy 0, policy_version 19472 (0.0008) +[2026-06-07 02:53:45,180][472573] Updated weights for policy 0, policy_version 19482 (0.0008) +[2026-06-07 02:53:45,298][472573] Updated weights for policy 0, policy_version 19492 (0.0008) +[2026-06-07 02:53:45,429][472573] Updated weights for policy 0, policy_version 19503 (0.0008) +[2026-06-07 02:53:45,545][472573] Updated weights for policy 0, policy_version 19513 (0.0009) +[2026-06-07 02:53:46,071][472573] Updated weights for policy 0, policy_version 19523 (0.0008) +[2026-06-07 02:53:46,206][472573] Updated weights for policy 0, policy_version 19535 (0.0008) +[2026-06-07 02:53:46,327][472573] Updated weights for policy 0, policy_version 19545 (0.0009) +[2026-06-07 02:53:46,446][472573] Updated weights for policy 0, policy_version 19556 (0.0008) +[2026-06-07 02:53:46,568][472573] Updated weights for policy 0, policy_version 19566 (0.0008) +[2026-06-07 02:53:46,683][472573] Updated weights for policy 0, policy_version 19576 (0.0008) +[2026-06-07 02:53:47,231][472573] Updated weights for policy 0, policy_version 19586 (0.0007) +[2026-06-07 02:53:47,359][472573] Updated weights for policy 0, policy_version 19598 (0.0005) +[2026-06-07 02:53:47,487][472573] Updated weights for policy 0, policy_version 19609 (0.0008) +[2026-06-07 02:53:47,619][472573] Updated weights for policy 0, policy_version 19620 (0.0008) +[2026-06-07 02:53:47,747][472573] Updated weights for policy 0, policy_version 19631 (0.0008) +[2026-06-07 02:53:47,876][472573] Updated weights for policy 0, policy_version 19642 (0.0008) +[2026-06-07 02:53:48,103][464932] Fps is (10 sec: 29491.1, 60 sec: 28398.9, 300 sec: 28213.8). Total num frames: 10059776. Throughput: 0: 28401.8. Samples: 10068096. Policy #0 lag: (min: 12.0, avg: 23.1, max: 76.0) +[2026-06-07 02:53:48,105][464932] Avg episode reward: [(0, '711.361')] +[2026-06-07 02:53:48,112][472028] Saving new best policy, reward=711.361! +[2026-06-07 02:53:48,391][472573] Updated weights for policy 0, policy_version 19652 (0.0008) +[2026-06-07 02:53:48,510][472573] Updated weights for policy 0, policy_version 19663 (0.0008) +[2026-06-07 02:53:48,625][472573] Updated weights for policy 0, policy_version 19673 (0.0008) +[2026-06-07 02:53:48,729][472573] Updated weights for policy 0, policy_version 19683 (0.0008) +[2026-06-07 02:53:48,853][472573] Updated weights for policy 0, policy_version 19693 (0.0008) +[2026-06-07 02:53:48,972][472573] Updated weights for policy 0, policy_version 19704 (0.0010) +[2026-06-07 02:53:49,552][472573] Updated weights for policy 0, policy_version 19716 (0.0008) +[2026-06-07 02:53:49,677][472573] Updated weights for policy 0, policy_version 19727 (0.0008) +[2026-06-07 02:53:49,790][472573] Updated weights for policy 0, policy_version 19737 (0.0008) +[2026-06-07 02:53:49,900][472573] Updated weights for policy 0, policy_version 19747 (0.0008) +[2026-06-07 02:53:50,048][472573] Updated weights for policy 0, policy_version 19760 (0.0009) +[2026-06-07 02:53:50,171][472573] Updated weights for policy 0, policy_version 19771 (0.0008) +[2026-06-07 02:53:50,719][472573] Updated weights for policy 0, policy_version 19781 (0.0008) +[2026-06-07 02:53:50,838][472573] Updated weights for policy 0, policy_version 19792 (0.0009) +[2026-06-07 02:53:50,947][472573] Updated weights for policy 0, policy_version 19802 (0.0008) +[2026-06-07 02:53:51,072][472573] Updated weights for policy 0, policy_version 19813 (0.0008) +[2026-06-07 02:53:51,196][472573] Updated weights for policy 0, policy_version 19824 (0.0008) +[2026-06-07 02:53:51,311][472573] Updated weights for policy 0, policy_version 19834 (0.0008) +[2026-06-07 02:53:51,835][472573] Updated weights for policy 0, policy_version 19844 (0.0008) +[2026-06-07 02:53:51,969][472573] Updated weights for policy 0, policy_version 19856 (0.0008) +[2026-06-07 02:53:52,079][472573] Updated weights for policy 0, policy_version 19866 (0.0008) +[2026-06-07 02:53:52,190][472573] Updated weights for policy 0, policy_version 19876 (0.0008) +[2026-06-07 02:53:52,301][472573] Updated weights for policy 0, policy_version 19886 (0.0009) +[2026-06-07 02:53:52,426][472573] Updated weights for policy 0, policy_version 19897 (0.0008) +[2026-06-07 02:53:53,001][472573] Updated weights for policy 0, policy_version 19908 (0.0008) +[2026-06-07 02:53:53,103][464932] Fps is (10 sec: 26215.1, 60 sec: 28399.0, 300 sec: 28213.8). Total num frames: 10190848. Throughput: 0: 28313.6. Samples: 10155136. Policy #0 lag: (min: 12.0, avg: 23.1, max: 76.0) +[2026-06-07 02:53:53,104][464932] Avg episode reward: [(0, '681.285')] +[2026-06-07 02:53:53,123][472573] Updated weights for policy 0, policy_version 19919 (0.0008) +[2026-06-07 02:53:53,255][472573] Updated weights for policy 0, policy_version 19931 (0.0008) +[2026-06-07 02:53:53,367][472573] Updated weights for policy 0, policy_version 19941 (0.0008) +[2026-06-07 02:53:53,479][472573] Updated weights for policy 0, policy_version 19951 (0.0008) +[2026-06-07 02:53:53,604][472573] Updated weights for policy 0, policy_version 19962 (0.0009) +[2026-06-07 02:53:54,162][472573] Updated weights for policy 0, policy_version 19972 (0.0007) +[2026-06-07 02:53:54,277][472573] Updated weights for policy 0, policy_version 19983 (0.0008) +[2026-06-07 02:53:54,395][472573] Updated weights for policy 0, policy_version 19993 (0.0008) +[2026-06-07 02:53:54,505][472573] Updated weights for policy 0, policy_version 20003 (0.0008) +[2026-06-07 02:53:54,619][472573] Updated weights for policy 0, policy_version 20013 (0.0008) +[2026-06-07 02:53:54,754][472573] Updated weights for policy 0, policy_version 20025 (0.0008) +[2026-06-07 02:53:55,327][472573] Updated weights for policy 0, policy_version 20036 (0.0008) +[2026-06-07 02:53:55,439][472573] Updated weights for policy 0, policy_version 20046 (0.0008) +[2026-06-07 02:53:55,553][472573] Updated weights for policy 0, policy_version 20056 (0.0008) +[2026-06-07 02:53:55,673][472573] Updated weights for policy 0, policy_version 20067 (0.0008) +[2026-06-07 02:53:55,800][472573] Updated weights for policy 0, policy_version 20078 (0.0008) +[2026-06-07 02:53:55,937][472573] Updated weights for policy 0, policy_version 20090 (0.0008) +[2026-06-07 02:53:56,478][472573] Updated weights for policy 0, policy_version 20100 (0.0008) +[2026-06-07 02:53:56,592][472573] Updated weights for policy 0, policy_version 20110 (0.0009) +[2026-06-07 02:53:56,706][472573] Updated weights for policy 0, policy_version 20120 (0.0008) +[2026-06-07 02:53:56,817][472573] Updated weights for policy 0, policy_version 20130 (0.0008) +[2026-06-07 02:53:56,924][472573] Updated weights for policy 0, policy_version 20140 (0.0008) +[2026-06-07 02:53:57,045][472573] Updated weights for policy 0, policy_version 20150 (0.0008) +[2026-06-07 02:53:57,155][472573] Updated weights for policy 0, policy_version 20160 (0.0008) +[2026-06-07 02:53:57,702][472573] Updated weights for policy 0, policy_version 20170 (0.0008) +[2026-06-07 02:53:57,825][472573] Updated weights for policy 0, policy_version 20181 (0.0008) +[2026-06-07 02:53:57,942][472573] Updated weights for policy 0, policy_version 20191 (0.0008) +[2026-06-07 02:53:58,051][472573] Updated weights for policy 0, policy_version 20201 (0.0008) +[2026-06-07 02:53:58,103][464932] Fps is (10 sec: 26214.5, 60 sec: 28399.0, 300 sec: 28213.8). Total num frames: 10321920. Throughput: 0: 28396.1. Samples: 10325632. Policy #0 lag: (min: 63.0, avg: 74.7, max: 127.0) +[2026-06-07 02:53:58,104][464932] Avg episode reward: [(0, '712.724')] +[2026-06-07 02:53:58,187][472573] Updated weights for policy 0, policy_version 20213 (0.0008) +[2026-06-07 02:53:58,307][472028] Saving new best policy, reward=712.724! +[2026-06-07 02:53:58,308][472573] Updated weights for policy 0, policy_version 20224 (0.0008) +[2026-06-07 02:53:58,867][472573] Updated weights for policy 0, policy_version 20234 (0.0008) +[2026-06-07 02:53:58,980][472573] Updated weights for policy 0, policy_version 20244 (0.0009) +[2026-06-07 02:53:59,088][472573] Updated weights for policy 0, policy_version 20254 (0.0008) +[2026-06-07 02:53:59,212][472573] Updated weights for policy 0, policy_version 20265 (0.0008) +[2026-06-07 02:53:59,322][472573] Updated weights for policy 0, policy_version 20275 (0.0008) +[2026-06-07 02:53:59,446][472573] Updated weights for policy 0, policy_version 20286 (0.0008) +[2026-06-07 02:53:59,986][472573] Updated weights for policy 0, policy_version 20297 (0.0006) +[2026-06-07 02:54:00,108][472573] Updated weights for policy 0, policy_version 20308 (0.0008) +[2026-06-07 02:54:00,215][472573] Updated weights for policy 0, policy_version 20318 (0.0008) +[2026-06-07 02:54:00,327][472573] Updated weights for policy 0, policy_version 20328 (0.0008) +[2026-06-07 02:54:00,451][472573] Updated weights for policy 0, policy_version 20339 (0.0009) +[2026-06-07 02:54:00,583][472573] Updated weights for policy 0, policy_version 20351 (0.0008) +[2026-06-07 02:54:01,143][472573] Updated weights for policy 0, policy_version 20361 (0.0008) +[2026-06-07 02:54:01,257][472573] Updated weights for policy 0, policy_version 20371 (0.0008) +[2026-06-07 02:54:01,378][472573] Updated weights for policy 0, policy_version 20382 (0.0008) +[2026-06-07 02:54:01,501][472573] Updated weights for policy 0, policy_version 20393 (0.0008) +[2026-06-07 02:54:01,656][472573] Updated weights for policy 0, policy_version 20407 (0.0008) +[2026-06-07 02:54:02,222][472573] Updated weights for policy 0, policy_version 20418 (0.0008) +[2026-06-07 02:54:02,347][472573] Updated weights for policy 0, policy_version 20429 (0.0008) +[2026-06-07 02:54:02,467][472573] Updated weights for policy 0, policy_version 20440 (0.0008) +[2026-06-07 02:54:02,592][472573] Updated weights for policy 0, policy_version 20451 (0.0008) +[2026-06-07 02:54:02,702][472573] Updated weights for policy 0, policy_version 20461 (0.0008) +[2026-06-07 02:54:02,822][472573] Updated weights for policy 0, policy_version 20471 (0.0009) +[2026-06-07 02:54:03,103][464932] Fps is (10 sec: 29491.1, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 10485760. Throughput: 0: 28362.0. Samples: 10492160. Policy #0 lag: (min: 63.0, avg: 74.7, max: 127.0) +[2026-06-07 02:54:03,104][464932] Avg episode reward: [(0, '650.223')] +[2026-06-07 02:54:03,397][472573] Updated weights for policy 0, policy_version 20482 (0.0008) +[2026-06-07 02:54:03,510][472573] Updated weights for policy 0, policy_version 20492 (0.0008) +[2026-06-07 02:54:03,631][472573] Updated weights for policy 0, policy_version 20503 (0.0008) +[2026-06-07 02:54:03,740][472573] Updated weights for policy 0, policy_version 20513 (0.0009) +[2026-06-07 02:54:03,854][472573] Updated weights for policy 0, policy_version 20523 (0.0008) +[2026-06-07 02:54:03,972][472573] Updated weights for policy 0, policy_version 20533 (0.0008) +[2026-06-07 02:54:04,086][472573] Updated weights for policy 0, policy_version 20543 (0.0008) +[2026-06-07 02:54:04,647][472573] Updated weights for policy 0, policy_version 20553 (0.0008) +[2026-06-07 02:54:04,768][472573] Updated weights for policy 0, policy_version 20564 (0.0008) +[2026-06-07 02:54:04,894][472573] Updated weights for policy 0, policy_version 20575 (0.0008) +[2026-06-07 02:54:05,016][472573] Updated weights for policy 0, policy_version 20586 (0.0008) +[2026-06-07 02:54:05,138][472573] Updated weights for policy 0, policy_version 20596 (0.0008) +[2026-06-07 02:54:05,248][472573] Updated weights for policy 0, policy_version 20606 (0.0008) +[2026-06-07 02:54:05,817][472573] Updated weights for policy 0, policy_version 20617 (0.0008) +[2026-06-07 02:54:05,928][472573] Updated weights for policy 0, policy_version 20627 (0.0008) +[2026-06-07 02:54:06,053][472573] Updated weights for policy 0, policy_version 20638 (0.0009) +[2026-06-07 02:54:06,164][472573] Updated weights for policy 0, policy_version 20648 (0.0008) +[2026-06-07 02:54:06,294][472573] Updated weights for policy 0, policy_version 20660 (0.0009) +[2026-06-07 02:54:06,420][472573] Updated weights for policy 0, policy_version 20671 (0.0008) +[2026-06-07 02:54:06,970][472573] Updated weights for policy 0, policy_version 20681 (0.0007) +[2026-06-07 02:54:07,120][472573] Updated weights for policy 0, policy_version 20695 (0.0006) +[2026-06-07 02:54:07,262][472573] Updated weights for policy 0, policy_version 20707 (0.0008) +[2026-06-07 02:54:07,391][472573] Updated weights for policy 0, policy_version 20719 (0.0008) +[2026-06-07 02:54:07,502][472573] Updated weights for policy 0, policy_version 20729 (0.0008) +[2026-06-07 02:54:08,064][472573] Updated weights for policy 0, policy_version 20740 (0.0008) +[2026-06-07 02:54:08,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 10616832. Throughput: 0: 28239.6. Samples: 10578176. Policy #0 lag: (min: 63.0, avg: 74.7, max: 127.0) +[2026-06-07 02:54:08,104][464932] Avg episode reward: [(0, '663.564')] +[2026-06-07 02:54:08,187][472573] Updated weights for policy 0, policy_version 20751 (0.0008) +[2026-06-07 02:54:08,301][472573] Updated weights for policy 0, policy_version 20761 (0.0008) +[2026-06-07 02:54:08,420][472573] Updated weights for policy 0, policy_version 20772 (0.0008) +[2026-06-07 02:54:08,533][472573] Updated weights for policy 0, policy_version 20782 (0.0008) +[2026-06-07 02:54:08,657][472573] Updated weights for policy 0, policy_version 20793 (0.0008) +[2026-06-07 02:54:09,220][472573] Updated weights for policy 0, policy_version 20804 (0.0008) +[2026-06-07 02:54:09,343][472573] Updated weights for policy 0, policy_version 20815 (0.0008) +[2026-06-07 02:54:09,464][472573] Updated weights for policy 0, policy_version 20826 (0.0009) +[2026-06-07 02:54:09,577][472573] Updated weights for policy 0, policy_version 20836 (0.0008) +[2026-06-07 02:54:09,692][472573] Updated weights for policy 0, policy_version 20846 (0.0008) +[2026-06-07 02:54:09,813][472573] Updated weights for policy 0, policy_version 20857 (0.0008) +[2026-06-07 02:54:10,374][472573] Updated weights for policy 0, policy_version 20868 (0.0007) +[2026-06-07 02:54:10,504][472573] Updated weights for policy 0, policy_version 20880 (0.0008) +[2026-06-07 02:54:10,640][472573] Updated weights for policy 0, policy_version 20892 (0.0008) +[2026-06-07 02:54:10,766][472573] Updated weights for policy 0, policy_version 20903 (0.0008) +[2026-06-07 02:54:10,889][472573] Updated weights for policy 0, policy_version 20914 (0.0009) +[2026-06-07 02:54:11,010][472573] Updated weights for policy 0, policy_version 20924 (0.0008) +[2026-06-07 02:54:11,545][472573] Updated weights for policy 0, policy_version 20934 (0.0008) +[2026-06-07 02:54:11,672][472573] Updated weights for policy 0, policy_version 20945 (0.0008) +[2026-06-07 02:54:11,803][472573] Updated weights for policy 0, policy_version 20957 (0.0008) +[2026-06-07 02:54:11,923][472573] Updated weights for policy 0, policy_version 20968 (0.0008) +[2026-06-07 02:54:12,073][472573] Updated weights for policy 0, policy_version 20982 (0.0008) +[2026-06-07 02:54:12,655][472573] Updated weights for policy 0, policy_version 20993 (0.0008) +[2026-06-07 02:54:12,782][472573] Updated weights for policy 0, policy_version 21004 (0.0008) +[2026-06-07 02:54:12,899][472573] Updated weights for policy 0, policy_version 21015 (0.0008) +[2026-06-07 02:54:13,044][472573] Updated weights for policy 0, policy_version 21028 (0.0008) +[2026-06-07 02:54:13,103][464932] Fps is (10 sec: 26214.2, 60 sec: 28398.9, 300 sec: 28213.8). Total num frames: 10747904. Throughput: 0: 28350.6. Samples: 10750848. Policy #0 lag: (min: 55.0, avg: 67.1, max: 119.0) +[2026-06-07 02:54:13,104][464932] Avg episode reward: [(0, '683.495')] +[2026-06-07 02:54:13,162][472573] Updated weights for policy 0, policy_version 21039 (0.0008) +[2026-06-07 02:54:13,280][472573] Updated weights for policy 0, policy_version 21049 (0.0009) +[2026-06-07 02:54:13,863][472573] Updated weights for policy 0, policy_version 21060 (0.0008) +[2026-06-07 02:54:13,969][472573] Updated weights for policy 0, policy_version 21070 (0.0008) +[2026-06-07 02:54:14,084][472573] Updated weights for policy 0, policy_version 21080 (0.0008) +[2026-06-07 02:54:14,193][472573] Updated weights for policy 0, policy_version 21090 (0.0008) +[2026-06-07 02:54:14,312][472573] Updated weights for policy 0, policy_version 21100 (0.0008) +[2026-06-07 02:54:14,426][472573] Updated weights for policy 0, policy_version 21110 (0.0008) +[2026-06-07 02:54:14,539][472573] Updated weights for policy 0, policy_version 21120 (0.0008) +[2026-06-07 02:54:15,107][472573] Updated weights for policy 0, policy_version 21130 (0.0008) +[2026-06-07 02:54:15,226][472573] Updated weights for policy 0, policy_version 21141 (0.0008) +[2026-06-07 02:54:15,353][472573] Updated weights for policy 0, policy_version 21152 (0.0009) +[2026-06-07 02:54:15,465][472573] Updated weights for policy 0, policy_version 21162 (0.0009) +[2026-06-07 02:54:15,591][472573] Updated weights for policy 0, policy_version 21173 (0.0008) +[2026-06-07 02:54:15,711][472573] Updated weights for policy 0, policy_version 21184 (0.0008) +[2026-06-07 02:54:16,253][472573] Updated weights for policy 0, policy_version 21194 (0.0007) +[2026-06-07 02:54:16,382][472573] Updated weights for policy 0, policy_version 21205 (0.0008) +[2026-06-07 02:54:16,506][472573] Updated weights for policy 0, policy_version 21216 (0.0008) +[2026-06-07 02:54:16,631][472573] Updated weights for policy 0, policy_version 21227 (0.0008) +[2026-06-07 02:54:16,745][472573] Updated weights for policy 0, policy_version 21237 (0.0008) +[2026-06-07 02:54:16,867][472573] Updated weights for policy 0, policy_version 21248 (0.0008) +[2026-06-07 02:54:17,426][472573] Updated weights for policy 0, policy_version 21258 (0.0008) +[2026-06-07 02:54:17,536][472573] Updated weights for policy 0, policy_version 21268 (0.0008) +[2026-06-07 02:54:17,655][472573] Updated weights for policy 0, policy_version 21279 (0.0008) +[2026-06-07 02:54:17,782][472573] Updated weights for policy 0, policy_version 21290 (0.0008) +[2026-06-07 02:54:17,891][472573] Updated weights for policy 0, policy_version 21300 (0.0008) +[2026-06-07 02:54:18,016][472573] Updated weights for policy 0, policy_version 21311 (0.0008) +[2026-06-07 02:54:18,103][464932] Fps is (10 sec: 29491.3, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 10911744. Throughput: 0: 28376.2. Samples: 10919424. Policy #0 lag: (min: 55.0, avg: 67.1, max: 119.0) +[2026-06-07 02:54:18,104][464932] Avg episode reward: [(0, '678.461')] +[2026-06-07 02:54:18,579][472573] Updated weights for policy 0, policy_version 21322 (0.0008) +[2026-06-07 02:54:18,713][472573] Updated weights for policy 0, policy_version 21334 (0.0008) +[2026-06-07 02:54:18,836][472573] Updated weights for policy 0, policy_version 21345 (0.0008) +[2026-06-07 02:54:18,970][472573] Updated weights for policy 0, policy_version 21357 (0.0010) +[2026-06-07 02:54:19,081][472573] Updated weights for policy 0, policy_version 21367 (0.0008) +[2026-06-07 02:54:19,654][472573] Updated weights for policy 0, policy_version 21378 (0.0008) +[2026-06-07 02:54:19,769][472573] Updated weights for policy 0, policy_version 21388 (0.0008) +[2026-06-07 02:54:19,892][472573] Updated weights for policy 0, policy_version 21399 (0.0008) +[2026-06-07 02:54:20,013][472573] Updated weights for policy 0, policy_version 21410 (0.0008) +[2026-06-07 02:54:20,138][472573] Updated weights for policy 0, policy_version 21421 (0.0008) +[2026-06-07 02:54:20,267][472573] Updated weights for policy 0, policy_version 21432 (0.0008) +[2026-06-07 02:54:20,801][472573] Updated weights for policy 0, policy_version 21442 (0.0007) +[2026-06-07 02:54:20,934][472573] Updated weights for policy 0, policy_version 21454 (0.0008) +[2026-06-07 02:54:21,043][472573] Updated weights for policy 0, policy_version 21464 (0.0008) +[2026-06-07 02:54:21,163][472573] Updated weights for policy 0, policy_version 21474 (0.0008) +[2026-06-07 02:54:21,286][472573] Updated weights for policy 0, policy_version 21485 (0.0008) +[2026-06-07 02:54:21,398][472573] Updated weights for policy 0, policy_version 21495 (0.0008) +[2026-06-07 02:54:21,982][472573] Updated weights for policy 0, policy_version 21507 (0.0008) +[2026-06-07 02:54:22,136][472573] Updated weights for policy 0, policy_version 21521 (0.0008) +[2026-06-07 02:54:22,251][472573] Updated weights for policy 0, policy_version 21531 (0.0008) +[2026-06-07 02:54:22,365][472573] Updated weights for policy 0, policy_version 21541 (0.0008) +[2026-06-07 02:54:22,490][472573] Updated weights for policy 0, policy_version 21552 (0.0008) +[2026-06-07 02:54:22,611][472573] Updated weights for policy 0, policy_version 21563 (0.0008) +[2026-06-07 02:54:23,103][464932] Fps is (10 sec: 29491.5, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 11042816. Throughput: 0: 28194.2. Samples: 11002112. Policy #0 lag: (min: 55.0, avg: 67.1, max: 119.0) +[2026-06-07 02:54:23,104][464932] Avg episode reward: [(0, '647.071')] +[2026-06-07 02:54:23,178][472573] Updated weights for policy 0, policy_version 21574 (0.0008) +[2026-06-07 02:54:23,312][472573] Updated weights for policy 0, policy_version 21586 (0.0008) +[2026-06-07 02:54:23,438][472573] Updated weights for policy 0, policy_version 21597 (0.0007) +[2026-06-07 02:54:23,546][472573] Updated weights for policy 0, policy_version 21607 (0.0008) +[2026-06-07 02:54:23,673][472573] Updated weights for policy 0, policy_version 21618 (0.0009) +[2026-06-07 02:54:23,796][472573] Updated weights for policy 0, policy_version 21628 (0.0008) +[2026-06-07 02:54:24,330][472573] Updated weights for policy 0, policy_version 21638 (0.0008) +[2026-06-07 02:54:24,441][472573] Updated weights for policy 0, policy_version 21648 (0.0008) +[2026-06-07 02:54:24,549][472573] Updated weights for policy 0, policy_version 21658 (0.0008) +[2026-06-07 02:54:24,659][472573] Updated weights for policy 0, policy_version 21668 (0.0008) +[2026-06-07 02:54:24,773][472573] Updated weights for policy 0, policy_version 21678 (0.0008) +[2026-06-07 02:54:24,889][472573] Updated weights for policy 0, policy_version 21688 (0.0008) +[2026-06-07 02:54:25,453][472573] Updated weights for policy 0, policy_version 21699 (0.0008) +[2026-06-07 02:54:25,563][472573] Updated weights for policy 0, policy_version 21709 (0.0008) +[2026-06-07 02:54:25,680][472573] Updated weights for policy 0, policy_version 21719 (0.0008) +[2026-06-07 02:54:25,824][472573] Updated weights for policy 0, policy_version 21732 (0.0008) +[2026-06-07 02:54:25,949][472573] Updated weights for policy 0, policy_version 21743 (0.0008) +[2026-06-07 02:54:26,060][472573] Updated weights for policy 0, policy_version 21753 (0.0008) +[2026-06-07 02:54:26,603][472573] Updated weights for policy 0, policy_version 21763 (0.0008) +[2026-06-07 02:54:26,709][472573] Updated weights for policy 0, policy_version 21773 (0.0007) +[2026-06-07 02:54:26,828][472573] Updated weights for policy 0, policy_version 21784 (0.0008) +[2026-06-07 02:54:26,949][472573] Updated weights for policy 0, policy_version 21795 (0.0008) +[2026-06-07 02:54:27,069][472573] Updated weights for policy 0, policy_version 21805 (0.0008) +[2026-06-07 02:54:27,189][472573] Updated weights for policy 0, policy_version 21815 (0.0008) +[2026-06-07 02:54:27,738][472573] Updated weights for policy 0, policy_version 21825 (0.0008) +[2026-06-07 02:54:27,863][472573] Updated weights for policy 0, policy_version 21836 (0.0008) +[2026-06-07 02:54:27,985][472573] Updated weights for policy 0, policy_version 21847 (0.0008) +[2026-06-07 02:54:28,095][472573] Updated weights for policy 0, policy_version 21857 (0.0008) +[2026-06-07 02:54:28,103][464932] Fps is (10 sec: 26214.2, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 11173888. Throughput: 0: 28353.5. Samples: 11177984. Policy #0 lag: (min: 40.0, avg: 68.4, max: 104.0) +[2026-06-07 02:54:28,104][464932] Avg episode reward: [(0, '689.559')] +[2026-06-07 02:54:28,209][472573] Updated weights for policy 0, policy_version 21867 (0.0008) +[2026-06-07 02:54:28,318][472573] Updated weights for policy 0, policy_version 21877 (0.0008) +[2026-06-07 02:54:28,443][472573] Updated weights for policy 0, policy_version 21888 (0.0008) +[2026-06-07 02:54:28,992][472573] Updated weights for policy 0, policy_version 21898 (0.0006) +[2026-06-07 02:54:29,139][472573] Updated weights for policy 0, policy_version 21911 (0.0008) +[2026-06-07 02:54:29,246][472573] Updated weights for policy 0, policy_version 21921 (0.0008) +[2026-06-07 02:54:29,359][472573] Updated weights for policy 0, policy_version 21931 (0.0008) +[2026-06-07 02:54:29,473][472573] Updated weights for policy 0, policy_version 21941 (0.0008) +[2026-06-07 02:54:29,583][472573] Updated weights for policy 0, policy_version 21951 (0.0008) +[2026-06-07 02:54:30,148][472573] Updated weights for policy 0, policy_version 21962 (0.0007) +[2026-06-07 02:54:30,273][472573] Updated weights for policy 0, policy_version 21973 (0.0008) +[2026-06-07 02:54:30,386][472573] Updated weights for policy 0, policy_version 21983 (0.0008) +[2026-06-07 02:54:30,498][472573] Updated weights for policy 0, policy_version 21993 (0.0008) +[2026-06-07 02:54:30,633][472573] Updated weights for policy 0, policy_version 22005 (0.0008) +[2026-06-07 02:54:31,204][472573] Updated weights for policy 0, policy_version 22017 (0.0008) +[2026-06-07 02:54:31,325][472573] Updated weights for policy 0, policy_version 22027 (0.0008) +[2026-06-07 02:54:31,437][472573] Updated weights for policy 0, policy_version 22037 (0.0008) +[2026-06-07 02:54:31,558][472573] Updated weights for policy 0, policy_version 22048 (0.0008) +[2026-06-07 02:54:31,668][472573] Updated weights for policy 0, policy_version 22058 (0.0008) +[2026-06-07 02:54:31,782][472573] Updated weights for policy 0, policy_version 22068 (0.0008) +[2026-06-07 02:54:31,915][472573] Updated weights for policy 0, policy_version 22080 (0.0008) +[2026-06-07 02:54:32,472][472573] Updated weights for policy 0, policy_version 22092 (0.0007) +[2026-06-07 02:54:32,585][472573] Updated weights for policy 0, policy_version 22102 (0.0009) +[2026-06-07 02:54:32,695][472573] Updated weights for policy 0, policy_version 22112 (0.0008) +[2026-06-07 02:54:32,832][472573] Updated weights for policy 0, policy_version 22124 (0.0008) +[2026-06-07 02:54:32,946][472573] Updated weights for policy 0, policy_version 22134 (0.0008) +[2026-06-07 02:54:33,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 11337728. Throughput: 0: 28393.3. Samples: 11345792. Policy #0 lag: (min: 40.0, avg: 68.4, max: 104.0) +[2026-06-07 02:54:33,104][464932] Avg episode reward: [(0, '698.411')] +[2026-06-07 02:54:33,506][472573] Updated weights for policy 0, policy_version 22145 (0.0008) +[2026-06-07 02:54:33,628][472573] Updated weights for policy 0, policy_version 22155 (0.0008) +[2026-06-07 02:54:33,744][472573] Updated weights for policy 0, policy_version 22166 (0.0008) +[2026-06-07 02:54:33,862][472573] Updated weights for policy 0, policy_version 22176 (0.0008) +[2026-06-07 02:54:33,977][472573] Updated weights for policy 0, policy_version 22186 (0.0008) +[2026-06-07 02:54:34,117][472573] Updated weights for policy 0, policy_version 22199 (0.0008) +[2026-06-07 02:54:34,667][472573] Updated weights for policy 0, policy_version 22209 (0.0008) +[2026-06-07 02:54:34,783][472573] Updated weights for policy 0, policy_version 22219 (0.0008) +[2026-06-07 02:54:34,911][472573] Updated weights for policy 0, policy_version 22231 (0.0008) +[2026-06-07 02:54:35,034][472573] Updated weights for policy 0, policy_version 22242 (0.0009) +[2026-06-07 02:54:35,146][472573] Updated weights for policy 0, policy_version 22252 (0.0008) +[2026-06-07 02:54:35,264][472573] Updated weights for policy 0, policy_version 22263 (0.0008) +[2026-06-07 02:54:35,825][472573] Updated weights for policy 0, policy_version 22274 (0.0008) +[2026-06-07 02:54:35,969][472573] Updated weights for policy 0, policy_version 22287 (0.0008) +[2026-06-07 02:54:36,100][472573] Updated weights for policy 0, policy_version 22299 (0.0008) +[2026-06-07 02:54:36,227][472573] Updated weights for policy 0, policy_version 22310 (0.0008) +[2026-06-07 02:54:36,337][472573] Updated weights for policy 0, policy_version 22320 (0.0008) +[2026-06-07 02:54:36,483][472573] Updated weights for policy 0, policy_version 22333 (0.0008) +[2026-06-07 02:54:37,040][472573] Updated weights for policy 0, policy_version 22343 (0.0008) +[2026-06-07 02:54:37,152][472573] Updated weights for policy 0, policy_version 22353 (0.0008) +[2026-06-07 02:54:37,265][472573] Updated weights for policy 0, policy_version 22363 (0.0008) +[2026-06-07 02:54:37,387][472573] Updated weights for policy 0, policy_version 22374 (0.0008) +[2026-06-07 02:54:37,495][472573] Updated weights for policy 0, policy_version 22384 (0.0008) +[2026-06-07 02:54:37,623][472573] Updated weights for policy 0, policy_version 22395 (0.0008) +[2026-06-07 02:54:38,103][464932] Fps is (10 sec: 29491.4, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 11468800. Throughput: 0: 28282.3. Samples: 11427840. Policy #0 lag: (min: 40.0, avg: 68.4, max: 104.0) +[2026-06-07 02:54:38,104][464932] Avg episode reward: [(0, '723.594')] +[2026-06-07 02:54:38,195][472573] Updated weights for policy 0, policy_version 22407 (0.0008) +[2026-06-07 02:54:38,340][472573] Updated weights for policy 0, policy_version 22420 (0.0008) +[2026-06-07 02:54:38,461][472573] Updated weights for policy 0, policy_version 22431 (0.0008) +[2026-06-07 02:54:38,576][472573] Updated weights for policy 0, policy_version 22441 (0.0008) +[2026-06-07 02:54:38,700][472573] Updated weights for policy 0, policy_version 22452 (0.0008) +[2026-06-07 02:54:38,821][472573] Updated weights for policy 0, policy_version 22463 (0.0008) +[2026-06-07 02:54:38,830][472028] Saving new best policy, reward=723.594! +[2026-06-07 02:54:39,395][472573] Updated weights for policy 0, policy_version 22475 (0.0008) +[2026-06-07 02:54:39,510][472573] Updated weights for policy 0, policy_version 22485 (0.0008) +[2026-06-07 02:54:39,623][472573] Updated weights for policy 0, policy_version 22495 (0.0008) +[2026-06-07 02:54:39,744][472573] Updated weights for policy 0, policy_version 22506 (0.0008) +[2026-06-07 02:54:39,889][472573] Updated weights for policy 0, policy_version 22519 (0.0008) +[2026-06-07 02:54:40,480][472573] Updated weights for policy 0, policy_version 22531 (0.0008) +[2026-06-07 02:54:40,602][472573] Updated weights for policy 0, policy_version 22542 (0.0008) +[2026-06-07 02:54:40,722][472573] Updated weights for policy 0, policy_version 22553 (0.0008) +[2026-06-07 02:54:40,831][472573] Updated weights for policy 0, policy_version 22563 (0.0008) +[2026-06-07 02:54:40,967][472573] Updated weights for policy 0, policy_version 22575 (0.0008) +[2026-06-07 02:54:41,108][472573] Updated weights for policy 0, policy_version 22587 (0.0008) +[2026-06-07 02:54:41,646][472573] Updated weights for policy 0, policy_version 22597 (0.0008) +[2026-06-07 02:54:41,778][472573] Updated weights for policy 0, policy_version 22609 (0.0008) +[2026-06-07 02:54:41,902][472573] Updated weights for policy 0, policy_version 22620 (0.0008) +[2026-06-07 02:54:42,011][472573] Updated weights for policy 0, policy_version 22630 (0.0008) +[2026-06-07 02:54:42,130][472573] Updated weights for policy 0, policy_version 22640 (0.0008) +[2026-06-07 02:54:42,254][472573] Updated weights for policy 0, policy_version 22651 (0.0008) +[2026-06-07 02:54:42,822][472573] Updated weights for policy 0, policy_version 22662 (0.0008) +[2026-06-07 02:54:42,940][472573] Updated weights for policy 0, policy_version 22672 (0.0008) +[2026-06-07 02:54:43,063][472573] Updated weights for policy 0, policy_version 22683 (0.0008) +[2026-06-07 02:54:43,103][464932] Fps is (10 sec: 26214.5, 60 sec: 27852.9, 300 sec: 28324.9). Total num frames: 11599872. Throughput: 0: 28390.4. Samples: 11603200. Policy #0 lag: (min: 40.0, avg: 68.4, max: 104.0) +[2026-06-07 02:54:43,104][464932] Avg episode reward: [(0, '727.858')] +[2026-06-07 02:54:43,203][472573] Updated weights for policy 0, policy_version 22696 (0.0008) +[2026-06-07 02:54:43,316][472573] Updated weights for policy 0, policy_version 22706 (0.0008) +[2026-06-07 02:54:43,436][472573] Updated weights for policy 0, policy_version 22716 (0.0008) +[2026-06-07 02:54:43,474][472028] Saving new best policy, reward=727.858! +[2026-06-07 02:54:44,031][472573] Updated weights for policy 0, policy_version 22729 (0.0008) +[2026-06-07 02:54:44,154][472573] Updated weights for policy 0, policy_version 22740 (0.0008) +[2026-06-07 02:54:44,277][472573] Updated weights for policy 0, policy_version 22751 (0.0008) +[2026-06-07 02:54:44,388][472573] Updated weights for policy 0, policy_version 22761 (0.0008) +[2026-06-07 02:54:44,522][472573] Updated weights for policy 0, policy_version 22773 (0.0008) +[2026-06-07 02:54:45,093][472573] Updated weights for policy 0, policy_version 22785 (0.0008) +[2026-06-07 02:54:45,235][472573] Updated weights for policy 0, policy_version 22798 (0.0008) +[2026-06-07 02:54:45,345][472573] Updated weights for policy 0, policy_version 22808 (0.0008) +[2026-06-07 02:54:45,481][472573] Updated weights for policy 0, policy_version 22820 (0.0008) +[2026-06-07 02:54:45,602][472573] Updated weights for policy 0, policy_version 22831 (0.0008) +[2026-06-07 02:54:45,726][472573] Updated weights for policy 0, policy_version 22842 (0.0008) +[2026-06-07 02:54:46,298][472573] Updated weights for policy 0, policy_version 22853 (0.0008) +[2026-06-07 02:54:46,410][472573] Updated weights for policy 0, policy_version 22863 (0.0008) +[2026-06-07 02:54:46,532][472573] Updated weights for policy 0, policy_version 22874 (0.0008) +[2026-06-07 02:54:46,649][472573] Updated weights for policy 0, policy_version 22884 (0.0008) +[2026-06-07 02:54:46,761][472573] Updated weights for policy 0, policy_version 22894 (0.0008) +[2026-06-07 02:54:46,877][472573] Updated weights for policy 0, policy_version 22905 (0.0008) +[2026-06-07 02:54:47,431][472573] Updated weights for policy 0, policy_version 22915 (0.0008) +[2026-06-07 02:54:47,539][472573] Updated weights for policy 0, policy_version 22925 (0.0009) +[2026-06-07 02:54:47,665][472573] Updated weights for policy 0, policy_version 22936 (0.0008) +[2026-06-07 02:54:47,799][472573] Updated weights for policy 0, policy_version 22948 (0.0008) +[2026-06-07 02:54:47,910][472573] Updated weights for policy 0, policy_version 22958 (0.0008) +[2026-06-07 02:54:48,068][472573] Updated weights for policy 0, policy_version 22972 (0.0008) +[2026-06-07 02:54:48,103][464932] Fps is (10 sec: 26214.4, 60 sec: 27852.8, 300 sec: 28324.9). Total num frames: 11730944. Throughput: 0: 28450.1. Samples: 11772416. Policy #0 lag: (min: 56.0, avg: 86.1, max: 120.0) +[2026-06-07 02:54:48,104][464932] Avg episode reward: [(0, '716.186')] +[2026-06-07 02:54:48,636][472573] Updated weights for policy 0, policy_version 22983 (0.0008) +[2026-06-07 02:54:48,769][472573] Updated weights for policy 0, policy_version 22995 (0.0008) +[2026-06-07 02:54:48,881][472573] Updated weights for policy 0, policy_version 23005 (0.0008) +[2026-06-07 02:54:49,001][472573] Updated weights for policy 0, policy_version 23016 (0.0008) +[2026-06-07 02:54:49,115][472573] Updated weights for policy 0, policy_version 23026 (0.0008) +[2026-06-07 02:54:49,225][472573] Updated weights for policy 0, policy_version 23036 (0.0008) +[2026-06-07 02:54:49,801][472573] Updated weights for policy 0, policy_version 23047 (0.0007) +[2026-06-07 02:54:49,911][472573] Updated weights for policy 0, policy_version 23057 (0.0008) +[2026-06-07 02:54:50,023][472573] Updated weights for policy 0, policy_version 23067 (0.0008) +[2026-06-07 02:54:50,160][472573] Updated weights for policy 0, policy_version 23080 (0.0008) +[2026-06-07 02:54:50,281][472573] Updated weights for policy 0, policy_version 23090 (0.0008) +[2026-06-07 02:54:50,402][472573] Updated weights for policy 0, policy_version 23101 (0.0008) +[2026-06-07 02:54:50,965][472573] Updated weights for policy 0, policy_version 23112 (0.0008) +[2026-06-07 02:54:51,092][472573] Updated weights for policy 0, policy_version 23124 (0.0008) +[2026-06-07 02:54:51,207][472573] Updated weights for policy 0, policy_version 23134 (0.0008) +[2026-06-07 02:54:51,318][472573] Updated weights for policy 0, policy_version 23144 (0.0008) +[2026-06-07 02:54:51,427][472573] Updated weights for policy 0, policy_version 23154 (0.0008) +[2026-06-07 02:54:51,541][472573] Updated weights for policy 0, policy_version 23164 (0.0008) +[2026-06-07 02:54:52,105][472573] Updated weights for policy 0, policy_version 23174 (0.0008) +[2026-06-07 02:54:52,212][472573] Updated weights for policy 0, policy_version 23184 (0.0008) +[2026-06-07 02:54:52,335][472573] Updated weights for policy 0, policy_version 23195 (0.0008) +[2026-06-07 02:54:52,448][472573] Updated weights for policy 0, policy_version 23205 (0.0008) +[2026-06-07 02:54:52,585][472573] Updated weights for policy 0, policy_version 23217 (0.0008) +[2026-06-07 02:54:52,700][472573] Updated weights for policy 0, policy_version 23227 (0.0008) +[2026-06-07 02:54:53,103][464932] Fps is (10 sec: 29491.0, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 11894784. Throughput: 0: 28313.6. Samples: 11852288. Policy #0 lag: (min: 56.0, avg: 86.1, max: 120.0) +[2026-06-07 02:54:53,104][464932] Avg episode reward: [(0, '726.561')] +[2026-06-07 02:54:53,232][472573] Updated weights for policy 0, policy_version 23237 (0.0008) +[2026-06-07 02:54:53,336][472573] Updated weights for policy 0, policy_version 23247 (0.0008) +[2026-06-07 02:54:53,450][472573] Updated weights for policy 0, policy_version 23257 (0.0009) +[2026-06-07 02:54:53,577][472573] Updated weights for policy 0, policy_version 23268 (0.0008) +[2026-06-07 02:54:53,723][472573] Updated weights for policy 0, policy_version 23281 (0.0008) +[2026-06-07 02:54:53,858][472573] Updated weights for policy 0, policy_version 23293 (0.0008) +[2026-06-07 02:54:54,438][472573] Updated weights for policy 0, policy_version 23304 (0.0008) +[2026-06-07 02:54:54,565][472573] Updated weights for policy 0, policy_version 23316 (0.0008) +[2026-06-07 02:54:54,681][472573] Updated weights for policy 0, policy_version 23326 (0.0008) +[2026-06-07 02:54:54,792][472573] Updated weights for policy 0, policy_version 23336 (0.0008) +[2026-06-07 02:54:54,917][472573] Updated weights for policy 0, policy_version 23347 (0.0008) +[2026-06-07 02:54:55,029][472573] Updated weights for policy 0, policy_version 23357 (0.0008) +[2026-06-07 02:54:55,561][472573] Updated weights for policy 0, policy_version 23367 (0.0008) +[2026-06-07 02:54:55,680][472573] Updated weights for policy 0, policy_version 23378 (0.0008) +[2026-06-07 02:54:55,800][472573] Updated weights for policy 0, policy_version 23389 (0.0008) +[2026-06-07 02:54:55,919][472573] Updated weights for policy 0, policy_version 23399 (0.0008) +[2026-06-07 02:54:56,035][472573] Updated weights for policy 0, policy_version 23409 (0.0008) +[2026-06-07 02:54:56,147][472573] Updated weights for policy 0, policy_version 23419 (0.0008) +[2026-06-07 02:54:56,694][472573] Updated weights for policy 0, policy_version 23429 (0.0008) +[2026-06-07 02:54:56,804][472573] Updated weights for policy 0, policy_version 23439 (0.0008) +[2026-06-07 02:54:56,915][472573] Updated weights for policy 0, policy_version 23449 (0.0008) +[2026-06-07 02:54:57,051][472573] Updated weights for policy 0, policy_version 23461 (0.0010) +[2026-06-07 02:54:57,216][472573] Updated weights for policy 0, policy_version 23476 (0.0009) +[2026-06-07 02:54:57,329][472573] Updated weights for policy 0, policy_version 23486 (0.0010) +[2026-06-07 02:54:57,886][472573] Updated weights for policy 0, policy_version 23496 (0.0009) +[2026-06-07 02:54:58,016][472573] Updated weights for policy 0, policy_version 23508 (0.0010) +[2026-06-07 02:54:58,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 12025856. Throughput: 0: 28384.7. Samples: 12028160. Policy #0 lag: (min: 56.0, avg: 86.1, max: 120.0) +[2026-06-07 02:54:58,104][464932] Avg episode reward: [(0, '757.099')] +[2026-06-07 02:54:58,146][472573] Updated weights for policy 0, policy_version 23520 (0.0011) +[2026-06-07 02:54:58,277][472573] Updated weights for policy 0, policy_version 23531 (0.0010) +[2026-06-07 02:54:58,405][472573] Updated weights for policy 0, policy_version 23543 (0.0009) +[2026-06-07 02:54:58,508][472028] Saving new best policy, reward=757.099! +[2026-06-07 02:54:58,959][472573] Updated weights for policy 0, policy_version 23553 (0.0009) +[2026-06-07 02:54:59,072][472573] Updated weights for policy 0, policy_version 23563 (0.0009) +[2026-06-07 02:54:59,205][472573] Updated weights for policy 0, policy_version 23575 (0.0009) +[2026-06-07 02:54:59,364][472573] Updated weights for policy 0, policy_version 23589 (0.0009) +[2026-06-07 02:54:59,478][472573] Updated weights for policy 0, policy_version 23599 (0.0008) +[2026-06-07 02:54:59,601][472573] Updated weights for policy 0, policy_version 23610 (0.0008) +[2026-06-07 02:55:00,147][472573] Updated weights for policy 0, policy_version 23620 (0.0008) +[2026-06-07 02:55:00,259][472573] Updated weights for policy 0, policy_version 23630 (0.0009) +[2026-06-07 02:55:00,379][472573] Updated weights for policy 0, policy_version 23640 (0.0008) +[2026-06-07 02:55:00,498][472573] Updated weights for policy 0, policy_version 23651 (0.0008) +[2026-06-07 02:55:00,611][472573] Updated weights for policy 0, policy_version 23661 (0.0009) +[2026-06-07 02:55:00,737][472573] Updated weights for policy 0, policy_version 23672 (0.0008) +[2026-06-07 02:55:01,282][472573] Updated weights for policy 0, policy_version 23682 (0.0008) +[2026-06-07 02:55:01,401][472573] Updated weights for policy 0, policy_version 23693 (0.0008) +[2026-06-07 02:55:01,513][472573] Updated weights for policy 0, policy_version 23703 (0.0008) +[2026-06-07 02:55:01,622][472573] Updated weights for policy 0, policy_version 23713 (0.0008) +[2026-06-07 02:55:01,751][472573] Updated weights for policy 0, policy_version 23724 (0.0008) +[2026-06-07 02:55:01,860][472573] Updated weights for policy 0, policy_version 23734 (0.0008) +[2026-06-07 02:55:01,974][472573] Updated weights for policy 0, policy_version 23744 (0.0008) +[2026-06-07 02:55:02,534][472573] Updated weights for policy 0, policy_version 23755 (0.0007) +[2026-06-07 02:55:02,643][472573] Updated weights for policy 0, policy_version 23765 (0.0008) +[2026-06-07 02:55:02,777][472573] Updated weights for policy 0, policy_version 23777 (0.0008) +[2026-06-07 02:55:02,920][472573] Updated weights for policy 0, policy_version 23790 (0.0008) +[2026-06-07 02:55:03,041][472573] Updated weights for policy 0, policy_version 23801 (0.0008) +[2026-06-07 02:55:03,103][464932] Fps is (10 sec: 26214.4, 60 sec: 27852.8, 300 sec: 28324.9). Total num frames: 12156928. Throughput: 0: 28418.8. Samples: 12198272. Policy #0 lag: (min: 6.0, avg: 18.2, max: 70.0) +[2026-06-07 02:55:03,104][464932] Avg episode reward: [(0, '761.295')] +[2026-06-07 02:55:03,117][472028] Saving new best policy, reward=761.295! +[2026-06-07 02:55:03,605][472573] Updated weights for policy 0, policy_version 23811 (0.0007) +[2026-06-07 02:55:03,706][472573] Updated weights for policy 0, policy_version 23821 (0.0007) +[2026-06-07 02:55:03,825][472573] Updated weights for policy 0, policy_version 23832 (0.0008) +[2026-06-07 02:55:03,948][472573] Updated weights for policy 0, policy_version 23844 (0.0008) +[2026-06-07 02:55:04,075][472573] Updated weights for policy 0, policy_version 23856 (0.0008) +[2026-06-07 02:55:04,235][472573] Updated weights for policy 0, policy_version 23871 (0.0008) +[2026-06-07 02:55:04,827][472573] Updated weights for policy 0, policy_version 23883 (0.0008) +[2026-06-07 02:55:04,932][472573] Updated weights for policy 0, policy_version 23893 (0.0008) +[2026-06-07 02:55:05,070][472573] Updated weights for policy 0, policy_version 23906 (0.0008) +[2026-06-07 02:55:05,194][472573] Updated weights for policy 0, policy_version 23917 (0.0008) +[2026-06-07 02:55:05,313][472573] Updated weights for policy 0, policy_version 23928 (0.0008) +[2026-06-07 02:55:05,898][472573] Updated weights for policy 0, policy_version 23940 (0.0008) +[2026-06-07 02:55:06,030][472573] Updated weights for policy 0, policy_version 23953 (0.0008) +[2026-06-07 02:55:06,148][472573] Updated weights for policy 0, policy_version 23964 (0.0008) +[2026-06-07 02:55:06,308][472573] Updated weights for policy 0, policy_version 23979 (0.0008) +[2026-06-07 02:55:06,447][472573] Updated weights for policy 0, policy_version 23992 (0.0008) +[2026-06-07 02:55:07,062][472573] Updated weights for policy 0, policy_version 24004 (0.0008) +[2026-06-07 02:55:07,205][472573] Updated weights for policy 0, policy_version 24018 (0.0009) +[2026-06-07 02:55:07,321][472573] Updated weights for policy 0, policy_version 24029 (0.0008) +[2026-06-07 02:55:07,441][472573] Updated weights for policy 0, policy_version 24040 (0.0008) +[2026-06-07 02:55:07,560][472573] Updated weights for policy 0, policy_version 24051 (0.0008) +[2026-06-07 02:55:07,678][472573] Updated weights for policy 0, policy_version 24062 (0.0008) +[2026-06-07 02:55:08,103][464932] Fps is (10 sec: 29491.1, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 12320768. Throughput: 0: 28344.8. Samples: 12277632. Policy #0 lag: (min: 6.0, avg: 18.2, max: 70.0) +[2026-06-07 02:55:08,104][464932] Avg episode reward: [(0, '816.913')] +[2026-06-07 02:55:08,109][472028] Saving new best policy, reward=816.913! +[2026-06-07 02:55:08,293][472573] Updated weights for policy 0, policy_version 24073 (0.0008) +[2026-06-07 02:55:08,410][472573] Updated weights for policy 0, policy_version 24084 (0.0008) +[2026-06-07 02:55:08,549][472573] Updated weights for policy 0, policy_version 24096 (0.0009) +[2026-06-07 02:55:08,682][472573] Updated weights for policy 0, policy_version 24108 (0.0008) +[2026-06-07 02:55:08,798][472573] Updated weights for policy 0, policy_version 24118 (0.0008) +[2026-06-07 02:55:08,910][472573] Updated weights for policy 0, policy_version 24128 (0.0008) +[2026-06-07 02:55:09,477][472573] Updated weights for policy 0, policy_version 24140 (0.0008) +[2026-06-07 02:55:09,604][472573] Updated weights for policy 0, policy_version 24152 (0.0008) +[2026-06-07 02:55:09,727][472573] Updated weights for policy 0, policy_version 24163 (0.0008) +[2026-06-07 02:55:09,844][472573] Updated weights for policy 0, policy_version 24173 (0.0008) +[2026-06-07 02:55:09,954][472573] Updated weights for policy 0, policy_version 24183 (0.0008) +[2026-06-07 02:55:10,491][472573] Updated weights for policy 0, policy_version 24193 (0.0008) +[2026-06-07 02:55:10,612][472573] Updated weights for policy 0, policy_version 24204 (0.0008) +[2026-06-07 02:55:10,736][472573] Updated weights for policy 0, policy_version 24215 (0.0008) +[2026-06-07 02:55:10,847][472573] Updated weights for policy 0, policy_version 24225 (0.0008) +[2026-06-07 02:55:10,972][472573] Updated weights for policy 0, policy_version 24236 (0.0008) +[2026-06-07 02:55:11,086][472573] Updated weights for policy 0, policy_version 24246 (0.0008) +[2026-06-07 02:55:11,641][472573] Updated weights for policy 0, policy_version 24257 (0.0008) +[2026-06-07 02:55:11,774][472573] Updated weights for policy 0, policy_version 24269 (0.0008) +[2026-06-07 02:55:11,900][472573] Updated weights for policy 0, policy_version 24280 (0.0009) +[2026-06-07 02:55:12,011][472573] Updated weights for policy 0, policy_version 24290 (0.0009) +[2026-06-07 02:55:12,144][472573] Updated weights for policy 0, policy_version 24302 (0.0008) +[2026-06-07 02:55:12,262][472573] Updated weights for policy 0, policy_version 24312 (0.0008) +[2026-06-07 02:55:12,828][472573] Updated weights for policy 0, policy_version 24324 (0.0008) +[2026-06-07 02:55:12,947][472573] Updated weights for policy 0, policy_version 24335 (0.0009) +[2026-06-07 02:55:13,050][472573] Updated weights for policy 0, policy_version 24345 (0.0010) +[2026-06-07 02:55:13,103][464932] Fps is (10 sec: 29490.9, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 12451840. Throughput: 0: 28347.7. Samples: 12453632. Policy #0 lag: (min: 6.0, avg: 18.2, max: 70.0) +[2026-06-07 02:55:13,105][464932] Avg episode reward: [(0, '815.430')] +[2026-06-07 02:55:13,176][472573] Updated weights for policy 0, policy_version 24356 (0.0010) +[2026-06-07 02:55:13,288][472573] Updated weights for policy 0, policy_version 24366 (0.0009) +[2026-06-07 02:55:13,404][472573] Updated weights for policy 0, policy_version 24376 (0.0008) +[2026-06-07 02:55:13,983][472573] Updated weights for policy 0, policy_version 24387 (0.0007) +[2026-06-07 02:55:14,094][472573] Updated weights for policy 0, policy_version 24397 (0.0008) +[2026-06-07 02:55:14,220][472573] Updated weights for policy 0, policy_version 24408 (0.0008) +[2026-06-07 02:55:14,341][472573] Updated weights for policy 0, policy_version 24419 (0.0008) +[2026-06-07 02:55:14,465][472573] Updated weights for policy 0, policy_version 24430 (0.0008) +[2026-06-07 02:55:14,581][472573] Updated weights for policy 0, policy_version 24440 (0.0008) +[2026-06-07 02:55:15,123][472573] Updated weights for policy 0, policy_version 24450 (0.0008) +[2026-06-07 02:55:15,238][472573] Updated weights for policy 0, policy_version 24460 (0.0008) +[2026-06-07 02:55:15,347][472573] Updated weights for policy 0, policy_version 24470 (0.0008) +[2026-06-07 02:55:15,481][472573] Updated weights for policy 0, policy_version 24482 (0.0008) +[2026-06-07 02:55:15,611][472573] Updated weights for policy 0, policy_version 24493 (0.0008) +[2026-06-07 02:55:15,756][472573] Updated weights for policy 0, policy_version 24506 (0.0009) +[2026-06-07 02:55:16,306][472573] Updated weights for policy 0, policy_version 24516 (0.0008) +[2026-06-07 02:55:16,419][472573] Updated weights for policy 0, policy_version 24526 (0.0008) +[2026-06-07 02:55:16,536][472573] Updated weights for policy 0, policy_version 24537 (0.0008) +[2026-06-07 02:55:16,660][472573] Updated weights for policy 0, policy_version 24548 (0.0008) +[2026-06-07 02:55:16,772][472573] Updated weights for policy 0, policy_version 24558 (0.0008) +[2026-06-07 02:55:16,900][472573] Updated weights for policy 0, policy_version 24569 (0.0009) +[2026-06-07 02:55:17,457][472573] Updated weights for policy 0, policy_version 24579 (0.0008) +[2026-06-07 02:55:17,572][472573] Updated weights for policy 0, policy_version 24589 (0.0008) +[2026-06-07 02:55:17,681][472573] Updated weights for policy 0, policy_version 24599 (0.0008) +[2026-06-07 02:55:17,796][472573] Updated weights for policy 0, policy_version 24610 (0.0008) +[2026-06-07 02:55:17,913][472573] Updated weights for policy 0, policy_version 24620 (0.0008) +[2026-06-07 02:55:18,045][472573] Updated weights for policy 0, policy_version 24631 (0.0008) +[2026-06-07 02:55:18,103][464932] Fps is (10 sec: 26214.3, 60 sec: 27852.8, 300 sec: 28325.1). Total num frames: 12582912. Throughput: 0: 28407.4. Samples: 12624128. Policy #0 lag: (min: 6.0, avg: 18.2, max: 70.0) +[2026-06-07 02:55:18,104][464932] Avg episode reward: [(0, '816.702')] +[2026-06-07 02:55:18,621][472573] Updated weights for policy 0, policy_version 24644 (0.0008) +[2026-06-07 02:55:18,769][472573] Updated weights for policy 0, policy_version 24657 (0.0008) +[2026-06-07 02:55:18,888][472573] Updated weights for policy 0, policy_version 24668 (0.0008) +[2026-06-07 02:55:18,999][472573] Updated weights for policy 0, policy_version 24678 (0.0008) +[2026-06-07 02:55:19,115][472573] Updated weights for policy 0, policy_version 24688 (0.0008) +[2026-06-07 02:55:19,257][472573] Updated weights for policy 0, policy_version 24700 (0.0009) +[2026-06-07 02:55:19,825][472573] Updated weights for policy 0, policy_version 24712 (0.0008) +[2026-06-07 02:55:19,962][472573] Updated weights for policy 0, policy_version 24724 (0.0008) +[2026-06-07 02:55:20,077][472573] Updated weights for policy 0, policy_version 24734 (0.0008) +[2026-06-07 02:55:20,188][472573] Updated weights for policy 0, policy_version 24744 (0.0008) +[2026-06-07 02:55:20,326][472573] Updated weights for policy 0, policy_version 24756 (0.0008) +[2026-06-07 02:55:20,440][472573] Updated weights for policy 0, policy_version 24766 (0.0009) +[2026-06-07 02:55:20,997][472573] Updated weights for policy 0, policy_version 24777 (0.0008) +[2026-06-07 02:55:21,108][472573] Updated weights for policy 0, policy_version 24787 (0.0008) +[2026-06-07 02:55:21,228][472573] Updated weights for policy 0, policy_version 24798 (0.0008) +[2026-06-07 02:55:21,340][472573] Updated weights for policy 0, policy_version 24808 (0.0008) +[2026-06-07 02:55:21,456][472573] Updated weights for policy 0, policy_version 24818 (0.0008) +[2026-06-07 02:55:21,569][472573] Updated weights for policy 0, policy_version 24828 (0.0008) +[2026-06-07 02:55:22,125][472573] Updated weights for policy 0, policy_version 24840 (0.0008) +[2026-06-07 02:55:22,242][472573] Updated weights for policy 0, policy_version 24850 (0.0008) +[2026-06-07 02:55:22,355][472573] Updated weights for policy 0, policy_version 24860 (0.0008) +[2026-06-07 02:55:22,488][472573] Updated weights for policy 0, policy_version 24872 (0.0008) +[2026-06-07 02:55:22,622][472573] Updated weights for policy 0, policy_version 24884 (0.0008) +[2026-06-07 02:55:22,756][472573] Updated weights for policy 0, policy_version 24896 (0.0008) +[2026-06-07 02:55:23,103][464932] Fps is (10 sec: 29491.5, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 12746752. Throughput: 0: 28319.3. Samples: 12702208. Policy #0 lag: (min: 63.0, avg: 73.9, max: 127.0) +[2026-06-07 02:55:23,104][464932] Avg episode reward: [(0, '817.873')] +[2026-06-07 02:55:23,109][472028] Saving new best policy, reward=817.873! +[2026-06-07 02:55:23,316][472573] Updated weights for policy 0, policy_version 24907 (0.0008) +[2026-06-07 02:55:23,438][472573] Updated weights for policy 0, policy_version 24918 (0.0008) +[2026-06-07 02:55:23,556][472573] Updated weights for policy 0, policy_version 24928 (0.0008) +[2026-06-07 02:55:23,699][472573] Updated weights for policy 0, policy_version 24941 (0.0008) +[2026-06-07 02:55:23,819][472573] Updated weights for policy 0, policy_version 24952 (0.0008) +[2026-06-07 02:55:24,386][472573] Updated weights for policy 0, policy_version 24963 (0.0008) +[2026-06-07 02:55:24,521][472573] Updated weights for policy 0, policy_version 24975 (0.0008) +[2026-06-07 02:55:24,655][472573] Updated weights for policy 0, policy_version 24987 (0.0008) +[2026-06-07 02:55:24,768][472573] Updated weights for policy 0, policy_version 24997 (0.0008) +[2026-06-07 02:55:24,881][472573] Updated weights for policy 0, policy_version 25007 (0.0008) +[2026-06-07 02:55:24,995][472573] Updated weights for policy 0, policy_version 25017 (0.0008) +[2026-06-07 02:55:25,553][472573] Updated weights for policy 0, policy_version 25028 (0.0008) +[2026-06-07 02:55:25,670][472573] Updated weights for policy 0, policy_version 25038 (0.0008) +[2026-06-07 02:55:25,791][472573] Updated weights for policy 0, policy_version 25049 (0.0008) +[2026-06-07 02:55:25,897][472573] Updated weights for policy 0, policy_version 25059 (0.0008) +[2026-06-07 02:55:26,008][472573] Updated weights for policy 0, policy_version 25069 (0.0008) +[2026-06-07 02:55:26,127][472573] Updated weights for policy 0, policy_version 25080 (0.0008) +[2026-06-07 02:55:26,685][472573] Updated weights for policy 0, policy_version 25090 (0.0008) +[2026-06-07 02:55:26,794][472573] Updated weights for policy 0, policy_version 25100 (0.0008) +[2026-06-07 02:55:26,907][472573] Updated weights for policy 0, policy_version 25110 (0.0008) +[2026-06-07 02:55:27,049][472573] Updated weights for policy 0, policy_version 25123 (0.0008) +[2026-06-07 02:55:27,160][472573] Updated weights for policy 0, policy_version 25133 (0.0008) +[2026-06-07 02:55:27,275][472573] Updated weights for policy 0, policy_version 25143 (0.0008) +[2026-06-07 02:55:27,863][472573] Updated weights for policy 0, policy_version 25154 (0.0008) +[2026-06-07 02:55:27,982][472573] Updated weights for policy 0, policy_version 25165 (0.0008) +[2026-06-07 02:55:28,103][472573] Updated weights for policy 0, policy_version 25176 (0.0006) +[2026-06-07 02:55:28,103][464932] Fps is (10 sec: 29490.6, 60 sec: 28398.8, 300 sec: 28324.9). Total num frames: 12877824. Throughput: 0: 28324.8. Samples: 12877824. Policy #0 lag: (min: 63.0, avg: 73.9, max: 127.0) +[2026-06-07 02:55:28,105][464932] Avg episode reward: [(0, '848.877')] +[2026-06-07 02:55:28,232][472573] Updated weights for policy 0, policy_version 25188 (0.0009) +[2026-06-07 02:55:28,367][472573] Updated weights for policy 0, policy_version 25201 (0.0011) +[2026-06-07 02:55:28,499][472573] Updated weights for policy 0, policy_version 25213 (0.0010) +[2026-06-07 02:55:28,527][472028] Saving new best policy, reward=848.877! +[2026-06-07 02:55:29,081][472573] Updated weights for policy 0, policy_version 25225 (0.0008) +[2026-06-07 02:55:29,190][472573] Updated weights for policy 0, policy_version 25235 (0.0008) +[2026-06-07 02:55:29,309][472573] Updated weights for policy 0, policy_version 25246 (0.0008) +[2026-06-07 02:55:29,457][472573] Updated weights for policy 0, policy_version 25260 (0.0008) +[2026-06-07 02:55:29,575][472573] Updated weights for policy 0, policy_version 25271 (0.0008) +[2026-06-07 02:55:30,160][472573] Updated weights for policy 0, policy_version 25282 (0.0008) +[2026-06-07 02:55:30,277][472573] Updated weights for policy 0, policy_version 25293 (0.0008) +[2026-06-07 02:55:30,435][472573] Updated weights for policy 0, policy_version 25308 (0.0008) +[2026-06-07 02:55:30,577][472573] Updated weights for policy 0, policy_version 25321 (0.0008) +[2026-06-07 02:55:30,716][472573] Updated weights for policy 0, policy_version 25334 (0.0008) +[2026-06-07 02:55:31,308][472573] Updated weights for policy 0, policy_version 25346 (0.0008) +[2026-06-07 02:55:31,418][472573] Updated weights for policy 0, policy_version 25356 (0.0008) +[2026-06-07 02:55:31,568][472573] Updated weights for policy 0, policy_version 25370 (0.0008) +[2026-06-07 02:55:31,696][472573] Updated weights for policy 0, policy_version 25382 (0.0008) +[2026-06-07 02:55:31,813][472573] Updated weights for policy 0, policy_version 25393 (0.0008) +[2026-06-07 02:55:31,964][472573] Updated weights for policy 0, policy_version 25407 (0.0008) +[2026-06-07 02:55:32,542][472573] Updated weights for policy 0, policy_version 25419 (0.0007) +[2026-06-07 02:55:32,655][472573] Updated weights for policy 0, policy_version 25430 (0.0005) +[2026-06-07 02:55:32,803][472573] Updated weights for policy 0, policy_version 25444 (0.0004) +[2026-06-07 02:55:32,952][472573] Updated weights for policy 0, policy_version 25457 (0.0004) +[2026-06-07 02:55:33,055][472573] Updated weights for policy 0, policy_version 25467 (0.0004) +[2026-06-07 02:55:33,103][464932] Fps is (10 sec: 26214.4, 60 sec: 27852.8, 300 sec: 28324.9). Total num frames: 13008896. Throughput: 0: 28373.3. Samples: 13049216. Policy #0 lag: (min: 63.0, avg: 73.9, max: 127.0) +[2026-06-07 02:55:33,104][464932] Avg episode reward: [(0, '888.467')] +[2026-06-07 02:55:33,109][472028] Saving new best policy, reward=888.467! +[2026-06-07 02:55:33,636][472573] Updated weights for policy 0, policy_version 25479 (0.0006) +[2026-06-07 02:55:33,792][472573] Updated weights for policy 0, policy_version 25493 (0.0008) +[2026-06-07 02:55:33,912][472573] Updated weights for policy 0, policy_version 25504 (0.0008) +[2026-06-07 02:55:34,028][472573] Updated weights for policy 0, policy_version 25514 (0.0008) +[2026-06-07 02:55:34,151][472573] Updated weights for policy 0, policy_version 25525 (0.0008) +[2026-06-07 02:55:34,262][472573] Updated weights for policy 0, policy_version 25535 (0.0009) +[2026-06-07 02:55:34,818][472573] Updated weights for policy 0, policy_version 25546 (0.0009) +[2026-06-07 02:55:34,927][472573] Updated weights for policy 0, policy_version 25556 (0.0008) +[2026-06-07 02:55:35,060][472573] Updated weights for policy 0, policy_version 25568 (0.0009) +[2026-06-07 02:55:35,195][472573] Updated weights for policy 0, policy_version 25580 (0.0008) +[2026-06-07 02:55:35,314][472573] Updated weights for policy 0, policy_version 25591 (0.0008) +[2026-06-07 02:55:35,874][472573] Updated weights for policy 0, policy_version 25601 (0.0008) +[2026-06-07 02:55:36,001][472573] Updated weights for policy 0, policy_version 25612 (0.0008) +[2026-06-07 02:55:36,135][472573] Updated weights for policy 0, policy_version 25624 (0.0008) +[2026-06-07 02:55:36,261][472573] Updated weights for policy 0, policy_version 25635 (0.0008) +[2026-06-07 02:55:36,374][472573] Updated weights for policy 0, policy_version 25645 (0.0008) +[2026-06-07 02:55:36,509][472573] Updated weights for policy 0, policy_version 25657 (0.0008) +[2026-06-07 02:55:37,053][472573] Updated weights for policy 0, policy_version 25667 (0.0008) +[2026-06-07 02:55:37,169][472573] Updated weights for policy 0, policy_version 25677 (0.0009) +[2026-06-07 02:55:37,292][472573] Updated weights for policy 0, policy_version 25688 (0.0008) +[2026-06-07 02:55:37,398][472573] Updated weights for policy 0, policy_version 25698 (0.0008) +[2026-06-07 02:55:37,518][472573] Updated weights for policy 0, policy_version 25708 (0.0008) +[2026-06-07 02:55:37,642][472573] Updated weights for policy 0, policy_version 25719 (0.0008) +[2026-06-07 02:55:38,103][464932] Fps is (10 sec: 29491.9, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 13172736. Throughput: 0: 28384.7. Samples: 13129600. Policy #0 lag: (min: 6.0, avg: 17.7, max: 70.0) +[2026-06-07 02:55:38,105][464932] Avg episode reward: [(0, '882.873')] +[2026-06-07 02:55:38,212][472573] Updated weights for policy 0, policy_version 25730 (0.0008) +[2026-06-07 02:55:38,336][472573] Updated weights for policy 0, policy_version 25741 (0.0008) +[2026-06-07 02:55:38,469][472573] Updated weights for policy 0, policy_version 25753 (0.0008) +[2026-06-07 02:55:38,593][472573] Updated weights for policy 0, policy_version 25764 (0.0008) +[2026-06-07 02:55:38,724][472573] Updated weights for policy 0, policy_version 25776 (0.0008) +[2026-06-07 02:55:38,884][472573] Updated weights for policy 0, policy_version 25790 (0.0009) +[2026-06-07 02:55:39,426][472573] Updated weights for policy 0, policy_version 25800 (0.0007) +[2026-06-07 02:55:39,557][472573] Updated weights for policy 0, policy_version 25812 (0.0008) +[2026-06-07 02:55:39,667][472573] Updated weights for policy 0, policy_version 25822 (0.0008) +[2026-06-07 02:55:39,794][472573] Updated weights for policy 0, policy_version 25833 (0.0009) +[2026-06-07 02:55:39,905][472573] Updated weights for policy 0, policy_version 25843 (0.0008) +[2026-06-07 02:55:40,025][472573] Updated weights for policy 0, policy_version 25854 (0.0008) +[2026-06-07 02:55:40,579][472573] Updated weights for policy 0, policy_version 25864 (0.0007) +[2026-06-07 02:55:40,702][472573] Updated weights for policy 0, policy_version 25875 (0.0008) +[2026-06-07 02:55:40,829][472573] Updated weights for policy 0, policy_version 25886 (0.0008) +[2026-06-07 02:55:40,941][472573] Updated weights for policy 0, policy_version 25896 (0.0008) +[2026-06-07 02:55:41,052][472573] Updated weights for policy 0, policy_version 25906 (0.0008) +[2026-06-07 02:55:41,187][472573] Updated weights for policy 0, policy_version 25918 (0.0008) +[2026-06-07 02:55:41,742][472573] Updated weights for policy 0, policy_version 25930 (0.0007) +[2026-06-07 02:55:41,864][472573] Updated weights for policy 0, policy_version 25941 (0.0008) +[2026-06-07 02:55:41,986][472573] Updated weights for policy 0, policy_version 25952 (0.0008) +[2026-06-07 02:55:42,114][472573] Updated weights for policy 0, policy_version 25963 (0.0009) +[2026-06-07 02:55:42,229][472573] Updated weights for policy 0, policy_version 25973 (0.0008) +[2026-06-07 02:55:42,339][472573] Updated weights for policy 0, policy_version 25983 (0.0008) +[2026-06-07 02:55:42,901][472573] Updated weights for policy 0, policy_version 25995 (0.0008) +[2026-06-07 02:55:43,044][472573] Updated weights for policy 0, policy_version 26008 (0.0008) +[2026-06-07 02:55:43,103][464932] Fps is (10 sec: 29491.0, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 13303808. Throughput: 0: 28407.4. Samples: 13306496. Policy #0 lag: (min: 6.0, avg: 17.7, max: 70.0) +[2026-06-07 02:55:43,104][464932] Avg episode reward: [(0, '910.452')] +[2026-06-07 02:55:43,158][472573] Updated weights for policy 0, policy_version 26018 (0.0008) +[2026-06-07 02:55:43,270][472573] Updated weights for policy 0, policy_version 26028 (0.0008) +[2026-06-07 02:55:43,395][472573] Updated weights for policy 0, policy_version 26039 (0.0008) +[2026-06-07 02:55:43,495][472028] Saving new best policy, reward=910.452! +[2026-06-07 02:55:43,946][472573] Updated weights for policy 0, policy_version 26049 (0.0008) +[2026-06-07 02:55:44,061][472573] Updated weights for policy 0, policy_version 26059 (0.0008) +[2026-06-07 02:55:44,181][472573] Updated weights for policy 0, policy_version 26070 (0.0008) +[2026-06-07 02:55:44,330][472573] Updated weights for policy 0, policy_version 26083 (0.0008) +[2026-06-07 02:55:44,442][472573] Updated weights for policy 0, policy_version 26093 (0.0008) +[2026-06-07 02:55:44,563][472573] Updated weights for policy 0, policy_version 26104 (0.0008) +[2026-06-07 02:55:45,132][472573] Updated weights for policy 0, policy_version 26116 (0.0008) +[2026-06-07 02:55:45,242][472573] Updated weights for policy 0, policy_version 26126 (0.0008) +[2026-06-07 02:55:45,358][472573] Updated weights for policy 0, policy_version 26136 (0.0008) +[2026-06-07 02:55:45,504][472573] Updated weights for policy 0, policy_version 26149 (0.0008) +[2026-06-07 02:55:45,626][472573] Updated weights for policy 0, policy_version 26160 (0.0008) +[2026-06-07 02:55:45,754][472573] Updated weights for policy 0, policy_version 26171 (0.0008) +[2026-06-07 02:55:46,316][472573] Updated weights for policy 0, policy_version 26183 (0.0007) +[2026-06-07 02:55:46,427][472573] Updated weights for policy 0, policy_version 26193 (0.0008) +[2026-06-07 02:55:46,549][472573] Updated weights for policy 0, policy_version 26204 (0.0008) +[2026-06-07 02:55:46,714][472573] Updated weights for policy 0, policy_version 26218 (0.0008) +[2026-06-07 02:55:46,820][472573] Updated weights for policy 0, policy_version 26228 (0.0008) +[2026-06-07 02:55:46,935][472573] Updated weights for policy 0, policy_version 26238 (0.0008) +[2026-06-07 02:55:47,537][472573] Updated weights for policy 0, policy_version 26248 (0.0008) +[2026-06-07 02:55:47,645][472573] Updated weights for policy 0, policy_version 26258 (0.0008) +[2026-06-07 02:55:47,798][472573] Updated weights for policy 0, policy_version 26272 (0.0008) +[2026-06-07 02:55:47,908][472573] Updated weights for policy 0, policy_version 26282 (0.0008) +[2026-06-07 02:55:48,049][472573] Updated weights for policy 0, policy_version 26294 (0.0008) +[2026-06-07 02:55:48,103][464932] Fps is (10 sec: 26214.4, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 13434880. Throughput: 0: 28384.7. Samples: 13475584. Policy #0 lag: (min: 6.0, avg: 17.7, max: 70.0) +[2026-06-07 02:55:48,104][464932] Avg episode reward: [(0, '953.788')] +[2026-06-07 02:55:48,155][472028] Saving new best policy, reward=953.788! +[2026-06-07 02:55:48,612][472573] Updated weights for policy 0, policy_version 26305 (0.0009) +[2026-06-07 02:55:48,727][472573] Updated weights for policy 0, policy_version 26315 (0.0008) +[2026-06-07 02:55:48,857][472573] Updated weights for policy 0, policy_version 26327 (0.0008) +[2026-06-07 02:55:48,963][472573] Updated weights for policy 0, policy_version 26337 (0.0008) +[2026-06-07 02:55:49,080][472573] Updated weights for policy 0, policy_version 26347 (0.0008) +[2026-06-07 02:55:49,201][472573] Updated weights for policy 0, policy_version 26358 (0.0008) +[2026-06-07 02:55:49,795][472573] Updated weights for policy 0, policy_version 26370 (0.0008) +[2026-06-07 02:55:49,911][472573] Updated weights for policy 0, policy_version 26380 (0.0008) +[2026-06-07 02:55:50,050][472573] Updated weights for policy 0, policy_version 26393 (0.0008) +[2026-06-07 02:55:50,175][472573] Updated weights for policy 0, policy_version 26404 (0.0008) +[2026-06-07 02:55:50,297][472573] Updated weights for policy 0, policy_version 26415 (0.0008) +[2026-06-07 02:55:50,433][472573] Updated weights for policy 0, policy_version 26427 (0.0008) +[2026-06-07 02:55:50,989][472573] Updated weights for policy 0, policy_version 26437 (0.0008) +[2026-06-07 02:55:51,123][472573] Updated weights for policy 0, policy_version 26449 (0.0008) +[2026-06-07 02:55:51,243][472573] Updated weights for policy 0, policy_version 26460 (0.0008) +[2026-06-07 02:55:51,366][472573] Updated weights for policy 0, policy_version 26471 (0.0008) +[2026-06-07 02:55:51,477][472573] Updated weights for policy 0, policy_version 26481 (0.0008) +[2026-06-07 02:55:51,622][472573] Updated weights for policy 0, policy_version 26494 (0.0008) +[2026-06-07 02:55:52,187][472573] Updated weights for policy 0, policy_version 26506 (0.0008) +[2026-06-07 02:55:52,300][472573] Updated weights for policy 0, policy_version 26516 (0.0008) +[2026-06-07 02:55:52,426][472573] Updated weights for policy 0, policy_version 26527 (0.0008) +[2026-06-07 02:55:52,546][472573] Updated weights for policy 0, policy_version 26538 (0.0008) +[2026-06-07 02:55:52,668][472573] Updated weights for policy 0, policy_version 26549 (0.0008) +[2026-06-07 02:55:52,790][472573] Updated weights for policy 0, policy_version 26560 (0.0008) +[2026-06-07 02:55:53,103][464932] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 13598720. Throughput: 0: 28336.4. Samples: 13552768. Policy #0 lag: (min: 6.0, avg: 17.7, max: 70.0) +[2026-06-07 02:55:53,104][464932] Avg episode reward: [(0, '954.848')] +[2026-06-07 02:55:53,108][472028] Saving new best policy, reward=954.848! +[2026-06-07 02:55:53,350][472573] Updated weights for policy 0, policy_version 26570 (0.0004) +[2026-06-07 02:55:53,476][472573] Updated weights for policy 0, policy_version 26581 (0.0004) +[2026-06-07 02:55:53,589][472573] Updated weights for policy 0, policy_version 26591 (0.0004) +[2026-06-07 02:55:53,708][472573] Updated weights for policy 0, policy_version 26602 (0.0004) +[2026-06-07 02:55:53,843][472573] Updated weights for policy 0, policy_version 26614 (0.0004) +[2026-06-07 02:55:54,379][472573] Updated weights for policy 0, policy_version 26625 (0.0004) +[2026-06-07 02:55:54,509][472573] Updated weights for policy 0, policy_version 26636 (0.0004) +[2026-06-07 02:55:54,623][472573] Updated weights for policy 0, policy_version 26646 (0.0004) +[2026-06-07 02:55:54,737][472573] Updated weights for policy 0, policy_version 26656 (0.0004) +[2026-06-07 02:55:54,859][472573] Updated weights for policy 0, policy_version 26667 (0.0008) +[2026-06-07 02:55:54,974][472573] Updated weights for policy 0, policy_version 26677 (0.0008) +[2026-06-07 02:55:55,541][472573] Updated weights for policy 0, policy_version 26690 (0.0008) +[2026-06-07 02:55:55,651][472573] Updated weights for policy 0, policy_version 26700 (0.0008) +[2026-06-07 02:55:55,768][472573] Updated weights for policy 0, policy_version 26711 (0.0008) +[2026-06-07 02:55:55,909][472573] Updated weights for policy 0, policy_version 26723 (0.0009) +[2026-06-07 02:55:56,025][472573] Updated weights for policy 0, policy_version 26733 (0.0009) +[2026-06-07 02:55:56,142][472573] Updated weights for policy 0, policy_version 26744 (0.0009) +[2026-06-07 02:55:56,692][472573] Updated weights for policy 0, policy_version 26754 (0.0008) +[2026-06-07 02:55:56,818][472573] Updated weights for policy 0, policy_version 26765 (0.0006) +[2026-06-07 02:55:56,924][472573] Updated weights for policy 0, policy_version 26775 (0.0008) +[2026-06-07 02:55:57,040][472573] Updated weights for policy 0, policy_version 26785 (0.0008) +[2026-06-07 02:55:57,164][472573] Updated weights for policy 0, policy_version 26796 (0.0009) +[2026-06-07 02:55:57,285][472573] Updated weights for policy 0, policy_version 26807 (0.0009) +[2026-06-07 02:55:57,861][472573] Updated weights for policy 0, policy_version 26819 (0.0009) +[2026-06-07 02:55:57,978][472573] Updated weights for policy 0, policy_version 26829 (0.0008) +[2026-06-07 02:55:58,081][472573] Updated weights for policy 0, policy_version 26839 (0.0008) +[2026-06-07 02:55:58,103][464932] Fps is (10 sec: 29490.7, 60 sec: 28398.8, 300 sec: 28324.9). Total num frames: 13729792. Throughput: 0: 28376.1. Samples: 13730560. Policy #0 lag: (min: 3.0, avg: 40.7, max: 67.0) +[2026-06-07 02:55:58,105][464932] Avg episode reward: [(0, '939.359')] +[2026-06-07 02:55:58,193][472573] Updated weights for policy 0, policy_version 26849 (0.0008) +[2026-06-07 02:55:58,340][472573] Updated weights for policy 0, policy_version 26862 (0.0009) +[2026-06-07 02:55:58,476][472573] Updated weights for policy 0, policy_version 26874 (0.0008) +[2026-06-07 02:55:59,041][472573] Updated weights for policy 0, policy_version 26884 (0.0006) +[2026-06-07 02:55:59,198][472573] Updated weights for policy 0, policy_version 26898 (0.0008) +[2026-06-07 02:55:59,311][472573] Updated weights for policy 0, policy_version 26908 (0.0008) +[2026-06-07 02:55:59,432][472573] Updated weights for policy 0, policy_version 26919 (0.0008) +[2026-06-07 02:55:59,551][472573] Updated weights for policy 0, policy_version 26930 (0.0008) +[2026-06-07 02:55:59,662][472573] Updated weights for policy 0, policy_version 26940 (0.0008) +[2026-06-07 02:56:00,234][472573] Updated weights for policy 0, policy_version 26951 (0.0008) +[2026-06-07 02:56:00,352][472573] Updated weights for policy 0, policy_version 26962 (0.0008) +[2026-06-07 02:56:00,477][472573] Updated weights for policy 0, policy_version 26973 (0.0008) +[2026-06-07 02:56:00,586][472573] Updated weights for policy 0, policy_version 26983 (0.0008) +[2026-06-07 02:56:00,697][472573] Updated weights for policy 0, policy_version 26993 (0.0008) +[2026-06-07 02:56:00,816][472573] Updated weights for policy 0, policy_version 27003 (0.0008) +[2026-06-07 02:56:01,361][472573] Updated weights for policy 0, policy_version 27013 (0.0006) +[2026-06-07 02:56:01,475][472573] Updated weights for policy 0, policy_version 27023 (0.0008) +[2026-06-07 02:56:01,600][472573] Updated weights for policy 0, policy_version 27034 (0.0008) +[2026-06-07 02:56:01,716][472573] Updated weights for policy 0, policy_version 27044 (0.0008) +[2026-06-07 02:56:01,838][472573] Updated weights for policy 0, policy_version 27055 (0.0008) +[2026-06-07 02:56:01,952][472573] Updated weights for policy 0, policy_version 27065 (0.0008) +[2026-06-07 02:56:02,501][472573] Updated weights for policy 0, policy_version 27076 (0.0007) +[2026-06-07 02:56:02,628][472573] Updated weights for policy 0, policy_version 27087 (0.0008) +[2026-06-07 02:56:02,738][472573] Updated weights for policy 0, policy_version 27097 (0.0010) +[2026-06-07 02:56:02,860][472573] Updated weights for policy 0, policy_version 27108 (0.0008) +[2026-06-07 02:56:02,976][472573] Updated weights for policy 0, policy_version 27118 (0.0008) +[2026-06-07 02:56:03,092][472573] Updated weights for policy 0, policy_version 27128 (0.0008) +[2026-06-07 02:56:03,103][464932] Fps is (10 sec: 26214.3, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 13860864. Throughput: 0: 28387.6. Samples: 13901568. Policy #0 lag: (min: 3.0, avg: 40.7, max: 67.0) +[2026-06-07 02:56:03,104][464932] Avg episode reward: [(0, '961.526')] +[2026-06-07 02:56:03,175][472028] Saving new best policy, reward=961.526! +[2026-06-07 02:56:03,653][472573] Updated weights for policy 0, policy_version 27138 (0.0008) +[2026-06-07 02:56:03,783][472573] Updated weights for policy 0, policy_version 27149 (0.0008) +[2026-06-07 02:56:03,890][472573] Updated weights for policy 0, policy_version 27159 (0.0008) +[2026-06-07 02:56:04,027][472573] Updated weights for policy 0, policy_version 27171 (0.0008) +[2026-06-07 02:56:04,142][472573] Updated weights for policy 0, policy_version 27182 (0.0008) +[2026-06-07 02:56:04,266][472573] Updated weights for policy 0, policy_version 27192 (0.0008) +[2026-06-07 02:56:04,842][472573] Updated weights for policy 0, policy_version 27202 (0.0008) +[2026-06-07 02:56:04,961][472573] Updated weights for policy 0, policy_version 27212 (0.0008) +[2026-06-07 02:56:05,092][472573] Updated weights for policy 0, policy_version 27224 (0.0008) +[2026-06-07 02:56:05,202][472573] Updated weights for policy 0, policy_version 27234 (0.0008) +[2026-06-07 02:56:05,347][472573] Updated weights for policy 0, policy_version 27247 (0.0008) +[2026-06-07 02:56:05,490][472573] Updated weights for policy 0, policy_version 27259 (0.0009) +[2026-06-07 02:56:06,047][472573] Updated weights for policy 0, policy_version 27269 (0.0007) +[2026-06-07 02:56:06,169][472573] Updated weights for policy 0, policy_version 27280 (0.0008) +[2026-06-07 02:56:06,291][472573] Updated weights for policy 0, policy_version 27292 (0.0008) +[2026-06-07 02:56:06,431][472573] Updated weights for policy 0, policy_version 27305 (0.0008) +[2026-06-07 02:56:06,561][472573] Updated weights for policy 0, policy_version 27317 (0.0008) +[2026-06-07 02:56:07,153][472573] Updated weights for policy 0, policy_version 27329 (0.0008) +[2026-06-07 02:56:07,286][472573] Updated weights for policy 0, policy_version 27342 (0.0008) +[2026-06-07 02:56:07,416][472573] Updated weights for policy 0, policy_version 27354 (0.0008) +[2026-06-07 02:56:07,534][472573] Updated weights for policy 0, policy_version 27365 (0.0008) +[2026-06-07 02:56:07,653][472573] Updated weights for policy 0, policy_version 27376 (0.0008) +[2026-06-07 02:56:07,774][472573] Updated weights for policy 0, policy_version 27387 (0.0008) +[2026-06-07 02:56:08,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28435.9). Total num frames: 14024704. Throughput: 0: 28367.5. Samples: 13978752. Policy #0 lag: (min: 3.0, avg: 40.7, max: 67.0) +[2026-06-07 02:56:08,105][464932] Avg episode reward: [(0, '1043.559')] +[2026-06-07 02:56:08,110][472028] Saving new best policy, reward=1043.559! +[2026-06-07 02:56:08,387][472573] Updated weights for policy 0, policy_version 27400 (0.0008) +[2026-06-07 02:56:08,494][472573] Updated weights for policy 0, policy_version 27410 (0.0008) +[2026-06-07 02:56:08,600][472573] Updated weights for policy 0, policy_version 27420 (0.0008) +[2026-06-07 02:56:08,734][472573] Updated weights for policy 0, policy_version 27433 (0.0008) +[2026-06-07 02:56:08,883][472573] Updated weights for policy 0, policy_version 27447 (0.0008) +[2026-06-07 02:56:09,490][472573] Updated weights for policy 0, policy_version 27459 (0.0008) +[2026-06-07 02:56:09,615][472573] Updated weights for policy 0, policy_version 27471 (0.0008) +[2026-06-07 02:56:09,743][472573] Updated weights for policy 0, policy_version 27483 (0.0008) +[2026-06-07 02:56:09,853][472573] Updated weights for policy 0, policy_version 27493 (0.0008) +[2026-06-07 02:56:09,989][472573] Updated weights for policy 0, policy_version 27506 (0.0008) +[2026-06-07 02:56:10,123][472573] Updated weights for policy 0, policy_version 27518 (0.0008) +[2026-06-07 02:56:10,701][472573] Updated weights for policy 0, policy_version 27528 (0.0007) +[2026-06-07 02:56:10,831][472573] Updated weights for policy 0, policy_version 27540 (0.0008) +[2026-06-07 02:56:10,957][472573] Updated weights for policy 0, policy_version 27552 (0.0008) +[2026-06-07 02:56:11,058][472573] Updated weights for policy 0, policy_version 27562 (0.0008) +[2026-06-07 02:56:11,179][472573] Updated weights for policy 0, policy_version 27573 (0.0008) +[2026-06-07 02:56:11,799][472573] Updated weights for policy 0, policy_version 27586 (0.0008) +[2026-06-07 02:56:11,921][472573] Updated weights for policy 0, policy_version 27597 (0.0008) +[2026-06-07 02:56:12,041][472573] Updated weights for policy 0, policy_version 27608 (0.0008) +[2026-06-07 02:56:12,167][472573] Updated weights for policy 0, policy_version 27619 (0.0008) +[2026-06-07 02:56:12,279][472573] Updated weights for policy 0, policy_version 27629 (0.0008) +[2026-06-07 02:56:12,409][472573] Updated weights for policy 0, policy_version 27641 (0.0008) +[2026-06-07 02:56:12,963][472573] Updated weights for policy 0, policy_version 27652 (0.0008) +[2026-06-07 02:56:13,088][472573] Updated weights for policy 0, policy_version 27663 (0.0008) +[2026-06-07 02:56:13,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 14155776. Throughput: 0: 28350.7. Samples: 14153600. Policy #0 lag: (min: 3.0, avg: 40.7, max: 67.0) +[2026-06-07 02:56:13,104][464932] Avg episode reward: [(0, '1070.969')] +[2026-06-07 02:56:13,195][472573] Updated weights for policy 0, policy_version 27673 (0.0008) +[2026-06-07 02:56:13,312][472573] Updated weights for policy 0, policy_version 27683 (0.0008) +[2026-06-07 02:56:13,422][472573] Updated weights for policy 0, policy_version 27693 (0.0008) +[2026-06-07 02:56:13,562][472573] Updated weights for policy 0, policy_version 27705 (0.0008) +[2026-06-07 02:56:13,637][472028] Saving new best policy, reward=1070.969! +[2026-06-07 02:56:14,116][472573] Updated weights for policy 0, policy_version 27715 (0.0008) +[2026-06-07 02:56:14,248][472573] Updated weights for policy 0, policy_version 27727 (0.0008) +[2026-06-07 02:56:14,356][472573] Updated weights for policy 0, policy_version 27737 (0.0008) +[2026-06-07 02:56:14,487][472573] Updated weights for policy 0, policy_version 27748 (0.0008) +[2026-06-07 02:56:14,609][472573] Updated weights for policy 0, policy_version 27759 (0.0008) +[2026-06-07 02:56:14,743][472573] Updated weights for policy 0, policy_version 27771 (0.0008) +[2026-06-07 02:56:15,317][472573] Updated weights for policy 0, policy_version 27782 (0.0008) +[2026-06-07 02:56:15,437][472573] Updated weights for policy 0, policy_version 27793 (0.0008) +[2026-06-07 02:56:15,569][472573] Updated weights for policy 0, policy_version 27805 (0.0008) +[2026-06-07 02:56:15,685][472573] Updated weights for policy 0, policy_version 27815 (0.0008) +[2026-06-07 02:56:15,808][472573] Updated weights for policy 0, policy_version 27826 (0.0008) +[2026-06-07 02:56:15,926][472573] Updated weights for policy 0, policy_version 27836 (0.0008) +[2026-06-07 02:56:16,490][472573] Updated weights for policy 0, policy_version 27846 (0.0007) +[2026-06-07 02:56:16,611][472573] Updated weights for policy 0, policy_version 27857 (0.0008) +[2026-06-07 02:56:16,760][472573] Updated weights for policy 0, policy_version 27871 (0.0008) +[2026-06-07 02:56:16,886][472573] Updated weights for policy 0, policy_version 27882 (0.0008) +[2026-06-07 02:56:17,007][472573] Updated weights for policy 0, policy_version 27893 (0.0009) +[2026-06-07 02:56:17,133][472573] Updated weights for policy 0, policy_version 27904 (0.0008) +[2026-06-07 02:56:17,720][472573] Updated weights for policy 0, policy_version 27914 (0.0008) +[2026-06-07 02:56:17,853][472573] Updated weights for policy 0, policy_version 27926 (0.0008) +[2026-06-07 02:56:17,961][472573] Updated weights for policy 0, policy_version 27936 (0.0008) +[2026-06-07 02:56:18,103][464932] Fps is (10 sec: 26214.9, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 14286848. Throughput: 0: 28416.0. Samples: 14327936. Policy #0 lag: (min: 63.0, avg: 75.2, max: 127.0) +[2026-06-07 02:56:18,104][464932] Avg episode reward: [(0, '1093.446')] +[2026-06-07 02:56:18,110][472573] Updated weights for policy 0, policy_version 27949 (0.0004) +[2026-06-07 02:56:18,225][472573] Updated weights for policy 0, policy_version 27959 (0.0009) +[2026-06-07 02:56:18,322][472028] Saving new best policy, reward=1093.446! +[2026-06-07 02:56:18,771][472573] Updated weights for policy 0, policy_version 27970 (0.0008) +[2026-06-07 02:56:18,889][472573] Updated weights for policy 0, policy_version 27980 (0.0008) +[2026-06-07 02:56:19,006][472573] Updated weights for policy 0, policy_version 27991 (0.0008) +[2026-06-07 02:56:19,119][472573] Updated weights for policy 0, policy_version 28001 (0.0008) +[2026-06-07 02:56:19,243][472573] Updated weights for policy 0, policy_version 28012 (0.0008) +[2026-06-07 02:56:19,392][472573] Updated weights for policy 0, policy_version 28025 (0.0008) +[2026-06-07 02:56:19,936][472573] Updated weights for policy 0, policy_version 28035 (0.0008) +[2026-06-07 02:56:20,059][472573] Updated weights for policy 0, policy_version 28046 (0.0008) +[2026-06-07 02:56:20,181][472573] Updated weights for policy 0, policy_version 28057 (0.0008) +[2026-06-07 02:56:20,319][472573] Updated weights for policy 0, policy_version 28069 (0.0008) +[2026-06-07 02:56:20,444][472573] Updated weights for policy 0, policy_version 28080 (0.0008) +[2026-06-07 02:56:20,575][472573] Updated weights for policy 0, policy_version 28092 (0.0008) +[2026-06-07 02:56:21,138][472573] Updated weights for policy 0, policy_version 28102 (0.0006) +[2026-06-07 02:56:21,264][472573] Updated weights for policy 0, policy_version 28113 (0.0007) +[2026-06-07 02:56:21,374][472573] Updated weights for policy 0, policy_version 28123 (0.0005) +[2026-06-07 02:56:21,500][472573] Updated weights for policy 0, policy_version 28134 (0.0006) +[2026-06-07 02:56:21,624][472573] Updated weights for policy 0, policy_version 28145 (0.0006) +[2026-06-07 02:56:21,737][472573] Updated weights for policy 0, policy_version 28155 (0.0005) +[2026-06-07 02:56:22,254][472573] Updated weights for policy 0, policy_version 28165 (0.0006) +[2026-06-07 02:56:22,376][472573] Updated weights for policy 0, policy_version 28176 (0.0008) +[2026-06-07 02:56:22,485][472573] Updated weights for policy 0, policy_version 28186 (0.0008) +[2026-06-07 02:56:22,597][472573] Updated weights for policy 0, policy_version 28196 (0.0008) +[2026-06-07 02:56:22,724][472573] Updated weights for policy 0, policy_version 28207 (0.0008) +[2026-06-07 02:56:22,838][472573] Updated weights for policy 0, policy_version 28217 (0.0008) +[2026-06-07 02:56:23,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 14450688. Throughput: 0: 28356.3. Samples: 14405632. Policy #0 lag: (min: 63.0, avg: 75.2, max: 127.0) +[2026-06-07 02:56:23,104][464932] Avg episode reward: [(0, '1083.362')] +[2026-06-07 02:56:23,399][472573] Updated weights for policy 0, policy_version 28227 (0.0008) +[2026-06-07 02:56:23,515][472573] Updated weights for policy 0, policy_version 28237 (0.0008) +[2026-06-07 02:56:23,622][472573] Updated weights for policy 0, policy_version 28247 (0.0008) +[2026-06-07 02:56:23,746][472573] Updated weights for policy 0, policy_version 28258 (0.0008) +[2026-06-07 02:56:23,871][472573] Updated weights for policy 0, policy_version 28269 (0.0008) +[2026-06-07 02:56:24,016][472573] Updated weights for policy 0, policy_version 28282 (0.0008) +[2026-06-07 02:56:24,583][472573] Updated weights for policy 0, policy_version 28294 (0.0008) +[2026-06-07 02:56:24,712][472573] Updated weights for policy 0, policy_version 28305 (0.0008) +[2026-06-07 02:56:24,832][472573] Updated weights for policy 0, policy_version 28316 (0.0008) +[2026-06-07 02:56:24,935][472573] Updated weights for policy 0, policy_version 28326 (0.0008) +[2026-06-07 02:56:25,054][472573] Updated weights for policy 0, policy_version 28336 (0.0008) +[2026-06-07 02:56:25,164][472573] Updated weights for policy 0, policy_version 28346 (0.0008) +[2026-06-07 02:56:25,741][472573] Updated weights for policy 0, policy_version 28358 (0.0006) +[2026-06-07 02:56:25,864][472573] Updated weights for policy 0, policy_version 28369 (0.0008) +[2026-06-07 02:56:25,986][472573] Updated weights for policy 0, policy_version 28380 (0.0008) +[2026-06-07 02:56:26,110][472573] Updated weights for policy 0, policy_version 28391 (0.0008) +[2026-06-07 02:56:26,237][472573] Updated weights for policy 0, policy_version 28402 (0.0008) +[2026-06-07 02:56:26,352][472573] Updated weights for policy 0, policy_version 28412 (0.0008) +[2026-06-07 02:56:26,899][472573] Updated weights for policy 0, policy_version 28422 (0.0007) +[2026-06-07 02:56:27,019][472573] Updated weights for policy 0, policy_version 28433 (0.0008) +[2026-06-07 02:56:27,141][472573] Updated weights for policy 0, policy_version 28444 (0.0008) +[2026-06-07 02:56:27,251][472573] Updated weights for policy 0, policy_version 28454 (0.0009) +[2026-06-07 02:56:27,393][472573] Updated weights for policy 0, policy_version 28466 (0.0008) +[2026-06-07 02:56:27,512][472573] Updated weights for policy 0, policy_version 28477 (0.0008) +[2026-06-07 02:56:28,048][472573] Updated weights for policy 0, policy_version 28487 (0.0005) +[2026-06-07 02:56:28,103][464932] Fps is (10 sec: 29490.6, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 14581760. Throughput: 0: 28231.0. Samples: 14576896. Policy #0 lag: (min: 63.0, avg: 75.2, max: 127.0) +[2026-06-07 02:56:28,105][464932] Avg episode reward: [(0, '1068.906')] +[2026-06-07 02:56:28,171][472573] Updated weights for policy 0, policy_version 28498 (0.0006) +[2026-06-07 02:56:28,292][472573] Updated weights for policy 0, policy_version 28509 (0.0009) +[2026-06-07 02:56:28,426][472573] Updated weights for policy 0, policy_version 28521 (0.0008) +[2026-06-07 02:56:28,540][472573] Updated weights for policy 0, policy_version 28531 (0.0008) +[2026-06-07 02:56:28,650][472573] Updated weights for policy 0, policy_version 28541 (0.0008) +[2026-06-07 02:56:29,203][472573] Updated weights for policy 0, policy_version 28551 (0.0008) +[2026-06-07 02:56:29,312][472573] Updated weights for policy 0, policy_version 28561 (0.0009) +[2026-06-07 02:56:29,437][472573] Updated weights for policy 0, policy_version 28572 (0.0009) +[2026-06-07 02:56:29,550][472573] Updated weights for policy 0, policy_version 28582 (0.0008) +[2026-06-07 02:56:29,680][472573] Updated weights for policy 0, policy_version 28594 (0.0008) +[2026-06-07 02:56:29,796][472573] Updated weights for policy 0, policy_version 28604 (0.0008) +[2026-06-07 02:56:30,343][472573] Updated weights for policy 0, policy_version 28615 (0.0008) +[2026-06-07 02:56:30,453][472573] Updated weights for policy 0, policy_version 28625 (0.0008) +[2026-06-07 02:56:30,572][472573] Updated weights for policy 0, policy_version 28635 (0.0008) +[2026-06-07 02:56:30,740][472573] Updated weights for policy 0, policy_version 28650 (0.0008) +[2026-06-07 02:56:30,865][472573] Updated weights for policy 0, policy_version 28661 (0.0008) +[2026-06-07 02:56:31,452][472573] Updated weights for policy 0, policy_version 28673 (0.0008) +[2026-06-07 02:56:31,588][472573] Updated weights for policy 0, policy_version 28685 (0.0008) +[2026-06-07 02:56:31,691][472573] Updated weights for policy 0, policy_version 28695 (0.0009) +[2026-06-07 02:56:31,824][472573] Updated weights for policy 0, policy_version 28706 (0.0008) +[2026-06-07 02:56:31,926][472573] Updated weights for policy 0, policy_version 28716 (0.0009) +[2026-06-07 02:56:32,046][472573] Updated weights for policy 0, policy_version 28726 (0.0009) +[2026-06-07 02:56:32,608][472573] Updated weights for policy 0, policy_version 28737 (0.0008) +[2026-06-07 02:56:32,722][472573] Updated weights for policy 0, policy_version 28747 (0.0008) +[2026-06-07 02:56:32,836][472573] Updated weights for policy 0, policy_version 28757 (0.0008) +[2026-06-07 02:56:32,955][472573] Updated weights for policy 0, policy_version 28767 (0.0008) +[2026-06-07 02:56:33,057][472573] Updated weights for policy 0, policy_version 28777 (0.0008) +[2026-06-07 02:56:33,103][464932] Fps is (10 sec: 26214.7, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 14712832. Throughput: 0: 28393.3. Samples: 14753280. Policy #0 lag: (min: 63.0, avg: 75.2, max: 127.0) +[2026-06-07 02:56:33,104][464932] Avg episode reward: [(0, '1098.618')] +[2026-06-07 02:56:33,186][472573] Updated weights for policy 0, policy_version 28788 (0.0008) +[2026-06-07 02:56:33,307][472573] Updated weights for policy 0, policy_version 28799 (0.0008) +[2026-06-07 02:56:33,323][472028] Saving new best policy, reward=1098.618! +[2026-06-07 02:56:33,864][472573] Updated weights for policy 0, policy_version 28809 (0.0006) +[2026-06-07 02:56:33,977][472573] Updated weights for policy 0, policy_version 28819 (0.0008) +[2026-06-07 02:56:34,088][472573] Updated weights for policy 0, policy_version 28829 (0.0008) +[2026-06-07 02:56:34,212][472573] Updated weights for policy 0, policy_version 28840 (0.0008) +[2026-06-07 02:56:34,338][472573] Updated weights for policy 0, policy_version 28851 (0.0008) +[2026-06-07 02:56:34,453][472573] Updated weights for policy 0, policy_version 28861 (0.0008) +[2026-06-07 02:56:35,008][472573] Updated weights for policy 0, policy_version 28872 (0.0008) +[2026-06-07 02:56:35,125][472573] Updated weights for policy 0, policy_version 28883 (0.0008) +[2026-06-07 02:56:35,236][472573] Updated weights for policy 0, policy_version 28893 (0.0008) +[2026-06-07 02:56:35,349][472573] Updated weights for policy 0, policy_version 28903 (0.0008) +[2026-06-07 02:56:35,486][472573] Updated weights for policy 0, policy_version 28915 (0.0008) +[2026-06-07 02:56:35,608][472573] Updated weights for policy 0, policy_version 28926 (0.0008) +[2026-06-07 02:56:36,154][472573] Updated weights for policy 0, policy_version 28937 (0.0008) +[2026-06-07 02:56:36,274][472573] Updated weights for policy 0, policy_version 28947 (0.0008) +[2026-06-07 02:56:36,385][472573] Updated weights for policy 0, policy_version 28957 (0.0008) +[2026-06-07 02:56:36,499][472573] Updated weights for policy 0, policy_version 28967 (0.0008) +[2026-06-07 02:56:36,619][472573] Updated weights for policy 0, policy_version 28978 (0.0008) +[2026-06-07 02:56:36,742][472573] Updated weights for policy 0, policy_version 28989 (0.0008) +[2026-06-07 02:56:37,291][472573] Updated weights for policy 0, policy_version 28999 (0.0007) +[2026-06-07 02:56:37,395][472573] Updated weights for policy 0, policy_version 29009 (0.0008) +[2026-06-07 02:56:37,523][472573] Updated weights for policy 0, policy_version 29020 (0.0009) +[2026-06-07 02:56:37,644][472573] Updated weights for policy 0, policy_version 29031 (0.0008) +[2026-06-07 02:56:37,757][472573] Updated weights for policy 0, policy_version 29041 (0.0008) +[2026-06-07 02:56:37,870][472573] Updated weights for policy 0, policy_version 29051 (0.0008) +[2026-06-07 02:56:38,103][464932] Fps is (10 sec: 29491.8, 60 sec: 28399.0, 300 sec: 28436.0). Total num frames: 14876672. Throughput: 0: 28393.3. Samples: 14830464. Policy #0 lag: (min: 63.0, avg: 74.4, max: 127.0) +[2026-06-07 02:56:38,104][464932] Avg episode reward: [(0, '1057.532')] +[2026-06-07 02:56:38,421][472573] Updated weights for policy 0, policy_version 29061 (0.0008) +[2026-06-07 02:56:38,541][472573] Updated weights for policy 0, policy_version 29072 (0.0008) +[2026-06-07 02:56:38,645][472573] Updated weights for policy 0, policy_version 29082 (0.0008) +[2026-06-07 02:56:38,764][472573] Updated weights for policy 0, policy_version 29092 (0.0008) +[2026-06-07 02:56:38,876][472573] Updated weights for policy 0, policy_version 29102 (0.0008) +[2026-06-07 02:56:38,982][472573] Updated weights for policy 0, policy_version 29112 (0.0008) +[2026-06-07 02:56:39,533][472573] Updated weights for policy 0, policy_version 29122 (0.0008) +[2026-06-07 02:56:39,646][472573] Updated weights for policy 0, policy_version 29132 (0.0008) +[2026-06-07 02:56:39,755][472573] Updated weights for policy 0, policy_version 29142 (0.0008) +[2026-06-07 02:56:39,879][472573] Updated weights for policy 0, policy_version 29153 (0.0008) +[2026-06-07 02:56:39,990][472573] Updated weights for policy 0, policy_version 29163 (0.0008) +[2026-06-07 02:56:40,113][472573] Updated weights for policy 0, policy_version 29174 (0.0008) +[2026-06-07 02:56:40,223][472573] Updated weights for policy 0, policy_version 29184 (0.0008) +[2026-06-07 02:56:40,789][472573] Updated weights for policy 0, policy_version 29195 (0.0008) +[2026-06-07 02:56:40,906][472573] Updated weights for policy 0, policy_version 29206 (0.0008) +[2026-06-07 02:56:41,018][472573] Updated weights for policy 0, policy_version 29216 (0.0008) +[2026-06-07 02:56:41,128][472573] Updated weights for policy 0, policy_version 29226 (0.0008) +[2026-06-07 02:56:41,254][472573] Updated weights for policy 0, policy_version 29237 (0.0008) +[2026-06-07 02:56:41,835][472573] Updated weights for policy 0, policy_version 29249 (0.0008) +[2026-06-07 02:56:41,949][472573] Updated weights for policy 0, policy_version 29259 (0.0008) +[2026-06-07 02:56:42,072][472573] Updated weights for policy 0, policy_version 29270 (0.0008) +[2026-06-07 02:56:42,188][472573] Updated weights for policy 0, policy_version 29280 (0.0008) +[2026-06-07 02:56:42,299][472573] Updated weights for policy 0, policy_version 29290 (0.0008) +[2026-06-07 02:56:42,436][472573] Updated weights for policy 0, policy_version 29302 (0.0008) +[2026-06-07 02:56:43,002][472573] Updated weights for policy 0, policy_version 29314 (0.0008) +[2026-06-07 02:56:43,103][464932] Fps is (10 sec: 29491.0, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 15007744. Throughput: 0: 28262.5. Samples: 15002368. Policy #0 lag: (min: 63.0, avg: 74.4, max: 127.0) +[2026-06-07 02:56:43,104][464932] Avg episode reward: [(0, '1049.991')] +[2026-06-07 02:56:43,132][472573] Updated weights for policy 0, policy_version 29326 (0.0006) +[2026-06-07 02:56:43,247][472573] Updated weights for policy 0, policy_version 29336 (0.0008) +[2026-06-07 02:56:43,356][472573] Updated weights for policy 0, policy_version 29346 (0.0008) +[2026-06-07 02:56:43,482][472573] Updated weights for policy 0, policy_version 29357 (0.0008) +[2026-06-07 02:56:43,596][472573] Updated weights for policy 0, policy_version 29367 (0.0008) +[2026-06-07 02:56:44,150][472573] Updated weights for policy 0, policy_version 29377 (0.0008) +[2026-06-07 02:56:44,261][472573] Updated weights for policy 0, policy_version 29387 (0.0008) +[2026-06-07 02:56:44,392][472573] Updated weights for policy 0, policy_version 29399 (0.0008) +[2026-06-07 02:56:44,514][472573] Updated weights for policy 0, policy_version 29410 (0.0008) +[2026-06-07 02:56:44,628][472573] Updated weights for policy 0, policy_version 29420 (0.0008) +[2026-06-07 02:56:44,766][472573] Updated weights for policy 0, policy_version 29432 (0.0009) +[2026-06-07 02:56:45,322][472573] Updated weights for policy 0, policy_version 29443 (0.0008) +[2026-06-07 02:56:45,439][472573] Updated weights for policy 0, policy_version 29453 (0.0008) +[2026-06-07 02:56:45,588][472573] Updated weights for policy 0, policy_version 29466 (0.0009) +[2026-06-07 02:56:45,698][472573] Updated weights for policy 0, policy_version 29476 (0.0008) +[2026-06-07 02:56:45,824][472573] Updated weights for policy 0, policy_version 29487 (0.0008) +[2026-06-07 02:56:45,960][472573] Updated weights for policy 0, policy_version 29499 (0.0009) +[2026-06-07 02:56:46,497][472573] Updated weights for policy 0, policy_version 29509 (0.0008) +[2026-06-07 02:56:46,611][472573] Updated weights for policy 0, policy_version 29520 (0.0008) +[2026-06-07 02:56:46,726][472573] Updated weights for policy 0, policy_version 29530 (0.0008) +[2026-06-07 02:56:46,837][472573] Updated weights for policy 0, policy_version 29540 (0.0008) +[2026-06-07 02:56:46,959][472573] Updated weights for policy 0, policy_version 29551 (0.0008) +[2026-06-07 02:56:47,072][472573] Updated weights for policy 0, policy_version 29561 (0.0008) +[2026-06-07 02:56:47,654][472573] Updated weights for policy 0, policy_version 29573 (0.0008) +[2026-06-07 02:56:47,776][472573] Updated weights for policy 0, policy_version 29584 (0.0008) +[2026-06-07 02:56:47,901][472573] Updated weights for policy 0, policy_version 29595 (0.0008) +[2026-06-07 02:56:48,033][472573] Updated weights for policy 0, policy_version 29607 (0.0008) +[2026-06-07 02:56:48,103][464932] Fps is (10 sec: 26214.4, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 15138816. Throughput: 0: 28387.6. Samples: 15179008. Policy #0 lag: (min: 63.0, avg: 74.4, max: 127.0) +[2026-06-07 02:56:48,104][464932] Avg episode reward: [(0, '999.666')] +[2026-06-07 02:56:48,147][472573] Updated weights for policy 0, policy_version 29617 (0.0008) +[2026-06-07 02:56:48,220][472028] Early stopping after 7 epochs (56 sgd steps), loss delta 0.0000008 +[2026-06-07 02:56:48,745][472573] Updated weights for policy 0, policy_version 29630 (0.0007) +[2026-06-07 02:56:48,855][472573] Updated weights for policy 0, policy_version 29640 (0.0008) +[2026-06-07 02:56:48,971][472573] Updated weights for policy 0, policy_version 29650 (0.0008) +[2026-06-07 02:56:49,101][472573] Updated weights for policy 0, policy_version 29662 (0.0009) +[2026-06-07 02:56:49,236][472573] Updated weights for policy 0, policy_version 29674 (0.0008) +[2026-06-07 02:56:49,362][472573] Updated weights for policy 0, policy_version 29685 (0.0008) +[2026-06-07 02:56:49,922][472573] Updated weights for policy 0, policy_version 29695 (0.0007) +[2026-06-07 02:56:50,056][472573] Updated weights for policy 0, policy_version 29707 (0.0008) +[2026-06-07 02:56:50,177][472573] Updated weights for policy 0, policy_version 29718 (0.0008) +[2026-06-07 02:56:50,300][472573] Updated weights for policy 0, policy_version 29729 (0.0008) +[2026-06-07 02:56:50,427][472573] Updated weights for policy 0, policy_version 29740 (0.0008) +[2026-06-07 02:56:50,542][472573] Updated weights for policy 0, policy_version 29750 (0.0008) +[2026-06-07 02:56:51,081][472573] Updated weights for policy 0, policy_version 29760 (0.0008) +[2026-06-07 02:56:51,198][472573] Updated weights for policy 0, policy_version 29770 (0.0008) +[2026-06-07 02:56:51,317][472573] Updated weights for policy 0, policy_version 29781 (0.0008) +[2026-06-07 02:56:51,432][472573] Updated weights for policy 0, policy_version 29791 (0.0008) +[2026-06-07 02:56:51,548][472573] Updated weights for policy 0, policy_version 29801 (0.0008) +[2026-06-07 02:56:51,658][472573] Updated weights for policy 0, policy_version 29811 (0.0008) +[2026-06-07 02:56:52,203][472573] Updated weights for policy 0, policy_version 29821 (0.0008) +[2026-06-07 02:56:52,325][472573] Updated weights for policy 0, policy_version 29832 (0.0008) +[2026-06-07 02:56:52,433][472573] Updated weights for policy 0, policy_version 29842 (0.0009) +[2026-06-07 02:56:52,556][472573] Updated weights for policy 0, policy_version 29853 (0.0008) +[2026-06-07 02:56:52,671][472573] Updated weights for policy 0, policy_version 29863 (0.0008) +[2026-06-07 02:56:52,812][472573] Updated weights for policy 0, policy_version 29875 (0.0008) +[2026-06-07 02:56:53,103][464932] Fps is (10 sec: 29491.3, 60 sec: 28399.0, 300 sec: 28436.0). Total num frames: 15302656. Throughput: 0: 28421.8. Samples: 15257728. Policy #0 lag: (min: 63.0, avg: 74.4, max: 127.0) +[2026-06-07 02:56:53,104][464932] Avg episode reward: [(0, '999.534')] +[2026-06-07 02:56:53,357][472573] Updated weights for policy 0, policy_version 29885 (0.0008) +[2026-06-07 02:56:53,468][472573] Updated weights for policy 0, policy_version 29895 (0.0008) +[2026-06-07 02:56:53,584][472573] Updated weights for policy 0, policy_version 29905 (0.0008) +[2026-06-07 02:56:53,693][472573] Updated weights for policy 0, policy_version 29915 (0.0008) +[2026-06-07 02:56:53,816][472573] Updated weights for policy 0, policy_version 29926 (0.0008) +[2026-06-07 02:56:53,939][472573] Updated weights for policy 0, policy_version 29937 (0.0008) +[2026-06-07 02:56:54,490][472573] Updated weights for policy 0, policy_version 29947 (0.0008) +[2026-06-07 02:56:54,613][472573] Updated weights for policy 0, policy_version 29958 (0.0008) +[2026-06-07 02:56:54,736][472573] Updated weights for policy 0, policy_version 29969 (0.0008) +[2026-06-07 02:56:54,852][472573] Updated weights for policy 0, policy_version 29979 (0.0008) +[2026-06-07 02:56:54,966][472573] Updated weights for policy 0, policy_version 29989 (0.0008) +[2026-06-07 02:56:55,104][472573] Updated weights for policy 0, policy_version 30001 (0.0009) +[2026-06-07 02:56:55,636][472573] Updated weights for policy 0, policy_version 30011 (0.0008) +[2026-06-07 02:56:55,754][472573] Updated weights for policy 0, policy_version 30021 (0.0008) +[2026-06-07 02:56:55,867][472573] Updated weights for policy 0, policy_version 30031 (0.0008) +[2026-06-07 02:56:55,994][472573] Updated weights for policy 0, policy_version 30042 (0.0008) +[2026-06-07 02:56:56,104][472573] Updated weights for policy 0, policy_version 30052 (0.0008) +[2026-06-07 02:56:56,221][472573] Updated weights for policy 0, policy_version 30063 (0.0008) +[2026-06-07 02:56:56,772][472573] Updated weights for policy 0, policy_version 30073 (0.0008) +[2026-06-07 02:56:56,883][472573] Updated weights for policy 0, policy_version 30083 (0.0008) +[2026-06-07 02:56:57,029][472573] Updated weights for policy 0, policy_version 30096 (0.0009) +[2026-06-07 02:56:57,139][472573] Updated weights for policy 0, policy_version 30106 (0.0008) +[2026-06-07 02:56:57,268][472573] Updated weights for policy 0, policy_version 30117 (0.0008) +[2026-06-07 02:56:57,404][472573] Updated weights for policy 0, policy_version 30129 (0.0009) +[2026-06-07 02:56:57,959][472573] Updated weights for policy 0, policy_version 30139 (0.0008) +[2026-06-07 02:56:58,080][472573] Updated weights for policy 0, policy_version 30150 (0.0008) +[2026-06-07 02:56:58,103][464932] Fps is (10 sec: 29490.6, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 15433728. Throughput: 0: 28407.4. Samples: 15431936. Policy #0 lag: (min: 63.0, avg: 74.9, max: 127.0) +[2026-06-07 02:56:58,105][464932] Avg episode reward: [(0, '981.917')] +[2026-06-07 02:56:58,187][472573] Updated weights for policy 0, policy_version 30160 (0.0008) +[2026-06-07 02:56:58,303][472573] Updated weights for policy 0, policy_version 30170 (0.0008) +[2026-06-07 02:56:58,427][472573] Updated weights for policy 0, policy_version 30181 (0.0008) +[2026-06-07 02:56:58,539][472573] Updated weights for policy 0, policy_version 30191 (0.0008) +[2026-06-07 02:56:58,634][472028] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed13/checkpoint_p0/checkpoint_000030200_15466496.pth... +[2026-06-07 02:56:59,102][472573] Updated weights for policy 0, policy_version 30202 (0.0008) +[2026-06-07 02:56:59,214][472573] Updated weights for policy 0, policy_version 30212 (0.0008) +[2026-06-07 02:56:59,322][472573] Updated weights for policy 0, policy_version 30222 (0.0008) +[2026-06-07 02:56:59,446][472573] Updated weights for policy 0, policy_version 30233 (0.0008) +[2026-06-07 02:56:59,555][472573] Updated weights for policy 0, policy_version 30243 (0.0008) +[2026-06-07 02:56:59,670][472573] Updated weights for policy 0, policy_version 30253 (0.0008) +[2026-06-07 02:56:59,782][472573] Updated weights for policy 0, policy_version 30263 (0.0008) +[2026-06-07 02:57:00,312][472573] Updated weights for policy 0, policy_version 30273 (0.0007) +[2026-06-07 02:57:00,448][472573] Updated weights for policy 0, policy_version 30285 (0.0008) +[2026-06-07 02:57:00,559][472573] Updated weights for policy 0, policy_version 30295 (0.0008) +[2026-06-07 02:57:00,680][472573] Updated weights for policy 0, policy_version 30306 (0.0008) +[2026-06-07 02:57:00,811][472573] Updated weights for policy 0, policy_version 30318 (0.0008) +[2026-06-07 02:57:00,925][472573] Updated weights for policy 0, policy_version 30328 (0.0008) +[2026-06-07 02:57:01,490][472573] Updated weights for policy 0, policy_version 30338 (0.0008) +[2026-06-07 02:57:01,616][472573] Updated weights for policy 0, policy_version 30350 (0.0008) +[2026-06-07 02:57:01,735][472573] Updated weights for policy 0, policy_version 30360 (0.0008) +[2026-06-07 02:57:01,857][472573] Updated weights for policy 0, policy_version 30371 (0.0008) +[2026-06-07 02:57:01,981][472573] Updated weights for policy 0, policy_version 30382 (0.0008) +[2026-06-07 02:57:02,090][472573] Updated weights for policy 0, policy_version 30392 (0.0008) +[2026-06-07 02:57:02,655][472573] Updated weights for policy 0, policy_version 30402 (0.0007) +[2026-06-07 02:57:02,783][472573] Updated weights for policy 0, policy_version 30413 (0.0008) +[2026-06-07 02:57:02,903][472573] Updated weights for policy 0, policy_version 30424 (0.0008) +[2026-06-07 02:57:03,017][472573] Updated weights for policy 0, policy_version 30434 (0.0008) +[2026-06-07 02:57:03,103][464932] Fps is (10 sec: 26214.4, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 15564800. Throughput: 0: 28421.7. Samples: 15606912. Policy #0 lag: (min: 63.0, avg: 74.9, max: 127.0) +[2026-06-07 02:57:03,104][464932] Avg episode reward: [(0, '1020.196')] +[2026-06-07 02:57:03,137][472573] Updated weights for policy 0, policy_version 30445 (0.0008) +[2026-06-07 02:57:03,254][472573] Updated weights for policy 0, policy_version 30455 (0.0008) +[2026-06-07 02:57:03,804][472573] Updated weights for policy 0, policy_version 30466 (0.0008) +[2026-06-07 02:57:03,914][472573] Updated weights for policy 0, policy_version 30476 (0.0008) +[2026-06-07 02:57:04,027][472573] Updated weights for policy 0, policy_version 30486 (0.0008) +[2026-06-07 02:57:04,155][472573] Updated weights for policy 0, policy_version 30497 (0.0008) +[2026-06-07 02:57:04,278][472573] Updated weights for policy 0, policy_version 30508 (0.0008) +[2026-06-07 02:57:04,408][472573] Updated weights for policy 0, policy_version 30520 (0.0008) +[2026-06-07 02:57:04,987][472573] Updated weights for policy 0, policy_version 30530 (0.0008) +[2026-06-07 02:57:05,111][472573] Updated weights for policy 0, policy_version 30541 (0.0008) +[2026-06-07 02:57:05,238][472573] Updated weights for policy 0, policy_version 30552 (0.0008) +[2026-06-07 02:57:05,354][472573] Updated weights for policy 0, policy_version 30563 (0.0008) +[2026-06-07 02:57:05,470][472573] Updated weights for policy 0, policy_version 30573 (0.0008) +[2026-06-07 02:57:05,587][472573] Updated weights for policy 0, policy_version 30583 (0.0008) +[2026-06-07 02:57:06,124][472573] Updated weights for policy 0, policy_version 30594 (0.0007) +[2026-06-07 02:57:06,231][472573] Updated weights for policy 0, policy_version 30604 (0.0008) +[2026-06-07 02:57:06,337][472573] Updated weights for policy 0, policy_version 30614 (0.0008) +[2026-06-07 02:57:06,478][472573] Updated weights for policy 0, policy_version 30626 (0.0008) +[2026-06-07 02:57:06,586][472573] Updated weights for policy 0, policy_version 30636 (0.0008) +[2026-06-07 02:57:07,207][472573] Updated weights for policy 0, policy_version 30649 (0.0008) +[2026-06-07 02:57:07,334][472573] Updated weights for policy 0, policy_version 30661 (0.0008) +[2026-06-07 02:57:07,459][472573] Updated weights for policy 0, policy_version 30673 (0.0008) +[2026-06-07 02:57:07,594][472573] Updated weights for policy 0, policy_version 30685 (0.0008) +[2026-06-07 02:57:07,719][472573] Updated weights for policy 0, policy_version 30696 (0.0008) +[2026-06-07 02:57:07,854][472573] Updated weights for policy 0, policy_version 30708 (0.0007) +[2026-06-07 02:57:08,103][464932] Fps is (10 sec: 29491.1, 60 sec: 28398.9, 300 sec: 28435.9). Total num frames: 15728640. Throughput: 0: 28384.6. Samples: 15682944. Policy #0 lag: (min: 63.0, avg: 74.9, max: 127.0) +[2026-06-07 02:57:08,105][464932] Avg episode reward: [(0, '1013.545')] +[2026-06-07 02:57:08,410][472573] Updated weights for policy 0, policy_version 30718 (0.0007) +[2026-06-07 02:57:08,529][472573] Updated weights for policy 0, policy_version 30729 (0.0008) +[2026-06-07 02:57:08,647][472573] Updated weights for policy 0, policy_version 30740 (0.0008) +[2026-06-07 02:57:08,781][472573] Updated weights for policy 0, policy_version 30752 (0.0008) +[2026-06-07 02:57:08,888][472573] Updated weights for policy 0, policy_version 30762 (0.0008) +[2026-06-07 02:57:09,013][472573] Updated weights for policy 0, policy_version 30773 (0.0008) +[2026-06-07 02:57:09,576][472573] Updated weights for policy 0, policy_version 30783 (0.0008) +[2026-06-07 02:57:09,705][472573] Updated weights for policy 0, policy_version 30794 (0.0008) +[2026-06-07 02:57:09,813][472573] Updated weights for policy 0, policy_version 30804 (0.0008) +[2026-06-07 02:57:09,938][472573] Updated weights for policy 0, policy_version 30815 (0.0008) +[2026-06-07 02:57:10,051][472573] Updated weights for policy 0, policy_version 30825 (0.0008) +[2026-06-07 02:57:10,166][472573] Updated weights for policy 0, policy_version 30835 (0.0008) +[2026-06-07 02:57:10,730][472573] Updated weights for policy 0, policy_version 30846 (0.0007) +[2026-06-07 02:57:10,854][472573] Updated weights for policy 0, policy_version 30857 (0.0008) +[2026-06-07 02:57:10,973][472573] Updated weights for policy 0, policy_version 30868 (0.0008) +[2026-06-07 02:57:11,098][472573] Updated weights for policy 0, policy_version 30879 (0.0008) +[2026-06-07 02:57:11,209][472573] Updated weights for policy 0, policy_version 30889 (0.0008) +[2026-06-07 02:57:11,319][472573] Updated weights for policy 0, policy_version 30899 (0.0008) +[2026-06-07 02:57:11,885][472573] Updated weights for policy 0, policy_version 30910 (0.0007) +[2026-06-07 02:57:12,006][472573] Updated weights for policy 0, policy_version 30921 (0.0008) +[2026-06-07 02:57:12,117][472573] Updated weights for policy 0, policy_version 30931 (0.0008) +[2026-06-07 02:57:12,232][472573] Updated weights for policy 0, policy_version 30941 (0.0008) +[2026-06-07 02:57:12,348][472573] Updated weights for policy 0, policy_version 30951 (0.0008) +[2026-06-07 02:57:12,473][472573] Updated weights for policy 0, policy_version 30962 (0.0008) +[2026-06-07 02:57:13,069][472573] Updated weights for policy 0, policy_version 30976 (0.0008) +[2026-06-07 02:57:13,103][464932] Fps is (10 sec: 29491.1, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 15859712. Throughput: 0: 28416.1. Samples: 15855616. Policy #0 lag: (min: 63.0, avg: 74.9, max: 127.0) +[2026-06-07 02:57:13,104][464932] Avg episode reward: [(0, '1002.214')] +[2026-06-07 02:57:13,193][472573] Updated weights for policy 0, policy_version 30987 (0.0008) +[2026-06-07 02:57:13,316][472573] Updated weights for policy 0, policy_version 30998 (0.0008) +[2026-06-07 02:57:13,427][472573] Updated weights for policy 0, policy_version 31008 (0.0008) +[2026-06-07 02:57:13,540][472573] Updated weights for policy 0, policy_version 31018 (0.0008) +[2026-06-07 02:57:13,682][472573] Updated weights for policy 0, policy_version 31030 (0.0008) +[2026-06-07 02:57:14,218][472573] Updated weights for policy 0, policy_version 31040 (0.0008) +[2026-06-07 02:57:14,351][472573] Updated weights for policy 0, policy_version 31052 (0.0008) +[2026-06-07 02:57:14,461][472573] Updated weights for policy 0, policy_version 31062 (0.0008) +[2026-06-07 02:57:14,572][472573] Updated weights for policy 0, policy_version 31072 (0.0008) +[2026-06-07 02:57:14,689][472573] Updated weights for policy 0, policy_version 31082 (0.0008) +[2026-06-07 02:57:14,802][472573] Updated weights for policy 0, policy_version 31092 (0.0008) +[2026-06-07 02:57:15,358][472573] Updated weights for policy 0, policy_version 31102 (0.0008) +[2026-06-07 02:57:15,471][472573] Updated weights for policy 0, policy_version 31112 (0.0008) +[2026-06-07 02:57:15,587][472573] Updated weights for policy 0, policy_version 31122 (0.0008) +[2026-06-07 02:57:15,699][472573] Updated weights for policy 0, policy_version 31132 (0.0008) +[2026-06-07 02:57:15,835][472573] Updated weights for policy 0, policy_version 31144 (0.0008) +[2026-06-07 02:57:15,943][472573] Updated weights for policy 0, policy_version 31154 (0.0008) +[2026-06-07 02:57:16,498][472573] Updated weights for policy 0, policy_version 31164 (0.0008) +[2026-06-07 02:57:16,604][472573] Updated weights for policy 0, policy_version 31174 (0.0008) +[2026-06-07 02:57:16,734][472573] Updated weights for policy 0, policy_version 31185 (0.0008) +[2026-06-07 02:57:16,854][472573] Updated weights for policy 0, policy_version 31196 (0.0008) +[2026-06-07 02:57:16,962][472573] Updated weights for policy 0, policy_version 31206 (0.0008) +[2026-06-07 02:57:17,078][472573] Updated weights for policy 0, policy_version 31216 (0.0008) +[2026-06-07 02:57:17,642][472573] Updated weights for policy 0, policy_version 31227 (0.0008) +[2026-06-07 02:57:17,781][472573] Updated weights for policy 0, policy_version 31240 (0.0008) +[2026-06-07 02:57:17,890][472573] Updated weights for policy 0, policy_version 31250 (0.0008) +[2026-06-07 02:57:18,013][472573] Updated weights for policy 0, policy_version 31261 (0.0008) +[2026-06-07 02:57:18,103][464932] Fps is (10 sec: 26214.9, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 15990784. Throughput: 0: 28410.2. Samples: 16031744. Policy #0 lag: (min: 63.0, avg: 75.8, max: 127.0) +[2026-06-07 02:57:18,104][464932] Avg episode reward: [(0, '1028.795')] +[2026-06-07 02:57:18,128][472573] Updated weights for policy 0, policy_version 31271 (0.0008) +[2026-06-07 02:57:18,241][472573] Updated weights for policy 0, policy_version 31281 (0.0008) +[2026-06-07 02:57:18,804][472573] Updated weights for policy 0, policy_version 31291 (0.0008) +[2026-06-07 02:57:18,927][472573] Updated weights for policy 0, policy_version 31302 (0.0008) +[2026-06-07 02:57:19,054][472573] Updated weights for policy 0, policy_version 31313 (0.0008) +[2026-06-07 02:57:19,189][472573] Updated weights for policy 0, policy_version 31325 (0.0008) +[2026-06-07 02:57:19,313][472573] Updated weights for policy 0, policy_version 31336 (0.0008) +[2026-06-07 02:57:19,439][472573] Updated weights for policy 0, policy_version 31347 (0.0008) +[2026-06-07 02:57:19,981][472573] Updated weights for policy 0, policy_version 31357 (0.0008) +[2026-06-07 02:57:20,093][472573] Updated weights for policy 0, policy_version 31367 (0.0008) +[2026-06-07 02:57:20,201][472573] Updated weights for policy 0, policy_version 31377 (0.0008) +[2026-06-07 02:57:20,312][472573] Updated weights for policy 0, policy_version 31387 (0.0008) +[2026-06-07 02:57:20,421][472573] Updated weights for policy 0, policy_version 31397 (0.0008) +[2026-06-07 02:57:20,537][472573] Updated weights for policy 0, policy_version 31407 (0.0007) +[2026-06-07 02:57:21,105][472573] Updated weights for policy 0, policy_version 31418 (0.0008) +[2026-06-07 02:57:21,229][472573] Updated weights for policy 0, policy_version 31429 (0.0008) +[2026-06-07 02:57:21,343][472573] Updated weights for policy 0, policy_version 31439 (0.0008) +[2026-06-07 02:57:21,461][472573] Updated weights for policy 0, policy_version 31450 (0.0008) +[2026-06-07 02:57:21,591][472573] Updated weights for policy 0, policy_version 31461 (0.0008) +[2026-06-07 02:57:21,701][472573] Updated weights for policy 0, policy_version 31471 (0.0008) +[2026-06-07 02:57:22,270][472573] Updated weights for policy 0, policy_version 31482 (0.0008) +[2026-06-07 02:57:22,382][472573] Updated weights for policy 0, policy_version 31492 (0.0008) +[2026-06-07 02:57:22,502][472573] Updated weights for policy 0, policy_version 31503 (0.0008) +[2026-06-07 02:57:22,629][472573] Updated weights for policy 0, policy_version 31514 (0.0008) +[2026-06-07 02:57:22,747][472573] Updated weights for policy 0, policy_version 31525 (0.0008) +[2026-06-07 02:57:22,874][472573] Updated weights for policy 0, policy_version 31536 (0.0008) +[2026-06-07 02:57:23,103][464932] Fps is (10 sec: 29491.3, 60 sec: 28399.0, 300 sec: 28436.0). Total num frames: 16154624. Throughput: 0: 28416.0. Samples: 16109184. Policy #0 lag: (min: 63.0, avg: 75.8, max: 127.0) +[2026-06-07 02:57:23,104][464932] Avg episode reward: [(0, '1051.546')] +[2026-06-07 02:57:23,423][472573] Updated weights for policy 0, policy_version 31546 (0.0008) +[2026-06-07 02:57:23,535][472573] Updated weights for policy 0, policy_version 31556 (0.0008) +[2026-06-07 02:57:23,673][472573] Updated weights for policy 0, policy_version 31568 (0.0008) +[2026-06-07 02:57:23,789][472573] Updated weights for policy 0, policy_version 31578 (0.0008) +[2026-06-07 02:57:23,899][472573] Updated weights for policy 0, policy_version 31588 (0.0008) +[2026-06-07 02:57:24,033][472573] Updated weights for policy 0, policy_version 31600 (0.0008) +[2026-06-07 02:57:24,603][472573] Updated weights for policy 0, policy_version 31611 (0.0008) +[2026-06-07 02:57:24,718][472573] Updated weights for policy 0, policy_version 31621 (0.0008) +[2026-06-07 02:57:24,832][472573] Updated weights for policy 0, policy_version 31631 (0.0008) +[2026-06-07 02:57:24,950][472573] Updated weights for policy 0, policy_version 31642 (0.0008) +[2026-06-07 02:57:25,091][472573] Updated weights for policy 0, policy_version 31654 (0.0008) +[2026-06-07 02:57:25,200][472573] Updated weights for policy 0, policy_version 31664 (0.0008) +[2026-06-07 02:57:25,744][472573] Updated weights for policy 0, policy_version 31674 (0.0008) +[2026-06-07 02:57:25,888][472573] Updated weights for policy 0, policy_version 31687 (0.0008) +[2026-06-07 02:57:26,012][472573] Updated weights for policy 0, policy_version 31698 (0.0008) +[2026-06-07 02:57:26,136][472573] Updated weights for policy 0, policy_version 31709 (0.0008) +[2026-06-07 02:57:26,260][472573] Updated weights for policy 0, policy_version 31720 (0.0008) +[2026-06-07 02:57:26,372][472573] Updated weights for policy 0, policy_version 31730 (0.0008) +[2026-06-07 02:57:26,946][472573] Updated weights for policy 0, policy_version 31740 (0.0007) +[2026-06-07 02:57:27,068][472573] Updated weights for policy 0, policy_version 31751 (0.0004) +[2026-06-07 02:57:27,204][472573] Updated weights for policy 0, policy_version 31763 (0.0004) +[2026-06-07 02:57:27,330][472573] Updated weights for policy 0, policy_version 31774 (0.0005) +[2026-06-07 02:57:27,439][472573] Updated weights for policy 0, policy_version 31784 (0.0004) +[2026-06-07 02:57:27,555][472573] Updated weights for policy 0, policy_version 31794 (0.0008) +[2026-06-07 02:57:28,101][472573] Updated weights for policy 0, policy_version 31804 (0.0006) +[2026-06-07 02:57:28,103][464932] Fps is (10 sec: 29490.9, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 16285696. Throughput: 0: 28401.7. Samples: 16280448. Policy #0 lag: (min: 63.0, avg: 75.8, max: 127.0) +[2026-06-07 02:57:28,104][464932] Avg episode reward: [(0, '1130.969')] +[2026-06-07 02:57:28,209][472573] Updated weights for policy 0, policy_version 31814 (0.0007) +[2026-06-07 02:57:28,379][472573] Updated weights for policy 0, policy_version 31829 (0.0008) +[2026-06-07 02:57:28,488][472573] Updated weights for policy 0, policy_version 31839 (0.0008) +[2026-06-07 02:57:28,612][472573] Updated weights for policy 0, policy_version 31850 (0.0008) +[2026-06-07 02:57:28,742][472573] Updated weights for policy 0, policy_version 31861 (0.0008) +[2026-06-07 02:57:28,767][472028] Saving new best policy, reward=1130.969! +[2026-06-07 02:57:29,287][472573] Updated weights for policy 0, policy_version 31871 (0.0008) +[2026-06-07 02:57:29,414][472573] Updated weights for policy 0, policy_version 31882 (0.0008) +[2026-06-07 02:57:29,544][472573] Updated weights for policy 0, policy_version 31894 (0.0009) +[2026-06-07 02:57:29,664][472573] Updated weights for policy 0, policy_version 31905 (0.0008) +[2026-06-07 02:57:29,781][472573] Updated weights for policy 0, policy_version 31915 (0.0008) +[2026-06-07 02:57:29,906][472573] Updated weights for policy 0, policy_version 31926 (0.0008) +[2026-06-07 02:57:30,472][472573] Updated weights for policy 0, policy_version 31937 (0.0008) +[2026-06-07 02:57:30,593][472573] Updated weights for policy 0, policy_version 31948 (0.0008) +[2026-06-07 02:57:30,701][472573] Updated weights for policy 0, policy_version 31958 (0.0008) +[2026-06-07 02:57:30,814][472573] Updated weights for policy 0, policy_version 31968 (0.0008) +[2026-06-07 02:57:30,933][472573] Updated weights for policy 0, policy_version 31978 (0.0008) +[2026-06-07 02:57:31,061][472573] Updated weights for policy 0, policy_version 31989 (0.0008) +[2026-06-07 02:57:31,590][472573] Updated weights for policy 0, policy_version 31999 (0.0008) +[2026-06-07 02:57:31,698][472573] Updated weights for policy 0, policy_version 32009 (0.0008) +[2026-06-07 02:57:31,821][472573] Updated weights for policy 0, policy_version 32020 (0.0008) +[2026-06-07 02:57:31,951][472573] Updated weights for policy 0, policy_version 32031 (0.0008) +[2026-06-07 02:57:32,067][472573] Updated weights for policy 0, policy_version 32041 (0.0008) +[2026-06-07 02:57:32,177][472573] Updated weights for policy 0, policy_version 32051 (0.0008) +[2026-06-07 02:57:32,709][472573] Updated weights for policy 0, policy_version 32061 (0.0008) +[2026-06-07 02:57:32,834][472573] Updated weights for policy 0, policy_version 32072 (0.0009) +[2026-06-07 02:57:32,965][472573] Updated weights for policy 0, policy_version 32084 (0.0009) +[2026-06-07 02:57:33,103][464932] Fps is (10 sec: 26214.3, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 16416768. Throughput: 0: 28387.6. Samples: 16456448. Policy #0 lag: (min: 63.0, avg: 75.8, max: 127.0) +[2026-06-07 02:57:33,104][464932] Avg episode reward: [(0, '1093.752')] +[2026-06-07 02:57:33,105][472573] Updated weights for policy 0, policy_version 32097 (0.0008) +[2026-06-07 02:57:33,230][472573] Updated weights for policy 0, policy_version 32109 (0.0008) +[2026-06-07 02:57:33,825][472573] Updated weights for policy 0, policy_version 32121 (0.0008) +[2026-06-07 02:57:33,947][472573] Updated weights for policy 0, policy_version 32131 (0.0008) +[2026-06-07 02:57:34,077][472573] Updated weights for policy 0, policy_version 32142 (0.0008) +[2026-06-07 02:57:34,210][472573] Updated weights for policy 0, policy_version 32153 (0.0009) +[2026-06-07 02:57:34,340][472573] Updated weights for policy 0, policy_version 32165 (0.0008) +[2026-06-07 02:57:34,449][472573] Updated weights for policy 0, policy_version 32175 (0.0008) +[2026-06-07 02:57:35,003][472573] Updated weights for policy 0, policy_version 32187 (0.0008) +[2026-06-07 02:57:35,117][472573] Updated weights for policy 0, policy_version 32197 (0.0008) +[2026-06-07 02:57:35,228][472573] Updated weights for policy 0, policy_version 32207 (0.0007) +[2026-06-07 02:57:35,354][472573] Updated weights for policy 0, policy_version 32218 (0.0007) +[2026-06-07 02:57:35,462][472573] Updated weights for policy 0, policy_version 32228 (0.0008) +[2026-06-07 02:57:35,586][472573] Updated weights for policy 0, policy_version 32239 (0.0008) +[2026-06-07 02:57:36,145][472573] Updated weights for policy 0, policy_version 32249 (0.0008) +[2026-06-07 02:57:36,267][472573] Updated weights for policy 0, policy_version 32260 (0.0008) +[2026-06-07 02:57:36,379][472573] Updated weights for policy 0, policy_version 32270 (0.0009) +[2026-06-07 02:57:36,508][472573] Updated weights for policy 0, policy_version 32282 (0.0009) +[2026-06-07 02:57:36,656][472573] Updated weights for policy 0, policy_version 32295 (0.0008) +[2026-06-07 02:57:36,771][472573] Updated weights for policy 0, policy_version 32305 (0.0008) +[2026-06-07 02:57:37,315][472573] Updated weights for policy 0, policy_version 32315 (0.0008) +[2026-06-07 02:57:37,437][472573] Updated weights for policy 0, policy_version 32326 (0.0008) +[2026-06-07 02:57:37,557][472573] Updated weights for policy 0, policy_version 32337 (0.0006) +[2026-06-07 02:57:37,669][472573] Updated weights for policy 0, policy_version 32347 (0.0005) +[2026-06-07 02:57:37,781][472573] Updated weights for policy 0, policy_version 32357 (0.0008) +[2026-06-07 02:57:37,911][472573] Updated weights for policy 0, policy_version 32369 (0.0008) +[2026-06-07 02:57:38,103][464932] Fps is (10 sec: 29491.0, 60 sec: 28398.8, 300 sec: 28435.9). Total num frames: 16580608. Throughput: 0: 28418.7. Samples: 16536576. Policy #0 lag: (min: 63.0, avg: 75.6, max: 127.0) +[2026-06-07 02:57:38,104][464932] Avg episode reward: [(0, '1078.133')] +[2026-06-07 02:57:38,489][472573] Updated weights for policy 0, policy_version 32379 (0.0008) +[2026-06-07 02:57:38,601][472573] Updated weights for policy 0, policy_version 32389 (0.0008) +[2026-06-07 02:57:38,710][472573] Updated weights for policy 0, policy_version 32399 (0.0008) +[2026-06-07 02:57:38,845][472573] Updated weights for policy 0, policy_version 32411 (0.0008) +[2026-06-07 02:57:38,957][472573] Updated weights for policy 0, policy_version 32421 (0.0008) +[2026-06-07 02:57:39,080][472573] Updated weights for policy 0, policy_version 32432 (0.0008) +[2026-06-07 02:57:39,634][472573] Updated weights for policy 0, policy_version 32443 (0.0008) +[2026-06-07 02:57:39,750][472573] Updated weights for policy 0, policy_version 32454 (0.0008) +[2026-06-07 02:57:39,860][472573] Updated weights for policy 0, policy_version 32464 (0.0008) +[2026-06-07 02:57:39,986][472573] Updated weights for policy 0, policy_version 32476 (0.0008) +[2026-06-07 02:57:40,102][472573] Updated weights for policy 0, policy_version 32486 (0.0008) +[2026-06-07 02:57:40,226][472573] Updated weights for policy 0, policy_version 32497 (0.0008) +[2026-06-07 02:57:40,787][472573] Updated weights for policy 0, policy_version 32508 (0.0008) +[2026-06-07 02:57:40,925][472573] Updated weights for policy 0, policy_version 32520 (0.0007) +[2026-06-07 02:57:41,046][472573] Updated weights for policy 0, policy_version 32531 (0.0006) +[2026-06-07 02:57:41,173][472573] Updated weights for policy 0, policy_version 32542 (0.0007) +[2026-06-07 02:57:41,282][472573] Updated weights for policy 0, policy_version 32552 (0.0006) +[2026-06-07 02:57:41,437][472573] Updated weights for policy 0, policy_version 32566 (0.0007) +[2026-06-07 02:57:41,969][472573] Updated weights for policy 0, policy_version 32576 (0.0007) +[2026-06-07 02:57:42,079][472573] Updated weights for policy 0, policy_version 32586 (0.0007) +[2026-06-07 02:57:42,190][472573] Updated weights for policy 0, policy_version 32596 (0.0008) +[2026-06-07 02:57:42,322][472573] Updated weights for policy 0, policy_version 32608 (0.0008) +[2026-06-07 02:57:42,446][472573] Updated weights for policy 0, policy_version 32619 (0.0008) +[2026-06-07 02:57:42,569][472573] Updated weights for policy 0, policy_version 32630 (0.0008) +[2026-06-07 02:57:43,103][464932] Fps is (10 sec: 29491.3, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 16711680. Throughput: 0: 28299.5. Samples: 16705408. Policy #0 lag: (min: 63.0, avg: 75.6, max: 127.0) +[2026-06-07 02:57:43,104][464932] Avg episode reward: [(0, '1073.766')] +[2026-06-07 02:57:43,130][472573] Updated weights for policy 0, policy_version 32640 (0.0008) +[2026-06-07 02:57:43,269][472573] Updated weights for policy 0, policy_version 32653 (0.0008) +[2026-06-07 02:57:43,393][472573] Updated weights for policy 0, policy_version 32664 (0.0008) +[2026-06-07 02:57:43,504][472573] Updated weights for policy 0, policy_version 32674 (0.0008) +[2026-06-07 02:57:43,616][472573] Updated weights for policy 0, policy_version 32684 (0.0008) +[2026-06-07 02:57:43,741][472573] Updated weights for policy 0, policy_version 32695 (0.0008) +[2026-06-07 02:57:44,305][472573] Updated weights for policy 0, policy_version 32706 (0.0007) +[2026-06-07 02:57:44,428][472573] Updated weights for policy 0, policy_version 32717 (0.0008) +[2026-06-07 02:57:44,551][472573] Updated weights for policy 0, policy_version 32728 (0.0008) +[2026-06-07 02:57:44,701][472573] Updated weights for policy 0, policy_version 32742 (0.0008) +[2026-06-07 02:57:44,807][472573] Updated weights for policy 0, policy_version 32752 (0.0007) +[2026-06-07 02:57:45,391][472573] Updated weights for policy 0, policy_version 32763 (0.0008) +[2026-06-07 02:57:45,515][472573] Updated weights for policy 0, policy_version 32774 (0.0008) +[2026-06-07 02:57:45,633][472573] Updated weights for policy 0, policy_version 32785 (0.0008) +[2026-06-07 02:57:45,750][472573] Updated weights for policy 0, policy_version 32795 (0.0008) +[2026-06-07 02:57:45,883][472573] Updated weights for policy 0, policy_version 32807 (0.0008) +[2026-06-07 02:57:46,006][472573] Updated weights for policy 0, policy_version 32818 (0.0008) +[2026-06-07 02:57:46,555][472573] Updated weights for policy 0, policy_version 32829 (0.0007) +[2026-06-07 02:57:46,682][472573] Updated weights for policy 0, policy_version 32841 (0.0008) +[2026-06-07 02:57:46,789][472573] Updated weights for policy 0, policy_version 32851 (0.0008) +[2026-06-07 02:57:46,909][472573] Updated weights for policy 0, policy_version 32862 (0.0008) +[2026-06-07 02:57:47,018][472573] Updated weights for policy 0, policy_version 32872 (0.0008) +[2026-06-07 02:57:47,127][472573] Updated weights for policy 0, policy_version 32882 (0.0008) +[2026-06-07 02:57:47,713][472573] Updated weights for policy 0, policy_version 32894 (0.0008) +[2026-06-07 02:57:47,822][472573] Updated weights for policy 0, policy_version 32904 (0.0008) +[2026-06-07 02:57:47,950][472573] Updated weights for policy 0, policy_version 32916 (0.0008) +[2026-06-07 02:57:48,071][472573] Updated weights for policy 0, policy_version 32927 (0.0008) +[2026-06-07 02:57:48,103][464932] Fps is (10 sec: 26214.6, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 16842752. Throughput: 0: 28316.3. Samples: 16881152. Policy #0 lag: (min: 63.0, avg: 75.6, max: 127.0) +[2026-06-07 02:57:48,104][464932] Avg episode reward: [(0, '1046.389')] +[2026-06-07 02:57:48,204][472573] Updated weights for policy 0, policy_version 32939 (0.0008) +[2026-06-07 02:57:48,336][472573] Updated weights for policy 0, policy_version 32951 (0.0008) +[2026-06-07 02:57:48,918][472573] Updated weights for policy 0, policy_version 32963 (0.0008) +[2026-06-07 02:57:49,041][472573] Updated weights for policy 0, policy_version 32975 (0.0008) +[2026-06-07 02:57:49,181][472573] Updated weights for policy 0, policy_version 32987 (0.0008) +[2026-06-07 02:57:49,300][472573] Updated weights for policy 0, policy_version 32998 (0.0008) +[2026-06-07 02:57:49,432][472573] Updated weights for policy 0, policy_version 33010 (0.0008) +[2026-06-07 02:57:50,009][472573] Updated weights for policy 0, policy_version 33022 (0.0008) +[2026-06-07 02:57:50,143][472573] Updated weights for policy 0, policy_version 33034 (0.0008) +[2026-06-07 02:57:50,285][472573] Updated weights for policy 0, policy_version 33047 (0.0008) +[2026-06-07 02:57:50,408][472573] Updated weights for policy 0, policy_version 33058 (0.0008) +[2026-06-07 02:57:50,527][472573] Updated weights for policy 0, policy_version 33069 (0.0008) +[2026-06-07 02:57:50,645][472573] Updated weights for policy 0, policy_version 33080 (0.0008) +[2026-06-07 02:57:51,219][472573] Updated weights for policy 0, policy_version 33090 (0.0008) +[2026-06-07 02:57:51,336][472573] Updated weights for policy 0, policy_version 33101 (0.0008) +[2026-06-07 02:57:51,474][472573] Updated weights for policy 0, policy_version 33113 (0.0008) +[2026-06-07 02:57:51,604][472573] Updated weights for policy 0, policy_version 33125 (0.0008) +[2026-06-07 02:57:51,728][472573] Updated weights for policy 0, policy_version 33136 (0.0008) +[2026-06-07 02:57:52,303][472573] Updated weights for policy 0, policy_version 33148 (0.0007) +[2026-06-07 02:57:52,428][472573] Updated weights for policy 0, policy_version 33159 (0.0008) +[2026-06-07 02:57:52,541][472573] Updated weights for policy 0, policy_version 33169 (0.0008) +[2026-06-07 02:57:52,665][472573] Updated weights for policy 0, policy_version 33180 (0.0008) +[2026-06-07 02:57:52,789][472573] Updated weights for policy 0, policy_version 33191 (0.0008) +[2026-06-07 02:57:52,919][472573] Updated weights for policy 0, policy_version 33203 (0.0008) +[2026-06-07 02:57:53,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 17006592. Throughput: 0: 28384.9. Samples: 16960256. Policy #0 lag: (min: 63.0, avg: 75.6, max: 127.0) +[2026-06-07 02:57:53,104][464932] Avg episode reward: [(0, '1055.040')] +[2026-06-07 02:57:53,480][472573] Updated weights for policy 0, policy_version 33213 (0.0008) +[2026-06-07 02:57:53,588][472573] Updated weights for policy 0, policy_version 33223 (0.0008) +[2026-06-07 02:57:53,725][472573] Updated weights for policy 0, policy_version 33235 (0.0008) +[2026-06-07 02:57:53,856][472573] Updated weights for policy 0, policy_version 33247 (0.0008) +[2026-06-07 02:57:53,994][472573] Updated weights for policy 0, policy_version 33259 (0.0008) +[2026-06-07 02:57:54,117][472573] Updated weights for policy 0, policy_version 33270 (0.0008) +[2026-06-07 02:57:54,679][472573] Updated weights for policy 0, policy_version 33281 (0.0008) +[2026-06-07 02:57:54,801][472573] Updated weights for policy 0, policy_version 33292 (0.0008) +[2026-06-07 02:57:54,922][472573] Updated weights for policy 0, policy_version 33303 (0.0007) +[2026-06-07 02:57:55,052][472573] Updated weights for policy 0, policy_version 33315 (0.0008) +[2026-06-07 02:57:55,176][472573] Updated weights for policy 0, policy_version 33326 (0.0008) +[2026-06-07 02:57:55,757][472573] Updated weights for policy 0, policy_version 33338 (0.0008) +[2026-06-07 02:57:55,866][472573] Updated weights for policy 0, policy_version 33348 (0.0008) +[2026-06-07 02:57:55,976][472573] Updated weights for policy 0, policy_version 33358 (0.0008) +[2026-06-07 02:57:56,109][472573] Updated weights for policy 0, policy_version 33370 (0.0008) +[2026-06-07 02:57:56,232][472573] Updated weights for policy 0, policy_version 33381 (0.0008) +[2026-06-07 02:57:56,366][472573] Updated weights for policy 0, policy_version 33393 (0.0008) +[2026-06-07 02:57:56,935][472573] Updated weights for policy 0, policy_version 33403 (0.0007) +[2026-06-07 02:57:57,053][472573] Updated weights for policy 0, policy_version 33414 (0.0008) +[2026-06-07 02:57:57,193][472573] Updated weights for policy 0, policy_version 33426 (0.0008) +[2026-06-07 02:57:57,334][472573] Updated weights for policy 0, policy_version 33438 (0.0008) +[2026-06-07 02:57:57,445][472573] Updated weights for policy 0, policy_version 33448 (0.0008) +[2026-06-07 02:57:57,557][472573] Updated weights for policy 0, policy_version 33458 (0.0009) +[2026-06-07 02:57:58,104][464932] Fps is (10 sec: 29490.1, 60 sec: 28398.8, 300 sec: 28324.8). Total num frames: 17137664. Throughput: 0: 28324.7. Samples: 17130240. Policy #0 lag: (min: 63.0, avg: 75.6, max: 127.0) +[2026-06-07 02:57:58,105][464932] Avg episode reward: [(0, '1000.212')] +[2026-06-07 02:57:58,125][472573] Updated weights for policy 0, policy_version 33470 (0.0008) +[2026-06-07 02:57:58,241][472573] Updated weights for policy 0, policy_version 33481 (0.0008) +[2026-06-07 02:57:58,351][472573] Updated weights for policy 0, policy_version 33491 (0.0008) +[2026-06-07 02:57:58,489][472573] Updated weights for policy 0, policy_version 33503 (0.0008) +[2026-06-07 02:57:58,620][472573] Updated weights for policy 0, policy_version 33515 (0.0008) +[2026-06-07 02:57:58,742][472573] Updated weights for policy 0, policy_version 33525 (0.0008) +[2026-06-07 02:57:59,290][472573] Updated weights for policy 0, policy_version 33535 (0.0005) +[2026-06-07 02:57:59,410][472573] Updated weights for policy 0, policy_version 33546 (0.0008) +[2026-06-07 02:57:59,523][472573] Updated weights for policy 0, policy_version 33556 (0.0008) +[2026-06-07 02:57:59,644][472573] Updated weights for policy 0, policy_version 33567 (0.0008) +[2026-06-07 02:57:59,787][472573] Updated weights for policy 0, policy_version 33580 (0.0008) +[2026-06-07 02:57:59,914][472573] Updated weights for policy 0, policy_version 33591 (0.0008) +[2026-06-07 02:58:00,469][472573] Updated weights for policy 0, policy_version 33601 (0.0008) +[2026-06-07 02:58:00,582][472573] Updated weights for policy 0, policy_version 33611 (0.0008) +[2026-06-07 02:58:00,693][472573] Updated weights for policy 0, policy_version 33621 (0.0008) +[2026-06-07 02:58:00,816][472573] Updated weights for policy 0, policy_version 33632 (0.0008) +[2026-06-07 02:58:00,932][472573] Updated weights for policy 0, policy_version 33642 (0.0008) +[2026-06-07 02:58:01,054][472573] Updated weights for policy 0, policy_version 33653 (0.0008) +[2026-06-07 02:58:01,588][472573] Updated weights for policy 0, policy_version 33663 (0.0008) +[2026-06-07 02:58:01,707][472573] Updated weights for policy 0, policy_version 33674 (0.0008) +[2026-06-07 02:58:01,834][472573] Updated weights for policy 0, policy_version 33685 (0.0008) +[2026-06-07 02:58:01,952][472573] Updated weights for policy 0, policy_version 33696 (0.0008) +[2026-06-07 02:58:02,068][472573] Updated weights for policy 0, policy_version 33706 (0.0008) +[2026-06-07 02:58:02,186][472573] Updated weights for policy 0, policy_version 33717 (0.0008) +[2026-06-07 02:58:02,743][472573] Updated weights for policy 0, policy_version 33727 (0.0008) +[2026-06-07 02:58:02,852][472573] Updated weights for policy 0, policy_version 33737 (0.0008) +[2026-06-07 02:58:03,005][472573] Updated weights for policy 0, policy_version 33751 (0.0008) +[2026-06-07 02:58:03,103][464932] Fps is (10 sec: 26214.4, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 17268736. Throughput: 0: 28327.9. Samples: 17306496. Policy #0 lag: (min: 31.0, avg: 42.7, max: 95.0) +[2026-06-07 02:58:03,104][464932] Avg episode reward: [(0, '1076.159')] +[2026-06-07 02:58:03,124][472573] Updated weights for policy 0, policy_version 33762 (0.0008) +[2026-06-07 02:58:03,239][472573] Updated weights for policy 0, policy_version 33772 (0.0008) +[2026-06-07 02:58:03,371][472573] Updated weights for policy 0, policy_version 33784 (0.0008) +[2026-06-07 02:58:03,929][472573] Updated weights for policy 0, policy_version 33794 (0.0008) +[2026-06-07 02:58:04,043][472573] Updated weights for policy 0, policy_version 33805 (0.0008) +[2026-06-07 02:58:04,157][472573] Updated weights for policy 0, policy_version 33815 (0.0008) +[2026-06-07 02:58:04,276][472573] Updated weights for policy 0, policy_version 33826 (0.0008) +[2026-06-07 02:58:04,433][472573] Updated weights for policy 0, policy_version 33841 (0.0008) +[2026-06-07 02:58:04,996][472573] Updated weights for policy 0, policy_version 33851 (0.0008) +[2026-06-07 02:58:05,142][472573] Updated weights for policy 0, policy_version 33865 (0.0008) +[2026-06-07 02:58:05,266][472573] Updated weights for policy 0, policy_version 33877 (0.0008) +[2026-06-07 02:58:05,400][472573] Updated weights for policy 0, policy_version 33889 (0.0008) +[2026-06-07 02:58:05,512][472573] Updated weights for policy 0, policy_version 33900 (0.0008) +[2026-06-07 02:58:05,643][472573] Updated weights for policy 0, policy_version 33912 (0.0008) +[2026-06-07 02:58:06,235][472573] Updated weights for policy 0, policy_version 33923 (0.0007) +[2026-06-07 02:58:06,373][472573] Updated weights for policy 0, policy_version 33936 (0.0008) +[2026-06-07 02:58:06,517][472573] Updated weights for policy 0, policy_version 33949 (0.0008) +[2026-06-07 02:58:06,638][472573] Updated weights for policy 0, policy_version 33961 (0.0008) +[2026-06-07 02:58:06,771][472573] Updated weights for policy 0, policy_version 33973 (0.0008) +[2026-06-07 02:58:07,377][472573] Updated weights for policy 0, policy_version 33986 (0.0008) +[2026-06-07 02:58:07,489][472573] Updated weights for policy 0, policy_version 33996 (0.0008) +[2026-06-07 02:58:07,622][472573] Updated weights for policy 0, policy_version 34008 (0.0008) +[2026-06-07 02:58:07,749][472573] Updated weights for policy 0, policy_version 34019 (0.0008) +[2026-06-07 02:58:07,855][472573] Updated weights for policy 0, policy_version 34029 (0.0008) +[2026-06-07 02:58:07,981][472573] Updated weights for policy 0, policy_version 34040 (0.0008) +[2026-06-07 02:58:08,103][464932] Fps is (10 sec: 29492.6, 60 sec: 28399.0, 300 sec: 28436.0). Total num frames: 17432576. Throughput: 0: 28370.4. Samples: 17385856. Policy #0 lag: (min: 31.0, avg: 42.7, max: 95.0) +[2026-06-07 02:58:08,105][464932] Avg episode reward: [(0, '1090.929')] +[2026-06-07 02:58:08,566][472573] Updated weights for policy 0, policy_version 34053 (0.0008) +[2026-06-07 02:58:08,680][472573] Updated weights for policy 0, policy_version 34063 (0.0008) +[2026-06-07 02:58:08,802][472573] Updated weights for policy 0, policy_version 34074 (0.0008) +[2026-06-07 02:58:08,922][472573] Updated weights for policy 0, policy_version 34085 (0.0008) +[2026-06-07 02:58:09,034][472573] Updated weights for policy 0, policy_version 34095 (0.0008) +[2026-06-07 02:58:09,595][472573] Updated weights for policy 0, policy_version 34105 (0.0008) +[2026-06-07 02:58:09,717][472573] Updated weights for policy 0, policy_version 34116 (0.0008) +[2026-06-07 02:58:09,829][472573] Updated weights for policy 0, policy_version 34126 (0.0008) +[2026-06-07 02:58:09,943][472573] Updated weights for policy 0, policy_version 34136 (0.0009) +[2026-06-07 02:58:10,068][472573] Updated weights for policy 0, policy_version 34147 (0.0008) +[2026-06-07 02:58:10,193][472573] Updated weights for policy 0, policy_version 34158 (0.0008) +[2026-06-07 02:58:10,762][472573] Updated weights for policy 0, policy_version 34169 (0.0008) +[2026-06-07 02:58:10,869][472573] Updated weights for policy 0, policy_version 34179 (0.0008) +[2026-06-07 02:58:10,986][472573] Updated weights for policy 0, policy_version 34189 (0.0008) +[2026-06-07 02:58:11,103][472573] Updated weights for policy 0, policy_version 34200 (0.0008) +[2026-06-07 02:58:11,231][472573] Updated weights for policy 0, policy_version 34211 (0.0008) +[2026-06-07 02:58:11,357][472573] Updated weights for policy 0, policy_version 34222 (0.0008) +[2026-06-07 02:58:11,467][472573] Updated weights for policy 0, policy_version 34232 (0.0008) +[2026-06-07 02:58:12,056][472573] Updated weights for policy 0, policy_version 34244 (0.0008) +[2026-06-07 02:58:12,169][472573] Updated weights for policy 0, policy_version 34254 (0.0008) +[2026-06-07 02:58:12,297][472573] Updated weights for policy 0, policy_version 34266 (0.0008) +[2026-06-07 02:58:12,427][472573] Updated weights for policy 0, policy_version 34277 (0.0008) +[2026-06-07 02:58:12,564][472573] Updated weights for policy 0, policy_version 34289 (0.0008) +[2026-06-07 02:58:13,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 17563648. Throughput: 0: 28379.1. Samples: 17557504. Policy #0 lag: (min: 31.0, avg: 42.7, max: 95.0) +[2026-06-07 02:58:13,104][464932] Avg episode reward: [(0, '1143.276')] +[2026-06-07 02:58:13,137][472573] Updated weights for policy 0, policy_version 34300 (0.0008) +[2026-06-07 02:58:13,250][472573] Updated weights for policy 0, policy_version 34311 (0.0008) +[2026-06-07 02:58:13,367][472573] Updated weights for policy 0, policy_version 34321 (0.0008) +[2026-06-07 02:58:13,492][472573] Updated weights for policy 0, policy_version 34332 (0.0008) +[2026-06-07 02:58:13,607][472573] Updated weights for policy 0, policy_version 34342 (0.0008) +[2026-06-07 02:58:13,721][472573] Updated weights for policy 0, policy_version 34352 (0.0008) +[2026-06-07 02:58:13,803][472028] Saving new best policy, reward=1143.276! +[2026-06-07 02:58:14,287][472573] Updated weights for policy 0, policy_version 34363 (0.0008) +[2026-06-07 02:58:14,406][472573] Updated weights for policy 0, policy_version 34374 (0.0008) +[2026-06-07 02:58:14,528][472573] Updated weights for policy 0, policy_version 34385 (0.0008) +[2026-06-07 02:58:14,642][472573] Updated weights for policy 0, policy_version 34395 (0.0008) +[2026-06-07 02:58:14,763][472573] Updated weights for policy 0, policy_version 34406 (0.0008) +[2026-06-07 02:58:14,901][472573] Updated weights for policy 0, policy_version 34418 (0.0008) +[2026-06-07 02:58:15,453][472573] Updated weights for policy 0, policy_version 34428 (0.0007) +[2026-06-07 02:58:15,563][472573] Updated weights for policy 0, policy_version 34438 (0.0008) +[2026-06-07 02:58:15,685][472573] Updated weights for policy 0, policy_version 34449 (0.0008) +[2026-06-07 02:58:15,806][472573] Updated weights for policy 0, policy_version 34460 (0.0008) +[2026-06-07 02:58:15,951][472573] Updated weights for policy 0, policy_version 34473 (0.0008) +[2026-06-07 02:58:16,085][472573] Updated weights for policy 0, policy_version 34485 (0.0008) +[2026-06-07 02:58:16,659][472573] Updated weights for policy 0, policy_version 34496 (0.0008) +[2026-06-07 02:58:16,767][472573] Updated weights for policy 0, policy_version 34506 (0.0008) +[2026-06-07 02:58:16,878][472573] Updated weights for policy 0, policy_version 34516 (0.0008) +[2026-06-07 02:58:16,995][472573] Updated weights for policy 0, policy_version 34527 (0.0008) +[2026-06-07 02:58:17,122][472573] Updated weights for policy 0, policy_version 34538 (0.0008) +[2026-06-07 02:58:17,236][472573] Updated weights for policy 0, policy_version 34548 (0.0008) +[2026-06-07 02:58:17,804][472573] Updated weights for policy 0, policy_version 34559 (0.0008) +[2026-06-07 02:58:17,910][472573] Updated weights for policy 0, policy_version 34569 (0.0008) +[2026-06-07 02:58:18,036][472573] Updated weights for policy 0, policy_version 34580 (0.0008) +[2026-06-07 02:58:18,103][464932] Fps is (10 sec: 26214.5, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 17694720. Throughput: 0: 28347.7. Samples: 17732096. Policy #0 lag: (min: 31.0, avg: 42.7, max: 95.0) +[2026-06-07 02:58:18,104][464932] Avg episode reward: [(0, '1130.408')] +[2026-06-07 02:58:18,161][472573] Updated weights for policy 0, policy_version 34591 (0.0008) +[2026-06-07 02:58:18,296][472573] Updated weights for policy 0, policy_version 34603 (0.0008) +[2026-06-07 02:58:18,414][472573] Updated weights for policy 0, policy_version 34613 (0.0008) +[2026-06-07 02:58:19,009][472573] Updated weights for policy 0, policy_version 34626 (0.0008) +[2026-06-07 02:58:19,119][472573] Updated weights for policy 0, policy_version 34636 (0.0008) +[2026-06-07 02:58:19,232][472573] Updated weights for policy 0, policy_version 34646 (0.0008) +[2026-06-07 02:58:19,348][472573] Updated weights for policy 0, policy_version 34656 (0.0008) +[2026-06-07 02:58:19,460][472573] Updated weights for policy 0, policy_version 34666 (0.0008) +[2026-06-07 02:58:19,588][472573] Updated weights for policy 0, policy_version 34677 (0.0008) +[2026-06-07 02:58:20,133][472573] Updated weights for policy 0, policy_version 34687 (0.0008) +[2026-06-07 02:58:20,244][472573] Updated weights for policy 0, policy_version 34697 (0.0005) +[2026-06-07 02:58:20,371][472573] Updated weights for policy 0, policy_version 34708 (0.0005) +[2026-06-07 02:58:20,491][472573] Updated weights for policy 0, policy_version 34719 (0.0005) +[2026-06-07 02:58:20,614][472573] Updated weights for policy 0, policy_version 34730 (0.0005) +[2026-06-07 02:58:20,728][472573] Updated weights for policy 0, policy_version 34740 (0.0005) +[2026-06-07 02:58:21,283][472573] Updated weights for policy 0, policy_version 34750 (0.0005) +[2026-06-07 02:58:21,394][472573] Updated weights for policy 0, policy_version 34760 (0.0005) +[2026-06-07 02:58:21,503][472573] Updated weights for policy 0, policy_version 34770 (0.0004) +[2026-06-07 02:58:21,628][472573] Updated weights for policy 0, policy_version 34781 (0.0005) +[2026-06-07 02:58:21,736][472573] Updated weights for policy 0, policy_version 34791 (0.0004) +[2026-06-07 02:58:21,867][472573] Updated weights for policy 0, policy_version 34802 (0.0005) +[2026-06-07 02:58:22,419][472573] Updated weights for policy 0, policy_version 34812 (0.0005) +[2026-06-07 02:58:22,535][472573] Updated weights for policy 0, policy_version 34823 (0.0005) +[2026-06-07 02:58:22,650][472573] Updated weights for policy 0, policy_version 34833 (0.0004) +[2026-06-07 02:58:22,793][472573] Updated weights for policy 0, policy_version 34845 (0.0004) +[2026-06-07 02:58:22,912][472573] Updated weights for policy 0, policy_version 34856 (0.0004) +[2026-06-07 02:58:23,037][472573] Updated weights for policy 0, policy_version 34866 (0.0007) +[2026-06-07 02:58:23,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 17858560. Throughput: 0: 28427.5. Samples: 17815808. Policy #0 lag: (min: 104.0, avg: 125.4, max: 164.0) +[2026-06-07 02:58:23,104][464932] Avg episode reward: [(0, '1060.348')] +[2026-06-07 02:58:23,583][472573] Updated weights for policy 0, policy_version 34876 (0.0008) +[2026-06-07 02:58:23,702][472573] Updated weights for policy 0, policy_version 34887 (0.0008) +[2026-06-07 02:58:23,814][472573] Updated weights for policy 0, policy_version 34897 (0.0008) +[2026-06-07 02:58:23,925][472573] Updated weights for policy 0, policy_version 34907 (0.0008) +[2026-06-07 02:58:24,039][472573] Updated weights for policy 0, policy_version 34917 (0.0008) +[2026-06-07 02:58:24,151][472573] Updated weights for policy 0, policy_version 34927 (0.0008) +[2026-06-07 02:58:24,715][472573] Updated weights for policy 0, policy_version 34938 (0.0008) +[2026-06-07 02:58:24,823][472573] Updated weights for policy 0, policy_version 34948 (0.0008) +[2026-06-07 02:58:24,958][472573] Updated weights for policy 0, policy_version 34960 (0.0008) +[2026-06-07 02:58:25,090][472573] Updated weights for policy 0, policy_version 34972 (0.0008) +[2026-06-07 02:58:25,206][472573] Updated weights for policy 0, policy_version 34982 (0.0008) +[2026-06-07 02:58:25,321][472573] Updated weights for policy 0, policy_version 34992 (0.0008) +[2026-06-07 02:58:25,916][472573] Updated weights for policy 0, policy_version 35005 (0.0008) +[2026-06-07 02:58:26,037][472573] Updated weights for policy 0, policy_version 35016 (0.0008) +[2026-06-07 02:58:26,169][472573] Updated weights for policy 0, policy_version 35028 (0.0008) +[2026-06-07 02:58:26,290][472573] Updated weights for policy 0, policy_version 35039 (0.0008) +[2026-06-07 02:58:26,434][472573] Updated weights for policy 0, policy_version 35051 (0.0008) +[2026-06-07 02:58:26,547][472573] Updated weights for policy 0, policy_version 35061 (0.0008) +[2026-06-07 02:58:27,094][472573] Updated weights for policy 0, policy_version 35072 (0.0008) +[2026-06-07 02:58:27,226][472573] Updated weights for policy 0, policy_version 35084 (0.0008) +[2026-06-07 02:58:27,364][472573] Updated weights for policy 0, policy_version 35096 (0.0009) +[2026-06-07 02:58:27,475][472573] Updated weights for policy 0, policy_version 35106 (0.0008) +[2026-06-07 02:58:27,592][472573] Updated weights for policy 0, policy_version 35116 (0.0008) +[2026-06-07 02:58:27,699][472573] Updated weights for policy 0, policy_version 35126 (0.0008) +[2026-06-07 02:58:28,103][464932] Fps is (10 sec: 29491.3, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 17989632. Throughput: 0: 28418.8. Samples: 17984256. Policy #0 lag: (min: 104.0, avg: 125.4, max: 164.0) +[2026-06-07 02:58:28,104][464932] Avg episode reward: [(0, '1051.252')] +[2026-06-07 02:58:28,255][472573] Updated weights for policy 0, policy_version 35136 (0.0008) +[2026-06-07 02:58:28,367][472573] Updated weights for policy 0, policy_version 35146 (0.0008) +[2026-06-07 02:58:28,475][472573] Updated weights for policy 0, policy_version 35156 (0.0008) +[2026-06-07 02:58:28,584][472573] Updated weights for policy 0, policy_version 35166 (0.0008) +[2026-06-07 02:58:28,698][472573] Updated weights for policy 0, policy_version 35176 (0.0008) +[2026-06-07 02:58:28,810][472573] Updated weights for policy 0, policy_version 35186 (0.0008) +[2026-06-07 02:58:29,359][472573] Updated weights for policy 0, policy_version 35197 (0.0005) +[2026-06-07 02:58:29,493][472573] Updated weights for policy 0, policy_version 35209 (0.0004) +[2026-06-07 02:58:29,641][472573] Updated weights for policy 0, policy_version 35223 (0.0006) +[2026-06-07 02:58:29,762][472573] Updated weights for policy 0, policy_version 35234 (0.0008) +[2026-06-07 02:58:29,901][472573] Updated weights for policy 0, policy_version 35247 (0.0008) +[2026-06-07 02:58:30,480][472573] Updated weights for policy 0, policy_version 35258 (0.0008) +[2026-06-07 02:58:30,607][472573] Updated weights for policy 0, policy_version 35270 (0.0008) +[2026-06-07 02:58:30,735][472573] Updated weights for policy 0, policy_version 35282 (0.0008) +[2026-06-07 02:58:30,872][472573] Updated weights for policy 0, policy_version 35295 (0.0008) +[2026-06-07 02:58:31,002][472573] Updated weights for policy 0, policy_version 35307 (0.0008) +[2026-06-07 02:58:31,133][472573] Updated weights for policy 0, policy_version 35319 (0.0009) +[2026-06-07 02:58:31,733][472573] Updated weights for policy 0, policy_version 35332 (0.0008) +[2026-06-07 02:58:31,856][472573] Updated weights for policy 0, policy_version 35343 (0.0008) +[2026-06-07 02:58:32,002][472573] Updated weights for policy 0, policy_version 35357 (0.0009) +[2026-06-07 02:58:32,141][472573] Updated weights for policy 0, policy_version 35370 (0.0008) +[2026-06-07 02:58:32,273][472573] Updated weights for policy 0, policy_version 35382 (0.0008) +[2026-06-07 02:58:32,847][472573] Updated weights for policy 0, policy_version 35392 (0.0008) +[2026-06-07 02:58:32,983][472573] Updated weights for policy 0, policy_version 35405 (0.0008) +[2026-06-07 02:58:33,101][472573] Updated weights for policy 0, policy_version 35416 (0.0008) +[2026-06-07 02:58:33,103][464932] Fps is (10 sec: 26214.4, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 18120704. Throughput: 0: 28407.6. Samples: 18159488. Policy #0 lag: (min: 104.0, avg: 125.4, max: 164.0) +[2026-06-07 02:58:33,104][464932] Avg episode reward: [(0, '1158.323')] +[2026-06-07 02:58:33,241][472573] Updated weights for policy 0, policy_version 35429 (0.0008) +[2026-06-07 02:58:33,360][472573] Updated weights for policy 0, policy_version 35440 (0.0009) +[2026-06-07 02:58:33,440][472028] Saving new best policy, reward=1158.323! +[2026-06-07 02:58:33,928][472573] Updated weights for policy 0, policy_version 35450 (0.0008) +[2026-06-07 02:58:34,041][472573] Updated weights for policy 0, policy_version 35460 (0.0009) +[2026-06-07 02:58:34,151][472573] Updated weights for policy 0, policy_version 35470 (0.0008) +[2026-06-07 02:58:34,278][472573] Updated weights for policy 0, policy_version 35481 (0.0008) +[2026-06-07 02:58:34,386][472573] Updated weights for policy 0, policy_version 35491 (0.0008) +[2026-06-07 02:58:34,504][472573] Updated weights for policy 0, policy_version 35501 (0.0008) +[2026-06-07 02:58:34,617][472573] Updated weights for policy 0, policy_version 35511 (0.0008) +[2026-06-07 02:58:35,166][472573] Updated weights for policy 0, policy_version 35521 (0.0008) +[2026-06-07 02:58:35,285][472573] Updated weights for policy 0, policy_version 35531 (0.0008) +[2026-06-07 02:58:35,404][472573] Updated weights for policy 0, policy_version 35542 (0.0008) +[2026-06-07 02:58:35,538][472573] Updated weights for policy 0, policy_version 35554 (0.0008) +[2026-06-07 02:58:35,654][472573] Updated weights for policy 0, policy_version 35565 (0.0008) +[2026-06-07 02:58:36,248][472573] Updated weights for policy 0, policy_version 35578 (0.0008) +[2026-06-07 02:58:36,372][472573] Updated weights for policy 0, policy_version 35590 (0.0008) +[2026-06-07 02:58:36,510][472573] Updated weights for policy 0, policy_version 35603 (0.0008) +[2026-06-07 02:58:36,624][472573] Updated weights for policy 0, policy_version 35614 (0.0008) +[2026-06-07 02:58:36,789][472573] Updated weights for policy 0, policy_version 35629 (0.0008) +[2026-06-07 02:58:37,382][472573] Updated weights for policy 0, policy_version 35642 (0.0008) +[2026-06-07 02:58:37,503][472573] Updated weights for policy 0, policy_version 35653 (0.0008) +[2026-06-07 02:58:37,614][472573] Updated weights for policy 0, policy_version 35664 (0.0008) +[2026-06-07 02:58:37,733][472573] Updated weights for policy 0, policy_version 35675 (0.0008) +[2026-06-07 02:58:37,865][472573] Updated weights for policy 0, policy_version 35687 (0.0008) +[2026-06-07 02:58:37,980][472573] Updated weights for policy 0, policy_version 35698 (0.0008) +[2026-06-07 02:58:38,103][464932] Fps is (10 sec: 29491.3, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 18284544. Throughput: 0: 28507.0. Samples: 18243072. Policy #0 lag: (min: 104.0, avg: 125.4, max: 164.0) +[2026-06-07 02:58:38,104][464932] Avg episode reward: [(0, '1148.646')] +[2026-06-07 02:58:38,569][472573] Updated weights for policy 0, policy_version 35708 (0.0008) +[2026-06-07 02:58:38,679][472573] Updated weights for policy 0, policy_version 35718 (0.0008) +[2026-06-07 02:58:38,794][472573] Updated weights for policy 0, policy_version 35728 (0.0008) +[2026-06-07 02:58:38,924][472573] Updated weights for policy 0, policy_version 35740 (0.0008) +[2026-06-07 02:58:39,053][472573] Updated weights for policy 0, policy_version 35751 (0.0008) +[2026-06-07 02:58:39,171][472573] Updated weights for policy 0, policy_version 35762 (0.0008) +[2026-06-07 02:58:39,702][472573] Updated weights for policy 0, policy_version 35772 (0.0008) +[2026-06-07 02:58:39,822][472573] Updated weights for policy 0, policy_version 35783 (0.0008) +[2026-06-07 02:58:39,949][472573] Updated weights for policy 0, policy_version 35794 (0.0008) +[2026-06-07 02:58:40,054][472573] Updated weights for policy 0, policy_version 35804 (0.0008) +[2026-06-07 02:58:40,176][472573] Updated weights for policy 0, policy_version 35815 (0.0009) +[2026-06-07 02:58:40,316][472573] Updated weights for policy 0, policy_version 35827 (0.0008) +[2026-06-07 02:58:40,878][472573] Updated weights for policy 0, policy_version 35838 (0.0008) +[2026-06-07 02:58:40,987][472573] Updated weights for policy 0, policy_version 35848 (0.0008) +[2026-06-07 02:58:41,096][472573] Updated weights for policy 0, policy_version 35858 (0.0008) +[2026-06-07 02:58:41,213][472573] Updated weights for policy 0, policy_version 35868 (0.0008) +[2026-06-07 02:58:41,324][472573] Updated weights for policy 0, policy_version 35878 (0.0008) +[2026-06-07 02:58:41,464][472573] Updated weights for policy 0, policy_version 35890 (0.0008) +[2026-06-07 02:58:42,029][472573] Updated weights for policy 0, policy_version 35901 (0.0006) +[2026-06-07 02:58:42,172][472573] Updated weights for policy 0, policy_version 35913 (0.0007) +[2026-06-07 02:58:42,280][472573] Updated weights for policy 0, policy_version 35923 (0.0009) +[2026-06-07 02:58:42,406][472573] Updated weights for policy 0, policy_version 35934 (0.0008) +[2026-06-07 02:58:42,521][472573] Updated weights for policy 0, policy_version 35944 (0.0008) +[2026-06-07 02:58:42,631][472573] Updated weights for policy 0, policy_version 35954 (0.0008) +[2026-06-07 02:58:43,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 18415616. Throughput: 0: 28433.4. Samples: 18409728. Policy #0 lag: (min: 104.0, avg: 125.4, max: 164.0) +[2026-06-07 02:58:43,104][464932] Avg episode reward: [(0, '1228.472')] +[2026-06-07 02:58:43,197][472573] Updated weights for policy 0, policy_version 35965 (0.0007) +[2026-06-07 02:58:43,308][472573] Updated weights for policy 0, policy_version 35975 (0.0005) +[2026-06-07 02:58:43,427][472573] Updated weights for policy 0, policy_version 35986 (0.0004) +[2026-06-07 02:58:43,550][472573] Updated weights for policy 0, policy_version 35997 (0.0005) +[2026-06-07 02:58:43,696][472573] Updated weights for policy 0, policy_version 36010 (0.0005) +[2026-06-07 02:58:43,818][472573] Updated weights for policy 0, policy_version 36021 (0.0005) +[2026-06-07 02:58:43,850][472028] Saving new best policy, reward=1228.472! +[2026-06-07 02:58:44,371][472573] Updated weights for policy 0, policy_version 36031 (0.0004) +[2026-06-07 02:58:44,489][472573] Updated weights for policy 0, policy_version 36042 (0.0004) +[2026-06-07 02:58:44,621][472573] Updated weights for policy 0, policy_version 36054 (0.0004) +[2026-06-07 02:58:44,727][472573] Updated weights for policy 0, policy_version 36064 (0.0004) +[2026-06-07 02:58:44,857][472573] Updated weights for policy 0, policy_version 36076 (0.0004) +[2026-06-07 02:58:44,991][472573] Updated weights for policy 0, policy_version 36088 (0.0004) +[2026-06-07 02:58:45,551][472573] Updated weights for policy 0, policy_version 36100 (0.0004) +[2026-06-07 02:58:45,681][472573] Updated weights for policy 0, policy_version 36112 (0.0004) +[2026-06-07 02:58:45,837][472573] Updated weights for policy 0, policy_version 36127 (0.0008) +[2026-06-07 02:58:45,959][472573] Updated weights for policy 0, policy_version 36138 (0.0008) +[2026-06-07 02:58:46,074][472573] Updated weights for policy 0, policy_version 36149 (0.0008) +[2026-06-07 02:58:46,647][472573] Updated weights for policy 0, policy_version 36160 (0.0008) +[2026-06-07 02:58:46,759][472573] Updated weights for policy 0, policy_version 36171 (0.0008) +[2026-06-07 02:58:46,877][472573] Updated weights for policy 0, policy_version 36182 (0.0008) +[2026-06-07 02:58:47,021][472573] Updated weights for policy 0, policy_version 36195 (0.0008) +[2026-06-07 02:58:47,150][472573] Updated weights for policy 0, policy_version 36207 (0.0008) +[2026-06-07 02:58:47,767][472573] Updated weights for policy 0, policy_version 36220 (0.0008) +[2026-06-07 02:58:47,879][472573] Updated weights for policy 0, policy_version 36231 (0.0007) +[2026-06-07 02:58:47,994][472573] Updated weights for policy 0, policy_version 36241 (0.0008) +[2026-06-07 02:58:48,103][464932] Fps is (10 sec: 26214.3, 60 sec: 28399.0, 300 sec: 28324.9). Total num frames: 18546688. Throughput: 0: 28450.1. Samples: 18586752. Policy #0 lag: (min: 30.0, avg: 41.2, max: 94.0) +[2026-06-07 02:58:48,104][464932] Avg episode reward: [(0, '1249.137')] +[2026-06-07 02:58:48,117][472573] Updated weights for policy 0, policy_version 36252 (0.0008) +[2026-06-07 02:58:48,231][472573] Updated weights for policy 0, policy_version 36262 (0.0008) +[2026-06-07 02:58:48,352][472573] Updated weights for policy 0, policy_version 36273 (0.0008) +[2026-06-07 02:58:48,422][472028] Saving new best policy, reward=1249.137! +[2026-06-07 02:58:48,909][472573] Updated weights for policy 0, policy_version 36283 (0.0007) +[2026-06-07 02:58:49,038][472573] Updated weights for policy 0, policy_version 36294 (0.0008) +[2026-06-07 02:58:49,158][472573] Updated weights for policy 0, policy_version 36305 (0.0008) +[2026-06-07 02:58:49,265][472573] Updated weights for policy 0, policy_version 36315 (0.0009) +[2026-06-07 02:58:49,385][472573] Updated weights for policy 0, policy_version 36325 (0.0009) +[2026-06-07 02:58:49,499][472573] Updated weights for policy 0, policy_version 36335 (0.0009) +[2026-06-07 02:58:50,029][472573] Updated weights for policy 0, policy_version 36345 (0.0008) +[2026-06-07 02:58:50,141][472573] Updated weights for policy 0, policy_version 36355 (0.0008) +[2026-06-07 02:58:50,257][472573] Updated weights for policy 0, policy_version 36366 (0.0008) +[2026-06-07 02:58:50,386][472573] Updated weights for policy 0, policy_version 36378 (0.0008) +[2026-06-07 02:58:50,512][472573] Updated weights for policy 0, policy_version 36390 (0.0008) +[2026-06-07 02:58:50,650][472573] Updated weights for policy 0, policy_version 36403 (0.0008) +[2026-06-07 02:58:51,255][472573] Updated weights for policy 0, policy_version 36415 (0.0005) +[2026-06-07 02:58:51,387][472573] Updated weights for policy 0, policy_version 36427 (0.0006) +[2026-06-07 02:58:51,497][472573] Updated weights for policy 0, policy_version 36437 (0.0008) +[2026-06-07 02:58:51,616][472573] Updated weights for policy 0, policy_version 36448 (0.0008) +[2026-06-07 02:58:51,747][472573] Updated weights for policy 0, policy_version 36460 (0.0008) +[2026-06-07 02:58:51,863][472573] Updated weights for policy 0, policy_version 36470 (0.0008) +[2026-06-07 02:58:52,432][472573] Updated weights for policy 0, policy_version 36481 (0.0008) +[2026-06-07 02:58:52,546][472573] Updated weights for policy 0, policy_version 36491 (0.0008) +[2026-06-07 02:58:52,664][472573] Updated weights for policy 0, policy_version 36502 (0.0008) +[2026-06-07 02:58:52,781][472573] Updated weights for policy 0, policy_version 36512 (0.0010) +[2026-06-07 02:58:52,922][472573] Updated weights for policy 0, policy_version 36525 (0.0009) +[2026-06-07 02:58:53,045][472573] Updated weights for policy 0, policy_version 36535 (0.0008) +[2026-06-07 02:58:53,103][464932] Fps is (10 sec: 29491.4, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 18710528. Throughput: 0: 28495.7. Samples: 18668160. Policy #0 lag: (min: 30.0, avg: 41.2, max: 94.0) +[2026-06-07 02:58:53,104][464932] Avg episode reward: [(0, '1337.253')] +[2026-06-07 02:58:53,109][472028] Saving new best policy, reward=1337.253! +[2026-06-07 02:58:53,575][472573] Updated weights for policy 0, policy_version 36545 (0.0008) +[2026-06-07 02:58:53,688][472573] Updated weights for policy 0, policy_version 36555 (0.0008) +[2026-06-07 02:58:53,811][472573] Updated weights for policy 0, policy_version 36566 (0.0008) +[2026-06-07 02:58:53,928][472573] Updated weights for policy 0, policy_version 36577 (0.0008) +[2026-06-07 02:58:54,055][472573] Updated weights for policy 0, policy_version 36588 (0.0008) +[2026-06-07 02:58:54,177][472573] Updated weights for policy 0, policy_version 36599 (0.0008) +[2026-06-07 02:58:54,713][472573] Updated weights for policy 0, policy_version 36609 (0.0008) +[2026-06-07 02:58:54,831][472573] Updated weights for policy 0, policy_version 36619 (0.0009) +[2026-06-07 02:58:54,947][472573] Updated weights for policy 0, policy_version 36630 (0.0008) +[2026-06-07 02:58:55,092][472573] Updated weights for policy 0, policy_version 36643 (0.0009) +[2026-06-07 02:58:55,213][472573] Updated weights for policy 0, policy_version 36654 (0.0008) +[2026-06-07 02:58:55,328][472573] Updated weights for policy 0, policy_version 36664 (0.0009) +[2026-06-07 02:58:55,906][472573] Updated weights for policy 0, policy_version 36675 (0.0008) +[2026-06-07 02:58:56,040][472573] Updated weights for policy 0, policy_version 36687 (0.0008) +[2026-06-07 02:58:56,150][472573] Updated weights for policy 0, policy_version 36697 (0.0009) +[2026-06-07 02:58:56,281][472573] Updated weights for policy 0, policy_version 36709 (0.0008) +[2026-06-07 02:58:56,407][472573] Updated weights for policy 0, policy_version 36720 (0.0008) +[2026-06-07 02:58:56,982][472573] Updated weights for policy 0, policy_version 36732 (0.0008) +[2026-06-07 02:58:57,099][472573] Updated weights for policy 0, policy_version 36742 (0.0008) +[2026-06-07 02:58:57,212][472573] Updated weights for policy 0, policy_version 36752 (0.0008) +[2026-06-07 02:58:57,344][472573] Updated weights for policy 0, policy_version 36764 (0.0008) +[2026-06-07 02:58:57,469][472573] Updated weights for policy 0, policy_version 36775 (0.0008) +[2026-06-07 02:58:57,576][472573] Updated weights for policy 0, policy_version 36785 (0.0008) +[2026-06-07 02:58:58,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28399.2, 300 sec: 28324.9). Total num frames: 18841600. Throughput: 0: 28418.8. Samples: 18836352. Policy #0 lag: (min: 30.0, avg: 41.2, max: 94.0) +[2026-06-07 02:58:58,104][464932] Avg episode reward: [(0, '1386.120')] +[2026-06-07 02:58:58,122][472573] Updated weights for policy 0, policy_version 36795 (0.0008) +[2026-06-07 02:58:58,233][472573] Updated weights for policy 0, policy_version 36805 (0.0008) +[2026-06-07 02:58:58,358][472573] Updated weights for policy 0, policy_version 36816 (0.0008) +[2026-06-07 02:58:58,481][472573] Updated weights for policy 0, policy_version 36827 (0.0008) +[2026-06-07 02:58:58,594][472573] Updated weights for policy 0, policy_version 36837 (0.0008) +[2026-06-07 02:58:58,738][472573] Updated weights for policy 0, policy_version 36850 (0.0008) +[2026-06-07 02:58:58,796][472028] Saving new best policy, reward=1386.120! +[2026-06-07 02:58:59,312][472573] Updated weights for policy 0, policy_version 36863 (0.0008) +[2026-06-07 02:58:59,435][472573] Updated weights for policy 0, policy_version 36874 (0.0008) +[2026-06-07 02:58:59,555][472573] Updated weights for policy 0, policy_version 36885 (0.0008) +[2026-06-07 02:58:59,705][472573] Updated weights for policy 0, policy_version 36898 (0.0008) +[2026-06-07 02:58:59,827][472573] Updated weights for policy 0, policy_version 36909 (0.0008) +[2026-06-07 02:58:59,944][472573] Updated weights for policy 0, policy_version 36920 (0.0008) +[2026-06-07 02:59:00,515][472573] Updated weights for policy 0, policy_version 36931 (0.0007) +[2026-06-07 02:59:00,657][472573] Updated weights for policy 0, policy_version 36944 (0.0008) +[2026-06-07 02:59:00,787][472573] Updated weights for policy 0, policy_version 36956 (0.0008) +[2026-06-07 02:59:00,902][472573] Updated weights for policy 0, policy_version 36966 (0.0008) +[2026-06-07 02:59:01,019][472573] Updated weights for policy 0, policy_version 36976 (0.0008) +[2026-06-07 02:59:01,576][472573] Updated weights for policy 0, policy_version 36986 (0.0008) +[2026-06-07 02:59:01,691][472573] Updated weights for policy 0, policy_version 36997 (0.0008) +[2026-06-07 02:59:01,842][472573] Updated weights for policy 0, policy_version 37011 (0.0008) +[2026-06-07 02:59:01,961][472573] Updated weights for policy 0, policy_version 37021 (0.0008) +[2026-06-07 02:59:02,080][472573] Updated weights for policy 0, policy_version 37031 (0.0008) +[2026-06-07 02:59:02,198][472573] Updated weights for policy 0, policy_version 37041 (0.0008) +[2026-06-07 02:59:02,750][472573] Updated weights for policy 0, policy_version 37051 (0.0007) +[2026-06-07 02:59:02,861][472573] Updated weights for policy 0, policy_version 37061 (0.0007) +[2026-06-07 02:59:02,979][472573] Updated weights for policy 0, policy_version 37072 (0.0008) +[2026-06-07 02:59:03,103][464932] Fps is (10 sec: 26214.2, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 18972672. Throughput: 0: 28430.2. Samples: 19011456. Policy #0 lag: (min: 30.0, avg: 41.2, max: 94.0) +[2026-06-07 02:59:03,104][464932] Avg episode reward: [(0, '1413.045')] +[2026-06-07 02:59:03,106][472573] Updated weights for policy 0, policy_version 37083 (0.0008) +[2026-06-07 02:59:03,242][472573] Updated weights for policy 0, policy_version 37094 (0.0008) +[2026-06-07 02:59:03,357][472573] Updated weights for policy 0, policy_version 37104 (0.0008) +[2026-06-07 02:59:03,446][472028] Saving new best policy, reward=1413.045! +[2026-06-07 02:59:03,904][472573] Updated weights for policy 0, policy_version 37115 (0.0008) +[2026-06-07 02:59:04,022][472573] Updated weights for policy 0, policy_version 37126 (0.0008) +[2026-06-07 02:59:04,185][472573] Updated weights for policy 0, policy_version 37141 (0.0008) +[2026-06-07 02:59:04,305][472573] Updated weights for policy 0, policy_version 37151 (0.0008) +[2026-06-07 02:59:04,417][472573] Updated weights for policy 0, policy_version 37161 (0.0008) +[2026-06-07 02:59:04,548][472573] Updated weights for policy 0, policy_version 37172 (0.0008) +[2026-06-07 02:59:05,118][472573] Updated weights for policy 0, policy_version 37183 (0.0008) +[2026-06-07 02:59:05,238][472573] Updated weights for policy 0, policy_version 37194 (0.0008) +[2026-06-07 02:59:05,349][472573] Updated weights for policy 0, policy_version 37204 (0.0008) +[2026-06-07 02:59:05,491][472573] Updated weights for policy 0, policy_version 37216 (0.0008) +[2026-06-07 02:59:05,641][472573] Updated weights for policy 0, policy_version 37229 (0.0008) +[2026-06-07 02:59:05,760][472573] Updated weights for policy 0, policy_version 37239 (0.0008) +[2026-06-07 02:59:06,283][472573] Updated weights for policy 0, policy_version 37250 (0.0008) +[2026-06-07 02:59:06,400][472573] Updated weights for policy 0, policy_version 37260 (0.0008) +[2026-06-07 02:59:06,524][472573] Updated weights for policy 0, policy_version 37271 (0.0008) +[2026-06-07 02:59:06,633][472573] Updated weights for policy 0, policy_version 37281 (0.0008) +[2026-06-07 02:59:06,773][472573] Updated weights for policy 0, policy_version 37293 (0.0008) +[2026-06-07 02:59:06,891][472573] Updated weights for policy 0, policy_version 37304 (0.0008) +[2026-06-07 02:59:07,450][472573] Updated weights for policy 0, policy_version 37314 (0.0008) +[2026-06-07 02:59:07,589][472573] Updated weights for policy 0, policy_version 37327 (0.0008) +[2026-06-07 02:59:07,700][472573] Updated weights for policy 0, policy_version 37337 (0.0008) +[2026-06-07 02:59:07,815][472573] Updated weights for policy 0, policy_version 37347 (0.0008) +[2026-06-07 02:59:07,943][472573] Updated weights for policy 0, policy_version 37359 (0.0008) +[2026-06-07 02:59:08,103][464932] Fps is (10 sec: 29491.4, 60 sec: 28399.0, 300 sec: 28436.0). Total num frames: 19136512. Throughput: 0: 28410.3. Samples: 19094272. Policy #0 lag: (min: 30.0, avg: 41.2, max: 94.0) +[2026-06-07 02:59:08,104][464932] Avg episode reward: [(0, '1359.400')] +[2026-06-07 02:59:08,498][472573] Updated weights for policy 0, policy_version 37369 (0.0008) +[2026-06-07 02:59:08,621][472573] Updated weights for policy 0, policy_version 37380 (0.0008) +[2026-06-07 02:59:08,729][472573] Updated weights for policy 0, policy_version 37390 (0.0008) +[2026-06-07 02:59:08,855][472573] Updated weights for policy 0, policy_version 37401 (0.0008) +[2026-06-07 02:59:08,971][472573] Updated weights for policy 0, policy_version 37411 (0.0008) +[2026-06-07 02:59:09,093][472573] Updated weights for policy 0, policy_version 37422 (0.0008) +[2026-06-07 02:59:09,643][472573] Updated weights for policy 0, policy_version 37433 (0.0008) +[2026-06-07 02:59:09,767][472573] Updated weights for policy 0, policy_version 37444 (0.0008) +[2026-06-07 02:59:09,877][472573] Updated weights for policy 0, policy_version 37454 (0.0008) +[2026-06-07 02:59:09,989][472573] Updated weights for policy 0, policy_version 37464 (0.0008) +[2026-06-07 02:59:10,102][472573] Updated weights for policy 0, policy_version 37474 (0.0008) +[2026-06-07 02:59:10,216][472573] Updated weights for policy 0, policy_version 37484 (0.0008) +[2026-06-07 02:59:10,340][472573] Updated weights for policy 0, policy_version 37495 (0.0008) +[2026-06-07 02:59:10,890][472573] Updated weights for policy 0, policy_version 37505 (0.0008) +[2026-06-07 02:59:11,007][472573] Updated weights for policy 0, policy_version 37516 (0.0008) +[2026-06-07 02:59:11,121][472573] Updated weights for policy 0, policy_version 37526 (0.0008) +[2026-06-07 02:59:11,253][472573] Updated weights for policy 0, policy_version 37538 (0.0008) +[2026-06-07 02:59:11,381][472573] Updated weights for policy 0, policy_version 37549 (0.0008) +[2026-06-07 02:59:11,495][472573] Updated weights for policy 0, policy_version 37559 (0.0008) +[2026-06-07 02:59:12,061][472573] Updated weights for policy 0, policy_version 37569 (0.0008) +[2026-06-07 02:59:12,189][472573] Updated weights for policy 0, policy_version 37580 (0.0008) +[2026-06-07 02:59:12,296][472573] Updated weights for policy 0, policy_version 37590 (0.0008) +[2026-06-07 02:59:12,443][472573] Updated weights for policy 0, policy_version 37603 (0.0008) +[2026-06-07 02:59:12,563][472573] Updated weights for policy 0, policy_version 37614 (0.0008) +[2026-06-07 02:59:13,103][464932] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 19267584. Throughput: 0: 28373.3. Samples: 19261056. Policy #0 lag: (min: 63.0, avg: 75.1, max: 127.0) +[2026-06-07 02:59:13,104][464932] Avg episode reward: [(0, '1298.580')] +[2026-06-07 02:59:13,123][472573] Updated weights for policy 0, policy_version 37625 (0.0008) +[2026-06-07 02:59:13,251][472573] Updated weights for policy 0, policy_version 37636 (0.0008) +[2026-06-07 02:59:13,359][472573] Updated weights for policy 0, policy_version 37646 (0.0008) +[2026-06-07 02:59:13,471][472573] Updated weights for policy 0, policy_version 37656 (0.0008) +[2026-06-07 02:59:13,582][472573] Updated weights for policy 0, policy_version 37666 (0.0008) +[2026-06-07 02:59:13,694][472573] Updated weights for policy 0, policy_version 37676 (0.0008) +[2026-06-07 02:59:13,805][472573] Updated weights for policy 0, policy_version 37686 (0.0008) +[2026-06-07 02:59:14,371][472573] Updated weights for policy 0, policy_version 37697 (0.0008) +[2026-06-07 02:59:14,481][472573] Updated weights for policy 0, policy_version 37707 (0.0008) +[2026-06-07 02:59:14,593][472573] Updated weights for policy 0, policy_version 37717 (0.0008) +[2026-06-07 02:59:14,710][472573] Updated weights for policy 0, policy_version 37727 (0.0008) +[2026-06-07 02:59:14,830][472573] Updated weights for policy 0, policy_version 37738 (0.0008) +[2026-06-07 02:59:14,948][472573] Updated weights for policy 0, policy_version 37749 (0.0008) +[2026-06-07 02:59:15,515][472573] Updated weights for policy 0, policy_version 37759 (0.0007) +[2026-06-07 02:59:15,633][472573] Updated weights for policy 0, policy_version 37770 (0.0008) +[2026-06-07 02:59:15,755][472573] Updated weights for policy 0, policy_version 37781 (0.0008) +[2026-06-07 02:59:15,870][472573] Updated weights for policy 0, policy_version 37791 (0.0008) +[2026-06-07 02:59:15,993][472573] Updated weights for policy 0, policy_version 37802 (0.0008) +[2026-06-07 02:59:16,119][472573] Updated weights for policy 0, policy_version 37813 (0.0009) +[2026-06-07 02:59:16,676][472573] Updated weights for policy 0, policy_version 37824 (0.0008) +[2026-06-07 02:59:16,786][472573] Updated weights for policy 0, policy_version 37834 (0.0008) +[2026-06-07 02:59:16,910][472573] Updated weights for policy 0, policy_version 37845 (0.0008) +[2026-06-07 02:59:17,046][472573] Updated weights for policy 0, policy_version 37857 (0.0008) +[2026-06-07 02:59:17,156][472573] Updated weights for policy 0, policy_version 37867 (0.0008) +[2026-06-07 02:59:17,279][472573] Updated weights for policy 0, policy_version 37878 (0.0008) +[2026-06-07 02:59:17,846][472573] Updated weights for policy 0, policy_version 37890 (0.0008) +[2026-06-07 02:59:17,969][472573] Updated weights for policy 0, policy_version 37902 (0.0008) +[2026-06-07 02:59:18,103][464932] Fps is (10 sec: 26214.3, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 19398656. Throughput: 0: 28379.0. Samples: 19436544. Policy #0 lag: (min: 63.0, avg: 75.1, max: 127.0) +[2026-06-07 02:59:18,104][464932] Avg episode reward: [(0, '1290.363')] +[2026-06-07 02:59:18,122][472573] Updated weights for policy 0, policy_version 37916 (0.0008) +[2026-06-07 02:59:18,251][472573] Updated weights for policy 0, policy_version 37928 (0.0008) +[2026-06-07 02:59:18,373][472573] Updated weights for policy 0, policy_version 37939 (0.0008) +[2026-06-07 02:59:18,954][472573] Updated weights for policy 0, policy_version 37950 (0.0008) +[2026-06-07 02:59:19,085][472573] Updated weights for policy 0, policy_version 37962 (0.0008) +[2026-06-07 02:59:19,210][472573] Updated weights for policy 0, policy_version 37974 (0.0008) +[2026-06-07 02:59:19,334][472573] Updated weights for policy 0, policy_version 37985 (0.0008) +[2026-06-07 02:59:19,453][472573] Updated weights for policy 0, policy_version 37996 (0.0008) +[2026-06-07 02:59:19,558][472573] Updated weights for policy 0, policy_version 38006 (0.0008) +[2026-06-07 02:59:20,155][472573] Updated weights for policy 0, policy_version 38018 (0.0008) +[2026-06-07 02:59:20,268][472573] Updated weights for policy 0, policy_version 38029 (0.0009) +[2026-06-07 02:59:20,409][472573] Updated weights for policy 0, policy_version 38042 (0.0008) +[2026-06-07 02:59:20,561][472573] Updated weights for policy 0, policy_version 38056 (0.0008) +[2026-06-07 02:59:20,689][472573] Updated weights for policy 0, policy_version 38068 (0.0008) +[2026-06-07 02:59:21,256][472573] Updated weights for policy 0, policy_version 38079 (0.0008) +[2026-06-07 02:59:21,404][472573] Updated weights for policy 0, policy_version 38093 (0.0008) +[2026-06-07 02:59:21,528][472573] Updated weights for policy 0, policy_version 38105 (0.0008) +[2026-06-07 02:59:21,674][472573] Updated weights for policy 0, policy_version 38118 (0.0008) +[2026-06-07 02:59:21,801][472573] Updated weights for policy 0, policy_version 38130 (0.0008) +[2026-06-07 02:59:22,399][472573] Updated weights for policy 0, policy_version 38142 (0.0008) +[2026-06-07 02:59:22,530][472573] Updated weights for policy 0, policy_version 38155 (0.0008) +[2026-06-07 02:59:22,677][472573] Updated weights for policy 0, policy_version 38169 (0.0008) +[2026-06-07 02:59:22,806][472573] Updated weights for policy 0, policy_version 38181 (0.0008) +[2026-06-07 02:59:22,956][472573] Updated weights for policy 0, policy_version 38195 (0.0008) +[2026-06-07 02:59:23,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 19562496. Throughput: 0: 28362.0. Samples: 19519360. Policy #0 lag: (min: 63.0, avg: 75.1, max: 127.0) +[2026-06-07 02:59:23,104][464932] Avg episode reward: [(0, '1315.469')] +[2026-06-07 02:59:23,527][472573] Updated weights for policy 0, policy_version 38206 (0.0008) +[2026-06-07 02:59:23,683][472573] Updated weights for policy 0, policy_version 38221 (0.0008) +[2026-06-07 02:59:23,824][472573] Updated weights for policy 0, policy_version 38234 (0.0008) +[2026-06-07 02:59:23,928][472573] Updated weights for policy 0, policy_version 38244 (0.0008) +[2026-06-07 02:59:24,091][472573] Updated weights for policy 0, policy_version 38259 (0.0008) +[2026-06-07 02:59:24,679][472573] Updated weights for policy 0, policy_version 38270 (0.0008) +[2026-06-07 02:59:24,837][472573] Updated weights for policy 0, policy_version 38285 (0.0008) +[2026-06-07 02:59:24,995][472573] Updated weights for policy 0, policy_version 38300 (0.0008) +[2026-06-07 02:59:25,130][472573] Updated weights for policy 0, policy_version 38313 (0.0008) +[2026-06-07 02:59:25,274][472573] Updated weights for policy 0, policy_version 38326 (0.0008) +[2026-06-07 02:59:25,880][472573] Updated weights for policy 0, policy_version 38339 (0.0008) +[2026-06-07 02:59:25,996][472573] Updated weights for policy 0, policy_version 38350 (0.0008) +[2026-06-07 02:59:26,126][472573] Updated weights for policy 0, policy_version 38362 (0.0007) +[2026-06-07 02:59:26,284][472573] Updated weights for policy 0, policy_version 38377 (0.0009) +[2026-06-07 02:59:26,420][472573] Updated weights for policy 0, policy_version 38389 (0.0009) +[2026-06-07 02:59:26,985][472573] Updated weights for policy 0, policy_version 38400 (0.0007) +[2026-06-07 02:59:27,108][472573] Updated weights for policy 0, policy_version 38412 (0.0008) +[2026-06-07 02:59:27,248][472573] Updated weights for policy 0, policy_version 38425 (0.0008) +[2026-06-07 02:59:27,366][472573] Updated weights for policy 0, policy_version 38436 (0.0008) +[2026-06-07 02:59:27,496][472573] Updated weights for policy 0, policy_version 38448 (0.0009) +[2026-06-07 02:59:28,093][472573] Updated weights for policy 0, policy_version 38459 (0.0008) +[2026-06-07 02:59:28,103][464932] Fps is (10 sec: 29491.1, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 19693568. Throughput: 0: 28427.4. Samples: 19688960. Policy #0 lag: (min: 63.0, avg: 75.1, max: 127.0) +[2026-06-07 02:59:28,104][464932] Avg episode reward: [(0, '1356.849')] +[2026-06-07 02:59:28,237][472573] Updated weights for policy 0, policy_version 38472 (0.0008) +[2026-06-07 02:59:28,348][472573] Updated weights for policy 0, policy_version 38482 (0.0008) +[2026-06-07 02:59:28,465][472573] Updated weights for policy 0, policy_version 38493 (0.0008) +[2026-06-07 02:59:28,589][472573] Updated weights for policy 0, policy_version 38504 (0.0008) +[2026-06-07 02:59:28,705][472573] Updated weights for policy 0, policy_version 38514 (0.0008) +[2026-06-07 02:59:29,262][472573] Updated weights for policy 0, policy_version 38525 (0.0008) +[2026-06-07 02:59:29,370][472573] Updated weights for policy 0, policy_version 38535 (0.0008) +[2026-06-07 02:59:29,504][472573] Updated weights for policy 0, policy_version 38547 (0.0008) +[2026-06-07 02:59:29,629][472573] Updated weights for policy 0, policy_version 38558 (0.0008) +[2026-06-07 02:59:29,765][472573] Updated weights for policy 0, policy_version 38570 (0.0008) +[2026-06-07 02:59:29,877][472573] Updated weights for policy 0, policy_version 38580 (0.0008) +[2026-06-07 02:59:30,435][472573] Updated weights for policy 0, policy_version 38591 (0.0008) +[2026-06-07 02:59:30,556][472573] Updated weights for policy 0, policy_version 38602 (0.0006) +[2026-06-07 02:59:30,691][472573] Updated weights for policy 0, policy_version 38614 (0.0008) +[2026-06-07 02:59:30,806][472573] Updated weights for policy 0, policy_version 38624 (0.0008) +[2026-06-07 02:59:30,929][472573] Updated weights for policy 0, policy_version 38635 (0.0008) +[2026-06-07 02:59:31,049][472573] Updated weights for policy 0, policy_version 38646 (0.0008) +[2026-06-07 02:59:31,604][472573] Updated weights for policy 0, policy_version 38657 (0.0008) +[2026-06-07 02:59:31,741][472573] Updated weights for policy 0, policy_version 38669 (0.0009) +[2026-06-07 02:59:31,899][472573] Updated weights for policy 0, policy_version 38683 (0.0008) +[2026-06-07 02:59:32,036][472573] Updated weights for policy 0, policy_version 38695 (0.0009) +[2026-06-07 02:59:32,161][472573] Updated weights for policy 0, policy_version 38706 (0.0008) +[2026-06-07 02:59:32,702][472573] Updated weights for policy 0, policy_version 38717 (0.0008) +[2026-06-07 02:59:32,824][472573] Updated weights for policy 0, policy_version 38728 (0.0008) +[2026-06-07 02:59:32,946][472573] Updated weights for policy 0, policy_version 38739 (0.0008) +[2026-06-07 02:59:33,081][472573] Updated weights for policy 0, policy_version 38751 (0.0008) +[2026-06-07 02:59:33,103][464932] Fps is (10 sec: 26214.4, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 19824640. Throughput: 0: 28438.8. Samples: 19866496. Policy #0 lag: (min: 63.0, avg: 75.1, max: 127.0) +[2026-06-07 02:59:33,104][464932] Avg episode reward: [(0, '1376.497')] +[2026-06-07 02:59:33,202][472573] Updated weights for policy 0, policy_version 38762 (0.0008) +[2026-06-07 02:59:33,342][472573] Updated weights for policy 0, policy_version 38775 (0.0008) +[2026-06-07 02:59:33,919][472573] Updated weights for policy 0, policy_version 38787 (0.0008) +[2026-06-07 02:59:34,028][472573] Updated weights for policy 0, policy_version 38797 (0.0008) +[2026-06-07 02:59:34,148][472573] Updated weights for policy 0, policy_version 38808 (0.0008) +[2026-06-07 02:59:34,284][472573] Updated weights for policy 0, policy_version 38820 (0.0008) +[2026-06-07 02:59:34,393][472573] Updated weights for policy 0, policy_version 38830 (0.0008) +[2026-06-07 02:59:34,961][472573] Updated weights for policy 0, policy_version 38841 (0.0008) +[2026-06-07 02:59:35,072][472573] Updated weights for policy 0, policy_version 38851 (0.0008) +[2026-06-07 02:59:35,190][472573] Updated weights for policy 0, policy_version 38862 (0.0008) +[2026-06-07 02:59:35,302][472573] Updated weights for policy 0, policy_version 38872 (0.0008) +[2026-06-07 02:59:35,412][472573] Updated weights for policy 0, policy_version 38882 (0.0008) +[2026-06-07 02:59:35,556][472573] Updated weights for policy 0, policy_version 38895 (0.0008) +[2026-06-07 02:59:36,125][472573] Updated weights for policy 0, policy_version 38906 (0.0008) +[2026-06-07 02:59:36,240][472573] Updated weights for policy 0, policy_version 38916 (0.0008) +[2026-06-07 02:59:36,361][472573] Updated weights for policy 0, policy_version 38927 (0.0008) +[2026-06-07 02:59:36,484][472573] Updated weights for policy 0, policy_version 38938 (0.0008) +[2026-06-07 02:59:36,616][472573] Updated weights for policy 0, policy_version 38950 (0.0008) +[2026-06-07 02:59:36,727][472573] Updated weights for policy 0, policy_version 38960 (0.0008) +[2026-06-07 02:59:37,293][472573] Updated weights for policy 0, policy_version 38970 (0.0008) +[2026-06-07 02:59:37,420][472573] Updated weights for policy 0, policy_version 38981 (0.0008) +[2026-06-07 02:59:37,529][472573] Updated weights for policy 0, policy_version 38991 (0.0008) +[2026-06-07 02:59:37,667][472573] Updated weights for policy 0, policy_version 39003 (0.0008) +[2026-06-07 02:59:37,780][472573] Updated weights for policy 0, policy_version 39014 (0.0008) +[2026-06-07 02:59:37,895][472573] Updated weights for policy 0, policy_version 39024 (0.0008) +[2026-06-07 02:59:38,103][464932] Fps is (10 sec: 29491.4, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 19988480. Throughput: 0: 28390.4. Samples: 19945728. Policy #0 lag: (min: 63.0, avg: 74.6, max: 127.0) +[2026-06-07 02:59:38,104][464932] Avg episode reward: [(0, '1312.667')] +[2026-06-07 02:59:38,444][472573] Updated weights for policy 0, policy_version 39034 (0.0008) +[2026-06-07 02:59:38,555][472573] Updated weights for policy 0, policy_version 39044 (0.0008) +[2026-06-07 02:59:38,681][472573] Updated weights for policy 0, policy_version 39055 (0.0008) +[2026-06-07 02:59:38,803][472573] Updated weights for policy 0, policy_version 39066 (0.0008) +[2026-06-07 02:59:38,926][472573] Updated weights for policy 0, policy_version 39077 (0.0008) +[2026-06-07 02:59:39,036][472573] Updated weights for policy 0, policy_version 39087 (0.0008) +[2026-06-07 02:59:39,609][472573] Updated weights for policy 0, policy_version 39099 (0.0008) +[2026-06-07 02:59:39,721][472573] Updated weights for policy 0, policy_version 39109 (0.0008) +[2026-06-07 02:59:39,834][472573] Updated weights for policy 0, policy_version 39119 (0.0008) +[2026-06-07 02:59:39,954][472573] Updated weights for policy 0, policy_version 39130 (0.0008) +[2026-06-07 02:59:40,103][472573] Updated weights for policy 0, policy_version 39143 (0.0008) +[2026-06-07 02:59:40,235][472573] Updated weights for policy 0, policy_version 39155 (0.0008) +[2026-06-07 02:59:40,795][472573] Updated weights for policy 0, policy_version 39166 (0.0008) +[2026-06-07 02:59:40,909][472573] Updated weights for policy 0, policy_version 39176 (0.0008) +[2026-06-07 02:59:41,041][472573] Updated weights for policy 0, policy_version 39188 (0.0008) +[2026-06-07 02:59:41,163][472573] Updated weights for policy 0, policy_version 39199 (0.0008) +[2026-06-07 02:59:41,290][472573] Updated weights for policy 0, policy_version 39210 (0.0008) +[2026-06-07 02:59:41,421][472573] Updated weights for policy 0, policy_version 39222 (0.0008) +[2026-06-07 02:59:41,956][472573] Updated weights for policy 0, policy_version 39232 (0.0008) +[2026-06-07 02:59:42,074][472573] Updated weights for policy 0, policy_version 39242 (0.0008) +[2026-06-07 02:59:42,191][472573] Updated weights for policy 0, policy_version 39253 (0.0008) +[2026-06-07 02:59:42,311][472573] Updated weights for policy 0, policy_version 39264 (0.0008) +[2026-06-07 02:59:42,435][472573] Updated weights for policy 0, policy_version 39275 (0.0008) +[2026-06-07 02:59:42,556][472573] Updated weights for policy 0, policy_version 39286 (0.0008) +[2026-06-07 02:59:43,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 20119552. Throughput: 0: 28444.5. Samples: 20116352. Policy #0 lag: (min: 63.0, avg: 74.6, max: 127.0) +[2026-06-07 02:59:43,104][464932] Avg episode reward: [(0, '1339.011')] +[2026-06-07 02:59:43,115][472573] Updated weights for policy 0, policy_version 39296 (0.0008) +[2026-06-07 02:59:43,244][472573] Updated weights for policy 0, policy_version 39308 (0.0008) +[2026-06-07 02:59:43,401][472573] Updated weights for policy 0, policy_version 39322 (0.0008) +[2026-06-07 02:59:43,510][472573] Updated weights for policy 0, policy_version 39332 (0.0008) +[2026-06-07 02:59:43,625][472573] Updated weights for policy 0, policy_version 39342 (0.0008) +[2026-06-07 02:59:44,196][472573] Updated weights for policy 0, policy_version 39354 (0.0008) +[2026-06-07 02:59:44,309][472573] Updated weights for policy 0, policy_version 39364 (0.0007) +[2026-06-07 02:59:44,422][472573] Updated weights for policy 0, policy_version 39375 (0.0008) +[2026-06-07 02:59:44,559][472573] Updated weights for policy 0, policy_version 39387 (0.0008) +[2026-06-07 02:59:44,673][472573] Updated weights for policy 0, policy_version 39397 (0.0008) +[2026-06-07 02:59:44,797][472573] Updated weights for policy 0, policy_version 39408 (0.0008) +[2026-06-07 02:59:45,348][472573] Updated weights for policy 0, policy_version 39418 (0.0008) +[2026-06-07 02:59:45,464][472573] Updated weights for policy 0, policy_version 39428 (0.0008) +[2026-06-07 02:59:45,583][472573] Updated weights for policy 0, policy_version 39439 (0.0008) +[2026-06-07 02:59:45,706][472573] Updated weights for policy 0, policy_version 39450 (0.0008) +[2026-06-07 02:59:45,851][472573] Updated weights for policy 0, policy_version 39463 (0.0008) +[2026-06-07 02:59:45,974][472573] Updated weights for policy 0, policy_version 39474 (0.0008) +[2026-06-07 02:59:46,511][472573] Updated weights for policy 0, policy_version 39484 (0.0008) +[2026-06-07 02:59:46,632][472573] Updated weights for policy 0, policy_version 39495 (0.0008) +[2026-06-07 02:59:46,761][472573] Updated weights for policy 0, policy_version 39506 (0.0008) +[2026-06-07 02:59:46,870][472573] Updated weights for policy 0, policy_version 39516 (0.0008) +[2026-06-07 02:59:46,998][472573] Updated weights for policy 0, policy_version 39527 (0.0008) +[2026-06-07 02:59:47,113][472573] Updated weights for policy 0, policy_version 39537 (0.0008) +[2026-06-07 02:59:47,670][472573] Updated weights for policy 0, policy_version 39549 (0.0008) +[2026-06-07 02:59:47,795][472573] Updated weights for policy 0, policy_version 39560 (0.0008) +[2026-06-07 02:59:47,918][472573] Updated weights for policy 0, policy_version 39571 (0.0008) +[2026-06-07 02:59:48,030][472573] Updated weights for policy 0, policy_version 39581 (0.0008) +[2026-06-07 02:59:48,103][464932] Fps is (10 sec: 26214.3, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 20250624. Throughput: 0: 28464.4. Samples: 20292352. Policy #0 lag: (min: 63.0, avg: 74.6, max: 127.0) +[2026-06-07 02:59:48,104][464932] Avg episode reward: [(0, '1312.593')] +[2026-06-07 02:59:48,147][472573] Updated weights for policy 0, policy_version 39591 (0.0008) +[2026-06-07 02:59:48,259][472573] Updated weights for policy 0, policy_version 39601 (0.0008) +[2026-06-07 02:59:48,826][472573] Updated weights for policy 0, policy_version 39612 (0.0008) +[2026-06-07 02:59:48,950][472573] Updated weights for policy 0, policy_version 39623 (0.0008) +[2026-06-07 02:59:49,063][472573] Updated weights for policy 0, policy_version 39633 (0.0008) +[2026-06-07 02:59:49,183][472573] Updated weights for policy 0, policy_version 39644 (0.0008) +[2026-06-07 02:59:49,304][472573] Updated weights for policy 0, policy_version 39654 (0.0008) +[2026-06-07 02:59:49,415][472573] Updated weights for policy 0, policy_version 39664 (0.0008) +[2026-06-07 02:59:49,963][472573] Updated weights for policy 0, policy_version 39675 (0.0007) +[2026-06-07 02:59:50,075][472573] Updated weights for policy 0, policy_version 39685 (0.0008) +[2026-06-07 02:59:50,195][472573] Updated weights for policy 0, policy_version 39696 (0.0008) +[2026-06-07 02:59:50,324][472573] Updated weights for policy 0, policy_version 39707 (0.0008) +[2026-06-07 02:59:50,438][472573] Updated weights for policy 0, policy_version 39717 (0.0008) +[2026-06-07 02:59:50,559][472573] Updated weights for policy 0, policy_version 39728 (0.0008) +[2026-06-07 02:59:51,099][472573] Updated weights for policy 0, policy_version 39738 (0.0007) +[2026-06-07 02:59:51,221][472573] Updated weights for policy 0, policy_version 39749 (0.0007) +[2026-06-07 02:59:51,331][472573] Updated weights for policy 0, policy_version 39759 (0.0008) +[2026-06-07 02:59:51,470][472573] Updated weights for policy 0, policy_version 39771 (0.0008) +[2026-06-07 02:59:51,577][472573] Updated weights for policy 0, policy_version 39781 (0.0008) +[2026-06-07 02:59:51,690][472573] Updated weights for policy 0, policy_version 39791 (0.0008) +[2026-06-07 02:59:52,263][472573] Updated weights for policy 0, policy_version 39803 (0.0008) +[2026-06-07 02:59:52,380][472573] Updated weights for policy 0, policy_version 39813 (0.0008) +[2026-06-07 02:59:52,496][472573] Updated weights for policy 0, policy_version 39823 (0.0008) +[2026-06-07 02:59:52,609][472573] Updated weights for policy 0, policy_version 39833 (0.0008) +[2026-06-07 02:59:52,730][472573] Updated weights for policy 0, policy_version 39844 (0.0008) +[2026-06-07 02:59:52,843][472573] Updated weights for policy 0, policy_version 39854 (0.0008) +[2026-06-07 02:59:53,103][464932] Fps is (10 sec: 29491.4, 60 sec: 28399.0, 300 sec: 28436.0). Total num frames: 20414464. Throughput: 0: 28362.0. Samples: 20370560. Policy #0 lag: (min: 63.0, avg: 74.6, max: 127.0) +[2026-06-07 02:59:53,104][464932] Avg episode reward: [(0, '1312.593')] +[2026-06-07 02:59:53,393][472573] Updated weights for policy 0, policy_version 39865 (0.0008) +[2026-06-07 02:59:53,504][472573] Updated weights for policy 0, policy_version 39875 (0.0008) +[2026-06-07 02:59:53,622][472573] Updated weights for policy 0, policy_version 39886 (0.0008) +[2026-06-07 02:59:53,738][472573] Updated weights for policy 0, policy_version 39896 (0.0008) +[2026-06-07 02:59:53,863][472573] Updated weights for policy 0, policy_version 39907 (0.0009) +[2026-06-07 02:59:53,973][472573] Updated weights for policy 0, policy_version 39917 (0.0008) +[2026-06-07 02:59:54,552][472573] Updated weights for policy 0, policy_version 39930 (0.0008) +[2026-06-07 02:59:54,678][472573] Updated weights for policy 0, policy_version 39941 (0.0008) +[2026-06-07 02:59:54,789][472573] Updated weights for policy 0, policy_version 39951 (0.0008) +[2026-06-07 02:59:54,919][472573] Updated weights for policy 0, policy_version 39963 (0.0008) +[2026-06-07 02:59:55,057][472573] Updated weights for policy 0, policy_version 39975 (0.0008) +[2026-06-07 02:59:55,171][472573] Updated weights for policy 0, policy_version 39985 (0.0008) +[2026-06-07 02:59:55,708][472573] Updated weights for policy 0, policy_version 39995 (0.0008) +[2026-06-07 02:59:55,820][472573] Updated weights for policy 0, policy_version 40005 (0.0008) +[2026-06-07 02:59:55,930][472573] Updated weights for policy 0, policy_version 40015 (0.0008) +[2026-06-07 02:59:56,048][472573] Updated weights for policy 0, policy_version 40026 (0.0008) +[2026-06-07 02:59:56,178][472573] Updated weights for policy 0, policy_version 40037 (0.0008) +[2026-06-07 02:59:56,291][472573] Updated weights for policy 0, policy_version 40047 (0.0008) +[2026-06-07 02:59:56,840][472573] Updated weights for policy 0, policy_version 40057 (0.0008) +[2026-06-07 02:59:56,958][472573] Updated weights for policy 0, policy_version 40067 (0.0008) +[2026-06-07 02:59:57,068][472573] Updated weights for policy 0, policy_version 40077 (0.0008) +[2026-06-07 02:59:57,176][472573] Updated weights for policy 0, policy_version 40087 (0.0008) +[2026-06-07 02:59:57,292][472573] Updated weights for policy 0, policy_version 40097 (0.0008) +[2026-06-07 02:59:57,425][472573] Updated weights for policy 0, policy_version 40109 (0.0008) +[2026-06-07 02:59:57,537][472573] Updated weights for policy 0, policy_version 40119 (0.0008) +[2026-06-07 02:59:58,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 20545536. Throughput: 0: 28470.0. Samples: 20542208. Policy #0 lag: (min: 63.0, avg: 74.6, max: 127.0) +[2026-06-07 02:59:58,104][464932] Avg episode reward: [(0, '1278.344')] +[2026-06-07 02:59:58,111][472573] Updated weights for policy 0, policy_version 40131 (0.0007) +[2026-06-07 02:59:58,236][472573] Updated weights for policy 0, policy_version 40142 (0.0008) +[2026-06-07 02:59:58,370][472573] Updated weights for policy 0, policy_version 40154 (0.0008) +[2026-06-07 02:59:58,503][472573] Updated weights for policy 0, policy_version 40166 (0.0008) +[2026-06-07 02:59:58,653][472573] Updated weights for policy 0, policy_version 40179 (0.0008) +[2026-06-07 02:59:59,196][472573] Updated weights for policy 0, policy_version 40189 (0.0008) +[2026-06-07 02:59:59,306][472573] Updated weights for policy 0, policy_version 40199 (0.0008) +[2026-06-07 02:59:59,430][472573] Updated weights for policy 0, policy_version 40210 (0.0008) +[2026-06-07 02:59:59,541][472573] Updated weights for policy 0, policy_version 40220 (0.0008) +[2026-06-07 02:59:59,661][472573] Updated weights for policy 0, policy_version 40231 (0.0008) +[2026-06-07 02:59:59,812][472573] Updated weights for policy 0, policy_version 40244 (0.0008) +[2026-06-07 03:00:00,366][472573] Updated weights for policy 0, policy_version 40254 (0.0008) +[2026-06-07 03:00:00,473][472573] Updated weights for policy 0, policy_version 40264 (0.0008) +[2026-06-07 03:00:00,587][472573] Updated weights for policy 0, policy_version 40274 (0.0008) +[2026-06-07 03:00:00,707][472573] Updated weights for policy 0, policy_version 40285 (0.0008) +[2026-06-07 03:00:00,842][472573] Updated weights for policy 0, policy_version 40297 (0.0008) +[2026-06-07 03:00:00,974][472573] Updated weights for policy 0, policy_version 40309 (0.0008) +[2026-06-07 03:00:01,525][472573] Updated weights for policy 0, policy_version 40319 (0.0008) +[2026-06-07 03:00:01,644][472573] Updated weights for policy 0, policy_version 40330 (0.0008) +[2026-06-07 03:00:01,777][472573] Updated weights for policy 0, policy_version 40342 (0.0008) +[2026-06-07 03:00:01,907][472573] Updated weights for policy 0, policy_version 40354 (0.0008) +[2026-06-07 03:00:02,037][472573] Updated weights for policy 0, policy_version 40365 (0.0008) +[2026-06-07 03:00:02,158][472573] Updated weights for policy 0, policy_version 40376 (0.0008) +[2026-06-07 03:00:02,713][472573] Updated weights for policy 0, policy_version 40386 (0.0008) +[2026-06-07 03:00:02,822][472573] Updated weights for policy 0, policy_version 40396 (0.0008) +[2026-06-07 03:00:02,932][472573] Updated weights for policy 0, policy_version 40406 (0.0006) +[2026-06-07 03:00:03,071][472573] Updated weights for policy 0, policy_version 40419 (0.0006) +[2026-06-07 03:00:03,103][464932] Fps is (10 sec: 26214.1, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 20676608. Throughput: 0: 28475.7. Samples: 20717952. Policy #0 lag: (min: 63.0, avg: 74.6, max: 127.0) +[2026-06-07 03:00:03,104][464932] Avg episode reward: [(0, '1327.230')] +[2026-06-07 03:00:03,222][472573] Updated weights for policy 0, policy_version 40433 (0.0009) +[2026-06-07 03:00:03,799][472573] Updated weights for policy 0, policy_version 40445 (0.0009) +[2026-06-07 03:00:03,927][472573] Updated weights for policy 0, policy_version 40457 (0.0007) +[2026-06-07 03:00:04,076][472573] Updated weights for policy 0, policy_version 40471 (0.0008) +[2026-06-07 03:00:04,195][472573] Updated weights for policy 0, policy_version 40482 (0.0008) +[2026-06-07 03:00:04,323][472573] Updated weights for policy 0, policy_version 40493 (0.0008) +[2026-06-07 03:00:04,441][472573] Updated weights for policy 0, policy_version 40503 (0.0008) +[2026-06-07 03:00:05,008][472573] Updated weights for policy 0, policy_version 40515 (0.0008) +[2026-06-07 03:00:05,119][472573] Updated weights for policy 0, policy_version 40525 (0.0008) +[2026-06-07 03:00:05,229][472573] Updated weights for policy 0, policy_version 40535 (0.0008) +[2026-06-07 03:00:05,357][472573] Updated weights for policy 0, policy_version 40546 (0.0008) +[2026-06-07 03:00:05,483][472573] Updated weights for policy 0, policy_version 40557 (0.0008) +[2026-06-07 03:00:05,603][472573] Updated weights for policy 0, policy_version 40568 (0.0008) +[2026-06-07 03:00:06,157][472573] Updated weights for policy 0, policy_version 40579 (0.0009) +[2026-06-07 03:00:06,286][472573] Updated weights for policy 0, policy_version 40590 (0.0008) +[2026-06-07 03:00:06,394][472573] Updated weights for policy 0, policy_version 40600 (0.0008) +[2026-06-07 03:00:06,522][472573] Updated weights for policy 0, policy_version 40612 (0.0008) +[2026-06-07 03:00:06,637][472573] Updated weights for policy 0, policy_version 40622 (0.0008) +[2026-06-07 03:00:07,208][472573] Updated weights for policy 0, policy_version 40634 (0.0008) +[2026-06-07 03:00:07,322][472573] Updated weights for policy 0, policy_version 40644 (0.0009) +[2026-06-07 03:00:07,445][472573] Updated weights for policy 0, policy_version 40655 (0.0008) +[2026-06-07 03:00:07,553][472573] Updated weights for policy 0, policy_version 40665 (0.0008) +[2026-06-07 03:00:07,663][472573] Updated weights for policy 0, policy_version 40675 (0.0009) +[2026-06-07 03:00:07,774][472573] Updated weights for policy 0, policy_version 40685 (0.0009) +[2026-06-07 03:00:07,897][472573] Updated weights for policy 0, policy_version 40696 (0.0009) +[2026-06-07 03:00:08,103][464932] Fps is (10 sec: 29491.5, 60 sec: 28399.0, 300 sec: 28436.0). Total num frames: 20840448. Throughput: 0: 28359.2. Samples: 20795520. Policy #0 lag: (min: 63.0, avg: 74.6, max: 127.0) +[2026-06-07 03:00:08,104][464932] Avg episode reward: [(0, '1366.696')] +[2026-06-07 03:00:08,473][472573] Updated weights for policy 0, policy_version 40707 (0.0008) +[2026-06-07 03:00:08,609][472573] Updated weights for policy 0, policy_version 40719 (0.0008) +[2026-06-07 03:00:08,720][472573] Updated weights for policy 0, policy_version 40729 (0.0009) +[2026-06-07 03:00:08,848][472573] Updated weights for policy 0, policy_version 40741 (0.0009) +[2026-06-07 03:00:08,965][472573] Updated weights for policy 0, policy_version 40751 (0.0009) +[2026-06-07 03:00:09,510][472573] Updated weights for policy 0, policy_version 40762 (0.0008) +[2026-06-07 03:00:09,629][472573] Updated weights for policy 0, policy_version 40772 (0.0004) +[2026-06-07 03:00:09,736][472573] Updated weights for policy 0, policy_version 40782 (0.0004) +[2026-06-07 03:00:09,875][472573] Updated weights for policy 0, policy_version 40794 (0.0004) +[2026-06-07 03:00:09,995][472573] Updated weights for policy 0, policy_version 40805 (0.0004) +[2026-06-07 03:00:10,111][472573] Updated weights for policy 0, policy_version 40815 (0.0004) +[2026-06-07 03:00:10,634][472573] Updated weights for policy 0, policy_version 40825 (0.0004) +[2026-06-07 03:00:10,738][472573] Updated weights for policy 0, policy_version 40835 (0.0008) +[2026-06-07 03:00:10,862][472573] Updated weights for policy 0, policy_version 40846 (0.0008) +[2026-06-07 03:00:10,997][472573] Updated weights for policy 0, policy_version 40858 (0.0008) +[2026-06-07 03:00:11,113][472573] Updated weights for policy 0, policy_version 40868 (0.0008) +[2026-06-07 03:00:11,227][472573] Updated weights for policy 0, policy_version 40878 (0.0008) +[2026-06-07 03:00:11,334][472573] Updated weights for policy 0, policy_version 40888 (0.0008) +[2026-06-07 03:00:11,900][472573] Updated weights for policy 0, policy_version 40898 (0.0008) +[2026-06-07 03:00:12,010][472573] Updated weights for policy 0, policy_version 40908 (0.0008) +[2026-06-07 03:00:12,155][472573] Updated weights for policy 0, policy_version 40921 (0.0009) +[2026-06-07 03:00:12,277][472573] Updated weights for policy 0, policy_version 40932 (0.0008) +[2026-06-07 03:00:12,390][472573] Updated weights for policy 0, policy_version 40942 (0.0008) +[2026-06-07 03:00:12,504][472573] Updated weights for policy 0, policy_version 40952 (0.0006) +[2026-06-07 03:00:13,048][472573] Updated weights for policy 0, policy_version 40962 (0.0004) +[2026-06-07 03:00:13,103][464932] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 20971520. Throughput: 0: 28458.7. Samples: 20969600. Policy #0 lag: (min: 63.0, avg: 74.6, max: 127.0) +[2026-06-07 03:00:13,104][464932] Avg episode reward: [(0, '1378.860')] +[2026-06-07 03:00:13,161][472573] Updated weights for policy 0, policy_version 40972 (0.0004) +[2026-06-07 03:00:13,283][472573] Updated weights for policy 0, policy_version 40983 (0.0004) +[2026-06-07 03:00:13,400][472573] Updated weights for policy 0, policy_version 40993 (0.0005) +[2026-06-07 03:00:13,513][472573] Updated weights for policy 0, policy_version 41003 (0.0008) +[2026-06-07 03:00:13,657][472573] Updated weights for policy 0, policy_version 41016 (0.0008) +[2026-06-07 03:00:14,190][472573] Updated weights for policy 0, policy_version 41026 (0.0008) +[2026-06-07 03:00:14,301][472573] Updated weights for policy 0, policy_version 41036 (0.0008) +[2026-06-07 03:00:14,413][472573] Updated weights for policy 0, policy_version 41046 (0.0008) +[2026-06-07 03:00:14,538][472573] Updated weights for policy 0, policy_version 41057 (0.0008) +[2026-06-07 03:00:14,660][472573] Updated weights for policy 0, policy_version 41068 (0.0008) +[2026-06-07 03:00:14,789][472573] Updated weights for policy 0, policy_version 41079 (0.0008) +[2026-06-07 03:00:15,344][472573] Updated weights for policy 0, policy_version 41089 (0.0008) +[2026-06-07 03:00:15,476][472573] Updated weights for policy 0, policy_version 41101 (0.0008) +[2026-06-07 03:00:15,597][472573] Updated weights for policy 0, policy_version 41112 (0.0008) +[2026-06-07 03:00:15,729][472573] Updated weights for policy 0, policy_version 41124 (0.0008) +[2026-06-07 03:00:15,857][472573] Updated weights for policy 0, policy_version 41135 (0.0008) +[2026-06-07 03:00:16,410][472573] Updated weights for policy 0, policy_version 41146 (0.0008) +[2026-06-07 03:00:16,521][472573] Updated weights for policy 0, policy_version 41156 (0.0008) +[2026-06-07 03:00:16,633][472573] Updated weights for policy 0, policy_version 41166 (0.0009) +[2026-06-07 03:00:16,754][472573] Updated weights for policy 0, policy_version 41177 (0.0008) +[2026-06-07 03:00:16,888][472573] Updated weights for policy 0, policy_version 41189 (0.0008) +[2026-06-07 03:00:17,015][472573] Updated weights for policy 0, policy_version 41200 (0.0008) +[2026-06-07 03:00:17,569][472573] Updated weights for policy 0, policy_version 41210 (0.0008) +[2026-06-07 03:00:17,690][472573] Updated weights for policy 0, policy_version 41221 (0.0008) +[2026-06-07 03:00:17,811][472573] Updated weights for policy 0, policy_version 41232 (0.0009) +[2026-06-07 03:00:17,946][472573] Updated weights for policy 0, policy_version 41244 (0.0009) +[2026-06-07 03:00:18,081][472573] Updated weights for policy 0, policy_version 41256 (0.0008) +[2026-06-07 03:00:18,103][464932] Fps is (10 sec: 26214.2, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 21102592. Throughput: 0: 28433.1. Samples: 21145984. Policy #0 lag: (min: 63.0, avg: 74.6, max: 127.0) +[2026-06-07 03:00:18,104][464932] Avg episode reward: [(0, '1395.510')] +[2026-06-07 03:00:18,205][472573] Updated weights for policy 0, policy_version 41267 (0.0009) +[2026-06-07 03:00:18,747][472573] Updated weights for policy 0, policy_version 41277 (0.0007) +[2026-06-07 03:00:18,881][472573] Updated weights for policy 0, policy_version 41289 (0.0009) +[2026-06-07 03:00:18,998][472573] Updated weights for policy 0, policy_version 41300 (0.0008) +[2026-06-07 03:00:19,106][472573] Updated weights for policy 0, policy_version 41310 (0.0008) +[2026-06-07 03:00:19,241][472573] Updated weights for policy 0, policy_version 41322 (0.0009) +[2026-06-07 03:00:19,354][472573] Updated weights for policy 0, policy_version 41332 (0.0008) +[2026-06-07 03:00:19,920][472573] Updated weights for policy 0, policy_version 41342 (0.0008) +[2026-06-07 03:00:20,043][472573] Updated weights for policy 0, policy_version 41353 (0.0008) +[2026-06-07 03:00:20,153][472573] Updated weights for policy 0, policy_version 41363 (0.0008) +[2026-06-07 03:00:20,279][472573] Updated weights for policy 0, policy_version 41374 (0.0008) +[2026-06-07 03:00:20,388][472573] Updated weights for policy 0, policy_version 41384 (0.0009) +[2026-06-07 03:00:20,506][472573] Updated weights for policy 0, policy_version 41394 (0.0008) +[2026-06-07 03:00:21,074][472573] Updated weights for policy 0, policy_version 41405 (0.0008) +[2026-06-07 03:00:21,195][472573] Updated weights for policy 0, policy_version 41416 (0.0008) +[2026-06-07 03:00:21,319][472573] Updated weights for policy 0, policy_version 41427 (0.0008) +[2026-06-07 03:00:21,428][472573] Updated weights for policy 0, policy_version 41437 (0.0008) +[2026-06-07 03:00:21,552][472573] Updated weights for policy 0, policy_version 41448 (0.0008) +[2026-06-07 03:00:21,678][472573] Updated weights for policy 0, policy_version 41459 (0.0008) +[2026-06-07 03:00:22,240][472573] Updated weights for policy 0, policy_version 41470 (0.0008) +[2026-06-07 03:00:22,371][472573] Updated weights for policy 0, policy_version 41481 (0.0008) +[2026-06-07 03:00:22,488][472573] Updated weights for policy 0, policy_version 41492 (0.0008) +[2026-06-07 03:00:22,606][472573] Updated weights for policy 0, policy_version 41502 (0.0008) +[2026-06-07 03:00:22,712][472573] Updated weights for policy 0, policy_version 41512 (0.0008) +[2026-06-07 03:00:22,829][472573] Updated weights for policy 0, policy_version 41522 (0.0008) +[2026-06-07 03:00:23,103][464932] Fps is (10 sec: 29491.0, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 21266432. Throughput: 0: 28387.5. Samples: 21223168. Policy #0 lag: (min: 63.0, avg: 74.6, max: 127.0) +[2026-06-07 03:00:23,105][464932] Avg episode reward: [(0, '1435.683')] +[2026-06-07 03:00:23,110][472028] Saving new best policy, reward=1435.683! +[2026-06-07 03:00:23,391][472573] Updated weights for policy 0, policy_version 41533 (0.0008) +[2026-06-07 03:00:23,511][472573] Updated weights for policy 0, policy_version 41544 (0.0008) +[2026-06-07 03:00:23,647][472573] Updated weights for policy 0, policy_version 41556 (0.0008) +[2026-06-07 03:00:23,754][472573] Updated weights for policy 0, policy_version 41566 (0.0008) +[2026-06-07 03:00:23,869][472573] Updated weights for policy 0, policy_version 41576 (0.0008) +[2026-06-07 03:00:23,985][472573] Updated weights for policy 0, policy_version 41586 (0.0007) +[2026-06-07 03:00:24,529][472573] Updated weights for policy 0, policy_version 41596 (0.0008) +[2026-06-07 03:00:24,645][472573] Updated weights for policy 0, policy_version 41607 (0.0008) +[2026-06-07 03:00:24,763][472573] Updated weights for policy 0, policy_version 41617 (0.0008) +[2026-06-07 03:00:24,894][472573] Updated weights for policy 0, policy_version 41629 (0.0008) +[2026-06-07 03:00:25,012][472573] Updated weights for policy 0, policy_version 41639 (0.0008) +[2026-06-07 03:00:25,140][472573] Updated weights for policy 0, policy_version 41650 (0.0008) +[2026-06-07 03:00:25,707][472573] Updated weights for policy 0, policy_version 41662 (0.0007) +[2026-06-07 03:00:25,832][472573] Updated weights for policy 0, policy_version 41673 (0.0008) +[2026-06-07 03:00:25,957][472573] Updated weights for policy 0, policy_version 41684 (0.0008) +[2026-06-07 03:00:26,103][472573] Updated weights for policy 0, policy_version 41697 (0.0008) +[2026-06-07 03:00:26,213][472573] Updated weights for policy 0, policy_version 41707 (0.0008) +[2026-06-07 03:00:26,347][472573] Updated weights for policy 0, policy_version 41719 (0.0008) +[2026-06-07 03:00:26,900][472573] Updated weights for policy 0, policy_version 41730 (0.0007) +[2026-06-07 03:00:27,033][472573] Updated weights for policy 0, policy_version 41742 (0.0008) +[2026-06-07 03:00:27,157][472573] Updated weights for policy 0, policy_version 41753 (0.0008) +[2026-06-07 03:00:27,280][472573] Updated weights for policy 0, policy_version 41764 (0.0008) +[2026-06-07 03:00:27,393][472573] Updated weights for policy 0, policy_version 41774 (0.0008) +[2026-06-07 03:00:27,506][472573] Updated weights for policy 0, policy_version 41784 (0.0008) +[2026-06-07 03:00:28,056][472573] Updated weights for policy 0, policy_version 41794 (0.0008) +[2026-06-07 03:00:28,103][464932] Fps is (10 sec: 29490.9, 60 sec: 28398.9, 300 sec: 28435.9). Total num frames: 21397504. Throughput: 0: 28461.4. Samples: 21397120. Policy #0 lag: (min: 63.0, avg: 74.6, max: 127.0) +[2026-06-07 03:00:28,104][464932] Avg episode reward: [(0, '1450.857')] +[2026-06-07 03:00:28,180][472573] Updated weights for policy 0, policy_version 41805 (0.0008) +[2026-06-07 03:00:28,318][472573] Updated weights for policy 0, policy_version 41817 (0.0008) +[2026-06-07 03:00:28,432][472573] Updated weights for policy 0, policy_version 41827 (0.0009) +[2026-06-07 03:00:28,549][472573] Updated weights for policy 0, policy_version 41837 (0.0009) +[2026-06-07 03:00:28,664][472028] Saving new best policy, reward=1450.857! +[2026-06-07 03:00:28,667][472573] Updated weights for policy 0, policy_version 41848 (0.0009) +[2026-06-07 03:00:29,246][472573] Updated weights for policy 0, policy_version 41860 (0.0008) +[2026-06-07 03:00:29,358][472573] Updated weights for policy 0, policy_version 41870 (0.0008) +[2026-06-07 03:00:29,469][472573] Updated weights for policy 0, policy_version 41880 (0.0008) +[2026-06-07 03:00:29,618][472573] Updated weights for policy 0, policy_version 41893 (0.0008) +[2026-06-07 03:00:29,750][472573] Updated weights for policy 0, policy_version 41905 (0.0008) +[2026-06-07 03:00:30,299][472573] Updated weights for policy 0, policy_version 41915 (0.0008) +[2026-06-07 03:00:30,434][472573] Updated weights for policy 0, policy_version 41927 (0.0008) +[2026-06-07 03:00:30,541][472573] Updated weights for policy 0, policy_version 41937 (0.0008) +[2026-06-07 03:00:30,670][472573] Updated weights for policy 0, policy_version 41948 (0.0008) +[2026-06-07 03:00:30,785][472573] Updated weights for policy 0, policy_version 41958 (0.0008) +[2026-06-07 03:00:30,899][472573] Updated weights for policy 0, policy_version 41968 (0.0008) +[2026-06-07 03:00:31,426][472573] Updated weights for policy 0, policy_version 41978 (0.0008) +[2026-06-07 03:00:31,539][472573] Updated weights for policy 0, policy_version 41988 (0.0008) +[2026-06-07 03:00:31,661][472573] Updated weights for policy 0, policy_version 41999 (0.0008) +[2026-06-07 03:00:31,781][472573] Updated weights for policy 0, policy_version 42010 (0.0008) +[2026-06-07 03:00:31,907][472573] Updated weights for policy 0, policy_version 42021 (0.0008) +[2026-06-07 03:00:32,034][472573] Updated weights for policy 0, policy_version 42032 (0.0008) +[2026-06-07 03:00:32,593][472573] Updated weights for policy 0, policy_version 42044 (0.0008) +[2026-06-07 03:00:32,704][472573] Updated weights for policy 0, policy_version 42054 (0.0008) +[2026-06-07 03:00:32,837][472573] Updated weights for policy 0, policy_version 42066 (0.0008) +[2026-06-07 03:00:32,951][472573] Updated weights for policy 0, policy_version 42076 (0.0008) +[2026-06-07 03:00:33,065][472573] Updated weights for policy 0, policy_version 42086 (0.0008) +[2026-06-07 03:00:33,103][464932] Fps is (10 sec: 26214.6, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 21528576. Throughput: 0: 28464.4. Samples: 21573248. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 03:00:33,104][464932] Avg episode reward: [(0, '1468.819')] +[2026-06-07 03:00:33,190][472573] Updated weights for policy 0, policy_version 42097 (0.0008) +[2026-06-07 03:00:33,263][472028] Saving new best policy, reward=1468.819! +[2026-06-07 03:00:33,735][472573] Updated weights for policy 0, policy_version 42108 (0.0007) +[2026-06-07 03:00:33,850][472573] Updated weights for policy 0, policy_version 42118 (0.0008) +[2026-06-07 03:00:33,962][472573] Updated weights for policy 0, policy_version 42128 (0.0008) +[2026-06-07 03:00:34,086][472573] Updated weights for policy 0, policy_version 42139 (0.0008) +[2026-06-07 03:00:34,200][472573] Updated weights for policy 0, policy_version 42149 (0.0008) +[2026-06-07 03:00:34,325][472573] Updated weights for policy 0, policy_version 42160 (0.0011) +[2026-06-07 03:00:34,886][472573] Updated weights for policy 0, policy_version 42172 (0.0010) +[2026-06-07 03:00:34,994][472573] Updated weights for policy 0, policy_version 42182 (0.0005) +[2026-06-07 03:00:35,122][472573] Updated weights for policy 0, policy_version 42193 (0.0005) +[2026-06-07 03:00:35,256][472573] Updated weights for policy 0, policy_version 42205 (0.0006) +[2026-06-07 03:00:35,372][472573] Updated weights for policy 0, policy_version 42215 (0.0010) +[2026-06-07 03:00:35,480][472573] Updated weights for policy 0, policy_version 42225 (0.0011) +[2026-06-07 03:00:36,046][472573] Updated weights for policy 0, policy_version 42237 (0.0008) +[2026-06-07 03:00:36,161][472573] Updated weights for policy 0, policy_version 42247 (0.0005) +[2026-06-07 03:00:36,267][472573] Updated weights for policy 0, policy_version 42257 (0.0005) +[2026-06-07 03:00:36,405][472573] Updated weights for policy 0, policy_version 42269 (0.0007) +[2026-06-07 03:00:36,536][472573] Updated weights for policy 0, policy_version 42281 (0.0011) +[2026-06-07 03:00:36,662][472573] Updated weights for policy 0, policy_version 42292 (0.0010) +[2026-06-07 03:00:37,228][472573] Updated weights for policy 0, policy_version 42304 (0.0008) +[2026-06-07 03:00:37,346][472573] Updated weights for policy 0, policy_version 42315 (0.0005) +[2026-06-07 03:00:37,462][472573] Updated weights for policy 0, policy_version 42325 (0.0004) +[2026-06-07 03:00:37,578][472573] Updated weights for policy 0, policy_version 42335 (0.0007) +[2026-06-07 03:00:37,690][472573] Updated weights for policy 0, policy_version 42345 (0.0011) +[2026-06-07 03:00:37,837][472573] Updated weights for policy 0, policy_version 42358 (0.0011) +[2026-06-07 03:00:38,103][464932] Fps is (10 sec: 29491.4, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 21692416. Throughput: 0: 28418.8. Samples: 21649408. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 03:00:38,104][464932] Avg episode reward: [(0, '1466.143')] +[2026-06-07 03:00:38,385][472573] Updated weights for policy 0, policy_version 42368 (0.0007) +[2026-06-07 03:00:38,508][472573] Updated weights for policy 0, policy_version 42379 (0.0008) +[2026-06-07 03:00:38,626][472573] Updated weights for policy 0, policy_version 42390 (0.0008) +[2026-06-07 03:00:38,753][472573] Updated weights for policy 0, policy_version 42401 (0.0008) +[2026-06-07 03:00:38,866][472573] Updated weights for policy 0, policy_version 42411 (0.0008) +[2026-06-07 03:00:38,976][472573] Updated weights for policy 0, policy_version 42421 (0.0008) +[2026-06-07 03:00:39,547][472573] Updated weights for policy 0, policy_version 42433 (0.0008) +[2026-06-07 03:00:39,657][472573] Updated weights for policy 0, policy_version 42443 (0.0008) +[2026-06-07 03:00:39,762][472573] Updated weights for policy 0, policy_version 42453 (0.0008) +[2026-06-07 03:00:39,903][472573] Updated weights for policy 0, policy_version 42465 (0.0008) +[2026-06-07 03:00:40,036][472573] Updated weights for policy 0, policy_version 42477 (0.0008) +[2026-06-07 03:00:40,157][472573] Updated weights for policy 0, policy_version 42488 (0.0008) +[2026-06-07 03:00:40,719][472573] Updated weights for policy 0, policy_version 42498 (0.0007) +[2026-06-07 03:00:40,831][472573] Updated weights for policy 0, policy_version 42508 (0.0008) +[2026-06-07 03:00:40,943][472573] Updated weights for policy 0, policy_version 42518 (0.0008) +[2026-06-07 03:00:41,059][472573] Updated weights for policy 0, policy_version 42528 (0.0008) +[2026-06-07 03:00:41,167][472573] Updated weights for policy 0, policy_version 42538 (0.0008) +[2026-06-07 03:00:41,275][472573] Updated weights for policy 0, policy_version 42548 (0.0008) +[2026-06-07 03:00:41,827][472573] Updated weights for policy 0, policy_version 42558 (0.0008) +[2026-06-07 03:00:41,931][472573] Updated weights for policy 0, policy_version 42568 (0.0008) +[2026-06-07 03:00:42,061][472573] Updated weights for policy 0, policy_version 42579 (0.0008) +[2026-06-07 03:00:42,196][472573] Updated weights for policy 0, policy_version 42591 (0.0008) +[2026-06-07 03:00:42,310][472573] Updated weights for policy 0, policy_version 42601 (0.0008) +[2026-06-07 03:00:42,421][472573] Updated weights for policy 0, policy_version 42611 (0.0008) +[2026-06-07 03:00:42,963][472573] Updated weights for policy 0, policy_version 42622 (0.0008) +[2026-06-07 03:00:43,072][472573] Updated weights for policy 0, policy_version 42632 (0.0008) +[2026-06-07 03:00:43,103][464932] Fps is (10 sec: 29491.1, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 21823488. Throughput: 0: 28487.1. Samples: 21824128. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 03:00:43,104][464932] Avg episode reward: [(0, '1455.693')] +[2026-06-07 03:00:43,181][472573] Updated weights for policy 0, policy_version 42642 (0.0008) +[2026-06-07 03:00:43,311][472573] Updated weights for policy 0, policy_version 42653 (0.0008) +[2026-06-07 03:00:43,420][472573] Updated weights for policy 0, policy_version 42663 (0.0008) +[2026-06-07 03:00:43,565][472573] Updated weights for policy 0, policy_version 42676 (0.0008) +[2026-06-07 03:00:44,118][472573] Updated weights for policy 0, policy_version 42686 (0.0008) +[2026-06-07 03:00:44,229][472573] Updated weights for policy 0, policy_version 42696 (0.0008) +[2026-06-07 03:00:44,336][472573] Updated weights for policy 0, policy_version 42706 (0.0008) +[2026-06-07 03:00:44,480][472573] Updated weights for policy 0, policy_version 42719 (0.0008) +[2026-06-07 03:00:44,626][472573] Updated weights for policy 0, policy_version 42732 (0.0008) +[2026-06-07 03:00:44,743][472573] Updated weights for policy 0, policy_version 42743 (0.0008) +[2026-06-07 03:00:45,316][472573] Updated weights for policy 0, policy_version 42755 (0.0008) +[2026-06-07 03:00:45,452][472573] Updated weights for policy 0, policy_version 42767 (0.0008) +[2026-06-07 03:00:45,565][472573] Updated weights for policy 0, policy_version 42777 (0.0008) +[2026-06-07 03:00:45,682][472573] Updated weights for policy 0, policy_version 42787 (0.0008) +[2026-06-07 03:00:45,791][472573] Updated weights for policy 0, policy_version 42797 (0.0008) +[2026-06-07 03:00:46,368][472573] Updated weights for policy 0, policy_version 42809 (0.0008) +[2026-06-07 03:00:46,504][472573] Updated weights for policy 0, policy_version 42821 (0.0008) +[2026-06-07 03:00:46,623][472573] Updated weights for policy 0, policy_version 42832 (0.0007) +[2026-06-07 03:00:46,752][472573] Updated weights for policy 0, policy_version 42844 (0.0005) +[2026-06-07 03:00:46,900][472573] Updated weights for policy 0, policy_version 42858 (0.0005) +[2026-06-07 03:00:47,042][472573] Updated weights for policy 0, policy_version 42871 (0.0005) +[2026-06-07 03:00:47,604][472573] Updated weights for policy 0, policy_version 42882 (0.0007) +[2026-06-07 03:00:47,745][472573] Updated weights for policy 0, policy_version 42895 (0.0008) +[2026-06-07 03:00:47,884][472573] Updated weights for policy 0, policy_version 42908 (0.0008) +[2026-06-07 03:00:47,993][472573] Updated weights for policy 0, policy_version 42918 (0.0008) +[2026-06-07 03:00:48,103][464932] Fps is (10 sec: 26214.3, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 21954560. Throughput: 0: 28458.6. Samples: 21998592. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 03:00:48,105][464932] Avg episode reward: [(0, '1511.884')] +[2026-06-07 03:00:48,114][472573] Updated weights for policy 0, policy_version 42929 (0.0009) +[2026-06-07 03:00:48,184][472028] Saving new best policy, reward=1511.884! +[2026-06-07 03:00:48,694][472573] Updated weights for policy 0, policy_version 42940 (0.0006) +[2026-06-07 03:00:48,821][472573] Updated weights for policy 0, policy_version 42952 (0.0004) +[2026-06-07 03:00:48,951][472573] Updated weights for policy 0, policy_version 42964 (0.0008) +[2026-06-07 03:00:49,092][472573] Updated weights for policy 0, policy_version 42977 (0.0008) +[2026-06-07 03:00:49,229][472573] Updated weights for policy 0, policy_version 42990 (0.0008) +[2026-06-07 03:00:49,800][472573] Updated weights for policy 0, policy_version 43001 (0.0008) +[2026-06-07 03:00:49,907][472573] Updated weights for policy 0, policy_version 43011 (0.0008) +[2026-06-07 03:00:50,011][472573] Updated weights for policy 0, policy_version 43021 (0.0008) +[2026-06-07 03:00:50,141][472573] Updated weights for policy 0, policy_version 43033 (0.0008) +[2026-06-07 03:00:50,270][472573] Updated weights for policy 0, policy_version 43045 (0.0008) +[2026-06-07 03:00:50,430][472573] Updated weights for policy 0, policy_version 43060 (0.0008) +[2026-06-07 03:00:51,008][472573] Updated weights for policy 0, policy_version 43070 (0.0008) +[2026-06-07 03:00:51,145][472573] Updated weights for policy 0, policy_version 43083 (0.0008) +[2026-06-07 03:00:51,268][472573] Updated weights for policy 0, policy_version 43095 (0.0008) +[2026-06-07 03:00:51,389][472573] Updated weights for policy 0, policy_version 43106 (0.0008) +[2026-06-07 03:00:51,511][472573] Updated weights for policy 0, policy_version 43117 (0.0008) +[2026-06-07 03:00:51,627][472573] Updated weights for policy 0, policy_version 43128 (0.0008) +[2026-06-07 03:00:52,193][472573] Updated weights for policy 0, policy_version 43139 (0.0008) +[2026-06-07 03:00:52,319][472573] Updated weights for policy 0, policy_version 43151 (0.0008) +[2026-06-07 03:00:52,459][472573] Updated weights for policy 0, policy_version 43164 (0.0008) +[2026-06-07 03:00:52,607][472573] Updated weights for policy 0, policy_version 43178 (0.0008) +[2026-06-07 03:00:52,741][472573] Updated weights for policy 0, policy_version 43190 (0.0008) +[2026-06-07 03:00:53,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 22118400. Throughput: 0: 28450.1. Samples: 22075776. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 03:00:53,104][464932] Avg episode reward: [(0, '1600.782')] +[2026-06-07 03:00:53,109][472028] Saving new best policy, reward=1600.782! +[2026-06-07 03:00:53,310][472573] Updated weights for policy 0, policy_version 43201 (0.0008) +[2026-06-07 03:00:53,435][472573] Updated weights for policy 0, policy_version 43212 (0.0008) +[2026-06-07 03:00:53,543][472573] Updated weights for policy 0, policy_version 43222 (0.0008) +[2026-06-07 03:00:53,660][472573] Updated weights for policy 0, policy_version 43233 (0.0008) +[2026-06-07 03:00:53,785][472573] Updated weights for policy 0, policy_version 43244 (0.0008) +[2026-06-07 03:00:53,912][472573] Updated weights for policy 0, policy_version 43255 (0.0008) +[2026-06-07 03:00:54,475][472573] Updated weights for policy 0, policy_version 43265 (0.0007) +[2026-06-07 03:00:54,580][472573] Updated weights for policy 0, policy_version 43275 (0.0008) +[2026-06-07 03:00:54,703][472573] Updated weights for policy 0, policy_version 43286 (0.0008) +[2026-06-07 03:00:54,827][472573] Updated weights for policy 0, policy_version 43297 (0.0008) +[2026-06-07 03:00:54,938][472573] Updated weights for policy 0, policy_version 43307 (0.0008) +[2026-06-07 03:00:55,063][472573] Updated weights for policy 0, policy_version 43318 (0.0008) +[2026-06-07 03:00:55,617][472573] Updated weights for policy 0, policy_version 43328 (0.0008) +[2026-06-07 03:00:55,725][472573] Updated weights for policy 0, policy_version 43338 (0.0008) +[2026-06-07 03:00:55,875][472573] Updated weights for policy 0, policy_version 43351 (0.0008) +[2026-06-07 03:00:55,985][472573] Updated weights for policy 0, policy_version 43361 (0.0008) +[2026-06-07 03:00:56,116][472573] Updated weights for policy 0, policy_version 43373 (0.0008) +[2026-06-07 03:00:56,240][472573] Updated weights for policy 0, policy_version 43384 (0.0008) +[2026-06-07 03:00:56,793][472573] Updated weights for policy 0, policy_version 43395 (0.0008) +[2026-06-07 03:00:56,903][472573] Updated weights for policy 0, policy_version 43405 (0.0008) +[2026-06-07 03:00:57,013][472573] Updated weights for policy 0, policy_version 43415 (0.0008) +[2026-06-07 03:00:57,137][472573] Updated weights for policy 0, policy_version 43426 (0.0008) +[2026-06-07 03:00:57,256][472573] Updated weights for policy 0, policy_version 43437 (0.0008) +[2026-06-07 03:00:57,380][472573] Updated weights for policy 0, policy_version 43448 (0.0008) +[2026-06-07 03:00:57,944][472573] Updated weights for policy 0, policy_version 43459 (0.0008) +[2026-06-07 03:00:58,069][472573] Updated weights for policy 0, policy_version 43470 (0.0008) +[2026-06-07 03:00:58,103][464932] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 22249472. Throughput: 0: 28504.2. Samples: 22252288. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 03:00:58,104][464932] Avg episode reward: [(0, '1657.220')] +[2026-06-07 03:00:58,177][472573] Updated weights for policy 0, policy_version 43480 (0.0008) +[2026-06-07 03:00:58,308][472573] Updated weights for policy 0, policy_version 43492 (0.0008) +[2026-06-07 03:00:58,424][472573] Updated weights for policy 0, policy_version 43502 (0.0008) +[2026-06-07 03:00:58,532][472028] Saving new best policy, reward=1657.220! +[2026-06-07 03:00:58,534][472573] Updated weights for policy 0, policy_version 43512 (0.0008) +[2026-06-07 03:00:59,112][472573] Updated weights for policy 0, policy_version 43523 (0.0008) +[2026-06-07 03:00:59,236][472573] Updated weights for policy 0, policy_version 43534 (0.0008) +[2026-06-07 03:00:59,346][472573] Updated weights for policy 0, policy_version 43544 (0.0008) +[2026-06-07 03:00:59,482][472573] Updated weights for policy 0, policy_version 43556 (0.0008) +[2026-06-07 03:00:59,596][472573] Updated weights for policy 0, policy_version 43566 (0.0008) +[2026-06-07 03:00:59,712][472573] Updated weights for policy 0, policy_version 43576 (0.0008) +[2026-06-07 03:01:00,237][472573] Updated weights for policy 0, policy_version 43586 (0.0008) +[2026-06-07 03:01:00,348][472573] Updated weights for policy 0, policy_version 43596 (0.0008) +[2026-06-07 03:01:00,456][472573] Updated weights for policy 0, policy_version 43606 (0.0008) +[2026-06-07 03:01:00,568][472573] Updated weights for policy 0, policy_version 43616 (0.0008) +[2026-06-07 03:01:00,711][472573] Updated weights for policy 0, policy_version 43629 (0.0008) +[2026-06-07 03:01:00,838][472573] Updated weights for policy 0, policy_version 43640 (0.0008) +[2026-06-07 03:01:01,391][472573] Updated weights for policy 0, policy_version 43651 (0.0008) +[2026-06-07 03:01:01,506][472573] Updated weights for policy 0, policy_version 43661 (0.0008) +[2026-06-07 03:01:01,639][472573] Updated weights for policy 0, policy_version 43673 (0.0008) +[2026-06-07 03:01:01,755][472573] Updated weights for policy 0, policy_version 43683 (0.0009) +[2026-06-07 03:01:01,867][472573] Updated weights for policy 0, policy_version 43693 (0.0009) +[2026-06-07 03:01:01,978][472573] Updated weights for policy 0, policy_version 43703 (0.0008) +[2026-06-07 03:01:02,509][472573] Updated weights for policy 0, policy_version 43713 (0.0008) +[2026-06-07 03:01:02,632][472573] Updated weights for policy 0, policy_version 43724 (0.0008) +[2026-06-07 03:01:02,766][472573] Updated weights for policy 0, policy_version 43736 (0.0008) +[2026-06-07 03:01:02,883][472573] Updated weights for policy 0, policy_version 43746 (0.0009) +[2026-06-07 03:01:02,998][472573] Updated weights for policy 0, policy_version 43756 (0.0009) +[2026-06-07 03:01:03,103][464932] Fps is (10 sec: 26214.4, 60 sec: 28398.9, 300 sec: 28324.9). Total num frames: 22380544. Throughput: 0: 28376.2. Samples: 22422912. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 03:01:03,104][464932] Avg episode reward: [(0, '1706.469')] +[2026-06-07 03:01:03,116][472573] Updated weights for policy 0, policy_version 43767 (0.0008) +[2026-06-07 03:01:03,126][472028] Saving new best policy, reward=1706.469! +[2026-06-07 03:01:03,675][472573] Updated weights for policy 0, policy_version 43777 (0.0008) +[2026-06-07 03:01:03,781][472573] Updated weights for policy 0, policy_version 43787 (0.0008) +[2026-06-07 03:01:03,908][472573] Updated weights for policy 0, policy_version 43798 (0.0008) +[2026-06-07 03:01:04,046][472573] Updated weights for policy 0, policy_version 43810 (0.0008) +[2026-06-07 03:01:04,160][472573] Updated weights for policy 0, policy_version 43820 (0.0008) +[2026-06-07 03:01:04,287][472573] Updated weights for policy 0, policy_version 43831 (0.0008) +[2026-06-07 03:01:04,838][472573] Updated weights for policy 0, policy_version 43842 (0.0008) +[2026-06-07 03:01:04,968][472573] Updated weights for policy 0, policy_version 43854 (0.0009) +[2026-06-07 03:01:05,094][472573] Updated weights for policy 0, policy_version 43865 (0.0008) +[2026-06-07 03:01:05,197][472573] Updated weights for policy 0, policy_version 43875 (0.0008) +[2026-06-07 03:01:05,325][472573] Updated weights for policy 0, policy_version 43886 (0.0008) +[2026-06-07 03:01:05,435][472573] Updated weights for policy 0, policy_version 43896 (0.0007) +[2026-06-07 03:01:05,983][472573] Updated weights for policy 0, policy_version 43906 (0.0008) +[2026-06-07 03:01:06,125][472573] Updated weights for policy 0, policy_version 43919 (0.0008) +[2026-06-07 03:01:06,240][472573] Updated weights for policy 0, policy_version 43929 (0.0008) +[2026-06-07 03:01:06,373][472573] Updated weights for policy 0, policy_version 43941 (0.0008) +[2026-06-07 03:01:06,530][472573] Updated weights for policy 0, policy_version 43955 (0.0008) +[2026-06-07 03:01:07,088][472573] Updated weights for policy 0, policy_version 43966 (0.0008) +[2026-06-07 03:01:07,198][472573] Updated weights for policy 0, policy_version 43976 (0.0008) +[2026-06-07 03:01:07,318][472573] Updated weights for policy 0, policy_version 43987 (0.0008) +[2026-06-07 03:01:07,431][472573] Updated weights for policy 0, policy_version 43997 (0.0008) +[2026-06-07 03:01:07,543][472573] Updated weights for policy 0, policy_version 44007 (0.0008) +[2026-06-07 03:01:07,667][472573] Updated weights for policy 0, policy_version 44018 (0.0008) +[2026-06-07 03:01:08,103][464932] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 22544384. Throughput: 0: 28413.2. Samples: 22501760. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 03:01:08,104][464932] Avg episode reward: [(0, '1683.954')] +[2026-06-07 03:01:08,214][472573] Updated weights for policy 0, policy_version 44028 (0.0008) +[2026-06-07 03:01:08,335][472573] Updated weights for policy 0, policy_version 44039 (0.0008) +[2026-06-07 03:01:08,453][472573] Updated weights for policy 0, policy_version 44050 (0.0008) +[2026-06-07 03:01:08,581][472573] Updated weights for policy 0, policy_version 44061 (0.0008) +[2026-06-07 03:01:08,693][472573] Updated weights for policy 0, policy_version 44071 (0.0008) +[2026-06-07 03:01:08,806][472573] Updated weights for policy 0, policy_version 44081 (0.0008) +[2026-06-07 03:01:09,339][472573] Updated weights for policy 0, policy_version 44091 (0.0005) +[2026-06-07 03:01:09,485][472573] Updated weights for policy 0, policy_version 44104 (0.0008) +[2026-06-07 03:01:09,595][472573] Updated weights for policy 0, policy_version 44114 (0.0006) +[2026-06-07 03:01:09,706][472573] Updated weights for policy 0, policy_version 44124 (0.0004) +[2026-06-07 03:01:09,830][472573] Updated weights for policy 0, policy_version 44135 (0.0004) +[2026-06-07 03:01:09,966][472573] Updated weights for policy 0, policy_version 44147 (0.0004) +[2026-06-07 03:01:10,510][472573] Updated weights for policy 0, policy_version 44158 (0.0005) +[2026-06-07 03:01:10,643][472573] Updated weights for policy 0, policy_version 44170 (0.0005) +[2026-06-07 03:01:10,760][472573] Updated weights for policy 0, policy_version 44181 (0.0005) +[2026-06-07 03:01:10,890][472573] Updated weights for policy 0, policy_version 44192 (0.0008) +[2026-06-07 03:01:11,019][472573] Updated weights for policy 0, policy_version 44203 (0.0008) +[2026-06-07 03:01:11,146][472573] Updated weights for policy 0, policy_version 44214 (0.0008) +[2026-06-07 03:01:11,676][472573] Updated weights for policy 0, policy_version 44224 (0.0005) +[2026-06-07 03:01:11,804][472573] Updated weights for policy 0, policy_version 44235 (0.0005) +[2026-06-07 03:01:11,914][472573] Updated weights for policy 0, policy_version 44245 (0.0004) +[2026-06-07 03:01:12,032][472573] Updated weights for policy 0, policy_version 44255 (0.0005) +[2026-06-07 03:01:12,153][472573] Updated weights for policy 0, policy_version 44266 (0.0006) +[2026-06-07 03:01:12,277][472573] Updated weights for policy 0, policy_version 44277 (0.0008) +[2026-06-07 03:01:12,813][472573] Updated weights for policy 0, policy_version 44287 (0.0008) +[2026-06-07 03:01:12,924][472573] Updated weights for policy 0, policy_version 44297 (0.0008) +[2026-06-07 03:01:13,066][472573] Updated weights for policy 0, policy_version 44309 (0.0008) +[2026-06-07 03:01:13,103][464932] Fps is (10 sec: 29491.3, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 22675456. Throughput: 0: 28504.2. Samples: 22679808. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 03:01:13,104][464932] Avg episode reward: [(0, '1720.685')] +[2026-06-07 03:01:13,185][472573] Updated weights for policy 0, policy_version 44320 (0.0008) +[2026-06-07 03:01:13,306][472573] Updated weights for policy 0, policy_version 44331 (0.0008) +[2026-06-07 03:01:13,415][472573] Updated weights for policy 0, policy_version 44341 (0.0008) +[2026-06-07 03:01:13,445][472028] Saving new best policy, reward=1720.685! +[2026-06-07 03:01:13,969][472573] Updated weights for policy 0, policy_version 44351 (0.0008) +[2026-06-07 03:01:14,093][472573] Updated weights for policy 0, policy_version 44362 (0.0009) +[2026-06-07 03:01:14,226][472573] Updated weights for policy 0, policy_version 44374 (0.0008) +[2026-06-07 03:01:14,349][472573] Updated weights for policy 0, policy_version 44385 (0.0008) +[2026-06-07 03:01:14,462][472573] Updated weights for policy 0, policy_version 44395 (0.0008) +[2026-06-07 03:01:14,571][472573] Updated weights for policy 0, policy_version 44405 (0.0008) +[2026-06-07 03:01:15,122][472573] Updated weights for policy 0, policy_version 44415 (0.0009) +[2026-06-07 03:01:15,227][472573] Updated weights for policy 0, policy_version 44425 (0.0008) +[2026-06-07 03:01:15,357][472573] Updated weights for policy 0, policy_version 44436 (0.0008) +[2026-06-07 03:01:15,491][472573] Updated weights for policy 0, policy_version 44448 (0.0008) +[2026-06-07 03:01:15,599][472573] Updated weights for policy 0, policy_version 44458 (0.0008) +[2026-06-07 03:01:15,741][472573] Updated weights for policy 0, policy_version 44470 (0.0008) +[2026-06-07 03:01:16,303][472573] Updated weights for policy 0, policy_version 44480 (0.0005) +[2026-06-07 03:01:16,433][472573] Updated weights for policy 0, policy_version 44492 (0.0008) +[2026-06-07 03:01:16,549][472573] Updated weights for policy 0, policy_version 44502 (0.0008) +[2026-06-07 03:01:16,672][472573] Updated weights for policy 0, policy_version 44513 (0.0008) +[2026-06-07 03:01:16,782][472573] Updated weights for policy 0, policy_version 44523 (0.0008) +[2026-06-07 03:01:16,896][472573] Updated weights for policy 0, policy_version 44533 (0.0008) +[2026-06-07 03:01:17,430][472573] Updated weights for policy 0, policy_version 44543 (0.0008) +[2026-06-07 03:01:17,552][472573] Updated weights for policy 0, policy_version 44554 (0.0008) +[2026-06-07 03:01:17,675][472573] Updated weights for policy 0, policy_version 44565 (0.0008) +[2026-06-07 03:01:17,799][472573] Updated weights for policy 0, policy_version 44576 (0.0008) +[2026-06-07 03:01:17,925][472573] Updated weights for policy 0, policy_version 44587 (0.0008) +[2026-06-07 03:01:18,033][472573] Updated weights for policy 0, policy_version 44597 (0.0008) +[2026-06-07 03:01:18,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28945.1, 300 sec: 28436.0). Total num frames: 22839296. Throughput: 0: 28322.1. Samples: 22847744. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 03:01:18,104][464932] Avg episode reward: [(0, '1785.194')] +[2026-06-07 03:01:18,109][472028] Saving new best policy, reward=1785.194! +[2026-06-07 03:01:18,580][472573] Updated weights for policy 0, policy_version 44607 (0.0007) +[2026-06-07 03:01:18,695][472573] Updated weights for policy 0, policy_version 44617 (0.0008) +[2026-06-07 03:01:18,814][472573] Updated weights for policy 0, policy_version 44628 (0.0008) +[2026-06-07 03:01:18,940][472573] Updated weights for policy 0, policy_version 44639 (0.0008) +[2026-06-07 03:01:19,073][472573] Updated weights for policy 0, policy_version 44651 (0.0008) +[2026-06-07 03:01:19,185][472573] Updated weights for policy 0, policy_version 44661 (0.0008) +[2026-06-07 03:01:19,742][472573] Updated weights for policy 0, policy_version 44672 (0.0008) +[2026-06-07 03:01:19,851][472573] Updated weights for policy 0, policy_version 44682 (0.0008) +[2026-06-07 03:01:19,972][472573] Updated weights for policy 0, policy_version 44693 (0.0008) +[2026-06-07 03:01:20,099][472573] Updated weights for policy 0, policy_version 44704 (0.0008) +[2026-06-07 03:01:20,215][472573] Updated weights for policy 0, policy_version 44714 (0.0008) +[2026-06-07 03:01:20,329][472573] Updated weights for policy 0, policy_version 44725 (0.0008) +[2026-06-07 03:01:20,887][472573] Updated weights for policy 0, policy_version 44735 (0.0008) +[2026-06-07 03:01:20,998][472573] Updated weights for policy 0, policy_version 44745 (0.0008) +[2026-06-07 03:01:21,110][472573] Updated weights for policy 0, policy_version 44755 (0.0008) +[2026-06-07 03:01:21,218][472573] Updated weights for policy 0, policy_version 44765 (0.0008) +[2026-06-07 03:01:21,348][472573] Updated weights for policy 0, policy_version 44776 (0.0008) +[2026-06-07 03:01:21,480][472573] Updated weights for policy 0, policy_version 44788 (0.0008) +[2026-06-07 03:01:22,025][472573] Updated weights for policy 0, policy_version 44798 (0.0007) +[2026-06-07 03:01:22,149][472573] Updated weights for policy 0, policy_version 44809 (0.0008) +[2026-06-07 03:01:22,258][472573] Updated weights for policy 0, policy_version 44819 (0.0008) +[2026-06-07 03:01:22,394][472573] Updated weights for policy 0, policy_version 44831 (0.0008) +[2026-06-07 03:01:22,500][472573] Updated weights for policy 0, policy_version 44841 (0.0008) +[2026-06-07 03:01:22,622][472573] Updated weights for policy 0, policy_version 44851 (0.0008) +[2026-06-07 03:01:23,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28399.0, 300 sec: 28436.0). Total num frames: 22970368. Throughput: 0: 28447.3. Samples: 22929536. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 03:01:23,104][464932] Avg episode reward: [(0, '1744.065')] +[2026-06-07 03:01:23,151][472573] Updated weights for policy 0, policy_version 44861 (0.0009) +[2026-06-07 03:01:23,273][472573] Updated weights for policy 0, policy_version 44872 (0.0008) +[2026-06-07 03:01:23,416][472573] Updated weights for policy 0, policy_version 44885 (0.0008) +[2026-06-07 03:01:23,537][472573] Updated weights for policy 0, policy_version 44895 (0.0008) +[2026-06-07 03:01:23,666][472573] Updated weights for policy 0, policy_version 44907 (0.0008) +[2026-06-07 03:01:23,776][472573] Updated weights for policy 0, policy_version 44917 (0.0008) +[2026-06-07 03:01:24,341][472573] Updated weights for policy 0, policy_version 44928 (0.0009) +[2026-06-07 03:01:24,454][472573] Updated weights for policy 0, policy_version 44938 (0.0008) +[2026-06-07 03:01:24,587][472573] Updated weights for policy 0, policy_version 44950 (0.0009) +[2026-06-07 03:01:24,699][472573] Updated weights for policy 0, policy_version 44960 (0.0010) +[2026-06-07 03:01:24,831][472573] Updated weights for policy 0, policy_version 44972 (0.0010) +[2026-06-07 03:01:24,944][472573] Updated weights for policy 0, policy_version 44982 (0.0009) +[2026-06-07 03:01:25,490][472573] Updated weights for policy 0, policy_version 44993 (0.0009) +[2026-06-07 03:01:25,620][472573] Updated weights for policy 0, policy_version 45005 (0.0009) +[2026-06-07 03:01:25,750][472573] Updated weights for policy 0, policy_version 45016 (0.0009) +[2026-06-07 03:01:25,859][472573] Updated weights for policy 0, policy_version 45026 (0.0010) +[2026-06-07 03:01:25,970][472573] Updated weights for policy 0, policy_version 45036 (0.0010) +[2026-06-07 03:01:26,081][472573] Updated weights for policy 0, policy_version 45046 (0.0011) +[2026-06-07 03:01:26,628][472573] Updated weights for policy 0, policy_version 45056 (0.0009) +[2026-06-07 03:01:26,752][472573] Updated weights for policy 0, policy_version 45067 (0.0011) +[2026-06-07 03:01:26,862][472573] Updated weights for policy 0, policy_version 45077 (0.0009) +[2026-06-07 03:01:26,975][472573] Updated weights for policy 0, policy_version 45087 (0.0010) +[2026-06-07 03:01:27,114][472573] Updated weights for policy 0, policy_version 45100 (0.0009) +[2026-06-07 03:01:27,228][472573] Updated weights for policy 0, policy_version 45110 (0.0010) +[2026-06-07 03:01:27,777][472573] Updated weights for policy 0, policy_version 45120 (0.0007) +[2026-06-07 03:01:27,887][472573] Updated weights for policy 0, policy_version 45130 (0.0009) +[2026-06-07 03:01:28,000][472573] Updated weights for policy 0, policy_version 45140 (0.0009) +[2026-06-07 03:01:28,103][464932] Fps is (10 sec: 26214.1, 60 sec: 28398.9, 300 sec: 28435.9). Total num frames: 23101440. Throughput: 0: 28464.3. Samples: 23105024. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 03:01:28,104][464932] Avg episode reward: [(0, '1728.093')] +[2026-06-07 03:01:28,120][472573] Updated weights for policy 0, policy_version 45151 (0.0011) +[2026-06-07 03:01:28,246][472573] Updated weights for policy 0, policy_version 45162 (0.0009) +[2026-06-07 03:01:28,361][472573] Updated weights for policy 0, policy_version 45172 (0.0010) +[2026-06-07 03:01:28,921][472573] Updated weights for policy 0, policy_version 45184 (0.0008) +[2026-06-07 03:01:29,043][472573] Updated weights for policy 0, policy_version 45195 (0.0008) +[2026-06-07 03:01:29,171][472573] Updated weights for policy 0, policy_version 45207 (0.0008) +[2026-06-07 03:01:29,304][472573] Updated weights for policy 0, policy_version 45218 (0.0008) +[2026-06-07 03:01:29,436][472573] Updated weights for policy 0, policy_version 45230 (0.0008) +[2026-06-07 03:01:29,998][472573] Updated weights for policy 0, policy_version 45241 (0.0008) +[2026-06-07 03:01:30,113][472573] Updated weights for policy 0, policy_version 45251 (0.0008) +[2026-06-07 03:01:30,222][472573] Updated weights for policy 0, policy_version 45261 (0.0009) +[2026-06-07 03:01:30,364][472573] Updated weights for policy 0, policy_version 45274 (0.0009) +[2026-06-07 03:01:30,484][472573] Updated weights for policy 0, policy_version 45285 (0.0008) +[2026-06-07 03:01:30,654][472573] Updated weights for policy 0, policy_version 45301 (0.0008) +[2026-06-07 03:01:31,219][472573] Updated weights for policy 0, policy_version 45312 (0.0007) +[2026-06-07 03:01:31,331][472573] Updated weights for policy 0, policy_version 45323 (0.0009) +[2026-06-07 03:01:31,461][472573] Updated weights for policy 0, policy_version 45335 (0.0008) +[2026-06-07 03:01:31,624][472573] Updated weights for policy 0, policy_version 45350 (0.0009) +[2026-06-07 03:01:31,746][472573] Updated weights for policy 0, policy_version 45362 (0.0008) +[2026-06-07 03:01:32,348][472573] Updated weights for policy 0, policy_version 45374 (0.0008) +[2026-06-07 03:01:32,483][472573] Updated weights for policy 0, policy_version 45386 (0.0008) +[2026-06-07 03:01:32,592][472573] Updated weights for policy 0, policy_version 45396 (0.0009) +[2026-06-07 03:01:32,711][472573] Updated weights for policy 0, policy_version 45407 (0.0008) +[2026-06-07 03:01:32,832][472573] Updated weights for policy 0, policy_version 45417 (0.0008) +[2026-06-07 03:01:32,952][472573] Updated weights for policy 0, policy_version 45428 (0.0009) +[2026-06-07 03:01:33,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28945.1, 300 sec: 28436.0). Total num frames: 23265280. Throughput: 0: 28299.4. Samples: 23272064. Policy #0 lag: (min: 34.0, avg: 56.2, max: 98.0) +[2026-06-07 03:01:33,104][464932] Avg episode reward: [(0, '1772.545')] +[2026-06-07 03:01:33,506][472573] Updated weights for policy 0, policy_version 45438 (0.0008) +[2026-06-07 03:01:33,629][472573] Updated weights for policy 0, policy_version 45449 (0.0009) +[2026-06-07 03:01:33,739][472573] Updated weights for policy 0, policy_version 45459 (0.0009) +[2026-06-07 03:01:33,854][472573] Updated weights for policy 0, policy_version 45469 (0.0009) +[2026-06-07 03:01:33,992][472573] Updated weights for policy 0, policy_version 45482 (0.0009) +[2026-06-07 03:01:34,108][472573] Updated weights for policy 0, policy_version 45492 (0.0008) +[2026-06-07 03:01:34,660][472573] Updated weights for policy 0, policy_version 45502 (0.0008) +[2026-06-07 03:01:34,802][472573] Updated weights for policy 0, policy_version 45515 (0.0008) +[2026-06-07 03:01:34,927][472573] Updated weights for policy 0, policy_version 45526 (0.0008) +[2026-06-07 03:01:35,052][472573] Updated weights for policy 0, policy_version 45537 (0.0009) +[2026-06-07 03:01:35,164][472573] Updated weights for policy 0, policy_version 45547 (0.0008) +[2026-06-07 03:01:35,305][472573] Updated weights for policy 0, policy_version 45560 (0.0008) +[2026-06-07 03:01:35,850][472573] Updated weights for policy 0, policy_version 45570 (0.0008) +[2026-06-07 03:01:35,970][472573] Updated weights for policy 0, policy_version 45581 (0.0008) +[2026-06-07 03:01:36,087][472573] Updated weights for policy 0, policy_version 45591 (0.0008) +[2026-06-07 03:01:36,211][472573] Updated weights for policy 0, policy_version 45602 (0.0008) +[2026-06-07 03:01:36,322][472573] Updated weights for policy 0, policy_version 45612 (0.0008) +[2026-06-07 03:01:36,433][472573] Updated weights for policy 0, policy_version 45622 (0.0008) +[2026-06-07 03:01:36,965][472573] Updated weights for policy 0, policy_version 45633 (0.0007) +[2026-06-07 03:01:37,088][472573] Updated weights for policy 0, policy_version 45644 (0.0008) +[2026-06-07 03:01:37,226][472573] Updated weights for policy 0, policy_version 45656 (0.0008) +[2026-06-07 03:01:37,351][472573] Updated weights for policy 0, policy_version 45667 (0.0008) +[2026-06-07 03:01:37,469][472573] Updated weights for policy 0, policy_version 45677 (0.0008) +[2026-06-07 03:01:38,057][472573] Updated weights for policy 0, policy_version 45689 (0.0008) +[2026-06-07 03:01:38,103][464932] Fps is (10 sec: 29491.2, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 23396352. Throughput: 0: 28455.8. Samples: 23356288. Policy #0 lag: (min: 34.0, avg: 56.2, max: 98.0) +[2026-06-07 03:01:38,104][464932] Avg episode reward: [(0, '1767.199')] +[2026-06-07 03:01:38,182][472573] Updated weights for policy 0, policy_version 45700 (0.0005) +[2026-06-07 03:01:38,292][472573] Updated weights for policy 0, policy_version 45710 (0.0004) +[2026-06-07 03:01:38,407][472573] Updated weights for policy 0, policy_version 45720 (0.0004) +[2026-06-07 03:01:38,521][472573] Updated weights for policy 0, policy_version 45730 (0.0008) +[2026-06-07 03:01:38,632][472573] Updated weights for policy 0, policy_version 45740 (0.0008) +[2026-06-07 03:01:39,201][472573] Updated weights for policy 0, policy_version 45753 (0.0008) +[2026-06-07 03:01:39,326][472573] Updated weights for policy 0, policy_version 45764 (0.0008) +[2026-06-07 03:01:39,455][472573] Updated weights for policy 0, policy_version 45776 (0.0008) +[2026-06-07 03:01:39,584][472573] Updated weights for policy 0, policy_version 45787 (0.0008) +[2026-06-07 03:01:39,718][472573] Updated weights for policy 0, policy_version 45799 (0.0008) +[2026-06-07 03:01:39,853][472573] Updated weights for policy 0, policy_version 45811 (0.0008) +[2026-06-07 03:01:40,415][472573] Updated weights for policy 0, policy_version 45823 (0.0008) +[2026-06-07 03:01:40,542][472573] Updated weights for policy 0, policy_version 45834 (0.0008) +[2026-06-07 03:01:40,666][472573] Updated weights for policy 0, policy_version 45845 (0.0008) +[2026-06-07 03:01:40,789][472573] Updated weights for policy 0, policy_version 45856 (0.0008) +[2026-06-07 03:01:40,912][472573] Updated weights for policy 0, policy_version 45867 (0.0008) +[2026-06-07 03:01:41,052][472573] Updated weights for policy 0, policy_version 45879 (0.0008) +[2026-06-07 03:01:41,605][472573] Updated weights for policy 0, policy_version 45890 (0.0008) +[2026-06-07 03:01:41,731][472573] Updated weights for policy 0, policy_version 45901 (0.0008) +[2026-06-07 03:01:41,848][472573] Updated weights for policy 0, policy_version 45912 (0.0008) +[2026-06-07 03:01:41,964][472573] Updated weights for policy 0, policy_version 45922 (0.0008) +[2026-06-07 03:01:42,093][472573] Updated weights for policy 0, policy_version 45934 (0.0008) +[2026-06-07 03:01:42,657][472573] Updated weights for policy 0, policy_version 45945 (0.0008) +[2026-06-07 03:01:42,795][472573] Updated weights for policy 0, policy_version 45957 (0.0006) +[2026-06-07 03:01:42,929][472573] Updated weights for policy 0, policy_version 45969 (0.0007) +[2026-06-07 03:01:43,052][472573] Updated weights for policy 0, policy_version 45980 (0.0008) +[2026-06-07 03:01:43,103][464932] Fps is (10 sec: 26214.1, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 23527424. Throughput: 0: 28379.0. Samples: 23529344. Policy #0 lag: (min: 34.0, avg: 56.2, max: 98.0) +[2026-06-07 03:01:43,104][464932] Avg episode reward: [(0, '1816.291')] +[2026-06-07 03:01:43,180][472573] Updated weights for policy 0, policy_version 45992 (0.0008) +[2026-06-07 03:01:43,310][472573] Updated weights for policy 0, policy_version 46004 (0.0008) +[2026-06-07 03:01:43,348][472028] Saving new best policy, reward=1816.291! +[2026-06-07 03:01:43,871][472573] Updated weights for policy 0, policy_version 46015 (0.0008) +[2026-06-07 03:01:43,985][472573] Updated weights for policy 0, policy_version 46026 (0.0008) +[2026-06-07 03:01:44,109][472573] Updated weights for policy 0, policy_version 46037 (0.0010) +[2026-06-07 03:01:44,240][472573] Updated weights for policy 0, policy_version 46049 (0.0008) +[2026-06-07 03:01:44,358][472573] Updated weights for policy 0, policy_version 46060 (0.0008) +[2026-06-07 03:01:44,467][472573] Updated weights for policy 0, policy_version 46070 (0.0008) +[2026-06-07 03:01:45,043][472573] Updated weights for policy 0, policy_version 46081 (0.0007) +[2026-06-07 03:01:45,157][472573] Updated weights for policy 0, policy_version 46092 (0.0008) +[2026-06-07 03:01:45,297][472573] Updated weights for policy 0, policy_version 46105 (0.0008) +[2026-06-07 03:01:45,453][472573] Updated weights for policy 0, policy_version 46119 (0.0008) +[2026-06-07 03:01:45,557][472573] Updated weights for policy 0, policy_version 46129 (0.0008) +[2026-06-07 03:01:46,134][472573] Updated weights for policy 0, policy_version 46139 (0.0008) +[2026-06-07 03:01:46,242][472573] Updated weights for policy 0, policy_version 46149 (0.0008) +[2026-06-07 03:01:46,382][472573] Updated weights for policy 0, policy_version 46162 (0.0008) +[2026-06-07 03:01:46,507][472573] Updated weights for policy 0, policy_version 46173 (0.0008) +[2026-06-07 03:01:46,634][472573] Updated weights for policy 0, policy_version 46185 (0.0008) +[2026-06-07 03:01:46,752][472573] Updated weights for policy 0, policy_version 46196 (0.0008) +[2026-06-07 03:01:47,325][472573] Updated weights for policy 0, policy_version 46207 (0.0007) +[2026-06-07 03:01:47,440][472573] Updated weights for policy 0, policy_version 46218 (0.0008) +[2026-06-07 03:01:47,561][472573] Updated weights for policy 0, policy_version 46229 (0.0008) +[2026-06-07 03:01:47,720][472573] Updated weights for policy 0, policy_version 46244 (0.0008) +[2026-06-07 03:01:47,857][472573] Updated weights for policy 0, policy_version 46256 (0.0008) +[2026-06-07 03:01:48,103][464932] Fps is (10 sec: 29491.6, 60 sec: 28945.1, 300 sec: 28436.0). Total num frames: 23691264. Throughput: 0: 28299.4. Samples: 23696384. Policy #0 lag: (min: 34.0, avg: 56.2, max: 98.0) +[2026-06-07 03:01:48,104][464932] Avg episode reward: [(0, '1813.101')] +[2026-06-07 03:01:48,443][472573] Updated weights for policy 0, policy_version 46268 (0.0008) +[2026-06-07 03:01:48,560][472573] Updated weights for policy 0, policy_version 46279 (0.0008) +[2026-06-07 03:01:48,681][472573] Updated weights for policy 0, policy_version 46290 (0.0008) +[2026-06-07 03:01:48,826][472573] Updated weights for policy 0, policy_version 46304 (0.0008) +[2026-06-07 03:01:48,953][472573] Updated weights for policy 0, policy_version 46315 (0.0008) +[2026-06-07 03:01:49,093][472573] Updated weights for policy 0, policy_version 46328 (0.0008) +[2026-06-07 03:01:49,660][472573] Updated weights for policy 0, policy_version 46338 (0.0008) +[2026-06-07 03:01:49,780][472573] Updated weights for policy 0, policy_version 46349 (0.0008) +[2026-06-07 03:01:49,898][472573] Updated weights for policy 0, policy_version 46359 (0.0008) +[2026-06-07 03:01:50,009][472573] Updated weights for policy 0, policy_version 46369 (0.0008) +[2026-06-07 03:01:50,120][472573] Updated weights for policy 0, policy_version 46379 (0.0008) +[2026-06-07 03:01:50,241][472573] Updated weights for policy 0, policy_version 46390 (0.0008) +[2026-06-07 03:01:50,792][472573] Updated weights for policy 0, policy_version 46400 (0.0008) +[2026-06-07 03:01:50,920][472573] Updated weights for policy 0, policy_version 46411 (0.0008) +[2026-06-07 03:01:51,028][472573] Updated weights for policy 0, policy_version 46421 (0.0008) +[2026-06-07 03:01:51,139][472573] Updated weights for policy 0, policy_version 46431 (0.0008) +[2026-06-07 03:01:51,271][472573] Updated weights for policy 0, policy_version 46443 (0.0008) +[2026-06-07 03:01:51,395][472573] Updated weights for policy 0, policy_version 46454 (0.0008) +[2026-06-07 03:01:51,964][472573] Updated weights for policy 0, policy_version 46465 (0.0008) +[2026-06-07 03:01:52,084][472573] Updated weights for policy 0, policy_version 46476 (0.0008) +[2026-06-07 03:01:52,193][472573] Updated weights for policy 0, policy_version 46486 (0.0008) +[2026-06-07 03:01:52,316][472573] Updated weights for policy 0, policy_version 46497 (0.0008) +[2026-06-07 03:01:52,440][472573] Updated weights for policy 0, policy_version 46508 (0.0008) +[2026-06-07 03:01:52,563][472573] Updated weights for policy 0, policy_version 46519 (0.0008) +[2026-06-07 03:01:53,103][464932] Fps is (10 sec: 29491.5, 60 sec: 28399.0, 300 sec: 28436.0). Total num frames: 23822336. Throughput: 0: 28455.8. Samples: 23782272. Policy #0 lag: (min: 34.0, avg: 56.2, max: 98.0) +[2026-06-07 03:01:53,104][464932] Avg episode reward: [(0, '1820.158')] +[2026-06-07 03:01:53,135][472573] Updated weights for policy 0, policy_version 46530 (0.0006) +[2026-06-07 03:01:53,257][472573] Updated weights for policy 0, policy_version 46541 (0.0008) +[2026-06-07 03:01:53,366][472573] Updated weights for policy 0, policy_version 46551 (0.0008) +[2026-06-07 03:01:53,482][472573] Updated weights for policy 0, policy_version 46561 (0.0008) +[2026-06-07 03:01:53,605][472573] Updated weights for policy 0, policy_version 46572 (0.0008) +[2026-06-07 03:01:53,718][472573] Updated weights for policy 0, policy_version 46582 (0.0008) +[2026-06-07 03:01:53,738][472028] Saving new best policy, reward=1820.158! +[2026-06-07 03:01:54,277][472573] Updated weights for policy 0, policy_version 46594 (0.0008) +[2026-06-07 03:01:54,405][472573] Updated weights for policy 0, policy_version 46606 (0.0008) +[2026-06-07 03:01:54,530][472573] Updated weights for policy 0, policy_version 46618 (0.0008) +[2026-06-07 03:01:54,645][472573] Updated weights for policy 0, policy_version 46628 (0.0009) +[2026-06-07 03:01:54,777][472573] Updated weights for policy 0, policy_version 46640 (0.0008) +[2026-06-07 03:01:55,341][472573] Updated weights for policy 0, policy_version 46650 (0.0008) +[2026-06-07 03:01:55,460][472573] Updated weights for policy 0, policy_version 46661 (0.0008) +[2026-06-07 03:01:55,572][472573] Updated weights for policy 0, policy_version 46671 (0.0008) +[2026-06-07 03:01:55,677][472573] Updated weights for policy 0, policy_version 46681 (0.0008) +[2026-06-07 03:01:55,805][472573] Updated weights for policy 0, policy_version 46693 (0.0008) +[2026-06-07 03:01:55,921][472573] Updated weights for policy 0, policy_version 46703 (0.0008) +[2026-06-07 03:01:56,496][472573] Updated weights for policy 0, policy_version 46715 (0.0008) +[2026-06-07 03:01:56,615][472573] Updated weights for policy 0, policy_version 46726 (0.0008) +[2026-06-07 03:01:56,742][472573] Updated weights for policy 0, policy_version 46738 (0.0008) +[2026-06-07 03:01:56,873][472573] Updated weights for policy 0, policy_version 46750 (0.0008) +[2026-06-07 03:01:56,988][472573] Updated weights for policy 0, policy_version 46761 (0.0008) +[2026-06-07 03:01:57,125][472573] Updated weights for policy 0, policy_version 46773 (0.0008) +[2026-06-07 03:01:57,690][472573] Updated weights for policy 0, policy_version 46783 (0.0005) +[2026-06-07 03:01:57,820][472573] Updated weights for policy 0, policy_version 46795 (0.0004) +[2026-06-07 03:01:57,950][472573] Updated weights for policy 0, policy_version 46807 (0.0004) +[2026-06-07 03:01:58,072][472573] Updated weights for policy 0, policy_version 46818 (0.0004) +[2026-06-07 03:01:58,103][464932] Fps is (10 sec: 26214.5, 60 sec: 28399.0, 300 sec: 28436.0). Total num frames: 23953408. Throughput: 0: 28353.4. Samples: 23955712. Policy #0 lag: (min: 34.0, avg: 56.2, max: 98.0) +[2026-06-07 03:01:58,104][464932] Avg episode reward: [(0, '1807.045')] +[2026-06-07 03:01:58,191][472573] Updated weights for policy 0, policy_version 46829 (0.0004) +[2026-06-07 03:01:58,316][472573] Updated weights for policy 0, policy_version 46840 (0.0008) +[2026-06-07 03:01:58,880][472573] Updated weights for policy 0, policy_version 46850 (0.0008) +[2026-06-07 03:01:59,004][472573] Updated weights for policy 0, policy_version 46861 (0.0008) +[2026-06-07 03:01:59,121][472573] Updated weights for policy 0, policy_version 46872 (0.0008) +[2026-06-07 03:01:59,240][472573] Updated weights for policy 0, policy_version 46882 (0.0008) +[2026-06-07 03:01:59,363][472573] Updated weights for policy 0, policy_version 46893 (0.0008) +[2026-06-07 03:01:59,494][472573] Updated weights for policy 0, policy_version 46904 (0.0008) +[2026-06-07 03:02:00,040][472573] Updated weights for policy 0, policy_version 46914 (0.0008) +[2026-06-07 03:02:00,159][472573] Updated weights for policy 0, policy_version 46925 (0.0008) +[2026-06-07 03:02:00,273][472573] Updated weights for policy 0, policy_version 46935 (0.0008) +[2026-06-07 03:02:00,400][472573] Updated weights for policy 0, policy_version 46946 (0.0008) +[2026-06-07 03:02:00,516][472573] Updated weights for policy 0, policy_version 46956 (0.0008) +[2026-06-07 03:02:00,650][472573] Updated weights for policy 0, policy_version 46968 (0.0008) +[2026-06-07 03:02:01,208][472573] Updated weights for policy 0, policy_version 46978 (0.0008) +[2026-06-07 03:02:01,341][472573] Updated weights for policy 0, policy_version 46990 (0.0008) +[2026-06-07 03:02:01,451][472573] Updated weights for policy 0, policy_version 47000 (0.0008) +[2026-06-07 03:02:01,564][472573] Updated weights for policy 0, policy_version 47010 (0.0008) +[2026-06-07 03:02:01,698][472573] Updated weights for policy 0, policy_version 47022 (0.0008) +[2026-06-07 03:02:01,816][472573] Updated weights for policy 0, policy_version 47032 (0.0008) +[2026-06-07 03:02:02,384][472573] Updated weights for policy 0, policy_version 47042 (0.0008) +[2026-06-07 03:02:02,508][472573] Updated weights for policy 0, policy_version 47053 (0.0008) +[2026-06-07 03:02:02,620][472573] Updated weights for policy 0, policy_version 47063 (0.0008) +[2026-06-07 03:02:02,732][472573] Updated weights for policy 0, policy_version 47073 (0.0008) +[2026-06-07 03:02:02,856][472573] Updated weights for policy 0, policy_version 47084 (0.0008) +[2026-06-07 03:02:02,967][472573] Updated weights for policy 0, policy_version 47094 (0.0008) +[2026-06-07 03:02:03,103][464932] Fps is (10 sec: 29491.3, 60 sec: 28945.1, 300 sec: 28436.0). Total num frames: 24117248. Throughput: 0: 28362.0. Samples: 24124032. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 03:02:03,104][464932] Avg episode reward: [(0, '1800.256')] +[2026-06-07 03:02:03,525][472573] Updated weights for policy 0, policy_version 47104 (0.0008) +[2026-06-07 03:02:03,642][472573] Updated weights for policy 0, policy_version 47114 (0.0008) +[2026-06-07 03:02:03,762][472573] Updated weights for policy 0, policy_version 47125 (0.0008) +[2026-06-07 03:02:03,871][472573] Updated weights for policy 0, policy_version 47135 (0.0008) +[2026-06-07 03:02:04,013][472573] Updated weights for policy 0, policy_version 47147 (0.0009) +[2026-06-07 03:02:04,123][472573] Updated weights for policy 0, policy_version 47157 (0.0008) +[2026-06-07 03:02:04,681][472573] Updated weights for policy 0, policy_version 47168 (0.0008) +[2026-06-07 03:02:04,788][472573] Updated weights for policy 0, policy_version 47178 (0.0008) +[2026-06-07 03:02:04,911][472573] Updated weights for policy 0, policy_version 47189 (0.0008) +[2026-06-07 03:02:05,021][472573] Updated weights for policy 0, policy_version 47199 (0.0008) +[2026-06-07 03:02:05,131][472573] Updated weights for policy 0, policy_version 47209 (0.0008) +[2026-06-07 03:02:05,252][472573] Updated weights for policy 0, policy_version 47220 (0.0008) +[2026-06-07 03:02:05,836][472573] Updated weights for policy 0, policy_version 47232 (0.0008) +[2026-06-07 03:02:05,952][472573] Updated weights for policy 0, policy_version 47243 (0.0008) +[2026-06-07 03:02:06,078][472573] Updated weights for policy 0, policy_version 47254 (0.0008) +[2026-06-07 03:02:06,200][472573] Updated weights for policy 0, policy_version 47265 (0.0008) +[2026-06-07 03:02:06,334][472573] Updated weights for policy 0, policy_version 47277 (0.0008) +[2026-06-07 03:02:06,454][472573] Updated weights for policy 0, policy_version 47288 (0.0008) +[2026-06-07 03:02:07,010][472573] Updated weights for policy 0, policy_version 47298 (0.0008) +[2026-06-07 03:02:07,132][472573] Updated weights for policy 0, policy_version 47309 (0.0008) +[2026-06-07 03:02:07,245][472573] Updated weights for policy 0, policy_version 47319 (0.0008) +[2026-06-07 03:02:07,368][472573] Updated weights for policy 0, policy_version 47330 (0.0008) +[2026-06-07 03:02:07,492][472573] Updated weights for policy 0, policy_version 47341 (0.0008) +[2026-06-07 03:02:07,611][472573] Updated weights for policy 0, policy_version 47352 (0.0008) +[2026-06-07 03:02:08,103][464932] Fps is (10 sec: 29490.8, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 24248320. Throughput: 0: 28410.3. Samples: 24208000. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 03:02:08,104][464932] Avg episode reward: [(0, '1858.772')] +[2026-06-07 03:02:08,178][472573] Updated weights for policy 0, policy_version 47363 (0.0005) +[2026-06-07 03:02:08,296][472573] Updated weights for policy 0, policy_version 47373 (0.0004) +[2026-06-07 03:02:08,424][472573] Updated weights for policy 0, policy_version 47385 (0.0005) +[2026-06-07 03:02:08,544][472573] Updated weights for policy 0, policy_version 47395 (0.0004) +[2026-06-07 03:02:08,657][472573] Updated weights for policy 0, policy_version 47405 (0.0005) +[2026-06-07 03:02:08,774][472028] Saving new best policy, reward=1858.772! +[2026-06-07 03:02:09,221][472573] Updated weights for policy 0, policy_version 47417 (0.0005) +[2026-06-07 03:02:09,332][472573] Updated weights for policy 0, policy_version 47427 (0.0004) +[2026-06-07 03:02:09,456][472573] Updated weights for policy 0, policy_version 47438 (0.0005) +[2026-06-07 03:02:09,569][472573] Updated weights for policy 0, policy_version 47448 (0.0005) +[2026-06-07 03:02:09,676][472573] Updated weights for policy 0, policy_version 47458 (0.0005) +[2026-06-07 03:02:09,802][472573] Updated weights for policy 0, policy_version 47469 (0.0004) +[2026-06-07 03:02:09,917][472573] Updated weights for policy 0, policy_version 47479 (0.0004) +[2026-06-07 03:02:10,464][472573] Updated weights for policy 0, policy_version 47489 (0.0005) +[2026-06-07 03:02:10,582][472573] Updated weights for policy 0, policy_version 47499 (0.0004) +[2026-06-07 03:02:10,707][472573] Updated weights for policy 0, policy_version 47510 (0.0004) +[2026-06-07 03:02:10,828][472573] Updated weights for policy 0, policy_version 47521 (0.0005) +[2026-06-07 03:02:10,956][472573] Updated weights for policy 0, policy_version 47532 (0.0005) +[2026-06-07 03:02:11,066][472573] Updated weights for policy 0, policy_version 47542 (0.0004) +[2026-06-07 03:02:11,606][472573] Updated weights for policy 0, policy_version 47552 (0.0005) +[2026-06-07 03:02:11,748][472573] Updated weights for policy 0, policy_version 47565 (0.0005) +[2026-06-07 03:02:11,873][472573] Updated weights for policy 0, policy_version 47576 (0.0005) +[2026-06-07 03:02:11,999][472573] Updated weights for policy 0, policy_version 47587 (0.0005) +[2026-06-07 03:02:12,121][472573] Updated weights for policy 0, policy_version 47598 (0.0005) +[2026-06-07 03:02:12,679][472573] Updated weights for policy 0, policy_version 47609 (0.0005) +[2026-06-07 03:02:12,791][472573] Updated weights for policy 0, policy_version 47619 (0.0005) +[2026-06-07 03:02:12,907][472573] Updated weights for policy 0, policy_version 47629 (0.0004) +[2026-06-07 03:02:13,031][472573] Updated weights for policy 0, policy_version 47640 (0.0004) +[2026-06-07 03:02:13,103][464932] Fps is (10 sec: 26214.3, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 24379392. Throughput: 0: 28364.9. Samples: 24381440. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 03:02:13,104][464932] Avg episode reward: [(0, '1852.266')] +[2026-06-07 03:02:13,163][472573] Updated weights for policy 0, policy_version 47652 (0.0005) +[2026-06-07 03:02:13,273][472573] Updated weights for policy 0, policy_version 47663 (0.0004) +[2026-06-07 03:02:13,836][472573] Updated weights for policy 0, policy_version 47674 (0.0005) +[2026-06-07 03:02:13,960][472573] Updated weights for policy 0, policy_version 47685 (0.0005) +[2026-06-07 03:02:14,084][472573] Updated weights for policy 0, policy_version 47697 (0.0005) +[2026-06-07 03:02:14,204][472573] Updated weights for policy 0, policy_version 47707 (0.0005) +[2026-06-07 03:02:14,317][472573] Updated weights for policy 0, policy_version 47717 (0.0004) +[2026-06-07 03:02:14,436][472573] Updated weights for policy 0, policy_version 47727 (0.0005) +[2026-06-07 03:02:14,979][472573] Updated weights for policy 0, policy_version 47738 (0.0005) +[2026-06-07 03:02:15,088][472573] Updated weights for policy 0, policy_version 47748 (0.0004) +[2026-06-07 03:02:15,224][472573] Updated weights for policy 0, policy_version 47760 (0.0005) +[2026-06-07 03:02:15,334][472573] Updated weights for policy 0, policy_version 47770 (0.0005) +[2026-06-07 03:02:15,460][472573] Updated weights for policy 0, policy_version 47781 (0.0005) +[2026-06-07 03:02:15,585][472573] Updated weights for policy 0, policy_version 47792 (0.0005) +[2026-06-07 03:02:16,143][472573] Updated weights for policy 0, policy_version 47802 (0.0005) +[2026-06-07 03:02:16,277][472573] Updated weights for policy 0, policy_version 47814 (0.0005) +[2026-06-07 03:02:16,397][472573] Updated weights for policy 0, policy_version 47825 (0.0004) +[2026-06-07 03:02:16,537][472573] Updated weights for policy 0, policy_version 47837 (0.0004) +[2026-06-07 03:02:16,665][472573] Updated weights for policy 0, policy_version 47848 (0.0004) +[2026-06-07 03:02:16,771][472573] Updated weights for policy 0, policy_version 47858 (0.0004) +[2026-06-07 03:02:17,330][472573] Updated weights for policy 0, policy_version 47869 (0.0005) +[2026-06-07 03:02:17,463][472573] Updated weights for policy 0, policy_version 47881 (0.0005) +[2026-06-07 03:02:17,573][472573] Updated weights for policy 0, policy_version 47891 (0.0004) +[2026-06-07 03:02:17,696][472573] Updated weights for policy 0, policy_version 47902 (0.0004) +[2026-06-07 03:02:17,820][472573] Updated weights for policy 0, policy_version 47913 (0.0004) +[2026-06-07 03:02:17,937][472573] Updated weights for policy 0, policy_version 47923 (0.0004) +[2026-06-07 03:02:18,103][464932] Fps is (10 sec: 29491.4, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 24543232. Throughput: 0: 28353.4. Samples: 24547968. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 03:02:18,104][464932] Avg episode reward: [(0, '1877.685')] +[2026-06-07 03:02:18,109][472028] Saving new best policy, reward=1877.685! +[2026-06-07 03:02:18,498][472573] Updated weights for policy 0, policy_version 47933 (0.0005) +[2026-06-07 03:02:18,608][472573] Updated weights for policy 0, policy_version 47943 (0.0004) +[2026-06-07 03:02:18,730][472573] Updated weights for policy 0, policy_version 47954 (0.0005) +[2026-06-07 03:02:18,838][472573] Updated weights for policy 0, policy_version 47964 (0.0008) +[2026-06-07 03:02:18,954][472573] Updated weights for policy 0, policy_version 47974 (0.0008) +[2026-06-07 03:02:19,089][472573] Updated weights for policy 0, policy_version 47986 (0.0008) +[2026-06-07 03:02:19,635][472573] Updated weights for policy 0, policy_version 47996 (0.0008) +[2026-06-07 03:02:19,750][472573] Updated weights for policy 0, policy_version 48007 (0.0008) +[2026-06-07 03:02:19,886][472573] Updated weights for policy 0, policy_version 48019 (0.0008) +[2026-06-07 03:02:20,023][472573] Updated weights for policy 0, policy_version 48032 (0.0008) +[2026-06-07 03:02:20,166][472573] Updated weights for policy 0, policy_version 48045 (0.0009) +[2026-06-07 03:02:20,274][472573] Updated weights for policy 0, policy_version 48055 (0.0008) +[2026-06-07 03:02:20,853][472573] Updated weights for policy 0, policy_version 48066 (0.0008) +[2026-06-07 03:02:20,996][472573] Updated weights for policy 0, policy_version 48079 (0.0008) +[2026-06-07 03:02:21,119][472573] Updated weights for policy 0, policy_version 48090 (0.0008) +[2026-06-07 03:02:21,235][472573] Updated weights for policy 0, policy_version 48100 (0.0008) +[2026-06-07 03:02:21,376][472573] Updated weights for policy 0, policy_version 48112 (0.0008) +[2026-06-07 03:02:21,930][472573] Updated weights for policy 0, policy_version 48122 (0.0008) +[2026-06-07 03:02:22,046][472573] Updated weights for policy 0, policy_version 48133 (0.0005) +[2026-06-07 03:02:22,157][472573] Updated weights for policy 0, policy_version 48143 (0.0007) +[2026-06-07 03:02:22,281][472573] Updated weights for policy 0, policy_version 48154 (0.0008) +[2026-06-07 03:02:22,396][472573] Updated weights for policy 0, policy_version 48164 (0.0008) +[2026-06-07 03:02:22,508][472573] Updated weights for policy 0, policy_version 48174 (0.0008) +[2026-06-07 03:02:23,074][472573] Updated weights for policy 0, policy_version 48185 (0.0008) +[2026-06-07 03:02:23,103][464932] Fps is (10 sec: 29490.8, 60 sec: 28398.9, 300 sec: 28436.0). Total num frames: 24674304. Throughput: 0: 28356.2. Samples: 24632320. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 03:02:23,105][464932] Avg episode reward: [(0, '1902.611')] +[2026-06-07 03:02:23,211][472573] Updated weights for policy 0, policy_version 48197 (0.0008) +[2026-06-07 03:02:23,319][472573] Updated weights for policy 0, policy_version 48207 (0.0008) +[2026-06-07 03:02:23,436][472573] Updated weights for policy 0, policy_version 48217 (0.0008) +[2026-06-07 03:02:23,572][472573] Updated weights for policy 0, policy_version 48229 (0.0008) +[2026-06-07 03:02:23,693][472573] Updated weights for policy 0, policy_version 48240 (0.0008) +[2026-06-07 03:02:23,785][472028] Saving new best policy, reward=1902.611! +[2026-06-07 03:02:24,253][472573] Updated weights for policy 0, policy_version 48250 (0.0007) +[2026-06-07 03:02:24,371][472573] Updated weights for policy 0, policy_version 48261 (0.0005) +[2026-06-07 03:02:24,498][472573] Updated weights for policy 0, policy_version 48272 (0.0008) +[2026-06-07 03:02:24,624][472573] Updated weights for policy 0, policy_version 48283 (0.0008) +[2026-06-07 03:02:24,735][472573] Updated weights for policy 0, policy_version 48293 (0.0008) +[2026-06-07 03:02:24,845][472573] Updated weights for policy 0, policy_version 48303 (0.0008) +[2026-06-07 03:02:25,380][472573] Updated weights for policy 0, policy_version 48313 (0.0008) +[2026-06-07 03:02:25,494][472573] Updated weights for policy 0, policy_version 48323 (0.0007) +[2026-06-07 03:02:25,628][472573] Updated weights for policy 0, policy_version 48335 (0.0008) +[2026-06-07 03:02:25,763][472573] Updated weights for policy 0, policy_version 48347 (0.0008) +[2026-06-07 03:02:25,880][472573] Updated weights for policy 0, policy_version 48357 (0.0008) +[2026-06-07 03:02:26,001][472573] Updated weights for policy 0, policy_version 48368 (0.0008) +[2026-06-07 03:02:26,541][472573] Updated weights for policy 0, policy_version 48378 (0.0008) +[2026-06-07 03:02:26,653][472573] Updated weights for policy 0, policy_version 48388 (0.0008) +[2026-06-07 03:02:26,764][472573] Updated weights for policy 0, policy_version 48398 (0.0008) +[2026-06-07 03:02:26,889][472573] Updated weights for policy 0, policy_version 48409 (0.0008) +[2026-06-07 03:02:27,012][472573] Updated weights for policy 0, policy_version 48420 (0.0008) +[2026-06-07 03:02:27,133][472573] Updated weights for policy 0, policy_version 48431 (0.0008) +[2026-06-07 03:02:27,714][472573] Updated weights for policy 0, policy_version 48444 (0.0008) +[2026-06-07 03:02:27,847][472573] Updated weights for policy 0, policy_version 48456 (0.0008) +[2026-06-07 03:02:27,958][472573] Updated weights for policy 0, policy_version 48466 (0.0008) +[2026-06-07 03:02:28,085][472573] Updated weights for policy 0, policy_version 48477 (0.0008) +[2026-06-07 03:02:28,103][464932] Fps is (10 sec: 26214.5, 60 sec: 28399.0, 300 sec: 28436.0). Total num frames: 24805376. Throughput: 0: 28390.5. Samples: 24806912. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 03:02:28,104][464932] Avg episode reward: [(0, '1932.382')] +[2026-06-07 03:02:28,221][472573] Updated weights for policy 0, policy_version 48489 (0.0008) +[2026-06-07 03:02:28,345][472573] Updated weights for policy 0, policy_version 48500 (0.0008) +[2026-06-07 03:02:28,382][472028] Saving new best policy, reward=1932.382! +[2026-06-07 03:02:28,890][472573] Updated weights for policy 0, policy_version 48510 (0.0007) +[2026-06-07 03:02:29,023][472573] Updated weights for policy 0, policy_version 48522 (0.0008) +[2026-06-07 03:02:29,144][472573] Updated weights for policy 0, policy_version 48533 (0.0008) +[2026-06-07 03:02:29,267][472573] Updated weights for policy 0, policy_version 48544 (0.0008) +[2026-06-07 03:02:29,380][472573] Updated weights for policy 0, policy_version 48554 (0.0008) +[2026-06-07 03:02:29,492][472573] Updated weights for policy 0, policy_version 48564 (0.0008) +[2026-06-07 03:02:30,052][472573] Updated weights for policy 0, policy_version 48575 (0.0008) +[2026-06-07 03:02:30,175][472573] Updated weights for policy 0, policy_version 48586 (0.0008) +[2026-06-07 03:02:30,295][472573] Updated weights for policy 0, policy_version 48597 (0.0008) +[2026-06-07 03:02:30,421][472573] Updated weights for policy 0, policy_version 48608 (0.0008) +[2026-06-07 03:02:30,542][472573] Updated weights for policy 0, policy_version 48619 (0.0008) +[2026-06-07 03:02:30,655][472573] Updated weights for policy 0, policy_version 48629 (0.0008) +[2026-06-07 03:02:31,219][472573] Updated weights for policy 0, policy_version 48640 (0.0008) +[2026-06-07 03:02:31,330][472573] Updated weights for policy 0, policy_version 48650 (0.0008) +[2026-06-07 03:02:31,450][472573] Updated weights for policy 0, policy_version 48661 (0.0008) +[2026-06-07 03:02:31,567][472573] Updated weights for policy 0, policy_version 48671 (0.0008) +[2026-06-07 03:02:31,688][472573] Updated weights for policy 0, policy_version 48682 (0.0008) +[2026-06-07 03:02:31,828][472573] Updated weights for policy 0, policy_version 48694 (0.0008) +[2026-06-07 03:02:32,369][472573] Updated weights for policy 0, policy_version 48705 (0.0008) +[2026-06-07 03:02:32,491][472573] Updated weights for policy 0, policy_version 48716 (0.0008) +[2026-06-07 03:02:32,622][472573] Updated weights for policy 0, policy_version 48728 (0.0008) +[2026-06-07 03:02:32,737][472573] Updated weights for policy 0, policy_version 48738 (0.0008) +[2026-06-07 03:02:32,857][472573] Updated weights for policy 0, policy_version 48749 (0.0008) +[2026-06-07 03:02:33,103][464932] Fps is (10 sec: 29491.8, 60 sec: 28399.0, 300 sec: 28436.0). Total num frames: 24969216. Throughput: 0: 28387.6. Samples: 24973824. Policy #0 lag: (min: 63.0, avg: 76.5, max: 127.0) +[2026-06-07 03:02:33,104][464932] Avg episode reward: [(0, '1932.382')] +[2026-06-07 03:02:33,431][472573] Updated weights for policy 0, policy_version 48761 (0.0008) +[2026-06-07 03:02:33,564][472573] Updated weights for policy 0, policy_version 48773 (0.0008) +[2026-06-07 03:02:33,683][472573] Updated weights for policy 0, policy_version 48783 (0.0008) +[2026-06-07 03:02:33,807][472573] Updated weights for policy 0, policy_version 48794 (0.0008) +[2026-06-07 03:02:33,952][472573] Updated weights for policy 0, policy_version 48807 (0.0008) +[2026-06-07 03:02:34,072][472573] Updated weights for policy 0, policy_version 48818 (0.0008) +[2026-06-07 03:02:34,627][472573] Updated weights for policy 0, policy_version 48828 (0.0007) +[2026-06-07 03:02:34,750][472028] Early stopping after 2 epochs (16 sgd steps), loss delta 0.0000000 +[2026-06-07 03:02:34,751][472028] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed13/checkpoint_p0/checkpoint_000048840_25034752.pth... +[2026-06-07 03:02:34,752][472575] Stopping RolloutWorker_w1... +[2026-06-07 03:02:34,752][464932] Component RolloutWorker_w1 stopped! +[2026-06-07 03:02:34,753][472575] Loop rollout_proc1_evt_loop terminating... +[2026-06-07 03:02:34,753][472028] Stopping Batcher_0... +[2026-06-07 03:02:34,753][464932] Component Batcher_0 stopped! +[2026-06-07 03:02:34,753][472028] Loop batcher_evt_loop terminating... +[2026-06-07 03:02:34,754][464932] Component RolloutWorker_w0 stopped! +[2026-06-07 03:02:34,754][472574] Stopping RolloutWorker_w0... +[2026-06-07 03:02:34,755][472574] Loop rollout_proc0_evt_loop terminating... +[2026-06-07 03:02:34,775][472028] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs1_seed13/checkpoint_p0/checkpoint_000048840_25034752.pth... +[2026-06-07 03:02:34,800][472028] Stopping LearnerWorker_p0... +[2026-06-07 03:02:34,800][472028] Loop learner_proc0_evt_loop terminating... +[2026-06-07 03:02:34,800][464932] Component LearnerWorker_p0 stopped! +[2026-06-07 03:02:34,850][472573] Weights refcount: 2 0 +[2026-06-07 03:02:34,851][472573] Stopping InferenceWorker_p0-w0... +[2026-06-07 03:02:34,851][472573] Loop inference_proc0-0_evt_loop terminating... +[2026-06-07 03:02:34,851][464932] Component InferenceWorker_p0-w0 stopped! +[2026-06-07 03:02:34,852][464932] Waiting for process learner_proc0 to stop... +[2026-06-07 03:02:35,819][464932] Waiting for process inference_proc0-0 to join... +[2026-06-07 03:02:35,820][464932] Waiting for process rollout_proc0 to join... +[2026-06-07 03:02:35,821][464932] Waiting for process rollout_proc1 to join... +[2026-06-07 03:02:35,821][464932] Batcher 0 profile tree view: +batching: 0.8962, releasing_batches: 0.0258 +[2026-06-07 03:02:35,822][464932] InferenceWorker_p0-w0 profile tree view: wait_policy: 0.0000 - wait_policy_total: 849.7775 -update_model: 38.4178 - weight_update: 0.0011 -one_step: 0.0022 - handle_policy_step: 518.7962 - deserialize: 12.3540, stack: 0.4143, obs_to_device_normalize: 48.7268, forward: 128.1332, prepare_outputs: 303.5928, send_messages: 9.0167 -[2026-06-07 02:34:22,718][321791] Learner 0 profile tree view: -misc: 0.0042, prepare_batch: 79.1215 -train: 1009.7830 - epoch_init: 0.0494, minibatch_init: 2.7483, losses_postprocess: 230.5321, kl_divergence: 24.6947, after_optimizer: 553.0465 - calculate_losses: 49.4474 - losses_init: 0.0895, forward_head: 13.4953, bptt_initial: 0.4990, bptt: 0.4669, tail: 11.3053, advantages_returns: 3.7340, losses: 15.8411 - update: 145.1080 - clip: 21.3454 -[2026-06-07 02:34:22,718][321791] RolloutWorker_w0 profile tree view: -wait_for_trajectories: 0.0423, enqueue_policy_requests: 14.2498, wait_policy_outputs_event: 3.6298, process_policy_outputs: 12.1782, env_step: 1056.6219, finalize_trajectories: 0.1576, complete_rollouts: 0.0979 -post_env_step: 21.8373 - process_env_step: 6.2165 -[2026-06-07 02:34:22,718][321791] RolloutWorker_w1 profile tree view: -wait_for_trajectories: 0.0429, enqueue_policy_requests: 14.6051, wait_policy_outputs_event: 3.8081, process_policy_outputs: 12.5501, env_step: 995.5254, finalize_trajectories: 0.1562, complete_rollouts: 0.1004 -post_env_step: 22.0846 - process_env_step: 6.2726 -[2026-06-07 02:34:22,719][321791] Loop Runner_EvtLoop terminating... -[2026-06-07 02:34:22,720][321791] Runner profile tree view: -main_loop: 1437.0450 -[2026-06-07 02:34:22,720][321791] Collected {0: 25034752}, FPS: 17421.0 + wait_policy_total: 489.5443 +update_model: 40.3998 + weight_update: 0.0007 +one_step: 0.0016 + handle_policy_step: 339.1971 + deserialize: 4.4609, stack: 0.3357, obs_to_device_normalize: 48.8403, forward: 127.9718, prepare_outputs: 134.1394, send_messages: 9.2078 +[2026-06-07 03:02:35,822][464932] Learner 0 profile tree view: +misc: 0.0045, prepare_batch: 25.7354 +train: 544.4353 + epoch_init: 0.0586, minibatch_init: 2.5126, losses_postprocess: 148.5336, kl_divergence: 22.7809, after_optimizer: 196.1488 + calculate_losses: 40.1309 + losses_init: 0.0804, forward_head: 12.7129, bptt_initial: 0.3957, bptt: 0.4332, tail: 9.1333, advantages_returns: 3.0033, losses: 11.2797 + update: 131.0697 + clip: 13.1271 +[2026-06-07 03:02:35,822][464932] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.0346, enqueue_policy_requests: 112.4220, process_policy_outputs: 7.4403, env_step: 524.9069, finalize_trajectories: 0.1098, complete_rollouts: 0.0781 +post_env_step: 16.1248 + process_env_step: 4.6157 +[2026-06-07 03:02:35,823][464932] RolloutWorker_w1 profile tree view: +wait_for_trajectories: 0.0341, enqueue_policy_requests: 117.4303, process_policy_outputs: 7.4092, env_step: 524.7143, finalize_trajectories: 0.1056, complete_rollouts: 0.0784 +post_env_step: 16.0567 + process_env_step: 4.5896 +[2026-06-07 03:02:35,824][464932] Loop Runner_EvtLoop terminating... +[2026-06-07 03:02:35,825][464932] Runner profile tree view: +main_loop: 897.9103 +[2026-06-07 03:02:35,825][464932] Collected {0: 25034752}, FPS: 27881.1