diff --git a/.gitattributes b/.gitattributes index f601d457434aff847a627df375bf563ca93e7c11..de37b1e9efe85a63f7eea60fbd4a2163fd176077 100644 --- a/.gitattributes +++ b/.gitattributes @@ -48,3 +48,4 @@ factor_sweeps/flappy/observation_stride/train/factor_sweep:flappy:observation_st factor_sweeps/flappy/observation_stride/train/factor_sweep:flappy:observation_stride:fixed_l2:fs4:obs15:stride2:seed12/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed11/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text +factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/checkpoint_p0/best_000048840_25034752_reward_2156.198.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/checkpoint_p0/best_000048840_25034752_reward_2156.198.pth new file mode 100644 index 0000000000000000000000000000000000000000..abef2d19e829b310d80a2eca113f226caaffa82c --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/checkpoint_p0/best_000048840_25034752_reward_2156.198.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:881450007cfa014c9d46bbf821d36d207882ccbc3953672f02e06298a21abe2a +size 22210361 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/checkpoint_p0/checkpoint_000021152_10846208.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/checkpoint_p0/checkpoint_000021152_10846208.pth new file mode 100644 index 0000000000000000000000000000000000000000..97fda81a8c52e8597af61de99057d6ba0c524397 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/checkpoint_p0/checkpoint_000021152_10846208.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1401620a4db490f3e59bb22706fd389deb0273a8e6ea08d7e14cdf932072aba +size 22210721 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/checkpoint_p0/checkpoint_000042720_21889024.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/checkpoint_p0/checkpoint_000042720_21889024.pth new file mode 100644 index 0000000000000000000000000000000000000000..7a7c6a6ccbe356f4bfba76d16b13852fe9853aee --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/checkpoint_p0/checkpoint_000042720_21889024.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f62bc7349347379d1899dafd39c31f488b9037411282d6f3a0d23b54890f297 +size 22210721 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/checkpoint_p0/checkpoint_000048840_25034752.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/checkpoint_p0/checkpoint_000048840_25034752.pth new file mode 100644 index 0000000000000000000000000000000000000000..0656246be8cbb604f3521df5b5877a021dfc802c --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/checkpoint_p0/checkpoint_000048840_25034752.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e80d5d6d07bced0410b185491cb4288b7fd087a616e3372f6beca2a24e0a8e6 +size 22210721 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/config.json b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e291805169e8d67115d34682adcae8e2303fcbb8 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/config.json @@ -0,0 +1,266 @@ +{ + "help": false, + "algo": "APPO", + "env": "latency_flappy", + "experiment": "flappy_frame_stack_fixed_l2_fs5_seed12", + "train_dir": "results/checkpoints_factor_sweeps/flappy/context_window", + "restart_behavior": "resume", + "device": "gpu", + "seed": 12, + "num_policies": 1, + "async_rl": true, + "serial_mode": false, + "batched_sampling": true, + "num_batches_to_accumulate": 2, + "worker_num_splits": 1, + "policy_workers_per_policy": 1, + "max_policy_lag": 400, + "num_workers": 2, + "num_envs_per_worker": 1, + "batch_size": 4096, + "num_batches_per_epoch": 8, + "num_epochs": 8, + "rollout": 128, + "recurrence": 1, + "shuffle_minibatches": false, + "gamma": 0.99, + "reward_scale": 1.0, + "reward_clip": 1000.0, + "value_bootstrap": false, + "normalize_returns": true, + "exploration_loss_coeff": 0.003, + "value_loss_coeff": 0.5, + "kl_loss_coeff": 0.0, + "exploration_loss": "entropy", + "gae_lambda": 0.95, + "ppo_clip_ratio": 0.1, + "ppo_clip_value": 0.2, + "with_vtrace": false, + "vtrace_rho": 1.0, + "vtrace_c": 1.0, + "optimizer": "adam", + "adam_eps": 1e-05, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "max_grad_norm": 0.5, + "learning_rate": 0.00025, + "lr_schedule": "linear_decay", + "lr_schedule_kl_threshold": 0.008, + "lr_adaptive_min": 1e-06, + "lr_adaptive_max": 0.01, + "obs_subtract_mean": 0.0, + "obs_scale": 255.0, + "normalize_input": true, + "normalize_input_keys": null, + "decorrelate_experience_max_seconds": 0, + "decorrelate_envs_on_one_worker": true, + "actor_worker_gpus": [ + 0 + ], + "set_workers_cpu_affinity": true, + "force_envs_single_thread": false, + "default_niceness": 0, + "log_to_file": true, + "experiment_summaries_interval": 1, + "flush_summaries_interval": 30, + "stats_avg": 100, + "summaries_use_frameskip": true, + "heartbeat_interval": 20, + "heartbeat_reporting_interval": 180, + "train_for_env_steps": 25000000, + "train_for_seconds": 10000000000, + "save_every_sec": 600, + "keep_checkpoints": 5, + "load_checkpoint_kind": "latest", + "save_milestones_sec": -1, + "save_best_every_sec": 5, + "save_best_metric": "reward", + "save_best_after": 100000, + "benchmark": false, + "encoder_mlp_layers": [ + 512, + 512 + ], + "encoder_conv_architecture": "convnet_atari", + "encoder_conv_mlp_layers": [ + 512 + ], + "use_rnn": false, + "rnn_size": 512, + "rnn_type": "gru", + "rnn_num_layers": 1, + "decoder_mlp_layers": [], + "nonlinearity": "elu", + "policy_initialization": "orthogonal", + "policy_init_gain": 1.0, + "actor_critic_share_weights": true, + "adaptive_stddev": true, + "continuous_tanh_scale": 0.0, + "initial_stddev": 1.0, + "use_env_info_cache": false, + "env_gpu_actions": true, + "env_gpu_observations": true, + "env_frameskip": 1, + "env_framestack": 1, + "pixel_format": "CHW", + "use_record_episode_statistics": false, + "with_wandb": true, + "wandb_user": null, + "wandb_project": "latency-sensitive-bench", + "wandb_group": "flappy-fs5-fixed_l2", + "wandb_job_type": "sample_factory", + "wandb_tags": [ + "factor_sweep", + "flappy", + "frame_stack", + "fixed", + "fixed_l2", + "fs5", + "seed12" + ], + "with_pbt": false, + "pbt_mix_policies_in_one_env": true, + "pbt_period_env_steps": 5000000, + "pbt_start_mutation": 20000000, + "pbt_replace_fraction": 0.3, + "pbt_mutation_rate": 0.15, + "pbt_replace_reward_gap": 0.1, + "pbt_replace_reward_gap_absolute": 1e-06, + "pbt_optimize_gamma": false, + "pbt_target_objective": "true_objective", + "pbt_perturb_min": 1.1, + "pbt_perturb_max": 1.5, + "gym_id": "FlappyBird-v0", + "env_fps": 30.0, + "obs_fps": 30.0, + "use_lidar": false, + "normalize_obs": true, + "audio_on": false, + "screen_size": "", + "obs_resize": "84,84", + "use_gpu_render": true, + "simulator": "gpu", + "gpu_render_device": "auto", + "gpu_render_batch_size": 128, + "gpu_render_profile": false, + "gpu_render_profile_interval": 200, + "pipe_gap": 100, + "bird_color": "yellow", + "pipe_color": "green", + "background": "day", + "score_limit": -1, + "frame_stack": 5, + "debug": false, + "debug_timelimit_diagnostics": false, + "max_episode_steps": 0, + "mode": "train", + "latency_type": "fixed", + "fixed_latency_ms": 66.66666666666667, + "mean_latency_ms": null, + "std_latency_ms": null, + "min_latency_ms": null, + "max_latency_ms": null, + "latency_seed": null, + "add_latency_info": false, + "max_pending_actions": null, + "hold_policy": "one_frame_then_noop", + "ordering_policy": "latest_ready", + "eval_episodes": 100, + "eval_parallel_envs": 100, + "eval_latency_raw_frame_values": "0,1,2,3,4,5", + "eval_max_steps": 3600, + "eval_deterministic": true, + "eval_raw_reward": false, + "episode_metrics_path": "results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs5_seed12/episode_metrics.jsonl", + "command_line": "--mode train --algo APPO --env latency_flappy --experiment flappy_frame_stack_fixed_l2_fs5_seed12 --train_dir results/checkpoints_factor_sweeps/flappy/context_window --restart_behavior resume --device gpu --actor_worker_gpus 0 --env_gpu_observations True --env_gpu_actions True --gpu-render-batch-size 128 --seed 12 --episode_metrics_path results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs5_seed12/episode_metrics.jsonl --train_for_env_steps 25000000 --num_workers 2 --num_envs_per_worker 1 --num_policies 1 --batch_size 4096 --rollout 128 --recurrence 1 --num_epochs 8 --num_batches_per_epoch 8 --worker_num_splits 1 --max_policy_lag 400 --learning_rate 0.00025 --gamma 0.99 --gae_lambda 0.95 --ppo_clip_ratio 0.1 --ppo_clip_value 0.2 --value_loss_coeff 0.5 --max_grad_norm 0.5 --save_every_sec 600 --keep_checkpoints 5 --stats_avg 100 --experiment_summaries_interval 1 --batched_sampling True --async_rl True --use_rnn False --normalize_returns True --normalize_input True --latency-type fixed --fixed-latency-ms 66.66666666666667 --add-latency-info False --eval-episodes 100 --eval-parallel-envs 100 --eval-max-steps 3600 --eval-deterministic True --with_wandb True --wandb_project latency-sensitive-bench --wandb_group flappy-fs5-fixed_l2 --wandb_job_type sample_factory --wandb_tags factor_sweep flappy frame_stack fixed fixed_l2 fs5 seed12 --gym_id FlappyBird-v0 --env-fps 30 --obs-fps 30.0 --use_lidar False --normalize_obs True --audio_on False --obs_resize 84,84 --use-gpu-render True --simulator gpu --gpu-render-device auto --gpu-render-profile False --gpu-render-profile-interval 200 --pipe_gap 100 --bird_color yellow --pipe_color green --background day --frame_stack 5 --debug False --debug-timelimit-diagnostics False --hold-policy one_frame_then_noop --ordering-policy latest_ready", + "cli_args": { + "algo": "APPO", + "env": "latency_flappy", + "experiment": "flappy_frame_stack_fixed_l2_fs5_seed12", + "train_dir": "results/checkpoints_factor_sweeps/flappy/context_window", + "restart_behavior": "resume", + "device": "gpu", + "seed": 12, + "num_policies": 1, + "async_rl": true, + "batched_sampling": true, + "worker_num_splits": 1, + "max_policy_lag": 400, + "num_workers": 2, + "num_envs_per_worker": 1, + "batch_size": 4096, + "num_batches_per_epoch": 8, + "num_epochs": 8, + "rollout": 128, + "recurrence": 1, + "gamma": 0.99, + "normalize_returns": true, + "value_loss_coeff": 0.5, + "gae_lambda": 0.95, + "ppo_clip_ratio": 0.1, + "ppo_clip_value": 0.2, + "max_grad_norm": 0.5, + "learning_rate": 0.00025, + "normalize_input": true, + "actor_worker_gpus": [ + 0 + ], + "experiment_summaries_interval": 1, + "stats_avg": 100, + "train_for_env_steps": 25000000, + "save_every_sec": 600, + "keep_checkpoints": 5, + "use_rnn": false, + "env_gpu_actions": true, + "env_gpu_observations": true, + "with_wandb": true, + "wandb_project": "latency-sensitive-bench", + "wandb_group": "flappy-fs5-fixed_l2", + "wandb_job_type": "sample_factory", + "wandb_tags": [ + "factor_sweep", + "flappy", + "frame_stack", + "fixed", + "fixed_l2", + "fs5", + "seed12" + ], + "gym_id": "FlappyBird-v0", + "env_fps": 30.0, + "obs_fps": 30.0, + "use_lidar": false, + "normalize_obs": true, + "audio_on": false, + "obs_resize": "84,84", + "use_gpu_render": true, + "simulator": "gpu", + "gpu_render_device": "auto", + "gpu_render_batch_size": 128, + "gpu_render_profile": false, + "gpu_render_profile_interval": 200, + "pipe_gap": 100, + "bird_color": "yellow", + "pipe_color": "green", + "background": "day", + "frame_stack": 5, + "debug": false, + "debug_timelimit_diagnostics": false, + "mode": "train", + "latency_type": "fixed", + "fixed_latency_ms": 66.66666666666667, + "add_latency_info": false, + "hold_policy": "one_frame_then_noop", + "ordering_policy": "latest_ready", + "eval_episodes": 100, + "eval_parallel_envs": 100, + "eval_max_steps": 3600, + "eval_deterministic": true, + "episode_metrics_path": "results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs5_seed12/episode_metrics.jsonl" + }, + "git_hash": "284fe8ace24f0e8a40c03c5b559969abd7caeb29", + "git_repo_name": "git@github.com:ZihanWang314/latency-sensitive-bench.git", + "eval_env_frameskip": 1, + "output_dir": "outputs/factor_sweeps/flappy/context_window/train/frame_stack/fixed_l2/fs5/seed_12", + "wandb_unique_id": "flappy-fs5-fixed_l2-s12" +} \ No newline at end of file diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/episode_metrics.jsonl b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/episode_metrics.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..bc2e75e0d96ec38503314deccaefb5a4526fc676 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/episode_metrics.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b56af456ed3e58ad4937015279dbdba3f55d22410fafdc5225a0018df74f34a5 +size 20384405 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/git.diff b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/git.diff new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/sf_log.txt b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/sf_log.txt new file mode 100644 index 0000000000000000000000000000000000000000..828eb8ae3252f87cd33dbb71e03b23aa6f9896c0 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed12/sf_log.txt @@ -0,0 +1,5625 @@ +[2026-06-02 16:46:39,395][260776] Saving configuration to results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs5_seed12/config.json... +[2026-06-02 16:46:39,461][260776] Using GPUs [0] for process 0 (actually maps to GPUs [3]) +[2026-06-02 16:46:39,462][260776] Rollout worker 0 uses device cuda:0 +[2026-06-02 16:46:39,463][260776] Using GPUs [0] for process 1 (actually maps to GPUs [3]) +[2026-06-02 16:46:39,463][260776] Rollout worker 1 uses device cuda:0 +[2026-06-02 16:46:41,169][260776] Using GPUs [0] for process 0 (actually maps to GPUs [3]) +[2026-06-02 16:46:41,170][260776] InferenceWorker_p0-w0: min num requests: 1 +[2026-06-02 16:46:41,174][260776] Using GPUs [0] for process 0 (actually maps to GPUs [3]) +[2026-06-02 16:46:41,178][260776] Using GPUs [0] for process 1 (actually maps to GPUs [3]) +[2026-06-02 16:46:41,178][260776] Starting all processes... +[2026-06-02 16:46:41,179][260776] Starting process learner_proc0 +[2026-06-02 16:46:42,371][260776] Starting all processes... +[2026-06-02 16:46:42,375][260776] Starting process inference_proc0-0 +[2026-06-02 16:46:42,375][260776] Starting process rollout_proc0 +[2026-06-02 16:46:42,376][260776] Starting process rollout_proc1 +[2026-06-02 16:46:42,849][262026] Using GPUs [0] for process 0 (actually maps to GPUs [3]) +[2026-06-02 16:46:42,849][262026] Set environment var CUDA_VISIBLE_DEVICES to '3' (GPU indices [0]) for learning process 0 +[2026-06-02 16:46:42,849][262026] Num visible devices: 1 +[2026-06-02 16:46:42,850][262026] Setting fixed seed 12 +[2026-06-02 16:46:42,851][262026] Using GPUs [0] for process 0 (actually maps to GPUs [3]) +[2026-06-02 16:46:42,852][262026] Initializing actor-critic model on device cuda:0 +[2026-06-02 16:46:42,852][262026] RunningMeanStd input shape: (15, 84, 84) +[2026-06-02 16:46:42,885][262026] RunningMeanStd input shape: (1,) +[2026-06-02 16:46:42,895][262026] ConvEncoder: input_channels=15 +[2026-06-02 16:46:42,957][262026] Conv encoder output size: 512 +[2026-06-02 16:46:42,959][262026] Created Actor Critic model with architecture: +[2026-06-02 16:46:42,959][262026] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): MultiInputEncoder( + (encoders): ModuleDict( + (obs): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + ) + (core): ModelCoreIdentity() + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=2, bias=True) + ) +) +[2026-06-02 16:46:42,976][262026] Using optimizer +[2026-06-02 16:46:43,761][262026] No checkpoints found +[2026-06-02 16:46:43,761][262026] Did not load from checkpoint, starting from scratch! +[2026-06-02 16:46:43,762][262026] Initialized policy 0 weights for model version 0 +[2026-06-02 16:46:43,782][262026] LearnerWorker_p0 finished initialization! +[2026-06-02 16:46:43,782][262026] Using GPUs [0] for process 0 (actually maps to GPUs [3]) +[2026-06-02 16:46:45,071][262583] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191] +[2026-06-02 16:46:45,072][262583] Using GPUs [0] for process 0 (actually maps to GPUs [3]) +[2026-06-02 16:46:45,072][262583] Set environment var CUDA_VISIBLE_DEVICES to '3' (GPU indices [0]) for actor process 0 +[2026-06-02 16:46:45,072][262583] Num visible devices: 1 +[2026-06-02 16:46:45,074][262585] Worker 1 uses CPU cores [192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383] +[2026-06-02 16:46:45,075][262585] Using GPUs [0] for process 1 (actually maps to GPUs [3]) +[2026-06-02 16:46:45,075][262585] Set environment var CUDA_VISIBLE_DEVICES to '3' (GPU indices [0]) for actor process 1 +[2026-06-02 16:46:45,075][262585] Num visible devices: 1 +[2026-06-02 16:46:45,082][262582] Using GPUs [0] for process 0 (actually maps to GPUs [3]) +[2026-06-02 16:46:45,082][262582] Set environment var CUDA_VISIBLE_DEVICES to '3' (GPU indices [0]) for inference process 0 +[2026-06-02 16:46:45,082][262582] Num visible devices: 1 +[2026-06-02 16:46:45,087][262582] RunningMeanStd input shape: (15, 84, 84) +[2026-06-02 16:46:45,116][262582] RunningMeanStd input shape: (1,) +[2026-06-02 16:46:45,124][262582] ConvEncoder: input_channels=15 +[2026-06-02 16:46:45,185][262582] Conv encoder output size: 512 +[2026-06-02 16:46:45,209][260776] Inference worker 0-0 is ready! +[2026-06-02 16:46:45,210][260776] All inference workers are ready! Signal rollout workers to start! +[2026-06-02 16:46:45,210][262583] EnvRunner 0-0 uses policy 0 +[2026-06-02 16:46:45,210][262585] EnvRunner 1-0 uses policy 0 +[2026-06-02 16:46:46,007][260776] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2026-06-02 16:46:48,100][262026] Signal inference workers to stop experience collection... +[2026-06-02 16:46:48,110][262582] InferenceWorker_p0-w0: stopping experience collection +[2026-06-02 16:46:50,241][262026] Signal inference workers to resume experience collection... +[2026-06-02 16:46:50,242][262582] InferenceWorker_p0-w0: resuming experience collection +[2026-06-02 16:46:50,599][262582] Updated weights for policy 0, policy_version 73 (0.0069) +[2026-06-02 16:46:50,902][262582] Updated weights for policy 0, policy_version 89 (0.0009) +[2026-06-02 16:46:51,007][260776] Fps is (10 sec: 6553.4, 60 sec: 6553.4, 300 sec: 6553.4). Total num frames: 32768. Throughput: 0: 13209.2. Samples: 66048. Policy #0 lag: (min: 53.0, avg: 53.0, max: 53.0) +[2026-06-02 16:46:51,008][260776] Avg episode reward: [(0, '-6.848')] +[2026-06-02 16:46:51,107][262582] Updated weights for policy 0, policy_version 99 (0.0009) +[2026-06-02 16:46:51,321][262582] Updated weights for policy 0, policy_version 109 (0.0009) +[2026-06-02 16:46:51,588][262582] Updated weights for policy 0, policy_version 123 (0.0009) +[2026-06-02 16:46:52,124][262582] Updated weights for policy 0, policy_version 137 (0.0008) +[2026-06-02 16:46:52,337][262582] Updated weights for policy 0, policy_version 147 (0.0008) +[2026-06-02 16:46:52,549][262582] Updated weights for policy 0, policy_version 157 (0.0007) +[2026-06-02 16:46:52,840][262582] Updated weights for policy 0, policy_version 173 (0.0009) +[2026-06-02 16:46:53,056][262582] Updated weights for policy 0, policy_version 183 (0.0009) +[2026-06-02 16:46:53,569][262582] Updated weights for policy 0, policy_version 193 (0.0008) +[2026-06-02 16:46:53,773][262582] Updated weights for policy 0, policy_version 203 (0.0004) +[2026-06-02 16:46:54,006][262582] Updated weights for policy 0, policy_version 216 (0.0007) +[2026-06-02 16:46:54,222][262582] Updated weights for policy 0, policy_version 226 (0.0004) +[2026-06-02 16:46:54,429][262582] Updated weights for policy 0, policy_version 236 (0.0004) +[2026-06-02 16:46:54,665][262582] Updated weights for policy 0, policy_version 249 (0.0008) +[2026-06-02 16:46:55,164][262582] Updated weights for policy 0, policy_version 262 (0.0009) +[2026-06-02 16:46:55,368][262582] Updated weights for policy 0, policy_version 272 (0.0009) +[2026-06-02 16:46:55,579][262582] Updated weights for policy 0, policy_version 282 (0.0008) +[2026-06-02 16:46:55,861][262582] Updated weights for policy 0, policy_version 297 (0.0008) +[2026-06-02 16:46:56,007][260776] Fps is (10 sec: 13106.8, 60 sec: 13106.8, 300 sec: 13106.8). Total num frames: 131072. Throughput: 0: 14873.2. Samples: 148736. Policy #0 lag: (min: 63.0, avg: 91.4, max: 127.0) +[2026-06-02 16:46:56,008][260776] Avg episode reward: [(0, '-5.555')] +[2026-06-02 16:46:56,073][262582] Updated weights for policy 0, policy_version 307 (0.0008) +[2026-06-02 16:46:56,289][262582] Updated weights for policy 0, policy_version 317 (0.0008) +[2026-06-02 16:46:56,351][262026] Saving new best policy, reward=-5.555! +[2026-06-02 16:46:56,804][262582] Updated weights for policy 0, policy_version 327 (0.0005) +[2026-06-02 16:46:57,078][262582] Updated weights for policy 0, policy_version 342 (0.0008) +[2026-06-02 16:46:57,292][262582] Updated weights for policy 0, policy_version 352 (0.0010) +[2026-06-02 16:46:57,584][262582] Updated weights for policy 0, policy_version 367 (0.0008) +[2026-06-02 16:46:57,786][262582] Updated weights for policy 0, policy_version 377 (0.0008) +[2026-06-02 16:46:58,255][262582] Updated weights for policy 0, policy_version 389 (0.0009) +[2026-06-02 16:46:58,459][262582] Updated weights for policy 0, policy_version 399 (0.0008) +[2026-06-02 16:46:58,673][262582] Updated weights for policy 0, policy_version 409 (0.0008) +[2026-06-02 16:46:58,973][262582] Updated weights for policy 0, policy_version 425 (0.0009) +[2026-06-02 16:46:59,187][262582] Updated weights for policy 0, policy_version 435 (0.0008) +[2026-06-02 16:46:59,401][262582] Updated weights for policy 0, policy_version 445 (0.0008) +[2026-06-02 16:46:59,969][262582] Updated weights for policy 0, policy_version 458 (0.0007) +[2026-06-02 16:47:00,158][262582] Updated weights for policy 0, policy_version 468 (0.0008) +[2026-06-02 16:47:00,373][262582] Updated weights for policy 0, policy_version 478 (0.0009) +[2026-06-02 16:47:00,578][262582] Updated weights for policy 0, policy_version 488 (0.0008) +[2026-06-02 16:47:00,782][262582] Updated weights for policy 0, policy_version 499 (0.0008) +[2026-06-02 16:47:00,982][262582] Updated weights for policy 0, policy_version 509 (0.0008) +[2026-06-02 16:47:01,007][260776] Fps is (10 sec: 19660.9, 60 sec: 15291.6, 300 sec: 15291.6). Total num frames: 229376. Throughput: 0: 18244.1. Samples: 273664. Policy #0 lag: (min: 63.0, avg: 93.7, max: 127.0) +[2026-06-02 16:47:01,008][260776] Avg episode reward: [(0, '-1.491')] +[2026-06-02 16:47:01,046][262026] Saving new best policy, reward=-1.491! +[2026-06-02 16:47:01,159][260776] Heartbeat connected on Batcher_0 +[2026-06-02 16:47:01,163][260776] Heartbeat connected on LearnerWorker_p0 +[2026-06-02 16:47:01,172][260776] Heartbeat connected on InferenceWorker_p0-w0 +[2026-06-02 16:47:01,178][260776] Heartbeat connected on RolloutWorker_w1 +[2026-06-02 16:47:01,192][260776] Heartbeat connected on RolloutWorker_w0 +[2026-06-02 16:47:01,522][262582] Updated weights for policy 0, policy_version 519 (0.0008) +[2026-06-02 16:47:01,711][262582] Updated weights for policy 0, policy_version 529 (0.0008) +[2026-06-02 16:47:01,911][262582] Updated weights for policy 0, policy_version 539 (0.0007) +[2026-06-02 16:47:02,105][262582] Updated weights for policy 0, policy_version 549 (0.0005) +[2026-06-02 16:47:02,295][262582] Updated weights for policy 0, policy_version 559 (0.0005) +[2026-06-02 16:47:02,488][262582] Updated weights for policy 0, policy_version 569 (0.0008) +[2026-06-02 16:47:03,047][262582] Updated weights for policy 0, policy_version 579 (0.0009) +[2026-06-02 16:47:03,228][262582] Updated weights for policy 0, policy_version 589 (0.0008) +[2026-06-02 16:47:03,424][262582] Updated weights for policy 0, policy_version 599 (0.0008) +[2026-06-02 16:47:03,622][262582] Updated weights for policy 0, policy_version 609 (0.0008) +[2026-06-02 16:47:03,819][262582] Updated weights for policy 0, policy_version 619 (0.0008) +[2026-06-02 16:47:04,014][262582] Updated weights for policy 0, policy_version 629 (0.0008) +[2026-06-02 16:47:04,214][262582] Updated weights for policy 0, policy_version 639 (0.0008) +[2026-06-02 16:47:04,815][262582] Updated weights for policy 0, policy_version 649 (0.0008) +[2026-06-02 16:47:05,013][262582] Updated weights for policy 0, policy_version 659 (0.0008) +[2026-06-02 16:47:05,213][262582] Updated weights for policy 0, policy_version 669 (0.0008) +[2026-06-02 16:47:05,417][262582] Updated weights for policy 0, policy_version 680 (0.0008) +[2026-06-02 16:47:05,612][262582] Updated weights for policy 0, policy_version 690 (0.0008) +[2026-06-02 16:47:05,792][262582] Updated weights for policy 0, policy_version 700 (0.0008) +[2026-06-02 16:47:06,007][260776] Fps is (10 sec: 22938.1, 60 sec: 18022.4, 300 sec: 18022.4). Total num frames: 360448. Throughput: 0: 16832.0. Samples: 336640. Policy #0 lag: (min: 25.0, avg: 63.9, max: 89.0) +[2026-06-02 16:47:06,008][260776] Avg episode reward: [(0, '3.780')] +[2026-06-02 16:47:06,013][262026] Saving new best policy, reward=3.780! +[2026-06-02 16:47:06,448][262582] Updated weights for policy 0, policy_version 710 (0.0008) +[2026-06-02 16:47:06,682][262582] Updated weights for policy 0, policy_version 723 (0.0010) +[2026-06-02 16:47:06,886][262582] Updated weights for policy 0, policy_version 734 (0.0008) +[2026-06-02 16:47:07,076][262582] Updated weights for policy 0, policy_version 744 (0.0008) +[2026-06-02 16:47:07,270][262582] Updated weights for policy 0, policy_version 754 (0.0009) +[2026-06-02 16:47:07,473][262582] Updated weights for policy 0, policy_version 764 (0.0008) +[2026-06-02 16:47:08,191][262582] Updated weights for policy 0, policy_version 774 (0.0008) +[2026-06-02 16:47:08,391][262582] Updated weights for policy 0, policy_version 784 (0.0008) +[2026-06-02 16:47:08,569][262582] Updated weights for policy 0, policy_version 794 (0.0008) +[2026-06-02 16:47:08,755][262582] Updated weights for policy 0, policy_version 804 (0.0008) +[2026-06-02 16:47:08,948][262582] Updated weights for policy 0, policy_version 814 (0.0008) +[2026-06-02 16:47:09,144][262582] Updated weights for policy 0, policy_version 825 (0.0008) +[2026-06-02 16:47:09,932][262582] Updated weights for policy 0, policy_version 835 (0.0009) +[2026-06-02 16:47:10,118][262582] Updated weights for policy 0, policy_version 846 (0.0008) +[2026-06-02 16:47:10,304][262582] Updated weights for policy 0, policy_version 856 (0.0008) +[2026-06-02 16:47:10,483][262582] Updated weights for policy 0, policy_version 866 (0.0009) +[2026-06-02 16:47:10,676][262582] Updated weights for policy 0, policy_version 876 (0.0009) +[2026-06-02 16:47:10,897][262582] Updated weights for policy 0, policy_version 888 (0.0008) +[2026-06-02 16:47:11,007][260776] Fps is (10 sec: 19660.9, 60 sec: 17039.3, 300 sec: 17039.3). Total num frames: 425984. Throughput: 0: 17935.3. Samples: 448384. Policy #0 lag: (min: 22.0, avg: 44.6, max: 86.0) +[2026-06-02 16:47:11,008][260776] Avg episode reward: [(0, '4.011')] +[2026-06-02 16:47:11,041][262026] Saving new best policy, reward=4.011! +[2026-06-02 16:47:11,746][262582] Updated weights for policy 0, policy_version 898 (0.0009) +[2026-06-02 16:47:11,922][262582] Updated weights for policy 0, policy_version 908 (0.0008) +[2026-06-02 16:47:12,097][262582] Updated weights for policy 0, policy_version 918 (0.0009) +[2026-06-02 16:47:12,302][262582] Updated weights for policy 0, policy_version 929 (0.0008) +[2026-06-02 16:47:12,485][262582] Updated weights for policy 0, policy_version 939 (0.0008) +[2026-06-02 16:47:12,676][262582] Updated weights for policy 0, policy_version 949 (0.0008) +[2026-06-02 16:47:12,863][262582] Updated weights for policy 0, policy_version 959 (0.0008) +[2026-06-02 16:47:13,641][262582] Updated weights for policy 0, policy_version 970 (0.0009) +[2026-06-02 16:47:13,824][262582] Updated weights for policy 0, policy_version 980 (0.0008) +[2026-06-02 16:47:14,031][262582] Updated weights for policy 0, policy_version 991 (0.0008) +[2026-06-02 16:47:14,213][262582] Updated weights for policy 0, policy_version 1001 (0.0005) +[2026-06-02 16:47:14,406][262582] Updated weights for policy 0, policy_version 1011 (0.0005) +[2026-06-02 16:47:14,586][262582] Updated weights for policy 0, policy_version 1021 (0.0005) +[2026-06-02 16:47:15,390][262582] Updated weights for policy 0, policy_version 1031 (0.0006) +[2026-06-02 16:47:15,603][262582] Updated weights for policy 0, policy_version 1042 (0.0007) +[2026-06-02 16:47:15,785][262582] Updated weights for policy 0, policy_version 1052 (0.0008) +[2026-06-02 16:47:15,984][262582] Updated weights for policy 0, policy_version 1063 (0.0008) +[2026-06-02 16:47:16,007][260776] Fps is (10 sec: 16384.0, 60 sec: 17476.2, 300 sec: 17476.2). Total num frames: 524288. Throughput: 0: 18867.2. Samples: 566016. Policy #0 lag: (min: 63.0, avg: 80.7, max: 127.0) +[2026-06-02 16:47:16,008][260776] Avg episode reward: [(0, '4.077')] +[2026-06-02 16:47:16,186][262582] Updated weights for policy 0, policy_version 1074 (0.0009) +[2026-06-02 16:47:16,372][262582] Updated weights for policy 0, policy_version 1084 (0.0008) +[2026-06-02 16:47:16,443][262026] Saving new best policy, reward=4.077! +[2026-06-02 16:47:17,173][262582] Updated weights for policy 0, policy_version 1094 (0.0009) +[2026-06-02 16:47:17,349][262582] Updated weights for policy 0, policy_version 1104 (0.0008) +[2026-06-02 16:47:17,539][262582] Updated weights for policy 0, policy_version 1114 (0.0009) +[2026-06-02 16:47:17,744][262582] Updated weights for policy 0, policy_version 1125 (0.0009) +[2026-06-02 16:47:17,922][262582] Updated weights for policy 0, policy_version 1135 (0.0008) +[2026-06-02 16:47:18,132][262582] Updated weights for policy 0, policy_version 1146 (0.0010) +[2026-06-02 16:47:18,978][262582] Updated weights for policy 0, policy_version 1157 (0.0008) +[2026-06-02 16:47:19,159][262582] Updated weights for policy 0, policy_version 1167 (0.0008) +[2026-06-02 16:47:19,353][262582] Updated weights for policy 0, policy_version 1177 (0.0005) +[2026-06-02 16:47:19,533][262582] Updated weights for policy 0, policy_version 1187 (0.0005) +[2026-06-02 16:47:19,727][262582] Updated weights for policy 0, policy_version 1197 (0.0006) +[2026-06-02 16:47:19,912][262582] Updated weights for policy 0, policy_version 1207 (0.0009) +[2026-06-02 16:47:20,742][262582] Updated weights for policy 0, policy_version 1218 (0.0009) +[2026-06-02 16:47:20,925][262582] Updated weights for policy 0, policy_version 1228 (0.0010) +[2026-06-02 16:47:21,007][260776] Fps is (10 sec: 19660.6, 60 sec: 17788.2, 300 sec: 17788.2). Total num frames: 622592. Throughput: 0: 17459.1. Samples: 611072. Policy #0 lag: (min: 63.0, avg: 76.8, max: 127.0) +[2026-06-02 16:47:21,008][260776] Avg episode reward: [(0, '4.637')] +[2026-06-02 16:47:21,102][262582] Updated weights for policy 0, policy_version 1238 (0.0008) +[2026-06-02 16:47:21,290][262582] Updated weights for policy 0, policy_version 1248 (0.0008) +[2026-06-02 16:47:21,485][262582] Updated weights for policy 0, policy_version 1258 (0.0009) +[2026-06-02 16:47:21,673][262582] Updated weights for policy 0, policy_version 1268 (0.0008) +[2026-06-02 16:47:21,879][262582] Updated weights for policy 0, policy_version 1279 (0.0008) +[2026-06-02 16:47:21,885][262026] Saving new best policy, reward=4.637! +[2026-06-02 16:47:22,726][262582] Updated weights for policy 0, policy_version 1290 (0.0009) +[2026-06-02 16:47:22,908][262582] Updated weights for policy 0, policy_version 1300 (0.0008) +[2026-06-02 16:47:23,099][262582] Updated weights for policy 0, policy_version 1310 (0.0008) +[2026-06-02 16:47:23,277][262582] Updated weights for policy 0, policy_version 1320 (0.0008) +[2026-06-02 16:47:23,470][262582] Updated weights for policy 0, policy_version 1330 (0.0008) +[2026-06-02 16:47:23,694][262582] Updated weights for policy 0, policy_version 1342 (0.0008) +[2026-06-02 16:47:24,508][262582] Updated weights for policy 0, policy_version 1352 (0.0009) +[2026-06-02 16:47:24,687][262582] Updated weights for policy 0, policy_version 1362 (0.0009) +[2026-06-02 16:47:24,881][262582] Updated weights for policy 0, policy_version 1372 (0.0009) +[2026-06-02 16:47:25,074][262582] Updated weights for policy 0, policy_version 1382 (0.0009) +[2026-06-02 16:47:25,281][262582] Updated weights for policy 0, policy_version 1393 (0.0009) +[2026-06-02 16:47:25,463][262582] Updated weights for policy 0, policy_version 1403 (0.0008) +[2026-06-02 16:47:26,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18022.4, 300 sec: 18022.4). Total num frames: 720896. Throughput: 0: 18243.1. Samples: 729728. Policy #0 lag: (min: 63.0, avg: 79.0, max: 127.0) +[2026-06-02 16:47:26,009][260776] Avg episode reward: [(0, '4.943')] +[2026-06-02 16:47:26,015][262026] Saving new best policy, reward=4.943! +[2026-06-02 16:47:26,311][262582] Updated weights for policy 0, policy_version 1413 (0.0009) +[2026-06-02 16:47:26,494][262582] Updated weights for policy 0, policy_version 1423 (0.0008) +[2026-06-02 16:47:26,686][262582] Updated weights for policy 0, policy_version 1433 (0.0009) +[2026-06-02 16:47:26,869][262582] Updated weights for policy 0, policy_version 1443 (0.0009) +[2026-06-02 16:47:27,059][262582] Updated weights for policy 0, policy_version 1453 (0.0008) +[2026-06-02 16:47:27,261][262582] Updated weights for policy 0, policy_version 1464 (0.0009) +[2026-06-02 16:47:28,071][262582] Updated weights for policy 0, policy_version 1474 (0.0009) +[2026-06-02 16:47:28,235][262582] Updated weights for policy 0, policy_version 1484 (0.0009) +[2026-06-02 16:47:28,422][262582] Updated weights for policy 0, policy_version 1494 (0.0008) +[2026-06-02 16:47:28,628][262582] Updated weights for policy 0, policy_version 1505 (0.0008) +[2026-06-02 16:47:28,817][262582] Updated weights for policy 0, policy_version 1515 (0.0009) +[2026-06-02 16:47:29,014][262582] Updated weights for policy 0, policy_version 1525 (0.0009) +[2026-06-02 16:47:29,193][262582] Updated weights for policy 0, policy_version 1535 (0.0009) +[2026-06-02 16:47:30,032][262582] Updated weights for policy 0, policy_version 1546 (0.0009) +[2026-06-02 16:47:30,241][262582] Updated weights for policy 0, policy_version 1557 (0.0008) +[2026-06-02 16:47:30,418][262582] Updated weights for policy 0, policy_version 1567 (0.0008) +[2026-06-02 16:47:30,620][262582] Updated weights for policy 0, policy_version 1577 (0.0008) +[2026-06-02 16:47:30,829][262582] Updated weights for policy 0, policy_version 1588 (0.0008) +[2026-06-02 16:47:31,007][260776] Fps is (10 sec: 16384.2, 60 sec: 17476.2, 300 sec: 17476.2). Total num frames: 786432. Throughput: 0: 18520.2. Samples: 833408. Policy #0 lag: (min: 63.0, avg: 79.0, max: 127.0) +[2026-06-02 16:47:31,008][260776] Avg episode reward: [(0, '5.250')] +[2026-06-02 16:47:31,016][262582] Updated weights for policy 0, policy_version 1599 (0.0010) +[2026-06-02 16:47:31,037][262026] Saving new best policy, reward=5.250! +[2026-06-02 16:47:31,850][262582] Updated weights for policy 0, policy_version 1609 (0.0009) +[2026-06-02 16:47:32,046][262582] Updated weights for policy 0, policy_version 1620 (0.0008) +[2026-06-02 16:47:32,223][262582] Updated weights for policy 0, policy_version 1630 (0.0008) +[2026-06-02 16:47:32,474][262582] Updated weights for policy 0, policy_version 1643 (0.0011) +[2026-06-02 16:47:32,664][262582] Updated weights for policy 0, policy_version 1653 (0.0008) +[2026-06-02 16:47:32,864][262582] Updated weights for policy 0, policy_version 1664 (0.0009) +[2026-06-02 16:47:33,730][262582] Updated weights for policy 0, policy_version 1674 (0.0009) +[2026-06-02 16:47:33,908][262582] Updated weights for policy 0, policy_version 1684 (0.0009) +[2026-06-02 16:47:34,097][262582] Updated weights for policy 0, policy_version 1694 (0.0009) +[2026-06-02 16:47:34,281][262582] Updated weights for policy 0, policy_version 1704 (0.0009) +[2026-06-02 16:47:34,471][262582] Updated weights for policy 0, policy_version 1714 (0.0009) +[2026-06-02 16:47:34,657][262582] Updated weights for policy 0, policy_version 1724 (0.0008) +[2026-06-02 16:47:35,516][262582] Updated weights for policy 0, policy_version 1734 (0.0008) +[2026-06-02 16:47:35,722][262582] Updated weights for policy 0, policy_version 1746 (0.0008) +[2026-06-02 16:47:35,913][262582] Updated weights for policy 0, policy_version 1756 (0.0008) +[2026-06-02 16:47:36,007][260776] Fps is (10 sec: 16384.1, 60 sec: 17694.7, 300 sec: 17694.7). Total num frames: 884736. Throughput: 0: 18255.7. Samples: 887552. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:47:36,008][260776] Avg episode reward: [(0, '5.836')] +[2026-06-02 16:47:36,119][262582] Updated weights for policy 0, policy_version 1767 (0.0008) +[2026-06-02 16:47:36,306][262582] Updated weights for policy 0, policy_version 1777 (0.0008) +[2026-06-02 16:47:36,490][262582] Updated weights for policy 0, policy_version 1787 (0.0008) +[2026-06-02 16:47:36,579][262026] Saving new best policy, reward=5.836! +[2026-06-02 16:47:37,305][262582] Updated weights for policy 0, policy_version 1797 (0.0008) +[2026-06-02 16:47:37,484][262582] Updated weights for policy 0, policy_version 1807 (0.0008) +[2026-06-02 16:47:37,712][262582] Updated weights for policy 0, policy_version 1819 (0.0008) +[2026-06-02 16:47:37,922][262582] Updated weights for policy 0, policy_version 1830 (0.0008) +[2026-06-02 16:47:38,110][262582] Updated weights for policy 0, policy_version 1840 (0.0008) +[2026-06-02 16:47:38,337][262582] Updated weights for policy 0, policy_version 1852 (0.0008) +[2026-06-02 16:47:39,201][262582] Updated weights for policy 0, policy_version 1862 (0.0008) +[2026-06-02 16:47:39,399][262582] Updated weights for policy 0, policy_version 1873 (0.0008) +[2026-06-02 16:47:39,588][262582] Updated weights for policy 0, policy_version 1883 (0.0008) +[2026-06-02 16:47:39,777][262582] Updated weights for policy 0, policy_version 1893 (0.0009) +[2026-06-02 16:47:39,978][262582] Updated weights for policy 0, policy_version 1903 (0.0008) +[2026-06-02 16:47:40,160][262582] Updated weights for policy 0, policy_version 1913 (0.0008) +[2026-06-02 16:47:41,007][260776] Fps is (10 sec: 19660.5, 60 sec: 17873.4, 300 sec: 17873.4). Total num frames: 983040. Throughput: 0: 18796.1. Samples: 994560. Policy #0 lag: (min: 29.0, avg: 42.8, max: 93.0) +[2026-06-02 16:47:41,009][260776] Avg episode reward: [(0, '6.676')] +[2026-06-02 16:47:41,018][262582] Updated weights for policy 0, policy_version 1924 (0.0009) +[2026-06-02 16:47:41,200][262582] Updated weights for policy 0, policy_version 1934 (0.0008) +[2026-06-02 16:47:41,411][262582] Updated weights for policy 0, policy_version 1945 (0.0009) +[2026-06-02 16:47:41,634][262582] Updated weights for policy 0, policy_version 1957 (0.0009) +[2026-06-02 16:47:41,822][262582] Updated weights for policy 0, policy_version 1967 (0.0008) +[2026-06-02 16:47:42,010][262582] Updated weights for policy 0, policy_version 1977 (0.0008) +[2026-06-02 16:47:42,136][262026] Saving new best policy, reward=6.676! +[2026-06-02 16:47:42,888][262582] Updated weights for policy 0, policy_version 1987 (0.0009) +[2026-06-02 16:47:43,105][262582] Updated weights for policy 0, policy_version 1999 (0.0009) +[2026-06-02 16:47:43,353][262582] Updated weights for policy 0, policy_version 2012 (0.0008) +[2026-06-02 16:47:43,536][262582] Updated weights for policy 0, policy_version 2022 (0.0008) +[2026-06-02 16:47:43,754][262582] Updated weights for policy 0, policy_version 2033 (0.0009) +[2026-06-02 16:47:43,937][262582] Updated weights for policy 0, policy_version 2043 (0.0008) +[2026-06-02 16:47:44,794][262582] Updated weights for policy 0, policy_version 2054 (0.0008) +[2026-06-02 16:47:44,960][262582] Updated weights for policy 0, policy_version 2064 (0.0008) +[2026-06-02 16:47:45,161][262582] Updated weights for policy 0, policy_version 2074 (0.0008) +[2026-06-02 16:47:45,346][262582] Updated weights for policy 0, policy_version 2084 (0.0009) +[2026-06-02 16:47:45,555][262582] Updated weights for policy 0, policy_version 2095 (0.0008) +[2026-06-02 16:47:45,751][262582] Updated weights for policy 0, policy_version 2105 (0.0008) +[2026-06-02 16:47:46,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18022.4, 300 sec: 18022.4). Total num frames: 1081344. Throughput: 0: 18298.3. Samples: 1097088. Policy #0 lag: (min: 43.0, avg: 93.0, max: 107.0) +[2026-06-02 16:47:46,008][260776] Avg episode reward: [(0, '6.948')] +[2026-06-02 16:47:46,014][262026] Saving new best policy, reward=6.948! +[2026-06-02 16:47:46,607][262582] Updated weights for policy 0, policy_version 2116 (0.0008) +[2026-06-02 16:47:46,829][262582] Updated weights for policy 0, policy_version 2128 (0.0008) +[2026-06-02 16:47:47,018][262582] Updated weights for policy 0, policy_version 2138 (0.0008) +[2026-06-02 16:47:47,225][262582] Updated weights for policy 0, policy_version 2149 (0.0008) +[2026-06-02 16:47:47,420][262582] Updated weights for policy 0, policy_version 2159 (0.0008) +[2026-06-02 16:47:47,613][262582] Updated weights for policy 0, policy_version 2169 (0.0009) +[2026-06-02 16:47:48,498][262582] Updated weights for policy 0, policy_version 2181 (0.0008) +[2026-06-02 16:47:48,700][262582] Updated weights for policy 0, policy_version 2192 (0.0008) +[2026-06-02 16:47:48,908][262582] Updated weights for policy 0, policy_version 2203 (0.0008) +[2026-06-02 16:47:49,092][262582] Updated weights for policy 0, policy_version 2213 (0.0008) +[2026-06-02 16:47:49,280][262582] Updated weights for policy 0, policy_version 2223 (0.0009) +[2026-06-02 16:47:49,474][262582] Updated weights for policy 0, policy_version 2233 (0.0008) +[2026-06-02 16:47:50,314][262582] Updated weights for policy 0, policy_version 2243 (0.0008) +[2026-06-02 16:47:50,493][262582] Updated weights for policy 0, policy_version 2253 (0.0008) +[2026-06-02 16:47:50,699][262582] Updated weights for policy 0, policy_version 2264 (0.0008) +[2026-06-02 16:47:50,882][262582] Updated weights for policy 0, policy_version 2274 (0.0008) +[2026-06-02 16:47:51,007][260776] Fps is (10 sec: 16384.3, 60 sec: 18568.6, 300 sec: 17644.3). Total num frames: 1146880. Throughput: 0: 18198.8. Samples: 1155584. Policy #0 lag: (min: 50.0, avg: 101.5, max: 114.0) +[2026-06-02 16:47:51,008][260776] Avg episode reward: [(0, '7.709')] +[2026-06-02 16:47:51,065][262582] Updated weights for policy 0, policy_version 2284 (0.0008) +[2026-06-02 16:47:51,268][262582] Updated weights for policy 0, policy_version 2295 (0.0008) +[2026-06-02 16:47:51,443][262026] Saving new best policy, reward=7.709! +[2026-06-02 16:47:52,135][262582] Updated weights for policy 0, policy_version 2305 (0.0008) +[2026-06-02 16:47:52,347][262582] Updated weights for policy 0, policy_version 2316 (0.0008) +[2026-06-02 16:47:52,521][262582] Updated weights for policy 0, policy_version 2326 (0.0008) +[2026-06-02 16:47:52,733][262582] Updated weights for policy 0, policy_version 2337 (0.0008) +[2026-06-02 16:47:52,943][262582] Updated weights for policy 0, policy_version 2348 (0.0008) +[2026-06-02 16:47:53,151][262582] Updated weights for policy 0, policy_version 2359 (0.0008) +[2026-06-02 16:47:53,997][262582] Updated weights for policy 0, policy_version 2371 (0.0009) +[2026-06-02 16:47:54,232][262582] Updated weights for policy 0, policy_version 2384 (0.0008) +[2026-06-02 16:47:54,474][262582] Updated weights for policy 0, policy_version 2397 (0.0008) +[2026-06-02 16:47:54,660][262582] Updated weights for policy 0, policy_version 2407 (0.0008) +[2026-06-02 16:47:54,855][262582] Updated weights for policy 0, policy_version 2417 (0.0008) +[2026-06-02 16:47:55,041][262582] Updated weights for policy 0, policy_version 2427 (0.0008) +[2026-06-02 16:47:55,866][262582] Updated weights for policy 0, policy_version 2437 (0.0008) +[2026-06-02 16:47:56,007][260776] Fps is (10 sec: 16383.7, 60 sec: 18568.6, 300 sec: 17788.3). Total num frames: 1245184. Throughput: 0: 17996.8. Samples: 1258240. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) +[2026-06-02 16:47:56,008][260776] Avg episode reward: [(0, '9.191')] +[2026-06-02 16:47:56,047][262582] Updated weights for policy 0, policy_version 2447 (0.0008) +[2026-06-02 16:47:56,227][262582] Updated weights for policy 0, policy_version 2457 (0.0008) +[2026-06-02 16:47:56,414][262582] Updated weights for policy 0, policy_version 2467 (0.0008) +[2026-06-02 16:47:56,625][262582] Updated weights for policy 0, policy_version 2478 (0.0008) +[2026-06-02 16:47:56,819][262582] Updated weights for policy 0, policy_version 2488 (0.0008) +[2026-06-02 16:47:56,959][262026] Saving new best policy, reward=9.191! +[2026-06-02 16:47:57,675][262582] Updated weights for policy 0, policy_version 2499 (0.0008) +[2026-06-02 16:47:57,846][262582] Updated weights for policy 0, policy_version 2509 (0.0008) +[2026-06-02 16:47:58,023][262582] Updated weights for policy 0, policy_version 2519 (0.0008) +[2026-06-02 16:47:58,239][262582] Updated weights for policy 0, policy_version 2530 (0.0008) +[2026-06-02 16:47:58,440][262582] Updated weights for policy 0, policy_version 2541 (0.0008) +[2026-06-02 16:47:58,655][262582] Updated weights for policy 0, policy_version 2552 (0.0008) +[2026-06-02 16:47:59,555][262582] Updated weights for policy 0, policy_version 2564 (0.0009) +[2026-06-02 16:47:59,772][262582] Updated weights for policy 0, policy_version 2576 (0.0008) +[2026-06-02 16:47:59,973][262582] Updated weights for policy 0, policy_version 2587 (0.0008) +[2026-06-02 16:48:00,184][262582] Updated weights for policy 0, policy_version 2598 (0.0008) +[2026-06-02 16:48:00,416][262582] Updated weights for policy 0, policy_version 2610 (0.0009) +[2026-06-02 16:48:00,606][262582] Updated weights for policy 0, policy_version 2620 (0.0008) +[2026-06-02 16:48:01,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.6, 300 sec: 17913.2). Total num frames: 1343488. Throughput: 0: 17649.8. Samples: 1360256. Policy #0 lag: (min: 63.0, avg: 75.6, max: 127.0) +[2026-06-02 16:48:01,008][260776] Avg episode reward: [(0, '9.591')] +[2026-06-02 16:48:01,012][262026] Saving new best policy, reward=9.591! +[2026-06-02 16:48:01,469][262582] Updated weights for policy 0, policy_version 2631 (0.0008) +[2026-06-02 16:48:01,657][262582] Updated weights for policy 0, policy_version 2641 (0.0008) +[2026-06-02 16:48:01,881][262582] Updated weights for policy 0, policy_version 2653 (0.0008) +[2026-06-02 16:48:02,089][262582] Updated weights for policy 0, policy_version 2664 (0.0008) +[2026-06-02 16:48:02,313][262582] Updated weights for policy 0, policy_version 2676 (0.0008) +[2026-06-02 16:48:02,515][262582] Updated weights for policy 0, policy_version 2687 (0.0009) +[2026-06-02 16:48:03,389][262582] Updated weights for policy 0, policy_version 2699 (0.0008) +[2026-06-02 16:48:03,589][262582] Updated weights for policy 0, policy_version 2710 (0.0008) +[2026-06-02 16:48:03,774][262582] Updated weights for policy 0, policy_version 2720 (0.0008) +[2026-06-02 16:48:03,957][262582] Updated weights for policy 0, policy_version 2730 (0.0008) +[2026-06-02 16:48:04,188][262582] Updated weights for policy 0, policy_version 2742 (0.0008) +[2026-06-02 16:48:04,375][262582] Updated weights for policy 0, policy_version 2752 (0.0008) +[2026-06-02 16:48:05,246][262582] Updated weights for policy 0, policy_version 2762 (0.0008) +[2026-06-02 16:48:05,438][262582] Updated weights for policy 0, policy_version 2772 (0.0008) +[2026-06-02 16:48:05,621][262582] Updated weights for policy 0, policy_version 2782 (0.0008) +[2026-06-02 16:48:05,834][262582] Updated weights for policy 0, policy_version 2793 (0.0008) +[2026-06-02 16:48:06,007][260776] Fps is (10 sec: 16384.2, 60 sec: 17476.3, 300 sec: 17612.8). Total num frames: 1409024. Throughput: 0: 17974.1. Samples: 1419904. Policy #0 lag: (min: 63.0, avg: 75.6, max: 127.0) +[2026-06-02 16:48:06,008][260776] Avg episode reward: [(0, '11.431')] +[2026-06-02 16:48:06,041][262582] Updated weights for policy 0, policy_version 2804 (0.0008) +[2026-06-02 16:48:06,249][262582] Updated weights for policy 0, policy_version 2815 (0.0008) +[2026-06-02 16:48:06,264][262026] Saving new best policy, reward=11.431! +[2026-06-02 16:48:07,065][262582] Updated weights for policy 0, policy_version 2825 (0.0008) +[2026-06-02 16:48:07,288][262582] Updated weights for policy 0, policy_version 2837 (0.0008) +[2026-06-02 16:48:07,496][262582] Updated weights for policy 0, policy_version 2848 (0.0008) +[2026-06-02 16:48:07,724][262582] Updated weights for policy 0, policy_version 2860 (0.0008) +[2026-06-02 16:48:07,955][262582] Updated weights for policy 0, policy_version 2872 (0.0008) +[2026-06-02 16:48:08,795][262582] Updated weights for policy 0, policy_version 2884 (0.0008) +[2026-06-02 16:48:08,969][262582] Updated weights for policy 0, policy_version 2894 (0.0008) +[2026-06-02 16:48:09,201][262582] Updated weights for policy 0, policy_version 2906 (0.0008) +[2026-06-02 16:48:09,451][262582] Updated weights for policy 0, policy_version 2919 (0.0009) +[2026-06-02 16:48:09,664][262582] Updated weights for policy 0, policy_version 2930 (0.0008) +[2026-06-02 16:48:09,878][262582] Updated weights for policy 0, policy_version 2941 (0.0008) +[2026-06-02 16:48:10,686][262582] Updated weights for policy 0, policy_version 2952 (0.0008) +[2026-06-02 16:48:10,914][262582] Updated weights for policy 0, policy_version 2964 (0.0008) +[2026-06-02 16:48:11,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 17733.3). Total num frames: 1507328. Throughput: 0: 17618.5. Samples: 1522560. Policy #0 lag: (min: 63.0, avg: 76.2, max: 127.0) +[2026-06-02 16:48:11,008][260776] Avg episode reward: [(0, '15.327')] +[2026-06-02 16:48:11,103][262582] Updated weights for policy 0, policy_version 2974 (0.0008) +[2026-06-02 16:48:11,326][262582] Updated weights for policy 0, policy_version 2985 (0.0008) +[2026-06-02 16:48:11,517][262582] Updated weights for policy 0, policy_version 2995 (0.0008) +[2026-06-02 16:48:11,706][262582] Updated weights for policy 0, policy_version 3005 (0.0008) +[2026-06-02 16:48:11,760][262026] Saving new best policy, reward=15.327! +[2026-06-02 16:48:12,542][262582] Updated weights for policy 0, policy_version 3015 (0.0008) +[2026-06-02 16:48:12,726][262582] Updated weights for policy 0, policy_version 3025 (0.0008) +[2026-06-02 16:48:12,915][262582] Updated weights for policy 0, policy_version 3035 (0.0008) +[2026-06-02 16:48:13,122][262582] Updated weights for policy 0, policy_version 3046 (0.0008) +[2026-06-02 16:48:13,316][262582] Updated weights for policy 0, policy_version 3056 (0.0008) +[2026-06-02 16:48:13,545][262582] Updated weights for policy 0, policy_version 3068 (0.0008) +[2026-06-02 16:48:14,374][262582] Updated weights for policy 0, policy_version 3080 (0.0008) +[2026-06-02 16:48:14,592][262582] Updated weights for policy 0, policy_version 3091 (0.0008) +[2026-06-02 16:48:14,818][262582] Updated weights for policy 0, policy_version 3103 (0.0008) +[2026-06-02 16:48:15,046][262582] Updated weights for policy 0, policy_version 3115 (0.0008) +[2026-06-02 16:48:15,233][262582] Updated weights for policy 0, policy_version 3125 (0.0008) +[2026-06-02 16:48:15,431][262582] Updated weights for policy 0, policy_version 3135 (0.0008) +[2026-06-02 16:48:16,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18022.4, 300 sec: 17840.3). Total num frames: 1605632. Throughput: 0: 17646.9. Samples: 1627520. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) +[2026-06-02 16:48:16,008][260776] Avg episode reward: [(0, '16.058')] +[2026-06-02 16:48:16,221][262582] Updated weights for policy 0, policy_version 3145 (0.0008) +[2026-06-02 16:48:16,408][262582] Updated weights for policy 0, policy_version 3155 (0.0010) +[2026-06-02 16:48:16,602][262582] Updated weights for policy 0, policy_version 3165 (0.0008) +[2026-06-02 16:48:16,806][262582] Updated weights for policy 0, policy_version 3176 (0.0008) +[2026-06-02 16:48:17,008][262582] Updated weights for policy 0, policy_version 3186 (0.0008) +[2026-06-02 16:48:17,243][262582] Updated weights for policy 0, policy_version 3198 (0.0008) +[2026-06-02 16:48:17,267][262026] Saving new best policy, reward=16.058! +[2026-06-02 16:48:18,025][262582] Updated weights for policy 0, policy_version 3208 (0.0008) +[2026-06-02 16:48:18,242][262582] Updated weights for policy 0, policy_version 3220 (0.0008) +[2026-06-02 16:48:18,455][262582] Updated weights for policy 0, policy_version 3231 (0.0008) +[2026-06-02 16:48:18,646][262582] Updated weights for policy 0, policy_version 3241 (0.0008) +[2026-06-02 16:48:18,855][262582] Updated weights for policy 0, policy_version 3252 (0.0008) +[2026-06-02 16:48:19,069][262582] Updated weights for policy 0, policy_version 3263 (0.0008) +[2026-06-02 16:48:19,883][262582] Updated weights for policy 0, policy_version 3273 (0.0008) +[2026-06-02 16:48:20,090][262582] Updated weights for policy 0, policy_version 3284 (0.0008) +[2026-06-02 16:48:20,280][262582] Updated weights for policy 0, policy_version 3294 (0.0008) +[2026-06-02 16:48:20,495][262582] Updated weights for policy 0, policy_version 3305 (0.0008) +[2026-06-02 16:48:20,719][262582] Updated weights for policy 0, policy_version 3317 (0.0009) +[2026-06-02 16:48:20,918][262582] Updated weights for policy 0, policy_version 3327 (0.0009) +[2026-06-02 16:48:21,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18022.4, 300 sec: 17936.2). Total num frames: 1703936. Throughput: 0: 17746.5. Samples: 1686144. Policy #0 lag: (min: 12.0, avg: 26.3, max: 76.0) +[2026-06-02 16:48:21,008][260776] Avg episode reward: [(0, '18.856')] +[2026-06-02 16:48:21,013][262026] Saving new best policy, reward=18.856! +[2026-06-02 16:48:21,710][262582] Updated weights for policy 0, policy_version 3337 (0.0008) +[2026-06-02 16:48:21,894][262582] Updated weights for policy 0, policy_version 3347 (0.0009) +[2026-06-02 16:48:22,087][262582] Updated weights for policy 0, policy_version 3357 (0.0008) +[2026-06-02 16:48:22,296][262582] Updated weights for policy 0, policy_version 3368 (0.0008) +[2026-06-02 16:48:22,526][262582] Updated weights for policy 0, policy_version 3380 (0.0008) +[2026-06-02 16:48:22,716][262582] Updated weights for policy 0, policy_version 3390 (0.0008) +[2026-06-02 16:48:23,529][262582] Updated weights for policy 0, policy_version 3401 (0.0008) +[2026-06-02 16:48:23,715][262582] Updated weights for policy 0, policy_version 3411 (0.0008) +[2026-06-02 16:48:23,955][262582] Updated weights for policy 0, policy_version 3423 (0.0008) +[2026-06-02 16:48:24,159][262582] Updated weights for policy 0, policy_version 3434 (0.0008) +[2026-06-02 16:48:24,349][262582] Updated weights for policy 0, policy_version 3444 (0.0008) +[2026-06-02 16:48:24,545][262582] Updated weights for policy 0, policy_version 3454 (0.0008) +[2026-06-02 16:48:25,314][262582] Updated weights for policy 0, policy_version 3464 (0.0008) +[2026-06-02 16:48:25,497][262582] Updated weights for policy 0, policy_version 3474 (0.0008) +[2026-06-02 16:48:25,713][262582] Updated weights for policy 0, policy_version 3485 (0.0008) +[2026-06-02 16:48:25,900][262582] Updated weights for policy 0, policy_version 3495 (0.0008) +[2026-06-02 16:48:26,007][260776] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17694.7). Total num frames: 1769472. Throughput: 0: 17695.4. Samples: 1790848. Policy #0 lag: (min: 63.0, avg: 77.2, max: 127.0) +[2026-06-02 16:48:26,008][260776] Avg episode reward: [(0, '21.099')] +[2026-06-02 16:48:26,085][262582] Updated weights for policy 0, policy_version 3505 (0.0008) +[2026-06-02 16:48:26,285][262582] Updated weights for policy 0, policy_version 3515 (0.0008) +[2026-06-02 16:48:26,373][262026] Saving new best policy, reward=21.099! +[2026-06-02 16:48:27,043][262582] Updated weights for policy 0, policy_version 3525 (0.0008) +[2026-06-02 16:48:27,245][262582] Updated weights for policy 0, policy_version 3536 (0.0008) +[2026-06-02 16:48:27,456][262582] Updated weights for policy 0, policy_version 3547 (0.0008) +[2026-06-02 16:48:27,643][262582] Updated weights for policy 0, policy_version 3557 (0.0008) +[2026-06-02 16:48:27,840][262582] Updated weights for policy 0, policy_version 3567 (0.0009) +[2026-06-02 16:48:28,052][262582] Updated weights for policy 0, policy_version 3578 (0.0008) +[2026-06-02 16:48:28,828][262582] Updated weights for policy 0, policy_version 3588 (0.0009) +[2026-06-02 16:48:29,047][262582] Updated weights for policy 0, policy_version 3600 (0.0008) +[2026-06-02 16:48:29,230][262582] Updated weights for policy 0, policy_version 3610 (0.0008) +[2026-06-02 16:48:29,461][262582] Updated weights for policy 0, policy_version 3622 (0.0008) +[2026-06-02 16:48:29,677][262582] Updated weights for policy 0, policy_version 3633 (0.0008) +[2026-06-02 16:48:29,872][262582] Updated weights for policy 0, policy_version 3643 (0.0008) +[2026-06-02 16:48:30,643][262582] Updated weights for policy 0, policy_version 3653 (0.0009) +[2026-06-02 16:48:30,842][262582] Updated weights for policy 0, policy_version 3664 (0.0008) +[2026-06-02 16:48:31,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 17788.3). Total num frames: 1867776. Throughput: 0: 17928.5. Samples: 1903872. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:48:31,008][260776] Avg episode reward: [(0, '27.663')] +[2026-06-02 16:48:31,035][262582] Updated weights for policy 0, policy_version 3674 (0.0008) +[2026-06-02 16:48:31,231][262582] Updated weights for policy 0, policy_version 3684 (0.0008) +[2026-06-02 16:48:31,425][262582] Updated weights for policy 0, policy_version 3694 (0.0009) +[2026-06-02 16:48:31,616][262582] Updated weights for policy 0, policy_version 3704 (0.0008) +[2026-06-02 16:48:31,760][262026] Saving new best policy, reward=27.663! +[2026-06-02 16:48:32,403][262582] Updated weights for policy 0, policy_version 3714 (0.0009) +[2026-06-02 16:48:32,624][262582] Updated weights for policy 0, policy_version 3726 (0.0008) +[2026-06-02 16:48:32,821][262582] Updated weights for policy 0, policy_version 3736 (0.0009) +[2026-06-02 16:48:33,008][262582] Updated weights for policy 0, policy_version 3746 (0.0008) +[2026-06-02 16:48:33,207][262582] Updated weights for policy 0, policy_version 3756 (0.0009) +[2026-06-02 16:48:33,395][262582] Updated weights for policy 0, policy_version 3766 (0.0008) +[2026-06-02 16:48:34,191][262582] Updated weights for policy 0, policy_version 3777 (0.0008) +[2026-06-02 16:48:34,371][262582] Updated weights for policy 0, policy_version 3787 (0.0008) +[2026-06-02 16:48:34,584][262582] Updated weights for policy 0, policy_version 3798 (0.0008) +[2026-06-02 16:48:34,801][262582] Updated weights for policy 0, policy_version 3809 (0.0008) +[2026-06-02 16:48:34,984][262582] Updated weights for policy 0, policy_version 3819 (0.0008) +[2026-06-02 16:48:35,183][262582] Updated weights for policy 0, policy_version 3829 (0.0009) +[2026-06-02 16:48:36,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18022.4, 300 sec: 17873.4). Total num frames: 1966080. Throughput: 0: 17772.1. Samples: 1955328. Policy #0 lag: (min: 32.0, avg: 73.2, max: 92.0) +[2026-06-02 16:48:36,008][260776] Avg episode reward: [(0, '32.653')] +[2026-06-02 16:48:36,019][262582] Updated weights for policy 0, policy_version 3842 (0.0008) +[2026-06-02 16:48:36,209][262582] Updated weights for policy 0, policy_version 3853 (0.0008) +[2026-06-02 16:48:36,417][262582] Updated weights for policy 0, policy_version 3864 (0.0008) +[2026-06-02 16:48:36,605][262582] Updated weights for policy 0, policy_version 3874 (0.0008) +[2026-06-02 16:48:36,806][262582] Updated weights for policy 0, policy_version 3884 (0.0010) +[2026-06-02 16:48:37,012][262582] Updated weights for policy 0, policy_version 3895 (0.0008) +[2026-06-02 16:48:37,185][262026] Saving new best policy, reward=32.653! +[2026-06-02 16:48:37,772][262582] Updated weights for policy 0, policy_version 3905 (0.0008) +[2026-06-02 16:48:37,961][262582] Updated weights for policy 0, policy_version 3915 (0.0008) +[2026-06-02 16:48:38,147][262582] Updated weights for policy 0, policy_version 3925 (0.0008) +[2026-06-02 16:48:38,355][262582] Updated weights for policy 0, policy_version 3936 (0.0008) +[2026-06-02 16:48:38,562][262582] Updated weights for policy 0, policy_version 3946 (0.0008) +[2026-06-02 16:48:38,748][262582] Updated weights for policy 0, policy_version 3956 (0.0008) +[2026-06-02 16:48:38,943][262582] Updated weights for policy 0, policy_version 3966 (0.0008) +[2026-06-02 16:48:39,744][262582] Updated weights for policy 0, policy_version 3978 (0.0008) +[2026-06-02 16:48:39,947][262582] Updated weights for policy 0, policy_version 3988 (0.0009) +[2026-06-02 16:48:40,158][262582] Updated weights for policy 0, policy_version 3999 (0.0008) +[2026-06-02 16:48:40,369][262582] Updated weights for policy 0, policy_version 4010 (0.0008) +[2026-06-02 16:48:40,572][262582] Updated weights for policy 0, policy_version 4021 (0.0008) +[2026-06-02 16:48:40,772][262582] Updated weights for policy 0, policy_version 4031 (0.0008) +[2026-06-02 16:48:41,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18022.5, 300 sec: 17951.2). Total num frames: 2064384. Throughput: 0: 17999.7. Samples: 2068224. Policy #0 lag: (min: 39.0, avg: 54.2, max: 103.0) +[2026-06-02 16:48:41,008][260776] Avg episode reward: [(0, '43.248')] +[2026-06-02 16:48:41,013][262026] Saving new best policy, reward=43.248! +[2026-06-02 16:48:41,539][262582] Updated weights for policy 0, policy_version 4041 (0.0008) +[2026-06-02 16:48:41,730][262582] Updated weights for policy 0, policy_version 4051 (0.0008) +[2026-06-02 16:48:41,925][262582] Updated weights for policy 0, policy_version 4061 (0.0009) +[2026-06-02 16:48:42,120][262582] Updated weights for policy 0, policy_version 4071 (0.0008) +[2026-06-02 16:48:42,317][262582] Updated weights for policy 0, policy_version 4081 (0.0008) +[2026-06-02 16:48:42,511][262582] Updated weights for policy 0, policy_version 4091 (0.0008) +[2026-06-02 16:48:43,224][262582] Updated weights for policy 0, policy_version 4101 (0.0008) +[2026-06-02 16:48:43,432][262582] Updated weights for policy 0, policy_version 4112 (0.0008) +[2026-06-02 16:48:43,634][262582] Updated weights for policy 0, policy_version 4122 (0.0008) +[2026-06-02 16:48:43,823][262582] Updated weights for policy 0, policy_version 4132 (0.0008) +[2026-06-02 16:48:44,018][262582] Updated weights for policy 0, policy_version 4142 (0.0008) +[2026-06-02 16:48:44,208][262582] Updated weights for policy 0, policy_version 4152 (0.0008) +[2026-06-02 16:48:44,982][262582] Updated weights for policy 0, policy_version 4164 (0.0008) +[2026-06-02 16:48:45,171][262582] Updated weights for policy 0, policy_version 4174 (0.0008) +[2026-06-02 16:48:45,365][262582] Updated weights for policy 0, policy_version 4184 (0.0009) +[2026-06-02 16:48:45,558][262582] Updated weights for policy 0, policy_version 4194 (0.0009) +[2026-06-02 16:48:45,776][262582] Updated weights for policy 0, policy_version 4205 (0.0009) +[2026-06-02 16:48:45,978][262582] Updated weights for policy 0, policy_version 4216 (0.0008) +[2026-06-02 16:48:46,007][260776] Fps is (10 sec: 16384.0, 60 sec: 17476.3, 300 sec: 17749.3). Total num frames: 2129920. Throughput: 0: 18204.4. Samples: 2179456. Policy #0 lag: (min: 41.0, avg: 72.9, max: 103.0) +[2026-06-02 16:48:46,008][260776] Avg episode reward: [(0, '46.912')] +[2026-06-02 16:48:46,131][262026] Saving new best policy, reward=46.912! +[2026-06-02 16:48:46,713][262582] Updated weights for policy 0, policy_version 4226 (0.0008) +[2026-06-02 16:48:46,893][262582] Updated weights for policy 0, policy_version 4236 (0.0008) +[2026-06-02 16:48:47,085][262582] Updated weights for policy 0, policy_version 4246 (0.0008) +[2026-06-02 16:48:47,274][262582] Updated weights for policy 0, policy_version 4256 (0.0009) +[2026-06-02 16:48:47,472][262582] Updated weights for policy 0, policy_version 4266 (0.0008) +[2026-06-02 16:48:47,667][262582] Updated weights for policy 0, policy_version 4276 (0.0008) +[2026-06-02 16:48:47,864][262582] Updated weights for policy 0, policy_version 4286 (0.0008) +[2026-06-02 16:48:48,587][262582] Updated weights for policy 0, policy_version 4296 (0.0009) +[2026-06-02 16:48:48,784][262582] Updated weights for policy 0, policy_version 4306 (0.0008) +[2026-06-02 16:48:48,986][262582] Updated weights for policy 0, policy_version 4316 (0.0009) +[2026-06-02 16:48:49,188][262582] Updated weights for policy 0, policy_version 4327 (0.0008) +[2026-06-02 16:48:49,393][262582] Updated weights for policy 0, policy_version 4337 (0.0009) +[2026-06-02 16:48:49,579][262582] Updated weights for policy 0, policy_version 4347 (0.0008) +[2026-06-02 16:48:50,310][262582] Updated weights for policy 0, policy_version 4357 (0.0008) +[2026-06-02 16:48:50,523][262582] Updated weights for policy 0, policy_version 4368 (0.0009) +[2026-06-02 16:48:50,740][262582] Updated weights for policy 0, policy_version 4379 (0.0008) +[2026-06-02 16:48:50,933][262582] Updated weights for policy 0, policy_version 4389 (0.0009) +[2026-06-02 16:48:51,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18022.4, 300 sec: 17825.8). Total num frames: 2228224. Throughput: 0: 18136.2. Samples: 2236032. Policy #0 lag: (min: 23.0, avg: 37.9, max: 87.0) +[2026-06-02 16:48:51,008][260776] Avg episode reward: [(0, '47.724')] +[2026-06-02 16:48:51,128][262582] Updated weights for policy 0, policy_version 4399 (0.0009) +[2026-06-02 16:48:51,330][262582] Updated weights for policy 0, policy_version 4409 (0.0008) +[2026-06-02 16:48:51,448][262026] Saving new best policy, reward=47.724! +[2026-06-02 16:48:52,074][262582] Updated weights for policy 0, policy_version 4420 (0.0009) +[2026-06-02 16:48:52,267][262582] Updated weights for policy 0, policy_version 4430 (0.0008) +[2026-06-02 16:48:52,463][262582] Updated weights for policy 0, policy_version 4440 (0.0008) +[2026-06-02 16:48:52,671][262582] Updated weights for policy 0, policy_version 4451 (0.0009) +[2026-06-02 16:48:52,866][262582] Updated weights for policy 0, policy_version 4461 (0.0008) +[2026-06-02 16:48:53,081][262582] Updated weights for policy 0, policy_version 4472 (0.0008) +[2026-06-02 16:48:53,819][262582] Updated weights for policy 0, policy_version 4482 (0.0008) +[2026-06-02 16:48:54,009][262582] Updated weights for policy 0, policy_version 4492 (0.0008) +[2026-06-02 16:48:54,204][262582] Updated weights for policy 0, policy_version 4502 (0.0008) +[2026-06-02 16:48:54,392][262582] Updated weights for policy 0, policy_version 4512 (0.0008) +[2026-06-02 16:48:54,614][262582] Updated weights for policy 0, policy_version 4523 (0.0008) +[2026-06-02 16:48:54,804][262582] Updated weights for policy 0, policy_version 4533 (0.0008) +[2026-06-02 16:48:55,003][262582] Updated weights for policy 0, policy_version 4543 (0.0008) +[2026-06-02 16:48:55,757][262582] Updated weights for policy 0, policy_version 4554 (0.0009) +[2026-06-02 16:48:55,950][262582] Updated weights for policy 0, policy_version 4564 (0.0009) +[2026-06-02 16:48:56,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18022.4, 300 sec: 17896.4). Total num frames: 2326528. Throughput: 0: 18255.6. Samples: 2344064. Policy #0 lag: (min: 31.0, avg: 47.2, max: 95.0) +[2026-06-02 16:48:56,008][260776] Avg episode reward: [(0, '57.681')] +[2026-06-02 16:48:56,135][262582] Updated weights for policy 0, policy_version 4574 (0.0008) +[2026-06-02 16:48:56,346][262582] Updated weights for policy 0, policy_version 4584 (0.0008) +[2026-06-02 16:48:56,585][262582] Updated weights for policy 0, policy_version 4596 (0.0008) +[2026-06-02 16:48:56,776][262582] Updated weights for policy 0, policy_version 4606 (0.0009) +[2026-06-02 16:48:56,805][262026] Saving new best policy, reward=57.681! +[2026-06-02 16:48:57,523][262582] Updated weights for policy 0, policy_version 4617 (0.0009) +[2026-06-02 16:48:57,731][262582] Updated weights for policy 0, policy_version 4628 (0.0008) +[2026-06-02 16:48:57,929][262582] Updated weights for policy 0, policy_version 4638 (0.0009) +[2026-06-02 16:48:58,120][262582] Updated weights for policy 0, policy_version 4648 (0.0009) +[2026-06-02 16:48:58,318][262582] Updated weights for policy 0, policy_version 4658 (0.0008) +[2026-06-02 16:48:58,524][262582] Updated weights for policy 0, policy_version 4668 (0.0008) +[2026-06-02 16:48:59,314][262582] Updated weights for policy 0, policy_version 4680 (0.0009) +[2026-06-02 16:48:59,511][262582] Updated weights for policy 0, policy_version 4690 (0.0008) +[2026-06-02 16:48:59,704][262582] Updated weights for policy 0, policy_version 4700 (0.0008) +[2026-06-02 16:48:59,906][262582] Updated weights for policy 0, policy_version 4710 (0.0008) +[2026-06-02 16:49:00,096][262582] Updated weights for policy 0, policy_version 4720 (0.0008) +[2026-06-02 16:49:00,312][262582] Updated weights for policy 0, policy_version 4731 (0.0009) +[2026-06-02 16:49:01,002][262582] Updated weights for policy 0, policy_version 4741 (0.0008) +[2026-06-02 16:49:01,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18022.4, 300 sec: 17961.7). Total num frames: 2424832. Throughput: 0: 18309.7. Samples: 2451456. Policy #0 lag: (min: 29.0, avg: 63.1, max: 91.0) +[2026-06-02 16:49:01,008][260776] Avg episode reward: [(0, '68.736')] +[2026-06-02 16:49:01,219][262582] Updated weights for policy 0, policy_version 4752 (0.0010) +[2026-06-02 16:49:01,407][262582] Updated weights for policy 0, policy_version 4762 (0.0009) +[2026-06-02 16:49:01,604][262582] Updated weights for policy 0, policy_version 4772 (0.0008) +[2026-06-02 16:49:01,810][262582] Updated weights for policy 0, policy_version 4782 (0.0008) +[2026-06-02 16:49:02,008][262582] Updated weights for policy 0, policy_version 4792 (0.0008) +[2026-06-02 16:49:02,153][262026] Saving new best policy, reward=68.736! +[2026-06-02 16:49:02,737][262582] Updated weights for policy 0, policy_version 4802 (0.0009) +[2026-06-02 16:49:02,921][262582] Updated weights for policy 0, policy_version 4812 (0.0009) +[2026-06-02 16:49:03,119][262582] Updated weights for policy 0, policy_version 4822 (0.0008) +[2026-06-02 16:49:03,317][262582] Updated weights for policy 0, policy_version 4832 (0.0008) +[2026-06-02 16:49:03,507][262582] Updated weights for policy 0, policy_version 4842 (0.0006) +[2026-06-02 16:49:03,704][262582] Updated weights for policy 0, policy_version 4852 (0.0005) +[2026-06-02 16:49:03,907][262582] Updated weights for policy 0, policy_version 4862 (0.0005) +[2026-06-02 16:49:04,627][262582] Updated weights for policy 0, policy_version 4872 (0.0008) +[2026-06-02 16:49:04,834][262582] Updated weights for policy 0, policy_version 4882 (0.0009) +[2026-06-02 16:49:05,027][262582] Updated weights for policy 0, policy_version 4892 (0.0008) +[2026-06-02 16:49:05,244][262582] Updated weights for policy 0, policy_version 4903 (0.0009) +[2026-06-02 16:49:05,436][262582] Updated weights for policy 0, policy_version 4913 (0.0009) +[2026-06-02 16:49:05,641][262582] Updated weights for policy 0, policy_version 4923 (0.0008) +[2026-06-02 16:49:06,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.5, 300 sec: 18022.4). Total num frames: 2523136. Throughput: 0: 18321.1. Samples: 2510592. Policy #0 lag: (min: 14.0, avg: 30.3, max: 78.0) +[2026-06-02 16:49:06,008][260776] Avg episode reward: [(0, '64.270')] +[2026-06-02 16:49:06,325][262582] Updated weights for policy 0, policy_version 4933 (0.0008) +[2026-06-02 16:49:06,512][262582] Updated weights for policy 0, policy_version 4943 (0.0008) +[2026-06-02 16:49:06,709][262582] Updated weights for policy 0, policy_version 4953 (0.0008) +[2026-06-02 16:49:06,912][262582] Updated weights for policy 0, policy_version 4964 (0.0009) +[2026-06-02 16:49:07,133][262582] Updated weights for policy 0, policy_version 4975 (0.0008) +[2026-06-02 16:49:07,339][262582] Updated weights for policy 0, policy_version 4985 (0.0009) +[2026-06-02 16:49:08,104][262582] Updated weights for policy 0, policy_version 4995 (0.0009) +[2026-06-02 16:49:08,290][262582] Updated weights for policy 0, policy_version 5005 (0.0009) +[2026-06-02 16:49:08,490][262582] Updated weights for policy 0, policy_version 5015 (0.0008) +[2026-06-02 16:49:08,702][262582] Updated weights for policy 0, policy_version 5026 (0.0008) +[2026-06-02 16:49:08,912][262582] Updated weights for policy 0, policy_version 5037 (0.0008) +[2026-06-02 16:49:09,120][262582] Updated weights for policy 0, policy_version 5047 (0.0008) +[2026-06-02 16:49:09,859][262582] Updated weights for policy 0, policy_version 5057 (0.0009) +[2026-06-02 16:49:10,058][262582] Updated weights for policy 0, policy_version 5068 (0.0009) +[2026-06-02 16:49:10,278][262582] Updated weights for policy 0, policy_version 5080 (0.0008) +[2026-06-02 16:49:10,493][262582] Updated weights for policy 0, policy_version 5090 (0.0009) +[2026-06-02 16:49:10,705][262582] Updated weights for policy 0, policy_version 5101 (0.0009) +[2026-06-02 16:49:10,906][262582] Updated weights for policy 0, policy_version 5111 (0.0009) +[2026-06-02 16:49:11,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 17852.9). Total num frames: 2588672. Throughput: 0: 18375.1. Samples: 2617728. Policy #0 lag: (min: 14.0, avg: 30.3, max: 78.0) +[2026-06-02 16:49:11,008][260776] Avg episode reward: [(0, '75.770')] +[2026-06-02 16:49:11,075][262026] Saving new best policy, reward=75.770! +[2026-06-02 16:49:11,678][262582] Updated weights for policy 0, policy_version 5121 (0.0009) +[2026-06-02 16:49:11,854][262582] Updated weights for policy 0, policy_version 5131 (0.0009) +[2026-06-02 16:49:12,060][262582] Updated weights for policy 0, policy_version 5142 (0.0008) +[2026-06-02 16:49:12,262][262582] Updated weights for policy 0, policy_version 5152 (0.0008) +[2026-06-02 16:49:12,460][262582] Updated weights for policy 0, policy_version 5162 (0.0008) +[2026-06-02 16:49:12,656][262582] Updated weights for policy 0, policy_version 5172 (0.0009) +[2026-06-02 16:49:12,875][262582] Updated weights for policy 0, policy_version 5183 (0.0008) +[2026-06-02 16:49:13,562][262582] Updated weights for policy 0, policy_version 5193 (0.0008) +[2026-06-02 16:49:13,775][262582] Updated weights for policy 0, policy_version 5204 (0.0008) +[2026-06-02 16:49:14,011][262582] Updated weights for policy 0, policy_version 5216 (0.0008) +[2026-06-02 16:49:14,230][262582] Updated weights for policy 0, policy_version 5227 (0.0009) +[2026-06-02 16:49:14,420][262582] Updated weights for policy 0, policy_version 5237 (0.0008) +[2026-06-02 16:49:14,629][262582] Updated weights for policy 0, policy_version 5248 (0.0008) +[2026-06-02 16:49:15,347][262582] Updated weights for policy 0, policy_version 5258 (0.0008) +[2026-06-02 16:49:15,542][262582] Updated weights for policy 0, policy_version 5268 (0.0008) +[2026-06-02 16:49:15,730][262582] Updated weights for policy 0, policy_version 5278 (0.0008) +[2026-06-02 16:49:15,950][262582] Updated weights for policy 0, policy_version 5289 (0.0008) +[2026-06-02 16:49:16,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 17913.2). Total num frames: 2686976. Throughput: 0: 18517.3. Samples: 2737152. Policy #0 lag: (min: 28.0, avg: 44.0, max: 92.0) +[2026-06-02 16:49:16,008][260776] Avg episode reward: [(0, '90.128')] +[2026-06-02 16:49:16,152][262582] Updated weights for policy 0, policy_version 5299 (0.0008) +[2026-06-02 16:49:16,352][262582] Updated weights for policy 0, policy_version 5309 (0.0008) +[2026-06-02 16:49:16,401][262026] Saving new best policy, reward=90.128! +[2026-06-02 16:49:17,068][262582] Updated weights for policy 0, policy_version 5319 (0.0008) +[2026-06-02 16:49:17,250][262582] Updated weights for policy 0, policy_version 5329 (0.0008) +[2026-06-02 16:49:17,466][262582] Updated weights for policy 0, policy_version 5340 (0.0009) +[2026-06-02 16:49:17,680][262582] Updated weights for policy 0, policy_version 5351 (0.0008) +[2026-06-02 16:49:17,871][262582] Updated weights for policy 0, policy_version 5361 (0.0008) +[2026-06-02 16:49:18,080][262582] Updated weights for policy 0, policy_version 5371 (0.0009) +[2026-06-02 16:49:18,820][262582] Updated weights for policy 0, policy_version 5381 (0.0009) +[2026-06-02 16:49:19,004][262582] Updated weights for policy 0, policy_version 5391 (0.0009) +[2026-06-02 16:49:19,210][262582] Updated weights for policy 0, policy_version 5401 (0.0009) +[2026-06-02 16:49:19,403][262582] Updated weights for policy 0, policy_version 5411 (0.0008) +[2026-06-02 16:49:19,610][262582] Updated weights for policy 0, policy_version 5421 (0.0008) +[2026-06-02 16:49:19,808][262582] Updated weights for policy 0, policy_version 5431 (0.0009) +[2026-06-02 16:49:20,496][262582] Updated weights for policy 0, policy_version 5441 (0.0008) +[2026-06-02 16:49:20,702][262582] Updated weights for policy 0, policy_version 5452 (0.0008) +[2026-06-02 16:49:20,897][262582] Updated weights for policy 0, policy_version 5462 (0.0008) +[2026-06-02 16:49:21,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18022.4, 300 sec: 17969.5). Total num frames: 2785280. Throughput: 0: 18426.3. Samples: 2784512. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:49:21,008][260776] Avg episode reward: [(0, '99.169')] +[2026-06-02 16:49:21,109][262582] Updated weights for policy 0, policy_version 5472 (0.0008) +[2026-06-02 16:49:21,293][262582] Updated weights for policy 0, policy_version 5482 (0.0008) +[2026-06-02 16:49:21,510][262582] Updated weights for policy 0, policy_version 5492 (0.0008) +[2026-06-02 16:49:21,700][262582] Updated weights for policy 0, policy_version 5502 (0.0008) +[2026-06-02 16:49:21,733][262026] Saving new best policy, reward=99.169! +[2026-06-02 16:49:22,387][262582] Updated weights for policy 0, policy_version 5512 (0.0008) +[2026-06-02 16:49:22,582][262582] Updated weights for policy 0, policy_version 5522 (0.0008) +[2026-06-02 16:49:22,785][262582] Updated weights for policy 0, policy_version 5532 (0.0009) +[2026-06-02 16:49:22,967][262582] Updated weights for policy 0, policy_version 5542 (0.0008) +[2026-06-02 16:49:23,174][262582] Updated weights for policy 0, policy_version 5552 (0.0008) +[2026-06-02 16:49:23,377][262582] Updated weights for policy 0, policy_version 5562 (0.0008) +[2026-06-02 16:49:24,123][262582] Updated weights for policy 0, policy_version 5573 (0.0009) +[2026-06-02 16:49:24,320][262582] Updated weights for policy 0, policy_version 5583 (0.0009) +[2026-06-02 16:49:24,518][262582] Updated weights for policy 0, policy_version 5593 (0.0009) +[2026-06-02 16:49:24,735][262582] Updated weights for policy 0, policy_version 5604 (0.0008) +[2026-06-02 16:49:24,937][262582] Updated weights for policy 0, policy_version 5614 (0.0008) +[2026-06-02 16:49:25,122][262582] Updated weights for policy 0, policy_version 5624 (0.0008) +[2026-06-02 16:49:25,851][262582] Updated weights for policy 0, policy_version 5634 (0.0008) +[2026-06-02 16:49:26,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18022.4). Total num frames: 2883584. Throughput: 0: 18554.3. Samples: 2903168. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) +[2026-06-02 16:49:26,008][260776] Avg episode reward: [(0, '114.449')] +[2026-06-02 16:49:26,031][262582] Updated weights for policy 0, policy_version 5644 (0.0009) +[2026-06-02 16:49:26,231][262582] Updated weights for policy 0, policy_version 5654 (0.0009) +[2026-06-02 16:49:26,453][262582] Updated weights for policy 0, policy_version 5665 (0.0009) +[2026-06-02 16:49:26,645][262582] Updated weights for policy 0, policy_version 5675 (0.0009) +[2026-06-02 16:49:26,845][262582] Updated weights for policy 0, policy_version 5685 (0.0009) +[2026-06-02 16:49:27,037][262582] Updated weights for policy 0, policy_version 5695 (0.0009) +[2026-06-02 16:49:27,053][262026] Saving new best policy, reward=114.449! +[2026-06-02 16:49:27,765][262582] Updated weights for policy 0, policy_version 5706 (0.0009) +[2026-06-02 16:49:27,959][262582] Updated weights for policy 0, policy_version 5716 (0.0009) +[2026-06-02 16:49:28,157][262582] Updated weights for policy 0, policy_version 5726 (0.0009) +[2026-06-02 16:49:28,356][262582] Updated weights for policy 0, policy_version 5736 (0.0009) +[2026-06-02 16:49:28,552][262582] Updated weights for policy 0, policy_version 5746 (0.0009) +[2026-06-02 16:49:28,771][262582] Updated weights for policy 0, policy_version 5757 (0.0009) +[2026-06-02 16:49:29,467][262582] Updated weights for policy 0, policy_version 5767 (0.0009) +[2026-06-02 16:49:29,656][262582] Updated weights for policy 0, policy_version 5777 (0.0009) +[2026-06-02 16:49:29,849][262582] Updated weights for policy 0, policy_version 5787 (0.0009) +[2026-06-02 16:49:30,045][262582] Updated weights for policy 0, policy_version 5797 (0.0009) +[2026-06-02 16:49:30,245][262582] Updated weights for policy 0, policy_version 5807 (0.0009) +[2026-06-02 16:49:30,440][262582] Updated weights for policy 0, policy_version 5817 (0.0009) +[2026-06-02 16:49:31,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18072.0). Total num frames: 2981888. Throughput: 0: 18426.3. Samples: 3008640. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) +[2026-06-02 16:49:31,008][260776] Avg episode reward: [(0, '121.311')] +[2026-06-02 16:49:31,183][262582] Updated weights for policy 0, policy_version 5827 (0.0009) +[2026-06-02 16:49:31,390][262582] Updated weights for policy 0, policy_version 5838 (0.0009) +[2026-06-02 16:49:31,608][262582] Updated weights for policy 0, policy_version 5849 (0.0009) +[2026-06-02 16:49:31,796][262582] Updated weights for policy 0, policy_version 5859 (0.0008) +[2026-06-02 16:49:31,988][262582] Updated weights for policy 0, policy_version 5869 (0.0008) +[2026-06-02 16:49:32,203][262582] Updated weights for policy 0, policy_version 5879 (0.0008) +[2026-06-02 16:49:32,375][262026] Saving new best policy, reward=121.311! +[2026-06-02 16:49:32,922][262582] Updated weights for policy 0, policy_version 5889 (0.0009) +[2026-06-02 16:49:33,105][262582] Updated weights for policy 0, policy_version 5899 (0.0009) +[2026-06-02 16:49:33,315][262582] Updated weights for policy 0, policy_version 5910 (0.0009) +[2026-06-02 16:49:33,515][262582] Updated weights for policy 0, policy_version 5920 (0.0008) +[2026-06-02 16:49:33,750][262582] Updated weights for policy 0, policy_version 5932 (0.0008) +[2026-06-02 16:49:33,974][262582] Updated weights for policy 0, policy_version 5943 (0.0008) +[2026-06-02 16:49:34,690][262582] Updated weights for policy 0, policy_version 5953 (0.0008) +[2026-06-02 16:49:34,884][262582] Updated weights for policy 0, policy_version 5963 (0.0009) +[2026-06-02 16:49:35,071][262582] Updated weights for policy 0, policy_version 5973 (0.0008) +[2026-06-02 16:49:35,287][262582] Updated weights for policy 0, policy_version 5984 (0.0008) +[2026-06-02 16:49:35,530][262582] Updated weights for policy 0, policy_version 5996 (0.0008) +[2026-06-02 16:49:35,735][262582] Updated weights for policy 0, policy_version 6006 (0.0009) +[2026-06-02 16:49:35,925][262582] Updated weights for policy 0, policy_version 6016 (0.0009) +[2026-06-02 16:49:36,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.5, 300 sec: 18118.8). Total num frames: 3080192. Throughput: 0: 18511.6. Samples: 3069056. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) +[2026-06-02 16:49:36,008][260776] Avg episode reward: [(0, '117.231')] +[2026-06-02 16:49:36,631][262582] Updated weights for policy 0, policy_version 6026 (0.0008) +[2026-06-02 16:49:36,841][262582] Updated weights for policy 0, policy_version 6037 (0.0008) +[2026-06-02 16:49:37,065][262582] Updated weights for policy 0, policy_version 6048 (0.0008) +[2026-06-02 16:49:37,259][262582] Updated weights for policy 0, policy_version 6058 (0.0008) +[2026-06-02 16:49:37,486][262582] Updated weights for policy 0, policy_version 6069 (0.0009) +[2026-06-02 16:49:37,685][262582] Updated weights for policy 0, policy_version 6079 (0.0008) +[2026-06-02 16:49:38,393][262582] Updated weights for policy 0, policy_version 6089 (0.0007) +[2026-06-02 16:49:38,575][262582] Updated weights for policy 0, policy_version 6099 (0.0009) +[2026-06-02 16:49:38,781][262582] Updated weights for policy 0, policy_version 6109 (0.0009) +[2026-06-02 16:49:38,979][262582] Updated weights for policy 0, policy_version 6119 (0.0009) +[2026-06-02 16:49:39,175][262582] Updated weights for policy 0, policy_version 6129 (0.0008) +[2026-06-02 16:49:39,374][262582] Updated weights for policy 0, policy_version 6139 (0.0009) +[2026-06-02 16:49:40,076][262582] Updated weights for policy 0, policy_version 6149 (0.0008) +[2026-06-02 16:49:40,277][262582] Updated weights for policy 0, policy_version 6159 (0.0008) +[2026-06-02 16:49:40,469][262582] Updated weights for policy 0, policy_version 6169 (0.0008) +[2026-06-02 16:49:40,687][262582] Updated weights for policy 0, policy_version 6180 (0.0009) +[2026-06-02 16:49:40,906][262582] Updated weights for policy 0, policy_version 6191 (0.0008) +[2026-06-02 16:49:41,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 17975.6). Total num frames: 3145728. Throughput: 0: 18463.3. Samples: 3174912. Policy #0 lag: (min: 67.0, avg: 83.7, max: 131.0) +[2026-06-02 16:49:41,008][260776] Avg episode reward: [(0, '127.326')] +[2026-06-02 16:49:41,116][262582] Updated weights for policy 0, policy_version 6201 (0.0008) +[2026-06-02 16:49:41,246][262026] Saving new best policy, reward=127.326! +[2026-06-02 16:49:41,847][262582] Updated weights for policy 0, policy_version 6211 (0.0008) +[2026-06-02 16:49:42,048][262582] Updated weights for policy 0, policy_version 6221 (0.0008) +[2026-06-02 16:49:42,243][262582] Updated weights for policy 0, policy_version 6231 (0.0008) +[2026-06-02 16:49:42,441][262582] Updated weights for policy 0, policy_version 6241 (0.0008) +[2026-06-02 16:49:42,646][262582] Updated weights for policy 0, policy_version 6251 (0.0009) +[2026-06-02 16:49:42,831][262582] Updated weights for policy 0, policy_version 6261 (0.0008) +[2026-06-02 16:49:43,038][262582] Updated weights for policy 0, policy_version 6271 (0.0008) +[2026-06-02 16:49:43,707][262582] Updated weights for policy 0, policy_version 6281 (0.0008) +[2026-06-02 16:49:43,903][262582] Updated weights for policy 0, policy_version 6291 (0.0008) +[2026-06-02 16:49:44,104][262582] Updated weights for policy 0, policy_version 6301 (0.0008) +[2026-06-02 16:49:44,310][262582] Updated weights for policy 0, policy_version 6311 (0.0008) +[2026-06-02 16:49:44,530][262582] Updated weights for policy 0, policy_version 6322 (0.0008) +[2026-06-02 16:49:44,751][262582] Updated weights for policy 0, policy_version 6333 (0.0008) +[2026-06-02 16:49:45,460][262582] Updated weights for policy 0, policy_version 6344 (0.0008) +[2026-06-02 16:49:45,677][262582] Updated weights for policy 0, policy_version 6355 (0.0008) +[2026-06-02 16:49:45,869][262582] Updated weights for policy 0, policy_version 6365 (0.0008) +[2026-06-02 16:49:46,007][260776] Fps is (10 sec: 16384.1, 60 sec: 18568.5, 300 sec: 18022.4). Total num frames: 3244032. Throughput: 0: 18713.6. Samples: 3293568. Policy #0 lag: (min: 67.0, avg: 83.7, max: 131.0) +[2026-06-02 16:49:46,007][260776] Avg episode reward: [(0, '126.576')] +[2026-06-02 16:49:46,080][262582] Updated weights for policy 0, policy_version 6375 (0.0005) +[2026-06-02 16:49:46,272][262582] Updated weights for policy 0, policy_version 6385 (0.0005) +[2026-06-02 16:49:46,470][262582] Updated weights for policy 0, policy_version 6395 (0.0005) +[2026-06-02 16:49:47,156][262582] Updated weights for policy 0, policy_version 6405 (0.0007) +[2026-06-02 16:49:47,353][262582] Updated weights for policy 0, policy_version 6415 (0.0008) +[2026-06-02 16:49:47,549][262582] Updated weights for policy 0, policy_version 6425 (0.0008) +[2026-06-02 16:49:47,746][262582] Updated weights for policy 0, policy_version 6435 (0.0008) +[2026-06-02 16:49:47,983][262582] Updated weights for policy 0, policy_version 6447 (0.0008) +[2026-06-02 16:49:48,185][262582] Updated weights for policy 0, policy_version 6457 (0.0008) +[2026-06-02 16:49:48,908][262582] Updated weights for policy 0, policy_version 6467 (0.0008) +[2026-06-02 16:49:49,097][262582] Updated weights for policy 0, policy_version 6477 (0.0008) +[2026-06-02 16:49:49,299][262582] Updated weights for policy 0, policy_version 6487 (0.0008) +[2026-06-02 16:49:49,491][262582] Updated weights for policy 0, policy_version 6497 (0.0008) +[2026-06-02 16:49:49,695][262582] Updated weights for policy 0, policy_version 6507 (0.0008) +[2026-06-02 16:49:49,893][262582] Updated weights for policy 0, policy_version 6517 (0.0008) +[2026-06-02 16:49:50,088][262582] Updated weights for policy 0, policy_version 6527 (0.0008) +[2026-06-02 16:49:50,794][262582] Updated weights for policy 0, policy_version 6538 (0.0008) +[2026-06-02 16:49:50,983][262582] Updated weights for policy 0, policy_version 6548 (0.0009) +[2026-06-02 16:49:51,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18066.7). Total num frames: 3342336. Throughput: 0: 18420.6. Samples: 3339520. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 16:49:51,007][260776] Avg episode reward: [(0, '138.802')] +[2026-06-02 16:49:51,183][262582] Updated weights for policy 0, policy_version 6558 (0.0008) +[2026-06-02 16:49:51,405][262582] Updated weights for policy 0, policy_version 6569 (0.0008) +[2026-06-02 16:49:51,603][262582] Updated weights for policy 0, policy_version 6579 (0.0008) +[2026-06-02 16:49:51,805][262582] Updated weights for policy 0, policy_version 6589 (0.0008) +[2026-06-02 16:49:51,852][262026] Saving new best policy, reward=138.802! +[2026-06-02 16:49:52,521][262582] Updated weights for policy 0, policy_version 6599 (0.0008) +[2026-06-02 16:49:52,719][262582] Updated weights for policy 0, policy_version 6610 (0.0008) +[2026-06-02 16:49:52,927][262582] Updated weights for policy 0, policy_version 6620 (0.0008) +[2026-06-02 16:49:53,118][262582] Updated weights for policy 0, policy_version 6630 (0.0008) +[2026-06-02 16:49:53,346][262582] Updated weights for policy 0, policy_version 6641 (0.0008) +[2026-06-02 16:49:53,543][262582] Updated weights for policy 0, policy_version 6651 (0.0008) +[2026-06-02 16:49:54,253][262582] Updated weights for policy 0, policy_version 6662 (0.0008) +[2026-06-02 16:49:54,449][262582] Updated weights for policy 0, policy_version 6672 (0.0008) +[2026-06-02 16:49:54,653][262582] Updated weights for policy 0, policy_version 6682 (0.0008) +[2026-06-02 16:49:54,879][262582] Updated weights for policy 0, policy_version 6693 (0.0008) +[2026-06-02 16:49:55,078][262582] Updated weights for policy 0, policy_version 6703 (0.0008) +[2026-06-02 16:49:55,268][262582] Updated weights for policy 0, policy_version 6713 (0.0008) +[2026-06-02 16:49:56,000][262582] Updated weights for policy 0, policy_version 6724 (0.0008) +[2026-06-02 16:49:56,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18108.6). Total num frames: 3440640. Throughput: 0: 18668.1. Samples: 3457792. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 16:49:56,008][260776] Avg episode reward: [(0, '141.231')] +[2026-06-02 16:49:56,193][262582] Updated weights for policy 0, policy_version 6734 (0.0008) +[2026-06-02 16:49:56,385][262582] Updated weights for policy 0, policy_version 6744 (0.0008) +[2026-06-02 16:49:56,611][262582] Updated weights for policy 0, policy_version 6755 (0.0008) +[2026-06-02 16:49:56,806][262582] Updated weights for policy 0, policy_version 6765 (0.0008) +[2026-06-02 16:49:57,006][262582] Updated weights for policy 0, policy_version 6775 (0.0008) +[2026-06-02 16:49:57,184][262026] Saving new best policy, reward=141.231! +[2026-06-02 16:49:57,734][262582] Updated weights for policy 0, policy_version 6785 (0.0009) +[2026-06-02 16:49:57,958][262582] Updated weights for policy 0, policy_version 6797 (0.0008) +[2026-06-02 16:49:58,150][262582] Updated weights for policy 0, policy_version 6807 (0.0008) +[2026-06-02 16:49:58,363][262582] Updated weights for policy 0, policy_version 6818 (0.0009) +[2026-06-02 16:49:58,569][262582] Updated weights for policy 0, policy_version 6828 (0.0009) +[2026-06-02 16:49:58,772][262582] Updated weights for policy 0, policy_version 6838 (0.0008) +[2026-06-02 16:49:59,520][262582] Updated weights for policy 0, policy_version 6849 (0.0008) +[2026-06-02 16:49:59,695][262582] Updated weights for policy 0, policy_version 6859 (0.0008) +[2026-06-02 16:49:59,893][262582] Updated weights for policy 0, policy_version 6869 (0.0008) +[2026-06-02 16:50:00,094][262582] Updated weights for policy 0, policy_version 6879 (0.0008) +[2026-06-02 16:50:00,287][262582] Updated weights for policy 0, policy_version 6889 (0.0008) +[2026-06-02 16:50:00,491][262582] Updated weights for policy 0, policy_version 6899 (0.0008) +[2026-06-02 16:50:00,692][262582] Updated weights for policy 0, policy_version 6909 (0.0008) +[2026-06-02 16:50:01,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18148.4). Total num frames: 3538944. Throughput: 0: 18392.2. Samples: 3564800. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 16:50:01,007][260776] Avg episode reward: [(0, '147.691')] +[2026-06-02 16:50:01,012][262026] Saving new best policy, reward=147.691! +[2026-06-02 16:50:01,393][262582] Updated weights for policy 0, policy_version 6920 (0.0008) +[2026-06-02 16:50:01,585][262582] Updated weights for policy 0, policy_version 6930 (0.0008) +[2026-06-02 16:50:01,775][262582] Updated weights for policy 0, policy_version 6940 (0.0008) +[2026-06-02 16:50:01,998][262582] Updated weights for policy 0, policy_version 6951 (0.0009) +[2026-06-02 16:50:02,227][262582] Updated weights for policy 0, policy_version 6963 (0.0008) +[2026-06-02 16:50:02,442][262582] Updated weights for policy 0, policy_version 6973 (0.0008) +[2026-06-02 16:50:03,156][262582] Updated weights for policy 0, policy_version 6983 (0.0008) +[2026-06-02 16:50:03,343][262582] Updated weights for policy 0, policy_version 6993 (0.0009) +[2026-06-02 16:50:03,538][262582] Updated weights for policy 0, policy_version 7003 (0.0009) +[2026-06-02 16:50:03,752][262582] Updated weights for policy 0, policy_version 7014 (0.0008) +[2026-06-02 16:50:03,944][262582] Updated weights for policy 0, policy_version 7024 (0.0008) +[2026-06-02 16:50:04,151][262582] Updated weights for policy 0, policy_version 7034 (0.0008) +[2026-06-02 16:50:04,867][262582] Updated weights for policy 0, policy_version 7045 (0.0008) +[2026-06-02 16:50:05,065][262582] Updated weights for policy 0, policy_version 7055 (0.0008) +[2026-06-02 16:50:05,264][262582] Updated weights for policy 0, policy_version 7065 (0.0008) +[2026-06-02 16:50:05,462][262582] Updated weights for policy 0, policy_version 7075 (0.0008) +[2026-06-02 16:50:05,684][262582] Updated weights for policy 0, policy_version 7086 (0.0008) +[2026-06-02 16:50:05,885][262582] Updated weights for policy 0, policy_version 7096 (0.0009) +[2026-06-02 16:50:06,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18022.4, 300 sec: 18022.4). Total num frames: 3604480. Throughput: 0: 18648.2. Samples: 3623680. Policy #0 lag: (min: 61.0, avg: 82.7, max: 131.0) +[2026-06-02 16:50:06,008][260776] Avg episode reward: [(0, '167.290')] +[2026-06-02 16:50:06,033][262026] Saving new best policy, reward=167.290! +[2026-06-02 16:50:06,583][262582] Updated weights for policy 0, policy_version 7106 (0.0009) +[2026-06-02 16:50:06,794][262582] Updated weights for policy 0, policy_version 7117 (0.0008) +[2026-06-02 16:50:06,986][262582] Updated weights for policy 0, policy_version 7127 (0.0008) +[2026-06-02 16:50:07,205][262582] Updated weights for policy 0, policy_version 7138 (0.0009) +[2026-06-02 16:50:07,415][262582] Updated weights for policy 0, policy_version 7148 (0.0008) +[2026-06-02 16:50:07,628][262582] Updated weights for policy 0, policy_version 7159 (0.0008) +[2026-06-02 16:50:08,354][262582] Updated weights for policy 0, policy_version 7169 (0.0008) +[2026-06-02 16:50:08,539][262582] Updated weights for policy 0, policy_version 7179 (0.0007) +[2026-06-02 16:50:08,748][262582] Updated weights for policy 0, policy_version 7190 (0.0007) +[2026-06-02 16:50:08,946][262582] Updated weights for policy 0, policy_version 7200 (0.0007) +[2026-06-02 16:50:09,140][262582] Updated weights for policy 0, policy_version 7210 (0.0007) +[2026-06-02 16:50:09,343][262582] Updated weights for policy 0, policy_version 7220 (0.0008) +[2026-06-02 16:50:09,562][262582] Updated weights for policy 0, policy_version 7231 (0.0008) +[2026-06-02 16:50:10,303][262582] Updated weights for policy 0, policy_version 7241 (0.0009) +[2026-06-02 16:50:10,496][262582] Updated weights for policy 0, policy_version 7251 (0.0008) +[2026-06-02 16:50:10,697][262582] Updated weights for policy 0, policy_version 7261 (0.0008) +[2026-06-02 16:50:10,895][262582] Updated weights for policy 0, policy_version 7271 (0.0008) +[2026-06-02 16:50:11,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18568.5, 300 sec: 18062.4). Total num frames: 3702784. Throughput: 0: 18323.9. Samples: 3727744. Policy #0 lag: (min: 61.0, avg: 82.7, max: 131.0) +[2026-06-02 16:50:11,008][260776] Avg episode reward: [(0, '159.305')] +[2026-06-02 16:50:11,097][262582] Updated weights for policy 0, policy_version 7281 (0.0008) +[2026-06-02 16:50:11,293][262582] Updated weights for policy 0, policy_version 7291 (0.0008) +[2026-06-02 16:50:12,030][262582] Updated weights for policy 0, policy_version 7301 (0.0008) +[2026-06-02 16:50:12,241][262582] Updated weights for policy 0, policy_version 7312 (0.0008) +[2026-06-02 16:50:12,437][262582] Updated weights for policy 0, policy_version 7322 (0.0008) +[2026-06-02 16:50:12,632][262582] Updated weights for policy 0, policy_version 7332 (0.0008) +[2026-06-02 16:50:12,835][262582] Updated weights for policy 0, policy_version 7342 (0.0008) +[2026-06-02 16:50:13,033][262582] Updated weights for policy 0, policy_version 7352 (0.0008) +[2026-06-02 16:50:13,727][262582] Updated weights for policy 0, policy_version 7362 (0.0008) +[2026-06-02 16:50:13,929][262582] Updated weights for policy 0, policy_version 7373 (0.0008) +[2026-06-02 16:50:14,145][262582] Updated weights for policy 0, policy_version 7384 (0.0008) +[2026-06-02 16:50:14,342][262582] Updated weights for policy 0, policy_version 7394 (0.0008) +[2026-06-02 16:50:14,545][262582] Updated weights for policy 0, policy_version 7404 (0.0008) +[2026-06-02 16:50:14,752][262582] Updated weights for policy 0, policy_version 7414 (0.0008) +[2026-06-02 16:50:14,943][262582] Updated weights for policy 0, policy_version 7424 (0.0008) +[2026-06-02 16:50:15,652][262582] Updated weights for policy 0, policy_version 7435 (0.0008) +[2026-06-02 16:50:15,870][262582] Updated weights for policy 0, policy_version 7446 (0.0008) +[2026-06-02 16:50:16,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18100.4). Total num frames: 3801088. Throughput: 0: 18557.1. Samples: 3843712. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) +[2026-06-02 16:50:16,008][260776] Avg episode reward: [(0, '149.608')] +[2026-06-02 16:50:16,075][262582] Updated weights for policy 0, policy_version 7456 (0.0008) +[2026-06-02 16:50:16,275][262582] Updated weights for policy 0, policy_version 7466 (0.0009) +[2026-06-02 16:50:16,466][262582] Updated weights for policy 0, policy_version 7476 (0.0008) +[2026-06-02 16:50:16,695][262582] Updated weights for policy 0, policy_version 7487 (0.0008) +[2026-06-02 16:50:17,409][262582] Updated weights for policy 0, policy_version 7498 (0.0008) +[2026-06-02 16:50:17,603][262582] Updated weights for policy 0, policy_version 7508 (0.0008) +[2026-06-02 16:50:17,801][262582] Updated weights for policy 0, policy_version 7518 (0.0008) +[2026-06-02 16:50:18,003][262582] Updated weights for policy 0, policy_version 7528 (0.0008) +[2026-06-02 16:50:18,198][262582] Updated weights for policy 0, policy_version 7538 (0.0008) +[2026-06-02 16:50:18,418][262582] Updated weights for policy 0, policy_version 7549 (0.0008) +[2026-06-02 16:50:19,119][262582] Updated weights for policy 0, policy_version 7559 (0.0008) +[2026-06-02 16:50:19,313][262582] Updated weights for policy 0, policy_version 7569 (0.0008) +[2026-06-02 16:50:19,511][262582] Updated weights for policy 0, policy_version 7579 (0.0008) +[2026-06-02 16:50:19,721][262582] Updated weights for policy 0, policy_version 7589 (0.0008) +[2026-06-02 16:50:19,917][262582] Updated weights for policy 0, policy_version 7599 (0.0008) +[2026-06-02 16:50:20,120][262582] Updated weights for policy 0, policy_version 7609 (0.0008) +[2026-06-02 16:50:20,831][262582] Updated weights for policy 0, policy_version 7619 (0.0008) +[2026-06-02 16:50:21,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18136.7). Total num frames: 3899392. Throughput: 0: 18298.3. Samples: 3892480. Policy #0 lag: (min: 63.0, avg: 79.4, max: 127.0) +[2026-06-02 16:50:21,008][260776] Avg episode reward: [(0, '155.179')] +[2026-06-02 16:50:21,036][262582] Updated weights for policy 0, policy_version 7630 (0.0008) +[2026-06-02 16:50:21,243][262582] Updated weights for policy 0, policy_version 7640 (0.0008) +[2026-06-02 16:50:21,441][262582] Updated weights for policy 0, policy_version 7650 (0.0008) +[2026-06-02 16:50:21,640][262582] Updated weights for policy 0, policy_version 7660 (0.0008) +[2026-06-02 16:50:21,849][262582] Updated weights for policy 0, policy_version 7670 (0.0008) +[2026-06-02 16:50:22,032][262582] Updated weights for policy 0, policy_version 7680 (0.0009) +[2026-06-02 16:50:22,744][262582] Updated weights for policy 0, policy_version 7690 (0.0008) +[2026-06-02 16:50:22,940][262582] Updated weights for policy 0, policy_version 7700 (0.0008) +[2026-06-02 16:50:23,132][262582] Updated weights for policy 0, policy_version 7710 (0.0009) +[2026-06-02 16:50:23,337][262582] Updated weights for policy 0, policy_version 7720 (0.0008) +[2026-06-02 16:50:23,553][262582] Updated weights for policy 0, policy_version 7731 (0.0008) +[2026-06-02 16:50:23,758][262582] Updated weights for policy 0, policy_version 7741 (0.0008) +[2026-06-02 16:50:24,453][262582] Updated weights for policy 0, policy_version 7751 (0.0008) +[2026-06-02 16:50:24,655][262582] Updated weights for policy 0, policy_version 7761 (0.0008) +[2026-06-02 16:50:24,860][262582] Updated weights for policy 0, policy_version 7771 (0.0008) +[2026-06-02 16:50:25,047][262582] Updated weights for policy 0, policy_version 7781 (0.0009) +[2026-06-02 16:50:25,259][262582] Updated weights for policy 0, policy_version 7791 (0.0008) +[2026-06-02 16:50:25,477][262582] Updated weights for policy 0, policy_version 7802 (0.0008) +[2026-06-02 16:50:26,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18171.3). Total num frames: 3997696. Throughput: 0: 18571.4. Samples: 4010624. Policy #0 lag: (min: 63.0, avg: 79.4, max: 127.0) +[2026-06-02 16:50:26,008][260776] Avg episode reward: [(0, '171.399')] +[2026-06-02 16:50:26,174][262582] Updated weights for policy 0, policy_version 7812 (0.0008) +[2026-06-02 16:50:26,352][262582] Updated weights for policy 0, policy_version 7822 (0.0008) +[2026-06-02 16:50:26,554][262582] Updated weights for policy 0, policy_version 7832 (0.0008) +[2026-06-02 16:50:26,755][262582] Updated weights for policy 0, policy_version 7842 (0.0008) +[2026-06-02 16:50:26,962][262582] Updated weights for policy 0, policy_version 7852 (0.0008) +[2026-06-02 16:50:27,164][262582] Updated weights for policy 0, policy_version 7862 (0.0008) +[2026-06-02 16:50:27,354][262026] Saving new best policy, reward=171.399! +[2026-06-02 16:50:27,358][262582] Updated weights for policy 0, policy_version 7872 (0.0008) +[2026-06-02 16:50:28,094][262582] Updated weights for policy 0, policy_version 7882 (0.0008) +[2026-06-02 16:50:28,286][262582] Updated weights for policy 0, policy_version 7892 (0.0008) +[2026-06-02 16:50:28,480][262582] Updated weights for policy 0, policy_version 7902 (0.0008) +[2026-06-02 16:50:28,688][262582] Updated weights for policy 0, policy_version 7912 (0.0008) +[2026-06-02 16:50:28,908][262582] Updated weights for policy 0, policy_version 7923 (0.0008) +[2026-06-02 16:50:29,107][262582] Updated weights for policy 0, policy_version 7933 (0.0008) +[2026-06-02 16:50:29,859][262582] Updated weights for policy 0, policy_version 7946 (0.0008) +[2026-06-02 16:50:30,056][262582] Updated weights for policy 0, policy_version 7956 (0.0009) +[2026-06-02 16:50:30,277][262582] Updated weights for policy 0, policy_version 7967 (0.0008) +[2026-06-02 16:50:30,477][262582] Updated weights for policy 0, policy_version 7977 (0.0008) +[2026-06-02 16:50:30,672][262582] Updated weights for policy 0, policy_version 7987 (0.0008) +[2026-06-02 16:50:30,881][262582] Updated weights for policy 0, policy_version 7997 (0.0009) +[2026-06-02 16:50:31,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18204.4). Total num frames: 4096000. Throughput: 0: 18278.4. Samples: 4116096. Policy #0 lag: (min: 33.0, avg: 65.2, max: 98.0) +[2026-06-02 16:50:31,008][260776] Avg episode reward: [(0, '165.968')] +[2026-06-02 16:50:31,581][262582] Updated weights for policy 0, policy_version 8007 (0.0009) +[2026-06-02 16:50:31,776][262582] Updated weights for policy 0, policy_version 8017 (0.0009) +[2026-06-02 16:50:31,998][262582] Updated weights for policy 0, policy_version 8028 (0.0009) +[2026-06-02 16:50:32,190][262582] Updated weights for policy 0, policy_version 8038 (0.0008) +[2026-06-02 16:50:32,394][262582] Updated weights for policy 0, policy_version 8048 (0.0008) +[2026-06-02 16:50:32,613][262582] Updated weights for policy 0, policy_version 8059 (0.0008) +[2026-06-02 16:50:33,306][262582] Updated weights for policy 0, policy_version 8069 (0.0008) +[2026-06-02 16:50:33,505][262582] Updated weights for policy 0, policy_version 8079 (0.0009) +[2026-06-02 16:50:33,696][262582] Updated weights for policy 0, policy_version 8089 (0.0008) +[2026-06-02 16:50:33,897][262582] Updated weights for policy 0, policy_version 8099 (0.0008) +[2026-06-02 16:50:34,096][262582] Updated weights for policy 0, policy_version 8109 (0.0008) +[2026-06-02 16:50:34,320][262582] Updated weights for policy 0, policy_version 8120 (0.0008) +[2026-06-02 16:50:35,021][262582] Updated weights for policy 0, policy_version 8130 (0.0009) +[2026-06-02 16:50:35,208][262582] Updated weights for policy 0, policy_version 8140 (0.0008) +[2026-06-02 16:50:35,402][262582] Updated weights for policy 0, policy_version 8150 (0.0009) +[2026-06-02 16:50:35,598][262582] Updated weights for policy 0, policy_version 8160 (0.0008) +[2026-06-02 16:50:35,800][262582] Updated weights for policy 0, policy_version 8170 (0.0009) +[2026-06-02 16:50:36,004][262582] Updated weights for policy 0, policy_version 8180 (0.0008) +[2026-06-02 16:50:36,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18022.4, 300 sec: 18093.6). Total num frames: 4161536. Throughput: 0: 18560.0. Samples: 4174720. Policy #0 lag: (min: 33.0, avg: 65.2, max: 98.0) +[2026-06-02 16:50:36,008][260776] Avg episode reward: [(0, '174.946')] +[2026-06-02 16:50:36,203][262582] Updated weights for policy 0, policy_version 8190 (0.0008) +[2026-06-02 16:50:36,243][262026] Saving new best policy, reward=174.946! +[2026-06-02 16:50:36,897][262582] Updated weights for policy 0, policy_version 8200 (0.0008) +[2026-06-02 16:50:37,093][262582] Updated weights for policy 0, policy_version 8210 (0.0008) +[2026-06-02 16:50:37,290][262582] Updated weights for policy 0, policy_version 8220 (0.0008) +[2026-06-02 16:50:37,485][262582] Updated weights for policy 0, policy_version 8230 (0.0008) +[2026-06-02 16:50:37,690][262582] Updated weights for policy 0, policy_version 8240 (0.0008) +[2026-06-02 16:50:37,893][262582] Updated weights for policy 0, policy_version 8250 (0.0008) +[2026-06-02 16:50:38,577][262582] Updated weights for policy 0, policy_version 8260 (0.0009) +[2026-06-02 16:50:38,783][262582] Updated weights for policy 0, policy_version 8271 (0.0008) +[2026-06-02 16:50:38,977][262582] Updated weights for policy 0, policy_version 8281 (0.0008) +[2026-06-02 16:50:39,187][262582] Updated weights for policy 0, policy_version 8291 (0.0008) +[2026-06-02 16:50:39,380][262582] Updated weights for policy 0, policy_version 8301 (0.0008) +[2026-06-02 16:50:39,590][262582] Updated weights for policy 0, policy_version 8311 (0.0008) +[2026-06-02 16:50:40,280][262582] Updated weights for policy 0, policy_version 8321 (0.0008) +[2026-06-02 16:50:40,501][262582] Updated weights for policy 0, policy_version 8333 (0.0008) +[2026-06-02 16:50:40,700][262582] Updated weights for policy 0, policy_version 8343 (0.0005) +[2026-06-02 16:50:40,899][262582] Updated weights for policy 0, policy_version 8353 (0.0005) +[2026-06-02 16:50:41,007][260776] Fps is (10 sec: 16384.1, 60 sec: 18568.5, 300 sec: 18127.0). Total num frames: 4259840. Throughput: 0: 18255.6. Samples: 4279296. Policy #0 lag: (min: 63.0, avg: 80.6, max: 127.0) +[2026-06-02 16:50:41,008][260776] Avg episode reward: [(0, '172.122')] +[2026-06-02 16:50:41,107][262582] Updated weights for policy 0, policy_version 8363 (0.0005) +[2026-06-02 16:50:41,334][262582] Updated weights for policy 0, policy_version 8374 (0.0005) +[2026-06-02 16:50:41,533][262582] Updated weights for policy 0, policy_version 8384 (0.0005) +[2026-06-02 16:50:42,228][262582] Updated weights for policy 0, policy_version 8394 (0.0008) +[2026-06-02 16:50:42,424][262582] Updated weights for policy 0, policy_version 8404 (0.0008) +[2026-06-02 16:50:42,615][262582] Updated weights for policy 0, policy_version 8414 (0.0008) +[2026-06-02 16:50:42,824][262582] Updated weights for policy 0, policy_version 8424 (0.0008) +[2026-06-02 16:50:43,029][262582] Updated weights for policy 0, policy_version 8434 (0.0008) +[2026-06-02 16:50:43,232][262582] Updated weights for policy 0, policy_version 8444 (0.0008) +[2026-06-02 16:50:43,914][262582] Updated weights for policy 0, policy_version 8454 (0.0008) +[2026-06-02 16:50:44,133][262582] Updated weights for policy 0, policy_version 8465 (0.0008) +[2026-06-02 16:50:44,322][262582] Updated weights for policy 0, policy_version 8475 (0.0008) +[2026-06-02 16:50:44,547][262582] Updated weights for policy 0, policy_version 8486 (0.0008) +[2026-06-02 16:50:44,752][262582] Updated weights for policy 0, policy_version 8496 (0.0008) +[2026-06-02 16:50:44,963][262582] Updated weights for policy 0, policy_version 8506 (0.0008) +[2026-06-02 16:50:45,649][262582] Updated weights for policy 0, policy_version 8516 (0.0008) +[2026-06-02 16:50:45,851][262582] Updated weights for policy 0, policy_version 8526 (0.0008) +[2026-06-02 16:50:46,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18158.9). Total num frames: 4358144. Throughput: 0: 18323.9. Samples: 4389376. Policy #0 lag: (min: 63.0, avg: 80.6, max: 127.0) +[2026-06-02 16:50:46,008][260776] Avg episode reward: [(0, '178.764')] +[2026-06-02 16:50:46,042][262582] Updated weights for policy 0, policy_version 8536 (0.0008) +[2026-06-02 16:50:46,248][262582] Updated weights for policy 0, policy_version 8546 (0.0008) +[2026-06-02 16:50:46,451][262582] Updated weights for policy 0, policy_version 8556 (0.0008) +[2026-06-02 16:50:46,649][262582] Updated weights for policy 0, policy_version 8566 (0.0007) +[2026-06-02 16:50:46,841][262026] Saving new best policy, reward=178.764! +[2026-06-02 16:50:46,843][262582] Updated weights for policy 0, policy_version 8576 (0.0004) +[2026-06-02 16:50:47,531][262582] Updated weights for policy 0, policy_version 8586 (0.0008) +[2026-06-02 16:50:47,747][262582] Updated weights for policy 0, policy_version 8597 (0.0008) +[2026-06-02 16:50:47,948][262582] Updated weights for policy 0, policy_version 8607 (0.0008) +[2026-06-02 16:50:48,146][262582] Updated weights for policy 0, policy_version 8617 (0.0008) +[2026-06-02 16:50:48,377][262582] Updated weights for policy 0, policy_version 8628 (0.0008) +[2026-06-02 16:50:48,574][262582] Updated weights for policy 0, policy_version 8638 (0.0008) +[2026-06-02 16:50:49,301][262582] Updated weights for policy 0, policy_version 8649 (0.0008) +[2026-06-02 16:50:49,489][262582] Updated weights for policy 0, policy_version 8659 (0.0008) +[2026-06-02 16:50:49,694][262582] Updated weights for policy 0, policy_version 8669 (0.0008) +[2026-06-02 16:50:49,899][262582] Updated weights for policy 0, policy_version 8679 (0.0008) +[2026-06-02 16:50:50,103][262582] Updated weights for policy 0, policy_version 8689 (0.0008) +[2026-06-02 16:50:50,304][262582] Updated weights for policy 0, policy_version 8699 (0.0008) +[2026-06-02 16:50:51,006][262582] Updated weights for policy 0, policy_version 8709 (0.0006) +[2026-06-02 16:50:51,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.5, 300 sec: 18189.6). Total num frames: 4456448. Throughput: 0: 18215.8. Samples: 4443392. Policy #0 lag: (min: 63.0, avg: 80.6, max: 127.0) +[2026-06-02 16:50:51,008][260776] Avg episode reward: [(0, '185.611')] +[2026-06-02 16:50:51,205][262582] Updated weights for policy 0, policy_version 8719 (0.0007) +[2026-06-02 16:50:51,387][262582] Updated weights for policy 0, policy_version 8729 (0.0004) +[2026-06-02 16:50:51,596][262582] Updated weights for policy 0, policy_version 8739 (0.0004) +[2026-06-02 16:50:51,794][262582] Updated weights for policy 0, policy_version 8749 (0.0004) +[2026-06-02 16:50:52,008][262582] Updated weights for policy 0, policy_version 8759 (0.0004) +[2026-06-02 16:50:52,168][262026] Saving new best policy, reward=185.611! +[2026-06-02 16:50:52,681][262582] Updated weights for policy 0, policy_version 8769 (0.0004) +[2026-06-02 16:50:52,876][262582] Updated weights for policy 0, policy_version 8779 (0.0008) +[2026-06-02 16:50:53,070][262582] Updated weights for policy 0, policy_version 8789 (0.0009) +[2026-06-02 16:50:53,291][262582] Updated weights for policy 0, policy_version 8800 (0.0008) +[2026-06-02 16:50:53,499][262582] Updated weights for policy 0, policy_version 8810 (0.0008) +[2026-06-02 16:50:53,690][262582] Updated weights for policy 0, policy_version 8820 (0.0008) +[2026-06-02 16:50:53,897][262582] Updated weights for policy 0, policy_version 8830 (0.0008) +[2026-06-02 16:50:54,623][262582] Updated weights for policy 0, policy_version 8841 (0.0008) +[2026-06-02 16:50:54,821][262582] Updated weights for policy 0, policy_version 8851 (0.0009) +[2026-06-02 16:50:55,022][262582] Updated weights for policy 0, policy_version 8861 (0.0008) +[2026-06-02 16:50:55,223][262582] Updated weights for policy 0, policy_version 8871 (0.0008) +[2026-06-02 16:50:55,417][262582] Updated weights for policy 0, policy_version 8881 (0.0008) +[2026-06-02 16:50:55,627][262582] Updated weights for policy 0, policy_version 8891 (0.0008) +[2026-06-02 16:50:56,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18219.0). Total num frames: 4554752. Throughput: 0: 18471.8. Samples: 4558976. Policy #0 lag: (min: 21.0, avg: 38.7, max: 85.0) +[2026-06-02 16:50:56,008][260776] Avg episode reward: [(0, '210.652')] +[2026-06-02 16:50:56,012][262026] Saving new best policy, reward=210.652! +[2026-06-02 16:50:56,320][262582] Updated weights for policy 0, policy_version 8901 (0.0009) +[2026-06-02 16:50:56,537][262582] Updated weights for policy 0, policy_version 8912 (0.0009) +[2026-06-02 16:50:56,736][262582] Updated weights for policy 0, policy_version 8922 (0.0008) +[2026-06-02 16:50:56,938][262582] Updated weights for policy 0, policy_version 8932 (0.0008) +[2026-06-02 16:50:57,144][262582] Updated weights for policy 0, policy_version 8942 (0.0008) +[2026-06-02 16:50:57,341][262582] Updated weights for policy 0, policy_version 8952 (0.0008) +[2026-06-02 16:50:58,040][262582] Updated weights for policy 0, policy_version 8962 (0.0008) +[2026-06-02 16:50:58,245][262582] Updated weights for policy 0, policy_version 8973 (0.0009) +[2026-06-02 16:50:58,437][262582] Updated weights for policy 0, policy_version 8983 (0.0008) +[2026-06-02 16:50:58,636][262582] Updated weights for policy 0, policy_version 8993 (0.0008) +[2026-06-02 16:50:58,837][262582] Updated weights for policy 0, policy_version 9003 (0.0008) +[2026-06-02 16:50:59,042][262582] Updated weights for policy 0, policy_version 9013 (0.0008) +[2026-06-02 16:50:59,246][262582] Updated weights for policy 0, policy_version 9023 (0.0008) +[2026-06-02 16:50:59,914][262582] Updated weights for policy 0, policy_version 9033 (0.0008) +[2026-06-02 16:51:00,137][262582] Updated weights for policy 0, policy_version 9044 (0.0008) +[2026-06-02 16:51:00,352][262582] Updated weights for policy 0, policy_version 9055 (0.0008) +[2026-06-02 16:51:00,562][262582] Updated weights for policy 0, policy_version 9065 (0.0008) +[2026-06-02 16:51:00,766][262582] Updated weights for policy 0, policy_version 9075 (0.0009) +[2026-06-02 16:51:00,983][262582] Updated weights for policy 0, policy_version 9086 (0.0008) +[2026-06-02 16:51:01,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 18118.8). Total num frames: 4620288. Throughput: 0: 18284.1. Samples: 4666496. Policy #0 lag: (min: 21.0, avg: 38.7, max: 85.0) +[2026-06-02 16:51:01,008][260776] Avg episode reward: [(0, '215.265')] +[2026-06-02 16:51:01,014][262026] Saving new best policy, reward=215.265! +[2026-06-02 16:51:01,668][262582] Updated weights for policy 0, policy_version 9096 (0.0008) +[2026-06-02 16:51:01,857][262582] Updated weights for policy 0, policy_version 9106 (0.0008) +[2026-06-02 16:51:02,070][262582] Updated weights for policy 0, policy_version 9116 (0.0008) +[2026-06-02 16:51:02,278][262582] Updated weights for policy 0, policy_version 9126 (0.0008) +[2026-06-02 16:51:02,490][262582] Updated weights for policy 0, policy_version 9137 (0.0008) +[2026-06-02 16:51:02,690][262582] Updated weights for policy 0, policy_version 9147 (0.0008) +[2026-06-02 16:51:03,394][262582] Updated weights for policy 0, policy_version 9157 (0.0008) +[2026-06-02 16:51:03,578][262582] Updated weights for policy 0, policy_version 9167 (0.0008) +[2026-06-02 16:51:03,783][262582] Updated weights for policy 0, policy_version 9177 (0.0008) +[2026-06-02 16:51:03,988][262582] Updated weights for policy 0, policy_version 9187 (0.0008) +[2026-06-02 16:51:04,192][262582] Updated weights for policy 0, policy_version 9197 (0.0008) +[2026-06-02 16:51:04,398][262582] Updated weights for policy 0, policy_version 9207 (0.0009) +[2026-06-02 16:51:05,093][262582] Updated weights for policy 0, policy_version 9217 (0.0008) +[2026-06-02 16:51:05,279][262582] Updated weights for policy 0, policy_version 9227 (0.0009) +[2026-06-02 16:51:05,483][262582] Updated weights for policy 0, policy_version 9237 (0.0008) +[2026-06-02 16:51:05,683][262582] Updated weights for policy 0, policy_version 9247 (0.0008) +[2026-06-02 16:51:05,898][262582] Updated weights for policy 0, policy_version 9258 (0.0008) +[2026-06-02 16:51:06,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18568.5, 300 sec: 18148.4). Total num frames: 4718592. Throughput: 0: 18511.7. Samples: 4725504. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 16:51:06,008][260776] Avg episode reward: [(0, '233.489')] +[2026-06-02 16:51:06,105][262582] Updated weights for policy 0, policy_version 9268 (0.0008) +[2026-06-02 16:51:06,312][262582] Updated weights for policy 0, policy_version 9278 (0.0008) +[2026-06-02 16:51:06,338][262026] Saving new best policy, reward=233.489! +[2026-06-02 16:51:07,012][262582] Updated weights for policy 0, policy_version 9288 (0.0008) +[2026-06-02 16:51:07,197][262582] Updated weights for policy 0, policy_version 9298 (0.0008) +[2026-06-02 16:51:07,416][262582] Updated weights for policy 0, policy_version 9308 (0.0009) +[2026-06-02 16:51:07,612][262582] Updated weights for policy 0, policy_version 9318 (0.0008) +[2026-06-02 16:51:07,818][262582] Updated weights for policy 0, policy_version 9328 (0.0008) +[2026-06-02 16:51:08,017][262582] Updated weights for policy 0, policy_version 9338 (0.0009) +[2026-06-02 16:51:08,698][262582] Updated weights for policy 0, policy_version 9348 (0.0009) +[2026-06-02 16:51:08,898][262582] Updated weights for policy 0, policy_version 9359 (0.0008) +[2026-06-02 16:51:09,103][262582] Updated weights for policy 0, policy_version 9369 (0.0008) +[2026-06-02 16:51:09,307][262582] Updated weights for policy 0, policy_version 9379 (0.0008) +[2026-06-02 16:51:09,511][262582] Updated weights for policy 0, policy_version 9389 (0.0009) +[2026-06-02 16:51:09,712][262582] Updated weights for policy 0, policy_version 9399 (0.0009) +[2026-06-02 16:51:10,416][262582] Updated weights for policy 0, policy_version 9409 (0.0008) +[2026-06-02 16:51:10,599][262582] Updated weights for policy 0, policy_version 9419 (0.0008) +[2026-06-02 16:51:10,789][262582] Updated weights for policy 0, policy_version 9429 (0.0008) +[2026-06-02 16:51:11,003][262582] Updated weights for policy 0, policy_version 9439 (0.0009) +[2026-06-02 16:51:11,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18177.0). Total num frames: 4816896. Throughput: 0: 18224.4. Samples: 4830720. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 16:51:11,007][260776] Avg episode reward: [(0, '258.135')] +[2026-06-02 16:51:11,198][262582] Updated weights for policy 0, policy_version 9449 (0.0008) +[2026-06-02 16:51:11,399][262582] Updated weights for policy 0, policy_version 9459 (0.0008) +[2026-06-02 16:51:11,596][262582] Updated weights for policy 0, policy_version 9469 (0.0008) +[2026-06-02 16:51:11,660][262026] Saving new best policy, reward=258.135! +[2026-06-02 16:51:12,319][262582] Updated weights for policy 0, policy_version 9479 (0.0008) +[2026-06-02 16:51:12,507][262582] Updated weights for policy 0, policy_version 9489 (0.0008) +[2026-06-02 16:51:12,731][262582] Updated weights for policy 0, policy_version 9500 (0.0008) +[2026-06-02 16:51:12,935][262582] Updated weights for policy 0, policy_version 9510 (0.0008) +[2026-06-02 16:51:13,134][262582] Updated weights for policy 0, policy_version 9520 (0.0008) +[2026-06-02 16:51:13,335][262582] Updated weights for policy 0, policy_version 9530 (0.0008) +[2026-06-02 16:51:14,022][262582] Updated weights for policy 0, policy_version 9540 (0.0009) +[2026-06-02 16:51:14,210][262582] Updated weights for policy 0, policy_version 9550 (0.0008) +[2026-06-02 16:51:14,410][262582] Updated weights for policy 0, policy_version 9560 (0.0008) +[2026-06-02 16:51:14,622][262582] Updated weights for policy 0, policy_version 9571 (0.0009) +[2026-06-02 16:51:14,833][262582] Updated weights for policy 0, policy_version 9581 (0.0008) +[2026-06-02 16:51:15,030][262582] Updated weights for policy 0, policy_version 9591 (0.0009) +[2026-06-02 16:51:15,747][262582] Updated weights for policy 0, policy_version 9601 (0.0008) +[2026-06-02 16:51:15,933][262582] Updated weights for policy 0, policy_version 9611 (0.0008) +[2026-06-02 16:51:16,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.6, 300 sec: 18204.4). Total num frames: 4915200. Throughput: 0: 18218.7. Samples: 4935936. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 16:51:16,007][260776] Avg episode reward: [(0, '252.669')] +[2026-06-02 16:51:16,129][262582] Updated weights for policy 0, policy_version 9621 (0.0008) +[2026-06-02 16:51:16,331][262582] Updated weights for policy 0, policy_version 9631 (0.0008) +[2026-06-02 16:51:16,537][262582] Updated weights for policy 0, policy_version 9641 (0.0008) +[2026-06-02 16:51:16,738][262582] Updated weights for policy 0, policy_version 9651 (0.0008) +[2026-06-02 16:51:16,937][262582] Updated weights for policy 0, policy_version 9661 (0.0009) +[2026-06-02 16:51:17,610][262582] Updated weights for policy 0, policy_version 9671 (0.0008) +[2026-06-02 16:51:17,797][262582] Updated weights for policy 0, policy_version 9681 (0.0008) +[2026-06-02 16:51:17,999][262582] Updated weights for policy 0, policy_version 9691 (0.0008) +[2026-06-02 16:51:18,202][262582] Updated weights for policy 0, policy_version 9701 (0.0008) +[2026-06-02 16:51:18,406][262582] Updated weights for policy 0, policy_version 9711 (0.0009) +[2026-06-02 16:51:18,607][262582] Updated weights for policy 0, policy_version 9721 (0.0008) +[2026-06-02 16:51:19,313][262582] Updated weights for policy 0, policy_version 9731 (0.0008) +[2026-06-02 16:51:19,509][262582] Updated weights for policy 0, policy_version 9741 (0.0008) +[2026-06-02 16:51:19,710][262582] Updated weights for policy 0, policy_version 9751 (0.0008) +[2026-06-02 16:51:19,910][262582] Updated weights for policy 0, policy_version 9761 (0.0008) +[2026-06-02 16:51:20,106][262582] Updated weights for policy 0, policy_version 9771 (0.0008) +[2026-06-02 16:51:20,320][262582] Updated weights for policy 0, policy_version 9781 (0.0008) +[2026-06-02 16:51:20,520][262582] Updated weights for policy 0, policy_version 9791 (0.0008) +[2026-06-02 16:51:21,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.5, 300 sec: 18230.9). Total num frames: 5013504. Throughput: 0: 18213.0. Samples: 4994304. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 16:51:21,008][260776] Avg episode reward: [(0, '265.033')] +[2026-06-02 16:51:21,203][262582] Updated weights for policy 0, policy_version 9801 (0.0008) +[2026-06-02 16:51:21,407][262582] Updated weights for policy 0, policy_version 9811 (0.0008) +[2026-06-02 16:51:21,606][262582] Updated weights for policy 0, policy_version 9821 (0.0008) +[2026-06-02 16:51:21,832][262582] Updated weights for policy 0, policy_version 9832 (0.0008) +[2026-06-02 16:51:22,033][262582] Updated weights for policy 0, policy_version 9842 (0.0008) +[2026-06-02 16:51:22,235][262582] Updated weights for policy 0, policy_version 9852 (0.0008) +[2026-06-02 16:51:22,304][262026] Saving new best policy, reward=265.033! +[2026-06-02 16:51:22,946][262582] Updated weights for policy 0, policy_version 9862 (0.0009) +[2026-06-02 16:51:23,137][262582] Updated weights for policy 0, policy_version 9872 (0.0008) +[2026-06-02 16:51:23,354][262582] Updated weights for policy 0, policy_version 9883 (0.0008) +[2026-06-02 16:51:23,557][262582] Updated weights for policy 0, policy_version 9893 (0.0008) +[2026-06-02 16:51:23,748][262582] Updated weights for policy 0, policy_version 9903 (0.0009) +[2026-06-02 16:51:23,962][262582] Updated weights for policy 0, policy_version 9913 (0.0008) +[2026-06-02 16:51:24,642][262582] Updated weights for policy 0, policy_version 9923 (0.0008) +[2026-06-02 16:51:24,834][262582] Updated weights for policy 0, policy_version 9933 (0.0008) +[2026-06-02 16:51:25,031][262582] Updated weights for policy 0, policy_version 9943 (0.0008) +[2026-06-02 16:51:25,242][262582] Updated weights for policy 0, policy_version 9954 (0.0008) +[2026-06-02 16:51:25,453][262582] Updated weights for policy 0, policy_version 9964 (0.0008) +[2026-06-02 16:51:25,664][262582] Updated weights for policy 0, policy_version 9974 (0.0008) +[2026-06-02 16:51:25,860][262582] Updated weights for policy 0, policy_version 9984 (0.0008) +[2026-06-02 16:51:26,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.6, 300 sec: 18256.5). Total num frames: 5111808. Throughput: 0: 18349.5. Samples: 5105024. Policy #0 lag: (min: 25.0, avg: 57.5, max: 89.0) +[2026-06-02 16:51:26,007][260776] Avg episode reward: [(0, '281.283')] +[2026-06-02 16:51:26,011][262026] Saving new best policy, reward=281.283! +[2026-06-02 16:51:26,544][262582] Updated weights for policy 0, policy_version 9994 (0.0008) +[2026-06-02 16:51:26,732][262582] Updated weights for policy 0, policy_version 10004 (0.0008) +[2026-06-02 16:51:26,941][262582] Updated weights for policy 0, policy_version 10014 (0.0008) +[2026-06-02 16:51:27,166][262582] Updated weights for policy 0, policy_version 10025 (0.0008) +[2026-06-02 16:51:27,379][262582] Updated weights for policy 0, policy_version 10036 (0.0008) +[2026-06-02 16:51:27,578][262582] Updated weights for policy 0, policy_version 10046 (0.0008) +[2026-06-02 16:51:28,266][262582] Updated weights for policy 0, policy_version 10056 (0.0008) +[2026-06-02 16:51:28,463][262582] Updated weights for policy 0, policy_version 10066 (0.0008) +[2026-06-02 16:51:28,656][262582] Updated weights for policy 0, policy_version 10076 (0.0008) +[2026-06-02 16:51:28,868][262582] Updated weights for policy 0, policy_version 10086 (0.0008) +[2026-06-02 16:51:29,076][262582] Updated weights for policy 0, policy_version 10096 (0.0009) +[2026-06-02 16:51:29,281][262582] Updated weights for policy 0, policy_version 10106 (0.0008) +[2026-06-02 16:51:29,969][262582] Updated weights for policy 0, policy_version 10116 (0.0008) +[2026-06-02 16:51:30,150][262582] Updated weights for policy 0, policy_version 10126 (0.0008) +[2026-06-02 16:51:30,347][262582] Updated weights for policy 0, policy_version 10136 (0.0008) +[2026-06-02 16:51:30,548][262582] Updated weights for policy 0, policy_version 10146 (0.0008) +[2026-06-02 16:51:30,749][262582] Updated weights for policy 0, policy_version 10156 (0.0008) +[2026-06-02 16:51:30,945][262582] Updated weights for policy 0, policy_version 10166 (0.0008) +[2026-06-02 16:51:31,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 18166.1). Total num frames: 5177344. Throughput: 0: 18403.6. Samples: 5217536. Policy #0 lag: (min: 25.0, avg: 57.5, max: 89.0) +[2026-06-02 16:51:31,008][260776] Avg episode reward: [(0, '274.991')] +[2026-06-02 16:51:31,146][262582] Updated weights for policy 0, policy_version 10176 (0.0008) +[2026-06-02 16:51:31,822][262582] Updated weights for policy 0, policy_version 10186 (0.0009) +[2026-06-02 16:51:32,028][262582] Updated weights for policy 0, policy_version 10196 (0.0009) +[2026-06-02 16:51:32,220][262582] Updated weights for policy 0, policy_version 10206 (0.0008) +[2026-06-02 16:51:32,446][262582] Updated weights for policy 0, policy_version 10217 (0.0009) +[2026-06-02 16:51:32,643][262582] Updated weights for policy 0, policy_version 10227 (0.0008) +[2026-06-02 16:51:32,853][262582] Updated weights for policy 0, policy_version 10237 (0.0008) +[2026-06-02 16:51:33,546][262582] Updated weights for policy 0, policy_version 10247 (0.0009) +[2026-06-02 16:51:33,741][262582] Updated weights for policy 0, policy_version 10257 (0.0008) +[2026-06-02 16:51:33,965][262582] Updated weights for policy 0, policy_version 10268 (0.0009) +[2026-06-02 16:51:34,162][262582] Updated weights for policy 0, policy_version 10278 (0.0008) +[2026-06-02 16:51:34,366][262582] Updated weights for policy 0, policy_version 10288 (0.0009) +[2026-06-02 16:51:34,568][262582] Updated weights for policy 0, policy_version 10298 (0.0009) +[2026-06-02 16:51:35,284][262582] Updated weights for policy 0, policy_version 10308 (0.0009) +[2026-06-02 16:51:35,490][262582] Updated weights for policy 0, policy_version 10319 (0.0009) +[2026-06-02 16:51:35,689][262582] Updated weights for policy 0, policy_version 10329 (0.0008) +[2026-06-02 16:51:35,913][262582] Updated weights for policy 0, policy_version 10340 (0.0009) +[2026-06-02 16:51:36,007][260776] Fps is (10 sec: 16383.7, 60 sec: 18568.5, 300 sec: 18191.9). Total num frames: 5275648. Throughput: 0: 18480.3. Samples: 5275008. Policy #0 lag: (min: 58.0, avg: 86.0, max: 122.0) +[2026-06-02 16:51:36,008][260776] Avg episode reward: [(0, '293.762')] +[2026-06-02 16:51:36,133][262582] Updated weights for policy 0, policy_version 10351 (0.0008) +[2026-06-02 16:51:36,370][262582] Updated weights for policy 0, policy_version 10363 (0.0008) +[2026-06-02 16:51:36,461][262026] Saving new best policy, reward=293.762! +[2026-06-02 16:51:37,072][262582] Updated weights for policy 0, policy_version 10373 (0.0008) +[2026-06-02 16:51:37,274][262582] Updated weights for policy 0, policy_version 10383 (0.0008) +[2026-06-02 16:51:37,470][262582] Updated weights for policy 0, policy_version 10393 (0.0008) +[2026-06-02 16:51:37,685][262582] Updated weights for policy 0, policy_version 10404 (0.0008) +[2026-06-02 16:51:37,915][262582] Updated weights for policy 0, policy_version 10415 (0.0008) +[2026-06-02 16:51:38,116][262582] Updated weights for policy 0, policy_version 10425 (0.0008) +[2026-06-02 16:51:38,825][262582] Updated weights for policy 0, policy_version 10435 (0.0009) +[2026-06-02 16:51:39,034][262582] Updated weights for policy 0, policy_version 10446 (0.0008) +[2026-06-02 16:51:39,274][262582] Updated weights for policy 0, policy_version 10458 (0.0008) +[2026-06-02 16:51:39,474][262582] Updated weights for policy 0, policy_version 10468 (0.0008) +[2026-06-02 16:51:39,674][262582] Updated weights for policy 0, policy_version 10478 (0.0008) +[2026-06-02 16:51:39,874][262582] Updated weights for policy 0, policy_version 10488 (0.0008) +[2026-06-02 16:51:40,573][262582] Updated weights for policy 0, policy_version 10498 (0.0008) +[2026-06-02 16:51:40,758][262582] Updated weights for policy 0, policy_version 10508 (0.0009) +[2026-06-02 16:51:40,977][262582] Updated weights for policy 0, policy_version 10519 (0.0008) +[2026-06-02 16:51:41,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18216.8). Total num frames: 5373952. Throughput: 0: 18289.8. Samples: 5382016. Policy #0 lag: (min: 58.0, avg: 86.0, max: 122.0) +[2026-06-02 16:51:41,008][260776] Avg episode reward: [(0, '316.769')] +[2026-06-02 16:51:41,181][262582] Updated weights for policy 0, policy_version 10529 (0.0008) +[2026-06-02 16:51:41,399][262582] Updated weights for policy 0, policy_version 10540 (0.0008) +[2026-06-02 16:51:41,617][262582] Updated weights for policy 0, policy_version 10550 (0.0009) +[2026-06-02 16:51:41,808][262026] Saving new best policy, reward=316.769! +[2026-06-02 16:51:41,810][262582] Updated weights for policy 0, policy_version 10560 (0.0009) +[2026-06-02 16:51:42,533][262582] Updated weights for policy 0, policy_version 10570 (0.0008) +[2026-06-02 16:51:42,756][262582] Updated weights for policy 0, policy_version 10581 (0.0008) +[2026-06-02 16:51:42,956][262582] Updated weights for policy 0, policy_version 10591 (0.0008) +[2026-06-02 16:51:43,193][262582] Updated weights for policy 0, policy_version 10603 (0.0008) +[2026-06-02 16:51:43,391][262582] Updated weights for policy 0, policy_version 10613 (0.0009) +[2026-06-02 16:51:44,121][262582] Updated weights for policy 0, policy_version 10625 (0.0009) +[2026-06-02 16:51:44,331][262582] Updated weights for policy 0, policy_version 10636 (0.0008) +[2026-06-02 16:51:44,527][262582] Updated weights for policy 0, policy_version 10646 (0.0008) +[2026-06-02 16:51:44,731][262582] Updated weights for policy 0, policy_version 10656 (0.0008) +[2026-06-02 16:51:44,933][262582] Updated weights for policy 0, policy_version 10666 (0.0008) +[2026-06-02 16:51:45,137][262582] Updated weights for policy 0, policy_version 10676 (0.0008) +[2026-06-02 16:51:45,333][262582] Updated weights for policy 0, policy_version 10686 (0.0008) +[2026-06-02 16:51:46,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 5472256. Throughput: 0: 18244.2. Samples: 5487488. Policy #0 lag: (min: 63.0, avg: 80.0, max: 127.0) +[2026-06-02 16:51:46,007][260776] Avg episode reward: [(0, '321.490')] +[2026-06-02 16:51:46,019][262582] Updated weights for policy 0, policy_version 10696 (0.0008) +[2026-06-02 16:51:46,221][262582] Updated weights for policy 0, policy_version 10706 (0.0008) +[2026-06-02 16:51:46,416][262582] Updated weights for policy 0, policy_version 10716 (0.0008) +[2026-06-02 16:51:46,615][262582] Updated weights for policy 0, policy_version 10726 (0.0008) +[2026-06-02 16:51:46,817][262582] Updated weights for policy 0, policy_version 10736 (0.0009) +[2026-06-02 16:51:47,047][262582] Updated weights for policy 0, policy_version 10747 (0.0009) +[2026-06-02 16:51:47,138][262026] Saving new best policy, reward=321.490! +[2026-06-02 16:51:47,732][262582] Updated weights for policy 0, policy_version 10757 (0.0008) +[2026-06-02 16:51:47,941][262582] Updated weights for policy 0, policy_version 10768 (0.0008) +[2026-06-02 16:51:48,145][262582] Updated weights for policy 0, policy_version 10778 (0.0008) +[2026-06-02 16:51:48,349][262582] Updated weights for policy 0, policy_version 10788 (0.0008) +[2026-06-02 16:51:48,562][262582] Updated weights for policy 0, policy_version 10799 (0.0008) +[2026-06-02 16:51:48,768][262582] Updated weights for policy 0, policy_version 10809 (0.0008) +[2026-06-02 16:51:49,484][262582] Updated weights for policy 0, policy_version 10819 (0.0008) +[2026-06-02 16:51:49,694][262582] Updated weights for policy 0, policy_version 10830 (0.0008) +[2026-06-02 16:51:49,891][262582] Updated weights for policy 0, policy_version 10840 (0.0008) +[2026-06-02 16:51:50,093][262582] Updated weights for policy 0, policy_version 10850 (0.0008) +[2026-06-02 16:51:50,296][262582] Updated weights for policy 0, policy_version 10860 (0.0008) +[2026-06-02 16:51:50,489][262582] Updated weights for policy 0, policy_version 10870 (0.0008) +[2026-06-02 16:51:50,693][262582] Updated weights for policy 0, policy_version 10880 (0.0008) +[2026-06-02 16:51:51,007][260776] Fps is (10 sec: 19661.0, 60 sec: 18568.6, 300 sec: 18439.0). Total num frames: 5570560. Throughput: 0: 18230.1. Samples: 5545856. Policy #0 lag: (min: 63.0, avg: 80.0, max: 127.0) +[2026-06-02 16:51:51,007][260776] Avg episode reward: [(0, '336.574')] +[2026-06-02 16:51:51,011][262026] Saving new best policy, reward=336.574! +[2026-06-02 16:51:51,405][262582] Updated weights for policy 0, policy_version 10890 (0.0005) +[2026-06-02 16:51:51,609][262582] Updated weights for policy 0, policy_version 10900 (0.0008) +[2026-06-02 16:51:51,811][262582] Updated weights for policy 0, policy_version 10910 (0.0008) +[2026-06-02 16:51:52,010][262582] Updated weights for policy 0, policy_version 10920 (0.0008) +[2026-06-02 16:51:52,228][262582] Updated weights for policy 0, policy_version 10931 (0.0008) +[2026-06-02 16:51:52,432][262582] Updated weights for policy 0, policy_version 10941 (0.0008) +[2026-06-02 16:51:53,120][262582] Updated weights for policy 0, policy_version 10951 (0.0008) +[2026-06-02 16:51:53,318][262582] Updated weights for policy 0, policy_version 10961 (0.0008) +[2026-06-02 16:51:53,517][262582] Updated weights for policy 0, policy_version 10971 (0.0008) +[2026-06-02 16:51:53,719][262582] Updated weights for policy 0, policy_version 10981 (0.0008) +[2026-06-02 16:51:53,927][262582] Updated weights for policy 0, policy_version 10992 (0.0009) +[2026-06-02 16:51:54,136][262582] Updated weights for policy 0, policy_version 11002 (0.0008) +[2026-06-02 16:51:54,865][262582] Updated weights for policy 0, policy_version 11012 (0.0006) +[2026-06-02 16:51:55,058][262582] Updated weights for policy 0, policy_version 11022 (0.0004) +[2026-06-02 16:51:55,258][262582] Updated weights for policy 0, policy_version 11032 (0.0008) +[2026-06-02 16:51:55,457][262582] Updated weights for policy 0, policy_version 11042 (0.0008) +[2026-06-02 16:51:55,663][262582] Updated weights for policy 0, policy_version 11052 (0.0008) +[2026-06-02 16:51:55,887][262582] Updated weights for policy 0, policy_version 11063 (0.0008) +[2026-06-02 16:51:56,007][260776] Fps is (10 sec: 16384.1, 60 sec: 18022.4, 300 sec: 18327.9). Total num frames: 5636096. Throughput: 0: 18238.6. Samples: 5651456. Policy #0 lag: (min: 62.0, avg: 77.9, max: 126.0) +[2026-06-02 16:51:56,007][260776] Avg episode reward: [(0, '352.626')] +[2026-06-02 16:51:56,059][262026] Saving new best policy, reward=352.626! +[2026-06-02 16:51:56,610][262582] Updated weights for policy 0, policy_version 11073 (0.0008) +[2026-06-02 16:51:56,793][262582] Updated weights for policy 0, policy_version 11083 (0.0008) +[2026-06-02 16:51:56,984][262582] Updated weights for policy 0, policy_version 11093 (0.0008) +[2026-06-02 16:51:57,213][262582] Updated weights for policy 0, policy_version 11104 (0.0008) +[2026-06-02 16:51:57,433][262582] Updated weights for policy 0, policy_version 11115 (0.0008) +[2026-06-02 16:51:57,634][262582] Updated weights for policy 0, policy_version 11125 (0.0008) +[2026-06-02 16:51:57,836][262582] Updated weights for policy 0, policy_version 11135 (0.0008) +[2026-06-02 16:51:58,540][262582] Updated weights for policy 0, policy_version 11146 (0.0008) +[2026-06-02 16:51:58,743][262582] Updated weights for policy 0, policy_version 11156 (0.0008) +[2026-06-02 16:51:58,968][262582] Updated weights for policy 0, policy_version 11167 (0.0008) +[2026-06-02 16:51:59,160][262582] Updated weights for policy 0, policy_version 11177 (0.0008) +[2026-06-02 16:51:59,362][262582] Updated weights for policy 0, policy_version 11187 (0.0009) +[2026-06-02 16:51:59,569][262582] Updated weights for policy 0, policy_version 11197 (0.0009) +[2026-06-02 16:52:00,298][262582] Updated weights for policy 0, policy_version 11208 (0.0009) +[2026-06-02 16:52:00,491][262582] Updated weights for policy 0, policy_version 11218 (0.0008) +[2026-06-02 16:52:00,687][262582] Updated weights for policy 0, policy_version 11228 (0.0009) +[2026-06-02 16:52:00,908][262582] Updated weights for policy 0, policy_version 11239 (0.0009) +[2026-06-02 16:52:01,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18568.5, 300 sec: 18216.8). Total num frames: 5734400. Throughput: 0: 18560.0. Samples: 5771136. Policy #0 lag: (min: 62.0, avg: 77.9, max: 126.0) +[2026-06-02 16:52:01,007][260776] Avg episode reward: [(0, '357.501')] +[2026-06-02 16:52:01,119][262582] Updated weights for policy 0, policy_version 11249 (0.0008) +[2026-06-02 16:52:01,321][262582] Updated weights for policy 0, policy_version 11259 (0.0008) +[2026-06-02 16:52:01,410][262026] Saving new best policy, reward=357.501! +[2026-06-02 16:52:02,015][262582] Updated weights for policy 0, policy_version 11269 (0.0008) +[2026-06-02 16:52:02,215][262582] Updated weights for policy 0, policy_version 11280 (0.0008) +[2026-06-02 16:52:02,417][262582] Updated weights for policy 0, policy_version 11290 (0.0008) +[2026-06-02 16:52:02,622][262582] Updated weights for policy 0, policy_version 11300 (0.0008) +[2026-06-02 16:52:02,869][262582] Updated weights for policy 0, policy_version 11312 (0.0008) +[2026-06-02 16:52:03,073][262582] Updated weights for policy 0, policy_version 11322 (0.0009) +[2026-06-02 16:52:03,775][262582] Updated weights for policy 0, policy_version 11332 (0.0009) +[2026-06-02 16:52:03,961][262582] Updated weights for policy 0, policy_version 11342 (0.0008) +[2026-06-02 16:52:04,169][262582] Updated weights for policy 0, policy_version 11352 (0.0008) +[2026-06-02 16:52:04,385][262582] Updated weights for policy 0, policy_version 11363 (0.0008) +[2026-06-02 16:52:04,583][262582] Updated weights for policy 0, policy_version 11373 (0.0008) +[2026-06-02 16:52:04,793][262582] Updated weights for policy 0, policy_version 11383 (0.0008) +[2026-06-02 16:52:05,502][262582] Updated weights for policy 0, policy_version 11393 (0.0008) +[2026-06-02 16:52:05,691][262582] Updated weights for policy 0, policy_version 11403 (0.0008) +[2026-06-02 16:52:05,886][262582] Updated weights for policy 0, policy_version 11413 (0.0009) +[2026-06-02 16:52:06,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.5, 300 sec: 18327.9). Total num frames: 5832704. Throughput: 0: 18318.2. Samples: 5818624. Policy #0 lag: (min: 54.0, avg: 70.7, max: 118.0) +[2026-06-02 16:52:06,008][260776] Avg episode reward: [(0, '372.449')] +[2026-06-02 16:52:06,083][262582] Updated weights for policy 0, policy_version 11423 (0.0009) +[2026-06-02 16:52:06,296][262582] Updated weights for policy 0, policy_version 11433 (0.0008) +[2026-06-02 16:52:06,494][262582] Updated weights for policy 0, policy_version 11443 (0.0008) +[2026-06-02 16:52:06,698][262582] Updated weights for policy 0, policy_version 11453 (0.0008) +[2026-06-02 16:52:06,749][262026] Saving new best policy, reward=372.449! +[2026-06-02 16:52:07,380][262582] Updated weights for policy 0, policy_version 11463 (0.0009) +[2026-06-02 16:52:07,601][262582] Updated weights for policy 0, policy_version 11474 (0.0009) +[2026-06-02 16:52:07,801][262582] Updated weights for policy 0, policy_version 11484 (0.0008) +[2026-06-02 16:52:08,003][262582] Updated weights for policy 0, policy_version 11494 (0.0009) +[2026-06-02 16:52:08,206][262582] Updated weights for policy 0, policy_version 11504 (0.0010) +[2026-06-02 16:52:08,416][262582] Updated weights for policy 0, policy_version 11514 (0.0009) +[2026-06-02 16:52:09,116][262582] Updated weights for policy 0, policy_version 11524 (0.0009) +[2026-06-02 16:52:09,343][262582] Updated weights for policy 0, policy_version 11536 (0.0008) +[2026-06-02 16:52:09,538][262582] Updated weights for policy 0, policy_version 11546 (0.0008) +[2026-06-02 16:52:09,750][262582] Updated weights for policy 0, policy_version 11556 (0.0009) +[2026-06-02 16:52:09,941][262582] Updated weights for policy 0, policy_version 11566 (0.0008) +[2026-06-02 16:52:10,151][262582] Updated weights for policy 0, policy_version 11576 (0.0008) +[2026-06-02 16:52:10,844][262582] Updated weights for policy 0, policy_version 11586 (0.0008) +[2026-06-02 16:52:11,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18327.9). Total num frames: 5931008. Throughput: 0: 18474.6. Samples: 5936384. Policy #0 lag: (min: 54.0, avg: 70.7, max: 118.0) +[2026-06-02 16:52:11,008][260776] Avg episode reward: [(0, '391.715')] +[2026-06-02 16:52:11,046][262582] Updated weights for policy 0, policy_version 11596 (0.0008) +[2026-06-02 16:52:11,241][262582] Updated weights for policy 0, policy_version 11606 (0.0008) +[2026-06-02 16:52:11,437][262582] Updated weights for policy 0, policy_version 11616 (0.0009) +[2026-06-02 16:52:11,639][262582] Updated weights for policy 0, policy_version 11626 (0.0008) +[2026-06-02 16:52:11,840][262582] Updated weights for policy 0, policy_version 11636 (0.0009) +[2026-06-02 16:52:12,047][262582] Updated weights for policy 0, policy_version 11646 (0.0008) +[2026-06-02 16:52:12,077][262026] Saving new best policy, reward=391.715! +[2026-06-02 16:52:12,763][262582] Updated weights for policy 0, policy_version 11656 (0.0009) +[2026-06-02 16:52:12,978][262582] Updated weights for policy 0, policy_version 11667 (0.0008) +[2026-06-02 16:52:13,198][262582] Updated weights for policy 0, policy_version 11678 (0.0009) +[2026-06-02 16:52:13,395][262582] Updated weights for policy 0, policy_version 11688 (0.0008) +[2026-06-02 16:52:13,596][262582] Updated weights for policy 0, policy_version 11698 (0.0008) +[2026-06-02 16:52:13,812][262582] Updated weights for policy 0, policy_version 11708 (0.0009) +[2026-06-02 16:52:14,494][262582] Updated weights for policy 0, policy_version 11718 (0.0009) +[2026-06-02 16:52:14,689][262582] Updated weights for policy 0, policy_version 11728 (0.0008) +[2026-06-02 16:52:14,890][262582] Updated weights for policy 0, policy_version 11738 (0.0008) +[2026-06-02 16:52:15,084][262582] Updated weights for policy 0, policy_version 11748 (0.0008) +[2026-06-02 16:52:15,313][262582] Updated weights for policy 0, policy_version 11759 (0.0008) +[2026-06-02 16:52:15,507][262582] Updated weights for policy 0, policy_version 11769 (0.0008) +[2026-06-02 16:52:16,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18327.9). Total num frames: 6029312. Throughput: 0: 18323.9. Samples: 6042112. Policy #0 lag: (min: 53.0, avg: 69.9, max: 117.0) +[2026-06-02 16:52:16,008][260776] Avg episode reward: [(0, '383.490')] +[2026-06-02 16:52:16,227][262582] Updated weights for policy 0, policy_version 11779 (0.0008) +[2026-06-02 16:52:16,437][262582] Updated weights for policy 0, policy_version 11790 (0.0008) +[2026-06-02 16:52:16,628][262582] Updated weights for policy 0, policy_version 11800 (0.0008) +[2026-06-02 16:52:16,831][262582] Updated weights for policy 0, policy_version 11810 (0.0008) +[2026-06-02 16:52:17,041][262582] Updated weights for policy 0, policy_version 11820 (0.0008) +[2026-06-02 16:52:17,239][262582] Updated weights for policy 0, policy_version 11830 (0.0008) +[2026-06-02 16:52:17,967][262582] Updated weights for policy 0, policy_version 11841 (0.0008) +[2026-06-02 16:52:18,158][262582] Updated weights for policy 0, policy_version 11851 (0.0008) +[2026-06-02 16:52:18,350][262582] Updated weights for policy 0, policy_version 11861 (0.0009) +[2026-06-02 16:52:18,560][262582] Updated weights for policy 0, policy_version 11871 (0.0008) +[2026-06-02 16:52:18,753][262582] Updated weights for policy 0, policy_version 11881 (0.0008) +[2026-06-02 16:52:18,952][262582] Updated weights for policy 0, policy_version 11891 (0.0008) +[2026-06-02 16:52:19,161][262582] Updated weights for policy 0, policy_version 11901 (0.0008) +[2026-06-02 16:52:19,848][262582] Updated weights for policy 0, policy_version 11911 (0.0008) +[2026-06-02 16:52:20,051][262582] Updated weights for policy 0, policy_version 11921 (0.0008) +[2026-06-02 16:52:20,248][262582] Updated weights for policy 0, policy_version 11931 (0.0008) +[2026-06-02 16:52:20,446][262582] Updated weights for policy 0, policy_version 11941 (0.0008) +[2026-06-02 16:52:20,653][262582] Updated weights for policy 0, policy_version 11951 (0.0008) +[2026-06-02 16:52:20,851][262582] Updated weights for policy 0, policy_version 11961 (0.0008) +[2026-06-02 16:52:21,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18327.9). Total num frames: 6127616. Throughput: 0: 18352.4. Samples: 6100864. Policy #0 lag: (min: 53.0, avg: 69.9, max: 117.0) +[2026-06-02 16:52:21,008][260776] Avg episode reward: [(0, '378.573')] +[2026-06-02 16:52:21,580][262582] Updated weights for policy 0, policy_version 11971 (0.0009) +[2026-06-02 16:52:21,768][262582] Updated weights for policy 0, policy_version 11981 (0.0008) +[2026-06-02 16:52:21,970][262582] Updated weights for policy 0, policy_version 11991 (0.0009) +[2026-06-02 16:52:22,194][262582] Updated weights for policy 0, policy_version 12002 (0.0009) +[2026-06-02 16:52:22,394][262582] Updated weights for policy 0, policy_version 12012 (0.0009) +[2026-06-02 16:52:22,588][262582] Updated weights for policy 0, policy_version 12022 (0.0008) +[2026-06-02 16:52:22,792][262582] Updated weights for policy 0, policy_version 12032 (0.0009) +[2026-06-02 16:52:23,464][262582] Updated weights for policy 0, policy_version 12042 (0.0009) +[2026-06-02 16:52:23,665][262582] Updated weights for policy 0, policy_version 12052 (0.0009) +[2026-06-02 16:52:23,865][262582] Updated weights for policy 0, policy_version 12062 (0.0008) +[2026-06-02 16:52:24,066][262582] Updated weights for policy 0, policy_version 12072 (0.0008) +[2026-06-02 16:52:24,267][262582] Updated weights for policy 0, policy_version 12082 (0.0008) +[2026-06-02 16:52:24,477][262582] Updated weights for policy 0, policy_version 12092 (0.0009) +[2026-06-02 16:52:25,180][262582] Updated weights for policy 0, policy_version 12102 (0.0009) +[2026-06-02 16:52:25,395][262582] Updated weights for policy 0, policy_version 12113 (0.0006) +[2026-06-02 16:52:25,594][262582] Updated weights for policy 0, policy_version 12123 (0.0007) +[2026-06-02 16:52:25,791][262582] Updated weights for policy 0, policy_version 12133 (0.0008) +[2026-06-02 16:52:26,000][262582] Updated weights for policy 0, policy_version 12143 (0.0009) +[2026-06-02 16:52:26,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18022.4, 300 sec: 18327.9). Total num frames: 6193152. Throughput: 0: 18318.2. Samples: 6206336. Policy #0 lag: (min: 55.0, avg: 72.3, max: 119.0) +[2026-06-02 16:52:26,008][260776] Avg episode reward: [(0, '369.626')] +[2026-06-02 16:52:26,203][262582] Updated weights for policy 0, policy_version 12153 (0.0008) +[2026-06-02 16:52:26,936][262582] Updated weights for policy 0, policy_version 12164 (0.0009) +[2026-06-02 16:52:27,148][262582] Updated weights for policy 0, policy_version 12175 (0.0008) +[2026-06-02 16:52:27,351][262582] Updated weights for policy 0, policy_version 12185 (0.0008) +[2026-06-02 16:52:27,554][262582] Updated weights for policy 0, policy_version 12195 (0.0008) +[2026-06-02 16:52:27,747][262582] Updated weights for policy 0, policy_version 12205 (0.0008) +[2026-06-02 16:52:27,957][262582] Updated weights for policy 0, policy_version 12215 (0.0008) +[2026-06-02 16:52:28,661][262582] Updated weights for policy 0, policy_version 12225 (0.0008) +[2026-06-02 16:52:28,856][262582] Updated weights for policy 0, policy_version 12235 (0.0008) +[2026-06-02 16:52:29,049][262582] Updated weights for policy 0, policy_version 12245 (0.0008) +[2026-06-02 16:52:29,254][262582] Updated weights for policy 0, policy_version 12255 (0.0008) +[2026-06-02 16:52:29,454][262582] Updated weights for policy 0, policy_version 12265 (0.0008) +[2026-06-02 16:52:29,659][262582] Updated weights for policy 0, policy_version 12275 (0.0008) +[2026-06-02 16:52:29,860][262582] Updated weights for policy 0, policy_version 12285 (0.0007) +[2026-06-02 16:52:30,558][262582] Updated weights for policy 0, policy_version 12295 (0.0008) +[2026-06-02 16:52:30,756][262582] Updated weights for policy 0, policy_version 12305 (0.0008) +[2026-06-02 16:52:30,951][262582] Updated weights for policy 0, policy_version 12315 (0.0005) +[2026-06-02 16:52:31,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18568.5, 300 sec: 18327.9). Total num frames: 6291456. Throughput: 0: 18591.3. Samples: 6324096. Policy #0 lag: (min: 55.0, avg: 72.3, max: 119.0) +[2026-06-02 16:52:31,007][260776] Avg episode reward: [(0, '363.544')] +[2026-06-02 16:52:31,160][262582] Updated weights for policy 0, policy_version 12325 (0.0005) +[2026-06-02 16:52:31,377][262582] Updated weights for policy 0, policy_version 12336 (0.0005) +[2026-06-02 16:52:31,582][262582] Updated weights for policy 0, policy_version 12346 (0.0005) +[2026-06-02 16:52:32,275][262582] Updated weights for policy 0, policy_version 12356 (0.0006) +[2026-06-02 16:52:32,461][262582] Updated weights for policy 0, policy_version 12366 (0.0008) +[2026-06-02 16:52:32,662][262582] Updated weights for policy 0, policy_version 12376 (0.0008) +[2026-06-02 16:52:32,892][262582] Updated weights for policy 0, policy_version 12387 (0.0008) +[2026-06-02 16:52:33,095][262582] Updated weights for policy 0, policy_version 12397 (0.0008) +[2026-06-02 16:52:33,300][262582] Updated weights for policy 0, policy_version 12407 (0.0008) +[2026-06-02 16:52:33,987][262582] Updated weights for policy 0, policy_version 12417 (0.0008) +[2026-06-02 16:52:34,178][262582] Updated weights for policy 0, policy_version 12427 (0.0008) +[2026-06-02 16:52:34,372][262582] Updated weights for policy 0, policy_version 12437 (0.0008) +[2026-06-02 16:52:34,577][262582] Updated weights for policy 0, policy_version 12447 (0.0008) +[2026-06-02 16:52:34,774][262582] Updated weights for policy 0, policy_version 12457 (0.0009) +[2026-06-02 16:52:34,990][262582] Updated weights for policy 0, policy_version 12467 (0.0008) +[2026-06-02 16:52:35,186][262582] Updated weights for policy 0, policy_version 12477 (0.0008) +[2026-06-02 16:52:35,879][262582] Updated weights for policy 0, policy_version 12487 (0.0008) +[2026-06-02 16:52:36,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18327.9). Total num frames: 6389760. Throughput: 0: 18375.1. Samples: 6372736. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 16:52:36,008][260776] Avg episode reward: [(0, '358.310')] +[2026-06-02 16:52:36,083][262582] Updated weights for policy 0, policy_version 12497 (0.0008) +[2026-06-02 16:52:36,272][262582] Updated weights for policy 0, policy_version 12507 (0.0008) +[2026-06-02 16:52:36,486][262582] Updated weights for policy 0, policy_version 12517 (0.0008) +[2026-06-02 16:52:36,688][262582] Updated weights for policy 0, policy_version 12527 (0.0008) +[2026-06-02 16:52:36,884][262582] Updated weights for policy 0, policy_version 12537 (0.0008) +[2026-06-02 16:52:37,601][262582] Updated weights for policy 0, policy_version 12547 (0.0009) +[2026-06-02 16:52:37,787][262582] Updated weights for policy 0, policy_version 12557 (0.0008) +[2026-06-02 16:52:37,990][262582] Updated weights for policy 0, policy_version 12567 (0.0008) +[2026-06-02 16:52:38,187][262582] Updated weights for policy 0, policy_version 12577 (0.0008) +[2026-06-02 16:52:38,398][262582] Updated weights for policy 0, policy_version 12587 (0.0009) +[2026-06-02 16:52:38,602][262582] Updated weights for policy 0, policy_version 12597 (0.0008) +[2026-06-02 16:52:38,816][262582] Updated weights for policy 0, policy_version 12608 (0.0008) +[2026-06-02 16:52:39,522][262582] Updated weights for policy 0, policy_version 12618 (0.0009) +[2026-06-02 16:52:39,714][262582] Updated weights for policy 0, policy_version 12628 (0.0009) +[2026-06-02 16:52:39,918][262582] Updated weights for policy 0, policy_version 12638 (0.0008) +[2026-06-02 16:52:40,111][262582] Updated weights for policy 0, policy_version 12648 (0.0008) +[2026-06-02 16:52:40,314][262582] Updated weights for policy 0, policy_version 12658 (0.0008) +[2026-06-02 16:52:40,537][262582] Updated weights for policy 0, policy_version 12669 (0.0008) +[2026-06-02 16:52:41,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18327.9). Total num frames: 6488064. Throughput: 0: 18673.8. Samples: 6491776. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 16:52:41,008][260776] Avg episode reward: [(0, '372.820')] +[2026-06-02 16:52:41,236][262582] Updated weights for policy 0, policy_version 12679 (0.0008) +[2026-06-02 16:52:41,436][262582] Updated weights for policy 0, policy_version 12689 (0.0008) +[2026-06-02 16:52:41,636][262582] Updated weights for policy 0, policy_version 12699 (0.0008) +[2026-06-02 16:52:41,860][262582] Updated weights for policy 0, policy_version 12710 (0.0008) +[2026-06-02 16:52:42,056][262582] Updated weights for policy 0, policy_version 12720 (0.0009) +[2026-06-02 16:52:42,266][262582] Updated weights for policy 0, policy_version 12730 (0.0008) +[2026-06-02 16:52:42,971][262582] Updated weights for policy 0, policy_version 12740 (0.0008) +[2026-06-02 16:52:43,176][262582] Updated weights for policy 0, policy_version 12751 (0.0008) +[2026-06-02 16:52:43,385][262582] Updated weights for policy 0, policy_version 12761 (0.0009) +[2026-06-02 16:52:43,582][262582] Updated weights for policy 0, policy_version 12771 (0.0008) +[2026-06-02 16:52:43,787][262582] Updated weights for policy 0, policy_version 12781 (0.0008) +[2026-06-02 16:52:43,990][262582] Updated weights for policy 0, policy_version 12791 (0.0008) +[2026-06-02 16:52:44,681][262582] Updated weights for policy 0, policy_version 12801 (0.0008) +[2026-06-02 16:52:44,861][262582] Updated weights for policy 0, policy_version 12811 (0.0008) +[2026-06-02 16:52:45,084][262582] Updated weights for policy 0, policy_version 12822 (0.0008) +[2026-06-02 16:52:45,286][262582] Updated weights for policy 0, policy_version 12832 (0.0008) +[2026-06-02 16:52:45,504][262582] Updated weights for policy 0, policy_version 12842 (0.0009) +[2026-06-02 16:52:45,694][262582] Updated weights for policy 0, policy_version 12852 (0.0008) +[2026-06-02 16:52:45,909][262582] Updated weights for policy 0, policy_version 12862 (0.0008) +[2026-06-02 16:52:46,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 6586368. Throughput: 0: 18358.0. Samples: 6597248. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 16:52:46,008][260776] Avg episode reward: [(0, '367.245')] +[2026-06-02 16:52:46,592][262582] Updated weights for policy 0, policy_version 12872 (0.0008) +[2026-06-02 16:52:46,786][262582] Updated weights for policy 0, policy_version 12882 (0.0008) +[2026-06-02 16:52:46,991][262582] Updated weights for policy 0, policy_version 12892 (0.0008) +[2026-06-02 16:52:47,182][262582] Updated weights for policy 0, policy_version 12902 (0.0008) +[2026-06-02 16:52:47,440][262582] Updated weights for policy 0, policy_version 12915 (0.0009) +[2026-06-02 16:52:47,645][262582] Updated weights for policy 0, policy_version 12925 (0.0008) +[2026-06-02 16:52:48,373][262582] Updated weights for policy 0, policy_version 12936 (0.0009) +[2026-06-02 16:52:48,572][262582] Updated weights for policy 0, policy_version 12946 (0.0008) +[2026-06-02 16:52:48,776][262582] Updated weights for policy 0, policy_version 12956 (0.0008) +[2026-06-02 16:52:48,979][262582] Updated weights for policy 0, policy_version 12966 (0.0008) +[2026-06-02 16:52:49,183][262582] Updated weights for policy 0, policy_version 12976 (0.0008) +[2026-06-02 16:52:49,402][262582] Updated weights for policy 0, policy_version 12987 (0.0009) +[2026-06-02 16:52:50,104][262582] Updated weights for policy 0, policy_version 12997 (0.0008) +[2026-06-02 16:52:50,303][262582] Updated weights for policy 0, policy_version 13007 (0.0008) +[2026-06-02 16:52:50,499][262582] Updated weights for policy 0, policy_version 13017 (0.0008) +[2026-06-02 16:52:50,705][262582] Updated weights for policy 0, policy_version 13027 (0.0009) +[2026-06-02 16:52:50,905][262582] Updated weights for policy 0, policy_version 13037 (0.0008) +[2026-06-02 16:52:51,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 18327.9). Total num frames: 6651904. Throughput: 0: 18616.9. Samples: 6656384. Policy #0 lag: (min: 63.0, avg: 80.5, max: 127.0) +[2026-06-02 16:52:51,008][260776] Avg episode reward: [(0, '353.531')] +[2026-06-02 16:52:51,104][262582] Updated weights for policy 0, policy_version 13047 (0.0008) +[2026-06-02 16:52:51,849][262582] Updated weights for policy 0, policy_version 13058 (0.0008) +[2026-06-02 16:52:52,048][262582] Updated weights for policy 0, policy_version 13069 (0.0008) +[2026-06-02 16:52:52,252][262582] Updated weights for policy 0, policy_version 13079 (0.0008) +[2026-06-02 16:52:52,450][262582] Updated weights for policy 0, policy_version 13089 (0.0009) +[2026-06-02 16:52:52,662][262582] Updated weights for policy 0, policy_version 13099 (0.0008) +[2026-06-02 16:52:52,870][262582] Updated weights for policy 0, policy_version 13109 (0.0008) +[2026-06-02 16:52:53,083][262582] Updated weights for policy 0, policy_version 13120 (0.0008) +[2026-06-02 16:52:53,772][262582] Updated weights for policy 0, policy_version 13130 (0.0008) +[2026-06-02 16:52:53,980][262582] Updated weights for policy 0, policy_version 13140 (0.0008) +[2026-06-02 16:52:54,181][262582] Updated weights for policy 0, policy_version 13150 (0.0009) +[2026-06-02 16:52:54,382][262582] Updated weights for policy 0, policy_version 13160 (0.0008) +[2026-06-02 16:52:54,606][262582] Updated weights for policy 0, policy_version 13171 (0.0008) +[2026-06-02 16:52:54,808][262582] Updated weights for policy 0, policy_version 13181 (0.0009) +[2026-06-02 16:52:55,508][262582] Updated weights for policy 0, policy_version 13191 (0.0009) +[2026-06-02 16:52:55,721][262582] Updated weights for policy 0, policy_version 13202 (0.0008) +[2026-06-02 16:52:55,930][262582] Updated weights for policy 0, policy_version 13212 (0.0009) +[2026-06-02 16:52:56,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18568.5, 300 sec: 18327.9). Total num frames: 6750208. Throughput: 0: 18377.9. Samples: 6763392. Policy #0 lag: (min: 63.0, avg: 80.5, max: 127.0) +[2026-06-02 16:52:56,008][260776] Avg episode reward: [(0, '352.202')] +[2026-06-02 16:52:56,128][262582] Updated weights for policy 0, policy_version 13222 (0.0008) +[2026-06-02 16:52:56,331][262582] Updated weights for policy 0, policy_version 13232 (0.0008) +[2026-06-02 16:52:56,542][262582] Updated weights for policy 0, policy_version 13242 (0.0009) +[2026-06-02 16:52:57,235][262582] Updated weights for policy 0, policy_version 13252 (0.0009) +[2026-06-02 16:52:57,437][262582] Updated weights for policy 0, policy_version 13263 (0.0008) +[2026-06-02 16:52:57,640][262582] Updated weights for policy 0, policy_version 13273 (0.0009) +[2026-06-02 16:52:57,844][262582] Updated weights for policy 0, policy_version 13283 (0.0008) +[2026-06-02 16:52:58,043][262582] Updated weights for policy 0, policy_version 13293 (0.0009) +[2026-06-02 16:52:58,269][262582] Updated weights for policy 0, policy_version 13304 (0.0008) +[2026-06-02 16:52:58,982][262582] Updated weights for policy 0, policy_version 13314 (0.0008) +[2026-06-02 16:52:59,171][262582] Updated weights for policy 0, policy_version 13324 (0.0009) +[2026-06-02 16:52:59,393][262582] Updated weights for policy 0, policy_version 13335 (0.0008) +[2026-06-02 16:52:59,594][262582] Updated weights for policy 0, policy_version 13345 (0.0008) +[2026-06-02 16:52:59,791][262582] Updated weights for policy 0, policy_version 13355 (0.0008) +[2026-06-02 16:52:59,995][262582] Updated weights for policy 0, policy_version 13365 (0.0008) +[2026-06-02 16:53:00,200][262582] Updated weights for policy 0, policy_version 13375 (0.0009) +[2026-06-02 16:53:00,885][262582] Updated weights for policy 0, policy_version 13385 (0.0008) +[2026-06-02 16:53:01,007][260776] Fps is (10 sec: 19660.5, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 6848512. Throughput: 0: 18395.0. Samples: 6869888. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 16:53:01,008][260776] Avg episode reward: [(0, '372.999')] +[2026-06-02 16:53:01,085][262582] Updated weights for policy 0, policy_version 13395 (0.0008) +[2026-06-02 16:53:01,285][262582] Updated weights for policy 0, policy_version 13405 (0.0008) +[2026-06-02 16:53:01,502][262582] Updated weights for policy 0, policy_version 13415 (0.0008) +[2026-06-02 16:53:01,701][262582] Updated weights for policy 0, policy_version 13425 (0.0008) +[2026-06-02 16:53:01,901][262582] Updated weights for policy 0, policy_version 13435 (0.0008) +[2026-06-02 16:53:02,620][262582] Updated weights for policy 0, policy_version 13446 (0.0009) +[2026-06-02 16:53:02,808][262582] Updated weights for policy 0, policy_version 13456 (0.0008) +[2026-06-02 16:53:03,010][262582] Updated weights for policy 0, policy_version 13466 (0.0008) +[2026-06-02 16:53:03,218][262582] Updated weights for policy 0, policy_version 13476 (0.0008) +[2026-06-02 16:53:03,419][262582] Updated weights for policy 0, policy_version 13486 (0.0008) +[2026-06-02 16:53:03,624][262582] Updated weights for policy 0, policy_version 13496 (0.0009) +[2026-06-02 16:53:04,364][262582] Updated weights for policy 0, policy_version 13506 (0.0008) +[2026-06-02 16:53:04,550][262582] Updated weights for policy 0, policy_version 13516 (0.0008) +[2026-06-02 16:53:04,740][262582] Updated weights for policy 0, policy_version 13526 (0.0008) +[2026-06-02 16:53:04,952][262582] Updated weights for policy 0, policy_version 13536 (0.0008) +[2026-06-02 16:53:05,162][262582] Updated weights for policy 0, policy_version 13546 (0.0008) +[2026-06-02 16:53:05,352][262582] Updated weights for policy 0, policy_version 13556 (0.0008) +[2026-06-02 16:53:05,560][262582] Updated weights for policy 0, policy_version 13566 (0.0009) +[2026-06-02 16:53:06,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 6946816. Throughput: 0: 18392.2. Samples: 6928512. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 16:53:06,008][260776] Avg episode reward: [(0, '429.019')] +[2026-06-02 16:53:06,268][262582] Updated weights for policy 0, policy_version 13576 (0.0009) +[2026-06-02 16:53:06,471][262582] Updated weights for policy 0, policy_version 13586 (0.0008) +[2026-06-02 16:53:06,674][262582] Updated weights for policy 0, policy_version 13596 (0.0008) +[2026-06-02 16:53:06,896][262582] Updated weights for policy 0, policy_version 13607 (0.0009) +[2026-06-02 16:53:07,094][262582] Updated weights for policy 0, policy_version 13617 (0.0008) +[2026-06-02 16:53:07,297][262582] Updated weights for policy 0, policy_version 13627 (0.0008) +[2026-06-02 16:53:07,388][262026] Saving new best policy, reward=429.019! +[2026-06-02 16:53:08,032][262582] Updated weights for policy 0, policy_version 13639 (0.0009) +[2026-06-02 16:53:08,246][262582] Updated weights for policy 0, policy_version 13650 (0.0009) +[2026-06-02 16:53:08,456][262582] Updated weights for policy 0, policy_version 13660 (0.0008) +[2026-06-02 16:53:08,654][262582] Updated weights for policy 0, policy_version 13670 (0.0009) +[2026-06-02 16:53:08,865][262582] Updated weights for policy 0, policy_version 13680 (0.0008) +[2026-06-02 16:53:09,069][262582] Updated weights for policy 0, policy_version 13690 (0.0008) +[2026-06-02 16:53:09,759][262582] Updated weights for policy 0, policy_version 13700 (0.0008) +[2026-06-02 16:53:09,952][262582] Updated weights for policy 0, policy_version 13710 (0.0008) +[2026-06-02 16:53:10,171][262582] Updated weights for policy 0, policy_version 13721 (0.0008) +[2026-06-02 16:53:10,388][262582] Updated weights for policy 0, policy_version 13732 (0.0008) +[2026-06-02 16:53:10,594][262582] Updated weights for policy 0, policy_version 13742 (0.0008) +[2026-06-02 16:53:10,806][262582] Updated weights for policy 0, policy_version 13752 (0.0009) +[2026-06-02 16:53:11,007][260776] Fps is (10 sec: 19661.1, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 7045120. Throughput: 0: 18446.2. Samples: 7036416. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 16:53:11,008][260776] Avg episode reward: [(0, '390.421')] +[2026-06-02 16:53:11,530][262582] Updated weights for policy 0, policy_version 13762 (0.0009) +[2026-06-02 16:53:11,717][262582] Updated weights for policy 0, policy_version 13772 (0.0008) +[2026-06-02 16:53:11,918][262582] Updated weights for policy 0, policy_version 13782 (0.0009) +[2026-06-02 16:53:12,124][262582] Updated weights for policy 0, policy_version 13792 (0.0008) +[2026-06-02 16:53:12,328][262582] Updated weights for policy 0, policy_version 13802 (0.0009) +[2026-06-02 16:53:12,521][262582] Updated weights for policy 0, policy_version 13812 (0.0008) +[2026-06-02 16:53:12,738][262582] Updated weights for policy 0, policy_version 13822 (0.0008) +[2026-06-02 16:53:13,423][262582] Updated weights for policy 0, policy_version 13832 (0.0008) +[2026-06-02 16:53:13,623][262582] Updated weights for policy 0, policy_version 13842 (0.0008) +[2026-06-02 16:53:13,829][262582] Updated weights for policy 0, policy_version 13852 (0.0008) +[2026-06-02 16:53:14,031][262582] Updated weights for policy 0, policy_version 13862 (0.0008) +[2026-06-02 16:53:14,232][262582] Updated weights for policy 0, policy_version 13872 (0.0008) +[2026-06-02 16:53:14,436][262582] Updated weights for policy 0, policy_version 13882 (0.0008) +[2026-06-02 16:53:15,133][262582] Updated weights for policy 0, policy_version 13892 (0.0008) +[2026-06-02 16:53:15,338][262582] Updated weights for policy 0, policy_version 13902 (0.0008) +[2026-06-02 16:53:15,527][262582] Updated weights for policy 0, policy_version 13912 (0.0008) +[2026-06-02 16:53:15,730][262582] Updated weights for policy 0, policy_version 13922 (0.0008) +[2026-06-02 16:53:15,924][262582] Updated weights for policy 0, policy_version 13932 (0.0008) +[2026-06-02 16:53:16,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18022.4, 300 sec: 18327.9). Total num frames: 7110656. Throughput: 0: 18451.9. Samples: 7154432. Policy #0 lag: (min: 56.0, avg: 96.6, max: 120.0) +[2026-06-02 16:53:16,008][260776] Avg episode reward: [(0, '384.223')] +[2026-06-02 16:53:16,135][262582] Updated weights for policy 0, policy_version 13942 (0.0009) +[2026-06-02 16:53:16,330][262582] Updated weights for policy 0, policy_version 13952 (0.0008) +[2026-06-02 16:53:17,009][262582] Updated weights for policy 0, policy_version 13962 (0.0008) +[2026-06-02 16:53:17,215][262582] Updated weights for policy 0, policy_version 13972 (0.0008) +[2026-06-02 16:53:17,417][262582] Updated weights for policy 0, policy_version 13982 (0.0009) +[2026-06-02 16:53:17,625][262582] Updated weights for policy 0, policy_version 13992 (0.0008) +[2026-06-02 16:53:17,827][262582] Updated weights for policy 0, policy_version 14002 (0.0008) +[2026-06-02 16:53:18,022][262582] Updated weights for policy 0, policy_version 14012 (0.0008) +[2026-06-02 16:53:18,723][262582] Updated weights for policy 0, policy_version 14022 (0.0008) +[2026-06-02 16:53:18,933][262582] Updated weights for policy 0, policy_version 14033 (0.0008) +[2026-06-02 16:53:19,134][262582] Updated weights for policy 0, policy_version 14043 (0.0008) +[2026-06-02 16:53:19,343][262582] Updated weights for policy 0, policy_version 14053 (0.0009) +[2026-06-02 16:53:19,568][262582] Updated weights for policy 0, policy_version 14064 (0.0008) +[2026-06-02 16:53:19,774][262582] Updated weights for policy 0, policy_version 14074 (0.0008) +[2026-06-02 16:53:20,466][262582] Updated weights for policy 0, policy_version 14084 (0.0008) +[2026-06-02 16:53:20,659][262582] Updated weights for policy 0, policy_version 14094 (0.0008) +[2026-06-02 16:53:20,877][262582] Updated weights for policy 0, policy_version 14105 (0.0008) +[2026-06-02 16:53:21,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 7208960. Throughput: 0: 18491.8. Samples: 7204864. Policy #0 lag: (min: 56.0, avg: 96.6, max: 120.0) +[2026-06-02 16:53:21,008][260776] Avg episode reward: [(0, '375.447')] +[2026-06-02 16:53:21,066][262582] Updated weights for policy 0, policy_version 14115 (0.0009) +[2026-06-02 16:53:21,287][262582] Updated weights for policy 0, policy_version 14125 (0.0009) +[2026-06-02 16:53:21,488][262582] Updated weights for policy 0, policy_version 14135 (0.0008) +[2026-06-02 16:53:22,168][262582] Updated weights for policy 0, policy_version 14145 (0.0008) +[2026-06-02 16:53:22,347][262582] Updated weights for policy 0, policy_version 14155 (0.0008) +[2026-06-02 16:53:22,552][262582] Updated weights for policy 0, policy_version 14165 (0.0008) +[2026-06-02 16:53:22,755][262582] Updated weights for policy 0, policy_version 14175 (0.0008) +[2026-06-02 16:53:22,957][262582] Updated weights for policy 0, policy_version 14185 (0.0008) +[2026-06-02 16:53:23,166][262582] Updated weights for policy 0, policy_version 14195 (0.0008) +[2026-06-02 16:53:23,358][262582] Updated weights for policy 0, policy_version 14205 (0.0008) +[2026-06-02 16:53:24,063][262582] Updated weights for policy 0, policy_version 14215 (0.0009) +[2026-06-02 16:53:24,258][262582] Updated weights for policy 0, policy_version 14225 (0.0008) +[2026-06-02 16:53:24,463][262582] Updated weights for policy 0, policy_version 14235 (0.0008) +[2026-06-02 16:53:24,676][262582] Updated weights for policy 0, policy_version 14246 (0.0008) +[2026-06-02 16:53:24,893][262582] Updated weights for policy 0, policy_version 14257 (0.0008) +[2026-06-02 16:53:25,107][262582] Updated weights for policy 0, policy_version 14267 (0.0008) +[2026-06-02 16:53:25,794][262582] Updated weights for policy 0, policy_version 14277 (0.0006) +[2026-06-02 16:53:25,993][262582] Updated weights for policy 0, policy_version 14287 (0.0004) +[2026-06-02 16:53:26,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 7307264. Throughput: 0: 18386.5. Samples: 7319168. Policy #0 lag: (min: 52.0, avg: 68.6, max: 116.0) +[2026-06-02 16:53:26,008][260776] Avg episode reward: [(0, '397.727')] +[2026-06-02 16:53:26,188][262582] Updated weights for policy 0, policy_version 14297 (0.0004) +[2026-06-02 16:53:26,399][262582] Updated weights for policy 0, policy_version 14307 (0.0004) +[2026-06-02 16:53:26,617][262582] Updated weights for policy 0, policy_version 14318 (0.0004) +[2026-06-02 16:53:26,827][262582] Updated weights for policy 0, policy_version 14328 (0.0006) +[2026-06-02 16:53:27,518][262582] Updated weights for policy 0, policy_version 14338 (0.0008) +[2026-06-02 16:53:27,714][262582] Updated weights for policy 0, policy_version 14348 (0.0008) +[2026-06-02 16:53:27,909][262582] Updated weights for policy 0, policy_version 14358 (0.0008) +[2026-06-02 16:53:28,113][262582] Updated weights for policy 0, policy_version 14368 (0.0008) +[2026-06-02 16:53:28,317][262582] Updated weights for policy 0, policy_version 14378 (0.0009) +[2026-06-02 16:53:28,508][262582] Updated weights for policy 0, policy_version 14388 (0.0008) +[2026-06-02 16:53:28,722][262582] Updated weights for policy 0, policy_version 14398 (0.0008) +[2026-06-02 16:53:29,418][262582] Updated weights for policy 0, policy_version 14408 (0.0008) +[2026-06-02 16:53:29,616][262582] Updated weights for policy 0, policy_version 14418 (0.0008) +[2026-06-02 16:53:29,834][262582] Updated weights for policy 0, policy_version 14429 (0.0008) +[2026-06-02 16:53:30,036][262582] Updated weights for policy 0, policy_version 14439 (0.0009) +[2026-06-02 16:53:30,250][262582] Updated weights for policy 0, policy_version 14449 (0.0009) +[2026-06-02 16:53:30,451][262582] Updated weights for policy 0, policy_version 14459 (0.0009) +[2026-06-02 16:53:31,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 7405568. Throughput: 0: 18417.8. Samples: 7426048. Policy #0 lag: (min: 52.0, avg: 68.6, max: 116.0) +[2026-06-02 16:53:31,008][260776] Avg episode reward: [(0, '379.214')] +[2026-06-02 16:53:31,152][262582] Updated weights for policy 0, policy_version 14469 (0.0008) +[2026-06-02 16:53:31,350][262582] Updated weights for policy 0, policy_version 14479 (0.0008) +[2026-06-02 16:53:31,547][262582] Updated weights for policy 0, policy_version 14489 (0.0008) +[2026-06-02 16:53:31,751][262582] Updated weights for policy 0, policy_version 14499 (0.0008) +[2026-06-02 16:53:31,952][262582] Updated weights for policy 0, policy_version 14509 (0.0008) +[2026-06-02 16:53:32,156][262582] Updated weights for policy 0, policy_version 14519 (0.0008) +[2026-06-02 16:53:32,867][262582] Updated weights for policy 0, policy_version 14529 (0.0008) +[2026-06-02 16:53:33,050][262582] Updated weights for policy 0, policy_version 14539 (0.0010) +[2026-06-02 16:53:33,243][262582] Updated weights for policy 0, policy_version 14549 (0.0009) +[2026-06-02 16:53:33,451][262582] Updated weights for policy 0, policy_version 14559 (0.0012) +[2026-06-02 16:53:33,653][262582] Updated weights for policy 0, policy_version 14569 (0.0010) +[2026-06-02 16:53:33,877][262582] Updated weights for policy 0, policy_version 14580 (0.0009) +[2026-06-02 16:53:34,072][262582] Updated weights for policy 0, policy_version 14590 (0.0008) +[2026-06-02 16:53:34,774][262582] Updated weights for policy 0, policy_version 14600 (0.0009) +[2026-06-02 16:53:34,961][262582] Updated weights for policy 0, policy_version 14610 (0.0008) +[2026-06-02 16:53:35,171][262582] Updated weights for policy 0, policy_version 14620 (0.0009) +[2026-06-02 16:53:35,370][262582] Updated weights for policy 0, policy_version 14630 (0.0008) +[2026-06-02 16:53:35,564][262582] Updated weights for policy 0, policy_version 14640 (0.0008) +[2026-06-02 16:53:35,772][262582] Updated weights for policy 0, policy_version 14650 (0.0008) +[2026-06-02 16:53:36,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 7503872. Throughput: 0: 18423.5. Samples: 7485440. Policy #0 lag: (min: 52.0, avg: 68.6, max: 116.0) +[2026-06-02 16:53:36,008][260776] Avg episode reward: [(0, '372.291')] +[2026-06-02 16:53:36,468][262582] Updated weights for policy 0, policy_version 14660 (0.0009) +[2026-06-02 16:53:36,670][262582] Updated weights for policy 0, policy_version 14670 (0.0008) +[2026-06-02 16:53:36,862][262582] Updated weights for policy 0, policy_version 14680 (0.0009) +[2026-06-02 16:53:37,057][262582] Updated weights for policy 0, policy_version 14690 (0.0008) +[2026-06-02 16:53:37,272][262582] Updated weights for policy 0, policy_version 14700 (0.0008) +[2026-06-02 16:53:37,475][262582] Updated weights for policy 0, policy_version 14710 (0.0008) +[2026-06-02 16:53:37,679][262582] Updated weights for policy 0, policy_version 14720 (0.0008) +[2026-06-02 16:53:38,359][262582] Updated weights for policy 0, policy_version 14730 (0.0008) +[2026-06-02 16:53:38,561][262582] Updated weights for policy 0, policy_version 14740 (0.0008) +[2026-06-02 16:53:38,760][262582] Updated weights for policy 0, policy_version 14750 (0.0008) +[2026-06-02 16:53:38,966][262582] Updated weights for policy 0, policy_version 14760 (0.0008) +[2026-06-02 16:53:39,165][262582] Updated weights for policy 0, policy_version 14770 (0.0008) +[2026-06-02 16:53:39,379][262582] Updated weights for policy 0, policy_version 14780 (0.0009) +[2026-06-02 16:53:40,069][262582] Updated weights for policy 0, policy_version 14790 (0.0009) +[2026-06-02 16:53:40,265][262582] Updated weights for policy 0, policy_version 14800 (0.0008) +[2026-06-02 16:53:40,456][262582] Updated weights for policy 0, policy_version 14810 (0.0008) +[2026-06-02 16:53:40,666][262582] Updated weights for policy 0, policy_version 14820 (0.0008) +[2026-06-02 16:53:40,866][262582] Updated weights for policy 0, policy_version 14830 (0.0008) +[2026-06-02 16:53:41,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 7569408. Throughput: 0: 18406.4. Samples: 7591680. Policy #0 lag: (min: 63.0, avg: 80.0, max: 127.0) +[2026-06-02 16:53:41,008][260776] Avg episode reward: [(0, '350.496')] +[2026-06-02 16:53:41,074][262582] Updated weights for policy 0, policy_version 14840 (0.0008) +[2026-06-02 16:53:41,787][262582] Updated weights for policy 0, policy_version 14850 (0.0009) +[2026-06-02 16:53:41,993][262582] Updated weights for policy 0, policy_version 14861 (0.0008) +[2026-06-02 16:53:42,190][262582] Updated weights for policy 0, policy_version 14871 (0.0008) +[2026-06-02 16:53:42,400][262582] Updated weights for policy 0, policy_version 14881 (0.0008) +[2026-06-02 16:53:42,606][262582] Updated weights for policy 0, policy_version 14891 (0.0009) +[2026-06-02 16:53:42,815][262582] Updated weights for policy 0, policy_version 14901 (0.0008) +[2026-06-02 16:53:43,014][262582] Updated weights for policy 0, policy_version 14911 (0.0009) +[2026-06-02 16:53:43,668][262582] Updated weights for policy 0, policy_version 14921 (0.0009) +[2026-06-02 16:53:43,869][262582] Updated weights for policy 0, policy_version 14931 (0.0008) +[2026-06-02 16:53:44,074][262582] Updated weights for policy 0, policy_version 14941 (0.0008) +[2026-06-02 16:53:44,269][262582] Updated weights for policy 0, policy_version 14951 (0.0009) +[2026-06-02 16:53:44,484][262582] Updated weights for policy 0, policy_version 14961 (0.0009) +[2026-06-02 16:53:44,689][262582] Updated weights for policy 0, policy_version 14971 (0.0009) +[2026-06-02 16:53:45,366][262582] Updated weights for policy 0, policy_version 14981 (0.0009) +[2026-06-02 16:53:45,551][262582] Updated weights for policy 0, policy_version 14991 (0.0008) +[2026-06-02 16:53:45,757][262582] Updated weights for policy 0, policy_version 15001 (0.0009) +[2026-06-02 16:53:45,965][262582] Updated weights for policy 0, policy_version 15011 (0.0010) +[2026-06-02 16:53:46,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 7667712. Throughput: 0: 18651.1. Samples: 7709184. Policy #0 lag: (min: 63.0, avg: 80.0, max: 127.0) +[2026-06-02 16:53:46,008][260776] Avg episode reward: [(0, '375.747')] +[2026-06-02 16:53:46,156][262582] Updated weights for policy 0, policy_version 15021 (0.0009) +[2026-06-02 16:53:46,377][262582] Updated weights for policy 0, policy_version 15031 (0.0008) +[2026-06-02 16:53:47,071][262582] Updated weights for policy 0, policy_version 15041 (0.0008) +[2026-06-02 16:53:47,260][262582] Updated weights for policy 0, policy_version 15051 (0.0009) +[2026-06-02 16:53:47,465][262582] Updated weights for policy 0, policy_version 15061 (0.0008) +[2026-06-02 16:53:47,656][262582] Updated weights for policy 0, policy_version 15071 (0.0008) +[2026-06-02 16:53:47,862][262582] Updated weights for policy 0, policy_version 15081 (0.0009) +[2026-06-02 16:53:48,067][262582] Updated weights for policy 0, policy_version 15091 (0.0008) +[2026-06-02 16:53:48,271][262582] Updated weights for policy 0, policy_version 15101 (0.0008) +[2026-06-02 16:53:48,960][262582] Updated weights for policy 0, policy_version 15111 (0.0008) +[2026-06-02 16:53:49,158][262582] Updated weights for policy 0, policy_version 15121 (0.0008) +[2026-06-02 16:53:49,360][262582] Updated weights for policy 0, policy_version 15131 (0.0008) +[2026-06-02 16:53:49,562][262582] Updated weights for policy 0, policy_version 15141 (0.0008) +[2026-06-02 16:53:49,769][262582] Updated weights for policy 0, policy_version 15151 (0.0008) +[2026-06-02 16:53:49,987][262582] Updated weights for policy 0, policy_version 15162 (0.0008) +[2026-06-02 16:53:50,690][262582] Updated weights for policy 0, policy_version 15172 (0.0008) +[2026-06-02 16:53:50,879][262582] Updated weights for policy 0, policy_version 15182 (0.0009) +[2026-06-02 16:53:51,007][260776] Fps is (10 sec: 19660.6, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 7766016. Throughput: 0: 18389.3. Samples: 7756032. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 16:53:51,008][260776] Avg episode reward: [(0, '380.877')] +[2026-06-02 16:53:51,073][262582] Updated weights for policy 0, policy_version 15192 (0.0008) +[2026-06-02 16:53:51,286][262582] Updated weights for policy 0, policy_version 15202 (0.0008) +[2026-06-02 16:53:51,512][262582] Updated weights for policy 0, policy_version 15213 (0.0009) +[2026-06-02 16:53:51,713][262582] Updated weights for policy 0, policy_version 15223 (0.0008) +[2026-06-02 16:53:52,423][262582] Updated weights for policy 0, policy_version 15233 (0.0009) +[2026-06-02 16:53:52,636][262582] Updated weights for policy 0, policy_version 15244 (0.0009) +[2026-06-02 16:53:52,833][262582] Updated weights for policy 0, policy_version 15254 (0.0008) +[2026-06-02 16:53:53,042][262582] Updated weights for policy 0, policy_version 15264 (0.0009) +[2026-06-02 16:53:53,237][262582] Updated weights for policy 0, policy_version 15274 (0.0009) +[2026-06-02 16:53:53,476][262582] Updated weights for policy 0, policy_version 15285 (0.0009) +[2026-06-02 16:53:53,680][262582] Updated weights for policy 0, policy_version 15295 (0.0009) +[2026-06-02 16:53:54,352][262582] Updated weights for policy 0, policy_version 15305 (0.0008) +[2026-06-02 16:53:54,576][262582] Updated weights for policy 0, policy_version 15316 (0.0009) +[2026-06-02 16:53:54,778][262582] Updated weights for policy 0, policy_version 15326 (0.0009) +[2026-06-02 16:53:54,997][262582] Updated weights for policy 0, policy_version 15337 (0.0009) +[2026-06-02 16:53:55,211][262582] Updated weights for policy 0, policy_version 15348 (0.0008) +[2026-06-02 16:53:55,451][262582] Updated weights for policy 0, policy_version 15360 (0.0008) +[2026-06-02 16:53:56,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 7864320. Throughput: 0: 18628.3. Samples: 7874688. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 16:53:56,008][260776] Avg episode reward: [(0, '370.725')] +[2026-06-02 16:53:56,162][262582] Updated weights for policy 0, policy_version 15370 (0.0009) +[2026-06-02 16:53:56,355][262582] Updated weights for policy 0, policy_version 15380 (0.0008) +[2026-06-02 16:53:56,555][262582] Updated weights for policy 0, policy_version 15390 (0.0009) +[2026-06-02 16:53:56,791][262582] Updated weights for policy 0, policy_version 15402 (0.0008) +[2026-06-02 16:53:56,990][262582] Updated weights for policy 0, policy_version 15412 (0.0008) +[2026-06-02 16:53:57,196][262582] Updated weights for policy 0, policy_version 15422 (0.0009) +[2026-06-02 16:53:57,944][262582] Updated weights for policy 0, policy_version 15432 (0.0009) +[2026-06-02 16:53:58,140][262582] Updated weights for policy 0, policy_version 15442 (0.0008) +[2026-06-02 16:53:58,338][262582] Updated weights for policy 0, policy_version 15452 (0.0008) +[2026-06-02 16:53:58,536][262582] Updated weights for policy 0, policy_version 15462 (0.0008) +[2026-06-02 16:53:58,750][262582] Updated weights for policy 0, policy_version 15472 (0.0008) +[2026-06-02 16:53:58,948][262582] Updated weights for policy 0, policy_version 15482 (0.0008) +[2026-06-02 16:53:59,622][262582] Updated weights for policy 0, policy_version 15492 (0.0009) +[2026-06-02 16:53:59,813][262582] Updated weights for policy 0, policy_version 15502 (0.0008) +[2026-06-02 16:54:00,033][262582] Updated weights for policy 0, policy_version 15513 (0.0008) +[2026-06-02 16:54:00,232][262582] Updated weights for policy 0, policy_version 15523 (0.0008) +[2026-06-02 16:54:00,435][262582] Updated weights for policy 0, policy_version 15533 (0.0008) +[2026-06-02 16:54:00,641][262582] Updated weights for policy 0, policy_version 15543 (0.0009) +[2026-06-02 16:54:01,007][260776] Fps is (10 sec: 19661.0, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 7962624. Throughput: 0: 18332.5. Samples: 7979392. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 16:54:01,008][260776] Avg episode reward: [(0, '413.059')] +[2026-06-02 16:54:01,354][262582] Updated weights for policy 0, policy_version 15553 (0.0009) +[2026-06-02 16:54:01,540][262582] Updated weights for policy 0, policy_version 15563 (0.0008) +[2026-06-02 16:54:01,738][262582] Updated weights for policy 0, policy_version 15573 (0.0009) +[2026-06-02 16:54:01,936][262582] Updated weights for policy 0, policy_version 15583 (0.0008) +[2026-06-02 16:54:02,141][262582] Updated weights for policy 0, policy_version 15593 (0.0008) +[2026-06-02 16:54:02,341][262582] Updated weights for policy 0, policy_version 15603 (0.0009) +[2026-06-02 16:54:02,549][262582] Updated weights for policy 0, policy_version 15613 (0.0008) +[2026-06-02 16:54:03,241][262582] Updated weights for policy 0, policy_version 15623 (0.0008) +[2026-06-02 16:54:03,438][262582] Updated weights for policy 0, policy_version 15633 (0.0008) +[2026-06-02 16:54:03,636][262582] Updated weights for policy 0, policy_version 15643 (0.0008) +[2026-06-02 16:54:03,838][262582] Updated weights for policy 0, policy_version 15653 (0.0009) +[2026-06-02 16:54:04,050][262582] Updated weights for policy 0, policy_version 15663 (0.0008) +[2026-06-02 16:54:04,257][262582] Updated weights for policy 0, policy_version 15673 (0.0008) +[2026-06-02 16:54:04,964][262582] Updated weights for policy 0, policy_version 15683 (0.0008) +[2026-06-02 16:54:05,151][262582] Updated weights for policy 0, policy_version 15693 (0.0008) +[2026-06-02 16:54:05,350][262582] Updated weights for policy 0, policy_version 15703 (0.0008) +[2026-06-02 16:54:05,547][262582] Updated weights for policy 0, policy_version 15713 (0.0008) +[2026-06-02 16:54:05,754][262582] Updated weights for policy 0, policy_version 15723 (0.0008) +[2026-06-02 16:54:05,965][262582] Updated weights for policy 0, policy_version 15734 (0.0008) +[2026-06-02 16:54:06,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 8028160. Throughput: 0: 18523.0. Samples: 8038400. Policy #0 lag: (min: 43.0, avg: 90.6, max: 113.0) +[2026-06-02 16:54:06,008][260776] Avg episode reward: [(0, '416.491')] +[2026-06-02 16:54:06,168][262582] Updated weights for policy 0, policy_version 15744 (0.0008) +[2026-06-02 16:54:06,889][262582] Updated weights for policy 0, policy_version 15755 (0.0007) +[2026-06-02 16:54:07,091][262582] Updated weights for policy 0, policy_version 15765 (0.0005) +[2026-06-02 16:54:07,291][262582] Updated weights for policy 0, policy_version 15775 (0.0006) +[2026-06-02 16:54:07,493][262582] Updated weights for policy 0, policy_version 15785 (0.0005) +[2026-06-02 16:54:07,696][262582] Updated weights for policy 0, policy_version 15795 (0.0005) +[2026-06-02 16:54:07,902][262582] Updated weights for policy 0, policy_version 15805 (0.0006) +[2026-06-02 16:54:08,587][262582] Updated weights for policy 0, policy_version 15816 (0.0008) +[2026-06-02 16:54:08,788][262582] Updated weights for policy 0, policy_version 15826 (0.0009) +[2026-06-02 16:54:08,992][262582] Updated weights for policy 0, policy_version 15836 (0.0008) +[2026-06-02 16:54:09,192][262582] Updated weights for policy 0, policy_version 15846 (0.0008) +[2026-06-02 16:54:09,391][262582] Updated weights for policy 0, policy_version 15856 (0.0008) +[2026-06-02 16:54:09,601][262582] Updated weights for policy 0, policy_version 15866 (0.0008) +[2026-06-02 16:54:10,306][262582] Updated weights for policy 0, policy_version 15876 (0.0008) +[2026-06-02 16:54:10,499][262582] Updated weights for policy 0, policy_version 15886 (0.0008) +[2026-06-02 16:54:10,697][262582] Updated weights for policy 0, policy_version 15896 (0.0008) +[2026-06-02 16:54:10,899][262582] Updated weights for policy 0, policy_version 15906 (0.0008) +[2026-06-02 16:54:11,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 8126464. Throughput: 0: 18343.8. Samples: 8144640. Policy #0 lag: (min: 43.0, avg: 90.6, max: 113.0) +[2026-06-02 16:54:11,008][260776] Avg episode reward: [(0, '424.778')] +[2026-06-02 16:54:11,122][262582] Updated weights for policy 0, policy_version 15917 (0.0008) +[2026-06-02 16:54:11,323][262582] Updated weights for policy 0, policy_version 15927 (0.0008) +[2026-06-02 16:54:12,047][262582] Updated weights for policy 0, policy_version 15937 (0.0008) +[2026-06-02 16:54:12,232][262582] Updated weights for policy 0, policy_version 15947 (0.0008) +[2026-06-02 16:54:12,431][262582] Updated weights for policy 0, policy_version 15957 (0.0008) +[2026-06-02 16:54:12,631][262582] Updated weights for policy 0, policy_version 15967 (0.0008) +[2026-06-02 16:54:12,847][262582] Updated weights for policy 0, policy_version 15978 (0.0008) +[2026-06-02 16:54:13,059][262582] Updated weights for policy 0, policy_version 15988 (0.0009) +[2026-06-02 16:54:13,260][262582] Updated weights for policy 0, policy_version 15998 (0.0008) +[2026-06-02 16:54:13,940][262582] Updated weights for policy 0, policy_version 16008 (0.0008) +[2026-06-02 16:54:14,124][262582] Updated weights for policy 0, policy_version 16018 (0.0008) +[2026-06-02 16:54:14,327][262582] Updated weights for policy 0, policy_version 16028 (0.0008) +[2026-06-02 16:54:14,531][262582] Updated weights for policy 0, policy_version 16038 (0.0008) +[2026-06-02 16:54:14,733][262582] Updated weights for policy 0, policy_version 16048 (0.0008) +[2026-06-02 16:54:14,943][262582] Updated weights for policy 0, policy_version 16058 (0.0008) +[2026-06-02 16:54:15,645][262582] Updated weights for policy 0, policy_version 16068 (0.0008) +[2026-06-02 16:54:15,846][262582] Updated weights for policy 0, policy_version 16078 (0.0008) +[2026-06-02 16:54:16,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 8224768. Throughput: 0: 18432.0. Samples: 8255488. Policy #0 lag: (min: 43.0, avg: 91.8, max: 107.0) +[2026-06-02 16:54:16,008][260776] Avg episode reward: [(0, '425.887')] +[2026-06-02 16:54:16,042][262582] Updated weights for policy 0, policy_version 16088 (0.0008) +[2026-06-02 16:54:16,244][262582] Updated weights for policy 0, policy_version 16098 (0.0008) +[2026-06-02 16:54:16,460][262582] Updated weights for policy 0, policy_version 16108 (0.0008) +[2026-06-02 16:54:16,650][262582] Updated weights for policy 0, policy_version 16118 (0.0008) +[2026-06-02 16:54:16,844][262582] Updated weights for policy 0, policy_version 16128 (0.0008) +[2026-06-02 16:54:17,550][262582] Updated weights for policy 0, policy_version 16138 (0.0008) +[2026-06-02 16:54:17,754][262582] Updated weights for policy 0, policy_version 16148 (0.0009) +[2026-06-02 16:54:17,952][262582] Updated weights for policy 0, policy_version 16158 (0.0008) +[2026-06-02 16:54:18,153][262582] Updated weights for policy 0, policy_version 16168 (0.0008) +[2026-06-02 16:54:18,371][262582] Updated weights for policy 0, policy_version 16178 (0.0009) +[2026-06-02 16:54:18,570][262582] Updated weights for policy 0, policy_version 16188 (0.0008) +[2026-06-02 16:54:19,267][262582] Updated weights for policy 0, policy_version 16198 (0.0008) +[2026-06-02 16:54:19,470][262582] Updated weights for policy 0, policy_version 16209 (0.0008) +[2026-06-02 16:54:19,681][262582] Updated weights for policy 0, policy_version 16219 (0.0005) +[2026-06-02 16:54:19,886][262582] Updated weights for policy 0, policy_version 16229 (0.0004) +[2026-06-02 16:54:20,092][262582] Updated weights for policy 0, policy_version 16239 (0.0009) +[2026-06-02 16:54:20,299][262582] Updated weights for policy 0, policy_version 16249 (0.0006) +[2026-06-02 16:54:21,004][262582] Updated weights for policy 0, policy_version 16260 (0.0005) +[2026-06-02 16:54:21,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 8323072. Throughput: 0: 18321.1. Samples: 8309888. Policy #0 lag: (min: 43.0, avg: 91.8, max: 107.0) +[2026-06-02 16:54:21,008][260776] Avg episode reward: [(0, '378.956')] +[2026-06-02 16:54:21,199][262582] Updated weights for policy 0, policy_version 16270 (0.0008) +[2026-06-02 16:54:21,397][262582] Updated weights for policy 0, policy_version 16280 (0.0004) +[2026-06-02 16:54:21,595][262582] Updated weights for policy 0, policy_version 16290 (0.0004) +[2026-06-02 16:54:21,802][262582] Updated weights for policy 0, policy_version 16300 (0.0004) +[2026-06-02 16:54:22,013][262582] Updated weights for policy 0, policy_version 16310 (0.0004) +[2026-06-02 16:54:22,209][262582] Updated weights for policy 0, policy_version 16320 (0.0004) +[2026-06-02 16:54:22,887][262582] Updated weights for policy 0, policy_version 16330 (0.0004) +[2026-06-02 16:54:23,103][262582] Updated weights for policy 0, policy_version 16341 (0.0006) +[2026-06-02 16:54:23,319][262582] Updated weights for policy 0, policy_version 16351 (0.0006) +[2026-06-02 16:54:23,523][262582] Updated weights for policy 0, policy_version 16361 (0.0004) +[2026-06-02 16:54:23,723][262582] Updated weights for policy 0, policy_version 16371 (0.0004) +[2026-06-02 16:54:23,929][262582] Updated weights for policy 0, policy_version 16381 (0.0004) +[2026-06-02 16:54:24,617][262582] Updated weights for policy 0, policy_version 16391 (0.0004) +[2026-06-02 16:54:24,831][262582] Updated weights for policy 0, policy_version 16402 (0.0004) +[2026-06-02 16:54:25,036][262582] Updated weights for policy 0, policy_version 16412 (0.0005) +[2026-06-02 16:54:25,230][262582] Updated weights for policy 0, policy_version 16422 (0.0008) +[2026-06-02 16:54:25,434][262582] Updated weights for policy 0, policy_version 16432 (0.0008) +[2026-06-02 16:54:25,638][262582] Updated weights for policy 0, policy_version 16442 (0.0009) +[2026-06-02 16:54:26,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 8421376. Throughput: 0: 18528.7. Samples: 8425472. Policy #0 lag: (min: 43.0, avg: 91.8, max: 107.0) +[2026-06-02 16:54:26,008][260776] Avg episode reward: [(0, '364.568')] +[2026-06-02 16:54:26,335][262582] Updated weights for policy 0, policy_version 16452 (0.0008) +[2026-06-02 16:54:26,526][262582] Updated weights for policy 0, policy_version 16462 (0.0008) +[2026-06-02 16:54:26,725][262582] Updated weights for policy 0, policy_version 16472 (0.0008) +[2026-06-02 16:54:26,916][262582] Updated weights for policy 0, policy_version 16482 (0.0008) +[2026-06-02 16:54:27,151][262582] Updated weights for policy 0, policy_version 16493 (0.0008) +[2026-06-02 16:54:27,353][262582] Updated weights for policy 0, policy_version 16503 (0.0008) +[2026-06-02 16:54:28,067][262582] Updated weights for policy 0, policy_version 16513 (0.0009) +[2026-06-02 16:54:28,262][262582] Updated weights for policy 0, policy_version 16524 (0.0008) +[2026-06-02 16:54:28,468][262582] Updated weights for policy 0, policy_version 16534 (0.0008) +[2026-06-02 16:54:28,671][262582] Updated weights for policy 0, policy_version 16544 (0.0008) +[2026-06-02 16:54:28,875][262582] Updated weights for policy 0, policy_version 16554 (0.0008) +[2026-06-02 16:54:29,079][262582] Updated weights for policy 0, policy_version 16564 (0.0008) +[2026-06-02 16:54:29,279][262582] Updated weights for policy 0, policy_version 16574 (0.0008) +[2026-06-02 16:54:29,969][262582] Updated weights for policy 0, policy_version 16584 (0.0008) +[2026-06-02 16:54:30,163][262582] Updated weights for policy 0, policy_version 16594 (0.0008) +[2026-06-02 16:54:30,367][262582] Updated weights for policy 0, policy_version 16604 (0.0008) +[2026-06-02 16:54:30,572][262582] Updated weights for policy 0, policy_version 16614 (0.0008) +[2026-06-02 16:54:30,788][262582] Updated weights for policy 0, policy_version 16625 (0.0008) +[2026-06-02 16:54:30,998][262582] Updated weights for policy 0, policy_version 16635 (0.0008) +[2026-06-02 16:54:31,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 18327.9). Total num frames: 8486912. Throughput: 0: 18341.0. Samples: 8534528. Policy #0 lag: (min: 63.0, avg: 80.5, max: 127.0) +[2026-06-02 16:54:31,008][260776] Avg episode reward: [(0, '367.292')] +[2026-06-02 16:54:31,696][262582] Updated weights for policy 0, policy_version 16645 (0.0009) +[2026-06-02 16:54:31,886][262582] Updated weights for policy 0, policy_version 16655 (0.0008) +[2026-06-02 16:54:32,092][262582] Updated weights for policy 0, policy_version 16665 (0.0008) +[2026-06-02 16:54:32,289][262582] Updated weights for policy 0, policy_version 16675 (0.0008) +[2026-06-02 16:54:32,495][262582] Updated weights for policy 0, policy_version 16685 (0.0008) +[2026-06-02 16:54:32,700][262582] Updated weights for policy 0, policy_version 16695 (0.0008) +[2026-06-02 16:54:33,411][262582] Updated weights for policy 0, policy_version 16705 (0.0008) +[2026-06-02 16:54:33,594][262582] Updated weights for policy 0, policy_version 16715 (0.0008) +[2026-06-02 16:54:33,793][262582] Updated weights for policy 0, policy_version 16725 (0.0008) +[2026-06-02 16:54:34,000][262582] Updated weights for policy 0, policy_version 16735 (0.0009) +[2026-06-02 16:54:34,204][262582] Updated weights for policy 0, policy_version 16745 (0.0008) +[2026-06-02 16:54:34,401][262582] Updated weights for policy 0, policy_version 16755 (0.0008) +[2026-06-02 16:54:34,611][262582] Updated weights for policy 0, policy_version 16765 (0.0008) +[2026-06-02 16:54:35,310][262582] Updated weights for policy 0, policy_version 16776 (0.0008) +[2026-06-02 16:54:35,521][262582] Updated weights for policy 0, policy_version 16786 (0.0008) +[2026-06-02 16:54:35,708][262582] Updated weights for policy 0, policy_version 16796 (0.0008) +[2026-06-02 16:54:35,913][262582] Updated weights for policy 0, policy_version 16806 (0.0008) +[2026-06-02 16:54:36,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 8585216. Throughput: 0: 18625.5. Samples: 8594176. Policy #0 lag: (min: 63.0, avg: 80.5, max: 127.0) +[2026-06-02 16:54:36,008][260776] Avg episode reward: [(0, '377.752')] +[2026-06-02 16:54:36,121][262582] Updated weights for policy 0, policy_version 16816 (0.0008) +[2026-06-02 16:54:36,323][262582] Updated weights for policy 0, policy_version 16826 (0.0008) +[2026-06-02 16:54:37,023][262582] Updated weights for policy 0, policy_version 16836 (0.0008) +[2026-06-02 16:54:37,220][262582] Updated weights for policy 0, policy_version 16846 (0.0008) +[2026-06-02 16:54:37,421][262582] Updated weights for policy 0, policy_version 16856 (0.0008) +[2026-06-02 16:54:37,618][262582] Updated weights for policy 0, policy_version 16866 (0.0008) +[2026-06-02 16:54:37,833][262582] Updated weights for policy 0, policy_version 16876 (0.0008) +[2026-06-02 16:54:38,027][262582] Updated weights for policy 0, policy_version 16886 (0.0008) +[2026-06-02 16:54:38,219][262582] Updated weights for policy 0, policy_version 16896 (0.0008) +[2026-06-02 16:54:38,920][262582] Updated weights for policy 0, policy_version 16906 (0.0008) +[2026-06-02 16:54:39,141][262582] Updated weights for policy 0, policy_version 16917 (0.0008) +[2026-06-02 16:54:39,333][262582] Updated weights for policy 0, policy_version 16927 (0.0008) +[2026-06-02 16:54:39,542][262582] Updated weights for policy 0, policy_version 16937 (0.0008) +[2026-06-02 16:54:39,745][262582] Updated weights for policy 0, policy_version 16947 (0.0008) +[2026-06-02 16:54:39,945][262582] Updated weights for policy 0, policy_version 16957 (0.0008) +[2026-06-02 16:54:40,646][262582] Updated weights for policy 0, policy_version 16967 (0.0008) +[2026-06-02 16:54:40,849][262582] Updated weights for policy 0, policy_version 16978 (0.0008) +[2026-06-02 16:54:41,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 8683520. Throughput: 0: 18329.6. Samples: 8699520. Policy #0 lag: (min: 63.0, avg: 80.5, max: 127.0) +[2026-06-02 16:54:41,008][260776] Avg episode reward: [(0, '378.997')] +[2026-06-02 16:54:41,057][262582] Updated weights for policy 0, policy_version 16988 (0.0008) +[2026-06-02 16:54:41,256][262582] Updated weights for policy 0, policy_version 16998 (0.0008) +[2026-06-02 16:54:41,458][262582] Updated weights for policy 0, policy_version 17008 (0.0008) +[2026-06-02 16:54:41,671][262582] Updated weights for policy 0, policy_version 17018 (0.0008) +[2026-06-02 16:54:42,406][262582] Updated weights for policy 0, policy_version 17028 (0.0008) +[2026-06-02 16:54:42,600][262582] Updated weights for policy 0, policy_version 17038 (0.0008) +[2026-06-02 16:54:42,802][262582] Updated weights for policy 0, policy_version 17048 (0.0008) +[2026-06-02 16:54:43,006][262582] Updated weights for policy 0, policy_version 17058 (0.0008) +[2026-06-02 16:54:43,216][262582] Updated weights for policy 0, policy_version 17068 (0.0010) +[2026-06-02 16:54:43,411][262582] Updated weights for policy 0, policy_version 17078 (0.0005) +[2026-06-02 16:54:43,610][262582] Updated weights for policy 0, policy_version 17088 (0.0005) +[2026-06-02 16:54:44,276][262582] Updated weights for policy 0, policy_version 17098 (0.0004) +[2026-06-02 16:54:44,476][262582] Updated weights for policy 0, policy_version 17108 (0.0004) +[2026-06-02 16:54:44,688][262582] Updated weights for policy 0, policy_version 17118 (0.0004) +[2026-06-02 16:54:44,886][262582] Updated weights for policy 0, policy_version 17128 (0.0004) +[2026-06-02 16:54:45,098][262582] Updated weights for policy 0, policy_version 17138 (0.0004) +[2026-06-02 16:54:45,291][262582] Updated weights for policy 0, policy_version 17148 (0.0008) +[2026-06-02 16:54:45,978][262582] Updated weights for policy 0, policy_version 17158 (0.0008) +[2026-06-02 16:54:46,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 8781824. Throughput: 0: 18377.9. Samples: 8806400. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 16:54:46,008][260776] Avg episode reward: [(0, '401.361')] +[2026-06-02 16:54:46,174][262582] Updated weights for policy 0, policy_version 17168 (0.0008) +[2026-06-02 16:54:46,373][262582] Updated weights for policy 0, policy_version 17178 (0.0008) +[2026-06-02 16:54:46,569][262582] Updated weights for policy 0, policy_version 17188 (0.0008) +[2026-06-02 16:54:46,774][262582] Updated weights for policy 0, policy_version 17198 (0.0008) +[2026-06-02 16:54:46,988][262582] Updated weights for policy 0, policy_version 17208 (0.0008) +[2026-06-02 16:54:47,690][262582] Updated weights for policy 0, policy_version 17218 (0.0008) +[2026-06-02 16:54:47,884][262582] Updated weights for policy 0, policy_version 17228 (0.0008) +[2026-06-02 16:54:48,091][262582] Updated weights for policy 0, policy_version 17238 (0.0009) +[2026-06-02 16:54:48,321][262582] Updated weights for policy 0, policy_version 17250 (0.0009) +[2026-06-02 16:54:48,531][262582] Updated weights for policy 0, policy_version 17260 (0.0008) +[2026-06-02 16:54:48,733][262582] Updated weights for policy 0, policy_version 17270 (0.0008) +[2026-06-02 16:54:48,931][262582] Updated weights for policy 0, policy_version 17280 (0.0008) +[2026-06-02 16:54:49,626][262582] Updated weights for policy 0, policy_version 17290 (0.0008) +[2026-06-02 16:54:49,840][262582] Updated weights for policy 0, policy_version 17301 (0.0008) +[2026-06-02 16:54:50,065][262582] Updated weights for policy 0, policy_version 17312 (0.0008) +[2026-06-02 16:54:50,264][262582] Updated weights for policy 0, policy_version 17322 (0.0008) +[2026-06-02 16:54:50,474][262582] Updated weights for policy 0, policy_version 17332 (0.0008) +[2026-06-02 16:54:50,667][262582] Updated weights for policy 0, policy_version 17342 (0.0008) +[2026-06-02 16:54:51,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 8880128. Throughput: 0: 18369.4. Samples: 8865024. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 16:54:51,008][260776] Avg episode reward: [(0, '447.412')] +[2026-06-02 16:54:51,015][262026] Saving new best policy, reward=447.412! +[2026-06-02 16:54:51,376][262582] Updated weights for policy 0, policy_version 17352 (0.0009) +[2026-06-02 16:54:51,571][262582] Updated weights for policy 0, policy_version 17362 (0.0008) +[2026-06-02 16:54:51,774][262582] Updated weights for policy 0, policy_version 17372 (0.0008) +[2026-06-02 16:54:51,978][262582] Updated weights for policy 0, policy_version 17382 (0.0008) +[2026-06-02 16:54:52,196][262582] Updated weights for policy 0, policy_version 17393 (0.0008) +[2026-06-02 16:54:52,400][262582] Updated weights for policy 0, policy_version 17403 (0.0008) +[2026-06-02 16:54:53,079][262582] Updated weights for policy 0, policy_version 17413 (0.0008) +[2026-06-02 16:54:53,277][262582] Updated weights for policy 0, policy_version 17423 (0.0008) +[2026-06-02 16:54:53,468][262582] Updated weights for policy 0, policy_version 17433 (0.0008) +[2026-06-02 16:54:53,678][262582] Updated weights for policy 0, policy_version 17443 (0.0008) +[2026-06-02 16:54:53,896][262582] Updated weights for policy 0, policy_version 17454 (0.0008) +[2026-06-02 16:54:54,106][262582] Updated weights for policy 0, policy_version 17464 (0.0008) +[2026-06-02 16:54:54,812][262582] Updated weights for policy 0, policy_version 17474 (0.0008) +[2026-06-02 16:54:55,000][262582] Updated weights for policy 0, policy_version 17484 (0.0008) +[2026-06-02 16:54:55,195][262582] Updated weights for policy 0, policy_version 17494 (0.0008) +[2026-06-02 16:54:55,407][262582] Updated weights for policy 0, policy_version 17504 (0.0008) +[2026-06-02 16:54:55,600][262582] Updated weights for policy 0, policy_version 17514 (0.0008) +[2026-06-02 16:54:55,800][262582] Updated weights for policy 0, policy_version 17524 (0.0008) +[2026-06-02 16:54:56,006][262582] Updated weights for policy 0, policy_version 17534 (0.0008) +[2026-06-02 16:54:56,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18022.4, 300 sec: 18327.9). Total num frames: 8945664. Throughput: 0: 18343.8. Samples: 8970112. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 16:54:56,008][260776] Avg episode reward: [(0, '511.716')] +[2026-06-02 16:54:56,041][262026] Saving new best policy, reward=511.716! +[2026-06-02 16:54:56,698][262582] Updated weights for policy 0, policy_version 17544 (0.0008) +[2026-06-02 16:54:56,894][262582] Updated weights for policy 0, policy_version 17554 (0.0008) +[2026-06-02 16:54:57,095][262582] Updated weights for policy 0, policy_version 17564 (0.0008) +[2026-06-02 16:54:57,296][262582] Updated weights for policy 0, policy_version 17574 (0.0008) +[2026-06-02 16:54:57,496][262582] Updated weights for policy 0, policy_version 17584 (0.0009) +[2026-06-02 16:54:57,698][262582] Updated weights for policy 0, policy_version 17594 (0.0008) +[2026-06-02 16:54:58,431][262582] Updated weights for policy 0, policy_version 17604 (0.0008) +[2026-06-02 16:54:58,619][262582] Updated weights for policy 0, policy_version 17614 (0.0008) +[2026-06-02 16:54:58,823][262582] Updated weights for policy 0, policy_version 17624 (0.0008) +[2026-06-02 16:54:59,026][262582] Updated weights for policy 0, policy_version 17634 (0.0009) +[2026-06-02 16:54:59,221][262582] Updated weights for policy 0, policy_version 17644 (0.0008) +[2026-06-02 16:54:59,428][262582] Updated weights for policy 0, policy_version 17654 (0.0008) +[2026-06-02 16:54:59,628][262582] Updated weights for policy 0, policy_version 17664 (0.0008) +[2026-06-02 16:55:00,347][262582] Updated weights for policy 0, policy_version 17675 (0.0008) +[2026-06-02 16:55:00,541][262582] Updated weights for policy 0, policy_version 17685 (0.0008) +[2026-06-02 16:55:00,756][262582] Updated weights for policy 0, policy_version 17696 (0.0009) +[2026-06-02 16:55:00,955][262582] Updated weights for policy 0, policy_version 17706 (0.0008) +[2026-06-02 16:55:01,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 9043968. Throughput: 0: 18528.7. Samples: 9089280. Policy #0 lag: (min: 28.0, avg: 44.4, max: 92.0) +[2026-06-02 16:55:01,008][260776] Avg episode reward: [(0, '528.470')] +[2026-06-02 16:55:01,171][262582] Updated weights for policy 0, policy_version 17716 (0.0008) +[2026-06-02 16:55:01,367][262582] Updated weights for policy 0, policy_version 17726 (0.0008) +[2026-06-02 16:55:01,403][262026] Saving new best policy, reward=528.470! +[2026-06-02 16:55:02,058][262582] Updated weights for policy 0, policy_version 17736 (0.0008) +[2026-06-02 16:55:02,272][262582] Updated weights for policy 0, policy_version 17747 (0.0008) +[2026-06-02 16:55:02,475][262582] Updated weights for policy 0, policy_version 17757 (0.0008) +[2026-06-02 16:55:02,674][262582] Updated weights for policy 0, policy_version 17767 (0.0008) +[2026-06-02 16:55:02,872][262582] Updated weights for policy 0, policy_version 17777 (0.0008) +[2026-06-02 16:55:03,076][262582] Updated weights for policy 0, policy_version 17787 (0.0008) +[2026-06-02 16:55:03,785][262582] Updated weights for policy 0, policy_version 17797 (0.0008) +[2026-06-02 16:55:03,985][262582] Updated weights for policy 0, policy_version 17807 (0.0008) +[2026-06-02 16:55:04,187][262582] Updated weights for policy 0, policy_version 17817 (0.0008) +[2026-06-02 16:55:04,406][262582] Updated weights for policy 0, policy_version 17828 (0.0008) +[2026-06-02 16:55:04,616][262582] Updated weights for policy 0, policy_version 17838 (0.0009) +[2026-06-02 16:55:04,818][262582] Updated weights for policy 0, policy_version 17848 (0.0008) +[2026-06-02 16:55:05,518][262582] Updated weights for policy 0, policy_version 17858 (0.0008) +[2026-06-02 16:55:05,703][262582] Updated weights for policy 0, policy_version 17868 (0.0008) +[2026-06-02 16:55:05,903][262582] Updated weights for policy 0, policy_version 17878 (0.0008) +[2026-06-02 16:55:06,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 9142272. Throughput: 0: 18378.0. Samples: 9136896. Policy #0 lag: (min: 28.0, avg: 44.4, max: 92.0) +[2026-06-02 16:55:06,008][260776] Avg episode reward: [(0, '524.267')] +[2026-06-02 16:55:06,099][262582] Updated weights for policy 0, policy_version 17888 (0.0008) +[2026-06-02 16:55:06,308][262582] Updated weights for policy 0, policy_version 17898 (0.0008) +[2026-06-02 16:55:06,502][262582] Updated weights for policy 0, policy_version 17908 (0.0008) +[2026-06-02 16:55:06,712][262582] Updated weights for policy 0, policy_version 17918 (0.0008) +[2026-06-02 16:55:07,417][262582] Updated weights for policy 0, policy_version 17928 (0.0008) +[2026-06-02 16:55:07,611][262582] Updated weights for policy 0, policy_version 17938 (0.0009) +[2026-06-02 16:55:07,828][262582] Updated weights for policy 0, policy_version 17949 (0.0009) +[2026-06-02 16:55:08,028][262582] Updated weights for policy 0, policy_version 17959 (0.0008) +[2026-06-02 16:55:08,253][262582] Updated weights for policy 0, policy_version 17970 (0.0009) +[2026-06-02 16:55:08,443][262582] Updated weights for policy 0, policy_version 17980 (0.0008) +[2026-06-02 16:55:09,153][262582] Updated weights for policy 0, policy_version 17990 (0.0008) +[2026-06-02 16:55:09,357][262582] Updated weights for policy 0, policy_version 18001 (0.0008) +[2026-06-02 16:55:09,556][262582] Updated weights for policy 0, policy_version 18011 (0.0008) +[2026-06-02 16:55:09,781][262582] Updated weights for policy 0, policy_version 18022 (0.0008) +[2026-06-02 16:55:09,979][262582] Updated weights for policy 0, policy_version 18032 (0.0008) +[2026-06-02 16:55:10,173][262582] Updated weights for policy 0, policy_version 18042 (0.0009) +[2026-06-02 16:55:10,905][262582] Updated weights for policy 0, policy_version 18052 (0.0008) +[2026-06-02 16:55:11,007][260776] Fps is (10 sec: 19661.0, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 9240576. Throughput: 0: 18412.1. Samples: 9254016. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 16:55:11,007][260776] Avg episode reward: [(0, '516.121')] +[2026-06-02 16:55:11,094][262582] Updated weights for policy 0, policy_version 18062 (0.0008) +[2026-06-02 16:55:11,286][262582] Updated weights for policy 0, policy_version 18072 (0.0008) +[2026-06-02 16:55:11,495][262582] Updated weights for policy 0, policy_version 18082 (0.0008) +[2026-06-02 16:55:11,689][262582] Updated weights for policy 0, policy_version 18092 (0.0009) +[2026-06-02 16:55:11,895][262582] Updated weights for policy 0, policy_version 18102 (0.0008) +[2026-06-02 16:55:12,090][262582] Updated weights for policy 0, policy_version 18112 (0.0009) +[2026-06-02 16:55:12,841][262582] Updated weights for policy 0, policy_version 18123 (0.0010) +[2026-06-02 16:55:13,037][262582] Updated weights for policy 0, policy_version 18133 (0.0009) +[2026-06-02 16:55:13,228][262582] Updated weights for policy 0, policy_version 18143 (0.0009) +[2026-06-02 16:55:13,430][262582] Updated weights for policy 0, policy_version 18153 (0.0009) +[2026-06-02 16:55:13,624][262582] Updated weights for policy 0, policy_version 18163 (0.0009) +[2026-06-02 16:55:13,822][262582] Updated weights for policy 0, policy_version 18173 (0.0009) +[2026-06-02 16:55:14,538][262582] Updated weights for policy 0, policy_version 18183 (0.0009) +[2026-06-02 16:55:14,742][262582] Updated weights for policy 0, policy_version 18193 (0.0008) +[2026-06-02 16:55:14,930][262582] Updated weights for policy 0, policy_version 18203 (0.0008) +[2026-06-02 16:55:15,132][262582] Updated weights for policy 0, policy_version 18213 (0.0009) +[2026-06-02 16:55:15,336][262582] Updated weights for policy 0, policy_version 18223 (0.0009) +[2026-06-02 16:55:15,549][262582] Updated weights for policy 0, policy_version 18233 (0.0009) +[2026-06-02 16:55:16,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 9338880. Throughput: 0: 18363.7. Samples: 9360896. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 16:55:16,008][260776] Avg episode reward: [(0, '551.609')] +[2026-06-02 16:55:16,013][262026] Saving new best policy, reward=551.609! +[2026-06-02 16:55:16,273][262582] Updated weights for policy 0, policy_version 18243 (0.0008) +[2026-06-02 16:55:16,472][262582] Updated weights for policy 0, policy_version 18253 (0.0008) +[2026-06-02 16:55:16,691][262582] Updated weights for policy 0, policy_version 18264 (0.0009) +[2026-06-02 16:55:16,884][262582] Updated weights for policy 0, policy_version 18274 (0.0008) +[2026-06-02 16:55:17,097][262582] Updated weights for policy 0, policy_version 18284 (0.0008) +[2026-06-02 16:55:17,300][262582] Updated weights for policy 0, policy_version 18294 (0.0009) +[2026-06-02 16:55:17,493][262582] Updated weights for policy 0, policy_version 18304 (0.0008) +[2026-06-02 16:55:18,161][262582] Updated weights for policy 0, policy_version 18314 (0.0009) +[2026-06-02 16:55:18,352][262582] Updated weights for policy 0, policy_version 18324 (0.0008) +[2026-06-02 16:55:18,570][262582] Updated weights for policy 0, policy_version 18335 (0.0008) +[2026-06-02 16:55:18,769][262582] Updated weights for policy 0, policy_version 18345 (0.0008) +[2026-06-02 16:55:18,978][262582] Updated weights for policy 0, policy_version 18355 (0.0009) +[2026-06-02 16:55:19,176][262582] Updated weights for policy 0, policy_version 18365 (0.0009) +[2026-06-02 16:55:19,914][262582] Updated weights for policy 0, policy_version 18377 (0.0009) +[2026-06-02 16:55:20,122][262582] Updated weights for policy 0, policy_version 18388 (0.0008) +[2026-06-02 16:55:20,317][262582] Updated weights for policy 0, policy_version 18398 (0.0008) +[2026-06-02 16:55:20,537][262582] Updated weights for policy 0, policy_version 18409 (0.0009) +[2026-06-02 16:55:20,744][262582] Updated weights for policy 0, policy_version 18419 (0.0010) +[2026-06-02 16:55:20,927][262582] Updated weights for policy 0, policy_version 18429 (0.0008) +[2026-06-02 16:55:21,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 9437184. Throughput: 0: 18349.5. Samples: 9419904. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 16:55:21,008][260776] Avg episode reward: [(0, '537.739')] +[2026-06-02 16:55:21,713][262582] Updated weights for policy 0, policy_version 18440 (0.0008) +[2026-06-02 16:55:21,920][262582] Updated weights for policy 0, policy_version 18451 (0.0008) +[2026-06-02 16:55:22,120][262582] Updated weights for policy 0, policy_version 18461 (0.0008) +[2026-06-02 16:55:22,319][262582] Updated weights for policy 0, policy_version 18471 (0.0009) +[2026-06-02 16:55:22,523][262582] Updated weights for policy 0, policy_version 18481 (0.0009) +[2026-06-02 16:55:22,746][262582] Updated weights for policy 0, policy_version 18492 (0.0008) +[2026-06-02 16:55:23,452][262582] Updated weights for policy 0, policy_version 18502 (0.0008) +[2026-06-02 16:55:23,647][262582] Updated weights for policy 0, policy_version 18512 (0.0008) +[2026-06-02 16:55:23,858][262582] Updated weights for policy 0, policy_version 18522 (0.0008) +[2026-06-02 16:55:24,061][262582] Updated weights for policy 0, policy_version 18532 (0.0008) +[2026-06-02 16:55:24,264][262582] Updated weights for policy 0, policy_version 18542 (0.0008) +[2026-06-02 16:55:24,459][262582] Updated weights for policy 0, policy_version 18552 (0.0008) +[2026-06-02 16:55:25,187][262582] Updated weights for policy 0, policy_version 18562 (0.0008) +[2026-06-02 16:55:25,371][262582] Updated weights for policy 0, policy_version 18572 (0.0008) +[2026-06-02 16:55:25,577][262582] Updated weights for policy 0, policy_version 18582 (0.0008) +[2026-06-02 16:55:25,782][262582] Updated weights for policy 0, policy_version 18592 (0.0009) +[2026-06-02 16:55:25,984][262582] Updated weights for policy 0, policy_version 18602 (0.0008) +[2026-06-02 16:55:26,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 18327.9). Total num frames: 9502720. Throughput: 0: 18383.7. Samples: 9526784. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) +[2026-06-02 16:55:26,008][260776] Avg episode reward: [(0, '555.085')] +[2026-06-02 16:55:26,180][262582] Updated weights for policy 0, policy_version 18612 (0.0008) +[2026-06-02 16:55:26,395][262582] Updated weights for policy 0, policy_version 18622 (0.0009) +[2026-06-02 16:55:26,425][262026] Saving new best policy, reward=555.085! +[2026-06-02 16:55:27,095][262582] Updated weights for policy 0, policy_version 18633 (0.0008) +[2026-06-02 16:55:27,290][262582] Updated weights for policy 0, policy_version 18643 (0.0008) +[2026-06-02 16:55:27,492][262582] Updated weights for policy 0, policy_version 18653 (0.0008) +[2026-06-02 16:55:27,700][262582] Updated weights for policy 0, policy_version 18663 (0.0009) +[2026-06-02 16:55:27,913][262582] Updated weights for policy 0, policy_version 18673 (0.0009) +[2026-06-02 16:55:28,110][262582] Updated weights for policy 0, policy_version 18683 (0.0008) +[2026-06-02 16:55:28,796][262582] Updated weights for policy 0, policy_version 18693 (0.0008) +[2026-06-02 16:55:28,988][262582] Updated weights for policy 0, policy_version 18703 (0.0008) +[2026-06-02 16:55:29,194][262582] Updated weights for policy 0, policy_version 18713 (0.0009) +[2026-06-02 16:55:29,400][262582] Updated weights for policy 0, policy_version 18723 (0.0009) +[2026-06-02 16:55:29,594][262582] Updated weights for policy 0, policy_version 18733 (0.0008) +[2026-06-02 16:55:29,806][262582] Updated weights for policy 0, policy_version 18743 (0.0008) +[2026-06-02 16:55:30,510][262582] Updated weights for policy 0, policy_version 18753 (0.0008) +[2026-06-02 16:55:30,696][262582] Updated weights for policy 0, policy_version 18763 (0.0008) +[2026-06-02 16:55:30,888][262582] Updated weights for policy 0, policy_version 18773 (0.0009) +[2026-06-02 16:55:31,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 9601024. Throughput: 0: 18577.1. Samples: 9642368. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) +[2026-06-02 16:55:31,008][260776] Avg episode reward: [(0, '562.183')] +[2026-06-02 16:55:31,103][262582] Updated weights for policy 0, policy_version 18783 (0.0008) +[2026-06-02 16:55:31,296][262582] Updated weights for policy 0, policy_version 18793 (0.0008) +[2026-06-02 16:55:31,500][262582] Updated weights for policy 0, policy_version 18803 (0.0008) +[2026-06-02 16:55:31,704][262582] Updated weights for policy 0, policy_version 18813 (0.0008) +[2026-06-02 16:55:31,760][262026] Saving new best policy, reward=562.183! +[2026-06-02 16:55:32,402][262582] Updated weights for policy 0, policy_version 18824 (0.0005) +[2026-06-02 16:55:32,604][262582] Updated weights for policy 0, policy_version 18834 (0.0004) +[2026-06-02 16:55:32,804][262582] Updated weights for policy 0, policy_version 18844 (0.0004) +[2026-06-02 16:55:33,017][262582] Updated weights for policy 0, policy_version 18854 (0.0004) +[2026-06-02 16:55:33,219][262582] Updated weights for policy 0, policy_version 18864 (0.0004) +[2026-06-02 16:55:33,424][262582] Updated weights for policy 0, policy_version 18874 (0.0004) +[2026-06-02 16:55:34,099][262582] Updated weights for policy 0, policy_version 18884 (0.0006) +[2026-06-02 16:55:34,294][262582] Updated weights for policy 0, policy_version 18894 (0.0008) +[2026-06-02 16:55:34,483][262582] Updated weights for policy 0, policy_version 18904 (0.0008) +[2026-06-02 16:55:34,686][262582] Updated weights for policy 0, policy_version 18914 (0.0008) +[2026-06-02 16:55:34,915][262582] Updated weights for policy 0, policy_version 18925 (0.0008) +[2026-06-02 16:55:35,125][262582] Updated weights for policy 0, policy_version 18935 (0.0008) +[2026-06-02 16:55:35,825][262582] Updated weights for policy 0, policy_version 18945 (0.0008) +[2026-06-02 16:55:36,007][260776] Fps is (10 sec: 19660.2, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 9699328. Throughput: 0: 18403.5. Samples: 9693184. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) +[2026-06-02 16:55:36,008][260776] Avg episode reward: [(0, '571.073')] +[2026-06-02 16:55:36,012][262582] Updated weights for policy 0, policy_version 18955 (0.0009) +[2026-06-02 16:55:36,232][262582] Updated weights for policy 0, policy_version 18966 (0.0009) +[2026-06-02 16:55:36,425][262582] Updated weights for policy 0, policy_version 18976 (0.0008) +[2026-06-02 16:55:36,642][262582] Updated weights for policy 0, policy_version 18986 (0.0009) +[2026-06-02 16:55:36,750][262026] Early stopping after 6 epochs (48 sgd steps), loss delta 0.0000003 +[2026-06-02 16:55:36,751][262026] Saving new best policy, reward=571.073! +[2026-06-02 16:55:37,397][262582] Updated weights for policy 0, policy_version 18996 (0.0009) +[2026-06-02 16:55:37,611][262582] Updated weights for policy 0, policy_version 19007 (0.0008) +[2026-06-02 16:55:37,809][262582] Updated weights for policy 0, policy_version 19017 (0.0008) +[2026-06-02 16:55:38,014][262582] Updated weights for policy 0, policy_version 19027 (0.0008) +[2026-06-02 16:55:38,221][262582] Updated weights for policy 0, policy_version 19037 (0.0008) +[2026-06-02 16:55:38,428][262582] Updated weights for policy 0, policy_version 19047 (0.0009) +[2026-06-02 16:55:39,136][262582] Updated weights for policy 0, policy_version 19057 (0.0008) +[2026-06-02 16:55:39,340][262582] Updated weights for policy 0, policy_version 19068 (0.0008) +[2026-06-02 16:55:39,581][262582] Updated weights for policy 0, policy_version 19080 (0.0009) +[2026-06-02 16:55:39,788][262582] Updated weights for policy 0, policy_version 19090 (0.0008) +[2026-06-02 16:55:39,982][262582] Updated weights for policy 0, policy_version 19100 (0.0008) +[2026-06-02 16:55:40,199][262582] Updated weights for policy 0, policy_version 19110 (0.0009) +[2026-06-02 16:55:40,402][262582] Updated weights for policy 0, policy_version 19120 (0.0008) +[2026-06-02 16:55:41,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 9797632. Throughput: 0: 18759.1. Samples: 9814272. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 16:55:41,008][260776] Avg episode reward: [(0, '591.549')] +[2026-06-02 16:55:41,097][262582] Updated weights for policy 0, policy_version 19131 (0.0008) +[2026-06-02 16:55:41,295][262582] Updated weights for policy 0, policy_version 19141 (0.0008) +[2026-06-02 16:55:41,501][262582] Updated weights for policy 0, policy_version 19151 (0.0008) +[2026-06-02 16:55:41,710][262582] Updated weights for policy 0, policy_version 19161 (0.0008) +[2026-06-02 16:55:41,930][262582] Updated weights for policy 0, policy_version 19172 (0.0008) +[2026-06-02 16:55:42,137][262582] Updated weights for policy 0, policy_version 19182 (0.0008) +[2026-06-02 16:55:42,166][262026] Saving new best policy, reward=591.549! +[2026-06-02 16:55:42,824][262582] Updated weights for policy 0, policy_version 19192 (0.0008) +[2026-06-02 16:55:43,024][262582] Updated weights for policy 0, policy_version 19202 (0.0009) +[2026-06-02 16:55:43,236][262582] Updated weights for policy 0, policy_version 19212 (0.0009) +[2026-06-02 16:55:43,433][262582] Updated weights for policy 0, policy_version 19222 (0.0008) +[2026-06-02 16:55:43,641][262582] Updated weights for policy 0, policy_version 19232 (0.0008) +[2026-06-02 16:55:43,848][262582] Updated weights for policy 0, policy_version 19242 (0.0008) +[2026-06-02 16:55:44,511][262582] Updated weights for policy 0, policy_version 19252 (0.0008) +[2026-06-02 16:55:44,708][262582] Updated weights for policy 0, policy_version 19262 (0.0009) +[2026-06-02 16:55:44,896][262582] Updated weights for policy 0, policy_version 19272 (0.0008) +[2026-06-02 16:55:45,106][262582] Updated weights for policy 0, policy_version 19282 (0.0008) +[2026-06-02 16:55:45,304][262582] Updated weights for policy 0, policy_version 19292 (0.0008) +[2026-06-02 16:55:45,528][262582] Updated weights for policy 0, policy_version 19303 (0.0009) +[2026-06-02 16:55:46,007][260776] Fps is (10 sec: 19661.4, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 9895936. Throughput: 0: 18443.4. Samples: 9919232. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 16:55:46,008][260776] Avg episode reward: [(0, '584.094')] +[2026-06-02 16:55:46,236][262582] Updated weights for policy 0, policy_version 19313 (0.0008) +[2026-06-02 16:55:46,415][262582] Updated weights for policy 0, policy_version 19323 (0.0008) +[2026-06-02 16:55:46,618][262582] Updated weights for policy 0, policy_version 19333 (0.0008) +[2026-06-02 16:55:46,819][262582] Updated weights for policy 0, policy_version 19343 (0.0008) +[2026-06-02 16:55:47,040][262582] Updated weights for policy 0, policy_version 19354 (0.0008) +[2026-06-02 16:55:47,238][262582] Updated weights for policy 0, policy_version 19364 (0.0008) +[2026-06-02 16:55:47,447][262582] Updated weights for policy 0, policy_version 19374 (0.0008) +[2026-06-02 16:55:48,153][262582] Updated weights for policy 0, policy_version 19384 (0.0008) +[2026-06-02 16:55:48,350][262582] Updated weights for policy 0, policy_version 19394 (0.0009) +[2026-06-02 16:55:48,600][262582] Updated weights for policy 0, policy_version 19407 (0.0009) +[2026-06-02 16:55:48,830][262582] Updated weights for policy 0, policy_version 19418 (0.0008) +[2026-06-02 16:55:49,028][262582] Updated weights for policy 0, policy_version 19428 (0.0009) +[2026-06-02 16:55:49,231][262582] Updated weights for policy 0, policy_version 19438 (0.0009) +[2026-06-02 16:55:49,933][262582] Updated weights for policy 0, policy_version 19448 (0.0009) +[2026-06-02 16:55:50,119][262582] Updated weights for policy 0, policy_version 19458 (0.0009) +[2026-06-02 16:55:50,329][262582] Updated weights for policy 0, policy_version 19468 (0.0009) +[2026-06-02 16:55:50,542][262582] Updated weights for policy 0, policy_version 19478 (0.0009) +[2026-06-02 16:55:50,752][262582] Updated weights for policy 0, policy_version 19489 (0.0008) +[2026-06-02 16:55:50,964][262582] Updated weights for policy 0, policy_version 19499 (0.0009) +[2026-06-02 16:55:51,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18022.4, 300 sec: 18327.9). Total num frames: 9961472. Throughput: 0: 18702.2. Samples: 9978496. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 16:55:51,008][260776] Avg episode reward: [(0, '566.069')] +[2026-06-02 16:55:51,658][262582] Updated weights for policy 0, policy_version 19509 (0.0009) +[2026-06-02 16:55:51,872][262582] Updated weights for policy 0, policy_version 19520 (0.0009) +[2026-06-02 16:55:52,084][262582] Updated weights for policy 0, policy_version 19530 (0.0009) +[2026-06-02 16:55:52,292][262582] Updated weights for policy 0, policy_version 19540 (0.0009) +[2026-06-02 16:55:52,508][262582] Updated weights for policy 0, policy_version 19551 (0.0009) +[2026-06-02 16:55:52,721][262582] Updated weights for policy 0, policy_version 19561 (0.0008) +[2026-06-02 16:55:53,407][262582] Updated weights for policy 0, policy_version 19571 (0.0008) +[2026-06-02 16:55:53,593][262582] Updated weights for policy 0, policy_version 19581 (0.0008) +[2026-06-02 16:55:53,785][262582] Updated weights for policy 0, policy_version 19591 (0.0008) +[2026-06-02 16:55:53,988][262582] Updated weights for policy 0, policy_version 19601 (0.0008) +[2026-06-02 16:55:54,193][262582] Updated weights for policy 0, policy_version 19611 (0.0008) +[2026-06-02 16:55:54,399][262582] Updated weights for policy 0, policy_version 19621 (0.0008) +[2026-06-02 16:55:54,608][262582] Updated weights for policy 0, policy_version 19631 (0.0009) +[2026-06-02 16:55:55,309][262582] Updated weights for policy 0, policy_version 19641 (0.0008) +[2026-06-02 16:55:55,503][262582] Updated weights for policy 0, policy_version 19651 (0.0008) +[2026-06-02 16:55:55,707][262582] Updated weights for policy 0, policy_version 19661 (0.0008) +[2026-06-02 16:55:55,909][262582] Updated weights for policy 0, policy_version 19671 (0.0008) +[2026-06-02 16:55:56,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 10059776. Throughput: 0: 18460.4. Samples: 10084736. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 16:55:56,008][260776] Avg episode reward: [(0, '545.569')] +[2026-06-02 16:55:56,101][262582] Updated weights for policy 0, policy_version 19681 (0.0008) +[2026-06-02 16:55:56,319][262582] Updated weights for policy 0, policy_version 19691 (0.0008) +[2026-06-02 16:55:57,041][262582] Updated weights for policy 0, policy_version 19702 (0.0008) +[2026-06-02 16:55:57,239][262582] Updated weights for policy 0, policy_version 19712 (0.0008) +[2026-06-02 16:55:57,437][262582] Updated weights for policy 0, policy_version 19722 (0.0008) +[2026-06-02 16:55:57,634][262582] Updated weights for policy 0, policy_version 19732 (0.0008) +[2026-06-02 16:55:57,840][262582] Updated weights for policy 0, policy_version 19742 (0.0008) +[2026-06-02 16:55:58,050][262582] Updated weights for policy 0, policy_version 19752 (0.0008) +[2026-06-02 16:55:58,726][262582] Updated weights for policy 0, policy_version 19762 (0.0008) +[2026-06-02 16:55:58,919][262582] Updated weights for policy 0, policy_version 19772 (0.0008) +[2026-06-02 16:55:59,121][262582] Updated weights for policy 0, policy_version 19782 (0.0008) +[2026-06-02 16:55:59,322][262582] Updated weights for policy 0, policy_version 19792 (0.0008) +[2026-06-02 16:55:59,532][262582] Updated weights for policy 0, policy_version 19802 (0.0009) +[2026-06-02 16:55:59,733][262582] Updated weights for policy 0, policy_version 19812 (0.0008) +[2026-06-02 16:55:59,936][262582] Updated weights for policy 0, policy_version 19822 (0.0008) +[2026-06-02 16:56:00,618][262582] Updated weights for policy 0, policy_version 19832 (0.0008) +[2026-06-02 16:56:00,808][262582] Updated weights for policy 0, policy_version 19842 (0.0008) +[2026-06-02 16:56:01,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 10158080. Throughput: 0: 18648.2. Samples: 10200064. Policy #0 lag: (min: 60.0, avg: 76.9, max: 124.0) +[2026-06-02 16:56:01,011][262582] Updated weights for policy 0, policy_version 19852 (0.0008) +[2026-06-02 16:56:01,010][260776] Avg episode reward: [(0, '546.965')] +[2026-06-02 16:56:01,213][262582] Updated weights for policy 0, policy_version 19862 (0.0008) +[2026-06-02 16:56:01,417][262582] Updated weights for policy 0, policy_version 19872 (0.0008) +[2026-06-02 16:56:01,623][262582] Updated weights for policy 0, policy_version 19882 (0.0008) +[2026-06-02 16:56:02,333][262582] Updated weights for policy 0, policy_version 19892 (0.0008) +[2026-06-02 16:56:02,519][262582] Updated weights for policy 0, policy_version 19902 (0.0008) +[2026-06-02 16:56:02,712][262582] Updated weights for policy 0, policy_version 19912 (0.0008) +[2026-06-02 16:56:02,924][262582] Updated weights for policy 0, policy_version 19922 (0.0008) +[2026-06-02 16:56:03,133][262582] Updated weights for policy 0, policy_version 19932 (0.0008) +[2026-06-02 16:56:03,333][262582] Updated weights for policy 0, policy_version 19942 (0.0008) +[2026-06-02 16:56:03,530][262582] Updated weights for policy 0, policy_version 19952 (0.0008) +[2026-06-02 16:56:04,217][262582] Updated weights for policy 0, policy_version 19962 (0.0008) +[2026-06-02 16:56:04,417][262582] Updated weights for policy 0, policy_version 19972 (0.0008) +[2026-06-02 16:56:04,622][262582] Updated weights for policy 0, policy_version 19982 (0.0008) +[2026-06-02 16:56:04,814][262582] Updated weights for policy 0, policy_version 19992 (0.0008) +[2026-06-02 16:56:05,028][262582] Updated weights for policy 0, policy_version 20002 (0.0009) +[2026-06-02 16:56:05,227][262582] Updated weights for policy 0, policy_version 20012 (0.0008) +[2026-06-02 16:56:05,948][262582] Updated weights for policy 0, policy_version 20022 (0.0008) +[2026-06-02 16:56:06,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 10256384. Throughput: 0: 18443.3. Samples: 10249856. Policy #0 lag: (min: 60.0, avg: 76.9, max: 124.0) +[2026-06-02 16:56:06,008][260776] Avg episode reward: [(0, '538.376')] +[2026-06-02 16:56:06,145][262582] Updated weights for policy 0, policy_version 20032 (0.0008) +[2026-06-02 16:56:06,336][262582] Updated weights for policy 0, policy_version 20042 (0.0008) +[2026-06-02 16:56:06,552][262582] Updated weights for policy 0, policy_version 20052 (0.0008) +[2026-06-02 16:56:06,770][262582] Updated weights for policy 0, policy_version 20063 (0.0008) +[2026-06-02 16:56:06,974][262582] Updated weights for policy 0, policy_version 20073 (0.0008) +[2026-06-02 16:56:07,680][262582] Updated weights for policy 0, policy_version 20083 (0.0008) +[2026-06-02 16:56:07,866][262582] Updated weights for policy 0, policy_version 20093 (0.0008) +[2026-06-02 16:56:08,063][262582] Updated weights for policy 0, policy_version 20103 (0.0008) +[2026-06-02 16:56:08,300][262582] Updated weights for policy 0, policy_version 20114 (0.0009) +[2026-06-02 16:56:08,495][262582] Updated weights for policy 0, policy_version 20124 (0.0009) +[2026-06-02 16:56:08,710][262582] Updated weights for policy 0, policy_version 20135 (0.0008) +[2026-06-02 16:56:09,423][262582] Updated weights for policy 0, policy_version 20145 (0.0008) +[2026-06-02 16:56:09,612][262582] Updated weights for policy 0, policy_version 20155 (0.0008) +[2026-06-02 16:56:09,851][262582] Updated weights for policy 0, policy_version 20167 (0.0009) +[2026-06-02 16:56:10,035][262582] Updated weights for policy 0, policy_version 20177 (0.0009) +[2026-06-02 16:56:10,264][262582] Updated weights for policy 0, policy_version 20188 (0.0008) +[2026-06-02 16:56:10,474][262582] Updated weights for policy 0, policy_version 20199 (0.0009) +[2026-06-02 16:56:11,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 10354688. Throughput: 0: 18705.1. Samples: 10368512. Policy #0 lag: (min: 60.0, avg: 76.9, max: 124.0) +[2026-06-02 16:56:11,008][260776] Avg episode reward: [(0, '540.604')] +[2026-06-02 16:56:11,215][262582] Updated weights for policy 0, policy_version 20209 (0.0008) +[2026-06-02 16:56:11,398][262582] Updated weights for policy 0, policy_version 20219 (0.0008) +[2026-06-02 16:56:11,582][262582] Updated weights for policy 0, policy_version 20229 (0.0008) +[2026-06-02 16:56:11,789][262582] Updated weights for policy 0, policy_version 20239 (0.0008) +[2026-06-02 16:56:11,996][262582] Updated weights for policy 0, policy_version 20249 (0.0008) +[2026-06-02 16:56:12,216][262582] Updated weights for policy 0, policy_version 20260 (0.0008) +[2026-06-02 16:56:12,434][262582] Updated weights for policy 0, policy_version 20271 (0.0008) +[2026-06-02 16:56:13,155][262582] Updated weights for policy 0, policy_version 20281 (0.0008) +[2026-06-02 16:56:13,345][262582] Updated weights for policy 0, policy_version 20291 (0.0008) +[2026-06-02 16:56:13,581][262582] Updated weights for policy 0, policy_version 20303 (0.0008) +[2026-06-02 16:56:13,781][262582] Updated weights for policy 0, policy_version 20313 (0.0008) +[2026-06-02 16:56:13,988][262582] Updated weights for policy 0, policy_version 20323 (0.0009) +[2026-06-02 16:56:14,189][262582] Updated weights for policy 0, policy_version 20333 (0.0009) +[2026-06-02 16:56:14,873][262582] Updated weights for policy 0, policy_version 20343 (0.0009) +[2026-06-02 16:56:15,074][262582] Updated weights for policy 0, policy_version 20354 (0.0008) +[2026-06-02 16:56:15,294][262582] Updated weights for policy 0, policy_version 20365 (0.0009) +[2026-06-02 16:56:15,497][262582] Updated weights for policy 0, policy_version 20375 (0.0009) +[2026-06-02 16:56:15,697][262582] Updated weights for policy 0, policy_version 20385 (0.0008) +[2026-06-02 16:56:15,904][262582] Updated weights for policy 0, policy_version 20395 (0.0009) +[2026-06-02 16:56:16,007][260776] Fps is (10 sec: 19661.0, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 10452992. Throughput: 0: 18497.4. Samples: 10474752. Policy #0 lag: (min: 81.0, avg: 98.4, max: 143.0) +[2026-06-02 16:56:16,008][260776] Avg episode reward: [(0, '540.895')] +[2026-06-02 16:56:16,643][262582] Updated weights for policy 0, policy_version 20406 (0.0008) +[2026-06-02 16:56:16,846][262582] Updated weights for policy 0, policy_version 20417 (0.0008) +[2026-06-02 16:56:17,046][262582] Updated weights for policy 0, policy_version 20427 (0.0008) +[2026-06-02 16:56:17,272][262582] Updated weights for policy 0, policy_version 20438 (0.0009) +[2026-06-02 16:56:17,489][262582] Updated weights for policy 0, policy_version 20449 (0.0009) +[2026-06-02 16:56:17,680][262582] Updated weights for policy 0, policy_version 20459 (0.0009) +[2026-06-02 16:56:18,394][262582] Updated weights for policy 0, policy_version 20469 (0.0008) +[2026-06-02 16:56:18,609][262582] Updated weights for policy 0, policy_version 20480 (0.0008) +[2026-06-02 16:56:18,808][262582] Updated weights for policy 0, policy_version 20490 (0.0008) +[2026-06-02 16:56:19,007][262582] Updated weights for policy 0, policy_version 20500 (0.0009) +[2026-06-02 16:56:19,212][262582] Updated weights for policy 0, policy_version 20510 (0.0009) +[2026-06-02 16:56:19,435][262582] Updated weights for policy 0, policy_version 20522 (0.0009) +[2026-06-02 16:56:20,170][262582] Updated weights for policy 0, policy_version 20532 (0.0009) +[2026-06-02 16:56:20,368][262582] Updated weights for policy 0, policy_version 20542 (0.0009) +[2026-06-02 16:56:20,601][262582] Updated weights for policy 0, policy_version 20554 (0.0009) +[2026-06-02 16:56:20,795][262582] Updated weights for policy 0, policy_version 20564 (0.0008) +[2026-06-02 16:56:21,002][262582] Updated weights for policy 0, policy_version 20574 (0.0009) +[2026-06-02 16:56:21,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18022.4, 300 sec: 18327.9). Total num frames: 10518528. Throughput: 0: 18699.5. Samples: 10534656. Policy #0 lag: (min: 81.0, avg: 98.4, max: 143.0) +[2026-06-02 16:56:21,008][260776] Avg episode reward: [(0, '544.293')] +[2026-06-02 16:56:21,203][262582] Updated weights for policy 0, policy_version 20584 (0.0009) +[2026-06-02 16:56:21,911][262582] Updated weights for policy 0, policy_version 20594 (0.0009) +[2026-06-02 16:56:22,135][262582] Updated weights for policy 0, policy_version 20606 (0.0008) +[2026-06-02 16:56:22,350][262582] Updated weights for policy 0, policy_version 20617 (0.0008) +[2026-06-02 16:56:22,570][262582] Updated weights for policy 0, policy_version 20628 (0.0009) +[2026-06-02 16:56:22,773][262582] Updated weights for policy 0, policy_version 20638 (0.0008) +[2026-06-02 16:56:22,988][262582] Updated weights for policy 0, policy_version 20649 (0.0009) +[2026-06-02 16:56:23,728][262582] Updated weights for policy 0, policy_version 20660 (0.0008) +[2026-06-02 16:56:23,954][262582] Updated weights for policy 0, policy_version 20672 (0.0008) +[2026-06-02 16:56:24,171][262582] Updated weights for policy 0, policy_version 20682 (0.0009) +[2026-06-02 16:56:24,372][262582] Updated weights for policy 0, policy_version 20693 (0.0009) +[2026-06-02 16:56:24,590][262582] Updated weights for policy 0, policy_version 20704 (0.0008) +[2026-06-02 16:56:24,795][262582] Updated weights for policy 0, policy_version 20714 (0.0008) +[2026-06-02 16:56:25,537][262582] Updated weights for policy 0, policy_version 20725 (0.0009) +[2026-06-02 16:56:25,735][262582] Updated weights for policy 0, policy_version 20736 (0.0008) +[2026-06-02 16:56:25,934][262582] Updated weights for policy 0, policy_version 20746 (0.0008) +[2026-06-02 16:56:26,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 10616832. Throughput: 0: 18369.4. Samples: 10640896. Policy #0 lag: (min: 81.0, avg: 98.4, max: 143.0) +[2026-06-02 16:56:26,008][260776] Avg episode reward: [(0, '554.413')] +[2026-06-02 16:56:26,133][262582] Updated weights for policy 0, policy_version 20756 (0.0008) +[2026-06-02 16:56:26,329][262582] Updated weights for policy 0, policy_version 20766 (0.0008) +[2026-06-02 16:56:26,541][262582] Updated weights for policy 0, policy_version 20776 (0.0009) +[2026-06-02 16:56:27,263][262582] Updated weights for policy 0, policy_version 20787 (0.0008) +[2026-06-02 16:56:27,462][262582] Updated weights for policy 0, policy_version 20798 (0.0009) +[2026-06-02 16:56:27,660][262582] Updated weights for policy 0, policy_version 20808 (0.0008) +[2026-06-02 16:56:27,867][262582] Updated weights for policy 0, policy_version 20818 (0.0008) +[2026-06-02 16:56:28,058][262582] Updated weights for policy 0, policy_version 20828 (0.0009) +[2026-06-02 16:56:28,137][262026] Early stopping after 6 epochs (48 sgd steps), loss delta 0.0000001 +[2026-06-02 16:56:28,863][262582] Updated weights for policy 0, policy_version 20838 (0.0008) +[2026-06-02 16:56:29,054][262582] Updated weights for policy 0, policy_version 20848 (0.0009) +[2026-06-02 16:56:29,266][262582] Updated weights for policy 0, policy_version 20859 (0.0009) +[2026-06-02 16:56:29,479][262582] Updated weights for policy 0, policy_version 20869 (0.0008) +[2026-06-02 16:56:29,679][262582] Updated weights for policy 0, policy_version 20879 (0.0009) +[2026-06-02 16:56:29,884][262582] Updated weights for policy 0, policy_version 20889 (0.0009) +[2026-06-02 16:56:30,603][262582] Updated weights for policy 0, policy_version 20901 (0.0008) +[2026-06-02 16:56:30,805][262582] Updated weights for policy 0, policy_version 20912 (0.0008) +[2026-06-02 16:56:31,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 10715136. Throughput: 0: 18614.0. Samples: 10756864. Policy #0 lag: (min: 80.0, avg: 95.3, max: 144.0) +[2026-06-02 16:56:31,008][260776] Avg episode reward: [(0, '582.395')] +[2026-06-02 16:56:31,014][262582] Updated weights for policy 0, policy_version 20922 (0.0008) +[2026-06-02 16:56:31,214][262582] Updated weights for policy 0, policy_version 20932 (0.0009) +[2026-06-02 16:56:31,414][262582] Updated weights for policy 0, policy_version 20942 (0.0008) +[2026-06-02 16:56:31,632][262582] Updated weights for policy 0, policy_version 20953 (0.0008) +[2026-06-02 16:56:32,344][262582] Updated weights for policy 0, policy_version 20963 (0.0008) +[2026-06-02 16:56:32,537][262582] Updated weights for policy 0, policy_version 20973 (0.0008) +[2026-06-02 16:56:32,733][262582] Updated weights for policy 0, policy_version 20983 (0.0008) +[2026-06-02 16:56:32,937][262582] Updated weights for policy 0, policy_version 20993 (0.0008) +[2026-06-02 16:56:33,141][262582] Updated weights for policy 0, policy_version 21003 (0.0009) +[2026-06-02 16:56:33,345][262582] Updated weights for policy 0, policy_version 21013 (0.0008) +[2026-06-02 16:56:33,548][262582] Updated weights for policy 0, policy_version 21023 (0.0008) +[2026-06-02 16:56:34,225][262582] Updated weights for policy 0, policy_version 21033 (0.0008) +[2026-06-02 16:56:34,423][262582] Updated weights for policy 0, policy_version 21043 (0.0008) +[2026-06-02 16:56:34,619][262582] Updated weights for policy 0, policy_version 21053 (0.0008) +[2026-06-02 16:56:34,825][262582] Updated weights for policy 0, policy_version 21063 (0.0008) +[2026-06-02 16:56:35,027][262582] Updated weights for policy 0, policy_version 21073 (0.0008) +[2026-06-02 16:56:35,231][262582] Updated weights for policy 0, policy_version 21083 (0.0008) +[2026-06-02 16:56:35,953][262582] Updated weights for policy 0, policy_version 21094 (0.0008) +[2026-06-02 16:56:36,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 10813440. Throughput: 0: 18446.2. Samples: 10808576. Policy #0 lag: (min: 80.0, avg: 95.3, max: 144.0) +[2026-06-02 16:56:36,008][260776] Avg episode reward: [(0, '575.940')] +[2026-06-02 16:56:36,142][262582] Updated weights for policy 0, policy_version 21104 (0.0008) +[2026-06-02 16:56:36,340][262582] Updated weights for policy 0, policy_version 21114 (0.0008) +[2026-06-02 16:56:36,541][262582] Updated weights for policy 0, policy_version 21124 (0.0008) +[2026-06-02 16:56:36,745][262582] Updated weights for policy 0, policy_version 21134 (0.0008) +[2026-06-02 16:56:36,957][262582] Updated weights for policy 0, policy_version 21144 (0.0008) +[2026-06-02 16:56:37,113][262026] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs5_seed12/checkpoint_p0/checkpoint_000021152_10846208.pth... +[2026-06-02 16:56:37,663][262582] Updated weights for policy 0, policy_version 21154 (0.0008) +[2026-06-02 16:56:37,847][262582] Updated weights for policy 0, policy_version 21164 (0.0008) +[2026-06-02 16:56:38,054][262582] Updated weights for policy 0, policy_version 21174 (0.0008) +[2026-06-02 16:56:38,255][262582] Updated weights for policy 0, policy_version 21184 (0.0008) +[2026-06-02 16:56:38,460][262582] Updated weights for policy 0, policy_version 21194 (0.0008) +[2026-06-02 16:56:38,665][262582] Updated weights for policy 0, policy_version 21204 (0.0009) +[2026-06-02 16:56:38,860][262582] Updated weights for policy 0, policy_version 21214 (0.0008) +[2026-06-02 16:56:39,543][262582] Updated weights for policy 0, policy_version 21224 (0.0009) +[2026-06-02 16:56:39,758][262582] Updated weights for policy 0, policy_version 21235 (0.0008) +[2026-06-02 16:56:39,962][262582] Updated weights for policy 0, policy_version 21245 (0.0009) +[2026-06-02 16:56:40,171][262582] Updated weights for policy 0, policy_version 21255 (0.0008) +[2026-06-02 16:56:40,392][262582] Updated weights for policy 0, policy_version 21266 (0.0008) +[2026-06-02 16:56:40,600][262582] Updated weights for policy 0, policy_version 21276 (0.0008) +[2026-06-02 16:56:40,667][262026] Early stopping after 8 epochs (64 sgd steps), loss delta 0.0000002 +[2026-06-02 16:56:41,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 10911744. Throughput: 0: 18705.1. Samples: 10926464. Policy #0 lag: (min: 80.0, avg: 95.3, max: 144.0) +[2026-06-02 16:56:41,008][260776] Avg episode reward: [(0, '591.279')] +[2026-06-02 16:56:41,278][262582] Updated weights for policy 0, policy_version 21286 (0.0008) +[2026-06-02 16:56:41,484][262582] Updated weights for policy 0, policy_version 21296 (0.0008) +[2026-06-02 16:56:41,688][262582] Updated weights for policy 0, policy_version 21306 (0.0008) +[2026-06-02 16:56:41,904][262582] Updated weights for policy 0, policy_version 21317 (0.0009) +[2026-06-02 16:56:42,104][262582] Updated weights for policy 0, policy_version 21327 (0.0009) +[2026-06-02 16:56:42,302][262582] Updated weights for policy 0, policy_version 21337 (0.0008) +[2026-06-02 16:56:43,022][262582] Updated weights for policy 0, policy_version 21347 (0.0008) +[2026-06-02 16:56:43,218][262582] Updated weights for policy 0, policy_version 21357 (0.0009) +[2026-06-02 16:56:43,415][262582] Updated weights for policy 0, policy_version 21367 (0.0008) +[2026-06-02 16:56:43,622][262582] Updated weights for policy 0, policy_version 21377 (0.0009) +[2026-06-02 16:56:43,824][262582] Updated weights for policy 0, policy_version 21387 (0.0008) +[2026-06-02 16:56:44,019][262582] Updated weights for policy 0, policy_version 21397 (0.0008) +[2026-06-02 16:56:44,224][262582] Updated weights for policy 0, policy_version 21407 (0.0008) +[2026-06-02 16:56:44,892][262582] Updated weights for policy 0, policy_version 21417 (0.0008) +[2026-06-02 16:56:45,086][262582] Updated weights for policy 0, policy_version 21427 (0.0008) +[2026-06-02 16:56:45,286][262582] Updated weights for policy 0, policy_version 21437 (0.0008) +[2026-06-02 16:56:45,489][262582] Updated weights for policy 0, policy_version 21447 (0.0008) +[2026-06-02 16:56:45,693][262582] Updated weights for policy 0, policy_version 21457 (0.0008) +[2026-06-02 16:56:45,892][262582] Updated weights for policy 0, policy_version 21467 (0.0008) +[2026-06-02 16:56:46,007][260776] Fps is (10 sec: 19661.0, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 11010048. Throughput: 0: 18483.2. Samples: 11031808. Policy #0 lag: (min: 63.0, avg: 81.8, max: 127.0) +[2026-06-02 16:56:46,008][260776] Avg episode reward: [(0, '606.695')] +[2026-06-02 16:56:46,013][262026] Saving new best policy, reward=606.695! +[2026-06-02 16:56:46,601][262582] Updated weights for policy 0, policy_version 21477 (0.0008) +[2026-06-02 16:56:46,791][262582] Updated weights for policy 0, policy_version 21487 (0.0008) +[2026-06-02 16:56:46,989][262582] Updated weights for policy 0, policy_version 21497 (0.0008) +[2026-06-02 16:56:47,197][262582] Updated weights for policy 0, policy_version 21507 (0.0008) +[2026-06-02 16:56:47,408][262582] Updated weights for policy 0, policy_version 21518 (0.0008) +[2026-06-02 16:56:47,617][262582] Updated weights for policy 0, policy_version 21528 (0.0008) +[2026-06-02 16:56:48,328][262582] Updated weights for policy 0, policy_version 21539 (0.0008) +[2026-06-02 16:56:48,516][262582] Updated weights for policy 0, policy_version 21549 (0.0008) +[2026-06-02 16:56:48,712][262582] Updated weights for policy 0, policy_version 21559 (0.0008) +[2026-06-02 16:56:48,911][262582] Updated weights for policy 0, policy_version 21569 (0.0008) +[2026-06-02 16:56:49,120][262582] Updated weights for policy 0, policy_version 21579 (0.0008) +[2026-06-02 16:56:49,317][262582] Updated weights for policy 0, policy_version 21589 (0.0008) +[2026-06-02 16:56:49,516][262582] Updated weights for policy 0, policy_version 21599 (0.0008) +[2026-06-02 16:56:50,229][262582] Updated weights for policy 0, policy_version 21609 (0.0008) +[2026-06-02 16:56:50,431][262582] Updated weights for policy 0, policy_version 21619 (0.0008) +[2026-06-02 16:56:50,630][262582] Updated weights for policy 0, policy_version 21629 (0.0008) +[2026-06-02 16:56:50,831][262582] Updated weights for policy 0, policy_version 21639 (0.0009) +[2026-06-02 16:56:51,007][260776] Fps is (10 sec: 16383.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 11075584. Throughput: 0: 18679.5. Samples: 11090432. Policy #0 lag: (min: 63.0, avg: 81.8, max: 127.0) +[2026-06-02 16:56:51,008][260776] Avg episode reward: [(0, '631.384')] +[2026-06-02 16:56:51,034][262582] Updated weights for policy 0, policy_version 21649 (0.0009) +[2026-06-02 16:56:51,230][262582] Updated weights for policy 0, policy_version 21659 (0.0008) +[2026-06-02 16:56:51,330][262026] Saving new best policy, reward=631.384! +[2026-06-02 16:56:51,930][262582] Updated weights for policy 0, policy_version 21669 (0.0008) +[2026-06-02 16:56:52,122][262582] Updated weights for policy 0, policy_version 21679 (0.0008) +[2026-06-02 16:56:52,320][262582] Updated weights for policy 0, policy_version 21689 (0.0008) +[2026-06-02 16:56:52,521][262582] Updated weights for policy 0, policy_version 21699 (0.0009) +[2026-06-02 16:56:52,727][262582] Updated weights for policy 0, policy_version 21709 (0.0008) +[2026-06-02 16:56:52,923][262582] Updated weights for policy 0, policy_version 21719 (0.0008) +[2026-06-02 16:56:53,642][262582] Updated weights for policy 0, policy_version 21730 (0.0008) +[2026-06-02 16:56:53,832][262582] Updated weights for policy 0, policy_version 21740 (0.0008) +[2026-06-02 16:56:54,034][262582] Updated weights for policy 0, policy_version 21750 (0.0008) +[2026-06-02 16:56:54,238][262582] Updated weights for policy 0, policy_version 21760 (0.0008) +[2026-06-02 16:56:54,441][262582] Updated weights for policy 0, policy_version 21770 (0.0009) +[2026-06-02 16:56:54,641][262582] Updated weights for policy 0, policy_version 21780 (0.0008) +[2026-06-02 16:56:54,839][262582] Updated weights for policy 0, policy_version 21790 (0.0008) +[2026-06-02 16:56:55,564][262582] Updated weights for policy 0, policy_version 21801 (0.0008) +[2026-06-02 16:56:55,756][262582] Updated weights for policy 0, policy_version 21811 (0.0009) +[2026-06-02 16:56:55,960][262582] Updated weights for policy 0, policy_version 21821 (0.0008) +[2026-06-02 16:56:56,007][260776] Fps is (10 sec: 16383.5, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 11173888. Throughput: 0: 18394.9. Samples: 11196288. Policy #0 lag: (min: 63.0, avg: 81.8, max: 127.0) +[2026-06-02 16:56:56,010][260776] Avg episode reward: [(0, '654.324')] +[2026-06-02 16:56:56,167][262582] Updated weights for policy 0, policy_version 21831 (0.0008) +[2026-06-02 16:56:56,363][262582] Updated weights for policy 0, policy_version 21841 (0.0008) +[2026-06-02 16:56:56,568][262582] Updated weights for policy 0, policy_version 21851 (0.0008) +[2026-06-02 16:56:56,659][262026] Saving new best policy, reward=654.324! +[2026-06-02 16:56:57,247][262582] Updated weights for policy 0, policy_version 21861 (0.0008) +[2026-06-02 16:56:57,466][262582] Updated weights for policy 0, policy_version 21872 (0.0008) +[2026-06-02 16:56:57,666][262582] Updated weights for policy 0, policy_version 21882 (0.0008) +[2026-06-02 16:56:57,869][262582] Updated weights for policy 0, policy_version 21892 (0.0009) +[2026-06-02 16:56:58,064][262582] Updated weights for policy 0, policy_version 21902 (0.0008) +[2026-06-02 16:56:58,300][262582] Updated weights for policy 0, policy_version 21913 (0.0009) +[2026-06-02 16:56:59,003][262582] Updated weights for policy 0, policy_version 21923 (0.0008) +[2026-06-02 16:56:59,195][262582] Updated weights for policy 0, policy_version 21933 (0.0008) +[2026-06-02 16:56:59,402][262582] Updated weights for policy 0, policy_version 21943 (0.0008) +[2026-06-02 16:56:59,603][262582] Updated weights for policy 0, policy_version 21953 (0.0008) +[2026-06-02 16:56:59,801][262582] Updated weights for policy 0, policy_version 21963 (0.0008) +[2026-06-02 16:57:00,007][262582] Updated weights for policy 0, policy_version 21973 (0.0008) +[2026-06-02 16:57:00,231][262582] Updated weights for policy 0, policy_version 21984 (0.0008) +[2026-06-02 16:57:00,906][262582] Updated weights for policy 0, policy_version 21994 (0.0008) +[2026-06-02 16:57:01,007][260776] Fps is (10 sec: 19661.0, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 11272192. Throughput: 0: 18400.7. Samples: 11302784. Policy #0 lag: (min: 63.0, avg: 81.8, max: 127.0) +[2026-06-02 16:57:01,008][260776] Avg episode reward: [(0, '644.245')] +[2026-06-02 16:57:01,118][262582] Updated weights for policy 0, policy_version 22005 (0.0008) +[2026-06-02 16:57:01,321][262582] Updated weights for policy 0, policy_version 22015 (0.0008) +[2026-06-02 16:57:01,525][262582] Updated weights for policy 0, policy_version 22025 (0.0009) +[2026-06-02 16:57:01,760][262582] Updated weights for policy 0, policy_version 22036 (0.0008) +[2026-06-02 16:57:01,956][262582] Updated weights for policy 0, policy_version 22046 (0.0008) +[2026-06-02 16:57:02,654][262582] Updated weights for policy 0, policy_version 22057 (0.0008) +[2026-06-02 16:57:02,849][262582] Updated weights for policy 0, policy_version 22067 (0.0008) +[2026-06-02 16:57:03,045][262582] Updated weights for policy 0, policy_version 22077 (0.0009) +[2026-06-02 16:57:03,265][262582] Updated weights for policy 0, policy_version 22087 (0.0009) +[2026-06-02 16:57:03,473][262582] Updated weights for policy 0, policy_version 22098 (0.0009) +[2026-06-02 16:57:03,678][262582] Updated weights for policy 0, policy_version 22108 (0.0009) +[2026-06-02 16:57:04,367][262582] Updated weights for policy 0, policy_version 22118 (0.0009) +[2026-06-02 16:57:04,566][262582] Updated weights for policy 0, policy_version 22128 (0.0009) +[2026-06-02 16:57:04,788][262582] Updated weights for policy 0, policy_version 22139 (0.0007) +[2026-06-02 16:57:04,989][262582] Updated weights for policy 0, policy_version 22149 (0.0004) +[2026-06-02 16:57:05,200][262582] Updated weights for policy 0, policy_version 22159 (0.0006) +[2026-06-02 16:57:05,409][262582] Updated weights for policy 0, policy_version 22169 (0.0006) +[2026-06-02 16:57:06,007][260776] Fps is (10 sec: 19661.3, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 11370496. Throughput: 0: 18352.4. Samples: 11360512. Policy #0 lag: (min: 63.0, avg: 80.7, max: 127.0) +[2026-06-02 16:57:06,008][260776] Avg episode reward: [(0, '663.580')] +[2026-06-02 16:57:06,065][262582] Updated weights for policy 0, policy_version 22179 (0.0005) +[2026-06-02 16:57:06,258][262582] Updated weights for policy 0, policy_version 22189 (0.0004) +[2026-06-02 16:57:06,458][262582] Updated weights for policy 0, policy_version 22199 (0.0004) +[2026-06-02 16:57:06,659][262582] Updated weights for policy 0, policy_version 22209 (0.0008) +[2026-06-02 16:57:06,866][262582] Updated weights for policy 0, policy_version 22219 (0.0008) +[2026-06-02 16:57:07,069][262582] Updated weights for policy 0, policy_version 22229 (0.0008) +[2026-06-02 16:57:07,271][262582] Updated weights for policy 0, policy_version 22239 (0.0008) +[2026-06-02 16:57:07,281][262026] Saving new best policy, reward=663.580! +[2026-06-02 16:57:07,964][262582] Updated weights for policy 0, policy_version 22250 (0.0008) +[2026-06-02 16:57:08,179][262582] Updated weights for policy 0, policy_version 22261 (0.0008) +[2026-06-02 16:57:08,384][262582] Updated weights for policy 0, policy_version 22271 (0.0008) +[2026-06-02 16:57:08,606][262582] Updated weights for policy 0, policy_version 22282 (0.0008) +[2026-06-02 16:57:08,818][262582] Updated weights for policy 0, policy_version 22292 (0.0008) +[2026-06-02 16:57:09,022][262582] Updated weights for policy 0, policy_version 22302 (0.0009) +[2026-06-02 16:57:09,693][262582] Updated weights for policy 0, policy_version 22312 (0.0008) +[2026-06-02 16:57:09,891][262582] Updated weights for policy 0, policy_version 22322 (0.0008) +[2026-06-02 16:57:10,088][262582] Updated weights for policy 0, policy_version 22332 (0.0008) +[2026-06-02 16:57:10,294][262582] Updated weights for policy 0, policy_version 22342 (0.0009) +[2026-06-02 16:57:10,488][262582] Updated weights for policy 0, policy_version 22352 (0.0008) +[2026-06-02 16:57:10,695][262582] Updated weights for policy 0, policy_version 22362 (0.0009) +[2026-06-02 16:57:11,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 11468800. Throughput: 0: 18454.8. Samples: 11471360. Policy #0 lag: (min: 63.0, avg: 80.7, max: 127.0) +[2026-06-02 16:57:11,009][260776] Avg episode reward: [(0, '679.712')] +[2026-06-02 16:57:11,014][262026] Saving new best policy, reward=679.712! +[2026-06-02 16:57:11,392][262582] Updated weights for policy 0, policy_version 22372 (0.0009) +[2026-06-02 16:57:11,585][262582] Updated weights for policy 0, policy_version 22382 (0.0009) +[2026-06-02 16:57:11,797][262582] Updated weights for policy 0, policy_version 22392 (0.0009) +[2026-06-02 16:57:11,997][262582] Updated weights for policy 0, policy_version 22402 (0.0008) +[2026-06-02 16:57:12,195][262582] Updated weights for policy 0, policy_version 22412 (0.0009) +[2026-06-02 16:57:12,401][262582] Updated weights for policy 0, policy_version 22422 (0.0008) +[2026-06-02 16:57:12,598][262582] Updated weights for policy 0, policy_version 22432 (0.0008) +[2026-06-02 16:57:13,345][262582] Updated weights for policy 0, policy_version 22443 (0.0008) +[2026-06-02 16:57:13,531][262582] Updated weights for policy 0, policy_version 22453 (0.0008) +[2026-06-02 16:57:13,743][262582] Updated weights for policy 0, policy_version 22463 (0.0008) +[2026-06-02 16:57:13,947][262582] Updated weights for policy 0, policy_version 22473 (0.0009) +[2026-06-02 16:57:14,173][262582] Updated weights for policy 0, policy_version 22484 (0.0008) +[2026-06-02 16:57:14,370][262582] Updated weights for policy 0, policy_version 22494 (0.0008) +[2026-06-02 16:57:15,037][262582] Updated weights for policy 0, policy_version 22504 (0.0008) +[2026-06-02 16:57:15,237][262582] Updated weights for policy 0, policy_version 22514 (0.0008) +[2026-06-02 16:57:15,439][262582] Updated weights for policy 0, policy_version 22524 (0.0008) +[2026-06-02 16:57:15,654][262582] Updated weights for policy 0, policy_version 22534 (0.0008) +[2026-06-02 16:57:15,866][262582] Updated weights for policy 0, policy_version 22545 (0.0008) +[2026-06-02 16:57:16,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 18327.9). Total num frames: 11534336. Throughput: 0: 18358.0. Samples: 11582976. Policy #0 lag: (min: 63.0, avg: 80.7, max: 127.0) +[2026-06-02 16:57:16,008][260776] Avg episode reward: [(0, '686.944')] +[2026-06-02 16:57:16,082][262582] Updated weights for policy 0, policy_version 22556 (0.0008) +[2026-06-02 16:57:16,159][262026] Saving new best policy, reward=686.944! +[2026-06-02 16:57:16,778][262582] Updated weights for policy 0, policy_version 22566 (0.0009) +[2026-06-02 16:57:16,977][262582] Updated weights for policy 0, policy_version 22576 (0.0009) +[2026-06-02 16:57:17,183][262582] Updated weights for policy 0, policy_version 22586 (0.0009) +[2026-06-02 16:57:17,384][262582] Updated weights for policy 0, policy_version 22596 (0.0008) +[2026-06-02 16:57:17,586][262582] Updated weights for policy 0, policy_version 22606 (0.0008) +[2026-06-02 16:57:17,785][262582] Updated weights for policy 0, policy_version 22616 (0.0008) +[2026-06-02 16:57:18,479][262582] Updated weights for policy 0, policy_version 22626 (0.0008) +[2026-06-02 16:57:18,666][262582] Updated weights for policy 0, policy_version 22636 (0.0008) +[2026-06-02 16:57:18,878][262582] Updated weights for policy 0, policy_version 22647 (0.0008) +[2026-06-02 16:57:19,078][262582] Updated weights for policy 0, policy_version 22657 (0.0008) +[2026-06-02 16:57:19,282][262582] Updated weights for policy 0, policy_version 22667 (0.0008) +[2026-06-02 16:57:19,490][262582] Updated weights for policy 0, policy_version 22677 (0.0008) +[2026-06-02 16:57:19,680][262582] Updated weights for policy 0, policy_version 22687 (0.0008) +[2026-06-02 16:57:20,384][262582] Updated weights for policy 0, policy_version 22697 (0.0008) +[2026-06-02 16:57:20,578][262582] Updated weights for policy 0, policy_version 22707 (0.0008) +[2026-06-02 16:57:20,784][262582] Updated weights for policy 0, policy_version 22717 (0.0008) +[2026-06-02 16:57:20,980][262582] Updated weights for policy 0, policy_version 22727 (0.0008) +[2026-06-02 16:57:21,007][260776] Fps is (10 sec: 16383.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 11632640. Throughput: 0: 18477.5. Samples: 11640064. Policy #0 lag: (min: 63.0, avg: 80.7, max: 127.0) +[2026-06-02 16:57:21,008][260776] Avg episode reward: [(0, '724.396')] +[2026-06-02 16:57:21,191][262582] Updated weights for policy 0, policy_version 22737 (0.0008) +[2026-06-02 16:57:21,391][262582] Updated weights for policy 0, policy_version 22747 (0.0008) +[2026-06-02 16:57:21,492][262026] Saving new best policy, reward=724.396! +[2026-06-02 16:57:22,091][262582] Updated weights for policy 0, policy_version 22757 (0.0008) +[2026-06-02 16:57:22,284][262582] Updated weights for policy 0, policy_version 22767 (0.0008) +[2026-06-02 16:57:22,477][262582] Updated weights for policy 0, policy_version 22777 (0.0008) +[2026-06-02 16:57:22,676][262582] Updated weights for policy 0, policy_version 22787 (0.0009) +[2026-06-02 16:57:22,888][262582] Updated weights for policy 0, policy_version 22797 (0.0008) +[2026-06-02 16:57:23,091][262582] Updated weights for policy 0, policy_version 22807 (0.0008) +[2026-06-02 16:57:23,796][262582] Updated weights for policy 0, policy_version 22817 (0.0008) +[2026-06-02 16:57:23,981][262582] Updated weights for policy 0, policy_version 22827 (0.0008) +[2026-06-02 16:57:24,178][262582] Updated weights for policy 0, policy_version 22837 (0.0008) +[2026-06-02 16:57:24,390][262582] Updated weights for policy 0, policy_version 22847 (0.0008) +[2026-06-02 16:57:24,587][262582] Updated weights for policy 0, policy_version 22857 (0.0008) +[2026-06-02 16:57:24,795][262582] Updated weights for policy 0, policy_version 22867 (0.0008) +[2026-06-02 16:57:25,007][262582] Updated weights for policy 0, policy_version 22878 (0.0008) +[2026-06-02 16:57:25,683][262582] Updated weights for policy 0, policy_version 22888 (0.0008) +[2026-06-02 16:57:25,896][262582] Updated weights for policy 0, policy_version 22899 (0.0008) +[2026-06-02 16:57:26,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 11730944. Throughput: 0: 18250.0. Samples: 11747712. Policy #0 lag: (min: 63.0, avg: 79.7, max: 127.0) +[2026-06-02 16:57:26,008][260776] Avg episode reward: [(0, '714.652')] +[2026-06-02 16:57:26,099][262582] Updated weights for policy 0, policy_version 22909 (0.0008) +[2026-06-02 16:57:26,299][262582] Updated weights for policy 0, policy_version 22919 (0.0008) +[2026-06-02 16:57:26,518][262582] Updated weights for policy 0, policy_version 22930 (0.0008) +[2026-06-02 16:57:26,721][262582] Updated weights for policy 0, policy_version 22940 (0.0008) +[2026-06-02 16:57:27,412][262582] Updated weights for policy 0, policy_version 22950 (0.0009) +[2026-06-02 16:57:27,598][262582] Updated weights for policy 0, policy_version 22960 (0.0008) +[2026-06-02 16:57:27,805][262582] Updated weights for policy 0, policy_version 22970 (0.0008) +[2026-06-02 16:57:28,005][262582] Updated weights for policy 0, policy_version 22980 (0.0008) +[2026-06-02 16:57:28,212][262582] Updated weights for policy 0, policy_version 22990 (0.0008) +[2026-06-02 16:57:28,407][262582] Updated weights for policy 0, policy_version 23000 (0.0008) +[2026-06-02 16:57:29,120][262582] Updated weights for policy 0, policy_version 23010 (0.0008) +[2026-06-02 16:57:29,327][262582] Updated weights for policy 0, policy_version 23021 (0.0008) +[2026-06-02 16:57:29,525][262582] Updated weights for policy 0, policy_version 23031 (0.0008) +[2026-06-02 16:57:29,719][262582] Updated weights for policy 0, policy_version 23041 (0.0009) +[2026-06-02 16:57:29,933][262582] Updated weights for policy 0, policy_version 23051 (0.0008) +[2026-06-02 16:57:30,128][262582] Updated weights for policy 0, policy_version 23061 (0.0008) +[2026-06-02 16:57:30,333][262582] Updated weights for policy 0, policy_version 23071 (0.0008) +[2026-06-02 16:57:31,007][260776] Fps is (10 sec: 19661.0, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 11829248. Throughput: 0: 18230.0. Samples: 11852160. Policy #0 lag: (min: 63.0, avg: 79.7, max: 127.0) +[2026-06-02 16:57:31,008][260776] Avg episode reward: [(0, '739.386')] +[2026-06-02 16:57:31,018][262582] Updated weights for policy 0, policy_version 23081 (0.0008) +[2026-06-02 16:57:31,223][262582] Updated weights for policy 0, policy_version 23091 (0.0008) +[2026-06-02 16:57:31,416][262582] Updated weights for policy 0, policy_version 23101 (0.0008) +[2026-06-02 16:57:31,624][262582] Updated weights for policy 0, policy_version 23111 (0.0008) +[2026-06-02 16:57:31,822][262582] Updated weights for policy 0, policy_version 23121 (0.0008) +[2026-06-02 16:57:32,029][262582] Updated weights for policy 0, policy_version 23131 (0.0009) +[2026-06-02 16:57:32,121][262026] Saving new best policy, reward=739.386! +[2026-06-02 16:57:32,736][262582] Updated weights for policy 0, policy_version 23141 (0.0008) +[2026-06-02 16:57:32,946][262582] Updated weights for policy 0, policy_version 23152 (0.0008) +[2026-06-02 16:57:33,147][262582] Updated weights for policy 0, policy_version 23162 (0.0008) +[2026-06-02 16:57:33,348][262582] Updated weights for policy 0, policy_version 23172 (0.0009) +[2026-06-02 16:57:33,552][262582] Updated weights for policy 0, policy_version 23182 (0.0008) +[2026-06-02 16:57:33,777][262582] Updated weights for policy 0, policy_version 23193 (0.0008) +[2026-06-02 16:57:34,484][262582] Updated weights for policy 0, policy_version 23204 (0.0008) +[2026-06-02 16:57:34,683][262582] Updated weights for policy 0, policy_version 23214 (0.0009) +[2026-06-02 16:57:34,883][262582] Updated weights for policy 0, policy_version 23224 (0.0008) +[2026-06-02 16:57:35,084][262582] Updated weights for policy 0, policy_version 23234 (0.0008) +[2026-06-02 16:57:35,287][262582] Updated weights for policy 0, policy_version 23244 (0.0008) +[2026-06-02 16:57:35,488][262582] Updated weights for policy 0, policy_version 23254 (0.0008) +[2026-06-02 16:57:35,684][262582] Updated weights for policy 0, policy_version 23264 (0.0008) +[2026-06-02 16:57:36,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 11927552. Throughput: 0: 18238.6. Samples: 11911168. Policy #0 lag: (min: 63.0, avg: 79.7, max: 127.0) +[2026-06-02 16:57:36,008][260776] Avg episode reward: [(0, '735.888')] +[2026-06-02 16:57:36,393][262582] Updated weights for policy 0, policy_version 23275 (0.0009) +[2026-06-02 16:57:36,592][262582] Updated weights for policy 0, policy_version 23285 (0.0008) +[2026-06-02 16:57:36,786][262582] Updated weights for policy 0, policy_version 23295 (0.0008) +[2026-06-02 16:57:36,996][262582] Updated weights for policy 0, policy_version 23305 (0.0008) +[2026-06-02 16:57:37,197][262582] Updated weights for policy 0, policy_version 23315 (0.0008) +[2026-06-02 16:57:37,394][262582] Updated weights for policy 0, policy_version 23325 (0.0008) +[2026-06-02 16:57:38,078][262582] Updated weights for policy 0, policy_version 23335 (0.0008) +[2026-06-02 16:57:38,270][262582] Updated weights for policy 0, policy_version 23345 (0.0008) +[2026-06-02 16:57:38,481][262582] Updated weights for policy 0, policy_version 23355 (0.0008) +[2026-06-02 16:57:38,677][262582] Updated weights for policy 0, policy_version 23365 (0.0008) +[2026-06-02 16:57:38,887][262582] Updated weights for policy 0, policy_version 23375 (0.0008) +[2026-06-02 16:57:39,092][262582] Updated weights for policy 0, policy_version 23385 (0.0008) +[2026-06-02 16:57:39,784][262582] Updated weights for policy 0, policy_version 23395 (0.0008) +[2026-06-02 16:57:39,968][262582] Updated weights for policy 0, policy_version 23405 (0.0008) +[2026-06-02 16:57:40,164][262582] Updated weights for policy 0, policy_version 23415 (0.0008) +[2026-06-02 16:57:40,375][262582] Updated weights for policy 0, policy_version 23425 (0.0008) +[2026-06-02 16:57:40,581][262582] Updated weights for policy 0, policy_version 23435 (0.0008) +[2026-06-02 16:57:40,786][262582] Updated weights for policy 0, policy_version 23445 (0.0008) +[2026-06-02 16:57:40,984][262582] Updated weights for policy 0, policy_version 23455 (0.0008) +[2026-06-02 16:57:41,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 12025856. Throughput: 0: 18230.2. Samples: 12016640. Policy #0 lag: (min: 63.0, avg: 81.1, max: 127.0) +[2026-06-02 16:57:41,008][260776] Avg episode reward: [(0, '751.668')] +[2026-06-02 16:57:41,012][262026] Saving new best policy, reward=751.668! +[2026-06-02 16:57:41,711][262582] Updated weights for policy 0, policy_version 23466 (0.0008) +[2026-06-02 16:57:41,901][262582] Updated weights for policy 0, policy_version 23476 (0.0008) +[2026-06-02 16:57:42,113][262582] Updated weights for policy 0, policy_version 23486 (0.0008) +[2026-06-02 16:57:42,319][262582] Updated weights for policy 0, policy_version 23496 (0.0008) +[2026-06-02 16:57:42,538][262582] Updated weights for policy 0, policy_version 23507 (0.0008) +[2026-06-02 16:57:42,742][262582] Updated weights for policy 0, policy_version 23517 (0.0008) +[2026-06-02 16:57:43,422][262582] Updated weights for policy 0, policy_version 23527 (0.0008) +[2026-06-02 16:57:43,616][262582] Updated weights for policy 0, policy_version 23537 (0.0008) +[2026-06-02 16:57:43,818][262582] Updated weights for policy 0, policy_version 23547 (0.0008) +[2026-06-02 16:57:44,010][262582] Updated weights for policy 0, policy_version 23557 (0.0008) +[2026-06-02 16:57:44,218][262582] Updated weights for policy 0, policy_version 23567 (0.0008) +[2026-06-02 16:57:44,427][262582] Updated weights for policy 0, policy_version 23577 (0.0008) +[2026-06-02 16:57:45,114][262582] Updated weights for policy 0, policy_version 23587 (0.0008) +[2026-06-02 16:57:45,293][262582] Updated weights for policy 0, policy_version 23597 (0.0008) +[2026-06-02 16:57:45,531][262582] Updated weights for policy 0, policy_version 23609 (0.0008) +[2026-06-02 16:57:45,744][262582] Updated weights for policy 0, policy_version 23619 (0.0008) +[2026-06-02 16:57:45,938][262582] Updated weights for policy 0, policy_version 23629 (0.0008) +[2026-06-02 16:57:46,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 12091392. Throughput: 0: 18449.0. Samples: 12132992. Policy #0 lag: (min: 63.0, avg: 81.1, max: 127.0) +[2026-06-02 16:57:46,008][260776] Avg episode reward: [(0, '814.888')] +[2026-06-02 16:57:46,144][262582] Updated weights for policy 0, policy_version 23639 (0.0008) +[2026-06-02 16:57:46,322][262026] Saving new best policy, reward=814.888! +[2026-06-02 16:57:46,860][262582] Updated weights for policy 0, policy_version 23649 (0.0008) +[2026-06-02 16:57:47,048][262582] Updated weights for policy 0, policy_version 23659 (0.0008) +[2026-06-02 16:57:47,242][262582] Updated weights for policy 0, policy_version 23669 (0.0008) +[2026-06-02 16:57:47,462][262582] Updated weights for policy 0, policy_version 23680 (0.0009) +[2026-06-02 16:57:47,667][262582] Updated weights for policy 0, policy_version 23690 (0.0008) +[2026-06-02 16:57:47,865][262582] Updated weights for policy 0, policy_version 23700 (0.0008) +[2026-06-02 16:57:48,066][262582] Updated weights for policy 0, policy_version 23710 (0.0008) +[2026-06-02 16:57:48,782][262582] Updated weights for policy 0, policy_version 23721 (0.0008) +[2026-06-02 16:57:48,975][262582] Updated weights for policy 0, policy_version 23731 (0.0008) +[2026-06-02 16:57:49,183][262582] Updated weights for policy 0, policy_version 23741 (0.0008) +[2026-06-02 16:57:49,404][262582] Updated weights for policy 0, policy_version 23752 (0.0008) +[2026-06-02 16:57:49,617][262582] Updated weights for policy 0, policy_version 23762 (0.0008) +[2026-06-02 16:57:49,811][262582] Updated weights for policy 0, policy_version 23772 (0.0008) +[2026-06-02 16:57:50,486][262582] Updated weights for policy 0, policy_version 23782 (0.0008) +[2026-06-02 16:57:50,704][262582] Updated weights for policy 0, policy_version 23793 (0.0008) +[2026-06-02 16:57:50,896][262582] Updated weights for policy 0, policy_version 23803 (0.0008) +[2026-06-02 16:57:51,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 12189696. Throughput: 0: 18284.1. Samples: 12183296. Policy #0 lag: (min: 63.0, avg: 81.1, max: 127.0) +[2026-06-02 16:57:51,008][260776] Avg episode reward: [(0, '820.448')] +[2026-06-02 16:57:51,107][262582] Updated weights for policy 0, policy_version 23813 (0.0008) +[2026-06-02 16:57:51,317][262582] Updated weights for policy 0, policy_version 23823 (0.0008) +[2026-06-02 16:57:51,511][262582] Updated weights for policy 0, policy_version 23833 (0.0008) +[2026-06-02 16:57:51,646][262026] Saving new best policy, reward=820.448! +[2026-06-02 16:57:52,211][262582] Updated weights for policy 0, policy_version 23843 (0.0008) +[2026-06-02 16:57:52,426][262582] Updated weights for policy 0, policy_version 23854 (0.0008) +[2026-06-02 16:57:52,619][262582] Updated weights for policy 0, policy_version 23864 (0.0008) +[2026-06-02 16:57:52,825][262582] Updated weights for policy 0, policy_version 23874 (0.0008) +[2026-06-02 16:57:53,036][262582] Updated weights for policy 0, policy_version 23884 (0.0008) +[2026-06-02 16:57:53,238][262582] Updated weights for policy 0, policy_version 23894 (0.0009) +[2026-06-02 16:57:53,434][262582] Updated weights for policy 0, policy_version 23904 (0.0008) +[2026-06-02 16:57:54,128][262582] Updated weights for policy 0, policy_version 23914 (0.0008) +[2026-06-02 16:57:54,329][262582] Updated weights for policy 0, policy_version 23924 (0.0008) +[2026-06-02 16:57:54,530][262582] Updated weights for policy 0, policy_version 23934 (0.0009) +[2026-06-02 16:57:54,732][262582] Updated weights for policy 0, policy_version 23944 (0.0009) +[2026-06-02 16:57:54,945][262582] Updated weights for policy 0, policy_version 23954 (0.0008) +[2026-06-02 16:57:55,144][262582] Updated weights for policy 0, policy_version 23964 (0.0008) +[2026-06-02 16:57:55,822][262582] Updated weights for policy 0, policy_version 23974 (0.0008) +[2026-06-02 16:57:56,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 12288000. Throughput: 0: 18358.0. Samples: 12297472. Policy #0 lag: (min: 63.0, avg: 81.1, max: 127.0) +[2026-06-02 16:57:56,008][260776] Avg episode reward: [(0, '859.692')] +[2026-06-02 16:57:56,066][262582] Updated weights for policy 0, policy_version 23986 (0.0008) +[2026-06-02 16:57:56,266][262582] Updated weights for policy 0, policy_version 23996 (0.0008) +[2026-06-02 16:57:56,461][262582] Updated weights for policy 0, policy_version 24006 (0.0008) +[2026-06-02 16:57:56,691][262582] Updated weights for policy 0, policy_version 24017 (0.0008) +[2026-06-02 16:57:56,910][262582] Updated weights for policy 0, policy_version 24028 (0.0008) +[2026-06-02 16:57:56,998][262026] Saving new best policy, reward=859.692! +[2026-06-02 16:57:57,597][262582] Updated weights for policy 0, policy_version 24038 (0.0009) +[2026-06-02 16:57:57,810][262582] Updated weights for policy 0, policy_version 24049 (0.0008) +[2026-06-02 16:57:58,025][262582] Updated weights for policy 0, policy_version 24060 (0.0008) +[2026-06-02 16:57:58,236][262582] Updated weights for policy 0, policy_version 24070 (0.0008) +[2026-06-02 16:57:58,441][262582] Updated weights for policy 0, policy_version 24080 (0.0008) +[2026-06-02 16:57:58,645][262582] Updated weights for policy 0, policy_version 24090 (0.0008) +[2026-06-02 16:57:59,347][262582] Updated weights for policy 0, policy_version 24100 (0.0009) +[2026-06-02 16:57:59,535][262582] Updated weights for policy 0, policy_version 24110 (0.0008) +[2026-06-02 16:57:59,747][262582] Updated weights for policy 0, policy_version 24120 (0.0008) +[2026-06-02 16:57:59,946][262582] Updated weights for policy 0, policy_version 24130 (0.0008) +[2026-06-02 16:58:00,151][262582] Updated weights for policy 0, policy_version 24140 (0.0008) +[2026-06-02 16:58:00,353][262582] Updated weights for policy 0, policy_version 24150 (0.0008) +[2026-06-02 16:58:00,548][262582] Updated weights for policy 0, policy_version 24160 (0.0008) +[2026-06-02 16:58:01,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 12386304. Throughput: 0: 18221.5. Samples: 12402944. Policy #0 lag: (min: 63.0, avg: 81.1, max: 127.0) +[2026-06-02 16:58:01,008][260776] Avg episode reward: [(0, '876.987')] +[2026-06-02 16:58:01,247][262582] Updated weights for policy 0, policy_version 24170 (0.0008) +[2026-06-02 16:58:01,453][262582] Updated weights for policy 0, policy_version 24180 (0.0008) +[2026-06-02 16:58:01,645][262582] Updated weights for policy 0, policy_version 24190 (0.0008) +[2026-06-02 16:58:01,863][262582] Updated weights for policy 0, policy_version 24200 (0.0008) +[2026-06-02 16:58:02,080][262582] Updated weights for policy 0, policy_version 24211 (0.0008) +[2026-06-02 16:58:02,286][262582] Updated weights for policy 0, policy_version 24221 (0.0008) +[2026-06-02 16:58:02,336][262026] Saving new best policy, reward=876.987! +[2026-06-02 16:58:02,964][262582] Updated weights for policy 0, policy_version 24231 (0.0009) +[2026-06-02 16:58:03,155][262582] Updated weights for policy 0, policy_version 24241 (0.0008) +[2026-06-02 16:58:03,355][262582] Updated weights for policy 0, policy_version 24251 (0.0008) +[2026-06-02 16:58:03,558][262582] Updated weights for policy 0, policy_version 24261 (0.0008) +[2026-06-02 16:58:03,758][262582] Updated weights for policy 0, policy_version 24271 (0.0008) +[2026-06-02 16:58:03,966][262582] Updated weights for policy 0, policy_version 24281 (0.0009) +[2026-06-02 16:58:04,679][262582] Updated weights for policy 0, policy_version 24291 (0.0008) +[2026-06-02 16:58:04,865][262582] Updated weights for policy 0, policy_version 24301 (0.0008) +[2026-06-02 16:58:05,070][262582] Updated weights for policy 0, policy_version 24311 (0.0008) +[2026-06-02 16:58:05,273][262582] Updated weights for policy 0, policy_version 24321 (0.0008) +[2026-06-02 16:58:05,475][262582] Updated weights for policy 0, policy_version 24331 (0.0008) +[2026-06-02 16:58:05,677][262582] Updated weights for policy 0, policy_version 24341 (0.0008) +[2026-06-02 16:58:05,879][262582] Updated weights for policy 0, policy_version 24351 (0.0008) +[2026-06-02 16:58:06,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 12484608. Throughput: 0: 18252.9. Samples: 12461440. Policy #0 lag: (min: 63.0, avg: 81.1, max: 127.0) +[2026-06-02 16:58:06,008][260776] Avg episode reward: [(0, '858.249')] +[2026-06-02 16:58:06,563][262582] Updated weights for policy 0, policy_version 24361 (0.0008) +[2026-06-02 16:58:06,762][262582] Updated weights for policy 0, policy_version 24371 (0.0008) +[2026-06-02 16:58:06,952][262582] Updated weights for policy 0, policy_version 24381 (0.0008) +[2026-06-02 16:58:07,181][262582] Updated weights for policy 0, policy_version 24392 (0.0008) +[2026-06-02 16:58:07,384][262582] Updated weights for policy 0, policy_version 24402 (0.0008) +[2026-06-02 16:58:07,591][262582] Updated weights for policy 0, policy_version 24412 (0.0008) +[2026-06-02 16:58:08,286][262582] Updated weights for policy 0, policy_version 24422 (0.0008) +[2026-06-02 16:58:08,490][262582] Updated weights for policy 0, policy_version 24432 (0.0008) +[2026-06-02 16:58:08,688][262582] Updated weights for policy 0, policy_version 24442 (0.0008) +[2026-06-02 16:58:08,892][262582] Updated weights for policy 0, policy_version 24452 (0.0008) +[2026-06-02 16:58:09,097][262582] Updated weights for policy 0, policy_version 24462 (0.0008) +[2026-06-02 16:58:09,306][262582] Updated weights for policy 0, policy_version 24472 (0.0008) +[2026-06-02 16:58:09,999][262582] Updated weights for policy 0, policy_version 24482 (0.0008) +[2026-06-02 16:58:10,187][262582] Updated weights for policy 0, policy_version 24492 (0.0008) +[2026-06-02 16:58:10,382][262582] Updated weights for policy 0, policy_version 24502 (0.0008) +[2026-06-02 16:58:10,587][262582] Updated weights for policy 0, policy_version 24512 (0.0008) +[2026-06-02 16:58:10,791][262582] Updated weights for policy 0, policy_version 24522 (0.0008) +[2026-06-02 16:58:10,996][262582] Updated weights for policy 0, policy_version 24532 (0.0009) +[2026-06-02 16:58:11,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 12550144. Throughput: 0: 18224.4. Samples: 12567808. Policy #0 lag: (min: 63.0, avg: 81.1, max: 127.0) +[2026-06-02 16:58:11,008][260776] Avg episode reward: [(0, '884.897')] +[2026-06-02 16:58:11,195][262582] Updated weights for policy 0, policy_version 24542 (0.0008) +[2026-06-02 16:58:11,226][262026] Saving new best policy, reward=884.897! +[2026-06-02 16:58:11,905][262582] Updated weights for policy 0, policy_version 24553 (0.0008) +[2026-06-02 16:58:12,131][262582] Updated weights for policy 0, policy_version 24564 (0.0009) +[2026-06-02 16:58:12,327][262582] Updated weights for policy 0, policy_version 24574 (0.0009) +[2026-06-02 16:58:12,534][262582] Updated weights for policy 0, policy_version 24584 (0.0009) +[2026-06-02 16:58:12,753][262582] Updated weights for policy 0, policy_version 24595 (0.0009) +[2026-06-02 16:58:12,956][262582] Updated weights for policy 0, policy_version 24605 (0.0009) +[2026-06-02 16:58:13,625][262582] Updated weights for policy 0, policy_version 24615 (0.0008) +[2026-06-02 16:58:13,812][262582] Updated weights for policy 0, policy_version 24625 (0.0008) +[2026-06-02 16:58:14,015][262582] Updated weights for policy 0, policy_version 24635 (0.0008) +[2026-06-02 16:58:14,213][262582] Updated weights for policy 0, policy_version 24645 (0.0008) +[2026-06-02 16:58:14,427][262582] Updated weights for policy 0, policy_version 24655 (0.0008) +[2026-06-02 16:58:14,648][262582] Updated weights for policy 0, policy_version 24666 (0.0008) +[2026-06-02 16:58:15,351][262582] Updated weights for policy 0, policy_version 24676 (0.0008) +[2026-06-02 16:58:15,531][262582] Updated weights for policy 0, policy_version 24686 (0.0008) +[2026-06-02 16:58:15,738][262582] Updated weights for policy 0, policy_version 24696 (0.0008) +[2026-06-02 16:58:15,940][262582] Updated weights for policy 0, policy_version 24706 (0.0009) +[2026-06-02 16:58:16,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 12648448. Throughput: 0: 18494.6. Samples: 12684416. Policy #0 lag: (min: 42.0, avg: 71.9, max: 106.0) +[2026-06-02 16:58:16,008][260776] Avg episode reward: [(0, '869.363')] +[2026-06-02 16:58:16,142][262582] Updated weights for policy 0, policy_version 24716 (0.0008) +[2026-06-02 16:58:16,354][262582] Updated weights for policy 0, policy_version 24726 (0.0009) +[2026-06-02 16:58:17,072][262582] Updated weights for policy 0, policy_version 24737 (0.0009) +[2026-06-02 16:58:17,262][262582] Updated weights for policy 0, policy_version 24747 (0.0009) +[2026-06-02 16:58:17,451][262582] Updated weights for policy 0, policy_version 24757 (0.0008) +[2026-06-02 16:58:17,657][262582] Updated weights for policy 0, policy_version 24767 (0.0009) +[2026-06-02 16:58:17,868][262582] Updated weights for policy 0, policy_version 24777 (0.0009) +[2026-06-02 16:58:18,062][262582] Updated weights for policy 0, policy_version 24787 (0.0009) +[2026-06-02 16:58:18,266][262582] Updated weights for policy 0, policy_version 24797 (0.0010) +[2026-06-02 16:58:18,944][262582] Updated weights for policy 0, policy_version 24807 (0.0008) +[2026-06-02 16:58:19,135][262582] Updated weights for policy 0, policy_version 24817 (0.0008) +[2026-06-02 16:58:19,360][262582] Updated weights for policy 0, policy_version 24828 (0.0008) +[2026-06-02 16:58:19,552][262582] Updated weights for policy 0, policy_version 24838 (0.0008) +[2026-06-02 16:58:19,757][262582] Updated weights for policy 0, policy_version 24848 (0.0008) +[2026-06-02 16:58:19,953][262582] Updated weights for policy 0, policy_version 24858 (0.0008) +[2026-06-02 16:58:20,686][262582] Updated weights for policy 0, policy_version 24869 (0.0008) +[2026-06-02 16:58:20,876][262582] Updated weights for policy 0, policy_version 24879 (0.0008) +[2026-06-02 16:58:21,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 12746752. Throughput: 0: 18218.7. Samples: 12731008. Policy #0 lag: (min: 42.0, avg: 71.9, max: 106.0) +[2026-06-02 16:58:21,007][260776] Avg episode reward: [(0, '897.091')] +[2026-06-02 16:58:21,074][262582] Updated weights for policy 0, policy_version 24889 (0.0008) +[2026-06-02 16:58:21,279][262582] Updated weights for policy 0, policy_version 24899 (0.0007) +[2026-06-02 16:58:21,479][262582] Updated weights for policy 0, policy_version 24909 (0.0008) +[2026-06-02 16:58:21,690][262582] Updated weights for policy 0, policy_version 24919 (0.0009) +[2026-06-02 16:58:21,867][262026] Saving new best policy, reward=897.091! +[2026-06-02 16:58:22,383][262582] Updated weights for policy 0, policy_version 24930 (0.0008) +[2026-06-02 16:58:22,582][262582] Updated weights for policy 0, policy_version 24940 (0.0008) +[2026-06-02 16:58:22,769][262582] Updated weights for policy 0, policy_version 24950 (0.0008) +[2026-06-02 16:58:22,981][262582] Updated weights for policy 0, policy_version 24960 (0.0008) +[2026-06-02 16:58:23,180][262582] Updated weights for policy 0, policy_version 24970 (0.0010) +[2026-06-02 16:58:23,387][262582] Updated weights for policy 0, policy_version 24980 (0.0008) +[2026-06-02 16:58:23,583][262582] Updated weights for policy 0, policy_version 24990 (0.0008) +[2026-06-02 16:58:24,273][262582] Updated weights for policy 0, policy_version 25000 (0.0009) +[2026-06-02 16:58:24,493][262582] Updated weights for policy 0, policy_version 25011 (0.0009) +[2026-06-02 16:58:24,696][262582] Updated weights for policy 0, policy_version 25021 (0.0009) +[2026-06-02 16:58:24,902][262582] Updated weights for policy 0, policy_version 25031 (0.0009) +[2026-06-02 16:58:25,106][262582] Updated weights for policy 0, policy_version 25041 (0.0008) +[2026-06-02 16:58:25,308][262582] Updated weights for policy 0, policy_version 25051 (0.0008) +[2026-06-02 16:58:26,007][260776] Fps is (10 sec: 19660.6, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 12845056. Throughput: 0: 18466.1. Samples: 12847616. Policy #0 lag: (min: 42.0, avg: 71.9, max: 106.0) +[2026-06-02 16:58:26,008][260776] Avg episode reward: [(0, '894.264')] +[2026-06-02 16:58:26,018][262582] Updated weights for policy 0, policy_version 25062 (0.0009) +[2026-06-02 16:58:26,211][262582] Updated weights for policy 0, policy_version 25072 (0.0008) +[2026-06-02 16:58:26,408][262582] Updated weights for policy 0, policy_version 25082 (0.0008) +[2026-06-02 16:58:26,622][262582] Updated weights for policy 0, policy_version 25092 (0.0008) +[2026-06-02 16:58:26,821][262582] Updated weights for policy 0, policy_version 25102 (0.0008) +[2026-06-02 16:58:27,027][262582] Updated weights for policy 0, policy_version 25112 (0.0008) +[2026-06-02 16:58:27,732][262582] Updated weights for policy 0, policy_version 25122 (0.0008) +[2026-06-02 16:58:27,925][262582] Updated weights for policy 0, policy_version 25132 (0.0008) +[2026-06-02 16:58:28,120][262582] Updated weights for policy 0, policy_version 25142 (0.0008) +[2026-06-02 16:58:28,343][262582] Updated weights for policy 0, policy_version 25153 (0.0008) +[2026-06-02 16:58:28,552][262582] Updated weights for policy 0, policy_version 25163 (0.0009) +[2026-06-02 16:58:28,742][262582] Updated weights for policy 0, policy_version 25173 (0.0008) +[2026-06-02 16:58:28,950][262582] Updated weights for policy 0, policy_version 25183 (0.0008) +[2026-06-02 16:58:29,651][262582] Updated weights for policy 0, policy_version 25193 (0.0008) +[2026-06-02 16:58:29,847][262582] Updated weights for policy 0, policy_version 25203 (0.0008) +[2026-06-02 16:58:30,049][262582] Updated weights for policy 0, policy_version 25213 (0.0009) +[2026-06-02 16:58:30,269][262582] Updated weights for policy 0, policy_version 25224 (0.0008) +[2026-06-02 16:58:30,474][262582] Updated weights for policy 0, policy_version 25234 (0.0008) +[2026-06-02 16:58:30,691][262582] Updated weights for policy 0, policy_version 25245 (0.0008) +[2026-06-02 16:58:31,007][260776] Fps is (10 sec: 19660.6, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 12943360. Throughput: 0: 18235.7. Samples: 12953600. Policy #0 lag: (min: 42.0, avg: 71.9, max: 106.0) +[2026-06-02 16:58:31,008][260776] Avg episode reward: [(0, '892.754')] +[2026-06-02 16:58:31,393][262582] Updated weights for policy 0, policy_version 25255 (0.0008) +[2026-06-02 16:58:31,579][262582] Updated weights for policy 0, policy_version 25265 (0.0009) +[2026-06-02 16:58:31,795][262582] Updated weights for policy 0, policy_version 25275 (0.0008) +[2026-06-02 16:58:31,989][262582] Updated weights for policy 0, policy_version 25285 (0.0008) +[2026-06-02 16:58:32,192][262582] Updated weights for policy 0, policy_version 25295 (0.0008) +[2026-06-02 16:58:32,389][262582] Updated weights for policy 0, policy_version 25305 (0.0009) +[2026-06-02 16:58:33,085][262582] Updated weights for policy 0, policy_version 25315 (0.0008) +[2026-06-02 16:58:33,303][262582] Updated weights for policy 0, policy_version 25326 (0.0008) +[2026-06-02 16:58:33,499][262582] Updated weights for policy 0, policy_version 25336 (0.0008) +[2026-06-02 16:58:33,692][262582] Updated weights for policy 0, policy_version 25346 (0.0008) +[2026-06-02 16:58:33,923][262582] Updated weights for policy 0, policy_version 25357 (0.0008) +[2026-06-02 16:58:34,121][262582] Updated weights for policy 0, policy_version 25367 (0.0008) +[2026-06-02 16:58:34,878][262582] Updated weights for policy 0, policy_version 25379 (0.0009) +[2026-06-02 16:58:35,064][262582] Updated weights for policy 0, policy_version 25389 (0.0009) +[2026-06-02 16:58:35,263][262582] Updated weights for policy 0, policy_version 25399 (0.0009) +[2026-06-02 16:58:35,462][262582] Updated weights for policy 0, policy_version 25409 (0.0008) +[2026-06-02 16:58:35,662][262582] Updated weights for policy 0, policy_version 25419 (0.0008) +[2026-06-02 16:58:35,865][262582] Updated weights for policy 0, policy_version 25429 (0.0008) +[2026-06-02 16:58:36,007][260776] Fps is (10 sec: 16384.1, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 13008896. Throughput: 0: 18429.1. Samples: 13012608. Policy #0 lag: (min: 47.0, avg: 64.3, max: 111.0) +[2026-06-02 16:58:36,008][260776] Avg episode reward: [(0, '889.293')] +[2026-06-02 16:58:36,067][262582] Updated weights for policy 0, policy_version 25439 (0.0008) +[2026-06-02 16:58:36,749][262582] Updated weights for policy 0, policy_version 25449 (0.0009) +[2026-06-02 16:58:36,944][262582] Updated weights for policy 0, policy_version 25459 (0.0009) +[2026-06-02 16:58:37,144][262582] Updated weights for policy 0, policy_version 25469 (0.0008) +[2026-06-02 16:58:37,342][262582] Updated weights for policy 0, policy_version 25479 (0.0008) +[2026-06-02 16:58:37,555][262582] Updated weights for policy 0, policy_version 25489 (0.0008) +[2026-06-02 16:58:37,754][262582] Updated weights for policy 0, policy_version 25499 (0.0009) +[2026-06-02 16:58:38,488][262582] Updated weights for policy 0, policy_version 25510 (0.0008) +[2026-06-02 16:58:38,681][262582] Updated weights for policy 0, policy_version 25520 (0.0008) +[2026-06-02 16:58:38,873][262582] Updated weights for policy 0, policy_version 25530 (0.0008) +[2026-06-02 16:58:39,088][262582] Updated weights for policy 0, policy_version 25540 (0.0009) +[2026-06-02 16:58:39,290][262582] Updated weights for policy 0, policy_version 25550 (0.0008) +[2026-06-02 16:58:39,494][262582] Updated weights for policy 0, policy_version 25560 (0.0008) +[2026-06-02 16:58:40,186][262582] Updated weights for policy 0, policy_version 25570 (0.0008) +[2026-06-02 16:58:40,374][262582] Updated weights for policy 0, policy_version 25580 (0.0008) +[2026-06-02 16:58:40,577][262582] Updated weights for policy 0, policy_version 25590 (0.0008) +[2026-06-02 16:58:40,776][262582] Updated weights for policy 0, policy_version 25600 (0.0008) +[2026-06-02 16:58:40,973][262582] Updated weights for policy 0, policy_version 25610 (0.0008) +[2026-06-02 16:58:41,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 13107200. Throughput: 0: 18264.2. Samples: 13119360. Policy #0 lag: (min: 47.0, avg: 64.3, max: 111.0) +[2026-06-02 16:58:41,008][260776] Avg episode reward: [(0, '863.853')] +[2026-06-02 16:58:41,185][262582] Updated weights for policy 0, policy_version 25620 (0.0008) +[2026-06-02 16:58:41,388][262582] Updated weights for policy 0, policy_version 25630 (0.0008) +[2026-06-02 16:58:42,089][262582] Updated weights for policy 0, policy_version 25640 (0.0008) +[2026-06-02 16:58:42,289][262582] Updated weights for policy 0, policy_version 25650 (0.0008) +[2026-06-02 16:58:42,498][262582] Updated weights for policy 0, policy_version 25660 (0.0008) +[2026-06-02 16:58:42,704][262582] Updated weights for policy 0, policy_version 25670 (0.0008) +[2026-06-02 16:58:42,897][262582] Updated weights for policy 0, policy_version 25680 (0.0008) +[2026-06-02 16:58:43,098][262582] Updated weights for policy 0, policy_version 25690 (0.0008) +[2026-06-02 16:58:43,797][262582] Updated weights for policy 0, policy_version 25700 (0.0008) +[2026-06-02 16:58:43,996][262582] Updated weights for policy 0, policy_version 25710 (0.0008) +[2026-06-02 16:58:44,194][262582] Updated weights for policy 0, policy_version 25720 (0.0008) +[2026-06-02 16:58:44,395][262582] Updated weights for policy 0, policy_version 25730 (0.0008) +[2026-06-02 16:58:44,595][262582] Updated weights for policy 0, policy_version 25740 (0.0008) +[2026-06-02 16:58:44,799][262582] Updated weights for policy 0, policy_version 25750 (0.0008) +[2026-06-02 16:58:44,994][262582] Updated weights for policy 0, policy_version 25760 (0.0008) +[2026-06-02 16:58:45,701][262582] Updated weights for policy 0, policy_version 25770 (0.0009) +[2026-06-02 16:58:45,895][262582] Updated weights for policy 0, policy_version 25780 (0.0008) +[2026-06-02 16:58:46,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 13205504. Throughput: 0: 18443.4. Samples: 13232896. Policy #0 lag: (min: 47.0, avg: 64.3, max: 111.0) +[2026-06-02 16:58:46,008][260776] Avg episode reward: [(0, '846.219')] +[2026-06-02 16:58:46,098][262582] Updated weights for policy 0, policy_version 25790 (0.0009) +[2026-06-02 16:58:46,297][262582] Updated weights for policy 0, policy_version 25800 (0.0008) +[2026-06-02 16:58:46,506][262582] Updated weights for policy 0, policy_version 25810 (0.0009) +[2026-06-02 16:58:46,723][262582] Updated weights for policy 0, policy_version 25821 (0.0008) +[2026-06-02 16:58:47,423][262582] Updated weights for policy 0, policy_version 25831 (0.0009) +[2026-06-02 16:58:47,621][262582] Updated weights for policy 0, policy_version 25841 (0.0008) +[2026-06-02 16:58:47,818][262582] Updated weights for policy 0, policy_version 25851 (0.0008) +[2026-06-02 16:58:48,030][262582] Updated weights for policy 0, policy_version 25861 (0.0008) +[2026-06-02 16:58:48,223][262582] Updated weights for policy 0, policy_version 25871 (0.0008) +[2026-06-02 16:58:48,452][262582] Updated weights for policy 0, policy_version 25882 (0.0009) +[2026-06-02 16:58:49,152][262582] Updated weights for policy 0, policy_version 25892 (0.0009) +[2026-06-02 16:58:49,341][262582] Updated weights for policy 0, policy_version 25902 (0.0008) +[2026-06-02 16:58:49,541][262582] Updated weights for policy 0, policy_version 25912 (0.0008) +[2026-06-02 16:58:49,743][262582] Updated weights for policy 0, policy_version 25922 (0.0008) +[2026-06-02 16:58:49,961][262582] Updated weights for policy 0, policy_version 25933 (0.0008) +[2026-06-02 16:58:50,152][262582] Updated weights for policy 0, policy_version 25943 (0.0008) +[2026-06-02 16:58:50,877][262582] Updated weights for policy 0, policy_version 25953 (0.0008) +[2026-06-02 16:58:51,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 13303808. Throughput: 0: 18292.6. Samples: 13284608. Policy #0 lag: (min: 47.0, avg: 64.3, max: 111.0) +[2026-06-02 16:58:51,008][260776] Avg episode reward: [(0, '838.732')] +[2026-06-02 16:58:51,089][262582] Updated weights for policy 0, policy_version 25964 (0.0008) +[2026-06-02 16:58:51,293][262582] Updated weights for policy 0, policy_version 25974 (0.0008) +[2026-06-02 16:58:51,512][262582] Updated weights for policy 0, policy_version 25985 (0.0008) +[2026-06-02 16:58:51,718][262582] Updated weights for policy 0, policy_version 25995 (0.0008) +[2026-06-02 16:58:51,935][262582] Updated weights for policy 0, policy_version 26006 (0.0008) +[2026-06-02 16:58:52,146][262582] Updated weights for policy 0, policy_version 26016 (0.0009) +[2026-06-02 16:58:52,833][262582] Updated weights for policy 0, policy_version 26026 (0.0009) +[2026-06-02 16:58:53,035][262582] Updated weights for policy 0, policy_version 26036 (0.0008) +[2026-06-02 16:58:53,234][262582] Updated weights for policy 0, policy_version 26046 (0.0008) +[2026-06-02 16:58:53,462][262582] Updated weights for policy 0, policy_version 26057 (0.0008) +[2026-06-02 16:58:53,665][262582] Updated weights for policy 0, policy_version 26067 (0.0008) +[2026-06-02 16:58:53,863][262582] Updated weights for policy 0, policy_version 26077 (0.0008) +[2026-06-02 16:58:54,570][262582] Updated weights for policy 0, policy_version 26087 (0.0008) +[2026-06-02 16:58:54,764][262582] Updated weights for policy 0, policy_version 26097 (0.0008) +[2026-06-02 16:58:54,957][262582] Updated weights for policy 0, policy_version 26107 (0.0008) +[2026-06-02 16:58:55,188][262582] Updated weights for policy 0, policy_version 26118 (0.0008) +[2026-06-02 16:58:55,387][262582] Updated weights for policy 0, policy_version 26128 (0.0008) +[2026-06-02 16:58:55,597][262582] Updated weights for policy 0, policy_version 26138 (0.0008) +[2026-06-02 16:58:56,007][260776] Fps is (10 sec: 19660.5, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 13402112. Throughput: 0: 18531.5. Samples: 13401728. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) +[2026-06-02 16:58:56,008][260776] Avg episode reward: [(0, '813.624')] +[2026-06-02 16:58:56,289][262582] Updated weights for policy 0, policy_version 26148 (0.0009) +[2026-06-02 16:58:56,486][262582] Updated weights for policy 0, policy_version 26158 (0.0008) +[2026-06-02 16:58:56,675][262582] Updated weights for policy 0, policy_version 26168 (0.0008) +[2026-06-02 16:58:56,890][262582] Updated weights for policy 0, policy_version 26178 (0.0008) +[2026-06-02 16:58:57,090][262582] Updated weights for policy 0, policy_version 26188 (0.0008) +[2026-06-02 16:58:57,293][262582] Updated weights for policy 0, policy_version 26198 (0.0008) +[2026-06-02 16:58:57,489][262582] Updated weights for policy 0, policy_version 26208 (0.0008) +[2026-06-02 16:58:58,196][262582] Updated weights for policy 0, policy_version 26218 (0.0008) +[2026-06-02 16:58:58,406][262582] Updated weights for policy 0, policy_version 26229 (0.0009) +[2026-06-02 16:58:58,617][262582] Updated weights for policy 0, policy_version 26239 (0.0008) +[2026-06-02 16:58:58,818][262582] Updated weights for policy 0, policy_version 26249 (0.0008) +[2026-06-02 16:58:59,026][262582] Updated weights for policy 0, policy_version 26259 (0.0009) +[2026-06-02 16:58:59,229][262582] Updated weights for policy 0, policy_version 26269 (0.0008) +[2026-06-02 16:58:59,980][262582] Updated weights for policy 0, policy_version 26281 (0.0008) +[2026-06-02 16:59:00,169][262582] Updated weights for policy 0, policy_version 26291 (0.0008) +[2026-06-02 16:59:00,379][262582] Updated weights for policy 0, policy_version 26301 (0.0008) +[2026-06-02 16:59:00,604][262582] Updated weights for policy 0, policy_version 26312 (0.0009) +[2026-06-02 16:59:00,806][262582] Updated weights for policy 0, policy_version 26322 (0.0008) +[2026-06-02 16:59:01,007][262582] Updated weights for policy 0, policy_version 26332 (0.0008) +[2026-06-02 16:59:01,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 13467648. Throughput: 0: 18363.7. Samples: 13510784. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) +[2026-06-02 16:59:01,008][260776] Avg episode reward: [(0, '812.498')] +[2026-06-02 16:59:01,719][262582] Updated weights for policy 0, policy_version 26342 (0.0009) +[2026-06-02 16:59:01,917][262582] Updated weights for policy 0, policy_version 26352 (0.0008) +[2026-06-02 16:59:02,133][262582] Updated weights for policy 0, policy_version 26363 (0.0008) +[2026-06-02 16:59:02,337][262582] Updated weights for policy 0, policy_version 26373 (0.0008) +[2026-06-02 16:59:02,543][262582] Updated weights for policy 0, policy_version 26383 (0.0008) +[2026-06-02 16:59:02,742][262582] Updated weights for policy 0, policy_version 26393 (0.0008) +[2026-06-02 16:59:03,442][262582] Updated weights for policy 0, policy_version 26403 (0.0008) +[2026-06-02 16:59:03,629][262582] Updated weights for policy 0, policy_version 26413 (0.0008) +[2026-06-02 16:59:03,824][262582] Updated weights for policy 0, policy_version 26423 (0.0008) +[2026-06-02 16:59:04,037][262582] Updated weights for policy 0, policy_version 26433 (0.0008) +[2026-06-02 16:59:04,237][262582] Updated weights for policy 0, policy_version 26443 (0.0008) +[2026-06-02 16:59:04,462][262582] Updated weights for policy 0, policy_version 26454 (0.0008) +[2026-06-02 16:59:04,657][262582] Updated weights for policy 0, policy_version 26464 (0.0008) +[2026-06-02 16:59:05,356][262582] Updated weights for policy 0, policy_version 26474 (0.0008) +[2026-06-02 16:59:05,553][262582] Updated weights for policy 0, policy_version 26484 (0.0008) +[2026-06-02 16:59:05,755][262582] Updated weights for policy 0, policy_version 26494 (0.0008) +[2026-06-02 16:59:05,955][262582] Updated weights for policy 0, policy_version 26504 (0.0008) +[2026-06-02 16:59:06,007][260776] Fps is (10 sec: 16384.3, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 13565952. Throughput: 0: 18662.4. Samples: 13570816. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) +[2026-06-02 16:59:06,008][260776] Avg episode reward: [(0, '848.457')] +[2026-06-02 16:59:06,156][262582] Updated weights for policy 0, policy_version 26514 (0.0008) +[2026-06-02 16:59:06,359][262582] Updated weights for policy 0, policy_version 26524 (0.0009) +[2026-06-02 16:59:07,073][262582] Updated weights for policy 0, policy_version 26534 (0.0009) +[2026-06-02 16:59:07,265][262582] Updated weights for policy 0, policy_version 26544 (0.0008) +[2026-06-02 16:59:07,468][262582] Updated weights for policy 0, policy_version 26554 (0.0008) +[2026-06-02 16:59:07,672][262582] Updated weights for policy 0, policy_version 26564 (0.0009) +[2026-06-02 16:59:07,875][262582] Updated weights for policy 0, policy_version 26574 (0.0008) +[2026-06-02 16:59:08,074][262582] Updated weights for policy 0, policy_version 26584 (0.0008) +[2026-06-02 16:59:08,786][262582] Updated weights for policy 0, policy_version 26594 (0.0008) +[2026-06-02 16:59:08,969][262582] Updated weights for policy 0, policy_version 26604 (0.0008) +[2026-06-02 16:59:09,166][262582] Updated weights for policy 0, policy_version 26614 (0.0008) +[2026-06-02 16:59:09,371][262582] Updated weights for policy 0, policy_version 26624 (0.0008) +[2026-06-02 16:59:09,574][262582] Updated weights for policy 0, policy_version 26634 (0.0009) +[2026-06-02 16:59:09,775][262582] Updated weights for policy 0, policy_version 26644 (0.0008) +[2026-06-02 16:59:09,983][262582] Updated weights for policy 0, policy_version 26654 (0.0008) +[2026-06-02 16:59:10,675][262582] Updated weights for policy 0, policy_version 26664 (0.0008) +[2026-06-02 16:59:10,872][262582] Updated weights for policy 0, policy_version 26674 (0.0009) +[2026-06-02 16:59:11,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 13664256. Throughput: 0: 18446.2. Samples: 13677696. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) +[2026-06-02 16:59:11,008][260776] Avg episode reward: [(0, '863.848')] +[2026-06-02 16:59:11,064][262582] Updated weights for policy 0, policy_version 26684 (0.0009) +[2026-06-02 16:59:11,295][262582] Updated weights for policy 0, policy_version 26695 (0.0008) +[2026-06-02 16:59:11,494][262582] Updated weights for policy 0, policy_version 26705 (0.0008) +[2026-06-02 16:59:11,697][262582] Updated weights for policy 0, policy_version 26715 (0.0008) +[2026-06-02 16:59:12,425][262582] Updated weights for policy 0, policy_version 26725 (0.0009) +[2026-06-02 16:59:12,631][262582] Updated weights for policy 0, policy_version 26736 (0.0009) +[2026-06-02 16:59:12,837][262582] Updated weights for policy 0, policy_version 26746 (0.0009) +[2026-06-02 16:59:13,053][262582] Updated weights for policy 0, policy_version 26757 (0.0009) +[2026-06-02 16:59:13,257][262582] Updated weights for policy 0, policy_version 26767 (0.0009) +[2026-06-02 16:59:13,461][262582] Updated weights for policy 0, policy_version 26777 (0.0010) +[2026-06-02 16:59:14,148][262582] Updated weights for policy 0, policy_version 26787 (0.0009) +[2026-06-02 16:59:14,338][262582] Updated weights for policy 0, policy_version 26797 (0.0009) +[2026-06-02 16:59:14,540][262582] Updated weights for policy 0, policy_version 26807 (0.0008) +[2026-06-02 16:59:14,745][262582] Updated weights for policy 0, policy_version 26817 (0.0008) +[2026-06-02 16:59:14,945][262582] Updated weights for policy 0, policy_version 26827 (0.0009) +[2026-06-02 16:59:15,152][262582] Updated weights for policy 0, policy_version 26837 (0.0009) +[2026-06-02 16:59:15,351][262582] Updated weights for policy 0, policy_version 26847 (0.0009) +[2026-06-02 16:59:16,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 13762560. Throughput: 0: 18443.4. Samples: 13783552. Policy #0 lag: (min: 63.0, avg: 80.0, max: 127.0) +[2026-06-02 16:59:16,008][260776] Avg episode reward: [(0, '851.875')] +[2026-06-02 16:59:16,041][262582] Updated weights for policy 0, policy_version 26857 (0.0009) +[2026-06-02 16:59:16,249][262582] Updated weights for policy 0, policy_version 26867 (0.0009) +[2026-06-02 16:59:16,467][262582] Updated weights for policy 0, policy_version 26878 (0.0009) +[2026-06-02 16:59:16,668][262582] Updated weights for policy 0, policy_version 26888 (0.0009) +[2026-06-02 16:59:16,871][262582] Updated weights for policy 0, policy_version 26898 (0.0009) +[2026-06-02 16:59:17,073][262582] Updated weights for policy 0, policy_version 26908 (0.0008) +[2026-06-02 16:59:17,803][262582] Updated weights for policy 0, policy_version 26919 (0.0008) +[2026-06-02 16:59:18,002][262582] Updated weights for policy 0, policy_version 26929 (0.0008) +[2026-06-02 16:59:18,204][262582] Updated weights for policy 0, policy_version 26939 (0.0008) +[2026-06-02 16:59:18,406][262582] Updated weights for policy 0, policy_version 26949 (0.0008) +[2026-06-02 16:59:18,609][262582] Updated weights for policy 0, policy_version 26959 (0.0008) +[2026-06-02 16:59:18,801][262582] Updated weights for policy 0, policy_version 26969 (0.0008) +[2026-06-02 16:59:19,511][262582] Updated weights for policy 0, policy_version 26979 (0.0008) +[2026-06-02 16:59:19,707][262582] Updated weights for policy 0, policy_version 26989 (0.0008) +[2026-06-02 16:59:19,904][262582] Updated weights for policy 0, policy_version 26999 (0.0008) +[2026-06-02 16:59:20,125][262582] Updated weights for policy 0, policy_version 27010 (0.0008) +[2026-06-02 16:59:20,326][262582] Updated weights for policy 0, policy_version 27020 (0.0008) +[2026-06-02 16:59:20,527][262582] Updated weights for policy 0, policy_version 27030 (0.0009) +[2026-06-02 16:59:20,726][262582] Updated weights for policy 0, policy_version 27040 (0.0008) +[2026-06-02 16:59:21,007][260776] Fps is (10 sec: 19661.0, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 13860864. Throughput: 0: 18457.6. Samples: 13843200. Policy #0 lag: (min: 63.0, avg: 80.0, max: 127.0) +[2026-06-02 16:59:21,008][260776] Avg episode reward: [(0, '848.610')] +[2026-06-02 16:59:21,439][262582] Updated weights for policy 0, policy_version 27051 (0.0008) +[2026-06-02 16:59:21,646][262582] Updated weights for policy 0, policy_version 27061 (0.0008) +[2026-06-02 16:59:21,842][262582] Updated weights for policy 0, policy_version 27071 (0.0008) +[2026-06-02 16:59:22,053][262582] Updated weights for policy 0, policy_version 27081 (0.0008) +[2026-06-02 16:59:22,256][262582] Updated weights for policy 0, policy_version 27091 (0.0008) +[2026-06-02 16:59:22,479][262582] Updated weights for policy 0, policy_version 27102 (0.0008) +[2026-06-02 16:59:23,176][262582] Updated weights for policy 0, policy_version 27112 (0.0009) +[2026-06-02 16:59:23,367][262582] Updated weights for policy 0, policy_version 27122 (0.0008) +[2026-06-02 16:59:23,576][262582] Updated weights for policy 0, policy_version 27132 (0.0008) +[2026-06-02 16:59:23,778][262582] Updated weights for policy 0, policy_version 27142 (0.0008) +[2026-06-02 16:59:23,999][262582] Updated weights for policy 0, policy_version 27153 (0.0009) +[2026-06-02 16:59:24,204][262582] Updated weights for policy 0, policy_version 27163 (0.0008) +[2026-06-02 16:59:24,926][262582] Updated weights for policy 0, policy_version 27174 (0.0009) +[2026-06-02 16:59:25,120][262582] Updated weights for policy 0, policy_version 27184 (0.0008) +[2026-06-02 16:59:25,319][262582] Updated weights for policy 0, policy_version 27194 (0.0008) +[2026-06-02 16:59:25,511][262582] Updated weights for policy 0, policy_version 27204 (0.0008) +[2026-06-02 16:59:25,721][262582] Updated weights for policy 0, policy_version 27214 (0.0008) +[2026-06-02 16:59:25,947][262582] Updated weights for policy 0, policy_version 27225 (0.0008) +[2026-06-02 16:59:26,007][260776] Fps is (10 sec: 16384.1, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 13926400. Throughput: 0: 18446.2. Samples: 13949440. Policy #0 lag: (min: 63.0, avg: 80.0, max: 127.0) +[2026-06-02 16:59:26,008][260776] Avg episode reward: [(0, '883.537')] +[2026-06-02 16:59:26,668][262582] Updated weights for policy 0, policy_version 27235 (0.0008) +[2026-06-02 16:59:26,851][262582] Updated weights for policy 0, policy_version 27245 (0.0008) +[2026-06-02 16:59:27,052][262582] Updated weights for policy 0, policy_version 27255 (0.0008) +[2026-06-02 16:59:27,249][262582] Updated weights for policy 0, policy_version 27265 (0.0008) +[2026-06-02 16:59:27,445][262582] Updated weights for policy 0, policy_version 27275 (0.0008) +[2026-06-02 16:59:27,653][262582] Updated weights for policy 0, policy_version 27285 (0.0008) +[2026-06-02 16:59:27,855][262582] Updated weights for policy 0, policy_version 27295 (0.0008) +[2026-06-02 16:59:28,556][262582] Updated weights for policy 0, policy_version 27305 (0.0008) +[2026-06-02 16:59:28,744][262582] Updated weights for policy 0, policy_version 27315 (0.0009) +[2026-06-02 16:59:28,945][262582] Updated weights for policy 0, policy_version 27325 (0.0008) +[2026-06-02 16:59:29,147][262582] Updated weights for policy 0, policy_version 27335 (0.0008) +[2026-06-02 16:59:29,360][262582] Updated weights for policy 0, policy_version 27345 (0.0008) +[2026-06-02 16:59:29,557][262582] Updated weights for policy 0, policy_version 27355 (0.0009) +[2026-06-02 16:59:30,261][262582] Updated weights for policy 0, policy_version 27365 (0.0008) +[2026-06-02 16:59:30,477][262582] Updated weights for policy 0, policy_version 27376 (0.0009) +[2026-06-02 16:59:30,678][262582] Updated weights for policy 0, policy_version 27386 (0.0008) +[2026-06-02 16:59:30,880][262582] Updated weights for policy 0, policy_version 27396 (0.0008) +[2026-06-02 16:59:31,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 14024704. Throughput: 0: 18577.1. Samples: 14068864. Policy #0 lag: (min: 63.0, avg: 80.0, max: 127.0) +[2026-06-02 16:59:31,008][260776] Avg episode reward: [(0, '894.540')] +[2026-06-02 16:59:31,084][262582] Updated weights for policy 0, policy_version 27406 (0.0009) +[2026-06-02 16:59:31,278][262582] Updated weights for policy 0, policy_version 27416 (0.0008) +[2026-06-02 16:59:31,996][262582] Updated weights for policy 0, policy_version 27426 (0.0008) +[2026-06-02 16:59:32,181][262582] Updated weights for policy 0, policy_version 27436 (0.0008) +[2026-06-02 16:59:32,382][262582] Updated weights for policy 0, policy_version 27446 (0.0008) +[2026-06-02 16:59:32,585][262582] Updated weights for policy 0, policy_version 27456 (0.0008) +[2026-06-02 16:59:32,786][262582] Updated weights for policy 0, policy_version 27466 (0.0008) +[2026-06-02 16:59:32,989][262582] Updated weights for policy 0, policy_version 27476 (0.0008) +[2026-06-02 16:59:33,190][262582] Updated weights for policy 0, policy_version 27486 (0.0008) +[2026-06-02 16:59:33,899][262582] Updated weights for policy 0, policy_version 27496 (0.0008) +[2026-06-02 16:59:34,109][262582] Updated weights for policy 0, policy_version 27507 (0.0009) +[2026-06-02 16:59:34,318][262582] Updated weights for policy 0, policy_version 27517 (0.0009) +[2026-06-02 16:59:34,521][262582] Updated weights for policy 0, policy_version 27527 (0.0008) +[2026-06-02 16:59:34,725][262582] Updated weights for policy 0, policy_version 27537 (0.0009) +[2026-06-02 16:59:34,918][262582] Updated weights for policy 0, policy_version 27547 (0.0008) +[2026-06-02 16:59:35,609][262582] Updated weights for policy 0, policy_version 27557 (0.0008) +[2026-06-02 16:59:35,815][262582] Updated weights for policy 0, policy_version 27567 (0.0008) +[2026-06-02 16:59:36,006][262582] Updated weights for policy 0, policy_version 27577 (0.0008) +[2026-06-02 16:59:36,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 14123008. Throughput: 0: 18463.3. Samples: 14115456. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) +[2026-06-02 16:59:36,008][260776] Avg episode reward: [(0, '879.235')] +[2026-06-02 16:59:36,206][262582] Updated weights for policy 0, policy_version 27587 (0.0008) +[2026-06-02 16:59:36,401][262582] Updated weights for policy 0, policy_version 27597 (0.0008) +[2026-06-02 16:59:36,610][262582] Updated weights for policy 0, policy_version 27607 (0.0008) +[2026-06-02 16:59:37,341][262582] Updated weights for policy 0, policy_version 27617 (0.0008) +[2026-06-02 16:59:37,523][262582] Updated weights for policy 0, policy_version 27627 (0.0008) +[2026-06-02 16:59:37,722][262582] Updated weights for policy 0, policy_version 27637 (0.0008) +[2026-06-02 16:59:37,923][262582] Updated weights for policy 0, policy_version 27647 (0.0008) +[2026-06-02 16:59:38,139][262582] Updated weights for policy 0, policy_version 27658 (0.0008) +[2026-06-02 16:59:38,352][262582] Updated weights for policy 0, policy_version 27668 (0.0008) +[2026-06-02 16:59:38,555][262582] Updated weights for policy 0, policy_version 27678 (0.0009) +[2026-06-02 16:59:39,261][262582] Updated weights for policy 0, policy_version 27688 (0.0009) +[2026-06-02 16:59:39,477][262582] Updated weights for policy 0, policy_version 27699 (0.0009) +[2026-06-02 16:59:39,677][262582] Updated weights for policy 0, policy_version 27709 (0.0009) +[2026-06-02 16:59:39,883][262582] Updated weights for policy 0, policy_version 27719 (0.0009) +[2026-06-02 16:59:40,082][262582] Updated weights for policy 0, policy_version 27729 (0.0009) +[2026-06-02 16:59:40,289][262582] Updated weights for policy 0, policy_version 27739 (0.0009) +[2026-06-02 16:59:40,991][262582] Updated weights for policy 0, policy_version 27749 (0.0006) +[2026-06-02 16:59:41,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 14221312. Throughput: 0: 18514.6. Samples: 14234880. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) +[2026-06-02 16:59:41,008][260776] Avg episode reward: [(0, '888.321')] +[2026-06-02 16:59:41,176][262582] Updated weights for policy 0, policy_version 27759 (0.0004) +[2026-06-02 16:59:41,384][262582] Updated weights for policy 0, policy_version 27769 (0.0005) +[2026-06-02 16:59:41,581][262582] Updated weights for policy 0, policy_version 27779 (0.0008) +[2026-06-02 16:59:41,789][262582] Updated weights for policy 0, policy_version 27789 (0.0008) +[2026-06-02 16:59:41,989][262582] Updated weights for policy 0, policy_version 27799 (0.0008) +[2026-06-02 16:59:42,726][262582] Updated weights for policy 0, policy_version 27810 (0.0008) +[2026-06-02 16:59:42,914][262582] Updated weights for policy 0, policy_version 27820 (0.0008) +[2026-06-02 16:59:43,115][262582] Updated weights for policy 0, policy_version 27830 (0.0008) +[2026-06-02 16:59:43,343][262582] Updated weights for policy 0, policy_version 27841 (0.0008) +[2026-06-02 16:59:43,550][262582] Updated weights for policy 0, policy_version 27851 (0.0008) +[2026-06-02 16:59:43,751][262582] Updated weights for policy 0, policy_version 27861 (0.0008) +[2026-06-02 16:59:43,954][262582] Updated weights for policy 0, policy_version 27871 (0.0008) +[2026-06-02 16:59:44,628][262582] Updated weights for policy 0, policy_version 27881 (0.0008) +[2026-06-02 16:59:44,821][262582] Updated weights for policy 0, policy_version 27891 (0.0009) +[2026-06-02 16:59:45,021][262582] Updated weights for policy 0, policy_version 27901 (0.0008) +[2026-06-02 16:59:45,229][262582] Updated weights for policy 0, policy_version 27911 (0.0009) +[2026-06-02 16:59:45,434][262582] Updated weights for policy 0, policy_version 27921 (0.0008) +[2026-06-02 16:59:45,623][262582] Updated weights for policy 0, policy_version 27931 (0.0008) +[2026-06-02 16:59:46,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 14319616. Throughput: 0: 18443.4. Samples: 14340736. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) +[2026-06-02 16:59:46,008][260776] Avg episode reward: [(0, '881.675')] +[2026-06-02 16:59:46,330][262582] Updated weights for policy 0, policy_version 27941 (0.0008) +[2026-06-02 16:59:46,533][262582] Updated weights for policy 0, policy_version 27952 (0.0008) +[2026-06-02 16:59:46,745][262582] Updated weights for policy 0, policy_version 27962 (0.0008) +[2026-06-02 16:59:46,953][262582] Updated weights for policy 0, policy_version 27972 (0.0008) +[2026-06-02 16:59:47,144][262582] Updated weights for policy 0, policy_version 27982 (0.0013) +[2026-06-02 16:59:47,352][262582] Updated weights for policy 0, policy_version 27992 (0.0009) +[2026-06-02 16:59:48,062][262582] Updated weights for policy 0, policy_version 28002 (0.0008) +[2026-06-02 16:59:48,241][262582] Updated weights for policy 0, policy_version 28012 (0.0008) +[2026-06-02 16:59:48,446][262582] Updated weights for policy 0, policy_version 28022 (0.0008) +[2026-06-02 16:59:48,646][262582] Updated weights for policy 0, policy_version 28032 (0.0008) +[2026-06-02 16:59:48,876][262582] Updated weights for policy 0, policy_version 28043 (0.0008) +[2026-06-02 16:59:49,078][262582] Updated weights for policy 0, policy_version 28053 (0.0008) +[2026-06-02 16:59:49,277][262582] Updated weights for policy 0, policy_version 28063 (0.0008) +[2026-06-02 16:59:49,972][262582] Updated weights for policy 0, policy_version 28073 (0.0008) +[2026-06-02 16:59:50,170][262582] Updated weights for policy 0, policy_version 28083 (0.0008) +[2026-06-02 16:59:50,369][262582] Updated weights for policy 0, policy_version 28093 (0.0008) +[2026-06-02 16:59:50,573][262582] Updated weights for policy 0, policy_version 28103 (0.0008) +[2026-06-02 16:59:50,783][262582] Updated weights for policy 0, policy_version 28113 (0.0008) +[2026-06-02 16:59:50,977][262582] Updated weights for policy 0, policy_version 28123 (0.0008) +[2026-06-02 16:59:51,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 14385152. Throughput: 0: 18426.3. Samples: 14400000. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) +[2026-06-02 16:59:51,008][260776] Avg episode reward: [(0, '861.283')] +[2026-06-02 16:59:51,700][262582] Updated weights for policy 0, policy_version 28134 (0.0008) +[2026-06-02 16:59:51,901][262582] Updated weights for policy 0, policy_version 28144 (0.0009) +[2026-06-02 16:59:52,104][262582] Updated weights for policy 0, policy_version 28154 (0.0008) +[2026-06-02 16:59:52,297][262582] Updated weights for policy 0, policy_version 28164 (0.0008) +[2026-06-02 16:59:52,505][262582] Updated weights for policy 0, policy_version 28174 (0.0008) +[2026-06-02 16:59:52,708][262582] Updated weights for policy 0, policy_version 28184 (0.0009) +[2026-06-02 16:59:53,420][262582] Updated weights for policy 0, policy_version 28194 (0.0009) +[2026-06-02 16:59:53,618][262582] Updated weights for policy 0, policy_version 28204 (0.0009) +[2026-06-02 16:59:53,817][262582] Updated weights for policy 0, policy_version 28214 (0.0008) +[2026-06-02 16:59:54,021][262582] Updated weights for policy 0, policy_version 28224 (0.0009) +[2026-06-02 16:59:54,229][262582] Updated weights for policy 0, policy_version 28234 (0.0008) +[2026-06-02 16:59:54,426][262582] Updated weights for policy 0, policy_version 28244 (0.0008) +[2026-06-02 16:59:54,626][262582] Updated weights for policy 0, policy_version 28254 (0.0008) +[2026-06-02 16:59:55,313][262582] Updated weights for policy 0, policy_version 28264 (0.0008) +[2026-06-02 16:59:55,518][262582] Updated weights for policy 0, policy_version 28274 (0.0009) +[2026-06-02 16:59:55,717][262582] Updated weights for policy 0, policy_version 28284 (0.0009) +[2026-06-02 16:59:55,917][262582] Updated weights for policy 0, policy_version 28294 (0.0008) +[2026-06-02 16:59:56,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.5, 300 sec: 18438.9). Total num frames: 14483456. Throughput: 0: 18432.0. Samples: 14507136. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 16:59:56,008][260776] Avg episode reward: [(0, '932.218')] +[2026-06-02 16:59:56,134][262582] Updated weights for policy 0, policy_version 28305 (0.0008) +[2026-06-02 16:59:56,333][262582] Updated weights for policy 0, policy_version 28315 (0.0008) +[2026-06-02 16:59:56,432][262026] Saving new best policy, reward=932.218! +[2026-06-02 16:59:57,050][262582] Updated weights for policy 0, policy_version 28325 (0.0008) +[2026-06-02 16:59:57,263][262582] Updated weights for policy 0, policy_version 28336 (0.0008) +[2026-06-02 16:59:57,461][262582] Updated weights for policy 0, policy_version 28346 (0.0008) +[2026-06-02 16:59:57,662][262582] Updated weights for policy 0, policy_version 28356 (0.0008) +[2026-06-02 16:59:57,865][262582] Updated weights for policy 0, policy_version 28366 (0.0008) +[2026-06-02 16:59:58,063][262582] Updated weights for policy 0, policy_version 28376 (0.0008) +[2026-06-02 16:59:58,788][262582] Updated weights for policy 0, policy_version 28386 (0.0008) +[2026-06-02 16:59:59,008][262582] Updated weights for policy 0, policy_version 28398 (0.0008) +[2026-06-02 16:59:59,216][262582] Updated weights for policy 0, policy_version 28408 (0.0008) +[2026-06-02 16:59:59,417][262582] Updated weights for policy 0, policy_version 28418 (0.0009) +[2026-06-02 16:59:59,634][262582] Updated weights for policy 0, policy_version 28429 (0.0008) +[2026-06-02 16:59:59,839][262582] Updated weights for policy 0, policy_version 28439 (0.0008) +[2026-06-02 17:00:00,548][262582] Updated weights for policy 0, policy_version 28449 (0.0008) +[2026-06-02 17:00:00,737][262582] Updated weights for policy 0, policy_version 28459 (0.0008) +[2026-06-02 17:00:00,938][262582] Updated weights for policy 0, policy_version 28469 (0.0009) +[2026-06-02 17:00:01,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 14581760. Throughput: 0: 18599.9. Samples: 14620544. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 17:00:01,008][260776] Avg episode reward: [(0, '958.728')] +[2026-06-02 17:00:01,136][262582] Updated weights for policy 0, policy_version 28479 (0.0009) +[2026-06-02 17:00:01,330][262582] Updated weights for policy 0, policy_version 28489 (0.0009) +[2026-06-02 17:00:01,546][262582] Updated weights for policy 0, policy_version 28499 (0.0008) +[2026-06-02 17:00:01,743][262582] Updated weights for policy 0, policy_version 28509 (0.0009) +[2026-06-02 17:00:01,801][262026] Saving new best policy, reward=958.728! +[2026-06-02 17:00:02,450][262582] Updated weights for policy 0, policy_version 28519 (0.0008) +[2026-06-02 17:00:02,641][262582] Updated weights for policy 0, policy_version 28529 (0.0008) +[2026-06-02 17:00:02,847][262582] Updated weights for policy 0, policy_version 28539 (0.0008) +[2026-06-02 17:00:03,052][262582] Updated weights for policy 0, policy_version 28549 (0.0008) +[2026-06-02 17:00:03,254][262582] Updated weights for policy 0, policy_version 28559 (0.0008) +[2026-06-02 17:00:03,455][262582] Updated weights for policy 0, policy_version 28569 (0.0008) +[2026-06-02 17:00:04,155][262582] Updated weights for policy 0, policy_version 28579 (0.0008) +[2026-06-02 17:00:04,333][262582] Updated weights for policy 0, policy_version 28589 (0.0008) +[2026-06-02 17:00:04,546][262582] Updated weights for policy 0, policy_version 28599 (0.0008) +[2026-06-02 17:00:04,742][262582] Updated weights for policy 0, policy_version 28609 (0.0008) +[2026-06-02 17:00:04,950][262582] Updated weights for policy 0, policy_version 28619 (0.0008) +[2026-06-02 17:00:05,173][262582] Updated weights for policy 0, policy_version 28630 (0.0009) +[2026-06-02 17:00:05,369][262582] Updated weights for policy 0, policy_version 28640 (0.0008) +[2026-06-02 17:00:06,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 14680064. Throughput: 0: 18446.2. Samples: 14673280. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 17:00:06,008][260776] Avg episode reward: [(0, '974.574')] +[2026-06-02 17:00:06,072][262582] Updated weights for policy 0, policy_version 28650 (0.0009) +[2026-06-02 17:00:06,284][262582] Updated weights for policy 0, policy_version 28661 (0.0009) +[2026-06-02 17:00:06,483][262582] Updated weights for policy 0, policy_version 28671 (0.0008) +[2026-06-02 17:00:06,691][262582] Updated weights for policy 0, policy_version 28681 (0.0008) +[2026-06-02 17:00:06,887][262582] Updated weights for policy 0, policy_version 28691 (0.0008) +[2026-06-02 17:00:07,096][262582] Updated weights for policy 0, policy_version 28701 (0.0008) +[2026-06-02 17:00:07,147][262026] Saving new best policy, reward=974.574! +[2026-06-02 17:00:07,792][262582] Updated weights for policy 0, policy_version 28711 (0.0008) +[2026-06-02 17:00:07,992][262582] Updated weights for policy 0, policy_version 28722 (0.0008) +[2026-06-02 17:00:08,191][262582] Updated weights for policy 0, policy_version 28732 (0.0008) +[2026-06-02 17:00:08,401][262582] Updated weights for policy 0, policy_version 28742 (0.0008) +[2026-06-02 17:00:08,607][262582] Updated weights for policy 0, policy_version 28752 (0.0007) +[2026-06-02 17:00:08,805][262582] Updated weights for policy 0, policy_version 28762 (0.0008) +[2026-06-02 17:00:09,503][262582] Updated weights for policy 0, policy_version 28772 (0.0009) +[2026-06-02 17:00:09,693][262582] Updated weights for policy 0, policy_version 28782 (0.0009) +[2026-06-02 17:00:09,895][262582] Updated weights for policy 0, policy_version 28792 (0.0008) +[2026-06-02 17:00:10,089][262582] Updated weights for policy 0, policy_version 28802 (0.0008) +[2026-06-02 17:00:10,305][262582] Updated weights for policy 0, policy_version 28812 (0.0008) +[2026-06-02 17:00:10,506][262582] Updated weights for policy 0, policy_version 28822 (0.0008) +[2026-06-02 17:00:10,704][262582] Updated weights for policy 0, policy_version 28832 (0.0008) +[2026-06-02 17:00:11,007][260776] Fps is (10 sec: 19660.4, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 14778368. Throughput: 0: 18682.2. Samples: 14790144. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 17:00:11,011][260776] Avg episode reward: [(0, '991.808')] +[2026-06-02 17:00:11,016][262026] Saving new best policy, reward=991.808! +[2026-06-02 17:00:11,406][262582] Updated weights for policy 0, policy_version 28842 (0.0008) +[2026-06-02 17:00:11,599][262582] Updated weights for policy 0, policy_version 28852 (0.0008) +[2026-06-02 17:00:11,807][262582] Updated weights for policy 0, policy_version 28862 (0.0008) +[2026-06-02 17:00:12,005][262582] Updated weights for policy 0, policy_version 28872 (0.0008) +[2026-06-02 17:00:12,223][262582] Updated weights for policy 0, policy_version 28883 (0.0008) +[2026-06-02 17:00:12,432][262582] Updated weights for policy 0, policy_version 28893 (0.0008) +[2026-06-02 17:00:13,156][262582] Updated weights for policy 0, policy_version 28904 (0.0009) +[2026-06-02 17:00:13,350][262582] Updated weights for policy 0, policy_version 28914 (0.0008) +[2026-06-02 17:00:13,552][262582] Updated weights for policy 0, policy_version 28924 (0.0008) +[2026-06-02 17:00:13,744][262582] Updated weights for policy 0, policy_version 28934 (0.0008) +[2026-06-02 17:00:13,970][262582] Updated weights for policy 0, policy_version 28945 (0.0008) +[2026-06-02 17:00:14,179][262582] Updated weights for policy 0, policy_version 28955 (0.0009) +[2026-06-02 17:00:14,876][262582] Updated weights for policy 0, policy_version 28965 (0.0008) +[2026-06-02 17:00:15,075][262582] Updated weights for policy 0, policy_version 28975 (0.0008) +[2026-06-02 17:00:15,266][262582] Updated weights for policy 0, policy_version 28985 (0.0008) +[2026-06-02 17:00:15,476][262582] Updated weights for policy 0, policy_version 28995 (0.0008) +[2026-06-02 17:00:15,682][262582] Updated weights for policy 0, policy_version 29005 (0.0008) +[2026-06-02 17:00:15,883][262582] Updated weights for policy 0, policy_version 29015 (0.0008) +[2026-06-02 17:00:16,007][260776] Fps is (10 sec: 16383.8, 60 sec: 18022.4, 300 sec: 18327.9). Total num frames: 14843904. Throughput: 0: 18426.2. Samples: 14898048. Policy #0 lag: (min: 63.0, avg: 80.0, max: 127.0) +[2026-06-02 17:00:16,008][260776] Avg episode reward: [(0, '1012.640')] +[2026-06-02 17:00:16,053][262026] Saving new best policy, reward=1012.640! +[2026-06-02 17:00:16,581][262582] Updated weights for policy 0, policy_version 29025 (0.0008) +[2026-06-02 17:00:16,765][262582] Updated weights for policy 0, policy_version 29035 (0.0008) +[2026-06-02 17:00:16,952][262582] Updated weights for policy 0, policy_version 29045 (0.0008) +[2026-06-02 17:00:17,165][262582] Updated weights for policy 0, policy_version 29055 (0.0007) +[2026-06-02 17:00:17,368][262582] Updated weights for policy 0, policy_version 29065 (0.0005) +[2026-06-02 17:00:17,572][262582] Updated weights for policy 0, policy_version 29075 (0.0005) +[2026-06-02 17:00:17,794][262582] Updated weights for policy 0, policy_version 29086 (0.0005) +[2026-06-02 17:00:18,491][262582] Updated weights for policy 0, policy_version 29096 (0.0005) +[2026-06-02 17:00:18,686][262582] Updated weights for policy 0, policy_version 29106 (0.0005) +[2026-06-02 17:00:18,879][262582] Updated weights for policy 0, policy_version 29116 (0.0004) +[2026-06-02 17:00:19,085][262582] Updated weights for policy 0, policy_version 29126 (0.0005) +[2026-06-02 17:00:19,293][262582] Updated weights for policy 0, policy_version 29136 (0.0006) +[2026-06-02 17:00:19,498][262582] Updated weights for policy 0, policy_version 29146 (0.0008) +[2026-06-02 17:00:20,202][262582] Updated weights for policy 0, policy_version 29156 (0.0008) +[2026-06-02 17:00:20,394][262582] Updated weights for policy 0, policy_version 29166 (0.0008) +[2026-06-02 17:00:20,594][262582] Updated weights for policy 0, policy_version 29176 (0.0008) +[2026-06-02 17:00:20,793][262582] Updated weights for policy 0, policy_version 29186 (0.0008) +[2026-06-02 17:00:20,989][262582] Updated weights for policy 0, policy_version 29196 (0.0008) +[2026-06-02 17:00:21,007][260776] Fps is (10 sec: 16384.3, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 14942208. Throughput: 0: 18710.8. Samples: 14957440. Policy #0 lag: (min: 63.0, avg: 80.0, max: 127.0) +[2026-06-02 17:00:21,008][260776] Avg episode reward: [(0, '1039.697')] +[2026-06-02 17:00:21,201][262582] Updated weights for policy 0, policy_version 29206 (0.0008) +[2026-06-02 17:00:21,393][262026] Saving new best policy, reward=1039.697! +[2026-06-02 17:00:21,397][262582] Updated weights for policy 0, policy_version 29216 (0.0009) +[2026-06-02 17:00:22,077][262582] Updated weights for policy 0, policy_version 29226 (0.0008) +[2026-06-02 17:00:22,278][262582] Updated weights for policy 0, policy_version 29236 (0.0008) +[2026-06-02 17:00:22,480][262582] Updated weights for policy 0, policy_version 29246 (0.0008) +[2026-06-02 17:00:22,685][262582] Updated weights for policy 0, policy_version 29256 (0.0009) +[2026-06-02 17:00:22,886][262582] Updated weights for policy 0, policy_version 29266 (0.0008) +[2026-06-02 17:00:23,109][262582] Updated weights for policy 0, policy_version 29277 (0.0008) +[2026-06-02 17:00:23,809][262582] Updated weights for policy 0, policy_version 29287 (0.0008) +[2026-06-02 17:00:24,011][262582] Updated weights for policy 0, policy_version 29297 (0.0008) +[2026-06-02 17:00:24,206][262582] Updated weights for policy 0, policy_version 29307 (0.0008) +[2026-06-02 17:00:24,411][262582] Updated weights for policy 0, policy_version 29317 (0.0008) +[2026-06-02 17:00:24,616][262582] Updated weights for policy 0, policy_version 29327 (0.0008) +[2026-06-02 17:00:24,820][262582] Updated weights for policy 0, policy_version 29337 (0.0008) +[2026-06-02 17:00:25,530][262582] Updated weights for policy 0, policy_version 29347 (0.0008) +[2026-06-02 17:00:25,732][262582] Updated weights for policy 0, policy_version 29358 (0.0008) +[2026-06-02 17:00:25,935][262582] Updated weights for policy 0, policy_version 29368 (0.0008) +[2026-06-02 17:00:26,007][260776] Fps is (10 sec: 19661.0, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 15040512. Throughput: 0: 18406.4. Samples: 15063168. Policy #0 lag: (min: 63.0, avg: 80.0, max: 127.0) +[2026-06-02 17:00:26,008][260776] Avg episode reward: [(0, '1025.320')] +[2026-06-02 17:00:26,139][262582] Updated weights for policy 0, policy_version 29378 (0.0008) +[2026-06-02 17:00:26,344][262582] Updated weights for policy 0, policy_version 29388 (0.0008) +[2026-06-02 17:00:26,547][262582] Updated weights for policy 0, policy_version 29398 (0.0008) +[2026-06-02 17:00:26,737][262582] Updated weights for policy 0, policy_version 29408 (0.0008) +[2026-06-02 17:00:27,457][262582] Updated weights for policy 0, policy_version 29418 (0.0008) +[2026-06-02 17:00:27,649][262582] Updated weights for policy 0, policy_version 29428 (0.0008) +[2026-06-02 17:00:27,859][262582] Updated weights for policy 0, policy_version 29438 (0.0008) +[2026-06-02 17:00:28,056][262582] Updated weights for policy 0, policy_version 29448 (0.0008) +[2026-06-02 17:00:28,261][262582] Updated weights for policy 0, policy_version 29458 (0.0008) +[2026-06-02 17:00:28,468][262582] Updated weights for policy 0, policy_version 29468 (0.0008) +[2026-06-02 17:00:29,155][262582] Updated weights for policy 0, policy_version 29478 (0.0008) +[2026-06-02 17:00:29,347][262582] Updated weights for policy 0, policy_version 29488 (0.0009) +[2026-06-02 17:00:29,546][262582] Updated weights for policy 0, policy_version 29498 (0.0008) +[2026-06-02 17:00:29,750][262582] Updated weights for policy 0, policy_version 29508 (0.0009) +[2026-06-02 17:00:29,953][262582] Updated weights for policy 0, policy_version 29518 (0.0009) +[2026-06-02 17:00:30,154][262582] Updated weights for policy 0, policy_version 29528 (0.0008) +[2026-06-02 17:00:30,844][262582] Updated weights for policy 0, policy_version 29538 (0.0009) +[2026-06-02 17:00:31,007][260776] Fps is (10 sec: 19660.5, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 15138816. Throughput: 0: 18426.3. Samples: 15169920. Policy #0 lag: (min: 63.0, avg: 80.0, max: 127.0) +[2026-06-02 17:00:31,008][260776] Avg episode reward: [(0, '1090.580')] +[2026-06-02 17:00:31,049][262582] Updated weights for policy 0, policy_version 29549 (0.0009) +[2026-06-02 17:00:31,244][262582] Updated weights for policy 0, policy_version 29559 (0.0009) +[2026-06-02 17:00:31,449][262582] Updated weights for policy 0, policy_version 29569 (0.0009) +[2026-06-02 17:00:31,654][262582] Updated weights for policy 0, policy_version 29579 (0.0009) +[2026-06-02 17:00:31,857][262582] Updated weights for policy 0, policy_version 29589 (0.0009) +[2026-06-02 17:00:32,057][262582] Updated weights for policy 0, policy_version 29599 (0.0009) +[2026-06-02 17:00:32,069][262026] Saving new best policy, reward=1090.580! +[2026-06-02 17:00:32,738][262582] Updated weights for policy 0, policy_version 29609 (0.0009) +[2026-06-02 17:00:32,957][262582] Updated weights for policy 0, policy_version 29620 (0.0009) +[2026-06-02 17:00:33,154][262582] Updated weights for policy 0, policy_version 29630 (0.0009) +[2026-06-02 17:00:33,359][262582] Updated weights for policy 0, policy_version 29640 (0.0007) +[2026-06-02 17:00:33,571][262582] Updated weights for policy 0, policy_version 29650 (0.0004) +[2026-06-02 17:00:33,795][262582] Updated weights for policy 0, policy_version 29661 (0.0004) +[2026-06-02 17:00:34,473][262582] Updated weights for policy 0, policy_version 29671 (0.0007) +[2026-06-02 17:00:34,667][262582] Updated weights for policy 0, policy_version 29681 (0.0008) +[2026-06-02 17:00:34,881][262582] Updated weights for policy 0, policy_version 29692 (0.0008) +[2026-06-02 17:00:35,091][262582] Updated weights for policy 0, policy_version 29702 (0.0009) +[2026-06-02 17:00:35,292][262582] Updated weights for policy 0, policy_version 29712 (0.0009) +[2026-06-02 17:00:35,496][262582] Updated weights for policy 0, policy_version 29722 (0.0008) +[2026-06-02 17:00:36,007][260776] Fps is (10 sec: 19661.0, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 15237120. Throughput: 0: 18397.9. Samples: 15227904. Policy #0 lag: (min: 45.0, avg: 62.7, max: 109.0) +[2026-06-02 17:00:36,008][260776] Avg episode reward: [(0, '1109.560')] +[2026-06-02 17:00:36,210][262582] Updated weights for policy 0, policy_version 29733 (0.0008) +[2026-06-02 17:00:36,404][262582] Updated weights for policy 0, policy_version 29743 (0.0009) +[2026-06-02 17:00:36,600][262582] Updated weights for policy 0, policy_version 29753 (0.0008) +[2026-06-02 17:00:36,826][262582] Updated weights for policy 0, policy_version 29764 (0.0009) +[2026-06-02 17:00:37,030][262582] Updated weights for policy 0, policy_version 29774 (0.0009) +[2026-06-02 17:00:37,229][262582] Updated weights for policy 0, policy_version 29784 (0.0010) +[2026-06-02 17:00:37,386][262026] Saving new best policy, reward=1109.560! +[2026-06-02 17:00:37,940][262582] Updated weights for policy 0, policy_version 29794 (0.0008) +[2026-06-02 17:00:38,152][262582] Updated weights for policy 0, policy_version 29805 (0.0008) +[2026-06-02 17:00:38,355][262582] Updated weights for policy 0, policy_version 29815 (0.0008) +[2026-06-02 17:00:38,557][262582] Updated weights for policy 0, policy_version 29825 (0.0008) +[2026-06-02 17:00:38,756][262582] Updated weights for policy 0, policy_version 29835 (0.0008) +[2026-06-02 17:00:38,959][262582] Updated weights for policy 0, policy_version 29845 (0.0008) +[2026-06-02 17:00:39,165][262582] Updated weights for policy 0, policy_version 29855 (0.0008) +[2026-06-02 17:00:39,849][262582] Updated weights for policy 0, policy_version 29865 (0.0008) +[2026-06-02 17:00:40,052][262582] Updated weights for policy 0, policy_version 29875 (0.0008) +[2026-06-02 17:00:40,273][262582] Updated weights for policy 0, policy_version 29886 (0.0009) +[2026-06-02 17:00:40,478][262582] Updated weights for policy 0, policy_version 29896 (0.0008) +[2026-06-02 17:00:40,683][262582] Updated weights for policy 0, policy_version 29906 (0.0008) +[2026-06-02 17:00:40,875][262582] Updated weights for policy 0, policy_version 29916 (0.0008) +[2026-06-02 17:00:41,007][260776] Fps is (10 sec: 19661.3, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 15335424. Throughput: 0: 18403.6. Samples: 15335296. Policy #0 lag: (min: 45.0, avg: 62.7, max: 109.0) +[2026-06-02 17:00:41,008][260776] Avg episode reward: [(0, '1164.154')] +[2026-06-02 17:00:41,012][262026] Saving new best policy, reward=1164.154! +[2026-06-02 17:00:41,575][262582] Updated weights for policy 0, policy_version 29926 (0.0008) +[2026-06-02 17:00:41,767][262582] Updated weights for policy 0, policy_version 29936 (0.0008) +[2026-06-02 17:00:41,967][262582] Updated weights for policy 0, policy_version 29946 (0.0008) +[2026-06-02 17:00:42,171][262582] Updated weights for policy 0, policy_version 29956 (0.0008) +[2026-06-02 17:00:42,373][262582] Updated weights for policy 0, policy_version 29966 (0.0008) +[2026-06-02 17:00:42,578][262582] Updated weights for policy 0, policy_version 29976 (0.0008) +[2026-06-02 17:00:43,277][262582] Updated weights for policy 0, policy_version 29986 (0.0008) +[2026-06-02 17:00:43,466][262582] Updated weights for policy 0, policy_version 29996 (0.0008) +[2026-06-02 17:00:43,685][262582] Updated weights for policy 0, policy_version 30007 (0.0008) +[2026-06-02 17:00:43,883][262582] Updated weights for policy 0, policy_version 30017 (0.0009) +[2026-06-02 17:00:44,092][262582] Updated weights for policy 0, policy_version 30027 (0.0008) +[2026-06-02 17:00:44,290][262582] Updated weights for policy 0, policy_version 30037 (0.0008) +[2026-06-02 17:00:44,501][262582] Updated weights for policy 0, policy_version 30047 (0.0008) +[2026-06-02 17:00:45,184][262582] Updated weights for policy 0, policy_version 30057 (0.0008) +[2026-06-02 17:00:45,376][262582] Updated weights for policy 0, policy_version 30067 (0.0008) +[2026-06-02 17:00:45,605][262582] Updated weights for policy 0, policy_version 30078 (0.0008) +[2026-06-02 17:00:45,803][262582] Updated weights for policy 0, policy_version 30088 (0.0008) +[2026-06-02 17:00:46,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 15400960. Throughput: 0: 18463.3. Samples: 15451392. Policy #0 lag: (min: 45.0, avg: 62.7, max: 109.0) +[2026-06-02 17:00:46,008][260776] Avg episode reward: [(0, '1139.677')] +[2026-06-02 17:00:46,034][262582] Updated weights for policy 0, policy_version 30099 (0.0008) +[2026-06-02 17:00:46,229][262582] Updated weights for policy 0, policy_version 30109 (0.0008) +[2026-06-02 17:00:46,913][262582] Updated weights for policy 0, policy_version 30119 (0.0009) +[2026-06-02 17:00:47,128][262582] Updated weights for policy 0, policy_version 30130 (0.0008) +[2026-06-02 17:00:47,338][262582] Updated weights for policy 0, policy_version 30140 (0.0008) +[2026-06-02 17:00:47,535][262582] Updated weights for policy 0, policy_version 30150 (0.0009) +[2026-06-02 17:00:47,733][262582] Updated weights for policy 0, policy_version 30160 (0.0009) +[2026-06-02 17:00:47,934][262582] Updated weights for policy 0, policy_version 30170 (0.0009) +[2026-06-02 17:00:48,628][262582] Updated weights for policy 0, policy_version 30180 (0.0009) +[2026-06-02 17:00:48,825][262582] Updated weights for policy 0, policy_version 30190 (0.0008) +[2026-06-02 17:00:49,025][262582] Updated weights for policy 0, policy_version 30200 (0.0009) +[2026-06-02 17:00:49,248][262582] Updated weights for policy 0, policy_version 30211 (0.0009) +[2026-06-02 17:00:49,453][262582] Updated weights for policy 0, policy_version 30221 (0.0009) +[2026-06-02 17:00:49,654][262582] Updated weights for policy 0, policy_version 30231 (0.0008) +[2026-06-02 17:00:50,351][262582] Updated weights for policy 0, policy_version 30241 (0.0009) +[2026-06-02 17:00:50,538][262582] Updated weights for policy 0, policy_version 30251 (0.0008) +[2026-06-02 17:00:50,740][262582] Updated weights for policy 0, policy_version 30261 (0.0009) +[2026-06-02 17:00:50,944][262582] Updated weights for policy 0, policy_version 30271 (0.0009) +[2026-06-02 17:00:51,007][260776] Fps is (10 sec: 16383.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 15499264. Throughput: 0: 18449.1. Samples: 15503488. Policy #0 lag: (min: 45.0, avg: 62.7, max: 109.0) +[2026-06-02 17:00:51,008][260776] Avg episode reward: [(0, '1149.386')] +[2026-06-02 17:00:51,140][262582] Updated weights for policy 0, policy_version 30281 (0.0009) +[2026-06-02 17:00:51,344][262582] Updated weights for policy 0, policy_version 30291 (0.0009) +[2026-06-02 17:00:51,549][262582] Updated weights for policy 0, policy_version 30301 (0.0009) +[2026-06-02 17:00:52,231][262582] Updated weights for policy 0, policy_version 30311 (0.0009) +[2026-06-02 17:00:52,428][262582] Updated weights for policy 0, policy_version 30321 (0.0009) +[2026-06-02 17:00:52,626][262582] Updated weights for policy 0, policy_version 30331 (0.0009) +[2026-06-02 17:00:52,832][262582] Updated weights for policy 0, policy_version 30341 (0.0009) +[2026-06-02 17:00:53,054][262582] Updated weights for policy 0, policy_version 30352 (0.0008) +[2026-06-02 17:00:53,263][262582] Updated weights for policy 0, policy_version 30362 (0.0009) +[2026-06-02 17:00:53,952][262582] Updated weights for policy 0, policy_version 30372 (0.0009) +[2026-06-02 17:00:54,142][262582] Updated weights for policy 0, policy_version 30382 (0.0009) +[2026-06-02 17:00:54,345][262582] Updated weights for policy 0, policy_version 30392 (0.0008) +[2026-06-02 17:00:54,566][262582] Updated weights for policy 0, policy_version 30403 (0.0008) +[2026-06-02 17:00:54,773][262582] Updated weights for policy 0, policy_version 30413 (0.0008) +[2026-06-02 17:00:54,973][262582] Updated weights for policy 0, policy_version 30423 (0.0008) +[2026-06-02 17:00:55,680][262582] Updated weights for policy 0, policy_version 30433 (0.0009) +[2026-06-02 17:00:55,867][262582] Updated weights for policy 0, policy_version 30443 (0.0008) +[2026-06-02 17:00:56,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 15597568. Throughput: 0: 18343.9. Samples: 15615616. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 17:00:56,008][260776] Avg episode reward: [(0, '1145.500')] +[2026-06-02 17:00:56,065][262582] Updated weights for policy 0, policy_version 30453 (0.0008) +[2026-06-02 17:00:56,267][262582] Updated weights for policy 0, policy_version 30463 (0.0008) +[2026-06-02 17:00:56,473][262582] Updated weights for policy 0, policy_version 30473 (0.0009) +[2026-06-02 17:00:56,682][262582] Updated weights for policy 0, policy_version 30483 (0.0009) +[2026-06-02 17:00:56,875][262582] Updated weights for policy 0, policy_version 30493 (0.0008) +[2026-06-02 17:00:57,570][262582] Updated weights for policy 0, policy_version 30504 (0.0009) +[2026-06-02 17:00:57,785][262582] Updated weights for policy 0, policy_version 30515 (0.0009) +[2026-06-02 17:00:57,988][262582] Updated weights for policy 0, policy_version 30525 (0.0009) +[2026-06-02 17:00:58,226][262582] Updated weights for policy 0, policy_version 30537 (0.0009) +[2026-06-02 17:00:58,435][262582] Updated weights for policy 0, policy_version 30547 (0.0009) +[2026-06-02 17:00:58,643][262582] Updated weights for policy 0, policy_version 30557 (0.0009) +[2026-06-02 17:00:59,317][262582] Updated weights for policy 0, policy_version 30567 (0.0008) +[2026-06-02 17:00:59,503][262582] Updated weights for policy 0, policy_version 30577 (0.0008) +[2026-06-02 17:00:59,707][262582] Updated weights for policy 0, policy_version 30587 (0.0009) +[2026-06-02 17:00:59,915][262582] Updated weights for policy 0, policy_version 30597 (0.0008) +[2026-06-02 17:01:00,113][262582] Updated weights for policy 0, policy_version 30607 (0.0008) +[2026-06-02 17:01:00,342][262582] Updated weights for policy 0, policy_version 30618 (0.0008) +[2026-06-02 17:01:01,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 15695872. Throughput: 0: 18281.3. Samples: 15720704. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 17:01:01,008][260776] Avg episode reward: [(0, '1154.445')] +[2026-06-02 17:01:01,046][262582] Updated weights for policy 0, policy_version 30629 (0.0008) +[2026-06-02 17:01:01,237][262582] Updated weights for policy 0, policy_version 30639 (0.0008) +[2026-06-02 17:01:01,440][262582] Updated weights for policy 0, policy_version 30649 (0.0008) +[2026-06-02 17:01:01,646][262582] Updated weights for policy 0, policy_version 30659 (0.0008) +[2026-06-02 17:01:01,851][262582] Updated weights for policy 0, policy_version 30669 (0.0008) +[2026-06-02 17:01:02,053][262582] Updated weights for policy 0, policy_version 30679 (0.0008) +[2026-06-02 17:01:02,743][262582] Updated weights for policy 0, policy_version 30689 (0.0008) +[2026-06-02 17:01:02,929][262582] Updated weights for policy 0, policy_version 30699 (0.0004) +[2026-06-02 17:01:03,121][262582] Updated weights for policy 0, policy_version 30709 (0.0005) +[2026-06-02 17:01:03,334][262582] Updated weights for policy 0, policy_version 30719 (0.0004) +[2026-06-02 17:01:03,542][262582] Updated weights for policy 0, policy_version 30729 (0.0005) +[2026-06-02 17:01:03,746][262582] Updated weights for policy 0, policy_version 30739 (0.0005) +[2026-06-02 17:01:03,945][262582] Updated weights for policy 0, policy_version 30749 (0.0005) +[2026-06-02 17:01:04,624][262582] Updated weights for policy 0, policy_version 30759 (0.0005) +[2026-06-02 17:01:04,836][262582] Updated weights for policy 0, policy_version 30770 (0.0005) +[2026-06-02 17:01:05,035][262582] Updated weights for policy 0, policy_version 30780 (0.0007) +[2026-06-02 17:01:05,238][262582] Updated weights for policy 0, policy_version 30790 (0.0008) +[2026-06-02 17:01:05,448][262582] Updated weights for policy 0, policy_version 30800 (0.0008) +[2026-06-02 17:01:05,654][262582] Updated weights for policy 0, policy_version 30810 (0.0008) +[2026-06-02 17:01:06,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 15794176. Throughput: 0: 18255.6. Samples: 15778944. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 17:01:06,008][260776] Avg episode reward: [(0, '1174.549')] +[2026-06-02 17:01:06,014][262026] Saving new best policy, reward=1174.549! +[2026-06-02 17:01:06,353][262582] Updated weights for policy 0, policy_version 30820 (0.0010) +[2026-06-02 17:01:06,538][262582] Updated weights for policy 0, policy_version 30830 (0.0008) +[2026-06-02 17:01:06,729][262582] Updated weights for policy 0, policy_version 30840 (0.0009) +[2026-06-02 17:01:06,939][262582] Updated weights for policy 0, policy_version 30850 (0.0007) +[2026-06-02 17:01:07,165][262582] Updated weights for policy 0, policy_version 30861 (0.0009) +[2026-06-02 17:01:07,366][262582] Updated weights for policy 0, policy_version 30871 (0.0007) +[2026-06-02 17:01:08,067][262582] Updated weights for policy 0, policy_version 30882 (0.0008) +[2026-06-02 17:01:08,244][262582] Updated weights for policy 0, policy_version 30892 (0.0008) +[2026-06-02 17:01:08,450][262582] Updated weights for policy 0, policy_version 30902 (0.0008) +[2026-06-02 17:01:08,668][262582] Updated weights for policy 0, policy_version 30913 (0.0007) +[2026-06-02 17:01:08,873][262582] Updated weights for policy 0, policy_version 30923 (0.0004) +[2026-06-02 17:01:09,082][262582] Updated weights for policy 0, policy_version 30933 (0.0004) +[2026-06-02 17:01:09,286][262582] Updated weights for policy 0, policy_version 30943 (0.0004) +[2026-06-02 17:01:09,971][262582] Updated weights for policy 0, policy_version 30954 (0.0007) +[2026-06-02 17:01:10,165][262582] Updated weights for policy 0, policy_version 30964 (0.0008) +[2026-06-02 17:01:10,373][262582] Updated weights for policy 0, policy_version 30974 (0.0008) +[2026-06-02 17:01:10,575][262582] Updated weights for policy 0, policy_version 30984 (0.0008) +[2026-06-02 17:01:10,776][262582] Updated weights for policy 0, policy_version 30994 (0.0008) +[2026-06-02 17:01:10,979][262582] Updated weights for policy 0, policy_version 31004 (0.0008) +[2026-06-02 17:01:11,007][260776] Fps is (10 sec: 16384.1, 60 sec: 18022.5, 300 sec: 18327.9). Total num frames: 15859712. Throughput: 0: 18232.9. Samples: 15883648. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 17:01:11,008][260776] Avg episode reward: [(0, '1153.395')] +[2026-06-02 17:01:11,731][262582] Updated weights for policy 0, policy_version 31015 (0.0014) +[2026-06-02 17:01:11,920][262582] Updated weights for policy 0, policy_version 31025 (0.0005) +[2026-06-02 17:01:12,111][262582] Updated weights for policy 0, policy_version 31035 (0.0004) +[2026-06-02 17:01:12,315][262582] Updated weights for policy 0, policy_version 31045 (0.0004) +[2026-06-02 17:01:12,517][262582] Updated weights for policy 0, policy_version 31055 (0.0004) +[2026-06-02 17:01:12,730][262582] Updated weights for policy 0, policy_version 31065 (0.0004) +[2026-06-02 17:01:13,397][262582] Updated weights for policy 0, policy_version 31075 (0.0004) +[2026-06-02 17:01:13,575][262582] Updated weights for policy 0, policy_version 31085 (0.0005) +[2026-06-02 17:01:13,783][262582] Updated weights for policy 0, policy_version 31095 (0.0004) +[2026-06-02 17:01:13,977][262582] Updated weights for policy 0, policy_version 31105 (0.0004) +[2026-06-02 17:01:14,186][262582] Updated weights for policy 0, policy_version 31115 (0.0004) +[2026-06-02 17:01:14,390][262582] Updated weights for policy 0, policy_version 31125 (0.0007) +[2026-06-02 17:01:14,603][262582] Updated weights for policy 0, policy_version 31135 (0.0008) +[2026-06-02 17:01:15,282][262582] Updated weights for policy 0, policy_version 31146 (0.0008) +[2026-06-02 17:01:15,474][262582] Updated weights for policy 0, policy_version 31156 (0.0008) +[2026-06-02 17:01:15,679][262582] Updated weights for policy 0, policy_version 31166 (0.0008) +[2026-06-02 17:01:15,884][262582] Updated weights for policy 0, policy_version 31176 (0.0008) +[2026-06-02 17:01:16,007][260776] Fps is (10 sec: 16384.2, 60 sec: 18568.6, 300 sec: 18439.0). Total num frames: 15958016. Throughput: 0: 18449.2. Samples: 16000128. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 17:01:16,007][260776] Avg episode reward: [(0, '1185.951')] +[2026-06-02 17:01:16,095][262582] Updated weights for policy 0, policy_version 31186 (0.0008) +[2026-06-02 17:01:16,296][262582] Updated weights for policy 0, policy_version 31196 (0.0008) +[2026-06-02 17:01:16,366][262026] Saving new best policy, reward=1185.951! +[2026-06-02 17:01:16,994][262582] Updated weights for policy 0, policy_version 31207 (0.0008) +[2026-06-02 17:01:17,195][262582] Updated weights for policy 0, policy_version 31217 (0.0008) +[2026-06-02 17:01:17,394][262582] Updated weights for policy 0, policy_version 31227 (0.0008) +[2026-06-02 17:01:17,604][262582] Updated weights for policy 0, policy_version 31237 (0.0008) +[2026-06-02 17:01:17,824][262582] Updated weights for policy 0, policy_version 31248 (0.0008) +[2026-06-02 17:01:18,022][262582] Updated weights for policy 0, policy_version 31258 (0.0006) +[2026-06-02 17:01:18,735][262582] Updated weights for policy 0, policy_version 31269 (0.0007) +[2026-06-02 17:01:18,931][262582] Updated weights for policy 0, policy_version 31279 (0.0008) +[2026-06-02 17:01:19,131][262582] Updated weights for policy 0, policy_version 31289 (0.0008) +[2026-06-02 17:01:19,323][262582] Updated weights for policy 0, policy_version 31299 (0.0008) +[2026-06-02 17:01:19,538][262582] Updated weights for policy 0, policy_version 31309 (0.0008) +[2026-06-02 17:01:19,732][262582] Updated weights for policy 0, policy_version 31319 (0.0009) +[2026-06-02 17:01:20,428][262582] Updated weights for policy 0, policy_version 31329 (0.0009) +[2026-06-02 17:01:20,612][262582] Updated weights for policy 0, policy_version 31339 (0.0008) +[2026-06-02 17:01:20,806][262582] Updated weights for policy 0, policy_version 31349 (0.0008) +[2026-06-02 17:01:21,007][262582] Updated weights for policy 0, policy_version 31359 (0.0008) +[2026-06-02 17:01:21,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 16056320. Throughput: 0: 18264.2. Samples: 16049792. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 17:01:21,008][260776] Avg episode reward: [(0, '1269.255')] +[2026-06-02 17:01:21,207][262582] Updated weights for policy 0, policy_version 31369 (0.0008) +[2026-06-02 17:01:21,410][262582] Updated weights for policy 0, policy_version 31379 (0.0008) +[2026-06-02 17:01:21,607][262582] Updated weights for policy 0, policy_version 31389 (0.0009) +[2026-06-02 17:01:21,662][262026] Saving new best policy, reward=1269.255! +[2026-06-02 17:01:22,310][262582] Updated weights for policy 0, policy_version 31399 (0.0008) +[2026-06-02 17:01:22,505][262582] Updated weights for policy 0, policy_version 31409 (0.0008) +[2026-06-02 17:01:22,706][262582] Updated weights for policy 0, policy_version 31419 (0.0008) +[2026-06-02 17:01:22,919][262582] Updated weights for policy 0, policy_version 31429 (0.0008) +[2026-06-02 17:01:23,124][262582] Updated weights for policy 0, policy_version 31439 (0.0009) +[2026-06-02 17:01:23,320][262582] Updated weights for policy 0, policy_version 31449 (0.0008) +[2026-06-02 17:01:24,021][262582] Updated weights for policy 0, policy_version 31459 (0.0008) +[2026-06-02 17:01:24,207][262582] Updated weights for policy 0, policy_version 31469 (0.0008) +[2026-06-02 17:01:24,411][262582] Updated weights for policy 0, policy_version 31479 (0.0008) +[2026-06-02 17:01:24,608][262582] Updated weights for policy 0, policy_version 31489 (0.0008) +[2026-06-02 17:01:24,805][262582] Updated weights for policy 0, policy_version 31499 (0.0008) +[2026-06-02 17:01:25,014][262582] Updated weights for policy 0, policy_version 31509 (0.0008) +[2026-06-02 17:01:25,214][262582] Updated weights for policy 0, policy_version 31519 (0.0008) +[2026-06-02 17:01:25,900][262582] Updated weights for policy 0, policy_version 31529 (0.0008) +[2026-06-02 17:01:26,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 16154624. Throughput: 0: 18429.2. Samples: 16164608. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 17:01:26,008][260776] Avg episode reward: [(0, '1282.787')] +[2026-06-02 17:01:26,087][262582] Updated weights for policy 0, policy_version 31539 (0.0008) +[2026-06-02 17:01:26,288][262582] Updated weights for policy 0, policy_version 31549 (0.0008) +[2026-06-02 17:01:26,497][262582] Updated weights for policy 0, policy_version 31559 (0.0008) +[2026-06-02 17:01:26,703][262582] Updated weights for policy 0, policy_version 31569 (0.0008) +[2026-06-02 17:01:26,907][262582] Updated weights for policy 0, policy_version 31579 (0.0008) +[2026-06-02 17:01:26,998][262026] Saving new best policy, reward=1282.787! +[2026-06-02 17:01:27,607][262582] Updated weights for policy 0, policy_version 31589 (0.0008) +[2026-06-02 17:01:27,800][262582] Updated weights for policy 0, policy_version 31599 (0.0008) +[2026-06-02 17:01:28,008][262582] Updated weights for policy 0, policy_version 31609 (0.0008) +[2026-06-02 17:01:28,207][262582] Updated weights for policy 0, policy_version 31619 (0.0008) +[2026-06-02 17:01:28,410][262582] Updated weights for policy 0, policy_version 31629 (0.0008) +[2026-06-02 17:01:28,613][262582] Updated weights for policy 0, policy_version 31639 (0.0008) +[2026-06-02 17:01:29,316][262582] Updated weights for policy 0, policy_version 31649 (0.0009) +[2026-06-02 17:01:29,512][262582] Updated weights for policy 0, policy_version 31660 (0.0008) +[2026-06-02 17:01:29,719][262582] Updated weights for policy 0, policy_version 31670 (0.0008) +[2026-06-02 17:01:29,919][262582] Updated weights for policy 0, policy_version 31680 (0.0009) +[2026-06-02 17:01:30,125][262582] Updated weights for policy 0, policy_version 31690 (0.0008) +[2026-06-02 17:01:30,326][262582] Updated weights for policy 0, policy_version 31700 (0.0008) +[2026-06-02 17:01:30,529][262582] Updated weights for policy 0, policy_version 31710 (0.0008) +[2026-06-02 17:01:31,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 16252928. Throughput: 0: 18190.2. Samples: 16269952. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 17:01:31,008][260776] Avg episode reward: [(0, '1280.216')] +[2026-06-02 17:01:31,214][262582] Updated weights for policy 0, policy_version 31720 (0.0008) +[2026-06-02 17:01:31,410][262582] Updated weights for policy 0, policy_version 31730 (0.0009) +[2026-06-02 17:01:31,618][262582] Updated weights for policy 0, policy_version 31740 (0.0008) +[2026-06-02 17:01:31,821][262582] Updated weights for policy 0, policy_version 31750 (0.0009) +[2026-06-02 17:01:32,018][262582] Updated weights for policy 0, policy_version 31760 (0.0005) +[2026-06-02 17:01:32,228][262582] Updated weights for policy 0, policy_version 31770 (0.0011) +[2026-06-02 17:01:32,930][262582] Updated weights for policy 0, policy_version 31780 (0.0010) +[2026-06-02 17:01:33,107][262582] Updated weights for policy 0, policy_version 31790 (0.0011) +[2026-06-02 17:01:33,317][262582] Updated weights for policy 0, policy_version 31800 (0.0010) +[2026-06-02 17:01:33,536][262582] Updated weights for policy 0, policy_version 31811 (0.0005) +[2026-06-02 17:01:33,749][262582] Updated weights for policy 0, policy_version 31821 (0.0009) +[2026-06-02 17:01:33,947][262582] Updated weights for policy 0, policy_version 31831 (0.0009) +[2026-06-02 17:01:34,645][262582] Updated weights for policy 0, policy_version 31841 (0.0009) +[2026-06-02 17:01:34,820][262582] Updated weights for policy 0, policy_version 31851 (0.0008) +[2026-06-02 17:01:35,023][262582] Updated weights for policy 0, policy_version 31861 (0.0009) +[2026-06-02 17:01:35,224][262582] Updated weights for policy 0, policy_version 31871 (0.0008) +[2026-06-02 17:01:35,448][262582] Updated weights for policy 0, policy_version 31882 (0.0008) +[2026-06-02 17:01:35,649][262582] Updated weights for policy 0, policy_version 31892 (0.0008) +[2026-06-02 17:01:35,844][262582] Updated weights for policy 0, policy_version 31902 (0.0008) +[2026-06-02 17:01:36,007][260776] Fps is (10 sec: 19660.6, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 16351232. Throughput: 0: 18352.4. Samples: 16329344. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 17:01:36,008][260776] Avg episode reward: [(0, '1255.425')] +[2026-06-02 17:01:36,540][262582] Updated weights for policy 0, policy_version 31912 (0.0008) +[2026-06-02 17:01:36,739][262582] Updated weights for policy 0, policy_version 31922 (0.0009) +[2026-06-02 17:01:36,942][262582] Updated weights for policy 0, policy_version 31932 (0.0009) +[2026-06-02 17:01:37,149][262582] Updated weights for policy 0, policy_version 31942 (0.0008) +[2026-06-02 17:01:37,341][262582] Updated weights for policy 0, policy_version 31952 (0.0008) +[2026-06-02 17:01:37,555][262582] Updated weights for policy 0, policy_version 31962 (0.0008) +[2026-06-02 17:01:38,249][262582] Updated weights for policy 0, policy_version 31972 (0.0008) +[2026-06-02 17:01:38,437][262582] Updated weights for policy 0, policy_version 31982 (0.0008) +[2026-06-02 17:01:38,643][262582] Updated weights for policy 0, policy_version 31992 (0.0008) +[2026-06-02 17:01:38,843][262582] Updated weights for policy 0, policy_version 32002 (0.0008) +[2026-06-02 17:01:39,045][262582] Updated weights for policy 0, policy_version 32012 (0.0008) +[2026-06-02 17:01:39,247][262582] Updated weights for policy 0, policy_version 32022 (0.0008) +[2026-06-02 17:01:39,437][262582] Updated weights for policy 0, policy_version 32032 (0.0008) +[2026-06-02 17:01:40,126][262582] Updated weights for policy 0, policy_version 32042 (0.0008) +[2026-06-02 17:01:40,323][262582] Updated weights for policy 0, policy_version 32052 (0.0008) +[2026-06-02 17:01:40,526][262582] Updated weights for policy 0, policy_version 32062 (0.0008) +[2026-06-02 17:01:40,728][262582] Updated weights for policy 0, policy_version 32072 (0.0008) +[2026-06-02 17:01:40,929][262582] Updated weights for policy 0, policy_version 32082 (0.0008) +[2026-06-02 17:01:41,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 18327.9). Total num frames: 16416768. Throughput: 0: 18210.2. Samples: 16435072. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 17:01:41,008][260776] Avg episode reward: [(0, '1267.365')] +[2026-06-02 17:01:41,135][262582] Updated weights for policy 0, policy_version 32092 (0.0008) +[2026-06-02 17:01:41,822][262582] Updated weights for policy 0, policy_version 32102 (0.0008) +[2026-06-02 17:01:42,020][262582] Updated weights for policy 0, policy_version 32112 (0.0008) +[2026-06-02 17:01:42,224][262582] Updated weights for policy 0, policy_version 32122 (0.0009) +[2026-06-02 17:01:42,426][262582] Updated weights for policy 0, policy_version 32132 (0.0008) +[2026-06-02 17:01:42,629][262582] Updated weights for policy 0, policy_version 32142 (0.0008) +[2026-06-02 17:01:42,831][262582] Updated weights for policy 0, policy_version 32152 (0.0008) +[2026-06-02 17:01:43,510][262582] Updated weights for policy 0, policy_version 32162 (0.0008) +[2026-06-02 17:01:43,730][262582] Updated weights for policy 0, policy_version 32173 (0.0009) +[2026-06-02 17:01:43,924][262582] Updated weights for policy 0, policy_version 32183 (0.0008) +[2026-06-02 17:01:44,121][262582] Updated weights for policy 0, policy_version 32193 (0.0008) +[2026-06-02 17:01:44,322][262582] Updated weights for policy 0, policy_version 32203 (0.0008) +[2026-06-02 17:01:44,535][262582] Updated weights for policy 0, policy_version 32213 (0.0009) +[2026-06-02 17:01:44,731][262582] Updated weights for policy 0, policy_version 32223 (0.0008) +[2026-06-02 17:01:45,415][262582] Updated weights for policy 0, policy_version 32233 (0.0009) +[2026-06-02 17:01:45,615][262582] Updated weights for policy 0, policy_version 32243 (0.0008) +[2026-06-02 17:01:45,817][262582] Updated weights for policy 0, policy_version 32253 (0.0008) +[2026-06-02 17:01:46,007][260776] Fps is (10 sec: 16384.1, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 16515072. Throughput: 0: 18471.8. Samples: 16551936. Policy #0 lag: (min: 32.0, avg: 67.6, max: 96.0) +[2026-06-02 17:01:46,008][260776] Avg episode reward: [(0, '1283.939')] +[2026-06-02 17:01:46,010][262582] Updated weights for policy 0, policy_version 32263 (0.0008) +[2026-06-02 17:01:46,228][262582] Updated weights for policy 0, policy_version 32273 (0.0008) +[2026-06-02 17:01:46,441][262582] Updated weights for policy 0, policy_version 32284 (0.0008) +[2026-06-02 17:01:46,521][262026] Saving new best policy, reward=1283.939! +[2026-06-02 17:01:47,114][262582] Updated weights for policy 0, policy_version 32294 (0.0008) +[2026-06-02 17:01:47,307][262582] Updated weights for policy 0, policy_version 32304 (0.0008) +[2026-06-02 17:01:47,517][262582] Updated weights for policy 0, policy_version 32314 (0.0008) +[2026-06-02 17:01:47,722][262582] Updated weights for policy 0, policy_version 32324 (0.0008) +[2026-06-02 17:01:47,926][262582] Updated weights for policy 0, policy_version 32334 (0.0008) +[2026-06-02 17:01:48,131][262582] Updated weights for policy 0, policy_version 32344 (0.0008) +[2026-06-02 17:01:48,815][262582] Updated weights for policy 0, policy_version 32354 (0.0009) +[2026-06-02 17:01:49,007][262582] Updated weights for policy 0, policy_version 32364 (0.0008) +[2026-06-02 17:01:49,207][262582] Updated weights for policy 0, policy_version 32374 (0.0008) +[2026-06-02 17:01:49,429][262582] Updated weights for policy 0, policy_version 32385 (0.0009) +[2026-06-02 17:01:49,630][262582] Updated weights for policy 0, policy_version 32395 (0.0009) +[2026-06-02 17:01:49,835][262582] Updated weights for policy 0, policy_version 32405 (0.0008) +[2026-06-02 17:01:50,038][262582] Updated weights for policy 0, policy_version 32415 (0.0008) +[2026-06-02 17:01:50,732][262582] Updated weights for policy 0, policy_version 32425 (0.0008) +[2026-06-02 17:01:50,932][262582] Updated weights for policy 0, policy_version 32435 (0.0008) +[2026-06-02 17:01:51,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.6, 300 sec: 18439.0). Total num frames: 16613376. Throughput: 0: 18230.1. Samples: 16599296. Policy #0 lag: (min: 32.0, avg: 67.6, max: 96.0) +[2026-06-02 17:01:51,008][260776] Avg episode reward: [(0, '1280.299')] +[2026-06-02 17:01:51,130][262582] Updated weights for policy 0, policy_version 32445 (0.0009) +[2026-06-02 17:01:51,337][262582] Updated weights for policy 0, policy_version 32455 (0.0008) +[2026-06-02 17:01:51,541][262582] Updated weights for policy 0, policy_version 32465 (0.0008) +[2026-06-02 17:01:51,737][262582] Updated weights for policy 0, policy_version 32475 (0.0009) +[2026-06-02 17:01:52,433][262582] Updated weights for policy 0, policy_version 32485 (0.0009) +[2026-06-02 17:01:52,649][262582] Updated weights for policy 0, policy_version 32496 (0.0009) +[2026-06-02 17:01:52,851][262582] Updated weights for policy 0, policy_version 32506 (0.0009) +[2026-06-02 17:01:53,054][262582] Updated weights for policy 0, policy_version 32516 (0.0009) +[2026-06-02 17:01:53,246][262582] Updated weights for policy 0, policy_version 32526 (0.0008) +[2026-06-02 17:01:53,461][262582] Updated weights for policy 0, policy_version 32536 (0.0009) +[2026-06-02 17:01:54,160][262582] Updated weights for policy 0, policy_version 32546 (0.0009) +[2026-06-02 17:01:54,354][262582] Updated weights for policy 0, policy_version 32556 (0.0009) +[2026-06-02 17:01:54,552][262582] Updated weights for policy 0, policy_version 32566 (0.0008) +[2026-06-02 17:01:54,749][262582] Updated weights for policy 0, policy_version 32576 (0.0008) +[2026-06-02 17:01:54,961][262582] Updated weights for policy 0, policy_version 32586 (0.0008) +[2026-06-02 17:01:55,154][262582] Updated weights for policy 0, policy_version 32596 (0.0008) +[2026-06-02 17:01:55,371][262582] Updated weights for policy 0, policy_version 32606 (0.0008) +[2026-06-02 17:01:56,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 16711680. Throughput: 0: 18525.9. Samples: 16717312. Policy #0 lag: (min: 32.0, avg: 67.6, max: 96.0) +[2026-06-02 17:01:56,008][260776] Avg episode reward: [(0, '1361.714')] +[2026-06-02 17:01:56,044][262582] Updated weights for policy 0, policy_version 32616 (0.0009) +[2026-06-02 17:01:56,242][262582] Updated weights for policy 0, policy_version 32626 (0.0009) +[2026-06-02 17:01:56,435][262582] Updated weights for policy 0, policy_version 32636 (0.0008) +[2026-06-02 17:01:56,636][262582] Updated weights for policy 0, policy_version 32646 (0.0008) +[2026-06-02 17:01:56,848][262582] Updated weights for policy 0, policy_version 32656 (0.0008) +[2026-06-02 17:01:57,074][262582] Updated weights for policy 0, policy_version 32667 (0.0009) +[2026-06-02 17:01:57,163][262026] Saving new best policy, reward=1361.714! +[2026-06-02 17:01:57,763][262582] Updated weights for policy 0, policy_version 32677 (0.0008) +[2026-06-02 17:01:57,965][262582] Updated weights for policy 0, policy_version 32687 (0.0008) +[2026-06-02 17:01:58,155][262582] Updated weights for policy 0, policy_version 32697 (0.0008) +[2026-06-02 17:01:58,358][262582] Updated weights for policy 0, policy_version 32707 (0.0008) +[2026-06-02 17:01:58,568][262582] Updated weights for policy 0, policy_version 32717 (0.0009) +[2026-06-02 17:01:58,764][262582] Updated weights for policy 0, policy_version 32727 (0.0008) +[2026-06-02 17:01:59,464][262582] Updated weights for policy 0, policy_version 32737 (0.0008) +[2026-06-02 17:01:59,643][262582] Updated weights for policy 0, policy_version 32747 (0.0008) +[2026-06-02 17:01:59,853][262582] Updated weights for policy 0, policy_version 32757 (0.0008) +[2026-06-02 17:02:00,065][262582] Updated weights for policy 0, policy_version 32767 (0.0008) +[2026-06-02 17:02:00,254][262582] Updated weights for policy 0, policy_version 32777 (0.0009) +[2026-06-02 17:02:00,462][262582] Updated weights for policy 0, policy_version 32787 (0.0009) +[2026-06-02 17:02:00,684][262582] Updated weights for policy 0, policy_version 32798 (0.0009) +[2026-06-02 17:02:01,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 16809984. Throughput: 0: 18295.4. Samples: 16823424. Policy #0 lag: (min: 32.0, avg: 67.6, max: 96.0) +[2026-06-02 17:02:01,008][260776] Avg episode reward: [(0, '1315.003')] +[2026-06-02 17:02:01,356][262582] Updated weights for policy 0, policy_version 32808 (0.0008) +[2026-06-02 17:02:01,562][262582] Updated weights for policy 0, policy_version 32818 (0.0008) +[2026-06-02 17:02:01,754][262582] Updated weights for policy 0, policy_version 32828 (0.0008) +[2026-06-02 17:02:01,989][262582] Updated weights for policy 0, policy_version 32839 (0.0008) +[2026-06-02 17:02:02,190][262582] Updated weights for policy 0, policy_version 32849 (0.0009) +[2026-06-02 17:02:02,393][262582] Updated weights for policy 0, policy_version 32859 (0.0008) +[2026-06-02 17:02:03,091][262582] Updated weights for policy 0, policy_version 32869 (0.0008) +[2026-06-02 17:02:03,286][262582] Updated weights for policy 0, policy_version 32879 (0.0009) +[2026-06-02 17:02:03,486][262582] Updated weights for policy 0, policy_version 32889 (0.0008) +[2026-06-02 17:02:03,684][262582] Updated weights for policy 0, policy_version 32899 (0.0008) +[2026-06-02 17:02:03,892][262582] Updated weights for policy 0, policy_version 32909 (0.0008) +[2026-06-02 17:02:04,095][262582] Updated weights for policy 0, policy_version 32919 (0.0008) +[2026-06-02 17:02:04,796][262582] Updated weights for policy 0, policy_version 32929 (0.0008) +[2026-06-02 17:02:04,984][262582] Updated weights for policy 0, policy_version 32939 (0.0008) +[2026-06-02 17:02:05,188][262582] Updated weights for policy 0, policy_version 32949 (0.0008) +[2026-06-02 17:02:05,381][262582] Updated weights for policy 0, policy_version 32959 (0.0008) +[2026-06-02 17:02:05,590][262582] Updated weights for policy 0, policy_version 32969 (0.0008) +[2026-06-02 17:02:05,795][262582] Updated weights for policy 0, policy_version 32979 (0.0008) +[2026-06-02 17:02:05,994][262582] Updated weights for policy 0, policy_version 32989 (0.0008) +[2026-06-02 17:02:06,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18022.4, 300 sec: 18327.9). Total num frames: 16875520. Throughput: 0: 18500.3. Samples: 16882304. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 17:02:06,008][260776] Avg episode reward: [(0, '1282.376')] +[2026-06-02 17:02:06,679][262582] Updated weights for policy 0, policy_version 32999 (0.0008) +[2026-06-02 17:02:06,883][262582] Updated weights for policy 0, policy_version 33009 (0.0008) +[2026-06-02 17:02:07,086][262582] Updated weights for policy 0, policy_version 33019 (0.0008) +[2026-06-02 17:02:07,282][262582] Updated weights for policy 0, policy_version 33029 (0.0009) +[2026-06-02 17:02:07,494][262582] Updated weights for policy 0, policy_version 33039 (0.0008) +[2026-06-02 17:02:07,701][262582] Updated weights for policy 0, policy_version 33049 (0.0008) +[2026-06-02 17:02:08,366][262582] Updated weights for policy 0, policy_version 33059 (0.0008) +[2026-06-02 17:02:08,564][262582] Updated weights for policy 0, policy_version 33069 (0.0008) +[2026-06-02 17:02:08,762][262582] Updated weights for policy 0, policy_version 33079 (0.0008) +[2026-06-02 17:02:08,968][262582] Updated weights for policy 0, policy_version 33089 (0.0009) +[2026-06-02 17:02:09,170][262582] Updated weights for policy 0, policy_version 33099 (0.0008) +[2026-06-02 17:02:09,375][262582] Updated weights for policy 0, policy_version 33109 (0.0009) +[2026-06-02 17:02:09,569][262582] Updated weights for policy 0, policy_version 33119 (0.0009) +[2026-06-02 17:02:10,263][262582] Updated weights for policy 0, policy_version 33129 (0.0008) +[2026-06-02 17:02:10,460][262582] Updated weights for policy 0, policy_version 33139 (0.0005) +[2026-06-02 17:02:10,668][262582] Updated weights for policy 0, policy_version 33149 (0.0007) +[2026-06-02 17:02:10,872][262582] Updated weights for policy 0, policy_version 33159 (0.0009) +[2026-06-02 17:02:11,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 16973824. Throughput: 0: 18304.0. Samples: 16988288. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 17:02:11,008][260776] Avg episode reward: [(0, '1286.523')] +[2026-06-02 17:02:11,077][262582] Updated weights for policy 0, policy_version 33169 (0.0008) +[2026-06-02 17:02:11,279][262582] Updated weights for policy 0, policy_version 33179 (0.0009) +[2026-06-02 17:02:11,992][262582] Updated weights for policy 0, policy_version 33189 (0.0009) +[2026-06-02 17:02:12,211][262582] Updated weights for policy 0, policy_version 33200 (0.0009) +[2026-06-02 17:02:12,410][262582] Updated weights for policy 0, policy_version 33210 (0.0009) +[2026-06-02 17:02:12,618][262582] Updated weights for policy 0, policy_version 33220 (0.0009) +[2026-06-02 17:02:12,813][262582] Updated weights for policy 0, policy_version 33230 (0.0008) +[2026-06-02 17:02:13,016][262582] Updated weights for policy 0, policy_version 33240 (0.0008) +[2026-06-02 17:02:13,682][262582] Updated weights for policy 0, policy_version 33250 (0.0008) +[2026-06-02 17:02:13,857][262582] Updated weights for policy 0, policy_version 33260 (0.0008) +[2026-06-02 17:02:14,065][262582] Updated weights for policy 0, policy_version 33270 (0.0009) +[2026-06-02 17:02:14,273][262582] Updated weights for policy 0, policy_version 33280 (0.0008) +[2026-06-02 17:02:14,467][262582] Updated weights for policy 0, policy_version 33290 (0.0008) +[2026-06-02 17:02:14,672][262582] Updated weights for policy 0, policy_version 33300 (0.0008) +[2026-06-02 17:02:14,882][262582] Updated weights for policy 0, policy_version 33310 (0.0008) +[2026-06-02 17:02:15,542][262582] Updated weights for policy 0, policy_version 33320 (0.0008) +[2026-06-02 17:02:15,749][262582] Updated weights for policy 0, policy_version 33330 (0.0009) +[2026-06-02 17:02:15,945][262582] Updated weights for policy 0, policy_version 33340 (0.0008) +[2026-06-02 17:02:16,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18439.0). Total num frames: 17072128. Throughput: 0: 18565.7. Samples: 17105408. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 17:02:16,008][260776] Avg episode reward: [(0, '1278.771')] +[2026-06-02 17:02:16,146][262582] Updated weights for policy 0, policy_version 33350 (0.0008) +[2026-06-02 17:02:16,357][262582] Updated weights for policy 0, policy_version 33360 (0.0009) +[2026-06-02 17:02:16,557][262582] Updated weights for policy 0, policy_version 33370 (0.0008) +[2026-06-02 17:02:17,247][262582] Updated weights for policy 0, policy_version 33380 (0.0008) +[2026-06-02 17:02:17,437][262582] Updated weights for policy 0, policy_version 33390 (0.0008) +[2026-06-02 17:02:17,642][262582] Updated weights for policy 0, policy_version 33400 (0.0008) +[2026-06-02 17:02:17,836][262582] Updated weights for policy 0, policy_version 33410 (0.0008) +[2026-06-02 17:02:18,045][262582] Updated weights for policy 0, policy_version 33420 (0.0008) +[2026-06-02 17:02:18,239][262582] Updated weights for policy 0, policy_version 33430 (0.0008) +[2026-06-02 17:02:18,444][262582] Updated weights for policy 0, policy_version 33440 (0.0008) +[2026-06-02 17:02:19,124][262582] Updated weights for policy 0, policy_version 33450 (0.0009) +[2026-06-02 17:02:19,322][262582] Updated weights for policy 0, policy_version 33460 (0.0009) +[2026-06-02 17:02:19,518][262582] Updated weights for policy 0, policy_version 33470 (0.0008) +[2026-06-02 17:02:19,721][262582] Updated weights for policy 0, policy_version 33480 (0.0008) +[2026-06-02 17:02:19,936][262582] Updated weights for policy 0, policy_version 33490 (0.0009) +[2026-06-02 17:02:20,139][262582] Updated weights for policy 0, policy_version 33500 (0.0008) +[2026-06-02 17:02:20,822][262582] Updated weights for policy 0, policy_version 33510 (0.0008) +[2026-06-02 17:02:21,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 17170432. Throughput: 0: 18289.8. Samples: 17152384. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 17:02:21,008][260776] Avg episode reward: [(0, '1256.073')] +[2026-06-02 17:02:21,020][262582] Updated weights for policy 0, policy_version 33520 (0.0008) +[2026-06-02 17:02:21,225][262582] Updated weights for policy 0, policy_version 33530 (0.0009) +[2026-06-02 17:02:21,450][262582] Updated weights for policy 0, policy_version 33541 (0.0008) +[2026-06-02 17:02:21,651][262582] Updated weights for policy 0, policy_version 33551 (0.0008) +[2026-06-02 17:02:21,852][262582] Updated weights for policy 0, policy_version 33561 (0.0009) +[2026-06-02 17:02:22,557][262582] Updated weights for policy 0, policy_version 33571 (0.0008) +[2026-06-02 17:02:22,735][262582] Updated weights for policy 0, policy_version 33581 (0.0008) +[2026-06-02 17:02:22,943][262582] Updated weights for policy 0, policy_version 33591 (0.0008) +[2026-06-02 17:02:23,147][262582] Updated weights for policy 0, policy_version 33601 (0.0008) +[2026-06-02 17:02:23,355][262582] Updated weights for policy 0, policy_version 33611 (0.0008) +[2026-06-02 17:02:23,581][262582] Updated weights for policy 0, policy_version 33622 (0.0009) +[2026-06-02 17:02:23,781][262582] Updated weights for policy 0, policy_version 33632 (0.0009) +[2026-06-02 17:02:24,460][262582] Updated weights for policy 0, policy_version 33642 (0.0008) +[2026-06-02 17:02:24,660][262582] Updated weights for policy 0, policy_version 33652 (0.0009) +[2026-06-02 17:02:24,861][262582] Updated weights for policy 0, policy_version 33662 (0.0008) +[2026-06-02 17:02:25,063][262582] Updated weights for policy 0, policy_version 33672 (0.0008) +[2026-06-02 17:02:25,287][262582] Updated weights for policy 0, policy_version 33682 (0.0008) +[2026-06-02 17:02:25,487][262582] Updated weights for policy 0, policy_version 33692 (0.0008) +[2026-06-02 17:02:26,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 17268736. Throughput: 0: 18557.2. Samples: 17270144. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 17:02:26,008][260776] Avg episode reward: [(0, '1308.766')] +[2026-06-02 17:02:26,201][262582] Updated weights for policy 0, policy_version 33703 (0.0008) +[2026-06-02 17:02:26,389][262582] Updated weights for policy 0, policy_version 33713 (0.0008) +[2026-06-02 17:02:26,616][262582] Updated weights for policy 0, policy_version 33724 (0.0009) +[2026-06-02 17:02:26,819][262582] Updated weights for policy 0, policy_version 33734 (0.0008) +[2026-06-02 17:02:27,021][262582] Updated weights for policy 0, policy_version 33744 (0.0008) +[2026-06-02 17:02:27,221][262582] Updated weights for policy 0, policy_version 33754 (0.0008) +[2026-06-02 17:02:27,921][262582] Updated weights for policy 0, policy_version 33764 (0.0008) +[2026-06-02 17:02:28,122][262582] Updated weights for policy 0, policy_version 33774 (0.0008) +[2026-06-02 17:02:28,314][262582] Updated weights for policy 0, policy_version 33784 (0.0009) +[2026-06-02 17:02:28,516][262582] Updated weights for policy 0, policy_version 33794 (0.0008) +[2026-06-02 17:02:28,719][262582] Updated weights for policy 0, policy_version 33804 (0.0009) +[2026-06-02 17:02:28,922][262582] Updated weights for policy 0, policy_version 33814 (0.0008) +[2026-06-02 17:02:29,118][262582] Updated weights for policy 0, policy_version 33824 (0.0008) +[2026-06-02 17:02:29,826][262582] Updated weights for policy 0, policy_version 33834 (0.0008) +[2026-06-02 17:02:30,049][262582] Updated weights for policy 0, policy_version 33845 (0.0009) +[2026-06-02 17:02:30,250][262582] Updated weights for policy 0, policy_version 33855 (0.0008) +[2026-06-02 17:02:30,454][262582] Updated weights for policy 0, policy_version 33865 (0.0008) +[2026-06-02 17:02:30,658][262582] Updated weights for policy 0, policy_version 33875 (0.0009) +[2026-06-02 17:02:30,852][262582] Updated weights for policy 0, policy_version 33885 (0.0008) +[2026-06-02 17:02:31,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 17367040. Throughput: 0: 18335.3. Samples: 17377024. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 17:02:31,008][260776] Avg episode reward: [(0, '1325.454')] +[2026-06-02 17:02:31,554][262582] Updated weights for policy 0, policy_version 33895 (0.0009) +[2026-06-02 17:02:31,748][262582] Updated weights for policy 0, policy_version 33905 (0.0009) +[2026-06-02 17:02:31,952][262582] Updated weights for policy 0, policy_version 33915 (0.0008) +[2026-06-02 17:02:32,157][262582] Updated weights for policy 0, policy_version 33925 (0.0008) +[2026-06-02 17:02:32,380][262582] Updated weights for policy 0, policy_version 33936 (0.0008) +[2026-06-02 17:02:32,583][262582] Updated weights for policy 0, policy_version 33946 (0.0008) +[2026-06-02 17:02:33,281][262582] Updated weights for policy 0, policy_version 33956 (0.0009) +[2026-06-02 17:02:33,473][262582] Updated weights for policy 0, policy_version 33966 (0.0009) +[2026-06-02 17:02:33,675][262582] Updated weights for policy 0, policy_version 33976 (0.0009) +[2026-06-02 17:02:33,899][262582] Updated weights for policy 0, policy_version 33987 (0.0008) +[2026-06-02 17:02:34,100][262582] Updated weights for policy 0, policy_version 33997 (0.0009) +[2026-06-02 17:02:34,332][262582] Updated weights for policy 0, policy_version 34008 (0.0008) +[2026-06-02 17:02:35,023][262582] Updated weights for policy 0, policy_version 34018 (0.0008) +[2026-06-02 17:02:35,219][262582] Updated weights for policy 0, policy_version 34028 (0.0008) +[2026-06-02 17:02:35,411][262582] Updated weights for policy 0, policy_version 34038 (0.0008) +[2026-06-02 17:02:35,626][262582] Updated weights for policy 0, policy_version 34048 (0.0008) +[2026-06-02 17:02:35,845][262582] Updated weights for policy 0, policy_version 34059 (0.0008) +[2026-06-02 17:02:36,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18022.4, 300 sec: 18327.9). Total num frames: 17432576. Throughput: 0: 18596.9. Samples: 17436160. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 17:02:36,008][260776] Avg episode reward: [(0, '1359.125')] +[2026-06-02 17:02:36,045][262582] Updated weights for policy 0, policy_version 34069 (0.0008) +[2026-06-02 17:02:36,252][262582] Updated weights for policy 0, policy_version 34079 (0.0008) +[2026-06-02 17:02:36,919][262582] Updated weights for policy 0, policy_version 34089 (0.0008) +[2026-06-02 17:02:37,127][262582] Updated weights for policy 0, policy_version 34099 (0.0009) +[2026-06-02 17:02:37,345][262582] Updated weights for policy 0, policy_version 34110 (0.0008) +[2026-06-02 17:02:37,544][262582] Updated weights for policy 0, policy_version 34120 (0.0008) +[2026-06-02 17:02:37,756][262582] Updated weights for policy 0, policy_version 34130 (0.0008) +[2026-06-02 17:02:37,958][262582] Updated weights for policy 0, policy_version 34140 (0.0008) +[2026-06-02 17:02:38,650][262582] Updated weights for policy 0, policy_version 34150 (0.0008) +[2026-06-02 17:02:38,846][262582] Updated weights for policy 0, policy_version 34160 (0.0008) +[2026-06-02 17:02:39,052][262582] Updated weights for policy 0, policy_version 34170 (0.0008) +[2026-06-02 17:02:39,261][262582] Updated weights for policy 0, policy_version 34180 (0.0008) +[2026-06-02 17:02:39,458][262582] Updated weights for policy 0, policy_version 34190 (0.0008) +[2026-06-02 17:02:39,656][262582] Updated weights for policy 0, policy_version 34200 (0.0008) +[2026-06-02 17:02:40,364][262582] Updated weights for policy 0, policy_version 34210 (0.0008) +[2026-06-02 17:02:40,553][262582] Updated weights for policy 0, policy_version 34220 (0.0008) +[2026-06-02 17:02:40,750][262582] Updated weights for policy 0, policy_version 34230 (0.0008) +[2026-06-02 17:02:40,953][262582] Updated weights for policy 0, policy_version 34240 (0.0008) +[2026-06-02 17:02:41,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 17530880. Throughput: 0: 18346.7. Samples: 17542912. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 17:02:41,008][260776] Avg episode reward: [(0, '1403.505')] +[2026-06-02 17:02:41,162][262582] Updated weights for policy 0, policy_version 34250 (0.0008) +[2026-06-02 17:02:41,352][262582] Updated weights for policy 0, policy_version 34260 (0.0008) +[2026-06-02 17:02:41,566][262582] Updated weights for policy 0, policy_version 34270 (0.0008) +[2026-06-02 17:02:41,596][262026] Saving new best policy, reward=1403.505! +[2026-06-02 17:02:42,281][262582] Updated weights for policy 0, policy_version 34280 (0.0008) +[2026-06-02 17:02:42,478][262582] Updated weights for policy 0, policy_version 34290 (0.0008) +[2026-06-02 17:02:42,680][262582] Updated weights for policy 0, policy_version 34300 (0.0008) +[2026-06-02 17:02:42,874][262582] Updated weights for policy 0, policy_version 34310 (0.0008) +[2026-06-02 17:02:43,086][262582] Updated weights for policy 0, policy_version 34320 (0.0008) +[2026-06-02 17:02:43,289][262582] Updated weights for policy 0, policy_version 34330 (0.0009) +[2026-06-02 17:02:43,970][262582] Updated weights for policy 0, policy_version 34340 (0.0008) +[2026-06-02 17:02:44,210][262582] Updated weights for policy 0, policy_version 34352 (0.0008) +[2026-06-02 17:02:44,406][262582] Updated weights for policy 0, policy_version 34362 (0.0008) +[2026-06-02 17:02:44,613][262582] Updated weights for policy 0, policy_version 34372 (0.0008) +[2026-06-02 17:02:44,813][262582] Updated weights for policy 0, policy_version 34382 (0.0008) +[2026-06-02 17:02:45,020][262582] Updated weights for policy 0, policy_version 34392 (0.0008) +[2026-06-02 17:02:45,731][262582] Updated weights for policy 0, policy_version 34402 (0.0009) +[2026-06-02 17:02:45,912][262582] Updated weights for policy 0, policy_version 34412 (0.0008) +[2026-06-02 17:02:46,007][260776] Fps is (10 sec: 19661.0, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 17629184. Throughput: 0: 18383.6. Samples: 17650688. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 17:02:46,008][260776] Avg episode reward: [(0, '1424.792')] +[2026-06-02 17:02:46,113][262582] Updated weights for policy 0, policy_version 34422 (0.0009) +[2026-06-02 17:02:46,314][262582] Updated weights for policy 0, policy_version 34432 (0.0008) +[2026-06-02 17:02:46,525][262582] Updated weights for policy 0, policy_version 34442 (0.0009) +[2026-06-02 17:02:46,725][262582] Updated weights for policy 0, policy_version 34452 (0.0008) +[2026-06-02 17:02:46,922][262582] Updated weights for policy 0, policy_version 34462 (0.0008) +[2026-06-02 17:02:46,960][262026] Saving new best policy, reward=1424.792! +[2026-06-02 17:02:47,642][262582] Updated weights for policy 0, policy_version 34473 (0.0008) +[2026-06-02 17:02:47,841][262582] Updated weights for policy 0, policy_version 34483 (0.0008) +[2026-06-02 17:02:48,050][262582] Updated weights for policy 0, policy_version 34493 (0.0008) +[2026-06-02 17:02:48,252][262582] Updated weights for policy 0, policy_version 34503 (0.0008) +[2026-06-02 17:02:48,451][262582] Updated weights for policy 0, policy_version 34513 (0.0008) +[2026-06-02 17:02:48,672][262582] Updated weights for policy 0, policy_version 34524 (0.0008) +[2026-06-02 17:02:49,376][262582] Updated weights for policy 0, policy_version 34534 (0.0008) +[2026-06-02 17:02:49,557][262582] Updated weights for policy 0, policy_version 34544 (0.0009) +[2026-06-02 17:02:49,784][262582] Updated weights for policy 0, policy_version 34555 (0.0009) +[2026-06-02 17:02:49,999][262582] Updated weights for policy 0, policy_version 34566 (0.0009) +[2026-06-02 17:02:50,211][262582] Updated weights for policy 0, policy_version 34576 (0.0009) +[2026-06-02 17:02:50,408][262582] Updated weights for policy 0, policy_version 34586 (0.0009) +[2026-06-02 17:02:51,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 17727488. Throughput: 0: 18380.8. Samples: 17709440. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 17:02:51,008][260776] Avg episode reward: [(0, '1443.827')] +[2026-06-02 17:02:51,116][262582] Updated weights for policy 0, policy_version 34597 (0.0008) +[2026-06-02 17:02:51,307][262582] Updated weights for policy 0, policy_version 34607 (0.0009) +[2026-06-02 17:02:51,514][262582] Updated weights for policy 0, policy_version 34617 (0.0008) +[2026-06-02 17:02:51,720][262582] Updated weights for policy 0, policy_version 34627 (0.0009) +[2026-06-02 17:02:51,914][262582] Updated weights for policy 0, policy_version 34637 (0.0008) +[2026-06-02 17:02:52,133][262582] Updated weights for policy 0, policy_version 34648 (0.0008) +[2026-06-02 17:02:52,285][262026] Saving new best policy, reward=1443.827! +[2026-06-02 17:02:52,856][262582] Updated weights for policy 0, policy_version 34658 (0.0008) +[2026-06-02 17:02:53,044][262582] Updated weights for policy 0, policy_version 34668 (0.0008) +[2026-06-02 17:02:53,260][262582] Updated weights for policy 0, policy_version 34679 (0.0008) +[2026-06-02 17:02:53,476][262582] Updated weights for policy 0, policy_version 34690 (0.0008) +[2026-06-02 17:02:53,697][262582] Updated weights for policy 0, policy_version 34701 (0.0008) +[2026-06-02 17:02:53,901][262582] Updated weights for policy 0, policy_version 34711 (0.0008) +[2026-06-02 17:02:54,630][262582] Updated weights for policy 0, policy_version 34721 (0.0009) +[2026-06-02 17:02:54,831][262582] Updated weights for policy 0, policy_version 34732 (0.0008) +[2026-06-02 17:02:55,033][262582] Updated weights for policy 0, policy_version 34742 (0.0009) +[2026-06-02 17:02:55,229][262582] Updated weights for policy 0, policy_version 34752 (0.0008) +[2026-06-02 17:02:55,433][262582] Updated weights for policy 0, policy_version 34762 (0.0009) +[2026-06-02 17:02:55,634][262582] Updated weights for policy 0, policy_version 34772 (0.0008) +[2026-06-02 17:02:55,845][262582] Updated weights for policy 0, policy_version 34782 (0.0008) +[2026-06-02 17:02:56,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 17825792. Throughput: 0: 18480.4. Samples: 17819904. Policy #0 lag: (min: 5.0, avg: 22.2, max: 69.0) +[2026-06-02 17:02:56,008][260776] Avg episode reward: [(0, '1423.935')] +[2026-06-02 17:02:56,535][262582] Updated weights for policy 0, policy_version 34792 (0.0008) +[2026-06-02 17:02:56,735][262582] Updated weights for policy 0, policy_version 34802 (0.0008) +[2026-06-02 17:02:56,937][262582] Updated weights for policy 0, policy_version 34812 (0.0008) +[2026-06-02 17:02:57,129][262582] Updated weights for policy 0, policy_version 34822 (0.0009) +[2026-06-02 17:02:57,338][262582] Updated weights for policy 0, policy_version 34832 (0.0008) +[2026-06-02 17:02:57,536][262582] Updated weights for policy 0, policy_version 34842 (0.0008) +[2026-06-02 17:02:58,280][262582] Updated weights for policy 0, policy_version 34853 (0.0008) +[2026-06-02 17:02:58,470][262582] Updated weights for policy 0, policy_version 34863 (0.0008) +[2026-06-02 17:02:58,672][262582] Updated weights for policy 0, policy_version 34873 (0.0008) +[2026-06-02 17:02:58,870][262582] Updated weights for policy 0, policy_version 34883 (0.0008) +[2026-06-02 17:02:59,069][262582] Updated weights for policy 0, policy_version 34893 (0.0008) +[2026-06-02 17:02:59,286][262582] Updated weights for policy 0, policy_version 34904 (0.0008) +[2026-06-02 17:02:59,996][262582] Updated weights for policy 0, policy_version 34914 (0.0009) +[2026-06-02 17:03:00,184][262582] Updated weights for policy 0, policy_version 34924 (0.0009) +[2026-06-02 17:03:00,379][262582] Updated weights for policy 0, policy_version 34934 (0.0009) +[2026-06-02 17:03:00,595][262582] Updated weights for policy 0, policy_version 34944 (0.0008) +[2026-06-02 17:03:00,812][262582] Updated weights for policy 0, policy_version 34955 (0.0009) +[2026-06-02 17:03:01,007][260776] Fps is (10 sec: 16384.1, 60 sec: 18022.4, 300 sec: 18327.9). Total num frames: 17891328. Throughput: 0: 18434.8. Samples: 17934976. Policy #0 lag: (min: 5.0, avg: 22.2, max: 69.0) +[2026-06-02 17:03:01,008][260776] Avg episode reward: [(0, '1366.752')] +[2026-06-02 17:03:01,012][262582] Updated weights for policy 0, policy_version 34965 (0.0009) +[2026-06-02 17:03:01,214][262582] Updated weights for policy 0, policy_version 34975 (0.0009) +[2026-06-02 17:03:01,926][262582] Updated weights for policy 0, policy_version 34986 (0.0008) +[2026-06-02 17:03:02,149][262582] Updated weights for policy 0, policy_version 34997 (0.0009) +[2026-06-02 17:03:02,350][262582] Updated weights for policy 0, policy_version 35007 (0.0008) +[2026-06-02 17:03:02,572][262582] Updated weights for policy 0, policy_version 35018 (0.0009) +[2026-06-02 17:03:02,771][262582] Updated weights for policy 0, policy_version 35028 (0.0009) +[2026-06-02 17:03:02,972][262582] Updated weights for policy 0, policy_version 35038 (0.0008) +[2026-06-02 17:03:03,684][262582] Updated weights for policy 0, policy_version 35049 (0.0009) +[2026-06-02 17:03:03,881][262582] Updated weights for policy 0, policy_version 35059 (0.0009) +[2026-06-02 17:03:04,092][262582] Updated weights for policy 0, policy_version 35070 (0.0008) +[2026-06-02 17:03:04,304][262582] Updated weights for policy 0, policy_version 35080 (0.0008) +[2026-06-02 17:03:04,510][262582] Updated weights for policy 0, policy_version 35090 (0.0009) +[2026-06-02 17:03:04,705][262582] Updated weights for policy 0, policy_version 35100 (0.0009) +[2026-06-02 17:03:05,419][262582] Updated weights for policy 0, policy_version 35110 (0.0008) +[2026-06-02 17:03:05,613][262582] Updated weights for policy 0, policy_version 35120 (0.0008) +[2026-06-02 17:03:05,810][262582] Updated weights for policy 0, policy_version 35130 (0.0009) +[2026-06-02 17:03:06,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 17989632. Throughput: 0: 18602.7. Samples: 17989504. Policy #0 lag: (min: 5.0, avg: 22.2, max: 69.0) +[2026-06-02 17:03:06,008][260776] Avg episode reward: [(0, '1367.546')] +[2026-06-02 17:03:06,017][262582] Updated weights for policy 0, policy_version 35140 (0.0009) +[2026-06-02 17:03:06,224][262582] Updated weights for policy 0, policy_version 35150 (0.0009) +[2026-06-02 17:03:06,418][262582] Updated weights for policy 0, policy_version 35160 (0.0008) +[2026-06-02 17:03:07,149][262582] Updated weights for policy 0, policy_version 35170 (0.0008) +[2026-06-02 17:03:07,343][262582] Updated weights for policy 0, policy_version 35180 (0.0005) +[2026-06-02 17:03:07,541][262582] Updated weights for policy 0, policy_version 35190 (0.0005) +[2026-06-02 17:03:07,758][262582] Updated weights for policy 0, policy_version 35201 (0.0007) +[2026-06-02 17:03:07,951][262582] Updated weights for policy 0, policy_version 35211 (0.0008) +[2026-06-02 17:03:08,186][262582] Updated weights for policy 0, policy_version 35222 (0.0008) +[2026-06-02 17:03:08,381][262582] Updated weights for policy 0, policy_version 35232 (0.0008) +[2026-06-02 17:03:09,075][262582] Updated weights for policy 0, policy_version 35242 (0.0009) +[2026-06-02 17:03:09,268][262582] Updated weights for policy 0, policy_version 35252 (0.0008) +[2026-06-02 17:03:09,475][262582] Updated weights for policy 0, policy_version 35262 (0.0008) +[2026-06-02 17:03:09,694][262582] Updated weights for policy 0, policy_version 35273 (0.0008) +[2026-06-02 17:03:09,927][262582] Updated weights for policy 0, policy_version 35284 (0.0008) +[2026-06-02 17:03:10,120][262582] Updated weights for policy 0, policy_version 35294 (0.0008) +[2026-06-02 17:03:10,821][262582] Updated weights for policy 0, policy_version 35304 (0.0008) +[2026-06-02 17:03:11,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 18087936. Throughput: 0: 18480.4. Samples: 18101760. Policy #0 lag: (min: 5.0, avg: 22.2, max: 69.0) +[2026-06-02 17:03:11,008][260776] Avg episode reward: [(0, '1380.617')] +[2026-06-02 17:03:11,038][262582] Updated weights for policy 0, policy_version 35315 (0.0008) +[2026-06-02 17:03:11,239][262582] Updated weights for policy 0, policy_version 35325 (0.0008) +[2026-06-02 17:03:11,483][262582] Updated weights for policy 0, policy_version 35337 (0.0008) +[2026-06-02 17:03:11,681][262582] Updated weights for policy 0, policy_version 35347 (0.0008) +[2026-06-02 17:03:11,885][262582] Updated weights for policy 0, policy_version 35357 (0.0008) +[2026-06-02 17:03:12,610][262582] Updated weights for policy 0, policy_version 35368 (0.0008) +[2026-06-02 17:03:12,811][262582] Updated weights for policy 0, policy_version 35378 (0.0008) +[2026-06-02 17:03:13,012][262582] Updated weights for policy 0, policy_version 35388 (0.0008) +[2026-06-02 17:03:13,215][262582] Updated weights for policy 0, policy_version 35398 (0.0009) +[2026-06-02 17:03:13,414][262582] Updated weights for policy 0, policy_version 35408 (0.0008) +[2026-06-02 17:03:13,616][262582] Updated weights for policy 0, policy_version 35418 (0.0008) +[2026-06-02 17:03:14,310][262582] Updated weights for policy 0, policy_version 35428 (0.0008) +[2026-06-02 17:03:14,520][262582] Updated weights for policy 0, policy_version 35439 (0.0008) +[2026-06-02 17:03:14,721][262582] Updated weights for policy 0, policy_version 35449 (0.0008) +[2026-06-02 17:03:14,911][262582] Updated weights for policy 0, policy_version 35459 (0.0008) +[2026-06-02 17:03:15,127][262582] Updated weights for policy 0, policy_version 35469 (0.0008) +[2026-06-02 17:03:15,328][262582] Updated weights for policy 0, policy_version 35479 (0.0008) +[2026-06-02 17:03:16,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 18186240. Throughput: 0: 18460.5. Samples: 18207744. Policy #0 lag: (min: 44.0, avg: 61.1, max: 108.0) +[2026-06-02 17:03:16,008][260776] Avg episode reward: [(0, '1424.909')] +[2026-06-02 17:03:16,048][262582] Updated weights for policy 0, policy_version 35489 (0.0009) +[2026-06-02 17:03:16,232][262582] Updated weights for policy 0, policy_version 35499 (0.0008) +[2026-06-02 17:03:16,428][262582] Updated weights for policy 0, policy_version 35509 (0.0008) +[2026-06-02 17:03:16,634][262582] Updated weights for policy 0, policy_version 35520 (0.0008) +[2026-06-02 17:03:16,850][262582] Updated weights for policy 0, policy_version 35530 (0.0008) +[2026-06-02 17:03:17,053][262582] Updated weights for policy 0, policy_version 35540 (0.0008) +[2026-06-02 17:03:17,247][262582] Updated weights for policy 0, policy_version 35550 (0.0008) +[2026-06-02 17:03:17,959][262582] Updated weights for policy 0, policy_version 35561 (0.0008) +[2026-06-02 17:03:18,162][262582] Updated weights for policy 0, policy_version 35571 (0.0008) +[2026-06-02 17:03:18,362][262582] Updated weights for policy 0, policy_version 35581 (0.0009) +[2026-06-02 17:03:18,571][262582] Updated weights for policy 0, policy_version 35591 (0.0009) +[2026-06-02 17:03:18,784][262582] Updated weights for policy 0, policy_version 35602 (0.0008) +[2026-06-02 17:03:18,991][262582] Updated weights for policy 0, policy_version 35612 (0.0008) +[2026-06-02 17:03:19,703][262582] Updated weights for policy 0, policy_version 35622 (0.0008) +[2026-06-02 17:03:19,916][262582] Updated weights for policy 0, policy_version 35632 (0.0008) +[2026-06-02 17:03:20,126][262582] Updated weights for policy 0, policy_version 35643 (0.0008) +[2026-06-02 17:03:20,325][262582] Updated weights for policy 0, policy_version 35653 (0.0009) +[2026-06-02 17:03:20,524][262582] Updated weights for policy 0, policy_version 35663 (0.0008) +[2026-06-02 17:03:20,729][262582] Updated weights for policy 0, policy_version 35673 (0.0008) +[2026-06-02 17:03:21,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18439.0). Total num frames: 18284544. Throughput: 0: 18466.2. Samples: 18267136. Policy #0 lag: (min: 44.0, avg: 61.1, max: 108.0) +[2026-06-02 17:03:21,008][260776] Avg episode reward: [(0, '1454.050')] +[2026-06-02 17:03:21,012][262026] Saving new best policy, reward=1454.050! +[2026-06-02 17:03:21,442][262582] Updated weights for policy 0, policy_version 35684 (0.0009) +[2026-06-02 17:03:21,680][262582] Updated weights for policy 0, policy_version 35697 (0.0008) +[2026-06-02 17:03:21,887][262582] Updated weights for policy 0, policy_version 35707 (0.0008) +[2026-06-02 17:03:22,092][262582] Updated weights for policy 0, policy_version 35717 (0.0008) +[2026-06-02 17:03:22,288][262582] Updated weights for policy 0, policy_version 35727 (0.0008) +[2026-06-02 17:03:22,487][262582] Updated weights for policy 0, policy_version 35737 (0.0008) +[2026-06-02 17:03:23,208][262582] Updated weights for policy 0, policy_version 35747 (0.0009) +[2026-06-02 17:03:23,403][262582] Updated weights for policy 0, policy_version 35757 (0.0008) +[2026-06-02 17:03:23,606][262582] Updated weights for policy 0, policy_version 35767 (0.0008) +[2026-06-02 17:03:23,812][262582] Updated weights for policy 0, policy_version 35777 (0.0008) +[2026-06-02 17:03:24,019][262582] Updated weights for policy 0, policy_version 35788 (0.0008) +[2026-06-02 17:03:24,220][262582] Updated weights for policy 0, policy_version 35798 (0.0008) +[2026-06-02 17:03:24,423][262582] Updated weights for policy 0, policy_version 35808 (0.0008) +[2026-06-02 17:03:25,121][262582] Updated weights for policy 0, policy_version 35818 (0.0008) +[2026-06-02 17:03:25,312][262582] Updated weights for policy 0, policy_version 35828 (0.0008) +[2026-06-02 17:03:25,516][262582] Updated weights for policy 0, policy_version 35838 (0.0009) +[2026-06-02 17:03:25,720][262582] Updated weights for policy 0, policy_version 35848 (0.0008) +[2026-06-02 17:03:25,926][262582] Updated weights for policy 0, policy_version 35858 (0.0008) +[2026-06-02 17:03:26,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18022.4, 300 sec: 18327.9). Total num frames: 18350080. Throughput: 0: 18469.0. Samples: 18374016. Policy #0 lag: (min: 44.0, avg: 61.1, max: 108.0) +[2026-06-02 17:03:26,008][260776] Avg episode reward: [(0, '1438.609')] +[2026-06-02 17:03:26,125][262582] Updated weights for policy 0, policy_version 35868 (0.0008) +[2026-06-02 17:03:26,829][262582] Updated weights for policy 0, policy_version 35878 (0.0008) +[2026-06-02 17:03:27,028][262582] Updated weights for policy 0, policy_version 35888 (0.0008) +[2026-06-02 17:03:27,231][262582] Updated weights for policy 0, policy_version 35898 (0.0008) +[2026-06-02 17:03:27,450][262582] Updated weights for policy 0, policy_version 35909 (0.0008) +[2026-06-02 17:03:27,657][262582] Updated weights for policy 0, policy_version 35919 (0.0008) +[2026-06-02 17:03:27,855][262582] Updated weights for policy 0, policy_version 35929 (0.0008) +[2026-06-02 17:03:28,555][262582] Updated weights for policy 0, policy_version 35939 (0.0008) +[2026-06-02 17:03:28,764][262582] Updated weights for policy 0, policy_version 35949 (0.0009) +[2026-06-02 17:03:28,963][262582] Updated weights for policy 0, policy_version 35960 (0.0008) +[2026-06-02 17:03:29,167][262582] Updated weights for policy 0, policy_version 35970 (0.0009) +[2026-06-02 17:03:29,383][262582] Updated weights for policy 0, policy_version 35980 (0.0008) +[2026-06-02 17:03:29,585][262582] Updated weights for policy 0, policy_version 35990 (0.0008) +[2026-06-02 17:03:29,783][262582] Updated weights for policy 0, policy_version 36000 (0.0009) +[2026-06-02 17:03:30,457][262582] Updated weights for policy 0, policy_version 36010 (0.0008) +[2026-06-02 17:03:30,656][262582] Updated weights for policy 0, policy_version 36020 (0.0008) +[2026-06-02 17:03:30,870][262582] Updated weights for policy 0, policy_version 36030 (0.0008) +[2026-06-02 17:03:31,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 18448384. Throughput: 0: 18725.0. Samples: 18493312. Policy #0 lag: (min: 44.0, avg: 61.1, max: 108.0) +[2026-06-02 17:03:31,008][260776] Avg episode reward: [(0, '1450.137')] +[2026-06-02 17:03:31,067][262582] Updated weights for policy 0, policy_version 36040 (0.0008) +[2026-06-02 17:03:31,267][262582] Updated weights for policy 0, policy_version 36050 (0.0008) +[2026-06-02 17:03:31,475][262582] Updated weights for policy 0, policy_version 36060 (0.0010) +[2026-06-02 17:03:32,185][262582] Updated weights for policy 0, policy_version 36070 (0.0009) +[2026-06-02 17:03:32,392][262582] Updated weights for policy 0, policy_version 36080 (0.0011) +[2026-06-02 17:03:32,589][262582] Updated weights for policy 0, policy_version 36090 (0.0011) +[2026-06-02 17:03:32,803][262582] Updated weights for policy 0, policy_version 36100 (0.0010) +[2026-06-02 17:03:33,029][262582] Updated weights for policy 0, policy_version 36111 (0.0011) +[2026-06-02 17:03:33,226][262582] Updated weights for policy 0, policy_version 36121 (0.0010) +[2026-06-02 17:03:33,927][262582] Updated weights for policy 0, policy_version 36131 (0.0012) +[2026-06-02 17:03:34,118][262582] Updated weights for policy 0, policy_version 36141 (0.0012) +[2026-06-02 17:03:34,322][262582] Updated weights for policy 0, policy_version 36151 (0.0011) +[2026-06-02 17:03:34,524][262582] Updated weights for policy 0, policy_version 36161 (0.0013) +[2026-06-02 17:03:34,734][262582] Updated weights for policy 0, policy_version 36171 (0.0011) +[2026-06-02 17:03:34,940][262582] Updated weights for policy 0, policy_version 36181 (0.0011) +[2026-06-02 17:03:35,150][262582] Updated weights for policy 0, policy_version 36191 (0.0010) +[2026-06-02 17:03:35,823][262582] Updated weights for policy 0, policy_version 36201 (0.0010) +[2026-06-02 17:03:36,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 18546688. Throughput: 0: 18494.6. Samples: 18541696. Policy #0 lag: (min: 44.0, avg: 61.1, max: 108.0) +[2026-06-02 17:03:36,008][260776] Avg episode reward: [(0, '1454.888')] +[2026-06-02 17:03:36,026][262582] Updated weights for policy 0, policy_version 36211 (0.0012) +[2026-06-02 17:03:36,247][262582] Updated weights for policy 0, policy_version 36222 (0.0011) +[2026-06-02 17:03:36,455][262582] Updated weights for policy 0, policy_version 36232 (0.0010) +[2026-06-02 17:03:36,669][262582] Updated weights for policy 0, policy_version 36242 (0.0011) +[2026-06-02 17:03:36,872][262582] Updated weights for policy 0, policy_version 36252 (0.0012) +[2026-06-02 17:03:36,943][262026] Saving new best policy, reward=1454.888! +[2026-06-02 17:03:37,573][262582] Updated weights for policy 0, policy_version 36262 (0.0010) +[2026-06-02 17:03:37,771][262582] Updated weights for policy 0, policy_version 36272 (0.0008) +[2026-06-02 17:03:37,961][262582] Updated weights for policy 0, policy_version 36282 (0.0008) +[2026-06-02 17:03:38,171][262582] Updated weights for policy 0, policy_version 36292 (0.0009) +[2026-06-02 17:03:38,374][262582] Updated weights for policy 0, policy_version 36302 (0.0009) +[2026-06-02 17:03:38,581][262582] Updated weights for policy 0, policy_version 36312 (0.0009) +[2026-06-02 17:03:39,291][262582] Updated weights for policy 0, policy_version 36322 (0.0009) +[2026-06-02 17:03:39,479][262582] Updated weights for policy 0, policy_version 36332 (0.0008) +[2026-06-02 17:03:39,673][262582] Updated weights for policy 0, policy_version 36342 (0.0008) +[2026-06-02 17:03:39,870][262582] Updated weights for policy 0, policy_version 36352 (0.0008) +[2026-06-02 17:03:40,078][262582] Updated weights for policy 0, policy_version 36362 (0.0009) +[2026-06-02 17:03:40,282][262582] Updated weights for policy 0, policy_version 36372 (0.0008) +[2026-06-02 17:03:40,485][262582] Updated weights for policy 0, policy_version 36382 (0.0009) +[2026-06-02 17:03:41,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 18644992. Throughput: 0: 18688.0. Samples: 18660864. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 17:03:41,008][260776] Avg episode reward: [(0, '1464.940')] +[2026-06-02 17:03:41,195][262582] Updated weights for policy 0, policy_version 36392 (0.0008) +[2026-06-02 17:03:41,397][262582] Updated weights for policy 0, policy_version 36402 (0.0009) +[2026-06-02 17:03:41,624][262582] Updated weights for policy 0, policy_version 36413 (0.0008) +[2026-06-02 17:03:41,823][262582] Updated weights for policy 0, policy_version 36423 (0.0008) +[2026-06-02 17:03:42,017][262582] Updated weights for policy 0, policy_version 36433 (0.0008) +[2026-06-02 17:03:42,233][262582] Updated weights for policy 0, policy_version 36443 (0.0008) +[2026-06-02 17:03:42,326][262026] Saving new best policy, reward=1464.940! +[2026-06-02 17:03:42,967][262582] Updated weights for policy 0, policy_version 36454 (0.0009) +[2026-06-02 17:03:43,170][262582] Updated weights for policy 0, policy_version 36465 (0.0008) +[2026-06-02 17:03:43,371][262582] Updated weights for policy 0, policy_version 36475 (0.0008) +[2026-06-02 17:03:43,585][262582] Updated weights for policy 0, policy_version 36485 (0.0008) +[2026-06-02 17:03:43,784][262582] Updated weights for policy 0, policy_version 36495 (0.0008) +[2026-06-02 17:03:44,008][262582] Updated weights for policy 0, policy_version 36506 (0.0008) +[2026-06-02 17:03:44,686][262582] Updated weights for policy 0, policy_version 36516 (0.0008) +[2026-06-02 17:03:44,876][262582] Updated weights for policy 0, policy_version 36526 (0.0008) +[2026-06-02 17:03:45,086][262582] Updated weights for policy 0, policy_version 36536 (0.0008) +[2026-06-02 17:03:45,302][262582] Updated weights for policy 0, policy_version 36546 (0.0008) +[2026-06-02 17:03:45,495][262582] Updated weights for policy 0, policy_version 36556 (0.0008) +[2026-06-02 17:03:45,703][262582] Updated weights for policy 0, policy_version 36566 (0.0009) +[2026-06-02 17:03:45,893][262582] Updated weights for policy 0, policy_version 36576 (0.0008) +[2026-06-02 17:03:46,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 18743296. Throughput: 0: 18514.5. Samples: 18768128. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 17:03:46,008][260776] Avg episode reward: [(0, '1455.382')] +[2026-06-02 17:03:46,586][262582] Updated weights for policy 0, policy_version 36586 (0.0008) +[2026-06-02 17:03:46,778][262582] Updated weights for policy 0, policy_version 36596 (0.0008) +[2026-06-02 17:03:46,977][262582] Updated weights for policy 0, policy_version 36606 (0.0008) +[2026-06-02 17:03:47,185][262582] Updated weights for policy 0, policy_version 36616 (0.0008) +[2026-06-02 17:03:47,389][262582] Updated weights for policy 0, policy_version 36626 (0.0008) +[2026-06-02 17:03:47,618][262582] Updated weights for policy 0, policy_version 36637 (0.0008) +[2026-06-02 17:03:48,298][262582] Updated weights for policy 0, policy_version 36647 (0.0008) +[2026-06-02 17:03:48,499][262582] Updated weights for policy 0, policy_version 36657 (0.0009) +[2026-06-02 17:03:48,692][262582] Updated weights for policy 0, policy_version 36667 (0.0008) +[2026-06-02 17:03:48,894][262582] Updated weights for policy 0, policy_version 36677 (0.0008) +[2026-06-02 17:03:49,118][262582] Updated weights for policy 0, policy_version 36688 (0.0008) +[2026-06-02 17:03:49,320][262582] Updated weights for policy 0, policy_version 36698 (0.0008) +[2026-06-02 17:03:50,019][262582] Updated weights for policy 0, policy_version 36708 (0.0008) +[2026-06-02 17:03:50,209][262582] Updated weights for policy 0, policy_version 36718 (0.0008) +[2026-06-02 17:03:50,433][262582] Updated weights for policy 0, policy_version 36729 (0.0009) +[2026-06-02 17:03:50,628][262582] Updated weights for policy 0, policy_version 36739 (0.0008) +[2026-06-02 17:03:50,832][262582] Updated weights for policy 0, policy_version 36749 (0.0008) +[2026-06-02 17:03:51,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18022.4, 300 sec: 18327.9). Total num frames: 18808832. Throughput: 0: 18611.2. Samples: 18827008. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 17:03:51,008][260776] Avg episode reward: [(0, '1411.861')] +[2026-06-02 17:03:51,040][262582] Updated weights for policy 0, policy_version 36759 (0.0008) +[2026-06-02 17:03:51,728][262582] Updated weights for policy 0, policy_version 36769 (0.0008) +[2026-06-02 17:03:51,911][262582] Updated weights for policy 0, policy_version 36779 (0.0008) +[2026-06-02 17:03:52,111][262582] Updated weights for policy 0, policy_version 36789 (0.0008) +[2026-06-02 17:03:52,313][262582] Updated weights for policy 0, policy_version 36799 (0.0008) +[2026-06-02 17:03:52,519][262582] Updated weights for policy 0, policy_version 36809 (0.0008) +[2026-06-02 17:03:52,723][262582] Updated weights for policy 0, policy_version 36819 (0.0009) +[2026-06-02 17:03:52,923][262582] Updated weights for policy 0, policy_version 36829 (0.0008) +[2026-06-02 17:03:53,618][262582] Updated weights for policy 0, policy_version 36839 (0.0008) +[2026-06-02 17:03:53,822][262582] Updated weights for policy 0, policy_version 36849 (0.0008) +[2026-06-02 17:03:54,019][262582] Updated weights for policy 0, policy_version 36859 (0.0009) +[2026-06-02 17:03:54,222][262582] Updated weights for policy 0, policy_version 36869 (0.0008) +[2026-06-02 17:03:54,425][262582] Updated weights for policy 0, policy_version 36879 (0.0008) +[2026-06-02 17:03:54,632][262582] Updated weights for policy 0, policy_version 36889 (0.0008) +[2026-06-02 17:03:55,311][262582] Updated weights for policy 0, policy_version 36899 (0.0008) +[2026-06-02 17:03:55,498][262582] Updated weights for policy 0, policy_version 36909 (0.0008) +[2026-06-02 17:03:55,697][262582] Updated weights for policy 0, policy_version 36919 (0.0009) +[2026-06-02 17:03:55,894][262582] Updated weights for policy 0, policy_version 36929 (0.0008) +[2026-06-02 17:03:56,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 18907136. Throughput: 0: 18471.8. Samples: 18932992. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 17:03:56,008][260776] Avg episode reward: [(0, '1423.942')] +[2026-06-02 17:03:56,099][262582] Updated weights for policy 0, policy_version 36939 (0.0008) +[2026-06-02 17:03:56,303][262582] Updated weights for policy 0, policy_version 36949 (0.0008) +[2026-06-02 17:03:56,502][262582] Updated weights for policy 0, policy_version 36959 (0.0008) +[2026-06-02 17:03:57,211][262582] Updated weights for policy 0, policy_version 36969 (0.0009) +[2026-06-02 17:03:57,431][262582] Updated weights for policy 0, policy_version 36980 (0.0008) +[2026-06-02 17:03:57,633][262582] Updated weights for policy 0, policy_version 36990 (0.0009) +[2026-06-02 17:03:57,832][262582] Updated weights for policy 0, policy_version 37000 (0.0009) +[2026-06-02 17:03:58,026][262582] Updated weights for policy 0, policy_version 37010 (0.0009) +[2026-06-02 17:03:58,241][262582] Updated weights for policy 0, policy_version 37020 (0.0009) +[2026-06-02 17:03:58,927][262582] Updated weights for policy 0, policy_version 37030 (0.0009) +[2026-06-02 17:03:59,143][262582] Updated weights for policy 0, policy_version 37041 (0.0012) +[2026-06-02 17:03:59,348][262582] Updated weights for policy 0, policy_version 37051 (0.0012) +[2026-06-02 17:03:59,550][262582] Updated weights for policy 0, policy_version 37061 (0.0011) +[2026-06-02 17:03:59,755][262582] Updated weights for policy 0, policy_version 37071 (0.0008) +[2026-06-02 17:03:59,955][262582] Updated weights for policy 0, policy_version 37081 (0.0009) +[2026-06-02 17:04:00,639][262582] Updated weights for policy 0, policy_version 37091 (0.0009) +[2026-06-02 17:04:00,837][262582] Updated weights for policy 0, policy_version 37101 (0.0009) +[2026-06-02 17:04:01,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 19005440. Throughput: 0: 18568.5. Samples: 19043328. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 17:04:01,008][260776] Avg episode reward: [(0, '1405.544')] +[2026-06-02 17:04:01,033][262582] Updated weights for policy 0, policy_version 37111 (0.0009) +[2026-06-02 17:04:01,239][262582] Updated weights for policy 0, policy_version 37121 (0.0010) +[2026-06-02 17:04:01,441][262582] Updated weights for policy 0, policy_version 37131 (0.0008) +[2026-06-02 17:04:01,632][262582] Updated weights for policy 0, policy_version 37141 (0.0008) +[2026-06-02 17:04:01,857][262582] Updated weights for policy 0, policy_version 37152 (0.0009) +[2026-06-02 17:04:02,545][262582] Updated weights for policy 0, policy_version 37162 (0.0009) +[2026-06-02 17:04:02,752][262582] Updated weights for policy 0, policy_version 37172 (0.0009) +[2026-06-02 17:04:02,947][262582] Updated weights for policy 0, policy_version 37182 (0.0009) +[2026-06-02 17:04:03,152][262582] Updated weights for policy 0, policy_version 37192 (0.0008) +[2026-06-02 17:04:03,348][262582] Updated weights for policy 0, policy_version 37202 (0.0008) +[2026-06-02 17:04:03,561][262582] Updated weights for policy 0, policy_version 37212 (0.0008) +[2026-06-02 17:04:04,227][262582] Updated weights for policy 0, policy_version 37222 (0.0009) +[2026-06-02 17:04:04,420][262582] Updated weights for policy 0, policy_version 37232 (0.0008) +[2026-06-02 17:04:04,619][262582] Updated weights for policy 0, policy_version 37242 (0.0008) +[2026-06-02 17:04:04,849][262582] Updated weights for policy 0, policy_version 37253 (0.0008) +[2026-06-02 17:04:05,051][262582] Updated weights for policy 0, policy_version 37263 (0.0008) +[2026-06-02 17:04:05,249][262582] Updated weights for policy 0, policy_version 37273 (0.0008) +[2026-06-02 17:04:05,936][262582] Updated weights for policy 0, policy_version 37283 (0.0008) +[2026-06-02 17:04:06,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 19103744. Throughput: 0: 18446.2. Samples: 19097216. Policy #0 lag: (min: 63.0, avg: 80.5, max: 127.0) +[2026-06-02 17:04:06,008][260776] Avg episode reward: [(0, '1420.294')] +[2026-06-02 17:04:06,131][262582] Updated weights for policy 0, policy_version 37293 (0.0010) +[2026-06-02 17:04:06,335][262582] Updated weights for policy 0, policy_version 37303 (0.0012) +[2026-06-02 17:04:06,539][262582] Updated weights for policy 0, policy_version 37313 (0.0012) +[2026-06-02 17:04:06,735][262582] Updated weights for policy 0, policy_version 37323 (0.0009) +[2026-06-02 17:04:06,944][262582] Updated weights for policy 0, policy_version 37333 (0.0008) +[2026-06-02 17:04:07,146][262582] Updated weights for policy 0, policy_version 37343 (0.0008) +[2026-06-02 17:04:07,835][262582] Updated weights for policy 0, policy_version 37353 (0.0009) +[2026-06-02 17:04:08,023][262582] Updated weights for policy 0, policy_version 37363 (0.0008) +[2026-06-02 17:04:08,232][262582] Updated weights for policy 0, policy_version 37373 (0.0008) +[2026-06-02 17:04:08,430][262582] Updated weights for policy 0, policy_version 37383 (0.0008) +[2026-06-02 17:04:08,640][262582] Updated weights for policy 0, policy_version 37393 (0.0008) +[2026-06-02 17:04:08,838][262582] Updated weights for policy 0, policy_version 37403 (0.0008) +[2026-06-02 17:04:09,523][262582] Updated weights for policy 0, policy_version 37413 (0.0008) +[2026-06-02 17:04:09,714][262582] Updated weights for policy 0, policy_version 37423 (0.0008) +[2026-06-02 17:04:09,907][262582] Updated weights for policy 0, policy_version 37433 (0.0008) +[2026-06-02 17:04:10,117][262582] Updated weights for policy 0, policy_version 37443 (0.0008) +[2026-06-02 17:04:10,337][262582] Updated weights for policy 0, policy_version 37453 (0.0009) +[2026-06-02 17:04:10,550][262582] Updated weights for policy 0, policy_version 37464 (0.0008) +[2026-06-02 17:04:11,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 19202048. Throughput: 0: 18628.3. Samples: 19212288. Policy #0 lag: (min: 63.0, avg: 80.5, max: 127.0) +[2026-06-02 17:04:11,008][260776] Avg episode reward: [(0, '1466.538')] +[2026-06-02 17:04:11,012][262026] Saving new best policy, reward=1466.538! +[2026-06-02 17:04:11,254][262582] Updated weights for policy 0, policy_version 37474 (0.0008) +[2026-06-02 17:04:11,448][262582] Updated weights for policy 0, policy_version 37484 (0.0008) +[2026-06-02 17:04:11,654][262582] Updated weights for policy 0, policy_version 37494 (0.0008) +[2026-06-02 17:04:11,853][262582] Updated weights for policy 0, policy_version 37504 (0.0008) +[2026-06-02 17:04:12,046][262582] Updated weights for policy 0, policy_version 37514 (0.0008) +[2026-06-02 17:04:12,255][262582] Updated weights for policy 0, policy_version 37524 (0.0008) +[2026-06-02 17:04:12,467][262582] Updated weights for policy 0, policy_version 37534 (0.0008) +[2026-06-02 17:04:13,174][262582] Updated weights for policy 0, policy_version 37545 (0.0008) +[2026-06-02 17:04:13,395][262582] Updated weights for policy 0, policy_version 37556 (0.0009) +[2026-06-02 17:04:13,624][262582] Updated weights for policy 0, policy_version 37567 (0.0008) +[2026-06-02 17:04:13,828][262582] Updated weights for policy 0, policy_version 37577 (0.0008) +[2026-06-02 17:04:14,024][262582] Updated weights for policy 0, policy_version 37587 (0.0008) +[2026-06-02 17:04:14,233][262582] Updated weights for policy 0, policy_version 37597 (0.0008) +[2026-06-02 17:04:14,885][262582] Updated weights for policy 0, policy_version 37607 (0.0008) +[2026-06-02 17:04:15,077][262582] Updated weights for policy 0, policy_version 37617 (0.0008) +[2026-06-02 17:04:15,276][262582] Updated weights for policy 0, policy_version 37627 (0.0008) +[2026-06-02 17:04:15,496][262582] Updated weights for policy 0, policy_version 37638 (0.0008) +[2026-06-02 17:04:15,695][262582] Updated weights for policy 0, policy_version 37648 (0.0009) +[2026-06-02 17:04:15,902][262582] Updated weights for policy 0, policy_version 37658 (0.0008) +[2026-06-02 17:04:16,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18022.4, 300 sec: 18327.9). Total num frames: 19267584. Throughput: 0: 18346.6. Samples: 19318912. Policy #0 lag: (min: 63.0, avg: 80.5, max: 127.0) +[2026-06-02 17:04:16,008][260776] Avg episode reward: [(0, '1482.922')] +[2026-06-02 17:04:16,013][262026] Saving new best policy, reward=1482.922! +[2026-06-02 17:04:16,607][262582] Updated weights for policy 0, policy_version 37668 (0.0008) +[2026-06-02 17:04:16,802][262582] Updated weights for policy 0, policy_version 37678 (0.0009) +[2026-06-02 17:04:17,020][262582] Updated weights for policy 0, policy_version 37689 (0.0008) +[2026-06-02 17:04:17,222][262582] Updated weights for policy 0, policy_version 37699 (0.0008) +[2026-06-02 17:04:17,442][262582] Updated weights for policy 0, policy_version 37710 (0.0008) +[2026-06-02 17:04:17,634][262582] Updated weights for policy 0, policy_version 37720 (0.0008) +[2026-06-02 17:04:18,366][262582] Updated weights for policy 0, policy_version 37731 (0.0008) +[2026-06-02 17:04:18,555][262582] Updated weights for policy 0, policy_version 37741 (0.0009) +[2026-06-02 17:04:18,750][262582] Updated weights for policy 0, policy_version 37751 (0.0008) +[2026-06-02 17:04:18,955][262582] Updated weights for policy 0, policy_version 37761 (0.0008) +[2026-06-02 17:04:19,154][262582] Updated weights for policy 0, policy_version 37771 (0.0008) +[2026-06-02 17:04:19,358][262582] Updated weights for policy 0, policy_version 37781 (0.0008) +[2026-06-02 17:04:19,567][262582] Updated weights for policy 0, policy_version 37792 (0.0008) +[2026-06-02 17:04:20,261][262582] Updated weights for policy 0, policy_version 37802 (0.0008) +[2026-06-02 17:04:20,488][262582] Updated weights for policy 0, policy_version 37813 (0.0009) +[2026-06-02 17:04:20,703][262582] Updated weights for policy 0, policy_version 37824 (0.0008) +[2026-06-02 17:04:20,905][262582] Updated weights for policy 0, policy_version 37834 (0.0008) +[2026-06-02 17:04:21,007][260776] Fps is (10 sec: 16384.1, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 19365888. Throughput: 0: 18588.5. Samples: 19378176. Policy #0 lag: (min: 63.0, avg: 80.5, max: 127.0) +[2026-06-02 17:04:21,007][260776] Avg episode reward: [(0, '1498.765')] +[2026-06-02 17:04:21,127][262582] Updated weights for policy 0, policy_version 37845 (0.0008) +[2026-06-02 17:04:21,330][262582] Updated weights for policy 0, policy_version 37855 (0.0008) +[2026-06-02 17:04:21,341][262026] Saving new best policy, reward=1498.765! +[2026-06-02 17:04:22,012][262582] Updated weights for policy 0, policy_version 37865 (0.0009) +[2026-06-02 17:04:22,224][262582] Updated weights for policy 0, policy_version 37876 (0.0009) +[2026-06-02 17:04:22,428][262582] Updated weights for policy 0, policy_version 37886 (0.0009) +[2026-06-02 17:04:22,621][262582] Updated weights for policy 0, policy_version 37896 (0.0008) +[2026-06-02 17:04:22,873][262582] Updated weights for policy 0, policy_version 37908 (0.0009) +[2026-06-02 17:04:23,078][262582] Updated weights for policy 0, policy_version 37918 (0.0008) +[2026-06-02 17:04:23,759][262582] Updated weights for policy 0, policy_version 37928 (0.0008) +[2026-06-02 17:04:23,957][262582] Updated weights for policy 0, policy_version 37938 (0.0008) +[2026-06-02 17:04:24,155][262582] Updated weights for policy 0, policy_version 37948 (0.0009) +[2026-06-02 17:04:24,363][262582] Updated weights for policy 0, policy_version 37958 (0.0008) +[2026-06-02 17:04:24,569][262582] Updated weights for policy 0, policy_version 37968 (0.0008) +[2026-06-02 17:04:24,774][262582] Updated weights for policy 0, policy_version 37978 (0.0009) +[2026-06-02 17:04:25,449][262582] Updated weights for policy 0, policy_version 37988 (0.0008) +[2026-06-02 17:04:25,664][262582] Updated weights for policy 0, policy_version 37999 (0.0008) +[2026-06-02 17:04:25,858][262582] Updated weights for policy 0, policy_version 38009 (0.0008) +[2026-06-02 17:04:26,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 19464192. Throughput: 0: 18267.0. Samples: 19482880. Policy #0 lag: (min: 63.0, avg: 80.5, max: 127.0) +[2026-06-02 17:04:26,008][260776] Avg episode reward: [(0, '1519.654')] +[2026-06-02 17:04:26,059][262582] Updated weights for policy 0, policy_version 38019 (0.0007) +[2026-06-02 17:04:26,288][262582] Updated weights for policy 0, policy_version 38029 (0.0005) +[2026-06-02 17:04:26,478][262582] Updated weights for policy 0, policy_version 38039 (0.0007) +[2026-06-02 17:04:26,654][262026] Saving new best policy, reward=1519.654! +[2026-06-02 17:04:27,160][262582] Updated weights for policy 0, policy_version 38049 (0.0009) +[2026-06-02 17:04:27,368][262582] Updated weights for policy 0, policy_version 38060 (0.0009) +[2026-06-02 17:04:27,560][262582] Updated weights for policy 0, policy_version 38070 (0.0008) +[2026-06-02 17:04:27,765][262582] Updated weights for policy 0, policy_version 38080 (0.0009) +[2026-06-02 17:04:28,000][262582] Updated weights for policy 0, policy_version 38092 (0.0009) +[2026-06-02 17:04:28,220][262582] Updated weights for policy 0, policy_version 38103 (0.0009) +[2026-06-02 17:04:28,942][262582] Updated weights for policy 0, policy_version 38113 (0.0008) +[2026-06-02 17:04:29,127][262582] Updated weights for policy 0, policy_version 38123 (0.0008) +[2026-06-02 17:04:29,347][262582] Updated weights for policy 0, policy_version 38134 (0.0008) +[2026-06-02 17:04:29,562][262582] Updated weights for policy 0, policy_version 38145 (0.0008) +[2026-06-02 17:04:29,783][262582] Updated weights for policy 0, policy_version 38156 (0.0009) +[2026-06-02 17:04:29,980][262582] Updated weights for policy 0, policy_version 38166 (0.0009) +[2026-06-02 17:04:30,181][262582] Updated weights for policy 0, policy_version 38176 (0.0009) +[2026-06-02 17:04:30,876][262582] Updated weights for policy 0, policy_version 38186 (0.0009) +[2026-06-02 17:04:31,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 19562496. Throughput: 0: 18230.1. Samples: 19588480. Policy #0 lag: (min: 63.0, avg: 80.5, max: 127.0) +[2026-06-02 17:04:31,008][260776] Avg episode reward: [(0, '1531.296')] +[2026-06-02 17:04:31,072][262582] Updated weights for policy 0, policy_version 38196 (0.0009) +[2026-06-02 17:04:31,269][262582] Updated weights for policy 0, policy_version 38206 (0.0009) +[2026-06-02 17:04:31,474][262582] Updated weights for policy 0, policy_version 38216 (0.0009) +[2026-06-02 17:04:31,672][262582] Updated weights for policy 0, policy_version 38226 (0.0009) +[2026-06-02 17:04:31,877][262582] Updated weights for policy 0, policy_version 38236 (0.0009) +[2026-06-02 17:04:31,954][262026] Saving new best policy, reward=1531.296! +[2026-06-02 17:04:32,585][262582] Updated weights for policy 0, policy_version 38246 (0.0008) +[2026-06-02 17:04:32,773][262582] Updated weights for policy 0, policy_version 38256 (0.0008) +[2026-06-02 17:04:32,984][262582] Updated weights for policy 0, policy_version 38266 (0.0008) +[2026-06-02 17:04:33,196][262582] Updated weights for policy 0, policy_version 38277 (0.0008) +[2026-06-02 17:04:33,402][262582] Updated weights for policy 0, policy_version 38287 (0.0008) +[2026-06-02 17:04:33,596][262582] Updated weights for policy 0, policy_version 38297 (0.0008) +[2026-06-02 17:04:34,309][262582] Updated weights for policy 0, policy_version 38307 (0.0008) +[2026-06-02 17:04:34,493][262582] Updated weights for policy 0, policy_version 38317 (0.0008) +[2026-06-02 17:04:34,719][262582] Updated weights for policy 0, policy_version 38328 (0.0008) +[2026-06-02 17:04:34,927][262582] Updated weights for policy 0, policy_version 38338 (0.0008) +[2026-06-02 17:04:35,127][262582] Updated weights for policy 0, policy_version 38348 (0.0008) +[2026-06-02 17:04:35,343][262582] Updated weights for policy 0, policy_version 38358 (0.0008) +[2026-06-02 17:04:35,538][262582] Updated weights for policy 0, policy_version 38368 (0.0009) +[2026-06-02 17:04:36,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 19660800. Throughput: 0: 18218.6. Samples: 19646848. Policy #0 lag: (min: 45.0, avg: 93.0, max: 106.0) +[2026-06-02 17:04:36,008][260776] Avg episode reward: [(0, '1553.246')] +[2026-06-02 17:04:36,203][262582] Updated weights for policy 0, policy_version 38378 (0.0009) +[2026-06-02 17:04:36,391][262582] Updated weights for policy 0, policy_version 38388 (0.0008) +[2026-06-02 17:04:36,593][262582] Updated weights for policy 0, policy_version 38398 (0.0008) +[2026-06-02 17:04:36,796][262582] Updated weights for policy 0, policy_version 38408 (0.0008) +[2026-06-02 17:04:37,005][262582] Updated weights for policy 0, policy_version 38418 (0.0008) +[2026-06-02 17:04:37,228][262582] Updated weights for policy 0, policy_version 38429 (0.0008) +[2026-06-02 17:04:37,280][262026] Saving new best policy, reward=1553.246! +[2026-06-02 17:04:37,919][262582] Updated weights for policy 0, policy_version 38439 (0.0008) +[2026-06-02 17:04:38,126][262582] Updated weights for policy 0, policy_version 38449 (0.0008) +[2026-06-02 17:04:38,330][262582] Updated weights for policy 0, policy_version 38459 (0.0008) +[2026-06-02 17:04:38,533][262582] Updated weights for policy 0, policy_version 38469 (0.0008) +[2026-06-02 17:04:38,731][262582] Updated weights for policy 0, policy_version 38479 (0.0008) +[2026-06-02 17:04:38,941][262582] Updated weights for policy 0, policy_version 38489 (0.0008) +[2026-06-02 17:04:39,636][262582] Updated weights for policy 0, policy_version 38499 (0.0008) +[2026-06-02 17:04:39,825][262582] Updated weights for policy 0, policy_version 38509 (0.0009) +[2026-06-02 17:04:40,024][262582] Updated weights for policy 0, policy_version 38519 (0.0009) +[2026-06-02 17:04:40,227][262582] Updated weights for policy 0, policy_version 38529 (0.0008) +[2026-06-02 17:04:40,444][262582] Updated weights for policy 0, policy_version 38539 (0.0008) +[2026-06-02 17:04:40,646][262582] Updated weights for policy 0, policy_version 38549 (0.0008) +[2026-06-02 17:04:40,849][262582] Updated weights for policy 0, policy_version 38559 (0.0008) +[2026-06-02 17:04:41,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 19759104. Throughput: 0: 18341.0. Samples: 19758336. Policy #0 lag: (min: 45.0, avg: 93.0, max: 106.0) +[2026-06-02 17:04:41,008][260776] Avg episode reward: [(0, '1570.762')] +[2026-06-02 17:04:41,012][262026] Saving new best policy, reward=1570.762! +[2026-06-02 17:04:41,541][262582] Updated weights for policy 0, policy_version 38569 (0.0008) +[2026-06-02 17:04:41,739][262582] Updated weights for policy 0, policy_version 38579 (0.0009) +[2026-06-02 17:04:41,940][262582] Updated weights for policy 0, policy_version 38589 (0.0008) +[2026-06-02 17:04:42,152][262582] Updated weights for policy 0, policy_version 38599 (0.0006) +[2026-06-02 17:04:42,350][262582] Updated weights for policy 0, policy_version 38609 (0.0005) +[2026-06-02 17:04:42,552][262582] Updated weights for policy 0, policy_version 38619 (0.0005) +[2026-06-02 17:04:43,238][262582] Updated weights for policy 0, policy_version 38629 (0.0006) +[2026-06-02 17:04:43,435][262582] Updated weights for policy 0, policy_version 38639 (0.0008) +[2026-06-02 17:04:43,632][262582] Updated weights for policy 0, policy_version 38649 (0.0008) +[2026-06-02 17:04:43,831][262582] Updated weights for policy 0, policy_version 38659 (0.0008) +[2026-06-02 17:04:44,041][262582] Updated weights for policy 0, policy_version 38669 (0.0008) +[2026-06-02 17:04:44,249][262582] Updated weights for policy 0, policy_version 38679 (0.0009) +[2026-06-02 17:04:44,940][262582] Updated weights for policy 0, policy_version 38690 (0.0008) +[2026-06-02 17:04:45,120][262582] Updated weights for policy 0, policy_version 38700 (0.0008) +[2026-06-02 17:04:45,317][262582] Updated weights for policy 0, policy_version 38710 (0.0008) +[2026-06-02 17:04:45,514][262582] Updated weights for policy 0, policy_version 38720 (0.0008) +[2026-06-02 17:04:45,727][262582] Updated weights for policy 0, policy_version 38730 (0.0008) +[2026-06-02 17:04:45,943][262582] Updated weights for policy 0, policy_version 38741 (0.0009) +[2026-06-02 17:04:46,007][260776] Fps is (10 sec: 16384.1, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 19824640. Throughput: 0: 18386.5. Samples: 19870720. Policy #0 lag: (min: 45.0, avg: 93.0, max: 106.0) +[2026-06-02 17:04:46,008][260776] Avg episode reward: [(0, '1574.159')] +[2026-06-02 17:04:46,152][262026] Saving new best policy, reward=1574.159! +[2026-06-02 17:04:46,155][262582] Updated weights for policy 0, policy_version 38752 (0.0008) +[2026-06-02 17:04:46,854][262582] Updated weights for policy 0, policy_version 38762 (0.0008) +[2026-06-02 17:04:47,069][262582] Updated weights for policy 0, policy_version 38772 (0.0008) +[2026-06-02 17:04:47,264][262582] Updated weights for policy 0, policy_version 38782 (0.0008) +[2026-06-02 17:04:47,483][262582] Updated weights for policy 0, policy_version 38793 (0.0009) +[2026-06-02 17:04:47,677][262582] Updated weights for policy 0, policy_version 38803 (0.0008) +[2026-06-02 17:04:47,895][262582] Updated weights for policy 0, policy_version 38813 (0.0008) +[2026-06-02 17:04:48,568][262582] Updated weights for policy 0, policy_version 38823 (0.0009) +[2026-06-02 17:04:48,766][262582] Updated weights for policy 0, policy_version 38833 (0.0009) +[2026-06-02 17:04:48,958][262582] Updated weights for policy 0, policy_version 38843 (0.0008) +[2026-06-02 17:04:49,160][262582] Updated weights for policy 0, policy_version 38853 (0.0008) +[2026-06-02 17:04:49,368][262582] Updated weights for policy 0, policy_version 38863 (0.0008) +[2026-06-02 17:04:49,566][262582] Updated weights for policy 0, policy_version 38873 (0.0008) +[2026-06-02 17:04:50,309][262582] Updated weights for policy 0, policy_version 38884 (0.0009) +[2026-06-02 17:04:50,489][262582] Updated weights for policy 0, policy_version 38894 (0.0008) +[2026-06-02 17:04:50,695][262582] Updated weights for policy 0, policy_version 38904 (0.0008) +[2026-06-02 17:04:50,889][262582] Updated weights for policy 0, policy_version 38914 (0.0008) +[2026-06-02 17:04:51,007][260776] Fps is (10 sec: 16383.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 19922944. Throughput: 0: 18460.4. Samples: 19927936. Policy #0 lag: (min: 45.0, avg: 93.0, max: 106.0) +[2026-06-02 17:04:51,008][260776] Avg episode reward: [(0, '1601.000')] +[2026-06-02 17:04:51,090][262582] Updated weights for policy 0, policy_version 38924 (0.0008) +[2026-06-02 17:04:51,308][262582] Updated weights for policy 0, policy_version 38935 (0.0008) +[2026-06-02 17:04:51,479][262026] Saving new best policy, reward=1601.000! +[2026-06-02 17:04:52,041][262582] Updated weights for policy 0, policy_version 38945 (0.0009) +[2026-06-02 17:04:52,218][262582] Updated weights for policy 0, policy_version 38955 (0.0008) +[2026-06-02 17:04:52,422][262582] Updated weights for policy 0, policy_version 38965 (0.0008) +[2026-06-02 17:04:52,619][262582] Updated weights for policy 0, policy_version 38975 (0.0008) +[2026-06-02 17:04:52,846][262582] Updated weights for policy 0, policy_version 38986 (0.0009) +[2026-06-02 17:04:53,064][262582] Updated weights for policy 0, policy_version 38997 (0.0008) +[2026-06-02 17:04:53,259][262582] Updated weights for policy 0, policy_version 39007 (0.0009) +[2026-06-02 17:04:53,978][262582] Updated weights for policy 0, policy_version 39018 (0.0006) +[2026-06-02 17:04:54,172][262582] Updated weights for policy 0, policy_version 39028 (0.0005) +[2026-06-02 17:04:54,373][262582] Updated weights for policy 0, policy_version 39038 (0.0008) +[2026-06-02 17:04:54,585][262582] Updated weights for policy 0, policy_version 39048 (0.0009) +[2026-06-02 17:04:54,779][262582] Updated weights for policy 0, policy_version 39058 (0.0009) +[2026-06-02 17:04:54,989][262582] Updated weights for policy 0, policy_version 39068 (0.0009) +[2026-06-02 17:04:55,680][262582] Updated weights for policy 0, policy_version 39078 (0.0009) +[2026-06-02 17:04:55,874][262582] Updated weights for policy 0, policy_version 39088 (0.0009) +[2026-06-02 17:04:56,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 20021248. Throughput: 0: 18304.0. Samples: 20035968. Policy #0 lag: (min: 45.0, avg: 93.0, max: 106.0) +[2026-06-02 17:04:56,008][260776] Avg episode reward: [(0, '1578.666')] +[2026-06-02 17:04:56,082][262582] Updated weights for policy 0, policy_version 39098 (0.0009) +[2026-06-02 17:04:56,280][262582] Updated weights for policy 0, policy_version 39108 (0.0009) +[2026-06-02 17:04:56,499][262582] Updated weights for policy 0, policy_version 39119 (0.0009) +[2026-06-02 17:04:56,710][262582] Updated weights for policy 0, policy_version 39129 (0.0009) +[2026-06-02 17:04:57,399][262582] Updated weights for policy 0, policy_version 39139 (0.0009) +[2026-06-02 17:04:57,586][262582] Updated weights for policy 0, policy_version 39149 (0.0009) +[2026-06-02 17:04:57,783][262582] Updated weights for policy 0, policy_version 39159 (0.0009) +[2026-06-02 17:04:57,984][262582] Updated weights for policy 0, policy_version 39169 (0.0008) +[2026-06-02 17:04:58,189][262582] Updated weights for policy 0, policy_version 39179 (0.0008) +[2026-06-02 17:04:58,399][262582] Updated weights for policy 0, policy_version 39189 (0.0009) +[2026-06-02 17:04:58,595][262582] Updated weights for policy 0, policy_version 39199 (0.0009) +[2026-06-02 17:04:59,284][262582] Updated weights for policy 0, policy_version 39209 (0.0009) +[2026-06-02 17:04:59,484][262582] Updated weights for policy 0, policy_version 39219 (0.0007) +[2026-06-02 17:04:59,695][262582] Updated weights for policy 0, policy_version 39229 (0.0005) +[2026-06-02 17:04:59,898][262582] Updated weights for policy 0, policy_version 39239 (0.0006) +[2026-06-02 17:05:00,120][262582] Updated weights for policy 0, policy_version 39250 (0.0005) +[2026-06-02 17:05:00,325][262582] Updated weights for policy 0, policy_version 39260 (0.0007) +[2026-06-02 17:05:01,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 20119552. Throughput: 0: 18304.0. Samples: 20142592. Policy #0 lag: (min: 63.0, avg: 80.4, max: 127.0) +[2026-06-02 17:05:01,008][260776] Avg episode reward: [(0, '1573.200')] +[2026-06-02 17:05:01,017][262582] Updated weights for policy 0, policy_version 39270 (0.0009) +[2026-06-02 17:05:01,211][262582] Updated weights for policy 0, policy_version 39280 (0.0009) +[2026-06-02 17:05:01,405][262582] Updated weights for policy 0, policy_version 39290 (0.0008) +[2026-06-02 17:05:01,611][262582] Updated weights for policy 0, policy_version 39300 (0.0009) +[2026-06-02 17:05:01,815][262582] Updated weights for policy 0, policy_version 39310 (0.0009) +[2026-06-02 17:05:02,025][262582] Updated weights for policy 0, policy_version 39320 (0.0009) +[2026-06-02 17:05:02,695][262582] Updated weights for policy 0, policy_version 39330 (0.0009) +[2026-06-02 17:05:02,898][262582] Updated weights for policy 0, policy_version 39341 (0.0009) +[2026-06-02 17:05:03,125][262582] Updated weights for policy 0, policy_version 39352 (0.0008) +[2026-06-02 17:05:03,329][262582] Updated weights for policy 0, policy_version 39362 (0.0008) +[2026-06-02 17:05:03,534][262582] Updated weights for policy 0, policy_version 39372 (0.0009) +[2026-06-02 17:05:03,740][262582] Updated weights for policy 0, policy_version 39382 (0.0009) +[2026-06-02 17:05:03,936][262582] Updated weights for policy 0, policy_version 39392 (0.0008) +[2026-06-02 17:05:04,632][262582] Updated weights for policy 0, policy_version 39403 (0.0009) +[2026-06-02 17:05:04,832][262582] Updated weights for policy 0, policy_version 39413 (0.0009) +[2026-06-02 17:05:05,037][262582] Updated weights for policy 0, policy_version 39423 (0.0009) +[2026-06-02 17:05:05,232][262582] Updated weights for policy 0, policy_version 39433 (0.0009) +[2026-06-02 17:05:05,446][262582] Updated weights for policy 0, policy_version 39443 (0.0009) +[2026-06-02 17:05:05,643][262582] Updated weights for policy 0, policy_version 39453 (0.0009) +[2026-06-02 17:05:06,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18439.0). Total num frames: 20217856. Throughput: 0: 18278.4. Samples: 20200704. Policy #0 lag: (min: 63.0, avg: 80.4, max: 127.0) +[2026-06-02 17:05:06,008][260776] Avg episode reward: [(0, '1580.203')] +[2026-06-02 17:05:06,307][262582] Updated weights for policy 0, policy_version 39463 (0.0009) +[2026-06-02 17:05:06,486][262582] Updated weights for policy 0, policy_version 39473 (0.0009) +[2026-06-02 17:05:06,694][262582] Updated weights for policy 0, policy_version 39483 (0.0009) +[2026-06-02 17:05:06,899][262582] Updated weights for policy 0, policy_version 39493 (0.0009) +[2026-06-02 17:05:07,096][262582] Updated weights for policy 0, policy_version 39503 (0.0009) +[2026-06-02 17:05:07,301][262582] Updated weights for policy 0, policy_version 39513 (0.0009) +[2026-06-02 17:05:07,991][262582] Updated weights for policy 0, policy_version 39523 (0.0009) +[2026-06-02 17:05:08,179][262582] Updated weights for policy 0, policy_version 39533 (0.0009) +[2026-06-02 17:05:08,387][262582] Updated weights for policy 0, policy_version 39543 (0.0008) +[2026-06-02 17:05:08,586][262582] Updated weights for policy 0, policy_version 39553 (0.0009) +[2026-06-02 17:05:08,791][262582] Updated weights for policy 0, policy_version 39563 (0.0008) +[2026-06-02 17:05:08,995][262582] Updated weights for policy 0, policy_version 39573 (0.0008) +[2026-06-02 17:05:09,201][262582] Updated weights for policy 0, policy_version 39583 (0.0009) +[2026-06-02 17:05:09,877][262582] Updated weights for policy 0, policy_version 39593 (0.0008) +[2026-06-02 17:05:10,084][262582] Updated weights for policy 0, policy_version 39603 (0.0008) +[2026-06-02 17:05:10,282][262582] Updated weights for policy 0, policy_version 39613 (0.0008) +[2026-06-02 17:05:10,482][262582] Updated weights for policy 0, policy_version 39623 (0.0008) +[2026-06-02 17:05:10,686][262582] Updated weights for policy 0, policy_version 39633 (0.0008) +[2026-06-02 17:05:10,900][262582] Updated weights for policy 0, policy_version 39643 (0.0009) +[2026-06-02 17:05:11,007][260776] Fps is (10 sec: 19661.0, 60 sec: 18568.6, 300 sec: 18550.0). Total num frames: 20316160. Throughput: 0: 18284.1. Samples: 20305664. Policy #0 lag: (min: 63.0, avg: 80.4, max: 127.0) +[2026-06-02 17:05:11,008][260776] Avg episode reward: [(0, '1606.037')] +[2026-06-02 17:05:11,012][262026] Saving new best policy, reward=1606.037! +[2026-06-02 17:05:11,576][262582] Updated weights for policy 0, policy_version 39653 (0.0008) +[2026-06-02 17:05:11,764][262582] Updated weights for policy 0, policy_version 39663 (0.0008) +[2026-06-02 17:05:11,963][262582] Updated weights for policy 0, policy_version 39673 (0.0008) +[2026-06-02 17:05:12,170][262582] Updated weights for policy 0, policy_version 39683 (0.0008) +[2026-06-02 17:05:12,382][262582] Updated weights for policy 0, policy_version 39693 (0.0009) +[2026-06-02 17:05:12,583][262582] Updated weights for policy 0, policy_version 39703 (0.0009) +[2026-06-02 17:05:13,272][262582] Updated weights for policy 0, policy_version 39713 (0.0008) +[2026-06-02 17:05:13,465][262582] Updated weights for policy 0, policy_version 39723 (0.0008) +[2026-06-02 17:05:13,670][262582] Updated weights for policy 0, policy_version 39733 (0.0009) +[2026-06-02 17:05:13,887][262582] Updated weights for policy 0, policy_version 39744 (0.0009) +[2026-06-02 17:05:14,095][262582] Updated weights for policy 0, policy_version 39754 (0.0009) +[2026-06-02 17:05:14,299][262582] Updated weights for policy 0, policy_version 39764 (0.0008) +[2026-06-02 17:05:14,507][262582] Updated weights for policy 0, policy_version 39774 (0.0008) +[2026-06-02 17:05:15,179][262582] Updated weights for policy 0, policy_version 39784 (0.0009) +[2026-06-02 17:05:15,395][262582] Updated weights for policy 0, policy_version 39795 (0.0008) +[2026-06-02 17:05:15,595][262582] Updated weights for policy 0, policy_version 39805 (0.0008) +[2026-06-02 17:05:15,800][262582] Updated weights for policy 0, policy_version 39815 (0.0008) +[2026-06-02 17:05:16,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 20381696. Throughput: 0: 18560.0. Samples: 20423680. Policy #0 lag: (min: 63.0, avg: 80.4, max: 127.0) +[2026-06-02 17:05:16,008][260776] Avg episode reward: [(0, '1614.659')] +[2026-06-02 17:05:16,011][262582] Updated weights for policy 0, policy_version 39825 (0.0008) +[2026-06-02 17:05:16,213][262582] Updated weights for policy 0, policy_version 39835 (0.0008) +[2026-06-02 17:05:16,303][262026] Saving new best policy, reward=1614.659! +[2026-06-02 17:05:16,884][262582] Updated weights for policy 0, policy_version 39845 (0.0008) +[2026-06-02 17:05:17,082][262582] Updated weights for policy 0, policy_version 39855 (0.0008) +[2026-06-02 17:05:17,279][262582] Updated weights for policy 0, policy_version 39865 (0.0008) +[2026-06-02 17:05:17,488][262582] Updated weights for policy 0, policy_version 39875 (0.0008) +[2026-06-02 17:05:17,689][262582] Updated weights for policy 0, policy_version 39885 (0.0008) +[2026-06-02 17:05:17,896][262582] Updated weights for policy 0, policy_version 39895 (0.0008) +[2026-06-02 17:05:18,603][262582] Updated weights for policy 0, policy_version 39905 (0.0009) +[2026-06-02 17:05:18,800][262582] Updated weights for policy 0, policy_version 39915 (0.0008) +[2026-06-02 17:05:18,999][262582] Updated weights for policy 0, policy_version 39925 (0.0009) +[2026-06-02 17:05:19,196][262582] Updated weights for policy 0, policy_version 39935 (0.0008) +[2026-06-02 17:05:19,398][262582] Updated weights for policy 0, policy_version 39945 (0.0008) +[2026-06-02 17:05:19,614][262582] Updated weights for policy 0, policy_version 39955 (0.0008) +[2026-06-02 17:05:19,816][262582] Updated weights for policy 0, policy_version 39965 (0.0008) +[2026-06-02 17:05:20,475][262582] Updated weights for policy 0, policy_version 39975 (0.0008) +[2026-06-02 17:05:20,697][262582] Updated weights for policy 0, policy_version 39986 (0.0008) +[2026-06-02 17:05:20,898][262582] Updated weights for policy 0, policy_version 39996 (0.0008) +[2026-06-02 17:05:21,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 20480000. Throughput: 0: 18412.1. Samples: 20475392. Policy #0 lag: (min: 63.0, avg: 80.4, max: 127.0) +[2026-06-02 17:05:21,008][260776] Avg episode reward: [(0, '1600.752')] +[2026-06-02 17:05:21,092][262582] Updated weights for policy 0, policy_version 40006 (0.0008) +[2026-06-02 17:05:21,304][262582] Updated weights for policy 0, policy_version 40016 (0.0009) +[2026-06-02 17:05:21,524][262582] Updated weights for policy 0, policy_version 40027 (0.0009) +[2026-06-02 17:05:22,241][262582] Updated weights for policy 0, policy_version 40037 (0.0009) +[2026-06-02 17:05:22,433][262582] Updated weights for policy 0, policy_version 40047 (0.0009) +[2026-06-02 17:05:22,653][262582] Updated weights for policy 0, policy_version 40058 (0.0008) +[2026-06-02 17:05:22,853][262582] Updated weights for policy 0, policy_version 40068 (0.0008) +[2026-06-02 17:05:23,055][262582] Updated weights for policy 0, policy_version 40078 (0.0009) +[2026-06-02 17:05:23,276][262582] Updated weights for policy 0, policy_version 40089 (0.0008) +[2026-06-02 17:05:23,990][262582] Updated weights for policy 0, policy_version 40100 (0.0006) +[2026-06-02 17:05:24,181][262582] Updated weights for policy 0, policy_version 40110 (0.0005) +[2026-06-02 17:05:24,402][262582] Updated weights for policy 0, policy_version 40121 (0.0005) +[2026-06-02 17:05:24,604][262582] Updated weights for policy 0, policy_version 40131 (0.0005) +[2026-06-02 17:05:24,808][262582] Updated weights for policy 0, policy_version 40141 (0.0005) +[2026-06-02 17:05:25,020][262582] Updated weights for policy 0, policy_version 40151 (0.0005) +[2026-06-02 17:05:25,710][262582] Updated weights for policy 0, policy_version 40161 (0.0005) +[2026-06-02 17:05:25,901][262582] Updated weights for policy 0, policy_version 40171 (0.0005) +[2026-06-02 17:05:26,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.6, 300 sec: 18439.0). Total num frames: 20578304. Throughput: 0: 18451.9. Samples: 20588672. Policy #0 lag: (min: 52.0, avg: 69.3, max: 116.0) +[2026-06-02 17:05:26,008][260776] Avg episode reward: [(0, '1614.178')] +[2026-06-02 17:05:26,099][262582] Updated weights for policy 0, policy_version 40181 (0.0005) +[2026-06-02 17:05:26,322][262582] Updated weights for policy 0, policy_version 40192 (0.0005) +[2026-06-02 17:05:26,538][262582] Updated weights for policy 0, policy_version 40203 (0.0005) +[2026-06-02 17:05:26,747][262582] Updated weights for policy 0, policy_version 40213 (0.0007) +[2026-06-02 17:05:26,957][262582] Updated weights for policy 0, policy_version 40224 (0.0008) +[2026-06-02 17:05:27,698][262582] Updated weights for policy 0, policy_version 40235 (0.0009) +[2026-06-02 17:05:27,891][262582] Updated weights for policy 0, policy_version 40245 (0.0008) +[2026-06-02 17:05:28,098][262582] Updated weights for policy 0, policy_version 40255 (0.0009) +[2026-06-02 17:05:28,332][262582] Updated weights for policy 0, policy_version 40267 (0.0008) +[2026-06-02 17:05:28,549][262582] Updated weights for policy 0, policy_version 40277 (0.0008) +[2026-06-02 17:05:28,748][262582] Updated weights for policy 0, policy_version 40287 (0.0009) +[2026-06-02 17:05:29,424][262582] Updated weights for policy 0, policy_version 40297 (0.0009) +[2026-06-02 17:05:29,618][262582] Updated weights for policy 0, policy_version 40307 (0.0008) +[2026-06-02 17:05:29,817][262582] Updated weights for policy 0, policy_version 40317 (0.0009) +[2026-06-02 17:05:30,007][262582] Updated weights for policy 0, policy_version 40327 (0.0008) +[2026-06-02 17:05:30,209][262582] Updated weights for policy 0, policy_version 40337 (0.0005) +[2026-06-02 17:05:30,434][262582] Updated weights for policy 0, policy_version 40348 (0.0004) +[2026-06-02 17:05:31,007][260776] Fps is (10 sec: 19661.0, 60 sec: 18568.6, 300 sec: 18439.0). Total num frames: 20676608. Throughput: 0: 18298.4. Samples: 20694144. Policy #0 lag: (min: 52.0, avg: 69.3, max: 116.0) +[2026-06-02 17:05:31,007][260776] Avg episode reward: [(0, '1645.939')] +[2026-06-02 17:05:31,134][262582] Updated weights for policy 0, policy_version 40358 (0.0004) +[2026-06-02 17:05:31,326][262582] Updated weights for policy 0, policy_version 40368 (0.0006) +[2026-06-02 17:05:31,524][262582] Updated weights for policy 0, policy_version 40378 (0.0007) +[2026-06-02 17:05:31,719][262582] Updated weights for policy 0, policy_version 40388 (0.0007) +[2026-06-02 17:05:31,930][262582] Updated weights for policy 0, policy_version 40398 (0.0009) +[2026-06-02 17:05:32,133][262582] Updated weights for policy 0, policy_version 40408 (0.0008) +[2026-06-02 17:05:32,285][262026] Saving new best policy, reward=1645.939! +[2026-06-02 17:05:32,840][262582] Updated weights for policy 0, policy_version 40418 (0.0008) +[2026-06-02 17:05:33,045][262582] Updated weights for policy 0, policy_version 40429 (0.0008) +[2026-06-02 17:05:33,246][262582] Updated weights for policy 0, policy_version 40439 (0.0008) +[2026-06-02 17:05:33,451][262582] Updated weights for policy 0, policy_version 40449 (0.0008) +[2026-06-02 17:05:33,655][262582] Updated weights for policy 0, policy_version 40459 (0.0008) +[2026-06-02 17:05:33,859][262582] Updated weights for policy 0, policy_version 40469 (0.0007) +[2026-06-02 17:05:34,052][262582] Updated weights for policy 0, policy_version 40479 (0.0005) +[2026-06-02 17:05:34,739][262582] Updated weights for policy 0, policy_version 40489 (0.0008) +[2026-06-02 17:05:34,941][262582] Updated weights for policy 0, policy_version 40499 (0.0008) +[2026-06-02 17:05:35,130][262582] Updated weights for policy 0, policy_version 40509 (0.0008) +[2026-06-02 17:05:35,332][262582] Updated weights for policy 0, policy_version 40519 (0.0008) +[2026-06-02 17:05:35,545][262582] Updated weights for policy 0, policy_version 40529 (0.0008) +[2026-06-02 17:05:35,739][262582] Updated weights for policy 0, policy_version 40539 (0.0008) +[2026-06-02 17:05:36,007][260776] Fps is (10 sec: 19660.5, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 20774912. Throughput: 0: 18352.3. Samples: 20753792. Policy #0 lag: (min: 52.0, avg: 69.3, max: 116.0) +[2026-06-02 17:05:36,008][260776] Avg episode reward: [(0, '1668.517')] +[2026-06-02 17:05:36,013][262026] Saving new best policy, reward=1668.517! +[2026-06-02 17:05:36,447][262582] Updated weights for policy 0, policy_version 40549 (0.0008) +[2026-06-02 17:05:36,659][262582] Updated weights for policy 0, policy_version 40560 (0.0008) +[2026-06-02 17:05:36,857][262582] Updated weights for policy 0, policy_version 40570 (0.0008) +[2026-06-02 17:05:37,057][262582] Updated weights for policy 0, policy_version 40580 (0.0008) +[2026-06-02 17:05:37,267][262582] Updated weights for policy 0, policy_version 40590 (0.0008) +[2026-06-02 17:05:37,469][262582] Updated weights for policy 0, policy_version 40600 (0.0008) +[2026-06-02 17:05:38,156][262582] Updated weights for policy 0, policy_version 40610 (0.0008) +[2026-06-02 17:05:38,367][262582] Updated weights for policy 0, policy_version 40621 (0.0008) +[2026-06-02 17:05:38,567][262582] Updated weights for policy 0, policy_version 40631 (0.0008) +[2026-06-02 17:05:38,774][262582] Updated weights for policy 0, policy_version 40641 (0.0008) +[2026-06-02 17:05:38,984][262582] Updated weights for policy 0, policy_version 40651 (0.0008) +[2026-06-02 17:05:39,213][262582] Updated weights for policy 0, policy_version 40662 (0.0008) +[2026-06-02 17:05:39,409][262582] Updated weights for policy 0, policy_version 40672 (0.0008) +[2026-06-02 17:05:40,086][262582] Updated weights for policy 0, policy_version 40682 (0.0009) +[2026-06-02 17:05:40,287][262582] Updated weights for policy 0, policy_version 40692 (0.0008) +[2026-06-02 17:05:40,513][262582] Updated weights for policy 0, policy_version 40703 (0.0009) +[2026-06-02 17:05:40,713][262582] Updated weights for policy 0, policy_version 40713 (0.0008) +[2026-06-02 17:05:40,917][262582] Updated weights for policy 0, policy_version 40723 (0.0008) +[2026-06-02 17:05:41,007][260776] Fps is (10 sec: 16383.8, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 20840448. Throughput: 0: 18309.7. Samples: 20859904. Policy #0 lag: (min: 52.0, avg: 69.3, max: 116.0) +[2026-06-02 17:05:41,008][260776] Avg episode reward: [(0, '1669.713')] +[2026-06-02 17:05:41,123][262582] Updated weights for policy 0, policy_version 40733 (0.0008) +[2026-06-02 17:05:41,174][262026] Saving new best policy, reward=1669.713! +[2026-06-02 17:05:41,807][262582] Updated weights for policy 0, policy_version 40743 (0.0008) +[2026-06-02 17:05:42,029][262582] Updated weights for policy 0, policy_version 40754 (0.0008) +[2026-06-02 17:05:42,249][262582] Updated weights for policy 0, policy_version 40765 (0.0008) +[2026-06-02 17:05:42,446][262582] Updated weights for policy 0, policy_version 40775 (0.0008) +[2026-06-02 17:05:42,662][262582] Updated weights for policy 0, policy_version 40785 (0.0009) +[2026-06-02 17:05:42,882][262582] Updated weights for policy 0, policy_version 40796 (0.0009) +[2026-06-02 17:05:43,550][262582] Updated weights for policy 0, policy_version 40806 (0.0008) +[2026-06-02 17:05:43,746][262582] Updated weights for policy 0, policy_version 40816 (0.0008) +[2026-06-02 17:05:43,948][262582] Updated weights for policy 0, policy_version 40826 (0.0008) +[2026-06-02 17:05:44,153][262582] Updated weights for policy 0, policy_version 40836 (0.0008) +[2026-06-02 17:05:44,360][262582] Updated weights for policy 0, policy_version 40846 (0.0009) +[2026-06-02 17:05:44,560][262582] Updated weights for policy 0, policy_version 40856 (0.0008) +[2026-06-02 17:05:45,269][262582] Updated weights for policy 0, policy_version 40866 (0.0008) +[2026-06-02 17:05:45,451][262582] Updated weights for policy 0, policy_version 40876 (0.0008) +[2026-06-02 17:05:45,662][262582] Updated weights for policy 0, policy_version 40886 (0.0008) +[2026-06-02 17:05:45,868][262582] Updated weights for policy 0, policy_version 40896 (0.0009) +[2026-06-02 17:05:46,007][260776] Fps is (10 sec: 16384.2, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 20938752. Throughput: 0: 18531.6. Samples: 20976512. Policy #0 lag: (min: 52.0, avg: 69.3, max: 116.0) +[2026-06-02 17:05:46,008][260776] Avg episode reward: [(0, '1658.478')] +[2026-06-02 17:05:46,060][262582] Updated weights for policy 0, policy_version 40906 (0.0008) +[2026-06-02 17:05:46,277][262582] Updated weights for policy 0, policy_version 40916 (0.0008) +[2026-06-02 17:05:46,483][262582] Updated weights for policy 0, policy_version 40926 (0.0008) +[2026-06-02 17:05:47,159][262582] Updated weights for policy 0, policy_version 40936 (0.0008) +[2026-06-02 17:05:47,368][262582] Updated weights for policy 0, policy_version 40947 (0.0008) +[2026-06-02 17:05:47,583][262582] Updated weights for policy 0, policy_version 40957 (0.0008) +[2026-06-02 17:05:47,777][262582] Updated weights for policy 0, policy_version 40967 (0.0008) +[2026-06-02 17:05:47,988][262582] Updated weights for policy 0, policy_version 40977 (0.0009) +[2026-06-02 17:05:48,191][262582] Updated weights for policy 0, policy_version 40987 (0.0008) +[2026-06-02 17:05:48,878][262582] Updated weights for policy 0, policy_version 40997 (0.0008) +[2026-06-02 17:05:49,072][262582] Updated weights for policy 0, policy_version 41007 (0.0008) +[2026-06-02 17:05:49,280][262582] Updated weights for policy 0, policy_version 41017 (0.0007) +[2026-06-02 17:05:49,484][262582] Updated weights for policy 0, policy_version 41027 (0.0008) +[2026-06-02 17:05:49,708][262582] Updated weights for policy 0, policy_version 41038 (0.0010) +[2026-06-02 17:05:49,913][262582] Updated weights for policy 0, policy_version 41048 (0.0010) +[2026-06-02 17:05:50,601][262582] Updated weights for policy 0, policy_version 41058 (0.0008) +[2026-06-02 17:05:50,792][262582] Updated weights for policy 0, policy_version 41068 (0.0004) +[2026-06-02 17:05:51,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 21037056. Throughput: 0: 18286.9. Samples: 21023616. Policy #0 lag: (min: 52.0, avg: 69.3, max: 116.0) +[2026-06-02 17:05:51,008][260776] Avg episode reward: [(0, '1658.478')] +[2026-06-02 17:05:51,011][262582] Updated weights for policy 0, policy_version 41079 (0.0006) +[2026-06-02 17:05:51,204][262582] Updated weights for policy 0, policy_version 41089 (0.0009) +[2026-06-02 17:05:51,416][262582] Updated weights for policy 0, policy_version 41099 (0.0008) +[2026-06-02 17:05:51,612][262582] Updated weights for policy 0, policy_version 41109 (0.0006) +[2026-06-02 17:05:51,837][262582] Updated weights for policy 0, policy_version 41120 (0.0006) +[2026-06-02 17:05:52,520][262582] Updated weights for policy 0, policy_version 41130 (0.0008) +[2026-06-02 17:05:52,729][262582] Updated weights for policy 0, policy_version 41140 (0.0009) +[2026-06-02 17:05:52,934][262582] Updated weights for policy 0, policy_version 41150 (0.0008) +[2026-06-02 17:05:53,141][262582] Updated weights for policy 0, policy_version 41160 (0.0009) +[2026-06-02 17:05:53,342][262582] Updated weights for policy 0, policy_version 41170 (0.0008) +[2026-06-02 17:05:53,539][262582] Updated weights for policy 0, policy_version 41180 (0.0008) +[2026-06-02 17:05:54,228][262582] Updated weights for policy 0, policy_version 41190 (0.0009) +[2026-06-02 17:05:54,416][262582] Updated weights for policy 0, policy_version 41200 (0.0008) +[2026-06-02 17:05:54,635][262582] Updated weights for policy 0, policy_version 41210 (0.0009) +[2026-06-02 17:05:54,829][262582] Updated weights for policy 0, policy_version 41220 (0.0008) +[2026-06-02 17:05:55,035][262582] Updated weights for policy 0, policy_version 41230 (0.0008) +[2026-06-02 17:05:55,243][262582] Updated weights for policy 0, policy_version 41240 (0.0009) +[2026-06-02 17:05:55,928][262582] Updated weights for policy 0, policy_version 41250 (0.0009) +[2026-06-02 17:05:56,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 21135360. Throughput: 0: 18574.2. Samples: 21141504. Policy #0 lag: (min: 19.0, avg: 50.1, max: 83.0) +[2026-06-02 17:05:56,008][260776] Avg episode reward: [(0, '1655.780')] +[2026-06-02 17:05:56,114][262582] Updated weights for policy 0, policy_version 41260 (0.0008) +[2026-06-02 17:05:56,315][262582] Updated weights for policy 0, policy_version 41270 (0.0009) +[2026-06-02 17:05:56,504][262582] Updated weights for policy 0, policy_version 41280 (0.0009) +[2026-06-02 17:05:56,720][262582] Updated weights for policy 0, policy_version 41290 (0.0009) +[2026-06-02 17:05:56,924][262582] Updated weights for policy 0, policy_version 41300 (0.0009) +[2026-06-02 17:05:57,151][262582] Updated weights for policy 0, policy_version 41311 (0.0009) +[2026-06-02 17:05:57,824][262582] Updated weights for policy 0, policy_version 41321 (0.0008) +[2026-06-02 17:05:58,017][262582] Updated weights for policy 0, policy_version 41331 (0.0009) +[2026-06-02 17:05:58,231][262582] Updated weights for policy 0, policy_version 41341 (0.0009) +[2026-06-02 17:05:58,433][262582] Updated weights for policy 0, policy_version 41351 (0.0009) +[2026-06-02 17:05:58,652][262582] Updated weights for policy 0, policy_version 41362 (0.0009) +[2026-06-02 17:05:58,862][262582] Updated weights for policy 0, policy_version 41372 (0.0009) +[2026-06-02 17:05:59,540][262582] Updated weights for policy 0, policy_version 41382 (0.0009) +[2026-06-02 17:05:59,736][262582] Updated weights for policy 0, policy_version 41392 (0.0008) +[2026-06-02 17:05:59,962][262582] Updated weights for policy 0, policy_version 41403 (0.0009) +[2026-06-02 17:06:00,156][262582] Updated weights for policy 0, policy_version 41413 (0.0008) +[2026-06-02 17:06:00,368][262582] Updated weights for policy 0, policy_version 41423 (0.0009) +[2026-06-02 17:06:00,572][262582] Updated weights for policy 0, policy_version 41433 (0.0008) +[2026-06-02 17:06:01,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 21233664. Throughput: 0: 18295.5. Samples: 21246976. Policy #0 lag: (min: 19.0, avg: 50.1, max: 83.0) +[2026-06-02 17:06:01,008][260776] Avg episode reward: [(0, '1671.841')] +[2026-06-02 17:06:01,013][262026] Saving new best policy, reward=1671.841! +[2026-06-02 17:06:01,282][262582] Updated weights for policy 0, policy_version 41443 (0.0009) +[2026-06-02 17:06:01,465][262582] Updated weights for policy 0, policy_version 41453 (0.0008) +[2026-06-02 17:06:01,674][262582] Updated weights for policy 0, policy_version 41463 (0.0008) +[2026-06-02 17:06:01,879][262582] Updated weights for policy 0, policy_version 41473 (0.0008) +[2026-06-02 17:06:02,102][262582] Updated weights for policy 0, policy_version 41484 (0.0009) +[2026-06-02 17:06:02,305][262582] Updated weights for policy 0, policy_version 41494 (0.0008) +[2026-06-02 17:06:02,500][262582] Updated weights for policy 0, policy_version 41504 (0.0008) +[2026-06-02 17:06:03,139][262582] Updated weights for policy 0, policy_version 41514 (0.0008) +[2026-06-02 17:06:03,364][262582] Updated weights for policy 0, policy_version 41525 (0.0008) +[2026-06-02 17:06:03,565][262582] Updated weights for policy 0, policy_version 41535 (0.0008) +[2026-06-02 17:06:03,764][262582] Updated weights for policy 0, policy_version 41545 (0.0008) +[2026-06-02 17:06:03,971][262582] Updated weights for policy 0, policy_version 41555 (0.0008) +[2026-06-02 17:06:04,171][262582] Updated weights for policy 0, policy_version 41565 (0.0008) +[2026-06-02 17:06:04,845][262582] Updated weights for policy 0, policy_version 41575 (0.0008) +[2026-06-02 17:06:05,072][262582] Updated weights for policy 0, policy_version 41586 (0.0008) +[2026-06-02 17:06:05,274][262582] Updated weights for policy 0, policy_version 41596 (0.0008) +[2026-06-02 17:06:05,471][262582] Updated weights for policy 0, policy_version 41606 (0.0008) +[2026-06-02 17:06:05,682][262582] Updated weights for policy 0, policy_version 41616 (0.0009) +[2026-06-02 17:06:05,908][262582] Updated weights for policy 0, policy_version 41627 (0.0008) +[2026-06-02 17:06:06,007][260776] Fps is (10 sec: 19661.0, 60 sec: 18568.6, 300 sec: 18550.0). Total num frames: 21331968. Throughput: 0: 18437.7. Samples: 21305088. Policy #0 lag: (min: 19.0, avg: 50.1, max: 83.0) +[2026-06-02 17:06:06,007][260776] Avg episode reward: [(0, '1665.524')] +[2026-06-02 17:06:06,579][262582] Updated weights for policy 0, policy_version 41637 (0.0009) +[2026-06-02 17:06:06,789][262582] Updated weights for policy 0, policy_version 41648 (0.0009) +[2026-06-02 17:06:06,988][262582] Updated weights for policy 0, policy_version 41658 (0.0009) +[2026-06-02 17:06:07,194][262582] Updated weights for policy 0, policy_version 41668 (0.0008) +[2026-06-02 17:06:07,397][262582] Updated weights for policy 0, policy_version 41678 (0.0008) +[2026-06-02 17:06:07,598][262582] Updated weights for policy 0, policy_version 41688 (0.0008) +[2026-06-02 17:06:08,315][262582] Updated weights for policy 0, policy_version 41698 (0.0008) +[2026-06-02 17:06:08,501][262582] Updated weights for policy 0, policy_version 41708 (0.0009) +[2026-06-02 17:06:08,692][262582] Updated weights for policy 0, policy_version 41718 (0.0008) +[2026-06-02 17:06:08,896][262582] Updated weights for policy 0, policy_version 41728 (0.0008) +[2026-06-02 17:06:09,103][262582] Updated weights for policy 0, policy_version 41738 (0.0008) +[2026-06-02 17:06:09,310][262582] Updated weights for policy 0, policy_version 41748 (0.0009) +[2026-06-02 17:06:09,514][262582] Updated weights for policy 0, policy_version 41758 (0.0008) +[2026-06-02 17:06:10,187][262582] Updated weights for policy 0, policy_version 41768 (0.0008) +[2026-06-02 17:06:10,381][262582] Updated weights for policy 0, policy_version 41778 (0.0008) +[2026-06-02 17:06:10,579][262582] Updated weights for policy 0, policy_version 41788 (0.0008) +[2026-06-02 17:06:10,793][262582] Updated weights for policy 0, policy_version 41798 (0.0008) +[2026-06-02 17:06:10,995][262582] Updated weights for policy 0, policy_version 41808 (0.0008) +[2026-06-02 17:06:11,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 21397504. Throughput: 0: 18272.7. Samples: 21410944. Policy #0 lag: (min: 19.0, avg: 50.1, max: 83.0) +[2026-06-02 17:06:11,008][260776] Avg episode reward: [(0, '1666.005')] +[2026-06-02 17:06:11,200][262582] Updated weights for policy 0, policy_version 41818 (0.0008) +[2026-06-02 17:06:11,922][262582] Updated weights for policy 0, policy_version 41828 (0.0008) +[2026-06-02 17:06:12,124][262582] Updated weights for policy 0, policy_version 41838 (0.0008) +[2026-06-02 17:06:12,325][262582] Updated weights for policy 0, policy_version 41848 (0.0008) +[2026-06-02 17:06:12,524][262582] Updated weights for policy 0, policy_version 41858 (0.0008) +[2026-06-02 17:06:12,727][262582] Updated weights for policy 0, policy_version 41868 (0.0008) +[2026-06-02 17:06:12,941][262582] Updated weights for policy 0, policy_version 41878 (0.0008) +[2026-06-02 17:06:13,137][262582] Updated weights for policy 0, policy_version 41888 (0.0008) +[2026-06-02 17:06:13,791][262582] Updated weights for policy 0, policy_version 41898 (0.0008) +[2026-06-02 17:06:13,989][262582] Updated weights for policy 0, policy_version 41908 (0.0008) +[2026-06-02 17:06:14,207][262582] Updated weights for policy 0, policy_version 41918 (0.0008) +[2026-06-02 17:06:14,402][262582] Updated weights for policy 0, policy_version 41928 (0.0008) +[2026-06-02 17:06:14,615][262582] Updated weights for policy 0, policy_version 41938 (0.0008) +[2026-06-02 17:06:14,819][262582] Updated weights for policy 0, policy_version 41948 (0.0009) +[2026-06-02 17:06:15,509][262582] Updated weights for policy 0, policy_version 41958 (0.0008) +[2026-06-02 17:06:15,698][262582] Updated weights for policy 0, policy_version 41968 (0.0008) +[2026-06-02 17:06:15,909][262582] Updated weights for policy 0, policy_version 41978 (0.0008) +[2026-06-02 17:06:16,007][260776] Fps is (10 sec: 16383.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 21495808. Throughput: 0: 18551.4. Samples: 21528960. Policy #0 lag: (min: 19.0, avg: 50.1, max: 83.0) +[2026-06-02 17:06:16,008][260776] Avg episode reward: [(0, '1701.729')] +[2026-06-02 17:06:16,106][262582] Updated weights for policy 0, policy_version 41988 (0.0008) +[2026-06-02 17:06:16,308][262582] Updated weights for policy 0, policy_version 41998 (0.0008) +[2026-06-02 17:06:16,527][262582] Updated weights for policy 0, policy_version 42009 (0.0008) +[2026-06-02 17:06:16,667][262026] Saving new best policy, reward=1701.729! +[2026-06-02 17:06:17,225][262582] Updated weights for policy 0, policy_version 42019 (0.0008) +[2026-06-02 17:06:17,401][262582] Updated weights for policy 0, policy_version 42029 (0.0004) +[2026-06-02 17:06:17,606][262582] Updated weights for policy 0, policy_version 42039 (0.0004) +[2026-06-02 17:06:17,809][262582] Updated weights for policy 0, policy_version 42049 (0.0004) +[2026-06-02 17:06:18,013][262582] Updated weights for policy 0, policy_version 42059 (0.0004) +[2026-06-02 17:06:18,210][262582] Updated weights for policy 0, policy_version 42069 (0.0004) +[2026-06-02 17:06:18,415][262582] Updated weights for policy 0, policy_version 42079 (0.0004) +[2026-06-02 17:06:19,079][262582] Updated weights for policy 0, policy_version 42089 (0.0005) +[2026-06-02 17:06:19,301][262582] Updated weights for policy 0, policy_version 42100 (0.0007) +[2026-06-02 17:06:19,499][262582] Updated weights for policy 0, policy_version 42110 (0.0004) +[2026-06-02 17:06:19,702][262582] Updated weights for policy 0, policy_version 42120 (0.0004) +[2026-06-02 17:06:19,912][262582] Updated weights for policy 0, policy_version 42130 (0.0004) +[2026-06-02 17:06:20,119][262582] Updated weights for policy 0, policy_version 42140 (0.0004) +[2026-06-02 17:06:20,802][262582] Updated weights for policy 0, policy_version 42150 (0.0007) +[2026-06-02 17:06:21,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 21594112. Throughput: 0: 18264.2. Samples: 21575680. Policy #0 lag: (min: 19.0, avg: 50.1, max: 83.0) +[2026-06-02 17:06:21,008][260776] Avg episode reward: [(0, '1682.944')] +[2026-06-02 17:06:21,012][262582] Updated weights for policy 0, policy_version 42160 (0.0009) +[2026-06-02 17:06:21,209][262582] Updated weights for policy 0, policy_version 42170 (0.0008) +[2026-06-02 17:06:21,414][262582] Updated weights for policy 0, policy_version 42180 (0.0005) +[2026-06-02 17:06:21,615][262582] Updated weights for policy 0, policy_version 42190 (0.0004) +[2026-06-02 17:06:21,818][262582] Updated weights for policy 0, policy_version 42200 (0.0008) +[2026-06-02 17:06:22,485][262582] Updated weights for policy 0, policy_version 42210 (0.0005) +[2026-06-02 17:06:22,677][262582] Updated weights for policy 0, policy_version 42220 (0.0008) +[2026-06-02 17:06:22,880][262582] Updated weights for policy 0, policy_version 42230 (0.0010) +[2026-06-02 17:06:23,085][262582] Updated weights for policy 0, policy_version 42240 (0.0008) +[2026-06-02 17:06:23,290][262582] Updated weights for policy 0, policy_version 42250 (0.0008) +[2026-06-02 17:06:23,483][262582] Updated weights for policy 0, policy_version 42260 (0.0009) +[2026-06-02 17:06:23,687][262582] Updated weights for policy 0, policy_version 42270 (0.0008) +[2026-06-02 17:06:24,393][262582] Updated weights for policy 0, policy_version 42281 (0.0008) +[2026-06-02 17:06:24,595][262582] Updated weights for policy 0, policy_version 42291 (0.0008) +[2026-06-02 17:06:24,801][262582] Updated weights for policy 0, policy_version 42301 (0.0008) +[2026-06-02 17:06:24,995][262582] Updated weights for policy 0, policy_version 42311 (0.0008) +[2026-06-02 17:06:25,201][262582] Updated weights for policy 0, policy_version 42321 (0.0009) +[2026-06-02 17:06:25,413][262582] Updated weights for policy 0, policy_version 42331 (0.0009) +[2026-06-02 17:06:26,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 21692416. Throughput: 0: 18540.1. Samples: 21694208. Policy #0 lag: (min: 6.0, avg: 23.1, max: 70.0) +[2026-06-02 17:06:26,008][260776] Avg episode reward: [(0, '1707.124')] +[2026-06-02 17:06:26,099][262582] Updated weights for policy 0, policy_version 42341 (0.0008) +[2026-06-02 17:06:26,292][262582] Updated weights for policy 0, policy_version 42351 (0.0008) +[2026-06-02 17:06:26,500][262582] Updated weights for policy 0, policy_version 42361 (0.0009) +[2026-06-02 17:06:26,702][262582] Updated weights for policy 0, policy_version 42371 (0.0009) +[2026-06-02 17:06:26,905][262582] Updated weights for policy 0, policy_version 42381 (0.0009) +[2026-06-02 17:06:27,110][262582] Updated weights for policy 0, policy_version 42391 (0.0009) +[2026-06-02 17:06:27,280][262026] Saving new best policy, reward=1707.124! +[2026-06-02 17:06:27,807][262582] Updated weights for policy 0, policy_version 42401 (0.0008) +[2026-06-02 17:06:27,985][262582] Updated weights for policy 0, policy_version 42411 (0.0004) +[2026-06-02 17:06:28,181][262582] Updated weights for policy 0, policy_version 42421 (0.0004) +[2026-06-02 17:06:28,383][262582] Updated weights for policy 0, policy_version 42431 (0.0007) +[2026-06-02 17:06:28,589][262582] Updated weights for policy 0, policy_version 42441 (0.0008) +[2026-06-02 17:06:28,792][262582] Updated weights for policy 0, policy_version 42451 (0.0008) +[2026-06-02 17:06:28,993][262582] Updated weights for policy 0, policy_version 42461 (0.0008) +[2026-06-02 17:06:29,684][262582] Updated weights for policy 0, policy_version 42471 (0.0008) +[2026-06-02 17:06:29,883][262582] Updated weights for policy 0, policy_version 42481 (0.0008) +[2026-06-02 17:06:30,094][262582] Updated weights for policy 0, policy_version 42491 (0.0008) +[2026-06-02 17:06:30,291][262582] Updated weights for policy 0, policy_version 42501 (0.0009) +[2026-06-02 17:06:30,494][262582] Updated weights for policy 0, policy_version 42511 (0.0008) +[2026-06-02 17:06:30,694][262582] Updated weights for policy 0, policy_version 42521 (0.0008) +[2026-06-02 17:06:31,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 21790720. Throughput: 0: 18318.2. Samples: 21800832. Policy #0 lag: (min: 6.0, avg: 23.1, max: 70.0) +[2026-06-02 17:06:31,008][260776] Avg episode reward: [(0, '1721.587')] +[2026-06-02 17:06:31,012][262026] Saving new best policy, reward=1721.587! +[2026-06-02 17:06:31,397][262582] Updated weights for policy 0, policy_version 42531 (0.0009) +[2026-06-02 17:06:31,580][262582] Updated weights for policy 0, policy_version 42541 (0.0008) +[2026-06-02 17:06:31,779][262582] Updated weights for policy 0, policy_version 42551 (0.0008) +[2026-06-02 17:06:32,006][262582] Updated weights for policy 0, policy_version 42562 (0.0008) +[2026-06-02 17:06:32,208][262582] Updated weights for policy 0, policy_version 42572 (0.0008) +[2026-06-02 17:06:32,412][262582] Updated weights for policy 0, policy_version 42582 (0.0008) +[2026-06-02 17:06:32,607][262582] Updated weights for policy 0, policy_version 42592 (0.0008) +[2026-06-02 17:06:33,310][262582] Updated weights for policy 0, policy_version 42602 (0.0008) +[2026-06-02 17:06:33,510][262582] Updated weights for policy 0, policy_version 42612 (0.0008) +[2026-06-02 17:06:33,712][262582] Updated weights for policy 0, policy_version 42622 (0.0008) +[2026-06-02 17:06:33,913][262582] Updated weights for policy 0, policy_version 42632 (0.0008) +[2026-06-02 17:06:34,109][262582] Updated weights for policy 0, policy_version 42642 (0.0008) +[2026-06-02 17:06:34,317][262582] Updated weights for policy 0, policy_version 42652 (0.0008) +[2026-06-02 17:06:35,016][262582] Updated weights for policy 0, policy_version 42662 (0.0008) +[2026-06-02 17:06:35,203][262582] Updated weights for policy 0, policy_version 42672 (0.0008) +[2026-06-02 17:06:35,406][262582] Updated weights for policy 0, policy_version 42682 (0.0008) +[2026-06-02 17:06:35,608][262582] Updated weights for policy 0, policy_version 42692 (0.0008) +[2026-06-02 17:06:35,811][262582] Updated weights for policy 0, policy_version 42702 (0.0008) +[2026-06-02 17:06:36,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 21856256. Throughput: 0: 18588.5. Samples: 21860096. Policy #0 lag: (min: 6.0, avg: 23.1, max: 70.0) +[2026-06-02 17:06:36,008][260776] Avg episode reward: [(0, '1798.539')] +[2026-06-02 17:06:36,036][262582] Updated weights for policy 0, policy_version 42713 (0.0008) +[2026-06-02 17:06:36,168][262026] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs5_seed12/checkpoint_p0/checkpoint_000042720_21889024.pth... +[2026-06-02 17:06:36,186][262026] Saving new best policy, reward=1798.539! +[2026-06-02 17:06:36,732][262582] Updated weights for policy 0, policy_version 42723 (0.0008) +[2026-06-02 17:06:36,919][262582] Updated weights for policy 0, policy_version 42733 (0.0008) +[2026-06-02 17:06:37,116][262582] Updated weights for policy 0, policy_version 42743 (0.0008) +[2026-06-02 17:06:37,317][262582] Updated weights for policy 0, policy_version 42753 (0.0008) +[2026-06-02 17:06:37,512][262582] Updated weights for policy 0, policy_version 42763 (0.0008) +[2026-06-02 17:06:37,726][262582] Updated weights for policy 0, policy_version 42773 (0.0008) +[2026-06-02 17:06:37,926][262582] Updated weights for policy 0, policy_version 42783 (0.0008) +[2026-06-02 17:06:38,640][262582] Updated weights for policy 0, policy_version 42793 (0.0008) +[2026-06-02 17:06:38,827][262582] Updated weights for policy 0, policy_version 42803 (0.0008) +[2026-06-02 17:06:39,035][262582] Updated weights for policy 0, policy_version 42813 (0.0008) +[2026-06-02 17:06:39,236][262582] Updated weights for policy 0, policy_version 42823 (0.0009) +[2026-06-02 17:06:39,436][262582] Updated weights for policy 0, policy_version 42833 (0.0008) +[2026-06-02 17:06:39,674][262582] Updated weights for policy 0, policy_version 42844 (0.0008) +[2026-06-02 17:06:40,345][262582] Updated weights for policy 0, policy_version 42854 (0.0008) +[2026-06-02 17:06:40,558][262582] Updated weights for policy 0, policy_version 42865 (0.0008) +[2026-06-02 17:06:40,754][262582] Updated weights for policy 0, policy_version 42875 (0.0008) +[2026-06-02 17:06:40,966][262582] Updated weights for policy 0, policy_version 42885 (0.0008) +[2026-06-02 17:06:41,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 21954560. Throughput: 0: 18335.3. Samples: 21966592. Policy #0 lag: (min: 6.0, avg: 23.1, max: 70.0) +[2026-06-02 17:06:41,008][260776] Avg episode reward: [(0, '1845.384')] +[2026-06-02 17:06:41,164][262582] Updated weights for policy 0, policy_version 42895 (0.0008) +[2026-06-02 17:06:41,370][262582] Updated weights for policy 0, policy_version 42905 (0.0008) +[2026-06-02 17:06:41,498][262026] Saving new best policy, reward=1845.384! +[2026-06-02 17:06:42,087][262582] Updated weights for policy 0, policy_version 42915 (0.0008) +[2026-06-02 17:06:42,269][262582] Updated weights for policy 0, policy_version 42925 (0.0008) +[2026-06-02 17:06:42,471][262582] Updated weights for policy 0, policy_version 42935 (0.0008) +[2026-06-02 17:06:42,679][262582] Updated weights for policy 0, policy_version 42945 (0.0008) +[2026-06-02 17:06:42,884][262582] Updated weights for policy 0, policy_version 42955 (0.0008) +[2026-06-02 17:06:43,084][262582] Updated weights for policy 0, policy_version 42965 (0.0008) +[2026-06-02 17:06:43,289][262582] Updated weights for policy 0, policy_version 42975 (0.0008) +[2026-06-02 17:06:43,964][262582] Updated weights for policy 0, policy_version 42985 (0.0008) +[2026-06-02 17:06:44,162][262582] Updated weights for policy 0, policy_version 42995 (0.0008) +[2026-06-02 17:06:44,366][262582] Updated weights for policy 0, policy_version 43005 (0.0008) +[2026-06-02 17:06:44,563][262582] Updated weights for policy 0, policy_version 43015 (0.0008) +[2026-06-02 17:06:44,764][262582] Updated weights for policy 0, policy_version 43025 (0.0009) +[2026-06-02 17:06:44,979][262582] Updated weights for policy 0, policy_version 43035 (0.0008) +[2026-06-02 17:06:45,647][262582] Updated weights for policy 0, policy_version 43045 (0.0008) +[2026-06-02 17:06:45,837][262582] Updated weights for policy 0, policy_version 43055 (0.0008) +[2026-06-02 17:06:46,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 22052864. Throughput: 0: 18451.9. Samples: 22077312. Policy #0 lag: (min: 6.0, avg: 23.1, max: 70.0) +[2026-06-02 17:06:46,008][260776] Avg episode reward: [(0, '1857.422')] +[2026-06-02 17:06:46,047][262582] Updated weights for policy 0, policy_version 43065 (0.0008) +[2026-06-02 17:06:46,250][262582] Updated weights for policy 0, policy_version 43075 (0.0009) +[2026-06-02 17:06:46,452][262582] Updated weights for policy 0, policy_version 43085 (0.0008) +[2026-06-02 17:06:46,656][262582] Updated weights for policy 0, policy_version 43095 (0.0009) +[2026-06-02 17:06:46,834][262026] Saving new best policy, reward=1857.422! +[2026-06-02 17:06:47,352][262582] Updated weights for policy 0, policy_version 43105 (0.0009) +[2026-06-02 17:06:47,542][262582] Updated weights for policy 0, policy_version 43115 (0.0008) +[2026-06-02 17:06:47,743][262582] Updated weights for policy 0, policy_version 43125 (0.0009) +[2026-06-02 17:06:47,932][262582] Updated weights for policy 0, policy_version 43135 (0.0008) +[2026-06-02 17:06:48,144][262582] Updated weights for policy 0, policy_version 43145 (0.0008) +[2026-06-02 17:06:48,338][262582] Updated weights for policy 0, policy_version 43155 (0.0008) +[2026-06-02 17:06:48,541][262582] Updated weights for policy 0, policy_version 43165 (0.0008) +[2026-06-02 17:06:49,268][262582] Updated weights for policy 0, policy_version 43177 (0.0008) +[2026-06-02 17:06:49,470][262582] Updated weights for policy 0, policy_version 43187 (0.0008) +[2026-06-02 17:06:49,667][262582] Updated weights for policy 0, policy_version 43197 (0.0008) +[2026-06-02 17:06:49,879][262582] Updated weights for policy 0, policy_version 43207 (0.0008) +[2026-06-02 17:06:50,088][262582] Updated weights for policy 0, policy_version 43217 (0.0009) +[2026-06-02 17:06:50,291][262582] Updated weights for policy 0, policy_version 43227 (0.0008) +[2026-06-02 17:06:50,977][262582] Updated weights for policy 0, policy_version 43237 (0.0009) +[2026-06-02 17:06:51,007][260776] Fps is (10 sec: 19660.6, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 22151168. Throughput: 0: 18343.8. Samples: 22130560. Policy #0 lag: (min: 39.0, avg: 56.0, max: 103.0) +[2026-06-02 17:06:51,008][260776] Avg episode reward: [(0, '1867.820')] +[2026-06-02 17:06:51,162][262582] Updated weights for policy 0, policy_version 43247 (0.0008) +[2026-06-02 17:06:51,371][262582] Updated weights for policy 0, policy_version 43257 (0.0008) +[2026-06-02 17:06:51,581][262582] Updated weights for policy 0, policy_version 43267 (0.0008) +[2026-06-02 17:06:51,783][262582] Updated weights for policy 0, policy_version 43277 (0.0010) +[2026-06-02 17:06:51,980][262582] Updated weights for policy 0, policy_version 43287 (0.0009) +[2026-06-02 17:06:52,164][262026] Saving new best policy, reward=1867.820! +[2026-06-02 17:06:52,693][262582] Updated weights for policy 0, policy_version 43297 (0.0009) +[2026-06-02 17:06:52,892][262582] Updated weights for policy 0, policy_version 43308 (0.0008) +[2026-06-02 17:06:53,096][262582] Updated weights for policy 0, policy_version 43318 (0.0008) +[2026-06-02 17:06:53,291][262582] Updated weights for policy 0, policy_version 43328 (0.0008) +[2026-06-02 17:06:53,505][262582] Updated weights for policy 0, policy_version 43338 (0.0008) +[2026-06-02 17:06:53,699][262582] Updated weights for policy 0, policy_version 43348 (0.0008) +[2026-06-02 17:06:53,910][262582] Updated weights for policy 0, policy_version 43358 (0.0008) +[2026-06-02 17:06:54,592][262582] Updated weights for policy 0, policy_version 43368 (0.0008) +[2026-06-02 17:06:54,790][262582] Updated weights for policy 0, policy_version 43378 (0.0008) +[2026-06-02 17:06:54,989][262582] Updated weights for policy 0, policy_version 43388 (0.0008) +[2026-06-02 17:06:55,194][262582] Updated weights for policy 0, policy_version 43398 (0.0008) +[2026-06-02 17:06:55,394][262582] Updated weights for policy 0, policy_version 43408 (0.0009) +[2026-06-02 17:06:55,595][262582] Updated weights for policy 0, policy_version 43418 (0.0008) +[2026-06-02 17:06:56,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 22249472. Throughput: 0: 18588.4. Samples: 22247424. Policy #0 lag: (min: 39.0, avg: 56.0, max: 103.0) +[2026-06-02 17:06:56,008][260776] Avg episode reward: [(0, '1918.586')] +[2026-06-02 17:06:56,014][262026] Saving new best policy, reward=1918.586! +[2026-06-02 17:06:56,298][262582] Updated weights for policy 0, policy_version 43428 (0.0008) +[2026-06-02 17:06:56,483][262582] Updated weights for policy 0, policy_version 43438 (0.0008) +[2026-06-02 17:06:56,694][262582] Updated weights for policy 0, policy_version 43448 (0.0008) +[2026-06-02 17:06:56,891][262582] Updated weights for policy 0, policy_version 43458 (0.0008) +[2026-06-02 17:06:57,099][262582] Updated weights for policy 0, policy_version 43468 (0.0009) +[2026-06-02 17:06:57,303][262582] Updated weights for policy 0, policy_version 43478 (0.0008) +[2026-06-02 17:06:57,501][262582] Updated weights for policy 0, policy_version 43488 (0.0008) +[2026-06-02 17:06:58,187][262582] Updated weights for policy 0, policy_version 43498 (0.0007) +[2026-06-02 17:06:58,382][262582] Updated weights for policy 0, policy_version 43508 (0.0005) +[2026-06-02 17:06:58,568][262582] Updated weights for policy 0, policy_version 43518 (0.0004) +[2026-06-02 17:06:58,785][262582] Updated weights for policy 0, policy_version 43528 (0.0004) +[2026-06-02 17:06:58,988][262582] Updated weights for policy 0, policy_version 43538 (0.0004) +[2026-06-02 17:06:59,192][262582] Updated weights for policy 0, policy_version 43548 (0.0004) +[2026-06-02 17:06:59,872][262582] Updated weights for policy 0, policy_version 43558 (0.0004) +[2026-06-02 17:07:00,061][262582] Updated weights for policy 0, policy_version 43568 (0.0004) +[2026-06-02 17:07:00,268][262582] Updated weights for policy 0, policy_version 43578 (0.0006) +[2026-06-02 17:07:00,466][262582] Updated weights for policy 0, policy_version 43588 (0.0007) +[2026-06-02 17:07:00,663][262582] Updated weights for policy 0, policy_version 43598 (0.0008) +[2026-06-02 17:07:00,863][262582] Updated weights for policy 0, policy_version 43608 (0.0008) +[2026-06-02 17:07:01,007][260776] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 22315008. Throughput: 0: 18375.1. Samples: 22355840. Policy #0 lag: (min: 39.0, avg: 56.0, max: 103.0) +[2026-06-02 17:07:01,008][260776] Avg episode reward: [(0, '1933.344')] +[2026-06-02 17:07:01,020][262026] Saving new best policy, reward=1933.344! +[2026-06-02 17:07:01,580][262582] Updated weights for policy 0, policy_version 43618 (0.0005) +[2026-06-02 17:07:01,773][262582] Updated weights for policy 0, policy_version 43628 (0.0004) +[2026-06-02 17:07:01,971][262582] Updated weights for policy 0, policy_version 43638 (0.0004) +[2026-06-02 17:07:02,178][262582] Updated weights for policy 0, policy_version 43648 (0.0004) +[2026-06-02 17:07:02,374][262582] Updated weights for policy 0, policy_version 43658 (0.0007) +[2026-06-02 17:07:02,603][262582] Updated weights for policy 0, policy_version 43669 (0.0008) +[2026-06-02 17:07:02,803][262582] Updated weights for policy 0, policy_version 43679 (0.0009) +[2026-06-02 17:07:03,468][262582] Updated weights for policy 0, policy_version 43689 (0.0008) +[2026-06-02 17:07:03,665][262582] Updated weights for policy 0, policy_version 43699 (0.0008) +[2026-06-02 17:07:03,865][262582] Updated weights for policy 0, policy_version 43709 (0.0009) +[2026-06-02 17:07:04,087][262582] Updated weights for policy 0, policy_version 43720 (0.0008) +[2026-06-02 17:07:04,286][262582] Updated weights for policy 0, policy_version 43730 (0.0009) +[2026-06-02 17:07:04,498][262582] Updated weights for policy 0, policy_version 43740 (0.0008) +[2026-06-02 17:07:05,174][262582] Updated weights for policy 0, policy_version 43750 (0.0008) +[2026-06-02 17:07:05,360][262582] Updated weights for policy 0, policy_version 43760 (0.0008) +[2026-06-02 17:07:05,561][262582] Updated weights for policy 0, policy_version 43770 (0.0008) +[2026-06-02 17:07:05,769][262582] Updated weights for policy 0, policy_version 43780 (0.0009) +[2026-06-02 17:07:05,983][262582] Updated weights for policy 0, policy_version 43790 (0.0008) +[2026-06-02 17:07:06,007][260776] Fps is (10 sec: 16383.7, 60 sec: 18022.3, 300 sec: 18438.9). Total num frames: 22413312. Throughput: 0: 18659.5. Samples: 22415360. Policy #0 lag: (min: 39.0, avg: 56.0, max: 103.0) +[2026-06-02 17:07:06,008][260776] Avg episode reward: [(0, '1947.266')] +[2026-06-02 17:07:06,188][262582] Updated weights for policy 0, policy_version 43800 (0.0009) +[2026-06-02 17:07:06,338][262026] Saving new best policy, reward=1947.266! +[2026-06-02 17:07:06,850][262582] Updated weights for policy 0, policy_version 43810 (0.0008) +[2026-06-02 17:07:07,048][262582] Updated weights for policy 0, policy_version 43820 (0.0009) +[2026-06-02 17:07:07,252][262582] Updated weights for policy 0, policy_version 43830 (0.0008) +[2026-06-02 17:07:07,457][262582] Updated weights for policy 0, policy_version 43840 (0.0008) +[2026-06-02 17:07:07,661][262582] Updated weights for policy 0, policy_version 43850 (0.0009) +[2026-06-02 17:07:07,856][262582] Updated weights for policy 0, policy_version 43860 (0.0008) +[2026-06-02 17:07:08,089][262582] Updated weights for policy 0, policy_version 43871 (0.0008) +[2026-06-02 17:07:08,753][262582] Updated weights for policy 0, policy_version 43881 (0.0008) +[2026-06-02 17:07:08,955][262582] Updated weights for policy 0, policy_version 43891 (0.0009) +[2026-06-02 17:07:09,170][262582] Updated weights for policy 0, policy_version 43901 (0.0009) +[2026-06-02 17:07:09,371][262582] Updated weights for policy 0, policy_version 43911 (0.0008) +[2026-06-02 17:07:09,613][262582] Updated weights for policy 0, policy_version 43923 (0.0008) +[2026-06-02 17:07:09,820][262582] Updated weights for policy 0, policy_version 43933 (0.0008) +[2026-06-02 17:07:10,501][262582] Updated weights for policy 0, policy_version 43943 (0.0009) +[2026-06-02 17:07:10,715][262582] Updated weights for policy 0, policy_version 43954 (0.0009) +[2026-06-02 17:07:10,921][262582] Updated weights for policy 0, policy_version 43964 (0.0008) +[2026-06-02 17:07:11,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 22511616. Throughput: 0: 18369.4. Samples: 22520832. Policy #0 lag: (min: 39.0, avg: 56.0, max: 103.0) +[2026-06-02 17:07:11,008][260776] Avg episode reward: [(0, '1947.266')] +[2026-06-02 17:07:11,122][262582] Updated weights for policy 0, policy_version 43974 (0.0009) +[2026-06-02 17:07:11,331][262582] Updated weights for policy 0, policy_version 43984 (0.0009) +[2026-06-02 17:07:11,526][262582] Updated weights for policy 0, policy_version 43994 (0.0009) +[2026-06-02 17:07:12,241][262582] Updated weights for policy 0, policy_version 44004 (0.0009) +[2026-06-02 17:07:12,431][262582] Updated weights for policy 0, policy_version 44014 (0.0009) +[2026-06-02 17:07:12,627][262582] Updated weights for policy 0, policy_version 44024 (0.0009) +[2026-06-02 17:07:12,836][262582] Updated weights for policy 0, policy_version 44034 (0.0009) +[2026-06-02 17:07:13,044][262582] Updated weights for policy 0, policy_version 44044 (0.0008) +[2026-06-02 17:07:13,250][262582] Updated weights for policy 0, policy_version 44054 (0.0008) +[2026-06-02 17:07:13,441][262582] Updated weights for policy 0, policy_version 44064 (0.0009) +[2026-06-02 17:07:14,108][262582] Updated weights for policy 0, policy_version 44074 (0.0009) +[2026-06-02 17:07:14,316][262582] Updated weights for policy 0, policy_version 44084 (0.0010) +[2026-06-02 17:07:14,521][262582] Updated weights for policy 0, policy_version 44094 (0.0009) +[2026-06-02 17:07:14,727][262582] Updated weights for policy 0, policy_version 44104 (0.0009) +[2026-06-02 17:07:14,927][262582] Updated weights for policy 0, policy_version 44114 (0.0009) +[2026-06-02 17:07:15,126][262582] Updated weights for policy 0, policy_version 44124 (0.0009) +[2026-06-02 17:07:15,833][262582] Updated weights for policy 0, policy_version 44135 (0.0009) +[2026-06-02 17:07:16,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 22609920. Throughput: 0: 18360.8. Samples: 22627072. Policy #0 lag: (min: 39.0, avg: 56.0, max: 103.0) +[2026-06-02 17:07:16,008][260776] Avg episode reward: [(0, '1919.974')] +[2026-06-02 17:07:16,025][262582] Updated weights for policy 0, policy_version 44145 (0.0009) +[2026-06-02 17:07:16,230][262582] Updated weights for policy 0, policy_version 44155 (0.0009) +[2026-06-02 17:07:16,433][262582] Updated weights for policy 0, policy_version 44165 (0.0009) +[2026-06-02 17:07:16,633][262582] Updated weights for policy 0, policy_version 44175 (0.0008) +[2026-06-02 17:07:16,838][262582] Updated weights for policy 0, policy_version 44185 (0.0006) +[2026-06-02 17:07:17,546][262582] Updated weights for policy 0, policy_version 44195 (0.0009) +[2026-06-02 17:07:17,735][262582] Updated weights for policy 0, policy_version 44205 (0.0009) +[2026-06-02 17:07:17,933][262582] Updated weights for policy 0, policy_version 44215 (0.0009) +[2026-06-02 17:07:18,130][262582] Updated weights for policy 0, policy_version 44225 (0.0008) +[2026-06-02 17:07:18,345][262582] Updated weights for policy 0, policy_version 44235 (0.0009) +[2026-06-02 17:07:18,542][262582] Updated weights for policy 0, policy_version 44245 (0.0009) +[2026-06-02 17:07:18,751][262582] Updated weights for policy 0, policy_version 44255 (0.0009) +[2026-06-02 17:07:19,431][262582] Updated weights for policy 0, policy_version 44265 (0.0008) +[2026-06-02 17:07:19,656][262582] Updated weights for policy 0, policy_version 44276 (0.0008) +[2026-06-02 17:07:19,861][262582] Updated weights for policy 0, policy_version 44286 (0.0008) +[2026-06-02 17:07:20,070][262582] Updated weights for policy 0, policy_version 44296 (0.0008) +[2026-06-02 17:07:20,270][262582] Updated weights for policy 0, policy_version 44306 (0.0009) +[2026-06-02 17:07:20,465][262582] Updated weights for policy 0, policy_version 44316 (0.0008) +[2026-06-02 17:07:21,007][260776] Fps is (10 sec: 19660.2, 60 sec: 18568.4, 300 sec: 18438.9). Total num frames: 22708224. Throughput: 0: 18352.2. Samples: 22685952. Policy #0 lag: (min: 16.0, avg: 34.7, max: 80.0) +[2026-06-02 17:07:21,009][260776] Avg episode reward: [(0, '1949.498')] +[2026-06-02 17:07:21,152][262582] Updated weights for policy 0, policy_version 44326 (0.0008) +[2026-06-02 17:07:21,343][262582] Updated weights for policy 0, policy_version 44336 (0.0008) +[2026-06-02 17:07:21,544][262582] Updated weights for policy 0, policy_version 44346 (0.0008) +[2026-06-02 17:07:21,760][262582] Updated weights for policy 0, policy_version 44356 (0.0008) +[2026-06-02 17:07:21,958][262582] Updated weights for policy 0, policy_version 44366 (0.0008) +[2026-06-02 17:07:22,157][262582] Updated weights for policy 0, policy_version 44376 (0.0008) +[2026-06-02 17:07:22,319][262026] Saving new best policy, reward=1949.498! +[2026-06-02 17:07:22,867][262582] Updated weights for policy 0, policy_version 44386 (0.0008) +[2026-06-02 17:07:23,061][262582] Updated weights for policy 0, policy_version 44396 (0.0008) +[2026-06-02 17:07:23,253][262582] Updated weights for policy 0, policy_version 44406 (0.0008) +[2026-06-02 17:07:23,463][262582] Updated weights for policy 0, policy_version 44416 (0.0008) +[2026-06-02 17:07:23,670][262582] Updated weights for policy 0, policy_version 44426 (0.0008) +[2026-06-02 17:07:23,867][262582] Updated weights for policy 0, policy_version 44436 (0.0008) +[2026-06-02 17:07:24,069][262582] Updated weights for policy 0, policy_version 44446 (0.0008) +[2026-06-02 17:07:24,748][262582] Updated weights for policy 0, policy_version 44456 (0.0009) +[2026-06-02 17:07:24,939][262582] Updated weights for policy 0, policy_version 44466 (0.0008) +[2026-06-02 17:07:25,132][262582] Updated weights for policy 0, policy_version 44476 (0.0008) +[2026-06-02 17:07:25,346][262582] Updated weights for policy 0, policy_version 44486 (0.0008) +[2026-06-02 17:07:25,550][262582] Updated weights for policy 0, policy_version 44496 (0.0008) +[2026-06-02 17:07:25,753][262582] Updated weights for policy 0, policy_version 44506 (0.0008) +[2026-06-02 17:07:26,007][260776] Fps is (10 sec: 19661.0, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 22806528. Throughput: 0: 18457.6. Samples: 22797184. Policy #0 lag: (min: 16.0, avg: 34.7, max: 80.0) +[2026-06-02 17:07:26,008][260776] Avg episode reward: [(0, '1949.498')] +[2026-06-02 17:07:26,480][262582] Updated weights for policy 0, policy_version 44517 (0.0008) +[2026-06-02 17:07:26,670][262582] Updated weights for policy 0, policy_version 44527 (0.0008) +[2026-06-02 17:07:26,871][262582] Updated weights for policy 0, policy_version 44537 (0.0008) +[2026-06-02 17:07:27,072][262582] Updated weights for policy 0, policy_version 44547 (0.0009) +[2026-06-02 17:07:27,266][262582] Updated weights for policy 0, policy_version 44557 (0.0008) +[2026-06-02 17:07:27,473][262582] Updated weights for policy 0, policy_version 44567 (0.0008) +[2026-06-02 17:07:28,187][262582] Updated weights for policy 0, policy_version 44577 (0.0008) +[2026-06-02 17:07:28,408][262582] Updated weights for policy 0, policy_version 44589 (0.0008) +[2026-06-02 17:07:28,609][262582] Updated weights for policy 0, policy_version 44599 (0.0008) +[2026-06-02 17:07:28,813][262582] Updated weights for policy 0, policy_version 44609 (0.0008) +[2026-06-02 17:07:29,013][262582] Updated weights for policy 0, policy_version 44619 (0.0009) +[2026-06-02 17:07:29,218][262582] Updated weights for policy 0, policy_version 44629 (0.0008) +[2026-06-02 17:07:29,424][262582] Updated weights for policy 0, policy_version 44639 (0.0009) +[2026-06-02 17:07:30,101][262582] Updated weights for policy 0, policy_version 44649 (0.0008) +[2026-06-02 17:07:30,306][262582] Updated weights for policy 0, policy_version 44659 (0.0009) +[2026-06-02 17:07:30,509][262582] Updated weights for policy 0, policy_version 44669 (0.0008) +[2026-06-02 17:07:30,705][262582] Updated weights for policy 0, policy_version 44679 (0.0008) +[2026-06-02 17:07:30,926][262582] Updated weights for policy 0, policy_version 44690 (0.0008) +[2026-06-02 17:07:31,007][260776] Fps is (10 sec: 16384.5, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 22872064. Throughput: 0: 18543.0. Samples: 22911744. Policy #0 lag: (min: 16.0, avg: 34.7, max: 80.0) +[2026-06-02 17:07:31,008][260776] Avg episode reward: [(0, '1952.571')] +[2026-06-02 17:07:31,132][262582] Updated weights for policy 0, policy_version 44700 (0.0008) +[2026-06-02 17:07:31,208][262026] Saving new best policy, reward=1952.571! +[2026-06-02 17:07:31,839][262582] Updated weights for policy 0, policy_version 44710 (0.0008) +[2026-06-02 17:07:32,031][262582] Updated weights for policy 0, policy_version 44720 (0.0008) +[2026-06-02 17:07:32,251][262582] Updated weights for policy 0, policy_version 44731 (0.0008) +[2026-06-02 17:07:32,455][262582] Updated weights for policy 0, policy_version 44741 (0.0008) +[2026-06-02 17:07:32,655][262582] Updated weights for policy 0, policy_version 44751 (0.0008) +[2026-06-02 17:07:32,867][262582] Updated weights for policy 0, policy_version 44761 (0.0008) +[2026-06-02 17:07:33,565][262582] Updated weights for policy 0, policy_version 44771 (0.0008) +[2026-06-02 17:07:33,761][262582] Updated weights for policy 0, policy_version 44781 (0.0009) +[2026-06-02 17:07:33,966][262582] Updated weights for policy 0, policy_version 44791 (0.0005) +[2026-06-02 17:07:34,165][262582] Updated weights for policy 0, policy_version 44801 (0.0006) +[2026-06-02 17:07:34,364][262582] Updated weights for policy 0, policy_version 44811 (0.0005) +[2026-06-02 17:07:34,573][262582] Updated weights for policy 0, policy_version 44821 (0.0008) +[2026-06-02 17:07:34,770][262582] Updated weights for policy 0, policy_version 44831 (0.0008) +[2026-06-02 17:07:35,470][262582] Updated weights for policy 0, policy_version 44841 (0.0008) +[2026-06-02 17:07:35,659][262582] Updated weights for policy 0, policy_version 44851 (0.0008) +[2026-06-02 17:07:35,854][262582] Updated weights for policy 0, policy_version 44861 (0.0008) +[2026-06-02 17:07:36,007][260776] Fps is (10 sec: 16383.5, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 22970368. Throughput: 0: 18594.1. Samples: 22967296. Policy #0 lag: (min: 16.0, avg: 34.7, max: 80.0) +[2026-06-02 17:07:36,008][260776] Avg episode reward: [(0, '1991.190')] +[2026-06-02 17:07:36,060][262582] Updated weights for policy 0, policy_version 44871 (0.0007) +[2026-06-02 17:07:36,266][262582] Updated weights for policy 0, policy_version 44881 (0.0008) +[2026-06-02 17:07:36,470][262582] Updated weights for policy 0, policy_version 44891 (0.0009) +[2026-06-02 17:07:36,565][262026] Saving new best policy, reward=1991.190! +[2026-06-02 17:07:37,141][262582] Updated weights for policy 0, policy_version 44901 (0.0009) +[2026-06-02 17:07:37,341][262582] Updated weights for policy 0, policy_version 44911 (0.0009) +[2026-06-02 17:07:37,544][262582] Updated weights for policy 0, policy_version 44921 (0.0009) +[2026-06-02 17:07:37,768][262582] Updated weights for policy 0, policy_version 44932 (0.0009) +[2026-06-02 17:07:37,970][262582] Updated weights for policy 0, policy_version 44942 (0.0009) +[2026-06-02 17:07:38,191][262582] Updated weights for policy 0, policy_version 44953 (0.0007) +[2026-06-02 17:07:38,883][262582] Updated weights for policy 0, policy_version 44963 (0.0005) +[2026-06-02 17:07:39,084][262582] Updated weights for policy 0, policy_version 44973 (0.0005) +[2026-06-02 17:07:39,282][262582] Updated weights for policy 0, policy_version 44983 (0.0007) +[2026-06-02 17:07:39,486][262582] Updated weights for policy 0, policy_version 44993 (0.0008) +[2026-06-02 17:07:39,688][262582] Updated weights for policy 0, policy_version 45003 (0.0009) +[2026-06-02 17:07:39,893][262582] Updated weights for policy 0, policy_version 45013 (0.0008) +[2026-06-02 17:07:40,098][262582] Updated weights for policy 0, policy_version 45023 (0.0009) +[2026-06-02 17:07:40,782][262582] Updated weights for policy 0, policy_version 45033 (0.0008) +[2026-06-02 17:07:40,974][262582] Updated weights for policy 0, policy_version 45043 (0.0007) +[2026-06-02 17:07:41,007][260776] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 23068672. Throughput: 0: 18466.1. Samples: 23078400. Policy #0 lag: (min: 16.0, avg: 34.7, max: 80.0) +[2026-06-02 17:07:41,007][260776] Avg episode reward: [(0, '1956.084')] +[2026-06-02 17:07:41,226][262582] Updated weights for policy 0, policy_version 45053 (0.0016) +[2026-06-02 17:07:41,409][262582] Updated weights for policy 0, policy_version 45064 (0.0004) +[2026-06-02 17:07:41,608][262582] Updated weights for policy 0, policy_version 45074 (0.0004) +[2026-06-02 17:07:41,828][262582] Updated weights for policy 0, policy_version 45085 (0.0004) +[2026-06-02 17:07:42,547][262582] Updated weights for policy 0, policy_version 45095 (0.0004) +[2026-06-02 17:07:42,740][262582] Updated weights for policy 0, policy_version 45105 (0.0006) +[2026-06-02 17:07:42,931][262582] Updated weights for policy 0, policy_version 45115 (0.0004) +[2026-06-02 17:07:43,155][262582] Updated weights for policy 0, policy_version 45126 (0.0009) +[2026-06-02 17:07:43,361][262582] Updated weights for policy 0, policy_version 45136 (0.0009) +[2026-06-02 17:07:43,563][262582] Updated weights for policy 0, policy_version 45146 (0.0008) +[2026-06-02 17:07:44,256][262582] Updated weights for policy 0, policy_version 45156 (0.0006) +[2026-06-02 17:07:44,458][262582] Updated weights for policy 0, policy_version 45166 (0.0004) +[2026-06-02 17:07:44,658][262582] Updated weights for policy 0, policy_version 45176 (0.0004) +[2026-06-02 17:07:44,854][262582] Updated weights for policy 0, policy_version 45186 (0.0004) +[2026-06-02 17:07:45,062][262582] Updated weights for policy 0, policy_version 45196 (0.0004) +[2026-06-02 17:07:45,267][262582] Updated weights for policy 0, policy_version 45206 (0.0006) +[2026-06-02 17:07:45,459][262582] Updated weights for policy 0, policy_version 45216 (0.0008) +[2026-06-02 17:07:46,007][260776] Fps is (10 sec: 19661.4, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 23166976. Throughput: 0: 18423.5. Samples: 23184896. Policy #0 lag: (min: 16.0, avg: 34.7, max: 80.0) +[2026-06-02 17:07:46,008][260776] Avg episode reward: [(0, '1985.005')] +[2026-06-02 17:07:46,162][262582] Updated weights for policy 0, policy_version 45227 (0.0008) +[2026-06-02 17:07:46,370][262582] Updated weights for policy 0, policy_version 45237 (0.0009) +[2026-06-02 17:07:46,563][262582] Updated weights for policy 0, policy_version 45247 (0.0009) +[2026-06-02 17:07:46,783][262582] Updated weights for policy 0, policy_version 45257 (0.0009) +[2026-06-02 17:07:46,978][262582] Updated weights for policy 0, policy_version 45267 (0.0009) +[2026-06-02 17:07:47,172][262582] Updated weights for policy 0, policy_version 45277 (0.0009) +[2026-06-02 17:07:47,928][262582] Updated weights for policy 0, policy_version 45288 (0.0009) +[2026-06-02 17:07:48,129][262582] Updated weights for policy 0, policy_version 45298 (0.0008) +[2026-06-02 17:07:48,328][262582] Updated weights for policy 0, policy_version 45308 (0.0009) +[2026-06-02 17:07:48,532][262582] Updated weights for policy 0, policy_version 45318 (0.0008) +[2026-06-02 17:07:48,736][262582] Updated weights for policy 0, policy_version 45328 (0.0009) +[2026-06-02 17:07:48,963][262582] Updated weights for policy 0, policy_version 45339 (0.0009) +[2026-06-02 17:07:49,653][262582] Updated weights for policy 0, policy_version 45349 (0.0008) +[2026-06-02 17:07:49,835][262582] Updated weights for policy 0, policy_version 45359 (0.0008) +[2026-06-02 17:07:50,044][262582] Updated weights for policy 0, policy_version 45369 (0.0006) +[2026-06-02 17:07:50,239][262582] Updated weights for policy 0, policy_version 45379 (0.0005) +[2026-06-02 17:07:50,453][262582] Updated weights for policy 0, policy_version 45389 (0.0005) +[2026-06-02 17:07:50,647][262582] Updated weights for policy 0, policy_version 45399 (0.0009) +[2026-06-02 17:07:51,007][260776] Fps is (10 sec: 19660.4, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 23265280. Throughput: 0: 18406.4. Samples: 23243648. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 17:07:51,008][260776] Avg episode reward: [(0, '1987.583')] +[2026-06-02 17:07:51,352][262582] Updated weights for policy 0, policy_version 45409 (0.0009) +[2026-06-02 17:07:51,541][262582] Updated weights for policy 0, policy_version 45419 (0.0009) +[2026-06-02 17:07:51,753][262582] Updated weights for policy 0, policy_version 45430 (0.0008) +[2026-06-02 17:07:51,960][262582] Updated weights for policy 0, policy_version 45440 (0.0009) +[2026-06-02 17:07:52,154][262582] Updated weights for policy 0, policy_version 45450 (0.0008) +[2026-06-02 17:07:52,363][262582] Updated weights for policy 0, policy_version 45460 (0.0008) +[2026-06-02 17:07:52,560][262582] Updated weights for policy 0, policy_version 45470 (0.0008) +[2026-06-02 17:07:53,260][262582] Updated weights for policy 0, policy_version 45480 (0.0008) +[2026-06-02 17:07:53,449][262582] Updated weights for policy 0, policy_version 45490 (0.0008) +[2026-06-02 17:07:53,658][262582] Updated weights for policy 0, policy_version 45500 (0.0007) +[2026-06-02 17:07:53,858][262582] Updated weights for policy 0, policy_version 45510 (0.0004) +[2026-06-02 17:07:54,064][262582] Updated weights for policy 0, policy_version 45520 (0.0008) +[2026-06-02 17:07:54,266][262582] Updated weights for policy 0, policy_version 45530 (0.0008) +[2026-06-02 17:07:54,971][262582] Updated weights for policy 0, policy_version 45540 (0.0008) +[2026-06-02 17:07:55,163][262582] Updated weights for policy 0, policy_version 45550 (0.0008) +[2026-06-02 17:07:55,359][262582] Updated weights for policy 0, policy_version 45560 (0.0008) +[2026-06-02 17:07:55,582][262582] Updated weights for policy 0, policy_version 45571 (0.0008) +[2026-06-02 17:07:55,793][262582] Updated weights for policy 0, policy_version 45581 (0.0008) +[2026-06-02 17:07:55,994][262582] Updated weights for policy 0, policy_version 45591 (0.0008) +[2026-06-02 17:07:56,007][260776] Fps is (10 sec: 16383.8, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 23330816. Throughput: 0: 18426.3. Samples: 23350016. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 17:07:56,008][260776] Avg episode reward: [(0, '1969.569')] +[2026-06-02 17:07:56,736][262582] Updated weights for policy 0, policy_version 45602 (0.0008) +[2026-06-02 17:07:56,947][262582] Updated weights for policy 0, policy_version 45614 (0.0005) +[2026-06-02 17:07:57,148][262582] Updated weights for policy 0, policy_version 45624 (0.0009) +[2026-06-02 17:07:57,351][262582] Updated weights for policy 0, policy_version 45634 (0.0004) +[2026-06-02 17:07:57,556][262582] Updated weights for policy 0, policy_version 45644 (0.0004) +[2026-06-02 17:07:57,747][262582] Updated weights for policy 0, policy_version 45654 (0.0004) +[2026-06-02 17:07:57,950][262582] Updated weights for policy 0, policy_version 45664 (0.0004) +[2026-06-02 17:07:58,624][262582] Updated weights for policy 0, policy_version 45674 (0.0008) +[2026-06-02 17:07:58,821][262582] Updated weights for policy 0, policy_version 45684 (0.0009) +[2026-06-02 17:07:59,018][262582] Updated weights for policy 0, policy_version 45694 (0.0008) +[2026-06-02 17:07:59,223][262582] Updated weights for policy 0, policy_version 45704 (0.0008) +[2026-06-02 17:07:59,428][262582] Updated weights for policy 0, policy_version 45714 (0.0008) +[2026-06-02 17:07:59,649][262582] Updated weights for policy 0, policy_version 45725 (0.0006) +[2026-06-02 17:08:00,337][262582] Updated weights for policy 0, policy_version 45735 (0.0007) +[2026-06-02 17:08:00,538][262582] Updated weights for policy 0, policy_version 45745 (0.0008) +[2026-06-02 17:08:00,728][262582] Updated weights for policy 0, policy_version 45755 (0.0008) +[2026-06-02 17:08:00,933][262582] Updated weights for policy 0, policy_version 45765 (0.0008) +[2026-06-02 17:08:01,007][260776] Fps is (10 sec: 16384.3, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 23429120. Throughput: 0: 18705.1. Samples: 23468800. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 17:08:01,008][260776] Avg episode reward: [(0, '2020.267')] +[2026-06-02 17:08:01,155][262582] Updated weights for policy 0, policy_version 45775 (0.0008) +[2026-06-02 17:08:01,350][262582] Updated weights for policy 0, policy_version 45785 (0.0008) +[2026-06-02 17:08:01,479][262026] Saving new best policy, reward=2020.267! +[2026-06-02 17:08:02,038][262582] Updated weights for policy 0, policy_version 45795 (0.0008) +[2026-06-02 17:08:02,235][262582] Updated weights for policy 0, policy_version 45805 (0.0008) +[2026-06-02 17:08:02,434][262582] Updated weights for policy 0, policy_version 45815 (0.0008) +[2026-06-02 17:08:02,645][262582] Updated weights for policy 0, policy_version 45825 (0.0008) +[2026-06-02 17:08:02,840][262582] Updated weights for policy 0, policy_version 45835 (0.0009) +[2026-06-02 17:08:03,035][262582] Updated weights for policy 0, policy_version 45845 (0.0008) +[2026-06-02 17:08:03,242][262582] Updated weights for policy 0, policy_version 45855 (0.0009) +[2026-06-02 17:08:03,945][262582] Updated weights for policy 0, policy_version 45865 (0.0008) +[2026-06-02 17:08:04,150][262582] Updated weights for policy 0, policy_version 45876 (0.0008) +[2026-06-02 17:08:04,346][262582] Updated weights for policy 0, policy_version 45886 (0.0009) +[2026-06-02 17:08:04,574][262582] Updated weights for policy 0, policy_version 45897 (0.0009) +[2026-06-02 17:08:04,769][262582] Updated weights for policy 0, policy_version 45907 (0.0008) +[2026-06-02 17:08:04,976][262582] Updated weights for policy 0, policy_version 45917 (0.0008) +[2026-06-02 17:08:05,690][262582] Updated weights for policy 0, policy_version 45927 (0.0009) +[2026-06-02 17:08:05,882][262582] Updated weights for policy 0, policy_version 45937 (0.0008) +[2026-06-02 17:08:06,007][260776] Fps is (10 sec: 19660.4, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 23527424. Throughput: 0: 18449.1. Samples: 23516160. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 17:08:06,008][260776] Avg episode reward: [(0, '1991.640')] +[2026-06-02 17:08:06,080][262582] Updated weights for policy 0, policy_version 45947 (0.0009) +[2026-06-02 17:08:06,292][262582] Updated weights for policy 0, policy_version 45957 (0.0008) +[2026-06-02 17:08:06,487][262582] Updated weights for policy 0, policy_version 45967 (0.0008) +[2026-06-02 17:08:06,691][262582] Updated weights for policy 0, policy_version 45977 (0.0009) +[2026-06-02 17:08:07,400][262582] Updated weights for policy 0, policy_version 45987 (0.0009) +[2026-06-02 17:08:07,599][262582] Updated weights for policy 0, policy_version 45997 (0.0009) +[2026-06-02 17:08:07,802][262582] Updated weights for policy 0, policy_version 46007 (0.0008) +[2026-06-02 17:08:07,998][262582] Updated weights for policy 0, policy_version 46017 (0.0008) +[2026-06-02 17:08:08,221][262582] Updated weights for policy 0, policy_version 46028 (0.0009) +[2026-06-02 17:08:08,426][262582] Updated weights for policy 0, policy_version 46038 (0.0009) +[2026-06-02 17:08:08,617][262582] Updated weights for policy 0, policy_version 46048 (0.0009) +[2026-06-02 17:08:09,318][262582] Updated weights for policy 0, policy_version 46058 (0.0009) +[2026-06-02 17:08:09,506][262582] Updated weights for policy 0, policy_version 46068 (0.0009) +[2026-06-02 17:08:09,729][262582] Updated weights for policy 0, policy_version 46079 (0.0009) +[2026-06-02 17:08:09,933][262582] Updated weights for policy 0, policy_version 46089 (0.0009) +[2026-06-02 17:08:10,136][262582] Updated weights for policy 0, policy_version 46099 (0.0009) +[2026-06-02 17:08:10,327][262582] Updated weights for policy 0, policy_version 46109 (0.0008) +[2026-06-02 17:08:11,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 23625728. Throughput: 0: 18622.5. Samples: 23635200. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 17:08:11,008][260776] Avg episode reward: [(0, '2014.462')] +[2026-06-02 17:08:11,035][262582] Updated weights for policy 0, policy_version 46119 (0.0009) +[2026-06-02 17:08:11,231][262582] Updated weights for policy 0, policy_version 46129 (0.0009) +[2026-06-02 17:08:11,430][262582] Updated weights for policy 0, policy_version 46139 (0.0009) +[2026-06-02 17:08:11,629][262582] Updated weights for policy 0, policy_version 46149 (0.0009) +[2026-06-02 17:08:11,840][262582] Updated weights for policy 0, policy_version 46159 (0.0009) +[2026-06-02 17:08:12,044][262582] Updated weights for policy 0, policy_version 46169 (0.0008) +[2026-06-02 17:08:12,881][262582] Updated weights for policy 0, policy_version 46179 (0.0008) +[2026-06-02 17:08:13,076][262582] Updated weights for policy 0, policy_version 46189 (0.0007) +[2026-06-02 17:08:13,279][262582] Updated weights for policy 0, policy_version 46199 (0.0011) +[2026-06-02 17:08:13,469][262582] Updated weights for policy 0, policy_version 46209 (0.0011) +[2026-06-02 17:08:13,681][262582] Updated weights for policy 0, policy_version 46219 (0.0013) +[2026-06-02 17:08:13,889][262582] Updated weights for policy 0, policy_version 46229 (0.0009) +[2026-06-02 17:08:14,091][262582] Updated weights for policy 0, policy_version 46239 (0.0008) +[2026-06-02 17:08:14,623][262582] Updated weights for policy 0, policy_version 46249 (0.0009) +[2026-06-02 17:08:14,817][262582] Updated weights for policy 0, policy_version 46259 (0.0009) +[2026-06-02 17:08:15,026][262582] Updated weights for policy 0, policy_version 46269 (0.0008) +[2026-06-02 17:08:15,223][262582] Updated weights for policy 0, policy_version 46279 (0.0008) +[2026-06-02 17:08:15,439][262582] Updated weights for policy 0, policy_version 46290 (0.0008) +[2026-06-02 17:08:15,653][262582] Updated weights for policy 0, policy_version 46300 (0.0008) +[2026-06-02 17:08:16,007][260776] Fps is (10 sec: 19661.3, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 23724032. Throughput: 0: 18440.5. Samples: 23741568. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 17:08:16,008][260776] Avg episode reward: [(0, '2059.395')] +[2026-06-02 17:08:16,013][262026] Saving new best policy, reward=2059.395! +[2026-06-02 17:08:16,341][262582] Updated weights for policy 0, policy_version 46310 (0.0009) +[2026-06-02 17:08:16,535][262582] Updated weights for policy 0, policy_version 46320 (0.0008) +[2026-06-02 17:08:16,735][262582] Updated weights for policy 0, policy_version 46330 (0.0008) +[2026-06-02 17:08:16,928][262582] Updated weights for policy 0, policy_version 46340 (0.0009) +[2026-06-02 17:08:17,143][262582] Updated weights for policy 0, policy_version 46350 (0.0008) +[2026-06-02 17:08:17,346][262582] Updated weights for policy 0, policy_version 46360 (0.0008) +[2026-06-02 17:08:18,041][262582] Updated weights for policy 0, policy_version 46370 (0.0008) +[2026-06-02 17:08:18,228][262582] Updated weights for policy 0, policy_version 46380 (0.0008) +[2026-06-02 17:08:18,425][262582] Updated weights for policy 0, policy_version 46390 (0.0009) +[2026-06-02 17:08:18,630][262582] Updated weights for policy 0, policy_version 46400 (0.0008) +[2026-06-02 17:08:18,835][262582] Updated weights for policy 0, policy_version 46410 (0.0008) +[2026-06-02 17:08:19,038][262582] Updated weights for policy 0, policy_version 46420 (0.0009) +[2026-06-02 17:08:19,233][262582] Updated weights for policy 0, policy_version 46430 (0.0009) +[2026-06-02 17:08:20,006][262582] Updated weights for policy 0, policy_version 46441 (0.0028) +[2026-06-02 17:08:20,227][262582] Updated weights for policy 0, policy_version 46453 (0.0004) +[2026-06-02 17:08:20,438][262582] Updated weights for policy 0, policy_version 46464 (0.0004) +[2026-06-02 17:08:20,638][262582] Updated weights for policy 0, policy_version 46474 (0.0004) +[2026-06-02 17:08:20,832][262582] Updated weights for policy 0, policy_version 46484 (0.0007) +[2026-06-02 17:08:21,007][260776] Fps is (10 sec: 16383.8, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 23789568. Throughput: 0: 18511.7. Samples: 23800320. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 17:08:21,008][260776] Avg episode reward: [(0, '2071.396')] +[2026-06-02 17:08:21,044][262582] Updated weights for policy 0, policy_version 46494 (0.0004) +[2026-06-02 17:08:21,082][262026] Saving new best policy, reward=2071.396! +[2026-06-02 17:08:21,756][262582] Updated weights for policy 0, policy_version 46505 (0.0009) +[2026-06-02 17:08:21,968][262582] Updated weights for policy 0, policy_version 46516 (0.0009) +[2026-06-02 17:08:22,173][262582] Updated weights for policy 0, policy_version 46526 (0.0008) +[2026-06-02 17:08:22,373][262582] Updated weights for policy 0, policy_version 46536 (0.0008) +[2026-06-02 17:08:22,594][262582] Updated weights for policy 0, policy_version 46547 (0.0008) +[2026-06-02 17:08:22,797][262582] Updated weights for policy 0, policy_version 46557 (0.0006) +[2026-06-02 17:08:23,466][262582] Updated weights for policy 0, policy_version 46567 (0.0007) +[2026-06-02 17:08:23,663][262582] Updated weights for policy 0, policy_version 46577 (0.0008) +[2026-06-02 17:08:23,867][262582] Updated weights for policy 0, policy_version 46587 (0.0010) +[2026-06-02 17:08:24,092][262582] Updated weights for policy 0, policy_version 46598 (0.0008) +[2026-06-02 17:08:24,283][262582] Updated weights for policy 0, policy_version 46608 (0.0008) +[2026-06-02 17:08:24,497][262582] Updated weights for policy 0, policy_version 46618 (0.0006) +[2026-06-02 17:08:25,185][262582] Updated weights for policy 0, policy_version 46628 (0.0008) +[2026-06-02 17:08:25,373][262582] Updated weights for policy 0, policy_version 46638 (0.0009) +[2026-06-02 17:08:25,594][262582] Updated weights for policy 0, policy_version 46649 (0.0009) +[2026-06-02 17:08:25,798][262582] Updated weights for policy 0, policy_version 46659 (0.0008) +[2026-06-02 17:08:26,001][262582] Updated weights for policy 0, policy_version 46669 (0.0008) +[2026-06-02 17:08:26,007][260776] Fps is (10 sec: 16383.9, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 23887872. Throughput: 0: 18417.7. Samples: 23907200. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) +[2026-06-02 17:08:26,008][260776] Avg episode reward: [(0, '2071.396')] +[2026-06-02 17:08:26,202][262582] Updated weights for policy 0, policy_version 46679 (0.0009) +[2026-06-02 17:08:26,882][262582] Updated weights for policy 0, policy_version 46689 (0.0009) +[2026-06-02 17:08:27,088][262582] Updated weights for policy 0, policy_version 46699 (0.0009) +[2026-06-02 17:08:27,284][262582] Updated weights for policy 0, policy_version 46709 (0.0009) +[2026-06-02 17:08:27,482][262582] Updated weights for policy 0, policy_version 46719 (0.0008) +[2026-06-02 17:08:27,692][262582] Updated weights for policy 0, policy_version 46729 (0.0008) +[2026-06-02 17:08:27,894][262582] Updated weights for policy 0, policy_version 46739 (0.0008) +[2026-06-02 17:08:28,100][262582] Updated weights for policy 0, policy_version 46749 (0.0009) +[2026-06-02 17:08:28,789][262582] Updated weights for policy 0, policy_version 46759 (0.0009) +[2026-06-02 17:08:28,994][262582] Updated weights for policy 0, policy_version 46769 (0.0009) +[2026-06-02 17:08:29,187][262582] Updated weights for policy 0, policy_version 46779 (0.0009) +[2026-06-02 17:08:29,390][262582] Updated weights for policy 0, policy_version 46789 (0.0008) +[2026-06-02 17:08:29,597][262582] Updated weights for policy 0, policy_version 46799 (0.0009) +[2026-06-02 17:08:29,813][262582] Updated weights for policy 0, policy_version 46809 (0.0008) +[2026-06-02 17:08:30,507][262582] Updated weights for policy 0, policy_version 46819 (0.0009) +[2026-06-02 17:08:30,701][262582] Updated weights for policy 0, policy_version 46829 (0.0008) +[2026-06-02 17:08:30,887][262582] Updated weights for policy 0, policy_version 46839 (0.0009) +[2026-06-02 17:08:31,007][260776] Fps is (10 sec: 19661.2, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 23986176. Throughput: 0: 18668.1. Samples: 24024960. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) +[2026-06-02 17:08:31,007][260776] Avg episode reward: [(0, '2080.889')] +[2026-06-02 17:08:31,092][262582] Updated weights for policy 0, policy_version 46849 (0.0008) +[2026-06-02 17:08:31,297][262582] Updated weights for policy 0, policy_version 46859 (0.0008) +[2026-06-02 17:08:31,507][262582] Updated weights for policy 0, policy_version 46869 (0.0009) +[2026-06-02 17:08:31,716][262582] Updated weights for policy 0, policy_version 46879 (0.0008) +[2026-06-02 17:08:31,727][262026] Saving new best policy, reward=2080.889! +[2026-06-02 17:08:32,396][262582] Updated weights for policy 0, policy_version 46889 (0.0008) +[2026-06-02 17:08:32,582][262582] Updated weights for policy 0, policy_version 46899 (0.0008) +[2026-06-02 17:08:32,789][262582] Updated weights for policy 0, policy_version 46909 (0.0009) +[2026-06-02 17:08:33,007][262582] Updated weights for policy 0, policy_version 46920 (0.0008) +[2026-06-02 17:08:33,240][262582] Updated weights for policy 0, policy_version 46931 (0.0009) +[2026-06-02 17:08:33,439][262582] Updated weights for policy 0, policy_version 46941 (0.0009) +[2026-06-02 17:08:34,139][262582] Updated weights for policy 0, policy_version 46951 (0.0009) +[2026-06-02 17:08:34,335][262582] Updated weights for policy 0, policy_version 46961 (0.0009) +[2026-06-02 17:08:34,533][262582] Updated weights for policy 0, policy_version 46971 (0.0009) +[2026-06-02 17:08:34,737][262582] Updated weights for policy 0, policy_version 46981 (0.0009) +[2026-06-02 17:08:34,949][262582] Updated weights for policy 0, policy_version 46991 (0.0009) +[2026-06-02 17:08:35,151][262582] Updated weights for policy 0, policy_version 47001 (0.0009) +[2026-06-02 17:08:35,831][262582] Updated weights for policy 0, policy_version 47011 (0.0009) +[2026-06-02 17:08:36,007][260776] Fps is (10 sec: 19660.5, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 24084480. Throughput: 0: 18471.8. Samples: 24074880. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) +[2026-06-02 17:08:36,008][260776] Avg episode reward: [(0, '2113.272')] +[2026-06-02 17:08:36,053][262582] Updated weights for policy 0, policy_version 47023 (0.0008) +[2026-06-02 17:08:36,254][262582] Updated weights for policy 0, policy_version 47033 (0.0008) +[2026-06-02 17:08:36,472][262582] Updated weights for policy 0, policy_version 47043 (0.0008) +[2026-06-02 17:08:36,672][262582] Updated weights for policy 0, policy_version 47053 (0.0008) +[2026-06-02 17:08:36,894][262582] Updated weights for policy 0, policy_version 47064 (0.0008) +[2026-06-02 17:08:37,058][262026] Saving new best policy, reward=2113.272! +[2026-06-02 17:08:37,596][262582] Updated weights for policy 0, policy_version 47075 (0.0009) +[2026-06-02 17:08:37,791][262582] Updated weights for policy 0, policy_version 47085 (0.0009) +[2026-06-02 17:08:37,991][262582] Updated weights for policy 0, policy_version 47095 (0.0008) +[2026-06-02 17:08:38,197][262582] Updated weights for policy 0, policy_version 47105 (0.0008) +[2026-06-02 17:08:38,399][262582] Updated weights for policy 0, policy_version 47115 (0.0008) +[2026-06-02 17:08:38,600][262582] Updated weights for policy 0, policy_version 47125 (0.0008) +[2026-06-02 17:08:38,796][262582] Updated weights for policy 0, policy_version 47135 (0.0008) +[2026-06-02 17:08:39,486][262582] Updated weights for policy 0, policy_version 47145 (0.0009) +[2026-06-02 17:08:39,687][262582] Updated weights for policy 0, policy_version 47155 (0.0008) +[2026-06-02 17:08:39,887][262582] Updated weights for policy 0, policy_version 47165 (0.0008) +[2026-06-02 17:08:40,091][262582] Updated weights for policy 0, policy_version 47175 (0.0008) +[2026-06-02 17:08:40,292][262582] Updated weights for policy 0, policy_version 47185 (0.0008) +[2026-06-02 17:08:40,502][262582] Updated weights for policy 0, policy_version 47195 (0.0008) +[2026-06-02 17:08:41,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 24182784. Throughput: 0: 18739.2. Samples: 24193280. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) +[2026-06-02 17:08:41,008][260776] Avg episode reward: [(0, '2133.136')] +[2026-06-02 17:08:41,165][262582] Updated weights for policy 0, policy_version 47205 (0.0009) +[2026-06-02 17:08:41,359][262582] Updated weights for policy 0, policy_version 47215 (0.0008) +[2026-06-02 17:08:41,566][262582] Updated weights for policy 0, policy_version 47225 (0.0008) +[2026-06-02 17:08:41,760][262582] Updated weights for policy 0, policy_version 47235 (0.0009) +[2026-06-02 17:08:41,974][262582] Updated weights for policy 0, policy_version 47245 (0.0009) +[2026-06-02 17:08:42,183][262582] Updated weights for policy 0, policy_version 47255 (0.0008) +[2026-06-02 17:08:42,358][262026] Saving new best policy, reward=2133.136! +[2026-06-02 17:08:42,877][262582] Updated weights for policy 0, policy_version 47265 (0.0008) +[2026-06-02 17:08:43,083][262582] Updated weights for policy 0, policy_version 47276 (0.0009) +[2026-06-02 17:08:43,283][262582] Updated weights for policy 0, policy_version 47286 (0.0008) +[2026-06-02 17:08:43,488][262582] Updated weights for policy 0, policy_version 47296 (0.0005) +[2026-06-02 17:08:43,691][262582] Updated weights for policy 0, policy_version 47306 (0.0004) +[2026-06-02 17:08:43,889][262582] Updated weights for policy 0, policy_version 47316 (0.0004) +[2026-06-02 17:08:44,097][262582] Updated weights for policy 0, policy_version 47326 (0.0005) +[2026-06-02 17:08:44,733][262582] Updated weights for policy 0, policy_version 47336 (0.0008) +[2026-06-02 17:08:44,928][262582] Updated weights for policy 0, policy_version 47346 (0.0004) +[2026-06-02 17:08:45,132][262582] Updated weights for policy 0, policy_version 47356 (0.0006) +[2026-06-02 17:08:45,349][262582] Updated weights for policy 0, policy_version 47366 (0.0008) +[2026-06-02 17:08:45,550][262582] Updated weights for policy 0, policy_version 47376 (0.0008) +[2026-06-02 17:08:45,747][262582] Updated weights for policy 0, policy_version 47386 (0.0007) +[2026-06-02 17:08:46,007][260776] Fps is (10 sec: 19661.4, 60 sec: 18568.5, 300 sec: 18550.0). Total num frames: 24281088. Throughput: 0: 18454.8. Samples: 24299264. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) +[2026-06-02 17:08:46,008][260776] Avg episode reward: [(0, '2136.957')] +[2026-06-02 17:08:46,012][262026] Saving new best policy, reward=2136.957! +[2026-06-02 17:08:46,467][262582] Updated weights for policy 0, policy_version 47397 (0.0009) +[2026-06-02 17:08:46,669][262582] Updated weights for policy 0, policy_version 47407 (0.0008) +[2026-06-02 17:08:46,870][262582] Updated weights for policy 0, policy_version 47417 (0.0009) +[2026-06-02 17:08:47,085][262582] Updated weights for policy 0, policy_version 47428 (0.0007) +[2026-06-02 17:08:47,303][262582] Updated weights for policy 0, policy_version 47439 (0.0009) +[2026-06-02 17:08:47,510][262582] Updated weights for policy 0, policy_version 47449 (0.0006) +[2026-06-02 17:08:48,223][262582] Updated weights for policy 0, policy_version 47459 (0.0008) +[2026-06-02 17:08:48,411][262582] Updated weights for policy 0, policy_version 47469 (0.0008) +[2026-06-02 17:08:48,612][262582] Updated weights for policy 0, policy_version 47479 (0.0009) +[2026-06-02 17:08:48,811][262582] Updated weights for policy 0, policy_version 47489 (0.0009) +[2026-06-02 17:08:49,018][262582] Updated weights for policy 0, policy_version 47499 (0.0008) +[2026-06-02 17:08:49,212][262582] Updated weights for policy 0, policy_version 47509 (0.0008) +[2026-06-02 17:08:49,419][262582] Updated weights for policy 0, policy_version 47519 (0.0008) +[2026-06-02 17:08:50,099][262582] Updated weights for policy 0, policy_version 47529 (0.0008) +[2026-06-02 17:08:50,295][262582] Updated weights for policy 0, policy_version 47539 (0.0008) +[2026-06-02 17:08:50,503][262582] Updated weights for policy 0, policy_version 47549 (0.0005) +[2026-06-02 17:08:50,745][262582] Updated weights for policy 0, policy_version 47559 (0.0015) +[2026-06-02 17:08:50,925][262582] Updated weights for policy 0, policy_version 47569 (0.0008) +[2026-06-02 17:08:51,007][260776] Fps is (10 sec: 16383.8, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 24346624. Throughput: 0: 18739.3. Samples: 24359424. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) +[2026-06-02 17:08:51,008][260776] Avg episode reward: [(0, '2134.350')] +[2026-06-02 17:08:51,124][262582] Updated weights for policy 0, policy_version 47579 (0.0009) +[2026-06-02 17:08:51,815][262582] Updated weights for policy 0, policy_version 47589 (0.0009) +[2026-06-02 17:08:52,011][262582] Updated weights for policy 0, policy_version 47599 (0.0009) +[2026-06-02 17:08:52,217][262582] Updated weights for policy 0, policy_version 47609 (0.0009) +[2026-06-02 17:08:52,418][262582] Updated weights for policy 0, policy_version 47619 (0.0007) +[2026-06-02 17:08:52,639][262582] Updated weights for policy 0, policy_version 47630 (0.0007) +[2026-06-02 17:08:52,849][262582] Updated weights for policy 0, policy_version 47640 (0.0008) +[2026-06-02 17:08:53,534][262582] Updated weights for policy 0, policy_version 47650 (0.0009) +[2026-06-02 17:08:53,721][262582] Updated weights for policy 0, policy_version 47660 (0.0008) +[2026-06-02 17:08:53,920][262582] Updated weights for policy 0, policy_version 47670 (0.0008) +[2026-06-02 17:08:54,124][262582] Updated weights for policy 0, policy_version 47680 (0.0006) +[2026-06-02 17:08:54,334][262582] Updated weights for policy 0, policy_version 47690 (0.0007) +[2026-06-02 17:08:54,529][262582] Updated weights for policy 0, policy_version 47700 (0.0007) +[2026-06-02 17:08:54,731][262582] Updated weights for policy 0, policy_version 47710 (0.0007) +[2026-06-02 17:08:55,409][262582] Updated weights for policy 0, policy_version 47720 (0.0008) +[2026-06-02 17:08:55,604][262582] Updated weights for policy 0, policy_version 47730 (0.0008) +[2026-06-02 17:08:55,815][262582] Updated weights for policy 0, policy_version 47740 (0.0008) +[2026-06-02 17:08:56,007][260776] Fps is (10 sec: 16384.1, 60 sec: 18568.6, 300 sec: 18439.0). Total num frames: 24444928. Throughput: 0: 18460.5. Samples: 24465920. Policy #0 lag: (min: 48.0, avg: 85.7, max: 111.0) +[2026-06-02 17:08:56,007][260776] Avg episode reward: [(0, '2137.055')] +[2026-06-02 17:08:56,029][262582] Updated weights for policy 0, policy_version 47751 (0.0008) +[2026-06-02 17:08:56,234][262582] Updated weights for policy 0, policy_version 47761 (0.0008) +[2026-06-02 17:08:56,450][262582] Updated weights for policy 0, policy_version 47771 (0.0008) +[2026-06-02 17:08:56,543][262026] Saving new best policy, reward=2137.055! +[2026-06-02 17:08:57,123][262582] Updated weights for policy 0, policy_version 47781 (0.0009) +[2026-06-02 17:08:57,320][262582] Updated weights for policy 0, policy_version 47791 (0.0008) +[2026-06-02 17:08:57,535][262582] Updated weights for policy 0, policy_version 47801 (0.0008) +[2026-06-02 17:08:57,732][262582] Updated weights for policy 0, policy_version 47811 (0.0008) +[2026-06-02 17:08:57,944][262582] Updated weights for policy 0, policy_version 47821 (0.0009) +[2026-06-02 17:08:58,169][262582] Updated weights for policy 0, policy_version 47832 (0.0008) +[2026-06-02 17:08:58,848][262582] Updated weights for policy 0, policy_version 47842 (0.0009) +[2026-06-02 17:08:59,054][262582] Updated weights for policy 0, policy_version 47853 (0.0009) +[2026-06-02 17:08:59,262][262582] Updated weights for policy 0, policy_version 47863 (0.0009) +[2026-06-02 17:08:59,468][262582] Updated weights for policy 0, policy_version 47873 (0.0008) +[2026-06-02 17:08:59,670][262582] Updated weights for policy 0, policy_version 47883 (0.0009) +[2026-06-02 17:08:59,869][262582] Updated weights for policy 0, policy_version 47893 (0.0009) +[2026-06-02 17:09:00,074][262582] Updated weights for policy 0, policy_version 47903 (0.0008) +[2026-06-02 17:09:00,757][262582] Updated weights for policy 0, policy_version 47913 (0.0008) +[2026-06-02 17:09:00,958][262582] Updated weights for policy 0, policy_version 47923 (0.0008) +[2026-06-02 17:09:01,007][260776] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 24543232. Throughput: 0: 18571.4. Samples: 24577280. Policy #0 lag: (min: 48.0, avg: 85.7, max: 111.0) +[2026-06-02 17:09:01,008][260776] Avg episode reward: [(0, '2137.055')] +[2026-06-02 17:09:01,158][262582] Updated weights for policy 0, policy_version 47933 (0.0008) +[2026-06-02 17:09:01,362][262582] Updated weights for policy 0, policy_version 47943 (0.0008) +[2026-06-02 17:09:01,557][262582] Updated weights for policy 0, policy_version 47953 (0.0008) +[2026-06-02 17:09:01,766][262582] Updated weights for policy 0, policy_version 47963 (0.0008) +[2026-06-02 17:09:02,429][262582] Updated weights for policy 0, policy_version 47973 (0.0008) +[2026-06-02 17:09:02,633][262582] Updated weights for policy 0, policy_version 47983 (0.0008) +[2026-06-02 17:09:02,822][262582] Updated weights for policy 0, policy_version 47993 (0.0009) +[2026-06-02 17:09:03,034][262582] Updated weights for policy 0, policy_version 48003 (0.0009) +[2026-06-02 17:09:03,241][262582] Updated weights for policy 0, policy_version 48013 (0.0008) +[2026-06-02 17:09:03,443][262582] Updated weights for policy 0, policy_version 48023 (0.0008) +[2026-06-02 17:09:04,141][262582] Updated weights for policy 0, policy_version 48033 (0.0008) +[2026-06-02 17:09:04,330][262582] Updated weights for policy 0, policy_version 48043 (0.0008) +[2026-06-02 17:09:04,528][262582] Updated weights for policy 0, policy_version 48053 (0.0008) +[2026-06-02 17:09:04,733][262582] Updated weights for policy 0, policy_version 48063 (0.0008) +[2026-06-02 17:09:04,936][262582] Updated weights for policy 0, policy_version 48073 (0.0009) +[2026-06-02 17:09:05,139][262582] Updated weights for policy 0, policy_version 48083 (0.0008) +[2026-06-02 17:09:05,336][262582] Updated weights for policy 0, policy_version 48093 (0.0008) +[2026-06-02 17:09:06,007][260776] Fps is (10 sec: 19660.4, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 24641536. Throughput: 0: 18480.4. Samples: 24631936. Policy #0 lag: (min: 48.0, avg: 85.7, max: 111.0) +[2026-06-02 17:09:06,008][260776] Avg episode reward: [(0, '2145.368')] +[2026-06-02 17:09:06,032][262582] Updated weights for policy 0, policy_version 48103 (0.0008) +[2026-06-02 17:09:06,230][262582] Updated weights for policy 0, policy_version 48113 (0.0008) +[2026-06-02 17:09:06,433][262582] Updated weights for policy 0, policy_version 48123 (0.0008) +[2026-06-02 17:09:06,634][262582] Updated weights for policy 0, policy_version 48133 (0.0009) +[2026-06-02 17:09:06,829][262582] Updated weights for policy 0, policy_version 48143 (0.0008) +[2026-06-02 17:09:07,041][262582] Updated weights for policy 0, policy_version 48153 (0.0008) +[2026-06-02 17:09:07,174][262026] Saving new best policy, reward=2145.368! +[2026-06-02 17:09:07,721][262582] Updated weights for policy 0, policy_version 48163 (0.0008) +[2026-06-02 17:09:07,924][262582] Updated weights for policy 0, policy_version 48174 (0.0008) +[2026-06-02 17:09:08,127][262582] Updated weights for policy 0, policy_version 48184 (0.0008) +[2026-06-02 17:09:08,335][262582] Updated weights for policy 0, policy_version 48194 (0.0008) +[2026-06-02 17:09:08,543][262582] Updated weights for policy 0, policy_version 48204 (0.0008) +[2026-06-02 17:09:08,740][262582] Updated weights for policy 0, policy_version 48214 (0.0008) +[2026-06-02 17:09:08,950][262582] Updated weights for policy 0, policy_version 48224 (0.0009) +[2026-06-02 17:09:09,619][262582] Updated weights for policy 0, policy_version 48234 (0.0009) +[2026-06-02 17:09:09,833][262582] Updated weights for policy 0, policy_version 48244 (0.0009) +[2026-06-02 17:09:10,023][262582] Updated weights for policy 0, policy_version 48254 (0.0008) +[2026-06-02 17:09:10,239][262582] Updated weights for policy 0, policy_version 48264 (0.0009) +[2026-06-02 17:09:10,442][262582] Updated weights for policy 0, policy_version 48274 (0.0008) +[2026-06-02 17:09:10,656][262582] Updated weights for policy 0, policy_version 48285 (0.0008) +[2026-06-02 17:09:11,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.5, 300 sec: 18550.0). Total num frames: 24739840. Throughput: 0: 18707.9. Samples: 24749056. Policy #0 lag: (min: 48.0, avg: 85.7, max: 111.0) +[2026-06-02 17:09:11,008][260776] Avg episode reward: [(0, '2145.368')] +[2026-06-02 17:09:11,324][262582] Updated weights for policy 0, policy_version 48295 (0.0008) +[2026-06-02 17:09:11,522][262582] Updated weights for policy 0, policy_version 48305 (0.0009) +[2026-06-02 17:09:11,732][262582] Updated weights for policy 0, policy_version 48315 (0.0008) +[2026-06-02 17:09:11,941][262582] Updated weights for policy 0, policy_version 48325 (0.0008) +[2026-06-02 17:09:12,140][262582] Updated weights for policy 0, policy_version 48335 (0.0009) +[2026-06-02 17:09:12,342][262582] Updated weights for policy 0, policy_version 48345 (0.0008) +[2026-06-02 17:09:13,070][262582] Updated weights for policy 0, policy_version 48355 (0.0007) +[2026-06-02 17:09:13,259][262582] Updated weights for policy 0, policy_version 48365 (0.0004) +[2026-06-02 17:09:13,466][262582] Updated weights for policy 0, policy_version 48375 (0.0004) +[2026-06-02 17:09:13,661][262582] Updated weights for policy 0, policy_version 48385 (0.0005) +[2026-06-02 17:09:13,875][262582] Updated weights for policy 0, policy_version 48395 (0.0005) +[2026-06-02 17:09:14,072][262582] Updated weights for policy 0, policy_version 48405 (0.0004) +[2026-06-02 17:09:14,280][262582] Updated weights for policy 0, policy_version 48415 (0.0005) +[2026-06-02 17:09:14,923][262582] Updated weights for policy 0, policy_version 48425 (0.0004) +[2026-06-02 17:09:15,117][262582] Updated weights for policy 0, policy_version 48435 (0.0004) +[2026-06-02 17:09:15,322][262582] Updated weights for policy 0, policy_version 48445 (0.0004) +[2026-06-02 17:09:15,534][262582] Updated weights for policy 0, policy_version 48455 (0.0004) +[2026-06-02 17:09:15,730][262582] Updated weights for policy 0, policy_version 48465 (0.0005) +[2026-06-02 17:09:15,941][262582] Updated weights for policy 0, policy_version 48475 (0.0008) +[2026-06-02 17:09:16,007][260776] Fps is (10 sec: 16384.2, 60 sec: 18022.4, 300 sec: 18438.9). Total num frames: 24805376. Throughput: 0: 18505.9. Samples: 24857728. Policy #0 lag: (min: 48.0, avg: 85.7, max: 111.0) +[2026-06-02 17:09:16,008][260776] Avg episode reward: [(0, '2145.368')] +[2026-06-02 17:09:16,606][262582] Updated weights for policy 0, policy_version 48485 (0.0008) +[2026-06-02 17:09:16,809][262582] Updated weights for policy 0, policy_version 48495 (0.0009) +[2026-06-02 17:09:17,010][262582] Updated weights for policy 0, policy_version 48505 (0.0008) +[2026-06-02 17:09:17,203][262582] Updated weights for policy 0, policy_version 48515 (0.0008) +[2026-06-02 17:09:17,413][262582] Updated weights for policy 0, policy_version 48525 (0.0008) +[2026-06-02 17:09:17,621][262582] Updated weights for policy 0, policy_version 48535 (0.0009) +[2026-06-02 17:09:18,316][262582] Updated weights for policy 0, policy_version 48545 (0.0008) +[2026-06-02 17:09:18,503][262582] Updated weights for policy 0, policy_version 48555 (0.0008) +[2026-06-02 17:09:18,696][262582] Updated weights for policy 0, policy_version 48565 (0.0008) +[2026-06-02 17:09:18,900][262582] Updated weights for policy 0, policy_version 48575 (0.0009) +[2026-06-02 17:09:19,102][262582] Updated weights for policy 0, policy_version 48585 (0.0008) +[2026-06-02 17:09:19,311][262582] Updated weights for policy 0, policy_version 48595 (0.0008) +[2026-06-02 17:09:19,512][262582] Updated weights for policy 0, policy_version 48605 (0.0008) +[2026-06-02 17:09:20,195][262582] Updated weights for policy 0, policy_version 48615 (0.0008) +[2026-06-02 17:09:20,387][262582] Updated weights for policy 0, policy_version 48625 (0.0008) +[2026-06-02 17:09:20,582][262582] Updated weights for policy 0, policy_version 48635 (0.0008) +[2026-06-02 17:09:20,802][262582] Updated weights for policy 0, policy_version 48645 (0.0008) +[2026-06-02 17:09:21,003][262582] Updated weights for policy 0, policy_version 48655 (0.0008) +[2026-06-02 17:09:21,007][260776] Fps is (10 sec: 16383.8, 60 sec: 18568.5, 300 sec: 18438.9). Total num frames: 24903680. Throughput: 0: 18713.6. Samples: 24916992. Policy #0 lag: (min: 48.0, avg: 85.7, max: 111.0) +[2026-06-02 17:09:21,008][260776] Avg episode reward: [(0, '2145.368')] +[2026-06-02 17:09:21,196][262582] Updated weights for policy 0, policy_version 48665 (0.0008) +[2026-06-02 17:09:21,884][262582] Updated weights for policy 0, policy_version 48675 (0.0008) +[2026-06-02 17:09:22,073][262582] Updated weights for policy 0, policy_version 48685 (0.0009) +[2026-06-02 17:09:22,283][262582] Updated weights for policy 0, policy_version 48695 (0.0008) +[2026-06-02 17:09:22,485][262582] Updated weights for policy 0, policy_version 48705 (0.0008) +[2026-06-02 17:09:22,689][262582] Updated weights for policy 0, policy_version 48715 (0.0008) +[2026-06-02 17:09:22,889][262582] Updated weights for policy 0, policy_version 48725 (0.0008) +[2026-06-02 17:09:23,093][262582] Updated weights for policy 0, policy_version 48735 (0.0008) +[2026-06-02 17:09:23,776][262582] Updated weights for policy 0, policy_version 48745 (0.0008) +[2026-06-02 17:09:23,971][262582] Updated weights for policy 0, policy_version 48755 (0.0008) +[2026-06-02 17:09:24,179][262582] Updated weights for policy 0, policy_version 48765 (0.0008) +[2026-06-02 17:09:24,381][262582] Updated weights for policy 0, policy_version 48775 (0.0008) +[2026-06-02 17:09:24,579][262582] Updated weights for policy 0, policy_version 48785 (0.0008) +[2026-06-02 17:09:24,789][262582] Updated weights for policy 0, policy_version 48795 (0.0010) +[2026-06-02 17:09:25,451][262582] Updated weights for policy 0, policy_version 48805 (0.0008) +[2026-06-02 17:09:25,647][262582] Updated weights for policy 0, policy_version 48815 (0.0009) +[2026-06-02 17:09:25,856][262582] Updated weights for policy 0, policy_version 48825 (0.0008) +[2026-06-02 17:09:26,007][260776] Fps is (10 sec: 19660.7, 60 sec: 18568.6, 300 sec: 18438.9). Total num frames: 25001984. Throughput: 0: 18463.3. Samples: 25024128. Policy #0 lag: (min: 7.0, avg: 25.3, max: 71.0) +[2026-06-02 17:09:26,008][260776] Avg episode reward: [(0, '2156.198')] +[2026-06-02 17:09:26,063][262582] Updated weights for policy 0, policy_version 48835 (0.0008) +[2026-06-02 17:09:26,152][262026] Early stopping after 5 epochs (40 sgd steps), loss delta 0.0000000 +[2026-06-02 17:09:26,153][262026] Saving new best policy, reward=2156.198! +[2026-06-02 17:09:26,155][260776] Component Batcher_0 stopped! +[2026-06-02 17:09:26,155][262585] Stopping RolloutWorker_w1... +[2026-06-02 17:09:26,156][262585] Loop rollout_proc1_evt_loop terminating... +[2026-06-02 17:09:26,156][260776] Component RolloutWorker_w1 stopped! +[2026-06-02 17:09:26,156][262583] Stopping RolloutWorker_w0... +[2026-06-02 17:09:26,157][262583] Loop rollout_proc0_evt_loop terminating... +[2026-06-02 17:09:26,156][260776] Component RolloutWorker_w0 stopped! +[2026-06-02 17:09:26,154][262026] Stopping Batcher_0... +[2026-06-02 17:09:26,157][262026] Loop batcher_evt_loop terminating... +[2026-06-02 17:09:26,188][262026] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs5_seed12/checkpoint_p0/checkpoint_000048840_25034752.pth... +[2026-06-02 17:09:26,215][262026] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs5_seed12/checkpoint_p0/checkpoint_000048840_25034752.pth... +[2026-06-02 17:09:26,243][262026] Stopping LearnerWorker_p0... +[2026-06-02 17:09:26,243][262026] Loop learner_proc0_evt_loop terminating... +[2026-06-02 17:09:26,243][260776] Component LearnerWorker_p0 stopped! +[2026-06-02 17:09:26,244][262582] Weights refcount: 2 0 +[2026-06-02 17:09:26,245][262582] Stopping InferenceWorker_p0-w0... +[2026-06-02 17:09:26,245][262582] Loop inference_proc0-0_evt_loop terminating... +[2026-06-02 17:09:26,245][260776] Component InferenceWorker_p0-w0 stopped! +[2026-06-02 17:09:26,246][260776] Waiting for process learner_proc0 to stop... +[2026-06-02 17:09:27,230][260776] Waiting for process inference_proc0-0 to join... +[2026-06-02 17:09:27,231][260776] Waiting for process rollout_proc0 to join... +[2026-06-02 17:09:27,231][260776] Waiting for process rollout_proc1 to join... +[2026-06-02 17:09:27,232][260776] Batcher 0 profile tree view: +batching: 0.9236, releasing_batches: 0.0290 +[2026-06-02 17:09:27,233][260776] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0000 + wait_policy_total: 889.2052 +update_model: 45.1066 + weight_update: 0.0008 +one_step: 0.0016 + handle_policy_step: 400.9443 + deserialize: 5.0692, stack: 0.3805, obs_to_device_normalize: 54.5358, forward: 144.2884, prepare_outputs: 170.3695, send_messages: 10.2437 +[2026-06-02 17:09:27,233][260776] Learner 0 profile tree view: +misc: 0.0049, prepare_batch: 125.7614 +train: 967.6898 + epoch_init: 0.0597, minibatch_init: 2.5532, losses_postprocess: 354.5311, kl_divergence: 25.0734, after_optimizer: 408.8326 + calculate_losses: 39.3293 + losses_init: 0.0868, forward_head: 13.1969, bptt_initial: 0.3639, bptt: 0.4053, tail: 8.8236, advantages_returns: 2.8598, losses: 10.7078 + update: 134.1171 + clip: 12.9593 +[2026-06-02 17:09:27,234][260776] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.0335, enqueue_policy_requests: 137.6981, process_policy_outputs: 7.3438, env_step: 932.0902, finalize_trajectories: 0.1073, complete_rollouts: 0.0778 +post_env_step: 16.0025 + process_env_step: 4.6544 +[2026-06-02 17:09:27,234][260776] RolloutWorker_w1 profile tree view: +wait_for_trajectories: 0.0342, enqueue_policy_requests: 144.1337, process_policy_outputs: 7.3880, env_step: 930.1733, finalize_trajectories: 0.1048, complete_rollouts: 0.0793 +post_env_step: 16.0247 + process_env_step: 4.7121 +[2026-06-02 17:09:27,235][260776] Loop Runner_EvtLoop terminating... +[2026-06-02 17:09:27,236][260776] Runner profile tree view: +main_loop: 1366.0574 +[2026-06-02 17:09:27,236][260776] Collected {0: 25034752}, FPS: 18326.3