diff --git a/.gitattributes b/.gitattributes index 29e329f8d12152d40e18e26038217642e91ec3f4..9b6ae7a65403289783d7e758209eb03f9f9187bc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -304,3 +304,4 @@ factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:unifor factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs1:obs30:stride1:seed14/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed10/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed12/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text +factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/checkpoint_p0/best_000047560_24379392_reward_162.209.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/checkpoint_p0/best_000047560_24379392_reward_162.209.pth new file mode 100644 index 0000000000000000000000000000000000000000..79761362e5f9e43f6a76e3bdec174ef19d6c7f35 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/checkpoint_p0/best_000047560_24379392_reward_162.209.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24721e3bec8ece9546842c6df6a06df510511a9c068aea3380705fba7f6cea2a +size 20973113 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/checkpoint_p0/checkpoint_000027792_14254080.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/checkpoint_p0/checkpoint_000027792_14254080.pth new file mode 100644 index 0000000000000000000000000000000000000000..ed0a9e691a8afffc13c951204b895a4044e9188d --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/checkpoint_p0/checkpoint_000027792_14254080.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17260ff97f47699c36688fc3d77b0a3683b18e9166bedc02c789cb39e2f293e1 +size 20973473 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/checkpoint_p0/checkpoint_000048840_25034752.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/checkpoint_p0/checkpoint_000048840_25034752.pth new file mode 100644 index 0000000000000000000000000000000000000000..165f21929e60744a053a286e63eeaf14ce5be1ab --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/checkpoint_p0/checkpoint_000048840_25034752.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:153cf8f5970c4ea2aef5058594be2b4c688d843345d991a486ffb9582bdfc068 +size 20973473 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/config.json b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/config.json new file mode 100644 index 0000000000000000000000000000000000000000..05cfecb0d7aaa92c156af830d032a8cd531f61e4 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/config.json @@ -0,0 +1,268 @@ +{ + "help": false, + "algo": "APPO", + "env": "latency_flappy", + "experiment": "flappy_frame_stack_uniform_u1_3_fs2_seed11", + "train_dir": "results/checkpoints_factor_sweeps/flappy/context_window", + "restart_behavior": "resume", + "device": "gpu", + "seed": 11, + "num_policies": 1, + "async_rl": true, + "serial_mode": false, + "batched_sampling": true, + "num_batches_to_accumulate": 2, + "worker_num_splits": 1, + "policy_workers_per_policy": 1, + "max_policy_lag": 400, + "num_workers": 2, + "num_envs_per_worker": 1, + "batch_size": 4096, + "num_batches_per_epoch": 8, + "num_epochs": 8, + "rollout": 128, + "recurrence": 1, + "shuffle_minibatches": false, + "gamma": 0.99, + "reward_scale": 1.0, + "reward_clip": 1000.0, + "value_bootstrap": false, + "normalize_returns": true, + "exploration_loss_coeff": 0.003, + "value_loss_coeff": 0.5, + "kl_loss_coeff": 0.0, + "exploration_loss": "entropy", + "gae_lambda": 0.95, + "ppo_clip_ratio": 0.1, + "ppo_clip_value": 0.2, + "with_vtrace": false, + "vtrace_rho": 1.0, + "vtrace_c": 1.0, + "optimizer": "adam", + "adam_eps": 1e-05, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "max_grad_norm": 0.5, + "learning_rate": 0.00025, + "lr_schedule": "linear_decay", + "lr_schedule_kl_threshold": 0.008, + "lr_adaptive_min": 1e-06, + "lr_adaptive_max": 0.01, + "obs_subtract_mean": 0.0, + "obs_scale": 255.0, + "normalize_input": true, + "normalize_input_keys": null, + "decorrelate_experience_max_seconds": 0, + "decorrelate_envs_on_one_worker": true, + "actor_worker_gpus": [ + 0 + ], + "set_workers_cpu_affinity": true, + "force_envs_single_thread": false, + "default_niceness": 0, + "log_to_file": true, + "experiment_summaries_interval": 1, + "flush_summaries_interval": 30, + "stats_avg": 100, + "summaries_use_frameskip": true, + "heartbeat_interval": 20, + "heartbeat_reporting_interval": 180, + "train_for_env_steps": 25000000, + "train_for_seconds": 10000000000, + "save_every_sec": 600, + "keep_checkpoints": 5, + "load_checkpoint_kind": "latest", + "save_milestones_sec": -1, + "save_best_every_sec": 5, + "save_best_metric": "reward", + "save_best_after": 100000, + "benchmark": false, + "encoder_mlp_layers": [ + 512, + 512 + ], + "encoder_conv_architecture": "convnet_atari", + "encoder_conv_mlp_layers": [ + 512 + ], + "use_rnn": false, + "rnn_size": 512, + "rnn_type": "gru", + "rnn_num_layers": 1, + "decoder_mlp_layers": [], + "nonlinearity": "elu", + "policy_initialization": "orthogonal", + "policy_init_gain": 1.0, + "actor_critic_share_weights": true, + "adaptive_stddev": true, + "continuous_tanh_scale": 0.0, + "initial_stddev": 1.0, + "use_env_info_cache": false, + "env_gpu_actions": true, + "env_gpu_observations": true, + "env_frameskip": 1, + "env_framestack": 1, + "pixel_format": "CHW", + "use_record_episode_statistics": false, + "with_wandb": true, + "wandb_user": null, + "wandb_project": "latency-sensitive-bench", + "wandb_group": "flappy-fs2-uniform_u1_3", + "wandb_job_type": "sample_factory", + "wandb_tags": [ + "factor_sweep", + "flappy", + "frame_stack", + "uniform", + "uniform_u1_3", + "fs2", + "seed11" + ], + "with_pbt": false, + "pbt_mix_policies_in_one_env": true, + "pbt_period_env_steps": 5000000, + "pbt_start_mutation": 20000000, + "pbt_replace_fraction": 0.3, + "pbt_mutation_rate": 0.15, + "pbt_replace_reward_gap": 0.1, + "pbt_replace_reward_gap_absolute": 1e-06, + "pbt_optimize_gamma": false, + "pbt_target_objective": "true_objective", + "pbt_perturb_min": 1.1, + "pbt_perturb_max": 1.5, + "gym_id": "FlappyBird-v0", + "env_fps": 30.0, + "obs_fps": 30.0, + "use_lidar": false, + "normalize_obs": true, + "audio_on": false, + "screen_size": "", + "obs_resize": "84,84", + "use_gpu_render": true, + "simulator": "gpu", + "gpu_render_device": "auto", + "gpu_render_batch_size": 128, + "gpu_render_profile": false, + "gpu_render_profile_interval": 200, + "pipe_gap": 100, + "bird_color": "yellow", + "pipe_color": "green", + "background": "day", + "score_limit": -1, + "frame_stack": 2, + "debug": false, + "debug_timelimit_diagnostics": false, + "max_episode_steps": 0, + "mode": "train", + "latency_type": "uniform_distribution", + "fixed_latency_ms": null, + "mean_latency_ms": null, + "std_latency_ms": null, + "min_latency_ms": 33.333333333333336, + "max_latency_ms": 100.0, + "latency_seed": 11, + "add_latency_info": false, + "max_pending_actions": null, + "hold_policy": "one_frame_then_noop", + "ordering_policy": "latest_ready", + "eval_episodes": 100, + "eval_parallel_envs": 100, + "eval_latency_raw_frame_values": "0,1,2,3,4,5", + "eval_max_steps": 3600, + "eval_deterministic": true, + "eval_raw_reward": false, + "episode_metrics_path": "results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_uniform_u1_3_fs2_seed11/episode_metrics.jsonl", + "command_line": "--mode train --algo APPO --env latency_flappy --experiment flappy_frame_stack_uniform_u1_3_fs2_seed11 --train_dir results/checkpoints_factor_sweeps/flappy/context_window --restart_behavior resume --device gpu --actor_worker_gpus 0 --env_gpu_observations True --env_gpu_actions True --gpu-render-batch-size 128 --seed 11 --episode_metrics_path results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_uniform_u1_3_fs2_seed11/episode_metrics.jsonl --train_for_env_steps 25000000 --num_workers 2 --num_envs_per_worker 1 --num_policies 1 --batch_size 4096 --rollout 128 --recurrence 1 --num_epochs 8 --num_batches_per_epoch 8 --worker_num_splits 1 --max_policy_lag 400 --learning_rate 0.00025 --gamma 0.99 --gae_lambda 0.95 --ppo_clip_ratio 0.1 --ppo_clip_value 0.2 --value_loss_coeff 0.5 --max_grad_norm 0.5 --save_every_sec 600 --keep_checkpoints 5 --stats_avg 100 --experiment_summaries_interval 1 --batched_sampling True --async_rl True --use_rnn False --normalize_returns True --normalize_input True --latency-type uniform_distribution --min-latency-ms 33.333333333333336 --max-latency-ms 100.0 --latency-seed 11 --add-latency-info False --eval-episodes 100 --eval-parallel-envs 100 --eval-max-steps 3600 --eval-deterministic True --with_wandb True --wandb_project latency-sensitive-bench --wandb_group flappy-fs2-uniform_u1_3 --wandb_job_type sample_factory --wandb_tags factor_sweep flappy frame_stack uniform uniform_u1_3 fs2 seed11 --gym_id FlappyBird-v0 --env-fps 30 --obs-fps 30.0 --use_lidar False --normalize_obs True --audio_on False --obs_resize 84,84 --use-gpu-render True --simulator gpu --gpu-render-device auto --gpu-render-profile False --gpu-render-profile-interval 200 --pipe_gap 100 --bird_color yellow --pipe_color green --background day --frame_stack 2 --debug False --debug-timelimit-diagnostics False --hold-policy one_frame_then_noop --ordering-policy latest_ready", + "cli_args": { + "algo": "APPO", + "env": "latency_flappy", + "experiment": "flappy_frame_stack_uniform_u1_3_fs2_seed11", + "train_dir": "results/checkpoints_factor_sweeps/flappy/context_window", + "restart_behavior": "resume", + "device": "gpu", + "seed": 11, + "num_policies": 1, + "async_rl": true, + "batched_sampling": true, + "worker_num_splits": 1, + "max_policy_lag": 400, + "num_workers": 2, + "num_envs_per_worker": 1, + "batch_size": 4096, + "num_batches_per_epoch": 8, + "num_epochs": 8, + "rollout": 128, + "recurrence": 1, + "gamma": 0.99, + "normalize_returns": true, + "value_loss_coeff": 0.5, + "gae_lambda": 0.95, + "ppo_clip_ratio": 0.1, + "ppo_clip_value": 0.2, + "max_grad_norm": 0.5, + "learning_rate": 0.00025, + "normalize_input": true, + "actor_worker_gpus": [ + 0 + ], + "experiment_summaries_interval": 1, + "stats_avg": 100, + "train_for_env_steps": 25000000, + "save_every_sec": 600, + "keep_checkpoints": 5, + "use_rnn": false, + "env_gpu_actions": true, + "env_gpu_observations": true, + "with_wandb": true, + "wandb_project": "latency-sensitive-bench", + "wandb_group": "flappy-fs2-uniform_u1_3", + "wandb_job_type": "sample_factory", + "wandb_tags": [ + "factor_sweep", + "flappy", + "frame_stack", + "uniform", + "uniform_u1_3", + "fs2", + "seed11" + ], + "gym_id": "FlappyBird-v0", + "env_fps": 30.0, + "obs_fps": 30.0, + "use_lidar": false, + "normalize_obs": true, + "audio_on": false, + "obs_resize": "84,84", + "use_gpu_render": true, + "simulator": "gpu", + "gpu_render_device": "auto", + "gpu_render_batch_size": 128, + "gpu_render_profile": false, + "gpu_render_profile_interval": 200, + "pipe_gap": 100, + "bird_color": "yellow", + "pipe_color": "green", + "background": "day", + "frame_stack": 2, + "debug": false, + "debug_timelimit_diagnostics": false, + "mode": "train", + "latency_type": "uniform_distribution", + "min_latency_ms": 33.333333333333336, + "max_latency_ms": 100.0, + "latency_seed": 11, + "add_latency_info": false, + "hold_policy": "one_frame_then_noop", + "ordering_policy": "latest_ready", + "eval_episodes": 100, + "eval_parallel_envs": 100, + "eval_max_steps": 3600, + "eval_deterministic": true, + "episode_metrics_path": "results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_uniform_u1_3_fs2_seed11/episode_metrics.jsonl" + }, + "git_hash": "eb3a2e1efbd2aa03a60d7f44f5e18d8fdd0f5a2d", + "git_repo_name": "git@github.com:ZihanWang314/latency-sensitive-bench.git", + "eval_env_frameskip": 1, + "output_dir": "outputs/factor_sweeps/flappy/context_window/train/frame_stack/uniform_u1_3/fs2/seed_11", + "wandb_unique_id": "flappy-fs2-uniform_u1_3-s11" +} \ No newline at end of file diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/episode_metrics.jsonl b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/episode_metrics.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..28c96ab6176914e69ddd106186e7daee1ef02e19 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/episode_metrics.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30e1753c182d33786d65e84c13dad303355dab240814dd0c7fa8da067d3f6f15 +size 46699538 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/git.diff b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/git.diff new file mode 100644 index 0000000000000000000000000000000000000000..466fb8b9b61c2e47b54ca5d7f5f930e28515b107 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/git.diff @@ -0,0 +1,33 @@ +diff --git a/latency_bench/run.py b/latency_bench/run.py +index 163ca1b..e9016f3 100644 +--- a/latency_bench/run.py ++++ b/latency_bench/run.py +@@ -152,6 +152,8 @@ def _record_stratified_replay_videos( + video_cfg = config["logging"]["video"] + if not video_cfg["enabled"]: + return ++ if not config["logging"].get("save_step_records", False): ++ return + if ExecutorMode(config["executor"]["mode"]) == ExecutorMode.REALTIME: + return + selections = select_episode_return_stratified( +diff --git a/scripts/experiment_runner/run_manifest_jobs.py b/scripts/experiment_runner/run_manifest_jobs.py +index 18376d9..646fe8f 100644 +--- a/scripts/experiment_runner/run_manifest_jobs.py ++++ b/scripts/experiment_runner/run_manifest_jobs.py +@@ -192,6 +192,8 @@ def _resume_stage(job: dict[str, str], states: dict[str, str]) -> str: + return "upload" + if stage == "train_succeeded": + return "eval" ++ if stage == "eval_failed": ++ return "eval" + return "train" + + +diff --git a/starVLA b/starVLA +index ab3380d..9d8c567 160000 +--- a/starVLA ++++ b/starVLA +@@ -1 +1 @@ +-Subproject commit ab3380dfbd1de9649c15d154cc41b97788674537 ++Subproject commit 9d8c567188a3aa2a825296016cf17f3977101d8f diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/sf_log.txt b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/sf_log.txt new file mode 100644 index 0000000000000000000000000000000000000000..30f3c5ae1f4f87f429748c43b1e4bd956e18cb70 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs2:obs30:stride1:seed11/sf_log.txt @@ -0,0 +1,5140 @@ +[2026-06-07 03:22:37,694][527010] Saving configuration to results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_uniform_u1_3_fs2_seed11/config.json... +[2026-06-07 03:22:37,760][527010] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-07 03:22:37,761][527010] Rollout worker 0 uses device cuda:0 +[2026-06-07 03:22:37,762][527010] Using GPUs [0] for process 1 (actually maps to GPUs [1]) +[2026-06-07 03:22:37,762][527010] Rollout worker 1 uses device cuda:0 +[2026-06-07 03:22:38,767][527010] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-07 03:22:38,767][527010] InferenceWorker_p0-w0: min num requests: 1 +[2026-06-07 03:22:38,772][527010] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-07 03:22:38,776][527010] Using GPUs [0] for process 1 (actually maps to GPUs [1]) +[2026-06-07 03:22:38,776][527010] Starting all processes... +[2026-06-07 03:22:38,777][527010] Starting process learner_proc0 +[2026-06-07 03:22:40,003][527010] Starting all processes... +[2026-06-07 03:22:40,006][527010] Starting process inference_proc0-0 +[2026-06-07 03:22:40,006][527010] Starting process rollout_proc0 +[2026-06-07 03:22:40,007][527010] Starting process rollout_proc1 +[2026-06-07 03:22:40,356][528093] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-07 03:22:40,356][528093] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for learning process 0 +[2026-06-07 03:22:40,356][528093] Num visible devices: 1 +[2026-06-07 03:22:40,356][528093] Setting fixed seed 11 +[2026-06-07 03:22:40,357][528093] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-07 03:22:40,357][528093] Initializing actor-critic model on device cuda:0 +[2026-06-07 03:22:40,358][528093] RunningMeanStd input shape: (6, 84, 84) +[2026-06-07 03:22:40,387][528093] RunningMeanStd input shape: (1,) +[2026-06-07 03:22:40,394][528093] ConvEncoder: input_channels=6 +[2026-06-07 03:22:40,454][528093] Conv encoder output size: 512 +[2026-06-07 03:22:40,456][528093] Created Actor Critic model with architecture: +[2026-06-07 03:22:40,456][528093] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): MultiInputEncoder( + (encoders): ModuleDict( + (obs): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + ) + (core): ModelCoreIdentity() + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=2, bias=True) + ) +) +[2026-06-07 03:22:40,459][528093] Using optimizer +[2026-06-07 03:22:41,196][528093] No checkpoints found +[2026-06-07 03:22:41,196][528093] Did not load from checkpoint, starting from scratch! +[2026-06-07 03:22:41,196][528093] Initialized policy 0 weights for model version 0 +[2026-06-07 03:22:41,198][528093] LearnerWorker_p0 finished initialization! +[2026-06-07 03:22:41,198][528093] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-07 03:22:41,337][527010] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2026-06-07 03:22:42,021][528167] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191] +[2026-06-07 03:22:42,022][528167] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-07 03:22:42,022][528167] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for actor process 0 +[2026-06-07 03:22:42,022][528167] Num visible devices: 1 +[2026-06-07 03:22:42,059][528168] Worker 1 uses CPU cores [192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383] +[2026-06-07 03:22:42,059][528168] Using GPUs [0] for process 1 (actually maps to GPUs [1]) +[2026-06-07 03:22:42,059][528168] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for actor process 1 +[2026-06-07 03:22:42,060][528168] Num visible devices: 1 +[2026-06-07 03:22:42,092][528169] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-07 03:22:42,092][528169] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for inference process 0 +[2026-06-07 03:22:42,092][528169] Num visible devices: 1 +[2026-06-07 03:22:42,093][528169] RunningMeanStd input shape: (6, 84, 84) +[2026-06-07 03:22:42,124][528169] RunningMeanStd input shape: (1,) +[2026-06-07 03:22:42,132][528169] ConvEncoder: input_channels=6 +[2026-06-07 03:22:42,199][528169] Conv encoder output size: 512 +[2026-06-07 03:22:42,203][527010] Inference worker 0-0 is ready! +[2026-06-07 03:22:42,204][527010] All inference workers are ready! Signal rollout workers to start! +[2026-06-07 03:22:42,205][528168] EnvRunner 1-0 uses policy 0 +[2026-06-07 03:22:42,205][528167] EnvRunner 0-0 uses policy 0 +[2026-06-07 03:22:44,703][528093] Signal inference workers to stop experience collection... +[2026-06-07 03:22:44,718][528169] InferenceWorker_p0-w0: stopping experience collection +[2026-06-07 03:22:45,103][527010] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2026-06-07 03:22:45,104][527010] Avg episode reward: [(0, '-5.588')] +[2026-06-07 03:22:46,098][528093] Signal inference workers to resume experience collection... +[2026-06-07 03:22:46,099][528169] InferenceWorker_p0-w0: resuming experience collection +[2026-06-07 03:22:46,435][528169] Updated weights for policy 0, policy_version 85 (0.0029) +[2026-06-07 03:22:46,584][528169] Updated weights for policy 0, policy_version 95 (0.0004) +[2026-06-07 03:22:46,769][528169] Updated weights for policy 0, policy_version 109 (0.0005) +[2026-06-07 03:22:46,912][528169] Updated weights for policy 0, policy_version 119 (0.0008) +[2026-06-07 03:22:47,400][528169] Updated weights for policy 0, policy_version 129 (0.0008) +[2026-06-07 03:22:47,533][528169] Updated weights for policy 0, policy_version 139 (0.0008) +[2026-06-07 03:22:47,675][528169] Updated weights for policy 0, policy_version 149 (0.0008) +[2026-06-07 03:22:47,821][528169] Updated weights for policy 0, policy_version 159 (0.0006) +[2026-06-07 03:22:47,961][528169] Updated weights for policy 0, policy_version 169 (0.0005) +[2026-06-07 03:22:48,102][528169] Updated weights for policy 0, policy_version 179 (0.0008) +[2026-06-07 03:22:48,243][528169] Updated weights for policy 0, policy_version 189 (0.0008) +[2026-06-07 03:22:48,706][528169] Updated weights for policy 0, policy_version 199 (0.0005) +[2026-06-07 03:22:48,845][528169] Updated weights for policy 0, policy_version 209 (0.0006) +[2026-06-07 03:22:48,981][528169] Updated weights for policy 0, policy_version 219 (0.0006) +[2026-06-07 03:22:49,127][528169] Updated weights for policy 0, policy_version 229 (0.0008) +[2026-06-07 03:22:49,271][528169] Updated weights for policy 0, policy_version 239 (0.0008) +[2026-06-07 03:22:49,414][528169] Updated weights for policy 0, policy_version 249 (0.0008) +[2026-06-07 03:22:49,826][528169] Updated weights for policy 0, policy_version 259 (0.0007) +[2026-06-07 03:22:49,961][528169] Updated weights for policy 0, policy_version 269 (0.0006) +[2026-06-07 03:22:50,096][528169] Updated weights for policy 0, policy_version 279 (0.0008) +[2026-06-07 03:22:50,103][527010] Fps is (10 sec: 14952.0, 60 sec: 14952.0, 300 sec: 14952.0). Total num frames: 131072. Throughput: 0: 13491.8. Samples: 118272. Policy #0 lag: (min: 5.0, avg: 24.6, max: 69.0) +[2026-06-07 03:22:50,104][527010] Avg episode reward: [(0, '-3.998')] +[2026-06-07 03:22:50,236][528169] Updated weights for policy 0, policy_version 289 (0.0008) +[2026-06-07 03:22:50,382][528169] Updated weights for policy 0, policy_version 299 (0.0008) +[2026-06-07 03:22:50,529][528169] Updated weights for policy 0, policy_version 309 (0.0008) +[2026-06-07 03:22:50,658][528093] Saving new best policy, reward=-3.998! +[2026-06-07 03:22:51,104][528169] Updated weights for policy 0, policy_version 324 (0.0006) +[2026-06-07 03:22:51,249][528169] Updated weights for policy 0, policy_version 334 (0.0004) +[2026-06-07 03:22:51,488][528169] Updated weights for policy 0, policy_version 353 (0.0008) +[2026-06-07 03:22:51,634][528169] Updated weights for policy 0, policy_version 363 (0.0008) +[2026-06-07 03:22:51,782][528169] Updated weights for policy 0, policy_version 373 (0.0008) +[2026-06-07 03:22:51,921][528169] Updated weights for policy 0, policy_version 383 (0.0008) +[2026-06-07 03:22:52,383][528169] Updated weights for policy 0, policy_version 397 (0.0008) +[2026-06-07 03:22:52,517][528169] Updated weights for policy 0, policy_version 407 (0.0008) +[2026-06-07 03:22:52,664][528169] Updated weights for policy 0, policy_version 417 (0.0008) +[2026-06-07 03:22:52,810][528169] Updated weights for policy 0, policy_version 427 (0.0008) +[2026-06-07 03:22:52,964][528169] Updated weights for policy 0, policy_version 438 (0.0008) +[2026-06-07 03:22:53,087][528169] Updated weights for policy 0, policy_version 448 (0.0008) +[2026-06-07 03:22:53,647][528169] Updated weights for policy 0, policy_version 459 (0.0005) +[2026-06-07 03:22:53,796][528169] Updated weights for policy 0, policy_version 471 (0.0008) +[2026-06-07 03:22:53,930][528169] Updated weights for policy 0, policy_version 481 (0.0008) +[2026-06-07 03:22:54,075][528169] Updated weights for policy 0, policy_version 491 (0.0007) +[2026-06-07 03:22:54,219][528169] Updated weights for policy 0, policy_version 501 (0.0004) +[2026-06-07 03:22:54,366][528169] Updated weights for policy 0, policy_version 512 (0.0008) +[2026-06-07 03:22:54,844][528169] Updated weights for policy 0, policy_version 522 (0.0008) +[2026-06-07 03:22:54,986][528169] Updated weights for policy 0, policy_version 532 (0.0008) +[2026-06-07 03:22:55,103][527010] Fps is (10 sec: 26213.9, 60 sec: 19042.4, 300 sec: 19042.4). Total num frames: 262144. Throughput: 0: 20781.1. Samples: 286080. Policy #0 lag: (min: 63.0, avg: 80.3, max: 127.0) +[2026-06-07 03:22:55,105][527010] Avg episode reward: [(0, '0.742')] +[2026-06-07 03:22:55,127][528169] Updated weights for policy 0, policy_version 542 (0.0008) +[2026-06-07 03:22:55,253][528169] Updated weights for policy 0, policy_version 552 (0.0008) +[2026-06-07 03:22:55,402][528169] Updated weights for policy 0, policy_version 563 (0.0009) +[2026-06-07 03:22:55,555][528169] Updated weights for policy 0, policy_version 573 (0.0010) +[2026-06-07 03:22:55,603][528093] Saving new best policy, reward=0.742! +[2026-06-07 03:22:56,112][528169] Updated weights for policy 0, policy_version 583 (0.0009) +[2026-06-07 03:22:56,272][528169] Updated weights for policy 0, policy_version 595 (0.0008) +[2026-06-07 03:22:56,410][528169] Updated weights for policy 0, policy_version 605 (0.0008) +[2026-06-07 03:22:56,589][528169] Updated weights for policy 0, policy_version 618 (0.0007) +[2026-06-07 03:22:56,717][528169] Updated weights for policy 0, policy_version 629 (0.0008) +[2026-06-07 03:22:56,847][528169] Updated weights for policy 0, policy_version 639 (0.0007) +[2026-06-07 03:22:57,461][528169] Updated weights for policy 0, policy_version 649 (0.0008) +[2026-06-07 03:22:57,606][528169] Updated weights for policy 0, policy_version 661 (0.0008) +[2026-06-07 03:22:57,741][528169] Updated weights for policy 0, policy_version 671 (0.0008) +[2026-06-07 03:22:57,888][528169] Updated weights for policy 0, policy_version 681 (0.0006) +[2026-06-07 03:22:58,077][528169] Updated weights for policy 0, policy_version 696 (0.0005) +[2026-06-07 03:22:58,729][528169] Updated weights for policy 0, policy_version 707 (0.0008) +[2026-06-07 03:22:58,756][527010] Heartbeat connected on Batcher_0 +[2026-06-07 03:22:58,776][527010] Heartbeat connected on RolloutWorker_w1 +[2026-06-07 03:22:58,779][527010] Heartbeat connected on InferenceWorker_p0-w0 +[2026-06-07 03:22:58,810][527010] Heartbeat connected on RolloutWorker_w0 +[2026-06-07 03:22:58,910][528169] Updated weights for policy 0, policy_version 720 (0.0008) +[2026-06-07 03:22:59,059][528169] Updated weights for policy 0, policy_version 732 (0.0008) +[2026-06-07 03:22:59,193][528169] Updated weights for policy 0, policy_version 743 (0.0008) +[2026-06-07 03:22:59,338][528169] Updated weights for policy 0, policy_version 754 (0.0008) +[2026-06-07 03:22:59,475][528169] Updated weights for policy 0, policy_version 764 (0.0008) +[2026-06-07 03:22:59,530][527010] Heartbeat connected on LearnerWorker_p0 +[2026-06-07 03:23:00,103][527010] Fps is (10 sec: 26214.6, 60 sec: 20953.5, 300 sec: 20953.5). Total num frames: 393216. Throughput: 0: 19684.8. Samples: 369408. Policy #0 lag: (min: 43.0, avg: 74.1, max: 83.0) +[2026-06-07 03:23:00,104][527010] Avg episode reward: [(0, '3.937')] +[2026-06-07 03:23:00,153][528169] Updated weights for policy 0, policy_version 775 (0.0008) +[2026-06-07 03:23:00,279][528169] Updated weights for policy 0, policy_version 785 (0.0008) +[2026-06-07 03:23:00,409][528169] Updated weights for policy 0, policy_version 795 (0.0009) +[2026-06-07 03:23:00,551][528169] Updated weights for policy 0, policy_version 805 (0.0005) +[2026-06-07 03:23:00,687][528169] Updated weights for policy 0, policy_version 816 (0.0004) +[2026-06-07 03:23:00,834][528169] Updated weights for policy 0, policy_version 828 (0.0004) +[2026-06-07 03:23:00,881][528093] Saving new best policy, reward=3.937! +[2026-06-07 03:23:01,556][528169] Updated weights for policy 0, policy_version 840 (0.0006) +[2026-06-07 03:23:01,690][528169] Updated weights for policy 0, policy_version 851 (0.0009) +[2026-06-07 03:23:01,842][528169] Updated weights for policy 0, policy_version 863 (0.0006) +[2026-06-07 03:23:01,972][528169] Updated weights for policy 0, policy_version 874 (0.0004) +[2026-06-07 03:23:02,134][528169] Updated weights for policy 0, policy_version 887 (0.0007) +[2026-06-07 03:23:02,938][528169] Updated weights for policy 0, policy_version 899 (0.0009) +[2026-06-07 03:23:03,072][528169] Updated weights for policy 0, policy_version 909 (0.0009) +[2026-06-07 03:23:03,191][528169] Updated weights for policy 0, policy_version 919 (0.0008) +[2026-06-07 03:23:03,322][528169] Updated weights for policy 0, policy_version 930 (0.0007) +[2026-06-07 03:23:03,460][528169] Updated weights for policy 0, policy_version 940 (0.0009) +[2026-06-07 03:23:03,662][528169] Updated weights for policy 0, policy_version 957 (0.0009) +[2026-06-07 03:23:04,441][528169] Updated weights for policy 0, policy_version 970 (0.0009) +[2026-06-07 03:23:04,574][528169] Updated weights for policy 0, policy_version 981 (0.0009) +[2026-06-07 03:23:04,716][528169] Updated weights for policy 0, policy_version 993 (0.0008) +[2026-06-07 03:23:04,866][528169] Updated weights for policy 0, policy_version 1005 (0.0008) +[2026-06-07 03:23:05,024][528169] Updated weights for policy 0, policy_version 1018 (0.0008) +[2026-06-07 03:23:05,103][527010] Fps is (10 sec: 26215.0, 60 sec: 22060.3, 300 sec: 22060.3). Total num frames: 524288. Throughput: 0: 21521.7. Samples: 511488. Policy #0 lag: (min: 63.0, avg: 74.4, max: 127.0) +[2026-06-07 03:23:05,104][527010] Avg episode reward: [(0, '4.098')] +[2026-06-07 03:23:05,109][528093] Saving new best policy, reward=4.098! +[2026-06-07 03:23:05,821][528169] Updated weights for policy 0, policy_version 1030 (0.0009) +[2026-06-07 03:23:06,037][528169] Updated weights for policy 0, policy_version 1048 (0.0008) +[2026-06-07 03:23:06,222][528169] Updated weights for policy 0, policy_version 1063 (0.0008) +[2026-06-07 03:23:06,403][528169] Updated weights for policy 0, policy_version 1077 (0.0008) +[2026-06-07 03:23:07,231][528169] Updated weights for policy 0, policy_version 1090 (0.0009) +[2026-06-07 03:23:07,353][528169] Updated weights for policy 0, policy_version 1100 (0.0008) +[2026-06-07 03:23:07,523][528169] Updated weights for policy 0, policy_version 1114 (0.0008) +[2026-06-07 03:23:07,659][528169] Updated weights for policy 0, policy_version 1124 (0.0008) +[2026-06-07 03:23:07,780][528169] Updated weights for policy 0, policy_version 1134 (0.0008) +[2026-06-07 03:23:07,912][528169] Updated weights for policy 0, policy_version 1144 (0.0009) +[2026-06-07 03:23:08,704][528169] Updated weights for policy 0, policy_version 1154 (0.0009) +[2026-06-07 03:23:08,854][528169] Updated weights for policy 0, policy_version 1166 (0.0009) +[2026-06-07 03:23:09,009][528169] Updated weights for policy 0, policy_version 1178 (0.0009) +[2026-06-07 03:23:09,161][528169] Updated weights for policy 0, policy_version 1190 (0.0010) +[2026-06-07 03:23:09,293][528169] Updated weights for policy 0, policy_version 1200 (0.0009) +[2026-06-07 03:23:09,450][528169] Updated weights for policy 0, policy_version 1211 (0.0008) +[2026-06-07 03:23:10,104][527010] Fps is (10 sec: 22937.0, 60 sec: 21643.0, 300 sec: 21643.0). Total num frames: 622592. Throughput: 0: 22430.6. Samples: 645248. Policy #0 lag: (min: 63.0, avg: 72.5, max: 127.0) +[2026-06-07 03:23:10,106][527010] Avg episode reward: [(0, '4.312')] +[2026-06-07 03:23:10,215][528169] Updated weights for policy 0, policy_version 1222 (0.0009) +[2026-06-07 03:23:10,345][528169] Updated weights for policy 0, policy_version 1232 (0.0008) +[2026-06-07 03:23:10,540][528169] Updated weights for policy 0, policy_version 1248 (0.0008) +[2026-06-07 03:23:10,663][528169] Updated weights for policy 0, policy_version 1258 (0.0005) +[2026-06-07 03:23:10,825][528169] Updated weights for policy 0, policy_version 1270 (0.0008) +[2026-06-07 03:23:10,956][528093] Saving new best policy, reward=4.312! +[2026-06-07 03:23:11,612][528169] Updated weights for policy 0, policy_version 1281 (0.0008) +[2026-06-07 03:23:11,738][528169] Updated weights for policy 0, policy_version 1291 (0.0008) +[2026-06-07 03:23:11,865][528169] Updated weights for policy 0, policy_version 1301 (0.0008) +[2026-06-07 03:23:12,011][528169] Updated weights for policy 0, policy_version 1312 (0.0008) +[2026-06-07 03:23:12,137][528169] Updated weights for policy 0, policy_version 1322 (0.0008) +[2026-06-07 03:23:12,268][528169] Updated weights for policy 0, policy_version 1332 (0.0008) +[2026-06-07 03:23:12,409][528169] Updated weights for policy 0, policy_version 1343 (0.0008) +[2026-06-07 03:23:13,202][528169] Updated weights for policy 0, policy_version 1354 (0.0008) +[2026-06-07 03:23:13,336][528169] Updated weights for policy 0, policy_version 1365 (0.0008) +[2026-06-07 03:23:13,469][528169] Updated weights for policy 0, policy_version 1375 (0.0008) +[2026-06-07 03:23:13,605][528169] Updated weights for policy 0, policy_version 1385 (0.0008) +[2026-06-07 03:23:13,742][528169] Updated weights for policy 0, policy_version 1396 (0.0008) +[2026-06-07 03:23:13,891][528169] Updated weights for policy 0, policy_version 1407 (0.0008) +[2026-06-07 03:23:14,679][528169] Updated weights for policy 0, policy_version 1418 (0.0009) +[2026-06-07 03:23:14,804][528169] Updated weights for policy 0, policy_version 1428 (0.0008) +[2026-06-07 03:23:14,926][528169] Updated weights for policy 0, policy_version 1438 (0.0009) +[2026-06-07 03:23:15,051][528169] Updated weights for policy 0, policy_version 1448 (0.0008) +[2026-06-07 03:23:15,103][527010] Fps is (10 sec: 19660.8, 60 sec: 21349.7, 300 sec: 21349.7). Total num frames: 720896. Throughput: 0: 21088.1. Samples: 712064. Policy #0 lag: (min: 61.0, avg: 72.2, max: 125.0) +[2026-06-07 03:23:15,104][527010] Avg episode reward: [(0, '4.991')] +[2026-06-07 03:23:15,180][528169] Updated weights for policy 0, policy_version 1458 (0.0009) +[2026-06-07 03:23:15,305][528169] Updated weights for policy 0, policy_version 1468 (0.0010) +[2026-06-07 03:23:15,361][528093] Saving new best policy, reward=4.991! +[2026-06-07 03:23:16,109][528169] Updated weights for policy 0, policy_version 1479 (0.0010) +[2026-06-07 03:23:16,232][528169] Updated weights for policy 0, policy_version 1489 (0.0008) +[2026-06-07 03:23:16,354][528169] Updated weights for policy 0, policy_version 1499 (0.0008) +[2026-06-07 03:23:16,503][528169] Updated weights for policy 0, policy_version 1510 (0.0008) +[2026-06-07 03:23:16,632][528169] Updated weights for policy 0, policy_version 1520 (0.0008) +[2026-06-07 03:23:16,784][528169] Updated weights for policy 0, policy_version 1532 (0.0008) +[2026-06-07 03:23:17,574][528169] Updated weights for policy 0, policy_version 1543 (0.0008) +[2026-06-07 03:23:17,742][528169] Updated weights for policy 0, policy_version 1556 (0.0008) +[2026-06-07 03:23:17,897][528169] Updated weights for policy 0, policy_version 1568 (0.0008) +[2026-06-07 03:23:18,030][528169] Updated weights for policy 0, policy_version 1579 (0.0008) +[2026-06-07 03:23:18,163][528169] Updated weights for policy 0, policy_version 1589 (0.0008) +[2026-06-07 03:23:18,973][528169] Updated weights for policy 0, policy_version 1601 (0.0009) +[2026-06-07 03:23:19,131][528169] Updated weights for policy 0, policy_version 1614 (0.0008) +[2026-06-07 03:23:19,257][528169] Updated weights for policy 0, policy_version 1624 (0.0008) +[2026-06-07 03:23:19,399][528169] Updated weights for policy 0, policy_version 1635 (0.0008) +[2026-06-07 03:23:19,534][528169] Updated weights for policy 0, policy_version 1645 (0.0008) +[2026-06-07 03:23:19,666][528169] Updated weights for policy 0, policy_version 1655 (0.0008) +[2026-06-07 03:23:20,103][527010] Fps is (10 sec: 22937.9, 60 sec: 21977.0, 300 sec: 21977.0). Total num frames: 851968. Throughput: 0: 21778.9. Samples: 844288. Policy #0 lag: (min: 62.0, avg: 74.6, max: 126.0) +[2026-06-07 03:23:20,105][527010] Avg episode reward: [(0, '5.142')] +[2026-06-07 03:23:20,110][528093] Saving new best policy, reward=5.142! +[2026-06-07 03:23:20,439][528169] Updated weights for policy 0, policy_version 1666 (0.0008) +[2026-06-07 03:23:20,567][528169] Updated weights for policy 0, policy_version 1676 (0.0008) +[2026-06-07 03:23:20,729][528169] Updated weights for policy 0, policy_version 1688 (0.0008) +[2026-06-07 03:23:20,884][528169] Updated weights for policy 0, policy_version 1700 (0.0008) +[2026-06-07 03:23:21,010][528169] Updated weights for policy 0, policy_version 1710 (0.0008) +[2026-06-07 03:23:21,157][528169] Updated weights for policy 0, policy_version 1722 (0.0008) +[2026-06-07 03:23:21,930][528169] Updated weights for policy 0, policy_version 1732 (0.0009) +[2026-06-07 03:23:22,049][528169] Updated weights for policy 0, policy_version 1742 (0.0009) +[2026-06-07 03:23:22,219][528169] Updated weights for policy 0, policy_version 1755 (0.0008) +[2026-06-07 03:23:22,352][528169] Updated weights for policy 0, policy_version 1766 (0.0008) +[2026-06-07 03:23:22,507][528169] Updated weights for policy 0, policy_version 1777 (0.0008) +[2026-06-07 03:23:22,651][528169] Updated weights for policy 0, policy_version 1788 (0.0008) +[2026-06-07 03:23:23,446][528169] Updated weights for policy 0, policy_version 1800 (0.0008) +[2026-06-07 03:23:23,579][528169] Updated weights for policy 0, policy_version 1810 (0.0008) +[2026-06-07 03:23:23,703][528169] Updated weights for policy 0, policy_version 1820 (0.0008) +[2026-06-07 03:23:23,830][528169] Updated weights for policy 0, policy_version 1830 (0.0008) +[2026-06-07 03:23:23,947][528169] Updated weights for policy 0, policy_version 1840 (0.0008) +[2026-06-07 03:23:24,114][528169] Updated weights for policy 0, policy_version 1853 (0.0007) +[2026-06-07 03:23:24,902][528169] Updated weights for policy 0, policy_version 1866 (0.0008) +[2026-06-07 03:23:25,026][528169] Updated weights for policy 0, policy_version 1876 (0.0008) +[2026-06-07 03:23:25,103][527010] Fps is (10 sec: 22937.7, 60 sec: 21712.5, 300 sec: 21712.5). Total num frames: 950272. Throughput: 0: 22317.9. Samples: 976768. Policy #0 lag: (min: 56.0, avg: 80.2, max: 125.0) +[2026-06-07 03:23:25,104][527010] Avg episode reward: [(0, '5.986')] +[2026-06-07 03:23:25,150][528169] Updated weights for policy 0, policy_version 1886 (0.0008) +[2026-06-07 03:23:25,272][528169] Updated weights for policy 0, policy_version 1896 (0.0008) +[2026-06-07 03:23:25,405][528169] Updated weights for policy 0, policy_version 1907 (0.0008) +[2026-06-07 03:23:25,531][528169] Updated weights for policy 0, policy_version 1917 (0.0009) +[2026-06-07 03:23:25,563][528093] Saving new best policy, reward=5.986! +[2026-06-07 03:23:26,320][528169] Updated weights for policy 0, policy_version 1927 (0.0009) +[2026-06-07 03:23:26,465][528169] Updated weights for policy 0, policy_version 1938 (0.0009) +[2026-06-07 03:23:26,609][528169] Updated weights for policy 0, policy_version 1950 (0.0009) +[2026-06-07 03:23:26,751][528169] Updated weights for policy 0, policy_version 1961 (0.0008) +[2026-06-07 03:23:26,923][528169] Updated weights for policy 0, policy_version 1974 (0.0008) +[2026-06-07 03:23:27,737][528169] Updated weights for policy 0, policy_version 1988 (0.0008) +[2026-06-07 03:23:27,855][528169] Updated weights for policy 0, policy_version 1998 (0.0009) +[2026-06-07 03:23:28,003][528169] Updated weights for policy 0, policy_version 2010 (0.0008) +[2026-06-07 03:23:28,151][528169] Updated weights for policy 0, policy_version 2021 (0.0008) +[2026-06-07 03:23:28,275][528169] Updated weights for policy 0, policy_version 2031 (0.0008) +[2026-06-07 03:23:28,443][528169] Updated weights for policy 0, policy_version 2044 (0.0008) +[2026-06-07 03:23:29,291][528169] Updated weights for policy 0, policy_version 2058 (0.0009) +[2026-06-07 03:23:29,432][528169] Updated weights for policy 0, policy_version 2070 (0.0009) +[2026-06-07 03:23:29,575][528169] Updated weights for policy 0, policy_version 2081 (0.0008) +[2026-06-07 03:23:29,706][528169] Updated weights for policy 0, policy_version 2091 (0.0008) +[2026-06-07 03:23:29,842][528169] Updated weights for policy 0, policy_version 2101 (0.0008) +[2026-06-07 03:23:29,971][528169] Updated weights for policy 0, policy_version 2111 (0.0008) +[2026-06-07 03:23:30,103][527010] Fps is (10 sec: 22938.0, 60 sec: 22174.1, 300 sec: 22174.1). Total num frames: 1081344. Throughput: 0: 23261.9. Samples: 1046784. Policy #0 lag: (min: 63.0, avg: 74.1, max: 127.0) +[2026-06-07 03:23:30,104][527010] Avg episode reward: [(0, '5.995')] +[2026-06-07 03:23:30,110][528093] Saving new best policy, reward=5.995! +[2026-06-07 03:23:30,771][528169] Updated weights for policy 0, policy_version 2122 (0.0008) +[2026-06-07 03:23:30,894][528169] Updated weights for policy 0, policy_version 2132 (0.0008) +[2026-06-07 03:23:31,030][528169] Updated weights for policy 0, policy_version 2143 (0.0009) +[2026-06-07 03:23:31,200][528169] Updated weights for policy 0, policy_version 2157 (0.0007) +[2026-06-07 03:23:31,345][528169] Updated weights for policy 0, policy_version 2168 (0.0008) +[2026-06-07 03:23:32,203][528169] Updated weights for policy 0, policy_version 2179 (0.0009) +[2026-06-07 03:23:32,332][528169] Updated weights for policy 0, policy_version 2190 (0.0008) +[2026-06-07 03:23:32,465][528169] Updated weights for policy 0, policy_version 2201 (0.0009) +[2026-06-07 03:23:32,650][528169] Updated weights for policy 0, policy_version 2216 (0.0004) +[2026-06-07 03:23:32,783][528169] Updated weights for policy 0, policy_version 2226 (0.0004) +[2026-06-07 03:23:32,936][528169] Updated weights for policy 0, policy_version 2238 (0.0004) +[2026-06-07 03:23:33,705][528169] Updated weights for policy 0, policy_version 2250 (0.0004) +[2026-06-07 03:23:33,873][528169] Updated weights for policy 0, policy_version 2263 (0.0004) +[2026-06-07 03:23:34,044][528169] Updated weights for policy 0, policy_version 2276 (0.0004) +[2026-06-07 03:23:34,195][528169] Updated weights for policy 0, policy_version 2288 (0.0006) +[2026-06-07 03:23:34,324][528169] Updated weights for policy 0, policy_version 2298 (0.0008) +[2026-06-07 03:23:35,103][527010] Fps is (10 sec: 22937.0, 60 sec: 21940.3, 300 sec: 21940.3). Total num frames: 1179648. Throughput: 0: 23631.6. Samples: 1181696. Policy #0 lag: (min: 63.0, avg: 73.1, max: 127.0) +[2026-06-07 03:23:35,105][527010] Avg episode reward: [(0, '6.620')] +[2026-06-07 03:23:35,139][528169] Updated weights for policy 0, policy_version 2309 (0.0008) +[2026-06-07 03:23:35,280][528169] Updated weights for policy 0, policy_version 2320 (0.0008) +[2026-06-07 03:23:35,414][528169] Updated weights for policy 0, policy_version 2331 (0.0008) +[2026-06-07 03:23:35,567][528169] Updated weights for policy 0, policy_version 2342 (0.0008) +[2026-06-07 03:23:35,692][528169] Updated weights for policy 0, policy_version 2352 (0.0008) +[2026-06-07 03:23:35,824][528169] Updated weights for policy 0, policy_version 2363 (0.0008) +[2026-06-07 03:23:35,883][528093] Saving new best policy, reward=6.620! +[2026-06-07 03:23:36,607][528169] Updated weights for policy 0, policy_version 2373 (0.0008) +[2026-06-07 03:23:36,743][528169] Updated weights for policy 0, policy_version 2384 (0.0008) +[2026-06-07 03:23:36,909][528169] Updated weights for policy 0, policy_version 2397 (0.0008) +[2026-06-07 03:23:37,057][528169] Updated weights for policy 0, policy_version 2408 (0.0008) +[2026-06-07 03:23:37,223][528169] Updated weights for policy 0, policy_version 2420 (0.0009) +[2026-06-07 03:23:37,356][528169] Updated weights for policy 0, policy_version 2431 (0.0008) +[2026-06-07 03:23:38,161][528169] Updated weights for policy 0, policy_version 2441 (0.0008) +[2026-06-07 03:23:38,295][528169] Updated weights for policy 0, policy_version 2452 (0.0008) +[2026-06-07 03:23:38,442][528169] Updated weights for policy 0, policy_version 2463 (0.0008) +[2026-06-07 03:23:38,581][528169] Updated weights for policy 0, policy_version 2474 (0.0008) +[2026-06-07 03:23:38,790][528169] Updated weights for policy 0, policy_version 2490 (0.0008) +[2026-06-07 03:23:39,549][528169] Updated weights for policy 0, policy_version 2502 (0.0007) +[2026-06-07 03:23:39,688][528169] Updated weights for policy 0, policy_version 2512 (0.0005) +[2026-06-07 03:23:39,807][528169] Updated weights for policy 0, policy_version 2522 (0.0005) +[2026-06-07 03:23:39,934][528169] Updated weights for policy 0, policy_version 2532 (0.0006) +[2026-06-07 03:23:40,103][528169] Updated weights for policy 0, policy_version 2545 (0.0006) +[2026-06-07 03:23:40,103][527010] Fps is (10 sec: 19660.6, 60 sec: 21746.4, 300 sec: 21746.4). Total num frames: 1277952. Throughput: 0: 22951.9. Samples: 1318912. Policy #0 lag: (min: 29.0, avg: 39.2, max: 93.0) +[2026-06-07 03:23:40,105][527010] Avg episode reward: [(0, '6.918')] +[2026-06-07 03:23:40,263][528169] Updated weights for policy 0, policy_version 2557 (0.0008) +[2026-06-07 03:23:40,288][528093] Saving new best policy, reward=6.918! +[2026-06-07 03:23:41,040][528169] Updated weights for policy 0, policy_version 2567 (0.0008) +[2026-06-07 03:23:41,190][528169] Updated weights for policy 0, policy_version 2579 (0.0008) +[2026-06-07 03:23:41,332][528169] Updated weights for policy 0, policy_version 2590 (0.0008) +[2026-06-07 03:23:41,463][528169] Updated weights for policy 0, policy_version 2600 (0.0008) +[2026-06-07 03:23:41,606][528169] Updated weights for policy 0, policy_version 2611 (0.0008) +[2026-06-07 03:23:41,736][528169] Updated weights for policy 0, policy_version 2621 (0.0008) +[2026-06-07 03:23:42,508][528169] Updated weights for policy 0, policy_version 2633 (0.0008) +[2026-06-07 03:23:42,675][528169] Updated weights for policy 0, policy_version 2646 (0.0009) +[2026-06-07 03:23:42,798][528169] Updated weights for policy 0, policy_version 2656 (0.0008) +[2026-06-07 03:23:42,933][528169] Updated weights for policy 0, policy_version 2666 (0.0009) +[2026-06-07 03:23:43,065][528169] Updated weights for policy 0, policy_version 2677 (0.0008) +[2026-06-07 03:23:43,198][528169] Updated weights for policy 0, policy_version 2687 (0.0008) +[2026-06-07 03:23:43,984][528169] Updated weights for policy 0, policy_version 2698 (0.0008) +[2026-06-07 03:23:44,108][528169] Updated weights for policy 0, policy_version 2708 (0.0008) +[2026-06-07 03:23:44,248][528169] Updated weights for policy 0, policy_version 2719 (0.0008) +[2026-06-07 03:23:44,371][528169] Updated weights for policy 0, policy_version 2729 (0.0008) +[2026-06-07 03:23:44,565][528169] Updated weights for policy 0, policy_version 2744 (0.0008) +[2026-06-07 03:23:45,103][527010] Fps is (10 sec: 22938.1, 60 sec: 23483.8, 300 sec: 22096.8). Total num frames: 1409024. Throughput: 0: 22624.7. Samples: 1387520. Policy #0 lag: (min: 63.0, avg: 74.4, max: 127.0) +[2026-06-07 03:23:45,104][527010] Avg episode reward: [(0, '7.687')] +[2026-06-07 03:23:45,109][528093] Saving new best policy, reward=7.687! +[2026-06-07 03:23:45,361][528169] Updated weights for policy 0, policy_version 2754 (0.0007) +[2026-06-07 03:23:45,484][528169] Updated weights for policy 0, policy_version 2765 (0.0004) +[2026-06-07 03:23:45,642][528169] Updated weights for policy 0, policy_version 2777 (0.0004) +[2026-06-07 03:23:45,788][528169] Updated weights for policy 0, policy_version 2789 (0.0008) +[2026-06-07 03:23:45,930][528169] Updated weights for policy 0, policy_version 2800 (0.0008) +[2026-06-07 03:23:46,134][528169] Updated weights for policy 0, policy_version 2815 (0.0008) +[2026-06-07 03:23:46,923][528169] Updated weights for policy 0, policy_version 2826 (0.0008) +[2026-06-07 03:23:47,081][528169] Updated weights for policy 0, policy_version 2838 (0.0008) +[2026-06-07 03:23:47,201][528169] Updated weights for policy 0, policy_version 2848 (0.0008) +[2026-06-07 03:23:47,331][528169] Updated weights for policy 0, policy_version 2858 (0.0008) +[2026-06-07 03:23:47,471][528169] Updated weights for policy 0, policy_version 2869 (0.0008) +[2026-06-07 03:23:47,601][528169] Updated weights for policy 0, policy_version 2879 (0.0009) +[2026-06-07 03:23:48,398][528169] Updated weights for policy 0, policy_version 2891 (0.0009) +[2026-06-07 03:23:48,540][528169] Updated weights for policy 0, policy_version 2902 (0.0008) +[2026-06-07 03:23:48,701][528169] Updated weights for policy 0, policy_version 2915 (0.0009) +[2026-06-07 03:23:48,837][528169] Updated weights for policy 0, policy_version 2925 (0.0008) +[2026-06-07 03:23:48,990][528169] Updated weights for policy 0, policy_version 2937 (0.0008) +[2026-06-07 03:23:49,757][528169] Updated weights for policy 0, policy_version 2948 (0.0009) +[2026-06-07 03:23:49,897][528169] Updated weights for policy 0, policy_version 2959 (0.0008) +[2026-06-07 03:23:50,043][528169] Updated weights for policy 0, policy_version 2971 (0.0008) +[2026-06-07 03:23:50,103][527010] Fps is (10 sec: 22937.8, 60 sec: 22937.6, 300 sec: 21919.6). Total num frames: 1507328. Throughput: 0: 22502.4. Samples: 1524096. Policy #0 lag: (min: 63.0, avg: 73.4, max: 127.0) +[2026-06-07 03:23:50,104][527010] Avg episode reward: [(0, '8.623')] +[2026-06-07 03:23:50,193][528169] Updated weights for policy 0, policy_version 2982 (0.0009) +[2026-06-07 03:23:50,326][528169] Updated weights for policy 0, policy_version 2993 (0.0008) +[2026-06-07 03:23:50,471][528169] Updated weights for policy 0, policy_version 3004 (0.0008) +[2026-06-07 03:23:50,515][528093] Saving new best policy, reward=8.623! +[2026-06-07 03:23:51,285][528169] Updated weights for policy 0, policy_version 3016 (0.0008) +[2026-06-07 03:23:51,411][528169] Updated weights for policy 0, policy_version 3026 (0.0008) +[2026-06-07 03:23:51,563][528169] Updated weights for policy 0, policy_version 3038 (0.0008) +[2026-06-07 03:23:51,725][528169] Updated weights for policy 0, policy_version 3050 (0.0008) +[2026-06-07 03:23:51,869][528169] Updated weights for policy 0, policy_version 3062 (0.0008) +[2026-06-07 03:23:51,995][528169] Updated weights for policy 0, policy_version 3072 (0.0008) +[2026-06-07 03:23:52,782][528169] Updated weights for policy 0, policy_version 3082 (0.0008) +[2026-06-07 03:23:52,927][528169] Updated weights for policy 0, policy_version 3093 (0.0008) +[2026-06-07 03:23:53,050][528169] Updated weights for policy 0, policy_version 3103 (0.0008) +[2026-06-07 03:23:53,166][528169] Updated weights for policy 0, policy_version 3113 (0.0008) +[2026-06-07 03:23:53,298][528169] Updated weights for policy 0, policy_version 3123 (0.0008) +[2026-06-07 03:23:53,456][528169] Updated weights for policy 0, policy_version 3135 (0.0009) +[2026-06-07 03:23:54,237][528169] Updated weights for policy 0, policy_version 3145 (0.0008) +[2026-06-07 03:23:54,377][528169] Updated weights for policy 0, policy_version 3156 (0.0008) +[2026-06-07 03:23:54,541][528169] Updated weights for policy 0, policy_version 3169 (0.0008) +[2026-06-07 03:23:54,664][528169] Updated weights for policy 0, policy_version 3179 (0.0008) +[2026-06-07 03:23:54,798][528169] Updated weights for policy 0, policy_version 3189 (0.0008) +[2026-06-07 03:23:55,103][527010] Fps is (10 sec: 22937.6, 60 sec: 22937.7, 300 sec: 22210.7). Total num frames: 1638400. Throughput: 0: 22471.3. Samples: 1656448. Policy #0 lag: (min: 63.0, avg: 73.1, max: 125.0) +[2026-06-07 03:23:55,104][527010] Avg episode reward: [(0, '9.013')] +[2026-06-07 03:23:55,109][528093] Saving new best policy, reward=9.013! +[2026-06-07 03:23:55,598][528169] Updated weights for policy 0, policy_version 3201 (0.0009) +[2026-06-07 03:23:55,746][528169] Updated weights for policy 0, policy_version 3213 (0.0008) +[2026-06-07 03:23:55,866][528169] Updated weights for policy 0, policy_version 3223 (0.0008) +[2026-06-07 03:23:56,002][528169] Updated weights for policy 0, policy_version 3233 (0.0008) +[2026-06-07 03:23:56,148][528169] Updated weights for policy 0, policy_version 3245 (0.0008) +[2026-06-07 03:23:56,302][528169] Updated weights for policy 0, policy_version 3257 (0.0008) +[2026-06-07 03:23:57,104][528169] Updated weights for policy 0, policy_version 3268 (0.0008) +[2026-06-07 03:23:57,256][528169] Updated weights for policy 0, policy_version 3280 (0.0008) +[2026-06-07 03:23:57,402][528169] Updated weights for policy 0, policy_version 3292 (0.0008) +[2026-06-07 03:23:57,541][528169] Updated weights for policy 0, policy_version 3302 (0.0008) +[2026-06-07 03:23:57,696][528169] Updated weights for policy 0, policy_version 3314 (0.0008) +[2026-06-07 03:23:57,840][528169] Updated weights for policy 0, policy_version 3326 (0.0008) +[2026-06-07 03:23:58,607][528169] Updated weights for policy 0, policy_version 3337 (0.0008) +[2026-06-07 03:23:58,763][528169] Updated weights for policy 0, policy_version 3349 (0.0008) +[2026-06-07 03:23:58,886][528169] Updated weights for policy 0, policy_version 3359 (0.0008) +[2026-06-07 03:23:59,042][528169] Updated weights for policy 0, policy_version 3371 (0.0008) +[2026-06-07 03:23:59,192][528169] Updated weights for policy 0, policy_version 3382 (0.0008) +[2026-06-07 03:23:59,309][528169] Updated weights for policy 0, policy_version 3392 (0.0008) +[2026-06-07 03:24:00,089][528169] Updated weights for policy 0, policy_version 3403 (0.0008) +[2026-06-07 03:24:00,103][527010] Fps is (10 sec: 22937.7, 60 sec: 22391.5, 300 sec: 22048.9). Total num frames: 1736704. Throughput: 0: 22462.6. Samples: 1722880. Policy #0 lag: (min: 81.0, avg: 89.9, max: 144.0) +[2026-06-07 03:24:00,104][527010] Avg episode reward: [(0, '9.736')] +[2026-06-07 03:24:00,218][528169] Updated weights for policy 0, policy_version 3414 (0.0008) +[2026-06-07 03:24:00,366][528169] Updated weights for policy 0, policy_version 3425 (0.0008) +[2026-06-07 03:24:00,488][528169] Updated weights for policy 0, policy_version 3435 (0.0008) +[2026-06-07 03:24:00,613][528169] Updated weights for policy 0, policy_version 3445 (0.0008) +[2026-06-07 03:24:00,747][528093] Saving new best policy, reward=9.736! +[2026-06-07 03:24:01,445][528169] Updated weights for policy 0, policy_version 3458 (0.0008) +[2026-06-07 03:24:01,589][528169] Updated weights for policy 0, policy_version 3470 (0.0008) +[2026-06-07 03:24:01,715][528169] Updated weights for policy 0, policy_version 3480 (0.0008) +[2026-06-07 03:24:01,863][528169] Updated weights for policy 0, policy_version 3491 (0.0008) +[2026-06-07 03:24:02,033][528169] Updated weights for policy 0, policy_version 3505 (0.0008) +[2026-06-07 03:24:02,170][528169] Updated weights for policy 0, policy_version 3515 (0.0008) +[2026-06-07 03:24:02,946][528169] Updated weights for policy 0, policy_version 3527 (0.0008) +[2026-06-07 03:24:03,092][528169] Updated weights for policy 0, policy_version 3538 (0.0008) +[2026-06-07 03:24:03,264][528169] Updated weights for policy 0, policy_version 3551 (0.0009) +[2026-06-07 03:24:03,404][528169] Updated weights for policy 0, policy_version 3562 (0.0008) +[2026-06-07 03:24:03,539][528169] Updated weights for policy 0, policy_version 3573 (0.0008) +[2026-06-07 03:24:03,685][528169] Updated weights for policy 0, policy_version 3584 (0.0008) +[2026-06-07 03:24:04,456][528169] Updated weights for policy 0, policy_version 3594 (0.0008) +[2026-06-07 03:24:04,599][528169] Updated weights for policy 0, policy_version 3605 (0.0008) +[2026-06-07 03:24:04,722][528169] Updated weights for policy 0, policy_version 3615 (0.0008) +[2026-06-07 03:24:04,865][528169] Updated weights for policy 0, policy_version 3627 (0.0008) +[2026-06-07 03:24:04,999][528169] Updated weights for policy 0, policy_version 3637 (0.0008) +[2026-06-07 03:24:05,103][527010] Fps is (10 sec: 19660.7, 60 sec: 21845.3, 300 sec: 21906.3). Total num frames: 1835008. Throughput: 0: 22465.5. Samples: 1855232. Policy #0 lag: (min: 21.0, avg: 31.5, max: 85.0) +[2026-06-07 03:24:05,104][527010] Avg episode reward: [(0, '10.281')] +[2026-06-07 03:24:05,134][528093] Saving new best policy, reward=10.281! +[2026-06-07 03:24:05,804][528169] Updated weights for policy 0, policy_version 3649 (0.0008) +[2026-06-07 03:24:05,957][528169] Updated weights for policy 0, policy_version 3661 (0.0008) +[2026-06-07 03:24:06,115][528169] Updated weights for policy 0, policy_version 3673 (0.0008) +[2026-06-07 03:24:06,239][528169] Updated weights for policy 0, policy_version 3683 (0.0008) +[2026-06-07 03:24:06,381][528169] Updated weights for policy 0, policy_version 3694 (0.0008) +[2026-06-07 03:24:06,546][528169] Updated weights for policy 0, policy_version 3707 (0.0008) +[2026-06-07 03:24:07,318][528169] Updated weights for policy 0, policy_version 3717 (0.0008) +[2026-06-07 03:24:07,450][528169] Updated weights for policy 0, policy_version 3727 (0.0008) +[2026-06-07 03:24:07,584][528169] Updated weights for policy 0, policy_version 3737 (0.0008) +[2026-06-07 03:24:07,733][528169] Updated weights for policy 0, policy_version 3749 (0.0008) +[2026-06-07 03:24:07,883][528169] Updated weights for policy 0, policy_version 3761 (0.0008) +[2026-06-07 03:24:08,038][528169] Updated weights for policy 0, policy_version 3772 (0.0008) +[2026-06-07 03:24:08,784][528169] Updated weights for policy 0, policy_version 3782 (0.0008) +[2026-06-07 03:24:08,904][528169] Updated weights for policy 0, policy_version 3792 (0.0008) +[2026-06-07 03:24:09,074][528169] Updated weights for policy 0, policy_version 3805 (0.0008) +[2026-06-07 03:24:09,213][528169] Updated weights for policy 0, policy_version 3816 (0.0008) +[2026-06-07 03:24:09,378][528169] Updated weights for policy 0, policy_version 3829 (0.0008) +[2026-06-07 03:24:09,511][528169] Updated weights for policy 0, policy_version 3839 (0.0008) +[2026-06-07 03:24:10,103][527010] Fps is (10 sec: 22937.0, 60 sec: 22391.5, 300 sec: 22148.9). Total num frames: 1966080. Throughput: 0: 22502.3. Samples: 1989376. Policy #0 lag: (min: 63.0, avg: 73.4, max: 127.0) +[2026-06-07 03:24:10,105][527010] Avg episode reward: [(0, '10.820')] +[2026-06-07 03:24:10,279][528169] Updated weights for policy 0, policy_version 3851 (0.0008) +[2026-06-07 03:24:10,399][528169] Updated weights for policy 0, policy_version 3861 (0.0008) +[2026-06-07 03:24:10,534][528169] Updated weights for policy 0, policy_version 3871 (0.0008) +[2026-06-07 03:24:10,681][528169] Updated weights for policy 0, policy_version 3882 (0.0008) +[2026-06-07 03:24:10,828][528169] Updated weights for policy 0, policy_version 3894 (0.0008) +[2026-06-07 03:24:10,949][528093] Saving new best policy, reward=10.820! +[2026-06-07 03:24:10,953][528169] Updated weights for policy 0, policy_version 3904 (0.0008) +[2026-06-07 03:24:11,708][528169] Updated weights for policy 0, policy_version 3914 (0.0008) +[2026-06-07 03:24:11,857][528169] Updated weights for policy 0, policy_version 3925 (0.0008) +[2026-06-07 03:24:11,982][528169] Updated weights for policy 0, policy_version 3935 (0.0008) +[2026-06-07 03:24:12,136][528169] Updated weights for policy 0, policy_version 3947 (0.0008) +[2026-06-07 03:24:12,265][528169] Updated weights for policy 0, policy_version 3957 (0.0008) +[2026-06-07 03:24:12,403][528169] Updated weights for policy 0, policy_version 3967 (0.0008) +[2026-06-07 03:24:13,142][528169] Updated weights for policy 0, policy_version 3978 (0.0008) +[2026-06-07 03:24:13,312][528169] Updated weights for policy 0, policy_version 3992 (0.0008) +[2026-06-07 03:24:13,442][528169] Updated weights for policy 0, policy_version 4002 (0.0008) +[2026-06-07 03:24:13,583][528169] Updated weights for policy 0, policy_version 4013 (0.0008) +[2026-06-07 03:24:13,715][528169] Updated weights for policy 0, policy_version 4023 (0.0008) +[2026-06-07 03:24:14,528][528169] Updated weights for policy 0, policy_version 4035 (0.0008) +[2026-06-07 03:24:14,693][528169] Updated weights for policy 0, policy_version 4048 (0.0008) +[2026-06-07 03:24:14,842][528169] Updated weights for policy 0, policy_version 4060 (0.0009) +[2026-06-07 03:24:14,987][528169] Updated weights for policy 0, policy_version 4071 (0.0008) +[2026-06-07 03:24:15,103][527010] Fps is (10 sec: 22936.9, 60 sec: 22391.3, 300 sec: 22016.2). Total num frames: 2064384. Throughput: 0: 22439.6. Samples: 2056576. Policy #0 lag: (min: 63.0, avg: 73.4, max: 127.0) +[2026-06-07 03:24:15,105][527010] Avg episode reward: [(0, '11.866')] +[2026-06-07 03:24:15,164][528169] Updated weights for policy 0, policy_version 4085 (0.0008) +[2026-06-07 03:24:15,302][528093] Saving new best policy, reward=11.866! +[2026-06-07 03:24:15,922][528169] Updated weights for policy 0, policy_version 4097 (0.0009) +[2026-06-07 03:24:16,066][528169] Updated weights for policy 0, policy_version 4108 (0.0008) +[2026-06-07 03:24:16,218][528169] Updated weights for policy 0, policy_version 4120 (0.0008) +[2026-06-07 03:24:16,412][528169] Updated weights for policy 0, policy_version 4135 (0.0008) +[2026-06-07 03:24:16,548][528169] Updated weights for policy 0, policy_version 4145 (0.0008) +[2026-06-07 03:24:16,696][528169] Updated weights for policy 0, policy_version 4157 (0.0008) +[2026-06-07 03:24:17,498][528169] Updated weights for policy 0, policy_version 4170 (0.0008) +[2026-06-07 03:24:17,642][528169] Updated weights for policy 0, policy_version 4181 (0.0008) +[2026-06-07 03:24:17,796][528169] Updated weights for policy 0, policy_version 4193 (0.0008) +[2026-06-07 03:24:17,935][528169] Updated weights for policy 0, policy_version 4203 (0.0008) +[2026-06-07 03:24:18,067][528169] Updated weights for policy 0, policy_version 4214 (0.0008) +[2026-06-07 03:24:18,193][528093] Early stopping after 8 epochs (64 sgd steps), loss delta 0.0000006 +[2026-06-07 03:24:18,854][528169] Updated weights for policy 0, policy_version 4227 (0.0008) +[2026-06-07 03:24:19,012][528169] Updated weights for policy 0, policy_version 4239 (0.0008) +[2026-06-07 03:24:19,166][528169] Updated weights for policy 0, policy_version 4251 (0.0008) +[2026-06-07 03:24:19,287][528169] Updated weights for policy 0, policy_version 4261 (0.0008) +[2026-06-07 03:24:19,428][528169] Updated weights for policy 0, policy_version 4272 (0.0008) +[2026-06-07 03:24:19,563][528169] Updated weights for policy 0, policy_version 4283 (0.0008) +[2026-06-07 03:24:20,103][527010] Fps is (10 sec: 22938.0, 60 sec: 22391.5, 300 sec: 22228.8). Total num frames: 2195456. Throughput: 0: 22405.8. Samples: 2189952. Policy #0 lag: (min: 63.0, avg: 73.0, max: 127.0) +[2026-06-07 03:24:20,106][527010] Avg episode reward: [(0, '14.257')] +[2026-06-07 03:24:20,117][528093] Saving new best policy, reward=14.257! +[2026-06-07 03:24:20,319][528169] Updated weights for policy 0, policy_version 4295 (0.0008) +[2026-06-07 03:24:20,473][528169] Updated weights for policy 0, policy_version 4307 (0.0008) +[2026-06-07 03:24:20,632][528169] Updated weights for policy 0, policy_version 4319 (0.0008) +[2026-06-07 03:24:20,758][528169] Updated weights for policy 0, policy_version 4329 (0.0008) +[2026-06-07 03:24:20,900][528169] Updated weights for policy 0, policy_version 4340 (0.0008) +[2026-06-07 03:24:21,034][528169] Updated weights for policy 0, policy_version 4350 (0.0008) +[2026-06-07 03:24:21,770][528169] Updated weights for policy 0, policy_version 4361 (0.0009) +[2026-06-07 03:24:21,940][528169] Updated weights for policy 0, policy_version 4374 (0.0008) +[2026-06-07 03:24:22,062][528169] Updated weights for policy 0, policy_version 4384 (0.0008) +[2026-06-07 03:24:22,191][528169] Updated weights for policy 0, policy_version 4394 (0.0008) +[2026-06-07 03:24:22,314][528169] Updated weights for policy 0, policy_version 4404 (0.0008) +[2026-06-07 03:24:22,459][528169] Updated weights for policy 0, policy_version 4414 (0.0008) +[2026-06-07 03:24:23,195][528169] Updated weights for policy 0, policy_version 4425 (0.0008) +[2026-06-07 03:24:23,335][528169] Updated weights for policy 0, policy_version 4436 (0.0009) +[2026-06-07 03:24:23,501][528169] Updated weights for policy 0, policy_version 4449 (0.0008) +[2026-06-07 03:24:23,638][528169] Updated weights for policy 0, policy_version 4459 (0.0008) +[2026-06-07 03:24:23,777][528169] Updated weights for policy 0, policy_version 4470 (0.0008) +[2026-06-07 03:24:24,520][528169] Updated weights for policy 0, policy_version 4482 (0.0009) +[2026-06-07 03:24:24,652][528169] Updated weights for policy 0, policy_version 4493 (0.0008) +[2026-06-07 03:24:24,776][528169] Updated weights for policy 0, policy_version 4503 (0.0008) +[2026-06-07 03:24:24,926][528169] Updated weights for policy 0, policy_version 4515 (0.0008) +[2026-06-07 03:24:25,069][528169] Updated weights for policy 0, policy_version 4526 (0.0006) +[2026-06-07 03:24:25,103][527010] Fps is (10 sec: 22938.3, 60 sec: 22391.4, 300 sec: 22105.1). Total num frames: 2293760. Throughput: 0: 22542.3. Samples: 2333312. Policy #0 lag: (min: 63.0, avg: 73.6, max: 127.0) +[2026-06-07 03:24:25,104][527010] Avg episode reward: [(0, '13.758')] +[2026-06-07 03:24:25,232][528169] Updated weights for policy 0, policy_version 4538 (0.0008) +[2026-06-07 03:24:25,970][528169] Updated weights for policy 0, policy_version 4549 (0.0008) +[2026-06-07 03:24:26,103][528169] Updated weights for policy 0, policy_version 4560 (0.0008) +[2026-06-07 03:24:26,272][528169] Updated weights for policy 0, policy_version 4573 (0.0008) +[2026-06-07 03:24:26,423][528169] Updated weights for policy 0, policy_version 4584 (0.0008) +[2026-06-07 03:24:26,545][528169] Updated weights for policy 0, policy_version 4594 (0.0008) +[2026-06-07 03:24:26,684][528169] Updated weights for policy 0, policy_version 4604 (0.0008) +[2026-06-07 03:24:27,427][528169] Updated weights for policy 0, policy_version 4614 (0.0008) +[2026-06-07 03:24:27,579][528169] Updated weights for policy 0, policy_version 4626 (0.0008) +[2026-06-07 03:24:27,721][528169] Updated weights for policy 0, policy_version 4638 (0.0008) +[2026-06-07 03:24:27,847][528169] Updated weights for policy 0, policy_version 4648 (0.0008) +[2026-06-07 03:24:28,008][528169] Updated weights for policy 0, policy_version 4660 (0.0008) +[2026-06-07 03:24:28,149][528169] Updated weights for policy 0, policy_version 4672 (0.0008) +[2026-06-07 03:24:28,915][528169] Updated weights for policy 0, policy_version 4683 (0.0008) +[2026-06-07 03:24:29,068][528169] Updated weights for policy 0, policy_version 4696 (0.0008) +[2026-06-07 03:24:29,217][528169] Updated weights for policy 0, policy_version 4707 (0.0008) +[2026-06-07 03:24:29,374][528169] Updated weights for policy 0, policy_version 4720 (0.0008) +[2026-06-07 03:24:29,552][528169] Updated weights for policy 0, policy_version 4734 (0.0008) +[2026-06-07 03:24:30,103][527010] Fps is (10 sec: 22937.7, 60 sec: 22391.5, 300 sec: 22294.0). Total num frames: 2424832. Throughput: 0: 22590.6. Samples: 2404096. Policy #0 lag: (min: 111.0, avg: 131.7, max: 186.0) +[2026-06-07 03:24:30,105][527010] Avg episode reward: [(0, '18.884')] +[2026-06-07 03:24:30,109][528093] Saving new best policy, reward=18.884! +[2026-06-07 03:24:30,390][528169] Updated weights for policy 0, policy_version 4749 (0.0008) +[2026-06-07 03:24:30,565][528169] Updated weights for policy 0, policy_version 4763 (0.0009) +[2026-06-07 03:24:30,743][528169] Updated weights for policy 0, policy_version 4777 (0.0008) +[2026-06-07 03:24:30,928][528169] Updated weights for policy 0, policy_version 4792 (0.0008) +[2026-06-07 03:24:31,713][528169] Updated weights for policy 0, policy_version 4804 (0.0008) +[2026-06-07 03:24:31,848][528169] Updated weights for policy 0, policy_version 4815 (0.0009) +[2026-06-07 03:24:32,013][528169] Updated weights for policy 0, policy_version 4829 (0.0008) +[2026-06-07 03:24:32,173][528169] Updated weights for policy 0, policy_version 4842 (0.0008) +[2026-06-07 03:24:32,325][528169] Updated weights for policy 0, policy_version 4853 (0.0008) +[2026-06-07 03:24:33,089][528169] Updated weights for policy 0, policy_version 4865 (0.0008) +[2026-06-07 03:24:33,217][528169] Updated weights for policy 0, policy_version 4875 (0.0009) +[2026-06-07 03:24:33,361][528169] Updated weights for policy 0, policy_version 4886 (0.0009) +[2026-06-07 03:24:33,522][528169] Updated weights for policy 0, policy_version 4899 (0.0008) +[2026-06-07 03:24:33,681][528169] Updated weights for policy 0, policy_version 4911 (0.0008) +[2026-06-07 03:24:33,851][528169] Updated weights for policy 0, policy_version 4924 (0.0009) +[2026-06-07 03:24:34,565][528169] Updated weights for policy 0, policy_version 4934 (0.0008) +[2026-06-07 03:24:34,702][528169] Updated weights for policy 0, policy_version 4946 (0.0008) +[2026-06-07 03:24:34,852][528169] Updated weights for policy 0, policy_version 4957 (0.0009) +[2026-06-07 03:24:35,004][528169] Updated weights for policy 0, policy_version 4969 (0.0009) +[2026-06-07 03:24:35,103][527010] Fps is (10 sec: 22937.6, 60 sec: 22391.5, 300 sec: 22178.3). Total num frames: 2523136. Throughput: 0: 22599.1. Samples: 2541056. Policy #0 lag: (min: 63.0, avg: 73.6, max: 127.0) +[2026-06-07 03:24:35,104][527010] Avg episode reward: [(0, '18.519')] +[2026-06-07 03:24:35,166][528169] Updated weights for policy 0, policy_version 4981 (0.0009) +[2026-06-07 03:24:35,299][528169] Updated weights for policy 0, policy_version 4992 (0.0008) +[2026-06-07 03:24:36,086][528169] Updated weights for policy 0, policy_version 5004 (0.0008) +[2026-06-07 03:24:36,225][528169] Updated weights for policy 0, policy_version 5015 (0.0008) +[2026-06-07 03:24:36,390][528169] Updated weights for policy 0, policy_version 5028 (0.0008) +[2026-06-07 03:24:36,524][528169] Updated weights for policy 0, policy_version 5038 (0.0008) +[2026-06-07 03:24:36,663][528169] Updated weights for policy 0, policy_version 5049 (0.0008) +[2026-06-07 03:24:37,421][528169] Updated weights for policy 0, policy_version 5063 (0.0008) +[2026-06-07 03:24:37,583][528169] Updated weights for policy 0, policy_version 5076 (0.0008) +[2026-06-07 03:24:37,721][528169] Updated weights for policy 0, policy_version 5086 (0.0008) +[2026-06-07 03:24:37,874][528169] Updated weights for policy 0, policy_version 5098 (0.0008) +[2026-06-07 03:24:38,001][528169] Updated weights for policy 0, policy_version 5108 (0.0008) +[2026-06-07 03:24:38,143][528169] Updated weights for policy 0, policy_version 5119 (0.0008) +[2026-06-07 03:24:38,890][528169] Updated weights for policy 0, policy_version 5131 (0.0008) +[2026-06-07 03:24:39,063][528169] Updated weights for policy 0, policy_version 5144 (0.0008) +[2026-06-07 03:24:39,200][528169] Updated weights for policy 0, policy_version 5155 (0.0008) +[2026-06-07 03:24:39,341][528169] Updated weights for policy 0, policy_version 5166 (0.0008) +[2026-06-07 03:24:39,496][528169] Updated weights for policy 0, policy_version 5178 (0.0008) +[2026-06-07 03:24:40,103][527010] Fps is (10 sec: 22936.7, 60 sec: 22937.5, 300 sec: 22348.1). Total num frames: 2654208. Throughput: 0: 22667.2. Samples: 2676480. Policy #0 lag: (min: 63.0, avg: 73.0, max: 127.0) +[2026-06-07 03:24:40,105][527010] Avg episode reward: [(0, '20.196')] +[2026-06-07 03:24:40,224][528169] Updated weights for policy 0, policy_version 5188 (0.0008) +[2026-06-07 03:24:40,359][528169] Updated weights for policy 0, policy_version 5199 (0.0008) +[2026-06-07 03:24:40,511][528169] Updated weights for policy 0, policy_version 5211 (0.0008) +[2026-06-07 03:24:40,641][528169] Updated weights for policy 0, policy_version 5221 (0.0008) +[2026-06-07 03:24:40,823][528169] Updated weights for policy 0, policy_version 5235 (0.0008) +[2026-06-07 03:24:40,967][528169] Updated weights for policy 0, policy_version 5246 (0.0009) +[2026-06-07 03:24:40,987][528093] Saving new best policy, reward=20.196! +[2026-06-07 03:24:41,701][528169] Updated weights for policy 0, policy_version 5258 (0.0009) +[2026-06-07 03:24:41,839][528169] Updated weights for policy 0, policy_version 5269 (0.0008) +[2026-06-07 03:24:41,975][528169] Updated weights for policy 0, policy_version 5279 (0.0008) +[2026-06-07 03:24:42,124][528169] Updated weights for policy 0, policy_version 5291 (0.0008) +[2026-06-07 03:24:42,269][528169] Updated weights for policy 0, policy_version 5302 (0.0008) +[2026-06-07 03:24:42,392][528169] Updated weights for policy 0, policy_version 5312 (0.0008) +[2026-06-07 03:24:43,075][528169] Updated weights for policy 0, policy_version 5323 (0.0008) +[2026-06-07 03:24:43,232][528169] Updated weights for policy 0, policy_version 5336 (0.0008) +[2026-06-07 03:24:43,381][528169] Updated weights for policy 0, policy_version 5347 (0.0008) +[2026-06-07 03:24:43,521][528169] Updated weights for policy 0, policy_version 5358 (0.0008) +[2026-06-07 03:24:43,656][528169] Updated weights for policy 0, policy_version 5368 (0.0008) +[2026-06-07 03:24:44,381][528169] Updated weights for policy 0, policy_version 5378 (0.0008) +[2026-06-07 03:24:44,538][528169] Updated weights for policy 0, policy_version 5391 (0.0009) +[2026-06-07 03:24:44,694][528169] Updated weights for policy 0, policy_version 5403 (0.0008) +[2026-06-07 03:24:44,849][528169] Updated weights for policy 0, policy_version 5415 (0.0009) +[2026-06-07 03:24:44,979][528169] Updated weights for policy 0, policy_version 5425 (0.0009) +[2026-06-07 03:24:45,103][527010] Fps is (10 sec: 22937.8, 60 sec: 22391.5, 300 sec: 22239.6). Total num frames: 2752512. Throughput: 0: 22704.4. Samples: 2744576. Policy #0 lag: (min: 63.0, avg: 73.0, max: 127.0) +[2026-06-07 03:24:45,104][527010] Avg episode reward: [(0, '21.028')] +[2026-06-07 03:24:45,128][528169] Updated weights for policy 0, policy_version 5436 (0.0008) +[2026-06-07 03:24:45,167][528093] Saving new best policy, reward=21.028! +[2026-06-07 03:24:45,818][528169] Updated weights for policy 0, policy_version 5447 (0.0008) +[2026-06-07 03:24:45,947][528169] Updated weights for policy 0, policy_version 5457 (0.0008) +[2026-06-07 03:24:46,083][528169] Updated weights for policy 0, policy_version 5468 (0.0008) +[2026-06-07 03:24:46,217][528169] Updated weights for policy 0, policy_version 5478 (0.0008) +[2026-06-07 03:24:46,368][528169] Updated weights for policy 0, policy_version 5489 (0.0008) +[2026-06-07 03:24:46,550][528169] Updated weights for policy 0, policy_version 5503 (0.0008) +[2026-06-07 03:24:47,279][528169] Updated weights for policy 0, policy_version 5514 (0.0009) +[2026-06-07 03:24:47,406][528169] Updated weights for policy 0, policy_version 5524 (0.0008) +[2026-06-07 03:24:47,532][528169] Updated weights for policy 0, policy_version 5534 (0.0005) +[2026-06-07 03:24:47,660][528169] Updated weights for policy 0, policy_version 5544 (0.0005) +[2026-06-07 03:24:47,805][528169] Updated weights for policy 0, policy_version 5555 (0.0005) +[2026-06-07 03:24:47,957][528169] Updated weights for policy 0, policy_version 5567 (0.0005) +[2026-06-07 03:24:48,621][528169] Updated weights for policy 0, policy_version 5578 (0.0006) +[2026-06-07 03:24:48,792][528169] Updated weights for policy 0, policy_version 5591 (0.0005) +[2026-06-07 03:24:48,914][528169] Updated weights for policy 0, policy_version 5601 (0.0005) +[2026-06-07 03:24:49,062][528169] Updated weights for policy 0, policy_version 5612 (0.0005) +[2026-06-07 03:24:49,209][528169] Updated weights for policy 0, policy_version 5623 (0.0005) +[2026-06-07 03:24:49,941][528169] Updated weights for policy 0, policy_version 5635 (0.0005) +[2026-06-07 03:24:50,081][528169] Updated weights for policy 0, policy_version 5646 (0.0010) +[2026-06-07 03:24:50,103][527010] Fps is (10 sec: 22938.4, 60 sec: 22937.6, 300 sec: 22394.0). Total num frames: 2883584. Throughput: 0: 22937.6. Samples: 2887424. Policy #0 lag: (min: 63.0, avg: 74.2, max: 127.0) +[2026-06-07 03:24:50,104][527010] Avg episode reward: [(0, '24.945')] +[2026-06-07 03:24:50,223][528169] Updated weights for policy 0, policy_version 5656 (0.0007) +[2026-06-07 03:24:50,370][528169] Updated weights for policy 0, policy_version 5668 (0.0009) +[2026-06-07 03:24:50,512][528169] Updated weights for policy 0, policy_version 5679 (0.0005) +[2026-06-07 03:24:50,650][528169] Updated weights for policy 0, policy_version 5689 (0.0008) +[2026-06-07 03:24:50,726][528093] Saving new best policy, reward=24.945! +[2026-06-07 03:24:51,399][528169] Updated weights for policy 0, policy_version 5701 (0.0009) +[2026-06-07 03:24:51,538][528169] Updated weights for policy 0, policy_version 5712 (0.0009) +[2026-06-07 03:24:51,681][528169] Updated weights for policy 0, policy_version 5723 (0.0009) +[2026-06-07 03:24:51,811][528169] Updated weights for policy 0, policy_version 5733 (0.0008) +[2026-06-07 03:24:51,954][528169] Updated weights for policy 0, policy_version 5744 (0.0008) +[2026-06-07 03:24:52,103][528169] Updated weights for policy 0, policy_version 5755 (0.0009) +[2026-06-07 03:24:52,775][528169] Updated weights for policy 0, policy_version 5765 (0.0009) +[2026-06-07 03:24:52,910][528169] Updated weights for policy 0, policy_version 5776 (0.0008) +[2026-06-07 03:24:53,059][528169] Updated weights for policy 0, policy_version 5787 (0.0008) +[2026-06-07 03:24:53,214][528169] Updated weights for policy 0, policy_version 5800 (0.0009) +[2026-06-07 03:24:53,388][528169] Updated weights for policy 0, policy_version 5813 (0.0012) +[2026-06-07 03:24:53,512][528169] Updated weights for policy 0, policy_version 5823 (0.0009) +[2026-06-07 03:24:54,252][528169] Updated weights for policy 0, policy_version 5834 (0.0008) +[2026-06-07 03:24:54,389][528169] Updated weights for policy 0, policy_version 5845 (0.0008) +[2026-06-07 03:24:54,524][528169] Updated weights for policy 0, policy_version 5855 (0.0009) +[2026-06-07 03:24:54,658][528169] Updated weights for policy 0, policy_version 5865 (0.0008) +[2026-06-07 03:24:54,793][528169] Updated weights for policy 0, policy_version 5876 (0.0008) +[2026-06-07 03:24:54,920][528169] Updated weights for policy 0, policy_version 5886 (0.0008) +[2026-06-07 03:24:55,103][527010] Fps is (10 sec: 26214.1, 60 sec: 22937.6, 300 sec: 22536.8). Total num frames: 3014656. Throughput: 0: 23188.0. Samples: 3032832. Policy #0 lag: (min: 63.0, avg: 74.5, max: 127.0) +[2026-06-07 03:24:55,105][527010] Avg episode reward: [(0, '23.312')] +[2026-06-07 03:24:55,605][528169] Updated weights for policy 0, policy_version 5896 (0.0008) +[2026-06-07 03:24:55,728][528169] Updated weights for policy 0, policy_version 5906 (0.0009) +[2026-06-07 03:24:55,866][528169] Updated weights for policy 0, policy_version 5917 (0.0008) +[2026-06-07 03:24:56,001][528169] Updated weights for policy 0, policy_version 5927 (0.0008) +[2026-06-07 03:24:56,157][528169] Updated weights for policy 0, policy_version 5939 (0.0008) +[2026-06-07 03:24:56,280][528169] Updated weights for policy 0, policy_version 5949 (0.0008) +[2026-06-07 03:24:56,962][528169] Updated weights for policy 0, policy_version 5960 (0.0005) +[2026-06-07 03:24:57,134][528169] Updated weights for policy 0, policy_version 5974 (0.0006) +[2026-06-07 03:24:57,277][528169] Updated weights for policy 0, policy_version 5986 (0.0008) +[2026-06-07 03:24:57,406][528169] Updated weights for policy 0, policy_version 5996 (0.0008) +[2026-06-07 03:24:57,541][528169] Updated weights for policy 0, policy_version 6006 (0.0008) +[2026-06-07 03:24:57,663][528169] Updated weights for policy 0, policy_version 6016 (0.0008) +[2026-06-07 03:24:58,407][528169] Updated weights for policy 0, policy_version 6029 (0.0009) +[2026-06-07 03:24:58,554][528169] Updated weights for policy 0, policy_version 6041 (0.0008) +[2026-06-07 03:24:58,718][528169] Updated weights for policy 0, policy_version 6053 (0.0008) +[2026-06-07 03:24:58,874][528169] Updated weights for policy 0, policy_version 6065 (0.0009) +[2026-06-07 03:24:58,997][528169] Updated weights for policy 0, policy_version 6075 (0.0008) +[2026-06-07 03:24:59,689][528169] Updated weights for policy 0, policy_version 6087 (0.0009) +[2026-06-07 03:24:59,821][528169] Updated weights for policy 0, policy_version 6097 (0.0008) +[2026-06-07 03:24:59,953][528169] Updated weights for policy 0, policy_version 6107 (0.0008) +[2026-06-07 03:25:00,080][528169] Updated weights for policy 0, policy_version 6117 (0.0008) +[2026-06-07 03:25:00,103][527010] Fps is (10 sec: 22937.5, 60 sec: 22937.5, 300 sec: 22433.1). Total num frames: 3112960. Throughput: 0: 23185.2. Samples: 3099904. Policy #0 lag: (min: 63.0, avg: 74.5, max: 127.0) +[2026-06-07 03:25:00,104][527010] Avg episode reward: [(0, '24.744')] +[2026-06-07 03:25:00,224][528169] Updated weights for policy 0, policy_version 6128 (0.0008) +[2026-06-07 03:25:00,370][528169] Updated weights for policy 0, policy_version 6140 (0.0009) +[2026-06-07 03:25:01,048][528169] Updated weights for policy 0, policy_version 6151 (0.0008) +[2026-06-07 03:25:01,177][528169] Updated weights for policy 0, policy_version 6161 (0.0007) +[2026-06-07 03:25:01,341][528169] Updated weights for policy 0, policy_version 6174 (0.0009) +[2026-06-07 03:25:01,485][528169] Updated weights for policy 0, policy_version 6185 (0.0008) +[2026-06-07 03:25:01,651][528169] Updated weights for policy 0, policy_version 6198 (0.0008) +[2026-06-07 03:25:02,334][528169] Updated weights for policy 0, policy_version 6209 (0.0009) +[2026-06-07 03:25:02,483][528169] Updated weights for policy 0, policy_version 6221 (0.0008) +[2026-06-07 03:25:02,618][528169] Updated weights for policy 0, policy_version 6232 (0.0008) +[2026-06-07 03:25:02,750][528169] Updated weights for policy 0, policy_version 6242 (0.0008) +[2026-06-07 03:25:02,917][528169] Updated weights for policy 0, policy_version 6255 (0.0008) +[2026-06-07 03:25:03,069][528169] Updated weights for policy 0, policy_version 6266 (0.0009) +[2026-06-07 03:25:03,752][528169] Updated weights for policy 0, policy_version 6277 (0.0008) +[2026-06-07 03:25:03,891][528169] Updated weights for policy 0, policy_version 6288 (0.0008) +[2026-06-07 03:25:04,046][528169] Updated weights for policy 0, policy_version 6300 (0.0008) +[2026-06-07 03:25:04,165][528169] Updated weights for policy 0, policy_version 6310 (0.0008) +[2026-06-07 03:25:04,324][528169] Updated weights for policy 0, policy_version 6322 (0.0008) +[2026-06-07 03:25:05,087][528169] Updated weights for policy 0, policy_version 6337 (0.0009) +[2026-06-07 03:25:05,103][527010] Fps is (10 sec: 22937.8, 60 sec: 23483.7, 300 sec: 22564.7). Total num frames: 3244032. Throughput: 0: 23307.4. Samples: 3238784. Policy #0 lag: (min: 66.0, avg: 76.8, max: 117.0) +[2026-06-07 03:25:05,104][527010] Avg episode reward: [(0, '24.732')] +[2026-06-07 03:25:05,234][528169] Updated weights for policy 0, policy_version 6349 (0.0009) +[2026-06-07 03:25:05,369][528169] Updated weights for policy 0, policy_version 6360 (0.0008) +[2026-06-07 03:25:05,518][528169] Updated weights for policy 0, policy_version 6371 (0.0008) +[2026-06-07 03:25:05,653][528169] Updated weights for policy 0, policy_version 6381 (0.0008) +[2026-06-07 03:25:05,784][528169] Updated weights for policy 0, policy_version 6392 (0.0008) +[2026-06-07 03:25:06,478][528169] Updated weights for policy 0, policy_version 6405 (0.0008) +[2026-06-07 03:25:06,611][528169] Updated weights for policy 0, policy_version 6415 (0.0008) +[2026-06-07 03:25:06,757][528169] Updated weights for policy 0, policy_version 6427 (0.0008) +[2026-06-07 03:25:06,904][528169] Updated weights for policy 0, policy_version 6438 (0.0008) +[2026-06-07 03:25:07,051][528169] Updated weights for policy 0, policy_version 6450 (0.0008) +[2026-06-07 03:25:07,187][528169] Updated weights for policy 0, policy_version 6460 (0.0008) +[2026-06-07 03:25:07,892][528169] Updated weights for policy 0, policy_version 6471 (0.0008) +[2026-06-07 03:25:08,037][528169] Updated weights for policy 0, policy_version 6482 (0.0009) +[2026-06-07 03:25:08,185][528169] Updated weights for policy 0, policy_version 6493 (0.0008) +[2026-06-07 03:25:08,309][528169] Updated weights for policy 0, policy_version 6503 (0.0009) +[2026-06-07 03:25:08,446][528169] Updated weights for policy 0, policy_version 6514 (0.0008) +[2026-06-07 03:25:08,600][528169] Updated weights for policy 0, policy_version 6526 (0.0008) +[2026-06-07 03:25:09,295][528169] Updated weights for policy 0, policy_version 6536 (0.0008) +[2026-06-07 03:25:09,423][528169] Updated weights for policy 0, policy_version 6546 (0.0008) +[2026-06-07 03:25:09,562][528169] Updated weights for policy 0, policy_version 6557 (0.0008) +[2026-06-07 03:25:09,730][528169] Updated weights for policy 0, policy_version 6570 (0.0008) +[2026-06-07 03:25:09,867][528169] Updated weights for policy 0, policy_version 6581 (0.0008) +[2026-06-07 03:25:10,103][527010] Fps is (10 sec: 26214.4, 60 sec: 23483.8, 300 sec: 22687.3). Total num frames: 3375104. Throughput: 0: 23486.5. Samples: 3390208. Policy #0 lag: (min: 90.0, avg: 116.8, max: 153.0) +[2026-06-07 03:25:10,105][527010] Avg episode reward: [(0, '29.419')] +[2026-06-07 03:25:10,110][528093] Saving new best policy, reward=29.419! +[2026-06-07 03:25:10,616][528169] Updated weights for policy 0, policy_version 6596 (0.0008) +[2026-06-07 03:25:10,757][528169] Updated weights for policy 0, policy_version 6607 (0.0009) +[2026-06-07 03:25:10,884][528169] Updated weights for policy 0, policy_version 6617 (0.0008) +[2026-06-07 03:25:11,029][528169] Updated weights for policy 0, policy_version 6628 (0.0008) +[2026-06-07 03:25:11,183][528169] Updated weights for policy 0, policy_version 6640 (0.0008) +[2026-06-07 03:25:11,320][528169] Updated weights for policy 0, policy_version 6650 (0.0004) +[2026-06-07 03:25:12,017][528169] Updated weights for policy 0, policy_version 6662 (0.0008) +[2026-06-07 03:25:12,172][528169] Updated weights for policy 0, policy_version 6674 (0.0008) +[2026-06-07 03:25:12,314][528169] Updated weights for policy 0, policy_version 6685 (0.0008) +[2026-06-07 03:25:12,454][528169] Updated weights for policy 0, policy_version 6696 (0.0008) +[2026-06-07 03:25:12,578][528169] Updated weights for policy 0, policy_version 6706 (0.0008) +[2026-06-07 03:25:12,740][528169] Updated weights for policy 0, policy_version 6718 (0.0008) +[2026-06-07 03:25:13,418][528169] Updated weights for policy 0, policy_version 6728 (0.0008) +[2026-06-07 03:25:13,572][528169] Updated weights for policy 0, policy_version 6740 (0.0008) +[2026-06-07 03:25:13,703][528169] Updated weights for policy 0, policy_version 6750 (0.0008) +[2026-06-07 03:25:13,840][528169] Updated weights for policy 0, policy_version 6761 (0.0008) +[2026-06-07 03:25:13,985][528169] Updated weights for policy 0, policy_version 6772 (0.0008) +[2026-06-07 03:25:14,128][528169] Updated weights for policy 0, policy_version 6783 (0.0008) +[2026-06-07 03:25:14,798][528169] Updated weights for policy 0, policy_version 6793 (0.0008) +[2026-06-07 03:25:14,945][528169] Updated weights for policy 0, policy_version 6804 (0.0008) +[2026-06-07 03:25:15,103][527010] Fps is (10 sec: 22937.1, 60 sec: 23483.8, 300 sec: 22588.9). Total num frames: 3473408. Throughput: 0: 23438.1. Samples: 3458816. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 03:25:15,105][527010] Avg episode reward: [(0, '28.966')] +[2026-06-07 03:25:15,106][528169] Updated weights for policy 0, policy_version 6817 (0.0008) +[2026-06-07 03:25:15,254][528169] Updated weights for policy 0, policy_version 6828 (0.0008) +[2026-06-07 03:25:15,377][528169] Updated weights for policy 0, policy_version 6838 (0.0008) +[2026-06-07 03:25:16,086][528169] Updated weights for policy 0, policy_version 6849 (0.0008) +[2026-06-07 03:25:16,218][528169] Updated weights for policy 0, policy_version 6860 (0.0008) +[2026-06-07 03:25:16,360][528169] Updated weights for policy 0, policy_version 6871 (0.0008) +[2026-06-07 03:25:16,500][528169] Updated weights for policy 0, policy_version 6882 (0.0008) +[2026-06-07 03:25:16,637][528169] Updated weights for policy 0, policy_version 6892 (0.0008) +[2026-06-07 03:25:16,805][528169] Updated weights for policy 0, policy_version 6905 (0.0009) +[2026-06-07 03:25:17,481][528169] Updated weights for policy 0, policy_version 6915 (0.0009) +[2026-06-07 03:25:17,619][528169] Updated weights for policy 0, policy_version 6926 (0.0008) +[2026-06-07 03:25:17,747][528169] Updated weights for policy 0, policy_version 6937 (0.0007) +[2026-06-07 03:25:17,880][528169] Updated weights for policy 0, policy_version 6947 (0.0008) +[2026-06-07 03:25:18,021][528169] Updated weights for policy 0, policy_version 6958 (0.0008) +[2026-06-07 03:25:18,165][528169] Updated weights for policy 0, policy_version 6969 (0.0008) +[2026-06-07 03:25:18,871][528169] Updated weights for policy 0, policy_version 6981 (0.0008) +[2026-06-07 03:25:19,014][528169] Updated weights for policy 0, policy_version 6992 (0.0005) +[2026-06-07 03:25:19,157][528169] Updated weights for policy 0, policy_version 7003 (0.0008) +[2026-06-07 03:25:19,278][528169] Updated weights for policy 0, policy_version 7013 (0.0008) +[2026-06-07 03:25:19,445][528169] Updated weights for policy 0, policy_version 7026 (0.0008) +[2026-06-07 03:25:19,608][528169] Updated weights for policy 0, policy_version 7039 (0.0006) +[2026-06-07 03:25:20,103][527010] Fps is (10 sec: 22937.9, 60 sec: 23483.8, 300 sec: 22703.1). Total num frames: 3604480. Throughput: 0: 23458.2. Samples: 3596672. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 03:25:20,104][527010] Avg episode reward: [(0, '34.448')] +[2026-06-07 03:25:20,289][528169] Updated weights for policy 0, policy_version 7051 (0.0008) +[2026-06-07 03:25:20,433][528169] Updated weights for policy 0, policy_version 7062 (0.0008) +[2026-06-07 03:25:20,558][528169] Updated weights for policy 0, policy_version 7072 (0.0008) +[2026-06-07 03:25:20,702][528169] Updated weights for policy 0, policy_version 7083 (0.0008) +[2026-06-07 03:25:20,840][528169] Updated weights for policy 0, policy_version 7093 (0.0008) +[2026-06-07 03:25:20,973][528169] Updated weights for policy 0, policy_version 7103 (0.0008) +[2026-06-07 03:25:20,978][528093] Saving new best policy, reward=34.448! +[2026-06-07 03:25:21,644][528169] Updated weights for policy 0, policy_version 7113 (0.0008) +[2026-06-07 03:25:21,773][528169] Updated weights for policy 0, policy_version 7123 (0.0008) +[2026-06-07 03:25:21,902][528169] Updated weights for policy 0, policy_version 7133 (0.0008) +[2026-06-07 03:25:22,054][528169] Updated weights for policy 0, policy_version 7144 (0.0008) +[2026-06-07 03:25:22,191][528169] Updated weights for policy 0, policy_version 7155 (0.0008) +[2026-06-07 03:25:22,333][528169] Updated weights for policy 0, policy_version 7166 (0.0008) +[2026-06-07 03:25:23,005][528169] Updated weights for policy 0, policy_version 7177 (0.0006) +[2026-06-07 03:25:23,131][528169] Updated weights for policy 0, policy_version 7187 (0.0007) +[2026-06-07 03:25:23,290][528169] Updated weights for policy 0, policy_version 7199 (0.0008) +[2026-06-07 03:25:23,416][528169] Updated weights for policy 0, policy_version 7209 (0.0009) +[2026-06-07 03:25:23,557][528169] Updated weights for policy 0, policy_version 7219 (0.0008) +[2026-06-07 03:25:23,695][528169] Updated weights for policy 0, policy_version 7230 (0.0008) +[2026-06-07 03:25:24,337][528169] Updated weights for policy 0, policy_version 7240 (0.0008) +[2026-06-07 03:25:24,496][528169] Updated weights for policy 0, policy_version 7252 (0.0008) +[2026-06-07 03:25:24,637][528169] Updated weights for policy 0, policy_version 7263 (0.0008) +[2026-06-07 03:25:24,787][528169] Updated weights for policy 0, policy_version 7274 (0.0008) +[2026-06-07 03:25:24,939][528169] Updated weights for policy 0, policy_version 7285 (0.0008) +[2026-06-07 03:25:25,070][528169] Updated weights for policy 0, policy_version 7296 (0.0009) +[2026-06-07 03:25:25,103][527010] Fps is (10 sec: 26214.4, 60 sec: 24029.8, 300 sec: 22810.3). Total num frames: 3735552. Throughput: 0: 23853.6. Samples: 3749888. Policy #0 lag: (min: 37.0, avg: 48.9, max: 101.0) +[2026-06-07 03:25:25,105][527010] Avg episode reward: [(0, '30.063')] +[2026-06-07 03:25:25,752][528169] Updated weights for policy 0, policy_version 7306 (0.0008) +[2026-06-07 03:25:25,881][528169] Updated weights for policy 0, policy_version 7316 (0.0008) +[2026-06-07 03:25:26,035][528169] Updated weights for policy 0, policy_version 7328 (0.0008) +[2026-06-07 03:25:26,169][528169] Updated weights for policy 0, policy_version 7338 (0.0008) +[2026-06-07 03:25:26,330][528169] Updated weights for policy 0, policy_version 7350 (0.0008) +[2026-06-07 03:25:27,030][528169] Updated weights for policy 0, policy_version 7362 (0.0009) +[2026-06-07 03:25:27,165][528169] Updated weights for policy 0, policy_version 7373 (0.0008) +[2026-06-07 03:25:27,295][528169] Updated weights for policy 0, policy_version 7383 (0.0008) +[2026-06-07 03:25:27,434][528169] Updated weights for policy 0, policy_version 7394 (0.0008) +[2026-06-07 03:25:27,572][528169] Updated weights for policy 0, policy_version 7404 (0.0008) +[2026-06-07 03:25:27,712][528169] Updated weights for policy 0, policy_version 7415 (0.0008) +[2026-06-07 03:25:28,380][528169] Updated weights for policy 0, policy_version 7425 (0.0008) +[2026-06-07 03:25:28,527][528169] Updated weights for policy 0, policy_version 7437 (0.0008) +[2026-06-07 03:25:28,675][528169] Updated weights for policy 0, policy_version 7448 (0.0008) +[2026-06-07 03:25:28,834][528169] Updated weights for policy 0, policy_version 7460 (0.0008) +[2026-06-07 03:25:28,978][528169] Updated weights for policy 0, policy_version 7471 (0.0008) +[2026-06-07 03:25:29,104][528169] Updated weights for policy 0, policy_version 7481 (0.0008) +[2026-06-07 03:25:29,801][528169] Updated weights for policy 0, policy_version 7492 (0.0008) +[2026-06-07 03:25:29,929][528169] Updated weights for policy 0, policy_version 7502 (0.0008) +[2026-06-07 03:25:30,071][528169] Updated weights for policy 0, policy_version 7513 (0.0008) +[2026-06-07 03:25:30,103][527010] Fps is (10 sec: 22937.6, 60 sec: 23483.7, 300 sec: 22717.0). Total num frames: 3833856. Throughput: 0: 23864.9. Samples: 3818496. Policy #0 lag: (min: 63.0, avg: 74.1, max: 127.0) +[2026-06-07 03:25:30,104][527010] Avg episode reward: [(0, '35.286')] +[2026-06-07 03:25:30,216][528169] Updated weights for policy 0, policy_version 7524 (0.0009) +[2026-06-07 03:25:30,337][528169] Updated weights for policy 0, policy_version 7534 (0.0009) +[2026-06-07 03:25:30,487][528169] Updated weights for policy 0, policy_version 7545 (0.0008) +[2026-06-07 03:25:30,572][528093] Saving new best policy, reward=35.286! +[2026-06-07 03:25:31,157][528169] Updated weights for policy 0, policy_version 7555 (0.0008) +[2026-06-07 03:25:31,310][528169] Updated weights for policy 0, policy_version 7567 (0.0004) +[2026-06-07 03:25:31,463][528169] Updated weights for policy 0, policy_version 7579 (0.0004) +[2026-06-07 03:25:31,602][528169] Updated weights for policy 0, policy_version 7590 (0.0004) +[2026-06-07 03:25:31,754][528169] Updated weights for policy 0, policy_version 7601 (0.0004) +[2026-06-07 03:25:31,881][528169] Updated weights for policy 0, policy_version 7611 (0.0004) +[2026-06-07 03:25:32,539][528169] Updated weights for policy 0, policy_version 7623 (0.0004) +[2026-06-07 03:25:32,711][528169] Updated weights for policy 0, policy_version 7636 (0.0004) +[2026-06-07 03:25:32,854][528169] Updated weights for policy 0, policy_version 7647 (0.0008) +[2026-06-07 03:25:32,988][528169] Updated weights for policy 0, policy_version 7657 (0.0008) +[2026-06-07 03:25:33,142][528169] Updated weights for policy 0, policy_version 7669 (0.0008) +[2026-06-07 03:25:33,267][528169] Updated weights for policy 0, policy_version 7679 (0.0008) +[2026-06-07 03:25:33,923][528169] Updated weights for policy 0, policy_version 7689 (0.0008) +[2026-06-07 03:25:34,059][528169] Updated weights for policy 0, policy_version 7699 (0.0008) +[2026-06-07 03:25:34,186][528169] Updated weights for policy 0, policy_version 7709 (0.0008) +[2026-06-07 03:25:34,322][528169] Updated weights for policy 0, policy_version 7720 (0.0008) +[2026-06-07 03:25:34,470][528169] Updated weights for policy 0, policy_version 7731 (0.0008) +[2026-06-07 03:25:34,600][528169] Updated weights for policy 0, policy_version 7741 (0.0008) +[2026-06-07 03:25:35,103][527010] Fps is (10 sec: 22938.1, 60 sec: 24029.9, 300 sec: 22817.6). Total num frames: 3964928. Throughput: 0: 23776.7. Samples: 3957376. Policy #0 lag: (min: 63.0, avg: 74.1, max: 127.0) +[2026-06-07 03:25:35,104][527010] Avg episode reward: [(0, '38.405')] +[2026-06-07 03:25:35,250][528169] Updated weights for policy 0, policy_version 7751 (0.0008) +[2026-06-07 03:25:35,397][528169] Updated weights for policy 0, policy_version 7762 (0.0008) +[2026-06-07 03:25:35,533][528169] Updated weights for policy 0, policy_version 7772 (0.0009) +[2026-06-07 03:25:35,691][528169] Updated weights for policy 0, policy_version 7783 (0.0008) +[2026-06-07 03:25:35,830][528169] Updated weights for policy 0, policy_version 7794 (0.0008) +[2026-06-07 03:25:35,979][528169] Updated weights for policy 0, policy_version 7806 (0.0008) +[2026-06-07 03:25:36,001][528093] Saving new best policy, reward=38.405! +[2026-06-07 03:25:36,627][528169] Updated weights for policy 0, policy_version 7816 (0.0008) +[2026-06-07 03:25:36,764][528169] Updated weights for policy 0, policy_version 7826 (0.0008) +[2026-06-07 03:25:36,900][528169] Updated weights for policy 0, policy_version 7837 (0.0008) +[2026-06-07 03:25:37,045][528169] Updated weights for policy 0, policy_version 7848 (0.0008) +[2026-06-07 03:25:37,176][528169] Updated weights for policy 0, policy_version 7858 (0.0008) +[2026-06-07 03:25:37,323][528169] Updated weights for policy 0, policy_version 7869 (0.0008) +[2026-06-07 03:25:38,028][528169] Updated weights for policy 0, policy_version 7881 (0.0008) +[2026-06-07 03:25:38,189][528169] Updated weights for policy 0, policy_version 7894 (0.0008) +[2026-06-07 03:25:38,325][528169] Updated weights for policy 0, policy_version 7905 (0.0008) +[2026-06-07 03:25:38,457][528169] Updated weights for policy 0, policy_version 7915 (0.0008) +[2026-06-07 03:25:38,609][528169] Updated weights for policy 0, policy_version 7927 (0.0008) +[2026-06-07 03:25:39,297][528169] Updated weights for policy 0, policy_version 7937 (0.0008) +[2026-06-07 03:25:39,457][528169] Updated weights for policy 0, policy_version 7950 (0.0008) +[2026-06-07 03:25:39,634][528169] Updated weights for policy 0, policy_version 7964 (0.0008) +[2026-06-07 03:25:39,794][528169] Updated weights for policy 0, policy_version 7977 (0.0008) +[2026-06-07 03:25:39,936][528169] Updated weights for policy 0, policy_version 7988 (0.0008) +[2026-06-07 03:25:40,066][528169] Updated weights for policy 0, policy_version 7998 (0.0008) +[2026-06-07 03:25:40,103][527010] Fps is (10 sec: 26214.5, 60 sec: 24030.1, 300 sec: 22912.6). Total num frames: 4096000. Throughput: 0: 23947.4. Samples: 4110464. Policy #0 lag: (min: 55.0, avg: 67.6, max: 119.0) +[2026-06-07 03:25:40,104][527010] Avg episode reward: [(0, '40.834')] +[2026-06-07 03:25:40,109][528093] Saving new best policy, reward=40.834! +[2026-06-07 03:25:40,747][528169] Updated weights for policy 0, policy_version 8008 (0.0008) +[2026-06-07 03:25:40,881][528169] Updated weights for policy 0, policy_version 8018 (0.0008) +[2026-06-07 03:25:41,016][528169] Updated weights for policy 0, policy_version 8029 (0.0008) +[2026-06-07 03:25:41,144][528169] Updated weights for policy 0, policy_version 8039 (0.0009) +[2026-06-07 03:25:41,279][528169] Updated weights for policy 0, policy_version 8049 (0.0008) +[2026-06-07 03:25:41,407][528169] Updated weights for policy 0, policy_version 8059 (0.0008) +[2026-06-07 03:25:42,046][528169] Updated weights for policy 0, policy_version 8069 (0.0009) +[2026-06-07 03:25:42,193][528169] Updated weights for policy 0, policy_version 8080 (0.0008) +[2026-06-07 03:25:42,340][528169] Updated weights for policy 0, policy_version 8091 (0.0008) +[2026-06-07 03:25:42,461][528169] Updated weights for policy 0, policy_version 8101 (0.0008) +[2026-06-07 03:25:42,592][528169] Updated weights for policy 0, policy_version 8111 (0.0008) +[2026-06-07 03:25:42,752][528169] Updated weights for policy 0, policy_version 8123 (0.0008) +[2026-06-07 03:25:43,397][528169] Updated weights for policy 0, policy_version 8133 (0.0008) +[2026-06-07 03:25:43,564][528169] Updated weights for policy 0, policy_version 8146 (0.0008) +[2026-06-07 03:25:43,691][528169] Updated weights for policy 0, policy_version 8156 (0.0008) +[2026-06-07 03:25:43,828][528169] Updated weights for policy 0, policy_version 8166 (0.0008) +[2026-06-07 03:25:43,981][528169] Updated weights for policy 0, policy_version 8177 (0.0008) +[2026-06-07 03:25:44,135][528169] Updated weights for policy 0, policy_version 8189 (0.0006) +[2026-06-07 03:25:44,787][528169] Updated weights for policy 0, policy_version 8200 (0.0008) +[2026-06-07 03:25:44,921][528169] Updated weights for policy 0, policy_version 8210 (0.0008) +[2026-06-07 03:25:45,047][528169] Updated weights for policy 0, policy_version 8220 (0.0008) +[2026-06-07 03:25:45,103][527010] Fps is (10 sec: 22937.7, 60 sec: 24029.9, 300 sec: 22824.1). Total num frames: 4194304. Throughput: 0: 23973.0. Samples: 4178688. Policy #0 lag: (min: 55.0, avg: 67.6, max: 119.0) +[2026-06-07 03:25:45,104][527010] Avg episode reward: [(0, '39.765')] +[2026-06-07 03:25:45,184][528169] Updated weights for policy 0, policy_version 8231 (0.0008) +[2026-06-07 03:25:45,336][528169] Updated weights for policy 0, policy_version 8242 (0.0008) +[2026-06-07 03:25:45,465][528169] Updated weights for policy 0, policy_version 8252 (0.0007) +[2026-06-07 03:25:46,139][528169] Updated weights for policy 0, policy_version 8262 (0.0007) +[2026-06-07 03:25:46,272][528169] Updated weights for policy 0, policy_version 8272 (0.0008) +[2026-06-07 03:25:46,400][528169] Updated weights for policy 0, policy_version 8282 (0.0008) +[2026-06-07 03:25:46,551][528169] Updated weights for policy 0, policy_version 8294 (0.0008) +[2026-06-07 03:25:46,685][528169] Updated weights for policy 0, policy_version 8304 (0.0008) +[2026-06-07 03:25:46,816][528169] Updated weights for policy 0, policy_version 8314 (0.0008) +[2026-06-07 03:25:47,502][528169] Updated weights for policy 0, policy_version 8326 (0.0010) +[2026-06-07 03:25:47,624][528169] Updated weights for policy 0, policy_version 8336 (0.0008) +[2026-06-07 03:25:47,786][528169] Updated weights for policy 0, policy_version 8348 (0.0008) +[2026-06-07 03:25:47,924][528169] Updated weights for policy 0, policy_version 8359 (0.0008) +[2026-06-07 03:25:48,056][528169] Updated weights for policy 0, policy_version 8369 (0.0009) +[2026-06-07 03:25:48,186][528169] Updated weights for policy 0, policy_version 8379 (0.0009) +[2026-06-07 03:25:48,845][528169] Updated weights for policy 0, policy_version 8389 (0.0008) +[2026-06-07 03:25:48,978][528169] Updated weights for policy 0, policy_version 8399 (0.0008) +[2026-06-07 03:25:49,135][528169] Updated weights for policy 0, policy_version 8411 (0.0008) +[2026-06-07 03:25:49,252][528169] Updated weights for policy 0, policy_version 8421 (0.0009) +[2026-06-07 03:25:49,413][528169] Updated weights for policy 0, policy_version 8433 (0.0009) +[2026-06-07 03:25:49,586][528169] Updated weights for policy 0, policy_version 8446 (0.0009) +[2026-06-07 03:25:50,103][527010] Fps is (10 sec: 22937.2, 60 sec: 24029.8, 300 sec: 22913.9). Total num frames: 4325376. Throughput: 0: 23964.4. Samples: 4317184. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) +[2026-06-07 03:25:50,104][527010] Avg episode reward: [(0, '34.346')] +[2026-06-07 03:25:50,262][528169] Updated weights for policy 0, policy_version 8457 (0.0008) +[2026-06-07 03:25:50,393][528169] Updated weights for policy 0, policy_version 8467 (0.0009) +[2026-06-07 03:25:50,526][528169] Updated weights for policy 0, policy_version 8477 (0.0008) +[2026-06-07 03:25:50,651][528169] Updated weights for policy 0, policy_version 8487 (0.0008) +[2026-06-07 03:25:50,795][528169] Updated weights for policy 0, policy_version 8498 (0.0008) +[2026-06-07 03:25:50,944][528169] Updated weights for policy 0, policy_version 8510 (0.0008) +[2026-06-07 03:25:51,580][528169] Updated weights for policy 0, policy_version 8520 (0.0008) +[2026-06-07 03:25:51,726][528169] Updated weights for policy 0, policy_version 8531 (0.0008) +[2026-06-07 03:25:51,873][528169] Updated weights for policy 0, policy_version 8543 (0.0008) +[2026-06-07 03:25:52,013][528169] Updated weights for policy 0, policy_version 8554 (0.0008) +[2026-06-07 03:25:52,174][528169] Updated weights for policy 0, policy_version 8566 (0.0008) +[2026-06-07 03:25:52,298][528169] Updated weights for policy 0, policy_version 8576 (0.0008) +[2026-06-07 03:25:52,980][528169] Updated weights for policy 0, policy_version 8588 (0.0008) +[2026-06-07 03:25:53,137][528169] Updated weights for policy 0, policy_version 8600 (0.0008) +[2026-06-07 03:25:53,277][528169] Updated weights for policy 0, policy_version 8611 (0.0008) +[2026-06-07 03:25:53,411][528169] Updated weights for policy 0, policy_version 8621 (0.0008) +[2026-06-07 03:25:53,542][528169] Updated weights for policy 0, policy_version 8631 (0.0008) +[2026-06-07 03:25:54,215][528169] Updated weights for policy 0, policy_version 8642 (0.0008) +[2026-06-07 03:25:54,349][528169] Updated weights for policy 0, policy_version 8652 (0.0009) +[2026-06-07 03:25:54,475][528169] Updated weights for policy 0, policy_version 8662 (0.0008) +[2026-06-07 03:25:54,623][528169] Updated weights for policy 0, policy_version 8674 (0.0008) +[2026-06-07 03:25:54,766][528169] Updated weights for policy 0, policy_version 8685 (0.0008) +[2026-06-07 03:25:54,888][528169] Updated weights for policy 0, policy_version 8695 (0.0008) +[2026-06-07 03:25:55,103][527010] Fps is (10 sec: 26214.4, 60 sec: 24029.9, 300 sec: 22999.1). Total num frames: 4456448. Throughput: 0: 23978.7. Samples: 4469248. Policy #0 lag: (min: 63.0, avg: 74.2, max: 127.0) +[2026-06-07 03:25:55,104][527010] Avg episode reward: [(0, '40.051')] +[2026-06-07 03:25:55,565][528169] Updated weights for policy 0, policy_version 8705 (0.0008) +[2026-06-07 03:25:55,693][528169] Updated weights for policy 0, policy_version 8715 (0.0008) +[2026-06-07 03:25:55,837][528169] Updated weights for policy 0, policy_version 8726 (0.0009) +[2026-06-07 03:25:55,990][528169] Updated weights for policy 0, policy_version 8738 (0.0008) +[2026-06-07 03:25:56,124][528169] Updated weights for policy 0, policy_version 8748 (0.0008) +[2026-06-07 03:25:56,258][528169] Updated weights for policy 0, policy_version 8759 (0.0008) +[2026-06-07 03:25:56,991][528169] Updated weights for policy 0, policy_version 8774 (0.0008) +[2026-06-07 03:25:57,129][528169] Updated weights for policy 0, policy_version 8786 (0.0008) +[2026-06-07 03:25:57,295][528169] Updated weights for policy 0, policy_version 8799 (0.0008) +[2026-06-07 03:25:57,432][528169] Updated weights for policy 0, policy_version 8810 (0.0008) +[2026-06-07 03:25:57,587][528169] Updated weights for policy 0, policy_version 8822 (0.0008) +[2026-06-07 03:25:58,260][528169] Updated weights for policy 0, policy_version 8833 (0.0008) +[2026-06-07 03:25:58,407][528169] Updated weights for policy 0, policy_version 8845 (0.0008) +[2026-06-07 03:25:58,532][528169] Updated weights for policy 0, policy_version 8855 (0.0008) +[2026-06-07 03:25:58,663][528169] Updated weights for policy 0, policy_version 8865 (0.0008) +[2026-06-07 03:25:58,831][528169] Updated weights for policy 0, policy_version 8878 (0.0008) +[2026-06-07 03:25:58,993][528169] Updated weights for policy 0, policy_version 8890 (0.0005) +[2026-06-07 03:25:59,677][528169] Updated weights for policy 0, policy_version 8901 (0.0006) +[2026-06-07 03:25:59,816][528169] Updated weights for policy 0, policy_version 8912 (0.0008) +[2026-06-07 03:25:59,977][528169] Updated weights for policy 0, policy_version 8924 (0.0008) +[2026-06-07 03:26:00,103][527010] Fps is (10 sec: 22937.9, 60 sec: 24029.9, 300 sec: 22915.1). Total num frames: 4554752. Throughput: 0: 23967.4. Samples: 4537344. Policy #0 lag: (min: 63.0, avg: 74.2, max: 127.0) +[2026-06-07 03:26:00,104][528169] Updated weights for policy 0, policy_version 8934 (0.0008) +[2026-06-07 03:26:00,104][527010] Avg episode reward: [(0, '42.001')] +[2026-06-07 03:26:00,236][528169] Updated weights for policy 0, policy_version 8944 (0.0008) +[2026-06-07 03:26:00,372][528169] Updated weights for policy 0, policy_version 8954 (0.0008) +[2026-06-07 03:26:00,438][528093] Saving new best policy, reward=42.001! +[2026-06-07 03:26:01,014][528169] Updated weights for policy 0, policy_version 8964 (0.0008) +[2026-06-07 03:26:01,162][528169] Updated weights for policy 0, policy_version 8976 (0.0008) +[2026-06-07 03:26:01,311][528169] Updated weights for policy 0, policy_version 8988 (0.0008) +[2026-06-07 03:26:01,465][528169] Updated weights for policy 0, policy_version 9000 (0.0008) +[2026-06-07 03:26:01,605][528169] Updated weights for policy 0, policy_version 9011 (0.0008) +[2026-06-07 03:26:01,749][528169] Updated weights for policy 0, policy_version 9022 (0.0008) +[2026-06-07 03:26:02,431][528169] Updated weights for policy 0, policy_version 9033 (0.0008) +[2026-06-07 03:26:02,575][528169] Updated weights for policy 0, policy_version 9044 (0.0008) +[2026-06-07 03:26:02,731][528169] Updated weights for policy 0, policy_version 9056 (0.0009) +[2026-06-07 03:26:02,926][528169] Updated weights for policy 0, policy_version 9072 (0.0008) +[2026-06-07 03:26:03,064][528169] Updated weights for policy 0, policy_version 9083 (0.0009) +[2026-06-07 03:26:03,760][528169] Updated weights for policy 0, policy_version 9093 (0.0009) +[2026-06-07 03:26:03,933][528169] Updated weights for policy 0, policy_version 9106 (0.0009) +[2026-06-07 03:26:04,058][528169] Updated weights for policy 0, policy_version 9116 (0.0008) +[2026-06-07 03:26:04,231][528169] Updated weights for policy 0, policy_version 9129 (0.0009) +[2026-06-07 03:26:04,368][528169] Updated weights for policy 0, policy_version 9139 (0.0009) +[2026-06-07 03:26:04,508][528169] Updated weights for policy 0, policy_version 9150 (0.0008) +[2026-06-07 03:26:05,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24029.9, 300 sec: 22996.1). Total num frames: 4685824. Throughput: 0: 24004.3. Samples: 4676864. Policy #0 lag: (min: 63.0, avg: 75.1, max: 127.0) +[2026-06-07 03:26:05,104][527010] Avg episode reward: [(0, '41.290')] +[2026-06-07 03:26:05,178][528169] Updated weights for policy 0, policy_version 9160 (0.0011) +[2026-06-07 03:26:05,339][528169] Updated weights for policy 0, policy_version 9173 (0.0009) +[2026-06-07 03:26:05,482][528169] Updated weights for policy 0, policy_version 9184 (0.0008) +[2026-06-07 03:26:05,626][528169] Updated weights for policy 0, policy_version 9195 (0.0008) +[2026-06-07 03:26:05,757][528169] Updated weights for policy 0, policy_version 9205 (0.0008) +[2026-06-07 03:26:05,889][528169] Updated weights for policy 0, policy_version 9215 (0.0008) +[2026-06-07 03:26:06,531][528169] Updated weights for policy 0, policy_version 9226 (0.0009) +[2026-06-07 03:26:06,661][528169] Updated weights for policy 0, policy_version 9236 (0.0009) +[2026-06-07 03:26:06,795][528169] Updated weights for policy 0, policy_version 9246 (0.0009) +[2026-06-07 03:26:06,935][528169] Updated weights for policy 0, policy_version 9257 (0.0009) +[2026-06-07 03:26:07,075][528169] Updated weights for policy 0, policy_version 9267 (0.0008) +[2026-06-07 03:26:07,208][528169] Updated weights for policy 0, policy_version 9277 (0.0008) +[2026-06-07 03:26:07,909][528169] Updated weights for policy 0, policy_version 9288 (0.0008) +[2026-06-07 03:26:08,034][528169] Updated weights for policy 0, policy_version 9298 (0.0008) +[2026-06-07 03:26:08,166][528169] Updated weights for policy 0, policy_version 9308 (0.0008) +[2026-06-07 03:26:08,294][528169] Updated weights for policy 0, policy_version 9318 (0.0008) +[2026-06-07 03:26:08,425][528169] Updated weights for policy 0, policy_version 9328 (0.0008) +[2026-06-07 03:26:08,556][528169] Updated weights for policy 0, policy_version 9338 (0.0008) +[2026-06-07 03:26:09,228][528169] Updated weights for policy 0, policy_version 9349 (0.0008) +[2026-06-07 03:26:09,378][528169] Updated weights for policy 0, policy_version 9360 (0.0008) +[2026-06-07 03:26:09,506][528169] Updated weights for policy 0, policy_version 9370 (0.0008) +[2026-06-07 03:26:09,667][528169] Updated weights for policy 0, policy_version 9381 (0.0009) +[2026-06-07 03:26:09,815][528169] Updated weights for policy 0, policy_version 9392 (0.0008) +[2026-06-07 03:26:09,962][528169] Updated weights for policy 0, policy_version 9403 (0.0008) +[2026-06-07 03:26:10,103][527010] Fps is (10 sec: 26214.3, 60 sec: 24029.9, 300 sec: 23073.2). Total num frames: 4816896. Throughput: 0: 23978.8. Samples: 4828928. Policy #0 lag: (min: 63.0, avg: 75.1, max: 127.0) +[2026-06-07 03:26:10,104][527010] Avg episode reward: [(0, '42.183')] +[2026-06-07 03:26:10,113][528093] Saving new best policy, reward=42.183! +[2026-06-07 03:26:10,592][528169] Updated weights for policy 0, policy_version 9413 (0.0008) +[2026-06-07 03:26:10,728][528169] Updated weights for policy 0, policy_version 9424 (0.0008) +[2026-06-07 03:26:10,860][528169] Updated weights for policy 0, policy_version 9434 (0.0008) +[2026-06-07 03:26:11,011][528169] Updated weights for policy 0, policy_version 9446 (0.0008) +[2026-06-07 03:26:11,152][528169] Updated weights for policy 0, policy_version 9457 (0.0008) +[2026-06-07 03:26:11,288][528169] Updated weights for policy 0, policy_version 9467 (0.0008) +[2026-06-07 03:26:11,995][528169] Updated weights for policy 0, policy_version 9478 (0.0008) +[2026-06-07 03:26:12,135][528169] Updated weights for policy 0, policy_version 9489 (0.0008) +[2026-06-07 03:26:12,263][528169] Updated weights for policy 0, policy_version 9499 (0.0008) +[2026-06-07 03:26:12,412][528169] Updated weights for policy 0, policy_version 9510 (0.0008) +[2026-06-07 03:26:12,553][528169] Updated weights for policy 0, policy_version 9521 (0.0008) +[2026-06-07 03:26:12,692][528169] Updated weights for policy 0, policy_version 9532 (0.0009) +[2026-06-07 03:26:13,333][528169] Updated weights for policy 0, policy_version 9542 (0.0008) +[2026-06-07 03:26:13,497][528169] Updated weights for policy 0, policy_version 9554 (0.0008) +[2026-06-07 03:26:13,641][528169] Updated weights for policy 0, policy_version 9565 (0.0008) +[2026-06-07 03:26:13,778][528169] Updated weights for policy 0, policy_version 9575 (0.0008) +[2026-06-07 03:26:13,908][528169] Updated weights for policy 0, policy_version 9585 (0.0008) +[2026-06-07 03:26:14,052][528169] Updated weights for policy 0, policy_version 9596 (0.0008) +[2026-06-07 03:26:14,694][528169] Updated weights for policy 0, policy_version 9606 (0.0008) +[2026-06-07 03:26:14,836][528169] Updated weights for policy 0, policy_version 9617 (0.0008) +[2026-06-07 03:26:14,967][528169] Updated weights for policy 0, policy_version 9627 (0.0007) +[2026-06-07 03:26:15,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24029.9, 300 sec: 22993.4). Total num frames: 4915200. Throughput: 0: 23992.9. Samples: 4898176. Policy #0 lag: (min: 35.0, avg: 79.5, max: 88.0) +[2026-06-07 03:26:15,104][527010] Avg episode reward: [(0, '43.715')] +[2026-06-07 03:26:15,117][528169] Updated weights for policy 0, policy_version 9638 (0.0008) +[2026-06-07 03:26:15,257][528169] Updated weights for policy 0, policy_version 9648 (0.0008) +[2026-06-07 03:26:15,421][528169] Updated weights for policy 0, policy_version 9661 (0.0008) +[2026-06-07 03:26:15,454][528093] Saving new best policy, reward=43.715! +[2026-06-07 03:26:16,073][528169] Updated weights for policy 0, policy_version 9671 (0.0008) +[2026-06-07 03:26:16,225][528169] Updated weights for policy 0, policy_version 9683 (0.0008) +[2026-06-07 03:26:16,370][528169] Updated weights for policy 0, policy_version 9694 (0.0008) +[2026-06-07 03:26:16,512][528169] Updated weights for policy 0, policy_version 9705 (0.0008) +[2026-06-07 03:26:16,650][528169] Updated weights for policy 0, policy_version 9715 (0.0008) +[2026-06-07 03:26:16,789][528169] Updated weights for policy 0, policy_version 9726 (0.0008) +[2026-06-07 03:26:17,459][528169] Updated weights for policy 0, policy_version 9737 (0.0005) +[2026-06-07 03:26:17,583][528169] Updated weights for policy 0, policy_version 9747 (0.0004) +[2026-06-07 03:26:17,743][528169] Updated weights for policy 0, policy_version 9759 (0.0005) +[2026-06-07 03:26:17,893][528169] Updated weights for policy 0, policy_version 9770 (0.0004) +[2026-06-07 03:26:18,049][528169] Updated weights for policy 0, policy_version 9782 (0.0005) +[2026-06-07 03:26:18,675][528169] Updated weights for policy 0, policy_version 9793 (0.0004) +[2026-06-07 03:26:18,800][528169] Updated weights for policy 0, policy_version 9803 (0.0005) +[2026-06-07 03:26:18,937][528169] Updated weights for policy 0, policy_version 9814 (0.0005) +[2026-06-07 03:26:19,082][528169] Updated weights for policy 0, policy_version 9824 (0.0005) +[2026-06-07 03:26:19,215][528169] Updated weights for policy 0, policy_version 9834 (0.0005) +[2026-06-07 03:26:19,373][528169] Updated weights for policy 0, policy_version 9846 (0.0004) +[2026-06-07 03:26:20,030][528169] Updated weights for policy 0, policy_version 9857 (0.0005) +[2026-06-07 03:26:20,103][527010] Fps is (10 sec: 22937.4, 60 sec: 24029.8, 300 sec: 23067.0). Total num frames: 5046272. Throughput: 0: 24009.9. Samples: 5037824. Policy #0 lag: (min: 28.0, avg: 79.7, max: 90.0) +[2026-06-07 03:26:20,105][527010] Avg episode reward: [(0, '42.556')] +[2026-06-07 03:26:20,176][528169] Updated weights for policy 0, policy_version 9868 (0.0005) +[2026-06-07 03:26:20,307][528169] Updated weights for policy 0, policy_version 9878 (0.0005) +[2026-06-07 03:26:20,456][528169] Updated weights for policy 0, policy_version 9889 (0.0008) +[2026-06-07 03:26:20,586][528169] Updated weights for policy 0, policy_version 9899 (0.0008) +[2026-06-07 03:26:20,741][528169] Updated weights for policy 0, policy_version 9911 (0.0008) +[2026-06-07 03:26:21,410][528169] Updated weights for policy 0, policy_version 9922 (0.0008) +[2026-06-07 03:26:21,538][528169] Updated weights for policy 0, policy_version 9932 (0.0007) +[2026-06-07 03:26:21,679][528169] Updated weights for policy 0, policy_version 9943 (0.0007) +[2026-06-07 03:26:21,840][528169] Updated weights for policy 0, policy_version 9956 (0.0007) +[2026-06-07 03:26:21,995][528169] Updated weights for policy 0, policy_version 9967 (0.0007) +[2026-06-07 03:26:22,159][528169] Updated weights for policy 0, policy_version 9980 (0.0007) +[2026-06-07 03:26:22,811][528169] Updated weights for policy 0, policy_version 9990 (0.0008) +[2026-06-07 03:26:22,947][528169] Updated weights for policy 0, policy_version 10000 (0.0008) +[2026-06-07 03:26:23,074][528169] Updated weights for policy 0, policy_version 10011 (0.0008) +[2026-06-07 03:26:23,234][528169] Updated weights for policy 0, policy_version 10023 (0.0008) +[2026-06-07 03:26:23,380][528169] Updated weights for policy 0, policy_version 10034 (0.0008) +[2026-06-07 03:26:23,542][528169] Updated weights for policy 0, policy_version 10046 (0.0008) +[2026-06-07 03:26:24,218][528169] Updated weights for policy 0, policy_version 10056 (0.0008) +[2026-06-07 03:26:24,345][528169] Updated weights for policy 0, policy_version 10066 (0.0008) +[2026-06-07 03:26:24,489][528169] Updated weights for policy 0, policy_version 10077 (0.0008) +[2026-06-07 03:26:24,639][528169] Updated weights for policy 0, policy_version 10089 (0.0008) +[2026-06-07 03:26:24,785][528169] Updated weights for policy 0, policy_version 10100 (0.0008) +[2026-06-07 03:26:24,913][528169] Updated weights for policy 0, policy_version 10110 (0.0008) +[2026-06-07 03:26:25,103][527010] Fps is (10 sec: 26214.5, 60 sec: 24030.0, 300 sec: 23137.3). Total num frames: 5177344. Throughput: 0: 24001.4. Samples: 5190528. Policy #0 lag: (min: 28.0, avg: 79.7, max: 90.0) +[2026-06-07 03:26:25,104][527010] Avg episode reward: [(0, '44.589')] +[2026-06-07 03:26:25,108][528093] Saving new best policy, reward=44.589! +[2026-06-07 03:26:25,602][528169] Updated weights for policy 0, policy_version 10121 (0.0008) +[2026-06-07 03:26:25,761][528169] Updated weights for policy 0, policy_version 10133 (0.0008) +[2026-06-07 03:26:25,917][528169] Updated weights for policy 0, policy_version 10145 (0.0008) +[2026-06-07 03:26:26,053][528169] Updated weights for policy 0, policy_version 10155 (0.0008) +[2026-06-07 03:26:26,208][528169] Updated weights for policy 0, policy_version 10167 (0.0008) +[2026-06-07 03:26:26,922][528169] Updated weights for policy 0, policy_version 10178 (0.0008) +[2026-06-07 03:26:27,044][528169] Updated weights for policy 0, policy_version 10188 (0.0008) +[2026-06-07 03:26:27,173][528169] Updated weights for policy 0, policy_version 10198 (0.0008) +[2026-06-07 03:26:27,309][528169] Updated weights for policy 0, policy_version 10208 (0.0008) +[2026-06-07 03:26:27,444][528169] Updated weights for policy 0, policy_version 10218 (0.0008) +[2026-06-07 03:26:27,585][528169] Updated weights for policy 0, policy_version 10229 (0.0007) +[2026-06-07 03:26:27,724][528169] Updated weights for policy 0, policy_version 10240 (0.0005) +[2026-06-07 03:26:28,367][528169] Updated weights for policy 0, policy_version 10250 (0.0008) +[2026-06-07 03:26:28,494][528169] Updated weights for policy 0, policy_version 10260 (0.0008) +[2026-06-07 03:26:28,627][528169] Updated weights for policy 0, policy_version 10270 (0.0008) +[2026-06-07 03:26:28,773][528169] Updated weights for policy 0, policy_version 10281 (0.0008) +[2026-06-07 03:26:28,921][528169] Updated weights for policy 0, policy_version 10292 (0.0008) +[2026-06-07 03:26:29,067][528169] Updated weights for policy 0, policy_version 10303 (0.0008) +[2026-06-07 03:26:29,761][528169] Updated weights for policy 0, policy_version 10316 (0.0008) +[2026-06-07 03:26:29,897][528169] Updated weights for policy 0, policy_version 10326 (0.0009) +[2026-06-07 03:26:30,053][528169] Updated weights for policy 0, policy_version 10338 (0.0008) +[2026-06-07 03:26:30,103][527010] Fps is (10 sec: 22937.8, 60 sec: 24029.9, 300 sec: 23061.3). Total num frames: 5275648. Throughput: 0: 24024.2. Samples: 5259776. Policy #0 lag: (min: 55.0, avg: 67.6, max: 119.0) +[2026-06-07 03:26:30,104][527010] Avg episode reward: [(0, '37.155')] +[2026-06-07 03:26:30,200][528169] Updated weights for policy 0, policy_version 10349 (0.0008) +[2026-06-07 03:26:30,345][528169] Updated weights for policy 0, policy_version 10360 (0.0009) +[2026-06-07 03:26:31,032][528169] Updated weights for policy 0, policy_version 10371 (0.0008) +[2026-06-07 03:26:31,177][528169] Updated weights for policy 0, policy_version 10382 (0.0008) +[2026-06-07 03:26:31,316][528169] Updated weights for policy 0, policy_version 10393 (0.0009) +[2026-06-07 03:26:31,457][528169] Updated weights for policy 0, policy_version 10404 (0.0008) +[2026-06-07 03:26:31,597][528169] Updated weights for policy 0, policy_version 10414 (0.0008) +[2026-06-07 03:26:31,756][528169] Updated weights for policy 0, policy_version 10426 (0.0009) +[2026-06-07 03:26:32,401][528169] Updated weights for policy 0, policy_version 10436 (0.0009) +[2026-06-07 03:26:32,545][528169] Updated weights for policy 0, policy_version 10447 (0.0009) +[2026-06-07 03:26:32,687][528169] Updated weights for policy 0, policy_version 10458 (0.0008) +[2026-06-07 03:26:32,841][528169] Updated weights for policy 0, policy_version 10470 (0.0008) +[2026-06-07 03:26:32,969][528169] Updated weights for policy 0, policy_version 10480 (0.0009) +[2026-06-07 03:26:33,102][528169] Updated weights for policy 0, policy_version 10490 (0.0009) +[2026-06-07 03:26:33,753][528169] Updated weights for policy 0, policy_version 10500 (0.0009) +[2026-06-07 03:26:33,892][528169] Updated weights for policy 0, policy_version 10511 (0.0008) +[2026-06-07 03:26:34,029][528169] Updated weights for policy 0, policy_version 10522 (0.0008) +[2026-06-07 03:26:34,178][528169] Updated weights for policy 0, policy_version 10533 (0.0009) +[2026-06-07 03:26:34,310][528169] Updated weights for policy 0, policy_version 10543 (0.0009) +[2026-06-07 03:26:34,481][528169] Updated weights for policy 0, policy_version 10556 (0.0008) +[2026-06-07 03:26:35,103][527010] Fps is (10 sec: 22937.5, 60 sec: 24029.9, 300 sec: 23128.8). Total num frames: 5406720. Throughput: 0: 24066.9. Samples: 5400192. Policy #0 lag: (min: 55.0, avg: 67.6, max: 119.0) +[2026-06-07 03:26:35,104][527010] Avg episode reward: [(0, '48.950')] +[2026-06-07 03:26:35,159][528169] Updated weights for policy 0, policy_version 10567 (0.0008) +[2026-06-07 03:26:35,301][528169] Updated weights for policy 0, policy_version 10578 (0.0009) +[2026-06-07 03:26:35,439][528169] Updated weights for policy 0, policy_version 10589 (0.0009) +[2026-06-07 03:26:35,604][528169] Updated weights for policy 0, policy_version 10601 (0.0009) +[2026-06-07 03:26:35,732][528169] Updated weights for policy 0, policy_version 10611 (0.0008) +[2026-06-07 03:26:35,869][528169] Updated weights for policy 0, policy_version 10622 (0.0008) +[2026-06-07 03:26:35,891][528093] Saving new best policy, reward=48.950! +[2026-06-07 03:26:36,548][528169] Updated weights for policy 0, policy_version 10632 (0.0009) +[2026-06-07 03:26:36,700][528169] Updated weights for policy 0, policy_version 10644 (0.0009) +[2026-06-07 03:26:36,839][528169] Updated weights for policy 0, policy_version 10655 (0.0008) +[2026-06-07 03:26:36,993][528169] Updated weights for policy 0, policy_version 10666 (0.0008) +[2026-06-07 03:26:37,137][528169] Updated weights for policy 0, policy_version 10677 (0.0008) +[2026-06-07 03:26:37,276][528169] Updated weights for policy 0, policy_version 10687 (0.0009) +[2026-06-07 03:26:37,929][528169] Updated weights for policy 0, policy_version 10697 (0.0008) +[2026-06-07 03:26:38,068][528169] Updated weights for policy 0, policy_version 10708 (0.0008) +[2026-06-07 03:26:38,191][528169] Updated weights for policy 0, policy_version 10718 (0.0008) +[2026-06-07 03:26:38,352][528169] Updated weights for policy 0, policy_version 10730 (0.0008) +[2026-06-07 03:26:38,486][528169] Updated weights for policy 0, policy_version 10740 (0.0008) +[2026-06-07 03:26:38,647][528169] Updated weights for policy 0, policy_version 10752 (0.0009) +[2026-06-07 03:26:39,284][528169] Updated weights for policy 0, policy_version 10762 (0.0009) +[2026-06-07 03:26:39,447][528169] Updated weights for policy 0, policy_version 10775 (0.0008) +[2026-06-07 03:26:39,605][528169] Updated weights for policy 0, policy_version 10787 (0.0008) +[2026-06-07 03:26:39,735][528169] Updated weights for policy 0, policy_version 10797 (0.0009) +[2026-06-07 03:26:39,869][528169] Updated weights for policy 0, policy_version 10807 (0.0009) +[2026-06-07 03:26:40,103][527010] Fps is (10 sec: 26214.3, 60 sec: 24029.8, 300 sec: 23193.4). Total num frames: 5537792. Throughput: 0: 24106.7. Samples: 5554048. Policy #0 lag: (min: 63.0, avg: 74.1, max: 127.0) +[2026-06-07 03:26:40,104][527010] Avg episode reward: [(0, '45.918')] +[2026-06-07 03:26:40,507][528169] Updated weights for policy 0, policy_version 10818 (0.0009) +[2026-06-07 03:26:40,636][528169] Updated weights for policy 0, policy_version 10828 (0.0008) +[2026-06-07 03:26:40,773][528169] Updated weights for policy 0, policy_version 10839 (0.0008) +[2026-06-07 03:26:40,902][528169] Updated weights for policy 0, policy_version 10849 (0.0008) +[2026-06-07 03:26:41,029][528169] Updated weights for policy 0, policy_version 10859 (0.0008) +[2026-06-07 03:26:41,178][528169] Updated weights for policy 0, policy_version 10870 (0.0008) +[2026-06-07 03:26:41,857][528169] Updated weights for policy 0, policy_version 10883 (0.0008) +[2026-06-07 03:26:41,990][528169] Updated weights for policy 0, policy_version 10894 (0.0008) +[2026-06-07 03:26:42,125][528169] Updated weights for policy 0, policy_version 10904 (0.0008) +[2026-06-07 03:26:42,287][528169] Updated weights for policy 0, policy_version 10916 (0.0008) +[2026-06-07 03:26:42,435][528169] Updated weights for policy 0, policy_version 10928 (0.0008) +[2026-06-07 03:26:42,572][528169] Updated weights for policy 0, policy_version 10938 (0.0009) +[2026-06-07 03:26:43,212][528169] Updated weights for policy 0, policy_version 10949 (0.0008) +[2026-06-07 03:26:43,344][528169] Updated weights for policy 0, policy_version 10959 (0.0008) +[2026-06-07 03:26:43,502][528169] Updated weights for policy 0, policy_version 10971 (0.0008) +[2026-06-07 03:26:43,686][528169] Updated weights for policy 0, policy_version 10985 (0.0008) +[2026-06-07 03:26:43,831][528169] Updated weights for policy 0, policy_version 10996 (0.0008) +[2026-06-07 03:26:43,965][528169] Updated weights for policy 0, policy_version 11006 (0.0008) +[2026-06-07 03:26:44,627][528169] Updated weights for policy 0, policy_version 11018 (0.0008) +[2026-06-07 03:26:44,763][528169] Updated weights for policy 0, policy_version 11029 (0.0008) +[2026-06-07 03:26:44,915][528169] Updated weights for policy 0, policy_version 11040 (0.0008) +[2026-06-07 03:26:45,074][528169] Updated weights for policy 0, policy_version 11052 (0.0008) +[2026-06-07 03:26:45,103][527010] Fps is (10 sec: 22937.7, 60 sec: 24029.9, 300 sec: 23120.9). Total num frames: 5636096. Throughput: 0: 24095.3. Samples: 5621632. Policy #0 lag: (min: 63.0, avg: 74.1, max: 127.0) +[2026-06-07 03:26:45,104][527010] Avg episode reward: [(0, '46.235')] +[2026-06-07 03:26:45,208][528169] Updated weights for policy 0, policy_version 11062 (0.0008) +[2026-06-07 03:26:45,340][528169] Updated weights for policy 0, policy_version 11072 (0.0009) +[2026-06-07 03:26:45,986][528169] Updated weights for policy 0, policy_version 11082 (0.0009) +[2026-06-07 03:26:46,117][528169] Updated weights for policy 0, policy_version 11092 (0.0008) +[2026-06-07 03:26:46,274][528169] Updated weights for policy 0, policy_version 11104 (0.0008) +[2026-06-07 03:26:46,427][528169] Updated weights for policy 0, policy_version 11116 (0.0008) +[2026-06-07 03:26:46,564][528169] Updated weights for policy 0, policy_version 11126 (0.0008) +[2026-06-07 03:26:47,193][528169] Updated weights for policy 0, policy_version 11137 (0.0008) +[2026-06-07 03:26:47,354][528169] Updated weights for policy 0, policy_version 11149 (0.0008) +[2026-06-07 03:26:47,478][528169] Updated weights for policy 0, policy_version 11159 (0.0008) +[2026-06-07 03:26:47,642][528169] Updated weights for policy 0, policy_version 11171 (0.0008) +[2026-06-07 03:26:47,801][528169] Updated weights for policy 0, policy_version 11183 (0.0008) +[2026-06-07 03:26:47,936][528169] Updated weights for policy 0, policy_version 11193 (0.0008) +[2026-06-07 03:26:48,600][528169] Updated weights for policy 0, policy_version 11204 (0.0008) +[2026-06-07 03:26:48,726][528169] Updated weights for policy 0, policy_version 11214 (0.0008) +[2026-06-07 03:26:48,846][528169] Updated weights for policy 0, policy_version 11224 (0.0008) +[2026-06-07 03:26:49,002][528169] Updated weights for policy 0, policy_version 11236 (0.0008) +[2026-06-07 03:26:49,148][528169] Updated weights for policy 0, policy_version 11247 (0.0008) +[2026-06-07 03:26:49,291][528169] Updated weights for policy 0, policy_version 11258 (0.0008) +[2026-06-07 03:26:49,949][528169] Updated weights for policy 0, policy_version 11268 (0.0008) +[2026-06-07 03:26:50,094][528169] Updated weights for policy 0, policy_version 11278 (0.0008) +[2026-06-07 03:26:50,103][527010] Fps is (10 sec: 22937.7, 60 sec: 24029.9, 300 sec: 23183.1). Total num frames: 5767168. Throughput: 0: 24214.8. Samples: 5766528. Policy #0 lag: (min: 46.0, avg: 60.0, max: 110.0) +[2026-06-07 03:26:50,104][527010] Avg episode reward: [(0, '50.019')] +[2026-06-07 03:26:50,215][528169] Updated weights for policy 0, policy_version 11288 (0.0008) +[2026-06-07 03:26:50,346][528169] Updated weights for policy 0, policy_version 11298 (0.0008) +[2026-06-07 03:26:50,505][528169] Updated weights for policy 0, policy_version 11310 (0.0008) +[2026-06-07 03:26:50,650][528169] Updated weights for policy 0, policy_version 11321 (0.0008) +[2026-06-07 03:26:50,736][528093] Saving new best policy, reward=50.019! +[2026-06-07 03:26:51,323][528169] Updated weights for policy 0, policy_version 11331 (0.0008) +[2026-06-07 03:26:51,470][528169] Updated weights for policy 0, policy_version 11343 (0.0008) +[2026-06-07 03:26:51,598][528169] Updated weights for policy 0, policy_version 11353 (0.0008) +[2026-06-07 03:26:51,739][528169] Updated weights for policy 0, policy_version 11363 (0.0008) +[2026-06-07 03:26:51,871][528169] Updated weights for policy 0, policy_version 11374 (0.0008) +[2026-06-07 03:26:52,014][528169] Updated weights for policy 0, policy_version 11385 (0.0008) +[2026-06-07 03:26:52,671][528169] Updated weights for policy 0, policy_version 11395 (0.0010) +[2026-06-07 03:26:52,797][528169] Updated weights for policy 0, policy_version 11405 (0.0010) +[2026-06-07 03:26:52,973][528169] Updated weights for policy 0, policy_version 11418 (0.0008) +[2026-06-07 03:26:53,107][528169] Updated weights for policy 0, policy_version 11429 (0.0008) +[2026-06-07 03:26:53,241][528169] Updated weights for policy 0, policy_version 11439 (0.0008) +[2026-06-07 03:26:53,367][528169] Updated weights for policy 0, policy_version 11449 (0.0008) +[2026-06-07 03:26:54,003][528169] Updated weights for policy 0, policy_version 11459 (0.0008) +[2026-06-07 03:26:54,139][528169] Updated weights for policy 0, policy_version 11469 (0.0008) +[2026-06-07 03:26:54,278][528169] Updated weights for policy 0, policy_version 11480 (0.0008) +[2026-06-07 03:26:54,412][528169] Updated weights for policy 0, policy_version 11490 (0.0008) +[2026-06-07 03:26:54,567][528169] Updated weights for policy 0, policy_version 11502 (0.0008) +[2026-06-07 03:26:54,717][528169] Updated weights for policy 0, policy_version 11513 (0.0008) +[2026-06-07 03:26:55,103][527010] Fps is (10 sec: 26214.4, 60 sec: 24029.9, 300 sec: 23242.8). Total num frames: 5898240. Throughput: 0: 24101.0. Samples: 5913472. Policy #0 lag: (min: 46.0, avg: 60.0, max: 110.0) +[2026-06-07 03:26:55,104][527010] Avg episode reward: [(0, '46.027')] +[2026-06-07 03:26:55,354][528169] Updated weights for policy 0, policy_version 11523 (0.0009) +[2026-06-07 03:26:55,482][528169] Updated weights for policy 0, policy_version 11533 (0.0008) +[2026-06-07 03:26:55,652][528169] Updated weights for policy 0, policy_version 11546 (0.0009) +[2026-06-07 03:26:55,781][528169] Updated weights for policy 0, policy_version 11556 (0.0008) +[2026-06-07 03:26:55,939][528169] Updated weights for policy 0, policy_version 11568 (0.0008) +[2026-06-07 03:26:56,103][528169] Updated weights for policy 0, policy_version 11580 (0.0008) +[2026-06-07 03:26:56,762][528169] Updated weights for policy 0, policy_version 11591 (0.0008) +[2026-06-07 03:26:56,889][528169] Updated weights for policy 0, policy_version 11601 (0.0008) +[2026-06-07 03:26:57,015][528169] Updated weights for policy 0, policy_version 11611 (0.0008) +[2026-06-07 03:26:57,168][528169] Updated weights for policy 0, policy_version 11623 (0.0008) +[2026-06-07 03:26:57,317][528169] Updated weights for policy 0, policy_version 11633 (0.0009) +[2026-06-07 03:26:57,450][528169] Updated weights for policy 0, policy_version 11643 (0.0008) +[2026-06-07 03:26:58,095][528169] Updated weights for policy 0, policy_version 11654 (0.0008) +[2026-06-07 03:26:58,271][528169] Updated weights for policy 0, policy_version 11667 (0.0008) +[2026-06-07 03:26:58,407][528169] Updated weights for policy 0, policy_version 11678 (0.0009) +[2026-06-07 03:26:58,555][528169] Updated weights for policy 0, policy_version 11689 (0.0008) +[2026-06-07 03:26:58,708][528169] Updated weights for policy 0, policy_version 11701 (0.0008) +[2026-06-07 03:26:58,845][528169] Updated weights for policy 0, policy_version 11712 (0.0008) +[2026-06-07 03:26:59,526][528169] Updated weights for policy 0, policy_version 11722 (0.0008) +[2026-06-07 03:26:59,664][528169] Updated weights for policy 0, policy_version 11732 (0.0008) +[2026-06-07 03:26:59,785][528169] Updated weights for policy 0, policy_version 11742 (0.0008) +[2026-06-07 03:26:59,957][528169] Updated weights for policy 0, policy_version 11755 (0.0008) +[2026-06-07 03:27:00,086][528169] Updated weights for policy 0, policy_version 11765 (0.0009) +[2026-06-07 03:27:00,103][527010] Fps is (10 sec: 22937.7, 60 sec: 24029.9, 300 sec: 23173.6). Total num frames: 5996544. Throughput: 0: 24109.5. Samples: 5983104. Policy #0 lag: (min: 58.0, avg: 100.7, max: 117.0) +[2026-06-07 03:27:00,103][527010] Avg episode reward: [(0, '54.686')] +[2026-06-07 03:27:00,227][528093] Saving new best policy, reward=54.686! +[2026-06-07 03:27:00,773][528169] Updated weights for policy 0, policy_version 11777 (0.0008) +[2026-06-07 03:27:00,888][528169] Updated weights for policy 0, policy_version 11787 (0.0008) +[2026-06-07 03:27:01,062][528169] Updated weights for policy 0, policy_version 11801 (0.0008) +[2026-06-07 03:27:01,254][528169] Updated weights for policy 0, policy_version 11816 (0.0009) +[2026-06-07 03:27:01,409][528169] Updated weights for policy 0, policy_version 11828 (0.0008) +[2026-06-07 03:27:01,552][528169] Updated weights for policy 0, policy_version 11840 (0.0008) +[2026-06-07 03:27:02,221][528169] Updated weights for policy 0, policy_version 11850 (0.0008) +[2026-06-07 03:27:02,394][528169] Updated weights for policy 0, policy_version 11864 (0.0008) +[2026-06-07 03:27:02,532][528169] Updated weights for policy 0, policy_version 11875 (0.0008) +[2026-06-07 03:27:02,669][528169] Updated weights for policy 0, policy_version 11886 (0.0008) +[2026-06-07 03:27:02,820][528169] Updated weights for policy 0, policy_version 11898 (0.0008) +[2026-06-07 03:27:03,511][528169] Updated weights for policy 0, policy_version 11909 (0.0008) +[2026-06-07 03:27:03,641][528169] Updated weights for policy 0, policy_version 11920 (0.0008) +[2026-06-07 03:27:03,790][528169] Updated weights for policy 0, policy_version 11932 (0.0008) +[2026-06-07 03:27:03,944][528169] Updated weights for policy 0, policy_version 11944 (0.0008) +[2026-06-07 03:27:04,110][528169] Updated weights for policy 0, policy_version 11956 (0.0008) +[2026-06-07 03:27:04,238][528169] Updated weights for policy 0, policy_version 11966 (0.0008) +[2026-06-07 03:27:04,949][528169] Updated weights for policy 0, policy_version 11977 (0.0009) +[2026-06-07 03:27:05,084][528169] Updated weights for policy 0, policy_version 11987 (0.0008) +[2026-06-07 03:27:05,103][527010] Fps is (10 sec: 22937.5, 60 sec: 24029.9, 300 sec: 23231.2). Total num frames: 6127616. Throughput: 0: 24294.4. Samples: 6131072. Policy #0 lag: (min: 58.0, avg: 100.7, max: 117.0) +[2026-06-07 03:27:05,104][527010] Avg episode reward: [(0, '58.834')] +[2026-06-07 03:27:05,210][528169] Updated weights for policy 0, policy_version 11997 (0.0008) +[2026-06-07 03:27:05,355][528169] Updated weights for policy 0, policy_version 12008 (0.0008) +[2026-06-07 03:27:05,498][528169] Updated weights for policy 0, policy_version 12018 (0.0008) +[2026-06-07 03:27:05,624][528169] Updated weights for policy 0, policy_version 12028 (0.0008) +[2026-06-07 03:27:05,671][528093] Saving new best policy, reward=58.834! +[2026-06-07 03:27:06,257][528169] Updated weights for policy 0, policy_version 12038 (0.0008) +[2026-06-07 03:27:06,403][528169] Updated weights for policy 0, policy_version 12049 (0.0008) +[2026-06-07 03:27:06,551][528169] Updated weights for policy 0, policy_version 12060 (0.0008) +[2026-06-07 03:27:06,692][528169] Updated weights for policy 0, policy_version 12071 (0.0008) +[2026-06-07 03:27:06,836][528169] Updated weights for policy 0, policy_version 12082 (0.0008) +[2026-06-07 03:27:06,998][528169] Updated weights for policy 0, policy_version 12094 (0.0008) +[2026-06-07 03:27:07,630][528169] Updated weights for policy 0, policy_version 12104 (0.0008) +[2026-06-07 03:27:07,753][528169] Updated weights for policy 0, policy_version 12114 (0.0008) +[2026-06-07 03:27:07,885][528169] Updated weights for policy 0, policy_version 12124 (0.0008) +[2026-06-07 03:27:08,021][528169] Updated weights for policy 0, policy_version 12134 (0.0008) +[2026-06-07 03:27:08,172][528169] Updated weights for policy 0, policy_version 12146 (0.0008) +[2026-06-07 03:27:08,331][528169] Updated weights for policy 0, policy_version 12158 (0.0008) +[2026-06-07 03:27:09,002][528169] Updated weights for policy 0, policy_version 12168 (0.0008) +[2026-06-07 03:27:09,130][528169] Updated weights for policy 0, policy_version 12178 (0.0008) +[2026-06-07 03:27:09,265][528169] Updated weights for policy 0, policy_version 12188 (0.0008) +[2026-06-07 03:27:09,404][528169] Updated weights for policy 0, policy_version 12198 (0.0008) +[2026-06-07 03:27:09,541][528169] Updated weights for policy 0, policy_version 12208 (0.0008) +[2026-06-07 03:27:09,687][528169] Updated weights for policy 0, policy_version 12219 (0.0008) +[2026-06-07 03:27:10,104][527010] Fps is (10 sec: 26212.4, 60 sec: 24029.6, 300 sec: 23286.7). Total num frames: 6258688. Throughput: 0: 24140.4. Samples: 6276864. Policy #0 lag: (min: 63.0, avg: 75.9, max: 127.0) +[2026-06-07 03:27:10,105][527010] Avg episode reward: [(0, '46.661')] +[2026-06-07 03:27:10,307][528169] Updated weights for policy 0, policy_version 12229 (0.0008) +[2026-06-07 03:27:10,447][528169] Updated weights for policy 0, policy_version 12240 (0.0008) +[2026-06-07 03:27:10,584][528169] Updated weights for policy 0, policy_version 12250 (0.0008) +[2026-06-07 03:27:10,719][528169] Updated weights for policy 0, policy_version 12260 (0.0008) +[2026-06-07 03:27:10,873][528169] Updated weights for policy 0, policy_version 12272 (0.0008) +[2026-06-07 03:27:11,004][528169] Updated weights for policy 0, policy_version 12282 (0.0008) +[2026-06-07 03:27:11,647][528169] Updated weights for policy 0, policy_version 12293 (0.0008) +[2026-06-07 03:27:11,773][528169] Updated weights for policy 0, policy_version 12303 (0.0008) +[2026-06-07 03:27:11,924][528169] Updated weights for policy 0, policy_version 12314 (0.0008) +[2026-06-07 03:27:12,045][528169] Updated weights for policy 0, policy_version 12324 (0.0008) +[2026-06-07 03:27:12,196][528169] Updated weights for policy 0, policy_version 12335 (0.0008) +[2026-06-07 03:27:12,344][528169] Updated weights for policy 0, policy_version 12347 (0.0008) +[2026-06-07 03:27:12,994][528169] Updated weights for policy 0, policy_version 12357 (0.0008) +[2026-06-07 03:27:13,159][528169] Updated weights for policy 0, policy_version 12370 (0.0008) +[2026-06-07 03:27:13,287][528169] Updated weights for policy 0, policy_version 12380 (0.0008) +[2026-06-07 03:27:13,422][528169] Updated weights for policy 0, policy_version 12390 (0.0008) +[2026-06-07 03:27:13,555][528169] Updated weights for policy 0, policy_version 12400 (0.0008) +[2026-06-07 03:27:13,698][528169] Updated weights for policy 0, policy_version 12411 (0.0008) +[2026-06-07 03:27:14,365][528169] Updated weights for policy 0, policy_version 12422 (0.0008) +[2026-06-07 03:27:14,495][528169] Updated weights for policy 0, policy_version 12432 (0.0008) +[2026-06-07 03:27:14,631][528169] Updated weights for policy 0, policy_version 12443 (0.0008) +[2026-06-07 03:27:14,774][528169] Updated weights for policy 0, policy_version 12453 (0.0008) +[2026-06-07 03:27:14,913][528169] Updated weights for policy 0, policy_version 12464 (0.0008) +[2026-06-07 03:27:15,040][528169] Updated weights for policy 0, policy_version 12474 (0.0008) +[2026-06-07 03:27:15,103][527010] Fps is (10 sec: 22937.7, 60 sec: 24029.9, 300 sec: 23220.5). Total num frames: 6356992. Throughput: 0: 24138.0. Samples: 6345984. Policy #0 lag: (min: 63.0, avg: 75.9, max: 127.0) +[2026-06-07 03:27:15,104][527010] Avg episode reward: [(0, '53.906')] +[2026-06-07 03:27:15,714][528169] Updated weights for policy 0, policy_version 12484 (0.0008) +[2026-06-07 03:27:15,845][528169] Updated weights for policy 0, policy_version 12494 (0.0008) +[2026-06-07 03:27:15,983][528169] Updated weights for policy 0, policy_version 12505 (0.0008) +[2026-06-07 03:27:16,129][528169] Updated weights for policy 0, policy_version 12516 (0.0008) +[2026-06-07 03:27:16,251][528169] Updated weights for policy 0, policy_version 12526 (0.0008) +[2026-06-07 03:27:16,391][528169] Updated weights for policy 0, policy_version 12536 (0.0008) +[2026-06-07 03:27:17,049][528169] Updated weights for policy 0, policy_version 12547 (0.0008) +[2026-06-07 03:27:17,193][528169] Updated weights for policy 0, policy_version 12559 (0.0006) +[2026-06-07 03:27:17,317][528169] Updated weights for policy 0, policy_version 12569 (0.0005) +[2026-06-07 03:27:17,448][528169] Updated weights for policy 0, policy_version 12579 (0.0005) +[2026-06-07 03:27:17,603][528169] Updated weights for policy 0, policy_version 12590 (0.0005) +[2026-06-07 03:27:17,777][528169] Updated weights for policy 0, policy_version 12603 (0.0005) +[2026-06-07 03:27:18,409][528169] Updated weights for policy 0, policy_version 12614 (0.0005) +[2026-06-07 03:27:18,534][528169] Updated weights for policy 0, policy_version 12624 (0.0005) +[2026-06-07 03:27:18,698][528169] Updated weights for policy 0, policy_version 12636 (0.0005) +[2026-06-07 03:27:18,833][528169] Updated weights for policy 0, policy_version 12647 (0.0005) +[2026-06-07 03:27:18,982][528169] Updated weights for policy 0, policy_version 12658 (0.0005) +[2026-06-07 03:27:19,139][528169] Updated weights for policy 0, policy_version 12670 (0.0004) +[2026-06-07 03:27:19,765][528169] Updated weights for policy 0, policy_version 12680 (0.0008) +[2026-06-07 03:27:19,893][528169] Updated weights for policy 0, policy_version 12690 (0.0008) +[2026-06-07 03:27:20,033][528169] Updated weights for policy 0, policy_version 12701 (0.0008) +[2026-06-07 03:27:20,103][527010] Fps is (10 sec: 22938.9, 60 sec: 24029.9, 300 sec: 23274.2). Total num frames: 6488064. Throughput: 0: 24374.0. Samples: 6497024. Policy #0 lag: (min: 63.0, avg: 74.0, max: 127.0) +[2026-06-07 03:27:20,105][527010] Avg episode reward: [(0, '50.673')] +[2026-06-07 03:27:20,199][528169] Updated weights for policy 0, policy_version 12714 (0.0008) +[2026-06-07 03:27:20,338][528169] Updated weights for policy 0, policy_version 12724 (0.0008) +[2026-06-07 03:27:20,496][528169] Updated weights for policy 0, policy_version 12736 (0.0008) +[2026-06-07 03:27:21,161][528169] Updated weights for policy 0, policy_version 12748 (0.0009) +[2026-06-07 03:27:21,286][528169] Updated weights for policy 0, policy_version 12758 (0.0009) +[2026-06-07 03:27:21,421][528169] Updated weights for policy 0, policy_version 12768 (0.0009) +[2026-06-07 03:27:21,546][528169] Updated weights for policy 0, policy_version 12778 (0.0008) +[2026-06-07 03:27:21,693][528169] Updated weights for policy 0, policy_version 12789 (0.0008) +[2026-06-07 03:27:21,826][528169] Updated weights for policy 0, policy_version 12799 (0.0008) +[2026-06-07 03:27:22,490][528169] Updated weights for policy 0, policy_version 12810 (0.0009) +[2026-06-07 03:27:22,634][528169] Updated weights for policy 0, policy_version 12821 (0.0008) +[2026-06-07 03:27:22,802][528169] Updated weights for policy 0, policy_version 12834 (0.0008) +[2026-06-07 03:27:22,946][528169] Updated weights for policy 0, policy_version 12845 (0.0008) +[2026-06-07 03:27:23,113][528169] Updated weights for policy 0, policy_version 12857 (0.0008) +[2026-06-07 03:27:23,747][528169] Updated weights for policy 0, policy_version 12867 (0.0008) +[2026-06-07 03:27:23,874][528169] Updated weights for policy 0, policy_version 12877 (0.0008) +[2026-06-07 03:27:24,018][528169] Updated weights for policy 0, policy_version 12888 (0.0008) +[2026-06-07 03:27:24,141][528169] Updated weights for policy 0, policy_version 12898 (0.0008) +[2026-06-07 03:27:24,272][528169] Updated weights for policy 0, policy_version 12908 (0.0008) +[2026-06-07 03:27:24,426][528169] Updated weights for policy 0, policy_version 12920 (0.0008) +[2026-06-07 03:27:25,103][527010] Fps is (10 sec: 26214.1, 60 sec: 24029.8, 300 sec: 23326.0). Total num frames: 6619136. Throughput: 0: 24081.0. Samples: 6637696. Policy #0 lag: (min: 63.0, avg: 74.0, max: 127.0) +[2026-06-07 03:27:25,105][527010] Avg episode reward: [(0, '48.332')] +[2026-06-07 03:27:25,110][528169] Updated weights for policy 0, policy_version 12931 (0.0008) +[2026-06-07 03:27:25,241][528169] Updated weights for policy 0, policy_version 12941 (0.0008) +[2026-06-07 03:27:25,366][528169] Updated weights for policy 0, policy_version 12951 (0.0008) +[2026-06-07 03:27:25,520][528169] Updated weights for policy 0, policy_version 12962 (0.0008) +[2026-06-07 03:27:25,659][528169] Updated weights for policy 0, policy_version 12973 (0.0008) +[2026-06-07 03:27:25,791][528169] Updated weights for policy 0, policy_version 12983 (0.0008) +[2026-06-07 03:27:26,463][528169] Updated weights for policy 0, policy_version 12995 (0.0008) +[2026-06-07 03:27:26,604][528169] Updated weights for policy 0, policy_version 13006 (0.0008) +[2026-06-07 03:27:26,756][528169] Updated weights for policy 0, policy_version 13018 (0.0008) +[2026-06-07 03:27:26,904][528169] Updated weights for policy 0, policy_version 13030 (0.0008) +[2026-06-07 03:27:27,038][528169] Updated weights for policy 0, policy_version 13040 (0.0008) +[2026-06-07 03:27:27,167][528169] Updated weights for policy 0, policy_version 13050 (0.0008) +[2026-06-07 03:27:27,832][528169] Updated weights for policy 0, policy_version 13062 (0.0008) +[2026-06-07 03:27:27,991][528169] Updated weights for policy 0, policy_version 13075 (0.0008) +[2026-06-07 03:27:28,121][528169] Updated weights for policy 0, policy_version 13085 (0.0009) +[2026-06-07 03:27:28,250][528169] Updated weights for policy 0, policy_version 13095 (0.0009) +[2026-06-07 03:27:28,387][528169] Updated weights for policy 0, policy_version 13105 (0.0008) +[2026-06-07 03:27:28,514][528169] Updated weights for policy 0, policy_version 13115 (0.0009) +[2026-06-07 03:27:29,154][528169] Updated weights for policy 0, policy_version 13125 (0.0009) +[2026-06-07 03:27:29,292][528169] Updated weights for policy 0, policy_version 13136 (0.0010) +[2026-06-07 03:27:29,461][528169] Updated weights for policy 0, policy_version 13148 (0.0009) +[2026-06-07 03:27:29,590][528169] Updated weights for policy 0, policy_version 13158 (0.0008) +[2026-06-07 03:27:29,732][528169] Updated weights for policy 0, policy_version 13169 (0.0009) +[2026-06-07 03:27:29,885][528169] Updated weights for policy 0, policy_version 13180 (0.0008) +[2026-06-07 03:27:30,103][527010] Fps is (10 sec: 26214.8, 60 sec: 24576.0, 300 sec: 23376.0). Total num frames: 6750208. Throughput: 0: 24112.4. Samples: 6706688. Policy #0 lag: (min: 20.0, avg: 32.2, max: 84.0) +[2026-06-07 03:27:30,104][527010] Avg episode reward: [(0, '60.673')] +[2026-06-07 03:27:30,109][528093] Saving new best policy, reward=60.673! +[2026-06-07 03:27:30,523][528169] Updated weights for policy 0, policy_version 13190 (0.0006) +[2026-06-07 03:27:30,646][528093] Early stopping after 2 epochs (16 sgd steps), loss delta 0.0000003 +[2026-06-07 03:27:30,648][528169] Updated weights for policy 0, policy_version 13200 (0.0005) +[2026-06-07 03:27:31,442][528169] Updated weights for policy 0, policy_version 13210 (0.0008) +[2026-06-07 03:27:31,577][528169] Updated weights for policy 0, policy_version 13220 (0.0008) +[2026-06-07 03:27:31,700][528169] Updated weights for policy 0, policy_version 13230 (0.0008) +[2026-06-07 03:27:31,835][528169] Updated weights for policy 0, policy_version 13240 (0.0008) +[2026-06-07 03:27:31,954][528169] Updated weights for policy 0, policy_version 13250 (0.0008) +[2026-06-07 03:27:32,142][528169] Updated weights for policy 0, policy_version 13264 (0.0008) +[2026-06-07 03:27:32,780][528169] Updated weights for policy 0, policy_version 13274 (0.0005) +[2026-06-07 03:27:32,927][528169] Updated weights for policy 0, policy_version 13285 (0.0004) +[2026-06-07 03:27:33,058][528169] Updated weights for policy 0, policy_version 13295 (0.0004) +[2026-06-07 03:27:33,212][528169] Updated weights for policy 0, policy_version 13307 (0.0004) +[2026-06-07 03:27:33,365][528169] Updated weights for policy 0, policy_version 13318 (0.0004) +[2026-06-07 03:27:33,488][528169] Updated weights for policy 0, policy_version 13328 (0.0004) +[2026-06-07 03:27:34,110][528169] Updated weights for policy 0, policy_version 13340 (0.0004) +[2026-06-07 03:27:34,261][528169] Updated weights for policy 0, policy_version 13352 (0.0004) +[2026-06-07 03:27:34,394][528169] Updated weights for policy 0, policy_version 13362 (0.0006) +[2026-06-07 03:27:34,537][528169] Updated weights for policy 0, policy_version 13373 (0.0008) +[2026-06-07 03:27:34,674][528169] Updated weights for policy 0, policy_version 13383 (0.0008) +[2026-06-07 03:27:35,103][527010] Fps is (10 sec: 26214.8, 60 sec: 24576.0, 300 sec: 23424.4). Total num frames: 6881280. Throughput: 0: 24470.8. Samples: 6867712. Policy #0 lag: (min: 20.0, avg: 32.2, max: 84.0) +[2026-06-07 03:27:35,104][527010] Avg episode reward: [(0, '51.546')] +[2026-06-07 03:27:35,336][528169] Updated weights for policy 0, policy_version 13393 (0.0008) +[2026-06-07 03:27:35,474][528169] Updated weights for policy 0, policy_version 13404 (0.0008) +[2026-06-07 03:27:35,600][528169] Updated weights for policy 0, policy_version 13414 (0.0008) +[2026-06-07 03:27:35,733][528169] Updated weights for policy 0, policy_version 13424 (0.0008) +[2026-06-07 03:27:35,893][528169] Updated weights for policy 0, policy_version 13436 (0.0009) +[2026-06-07 03:27:36,048][528169] Updated weights for policy 0, policy_version 13448 (0.0008) +[2026-06-07 03:27:36,710][528169] Updated weights for policy 0, policy_version 13459 (0.0009) +[2026-06-07 03:27:36,845][528169] Updated weights for policy 0, policy_version 13470 (0.0008) +[2026-06-07 03:27:36,974][528169] Updated weights for policy 0, policy_version 13480 (0.0008) +[2026-06-07 03:27:37,116][528169] Updated weights for policy 0, policy_version 13491 (0.0008) +[2026-06-07 03:27:37,247][528169] Updated weights for policy 0, policy_version 13501 (0.0008) +[2026-06-07 03:27:37,391][528169] Updated weights for policy 0, policy_version 13512 (0.0008) +[2026-06-07 03:27:38,048][528169] Updated weights for policy 0, policy_version 13522 (0.0008) +[2026-06-07 03:27:38,171][528169] Updated weights for policy 0, policy_version 13532 (0.0008) +[2026-06-07 03:27:38,297][528169] Updated weights for policy 0, policy_version 13542 (0.0008) +[2026-06-07 03:27:38,440][528169] Updated weights for policy 0, policy_version 13553 (0.0008) +[2026-06-07 03:27:38,593][528169] Updated weights for policy 0, policy_version 13564 (0.0008) +[2026-06-07 03:27:38,721][528169] Updated weights for policy 0, policy_version 13574 (0.0008) +[2026-06-07 03:27:38,849][528169] Updated weights for policy 0, policy_version 13584 (0.0008) +[2026-06-07 03:27:39,455][528169] Updated weights for policy 0, policy_version 13594 (0.0008) +[2026-06-07 03:27:39,592][528169] Updated weights for policy 0, policy_version 13604 (0.0008) +[2026-06-07 03:27:39,725][528169] Updated weights for policy 0, policy_version 13614 (0.0009) +[2026-06-07 03:27:39,847][528169] Updated weights for policy 0, policy_version 13624 (0.0009) +[2026-06-07 03:27:39,999][528169] Updated weights for policy 0, policy_version 13635 (0.0008) +[2026-06-07 03:27:40,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24029.9, 300 sec: 23659.6). Total num frames: 6979584. Throughput: 0: 24590.2. Samples: 7020032. Policy #0 lag: (min: 20.0, avg: 32.2, max: 84.0) +[2026-06-07 03:27:40,104][527010] Avg episode reward: [(0, '52.717')] +[2026-06-07 03:27:40,134][528169] Updated weights for policy 0, policy_version 13645 (0.0008) +[2026-06-07 03:27:40,781][528169] Updated weights for policy 0, policy_version 13656 (0.0008) +[2026-06-07 03:27:40,914][528169] Updated weights for policy 0, policy_version 13666 (0.0008) +[2026-06-07 03:27:41,047][528169] Updated weights for policy 0, policy_version 13676 (0.0008) +[2026-06-07 03:27:41,194][528169] Updated weights for policy 0, policy_version 13687 (0.0008) +[2026-06-07 03:27:41,340][528169] Updated weights for policy 0, policy_version 13698 (0.0008) +[2026-06-07 03:27:41,487][528169] Updated weights for policy 0, policy_version 13709 (0.0008) +[2026-06-07 03:27:42,106][528169] Updated weights for policy 0, policy_version 13719 (0.0008) +[2026-06-07 03:27:42,252][528169] Updated weights for policy 0, policy_version 13730 (0.0008) +[2026-06-07 03:27:42,384][528169] Updated weights for policy 0, policy_version 13740 (0.0010) +[2026-06-07 03:27:42,527][528169] Updated weights for policy 0, policy_version 13751 (0.0010) +[2026-06-07 03:27:42,692][528169] Updated weights for policy 0, policy_version 13764 (0.0011) +[2026-06-07 03:27:42,842][528169] Updated weights for policy 0, policy_version 13775 (0.0010) +[2026-06-07 03:27:43,544][528169] Updated weights for policy 0, policy_version 13786 (0.0009) +[2026-06-07 03:27:43,692][528169] Updated weights for policy 0, policy_version 13798 (0.0010) +[2026-06-07 03:27:43,835][528169] Updated weights for policy 0, policy_version 13809 (0.0009) +[2026-06-07 03:27:43,970][528169] Updated weights for policy 0, policy_version 13819 (0.0005) +[2026-06-07 03:27:44,116][528169] Updated weights for policy 0, policy_version 13830 (0.0005) +[2026-06-07 03:27:44,787][528169] Updated weights for policy 0, policy_version 13842 (0.0005) +[2026-06-07 03:27:44,911][528169] Updated weights for policy 0, policy_version 13852 (0.0008) +[2026-06-07 03:27:45,043][528169] Updated weights for policy 0, policy_version 13862 (0.0008) +[2026-06-07 03:27:45,103][527010] Fps is (10 sec: 22936.3, 60 sec: 24575.8, 300 sec: 23659.6). Total num frames: 7110656. Throughput: 0: 24609.8. Samples: 7090560. Policy #0 lag: (min: 63.0, avg: 74.9, max: 127.0) +[2026-06-07 03:27:45,107][527010] Avg episode reward: [(0, '61.943')] +[2026-06-07 03:27:45,174][528169] Updated weights for policy 0, policy_version 13872 (0.0006) +[2026-06-07 03:27:45,328][528169] Updated weights for policy 0, policy_version 13883 (0.0008) +[2026-06-07 03:27:45,457][528169] Updated weights for policy 0, policy_version 13893 (0.0008) +[2026-06-07 03:27:45,593][528169] Updated weights for policy 0, policy_version 13903 (0.0005) +[2026-06-07 03:27:45,600][528093] Saving new best policy, reward=61.943! +[2026-06-07 03:27:46,176][528169] Updated weights for policy 0, policy_version 13913 (0.0008) +[2026-06-07 03:27:46,317][528169] Updated weights for policy 0, policy_version 13924 (0.0008) +[2026-06-07 03:27:46,450][528169] Updated weights for policy 0, policy_version 13935 (0.0009) +[2026-06-07 03:27:46,618][528169] Updated weights for policy 0, policy_version 13947 (0.0009) +[2026-06-07 03:27:46,741][528169] Updated weights for policy 0, policy_version 13957 (0.0008) +[2026-06-07 03:27:46,878][528169] Updated weights for policy 0, policy_version 13967 (0.0009) +[2026-06-07 03:27:47,526][528169] Updated weights for policy 0, policy_version 13977 (0.0010) +[2026-06-07 03:27:47,677][528169] Updated weights for policy 0, policy_version 13989 (0.0008) +[2026-06-07 03:27:47,814][528169] Updated weights for policy 0, policy_version 14000 (0.0008) +[2026-06-07 03:27:47,952][528169] Updated weights for policy 0, policy_version 14010 (0.0009) +[2026-06-07 03:27:48,083][528169] Updated weights for policy 0, policy_version 14020 (0.0008) +[2026-06-07 03:27:48,236][528169] Updated weights for policy 0, policy_version 14031 (0.0008) +[2026-06-07 03:27:48,874][528169] Updated weights for policy 0, policy_version 14042 (0.0008) +[2026-06-07 03:27:49,014][528169] Updated weights for policy 0, policy_version 14053 (0.0008) +[2026-06-07 03:27:49,157][528169] Updated weights for policy 0, policy_version 14064 (0.0008) +[2026-06-07 03:27:49,290][528169] Updated weights for policy 0, policy_version 14074 (0.0008) +[2026-06-07 03:27:49,434][528169] Updated weights for policy 0, policy_version 14085 (0.0008) +[2026-06-07 03:27:49,568][528169] Updated weights for policy 0, policy_version 14095 (0.0008) +[2026-06-07 03:27:50,103][527010] Fps is (10 sec: 26214.4, 60 sec: 24576.0, 300 sec: 23659.6). Total num frames: 7241728. Throughput: 0: 24453.7. Samples: 7231488. Policy #0 lag: (min: 63.0, avg: 74.9, max: 127.0) +[2026-06-07 03:27:50,104][527010] Avg episode reward: [(0, '56.656')] +[2026-06-07 03:27:50,180][528169] Updated weights for policy 0, policy_version 14105 (0.0008) +[2026-06-07 03:27:50,318][528169] Updated weights for policy 0, policy_version 14116 (0.0008) +[2026-06-07 03:27:50,456][528169] Updated weights for policy 0, policy_version 14126 (0.0008) +[2026-06-07 03:27:50,622][528169] Updated weights for policy 0, policy_version 14139 (0.0008) +[2026-06-07 03:27:50,756][528169] Updated weights for policy 0, policy_version 14149 (0.0008) +[2026-06-07 03:27:50,900][528169] Updated weights for policy 0, policy_version 14160 (0.0008) +[2026-06-07 03:27:51,529][528169] Updated weights for policy 0, policy_version 14170 (0.0011) +[2026-06-07 03:27:51,680][528169] Updated weights for policy 0, policy_version 14181 (0.0010) +[2026-06-07 03:27:51,832][528169] Updated weights for policy 0, policy_version 14193 (0.0008) +[2026-06-07 03:27:51,962][528169] Updated weights for policy 0, policy_version 14203 (0.0009) +[2026-06-07 03:27:52,107][528169] Updated weights for policy 0, policy_version 14214 (0.0005) +[2026-06-07 03:27:52,764][528169] Updated weights for policy 0, policy_version 14225 (0.0006) +[2026-06-07 03:27:52,913][528169] Updated weights for policy 0, policy_version 14237 (0.0008) +[2026-06-07 03:27:53,056][528169] Updated weights for policy 0, policy_version 14248 (0.0008) +[2026-06-07 03:27:53,195][528169] Updated weights for policy 0, policy_version 14258 (0.0009) +[2026-06-07 03:27:53,335][528169] Updated weights for policy 0, policy_version 14269 (0.0008) +[2026-06-07 03:27:53,487][528169] Updated weights for policy 0, policy_version 14280 (0.0008) +[2026-06-07 03:27:54,135][528169] Updated weights for policy 0, policy_version 14290 (0.0008) +[2026-06-07 03:27:54,281][528169] Updated weights for policy 0, policy_version 14302 (0.0008) +[2026-06-07 03:27:54,422][528169] Updated weights for policy 0, policy_version 14312 (0.0008) +[2026-06-07 03:27:54,563][528169] Updated weights for policy 0, policy_version 14323 (0.0009) +[2026-06-07 03:27:54,712][528169] Updated weights for policy 0, policy_version 14334 (0.0008) +[2026-06-07 03:27:54,877][528169] Updated weights for policy 0, policy_version 14347 (0.0008) +[2026-06-07 03:27:55,103][527010] Fps is (10 sec: 26215.9, 60 sec: 24576.0, 300 sec: 23659.6). Total num frames: 7372800. Throughput: 0: 24678.8. Samples: 7387392. Policy #0 lag: (min: 63.0, avg: 74.7, max: 127.0) +[2026-06-07 03:27:55,104][527010] Avg episode reward: [(0, '58.250')] +[2026-06-07 03:27:55,508][528169] Updated weights for policy 0, policy_version 14357 (0.0008) +[2026-06-07 03:27:55,650][528169] Updated weights for policy 0, policy_version 14368 (0.0008) +[2026-06-07 03:27:55,779][528169] Updated weights for policy 0, policy_version 14378 (0.0008) +[2026-06-07 03:27:55,916][528169] Updated weights for policy 0, policy_version 14388 (0.0008) +[2026-06-07 03:27:56,062][528169] Updated weights for policy 0, policy_version 14400 (0.0008) +[2026-06-07 03:27:56,234][528169] Updated weights for policy 0, policy_version 14413 (0.0008) +[2026-06-07 03:27:56,872][528169] Updated weights for policy 0, policy_version 14423 (0.0008) +[2026-06-07 03:27:57,013][528169] Updated weights for policy 0, policy_version 14434 (0.0008) +[2026-06-07 03:27:57,178][528169] Updated weights for policy 0, policy_version 14447 (0.0008) +[2026-06-07 03:27:57,313][528169] Updated weights for policy 0, policy_version 14457 (0.0008) +[2026-06-07 03:27:57,482][528169] Updated weights for policy 0, policy_version 14470 (0.0008) +[2026-06-07 03:27:58,153][528169] Updated weights for policy 0, policy_version 14481 (0.0009) +[2026-06-07 03:27:58,283][528169] Updated weights for policy 0, policy_version 14491 (0.0008) +[2026-06-07 03:27:58,411][528169] Updated weights for policy 0, policy_version 14501 (0.0008) +[2026-06-07 03:27:58,563][528169] Updated weights for policy 0, policy_version 14512 (0.0008) +[2026-06-07 03:27:58,695][528169] Updated weights for policy 0, policy_version 14522 (0.0008) +[2026-06-07 03:27:58,824][528169] Updated weights for policy 0, policy_version 14532 (0.0008) +[2026-06-07 03:27:58,977][528169] Updated weights for policy 0, policy_version 14544 (0.0010) +[2026-06-07 03:27:59,610][528169] Updated weights for policy 0, policy_version 14554 (0.0005) +[2026-06-07 03:27:59,737][528169] Updated weights for policy 0, policy_version 14564 (0.0009) +[2026-06-07 03:27:59,895][528169] Updated weights for policy 0, policy_version 14576 (0.0011) +[2026-06-07 03:28:00,049][528169] Updated weights for policy 0, policy_version 14588 (0.0011) +[2026-06-07 03:28:00,103][527010] Fps is (10 sec: 22937.7, 60 sec: 24576.0, 300 sec: 23548.5). Total num frames: 7471104. Throughput: 0: 24692.6. Samples: 7457152. Policy #0 lag: (min: 63.0, avg: 74.7, max: 127.0) +[2026-06-07 03:28:00,104][527010] Avg episode reward: [(0, '60.841')] +[2026-06-07 03:28:00,185][528169] Updated weights for policy 0, policy_version 14598 (0.0011) +[2026-06-07 03:28:00,808][528169] Updated weights for policy 0, policy_version 14609 (0.0008) +[2026-06-07 03:28:00,935][528169] Updated weights for policy 0, policy_version 14619 (0.0008) +[2026-06-07 03:28:01,067][528169] Updated weights for policy 0, policy_version 14629 (0.0008) +[2026-06-07 03:28:01,213][528169] Updated weights for policy 0, policy_version 14640 (0.0008) +[2026-06-07 03:28:01,350][528169] Updated weights for policy 0, policy_version 14651 (0.0009) +[2026-06-07 03:28:01,489][528169] Updated weights for policy 0, policy_version 14662 (0.0009) +[2026-06-07 03:28:02,199][528169] Updated weights for policy 0, policy_version 14673 (0.0008) +[2026-06-07 03:28:02,335][528169] Updated weights for policy 0, policy_version 14684 (0.0008) +[2026-06-07 03:28:02,522][528169] Updated weights for policy 0, policy_version 14698 (0.0008) +[2026-06-07 03:28:02,654][528169] Updated weights for policy 0, policy_version 14708 (0.0008) +[2026-06-07 03:28:02,776][528169] Updated weights for policy 0, policy_version 14718 (0.0008) +[2026-06-07 03:28:02,925][528169] Updated weights for policy 0, policy_version 14729 (0.0008) +[2026-06-07 03:28:03,550][528169] Updated weights for policy 0, policy_version 14740 (0.0008) +[2026-06-07 03:28:03,693][528169] Updated weights for policy 0, policy_version 14751 (0.0008) +[2026-06-07 03:28:03,827][528169] Updated weights for policy 0, policy_version 14761 (0.0009) +[2026-06-07 03:28:03,949][528169] Updated weights for policy 0, policy_version 14771 (0.0008) +[2026-06-07 03:28:04,111][528169] Updated weights for policy 0, policy_version 14783 (0.0008) +[2026-06-07 03:28:04,271][528169] Updated weights for policy 0, policy_version 14795 (0.0008) +[2026-06-07 03:28:04,949][528169] Updated weights for policy 0, policy_version 14805 (0.0008) +[2026-06-07 03:28:05,076][528169] Updated weights for policy 0, policy_version 14815 (0.0008) +[2026-06-07 03:28:05,103][527010] Fps is (10 sec: 22937.5, 60 sec: 24576.0, 300 sec: 23659.6). Total num frames: 7602176. Throughput: 0: 24570.4. Samples: 7602688. Policy #0 lag: (min: 38.0, avg: 51.0, max: 102.0) +[2026-06-07 03:28:05,104][527010] Avg episode reward: [(0, '62.549')] +[2026-06-07 03:28:05,210][528169] Updated weights for policy 0, policy_version 14825 (0.0009) +[2026-06-07 03:28:05,342][528169] Updated weights for policy 0, policy_version 14835 (0.0009) +[2026-06-07 03:28:05,472][528169] Updated weights for policy 0, policy_version 14845 (0.0008) +[2026-06-07 03:28:05,622][528169] Updated weights for policy 0, policy_version 14856 (0.0008) +[2026-06-07 03:28:05,720][528093] Saving new best policy, reward=62.549! +[2026-06-07 03:28:06,245][528169] Updated weights for policy 0, policy_version 14866 (0.0008) +[2026-06-07 03:28:06,381][528169] Updated weights for policy 0, policy_version 14876 (0.0009) +[2026-06-07 03:28:06,523][528169] Updated weights for policy 0, policy_version 14887 (0.0009) +[2026-06-07 03:28:06,652][528169] Updated weights for policy 0, policy_version 14897 (0.0008) +[2026-06-07 03:28:06,787][528169] Updated weights for policy 0, policy_version 14907 (0.0008) +[2026-06-07 03:28:06,938][528169] Updated weights for policy 0, policy_version 14918 (0.0008) +[2026-06-07 03:28:07,595][528169] Updated weights for policy 0, policy_version 14930 (0.0008) +[2026-06-07 03:28:07,722][528169] Updated weights for policy 0, policy_version 14940 (0.0008) +[2026-06-07 03:28:07,853][528169] Updated weights for policy 0, policy_version 14950 (0.0009) +[2026-06-07 03:28:08,013][528169] Updated weights for policy 0, policy_version 14962 (0.0008) +[2026-06-07 03:28:08,152][528169] Updated weights for policy 0, policy_version 14972 (0.0005) +[2026-06-07 03:28:08,300][528169] Updated weights for policy 0, policy_version 14983 (0.0005) +[2026-06-07 03:28:08,945][528169] Updated weights for policy 0, policy_version 14995 (0.0004) +[2026-06-07 03:28:09,071][528169] Updated weights for policy 0, policy_version 15005 (0.0004) +[2026-06-07 03:28:09,205][528169] Updated weights for policy 0, policy_version 15015 (0.0004) +[2026-06-07 03:28:09,334][528169] Updated weights for policy 0, policy_version 15025 (0.0004) +[2026-06-07 03:28:09,467][528169] Updated weights for policy 0, policy_version 15035 (0.0004) +[2026-06-07 03:28:09,613][528169] Updated weights for policy 0, policy_version 15046 (0.0004) +[2026-06-07 03:28:09,738][528169] Updated weights for policy 0, policy_version 15056 (0.0007) +[2026-06-07 03:28:10,103][527010] Fps is (10 sec: 26214.4, 60 sec: 24576.3, 300 sec: 23770.7). Total num frames: 7733248. Throughput: 0: 24758.1. Samples: 7751808. Policy #0 lag: (min: 38.0, avg: 51.0, max: 102.0) +[2026-06-07 03:28:10,104][527010] Avg episode reward: [(0, '66.216')] +[2026-06-07 03:28:10,109][528093] Saving new best policy, reward=66.216! +[2026-06-07 03:28:10,391][528169] Updated weights for policy 0, policy_version 15067 (0.0008) +[2026-06-07 03:28:10,519][528169] Updated weights for policy 0, policy_version 15077 (0.0008) +[2026-06-07 03:28:10,652][528169] Updated weights for policy 0, policy_version 15087 (0.0008) +[2026-06-07 03:28:10,786][528169] Updated weights for policy 0, policy_version 15097 (0.0008) +[2026-06-07 03:28:10,921][528169] Updated weights for policy 0, policy_version 15107 (0.0008) +[2026-06-07 03:28:11,054][528169] Updated weights for policy 0, policy_version 15117 (0.0008) +[2026-06-07 03:28:11,739][528169] Updated weights for policy 0, policy_version 15129 (0.0009) +[2026-06-07 03:28:11,861][528169] Updated weights for policy 0, policy_version 15139 (0.0008) +[2026-06-07 03:28:11,993][528169] Updated weights for policy 0, policy_version 15149 (0.0008) +[2026-06-07 03:28:12,124][528169] Updated weights for policy 0, policy_version 15159 (0.0008) +[2026-06-07 03:28:12,288][528169] Updated weights for policy 0, policy_version 15171 (0.0008) +[2026-06-07 03:28:12,446][528169] Updated weights for policy 0, policy_version 15183 (0.0008) +[2026-06-07 03:28:13,091][528169] Updated weights for policy 0, policy_version 15193 (0.0008) +[2026-06-07 03:28:13,234][528169] Updated weights for policy 0, policy_version 15204 (0.0008) +[2026-06-07 03:28:13,389][528169] Updated weights for policy 0, policy_version 15216 (0.0008) +[2026-06-07 03:28:13,533][528169] Updated weights for policy 0, policy_version 15227 (0.0008) +[2026-06-07 03:28:13,681][528169] Updated weights for policy 0, policy_version 15238 (0.0008) +[2026-06-07 03:28:14,343][528169] Updated weights for policy 0, policy_version 15249 (0.0008) +[2026-06-07 03:28:14,468][528169] Updated weights for policy 0, policy_version 15259 (0.0008) +[2026-06-07 03:28:14,614][528169] Updated weights for policy 0, policy_version 15270 (0.0009) +[2026-06-07 03:28:14,743][528169] Updated weights for policy 0, policy_version 15280 (0.0008) +[2026-06-07 03:28:14,888][528169] Updated weights for policy 0, policy_version 15291 (0.0008) +[2026-06-07 03:28:15,029][528169] Updated weights for policy 0, policy_version 15301 (0.0008) +[2026-06-07 03:28:15,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24576.0, 300 sec: 23659.6). Total num frames: 7831552. Throughput: 0: 24772.3. Samples: 7821440. Policy #0 lag: (min: 38.0, avg: 51.0, max: 102.0) +[2026-06-07 03:28:15,104][527010] Avg episode reward: [(0, '71.781')] +[2026-06-07 03:28:15,161][528093] Saving new best policy, reward=71.781! +[2026-06-07 03:28:15,702][528169] Updated weights for policy 0, policy_version 15313 (0.0008) +[2026-06-07 03:28:15,849][528169] Updated weights for policy 0, policy_version 15324 (0.0008) +[2026-06-07 03:28:15,978][528169] Updated weights for policy 0, policy_version 15334 (0.0008) +[2026-06-07 03:28:16,114][528169] Updated weights for policy 0, policy_version 15344 (0.0008) +[2026-06-07 03:28:16,238][528169] Updated weights for policy 0, policy_version 15354 (0.0008) +[2026-06-07 03:28:16,380][528169] Updated weights for policy 0, policy_version 15364 (0.0008) +[2026-06-07 03:28:16,534][528169] Updated weights for policy 0, policy_version 15376 (0.0008) +[2026-06-07 03:28:17,166][528169] Updated weights for policy 0, policy_version 15387 (0.0008) +[2026-06-07 03:28:17,294][528169] Updated weights for policy 0, policy_version 15397 (0.0008) +[2026-06-07 03:28:17,426][528169] Updated weights for policy 0, policy_version 15407 (0.0008) +[2026-06-07 03:28:17,559][528169] Updated weights for policy 0, policy_version 15417 (0.0008) +[2026-06-07 03:28:17,727][528169] Updated weights for policy 0, policy_version 15430 (0.0008) +[2026-06-07 03:28:18,382][528169] Updated weights for policy 0, policy_version 15441 (0.0008) +[2026-06-07 03:28:18,520][528169] Updated weights for policy 0, policy_version 15451 (0.0008) +[2026-06-07 03:28:18,657][528169] Updated weights for policy 0, policy_version 15462 (0.0008) +[2026-06-07 03:28:18,807][528169] Updated weights for policy 0, policy_version 15473 (0.0008) +[2026-06-07 03:28:18,935][528169] Updated weights for policy 0, policy_version 15483 (0.0007) +[2026-06-07 03:28:19,077][528169] Updated weights for policy 0, policy_version 15493 (0.0006) +[2026-06-07 03:28:19,209][528169] Updated weights for policy 0, policy_version 15503 (0.0008) +[2026-06-07 03:28:19,829][528169] Updated weights for policy 0, policy_version 15514 (0.0009) +[2026-06-07 03:28:19,962][528169] Updated weights for policy 0, policy_version 15524 (0.0008) +[2026-06-07 03:28:20,103][527010] Fps is (10 sec: 22937.3, 60 sec: 24576.0, 300 sec: 23770.7). Total num frames: 7962624. Throughput: 0: 24575.9. Samples: 7973632. Policy #0 lag: (min: 46.0, avg: 57.9, max: 110.0) +[2026-06-07 03:28:20,104][527010] Avg episode reward: [(0, '58.661')] +[2026-06-07 03:28:20,118][528169] Updated weights for policy 0, policy_version 15536 (0.0009) +[2026-06-07 03:28:20,253][528169] Updated weights for policy 0, policy_version 15546 (0.0008) +[2026-06-07 03:28:20,395][528169] Updated weights for policy 0, policy_version 15557 (0.0008) +[2026-06-07 03:28:20,522][528169] Updated weights for policy 0, policy_version 15567 (0.0008) +[2026-06-07 03:28:21,149][528169] Updated weights for policy 0, policy_version 15577 (0.0009) +[2026-06-07 03:28:21,312][528169] Updated weights for policy 0, policy_version 15590 (0.0008) +[2026-06-07 03:28:21,468][528169] Updated weights for policy 0, policy_version 15602 (0.0008) +[2026-06-07 03:28:21,619][528169] Updated weights for policy 0, policy_version 15614 (0.0008) +[2026-06-07 03:28:21,797][528169] Updated weights for policy 0, policy_version 15627 (0.0008) +[2026-06-07 03:28:22,475][528169] Updated weights for policy 0, policy_version 15639 (0.0008) +[2026-06-07 03:28:22,599][528169] Updated weights for policy 0, policy_version 15649 (0.0008) +[2026-06-07 03:28:22,736][528169] Updated weights for policy 0, policy_version 15659 (0.0008) +[2026-06-07 03:28:22,875][528169] Updated weights for policy 0, policy_version 15670 (0.0008) +[2026-06-07 03:28:23,009][528169] Updated weights for policy 0, policy_version 15680 (0.0008) +[2026-06-07 03:28:23,144][528169] Updated weights for policy 0, policy_version 15690 (0.0008) +[2026-06-07 03:28:23,817][528169] Updated weights for policy 0, policy_version 15701 (0.0008) +[2026-06-07 03:28:23,954][528169] Updated weights for policy 0, policy_version 15711 (0.0009) +[2026-06-07 03:28:24,085][528169] Updated weights for policy 0, policy_version 15721 (0.0008) +[2026-06-07 03:28:24,251][528169] Updated weights for policy 0, policy_version 15734 (0.0008) +[2026-06-07 03:28:24,403][528169] Updated weights for policy 0, policy_version 15746 (0.0008) +[2026-06-07 03:28:24,547][528169] Updated weights for policy 0, policy_version 15756 (0.0008) +[2026-06-07 03:28:25,103][527010] Fps is (10 sec: 26214.3, 60 sec: 24576.0, 300 sec: 23770.7). Total num frames: 8093696. Throughput: 0: 24334.2. Samples: 8115072. Policy #0 lag: (min: 46.0, avg: 57.9, max: 110.0) +[2026-06-07 03:28:25,104][527010] Avg episode reward: [(0, '62.611')] +[2026-06-07 03:28:25,178][528169] Updated weights for policy 0, policy_version 15766 (0.0009) +[2026-06-07 03:28:25,308][528169] Updated weights for policy 0, policy_version 15777 (0.0008) +[2026-06-07 03:28:25,443][528169] Updated weights for policy 0, policy_version 15787 (0.0008) +[2026-06-07 03:28:25,576][528169] Updated weights for policy 0, policy_version 15797 (0.0008) +[2026-06-07 03:28:25,707][528169] Updated weights for policy 0, policy_version 15807 (0.0007) +[2026-06-07 03:28:25,837][528169] Updated weights for policy 0, policy_version 15817 (0.0011) +[2026-06-07 03:28:26,495][528169] Updated weights for policy 0, policy_version 15827 (0.0008) +[2026-06-07 03:28:26,633][528169] Updated weights for policy 0, policy_version 15838 (0.0008) +[2026-06-07 03:28:26,757][528169] Updated weights for policy 0, policy_version 15848 (0.0008) +[2026-06-07 03:28:26,908][528169] Updated weights for policy 0, policy_version 15859 (0.0008) +[2026-06-07 03:28:27,047][528169] Updated weights for policy 0, policy_version 15869 (0.0009) +[2026-06-07 03:28:27,194][528169] Updated weights for policy 0, policy_version 15880 (0.0008) +[2026-06-07 03:28:27,813][528169] Updated weights for policy 0, policy_version 15890 (0.0008) +[2026-06-07 03:28:27,951][528169] Updated weights for policy 0, policy_version 15901 (0.0008) +[2026-06-07 03:28:28,113][528169] Updated weights for policy 0, policy_version 15913 (0.0008) +[2026-06-07 03:28:28,237][528169] Updated weights for policy 0, policy_version 15923 (0.0008) +[2026-06-07 03:28:28,397][528169] Updated weights for policy 0, policy_version 15935 (0.0008) +[2026-06-07 03:28:28,532][528169] Updated weights for policy 0, policy_version 15945 (0.0008) +[2026-06-07 03:28:29,178][528169] Updated weights for policy 0, policy_version 15956 (0.0008) +[2026-06-07 03:28:29,304][528169] Updated weights for policy 0, policy_version 15966 (0.0008) +[2026-06-07 03:28:29,436][528169] Updated weights for policy 0, policy_version 15976 (0.0008) +[2026-06-07 03:28:29,578][528169] Updated weights for policy 0, policy_version 15987 (0.0008) +[2026-06-07 03:28:29,720][528169] Updated weights for policy 0, policy_version 15998 (0.0008) +[2026-06-07 03:28:29,883][528169] Updated weights for policy 0, policy_version 16010 (0.0009) +[2026-06-07 03:28:30,103][527010] Fps is (10 sec: 26214.5, 60 sec: 24576.0, 300 sec: 23881.8). Total num frames: 8224768. Throughput: 0: 24323.1. Samples: 8185088. Policy #0 lag: (min: 63.0, avg: 76.5, max: 127.0) +[2026-06-07 03:28:30,104][527010] Avg episode reward: [(0, '75.667')] +[2026-06-07 03:28:30,110][528093] Saving new best policy, reward=75.667! +[2026-06-07 03:28:30,528][528169] Updated weights for policy 0, policy_version 16020 (0.0008) +[2026-06-07 03:28:30,680][528169] Updated weights for policy 0, policy_version 16032 (0.0008) +[2026-06-07 03:28:30,821][528169] Updated weights for policy 0, policy_version 16043 (0.0008) +[2026-06-07 03:28:30,967][528169] Updated weights for policy 0, policy_version 16054 (0.0008) +[2026-06-07 03:28:31,108][528169] Updated weights for policy 0, policy_version 16065 (0.0008) +[2026-06-07 03:28:31,261][528169] Updated weights for policy 0, policy_version 16076 (0.0008) +[2026-06-07 03:28:31,917][528169] Updated weights for policy 0, policy_version 16087 (0.0008) +[2026-06-07 03:28:32,057][528169] Updated weights for policy 0, policy_version 16098 (0.0008) +[2026-06-07 03:28:32,203][528169] Updated weights for policy 0, policy_version 16109 (0.0008) +[2026-06-07 03:28:32,368][528169] Updated weights for policy 0, policy_version 16121 (0.0008) +[2026-06-07 03:28:32,496][528169] Updated weights for policy 0, policy_version 16131 (0.0008) +[2026-06-07 03:28:32,648][528169] Updated weights for policy 0, policy_version 16142 (0.0008) +[2026-06-07 03:28:33,278][528169] Updated weights for policy 0, policy_version 16152 (0.0008) +[2026-06-07 03:28:33,432][528169] Updated weights for policy 0, policy_version 16164 (0.0008) +[2026-06-07 03:28:33,568][528169] Updated weights for policy 0, policy_version 16174 (0.0008) +[2026-06-07 03:28:33,718][528169] Updated weights for policy 0, policy_version 16186 (0.0008) +[2026-06-07 03:28:33,852][528169] Updated weights for policy 0, policy_version 16196 (0.0008) +[2026-06-07 03:28:34,006][528169] Updated weights for policy 0, policy_version 16208 (0.0008) +[2026-06-07 03:28:34,640][528169] Updated weights for policy 0, policy_version 16220 (0.0008) +[2026-06-07 03:28:34,777][528169] Updated weights for policy 0, policy_version 16231 (0.0008) +[2026-06-07 03:28:34,924][528169] Updated weights for policy 0, policy_version 16242 (0.0009) +[2026-06-07 03:28:35,053][528169] Updated weights for policy 0, policy_version 16252 (0.0008) +[2026-06-07 03:28:35,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24029.8, 300 sec: 23881.8). Total num frames: 8323072. Throughput: 0: 24598.7. Samples: 8338432. Policy #0 lag: (min: 63.0, avg: 76.5, max: 127.0) +[2026-06-07 03:28:35,104][527010] Avg episode reward: [(0, '57.412')] +[2026-06-07 03:28:35,202][528169] Updated weights for policy 0, policy_version 16263 (0.0008) +[2026-06-07 03:28:35,860][528169] Updated weights for policy 0, policy_version 16274 (0.0008) +[2026-06-07 03:28:35,985][528169] Updated weights for policy 0, policy_version 16284 (0.0008) +[2026-06-07 03:28:36,127][528169] Updated weights for policy 0, policy_version 16295 (0.0008) +[2026-06-07 03:28:36,273][528169] Updated weights for policy 0, policy_version 16306 (0.0008) +[2026-06-07 03:28:36,417][528169] Updated weights for policy 0, policy_version 16317 (0.0008) +[2026-06-07 03:28:36,559][528169] Updated weights for policy 0, policy_version 16328 (0.0008) +[2026-06-07 03:28:37,210][528169] Updated weights for policy 0, policy_version 16338 (0.0009) +[2026-06-07 03:28:37,339][528169] Updated weights for policy 0, policy_version 16348 (0.0008) +[2026-06-07 03:28:37,469][528169] Updated weights for policy 0, policy_version 16358 (0.0009) +[2026-06-07 03:28:37,629][528169] Updated weights for policy 0, policy_version 16370 (0.0008) +[2026-06-07 03:28:37,772][528169] Updated weights for policy 0, policy_version 16381 (0.0008) +[2026-06-07 03:28:37,909][528169] Updated weights for policy 0, policy_version 16392 (0.0008) +[2026-06-07 03:28:38,574][528169] Updated weights for policy 0, policy_version 16402 (0.0008) +[2026-06-07 03:28:38,713][528169] Updated weights for policy 0, policy_version 16413 (0.0008) +[2026-06-07 03:28:38,857][528169] Updated weights for policy 0, policy_version 16424 (0.0008) +[2026-06-07 03:28:38,982][528169] Updated weights for policy 0, policy_version 16434 (0.0008) +[2026-06-07 03:28:39,122][528169] Updated weights for policy 0, policy_version 16445 (0.0008) +[2026-06-07 03:28:39,278][528169] Updated weights for policy 0, policy_version 16456 (0.0008) +[2026-06-07 03:28:39,959][528169] Updated weights for policy 0, policy_version 16468 (0.0008) +[2026-06-07 03:28:40,092][528169] Updated weights for policy 0, policy_version 16478 (0.0008) +[2026-06-07 03:28:40,103][527010] Fps is (10 sec: 22937.3, 60 sec: 24575.9, 300 sec: 23881.7). Total num frames: 8454144. Throughput: 0: 24191.9. Samples: 8476032. Policy #0 lag: (min: 63.0, avg: 76.5, max: 127.0) +[2026-06-07 03:28:40,104][527010] Avg episode reward: [(0, '64.724')] +[2026-06-07 03:28:40,220][528169] Updated weights for policy 0, policy_version 16488 (0.0008) +[2026-06-07 03:28:40,346][528169] Updated weights for policy 0, policy_version 16498 (0.0008) +[2026-06-07 03:28:40,478][528169] Updated weights for policy 0, policy_version 16508 (0.0008) +[2026-06-07 03:28:40,622][528169] Updated weights for policy 0, policy_version 16518 (0.0008) +[2026-06-07 03:28:41,237][528169] Updated weights for policy 0, policy_version 16529 (0.0008) +[2026-06-07 03:28:41,389][528169] Updated weights for policy 0, policy_version 16541 (0.0008) +[2026-06-07 03:28:41,528][528169] Updated weights for policy 0, policy_version 16552 (0.0006) +[2026-06-07 03:28:41,679][528169] Updated weights for policy 0, policy_version 16563 (0.0006) +[2026-06-07 03:28:41,825][528169] Updated weights for policy 0, policy_version 16574 (0.0008) +[2026-06-07 03:28:41,954][528169] Updated weights for policy 0, policy_version 16584 (0.0008) +[2026-06-07 03:28:42,609][528169] Updated weights for policy 0, policy_version 16595 (0.0008) +[2026-06-07 03:28:42,766][528169] Updated weights for policy 0, policy_version 16607 (0.0008) +[2026-06-07 03:28:42,914][528169] Updated weights for policy 0, policy_version 16618 (0.0008) +[2026-06-07 03:28:43,063][528169] Updated weights for policy 0, policy_version 16630 (0.0009) +[2026-06-07 03:28:43,234][528169] Updated weights for policy 0, policy_version 16643 (0.0008) +[2026-06-07 03:28:43,380][528169] Updated weights for policy 0, policy_version 16654 (0.0008) +[2026-06-07 03:28:44,000][528169] Updated weights for policy 0, policy_version 16664 (0.0008) +[2026-06-07 03:28:44,147][528169] Updated weights for policy 0, policy_version 16675 (0.0008) +[2026-06-07 03:28:44,317][528169] Updated weights for policy 0, policy_version 16688 (0.0008) +[2026-06-07 03:28:44,450][528169] Updated weights for policy 0, policy_version 16698 (0.0008) +[2026-06-07 03:28:44,595][528169] Updated weights for policy 0, policy_version 16709 (0.0008) +[2026-06-07 03:28:44,726][528169] Updated weights for policy 0, policy_version 16719 (0.0008) +[2026-06-07 03:28:45,103][527010] Fps is (10 sec: 26214.6, 60 sec: 24576.2, 300 sec: 23992.8). Total num frames: 8585216. Throughput: 0: 24294.4. Samples: 8550400. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) +[2026-06-07 03:28:45,103][527010] Avg episode reward: [(0, '67.096')] +[2026-06-07 03:28:45,358][528169] Updated weights for policy 0, policy_version 16729 (0.0008) +[2026-06-07 03:28:45,498][528169] Updated weights for policy 0, policy_version 16740 (0.0008) +[2026-06-07 03:28:45,650][528169] Updated weights for policy 0, policy_version 16752 (0.0008) +[2026-06-07 03:28:45,794][528169] Updated weights for policy 0, policy_version 16763 (0.0008) +[2026-06-07 03:28:45,938][528169] Updated weights for policy 0, policy_version 16774 (0.0008) +[2026-06-07 03:28:46,598][528169] Updated weights for policy 0, policy_version 16785 (0.0008) +[2026-06-07 03:28:46,736][528169] Updated weights for policy 0, policy_version 16796 (0.0008) +[2026-06-07 03:28:46,892][528169] Updated weights for policy 0, policy_version 16809 (0.0008) +[2026-06-07 03:28:47,034][528169] Updated weights for policy 0, policy_version 16820 (0.0008) +[2026-06-07 03:28:47,183][528169] Updated weights for policy 0, policy_version 16831 (0.0008) +[2026-06-07 03:28:47,313][528169] Updated weights for policy 0, policy_version 16841 (0.0008) +[2026-06-07 03:28:47,974][528169] Updated weights for policy 0, policy_version 16852 (0.0008) +[2026-06-07 03:28:48,129][528169] Updated weights for policy 0, policy_version 16865 (0.0009) +[2026-06-07 03:28:48,297][528169] Updated weights for policy 0, policy_version 16878 (0.0008) +[2026-06-07 03:28:48,455][528169] Updated weights for policy 0, policy_version 16891 (0.0008) +[2026-06-07 03:28:48,610][528169] Updated weights for policy 0, policy_version 16903 (0.0009) +[2026-06-07 03:28:49,290][528169] Updated weights for policy 0, policy_version 16914 (0.0008) +[2026-06-07 03:28:49,448][528169] Updated weights for policy 0, policy_version 16927 (0.0008) +[2026-06-07 03:28:49,613][528169] Updated weights for policy 0, policy_version 16940 (0.0009) +[2026-06-07 03:28:49,763][528169] Updated weights for policy 0, policy_version 16952 (0.0008) +[2026-06-07 03:28:49,904][528169] Updated weights for policy 0, policy_version 16963 (0.0008) +[2026-06-07 03:28:50,069][528169] Updated weights for policy 0, policy_version 16976 (0.0008) +[2026-06-07 03:28:50,103][527010] Fps is (10 sec: 26215.0, 60 sec: 24576.0, 300 sec: 23992.8). Total num frames: 8716288. Throughput: 0: 24322.9. Samples: 8697216. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) +[2026-06-07 03:28:50,104][527010] Avg episode reward: [(0, '71.487')] +[2026-06-07 03:28:50,742][528169] Updated weights for policy 0, policy_version 16988 (0.0008) +[2026-06-07 03:28:50,875][528169] Updated weights for policy 0, policy_version 16999 (0.0008) +[2026-06-07 03:28:51,014][528169] Updated weights for policy 0, policy_version 17010 (0.0008) +[2026-06-07 03:28:51,169][528169] Updated weights for policy 0, policy_version 17022 (0.0008) +[2026-06-07 03:28:51,311][528169] Updated weights for policy 0, policy_version 17033 (0.0009) +[2026-06-07 03:28:51,998][528169] Updated weights for policy 0, policy_version 17044 (0.0008) +[2026-06-07 03:28:52,150][528169] Updated weights for policy 0, policy_version 17056 (0.0008) +[2026-06-07 03:28:52,279][528169] Updated weights for policy 0, policy_version 17066 (0.0008) +[2026-06-07 03:28:52,427][528169] Updated weights for policy 0, policy_version 17078 (0.0008) +[2026-06-07 03:28:52,585][528169] Updated weights for policy 0, policy_version 17091 (0.0008) +[2026-06-07 03:28:52,743][528169] Updated weights for policy 0, policy_version 17103 (0.0008) +[2026-06-07 03:28:53,409][528169] Updated weights for policy 0, policy_version 17115 (0.0008) +[2026-06-07 03:28:53,570][528169] Updated weights for policy 0, policy_version 17128 (0.0008) +[2026-06-07 03:28:53,728][528169] Updated weights for policy 0, policy_version 17141 (0.0008) +[2026-06-07 03:28:53,877][528169] Updated weights for policy 0, policy_version 17153 (0.0008) +[2026-06-07 03:28:54,021][528169] Updated weights for policy 0, policy_version 17164 (0.0008) +[2026-06-07 03:28:54,730][528169] Updated weights for policy 0, policy_version 17175 (0.0008) +[2026-06-07 03:28:54,864][528169] Updated weights for policy 0, policy_version 17186 (0.0008) +[2026-06-07 03:28:54,993][528169] Updated weights for policy 0, policy_version 17196 (0.0008) +[2026-06-07 03:28:55,103][527010] Fps is (10 sec: 22937.1, 60 sec: 24029.8, 300 sec: 23992.8). Total num frames: 8814592. Throughput: 0: 24197.6. Samples: 8840704. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) +[2026-06-07 03:28:55,105][527010] Avg episode reward: [(0, '67.041')] +[2026-06-07 03:28:55,176][528169] Updated weights for policy 0, policy_version 17210 (0.0008) +[2026-06-07 03:28:55,329][528169] Updated weights for policy 0, policy_version 17222 (0.0008) +[2026-06-07 03:28:56,013][528169] Updated weights for policy 0, policy_version 17234 (0.0008) +[2026-06-07 03:28:56,176][528169] Updated weights for policy 0, policy_version 17248 (0.0008) +[2026-06-07 03:28:56,346][528169] Updated weights for policy 0, policy_version 17261 (0.0008) +[2026-06-07 03:28:56,502][528169] Updated weights for policy 0, policy_version 17274 (0.0008) +[2026-06-07 03:28:56,654][528169] Updated weights for policy 0, policy_version 17286 (0.0009) +[2026-06-07 03:28:57,334][528169] Updated weights for policy 0, policy_version 17297 (0.0008) +[2026-06-07 03:28:57,484][528169] Updated weights for policy 0, policy_version 17309 (0.0009) +[2026-06-07 03:28:57,624][528169] Updated weights for policy 0, policy_version 17320 (0.0009) +[2026-06-07 03:28:57,769][528169] Updated weights for policy 0, policy_version 17331 (0.0009) +[2026-06-07 03:28:57,914][528169] Updated weights for policy 0, policy_version 17342 (0.0009) +[2026-06-07 03:28:58,053][528169] Updated weights for policy 0, policy_version 17352 (0.0009) +[2026-06-07 03:28:58,724][528169] Updated weights for policy 0, policy_version 17363 (0.0008) +[2026-06-07 03:28:58,847][528169] Updated weights for policy 0, policy_version 17373 (0.0008) +[2026-06-07 03:28:58,980][528169] Updated weights for policy 0, policy_version 17383 (0.0008) +[2026-06-07 03:28:59,110][528169] Updated weights for policy 0, policy_version 17393 (0.0009) +[2026-06-07 03:28:59,268][528169] Updated weights for policy 0, policy_version 17405 (0.0008) +[2026-06-07 03:28:59,404][528169] Updated weights for policy 0, policy_version 17415 (0.0008) +[2026-06-07 03:29:00,061][528169] Updated weights for policy 0, policy_version 17425 (0.0008) +[2026-06-07 03:29:00,103][527010] Fps is (10 sec: 22937.8, 60 sec: 24576.0, 300 sec: 24103.9). Total num frames: 8945664. Throughput: 0: 24453.7. Samples: 8921856. Policy #0 lag: (min: 41.0, avg: 89.9, max: 105.0) +[2026-06-07 03:29:00,103][527010] Avg episode reward: [(0, '58.887')] +[2026-06-07 03:29:00,197][528169] Updated weights for policy 0, policy_version 17435 (0.0008) +[2026-06-07 03:29:00,338][528169] Updated weights for policy 0, policy_version 17446 (0.0008) +[2026-06-07 03:29:00,466][528169] Updated weights for policy 0, policy_version 17456 (0.0008) +[2026-06-07 03:29:00,609][528169] Updated weights for policy 0, policy_version 17467 (0.0008) +[2026-06-07 03:29:00,753][528169] Updated weights for policy 0, policy_version 17478 (0.0008) +[2026-06-07 03:29:01,411][528169] Updated weights for policy 0, policy_version 17489 (0.0008) +[2026-06-07 03:29:01,542][528169] Updated weights for policy 0, policy_version 17499 (0.0008) +[2026-06-07 03:29:01,680][528169] Updated weights for policy 0, policy_version 17510 (0.0008) +[2026-06-07 03:29:01,832][528169] Updated weights for policy 0, policy_version 17521 (0.0008) +[2026-06-07 03:29:01,974][528169] Updated weights for policy 0, policy_version 17532 (0.0008) +[2026-06-07 03:29:02,109][528169] Updated weights for policy 0, policy_version 17542 (0.0008) +[2026-06-07 03:29:02,785][528169] Updated weights for policy 0, policy_version 17554 (0.0009) +[2026-06-07 03:29:02,929][528169] Updated weights for policy 0, policy_version 17565 (0.0006) +[2026-06-07 03:29:03,070][528169] Updated weights for policy 0, policy_version 17576 (0.0005) +[2026-06-07 03:29:03,205][528169] Updated weights for policy 0, policy_version 17586 (0.0005) +[2026-06-07 03:29:03,348][528169] Updated weights for policy 0, policy_version 17597 (0.0005) +[2026-06-07 03:29:03,481][528169] Updated weights for policy 0, policy_version 17607 (0.0005) +[2026-06-07 03:29:04,084][528169] Updated weights for policy 0, policy_version 17618 (0.0006) +[2026-06-07 03:29:04,211][528169] Updated weights for policy 0, policy_version 17629 (0.0008) +[2026-06-07 03:29:04,376][528169] Updated weights for policy 0, policy_version 17641 (0.0008) +[2026-06-07 03:29:04,530][528169] Updated weights for policy 0, policy_version 17653 (0.0008) +[2026-06-07 03:29:04,676][528169] Updated weights for policy 0, policy_version 17664 (0.0008) +[2026-06-07 03:29:04,810][528169] Updated weights for policy 0, policy_version 17674 (0.0008) +[2026-06-07 03:29:05,103][527010] Fps is (10 sec: 26214.8, 60 sec: 24576.0, 300 sec: 24103.9). Total num frames: 9076736. Throughput: 0: 24243.2. Samples: 9064576. Policy #0 lag: (min: 41.0, avg: 89.9, max: 105.0) +[2026-06-07 03:29:05,104][527010] Avg episode reward: [(0, '76.548')] +[2026-06-07 03:29:05,116][528093] Saving new best policy, reward=76.548! +[2026-06-07 03:29:05,435][528169] Updated weights for policy 0, policy_version 17685 (0.0008) +[2026-06-07 03:29:05,573][528169] Updated weights for policy 0, policy_version 17696 (0.0008) +[2026-06-07 03:29:05,707][528169] Updated weights for policy 0, policy_version 17706 (0.0008) +[2026-06-07 03:29:05,849][528169] Updated weights for policy 0, policy_version 17717 (0.0008) +[2026-06-07 03:29:05,996][528169] Updated weights for policy 0, policy_version 17728 (0.0008) +[2026-06-07 03:29:06,129][528169] Updated weights for policy 0, policy_version 17738 (0.0009) +[2026-06-07 03:29:06,778][528169] Updated weights for policy 0, policy_version 17748 (0.0008) +[2026-06-07 03:29:06,913][528169] Updated weights for policy 0, policy_version 17759 (0.0008) +[2026-06-07 03:29:07,073][528169] Updated weights for policy 0, policy_version 17771 (0.0008) +[2026-06-07 03:29:07,231][528169] Updated weights for policy 0, policy_version 17783 (0.0008) +[2026-06-07 03:29:07,369][528169] Updated weights for policy 0, policy_version 17794 (0.0008) +[2026-06-07 03:29:07,507][528169] Updated weights for policy 0, policy_version 17804 (0.0008) +[2026-06-07 03:29:08,151][528169] Updated weights for policy 0, policy_version 17815 (0.0008) +[2026-06-07 03:29:08,285][528169] Updated weights for policy 0, policy_version 17826 (0.0008) +[2026-06-07 03:29:08,431][528169] Updated weights for policy 0, policy_version 17837 (0.0008) +[2026-06-07 03:29:08,574][528169] Updated weights for policy 0, policy_version 17848 (0.0008) +[2026-06-07 03:29:08,712][528169] Updated weights for policy 0, policy_version 17858 (0.0008) +[2026-06-07 03:29:08,841][528169] Updated weights for policy 0, policy_version 17868 (0.0008) +[2026-06-07 03:29:09,517][528169] Updated weights for policy 0, policy_version 17879 (0.0008) +[2026-06-07 03:29:09,658][528169] Updated weights for policy 0, policy_version 17890 (0.0008) +[2026-06-07 03:29:09,812][528169] Updated weights for policy 0, policy_version 17901 (0.0009) +[2026-06-07 03:29:09,934][528169] Updated weights for policy 0, policy_version 17911 (0.0008) +[2026-06-07 03:29:10,080][528169] Updated weights for policy 0, policy_version 17922 (0.0008) +[2026-06-07 03:29:10,103][527010] Fps is (10 sec: 22937.4, 60 sec: 24029.9, 300 sec: 24103.9). Total num frames: 9175040. Throughput: 0: 24359.8. Samples: 9211264. Policy #0 lag: (min: 28.0, avg: 40.8, max: 92.0) +[2026-06-07 03:29:10,104][527010] Avg episode reward: [(0, '62.084')] +[2026-06-07 03:29:10,233][528169] Updated weights for policy 0, policy_version 17933 (0.0008) +[2026-06-07 03:29:10,847][528169] Updated weights for policy 0, policy_version 17943 (0.0008) +[2026-06-07 03:29:10,980][528169] Updated weights for policy 0, policy_version 17953 (0.0008) +[2026-06-07 03:29:11,160][528169] Updated weights for policy 0, policy_version 17967 (0.0008) +[2026-06-07 03:29:11,292][528169] Updated weights for policy 0, policy_version 17977 (0.0008) +[2026-06-07 03:29:11,433][528169] Updated weights for policy 0, policy_version 17988 (0.0008) +[2026-06-07 03:29:11,584][528169] Updated weights for policy 0, policy_version 17999 (0.0009) +[2026-06-07 03:29:12,228][528169] Updated weights for policy 0, policy_version 18010 (0.0008) +[2026-06-07 03:29:12,371][528169] Updated weights for policy 0, policy_version 18021 (0.0008) +[2026-06-07 03:29:12,515][528169] Updated weights for policy 0, policy_version 18032 (0.0008) +[2026-06-07 03:29:12,671][528169] Updated weights for policy 0, policy_version 18044 (0.0008) +[2026-06-07 03:29:12,798][528169] Updated weights for policy 0, policy_version 18054 (0.0008) +[2026-06-07 03:29:13,482][528169] Updated weights for policy 0, policy_version 18065 (0.0008) +[2026-06-07 03:29:13,622][528169] Updated weights for policy 0, policy_version 18076 (0.0008) +[2026-06-07 03:29:13,751][528169] Updated weights for policy 0, policy_version 18086 (0.0008) +[2026-06-07 03:29:13,886][528169] Updated weights for policy 0, policy_version 18096 (0.0008) +[2026-06-07 03:29:14,027][528169] Updated weights for policy 0, policy_version 18106 (0.0008) +[2026-06-07 03:29:14,159][528169] Updated weights for policy 0, policy_version 18116 (0.0008) +[2026-06-07 03:29:14,304][528169] Updated weights for policy 0, policy_version 18127 (0.0008) +[2026-06-07 03:29:14,899][528169] Updated weights for policy 0, policy_version 18137 (0.0008) +[2026-06-07 03:29:15,027][528169] Updated weights for policy 0, policy_version 18147 (0.0008) +[2026-06-07 03:29:15,103][527010] Fps is (10 sec: 22937.8, 60 sec: 24576.0, 300 sec: 24103.9). Total num frames: 9306112. Throughput: 0: 24476.5. Samples: 9286528. Policy #0 lag: (min: 28.0, avg: 40.8, max: 92.0) +[2026-06-07 03:29:15,104][527010] Avg episode reward: [(0, '80.298')] +[2026-06-07 03:29:15,184][528169] Updated weights for policy 0, policy_version 18159 (0.0008) +[2026-06-07 03:29:15,313][528169] Updated weights for policy 0, policy_version 18169 (0.0009) +[2026-06-07 03:29:15,445][528169] Updated weights for policy 0, policy_version 18179 (0.0008) +[2026-06-07 03:29:15,599][528169] Updated weights for policy 0, policy_version 18190 (0.0008) +[2026-06-07 03:29:15,614][528093] Saving new best policy, reward=80.298! +[2026-06-07 03:29:16,250][528169] Updated weights for policy 0, policy_version 18200 (0.0008) +[2026-06-07 03:29:16,371][528169] Updated weights for policy 0, policy_version 18210 (0.0008) +[2026-06-07 03:29:16,504][528169] Updated weights for policy 0, policy_version 18220 (0.0008) +[2026-06-07 03:29:16,636][528169] Updated weights for policy 0, policy_version 18230 (0.0008) +[2026-06-07 03:29:16,778][528169] Updated weights for policy 0, policy_version 18241 (0.0008) +[2026-06-07 03:29:16,921][528169] Updated weights for policy 0, policy_version 18252 (0.0008) +[2026-06-07 03:29:17,578][528169] Updated weights for policy 0, policy_version 18262 (0.0008) +[2026-06-07 03:29:17,705][528169] Updated weights for policy 0, policy_version 18272 (0.0008) +[2026-06-07 03:29:17,865][528169] Updated weights for policy 0, policy_version 18284 (0.0008) +[2026-06-07 03:29:18,002][528169] Updated weights for policy 0, policy_version 18294 (0.0008) +[2026-06-07 03:29:18,130][528169] Updated weights for policy 0, policy_version 18304 (0.0008) +[2026-06-07 03:29:18,282][528169] Updated weights for policy 0, policy_version 18315 (0.0008) +[2026-06-07 03:29:18,886][528169] Updated weights for policy 0, policy_version 18325 (0.0008) +[2026-06-07 03:29:19,019][528169] Updated weights for policy 0, policy_version 18336 (0.0008) +[2026-06-07 03:29:19,165][528169] Updated weights for policy 0, policy_version 18347 (0.0008) +[2026-06-07 03:29:19,316][528169] Updated weights for policy 0, policy_version 18358 (0.0008) +[2026-06-07 03:29:19,457][528169] Updated weights for policy 0, policy_version 18369 (0.0008) +[2026-06-07 03:29:19,595][528169] Updated weights for policy 0, policy_version 18379 (0.0008) +[2026-06-07 03:29:20,103][527010] Fps is (10 sec: 26214.2, 60 sec: 24576.0, 300 sec: 24215.0). Total num frames: 9437184. Throughput: 0: 24155.0. Samples: 9425408. Policy #0 lag: (min: 28.0, avg: 40.8, max: 92.0) +[2026-06-07 03:29:20,104][527010] Avg episode reward: [(0, '66.391')] +[2026-06-07 03:29:20,261][528169] Updated weights for policy 0, policy_version 18390 (0.0009) +[2026-06-07 03:29:20,407][528169] Updated weights for policy 0, policy_version 18401 (0.0008) +[2026-06-07 03:29:20,567][528169] Updated weights for policy 0, policy_version 18413 (0.0008) +[2026-06-07 03:29:20,725][528169] Updated weights for policy 0, policy_version 18425 (0.0008) +[2026-06-07 03:29:20,861][528169] Updated weights for policy 0, policy_version 18435 (0.0008) +[2026-06-07 03:29:20,995][528169] Updated weights for policy 0, policy_version 18445 (0.0008) +[2026-06-07 03:29:21,600][528169] Updated weights for policy 0, policy_version 18456 (0.0008) +[2026-06-07 03:29:21,726][528169] Updated weights for policy 0, policy_version 18466 (0.0008) +[2026-06-07 03:29:21,871][528169] Updated weights for policy 0, policy_version 18477 (0.0008) +[2026-06-07 03:29:21,999][528169] Updated weights for policy 0, policy_version 18487 (0.0008) +[2026-06-07 03:29:22,127][528169] Updated weights for policy 0, policy_version 18497 (0.0008) +[2026-06-07 03:29:22,299][528169] Updated weights for policy 0, policy_version 18509 (0.0008) +[2026-06-07 03:29:22,990][528169] Updated weights for policy 0, policy_version 18521 (0.0008) +[2026-06-07 03:29:23,114][528169] Updated weights for policy 0, policy_version 18531 (0.0008) +[2026-06-07 03:29:23,268][528169] Updated weights for policy 0, policy_version 18543 (0.0008) +[2026-06-07 03:29:23,418][528169] Updated weights for policy 0, policy_version 18554 (0.0008) +[2026-06-07 03:29:23,565][528169] Updated weights for policy 0, policy_version 18566 (0.0008) +[2026-06-07 03:29:24,202][528169] Updated weights for policy 0, policy_version 18577 (0.0008) +[2026-06-07 03:29:24,325][528169] Updated weights for policy 0, policy_version 18587 (0.0008) +[2026-06-07 03:29:24,478][528169] Updated weights for policy 0, policy_version 18599 (0.0008) +[2026-06-07 03:29:24,647][528169] Updated weights for policy 0, policy_version 18612 (0.0008) +[2026-06-07 03:29:24,794][528169] Updated weights for policy 0, policy_version 18623 (0.0008) +[2026-06-07 03:29:24,942][528169] Updated weights for policy 0, policy_version 18634 (0.0009) +[2026-06-07 03:29:25,103][527010] Fps is (10 sec: 26214.3, 60 sec: 24576.0, 300 sec: 24215.0). Total num frames: 9568256. Throughput: 0: 24510.7. Samples: 9579008. Policy #0 lag: (min: 63.0, avg: 75.8, max: 127.0) +[2026-06-07 03:29:25,104][527010] Avg episode reward: [(0, '68.029')] +[2026-06-07 03:29:25,594][528169] Updated weights for policy 0, policy_version 18644 (0.0008) +[2026-06-07 03:29:25,725][528169] Updated weights for policy 0, policy_version 18654 (0.0008) +[2026-06-07 03:29:25,862][528169] Updated weights for policy 0, policy_version 18664 (0.0008) +[2026-06-07 03:29:26,017][528169] Updated weights for policy 0, policy_version 18676 (0.0008) +[2026-06-07 03:29:26,178][528169] Updated weights for policy 0, policy_version 18688 (0.0008) +[2026-06-07 03:29:26,312][528169] Updated weights for policy 0, policy_version 18698 (0.0008) +[2026-06-07 03:29:26,947][528169] Updated weights for policy 0, policy_version 18709 (0.0008) +[2026-06-07 03:29:27,080][528169] Updated weights for policy 0, policy_version 18719 (0.0008) +[2026-06-07 03:29:27,203][528169] Updated weights for policy 0, policy_version 18729 (0.0008) +[2026-06-07 03:29:27,341][528169] Updated weights for policy 0, policy_version 18739 (0.0009) +[2026-06-07 03:29:27,468][528169] Updated weights for policy 0, policy_version 18749 (0.0008) +[2026-06-07 03:29:27,601][528169] Updated weights for policy 0, policy_version 18759 (0.0008) +[2026-06-07 03:29:28,242][528169] Updated weights for policy 0, policy_version 18769 (0.0008) +[2026-06-07 03:29:28,363][528169] Updated weights for policy 0, policy_version 18779 (0.0008) +[2026-06-07 03:29:28,505][528169] Updated weights for policy 0, policy_version 18790 (0.0008) +[2026-06-07 03:29:28,629][528169] Updated weights for policy 0, policy_version 18800 (0.0008) +[2026-06-07 03:29:28,772][528169] Updated weights for policy 0, policy_version 18810 (0.0008) +[2026-06-07 03:29:28,912][528169] Updated weights for policy 0, policy_version 18820 (0.0008) +[2026-06-07 03:29:29,052][528169] Updated weights for policy 0, policy_version 18831 (0.0008) +[2026-06-07 03:29:29,691][528169] Updated weights for policy 0, policy_version 18841 (0.0008) +[2026-06-07 03:29:29,825][528169] Updated weights for policy 0, policy_version 18851 (0.0008) +[2026-06-07 03:29:29,957][528169] Updated weights for policy 0, policy_version 18861 (0.0008) +[2026-06-07 03:29:30,085][528169] Updated weights for policy 0, policy_version 18871 (0.0008) +[2026-06-07 03:29:30,103][527010] Fps is (10 sec: 22937.7, 60 sec: 24029.9, 300 sec: 24215.0). Total num frames: 9666560. Throughput: 0: 24405.3. Samples: 9648640. Policy #0 lag: (min: 63.0, avg: 75.8, max: 127.0) +[2026-06-07 03:29:30,104][527010] Avg episode reward: [(0, '64.273')] +[2026-06-07 03:29:30,221][528169] Updated weights for policy 0, policy_version 18881 (0.0009) +[2026-06-07 03:29:30,374][528169] Updated weights for policy 0, policy_version 18892 (0.0008) +[2026-06-07 03:29:30,947][528169] Updated weights for policy 0, policy_version 18902 (0.0008) +[2026-06-07 03:29:31,088][528169] Updated weights for policy 0, policy_version 18913 (0.0008) +[2026-06-07 03:29:31,212][528169] Updated weights for policy 0, policy_version 18923 (0.0008) +[2026-06-07 03:29:31,375][528169] Updated weights for policy 0, policy_version 18935 (0.0008) +[2026-06-07 03:29:31,507][528169] Updated weights for policy 0, policy_version 18945 (0.0008) +[2026-06-07 03:29:31,634][528169] Updated weights for policy 0, policy_version 18955 (0.0008) +[2026-06-07 03:29:32,287][528169] Updated weights for policy 0, policy_version 18965 (0.0009) +[2026-06-07 03:29:32,433][528169] Updated weights for policy 0, policy_version 18976 (0.0008) +[2026-06-07 03:29:32,561][528169] Updated weights for policy 0, policy_version 18986 (0.0008) +[2026-06-07 03:29:32,729][528169] Updated weights for policy 0, policy_version 18998 (0.0008) +[2026-06-07 03:29:32,857][528169] Updated weights for policy 0, policy_version 19008 (0.0008) +[2026-06-07 03:29:32,984][528169] Updated weights for policy 0, policy_version 19018 (0.0008) +[2026-06-07 03:29:33,616][528169] Updated weights for policy 0, policy_version 19028 (0.0008) +[2026-06-07 03:29:33,778][528169] Updated weights for policy 0, policy_version 19041 (0.0008) +[2026-06-07 03:29:33,920][528169] Updated weights for policy 0, policy_version 19052 (0.0008) +[2026-06-07 03:29:34,082][528169] Updated weights for policy 0, policy_version 19064 (0.0008) +[2026-06-07 03:29:34,211][528169] Updated weights for policy 0, policy_version 19074 (0.0005) +[2026-06-07 03:29:34,347][528169] Updated weights for policy 0, policy_version 19084 (0.0005) +[2026-06-07 03:29:34,991][528169] Updated weights for policy 0, policy_version 19095 (0.0008) +[2026-06-07 03:29:35,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24576.0, 300 sec: 24215.0). Total num frames: 9797632. Throughput: 0: 24322.8. Samples: 9791744. Policy #0 lag: (min: 63.0, avg: 75.8, max: 127.0) +[2026-06-07 03:29:35,104][527010] Avg episode reward: [(0, '69.709')] +[2026-06-07 03:29:35,112][528169] Updated weights for policy 0, policy_version 19105 (0.0008) +[2026-06-07 03:29:35,255][528169] Updated weights for policy 0, policy_version 19116 (0.0008) +[2026-06-07 03:29:35,398][528169] Updated weights for policy 0, policy_version 19127 (0.0008) +[2026-06-07 03:29:35,532][528169] Updated weights for policy 0, policy_version 19137 (0.0008) +[2026-06-07 03:29:35,660][528169] Updated weights for policy 0, policy_version 19147 (0.0008) +[2026-06-07 03:29:36,314][528169] Updated weights for policy 0, policy_version 19157 (0.0008) +[2026-06-07 03:29:36,456][528169] Updated weights for policy 0, policy_version 19168 (0.0008) +[2026-06-07 03:29:36,588][528169] Updated weights for policy 0, policy_version 19178 (0.0008) +[2026-06-07 03:29:36,735][528169] Updated weights for policy 0, policy_version 19190 (0.0008) +[2026-06-07 03:29:36,865][528169] Updated weights for policy 0, policy_version 19200 (0.0008) +[2026-06-07 03:29:37,010][528169] Updated weights for policy 0, policy_version 19210 (0.0008) +[2026-06-07 03:29:37,676][528169] Updated weights for policy 0, policy_version 19221 (0.0008) +[2026-06-07 03:29:37,814][528169] Updated weights for policy 0, policy_version 19232 (0.0008) +[2026-06-07 03:29:37,970][528169] Updated weights for policy 0, policy_version 19243 (0.0008) +[2026-06-07 03:29:38,108][528169] Updated weights for policy 0, policy_version 19254 (0.0008) +[2026-06-07 03:29:38,242][528169] Updated weights for policy 0, policy_version 19264 (0.0008) +[2026-06-07 03:29:38,391][528169] Updated weights for policy 0, policy_version 19275 (0.0008) +[2026-06-07 03:29:39,032][528169] Updated weights for policy 0, policy_version 19286 (0.0009) +[2026-06-07 03:29:39,171][528169] Updated weights for policy 0, policy_version 19297 (0.0009) +[2026-06-07 03:29:39,306][528169] Updated weights for policy 0, policy_version 19307 (0.0008) +[2026-06-07 03:29:39,461][528169] Updated weights for policy 0, policy_version 19319 (0.0008) +[2026-06-07 03:29:39,601][528169] Updated weights for policy 0, policy_version 19329 (0.0008) +[2026-06-07 03:29:39,743][528169] Updated weights for policy 0, policy_version 19340 (0.0008) +[2026-06-07 03:29:40,103][527010] Fps is (10 sec: 26214.6, 60 sec: 24576.1, 300 sec: 24326.1). Total num frames: 9928704. Throughput: 0: 24456.7. Samples: 9941248. Policy #0 lag: (min: 63.0, avg: 73.8, max: 127.0) +[2026-06-07 03:29:40,104][527010] Avg episode reward: [(0, '65.204')] +[2026-06-07 03:29:40,397][528169] Updated weights for policy 0, policy_version 19350 (0.0009) +[2026-06-07 03:29:40,519][528169] Updated weights for policy 0, policy_version 19360 (0.0008) +[2026-06-07 03:29:40,650][528169] Updated weights for policy 0, policy_version 19370 (0.0008) +[2026-06-07 03:29:40,804][528169] Updated weights for policy 0, policy_version 19381 (0.0008) +[2026-06-07 03:29:40,942][528169] Updated weights for policy 0, policy_version 19392 (0.0008) +[2026-06-07 03:29:41,073][528169] Updated weights for policy 0, policy_version 19402 (0.0008) +[2026-06-07 03:29:41,672][528169] Updated weights for policy 0, policy_version 19413 (0.0008) +[2026-06-07 03:29:41,809][528169] Updated weights for policy 0, policy_version 19424 (0.0008) +[2026-06-07 03:29:41,969][528169] Updated weights for policy 0, policy_version 19436 (0.0008) +[2026-06-07 03:29:42,103][528169] Updated weights for policy 0, policy_version 19446 (0.0008) +[2026-06-07 03:29:42,234][528169] Updated weights for policy 0, policy_version 19456 (0.0008) +[2026-06-07 03:29:42,366][528169] Updated weights for policy 0, policy_version 19466 (0.0008) +[2026-06-07 03:29:43,020][528169] Updated weights for policy 0, policy_version 19478 (0.0008) +[2026-06-07 03:29:43,176][528169] Updated weights for policy 0, policy_version 19490 (0.0008) +[2026-06-07 03:29:43,312][528169] Updated weights for policy 0, policy_version 19500 (0.0008) +[2026-06-07 03:29:43,465][528169] Updated weights for policy 0, policy_version 19512 (0.0008) +[2026-06-07 03:29:43,602][528169] Updated weights for policy 0, policy_version 19522 (0.0008) +[2026-06-07 03:29:43,762][528169] Updated weights for policy 0, policy_version 19534 (0.0008) +[2026-06-07 03:29:44,418][528169] Updated weights for policy 0, policy_version 19545 (0.0008) +[2026-06-07 03:29:44,535][528169] Updated weights for policy 0, policy_version 19555 (0.0008) +[2026-06-07 03:29:44,672][528169] Updated weights for policy 0, policy_version 19565 (0.0010) +[2026-06-07 03:29:44,805][528169] Updated weights for policy 0, policy_version 19575 (0.0008) +[2026-06-07 03:29:44,949][528169] Updated weights for policy 0, policy_version 19586 (0.0009) +[2026-06-07 03:29:45,083][528169] Updated weights for policy 0, policy_version 19596 (0.0008) +[2026-06-07 03:29:45,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24029.9, 300 sec: 24215.0). Total num frames: 10027008. Throughput: 0: 24200.5. Samples: 10010880. Policy #0 lag: (min: 63.0, avg: 73.8, max: 127.0) +[2026-06-07 03:29:45,104][527010] Avg episode reward: [(0, '66.019')] +[2026-06-07 03:29:45,757][528169] Updated weights for policy 0, policy_version 19607 (0.0005) +[2026-06-07 03:29:45,885][528169] Updated weights for policy 0, policy_version 19617 (0.0005) +[2026-06-07 03:29:46,020][528169] Updated weights for policy 0, policy_version 19628 (0.0009) +[2026-06-07 03:29:46,164][528169] Updated weights for policy 0, policy_version 19638 (0.0006) +[2026-06-07 03:29:46,302][528169] Updated weights for policy 0, policy_version 19649 (0.0006) +[2026-06-07 03:29:46,446][528169] Updated weights for policy 0, policy_version 19659 (0.0005) +[2026-06-07 03:29:47,082][528169] Updated weights for policy 0, policy_version 19670 (0.0005) +[2026-06-07 03:29:47,215][528169] Updated weights for policy 0, policy_version 19681 (0.0005) +[2026-06-07 03:29:47,361][528169] Updated weights for policy 0, policy_version 19692 (0.0009) +[2026-06-07 03:29:47,495][528169] Updated weights for policy 0, policy_version 19702 (0.0008) +[2026-06-07 03:29:47,636][528169] Updated weights for policy 0, policy_version 19713 (0.0008) +[2026-06-07 03:29:47,771][528169] Updated weights for policy 0, policy_version 19723 (0.0008) +[2026-06-07 03:29:48,409][528169] Updated weights for policy 0, policy_version 19733 (0.0006) +[2026-06-07 03:29:48,542][528169] Updated weights for policy 0, policy_version 19743 (0.0005) +[2026-06-07 03:29:48,683][528169] Updated weights for policy 0, policy_version 19754 (0.0004) +[2026-06-07 03:29:48,810][528169] Updated weights for policy 0, policy_version 19764 (0.0004) +[2026-06-07 03:29:48,985][528169] Updated weights for policy 0, policy_version 19777 (0.0004) +[2026-06-07 03:29:49,134][528169] Updated weights for policy 0, policy_version 19788 (0.0004) +[2026-06-07 03:29:49,713][528169] Updated weights for policy 0, policy_version 19798 (0.0004) +[2026-06-07 03:29:49,858][528169] Updated weights for policy 0, policy_version 19809 (0.0004) +[2026-06-07 03:29:49,988][528169] Updated weights for policy 0, policy_version 19819 (0.0004) +[2026-06-07 03:29:50,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24029.9, 300 sec: 24215.0). Total num frames: 10158080. Throughput: 0: 24428.1. Samples: 10163840. Policy #0 lag: (min: 63.0, avg: 73.8, max: 127.0) +[2026-06-07 03:29:50,103][527010] Avg episode reward: [(0, '65.762')] +[2026-06-07 03:29:50,133][528169] Updated weights for policy 0, policy_version 19830 (0.0008) +[2026-06-07 03:29:50,265][528169] Updated weights for policy 0, policy_version 19840 (0.0008) +[2026-06-07 03:29:50,394][528169] Updated weights for policy 0, policy_version 19850 (0.0008) +[2026-06-07 03:29:51,018][528169] Updated weights for policy 0, policy_version 19860 (0.0009) +[2026-06-07 03:29:51,163][528169] Updated weights for policy 0, policy_version 19871 (0.0009) +[2026-06-07 03:29:51,299][528169] Updated weights for policy 0, policy_version 19881 (0.0009) +[2026-06-07 03:29:51,448][528169] Updated weights for policy 0, policy_version 19892 (0.0008) +[2026-06-07 03:29:51,589][528169] Updated weights for policy 0, policy_version 19903 (0.0008) +[2026-06-07 03:29:51,740][528169] Updated weights for policy 0, policy_version 19914 (0.0008) +[2026-06-07 03:29:52,352][528169] Updated weights for policy 0, policy_version 19925 (0.0008) +[2026-06-07 03:29:52,483][528169] Updated weights for policy 0, policy_version 19935 (0.0008) +[2026-06-07 03:29:52,615][528169] Updated weights for policy 0, policy_version 19945 (0.0008) +[2026-06-07 03:29:52,746][528169] Updated weights for policy 0, policy_version 19955 (0.0008) +[2026-06-07 03:29:52,899][528169] Updated weights for policy 0, policy_version 19966 (0.0008) +[2026-06-07 03:29:53,033][528169] Updated weights for policy 0, policy_version 19976 (0.0005) +[2026-06-07 03:29:53,639][528169] Updated weights for policy 0, policy_version 19986 (0.0007) +[2026-06-07 03:29:53,769][528169] Updated weights for policy 0, policy_version 19996 (0.0008) +[2026-06-07 03:29:53,902][528169] Updated weights for policy 0, policy_version 20006 (0.0009) +[2026-06-07 03:29:54,054][528169] Updated weights for policy 0, policy_version 20017 (0.0008) +[2026-06-07 03:29:54,177][528169] Updated weights for policy 0, policy_version 20027 (0.0008) +[2026-06-07 03:29:54,313][528169] Updated weights for policy 0, policy_version 20037 (0.0008) +[2026-06-07 03:29:54,440][528169] Updated weights for policy 0, policy_version 20047 (0.0008) +[2026-06-07 03:29:55,090][528169] Updated weights for policy 0, policy_version 20058 (0.0008) +[2026-06-07 03:29:55,103][527010] Fps is (10 sec: 26214.4, 60 sec: 24576.1, 300 sec: 24326.1). Total num frames: 10289152. Throughput: 0: 24300.1. Samples: 10304768. Policy #0 lag: (min: 7.0, avg: 20.4, max: 71.0) +[2026-06-07 03:29:55,104][527010] Avg episode reward: [(0, '83.760')] +[2026-06-07 03:29:55,218][528169] Updated weights for policy 0, policy_version 20068 (0.0008) +[2026-06-07 03:29:55,378][528169] Updated weights for policy 0, policy_version 20080 (0.0008) +[2026-06-07 03:29:55,531][528169] Updated weights for policy 0, policy_version 20092 (0.0008) +[2026-06-07 03:29:55,663][528169] Updated weights for policy 0, policy_version 20102 (0.0008) +[2026-06-07 03:29:55,785][528093] Saving new best policy, reward=83.760! +[2026-06-07 03:29:56,316][528169] Updated weights for policy 0, policy_version 20113 (0.0009) +[2026-06-07 03:29:56,441][528169] Updated weights for policy 0, policy_version 20123 (0.0008) +[2026-06-07 03:29:56,585][528169] Updated weights for policy 0, policy_version 20134 (0.0008) +[2026-06-07 03:29:56,712][528169] Updated weights for policy 0, policy_version 20144 (0.0008) +[2026-06-07 03:29:56,844][528169] Updated weights for policy 0, policy_version 20154 (0.0008) +[2026-06-07 03:29:56,992][528169] Updated weights for policy 0, policy_version 20165 (0.0008) +[2026-06-07 03:29:57,133][528169] Updated weights for policy 0, policy_version 20176 (0.0008) +[2026-06-07 03:29:57,771][528169] Updated weights for policy 0, policy_version 20186 (0.0008) +[2026-06-07 03:29:57,905][528169] Updated weights for policy 0, policy_version 20196 (0.0008) +[2026-06-07 03:29:58,040][528169] Updated weights for policy 0, policy_version 20206 (0.0009) +[2026-06-07 03:29:58,175][528169] Updated weights for policy 0, policy_version 20217 (0.0008) +[2026-06-07 03:29:58,311][528169] Updated weights for policy 0, policy_version 20227 (0.0008) +[2026-06-07 03:29:58,441][528169] Updated weights for policy 0, policy_version 20237 (0.0008) +[2026-06-07 03:29:59,064][528169] Updated weights for policy 0, policy_version 20247 (0.0008) +[2026-06-07 03:29:59,194][528169] Updated weights for policy 0, policy_version 20257 (0.0008) +[2026-06-07 03:29:59,325][528169] Updated weights for policy 0, policy_version 20267 (0.0008) +[2026-06-07 03:29:59,458][528169] Updated weights for policy 0, policy_version 20277 (0.0008) +[2026-06-07 03:29:59,599][528169] Updated weights for policy 0, policy_version 20288 (0.0008) +[2026-06-07 03:29:59,744][528169] Updated weights for policy 0, policy_version 20299 (0.0008) +[2026-06-07 03:30:00,103][527010] Fps is (10 sec: 26214.3, 60 sec: 24576.0, 300 sec: 24326.1). Total num frames: 10420224. Throughput: 0: 24223.3. Samples: 10376576. Policy #0 lag: (min: 7.0, avg: 20.4, max: 71.0) +[2026-06-07 03:30:00,104][527010] Avg episode reward: [(0, '69.406')] +[2026-06-07 03:30:00,375][528169] Updated weights for policy 0, policy_version 20310 (0.0008) +[2026-06-07 03:30:00,517][528169] Updated weights for policy 0, policy_version 20321 (0.0008) +[2026-06-07 03:30:00,655][528169] Updated weights for policy 0, policy_version 20332 (0.0008) +[2026-06-07 03:30:00,800][528169] Updated weights for policy 0, policy_version 20343 (0.0008) +[2026-06-07 03:30:00,948][528169] Updated weights for policy 0, policy_version 20354 (0.0008) +[2026-06-07 03:30:01,083][528169] Updated weights for policy 0, policy_version 20364 (0.0008) +[2026-06-07 03:30:01,713][528169] Updated weights for policy 0, policy_version 20374 (0.0007) +[2026-06-07 03:30:01,851][528169] Updated weights for policy 0, policy_version 20385 (0.0008) +[2026-06-07 03:30:01,997][528169] Updated weights for policy 0, policy_version 20396 (0.0008) +[2026-06-07 03:30:02,151][528169] Updated weights for policy 0, policy_version 20408 (0.0008) +[2026-06-07 03:30:02,273][528169] Updated weights for policy 0, policy_version 20418 (0.0008) +[2026-06-07 03:30:02,425][528169] Updated weights for policy 0, policy_version 20429 (0.0008) +[2026-06-07 03:30:03,124][528169] Updated weights for policy 0, policy_version 20441 (0.0008) +[2026-06-07 03:30:03,272][528169] Updated weights for policy 0, policy_version 20452 (0.0008) +[2026-06-07 03:30:03,415][528169] Updated weights for policy 0, policy_version 20463 (0.0008) +[2026-06-07 03:30:03,559][528169] Updated weights for policy 0, policy_version 20474 (0.0008) +[2026-06-07 03:30:03,685][528169] Updated weights for policy 0, policy_version 20484 (0.0008) +[2026-06-07 03:30:03,823][528169] Updated weights for policy 0, policy_version 20494 (0.0009) +[2026-06-07 03:30:04,423][528169] Updated weights for policy 0, policy_version 20504 (0.0006) +[2026-06-07 03:30:04,564][528169] Updated weights for policy 0, policy_version 20515 (0.0005) +[2026-06-07 03:30:04,743][528169] Updated weights for policy 0, policy_version 20528 (0.0008) +[2026-06-07 03:30:04,870][528169] Updated weights for policy 0, policy_version 20538 (0.0008) +[2026-06-07 03:30:05,018][528169] Updated weights for policy 0, policy_version 20549 (0.0008) +[2026-06-07 03:30:05,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24029.9, 300 sec: 24215.0). Total num frames: 10518528. Throughput: 0: 24519.2. Samples: 10528768. Policy #0 lag: (min: 7.0, avg: 20.4, max: 71.0) +[2026-06-07 03:30:05,103][527010] Avg episode reward: [(0, '70.579')] +[2026-06-07 03:30:05,648][528169] Updated weights for policy 0, policy_version 20561 (0.0009) +[2026-06-07 03:30:05,811][528169] Updated weights for policy 0, policy_version 20574 (0.0008) +[2026-06-07 03:30:05,957][528169] Updated weights for policy 0, policy_version 20585 (0.0008) +[2026-06-07 03:30:06,085][528169] Updated weights for policy 0, policy_version 20595 (0.0008) +[2026-06-07 03:30:06,225][528169] Updated weights for policy 0, policy_version 20605 (0.0008) +[2026-06-07 03:30:06,357][528169] Updated weights for policy 0, policy_version 20615 (0.0008) +[2026-06-07 03:30:07,003][528169] Updated weights for policy 0, policy_version 20626 (0.0008) +[2026-06-07 03:30:07,141][528169] Updated weights for policy 0, policy_version 20637 (0.0008) +[2026-06-07 03:30:07,283][528169] Updated weights for policy 0, policy_version 20648 (0.0008) +[2026-06-07 03:30:07,429][528169] Updated weights for policy 0, policy_version 20659 (0.0008) +[2026-06-07 03:30:07,564][528169] Updated weights for policy 0, policy_version 20669 (0.0008) +[2026-06-07 03:30:07,693][528169] Updated weights for policy 0, policy_version 20679 (0.0008) +[2026-06-07 03:30:08,302][528169] Updated weights for policy 0, policy_version 20689 (0.0008) +[2026-06-07 03:30:08,431][528169] Updated weights for policy 0, policy_version 20699 (0.0008) +[2026-06-07 03:30:08,579][528169] Updated weights for policy 0, policy_version 20710 (0.0008) +[2026-06-07 03:30:08,739][528169] Updated weights for policy 0, policy_version 20722 (0.0008) +[2026-06-07 03:30:08,865][528169] Updated weights for policy 0, policy_version 20732 (0.0008) +[2026-06-07 03:30:09,015][528169] Updated weights for policy 0, policy_version 20743 (0.0008) +[2026-06-07 03:30:09,641][528169] Updated weights for policy 0, policy_version 20753 (0.0008) +[2026-06-07 03:30:09,804][528169] Updated weights for policy 0, policy_version 20766 (0.0009) +[2026-06-07 03:30:09,938][528169] Updated weights for policy 0, policy_version 20776 (0.0006) +[2026-06-07 03:30:10,069][528169] Updated weights for policy 0, policy_version 20786 (0.0005) +[2026-06-07 03:30:10,103][527010] Fps is (10 sec: 22937.4, 60 sec: 24576.0, 300 sec: 24326.1). Total num frames: 10649600. Throughput: 0: 24240.3. Samples: 10669824. Policy #0 lag: (min: 20.0, avg: 33.1, max: 84.0) +[2026-06-07 03:30:10,104][527010] Avg episode reward: [(0, '82.546')] +[2026-06-07 03:30:10,198][528169] Updated weights for policy 0, policy_version 20796 (0.0005) +[2026-06-07 03:30:10,348][528169] Updated weights for policy 0, policy_version 20807 (0.0004) +[2026-06-07 03:30:10,977][528169] Updated weights for policy 0, policy_version 20819 (0.0005) +[2026-06-07 03:30:11,101][528169] Updated weights for policy 0, policy_version 20829 (0.0008) +[2026-06-07 03:30:11,269][528169] Updated weights for policy 0, policy_version 20842 (0.0008) +[2026-06-07 03:30:11,410][528169] Updated weights for policy 0, policy_version 20853 (0.0008) +[2026-06-07 03:30:11,544][528169] Updated weights for policy 0, policy_version 20863 (0.0008) +[2026-06-07 03:30:11,678][528169] Updated weights for policy 0, policy_version 20873 (0.0008) +[2026-06-07 03:30:12,334][528169] Updated weights for policy 0, policy_version 20883 (0.0008) +[2026-06-07 03:30:12,455][528169] Updated weights for policy 0, policy_version 20893 (0.0008) +[2026-06-07 03:30:12,585][528169] Updated weights for policy 0, policy_version 20903 (0.0009) +[2026-06-07 03:30:12,715][528169] Updated weights for policy 0, policy_version 20913 (0.0008) +[2026-06-07 03:30:12,857][528169] Updated weights for policy 0, policy_version 20924 (0.0008) +[2026-06-07 03:30:13,014][528169] Updated weights for policy 0, policy_version 20936 (0.0008) +[2026-06-07 03:30:13,653][528169] Updated weights for policy 0, policy_version 20946 (0.0008) +[2026-06-07 03:30:13,810][528169] Updated weights for policy 0, policy_version 20958 (0.0007) +[2026-06-07 03:30:13,945][528169] Updated weights for policy 0, policy_version 20968 (0.0008) +[2026-06-07 03:30:14,093][528169] Updated weights for policy 0, policy_version 20980 (0.0008) +[2026-06-07 03:30:14,226][528169] Updated weights for policy 0, policy_version 20990 (0.0008) +[2026-06-07 03:30:14,398][528169] Updated weights for policy 0, policy_version 21003 (0.0008) +[2026-06-07 03:30:15,018][528169] Updated weights for policy 0, policy_version 21013 (0.0008) +[2026-06-07 03:30:15,103][527010] Fps is (10 sec: 26214.0, 60 sec: 24575.9, 300 sec: 24326.1). Total num frames: 10780672. Throughput: 0: 24541.8. Samples: 10753024. Policy #0 lag: (min: 20.0, avg: 33.1, max: 84.0) +[2026-06-07 03:30:15,104][527010] Avg episode reward: [(0, '71.803')] +[2026-06-07 03:30:15,154][528169] Updated weights for policy 0, policy_version 21024 (0.0008) +[2026-06-07 03:30:15,290][528169] Updated weights for policy 0, policy_version 21034 (0.0008) +[2026-06-07 03:30:15,449][528169] Updated weights for policy 0, policy_version 21046 (0.0008) +[2026-06-07 03:30:15,576][528169] Updated weights for policy 0, policy_version 21056 (0.0008) +[2026-06-07 03:30:15,706][528169] Updated weights for policy 0, policy_version 21066 (0.0008) +[2026-06-07 03:30:16,354][528169] Updated weights for policy 0, policy_version 21076 (0.0008) +[2026-06-07 03:30:16,493][528169] Updated weights for policy 0, policy_version 21087 (0.0008) +[2026-06-07 03:30:16,621][528169] Updated weights for policy 0, policy_version 21097 (0.0008) +[2026-06-07 03:30:16,779][528169] Updated weights for policy 0, policy_version 21109 (0.0008) +[2026-06-07 03:30:16,917][528169] Updated weights for policy 0, policy_version 21119 (0.0008) +[2026-06-07 03:30:17,059][528169] Updated weights for policy 0, policy_version 21130 (0.0008) +[2026-06-07 03:30:17,714][528169] Updated weights for policy 0, policy_version 21142 (0.0008) +[2026-06-07 03:30:17,840][528169] Updated weights for policy 0, policy_version 21152 (0.0008) +[2026-06-07 03:30:17,993][528169] Updated weights for policy 0, policy_version 21163 (0.0007) +[2026-06-07 03:30:18,146][528169] Updated weights for policy 0, policy_version 21175 (0.0008) +[2026-06-07 03:30:18,285][528169] Updated weights for policy 0, policy_version 21186 (0.0008) +[2026-06-07 03:30:18,426][528169] Updated weights for policy 0, policy_version 21196 (0.0008) +[2026-06-07 03:30:19,054][528169] Updated weights for policy 0, policy_version 21207 (0.0008) +[2026-06-07 03:30:19,190][528169] Updated weights for policy 0, policy_version 21217 (0.0008) +[2026-06-07 03:30:19,323][528169] Updated weights for policy 0, policy_version 21227 (0.0008) +[2026-06-07 03:30:19,466][528169] Updated weights for policy 0, policy_version 21238 (0.0008) +[2026-06-07 03:30:19,594][528169] Updated weights for policy 0, policy_version 21248 (0.0008) +[2026-06-07 03:30:19,729][528169] Updated weights for policy 0, policy_version 21258 (0.0008) +[2026-06-07 03:30:20,103][527010] Fps is (10 sec: 26214.7, 60 sec: 24576.0, 300 sec: 24326.1). Total num frames: 10911744. Throughput: 0: 24430.9. Samples: 10891136. Policy #0 lag: (min: 20.0, avg: 33.1, max: 84.0) +[2026-06-07 03:30:20,104][527010] Avg episode reward: [(0, '66.661')] +[2026-06-07 03:30:20,367][528169] Updated weights for policy 0, policy_version 21269 (0.0008) +[2026-06-07 03:30:20,493][528169] Updated weights for policy 0, policy_version 21279 (0.0008) +[2026-06-07 03:30:20,641][528169] Updated weights for policy 0, policy_version 21290 (0.0008) +[2026-06-07 03:30:20,791][528169] Updated weights for policy 0, policy_version 21302 (0.0008) +[2026-06-07 03:30:20,931][528169] Updated weights for policy 0, policy_version 21312 (0.0008) +[2026-06-07 03:30:21,065][528169] Updated weights for policy 0, policy_version 21322 (0.0008) +[2026-06-07 03:30:21,699][528169] Updated weights for policy 0, policy_version 21333 (0.0008) +[2026-06-07 03:30:21,834][528169] Updated weights for policy 0, policy_version 21344 (0.0008) +[2026-06-07 03:30:21,965][528169] Updated weights for policy 0, policy_version 21354 (0.0008) +[2026-06-07 03:30:22,111][528169] Updated weights for policy 0, policy_version 21365 (0.0008) +[2026-06-07 03:30:22,245][528169] Updated weights for policy 0, policy_version 21375 (0.0008) +[2026-06-07 03:30:22,389][528169] Updated weights for policy 0, policy_version 21386 (0.0008) +[2026-06-07 03:30:23,025][528169] Updated weights for policy 0, policy_version 21396 (0.0008) +[2026-06-07 03:30:23,145][528169] Updated weights for policy 0, policy_version 21406 (0.0008) +[2026-06-07 03:30:23,282][528169] Updated weights for policy 0, policy_version 21416 (0.0008) +[2026-06-07 03:30:23,431][528169] Updated weights for policy 0, policy_version 21428 (0.0008) +[2026-06-07 03:30:23,562][528169] Updated weights for policy 0, policy_version 21438 (0.0008) +[2026-06-07 03:30:23,694][528169] Updated weights for policy 0, policy_version 21448 (0.0008) +[2026-06-07 03:30:24,326][528169] Updated weights for policy 0, policy_version 21458 (0.0008) +[2026-06-07 03:30:24,448][528169] Updated weights for policy 0, policy_version 21468 (0.0008) +[2026-06-07 03:30:24,584][528169] Updated weights for policy 0, policy_version 21479 (0.0008) +[2026-06-07 03:30:24,719][528169] Updated weights for policy 0, policy_version 21489 (0.0008) +[2026-06-07 03:30:24,852][528169] Updated weights for policy 0, policy_version 21499 (0.0008) +[2026-06-07 03:30:24,994][528169] Updated weights for policy 0, policy_version 21510 (0.0008) +[2026-06-07 03:30:25,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24029.8, 300 sec: 24326.1). Total num frames: 11010048. Throughput: 0: 24504.8. Samples: 11043968. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 03:30:25,105][527010] Avg episode reward: [(0, '74.843')] +[2026-06-07 03:30:25,133][528169] Updated weights for policy 0, policy_version 21520 (0.0008) +[2026-06-07 03:30:25,779][528169] Updated weights for policy 0, policy_version 21531 (0.0008) +[2026-06-07 03:30:25,909][528169] Updated weights for policy 0, policy_version 21541 (0.0008) +[2026-06-07 03:30:26,029][528169] Updated weights for policy 0, policy_version 21551 (0.0008) +[2026-06-07 03:30:26,166][528169] Updated weights for policy 0, policy_version 21561 (0.0008) +[2026-06-07 03:30:26,312][528169] Updated weights for policy 0, policy_version 21571 (0.0009) +[2026-06-07 03:30:26,438][528169] Updated weights for policy 0, policy_version 21581 (0.0008) +[2026-06-07 03:30:27,102][528169] Updated weights for policy 0, policy_version 21592 (0.0008) +[2026-06-07 03:30:27,240][528169] Updated weights for policy 0, policy_version 21603 (0.0008) +[2026-06-07 03:30:27,385][528169] Updated weights for policy 0, policy_version 21614 (0.0008) +[2026-06-07 03:30:27,524][528169] Updated weights for policy 0, policy_version 21625 (0.0008) +[2026-06-07 03:30:27,661][528169] Updated weights for policy 0, policy_version 21635 (0.0008) +[2026-06-07 03:30:27,799][528169] Updated weights for policy 0, policy_version 21646 (0.0008) +[2026-06-07 03:30:28,467][528169] Updated weights for policy 0, policy_version 21658 (0.0008) +[2026-06-07 03:30:28,595][528169] Updated weights for policy 0, policy_version 21668 (0.0008) +[2026-06-07 03:30:28,739][528169] Updated weights for policy 0, policy_version 21679 (0.0008) +[2026-06-07 03:30:28,880][528169] Updated weights for policy 0, policy_version 21690 (0.0008) +[2026-06-07 03:30:29,011][528169] Updated weights for policy 0, policy_version 21700 (0.0008) +[2026-06-07 03:30:29,163][528169] Updated weights for policy 0, policy_version 21711 (0.0008) +[2026-06-07 03:30:29,797][528169] Updated weights for policy 0, policy_version 21721 (0.0008) +[2026-06-07 03:30:29,931][528169] Updated weights for policy 0, policy_version 21731 (0.0008) +[2026-06-07 03:30:30,058][528169] Updated weights for policy 0, policy_version 21741 (0.0008) +[2026-06-07 03:30:30,103][527010] Fps is (10 sec: 22937.7, 60 sec: 24576.0, 300 sec: 24326.1). Total num frames: 11141120. Throughput: 0: 24522.0. Samples: 11114368. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 03:30:30,103][527010] Avg episode reward: [(0, '65.708')] +[2026-06-07 03:30:30,196][528169] Updated weights for policy 0, policy_version 21751 (0.0008) +[2026-06-07 03:30:30,345][528169] Updated weights for policy 0, policy_version 21762 (0.0008) +[2026-06-07 03:30:30,518][528169] Updated weights for policy 0, policy_version 21775 (0.0008) +[2026-06-07 03:30:31,128][528169] Updated weights for policy 0, policy_version 21785 (0.0008) +[2026-06-07 03:30:31,258][528169] Updated weights for policy 0, policy_version 21795 (0.0008) +[2026-06-07 03:30:31,409][528169] Updated weights for policy 0, policy_version 21807 (0.0008) +[2026-06-07 03:30:31,549][528169] Updated weights for policy 0, policy_version 21817 (0.0008) +[2026-06-07 03:30:31,684][528169] Updated weights for policy 0, policy_version 21827 (0.0008) +[2026-06-07 03:30:31,816][528169] Updated weights for policy 0, policy_version 21837 (0.0008) +[2026-06-07 03:30:32,441][528169] Updated weights for policy 0, policy_version 21847 (0.0008) +[2026-06-07 03:30:32,568][528169] Updated weights for policy 0, policy_version 21857 (0.0008) +[2026-06-07 03:30:32,731][528169] Updated weights for policy 0, policy_version 21869 (0.0008) +[2026-06-07 03:30:32,873][528169] Updated weights for policy 0, policy_version 21880 (0.0008) +[2026-06-07 03:30:33,002][528169] Updated weights for policy 0, policy_version 21890 (0.0008) +[2026-06-07 03:30:33,138][528169] Updated weights for policy 0, policy_version 21900 (0.0008) +[2026-06-07 03:30:33,739][528169] Updated weights for policy 0, policy_version 21910 (0.0008) +[2026-06-07 03:30:33,878][528169] Updated weights for policy 0, policy_version 21920 (0.0008) +[2026-06-07 03:30:34,018][528169] Updated weights for policy 0, policy_version 21931 (0.0008) +[2026-06-07 03:30:34,163][528169] Updated weights for policy 0, policy_version 21942 (0.0008) +[2026-06-07 03:30:34,305][528169] Updated weights for policy 0, policy_version 21953 (0.0008) +[2026-06-07 03:30:34,457][528169] Updated weights for policy 0, policy_version 21964 (0.0008) +[2026-06-07 03:30:35,098][528169] Updated weights for policy 0, policy_version 21975 (0.0008) +[2026-06-07 03:30:35,103][527010] Fps is (10 sec: 26214.8, 60 sec: 24576.0, 300 sec: 24326.1). Total num frames: 11272192. Throughput: 0: 24231.8. Samples: 11254272. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-07 03:30:35,104][527010] Avg episode reward: [(0, '74.519')] +[2026-06-07 03:30:35,253][528169] Updated weights for policy 0, policy_version 21987 (0.0008) +[2026-06-07 03:30:35,385][528169] Updated weights for policy 0, policy_version 21997 (0.0008) +[2026-06-07 03:30:35,535][528169] Updated weights for policy 0, policy_version 22008 (0.0008) +[2026-06-07 03:30:35,669][528169] Updated weights for policy 0, policy_version 22018 (0.0008) +[2026-06-07 03:30:35,803][528169] Updated weights for policy 0, policy_version 22028 (0.0008) +[2026-06-07 03:30:36,408][528169] Updated weights for policy 0, policy_version 22038 (0.0008) +[2026-06-07 03:30:36,553][528169] Updated weights for policy 0, policy_version 22049 (0.0008) +[2026-06-07 03:30:36,689][528169] Updated weights for policy 0, policy_version 22059 (0.0009) +[2026-06-07 03:30:36,822][528169] Updated weights for policy 0, policy_version 22069 (0.0009) +[2026-06-07 03:30:36,955][528169] Updated weights for policy 0, policy_version 22079 (0.0008) +[2026-06-07 03:30:37,092][528169] Updated weights for policy 0, policy_version 22089 (0.0009) +[2026-06-07 03:30:37,710][528169] Updated weights for policy 0, policy_version 22100 (0.0007) +[2026-06-07 03:30:37,849][528169] Updated weights for policy 0, policy_version 22111 (0.0005) +[2026-06-07 03:30:38,002][528169] Updated weights for policy 0, policy_version 22122 (0.0005) +[2026-06-07 03:30:38,121][528169] Updated weights for policy 0, policy_version 22132 (0.0007) +[2026-06-07 03:30:38,251][528169] Updated weights for policy 0, policy_version 22142 (0.0008) +[2026-06-07 03:30:38,386][528169] Updated weights for policy 0, policy_version 22152 (0.0008) +[2026-06-07 03:30:39,034][528169] Updated weights for policy 0, policy_version 22163 (0.0008) +[2026-06-07 03:30:39,181][528169] Updated weights for policy 0, policy_version 22175 (0.0009) +[2026-06-07 03:30:39,319][528169] Updated weights for policy 0, policy_version 22185 (0.0008) +[2026-06-07 03:30:39,470][528169] Updated weights for policy 0, policy_version 22196 (0.0008) +[2026-06-07 03:30:39,596][528169] Updated weights for policy 0, policy_version 22206 (0.0008) +[2026-06-07 03:30:39,728][528169] Updated weights for policy 0, policy_version 22216 (0.0008) +[2026-06-07 03:30:40,103][527010] Fps is (10 sec: 26214.4, 60 sec: 24576.0, 300 sec: 24437.2). Total num frames: 11403264. Throughput: 0: 24507.8. Samples: 11407616. Policy #0 lag: (min: 52.0, avg: 65.2, max: 116.0) +[2026-06-07 03:30:40,103][527010] Avg episode reward: [(0, '76.943')] +[2026-06-07 03:30:40,351][528169] Updated weights for policy 0, policy_version 22227 (0.0009) +[2026-06-07 03:30:40,479][528169] Updated weights for policy 0, policy_version 22237 (0.0008) +[2026-06-07 03:30:40,635][528169] Updated weights for policy 0, policy_version 22249 (0.0008) +[2026-06-07 03:30:40,772][528169] Updated weights for policy 0, policy_version 22259 (0.0005) +[2026-06-07 03:30:40,919][528169] Updated weights for policy 0, policy_version 22270 (0.0007) +[2026-06-07 03:30:41,054][528169] Updated weights for policy 0, policy_version 22280 (0.0008) +[2026-06-07 03:30:41,686][528169] Updated weights for policy 0, policy_version 22291 (0.0008) +[2026-06-07 03:30:41,812][528169] Updated weights for policy 0, policy_version 22301 (0.0008) +[2026-06-07 03:30:41,954][528169] Updated weights for policy 0, policy_version 22312 (0.0009) +[2026-06-07 03:30:42,102][528169] Updated weights for policy 0, policy_version 22323 (0.0009) +[2026-06-07 03:30:42,238][528169] Updated weights for policy 0, policy_version 22333 (0.0009) +[2026-06-07 03:30:42,381][528169] Updated weights for policy 0, policy_version 22344 (0.0009) +[2026-06-07 03:30:43,007][528169] Updated weights for policy 0, policy_version 22354 (0.0008) +[2026-06-07 03:30:43,158][528169] Updated weights for policy 0, policy_version 22365 (0.0008) +[2026-06-07 03:30:43,285][528169] Updated weights for policy 0, policy_version 22375 (0.0008) +[2026-06-07 03:30:43,435][528169] Updated weights for policy 0, policy_version 22387 (0.0008) +[2026-06-07 03:30:43,576][528169] Updated weights for policy 0, policy_version 22398 (0.0008) +[2026-06-07 03:30:43,719][528169] Updated weights for policy 0, policy_version 22409 (0.0008) +[2026-06-07 03:30:44,370][528169] Updated weights for policy 0, policy_version 22419 (0.0008) +[2026-06-07 03:30:44,509][528169] Updated weights for policy 0, policy_version 22429 (0.0008) +[2026-06-07 03:30:44,656][528169] Updated weights for policy 0, policy_version 22440 (0.0008) +[2026-06-07 03:30:44,786][528169] Updated weights for policy 0, policy_version 22450 (0.0008) +[2026-06-07 03:30:44,919][528169] Updated weights for policy 0, policy_version 22460 (0.0008) +[2026-06-07 03:30:45,063][528169] Updated weights for policy 0, policy_version 22471 (0.0008) +[2026-06-07 03:30:45,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24576.0, 300 sec: 24326.1). Total num frames: 11501568. Throughput: 0: 24470.8. Samples: 11477760. Policy #0 lag: (min: 52.0, avg: 65.2, max: 116.0) +[2026-06-07 03:30:45,104][527010] Avg episode reward: [(0, '83.107')] +[2026-06-07 03:30:45,691][528169] Updated weights for policy 0, policy_version 22481 (0.0008) +[2026-06-07 03:30:45,817][528169] Updated weights for policy 0, policy_version 22491 (0.0008) +[2026-06-07 03:30:45,954][528169] Updated weights for policy 0, policy_version 22501 (0.0008) +[2026-06-07 03:30:46,081][528169] Updated weights for policy 0, policy_version 22511 (0.0009) +[2026-06-07 03:30:46,229][528169] Updated weights for policy 0, policy_version 22522 (0.0008) +[2026-06-07 03:30:46,367][528169] Updated weights for policy 0, policy_version 22533 (0.0008) +[2026-06-07 03:30:46,510][528169] Updated weights for policy 0, policy_version 22544 (0.0008) +[2026-06-07 03:30:47,181][528169] Updated weights for policy 0, policy_version 22555 (0.0009) +[2026-06-07 03:30:47,315][528169] Updated weights for policy 0, policy_version 22565 (0.0008) +[2026-06-07 03:30:47,466][528169] Updated weights for policy 0, policy_version 22576 (0.0008) +[2026-06-07 03:30:47,597][528169] Updated weights for policy 0, policy_version 22586 (0.0009) +[2026-06-07 03:30:47,725][528169] Updated weights for policy 0, policy_version 22596 (0.0008) +[2026-06-07 03:30:47,874][528169] Updated weights for policy 0, policy_version 22607 (0.0008) +[2026-06-07 03:30:48,507][528169] Updated weights for policy 0, policy_version 22617 (0.0008) +[2026-06-07 03:30:48,641][528169] Updated weights for policy 0, policy_version 22628 (0.0008) +[2026-06-07 03:30:48,792][528169] Updated weights for policy 0, policy_version 22639 (0.0008) +[2026-06-07 03:30:48,911][528169] Updated weights for policy 0, policy_version 22649 (0.0008) +[2026-06-07 03:30:49,063][528169] Updated weights for policy 0, policy_version 22660 (0.0008) +[2026-06-07 03:30:49,199][528169] Updated weights for policy 0, policy_version 22671 (0.0008) +[2026-06-07 03:30:49,849][528169] Updated weights for policy 0, policy_version 22681 (0.0008) +[2026-06-07 03:30:49,992][528169] Updated weights for policy 0, policy_version 22692 (0.0008) +[2026-06-07 03:30:50,103][527010] Fps is (10 sec: 22937.5, 60 sec: 24576.0, 300 sec: 24326.1). Total num frames: 11632640. Throughput: 0: 24450.8. Samples: 11629056. Policy #0 lag: (min: 52.0, avg: 65.2, max: 116.0) +[2026-06-07 03:30:50,104][527010] Avg episode reward: [(0, '75.595')] +[2026-06-07 03:30:50,148][528169] Updated weights for policy 0, policy_version 22704 (0.0008) +[2026-06-07 03:30:50,279][528169] Updated weights for policy 0, policy_version 22714 (0.0008) +[2026-06-07 03:30:50,433][528169] Updated weights for policy 0, policy_version 22725 (0.0009) +[2026-06-07 03:30:50,580][528169] Updated weights for policy 0, policy_version 22736 (0.0008) +[2026-06-07 03:30:51,174][528169] Updated weights for policy 0, policy_version 22746 (0.0008) +[2026-06-07 03:30:51,318][528169] Updated weights for policy 0, policy_version 22757 (0.0008) +[2026-06-07 03:30:51,454][528169] Updated weights for policy 0, policy_version 22767 (0.0008) +[2026-06-07 03:30:51,589][528169] Updated weights for policy 0, policy_version 22777 (0.0008) +[2026-06-07 03:30:51,738][528169] Updated weights for policy 0, policy_version 22788 (0.0008) +[2026-06-07 03:30:51,873][528169] Updated weights for policy 0, policy_version 22798 (0.0008) +[2026-06-07 03:30:52,524][528169] Updated weights for policy 0, policy_version 22810 (0.0008) +[2026-06-07 03:30:52,677][528169] Updated weights for policy 0, policy_version 22821 (0.0008) +[2026-06-07 03:30:52,807][528169] Updated weights for policy 0, policy_version 22831 (0.0008) +[2026-06-07 03:30:52,946][528169] Updated weights for policy 0, policy_version 22841 (0.0008) +[2026-06-07 03:30:53,091][528169] Updated weights for policy 0, policy_version 22852 (0.0008) +[2026-06-07 03:30:53,227][528169] Updated weights for policy 0, policy_version 22862 (0.0008) +[2026-06-07 03:30:53,857][528169] Updated weights for policy 0, policy_version 22874 (0.0008) +[2026-06-07 03:30:54,011][528169] Updated weights for policy 0, policy_version 22886 (0.0008) +[2026-06-07 03:30:54,149][528169] Updated weights for policy 0, policy_version 22896 (0.0008) +[2026-06-07 03:30:54,295][528169] Updated weights for policy 0, policy_version 22907 (0.0008) +[2026-06-07 03:30:54,427][528169] Updated weights for policy 0, policy_version 22917 (0.0008) +[2026-06-07 03:30:54,551][528169] Updated weights for policy 0, policy_version 22927 (0.0008) +[2026-06-07 03:30:55,103][527010] Fps is (10 sec: 26214.1, 60 sec: 24575.9, 300 sec: 24437.1). Total num frames: 11763712. Throughput: 0: 24485.0. Samples: 11771648. Policy #0 lag: (min: 5.0, avg: 17.8, max: 69.0) +[2026-06-07 03:30:55,105][527010] Avg episode reward: [(0, '86.867')] +[2026-06-07 03:30:55,179][528169] Updated weights for policy 0, policy_version 22938 (0.0008) +[2026-06-07 03:30:55,335][528169] Updated weights for policy 0, policy_version 22950 (0.0008) +[2026-06-07 03:30:55,475][528169] Updated weights for policy 0, policy_version 22961 (0.0008) +[2026-06-07 03:30:55,631][528169] Updated weights for policy 0, policy_version 22973 (0.0008) +[2026-06-07 03:30:55,774][528169] Updated weights for policy 0, policy_version 22983 (0.0008) +[2026-06-07 03:30:55,886][528093] Saving new best policy, reward=86.867! +[2026-06-07 03:30:56,448][528169] Updated weights for policy 0, policy_version 22994 (0.0008) +[2026-06-07 03:30:56,594][528169] Updated weights for policy 0, policy_version 23005 (0.0008) +[2026-06-07 03:30:56,731][528169] Updated weights for policy 0, policy_version 23016 (0.0008) +[2026-06-07 03:30:56,878][528169] Updated weights for policy 0, policy_version 23027 (0.0008) +[2026-06-07 03:30:57,008][528169] Updated weights for policy 0, policy_version 23037 (0.0008) +[2026-06-07 03:30:57,148][528169] Updated weights for policy 0, policy_version 23047 (0.0008) +[2026-06-07 03:30:57,775][528169] Updated weights for policy 0, policy_version 23058 (0.0008) +[2026-06-07 03:30:57,903][528169] Updated weights for policy 0, policy_version 23068 (0.0008) +[2026-06-07 03:30:58,052][528169] Updated weights for policy 0, policy_version 23079 (0.0009) +[2026-06-07 03:30:58,179][528169] Updated weights for policy 0, policy_version 23089 (0.0008) +[2026-06-07 03:30:58,329][528169] Updated weights for policy 0, policy_version 23100 (0.0008) +[2026-06-07 03:30:58,461][528169] Updated weights for policy 0, policy_version 23110 (0.0008) +[2026-06-07 03:30:59,088][528169] Updated weights for policy 0, policy_version 23121 (0.0008) +[2026-06-07 03:30:59,216][528169] Updated weights for policy 0, policy_version 23131 (0.0009) +[2026-06-07 03:30:59,363][528169] Updated weights for policy 0, policy_version 23142 (0.0009) +[2026-06-07 03:30:59,494][528169] Updated weights for policy 0, policy_version 23152 (0.0008) +[2026-06-07 03:30:59,661][528169] Updated weights for policy 0, policy_version 23165 (0.0008) +[2026-06-07 03:30:59,796][528169] Updated weights for policy 0, policy_version 23175 (0.0008) +[2026-06-07 03:31:00,103][527010] Fps is (10 sec: 26214.4, 60 sec: 24576.0, 300 sec: 24437.2). Total num frames: 11894784. Throughput: 0: 24192.1. Samples: 11841664. Policy #0 lag: (min: 5.0, avg: 17.8, max: 69.0) +[2026-06-07 03:31:00,104][527010] Avg episode reward: [(0, '63.472')] +[2026-06-07 03:31:00,425][528169] Updated weights for policy 0, policy_version 23185 (0.0008) +[2026-06-07 03:31:00,566][528169] Updated weights for policy 0, policy_version 23196 (0.0008) +[2026-06-07 03:31:00,713][528169] Updated weights for policy 0, policy_version 23207 (0.0009) +[2026-06-07 03:31:00,872][528169] Updated weights for policy 0, policy_version 23219 (0.0008) +[2026-06-07 03:31:01,006][528169] Updated weights for policy 0, policy_version 23229 (0.0008) +[2026-06-07 03:31:01,152][528169] Updated weights for policy 0, policy_version 23240 (0.0008) +[2026-06-07 03:31:01,785][528169] Updated weights for policy 0, policy_version 23251 (0.0008) +[2026-06-07 03:31:01,914][528169] Updated weights for policy 0, policy_version 23261 (0.0009) +[2026-06-07 03:31:02,057][528169] Updated weights for policy 0, policy_version 23272 (0.0008) +[2026-06-07 03:31:02,191][528169] Updated weights for policy 0, policy_version 23282 (0.0008) +[2026-06-07 03:31:02,323][528169] Updated weights for policy 0, policy_version 23292 (0.0008) +[2026-06-07 03:31:02,454][528169] Updated weights for policy 0, policy_version 23302 (0.0008) +[2026-06-07 03:31:02,583][528169] Updated weights for policy 0, policy_version 23312 (0.0008) +[2026-06-07 03:31:03,205][528169] Updated weights for policy 0, policy_version 23322 (0.0008) +[2026-06-07 03:31:03,334][528169] Updated weights for policy 0, policy_version 23332 (0.0008) +[2026-06-07 03:31:03,459][528169] Updated weights for policy 0, policy_version 23342 (0.0008) +[2026-06-07 03:31:03,602][528169] Updated weights for policy 0, policy_version 23353 (0.0008) +[2026-06-07 03:31:03,732][528169] Updated weights for policy 0, policy_version 23363 (0.0008) +[2026-06-07 03:31:03,884][528169] Updated weights for policy 0, policy_version 23374 (0.0008) +[2026-06-07 03:31:04,520][528169] Updated weights for policy 0, policy_version 23384 (0.0008) +[2026-06-07 03:31:04,650][528169] Updated weights for policy 0, policy_version 23394 (0.0008) +[2026-06-07 03:31:04,807][528169] Updated weights for policy 0, policy_version 23406 (0.0008) +[2026-06-07 03:31:04,940][528169] Updated weights for policy 0, policy_version 23416 (0.0008) +[2026-06-07 03:31:05,090][528169] Updated weights for policy 0, policy_version 23427 (0.0008) +[2026-06-07 03:31:05,103][527010] Fps is (10 sec: 22937.8, 60 sec: 24576.0, 300 sec: 24326.1). Total num frames: 11993088. Throughput: 0: 24556.1. Samples: 11996160. Policy #0 lag: (min: 5.0, avg: 17.8, max: 69.0) +[2026-06-07 03:31:05,104][527010] Avg episode reward: [(0, '83.878')] +[2026-06-07 03:31:05,232][528169] Updated weights for policy 0, policy_version 23438 (0.0008) +[2026-06-07 03:31:05,858][528169] Updated weights for policy 0, policy_version 23448 (0.0008) +[2026-06-07 03:31:06,043][528169] Updated weights for policy 0, policy_version 23462 (0.0008) +[2026-06-07 03:31:06,199][528169] Updated weights for policy 0, policy_version 23474 (0.0008) +[2026-06-07 03:31:06,330][528169] Updated weights for policy 0, policy_version 23484 (0.0008) +[2026-06-07 03:31:06,488][528169] Updated weights for policy 0, policy_version 23496 (0.0008) +[2026-06-07 03:31:07,139][528169] Updated weights for policy 0, policy_version 23506 (0.0008) +[2026-06-07 03:31:07,280][528169] Updated weights for policy 0, policy_version 23517 (0.0008) +[2026-06-07 03:31:07,406][528169] Updated weights for policy 0, policy_version 23527 (0.0008) +[2026-06-07 03:31:07,552][528169] Updated weights for policy 0, policy_version 23538 (0.0008) +[2026-06-07 03:31:07,743][528169] Updated weights for policy 0, policy_version 23552 (0.0008) +[2026-06-07 03:31:07,882][528169] Updated weights for policy 0, policy_version 23563 (0.0008) +[2026-06-07 03:31:08,548][528169] Updated weights for policy 0, policy_version 23576 (0.0008) +[2026-06-07 03:31:08,685][528169] Updated weights for policy 0, policy_version 23587 (0.0008) +[2026-06-07 03:31:08,827][528169] Updated weights for policy 0, policy_version 23598 (0.0008) +[2026-06-07 03:31:08,966][528169] Updated weights for policy 0, policy_version 23608 (0.0008) +[2026-06-07 03:31:09,108][528169] Updated weights for policy 0, policy_version 23619 (0.0008) +[2026-06-07 03:31:09,238][528169] Updated weights for policy 0, policy_version 23629 (0.0008) +[2026-06-07 03:31:09,903][528169] Updated weights for policy 0, policy_version 23640 (0.0008) +[2026-06-07 03:31:10,025][528169] Updated weights for policy 0, policy_version 23650 (0.0008) +[2026-06-07 03:31:10,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24576.0, 300 sec: 24437.2). Total num frames: 12124160. Throughput: 0: 24260.4. Samples: 12135680. Policy #0 lag: (min: 63.0, avg: 75.9, max: 127.0) +[2026-06-07 03:31:10,104][527010] Avg episode reward: [(0, '75.933')] +[2026-06-07 03:31:10,175][528169] Updated weights for policy 0, policy_version 23661 (0.0008) +[2026-06-07 03:31:10,314][528169] Updated weights for policy 0, policy_version 23672 (0.0008) +[2026-06-07 03:31:10,490][528169] Updated weights for policy 0, policy_version 23685 (0.0008) +[2026-06-07 03:31:10,621][528169] Updated weights for policy 0, policy_version 23695 (0.0005) +[2026-06-07 03:31:11,255][528169] Updated weights for policy 0, policy_version 23707 (0.0008) +[2026-06-07 03:31:11,398][528169] Updated weights for policy 0, policy_version 23718 (0.0009) +[2026-06-07 03:31:11,552][528169] Updated weights for policy 0, policy_version 23730 (0.0008) +[2026-06-07 03:31:11,702][528169] Updated weights for policy 0, policy_version 23741 (0.0005) +[2026-06-07 03:31:11,847][528169] Updated weights for policy 0, policy_version 23752 (0.0005) +[2026-06-07 03:31:12,434][528169] Updated weights for policy 0, policy_version 23762 (0.0006) +[2026-06-07 03:31:12,562][528169] Updated weights for policy 0, policy_version 23772 (0.0008) +[2026-06-07 03:31:12,687][528169] Updated weights for policy 0, policy_version 23782 (0.0011) +[2026-06-07 03:31:12,823][528169] Updated weights for policy 0, policy_version 23792 (0.0006) +[2026-06-07 03:31:12,951][528169] Updated weights for policy 0, policy_version 23802 (0.0005) +[2026-06-07 03:31:13,117][528169] Updated weights for policy 0, policy_version 23814 (0.0005) +[2026-06-07 03:31:13,243][528169] Updated weights for policy 0, policy_version 23824 (0.0005) +[2026-06-07 03:31:13,865][528169] Updated weights for policy 0, policy_version 23834 (0.0011) +[2026-06-07 03:31:13,990][528169] Updated weights for policy 0, policy_version 23844 (0.0005) +[2026-06-07 03:31:14,135][528169] Updated weights for policy 0, policy_version 23854 (0.0005) +[2026-06-07 03:31:14,267][528169] Updated weights for policy 0, policy_version 23864 (0.0005) +[2026-06-07 03:31:14,425][528169] Updated weights for policy 0, policy_version 23876 (0.0007) +[2026-06-07 03:31:14,574][528169] Updated weights for policy 0, policy_version 23887 (0.0008) +[2026-06-07 03:31:15,103][527010] Fps is (10 sec: 26214.5, 60 sec: 24576.1, 300 sec: 24437.2). Total num frames: 12255232. Throughput: 0: 24487.8. Samples: 12216320. Policy #0 lag: (min: 63.0, avg: 75.9, max: 127.0) +[2026-06-07 03:31:15,104][527010] Avg episode reward: [(0, '75.319')] +[2026-06-07 03:31:15,211][528169] Updated weights for policy 0, policy_version 23898 (0.0008) +[2026-06-07 03:31:15,341][528169] Updated weights for policy 0, policy_version 23908 (0.0008) +[2026-06-07 03:31:15,464][528169] Updated weights for policy 0, policy_version 23918 (0.0008) +[2026-06-07 03:31:15,612][528169] Updated weights for policy 0, policy_version 23929 (0.0008) +[2026-06-07 03:31:15,741][528169] Updated weights for policy 0, policy_version 23939 (0.0008) +[2026-06-07 03:31:15,880][528169] Updated weights for policy 0, policy_version 23950 (0.0008) +[2026-06-07 03:31:16,530][528169] Updated weights for policy 0, policy_version 23960 (0.0008) +[2026-06-07 03:31:16,653][528169] Updated weights for policy 0, policy_version 23970 (0.0008) +[2026-06-07 03:31:16,787][528169] Updated weights for policy 0, policy_version 23980 (0.0008) +[2026-06-07 03:31:16,916][528169] Updated weights for policy 0, policy_version 23990 (0.0008) +[2026-06-07 03:31:17,045][528169] Updated weights for policy 0, policy_version 24000 (0.0008) +[2026-06-07 03:31:17,214][528169] Updated weights for policy 0, policy_version 24012 (0.0008) +[2026-06-07 03:31:17,871][528169] Updated weights for policy 0, policy_version 24024 (0.0008) +[2026-06-07 03:31:18,017][528169] Updated weights for policy 0, policy_version 24035 (0.0008) +[2026-06-07 03:31:18,174][528169] Updated weights for policy 0, policy_version 24047 (0.0008) +[2026-06-07 03:31:18,327][528169] Updated weights for policy 0, policy_version 24059 (0.0008) +[2026-06-07 03:31:18,476][528169] Updated weights for policy 0, policy_version 24071 (0.0008) +[2026-06-07 03:31:19,120][528169] Updated weights for policy 0, policy_version 24082 (0.0008) +[2026-06-07 03:31:19,243][528169] Updated weights for policy 0, policy_version 24092 (0.0008) +[2026-06-07 03:31:19,364][528169] Updated weights for policy 0, policy_version 24102 (0.0008) +[2026-06-07 03:31:19,509][528169] Updated weights for policy 0, policy_version 24114 (0.0008) +[2026-06-07 03:31:19,644][528169] Updated weights for policy 0, policy_version 24124 (0.0008) +[2026-06-07 03:31:19,844][528169] Updated weights for policy 0, policy_version 24140 (0.0008) +[2026-06-07 03:31:20,103][527010] Fps is (10 sec: 26213.1, 60 sec: 24575.8, 300 sec: 24437.1). Total num frames: 12386304. Throughput: 0: 24646.8. Samples: 12363392. Policy #0 lag: (min: 63.0, avg: 75.9, max: 127.0) +[2026-06-07 03:31:20,105][527010] Avg episode reward: [(0, '81.622')] +[2026-06-07 03:31:20,531][528169] Updated weights for policy 0, policy_version 24151 (0.0009) +[2026-06-07 03:31:20,671][528169] Updated weights for policy 0, policy_version 24162 (0.0009) +[2026-06-07 03:31:20,809][528169] Updated weights for policy 0, policy_version 24173 (0.0008) +[2026-06-07 03:31:20,962][528169] Updated weights for policy 0, policy_version 24185 (0.0008) +[2026-06-07 03:31:21,097][528169] Updated weights for policy 0, policy_version 24195 (0.0008) +[2026-06-07 03:31:21,247][528169] Updated weights for policy 0, policy_version 24207 (0.0008) +[2026-06-07 03:31:21,932][528169] Updated weights for policy 0, policy_version 24220 (0.0008) +[2026-06-07 03:31:22,090][528169] Updated weights for policy 0, policy_version 24233 (0.0008) +[2026-06-07 03:31:22,253][528169] Updated weights for policy 0, policy_version 24246 (0.0008) +[2026-06-07 03:31:22,427][528169] Updated weights for policy 0, policy_version 24259 (0.0008) +[2026-06-07 03:31:22,564][528169] Updated weights for policy 0, policy_version 24270 (0.0008) +[2026-06-07 03:31:23,256][528169] Updated weights for policy 0, policy_version 24281 (0.0008) +[2026-06-07 03:31:23,412][528169] Updated weights for policy 0, policy_version 24293 (0.0008) +[2026-06-07 03:31:23,567][528169] Updated weights for policy 0, policy_version 24306 (0.0008) +[2026-06-07 03:31:23,727][528169] Updated weights for policy 0, policy_version 24318 (0.0008) +[2026-06-07 03:31:23,865][528169] Updated weights for policy 0, policy_version 24329 (0.0008) +[2026-06-07 03:31:24,530][528169] Updated weights for policy 0, policy_version 24340 (0.0009) +[2026-06-07 03:31:24,689][528169] Updated weights for policy 0, policy_version 24353 (0.0008) +[2026-06-07 03:31:24,858][528169] Updated weights for policy 0, policy_version 24367 (0.0008) +[2026-06-07 03:31:25,042][528169] Updated weights for policy 0, policy_version 24381 (0.0008) +[2026-06-07 03:31:25,103][527010] Fps is (10 sec: 22937.7, 60 sec: 24576.1, 300 sec: 24437.2). Total num frames: 12484608. Throughput: 0: 24485.0. Samples: 12509440. Policy #0 lag: (min: 63.0, avg: 75.9, max: 127.0) +[2026-06-07 03:31:25,103][527010] Avg episode reward: [(0, '95.054')] +[2026-06-07 03:31:25,196][528169] Updated weights for policy 0, policy_version 24393 (0.0008) +[2026-06-07 03:31:25,275][528093] Saving new best policy, reward=95.054! +[2026-06-07 03:31:25,885][528169] Updated weights for policy 0, policy_version 24408 (0.0008) +[2026-06-07 03:31:26,011][528169] Updated weights for policy 0, policy_version 24418 (0.0008) +[2026-06-07 03:31:26,159][528169] Updated weights for policy 0, policy_version 24430 (0.0008) +[2026-06-07 03:31:26,326][528169] Updated weights for policy 0, policy_version 24443 (0.0009) +[2026-06-07 03:31:26,476][528169] Updated weights for policy 0, policy_version 24455 (0.0008) +[2026-06-07 03:31:27,149][528169] Updated weights for policy 0, policy_version 24466 (0.0008) +[2026-06-07 03:31:27,293][528169] Updated weights for policy 0, policy_version 24478 (0.0008) +[2026-06-07 03:31:27,433][528169] Updated weights for policy 0, policy_version 24489 (0.0008) +[2026-06-07 03:31:27,581][528169] Updated weights for policy 0, policy_version 24501 (0.0008) +[2026-06-07 03:31:27,738][528169] Updated weights for policy 0, policy_version 24513 (0.0008) +[2026-06-07 03:31:27,906][528169] Updated weights for policy 0, policy_version 24526 (0.0008) +[2026-06-07 03:31:28,569][528169] Updated weights for policy 0, policy_version 24538 (0.0008) +[2026-06-07 03:31:28,698][528169] Updated weights for policy 0, policy_version 24548 (0.0008) +[2026-06-07 03:31:28,843][528169] Updated weights for policy 0, policy_version 24560 (0.0008) +[2026-06-07 03:31:28,975][528169] Updated weights for policy 0, policy_version 24570 (0.0009) +[2026-06-07 03:31:29,108][528169] Updated weights for policy 0, policy_version 24580 (0.0009) +[2026-06-07 03:31:29,262][528169] Updated weights for policy 0, policy_version 24591 (0.0008) +[2026-06-07 03:31:29,893][528169] Updated weights for policy 0, policy_version 24601 (0.0008) +[2026-06-07 03:31:30,011][528169] Updated weights for policy 0, policy_version 24611 (0.0008) +[2026-06-07 03:31:30,103][527010] Fps is (10 sec: 22938.7, 60 sec: 24576.0, 300 sec: 24437.2). Total num frames: 12615680. Throughput: 0: 24630.0. Samples: 12586112. Policy #0 lag: (min: 55.0, avg: 65.8, max: 119.0) +[2026-06-07 03:31:30,104][527010] Avg episode reward: [(0, '84.460')] +[2026-06-07 03:31:30,148][528169] Updated weights for policy 0, policy_version 24621 (0.0008) +[2026-06-07 03:31:30,279][528169] Updated weights for policy 0, policy_version 24631 (0.0008) +[2026-06-07 03:31:30,417][528169] Updated weights for policy 0, policy_version 24641 (0.0008) +[2026-06-07 03:31:30,561][528169] Updated weights for policy 0, policy_version 24652 (0.0008) +[2026-06-07 03:31:31,234][528169] Updated weights for policy 0, policy_version 24666 (0.0008) +[2026-06-07 03:31:31,378][528169] Updated weights for policy 0, policy_version 24677 (0.0008) +[2026-06-07 03:31:31,529][528169] Updated weights for policy 0, policy_version 24688 (0.0008) +[2026-06-07 03:31:31,672][528169] Updated weights for policy 0, policy_version 24699 (0.0008) +[2026-06-07 03:31:31,806][528169] Updated weights for policy 0, policy_version 24709 (0.0008) +[2026-06-07 03:31:31,951][528169] Updated weights for policy 0, policy_version 24720 (0.0009) +[2026-06-07 03:31:32,570][528169] Updated weights for policy 0, policy_version 24731 (0.0008) +[2026-06-07 03:31:32,698][528169] Updated weights for policy 0, policy_version 24741 (0.0008) +[2026-06-07 03:31:32,827][528169] Updated weights for policy 0, policy_version 24751 (0.0004) +[2026-06-07 03:31:32,968][528169] Updated weights for policy 0, policy_version 24761 (0.0005) +[2026-06-07 03:31:33,099][528169] Updated weights for policy 0, policy_version 24771 (0.0005) +[2026-06-07 03:31:33,231][528169] Updated weights for policy 0, policy_version 24781 (0.0007) +[2026-06-07 03:31:33,881][528169] Updated weights for policy 0, policy_version 24793 (0.0008) +[2026-06-07 03:31:34,021][528169] Updated weights for policy 0, policy_version 24804 (0.0008) +[2026-06-07 03:31:34,155][528169] Updated weights for policy 0, policy_version 24814 (0.0008) +[2026-06-07 03:31:34,288][528169] Updated weights for policy 0, policy_version 24824 (0.0008) +[2026-06-07 03:31:34,433][528169] Updated weights for policy 0, policy_version 24835 (0.0008) +[2026-06-07 03:31:34,582][528169] Updated weights for policy 0, policy_version 24846 (0.0008) +[2026-06-07 03:31:35,103][527010] Fps is (10 sec: 26214.0, 60 sec: 24575.9, 300 sec: 24437.1). Total num frames: 12746752. Throughput: 0: 24402.4. Samples: 12727168. Policy #0 lag: (min: 55.0, avg: 65.8, max: 119.0) +[2026-06-07 03:31:35,104][527010] Avg episode reward: [(0, '85.590')] +[2026-06-07 03:31:35,183][528169] Updated weights for policy 0, policy_version 24856 (0.0008) +[2026-06-07 03:31:35,318][528169] Updated weights for policy 0, policy_version 24866 (0.0008) +[2026-06-07 03:31:35,461][528169] Updated weights for policy 0, policy_version 24877 (0.0008) +[2026-06-07 03:31:35,606][528169] Updated weights for policy 0, policy_version 24888 (0.0008) +[2026-06-07 03:31:35,769][528169] Updated weights for policy 0, policy_version 24900 (0.0008) +[2026-06-07 03:31:35,911][528169] Updated weights for policy 0, policy_version 24911 (0.0008) +[2026-06-07 03:31:36,538][528169] Updated weights for policy 0, policy_version 24921 (0.0008) +[2026-06-07 03:31:36,663][528169] Updated weights for policy 0, policy_version 24931 (0.0008) +[2026-06-07 03:31:36,841][528169] Updated weights for policy 0, policy_version 24945 (0.0008) +[2026-06-07 03:31:36,976][528169] Updated weights for policy 0, policy_version 24955 (0.0008) +[2026-06-07 03:31:37,119][528169] Updated weights for policy 0, policy_version 24966 (0.0008) +[2026-06-07 03:31:37,252][528169] Updated weights for policy 0, policy_version 24976 (0.0008) +[2026-06-07 03:31:37,906][528169] Updated weights for policy 0, policy_version 24988 (0.0008) +[2026-06-07 03:31:38,049][528169] Updated weights for policy 0, policy_version 24999 (0.0008) +[2026-06-07 03:31:38,186][528169] Updated weights for policy 0, policy_version 25009 (0.0008) +[2026-06-07 03:31:38,350][528169] Updated weights for policy 0, policy_version 25021 (0.0008) +[2026-06-07 03:31:38,479][528169] Updated weights for policy 0, policy_version 25031 (0.0008) +[2026-06-07 03:31:39,167][528169] Updated weights for policy 0, policy_version 25043 (0.0008) +[2026-06-07 03:31:39,298][528169] Updated weights for policy 0, policy_version 25054 (0.0008) +[2026-06-07 03:31:39,424][528169] Updated weights for policy 0, policy_version 25064 (0.0008) +[2026-06-07 03:31:39,592][528169] Updated weights for policy 0, policy_version 25076 (0.0008) +[2026-06-07 03:31:39,739][528169] Updated weights for policy 0, policy_version 25087 (0.0009) +[2026-06-07 03:31:39,867][528169] Updated weights for policy 0, policy_version 25097 (0.0009) +[2026-06-07 03:31:40,103][527010] Fps is (10 sec: 26214.5, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 12877824. Throughput: 0: 24672.8. Samples: 12881920. Policy #0 lag: (min: 55.0, avg: 65.8, max: 119.0) +[2026-06-07 03:31:40,104][527010] Avg episode reward: [(0, '80.300')] +[2026-06-07 03:31:40,532][528169] Updated weights for policy 0, policy_version 25108 (0.0008) +[2026-06-07 03:31:40,662][528169] Updated weights for policy 0, policy_version 25118 (0.0008) +[2026-06-07 03:31:40,808][528169] Updated weights for policy 0, policy_version 25129 (0.0009) +[2026-06-07 03:31:40,951][528169] Updated weights for policy 0, policy_version 25140 (0.0008) +[2026-06-07 03:31:41,099][528169] Updated weights for policy 0, policy_version 25151 (0.0008) +[2026-06-07 03:31:41,224][528169] Updated weights for policy 0, policy_version 25161 (0.0008) +[2026-06-07 03:31:41,878][528169] Updated weights for policy 0, policy_version 25172 (0.0008) +[2026-06-07 03:31:42,015][528169] Updated weights for policy 0, policy_version 25183 (0.0008) +[2026-06-07 03:31:42,172][528169] Updated weights for policy 0, policy_version 25195 (0.0008) +[2026-06-07 03:31:42,327][528169] Updated weights for policy 0, policy_version 25207 (0.0010) +[2026-06-07 03:31:42,464][528169] Updated weights for policy 0, policy_version 25217 (0.0009) +[2026-06-07 03:31:42,604][528169] Updated weights for policy 0, policy_version 25228 (0.0008) +[2026-06-07 03:31:43,241][528169] Updated weights for policy 0, policy_version 25240 (0.0008) +[2026-06-07 03:31:43,373][528169] Updated weights for policy 0, policy_version 25251 (0.0008) +[2026-06-07 03:31:43,512][528169] Updated weights for policy 0, policy_version 25261 (0.0008) +[2026-06-07 03:31:43,658][528169] Updated weights for policy 0, policy_version 25272 (0.0008) +[2026-06-07 03:31:43,817][528169] Updated weights for policy 0, policy_version 25284 (0.0008) +[2026-06-07 03:31:43,965][528169] Updated weights for policy 0, policy_version 25295 (0.0008) +[2026-06-07 03:31:44,587][528169] Updated weights for policy 0, policy_version 25305 (0.0008) +[2026-06-07 03:31:44,724][528169] Updated weights for policy 0, policy_version 25315 (0.0008) +[2026-06-07 03:31:44,851][528169] Updated weights for policy 0, policy_version 25325 (0.0008) +[2026-06-07 03:31:44,999][528169] Updated weights for policy 0, policy_version 25337 (0.0008) +[2026-06-07 03:31:45,103][527010] Fps is (10 sec: 22937.5, 60 sec: 24575.9, 300 sec: 24437.1). Total num frames: 12976128. Throughput: 0: 24675.5. Samples: 12952064. Policy #0 lag: (min: 63.0, avg: 76.2, max: 127.0) +[2026-06-07 03:31:45,104][527010] Avg episode reward: [(0, '87.141')] +[2026-06-07 03:31:45,159][528169] Updated weights for policy 0, policy_version 25349 (0.0008) +[2026-06-07 03:31:45,303][528169] Updated weights for policy 0, policy_version 25360 (0.0008) +[2026-06-07 03:31:45,941][528169] Updated weights for policy 0, policy_version 25371 (0.0008) +[2026-06-07 03:31:46,076][528169] Updated weights for policy 0, policy_version 25381 (0.0008) +[2026-06-07 03:31:46,206][528169] Updated weights for policy 0, policy_version 25391 (0.0008) +[2026-06-07 03:31:46,357][528169] Updated weights for policy 0, policy_version 25402 (0.0008) +[2026-06-07 03:31:46,486][528169] Updated weights for policy 0, policy_version 25412 (0.0008) +[2026-06-07 03:31:46,615][528169] Updated weights for policy 0, policy_version 25422 (0.0008) +[2026-06-07 03:31:47,230][528169] Updated weights for policy 0, policy_version 25432 (0.0008) +[2026-06-07 03:31:47,384][528169] Updated weights for policy 0, policy_version 25444 (0.0008) +[2026-06-07 03:31:47,533][528169] Updated weights for policy 0, policy_version 25455 (0.0008) +[2026-06-07 03:31:47,667][528169] Updated weights for policy 0, policy_version 25465 (0.0008) +[2026-06-07 03:31:47,804][528169] Updated weights for policy 0, policy_version 25476 (0.0008) +[2026-06-07 03:31:47,940][528169] Updated weights for policy 0, policy_version 25486 (0.0008) +[2026-06-07 03:31:48,572][528169] Updated weights for policy 0, policy_version 25496 (0.0008) +[2026-06-07 03:31:48,725][528169] Updated weights for policy 0, policy_version 25508 (0.0008) +[2026-06-07 03:31:48,864][528169] Updated weights for policy 0, policy_version 25518 (0.0007) +[2026-06-07 03:31:48,998][528169] Updated weights for policy 0, policy_version 25528 (0.0008) +[2026-06-07 03:31:49,146][528169] Updated weights for policy 0, policy_version 25539 (0.0008) +[2026-06-07 03:31:49,280][528169] Updated weights for policy 0, policy_version 25549 (0.0008) +[2026-06-07 03:31:49,893][528169] Updated weights for policy 0, policy_version 25560 (0.0008) +[2026-06-07 03:31:50,032][528169] Updated weights for policy 0, policy_version 25571 (0.0008) +[2026-06-07 03:31:50,103][527010] Fps is (10 sec: 22937.5, 60 sec: 24576.0, 300 sec: 24437.2). Total num frames: 13107200. Throughput: 0: 24513.4. Samples: 13099264. Policy #0 lag: (min: 63.0, avg: 76.2, max: 127.0) +[2026-06-07 03:31:50,104][527010] Avg episode reward: [(0, '86.711')] +[2026-06-07 03:31:50,157][528169] Updated weights for policy 0, policy_version 25581 (0.0008) +[2026-06-07 03:31:50,296][528169] Updated weights for policy 0, policy_version 25591 (0.0008) +[2026-06-07 03:31:50,444][528169] Updated weights for policy 0, policy_version 25602 (0.0008) +[2026-06-07 03:31:50,576][528169] Updated weights for policy 0, policy_version 25612 (0.0008) +[2026-06-07 03:31:51,195][528169] Updated weights for policy 0, policy_version 25622 (0.0008) +[2026-06-07 03:31:51,334][528169] Updated weights for policy 0, policy_version 25633 (0.0008) +[2026-06-07 03:31:51,470][528169] Updated weights for policy 0, policy_version 25643 (0.0008) +[2026-06-07 03:31:51,638][528169] Updated weights for policy 0, policy_version 25656 (0.0008) +[2026-06-07 03:31:51,776][528169] Updated weights for policy 0, policy_version 25667 (0.0008) +[2026-06-07 03:31:51,936][528169] Updated weights for policy 0, policy_version 25678 (0.0007) +[2026-06-07 03:31:52,553][528169] Updated weights for policy 0, policy_version 25689 (0.0005) +[2026-06-07 03:31:52,709][528169] Updated weights for policy 0, policy_version 25701 (0.0005) +[2026-06-07 03:31:52,847][528169] Updated weights for policy 0, policy_version 25711 (0.0005) +[2026-06-07 03:31:52,976][528169] Updated weights for policy 0, policy_version 25721 (0.0004) +[2026-06-07 03:31:53,121][528169] Updated weights for policy 0, policy_version 25731 (0.0004) +[2026-06-07 03:31:53,255][528169] Updated weights for policy 0, policy_version 25741 (0.0005) +[2026-06-07 03:31:53,873][528169] Updated weights for policy 0, policy_version 25752 (0.0005) +[2026-06-07 03:31:54,000][528169] Updated weights for policy 0, policy_version 25762 (0.0005) +[2026-06-07 03:31:54,125][528169] Updated weights for policy 0, policy_version 25772 (0.0004) +[2026-06-07 03:31:54,261][528169] Updated weights for policy 0, policy_version 25782 (0.0004) +[2026-06-07 03:31:54,392][528169] Updated weights for policy 0, policy_version 25792 (0.0005) +[2026-06-07 03:31:54,525][528169] Updated weights for policy 0, policy_version 25802 (0.0008) +[2026-06-07 03:31:55,103][527010] Fps is (10 sec: 26214.5, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 13238272. Throughput: 0: 24692.6. Samples: 13246848. Policy #0 lag: (min: 63.0, avg: 76.2, max: 127.0) +[2026-06-07 03:31:55,104][527010] Avg episode reward: [(0, '96.165')] +[2026-06-07 03:31:55,170][528169] Updated weights for policy 0, policy_version 25812 (0.0007) +[2026-06-07 03:31:55,302][528169] Updated weights for policy 0, policy_version 25822 (0.0004) +[2026-06-07 03:31:55,438][528169] Updated weights for policy 0, policy_version 25832 (0.0005) +[2026-06-07 03:31:55,597][528169] Updated weights for policy 0, policy_version 25844 (0.0006) +[2026-06-07 03:31:55,741][528169] Updated weights for policy 0, policy_version 25855 (0.0008) +[2026-06-07 03:31:55,892][528169] Updated weights for policy 0, policy_version 25866 (0.0008) +[2026-06-07 03:31:55,968][528093] Saving new best policy, reward=96.165! +[2026-06-07 03:31:56,514][528169] Updated weights for policy 0, policy_version 25877 (0.0009) +[2026-06-07 03:31:56,641][528169] Updated weights for policy 0, policy_version 25887 (0.0008) +[2026-06-07 03:31:56,786][528169] Updated weights for policy 0, policy_version 25898 (0.0008) +[2026-06-07 03:31:56,918][528169] Updated weights for policy 0, policy_version 25908 (0.0009) +[2026-06-07 03:31:57,065][528169] Updated weights for policy 0, policy_version 25919 (0.0008) +[2026-06-07 03:31:57,195][528169] Updated weights for policy 0, policy_version 25929 (0.0008) +[2026-06-07 03:31:57,854][528169] Updated weights for policy 0, policy_version 25940 (0.0009) +[2026-06-07 03:31:57,992][528169] Updated weights for policy 0, policy_version 25951 (0.0008) +[2026-06-07 03:31:58,140][528169] Updated weights for policy 0, policy_version 25962 (0.0008) +[2026-06-07 03:31:58,279][528169] Updated weights for policy 0, policy_version 25972 (0.0008) +[2026-06-07 03:31:58,399][528169] Updated weights for policy 0, policy_version 25982 (0.0008) +[2026-06-07 03:31:58,555][528169] Updated weights for policy 0, policy_version 25993 (0.0008) +[2026-06-07 03:31:59,190][528169] Updated weights for policy 0, policy_version 26004 (0.0008) +[2026-06-07 03:31:59,324][528169] Updated weights for policy 0, policy_version 26014 (0.0008) +[2026-06-07 03:31:59,454][528169] Updated weights for policy 0, policy_version 26024 (0.0008) +[2026-06-07 03:31:59,595][528169] Updated weights for policy 0, policy_version 26034 (0.0008) +[2026-06-07 03:31:59,724][528169] Updated weights for policy 0, policy_version 26044 (0.0008) +[2026-06-07 03:31:59,859][528169] Updated weights for policy 0, policy_version 26054 (0.0008) +[2026-06-07 03:32:00,103][527010] Fps is (10 sec: 26214.3, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 13369344. Throughput: 0: 24467.9. Samples: 13317376. Policy #0 lag: (min: 63.0, avg: 76.2, max: 127.0) +[2026-06-07 03:32:00,104][527010] Avg episode reward: [(0, '73.754')] +[2026-06-07 03:32:00,508][528169] Updated weights for policy 0, policy_version 26066 (0.0008) +[2026-06-07 03:32:00,637][528169] Updated weights for policy 0, policy_version 26076 (0.0008) +[2026-06-07 03:32:00,781][528169] Updated weights for policy 0, policy_version 26087 (0.0008) +[2026-06-07 03:32:00,928][528169] Updated weights for policy 0, policy_version 26098 (0.0008) +[2026-06-07 03:32:01,079][528169] Updated weights for policy 0, policy_version 26109 (0.0008) +[2026-06-07 03:32:01,220][528169] Updated weights for policy 0, policy_version 26120 (0.0008) +[2026-06-07 03:32:01,826][528169] Updated weights for policy 0, policy_version 26130 (0.0008) +[2026-06-07 03:32:01,973][528169] Updated weights for policy 0, policy_version 26142 (0.0008) +[2026-06-07 03:32:02,112][528169] Updated weights for policy 0, policy_version 26153 (0.0008) +[2026-06-07 03:32:02,245][528169] Updated weights for policy 0, policy_version 26163 (0.0008) +[2026-06-07 03:32:02,380][528169] Updated weights for policy 0, policy_version 26174 (0.0008) +[2026-06-07 03:32:02,530][528169] Updated weights for policy 0, policy_version 26185 (0.0008) +[2026-06-07 03:32:03,162][528169] Updated weights for policy 0, policy_version 26195 (0.0008) +[2026-06-07 03:32:03,314][528169] Updated weights for policy 0, policy_version 26207 (0.0008) +[2026-06-07 03:32:03,464][528169] Updated weights for policy 0, policy_version 26218 (0.0008) +[2026-06-07 03:32:03,594][528169] Updated weights for policy 0, policy_version 26228 (0.0008) +[2026-06-07 03:32:03,730][528169] Updated weights for policy 0, policy_version 26238 (0.0008) +[2026-06-07 03:32:03,897][528169] Updated weights for policy 0, policy_version 26251 (0.0008) +[2026-06-07 03:32:04,526][528169] Updated weights for policy 0, policy_version 26263 (0.0006) +[2026-06-07 03:32:04,672][528169] Updated weights for policy 0, policy_version 26275 (0.0008) +[2026-06-07 03:32:04,815][528169] Updated weights for policy 0, policy_version 26286 (0.0008) +[2026-06-07 03:32:04,950][528169] Updated weights for policy 0, policy_version 26296 (0.0008) +[2026-06-07 03:32:05,086][528169] Updated weights for policy 0, policy_version 26306 (0.0008) +[2026-06-07 03:32:05,103][527010] Fps is (10 sec: 22936.9, 60 sec: 24575.9, 300 sec: 24437.2). Total num frames: 13467648. Throughput: 0: 24627.3. Samples: 13471616. Policy #0 lag: (min: 63.0, avg: 76.1, max: 127.0) +[2026-06-07 03:32:05,104][527010] Avg episode reward: [(0, '89.302')] +[2026-06-07 03:32:05,250][528169] Updated weights for policy 0, policy_version 26318 (0.0008) +[2026-06-07 03:32:05,932][528169] Updated weights for policy 0, policy_version 26332 (0.0008) +[2026-06-07 03:32:06,055][528169] Updated weights for policy 0, policy_version 26342 (0.0008) +[2026-06-07 03:32:06,191][528169] Updated weights for policy 0, policy_version 26352 (0.0008) +[2026-06-07 03:32:06,337][528169] Updated weights for policy 0, policy_version 26363 (0.0008) +[2026-06-07 03:32:06,467][528169] Updated weights for policy 0, policy_version 26373 (0.0008) +[2026-06-07 03:32:06,597][528169] Updated weights for policy 0, policy_version 26383 (0.0008) +[2026-06-07 03:32:07,272][528169] Updated weights for policy 0, policy_version 26395 (0.0008) +[2026-06-07 03:32:07,419][528169] Updated weights for policy 0, policy_version 26406 (0.0008) +[2026-06-07 03:32:07,549][528169] Updated weights for policy 0, policy_version 26416 (0.0008) +[2026-06-07 03:32:07,706][528169] Updated weights for policy 0, policy_version 26428 (0.0008) +[2026-06-07 03:32:07,852][528169] Updated weights for policy 0, policy_version 26439 (0.0008) +[2026-06-07 03:32:08,489][528169] Updated weights for policy 0, policy_version 26449 (0.0008) +[2026-06-07 03:32:08,624][528169] Updated weights for policy 0, policy_version 26460 (0.0008) +[2026-06-07 03:32:08,757][528169] Updated weights for policy 0, policy_version 26470 (0.0008) +[2026-06-07 03:32:08,924][528169] Updated weights for policy 0, policy_version 26483 (0.0008) +[2026-06-07 03:32:09,069][528169] Updated weights for policy 0, policy_version 26494 (0.0008) +[2026-06-07 03:32:09,202][528169] Updated weights for policy 0, policy_version 26504 (0.0008) +[2026-06-07 03:32:09,858][528169] Updated weights for policy 0, policy_version 26514 (0.0008) +[2026-06-07 03:32:09,984][528169] Updated weights for policy 0, policy_version 26524 (0.0008) +[2026-06-07 03:32:10,103][527010] Fps is (10 sec: 22937.7, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 13598720. Throughput: 0: 24476.4. Samples: 13610880. Policy #0 lag: (min: 63.0, avg: 76.1, max: 127.0) +[2026-06-07 03:32:10,104][527010] Avg episode reward: [(0, '89.076')] +[2026-06-07 03:32:10,111][528169] Updated weights for policy 0, policy_version 26534 (0.0008) +[2026-06-07 03:32:10,241][528169] Updated weights for policy 0, policy_version 26544 (0.0008) +[2026-06-07 03:32:10,387][528169] Updated weights for policy 0, policy_version 26555 (0.0008) +[2026-06-07 03:32:10,526][528169] Updated weights for policy 0, policy_version 26565 (0.0008) +[2026-06-07 03:32:10,665][528169] Updated weights for policy 0, policy_version 26575 (0.0008) +[2026-06-07 03:32:11,279][528169] Updated weights for policy 0, policy_version 26586 (0.0008) +[2026-06-07 03:32:11,428][528169] Updated weights for policy 0, policy_version 26597 (0.0008) +[2026-06-07 03:32:11,558][528169] Updated weights for policy 0, policy_version 26607 (0.0008) +[2026-06-07 03:32:11,694][528169] Updated weights for policy 0, policy_version 26617 (0.0008) +[2026-06-07 03:32:11,836][528169] Updated weights for policy 0, policy_version 26628 (0.0008) +[2026-06-07 03:32:11,995][528169] Updated weights for policy 0, policy_version 26640 (0.0008) +[2026-06-07 03:32:12,614][528169] Updated weights for policy 0, policy_version 26650 (0.0008) +[2026-06-07 03:32:12,743][528169] Updated weights for policy 0, policy_version 26660 (0.0008) +[2026-06-07 03:32:12,876][528169] Updated weights for policy 0, policy_version 26670 (0.0008) +[2026-06-07 03:32:13,008][528169] Updated weights for policy 0, policy_version 26680 (0.0008) +[2026-06-07 03:32:13,164][528169] Updated weights for policy 0, policy_version 26692 (0.0008) +[2026-06-07 03:32:13,319][528169] Updated weights for policy 0, policy_version 26703 (0.0008) +[2026-06-07 03:32:13,958][528169] Updated weights for policy 0, policy_version 26715 (0.0005) +[2026-06-07 03:32:14,095][528169] Updated weights for policy 0, policy_version 26725 (0.0005) +[2026-06-07 03:32:14,223][528169] Updated weights for policy 0, policy_version 26735 (0.0005) +[2026-06-07 03:32:14,365][528169] Updated weights for policy 0, policy_version 26746 (0.0004) +[2026-06-07 03:32:14,524][528169] Updated weights for policy 0, policy_version 26758 (0.0004) +[2026-06-07 03:32:14,656][528169] Updated weights for policy 0, policy_version 26768 (0.0005) +[2026-06-07 03:32:15,103][527010] Fps is (10 sec: 26214.6, 60 sec: 24575.9, 300 sec: 24548.2). Total num frames: 13729792. Throughput: 0: 24487.6. Samples: 13688064. Policy #0 lag: (min: 63.0, avg: 76.1, max: 127.0) +[2026-06-07 03:32:15,104][527010] Avg episode reward: [(0, '87.907')] +[2026-06-07 03:32:15,241][528169] Updated weights for policy 0, policy_version 26778 (0.0009) +[2026-06-07 03:32:15,372][528169] Updated weights for policy 0, policy_version 26788 (0.0009) +[2026-06-07 03:32:15,504][528169] Updated weights for policy 0, policy_version 26798 (0.0008) +[2026-06-07 03:32:15,638][528169] Updated weights for policy 0, policy_version 26808 (0.0008) +[2026-06-07 03:32:15,775][528169] Updated weights for policy 0, policy_version 26818 (0.0008) +[2026-06-07 03:32:15,906][528169] Updated weights for policy 0, policy_version 26828 (0.0008) +[2026-06-07 03:32:16,543][528169] Updated weights for policy 0, policy_version 26839 (0.0008) +[2026-06-07 03:32:16,661][528169] Updated weights for policy 0, policy_version 26849 (0.0008) +[2026-06-07 03:32:16,801][528169] Updated weights for policy 0, policy_version 26859 (0.0008) +[2026-06-07 03:32:16,950][528169] Updated weights for policy 0, policy_version 26870 (0.0008) +[2026-06-07 03:32:17,097][528169] Updated weights for policy 0, policy_version 26881 (0.0006) +[2026-06-07 03:32:17,238][528169] Updated weights for policy 0, policy_version 26891 (0.0006) +[2026-06-07 03:32:17,846][528169] Updated weights for policy 0, policy_version 26902 (0.0007) +[2026-06-07 03:32:17,975][528169] Updated weights for policy 0, policy_version 26912 (0.0008) +[2026-06-07 03:32:18,118][528169] Updated weights for policy 0, policy_version 26923 (0.0008) +[2026-06-07 03:32:18,257][528169] Updated weights for policy 0, policy_version 26933 (0.0008) +[2026-06-07 03:32:18,390][528169] Updated weights for policy 0, policy_version 26943 (0.0008) +[2026-06-07 03:32:18,523][528169] Updated weights for policy 0, policy_version 26953 (0.0008) +[2026-06-07 03:32:19,165][528169] Updated weights for policy 0, policy_version 26965 (0.0008) +[2026-06-07 03:32:19,321][528169] Updated weights for policy 0, policy_version 26977 (0.0008) +[2026-06-07 03:32:19,465][528169] Updated weights for policy 0, policy_version 26988 (0.0008) +[2026-06-07 03:32:19,594][528169] Updated weights for policy 0, policy_version 26998 (0.0008) +[2026-06-07 03:32:19,724][528169] Updated weights for policy 0, policy_version 27008 (0.0007) +[2026-06-07 03:32:19,866][528169] Updated weights for policy 0, policy_version 27018 (0.0008) +[2026-06-07 03:32:20,103][527010] Fps is (10 sec: 26214.2, 60 sec: 24576.2, 300 sec: 24548.2). Total num frames: 13860864. Throughput: 0: 24624.4. Samples: 13835264. Policy #0 lag: (min: 63.0, avg: 76.1, max: 127.0) +[2026-06-07 03:32:20,104][527010] Avg episode reward: [(0, '92.957')] +[2026-06-07 03:32:20,473][528169] Updated weights for policy 0, policy_version 27028 (0.0008) +[2026-06-07 03:32:20,607][528169] Updated weights for policy 0, policy_version 27038 (0.0008) +[2026-06-07 03:32:20,739][528169] Updated weights for policy 0, policy_version 27048 (0.0009) +[2026-06-07 03:32:20,910][528169] Updated weights for policy 0, policy_version 27061 (0.0008) +[2026-06-07 03:32:21,066][528169] Updated weights for policy 0, policy_version 27072 (0.0008) +[2026-06-07 03:32:21,201][528169] Updated weights for policy 0, policy_version 27083 (0.0008) +[2026-06-07 03:32:21,834][528169] Updated weights for policy 0, policy_version 27093 (0.0008) +[2026-06-07 03:32:21,986][528169] Updated weights for policy 0, policy_version 27105 (0.0008) +[2026-06-07 03:32:22,117][528169] Updated weights for policy 0, policy_version 27115 (0.0008) +[2026-06-07 03:32:22,250][528169] Updated weights for policy 0, policy_version 27125 (0.0008) +[2026-06-07 03:32:22,395][528169] Updated weights for policy 0, policy_version 27136 (0.0008) +[2026-06-07 03:32:22,539][528169] Updated weights for policy 0, policy_version 27147 (0.0008) +[2026-06-07 03:32:23,176][528169] Updated weights for policy 0, policy_version 27158 (0.0008) +[2026-06-07 03:32:23,328][528169] Updated weights for policy 0, policy_version 27169 (0.0005) +[2026-06-07 03:32:23,453][528169] Updated weights for policy 0, policy_version 27179 (0.0005) +[2026-06-07 03:32:23,589][528169] Updated weights for policy 0, policy_version 27189 (0.0005) +[2026-06-07 03:32:23,718][528169] Updated weights for policy 0, policy_version 27199 (0.0006) +[2026-06-07 03:32:23,876][528169] Updated weights for policy 0, policy_version 27210 (0.0005) +[2026-06-07 03:32:24,457][528169] Updated weights for policy 0, policy_version 27221 (0.0006) +[2026-06-07 03:32:24,604][528169] Updated weights for policy 0, policy_version 27232 (0.0008) +[2026-06-07 03:32:24,746][528169] Updated weights for policy 0, policy_version 27243 (0.0009) +[2026-06-07 03:32:24,883][528169] Updated weights for policy 0, policy_version 27253 (0.0006) +[2026-06-07 03:32:25,033][528169] Updated weights for policy 0, policy_version 27264 (0.0006) +[2026-06-07 03:32:25,103][527010] Fps is (10 sec: 22938.4, 60 sec: 24576.0, 300 sec: 24437.2). Total num frames: 13959168. Throughput: 0: 24536.2. Samples: 13986048. Policy #0 lag: (min: 37.0, avg: 49.0, max: 101.0) +[2026-06-07 03:32:25,103][527010] Avg episode reward: [(0, '89.261')] +[2026-06-07 03:32:25,160][528169] Updated weights for policy 0, policy_version 27274 (0.0008) +[2026-06-07 03:32:25,776][528169] Updated weights for policy 0, policy_version 27284 (0.0008) +[2026-06-07 03:32:25,902][528169] Updated weights for policy 0, policy_version 27294 (0.0009) +[2026-06-07 03:32:26,037][528169] Updated weights for policy 0, policy_version 27304 (0.0008) +[2026-06-07 03:32:26,169][528169] Updated weights for policy 0, policy_version 27314 (0.0008) +[2026-06-07 03:32:26,300][528169] Updated weights for policy 0, policy_version 27324 (0.0008) +[2026-06-07 03:32:26,453][528169] Updated weights for policy 0, policy_version 27336 (0.0008) +[2026-06-07 03:32:27,086][528169] Updated weights for policy 0, policy_version 27346 (0.0008) +[2026-06-07 03:32:27,219][528169] Updated weights for policy 0, policy_version 27356 (0.0008) +[2026-06-07 03:32:27,354][528169] Updated weights for policy 0, policy_version 27367 (0.0008) +[2026-06-07 03:32:27,498][528169] Updated weights for policy 0, policy_version 27378 (0.0008) +[2026-06-07 03:32:27,622][528169] Updated weights for policy 0, policy_version 27388 (0.0008) +[2026-06-07 03:32:27,775][528169] Updated weights for policy 0, policy_version 27399 (0.0008) +[2026-06-07 03:32:28,411][528169] Updated weights for policy 0, policy_version 27409 (0.0008) +[2026-06-07 03:32:28,554][528169] Updated weights for policy 0, policy_version 27420 (0.0008) +[2026-06-07 03:32:28,686][528169] Updated weights for policy 0, policy_version 27430 (0.0008) +[2026-06-07 03:32:28,834][528169] Updated weights for policy 0, policy_version 27441 (0.0008) +[2026-06-07 03:32:28,968][528169] Updated weights for policy 0, policy_version 27451 (0.0008) +[2026-06-07 03:32:29,110][528169] Updated weights for policy 0, policy_version 27462 (0.0008) +[2026-06-07 03:32:29,720][528169] Updated weights for policy 0, policy_version 27473 (0.0008) +[2026-06-07 03:32:29,866][528169] Updated weights for policy 0, policy_version 27484 (0.0008) +[2026-06-07 03:32:29,999][528169] Updated weights for policy 0, policy_version 27495 (0.0008) +[2026-06-07 03:32:30,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24576.0, 300 sec: 24437.1). Total num frames: 14090240. Throughput: 0: 24638.6. Samples: 14060800. Policy #0 lag: (min: 37.0, avg: 49.0, max: 101.0) +[2026-06-07 03:32:30,104][527010] Avg episode reward: [(0, '87.406')] +[2026-06-07 03:32:30,156][528169] Updated weights for policy 0, policy_version 27507 (0.0009) +[2026-06-07 03:32:30,295][528169] Updated weights for policy 0, policy_version 27517 (0.0008) +[2026-06-07 03:32:30,428][528169] Updated weights for policy 0, policy_version 27527 (0.0009) +[2026-06-07 03:32:31,051][528169] Updated weights for policy 0, policy_version 27537 (0.0008) +[2026-06-07 03:32:31,177][528169] Updated weights for policy 0, policy_version 27547 (0.0009) +[2026-06-07 03:32:31,328][528169] Updated weights for policy 0, policy_version 27559 (0.0009) +[2026-06-07 03:32:31,471][528169] Updated weights for policy 0, policy_version 27570 (0.0008) +[2026-06-07 03:32:31,602][528169] Updated weights for policy 0, policy_version 27580 (0.0008) +[2026-06-07 03:32:31,761][528169] Updated weights for policy 0, policy_version 27592 (0.0008) +[2026-06-07 03:32:32,395][528169] Updated weights for policy 0, policy_version 27602 (0.0008) +[2026-06-07 03:32:32,521][528169] Updated weights for policy 0, policy_version 27612 (0.0008) +[2026-06-07 03:32:32,666][528169] Updated weights for policy 0, policy_version 27623 (0.0008) +[2026-06-07 03:32:32,809][528169] Updated weights for policy 0, policy_version 27634 (0.0008) +[2026-06-07 03:32:32,950][528169] Updated weights for policy 0, policy_version 27645 (0.0008) +[2026-06-07 03:32:33,099][528169] Updated weights for policy 0, policy_version 27656 (0.0008) +[2026-06-07 03:32:33,736][528169] Updated weights for policy 0, policy_version 27667 (0.0008) +[2026-06-07 03:32:33,877][528169] Updated weights for policy 0, policy_version 27678 (0.0006) +[2026-06-07 03:32:34,039][528169] Updated weights for policy 0, policy_version 27691 (0.0008) +[2026-06-07 03:32:34,181][528169] Updated weights for policy 0, policy_version 27701 (0.0008) +[2026-06-07 03:32:34,339][528169] Updated weights for policy 0, policy_version 27713 (0.0008) +[2026-06-07 03:32:34,474][528169] Updated weights for policy 0, policy_version 27723 (0.0008) +[2026-06-07 03:32:35,094][528169] Updated weights for policy 0, policy_version 27733 (0.0008) +[2026-06-07 03:32:35,103][527010] Fps is (10 sec: 26213.9, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 14221312. Throughput: 0: 24490.6. Samples: 14201344. Policy #0 lag: (min: 37.0, avg: 49.0, max: 101.0) +[2026-06-07 03:32:35,105][527010] Avg episode reward: [(0, '104.299')] +[2026-06-07 03:32:35,238][528169] Updated weights for policy 0, policy_version 27745 (0.0008) +[2026-06-07 03:32:35,389][528169] Updated weights for policy 0, policy_version 27756 (0.0008) +[2026-06-07 03:32:35,516][528169] Updated weights for policy 0, policy_version 27766 (0.0008) +[2026-06-07 03:32:35,657][528169] Updated weights for policy 0, policy_version 27776 (0.0009) +[2026-06-07 03:32:35,796][528169] Updated weights for policy 0, policy_version 27787 (0.0009) +[2026-06-07 03:32:35,855][528093] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_uniform_u1_3_fs2_seed11/checkpoint_p0/checkpoint_000027792_14254080.pth... +[2026-06-07 03:32:35,872][528093] Saving new best policy, reward=104.299! +[2026-06-07 03:32:36,444][528169] Updated weights for policy 0, policy_version 27798 (0.0009) +[2026-06-07 03:32:36,592][528169] Updated weights for policy 0, policy_version 27809 (0.0008) +[2026-06-07 03:32:36,726][528169] Updated weights for policy 0, policy_version 27820 (0.0009) +[2026-06-07 03:32:36,920][528169] Updated weights for policy 0, policy_version 27835 (0.0008) +[2026-06-07 03:32:37,064][528169] Updated weights for policy 0, policy_version 27847 (0.0008) +[2026-06-07 03:32:37,741][528169] Updated weights for policy 0, policy_version 27859 (0.0008) +[2026-06-07 03:32:37,868][528169] Updated weights for policy 0, policy_version 27869 (0.0008) +[2026-06-07 03:32:38,000][528169] Updated weights for policy 0, policy_version 27879 (0.0008) +[2026-06-07 03:32:38,136][528169] Updated weights for policy 0, policy_version 27890 (0.0008) +[2026-06-07 03:32:38,270][528169] Updated weights for policy 0, policy_version 27900 (0.0008) +[2026-06-07 03:32:38,404][528169] Updated weights for policy 0, policy_version 27910 (0.0008) +[2026-06-07 03:32:39,108][528169] Updated weights for policy 0, policy_version 27923 (0.0008) +[2026-06-07 03:32:39,246][528169] Updated weights for policy 0, policy_version 27934 (0.0008) +[2026-06-07 03:32:39,374][528169] Updated weights for policy 0, policy_version 27944 (0.0008) +[2026-06-07 03:32:39,503][528169] Updated weights for policy 0, policy_version 27954 (0.0008) +[2026-06-07 03:32:39,640][528169] Updated weights for policy 0, policy_version 27964 (0.0008) +[2026-06-07 03:32:39,767][528169] Updated weights for policy 0, policy_version 27974 (0.0008) +[2026-06-07 03:32:39,901][528169] Updated weights for policy 0, policy_version 27984 (0.0008) +[2026-06-07 03:32:40,103][527010] Fps is (10 sec: 26214.7, 60 sec: 24576.0, 300 sec: 24548.3). Total num frames: 14352384. Throughput: 0: 24652.9. Samples: 14356224. Policy #0 lag: (min: 37.0, avg: 49.0, max: 101.0) +[2026-06-07 03:32:40,104][527010] Avg episode reward: [(0, '99.838')] +[2026-06-07 03:32:40,522][528169] Updated weights for policy 0, policy_version 27994 (0.0008) +[2026-06-07 03:32:40,651][528169] Updated weights for policy 0, policy_version 28004 (0.0008) +[2026-06-07 03:32:40,793][528169] Updated weights for policy 0, policy_version 28015 (0.0008) +[2026-06-07 03:32:40,932][528169] Updated weights for policy 0, policy_version 28025 (0.0008) +[2026-06-07 03:32:41,087][528169] Updated weights for policy 0, policy_version 28037 (0.0008) +[2026-06-07 03:32:41,214][528169] Updated weights for policy 0, policy_version 28047 (0.0008) +[2026-06-07 03:32:41,860][528169] Updated weights for policy 0, policy_version 28058 (0.0008) +[2026-06-07 03:32:41,987][528169] Updated weights for policy 0, policy_version 28068 (0.0008) +[2026-06-07 03:32:42,126][528169] Updated weights for policy 0, policy_version 28078 (0.0008) +[2026-06-07 03:32:42,265][528169] Updated weights for policy 0, policy_version 28089 (0.0008) +[2026-06-07 03:32:42,412][528169] Updated weights for policy 0, policy_version 28100 (0.0008) +[2026-06-07 03:32:42,549][528169] Updated weights for policy 0, policy_version 28110 (0.0008) +[2026-06-07 03:32:43,193][528169] Updated weights for policy 0, policy_version 28121 (0.0008) +[2026-06-07 03:32:43,330][528169] Updated weights for policy 0, policy_version 28131 (0.0008) +[2026-06-07 03:32:43,457][528169] Updated weights for policy 0, policy_version 28141 (0.0008) +[2026-06-07 03:32:43,621][528169] Updated weights for policy 0, policy_version 28153 (0.0008) +[2026-06-07 03:32:43,756][528169] Updated weights for policy 0, policy_version 28163 (0.0009) +[2026-06-07 03:32:43,893][528169] Updated weights for policy 0, policy_version 28173 (0.0008) +[2026-06-07 03:32:44,517][528169] Updated weights for policy 0, policy_version 28184 (0.0008) +[2026-06-07 03:32:44,652][528169] Updated weights for policy 0, policy_version 28195 (0.0008) +[2026-06-07 03:32:44,808][528169] Updated weights for policy 0, policy_version 28207 (0.0008) +[2026-06-07 03:32:44,952][528169] Updated weights for policy 0, policy_version 28218 (0.0008) +[2026-06-07 03:32:45,103][527010] Fps is (10 sec: 22937.7, 60 sec: 24576.0, 300 sec: 24437.1). Total num frames: 14450688. Throughput: 0: 24627.2. Samples: 14425600. Policy #0 lag: (min: 63.0, avg: 76.4, max: 127.0) +[2026-06-07 03:32:45,104][527010] Avg episode reward: [(0, '104.832')] +[2026-06-07 03:32:45,111][528169] Updated weights for policy 0, policy_version 28230 (0.0008) +[2026-06-07 03:32:45,238][528093] Saving new best policy, reward=104.832! +[2026-06-07 03:32:45,243][528169] Updated weights for policy 0, policy_version 28240 (0.0008) +[2026-06-07 03:32:45,873][528169] Updated weights for policy 0, policy_version 28253 (0.0009) +[2026-06-07 03:32:46,006][528169] Updated weights for policy 0, policy_version 28263 (0.0008) +[2026-06-07 03:32:46,144][528169] Updated weights for policy 0, policy_version 28273 (0.0008) +[2026-06-07 03:32:46,290][528169] Updated weights for policy 0, policy_version 28284 (0.0008) +[2026-06-07 03:32:46,445][528169] Updated weights for policy 0, policy_version 28296 (0.0008) +[2026-06-07 03:32:47,087][528169] Updated weights for policy 0, policy_version 28307 (0.0008) +[2026-06-07 03:32:47,227][528169] Updated weights for policy 0, policy_version 28318 (0.0008) +[2026-06-07 03:32:47,358][528169] Updated weights for policy 0, policy_version 28328 (0.0008) +[2026-06-07 03:32:47,491][528169] Updated weights for policy 0, policy_version 28338 (0.0009) +[2026-06-07 03:32:47,621][528169] Updated weights for policy 0, policy_version 28348 (0.0008) +[2026-06-07 03:32:47,782][528169] Updated weights for policy 0, policy_version 28360 (0.0008) +[2026-06-07 03:32:48,422][528169] Updated weights for policy 0, policy_version 28370 (0.0008) +[2026-06-07 03:32:48,560][528169] Updated weights for policy 0, policy_version 28381 (0.0008) +[2026-06-07 03:32:48,708][528169] Updated weights for policy 0, policy_version 28392 (0.0008) +[2026-06-07 03:32:48,856][528169] Updated weights for policy 0, policy_version 28403 (0.0008) +[2026-06-07 03:32:48,999][528169] Updated weights for policy 0, policy_version 28414 (0.0008) +[2026-06-07 03:32:49,133][528169] Updated weights for policy 0, policy_version 28424 (0.0008) +[2026-06-07 03:32:49,786][528169] Updated weights for policy 0, policy_version 28434 (0.0008) +[2026-06-07 03:32:49,934][528169] Updated weights for policy 0, policy_version 28445 (0.0008) +[2026-06-07 03:32:50,072][528169] Updated weights for policy 0, policy_version 28456 (0.0008) +[2026-06-07 03:32:50,103][527010] Fps is (10 sec: 22937.4, 60 sec: 24576.0, 300 sec: 24437.1). Total num frames: 14581760. Throughput: 0: 24547.7. Samples: 14576256. Policy #0 lag: (min: 63.0, avg: 76.4, max: 127.0) +[2026-06-07 03:32:50,104][527010] Avg episode reward: [(0, '79.835')] +[2026-06-07 03:32:50,215][528169] Updated weights for policy 0, policy_version 28467 (0.0009) +[2026-06-07 03:32:50,346][528169] Updated weights for policy 0, policy_version 28477 (0.0008) +[2026-06-07 03:32:50,493][528169] Updated weights for policy 0, policy_version 28488 (0.0009) +[2026-06-07 03:32:51,131][528169] Updated weights for policy 0, policy_version 28498 (0.0008) +[2026-06-07 03:32:51,276][528169] Updated weights for policy 0, policy_version 28509 (0.0008) +[2026-06-07 03:32:51,404][528169] Updated weights for policy 0, policy_version 28519 (0.0008) +[2026-06-07 03:32:51,550][528169] Updated weights for policy 0, policy_version 28530 (0.0009) +[2026-06-07 03:32:51,698][528169] Updated weights for policy 0, policy_version 28541 (0.0009) +[2026-06-07 03:32:51,858][528169] Updated weights for policy 0, policy_version 28553 (0.0008) +[2026-06-07 03:32:52,488][528169] Updated weights for policy 0, policy_version 28564 (0.0008) +[2026-06-07 03:32:52,613][528169] Updated weights for policy 0, policy_version 28574 (0.0008) +[2026-06-07 03:32:52,774][528169] Updated weights for policy 0, policy_version 28586 (0.0009) +[2026-06-07 03:32:52,906][528169] Updated weights for policy 0, policy_version 28596 (0.0008) +[2026-06-07 03:32:53,049][528169] Updated weights for policy 0, policy_version 28607 (0.0008) +[2026-06-07 03:32:53,193][528169] Updated weights for policy 0, policy_version 28618 (0.0008) +[2026-06-07 03:32:53,833][528169] Updated weights for policy 0, policy_version 28629 (0.0008) +[2026-06-07 03:32:53,990][528169] Updated weights for policy 0, policy_version 28641 (0.0008) +[2026-06-07 03:32:54,137][528169] Updated weights for policy 0, policy_version 28653 (0.0008) +[2026-06-07 03:32:54,271][528169] Updated weights for policy 0, policy_version 28663 (0.0008) +[2026-06-07 03:32:54,428][528169] Updated weights for policy 0, policy_version 28674 (0.0008) +[2026-06-07 03:32:54,565][528169] Updated weights for policy 0, policy_version 28685 (0.0008) +[2026-06-07 03:32:55,103][527010] Fps is (10 sec: 26214.4, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 14712832. Throughput: 0: 24647.1. Samples: 14720000. Policy #0 lag: (min: 63.0, avg: 76.4, max: 127.0) +[2026-06-07 03:32:55,104][527010] Avg episode reward: [(0, '82.990')] +[2026-06-07 03:32:55,235][528169] Updated weights for policy 0, policy_version 28698 (0.0008) +[2026-06-07 03:32:55,372][528169] Updated weights for policy 0, policy_version 28709 (0.0008) +[2026-06-07 03:32:55,527][528169] Updated weights for policy 0, policy_version 28720 (0.0008) +[2026-06-07 03:32:55,668][528169] Updated weights for policy 0, policy_version 28731 (0.0008) +[2026-06-07 03:32:55,824][528169] Updated weights for policy 0, policy_version 28743 (0.0008) +[2026-06-07 03:32:56,426][528169] Updated weights for policy 0, policy_version 28753 (0.0008) +[2026-06-07 03:32:56,555][528169] Updated weights for policy 0, policy_version 28763 (0.0004) +[2026-06-07 03:32:56,697][528169] Updated weights for policy 0, policy_version 28774 (0.0004) +[2026-06-07 03:32:56,828][528169] Updated weights for policy 0, policy_version 28784 (0.0004) +[2026-06-07 03:32:56,976][528169] Updated weights for policy 0, policy_version 28795 (0.0006) +[2026-06-07 03:32:57,116][528169] Updated weights for policy 0, policy_version 28805 (0.0008) +[2026-06-07 03:32:57,249][528169] Updated weights for policy 0, policy_version 28816 (0.0008) +[2026-06-07 03:32:57,868][528169] Updated weights for policy 0, policy_version 28827 (0.0008) +[2026-06-07 03:32:58,001][528169] Updated weights for policy 0, policy_version 28838 (0.0008) +[2026-06-07 03:32:58,140][528169] Updated weights for policy 0, policy_version 28848 (0.0008) +[2026-06-07 03:32:58,284][528169] Updated weights for policy 0, policy_version 28859 (0.0008) +[2026-06-07 03:32:58,412][528169] Updated weights for policy 0, policy_version 28869 (0.0008) +[2026-06-07 03:32:58,542][528169] Updated weights for policy 0, policy_version 28879 (0.0008) +[2026-06-07 03:32:59,203][528169] Updated weights for policy 0, policy_version 28889 (0.0008) +[2026-06-07 03:32:59,345][528169] Updated weights for policy 0, policy_version 28900 (0.0008) +[2026-06-07 03:32:59,477][528169] Updated weights for policy 0, policy_version 28910 (0.0008) +[2026-06-07 03:32:59,636][528169] Updated weights for policy 0, policy_version 28922 (0.0008) +[2026-06-07 03:32:59,773][528169] Updated weights for policy 0, policy_version 28933 (0.0008) +[2026-06-07 03:32:59,914][528169] Updated weights for policy 0, policy_version 28943 (0.0008) +[2026-06-07 03:33:00,103][527010] Fps is (10 sec: 26214.6, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 14843904. Throughput: 0: 24516.4. Samples: 14791296. Policy #0 lag: (min: 63.0, avg: 76.4, max: 127.0) +[2026-06-07 03:33:00,104][527010] Avg episode reward: [(0, '102.207')] +[2026-06-07 03:33:00,526][528169] Updated weights for policy 0, policy_version 28953 (0.0008) +[2026-06-07 03:33:00,669][528169] Updated weights for policy 0, policy_version 28964 (0.0008) +[2026-06-07 03:33:00,808][528169] Updated weights for policy 0, policy_version 28975 (0.0009) +[2026-06-07 03:33:00,949][528169] Updated weights for policy 0, policy_version 28985 (0.0008) +[2026-06-07 03:33:01,076][528169] Updated weights for policy 0, policy_version 28995 (0.0008) +[2026-06-07 03:33:01,230][528169] Updated weights for policy 0, policy_version 29006 (0.0008) +[2026-06-07 03:33:01,858][528169] Updated weights for policy 0, policy_version 29016 (0.0008) +[2026-06-07 03:33:02,023][528169] Updated weights for policy 0, policy_version 29028 (0.0008) +[2026-06-07 03:33:02,152][528169] Updated weights for policy 0, policy_version 29038 (0.0008) +[2026-06-07 03:33:02,281][528169] Updated weights for policy 0, policy_version 29048 (0.0008) +[2026-06-07 03:33:02,423][528169] Updated weights for policy 0, policy_version 29059 (0.0008) +[2026-06-07 03:33:02,561][528169] Updated weights for policy 0, policy_version 29069 (0.0008) +[2026-06-07 03:33:03,178][528169] Updated weights for policy 0, policy_version 29079 (0.0008) +[2026-06-07 03:33:03,332][528169] Updated weights for policy 0, policy_version 29091 (0.0008) +[2026-06-07 03:33:03,497][528169] Updated weights for policy 0, policy_version 29103 (0.0008) +[2026-06-07 03:33:03,627][528169] Updated weights for policy 0, policy_version 29113 (0.0008) +[2026-06-07 03:33:03,763][528169] Updated weights for policy 0, policy_version 29123 (0.0008) +[2026-06-07 03:33:03,905][528169] Updated weights for policy 0, policy_version 29134 (0.0008) +[2026-06-07 03:33:04,565][528169] Updated weights for policy 0, policy_version 29145 (0.0008) +[2026-06-07 03:33:04,701][528169] Updated weights for policy 0, policy_version 29156 (0.0008) +[2026-06-07 03:33:04,865][528169] Updated weights for policy 0, policy_version 29168 (0.0008) +[2026-06-07 03:33:04,995][528169] Updated weights for policy 0, policy_version 29178 (0.0008) +[2026-06-07 03:33:05,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24576.1, 300 sec: 24437.1). Total num frames: 14942208. Throughput: 0: 24701.2. Samples: 14946816. Policy #0 lag: (min: 63.0, avg: 75.6, max: 127.0) +[2026-06-07 03:33:05,104][527010] Avg episode reward: [(0, '94.203')] +[2026-06-07 03:33:05,153][528169] Updated weights for policy 0, policy_version 29189 (0.0008) +[2026-06-07 03:33:05,280][528169] Updated weights for policy 0, policy_version 29199 (0.0008) +[2026-06-07 03:33:05,894][528169] Updated weights for policy 0, policy_version 29209 (0.0004) +[2026-06-07 03:33:06,030][528169] Updated weights for policy 0, policy_version 29219 (0.0004) +[2026-06-07 03:33:06,167][528169] Updated weights for policy 0, policy_version 29229 (0.0007) +[2026-06-07 03:33:06,314][528169] Updated weights for policy 0, policy_version 29240 (0.0008) +[2026-06-07 03:33:06,447][528169] Updated weights for policy 0, policy_version 29250 (0.0008) +[2026-06-07 03:33:06,583][528169] Updated weights for policy 0, policy_version 29260 (0.0008) +[2026-06-07 03:33:07,185][528169] Updated weights for policy 0, policy_version 29270 (0.0008) +[2026-06-07 03:33:07,321][528169] Updated weights for policy 0, policy_version 29281 (0.0008) +[2026-06-07 03:33:07,451][528169] Updated weights for policy 0, policy_version 29291 (0.0008) +[2026-06-07 03:33:07,582][528169] Updated weights for policy 0, policy_version 29301 (0.0009) +[2026-06-07 03:33:07,725][528169] Updated weights for policy 0, policy_version 29311 (0.0008) +[2026-06-07 03:33:07,882][528169] Updated weights for policy 0, policy_version 29323 (0.0008) +[2026-06-07 03:33:08,521][528169] Updated weights for policy 0, policy_version 29333 (0.0008) +[2026-06-07 03:33:08,653][528169] Updated weights for policy 0, policy_version 29343 (0.0008) +[2026-06-07 03:33:08,784][528169] Updated weights for policy 0, policy_version 29353 (0.0008) +[2026-06-07 03:33:08,923][528169] Updated weights for policy 0, policy_version 29364 (0.0008) +[2026-06-07 03:33:09,075][528169] Updated weights for policy 0, policy_version 29375 (0.0008) +[2026-06-07 03:33:09,201][528169] Updated weights for policy 0, policy_version 29385 (0.0008) +[2026-06-07 03:33:09,857][528169] Updated weights for policy 0, policy_version 29395 (0.0009) +[2026-06-07 03:33:09,981][528169] Updated weights for policy 0, policy_version 29405 (0.0008) +[2026-06-07 03:33:10,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 15073280. Throughput: 0: 24485.0. Samples: 15087872. Policy #0 lag: (min: 63.0, avg: 75.6, max: 127.0) +[2026-06-07 03:33:10,103][527010] Avg episode reward: [(0, '104.399')] +[2026-06-07 03:33:10,110][528169] Updated weights for policy 0, policy_version 29415 (0.0008) +[2026-06-07 03:33:10,237][528169] Updated weights for policy 0, policy_version 29425 (0.0008) +[2026-06-07 03:33:10,400][528169] Updated weights for policy 0, policy_version 29437 (0.0008) +[2026-06-07 03:33:10,547][528169] Updated weights for policy 0, policy_version 29448 (0.0008) +[2026-06-07 03:33:11,155][528169] Updated weights for policy 0, policy_version 29458 (0.0008) +[2026-06-07 03:33:11,309][528169] Updated weights for policy 0, policy_version 29470 (0.0007) +[2026-06-07 03:33:11,457][528169] Updated weights for policy 0, policy_version 29481 (0.0007) +[2026-06-07 03:33:11,611][528169] Updated weights for policy 0, policy_version 29493 (0.0009) +[2026-06-07 03:33:11,757][528169] Updated weights for policy 0, policy_version 29504 (0.0008) +[2026-06-07 03:33:11,905][528169] Updated weights for policy 0, policy_version 29515 (0.0008) +[2026-06-07 03:33:12,543][528169] Updated weights for policy 0, policy_version 29525 (0.0008) +[2026-06-07 03:33:12,685][528169] Updated weights for policy 0, policy_version 29536 (0.0008) +[2026-06-07 03:33:12,815][528169] Updated weights for policy 0, policy_version 29546 (0.0008) +[2026-06-07 03:33:12,970][528169] Updated weights for policy 0, policy_version 29558 (0.0008) +[2026-06-07 03:33:13,135][528169] Updated weights for policy 0, policy_version 29570 (0.0008) +[2026-06-07 03:33:13,281][528169] Updated weights for policy 0, policy_version 29581 (0.0008) +[2026-06-07 03:33:13,920][528169] Updated weights for policy 0, policy_version 29592 (0.0009) +[2026-06-07 03:33:14,059][528169] Updated weights for policy 0, policy_version 29603 (0.0008) +[2026-06-07 03:33:14,189][528169] Updated weights for policy 0, policy_version 29613 (0.0008) +[2026-06-07 03:33:14,326][528169] Updated weights for policy 0, policy_version 29623 (0.0008) +[2026-06-07 03:33:14,473][528169] Updated weights for policy 0, policy_version 29634 (0.0009) +[2026-06-07 03:33:14,608][528169] Updated weights for policy 0, policy_version 29644 (0.0008) +[2026-06-07 03:33:15,103][527010] Fps is (10 sec: 26214.9, 60 sec: 24576.2, 300 sec: 24548.2). Total num frames: 15204352. Throughput: 0: 24564.7. Samples: 15166208. Policy #0 lag: (min: 63.0, avg: 75.6, max: 127.0) +[2026-06-07 03:33:15,103][527010] Avg episode reward: [(0, '90.437')] +[2026-06-07 03:33:15,221][528169] Updated weights for policy 0, policy_version 29654 (0.0008) +[2026-06-07 03:33:15,374][528169] Updated weights for policy 0, policy_version 29666 (0.0009) +[2026-06-07 03:33:15,514][528169] Updated weights for policy 0, policy_version 29676 (0.0008) +[2026-06-07 03:33:15,663][528169] Updated weights for policy 0, policy_version 29687 (0.0009) +[2026-06-07 03:33:15,814][528169] Updated weights for policy 0, policy_version 29698 (0.0008) +[2026-06-07 03:33:15,954][528169] Updated weights for policy 0, policy_version 29708 (0.0008) +[2026-06-07 03:33:16,607][528169] Updated weights for policy 0, policy_version 29721 (0.0008) +[2026-06-07 03:33:16,738][528169] Updated weights for policy 0, policy_version 29731 (0.0008) +[2026-06-07 03:33:16,868][528169] Updated weights for policy 0, policy_version 29742 (0.0008) +[2026-06-07 03:33:17,020][528169] Updated weights for policy 0, policy_version 29753 (0.0008) +[2026-06-07 03:33:17,149][528169] Updated weights for policy 0, policy_version 29763 (0.0008) +[2026-06-07 03:33:17,283][528169] Updated weights for policy 0, policy_version 29773 (0.0008) +[2026-06-07 03:33:17,901][528169] Updated weights for policy 0, policy_version 29784 (0.0009) +[2026-06-07 03:33:18,037][528169] Updated weights for policy 0, policy_version 29795 (0.0008) +[2026-06-07 03:33:18,187][528169] Updated weights for policy 0, policy_version 29806 (0.0009) +[2026-06-07 03:33:18,314][528169] Updated weights for policy 0, policy_version 29816 (0.0009) +[2026-06-07 03:33:18,461][528169] Updated weights for policy 0, policy_version 29827 (0.0009) +[2026-06-07 03:33:18,608][528169] Updated weights for policy 0, policy_version 29838 (0.0008) +[2026-06-07 03:33:19,251][528169] Updated weights for policy 0, policy_version 29850 (0.0008) +[2026-06-07 03:33:19,379][528169] Updated weights for policy 0, policy_version 29860 (0.0008) +[2026-06-07 03:33:19,511][528169] Updated weights for policy 0, policy_version 29870 (0.0008) +[2026-06-07 03:33:19,657][528169] Updated weights for policy 0, policy_version 29881 (0.0008) +[2026-06-07 03:33:19,827][528169] Updated weights for policy 0, policy_version 29894 (0.0008) +[2026-06-07 03:33:19,951][528169] Updated weights for policy 0, policy_version 29904 (0.0008) +[2026-06-07 03:33:20,103][527010] Fps is (10 sec: 26214.0, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 15335424. Throughput: 0: 24706.8. Samples: 15313152. Policy #0 lag: (min: 63.0, avg: 75.6, max: 127.0) +[2026-06-07 03:33:20,104][527010] Avg episode reward: [(0, '85.996')] +[2026-06-07 03:33:20,601][528169] Updated weights for policy 0, policy_version 29914 (0.0008) +[2026-06-07 03:33:20,731][528169] Updated weights for policy 0, policy_version 29924 (0.0008) +[2026-06-07 03:33:20,883][528169] Updated weights for policy 0, policy_version 29935 (0.0008) +[2026-06-07 03:33:21,020][528169] Updated weights for policy 0, policy_version 29945 (0.0008) +[2026-06-07 03:33:21,171][528169] Updated weights for policy 0, policy_version 29956 (0.0008) +[2026-06-07 03:33:21,318][528169] Updated weights for policy 0, policy_version 29967 (0.0008) +[2026-06-07 03:33:21,935][528169] Updated weights for policy 0, policy_version 29978 (0.0008) +[2026-06-07 03:33:22,064][528169] Updated weights for policy 0, policy_version 29988 (0.0008) +[2026-06-07 03:33:22,193][528169] Updated weights for policy 0, policy_version 29998 (0.0008) +[2026-06-07 03:33:22,329][528169] Updated weights for policy 0, policy_version 30008 (0.0008) +[2026-06-07 03:33:22,478][528169] Updated weights for policy 0, policy_version 30019 (0.0009) +[2026-06-07 03:33:22,606][528169] Updated weights for policy 0, policy_version 30029 (0.0008) +[2026-06-07 03:33:23,225][528169] Updated weights for policy 0, policy_version 30039 (0.0008) +[2026-06-07 03:33:23,354][528169] Updated weights for policy 0, policy_version 30049 (0.0008) +[2026-06-07 03:33:23,495][528169] Updated weights for policy 0, policy_version 30060 (0.0008) +[2026-06-07 03:33:23,643][528169] Updated weights for policy 0, policy_version 30071 (0.0008) +[2026-06-07 03:33:23,785][528169] Updated weights for policy 0, policy_version 30082 (0.0008) +[2026-06-07 03:33:23,927][528169] Updated weights for policy 0, policy_version 30092 (0.0008) +[2026-06-07 03:33:24,531][528169] Updated weights for policy 0, policy_version 30103 (0.0008) +[2026-06-07 03:33:24,677][528169] Updated weights for policy 0, policy_version 30114 (0.0008) +[2026-06-07 03:33:24,807][528169] Updated weights for policy 0, policy_version 30124 (0.0008) +[2026-06-07 03:33:24,940][528169] Updated weights for policy 0, policy_version 30134 (0.0008) +[2026-06-07 03:33:25,066][528169] Updated weights for policy 0, policy_version 30144 (0.0008) +[2026-06-07 03:33:25,103][527010] Fps is (10 sec: 22937.4, 60 sec: 24576.0, 300 sec: 24437.2). Total num frames: 15433728. Throughput: 0: 24576.0. Samples: 15462144. Policy #0 lag: (min: 63.0, avg: 76.8, max: 127.0) +[2026-06-07 03:33:25,103][527010] Avg episode reward: [(0, '112.092')] +[2026-06-07 03:33:25,233][528169] Updated weights for policy 0, policy_version 30156 (0.0008) +[2026-06-07 03:33:25,275][528093] Saving new best policy, reward=112.092! +[2026-06-07 03:33:25,851][528169] Updated weights for policy 0, policy_version 30167 (0.0009) +[2026-06-07 03:33:25,982][528169] Updated weights for policy 0, policy_version 30177 (0.0008) +[2026-06-07 03:33:26,113][528169] Updated weights for policy 0, policy_version 30187 (0.0008) +[2026-06-07 03:33:26,238][528169] Updated weights for policy 0, policy_version 30197 (0.0008) +[2026-06-07 03:33:26,376][528169] Updated weights for policy 0, policy_version 30207 (0.0008) +[2026-06-07 03:33:26,524][528169] Updated weights for policy 0, policy_version 30218 (0.0008) +[2026-06-07 03:33:27,191][528169] Updated weights for policy 0, policy_version 30229 (0.0008) +[2026-06-07 03:33:27,335][528169] Updated weights for policy 0, policy_version 30240 (0.0008) +[2026-06-07 03:33:27,475][528169] Updated weights for policy 0, policy_version 30251 (0.0008) +[2026-06-07 03:33:27,656][528169] Updated weights for policy 0, policy_version 30264 (0.0008) +[2026-06-07 03:33:27,786][528169] Updated weights for policy 0, policy_version 30274 (0.0008) +[2026-06-07 03:33:27,924][528169] Updated weights for policy 0, policy_version 30284 (0.0008) +[2026-06-07 03:33:28,543][528169] Updated weights for policy 0, policy_version 30294 (0.0008) +[2026-06-07 03:33:28,680][528169] Updated weights for policy 0, policy_version 30305 (0.0008) +[2026-06-07 03:33:28,819][528169] Updated weights for policy 0, policy_version 30315 (0.0008) +[2026-06-07 03:33:28,966][528169] Updated weights for policy 0, policy_version 30326 (0.0008) +[2026-06-07 03:33:29,103][528169] Updated weights for policy 0, policy_version 30336 (0.0008) +[2026-06-07 03:33:29,233][528169] Updated weights for policy 0, policy_version 30346 (0.0008) +[2026-06-07 03:33:29,861][528169] Updated weights for policy 0, policy_version 30358 (0.0008) +[2026-06-07 03:33:29,983][528169] Updated weights for policy 0, policy_version 30368 (0.0007) +[2026-06-07 03:33:30,103][527010] Fps is (10 sec: 22937.5, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 15564800. Throughput: 0: 24726.7. Samples: 15538304. Policy #0 lag: (min: 63.0, avg: 76.8, max: 127.0) +[2026-06-07 03:33:30,104][527010] Avg episode reward: [(0, '109.577')] +[2026-06-07 03:33:30,127][528169] Updated weights for policy 0, policy_version 30379 (0.0009) +[2026-06-07 03:33:30,261][528169] Updated weights for policy 0, policy_version 30389 (0.0008) +[2026-06-07 03:33:30,413][528169] Updated weights for policy 0, policy_version 30400 (0.0010) +[2026-06-07 03:33:30,559][528169] Updated weights for policy 0, policy_version 30411 (0.0009) +[2026-06-07 03:33:31,178][528169] Updated weights for policy 0, policy_version 30422 (0.0008) +[2026-06-07 03:33:31,319][528169] Updated weights for policy 0, policy_version 30433 (0.0008) +[2026-06-07 03:33:31,454][528169] Updated weights for policy 0, policy_version 30443 (0.0008) +[2026-06-07 03:33:31,589][528169] Updated weights for policy 0, policy_version 30454 (0.0008) +[2026-06-07 03:33:31,742][528169] Updated weights for policy 0, policy_version 30465 (0.0008) +[2026-06-07 03:33:31,886][528169] Updated weights for policy 0, policy_version 30476 (0.0008) +[2026-06-07 03:33:32,497][528169] Updated weights for policy 0, policy_version 30486 (0.0008) +[2026-06-07 03:33:32,623][528169] Updated weights for policy 0, policy_version 30496 (0.0008) +[2026-06-07 03:33:32,759][528169] Updated weights for policy 0, policy_version 30506 (0.0008) +[2026-06-07 03:33:32,892][528169] Updated weights for policy 0, policy_version 30516 (0.0008) +[2026-06-07 03:33:33,022][528169] Updated weights for policy 0, policy_version 30526 (0.0008) +[2026-06-07 03:33:33,167][528169] Updated weights for policy 0, policy_version 30537 (0.0008) +[2026-06-07 03:33:33,797][528169] Updated weights for policy 0, policy_version 30547 (0.0008) +[2026-06-07 03:33:33,935][528169] Updated weights for policy 0, policy_version 30558 (0.0008) +[2026-06-07 03:33:34,068][528169] Updated weights for policy 0, policy_version 30568 (0.0008) +[2026-06-07 03:33:34,194][528169] Updated weights for policy 0, policy_version 30578 (0.0009) +[2026-06-07 03:33:34,326][528169] Updated weights for policy 0, policy_version 30588 (0.0008) +[2026-06-07 03:33:34,463][528169] Updated weights for policy 0, policy_version 30598 (0.0008) +[2026-06-07 03:33:35,103][527010] Fps is (10 sec: 26214.3, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 15695872. Throughput: 0: 24504.9. Samples: 15678976. Policy #0 lag: (min: 63.0, avg: 76.8, max: 127.0) +[2026-06-07 03:33:35,105][527010] Avg episode reward: [(0, '97.076')] +[2026-06-07 03:33:35,119][528169] Updated weights for policy 0, policy_version 30611 (0.0008) +[2026-06-07 03:33:35,254][528169] Updated weights for policy 0, policy_version 30621 (0.0008) +[2026-06-07 03:33:35,397][528169] Updated weights for policy 0, policy_version 30632 (0.0008) +[2026-06-07 03:33:35,541][528169] Updated weights for policy 0, policy_version 30642 (0.0008) +[2026-06-07 03:33:35,670][528169] Updated weights for policy 0, policy_version 30652 (0.0008) +[2026-06-07 03:33:35,809][528169] Updated weights for policy 0, policy_version 30662 (0.0008) +[2026-06-07 03:33:36,403][528169] Updated weights for policy 0, policy_version 30673 (0.0008) +[2026-06-07 03:33:36,556][528169] Updated weights for policy 0, policy_version 30685 (0.0008) +[2026-06-07 03:33:36,704][528169] Updated weights for policy 0, policy_version 30696 (0.0008) +[2026-06-07 03:33:36,840][528169] Updated weights for policy 0, policy_version 30706 (0.0008) +[2026-06-07 03:33:36,982][528169] Updated weights for policy 0, policy_version 30717 (0.0008) +[2026-06-07 03:33:37,118][528169] Updated weights for policy 0, policy_version 30727 (0.0008) +[2026-06-07 03:33:37,747][528169] Updated weights for policy 0, policy_version 30737 (0.0008) +[2026-06-07 03:33:37,869][528169] Updated weights for policy 0, policy_version 30747 (0.0008) +[2026-06-07 03:33:38,002][528169] Updated weights for policy 0, policy_version 30757 (0.0009) +[2026-06-07 03:33:38,145][528169] Updated weights for policy 0, policy_version 30768 (0.0008) +[2026-06-07 03:33:38,273][528169] Updated weights for policy 0, policy_version 30778 (0.0008) +[2026-06-07 03:33:38,408][528169] Updated weights for policy 0, policy_version 30788 (0.0008) +[2026-06-07 03:33:38,540][528169] Updated weights for policy 0, policy_version 30798 (0.0008) +[2026-06-07 03:33:39,194][528169] Updated weights for policy 0, policy_version 30810 (0.0008) +[2026-06-07 03:33:39,327][528169] Updated weights for policy 0, policy_version 30820 (0.0008) +[2026-06-07 03:33:39,456][528169] Updated weights for policy 0, policy_version 30830 (0.0008) +[2026-06-07 03:33:39,593][528169] Updated weights for policy 0, policy_version 30840 (0.0008) +[2026-06-07 03:33:39,746][528169] Updated weights for policy 0, policy_version 30851 (0.0008) +[2026-06-07 03:33:39,874][528169] Updated weights for policy 0, policy_version 30861 (0.0008) +[2026-06-07 03:33:40,103][527010] Fps is (10 sec: 26215.0, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 15826944. Throughput: 0: 24761.0. Samples: 15834240. Policy #0 lag: (min: 63.0, avg: 76.8, max: 127.0) +[2026-06-07 03:33:40,104][527010] Avg episode reward: [(0, '96.851')] +[2026-06-07 03:33:40,486][528169] Updated weights for policy 0, policy_version 30872 (0.0008) +[2026-06-07 03:33:40,617][528169] Updated weights for policy 0, policy_version 30882 (0.0008) +[2026-06-07 03:33:40,778][528169] Updated weights for policy 0, policy_version 30894 (0.0008) +[2026-06-07 03:33:40,925][528169] Updated weights for policy 0, policy_version 30905 (0.0008) +[2026-06-07 03:33:41,083][528169] Updated weights for policy 0, policy_version 30917 (0.0008) +[2026-06-07 03:33:41,728][528169] Updated weights for policy 0, policy_version 30929 (0.0008) +[2026-06-07 03:33:41,854][528169] Updated weights for policy 0, policy_version 30939 (0.0008) +[2026-06-07 03:33:42,003][528169] Updated weights for policy 0, policy_version 30950 (0.0008) +[2026-06-07 03:33:42,135][528169] Updated weights for policy 0, policy_version 30960 (0.0008) +[2026-06-07 03:33:42,266][528169] Updated weights for policy 0, policy_version 30970 (0.0008) +[2026-06-07 03:33:42,398][528169] Updated weights for policy 0, policy_version 30980 (0.0008) +[2026-06-07 03:33:42,529][528169] Updated weights for policy 0, policy_version 30990 (0.0008) +[2026-06-07 03:33:43,135][528169] Updated weights for policy 0, policy_version 31001 (0.0008) +[2026-06-07 03:33:43,302][528169] Updated weights for policy 0, policy_version 31014 (0.0008) +[2026-06-07 03:33:43,429][528169] Updated weights for policy 0, policy_version 31024 (0.0008) +[2026-06-07 03:33:43,567][528169] Updated weights for policy 0, policy_version 31034 (0.0008) +[2026-06-07 03:33:43,699][528169] Updated weights for policy 0, policy_version 31044 (0.0008) +[2026-06-07 03:33:43,856][528169] Updated weights for policy 0, policy_version 31055 (0.0008) +[2026-06-07 03:33:44,492][528169] Updated weights for policy 0, policy_version 31066 (0.0009) +[2026-06-07 03:33:44,644][528169] Updated weights for policy 0, policy_version 31078 (0.0008) +[2026-06-07 03:33:44,778][528169] Updated weights for policy 0, policy_version 31088 (0.0008) +[2026-06-07 03:33:44,915][528169] Updated weights for policy 0, policy_version 31099 (0.0008) +[2026-06-07 03:33:45,062][528169] Updated weights for policy 0, policy_version 31111 (0.0008) +[2026-06-07 03:33:45,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24576.0, 300 sec: 24437.1). Total num frames: 15925248. Throughput: 0: 24718.2. Samples: 15903616. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) +[2026-06-07 03:33:45,104][527010] Avg episode reward: [(0, '96.458')] +[2026-06-07 03:33:45,712][528169] Updated weights for policy 0, policy_version 31121 (0.0009) +[2026-06-07 03:33:45,889][528169] Updated weights for policy 0, policy_version 31136 (0.0008) +[2026-06-07 03:33:46,034][528169] Updated weights for policy 0, policy_version 31147 (0.0008) +[2026-06-07 03:33:46,170][528169] Updated weights for policy 0, policy_version 31158 (0.0008) +[2026-06-07 03:33:46,322][528169] Updated weights for policy 0, policy_version 31170 (0.0008) +[2026-06-07 03:33:46,480][528169] Updated weights for policy 0, policy_version 31182 (0.0008) +[2026-06-07 03:33:47,151][528169] Updated weights for policy 0, policy_version 31193 (0.0008) +[2026-06-07 03:33:47,287][528169] Updated weights for policy 0, policy_version 31204 (0.0008) +[2026-06-07 03:33:47,445][528169] Updated weights for policy 0, policy_version 31217 (0.0008) +[2026-06-07 03:33:47,586][528169] Updated weights for policy 0, policy_version 31228 (0.0008) +[2026-06-07 03:33:47,749][528169] Updated weights for policy 0, policy_version 31240 (0.0008) +[2026-06-07 03:33:48,400][528169] Updated weights for policy 0, policy_version 31252 (0.0009) +[2026-06-07 03:33:48,562][528169] Updated weights for policy 0, policy_version 31264 (0.0008) +[2026-06-07 03:33:48,697][528169] Updated weights for policy 0, policy_version 31274 (0.0008) +[2026-06-07 03:33:48,831][528169] Updated weights for policy 0, policy_version 31284 (0.0008) +[2026-06-07 03:33:48,967][528169] Updated weights for policy 0, policy_version 31294 (0.0008) +[2026-06-07 03:33:49,116][528169] Updated weights for policy 0, policy_version 31305 (0.0008) +[2026-06-07 03:33:49,728][528169] Updated weights for policy 0, policy_version 31315 (0.0008) +[2026-06-07 03:33:49,875][528169] Updated weights for policy 0, policy_version 31326 (0.0008) +[2026-06-07 03:33:50,015][528169] Updated weights for policy 0, policy_version 31336 (0.0008) +[2026-06-07 03:33:50,103][527010] Fps is (10 sec: 22937.5, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 16056320. Throughput: 0: 24650.0. Samples: 16056064. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) +[2026-06-07 03:33:50,104][527010] Avg episode reward: [(0, '101.278')] +[2026-06-07 03:33:50,161][528169] Updated weights for policy 0, policy_version 31347 (0.0008) +[2026-06-07 03:33:50,300][528169] Updated weights for policy 0, policy_version 31357 (0.0008) +[2026-06-07 03:33:50,439][528169] Updated weights for policy 0, policy_version 31367 (0.0008) +[2026-06-07 03:33:51,024][528169] Updated weights for policy 0, policy_version 31377 (0.0008) +[2026-06-07 03:33:51,157][528169] Updated weights for policy 0, policy_version 31388 (0.0008) +[2026-06-07 03:33:51,323][528169] Updated weights for policy 0, policy_version 31401 (0.0008) +[2026-06-07 03:33:51,469][528169] Updated weights for policy 0, policy_version 31413 (0.0008) +[2026-06-07 03:33:51,599][528169] Updated weights for policy 0, policy_version 31423 (0.0008) +[2026-06-07 03:33:51,761][528169] Updated weights for policy 0, policy_version 31436 (0.0008) +[2026-06-07 03:33:52,442][528169] Updated weights for policy 0, policy_version 31449 (0.0008) +[2026-06-07 03:33:52,606][528169] Updated weights for policy 0, policy_version 31462 (0.0008) +[2026-06-07 03:33:52,750][528169] Updated weights for policy 0, policy_version 31473 (0.0008) +[2026-06-07 03:33:52,887][528169] Updated weights for policy 0, policy_version 31484 (0.0008) +[2026-06-07 03:33:53,037][528169] Updated weights for policy 0, policy_version 31496 (0.0008) +[2026-06-07 03:33:53,697][528169] Updated weights for policy 0, policy_version 31506 (0.0008) +[2026-06-07 03:33:53,861][528169] Updated weights for policy 0, policy_version 31519 (0.0008) +[2026-06-07 03:33:54,001][528169] Updated weights for policy 0, policy_version 31530 (0.0008) +[2026-06-07 03:33:54,145][528169] Updated weights for policy 0, policy_version 31542 (0.0008) +[2026-06-07 03:33:54,304][528169] Updated weights for policy 0, policy_version 31554 (0.0008) +[2026-06-07 03:33:54,459][528169] Updated weights for policy 0, policy_version 31566 (0.0008) +[2026-06-07 03:33:55,103][527010] Fps is (10 sec: 26214.5, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 16187392. Throughput: 0: 24684.1. Samples: 16198656. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) +[2026-06-07 03:33:55,104][527010] Avg episode reward: [(0, '96.755')] +[2026-06-07 03:33:55,117][528169] Updated weights for policy 0, policy_version 31578 (0.0008) +[2026-06-07 03:33:55,248][528169] Updated weights for policy 0, policy_version 31588 (0.0008) +[2026-06-07 03:33:55,398][528169] Updated weights for policy 0, policy_version 31600 (0.0008) +[2026-06-07 03:33:55,571][528169] Updated weights for policy 0, policy_version 31614 (0.0008) +[2026-06-07 03:33:55,717][528169] Updated weights for policy 0, policy_version 31625 (0.0008) +[2026-06-07 03:33:56,366][528169] Updated weights for policy 0, policy_version 31636 (0.0008) +[2026-06-07 03:33:56,539][528169] Updated weights for policy 0, policy_version 31650 (0.0008) +[2026-06-07 03:33:56,661][528169] Updated weights for policy 0, policy_version 31660 (0.0008) +[2026-06-07 03:33:56,795][528169] Updated weights for policy 0, policy_version 31670 (0.0008) +[2026-06-07 03:33:56,931][528169] Updated weights for policy 0, policy_version 31681 (0.0008) +[2026-06-07 03:33:57,065][528169] Updated weights for policy 0, policy_version 31691 (0.0007) +[2026-06-07 03:33:57,724][528169] Updated weights for policy 0, policy_version 31702 (0.0007) +[2026-06-07 03:33:57,869][528169] Updated weights for policy 0, policy_version 31713 (0.0009) +[2026-06-07 03:33:57,998][528169] Updated weights for policy 0, policy_version 31723 (0.0008) +[2026-06-07 03:33:58,145][528169] Updated weights for policy 0, policy_version 31734 (0.0009) +[2026-06-07 03:33:58,295][528169] Updated weights for policy 0, policy_version 31746 (0.0008) +[2026-06-07 03:33:58,434][528169] Updated weights for policy 0, policy_version 31756 (0.0008) +[2026-06-07 03:33:59,062][528169] Updated weights for policy 0, policy_version 31766 (0.0008) +[2026-06-07 03:33:59,195][528169] Updated weights for policy 0, policy_version 31776 (0.0008) +[2026-06-07 03:33:59,356][528169] Updated weights for policy 0, policy_version 31789 (0.0008) +[2026-06-07 03:33:59,522][528169] Updated weights for policy 0, policy_version 31801 (0.0008) +[2026-06-07 03:33:59,671][528169] Updated weights for policy 0, policy_version 31812 (0.0008) +[2026-06-07 03:33:59,821][528169] Updated weights for policy 0, policy_version 31823 (0.0009) +[2026-06-07 03:34:00,103][527010] Fps is (10 sec: 26214.5, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 16318464. Throughput: 0: 24553.2. Samples: 16271104. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) +[2026-06-07 03:34:00,104][527010] Avg episode reward: [(0, '89.818')] +[2026-06-07 03:34:00,431][528169] Updated weights for policy 0, policy_version 31834 (0.0008) +[2026-06-07 03:34:00,562][528169] Updated weights for policy 0, policy_version 31844 (0.0008) +[2026-06-07 03:34:00,708][528169] Updated weights for policy 0, policy_version 31855 (0.0008) +[2026-06-07 03:34:00,857][528169] Updated weights for policy 0, policy_version 31866 (0.0008) +[2026-06-07 03:34:00,994][528169] Updated weights for policy 0, policy_version 31876 (0.0008) +[2026-06-07 03:34:01,132][528169] Updated weights for policy 0, policy_version 31887 (0.0009) +[2026-06-07 03:34:01,775][528169] Updated weights for policy 0, policy_version 31898 (0.0008) +[2026-06-07 03:34:01,902][528169] Updated weights for policy 0, policy_version 31908 (0.0008) +[2026-06-07 03:34:02,050][528169] Updated weights for policy 0, policy_version 31920 (0.0008) +[2026-06-07 03:34:02,216][528169] Updated weights for policy 0, policy_version 31932 (0.0008) +[2026-06-07 03:34:02,358][528169] Updated weights for policy 0, policy_version 31943 (0.0008) +[2026-06-07 03:34:02,989][528169] Updated weights for policy 0, policy_version 31953 (0.0008) +[2026-06-07 03:34:03,109][528169] Updated weights for policy 0, policy_version 31963 (0.0008) +[2026-06-07 03:34:03,239][528169] Updated weights for policy 0, policy_version 31973 (0.0008) +[2026-06-07 03:34:03,389][528169] Updated weights for policy 0, policy_version 31985 (0.0008) +[2026-06-07 03:34:03,566][528169] Updated weights for policy 0, policy_version 31999 (0.0008) +[2026-06-07 03:34:03,701][528169] Updated weights for policy 0, policy_version 32009 (0.0008) +[2026-06-07 03:34:04,364][528169] Updated weights for policy 0, policy_version 32019 (0.0008) +[2026-06-07 03:34:04,497][528169] Updated weights for policy 0, policy_version 32030 (0.0008) +[2026-06-07 03:34:04,675][528169] Updated weights for policy 0, policy_version 32044 (0.0008) +[2026-06-07 03:34:04,823][528169] Updated weights for policy 0, policy_version 32056 (0.0008) +[2026-06-07 03:34:04,979][528169] Updated weights for policy 0, policy_version 32068 (0.0008) +[2026-06-07 03:34:05,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 16416768. Throughput: 0: 24661.4. Samples: 16422912. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) +[2026-06-07 03:34:05,104][527010] Avg episode reward: [(0, '103.793')] +[2026-06-07 03:34:05,127][528169] Updated weights for policy 0, policy_version 32080 (0.0008) +[2026-06-07 03:34:05,792][528169] Updated weights for policy 0, policy_version 32090 (0.0008) +[2026-06-07 03:34:05,934][528169] Updated weights for policy 0, policy_version 32102 (0.0008) +[2026-06-07 03:34:06,064][528169] Updated weights for policy 0, policy_version 32112 (0.0008) +[2026-06-07 03:34:06,193][528169] Updated weights for policy 0, policy_version 32123 (0.0008) +[2026-06-07 03:34:06,343][528169] Updated weights for policy 0, policy_version 32134 (0.0008) +[2026-06-07 03:34:06,984][528169] Updated weights for policy 0, policy_version 32145 (0.0009) +[2026-06-07 03:34:07,118][528169] Updated weights for policy 0, policy_version 32156 (0.0008) +[2026-06-07 03:34:07,265][528169] Updated weights for policy 0, policy_version 32168 (0.0008) +[2026-06-07 03:34:07,410][528169] Updated weights for policy 0, policy_version 32179 (0.0008) +[2026-06-07 03:34:07,553][528169] Updated weights for policy 0, policy_version 32189 (0.0009) +[2026-06-07 03:34:07,680][528169] Updated weights for policy 0, policy_version 32199 (0.0008) +[2026-06-07 03:34:08,322][528169] Updated weights for policy 0, policy_version 32209 (0.0008) +[2026-06-07 03:34:08,455][528169] Updated weights for policy 0, policy_version 32219 (0.0008) +[2026-06-07 03:34:08,607][528169] Updated weights for policy 0, policy_version 32231 (0.0008) +[2026-06-07 03:34:08,754][528169] Updated weights for policy 0, policy_version 32242 (0.0008) +[2026-06-07 03:34:08,883][528169] Updated weights for policy 0, policy_version 32252 (0.0008) +[2026-06-07 03:34:09,027][528169] Updated weights for policy 0, policy_version 32263 (0.0008) +[2026-06-07 03:34:09,687][528169] Updated weights for policy 0, policy_version 32275 (0.0008) +[2026-06-07 03:34:09,817][528169] Updated weights for policy 0, policy_version 32285 (0.0008) +[2026-06-07 03:34:09,950][528169] Updated weights for policy 0, policy_version 32295 (0.0008) +[2026-06-07 03:34:10,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 16547840. Throughput: 0: 24499.2. Samples: 16564608. Policy #0 lag: (min: 63.0, avg: 74.5, max: 127.0) +[2026-06-07 03:34:10,104][527010] Avg episode reward: [(0, '97.912')] +[2026-06-07 03:34:10,112][528169] Updated weights for policy 0, policy_version 32308 (0.0008) +[2026-06-07 03:34:10,245][528169] Updated weights for policy 0, policy_version 32318 (0.0008) +[2026-06-07 03:34:10,405][528169] Updated weights for policy 0, policy_version 32329 (0.0008) +[2026-06-07 03:34:11,018][528169] Updated weights for policy 0, policy_version 32341 (0.0008) +[2026-06-07 03:34:11,166][528169] Updated weights for policy 0, policy_version 32353 (0.0008) +[2026-06-07 03:34:11,302][528169] Updated weights for policy 0, policy_version 32363 (0.0009) +[2026-06-07 03:34:11,435][528169] Updated weights for policy 0, policy_version 32373 (0.0008) +[2026-06-07 03:34:11,591][528169] Updated weights for policy 0, policy_version 32385 (0.0009) +[2026-06-07 03:34:11,729][528169] Updated weights for policy 0, policy_version 32395 (0.0009) +[2026-06-07 03:34:12,398][528169] Updated weights for policy 0, policy_version 32408 (0.0008) +[2026-06-07 03:34:12,544][528169] Updated weights for policy 0, policy_version 32420 (0.0008) +[2026-06-07 03:34:12,672][528169] Updated weights for policy 0, policy_version 32430 (0.0008) +[2026-06-07 03:34:12,829][528169] Updated weights for policy 0, policy_version 32441 (0.0008) +[2026-06-07 03:34:12,977][528169] Updated weights for policy 0, policy_version 32452 (0.0008) +[2026-06-07 03:34:13,124][528169] Updated weights for policy 0, policy_version 32463 (0.0009) +[2026-06-07 03:34:13,729][528169] Updated weights for policy 0, policy_version 32473 (0.0008) +[2026-06-07 03:34:13,867][528169] Updated weights for policy 0, policy_version 32483 (0.0008) +[2026-06-07 03:34:14,020][528169] Updated weights for policy 0, policy_version 32494 (0.0009) +[2026-06-07 03:34:14,156][528169] Updated weights for policy 0, policy_version 32504 (0.0008) +[2026-06-07 03:34:14,282][528169] Updated weights for policy 0, policy_version 32514 (0.0008) +[2026-06-07 03:34:14,421][528169] Updated weights for policy 0, policy_version 32524 (0.0008) +[2026-06-07 03:34:15,022][528169] Updated weights for policy 0, policy_version 32534 (0.0008) +[2026-06-07 03:34:15,103][527010] Fps is (10 sec: 26214.4, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 16678912. Throughput: 0: 24658.6. Samples: 16647936. Policy #0 lag: (min: 63.0, avg: 74.5, max: 127.0) +[2026-06-07 03:34:15,104][527010] Avg episode reward: [(0, '101.468')] +[2026-06-07 03:34:15,153][528169] Updated weights for policy 0, policy_version 32544 (0.0008) +[2026-06-07 03:34:15,297][528169] Updated weights for policy 0, policy_version 32555 (0.0008) +[2026-06-07 03:34:15,433][528169] Updated weights for policy 0, policy_version 32565 (0.0008) +[2026-06-07 03:34:15,579][528169] Updated weights for policy 0, policy_version 32576 (0.0008) +[2026-06-07 03:34:15,748][528169] Updated weights for policy 0, policy_version 32589 (0.0008) +[2026-06-07 03:34:16,393][528169] Updated weights for policy 0, policy_version 32601 (0.0008) +[2026-06-07 03:34:16,525][528169] Updated weights for policy 0, policy_version 32611 (0.0008) +[2026-06-07 03:34:16,663][528169] Updated weights for policy 0, policy_version 32621 (0.0009) +[2026-06-07 03:34:16,802][528169] Updated weights for policy 0, policy_version 32632 (0.0008) +[2026-06-07 03:34:16,940][528169] Updated weights for policy 0, policy_version 32642 (0.0008) +[2026-06-07 03:34:17,084][528169] Updated weights for policy 0, policy_version 32653 (0.0008) +[2026-06-07 03:34:17,716][528169] Updated weights for policy 0, policy_version 32664 (0.0008) +[2026-06-07 03:34:17,860][528169] Updated weights for policy 0, policy_version 32675 (0.0008) +[2026-06-07 03:34:17,992][528169] Updated weights for policy 0, policy_version 32685 (0.0008) +[2026-06-07 03:34:18,135][528169] Updated weights for policy 0, policy_version 32696 (0.0008) +[2026-06-07 03:34:18,286][528169] Updated weights for policy 0, policy_version 32707 (0.0008) +[2026-06-07 03:34:18,420][528169] Updated weights for policy 0, policy_version 32717 (0.0008) +[2026-06-07 03:34:19,028][528169] Updated weights for policy 0, policy_version 32728 (0.0008) +[2026-06-07 03:34:19,175][528169] Updated weights for policy 0, policy_version 32739 (0.0008) +[2026-06-07 03:34:19,312][528169] Updated weights for policy 0, policy_version 32749 (0.0008) +[2026-06-07 03:34:19,458][528169] Updated weights for policy 0, policy_version 32760 (0.0008) +[2026-06-07 03:34:19,610][528169] Updated weights for policy 0, policy_version 32772 (0.0009) +[2026-06-07 03:34:19,758][528169] Updated weights for policy 0, policy_version 32783 (0.0008) +[2026-06-07 03:34:20,103][527010] Fps is (10 sec: 26214.4, 60 sec: 24576.1, 300 sec: 24548.2). Total num frames: 16809984. Throughput: 0: 24664.2. Samples: 16788864. Policy #0 lag: (min: 63.0, avg: 74.5, max: 127.0) +[2026-06-07 03:34:20,104][527010] Avg episode reward: [(0, '95.177')] +[2026-06-07 03:34:20,405][528169] Updated weights for policy 0, policy_version 32795 (0.0008) +[2026-06-07 03:34:20,550][528169] Updated weights for policy 0, policy_version 32806 (0.0008) +[2026-06-07 03:34:20,718][528169] Updated weights for policy 0, policy_version 32818 (0.0009) +[2026-06-07 03:34:20,841][528169] Updated weights for policy 0, policy_version 32828 (0.0008) +[2026-06-07 03:34:20,978][528169] Updated weights for policy 0, policy_version 32838 (0.0008) +[2026-06-07 03:34:21,584][528169] Updated weights for policy 0, policy_version 32849 (0.0008) +[2026-06-07 03:34:21,711][528169] Updated weights for policy 0, policy_version 32859 (0.0008) +[2026-06-07 03:34:21,848][528169] Updated weights for policy 0, policy_version 32870 (0.0009) +[2026-06-07 03:34:21,988][528169] Updated weights for policy 0, policy_version 32880 (0.0008) +[2026-06-07 03:34:22,122][528169] Updated weights for policy 0, policy_version 32890 (0.0008) +[2026-06-07 03:34:22,257][528169] Updated weights for policy 0, policy_version 32900 (0.0008) +[2026-06-07 03:34:22,385][528169] Updated weights for policy 0, policy_version 32910 (0.0008) +[2026-06-07 03:34:23,015][528169] Updated weights for policy 0, policy_version 32920 (0.0008) +[2026-06-07 03:34:23,146][528169] Updated weights for policy 0, policy_version 32930 (0.0008) +[2026-06-07 03:34:23,276][528169] Updated weights for policy 0, policy_version 32940 (0.0008) +[2026-06-07 03:34:23,407][528169] Updated weights for policy 0, policy_version 32950 (0.0008) +[2026-06-07 03:34:23,539][528169] Updated weights for policy 0, policy_version 32960 (0.0008) +[2026-06-07 03:34:23,670][528169] Updated weights for policy 0, policy_version 32970 (0.0008) +[2026-06-07 03:34:24,302][528169] Updated weights for policy 0, policy_version 32980 (0.0009) +[2026-06-07 03:34:24,448][528169] Updated weights for policy 0, policy_version 32991 (0.0008) +[2026-06-07 03:34:24,579][528169] Updated weights for policy 0, policy_version 33001 (0.0008) +[2026-06-07 03:34:24,709][528169] Updated weights for policy 0, policy_version 33011 (0.0008) +[2026-06-07 03:34:24,849][528169] Updated weights for policy 0, policy_version 33021 (0.0008) +[2026-06-07 03:34:24,989][528169] Updated weights for policy 0, policy_version 33032 (0.0008) +[2026-06-07 03:34:25,103][527010] Fps is (10 sec: 26214.5, 60 sec: 25122.1, 300 sec: 24659.3). Total num frames: 16941056. Throughput: 0: 24630.0. Samples: 16942592. Policy #0 lag: (min: 63.0, avg: 74.5, max: 127.0) +[2026-06-07 03:34:25,104][527010] Avg episode reward: [(0, '117.416')] +[2026-06-07 03:34:25,108][528093] Saving new best policy, reward=117.416! +[2026-06-07 03:34:25,621][528169] Updated weights for policy 0, policy_version 33044 (0.0009) +[2026-06-07 03:34:25,749][528169] Updated weights for policy 0, policy_version 33054 (0.0008) +[2026-06-07 03:34:25,883][528169] Updated weights for policy 0, policy_version 33064 (0.0008) +[2026-06-07 03:34:26,015][528169] Updated weights for policy 0, policy_version 33074 (0.0008) +[2026-06-07 03:34:26,155][528169] Updated weights for policy 0, policy_version 33084 (0.0009) +[2026-06-07 03:34:26,315][528169] Updated weights for policy 0, policy_version 33096 (0.0009) +[2026-06-07 03:34:26,926][528169] Updated weights for policy 0, policy_version 33107 (0.0008) +[2026-06-07 03:34:27,059][528169] Updated weights for policy 0, policy_version 33117 (0.0008) +[2026-06-07 03:34:27,182][528169] Updated weights for policy 0, policy_version 33127 (0.0008) +[2026-06-07 03:34:27,312][528169] Updated weights for policy 0, policy_version 33137 (0.0008) +[2026-06-07 03:34:27,443][528169] Updated weights for policy 0, policy_version 33147 (0.0008) +[2026-06-07 03:34:27,610][528169] Updated weights for policy 0, policy_version 33159 (0.0008) +[2026-06-07 03:34:28,244][528169] Updated weights for policy 0, policy_version 33170 (0.0009) +[2026-06-07 03:34:28,389][528169] Updated weights for policy 0, policy_version 33181 (0.0010) +[2026-06-07 03:34:28,525][528169] Updated weights for policy 0, policy_version 33192 (0.0009) +[2026-06-07 03:34:28,663][528169] Updated weights for policy 0, policy_version 33202 (0.0008) +[2026-06-07 03:34:28,801][528169] Updated weights for policy 0, policy_version 33213 (0.0008) +[2026-06-07 03:34:28,944][528169] Updated weights for policy 0, policy_version 33223 (0.0008) +[2026-06-07 03:34:29,600][528169] Updated weights for policy 0, policy_version 33233 (0.0010) +[2026-06-07 03:34:29,726][528169] Updated weights for policy 0, policy_version 33243 (0.0010) +[2026-06-07 03:34:29,858][528169] Updated weights for policy 0, policy_version 33253 (0.0008) +[2026-06-07 03:34:29,985][528169] Updated weights for policy 0, policy_version 33263 (0.0008) +[2026-06-07 03:34:30,103][527010] Fps is (10 sec: 22937.7, 60 sec: 24576.1, 300 sec: 24548.2). Total num frames: 17039360. Throughput: 0: 24635.8. Samples: 17012224. Policy #0 lag: (min: 55.0, avg: 68.0, max: 119.0) +[2026-06-07 03:34:30,103][527010] Avg episode reward: [(0, '109.883')] +[2026-06-07 03:34:30,125][528169] Updated weights for policy 0, policy_version 33273 (0.0008) +[2026-06-07 03:34:30,255][528169] Updated weights for policy 0, policy_version 33283 (0.0008) +[2026-06-07 03:34:30,384][528169] Updated weights for policy 0, policy_version 33293 (0.0008) +[2026-06-07 03:34:30,990][528169] Updated weights for policy 0, policy_version 33303 (0.0009) +[2026-06-07 03:34:31,118][528169] Updated weights for policy 0, policy_version 33313 (0.0008) +[2026-06-07 03:34:31,250][528169] Updated weights for policy 0, policy_version 33323 (0.0009) +[2026-06-07 03:34:31,394][528169] Updated weights for policy 0, policy_version 33334 (0.0008) +[2026-06-07 03:34:31,528][528169] Updated weights for policy 0, policy_version 33344 (0.0008) +[2026-06-07 03:34:31,669][528169] Updated weights for policy 0, policy_version 33354 (0.0008) +[2026-06-07 03:34:32,281][528169] Updated weights for policy 0, policy_version 33364 (0.0008) +[2026-06-07 03:34:32,413][528169] Updated weights for policy 0, policy_version 33374 (0.0008) +[2026-06-07 03:34:32,551][528169] Updated weights for policy 0, policy_version 33384 (0.0008) +[2026-06-07 03:34:32,708][528169] Updated weights for policy 0, policy_version 33396 (0.0008) +[2026-06-07 03:34:32,842][528169] Updated weights for policy 0, policy_version 33406 (0.0008) +[2026-06-07 03:34:32,975][528169] Updated weights for policy 0, policy_version 33416 (0.0008) +[2026-06-07 03:34:33,612][528169] Updated weights for policy 0, policy_version 33428 (0.0008) +[2026-06-07 03:34:33,747][528169] Updated weights for policy 0, policy_version 33439 (0.0008) +[2026-06-07 03:34:33,871][528169] Updated weights for policy 0, policy_version 33449 (0.0008) +[2026-06-07 03:34:34,023][528169] Updated weights for policy 0, policy_version 33460 (0.0008) +[2026-06-07 03:34:34,165][528169] Updated weights for policy 0, policy_version 33470 (0.0008) +[2026-06-07 03:34:34,302][528169] Updated weights for policy 0, policy_version 33480 (0.0008) +[2026-06-07 03:34:34,934][528169] Updated weights for policy 0, policy_version 33491 (0.0008) +[2026-06-07 03:34:35,071][528169] Updated weights for policy 0, policy_version 33501 (0.0008) +[2026-06-07 03:34:35,103][527010] Fps is (10 sec: 22937.5, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 17170432. Throughput: 0: 24485.0. Samples: 17157888. Policy #0 lag: (min: 55.0, avg: 68.0, max: 119.0) +[2026-06-07 03:34:35,103][527010] Avg episode reward: [(0, '86.165')] +[2026-06-07 03:34:35,206][528169] Updated weights for policy 0, policy_version 33511 (0.0008) +[2026-06-07 03:34:35,360][528169] Updated weights for policy 0, policy_version 33522 (0.0008) +[2026-06-07 03:34:35,494][528169] Updated weights for policy 0, policy_version 33532 (0.0008) +[2026-06-07 03:34:35,659][528169] Updated weights for policy 0, policy_version 33545 (0.0008) +[2026-06-07 03:34:36,269][528169] Updated weights for policy 0, policy_version 33555 (0.0008) +[2026-06-07 03:34:36,412][528169] Updated weights for policy 0, policy_version 33566 (0.0008) +[2026-06-07 03:34:36,551][528169] Updated weights for policy 0, policy_version 33576 (0.0008) +[2026-06-07 03:34:36,678][528169] Updated weights for policy 0, policy_version 33586 (0.0008) +[2026-06-07 03:34:36,817][528169] Updated weights for policy 0, policy_version 33597 (0.0008) +[2026-06-07 03:34:36,946][528169] Updated weights for policy 0, policy_version 33607 (0.0008) +[2026-06-07 03:34:37,569][528169] Updated weights for policy 0, policy_version 33618 (0.0008) +[2026-06-07 03:34:37,696][528169] Updated weights for policy 0, policy_version 33628 (0.0008) +[2026-06-07 03:34:37,825][528169] Updated weights for policy 0, policy_version 33638 (0.0008) +[2026-06-07 03:34:37,966][528169] Updated weights for policy 0, policy_version 33649 (0.0008) +[2026-06-07 03:34:38,108][528169] Updated weights for policy 0, policy_version 33659 (0.0008) +[2026-06-07 03:34:38,235][528169] Updated weights for policy 0, policy_version 33669 (0.0008) +[2026-06-07 03:34:38,378][528169] Updated weights for policy 0, policy_version 33679 (0.0008) +[2026-06-07 03:34:39,010][528169] Updated weights for policy 0, policy_version 33690 (0.0008) +[2026-06-07 03:34:39,147][528169] Updated weights for policy 0, policy_version 33701 (0.0006) +[2026-06-07 03:34:39,306][528169] Updated weights for policy 0, policy_version 33713 (0.0008) +[2026-06-07 03:34:39,455][528169] Updated weights for policy 0, policy_version 33724 (0.0008) +[2026-06-07 03:34:39,584][528169] Updated weights for policy 0, policy_version 33734 (0.0008) +[2026-06-07 03:34:39,718][528169] Updated weights for policy 0, policy_version 33744 (0.0008) +[2026-06-07 03:34:40,103][527010] Fps is (10 sec: 26214.3, 60 sec: 24576.0, 300 sec: 24659.3). Total num frames: 17301504. Throughput: 0: 24669.9. Samples: 17308800. Policy #0 lag: (min: 55.0, avg: 68.0, max: 119.0) +[2026-06-07 03:34:40,104][527010] Avg episode reward: [(0, '118.976')] +[2026-06-07 03:34:40,109][528093] Saving new best policy, reward=118.976! +[2026-06-07 03:34:40,359][528169] Updated weights for policy 0, policy_version 33755 (0.0008) +[2026-06-07 03:34:40,510][528169] Updated weights for policy 0, policy_version 33766 (0.0009) +[2026-06-07 03:34:40,636][528169] Updated weights for policy 0, policy_version 33776 (0.0008) +[2026-06-07 03:34:40,764][528169] Updated weights for policy 0, policy_version 33786 (0.0008) +[2026-06-07 03:34:40,906][528169] Updated weights for policy 0, policy_version 33796 (0.0008) +[2026-06-07 03:34:41,056][528169] Updated weights for policy 0, policy_version 33808 (0.0008) +[2026-06-07 03:34:41,654][528169] Updated weights for policy 0, policy_version 33819 (0.0007) +[2026-06-07 03:34:41,788][528169] Updated weights for policy 0, policy_version 33829 (0.0004) +[2026-06-07 03:34:41,924][528169] Updated weights for policy 0, policy_version 33840 (0.0004) +[2026-06-07 03:34:42,060][528169] Updated weights for policy 0, policy_version 33850 (0.0004) +[2026-06-07 03:34:42,196][528169] Updated weights for policy 0, policy_version 33860 (0.0004) +[2026-06-07 03:34:42,343][528169] Updated weights for policy 0, policy_version 33871 (0.0004) +[2026-06-07 03:34:42,948][528169] Updated weights for policy 0, policy_version 33881 (0.0008) +[2026-06-07 03:34:43,107][528169] Updated weights for policy 0, policy_version 33893 (0.0008) +[2026-06-07 03:34:43,237][528169] Updated weights for policy 0, policy_version 33903 (0.0008) +[2026-06-07 03:34:43,368][528169] Updated weights for policy 0, policy_version 33913 (0.0008) +[2026-06-07 03:34:43,523][528169] Updated weights for policy 0, policy_version 33925 (0.0008) +[2026-06-07 03:34:43,666][528169] Updated weights for policy 0, policy_version 33936 (0.0008) +[2026-06-07 03:34:44,307][528169] Updated weights for policy 0, policy_version 33947 (0.0008) +[2026-06-07 03:34:44,437][528169] Updated weights for policy 0, policy_version 33957 (0.0008) +[2026-06-07 03:34:44,566][528169] Updated weights for policy 0, policy_version 33967 (0.0008) +[2026-06-07 03:34:44,719][528169] Updated weights for policy 0, policy_version 33978 (0.0008) +[2026-06-07 03:34:44,841][528169] Updated weights for policy 0, policy_version 33988 (0.0008) +[2026-06-07 03:34:44,971][528169] Updated weights for policy 0, policy_version 33998 (0.0008) +[2026-06-07 03:34:45,103][527010] Fps is (10 sec: 26214.5, 60 sec: 25122.2, 300 sec: 24659.3). Total num frames: 17432576. Throughput: 0: 24610.1. Samples: 17378560. Policy #0 lag: (min: 55.0, avg: 68.0, max: 119.0) +[2026-06-07 03:34:45,104][527010] Avg episode reward: [(0, '117.049')] +[2026-06-07 03:34:45,597][528169] Updated weights for policy 0, policy_version 34008 (0.0008) +[2026-06-07 03:34:45,724][528169] Updated weights for policy 0, policy_version 34018 (0.0008) +[2026-06-07 03:34:45,860][528169] Updated weights for policy 0, policy_version 34028 (0.0008) +[2026-06-07 03:34:45,992][528169] Updated weights for policy 0, policy_version 34038 (0.0008) +[2026-06-07 03:34:46,122][528169] Updated weights for policy 0, policy_version 34048 (0.0008) +[2026-06-07 03:34:46,262][528169] Updated weights for policy 0, policy_version 34058 (0.0009) +[2026-06-07 03:34:46,860][528169] Updated weights for policy 0, policy_version 34070 (0.0008) +[2026-06-07 03:34:46,986][528169] Updated weights for policy 0, policy_version 34080 (0.0008) +[2026-06-07 03:34:47,116][528169] Updated weights for policy 0, policy_version 34090 (0.0008) +[2026-06-07 03:34:47,268][528169] Updated weights for policy 0, policy_version 34101 (0.0008) +[2026-06-07 03:34:47,425][528169] Updated weights for policy 0, policy_version 34113 (0.0008) +[2026-06-07 03:34:47,555][528169] Updated weights for policy 0, policy_version 34123 (0.0008) +[2026-06-07 03:34:48,184][528169] Updated weights for policy 0, policy_version 34133 (0.0010) +[2026-06-07 03:34:48,313][528169] Updated weights for policy 0, policy_version 34143 (0.0010) +[2026-06-07 03:34:48,466][528169] Updated weights for policy 0, policy_version 34154 (0.0011) +[2026-06-07 03:34:48,601][528169] Updated weights for policy 0, policy_version 34164 (0.0010) +[2026-06-07 03:34:48,737][528169] Updated weights for policy 0, policy_version 34174 (0.0012) +[2026-06-07 03:34:48,873][528169] Updated weights for policy 0, policy_version 34184 (0.0011) +[2026-06-07 03:34:49,493][528169] Updated weights for policy 0, policy_version 34196 (0.0010) +[2026-06-07 03:34:49,634][528169] Updated weights for policy 0, policy_version 34206 (0.0011) +[2026-06-07 03:34:49,773][528169] Updated weights for policy 0, policy_version 34217 (0.0010) +[2026-06-07 03:34:49,920][528169] Updated weights for policy 0, policy_version 34228 (0.0008) +[2026-06-07 03:34:50,057][528169] Updated weights for policy 0, policy_version 34238 (0.0008) +[2026-06-07 03:34:50,103][527010] Fps is (10 sec: 22937.5, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 17530880. Throughput: 0: 24698.3. Samples: 17534336. Policy #0 lag: (min: 55.0, avg: 68.0, max: 119.0) +[2026-06-07 03:34:50,104][527010] Avg episode reward: [(0, '119.423')] +[2026-06-07 03:34:50,197][528169] Updated weights for policy 0, policy_version 34249 (0.0008) +[2026-06-07 03:34:50,288][528093] Saving new best policy, reward=119.423! +[2026-06-07 03:34:50,802][528169] Updated weights for policy 0, policy_version 34259 (0.0008) +[2026-06-07 03:34:50,929][528169] Updated weights for policy 0, policy_version 34269 (0.0008) +[2026-06-07 03:34:51,071][528169] Updated weights for policy 0, policy_version 34280 (0.0008) +[2026-06-07 03:34:51,230][528169] Updated weights for policy 0, policy_version 34292 (0.0008) +[2026-06-07 03:34:51,379][528169] Updated weights for policy 0, policy_version 34303 (0.0008) +[2026-06-07 03:34:51,509][528169] Updated weights for policy 0, policy_version 34313 (0.0008) +[2026-06-07 03:34:52,174][528169] Updated weights for policy 0, policy_version 34324 (0.0008) +[2026-06-07 03:34:52,309][528169] Updated weights for policy 0, policy_version 34334 (0.0008) +[2026-06-07 03:34:52,430][528169] Updated weights for policy 0, policy_version 34344 (0.0008) +[2026-06-07 03:34:52,566][528169] Updated weights for policy 0, policy_version 34354 (0.0008) +[2026-06-07 03:34:52,710][528169] Updated weights for policy 0, policy_version 34365 (0.0008) +[2026-06-07 03:34:52,848][528169] Updated weights for policy 0, policy_version 34375 (0.0008) +[2026-06-07 03:34:53,442][528169] Updated weights for policy 0, policy_version 34385 (0.0008) +[2026-06-07 03:34:53,594][528169] Updated weights for policy 0, policy_version 34397 (0.0008) +[2026-06-07 03:34:53,756][528169] Updated weights for policy 0, policy_version 34409 (0.0008) +[2026-06-07 03:34:53,892][528169] Updated weights for policy 0, policy_version 34419 (0.0008) +[2026-06-07 03:34:54,031][528169] Updated weights for policy 0, policy_version 34430 (0.0008) +[2026-06-07 03:34:54,179][528169] Updated weights for policy 0, policy_version 34441 (0.0008) +[2026-06-07 03:34:54,818][528169] Updated weights for policy 0, policy_version 34451 (0.0008) +[2026-06-07 03:34:54,944][528169] Updated weights for policy 0, policy_version 34461 (0.0008) +[2026-06-07 03:34:55,084][528169] Updated weights for policy 0, policy_version 34471 (0.0008) +[2026-06-07 03:34:55,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 17661952. Throughput: 0: 24661.3. Samples: 17674368. Policy #0 lag: (min: 63.0, avg: 74.6, max: 127.0) +[2026-06-07 03:34:55,104][527010] Avg episode reward: [(0, '116.591')] +[2026-06-07 03:34:55,214][528169] Updated weights for policy 0, policy_version 34481 (0.0008) +[2026-06-07 03:34:55,346][528169] Updated weights for policy 0, policy_version 34491 (0.0008) +[2026-06-07 03:34:55,489][528169] Updated weights for policy 0, policy_version 34501 (0.0009) +[2026-06-07 03:34:55,619][528169] Updated weights for policy 0, policy_version 34511 (0.0008) +[2026-06-07 03:34:56,202][528169] Updated weights for policy 0, policy_version 34521 (0.0008) +[2026-06-07 03:34:56,339][528169] Updated weights for policy 0, policy_version 34531 (0.0008) +[2026-06-07 03:34:56,491][528169] Updated weights for policy 0, policy_version 34543 (0.0008) +[2026-06-07 03:34:56,626][528169] Updated weights for policy 0, policy_version 34553 (0.0008) +[2026-06-07 03:34:56,759][528169] Updated weights for policy 0, policy_version 34563 (0.0008) +[2026-06-07 03:34:56,892][528169] Updated weights for policy 0, policy_version 34573 (0.0008) +[2026-06-07 03:34:57,537][528169] Updated weights for policy 0, policy_version 34584 (0.0008) +[2026-06-07 03:34:57,675][528169] Updated weights for policy 0, policy_version 34595 (0.0008) +[2026-06-07 03:34:57,822][528169] Updated weights for policy 0, policy_version 34606 (0.0008) +[2026-06-07 03:34:57,958][528169] Updated weights for policy 0, policy_version 34616 (0.0008) +[2026-06-07 03:34:58,115][528169] Updated weights for policy 0, policy_version 34627 (0.0008) +[2026-06-07 03:34:58,255][528169] Updated weights for policy 0, policy_version 34638 (0.0008) +[2026-06-07 03:34:58,872][528169] Updated weights for policy 0, policy_version 34648 (0.0008) +[2026-06-07 03:34:59,007][528169] Updated weights for policy 0, policy_version 34658 (0.0008) +[2026-06-07 03:34:59,145][528169] Updated weights for policy 0, policy_version 34669 (0.0008) +[2026-06-07 03:34:59,311][528169] Updated weights for policy 0, policy_version 34681 (0.0007) +[2026-06-07 03:34:59,449][528169] Updated weights for policy 0, policy_version 34691 (0.0007) +[2026-06-07 03:34:59,569][528169] Updated weights for policy 0, policy_version 34701 (0.0008) +[2026-06-07 03:35:00,103][527010] Fps is (10 sec: 26214.4, 60 sec: 24576.0, 300 sec: 24659.3). Total num frames: 17793024. Throughput: 0: 24567.5. Samples: 17753472. Policy #0 lag: (min: 63.0, avg: 74.6, max: 127.0) +[2026-06-07 03:35:00,104][527010] Avg episode reward: [(0, '118.720')] +[2026-06-07 03:35:00,179][528169] Updated weights for policy 0, policy_version 34712 (0.0008) +[2026-06-07 03:35:00,351][528169] Updated weights for policy 0, policy_version 34725 (0.0008) +[2026-06-07 03:35:00,484][528169] Updated weights for policy 0, policy_version 34735 (0.0008) +[2026-06-07 03:35:00,617][528169] Updated weights for policy 0, policy_version 34745 (0.0008) +[2026-06-07 03:35:00,747][528169] Updated weights for policy 0, policy_version 34755 (0.0008) +[2026-06-07 03:35:00,880][528169] Updated weights for policy 0, policy_version 34765 (0.0008) +[2026-06-07 03:35:01,493][528169] Updated weights for policy 0, policy_version 34776 (0.0005) +[2026-06-07 03:35:01,651][528169] Updated weights for policy 0, policy_version 34788 (0.0004) +[2026-06-07 03:35:01,817][528169] Updated weights for policy 0, policy_version 34800 (0.0004) +[2026-06-07 03:35:01,943][528169] Updated weights for policy 0, policy_version 34810 (0.0004) +[2026-06-07 03:35:02,092][528169] Updated weights for policy 0, policy_version 34821 (0.0004) +[2026-06-07 03:35:02,225][528169] Updated weights for policy 0, policy_version 34831 (0.0004) +[2026-06-07 03:35:02,793][528169] Updated weights for policy 0, policy_version 34841 (0.0004) +[2026-06-07 03:35:02,922][528169] Updated weights for policy 0, policy_version 34851 (0.0004) +[2026-06-07 03:35:03,050][528169] Updated weights for policy 0, policy_version 34861 (0.0004) +[2026-06-07 03:35:03,195][528169] Updated weights for policy 0, policy_version 34872 (0.0004) +[2026-06-07 03:35:03,341][528169] Updated weights for policy 0, policy_version 34883 (0.0004) +[2026-06-07 03:35:03,479][528169] Updated weights for policy 0, policy_version 34893 (0.0006) +[2026-06-07 03:35:04,079][528169] Updated weights for policy 0, policy_version 34903 (0.0008) +[2026-06-07 03:35:04,245][528169] Updated weights for policy 0, policy_version 34916 (0.0008) +[2026-06-07 03:35:04,382][528169] Updated weights for policy 0, policy_version 34926 (0.0008) +[2026-06-07 03:35:04,514][528169] Updated weights for policy 0, policy_version 34936 (0.0008) +[2026-06-07 03:35:04,645][528169] Updated weights for policy 0, policy_version 34946 (0.0008) +[2026-06-07 03:35:04,776][528169] Updated weights for policy 0, policy_version 34956 (0.0008) +[2026-06-07 03:35:05,103][527010] Fps is (10 sec: 26214.4, 60 sec: 25122.1, 300 sec: 24659.3). Total num frames: 17924096. Throughput: 0: 24712.5. Samples: 17900928. Policy #0 lag: (min: 63.0, avg: 74.6, max: 127.0) +[2026-06-07 03:35:05,104][527010] Avg episode reward: [(0, '123.602')] +[2026-06-07 03:35:05,109][528093] Saving new best policy, reward=123.602! +[2026-06-07 03:35:05,417][528169] Updated weights for policy 0, policy_version 34968 (0.0008) +[2026-06-07 03:35:05,572][528169] Updated weights for policy 0, policy_version 34979 (0.0009) +[2026-06-07 03:35:05,694][528169] Updated weights for policy 0, policy_version 34989 (0.0008) +[2026-06-07 03:35:05,829][528169] Updated weights for policy 0, policy_version 34999 (0.0008) +[2026-06-07 03:35:05,990][528169] Updated weights for policy 0, policy_version 35011 (0.0008) +[2026-06-07 03:35:06,145][528169] Updated weights for policy 0, policy_version 35022 (0.0008) +[2026-06-07 03:35:06,753][528169] Updated weights for policy 0, policy_version 35033 (0.0008) +[2026-06-07 03:35:06,886][528169] Updated weights for policy 0, policy_version 35043 (0.0008) +[2026-06-07 03:35:07,016][528169] Updated weights for policy 0, policy_version 35053 (0.0008) +[2026-06-07 03:35:07,153][528169] Updated weights for policy 0, policy_version 35063 (0.0008) +[2026-06-07 03:35:07,294][528169] Updated weights for policy 0, policy_version 35074 (0.0008) +[2026-06-07 03:35:07,434][528169] Updated weights for policy 0, policy_version 35084 (0.0009) +[2026-06-07 03:35:08,046][528169] Updated weights for policy 0, policy_version 35094 (0.0008) +[2026-06-07 03:35:08,217][528169] Updated weights for policy 0, policy_version 35107 (0.0008) +[2026-06-07 03:35:08,355][528169] Updated weights for policy 0, policy_version 35117 (0.0009) +[2026-06-07 03:35:08,499][528169] Updated weights for policy 0, policy_version 35128 (0.0008) +[2026-06-07 03:35:08,631][528169] Updated weights for policy 0, policy_version 35138 (0.0008) +[2026-06-07 03:35:08,762][528169] Updated weights for policy 0, policy_version 35148 (0.0008) +[2026-06-07 03:35:09,399][528169] Updated weights for policy 0, policy_version 35158 (0.0009) +[2026-06-07 03:35:09,528][528169] Updated weights for policy 0, policy_version 35168 (0.0008) +[2026-06-07 03:35:09,689][528169] Updated weights for policy 0, policy_version 35180 (0.0008) +[2026-06-07 03:35:09,842][528169] Updated weights for policy 0, policy_version 35192 (0.0009) +[2026-06-07 03:35:09,982][528169] Updated weights for policy 0, policy_version 35202 (0.0009) +[2026-06-07 03:35:10,103][527010] Fps is (10 sec: 22937.1, 60 sec: 24575.9, 300 sec: 24548.2). Total num frames: 18022400. Throughput: 0: 24701.0. Samples: 18054144. Policy #0 lag: (min: 63.0, avg: 74.6, max: 127.0) +[2026-06-07 03:35:10,104][527010] Avg episode reward: [(0, '117.305')] +[2026-06-07 03:35:10,120][528169] Updated weights for policy 0, policy_version 35212 (0.0008) +[2026-06-07 03:35:10,732][528169] Updated weights for policy 0, policy_version 35224 (0.0008) +[2026-06-07 03:35:10,866][528169] Updated weights for policy 0, policy_version 35234 (0.0008) +[2026-06-07 03:35:11,007][528169] Updated weights for policy 0, policy_version 35245 (0.0009) +[2026-06-07 03:35:11,147][528169] Updated weights for policy 0, policy_version 35255 (0.0008) +[2026-06-07 03:35:11,292][528169] Updated weights for policy 0, policy_version 35266 (0.0008) +[2026-06-07 03:35:11,419][528169] Updated weights for policy 0, policy_version 35276 (0.0008) +[2026-06-07 03:35:12,044][528169] Updated weights for policy 0, policy_version 35287 (0.0008) +[2026-06-07 03:35:12,168][528169] Updated weights for policy 0, policy_version 35297 (0.0008) +[2026-06-07 03:35:12,305][528169] Updated weights for policy 0, policy_version 35307 (0.0009) +[2026-06-07 03:35:12,441][528169] Updated weights for policy 0, policy_version 35317 (0.0009) +[2026-06-07 03:35:12,581][528169] Updated weights for policy 0, policy_version 35328 (0.0008) +[2026-06-07 03:35:12,711][528169] Updated weights for policy 0, policy_version 35338 (0.0008) +[2026-06-07 03:35:13,335][528169] Updated weights for policy 0, policy_version 35348 (0.0009) +[2026-06-07 03:35:13,468][528169] Updated weights for policy 0, policy_version 35359 (0.0008) +[2026-06-07 03:35:13,622][528169] Updated weights for policy 0, policy_version 35370 (0.0009) +[2026-06-07 03:35:13,765][528169] Updated weights for policy 0, policy_version 35381 (0.0008) +[2026-06-07 03:35:13,921][528169] Updated weights for policy 0, policy_version 35393 (0.0008) +[2026-06-07 03:35:14,055][528169] Updated weights for policy 0, policy_version 35403 (0.0007) +[2026-06-07 03:35:14,653][528169] Updated weights for policy 0, policy_version 35413 (0.0006) +[2026-06-07 03:35:14,785][528169] Updated weights for policy 0, policy_version 35423 (0.0007) +[2026-06-07 03:35:14,929][528169] Updated weights for policy 0, policy_version 35434 (0.0008) +[2026-06-07 03:35:15,075][528169] Updated weights for policy 0, policy_version 35445 (0.0009) +[2026-06-07 03:35:15,103][527010] Fps is (10 sec: 22937.4, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 18153472. Throughput: 0: 24755.1. Samples: 18126208. Policy #0 lag: (min: 63.0, avg: 74.6, max: 127.0) +[2026-06-07 03:35:15,104][527010] Avg episode reward: [(0, '108.428')] +[2026-06-07 03:35:15,204][528169] Updated weights for policy 0, policy_version 35455 (0.0008) +[2026-06-07 03:35:15,344][528169] Updated weights for policy 0, policy_version 35465 (0.0008) +[2026-06-07 03:35:15,938][528169] Updated weights for policy 0, policy_version 35475 (0.0008) +[2026-06-07 03:35:16,094][528169] Updated weights for policy 0, policy_version 35487 (0.0008) +[2026-06-07 03:35:16,222][528169] Updated weights for policy 0, policy_version 35497 (0.0009) +[2026-06-07 03:35:16,376][528169] Updated weights for policy 0, policy_version 35509 (0.0008) +[2026-06-07 03:35:16,508][528169] Updated weights for policy 0, policy_version 35519 (0.0008) +[2026-06-07 03:35:16,644][528169] Updated weights for policy 0, policy_version 35529 (0.0008) +[2026-06-07 03:35:17,292][528169] Updated weights for policy 0, policy_version 35539 (0.0008) +[2026-06-07 03:35:17,430][528169] Updated weights for policy 0, policy_version 35550 (0.0005) +[2026-06-07 03:35:17,564][528169] Updated weights for policy 0, policy_version 35560 (0.0008) +[2026-06-07 03:35:17,728][528169] Updated weights for policy 0, policy_version 35572 (0.0009) +[2026-06-07 03:35:17,864][528169] Updated weights for policy 0, policy_version 35582 (0.0008) +[2026-06-07 03:35:18,009][528169] Updated weights for policy 0, policy_version 35593 (0.0008) +[2026-06-07 03:35:18,599][528169] Updated weights for policy 0, policy_version 35603 (0.0008) +[2026-06-07 03:35:18,730][528169] Updated weights for policy 0, policy_version 35613 (0.0008) +[2026-06-07 03:35:18,858][528169] Updated weights for policy 0, policy_version 35623 (0.0008) +[2026-06-07 03:35:19,007][528169] Updated weights for policy 0, policy_version 35634 (0.0008) +[2026-06-07 03:35:19,136][528169] Updated weights for policy 0, policy_version 35644 (0.0008) +[2026-06-07 03:35:19,268][528169] Updated weights for policy 0, policy_version 35654 (0.0008) +[2026-06-07 03:35:19,908][528169] Updated weights for policy 0, policy_version 35665 (0.0008) +[2026-06-07 03:35:20,048][528169] Updated weights for policy 0, policy_version 35676 (0.0008) +[2026-06-07 03:35:20,103][527010] Fps is (10 sec: 26214.9, 60 sec: 24576.0, 300 sec: 24659.3). Total num frames: 18284544. Throughput: 0: 24723.9. Samples: 18270464. Policy #0 lag: (min: 63.0, avg: 76.6, max: 127.0) +[2026-06-07 03:35:20,104][527010] Avg episode reward: [(0, '118.856')] +[2026-06-07 03:35:20,178][528169] Updated weights for policy 0, policy_version 35686 (0.0008) +[2026-06-07 03:35:20,310][528169] Updated weights for policy 0, policy_version 35696 (0.0008) +[2026-06-07 03:35:20,447][528169] Updated weights for policy 0, policy_version 35706 (0.0008) +[2026-06-07 03:35:20,568][528169] Updated weights for policy 0, policy_version 35716 (0.0008) +[2026-06-07 03:35:20,704][528169] Updated weights for policy 0, policy_version 35726 (0.0007) +[2026-06-07 03:35:21,295][528169] Updated weights for policy 0, policy_version 35736 (0.0007) +[2026-06-07 03:35:21,430][528169] Updated weights for policy 0, policy_version 35747 (0.0008) +[2026-06-07 03:35:21,590][528169] Updated weights for policy 0, policy_version 35759 (0.0008) +[2026-06-07 03:35:21,721][528169] Updated weights for policy 0, policy_version 35769 (0.0008) +[2026-06-07 03:35:21,876][528169] Updated weights for policy 0, policy_version 35780 (0.0008) +[2026-06-07 03:35:22,009][528169] Updated weights for policy 0, policy_version 35790 (0.0008) +[2026-06-07 03:35:22,640][528169] Updated weights for policy 0, policy_version 35800 (0.0008) +[2026-06-07 03:35:22,769][528169] Updated weights for policy 0, policy_version 35810 (0.0008) +[2026-06-07 03:35:22,900][528169] Updated weights for policy 0, policy_version 35820 (0.0008) +[2026-06-07 03:35:23,039][528169] Updated weights for policy 0, policy_version 35831 (0.0008) +[2026-06-07 03:35:23,178][528169] Updated weights for policy 0, policy_version 35841 (0.0008) +[2026-06-07 03:35:23,316][528169] Updated weights for policy 0, policy_version 35851 (0.0009) +[2026-06-07 03:35:23,910][528169] Updated weights for policy 0, policy_version 35861 (0.0008) +[2026-06-07 03:35:24,034][528169] Updated weights for policy 0, policy_version 35871 (0.0008) +[2026-06-07 03:35:24,181][528169] Updated weights for policy 0, policy_version 35882 (0.0008) +[2026-06-07 03:35:24,310][528169] Updated weights for policy 0, policy_version 35892 (0.0005) +[2026-06-07 03:35:24,441][528169] Updated weights for policy 0, policy_version 35902 (0.0004) +[2026-06-07 03:35:24,573][528169] Updated weights for policy 0, policy_version 35912 (0.0004) +[2026-06-07 03:35:25,103][527010] Fps is (10 sec: 26214.8, 60 sec: 24576.0, 300 sec: 24659.3). Total num frames: 18415616. Throughput: 0: 24752.4. Samples: 18422656. Policy #0 lag: (min: 63.0, avg: 76.6, max: 127.0) +[2026-06-07 03:35:25,103][527010] Avg episode reward: [(0, '113.010')] +[2026-06-07 03:35:25,183][528169] Updated weights for policy 0, policy_version 35922 (0.0005) +[2026-06-07 03:35:25,351][528169] Updated weights for policy 0, policy_version 35935 (0.0008) +[2026-06-07 03:35:25,480][528169] Updated weights for policy 0, policy_version 35945 (0.0008) +[2026-06-07 03:35:25,616][528169] Updated weights for policy 0, policy_version 35956 (0.0008) +[2026-06-07 03:35:25,757][528169] Updated weights for policy 0, policy_version 35966 (0.0008) +[2026-06-07 03:35:25,917][528169] Updated weights for policy 0, policy_version 35979 (0.0008) +[2026-06-07 03:35:26,558][528169] Updated weights for policy 0, policy_version 35989 (0.0008) +[2026-06-07 03:35:26,705][528169] Updated weights for policy 0, policy_version 36001 (0.0008) +[2026-06-07 03:35:26,857][528169] Updated weights for policy 0, policy_version 36013 (0.0008) +[2026-06-07 03:35:27,015][528169] Updated weights for policy 0, policy_version 36025 (0.0008) +[2026-06-07 03:35:27,160][528169] Updated weights for policy 0, policy_version 36035 (0.0008) +[2026-06-07 03:35:27,295][528169] Updated weights for policy 0, policy_version 36045 (0.0008) +[2026-06-07 03:35:27,938][528169] Updated weights for policy 0, policy_version 36056 (0.0008) +[2026-06-07 03:35:28,068][528169] Updated weights for policy 0, policy_version 36066 (0.0008) +[2026-06-07 03:35:28,191][528169] Updated weights for policy 0, policy_version 36076 (0.0008) +[2026-06-07 03:35:28,333][528169] Updated weights for policy 0, policy_version 36086 (0.0008) +[2026-06-07 03:35:28,463][528169] Updated weights for policy 0, policy_version 36096 (0.0008) +[2026-06-07 03:35:28,608][528169] Updated weights for policy 0, policy_version 36107 (0.0008) +[2026-06-07 03:35:29,228][528169] Updated weights for policy 0, policy_version 36118 (0.0008) +[2026-06-07 03:35:29,360][528169] Updated weights for policy 0, policy_version 36128 (0.0008) +[2026-06-07 03:35:29,496][528169] Updated weights for policy 0, policy_version 36139 (0.0008) +[2026-06-07 03:35:29,630][528169] Updated weights for policy 0, policy_version 36149 (0.0008) +[2026-06-07 03:35:29,776][528169] Updated weights for policy 0, policy_version 36160 (0.0008) +[2026-06-07 03:35:29,926][528169] Updated weights for policy 0, policy_version 36171 (0.0008) +[2026-06-07 03:35:30,103][527010] Fps is (10 sec: 26214.5, 60 sec: 25122.1, 300 sec: 24659.3). Total num frames: 18546688. Throughput: 0: 24760.9. Samples: 18492800. Policy #0 lag: (min: 63.0, avg: 76.6, max: 127.0) +[2026-06-07 03:35:30,104][527010] Avg episode reward: [(0, '126.033')] +[2026-06-07 03:35:30,108][528093] Saving new best policy, reward=126.033! +[2026-06-07 03:35:30,553][528169] Updated weights for policy 0, policy_version 36181 (0.0009) +[2026-06-07 03:35:30,682][528169] Updated weights for policy 0, policy_version 36191 (0.0008) +[2026-06-07 03:35:30,822][528169] Updated weights for policy 0, policy_version 36201 (0.0008) +[2026-06-07 03:35:30,967][528169] Updated weights for policy 0, policy_version 36212 (0.0009) +[2026-06-07 03:35:31,103][528169] Updated weights for policy 0, policy_version 36222 (0.0009) +[2026-06-07 03:35:31,234][528169] Updated weights for policy 0, policy_version 36232 (0.0008) +[2026-06-07 03:35:31,850][528169] Updated weights for policy 0, policy_version 36244 (0.0007) +[2026-06-07 03:35:31,994][528169] Updated weights for policy 0, policy_version 36255 (0.0008) +[2026-06-07 03:35:32,124][528169] Updated weights for policy 0, policy_version 36265 (0.0008) +[2026-06-07 03:35:32,255][528169] Updated weights for policy 0, policy_version 36275 (0.0009) +[2026-06-07 03:35:32,392][528169] Updated weights for policy 0, policy_version 36285 (0.0008) +[2026-06-07 03:35:32,542][528169] Updated weights for policy 0, policy_version 36296 (0.0008) +[2026-06-07 03:35:33,166][528169] Updated weights for policy 0, policy_version 36307 (0.0008) +[2026-06-07 03:35:33,309][528169] Updated weights for policy 0, policy_version 36318 (0.0008) +[2026-06-07 03:35:33,450][528169] Updated weights for policy 0, policy_version 36329 (0.0008) +[2026-06-07 03:35:33,585][528169] Updated weights for policy 0, policy_version 36339 (0.0008) +[2026-06-07 03:35:33,715][528169] Updated weights for policy 0, policy_version 36349 (0.0008) +[2026-06-07 03:35:33,876][528169] Updated weights for policy 0, policy_version 36361 (0.0008) +[2026-06-07 03:35:34,518][528169] Updated weights for policy 0, policy_version 36372 (0.0009) +[2026-06-07 03:35:34,657][528169] Updated weights for policy 0, policy_version 36383 (0.0008) +[2026-06-07 03:35:34,794][528169] Updated weights for policy 0, policy_version 36394 (0.0009) +[2026-06-07 03:35:34,946][528169] Updated weights for policy 0, policy_version 36405 (0.0008) +[2026-06-07 03:35:35,092][528169] Updated weights for policy 0, policy_version 36416 (0.0008) +[2026-06-07 03:35:35,103][527010] Fps is (10 sec: 22937.5, 60 sec: 24576.0, 300 sec: 24548.2). Total num frames: 18644992. Throughput: 0: 24760.9. Samples: 18648576. Policy #0 lag: (min: 63.0, avg: 76.6, max: 127.0) +[2026-06-07 03:35:35,103][527010] Avg episode reward: [(0, '103.873')] +[2026-06-07 03:35:35,229][528169] Updated weights for policy 0, policy_version 36427 (0.0008) +[2026-06-07 03:35:35,856][528169] Updated weights for policy 0, policy_version 36437 (0.0009) +[2026-06-07 03:35:35,980][528169] Updated weights for policy 0, policy_version 36447 (0.0008) +[2026-06-07 03:35:36,125][528169] Updated weights for policy 0, policy_version 36458 (0.0008) +[2026-06-07 03:35:36,256][528169] Updated weights for policy 0, policy_version 36468 (0.0008) +[2026-06-07 03:35:36,387][528169] Updated weights for policy 0, policy_version 36478 (0.0009) +[2026-06-07 03:35:36,524][528169] Updated weights for policy 0, policy_version 36488 (0.0010) +[2026-06-07 03:35:37,133][528169] Updated weights for policy 0, policy_version 36499 (0.0011) +[2026-06-07 03:35:37,255][528169] Updated weights for policy 0, policy_version 36509 (0.0008) +[2026-06-07 03:35:37,392][528169] Updated weights for policy 0, policy_version 36520 (0.0008) +[2026-06-07 03:35:37,547][528169] Updated weights for policy 0, policy_version 36532 (0.0008) +[2026-06-07 03:35:37,683][528169] Updated weights for policy 0, policy_version 36542 (0.0008) +[2026-06-07 03:35:37,818][528169] Updated weights for policy 0, policy_version 36552 (0.0008) +[2026-06-07 03:35:38,468][528169] Updated weights for policy 0, policy_version 36563 (0.0008) +[2026-06-07 03:35:38,619][528169] Updated weights for policy 0, policy_version 36574 (0.0009) +[2026-06-07 03:35:38,750][528169] Updated weights for policy 0, policy_version 36584 (0.0008) +[2026-06-07 03:35:38,883][528169] Updated weights for policy 0, policy_version 36594 (0.0008) +[2026-06-07 03:35:39,030][528169] Updated weights for policy 0, policy_version 36605 (0.0008) +[2026-06-07 03:35:39,158][528169] Updated weights for policy 0, policy_version 36615 (0.0008) +[2026-06-07 03:35:39,815][528169] Updated weights for policy 0, policy_version 36626 (0.0008) +[2026-06-07 03:35:39,959][528169] Updated weights for policy 0, policy_version 36638 (0.0008) +[2026-06-07 03:35:40,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24576.0, 300 sec: 24659.3). Total num frames: 18776064. Throughput: 0: 24780.8. Samples: 18789504. Policy #0 lag: (min: 63.0, avg: 76.6, max: 127.0) +[2026-06-07 03:35:40,103][527010] Avg episode reward: [(0, '123.369')] +[2026-06-07 03:35:40,128][528169] Updated weights for policy 0, policy_version 36651 (0.0008) +[2026-06-07 03:35:40,304][528169] Updated weights for policy 0, policy_version 36664 (0.0009) +[2026-06-07 03:35:40,444][528169] Updated weights for policy 0, policy_version 36675 (0.0008) +[2026-06-07 03:35:40,575][528169] Updated weights for policy 0, policy_version 36685 (0.0008) +[2026-06-07 03:35:41,178][528169] Updated weights for policy 0, policy_version 36695 (0.0008) +[2026-06-07 03:35:41,309][528169] Updated weights for policy 0, policy_version 36705 (0.0008) +[2026-06-07 03:35:41,441][528169] Updated weights for policy 0, policy_version 36715 (0.0008) +[2026-06-07 03:35:41,590][528169] Updated weights for policy 0, policy_version 36726 (0.0008) +[2026-06-07 03:35:41,735][528169] Updated weights for policy 0, policy_version 36737 (0.0008) +[2026-06-07 03:35:41,882][528169] Updated weights for policy 0, policy_version 36748 (0.0008) +[2026-06-07 03:35:42,537][528169] Updated weights for policy 0, policy_version 36762 (0.0008) +[2026-06-07 03:35:42,671][528169] Updated weights for policy 0, policy_version 36772 (0.0008) +[2026-06-07 03:35:42,808][528169] Updated weights for policy 0, policy_version 36783 (0.0008) +[2026-06-07 03:35:42,958][528169] Updated weights for policy 0, policy_version 36793 (0.0009) +[2026-06-07 03:35:43,102][528169] Updated weights for policy 0, policy_version 36804 (0.0008) +[2026-06-07 03:35:43,238][528169] Updated weights for policy 0, policy_version 36814 (0.0008) +[2026-06-07 03:35:43,822][528169] Updated weights for policy 0, policy_version 36824 (0.0008) +[2026-06-07 03:35:43,981][528169] Updated weights for policy 0, policy_version 36836 (0.0008) +[2026-06-07 03:35:44,099][528169] Updated weights for policy 0, policy_version 36846 (0.0008) +[2026-06-07 03:35:44,261][528169] Updated weights for policy 0, policy_version 36858 (0.0009) +[2026-06-07 03:35:44,410][528169] Updated weights for policy 0, policy_version 36869 (0.0008) +[2026-06-07 03:35:44,555][528169] Updated weights for policy 0, policy_version 36880 (0.0008) +[2026-06-07 03:35:45,103][527010] Fps is (10 sec: 26214.1, 60 sec: 24576.0, 300 sec: 24659.3). Total num frames: 18907136. Throughput: 0: 24795.0. Samples: 18869248. Policy #0 lag: (min: 28.0, avg: 40.4, max: 92.0) +[2026-06-07 03:35:45,104][527010] Avg episode reward: [(0, '121.208')] +[2026-06-07 03:35:45,178][528169] Updated weights for policy 0, policy_version 36890 (0.0009) +[2026-06-07 03:35:45,307][528169] Updated weights for policy 0, policy_version 36900 (0.0008) +[2026-06-07 03:35:45,449][528169] Updated weights for policy 0, policy_version 36911 (0.0008) +[2026-06-07 03:35:45,588][528169] Updated weights for policy 0, policy_version 36921 (0.0009) +[2026-06-07 03:35:45,745][528169] Updated weights for policy 0, policy_version 36933 (0.0009) +[2026-06-07 03:35:45,876][528169] Updated weights for policy 0, policy_version 36943 (0.0008) +[2026-06-07 03:35:46,484][528169] Updated weights for policy 0, policy_version 36953 (0.0009) +[2026-06-07 03:35:46,627][528169] Updated weights for policy 0, policy_version 36964 (0.0009) +[2026-06-07 03:35:46,783][528169] Updated weights for policy 0, policy_version 36976 (0.0008) +[2026-06-07 03:35:46,931][528169] Updated weights for policy 0, policy_version 36987 (0.0009) +[2026-06-07 03:35:47,081][528169] Updated weights for policy 0, policy_version 36998 (0.0008) +[2026-06-07 03:35:47,209][528169] Updated weights for policy 0, policy_version 37008 (0.0009) +[2026-06-07 03:35:47,832][528169] Updated weights for policy 0, policy_version 37019 (0.0008) +[2026-06-07 03:35:47,962][528169] Updated weights for policy 0, policy_version 37029 (0.0008) +[2026-06-07 03:35:48,109][528169] Updated weights for policy 0, policy_version 37040 (0.0008) +[2026-06-07 03:35:48,240][528169] Updated weights for policy 0, policy_version 37050 (0.0009) +[2026-06-07 03:35:48,372][528169] Updated weights for policy 0, policy_version 37060 (0.0009) +[2026-06-07 03:35:48,505][528169] Updated weights for policy 0, policy_version 37070 (0.0008) +[2026-06-07 03:35:49,120][528169] Updated weights for policy 0, policy_version 37080 (0.0008) +[2026-06-07 03:35:49,261][528169] Updated weights for policy 0, policy_version 37091 (0.0007) +[2026-06-07 03:35:49,422][528169] Updated weights for policy 0, policy_version 37103 (0.0008) +[2026-06-07 03:35:49,565][528169] Updated weights for policy 0, policy_version 37114 (0.0008) +[2026-06-07 03:35:49,701][528169] Updated weights for policy 0, policy_version 37124 (0.0008) +[2026-06-07 03:35:49,834][528169] Updated weights for policy 0, policy_version 37134 (0.0008) +[2026-06-07 03:35:50,103][527010] Fps is (10 sec: 26214.2, 60 sec: 25122.1, 300 sec: 24659.3). Total num frames: 19038208. Throughput: 0: 24729.6. Samples: 19013760. Policy #0 lag: (min: 28.0, avg: 40.4, max: 92.0) +[2026-06-07 03:35:50,104][527010] Avg episode reward: [(0, '112.495')] +[2026-06-07 03:35:50,450][528169] Updated weights for policy 0, policy_version 37144 (0.0008) +[2026-06-07 03:35:50,578][528169] Updated weights for policy 0, policy_version 37154 (0.0008) +[2026-06-07 03:35:50,718][528169] Updated weights for policy 0, policy_version 37164 (0.0008) +[2026-06-07 03:35:50,858][528169] Updated weights for policy 0, policy_version 37175 (0.0008) +[2026-06-07 03:35:50,992][528169] Updated weights for policy 0, policy_version 37185 (0.0008) +[2026-06-07 03:35:51,126][528169] Updated weights for policy 0, policy_version 37195 (0.0008) +[2026-06-07 03:35:51,723][528169] Updated weights for policy 0, policy_version 37205 (0.0006) +[2026-06-07 03:35:51,860][528169] Updated weights for policy 0, policy_version 37216 (0.0004) +[2026-06-07 03:35:51,994][528169] Updated weights for policy 0, policy_version 37226 (0.0004) +[2026-06-07 03:35:52,128][528169] Updated weights for policy 0, policy_version 37236 (0.0004) +[2026-06-07 03:35:52,276][528169] Updated weights for policy 0, policy_version 37248 (0.0004) +[2026-06-07 03:35:52,401][528169] Updated weights for policy 0, policy_version 37258 (0.0004) +[2026-06-07 03:35:53,032][528169] Updated weights for policy 0, policy_version 37269 (0.0004) +[2026-06-07 03:35:53,166][528169] Updated weights for policy 0, policy_version 37280 (0.0004) +[2026-06-07 03:35:53,302][528169] Updated weights for policy 0, policy_version 37291 (0.0004) +[2026-06-07 03:35:53,455][528169] Updated weights for policy 0, policy_version 37303 (0.0007) +[2026-06-07 03:35:53,588][528169] Updated weights for policy 0, policy_version 37313 (0.0008) +[2026-06-07 03:35:53,723][528169] Updated weights for policy 0, policy_version 37323 (0.0008) +[2026-06-07 03:35:54,329][528169] Updated weights for policy 0, policy_version 37334 (0.0006) +[2026-06-07 03:35:54,469][528169] Updated weights for policy 0, policy_version 37345 (0.0005) +[2026-06-07 03:35:54,615][528169] Updated weights for policy 0, policy_version 37356 (0.0005) +[2026-06-07 03:35:54,743][528169] Updated weights for policy 0, policy_version 37366 (0.0005) +[2026-06-07 03:35:54,892][528169] Updated weights for policy 0, policy_version 37377 (0.0005) +[2026-06-07 03:35:55,016][528169] Updated weights for policy 0, policy_version 37387 (0.0005) +[2026-06-07 03:35:55,103][527010] Fps is (10 sec: 26214.6, 60 sec: 25122.1, 300 sec: 24659.3). Total num frames: 19169280. Throughput: 0: 24806.5. Samples: 19170432. Policy #0 lag: (min: 28.0, avg: 40.4, max: 92.0) +[2026-06-07 03:35:55,103][527010] Avg episode reward: [(0, '115.593')] +[2026-06-07 03:35:55,640][528169] Updated weights for policy 0, policy_version 37397 (0.0007) +[2026-06-07 03:35:55,801][528169] Updated weights for policy 0, policy_version 37409 (0.0009) +[2026-06-07 03:35:55,934][528169] Updated weights for policy 0, policy_version 37420 (0.0009) +[2026-06-07 03:35:56,092][528169] Updated weights for policy 0, policy_version 37432 (0.0008) +[2026-06-07 03:35:56,247][528169] Updated weights for policy 0, policy_version 37444 (0.0008) +[2026-06-07 03:35:56,384][528169] Updated weights for policy 0, policy_version 37455 (0.0008) +[2026-06-07 03:35:57,018][528169] Updated weights for policy 0, policy_version 37466 (0.0008) +[2026-06-07 03:35:57,141][528169] Updated weights for policy 0, policy_version 37476 (0.0008) +[2026-06-07 03:35:57,269][528169] Updated weights for policy 0, policy_version 37486 (0.0008) +[2026-06-07 03:35:57,405][528169] Updated weights for policy 0, policy_version 37497 (0.0008) +[2026-06-07 03:35:57,552][528169] Updated weights for policy 0, policy_version 37508 (0.0008) +[2026-06-07 03:35:57,688][528169] Updated weights for policy 0, policy_version 37518 (0.0008) +[2026-06-07 03:35:58,323][528169] Updated weights for policy 0, policy_version 37529 (0.0008) +[2026-06-07 03:35:58,466][528169] Updated weights for policy 0, policy_version 37540 (0.0008) +[2026-06-07 03:35:58,624][528169] Updated weights for policy 0, policy_version 37551 (0.0008) +[2026-06-07 03:35:58,760][528169] Updated weights for policy 0, policy_version 37562 (0.0008) +[2026-06-07 03:35:58,926][528169] Updated weights for policy 0, policy_version 37574 (0.0008) +[2026-06-07 03:35:59,053][528169] Updated weights for policy 0, policy_version 37584 (0.0008) +[2026-06-07 03:35:59,664][528169] Updated weights for policy 0, policy_version 37594 (0.0008) +[2026-06-07 03:35:59,806][528169] Updated weights for policy 0, policy_version 37605 (0.0008) +[2026-06-07 03:35:59,959][528169] Updated weights for policy 0, policy_version 37617 (0.0008) +[2026-06-07 03:36:00,103][527010] Fps is (10 sec: 22937.7, 60 sec: 24576.0, 300 sec: 24659.3). Total num frames: 19267584. Throughput: 0: 24760.9. Samples: 19240448. Policy #0 lag: (min: 28.0, avg: 40.4, max: 92.0) +[2026-06-07 03:36:00,103][528169] Updated weights for policy 0, policy_version 37628 (0.0009) +[2026-06-07 03:36:00,104][527010] Avg episode reward: [(0, '111.516')] +[2026-06-07 03:36:00,239][528169] Updated weights for policy 0, policy_version 37638 (0.0009) +[2026-06-07 03:36:00,368][528169] Updated weights for policy 0, policy_version 37648 (0.0008) +[2026-06-07 03:36:00,981][528169] Updated weights for policy 0, policy_version 37658 (0.0009) +[2026-06-07 03:36:01,109][528169] Updated weights for policy 0, policy_version 37668 (0.0011) +[2026-06-07 03:36:01,251][528169] Updated weights for policy 0, policy_version 37679 (0.0009) +[2026-06-07 03:36:01,383][528169] Updated weights for policy 0, policy_version 37689 (0.0004) +[2026-06-07 03:36:01,514][528169] Updated weights for policy 0, policy_version 37699 (0.0006) +[2026-06-07 03:36:01,673][528169] Updated weights for policy 0, policy_version 37711 (0.0008) +[2026-06-07 03:36:02,301][528169] Updated weights for policy 0, policy_version 37722 (0.0009) +[2026-06-07 03:36:02,426][528169] Updated weights for policy 0, policy_version 37732 (0.0008) +[2026-06-07 03:36:02,557][528169] Updated weights for policy 0, policy_version 37742 (0.0008) +[2026-06-07 03:36:02,687][528169] Updated weights for policy 0, policy_version 37752 (0.0008) +[2026-06-07 03:36:02,845][528169] Updated weights for policy 0, policy_version 37764 (0.0008) +[2026-06-07 03:36:02,987][528169] Updated weights for policy 0, policy_version 37775 (0.0008) +[2026-06-07 03:36:03,590][528169] Updated weights for policy 0, policy_version 37785 (0.0008) +[2026-06-07 03:36:03,736][528169] Updated weights for policy 0, policy_version 37796 (0.0008) +[2026-06-07 03:36:03,902][528169] Updated weights for policy 0, policy_version 37809 (0.0008) +[2026-06-07 03:36:04,030][528169] Updated weights for policy 0, policy_version 37819 (0.0008) +[2026-06-07 03:36:04,190][528169] Updated weights for policy 0, policy_version 37831 (0.0008) +[2026-06-07 03:36:04,791][528169] Updated weights for policy 0, policy_version 37841 (0.0008) +[2026-06-07 03:36:04,920][528169] Updated weights for policy 0, policy_version 37851 (0.0008) +[2026-06-07 03:36:05,063][528169] Updated weights for policy 0, policy_version 37862 (0.0008) +[2026-06-07 03:36:05,103][527010] Fps is (10 sec: 22937.5, 60 sec: 24576.0, 300 sec: 24659.3). Total num frames: 19398656. Throughput: 0: 24908.8. Samples: 19391360. Policy #0 lag: (min: 28.0, avg: 40.4, max: 92.0) +[2026-06-07 03:36:05,104][527010] Avg episode reward: [(0, '126.580')] +[2026-06-07 03:36:05,195][528169] Updated weights for policy 0, policy_version 37872 (0.0008) +[2026-06-07 03:36:05,342][528169] Updated weights for policy 0, policy_version 37883 (0.0008) +[2026-06-07 03:36:05,488][528169] Updated weights for policy 0, policy_version 37894 (0.0008) +[2026-06-07 03:36:05,611][528093] Saving new best policy, reward=126.580! +[2026-06-07 03:36:05,613][528169] Updated weights for policy 0, policy_version 37904 (0.0008) +[2026-06-07 03:36:06,220][528169] Updated weights for policy 0, policy_version 37914 (0.0008) +[2026-06-07 03:36:06,366][528169] Updated weights for policy 0, policy_version 37925 (0.0008) +[2026-06-07 03:36:06,510][528169] Updated weights for policy 0, policy_version 37936 (0.0008) +[2026-06-07 03:36:06,649][528169] Updated weights for policy 0, policy_version 37947 (0.0008) +[2026-06-07 03:36:06,810][528169] Updated weights for policy 0, policy_version 37959 (0.0008) +[2026-06-07 03:36:07,447][528169] Updated weights for policy 0, policy_version 37969 (0.0008) +[2026-06-07 03:36:07,571][528169] Updated weights for policy 0, policy_version 37979 (0.0008) +[2026-06-07 03:36:07,722][528169] Updated weights for policy 0, policy_version 37990 (0.0008) +[2026-06-07 03:36:07,849][528169] Updated weights for policy 0, policy_version 38000 (0.0008) +[2026-06-07 03:36:08,010][528169] Updated weights for policy 0, policy_version 38013 (0.0008) +[2026-06-07 03:36:08,190][528169] Updated weights for policy 0, policy_version 38026 (0.0007) +[2026-06-07 03:36:08,862][528169] Updated weights for policy 0, policy_version 38039 (0.0008) +[2026-06-07 03:36:09,008][528169] Updated weights for policy 0, policy_version 38050 (0.0008) +[2026-06-07 03:36:09,157][528169] Updated weights for policy 0, policy_version 38061 (0.0008) +[2026-06-07 03:36:09,336][528169] Updated weights for policy 0, policy_version 38075 (0.0008) +[2026-06-07 03:36:09,475][528169] Updated weights for policy 0, policy_version 38086 (0.0008) +[2026-06-07 03:36:10,103][527010] Fps is (10 sec: 26214.3, 60 sec: 25122.2, 300 sec: 24659.3). Total num frames: 19529728. Throughput: 0: 24851.8. Samples: 19540992. Policy #0 lag: (min: 63.0, avg: 75.8, max: 127.0) +[2026-06-07 03:36:10,104][527010] Avg episode reward: [(0, '129.785')] +[2026-06-07 03:36:10,130][528169] Updated weights for policy 0, policy_version 38098 (0.0009) +[2026-06-07 03:36:10,249][528169] Updated weights for policy 0, policy_version 38108 (0.0008) +[2026-06-07 03:36:10,414][528169] Updated weights for policy 0, policy_version 38121 (0.0009) +[2026-06-07 03:36:10,579][528169] Updated weights for policy 0, policy_version 38134 (0.0008) +[2026-06-07 03:36:10,702][528169] Updated weights for policy 0, policy_version 38144 (0.0008) +[2026-06-07 03:36:10,843][528169] Updated weights for policy 0, policy_version 38155 (0.0008) +[2026-06-07 03:36:10,899][528093] Saving new best policy, reward=129.785! +[2026-06-07 03:36:11,487][528169] Updated weights for policy 0, policy_version 38166 (0.0006) +[2026-06-07 03:36:11,613][528169] Updated weights for policy 0, policy_version 38176 (0.0006) +[2026-06-07 03:36:11,774][528169] Updated weights for policy 0, policy_version 38188 (0.0008) +[2026-06-07 03:36:11,907][528169] Updated weights for policy 0, policy_version 38198 (0.0008) +[2026-06-07 03:36:12,058][528169] Updated weights for policy 0, policy_version 38210 (0.0008) +[2026-06-07 03:36:12,209][528169] Updated weights for policy 0, policy_version 38221 (0.0008) +[2026-06-07 03:36:12,824][528169] Updated weights for policy 0, policy_version 38232 (0.0008) +[2026-06-07 03:36:12,969][528169] Updated weights for policy 0, policy_version 38243 (0.0008) +[2026-06-07 03:36:13,133][528169] Updated weights for policy 0, policy_version 38256 (0.0008) +[2026-06-07 03:36:13,285][528169] Updated weights for policy 0, policy_version 38268 (0.0008) +[2026-06-07 03:36:13,448][528169] Updated weights for policy 0, policy_version 38281 (0.0008) +[2026-06-07 03:36:14,124][528169] Updated weights for policy 0, policy_version 38293 (0.0009) +[2026-06-07 03:36:14,265][528169] Updated weights for policy 0, policy_version 38304 (0.0008) +[2026-06-07 03:36:14,383][528169] Updated weights for policy 0, policy_version 38314 (0.0008) +[2026-06-07 03:36:14,524][528169] Updated weights for policy 0, policy_version 38325 (0.0008) +[2026-06-07 03:36:14,663][528169] Updated weights for policy 0, policy_version 38335 (0.0008) +[2026-06-07 03:36:14,787][528169] Updated weights for policy 0, policy_version 38345 (0.0008) +[2026-06-07 03:36:15,103][527010] Fps is (10 sec: 26214.4, 60 sec: 25122.2, 300 sec: 24659.3). Total num frames: 19660800. Throughput: 0: 24869.0. Samples: 19611904. Policy #0 lag: (min: 63.0, avg: 75.8, max: 127.0) +[2026-06-07 03:36:15,104][527010] Avg episode reward: [(0, '121.773')] +[2026-06-07 03:36:15,413][528169] Updated weights for policy 0, policy_version 38355 (0.0008) +[2026-06-07 03:36:15,540][528169] Updated weights for policy 0, policy_version 38365 (0.0008) +[2026-06-07 03:36:15,706][528169] Updated weights for policy 0, policy_version 38377 (0.0008) +[2026-06-07 03:36:15,839][528169] Updated weights for policy 0, policy_version 38387 (0.0008) +[2026-06-07 03:36:15,978][528169] Updated weights for policy 0, policy_version 38398 (0.0008) +[2026-06-07 03:36:16,112][528169] Updated weights for policy 0, policy_version 38408 (0.0008) +[2026-06-07 03:36:16,732][528169] Updated weights for policy 0, policy_version 38420 (0.0007) +[2026-06-07 03:36:16,877][528169] Updated weights for policy 0, policy_version 38431 (0.0008) +[2026-06-07 03:36:17,022][528169] Updated weights for policy 0, policy_version 38442 (0.0009) +[2026-06-07 03:36:17,171][528169] Updated weights for policy 0, policy_version 38453 (0.0008) +[2026-06-07 03:36:17,308][528169] Updated weights for policy 0, policy_version 38464 (0.0008) +[2026-06-07 03:36:17,441][528169] Updated weights for policy 0, policy_version 38474 (0.0008) +[2026-06-07 03:36:18,043][528169] Updated weights for policy 0, policy_version 38484 (0.0009) +[2026-06-07 03:36:18,186][528169] Updated weights for policy 0, policy_version 38495 (0.0008) +[2026-06-07 03:36:18,317][528169] Updated weights for policy 0, policy_version 38505 (0.0008) +[2026-06-07 03:36:18,459][528169] Updated weights for policy 0, policy_version 38516 (0.0008) +[2026-06-07 03:36:18,645][528169] Updated weights for policy 0, policy_version 38530 (0.0009) +[2026-06-07 03:36:19,317][528169] Updated weights for policy 0, policy_version 38545 (0.0008) +[2026-06-07 03:36:19,447][528169] Updated weights for policy 0, policy_version 38555 (0.0008) +[2026-06-07 03:36:19,598][528169] Updated weights for policy 0, policy_version 38567 (0.0008) +[2026-06-07 03:36:19,737][528169] Updated weights for policy 0, policy_version 38578 (0.0008) +[2026-06-07 03:36:19,885][528169] Updated weights for policy 0, policy_version 38589 (0.0008) +[2026-06-07 03:36:20,011][528169] Updated weights for policy 0, policy_version 38599 (0.0008) +[2026-06-07 03:36:20,103][527010] Fps is (10 sec: 22937.7, 60 sec: 24576.0, 300 sec: 24659.3). Total num frames: 19759104. Throughput: 0: 24869.0. Samples: 19767680. Policy #0 lag: (min: 63.0, avg: 75.8, max: 127.0) +[2026-06-07 03:36:20,103][527010] Avg episode reward: [(0, '127.795')] +[2026-06-07 03:36:20,666][528169] Updated weights for policy 0, policy_version 38610 (0.0009) +[2026-06-07 03:36:20,797][528169] Updated weights for policy 0, policy_version 38620 (0.0008) +[2026-06-07 03:36:20,949][528169] Updated weights for policy 0, policy_version 38632 (0.0008) +[2026-06-07 03:36:21,106][528169] Updated weights for policy 0, policy_version 38644 (0.0008) +[2026-06-07 03:36:21,242][528169] Updated weights for policy 0, policy_version 38654 (0.0006) +[2026-06-07 03:36:21,374][528169] Updated weights for policy 0, policy_version 38664 (0.0006) +[2026-06-07 03:36:22,008][528169] Updated weights for policy 0, policy_version 38676 (0.0009) +[2026-06-07 03:36:22,138][528169] Updated weights for policy 0, policy_version 38686 (0.0008) +[2026-06-07 03:36:22,276][528169] Updated weights for policy 0, policy_version 38697 (0.0008) +[2026-06-07 03:36:22,425][528169] Updated weights for policy 0, policy_version 38708 (0.0008) +[2026-06-07 03:36:22,576][528169] Updated weights for policy 0, policy_version 38719 (0.0009) +[2026-06-07 03:36:22,714][528169] Updated weights for policy 0, policy_version 38729 (0.0009) +[2026-06-07 03:36:23,340][528169] Updated weights for policy 0, policy_version 38739 (0.0009) +[2026-06-07 03:36:23,473][528169] Updated weights for policy 0, policy_version 38749 (0.0009) +[2026-06-07 03:36:23,597][528169] Updated weights for policy 0, policy_version 38759 (0.0009) +[2026-06-07 03:36:23,732][528169] Updated weights for policy 0, policy_version 38769 (0.0010) +[2026-06-07 03:36:23,861][528169] Updated weights for policy 0, policy_version 38779 (0.0008) +[2026-06-07 03:36:23,997][528169] Updated weights for policy 0, policy_version 38789 (0.0008) +[2026-06-07 03:36:24,146][528169] Updated weights for policy 0, policy_version 38800 (0.0007) +[2026-06-07 03:36:24,734][528169] Updated weights for policy 0, policy_version 38810 (0.0009) +[2026-06-07 03:36:24,871][528169] Updated weights for policy 0, policy_version 38820 (0.0011) +[2026-06-07 03:36:25,019][528169] Updated weights for policy 0, policy_version 38831 (0.0009) +[2026-06-07 03:36:25,103][527010] Fps is (10 sec: 22937.5, 60 sec: 24576.0, 300 sec: 24659.3). Total num frames: 19890176. Throughput: 0: 24954.3. Samples: 19912448. Policy #0 lag: (min: 63.0, avg: 75.8, max: 127.0) +[2026-06-07 03:36:25,104][527010] Avg episode reward: [(0, '142.349')] +[2026-06-07 03:36:25,177][528169] Updated weights for policy 0, policy_version 38843 (0.0008) +[2026-06-07 03:36:25,322][528169] Updated weights for policy 0, policy_version 38854 (0.0008) +[2026-06-07 03:36:25,448][528093] Saving new best policy, reward=142.349! +[2026-06-07 03:36:25,450][528169] Updated weights for policy 0, policy_version 38864 (0.0009) +[2026-06-07 03:36:26,036][528169] Updated weights for policy 0, policy_version 38874 (0.0009) +[2026-06-07 03:36:26,176][528169] Updated weights for policy 0, policy_version 38884 (0.0008) +[2026-06-07 03:36:26,316][528169] Updated weights for policy 0, policy_version 38895 (0.0008) +[2026-06-07 03:36:26,460][528169] Updated weights for policy 0, policy_version 38906 (0.0008) +[2026-06-07 03:36:26,601][528169] Updated weights for policy 0, policy_version 38917 (0.0008) +[2026-06-07 03:36:26,736][528169] Updated weights for policy 0, policy_version 38927 (0.0008) +[2026-06-07 03:36:27,342][528169] Updated weights for policy 0, policy_version 38938 (0.0009) +[2026-06-07 03:36:27,465][528169] Updated weights for policy 0, policy_version 38948 (0.0008) +[2026-06-07 03:36:27,605][528169] Updated weights for policy 0, policy_version 38959 (0.0009) +[2026-06-07 03:36:27,736][528169] Updated weights for policy 0, policy_version 38969 (0.0008) +[2026-06-07 03:36:27,877][528169] Updated weights for policy 0, policy_version 38979 (0.0009) +[2026-06-07 03:36:28,030][528169] Updated weights for policy 0, policy_version 38990 (0.0008) +[2026-06-07 03:36:28,624][528169] Updated weights for policy 0, policy_version 39001 (0.0008) +[2026-06-07 03:36:28,755][528169] Updated weights for policy 0, policy_version 39011 (0.0008) +[2026-06-07 03:36:28,910][528169] Updated weights for policy 0, policy_version 39023 (0.0009) +[2026-06-07 03:36:29,043][528169] Updated weights for policy 0, policy_version 39033 (0.0005) +[2026-06-07 03:36:29,181][528169] Updated weights for policy 0, policy_version 39043 (0.0005) +[2026-06-07 03:36:29,330][528169] Updated weights for policy 0, policy_version 39054 (0.0005) +[2026-06-07 03:36:29,958][528169] Updated weights for policy 0, policy_version 39066 (0.0009) +[2026-06-07 03:36:30,089][528169] Updated weights for policy 0, policy_version 39076 (0.0008) +[2026-06-07 03:36:30,103][527010] Fps is (10 sec: 26214.3, 60 sec: 24576.0, 300 sec: 24659.3). Total num frames: 20021248. Throughput: 0: 24991.3. Samples: 19993856. Policy #0 lag: (min: 63.0, avg: 75.8, max: 127.0) +[2026-06-07 03:36:30,104][527010] Avg episode reward: [(0, '120.863')] +[2026-06-07 03:36:30,241][528169] Updated weights for policy 0, policy_version 39087 (0.0008) +[2026-06-07 03:36:30,367][528169] Updated weights for policy 0, policy_version 39097 (0.0008) +[2026-06-07 03:36:30,520][528169] Updated weights for policy 0, policy_version 39108 (0.0008) +[2026-06-07 03:36:30,650][528169] Updated weights for policy 0, policy_version 39118 (0.0008) +[2026-06-07 03:36:31,307][528169] Updated weights for policy 0, policy_version 39130 (0.0009) +[2026-06-07 03:36:31,434][528169] Updated weights for policy 0, policy_version 39140 (0.0008) +[2026-06-07 03:36:31,566][528169] Updated weights for policy 0, policy_version 39150 (0.0008) +[2026-06-07 03:36:31,706][528169] Updated weights for policy 0, policy_version 39160 (0.0008) +[2026-06-07 03:36:31,850][528169] Updated weights for policy 0, policy_version 39171 (0.0008) +[2026-06-07 03:36:31,977][528169] Updated weights for policy 0, policy_version 39181 (0.0008) +[2026-06-07 03:36:32,575][528169] Updated weights for policy 0, policy_version 39191 (0.0008) +[2026-06-07 03:36:32,721][528169] Updated weights for policy 0, policy_version 39202 (0.0008) +[2026-06-07 03:36:32,871][528169] Updated weights for policy 0, policy_version 39213 (0.0008) +[2026-06-07 03:36:33,037][528169] Updated weights for policy 0, policy_version 39226 (0.0008) +[2026-06-07 03:36:33,172][528169] Updated weights for policy 0, policy_version 39236 (0.0008) +[2026-06-07 03:36:33,315][528169] Updated weights for policy 0, policy_version 39247 (0.0008) +[2026-06-07 03:36:33,945][528169] Updated weights for policy 0, policy_version 39257 (0.0008) +[2026-06-07 03:36:34,074][528169] Updated weights for policy 0, policy_version 39267 (0.0008) +[2026-06-07 03:36:34,204][528169] Updated weights for policy 0, policy_version 39277 (0.0008) +[2026-06-07 03:36:34,337][528169] Updated weights for policy 0, policy_version 39287 (0.0008) +[2026-06-07 03:36:34,473][528169] Updated weights for policy 0, policy_version 39297 (0.0008) +[2026-06-07 03:36:34,610][528169] Updated weights for policy 0, policy_version 39307 (0.0008) +[2026-06-07 03:36:35,103][527010] Fps is (10 sec: 26214.6, 60 sec: 25122.1, 300 sec: 24659.3). Total num frames: 20152320. Throughput: 0: 24923.1. Samples: 20135296. Policy #0 lag: (min: 27.0, avg: 59.4, max: 91.0) +[2026-06-07 03:36:35,104][527010] Avg episode reward: [(0, '114.787')] +[2026-06-07 03:36:35,216][528169] Updated weights for policy 0, policy_version 39317 (0.0008) +[2026-06-07 03:36:35,364][528169] Updated weights for policy 0, policy_version 39328 (0.0008) +[2026-06-07 03:36:35,486][528169] Updated weights for policy 0, policy_version 39338 (0.0008) +[2026-06-07 03:36:35,634][528169] Updated weights for policy 0, policy_version 39349 (0.0008) +[2026-06-07 03:36:35,763][528169] Updated weights for policy 0, policy_version 39359 (0.0008) +[2026-06-07 03:36:35,912][528169] Updated weights for policy 0, policy_version 39370 (0.0008) +[2026-06-07 03:36:36,555][528169] Updated weights for policy 0, policy_version 39382 (0.0008) +[2026-06-07 03:36:36,696][528169] Updated weights for policy 0, policy_version 39393 (0.0008) +[2026-06-07 03:36:36,836][528169] Updated weights for policy 0, policy_version 39404 (0.0008) +[2026-06-07 03:36:36,986][528169] Updated weights for policy 0, policy_version 39415 (0.0008) +[2026-06-07 03:36:37,116][528169] Updated weights for policy 0, policy_version 39425 (0.0008) +[2026-06-07 03:36:37,268][528169] Updated weights for policy 0, policy_version 39436 (0.0008) +[2026-06-07 03:36:37,854][528169] Updated weights for policy 0, policy_version 39446 (0.0008) +[2026-06-07 03:36:37,989][528169] Updated weights for policy 0, policy_version 39456 (0.0008) +[2026-06-07 03:36:38,121][528169] Updated weights for policy 0, policy_version 39466 (0.0008) +[2026-06-07 03:36:38,259][528169] Updated weights for policy 0, policy_version 39476 (0.0008) +[2026-06-07 03:36:38,379][528169] Updated weights for policy 0, policy_version 39486 (0.0008) +[2026-06-07 03:36:38,517][528169] Updated weights for policy 0, policy_version 39496 (0.0008) +[2026-06-07 03:36:39,178][528169] Updated weights for policy 0, policy_version 39508 (0.0008) +[2026-06-07 03:36:39,325][528169] Updated weights for policy 0, policy_version 39519 (0.0008) +[2026-06-07 03:36:39,451][528169] Updated weights for policy 0, policy_version 39529 (0.0008) +[2026-06-07 03:36:39,585][528169] Updated weights for policy 0, policy_version 39539 (0.0008) +[2026-06-07 03:36:39,720][528169] Updated weights for policy 0, policy_version 39549 (0.0008) +[2026-06-07 03:36:39,848][528169] Updated weights for policy 0, policy_version 39559 (0.0008) +[2026-06-07 03:36:40,103][527010] Fps is (10 sec: 26214.6, 60 sec: 25122.2, 300 sec: 24770.4). Total num frames: 20283392. Throughput: 0: 24928.7. Samples: 20292224. Policy #0 lag: (min: 27.0, avg: 59.4, max: 91.0) +[2026-06-07 03:36:40,103][527010] Avg episode reward: [(0, '137.075')] +[2026-06-07 03:36:40,448][528169] Updated weights for policy 0, policy_version 39569 (0.0008) +[2026-06-07 03:36:40,571][528169] Updated weights for policy 0, policy_version 39579 (0.0008) +[2026-06-07 03:36:40,703][528169] Updated weights for policy 0, policy_version 39589 (0.0008) +[2026-06-07 03:36:40,859][528169] Updated weights for policy 0, policy_version 39601 (0.0009) +[2026-06-07 03:36:41,011][528169] Updated weights for policy 0, policy_version 39612 (0.0008) +[2026-06-07 03:36:41,144][528169] Updated weights for policy 0, policy_version 39622 (0.0008) +[2026-06-07 03:36:41,270][528169] Updated weights for policy 0, policy_version 39632 (0.0008) +[2026-06-07 03:36:41,897][528169] Updated weights for policy 0, policy_version 39642 (0.0008) +[2026-06-07 03:36:42,036][528169] Updated weights for policy 0, policy_version 39652 (0.0008) +[2026-06-07 03:36:42,169][528169] Updated weights for policy 0, policy_version 39662 (0.0008) +[2026-06-07 03:36:42,293][528169] Updated weights for policy 0, policy_version 39672 (0.0008) +[2026-06-07 03:36:42,430][528169] Updated weights for policy 0, policy_version 39682 (0.0008) +[2026-06-07 03:36:42,560][528169] Updated weights for policy 0, policy_version 39692 (0.0008) +[2026-06-07 03:36:43,187][528169] Updated weights for policy 0, policy_version 39703 (0.0008) +[2026-06-07 03:36:43,316][528169] Updated weights for policy 0, policy_version 39713 (0.0008) +[2026-06-07 03:36:43,478][528169] Updated weights for policy 0, policy_version 39726 (0.0008) +[2026-06-07 03:36:43,650][528169] Updated weights for policy 0, policy_version 39739 (0.0008) +[2026-06-07 03:36:43,794][528169] Updated weights for policy 0, policy_version 39751 (0.0008) +[2026-06-07 03:36:44,453][528169] Updated weights for policy 0, policy_version 39763 (0.0008) +[2026-06-07 03:36:44,588][528169] Updated weights for policy 0, policy_version 39774 (0.0008) +[2026-06-07 03:36:44,751][528169] Updated weights for policy 0, policy_version 39787 (0.0008) +[2026-06-07 03:36:44,918][528169] Updated weights for policy 0, policy_version 39800 (0.0008) +[2026-06-07 03:36:45,066][528169] Updated weights for policy 0, policy_version 39812 (0.0008) +[2026-06-07 03:36:45,103][527010] Fps is (10 sec: 22937.5, 60 sec: 24576.0, 300 sec: 24659.3). Total num frames: 20381696. Throughput: 0: 24928.7. Samples: 20362240. Policy #0 lag: (min: 27.0, avg: 59.4, max: 91.0) +[2026-06-07 03:36:45,104][527010] Avg episode reward: [(0, '148.802')] +[2026-06-07 03:36:45,211][528169] Updated weights for policy 0, policy_version 39823 (0.0008) +[2026-06-07 03:36:45,217][528093] Saving new best policy, reward=148.802! +[2026-06-07 03:36:45,835][528169] Updated weights for policy 0, policy_version 39833 (0.0008) +[2026-06-07 03:36:45,978][528169] Updated weights for policy 0, policy_version 39844 (0.0008) +[2026-06-07 03:36:46,142][528169] Updated weights for policy 0, policy_version 39857 (0.0009) +[2026-06-07 03:36:46,288][528169] Updated weights for policy 0, policy_version 39868 (0.0007) +[2026-06-07 03:36:46,422][528169] Updated weights for policy 0, policy_version 39878 (0.0005) +[2026-06-07 03:36:46,552][528169] Updated weights for policy 0, policy_version 39888 (0.0007) +[2026-06-07 03:36:47,192][528169] Updated weights for policy 0, policy_version 39899 (0.0008) +[2026-06-07 03:36:47,334][528169] Updated weights for policy 0, policy_version 39910 (0.0008) +[2026-06-07 03:36:47,480][528169] Updated weights for policy 0, policy_version 39921 (0.0008) +[2026-06-07 03:36:47,613][528169] Updated weights for policy 0, policy_version 39931 (0.0008) +[2026-06-07 03:36:47,759][528169] Updated weights for policy 0, policy_version 39942 (0.0008) +[2026-06-07 03:36:47,890][528169] Updated weights for policy 0, policy_version 39952 (0.0008) +[2026-06-07 03:36:48,518][528169] Updated weights for policy 0, policy_version 39963 (0.0008) +[2026-06-07 03:36:48,680][528169] Updated weights for policy 0, policy_version 39975 (0.0008) +[2026-06-07 03:36:48,809][528169] Updated weights for policy 0, policy_version 39985 (0.0008) +[2026-06-07 03:36:48,958][528169] Updated weights for policy 0, policy_version 39996 (0.0008) +[2026-06-07 03:36:49,094][528169] Updated weights for policy 0, policy_version 40006 (0.0008) +[2026-06-07 03:36:49,736][528169] Updated weights for policy 0, policy_version 40017 (0.0008) +[2026-06-07 03:36:49,865][528169] Updated weights for policy 0, policy_version 40027 (0.0008) +[2026-06-07 03:36:50,002][528169] Updated weights for policy 0, policy_version 40038 (0.0008) +[2026-06-07 03:36:50,103][527010] Fps is (10 sec: 22937.4, 60 sec: 24576.0, 300 sec: 24659.3). Total num frames: 20512768. Throughput: 0: 24925.9. Samples: 20513024. Policy #0 lag: (min: 27.0, avg: 59.4, max: 91.0) +[2026-06-07 03:36:50,104][527010] Avg episode reward: [(0, '110.445')] +[2026-06-07 03:36:50,142][528169] Updated weights for policy 0, policy_version 40048 (0.0008) +[2026-06-07 03:36:50,279][528169] Updated weights for policy 0, policy_version 40058 (0.0008) +[2026-06-07 03:36:50,407][528169] Updated weights for policy 0, policy_version 40068 (0.0008) +[2026-06-07 03:36:50,540][528169] Updated weights for policy 0, policy_version 40078 (0.0008) +[2026-06-07 03:36:51,119][528169] Updated weights for policy 0, policy_version 40088 (0.0008) +[2026-06-07 03:36:51,257][528169] Updated weights for policy 0, policy_version 40099 (0.0008) +[2026-06-07 03:36:51,417][528169] Updated weights for policy 0, policy_version 40110 (0.0008) +[2026-06-07 03:36:51,539][528169] Updated weights for policy 0, policy_version 40120 (0.0008) +[2026-06-07 03:36:51,669][528169] Updated weights for policy 0, policy_version 40130 (0.0007) +[2026-06-07 03:36:51,835][528169] Updated weights for policy 0, policy_version 40142 (0.0004) +[2026-06-07 03:36:52,468][528169] Updated weights for policy 0, policy_version 40154 (0.0008) +[2026-06-07 03:36:52,582][528169] Updated weights for policy 0, policy_version 40164 (0.0008) +[2026-06-07 03:36:52,724][528169] Updated weights for policy 0, policy_version 40174 (0.0008) +[2026-06-07 03:36:52,882][528169] Updated weights for policy 0, policy_version 40186 (0.0009) +[2026-06-07 03:36:53,019][528169] Updated weights for policy 0, policy_version 40197 (0.0008) +[2026-06-07 03:36:53,168][528169] Updated weights for policy 0, policy_version 40208 (0.0008) +[2026-06-07 03:36:53,785][528169] Updated weights for policy 0, policy_version 40218 (0.0008) +[2026-06-07 03:36:53,922][528169] Updated weights for policy 0, policy_version 40228 (0.0008) +[2026-06-07 03:36:54,050][528169] Updated weights for policy 0, policy_version 40238 (0.0008) +[2026-06-07 03:36:54,200][528169] Updated weights for policy 0, policy_version 40249 (0.0009) +[2026-06-07 03:36:54,337][528169] Updated weights for policy 0, policy_version 40259 (0.0008) +[2026-06-07 03:36:54,485][528169] Updated weights for policy 0, policy_version 40270 (0.0008) +[2026-06-07 03:36:55,103][527010] Fps is (10 sec: 26214.5, 60 sec: 24576.0, 300 sec: 24659.3). Total num frames: 20643840. Throughput: 0: 24823.5. Samples: 20658048. Policy #0 lag: (min: 27.0, avg: 59.4, max: 91.0) +[2026-06-07 03:36:55,103][528169] Updated weights for policy 0, policy_version 40280 (0.0008) +[2026-06-07 03:36:55,104][527010] Avg episode reward: [(0, '133.493')] +[2026-06-07 03:36:55,231][528169] Updated weights for policy 0, policy_version 40290 (0.0008) +[2026-06-07 03:36:55,359][528169] Updated weights for policy 0, policy_version 40300 (0.0008) +[2026-06-07 03:36:55,480][528169] Updated weights for policy 0, policy_version 40310 (0.0008) +[2026-06-07 03:36:55,625][528169] Updated weights for policy 0, policy_version 40320 (0.0008) +[2026-06-07 03:36:55,757][528169] Updated weights for policy 0, policy_version 40330 (0.0008) +[2026-06-07 03:36:56,391][528169] Updated weights for policy 0, policy_version 40340 (0.0008) +[2026-06-07 03:36:56,524][528169] Updated weights for policy 0, policy_version 40350 (0.0008) +[2026-06-07 03:36:56,679][528169] Updated weights for policy 0, policy_version 40361 (0.0008) +[2026-06-07 03:36:56,811][528169] Updated weights for policy 0, policy_version 40371 (0.0008) +[2026-06-07 03:36:56,938][528169] Updated weights for policy 0, policy_version 40381 (0.0008) +[2026-06-07 03:36:57,083][528169] Updated weights for policy 0, policy_version 40392 (0.0008) +[2026-06-07 03:36:57,696][528169] Updated weights for policy 0, policy_version 40402 (0.0009) +[2026-06-07 03:36:57,823][528169] Updated weights for policy 0, policy_version 40412 (0.0009) +[2026-06-07 03:36:57,976][528169] Updated weights for policy 0, policy_version 40424 (0.0007) +[2026-06-07 03:36:58,104][528169] Updated weights for policy 0, policy_version 40434 (0.0007) +[2026-06-07 03:36:58,264][528169] Updated weights for policy 0, policy_version 40446 (0.0008) +[2026-06-07 03:36:58,394][528169] Updated weights for policy 0, policy_version 40456 (0.0008) +[2026-06-07 03:36:59,035][528169] Updated weights for policy 0, policy_version 40466 (0.0008) +[2026-06-07 03:36:59,153][528169] Updated weights for policy 0, policy_version 40476 (0.0008) +[2026-06-07 03:36:59,284][528169] Updated weights for policy 0, policy_version 40486 (0.0008) +[2026-06-07 03:36:59,414][528169] Updated weights for policy 0, policy_version 40496 (0.0008) +[2026-06-07 03:36:59,554][528169] Updated weights for policy 0, policy_version 40507 (0.0008) +[2026-06-07 03:36:59,707][528169] Updated weights for policy 0, policy_version 40518 (0.0009) +[2026-06-07 03:36:59,844][528169] Updated weights for policy 0, policy_version 40528 (0.0008) +[2026-06-07 03:37:00,103][527010] Fps is (10 sec: 26214.5, 60 sec: 25122.1, 300 sec: 24770.4). Total num frames: 20774912. Throughput: 0: 24843.4. Samples: 20729856. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 03:37:00,104][527010] Avg episode reward: [(0, '147.155')] +[2026-06-07 03:37:00,453][528169] Updated weights for policy 0, policy_version 40538 (0.0008) +[2026-06-07 03:37:00,589][528169] Updated weights for policy 0, policy_version 40549 (0.0008) +[2026-06-07 03:37:00,752][528169] Updated weights for policy 0, policy_version 40561 (0.0008) +[2026-06-07 03:37:00,914][528169] Updated weights for policy 0, policy_version 40573 (0.0009) +[2026-06-07 03:37:01,048][528169] Updated weights for policy 0, policy_version 40583 (0.0008) +[2026-06-07 03:37:01,709][528169] Updated weights for policy 0, policy_version 40594 (0.0008) +[2026-06-07 03:37:01,838][528169] Updated weights for policy 0, policy_version 40604 (0.0008) +[2026-06-07 03:37:01,969][528169] Updated weights for policy 0, policy_version 40614 (0.0008) +[2026-06-07 03:37:02,105][528169] Updated weights for policy 0, policy_version 40624 (0.0008) +[2026-06-07 03:37:02,246][528169] Updated weights for policy 0, policy_version 40635 (0.0008) +[2026-06-07 03:37:02,391][528169] Updated weights for policy 0, policy_version 40646 (0.0009) +[2026-06-07 03:37:02,524][528169] Updated weights for policy 0, policy_version 40656 (0.0008) +[2026-06-07 03:37:03,141][528169] Updated weights for policy 0, policy_version 40666 (0.0008) +[2026-06-07 03:37:03,282][528169] Updated weights for policy 0, policy_version 40677 (0.0009) +[2026-06-07 03:37:03,416][528169] Updated weights for policy 0, policy_version 40687 (0.0008) +[2026-06-07 03:37:03,547][528169] Updated weights for policy 0, policy_version 40697 (0.0008) +[2026-06-07 03:37:03,695][528169] Updated weights for policy 0, policy_version 40708 (0.0008) +[2026-06-07 03:37:03,851][528169] Updated weights for policy 0, policy_version 40720 (0.0009) +[2026-06-07 03:37:04,453][528169] Updated weights for policy 0, policy_version 40730 (0.0005) +[2026-06-07 03:37:04,579][528169] Updated weights for policy 0, policy_version 40740 (0.0007) +[2026-06-07 03:37:04,726][528169] Updated weights for policy 0, policy_version 40751 (0.0008) +[2026-06-07 03:37:04,866][528169] Updated weights for policy 0, policy_version 40762 (0.0008) +[2026-06-07 03:37:04,996][528169] Updated weights for policy 0, policy_version 40772 (0.0008) +[2026-06-07 03:37:05,103][527010] Fps is (10 sec: 22937.5, 60 sec: 24576.0, 300 sec: 24659.3). Total num frames: 20873216. Throughput: 0: 24854.7. Samples: 20886144. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 03:37:05,104][527010] Avg episode reward: [(0, '121.658')] +[2026-06-07 03:37:05,142][528169] Updated weights for policy 0, policy_version 40782 (0.0008) +[2026-06-07 03:37:05,750][528169] Updated weights for policy 0, policy_version 40792 (0.0008) +[2026-06-07 03:37:05,888][528169] Updated weights for policy 0, policy_version 40802 (0.0008) +[2026-06-07 03:37:06,050][528169] Updated weights for policy 0, policy_version 40815 (0.0009) +[2026-06-07 03:37:06,196][528169] Updated weights for policy 0, policy_version 40826 (0.0009) +[2026-06-07 03:37:06,334][528169] Updated weights for policy 0, policy_version 40836 (0.0008) +[2026-06-07 03:37:06,470][528169] Updated weights for policy 0, policy_version 40846 (0.0008) +[2026-06-07 03:37:07,072][528169] Updated weights for policy 0, policy_version 40856 (0.0008) +[2026-06-07 03:37:07,203][528169] Updated weights for policy 0, policy_version 40866 (0.0009) +[2026-06-07 03:37:07,335][528169] Updated weights for policy 0, policy_version 40876 (0.0008) +[2026-06-07 03:37:07,507][528169] Updated weights for policy 0, policy_version 40889 (0.0008) +[2026-06-07 03:37:07,638][528169] Updated weights for policy 0, policy_version 40899 (0.0008) +[2026-06-07 03:37:07,790][528169] Updated weights for policy 0, policy_version 40911 (0.0008) +[2026-06-07 03:37:08,426][528169] Updated weights for policy 0, policy_version 40921 (0.0008) +[2026-06-07 03:37:08,553][528169] Updated weights for policy 0, policy_version 40931 (0.0008) +[2026-06-07 03:37:08,677][528169] Updated weights for policy 0, policy_version 40941 (0.0008) +[2026-06-07 03:37:08,815][528169] Updated weights for policy 0, policy_version 40951 (0.0008) +[2026-06-07 03:37:08,957][528169] Updated weights for policy 0, policy_version 40961 (0.0008) +[2026-06-07 03:37:09,098][528169] Updated weights for policy 0, policy_version 40972 (0.0008) +[2026-06-07 03:37:09,689][528169] Updated weights for policy 0, policy_version 40982 (0.0008) +[2026-06-07 03:37:09,830][528169] Updated weights for policy 0, policy_version 40993 (0.0008) +[2026-06-07 03:37:09,976][528169] Updated weights for policy 0, policy_version 41004 (0.0008) +[2026-06-07 03:37:10,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24576.0, 300 sec: 24659.3). Total num frames: 21004288. Throughput: 0: 24803.6. Samples: 21028608. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 03:37:10,104][527010] Avg episode reward: [(0, '130.161')] +[2026-06-07 03:37:10,112][528169] Updated weights for policy 0, policy_version 41014 (0.0009) +[2026-06-07 03:37:10,243][528169] Updated weights for policy 0, policy_version 41024 (0.0008) +[2026-06-07 03:37:10,379][528169] Updated weights for policy 0, policy_version 41034 (0.0008) +[2026-06-07 03:37:10,985][528169] Updated weights for policy 0, policy_version 41044 (0.0008) +[2026-06-07 03:37:11,120][528169] Updated weights for policy 0, policy_version 41054 (0.0008) +[2026-06-07 03:37:11,241][528169] Updated weights for policy 0, policy_version 41064 (0.0008) +[2026-06-07 03:37:11,376][528169] Updated weights for policy 0, policy_version 41074 (0.0008) +[2026-06-07 03:37:11,512][528169] Updated weights for policy 0, policy_version 41084 (0.0008) +[2026-06-07 03:37:11,661][528169] Updated weights for policy 0, policy_version 41095 (0.0008) +[2026-06-07 03:37:12,298][528169] Updated weights for policy 0, policy_version 41108 (0.0008) +[2026-06-07 03:37:12,427][528169] Updated weights for policy 0, policy_version 41118 (0.0008) +[2026-06-07 03:37:12,572][528169] Updated weights for policy 0, policy_version 41129 (0.0008) +[2026-06-07 03:37:12,721][528169] Updated weights for policy 0, policy_version 41140 (0.0008) +[2026-06-07 03:37:12,869][528169] Updated weights for policy 0, policy_version 41151 (0.0009) +[2026-06-07 03:37:12,999][528169] Updated weights for policy 0, policy_version 41161 (0.0008) +[2026-06-07 03:37:13,630][528169] Updated weights for policy 0, policy_version 41172 (0.0008) +[2026-06-07 03:37:13,787][528169] Updated weights for policy 0, policy_version 41184 (0.0008) +[2026-06-07 03:37:13,916][528169] Updated weights for policy 0, policy_version 41194 (0.0008) +[2026-06-07 03:37:14,060][528169] Updated weights for policy 0, policy_version 41205 (0.0008) +[2026-06-07 03:37:14,256][528169] Updated weights for policy 0, policy_version 41220 (0.0008) +[2026-06-07 03:37:14,394][528169] Updated weights for policy 0, policy_version 41230 (0.0008) +[2026-06-07 03:37:14,985][528169] Updated weights for policy 0, policy_version 41240 (0.0008) +[2026-06-07 03:37:15,103][527010] Fps is (10 sec: 26214.5, 60 sec: 24576.0, 300 sec: 24659.3). Total num frames: 21135360. Throughput: 0: 24846.2. Samples: 21111936. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 03:37:15,104][527010] Avg episode reward: [(0, '138.072')] +[2026-06-07 03:37:15,129][528169] Updated weights for policy 0, policy_version 41251 (0.0008) +[2026-06-07 03:37:15,283][528169] Updated weights for policy 0, policy_version 41263 (0.0008) +[2026-06-07 03:37:15,433][528169] Updated weights for policy 0, policy_version 41274 (0.0008) +[2026-06-07 03:37:15,591][528169] Updated weights for policy 0, policy_version 41286 (0.0008) +[2026-06-07 03:37:15,726][528169] Updated weights for policy 0, policy_version 41296 (0.0006) +[2026-06-07 03:37:16,327][528169] Updated weights for policy 0, policy_version 41306 (0.0005) +[2026-06-07 03:37:16,461][528169] Updated weights for policy 0, policy_version 41316 (0.0005) +[2026-06-07 03:37:16,595][528169] Updated weights for policy 0, policy_version 41326 (0.0005) +[2026-06-07 03:37:16,748][528169] Updated weights for policy 0, policy_version 41337 (0.0005) +[2026-06-07 03:37:16,877][528169] Updated weights for policy 0, policy_version 41347 (0.0004) +[2026-06-07 03:37:17,040][528169] Updated weights for policy 0, policy_version 41359 (0.0004) +[2026-06-07 03:37:17,655][528169] Updated weights for policy 0, policy_version 41371 (0.0005) +[2026-06-07 03:37:17,784][528169] Updated weights for policy 0, policy_version 41381 (0.0008) +[2026-06-07 03:37:17,919][528169] Updated weights for policy 0, policy_version 41391 (0.0008) +[2026-06-07 03:37:18,054][528169] Updated weights for policy 0, policy_version 41401 (0.0008) +[2026-06-07 03:37:18,179][528169] Updated weights for policy 0, policy_version 41411 (0.0008) +[2026-06-07 03:37:18,326][528169] Updated weights for policy 0, policy_version 41421 (0.0009) +[2026-06-07 03:37:18,923][528169] Updated weights for policy 0, policy_version 41431 (0.0008) +[2026-06-07 03:37:19,060][528169] Updated weights for policy 0, policy_version 41441 (0.0008) +[2026-06-07 03:37:19,188][528169] Updated weights for policy 0, policy_version 41451 (0.0008) +[2026-06-07 03:37:19,322][528169] Updated weights for policy 0, policy_version 41461 (0.0008) +[2026-06-07 03:37:19,456][528169] Updated weights for policy 0, policy_version 41471 (0.0008) +[2026-06-07 03:37:19,600][528169] Updated weights for policy 0, policy_version 41482 (0.0008) +[2026-06-07 03:37:20,103][527010] Fps is (10 sec: 26214.5, 60 sec: 25122.1, 300 sec: 24770.4). Total num frames: 21266432. Throughput: 0: 24860.4. Samples: 21254016. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 03:37:20,104][527010] Avg episode reward: [(0, '144.911')] +[2026-06-07 03:37:20,239][528169] Updated weights for policy 0, policy_version 41492 (0.0008) +[2026-06-07 03:37:20,369][528169] Updated weights for policy 0, policy_version 41502 (0.0008) +[2026-06-07 03:37:20,510][528169] Updated weights for policy 0, policy_version 41513 (0.0008) +[2026-06-07 03:37:20,637][528169] Updated weights for policy 0, policy_version 41523 (0.0008) +[2026-06-07 03:37:20,783][528169] Updated weights for policy 0, policy_version 41534 (0.0008) +[2026-06-07 03:37:20,969][528169] Updated weights for policy 0, policy_version 41548 (0.0008) +[2026-06-07 03:37:21,598][528169] Updated weights for policy 0, policy_version 41559 (0.0008) +[2026-06-07 03:37:21,730][528169] Updated weights for policy 0, policy_version 41570 (0.0008) +[2026-06-07 03:37:21,870][528169] Updated weights for policy 0, policy_version 41581 (0.0008) +[2026-06-07 03:37:22,006][528169] Updated weights for policy 0, policy_version 41591 (0.0008) +[2026-06-07 03:37:22,157][528169] Updated weights for policy 0, policy_version 41602 (0.0009) +[2026-06-07 03:37:22,286][528169] Updated weights for policy 0, policy_version 41612 (0.0008) +[2026-06-07 03:37:22,908][528169] Updated weights for policy 0, policy_version 41622 (0.0008) +[2026-06-07 03:37:23,037][528169] Updated weights for policy 0, policy_version 41632 (0.0008) +[2026-06-07 03:37:23,170][528169] Updated weights for policy 0, policy_version 41642 (0.0008) +[2026-06-07 03:37:23,300][528169] Updated weights for policy 0, policy_version 41652 (0.0008) +[2026-06-07 03:37:23,435][528169] Updated weights for policy 0, policy_version 41662 (0.0008) +[2026-06-07 03:37:23,572][528169] Updated weights for policy 0, policy_version 41672 (0.0008) +[2026-06-07 03:37:24,186][528169] Updated weights for policy 0, policy_version 41683 (0.0008) +[2026-06-07 03:37:24,317][528169] Updated weights for policy 0, policy_version 41693 (0.0008) +[2026-06-07 03:37:24,448][528169] Updated weights for policy 0, policy_version 41703 (0.0008) +[2026-06-07 03:37:24,592][528169] Updated weights for policy 0, policy_version 41714 (0.0008) +[2026-06-07 03:37:24,732][528169] Updated weights for policy 0, policy_version 41725 (0.0008) +[2026-06-07 03:37:24,897][528169] Updated weights for policy 0, policy_version 41737 (0.0008) +[2026-06-07 03:37:25,103][527010] Fps is (10 sec: 26214.6, 60 sec: 25122.2, 300 sec: 24770.4). Total num frames: 21397504. Throughput: 0: 24823.5. Samples: 21409280. Policy #0 lag: (min: 63.0, avg: 74.8, max: 127.0) +[2026-06-07 03:37:25,103][527010] Avg episode reward: [(0, '120.788')] +[2026-06-07 03:37:25,514][528169] Updated weights for policy 0, policy_version 41748 (0.0009) +[2026-06-07 03:37:25,638][528169] Updated weights for policy 0, policy_version 41758 (0.0008) +[2026-06-07 03:37:25,773][528169] Updated weights for policy 0, policy_version 41768 (0.0008) +[2026-06-07 03:37:25,907][528169] Updated weights for policy 0, policy_version 41778 (0.0008) +[2026-06-07 03:37:26,055][528169] Updated weights for policy 0, policy_version 41789 (0.0008) +[2026-06-07 03:37:26,190][528169] Updated weights for policy 0, policy_version 41799 (0.0008) +[2026-06-07 03:37:26,829][528169] Updated weights for policy 0, policy_version 41810 (0.0008) +[2026-06-07 03:37:26,956][528169] Updated weights for policy 0, policy_version 41820 (0.0008) +[2026-06-07 03:37:27,085][528169] Updated weights for policy 0, policy_version 41830 (0.0008) +[2026-06-07 03:37:27,230][528169] Updated weights for policy 0, policy_version 41841 (0.0008) +[2026-06-07 03:37:27,359][528169] Updated weights for policy 0, policy_version 41851 (0.0008) +[2026-06-07 03:37:27,502][528169] Updated weights for policy 0, policy_version 41861 (0.0008) +[2026-06-07 03:37:27,651][528169] Updated weights for policy 0, policy_version 41872 (0.0008) +[2026-06-07 03:37:28,268][528169] Updated weights for policy 0, policy_version 41882 (0.0008) +[2026-06-07 03:37:28,431][528169] Updated weights for policy 0, policy_version 41894 (0.0008) +[2026-06-07 03:37:28,559][528169] Updated weights for policy 0, policy_version 41904 (0.0008) +[2026-06-07 03:37:28,712][528169] Updated weights for policy 0, policy_version 41916 (0.0010) +[2026-06-07 03:37:28,855][528169] Updated weights for policy 0, policy_version 41926 (0.0005) +[2026-06-07 03:37:29,470][528169] Updated weights for policy 0, policy_version 41938 (0.0005) +[2026-06-07 03:37:29,601][528169] Updated weights for policy 0, policy_version 41948 (0.0008) +[2026-06-07 03:37:29,736][528169] Updated weights for policy 0, policy_version 41958 (0.0008) +[2026-06-07 03:37:29,886][528169] Updated weights for policy 0, policy_version 41969 (0.0008) +[2026-06-07 03:37:30,023][528169] Updated weights for policy 0, policy_version 41979 (0.0008) +[2026-06-07 03:37:30,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24576.0, 300 sec: 24659.3). Total num frames: 21495808. Throughput: 0: 24846.2. Samples: 21480320. Policy #0 lag: (min: 63.0, avg: 75.4, max: 127.0) +[2026-06-07 03:37:30,104][527010] Avg episode reward: [(0, '124.703')] +[2026-06-07 03:37:30,153][528169] Updated weights for policy 0, policy_version 41989 (0.0008) +[2026-06-07 03:37:30,286][528169] Updated weights for policy 0, policy_version 41999 (0.0008) +[2026-06-07 03:37:30,900][528169] Updated weights for policy 0, policy_version 42009 (0.0008) +[2026-06-07 03:37:31,041][528169] Updated weights for policy 0, policy_version 42019 (0.0008) +[2026-06-07 03:37:31,169][528169] Updated weights for policy 0, policy_version 42029 (0.0008) +[2026-06-07 03:37:31,333][528169] Updated weights for policy 0, policy_version 42041 (0.0008) +[2026-06-07 03:37:31,469][528169] Updated weights for policy 0, policy_version 42051 (0.0008) +[2026-06-07 03:37:31,606][528169] Updated weights for policy 0, policy_version 42061 (0.0008) +[2026-06-07 03:37:32,210][528169] Updated weights for policy 0, policy_version 42072 (0.0008) +[2026-06-07 03:37:32,349][528169] Updated weights for policy 0, policy_version 42083 (0.0008) +[2026-06-07 03:37:32,482][528169] Updated weights for policy 0, policy_version 42093 (0.0008) +[2026-06-07 03:37:32,615][528169] Updated weights for policy 0, policy_version 42103 (0.0008) +[2026-06-07 03:37:32,743][528169] Updated weights for policy 0, policy_version 42113 (0.0008) +[2026-06-07 03:37:32,912][528169] Updated weights for policy 0, policy_version 42126 (0.0008) +[2026-06-07 03:37:33,539][528169] Updated weights for policy 0, policy_version 42136 (0.0008) +[2026-06-07 03:37:33,671][528169] Updated weights for policy 0, policy_version 42146 (0.0008) +[2026-06-07 03:37:33,807][528169] Updated weights for policy 0, policy_version 42156 (0.0008) +[2026-06-07 03:37:33,960][528169] Updated weights for policy 0, policy_version 42168 (0.0008) +[2026-06-07 03:37:34,094][528169] Updated weights for policy 0, policy_version 42178 (0.0008) +[2026-06-07 03:37:34,231][528169] Updated weights for policy 0, policy_version 42188 (0.0008) +[2026-06-07 03:37:34,817][528169] Updated weights for policy 0, policy_version 42198 (0.0008) +[2026-06-07 03:37:34,974][528169] Updated weights for policy 0, policy_version 42210 (0.0008) +[2026-06-07 03:37:35,103][527010] Fps is (10 sec: 22937.3, 60 sec: 24576.0, 300 sec: 24659.3). Total num frames: 21626880. Throughput: 0: 24817.8. Samples: 21629824. Policy #0 lag: (min: 63.0, avg: 75.4, max: 127.0) +[2026-06-07 03:37:35,104][527010] Avg episode reward: [(0, '117.093')] +[2026-06-07 03:37:35,117][528169] Updated weights for policy 0, policy_version 42221 (0.0008) +[2026-06-07 03:37:35,256][528169] Updated weights for policy 0, policy_version 42232 (0.0008) +[2026-06-07 03:37:35,414][528169] Updated weights for policy 0, policy_version 42244 (0.0008) +[2026-06-07 03:37:35,556][528169] Updated weights for policy 0, policy_version 42254 (0.0008) +[2026-06-07 03:37:36,157][528169] Updated weights for policy 0, policy_version 42264 (0.0008) +[2026-06-07 03:37:36,317][528169] Updated weights for policy 0, policy_version 42276 (0.0008) +[2026-06-07 03:37:36,470][528169] Updated weights for policy 0, policy_version 42287 (0.0008) +[2026-06-07 03:37:36,608][528169] Updated weights for policy 0, policy_version 42298 (0.0008) +[2026-06-07 03:37:36,756][528169] Updated weights for policy 0, policy_version 42309 (0.0008) +[2026-06-07 03:37:36,889][528169] Updated weights for policy 0, policy_version 42319 (0.0008) +[2026-06-07 03:37:37,519][528169] Updated weights for policy 0, policy_version 42329 (0.0009) +[2026-06-07 03:37:37,649][528169] Updated weights for policy 0, policy_version 42339 (0.0008) +[2026-06-07 03:37:37,783][528169] Updated weights for policy 0, policy_version 42349 (0.0008) +[2026-06-07 03:37:37,909][528169] Updated weights for policy 0, policy_version 42359 (0.0008) +[2026-06-07 03:37:38,048][528169] Updated weights for policy 0, policy_version 42369 (0.0008) +[2026-06-07 03:37:38,183][528169] Updated weights for policy 0, policy_version 42379 (0.0008) +[2026-06-07 03:37:38,859][528169] Updated weights for policy 0, policy_version 42393 (0.0009) +[2026-06-07 03:37:38,987][528169] Updated weights for policy 0, policy_version 42403 (0.0008) +[2026-06-07 03:37:39,123][528169] Updated weights for policy 0, policy_version 42413 (0.0008) +[2026-06-07 03:37:39,247][528169] Updated weights for policy 0, policy_version 42423 (0.0008) +[2026-06-07 03:37:39,385][528169] Updated weights for policy 0, policy_version 42433 (0.0008) +[2026-06-07 03:37:39,516][528169] Updated weights for policy 0, policy_version 42443 (0.0008) +[2026-06-07 03:37:40,103][527010] Fps is (10 sec: 26214.4, 60 sec: 24576.0, 300 sec: 24770.4). Total num frames: 21757952. Throughput: 0: 24832.0. Samples: 21775488. Policy #0 lag: (min: 63.0, avg: 75.4, max: 127.0) +[2026-06-07 03:37:40,104][527010] Avg episode reward: [(0, '125.236')] +[2026-06-07 03:37:40,137][528169] Updated weights for policy 0, policy_version 42453 (0.0006) +[2026-06-07 03:37:40,269][528169] Updated weights for policy 0, policy_version 42463 (0.0004) +[2026-06-07 03:37:40,398][528169] Updated weights for policy 0, policy_version 42473 (0.0004) +[2026-06-07 03:37:40,533][528169] Updated weights for policy 0, policy_version 42483 (0.0008) +[2026-06-07 03:37:40,696][528169] Updated weights for policy 0, policy_version 42495 (0.0008) +[2026-06-07 03:37:40,838][528169] Updated weights for policy 0, policy_version 42506 (0.0008) +[2026-06-07 03:37:41,460][528169] Updated weights for policy 0, policy_version 42518 (0.0008) +[2026-06-07 03:37:41,594][528169] Updated weights for policy 0, policy_version 42528 (0.0008) +[2026-06-07 03:37:41,731][528169] Updated weights for policy 0, policy_version 42539 (0.0009) +[2026-06-07 03:37:41,870][528169] Updated weights for policy 0, policy_version 42550 (0.0009) +[2026-06-07 03:37:42,010][528169] Updated weights for policy 0, policy_version 42560 (0.0009) +[2026-06-07 03:37:42,176][528169] Updated weights for policy 0, policy_version 42573 (0.0009) +[2026-06-07 03:37:42,796][528169] Updated weights for policy 0, policy_version 42583 (0.0009) +[2026-06-07 03:37:42,929][528169] Updated weights for policy 0, policy_version 42594 (0.0009) +[2026-06-07 03:37:43,097][528169] Updated weights for policy 0, policy_version 42607 (0.0009) +[2026-06-07 03:37:43,245][528169] Updated weights for policy 0, policy_version 42618 (0.0009) +[2026-06-07 03:37:43,429][528169] Updated weights for policy 0, policy_version 42632 (0.0007) +[2026-06-07 03:37:44,041][528169] Updated weights for policy 0, policy_version 42642 (0.0005) +[2026-06-07 03:37:44,166][528169] Updated weights for policy 0, policy_version 42652 (0.0008) +[2026-06-07 03:37:44,306][528169] Updated weights for policy 0, policy_version 42662 (0.0008) +[2026-06-07 03:37:44,433][528169] Updated weights for policy 0, policy_version 42672 (0.0008) +[2026-06-07 03:37:44,580][528169] Updated weights for policy 0, policy_version 42683 (0.0008) +[2026-06-07 03:37:44,712][528169] Updated weights for policy 0, policy_version 42693 (0.0008) +[2026-06-07 03:37:44,854][528169] Updated weights for policy 0, policy_version 42704 (0.0008) +[2026-06-07 03:37:45,103][527010] Fps is (10 sec: 26214.5, 60 sec: 25122.1, 300 sec: 24770.4). Total num frames: 21889024. Throughput: 0: 24769.4. Samples: 21844480. Policy #0 lag: (min: 63.0, avg: 75.4, max: 127.0) +[2026-06-07 03:37:45,104][527010] Avg episode reward: [(0, '116.503')] +[2026-06-07 03:37:45,502][528169] Updated weights for policy 0, policy_version 42716 (0.0008) +[2026-06-07 03:37:45,663][528169] Updated weights for policy 0, policy_version 42728 (0.0008) +[2026-06-07 03:37:45,790][528169] Updated weights for policy 0, policy_version 42738 (0.0008) +[2026-06-07 03:37:45,934][528169] Updated weights for policy 0, policy_version 42749 (0.0008) +[2026-06-07 03:37:46,089][528169] Updated weights for policy 0, policy_version 42760 (0.0009) +[2026-06-07 03:37:46,696][528169] Updated weights for policy 0, policy_version 42770 (0.0008) +[2026-06-07 03:37:46,826][528169] Updated weights for policy 0, policy_version 42780 (0.0008) +[2026-06-07 03:37:46,963][528169] Updated weights for policy 0, policy_version 42791 (0.0008) +[2026-06-07 03:37:47,091][528169] Updated weights for policy 0, policy_version 42801 (0.0008) +[2026-06-07 03:37:47,232][528169] Updated weights for policy 0, policy_version 42811 (0.0008) +[2026-06-07 03:37:47,363][528169] Updated weights for policy 0, policy_version 42821 (0.0008) +[2026-06-07 03:37:47,395][528093] Early stopping after 7 epochs (56 sgd steps), loss delta 0.0000009 +[2026-06-07 03:37:47,993][528169] Updated weights for policy 0, policy_version 42831 (0.0008) +[2026-06-07 03:37:48,148][528169] Updated weights for policy 0, policy_version 42843 (0.0008) +[2026-06-07 03:37:48,296][528169] Updated weights for policy 0, policy_version 42854 (0.0008) +[2026-06-07 03:37:48,442][528169] Updated weights for policy 0, policy_version 42865 (0.0008) +[2026-06-07 03:37:48,583][528169] Updated weights for policy 0, policy_version 42876 (0.0008) +[2026-06-07 03:37:48,732][528169] Updated weights for policy 0, policy_version 42887 (0.0008) +[2026-06-07 03:37:49,361][528169] Updated weights for policy 0, policy_version 42897 (0.0008) +[2026-06-07 03:37:49,500][528169] Updated weights for policy 0, policy_version 42908 (0.0008) +[2026-06-07 03:37:49,646][528169] Updated weights for policy 0, policy_version 42919 (0.0008) +[2026-06-07 03:37:49,804][528169] Updated weights for policy 0, policy_version 42931 (0.0008) +[2026-06-07 03:37:49,942][528169] Updated weights for policy 0, policy_version 42942 (0.0008) +[2026-06-07 03:37:50,103][527010] Fps is (10 sec: 26214.4, 60 sec: 25122.1, 300 sec: 24770.4). Total num frames: 22020096. Throughput: 0: 24812.1. Samples: 22002688. Policy #0 lag: (min: 63.0, avg: 75.4, max: 127.0) +[2026-06-07 03:37:50,104][527010] Avg episode reward: [(0, '114.476')] +[2026-06-07 03:37:50,591][528169] Updated weights for policy 0, policy_version 42953 (0.0008) +[2026-06-07 03:37:50,728][528169] Updated weights for policy 0, policy_version 42964 (0.0008) +[2026-06-07 03:37:50,880][528169] Updated weights for policy 0, policy_version 42975 (0.0008) +[2026-06-07 03:37:51,038][528169] Updated weights for policy 0, policy_version 42987 (0.0009) +[2026-06-07 03:37:51,191][528169] Updated weights for policy 0, policy_version 42999 (0.0008) +[2026-06-07 03:37:51,324][528169] Updated weights for policy 0, policy_version 43009 (0.0008) +[2026-06-07 03:37:51,916][528169] Updated weights for policy 0, policy_version 43021 (0.0008) +[2026-06-07 03:37:52,050][528169] Updated weights for policy 0, policy_version 43032 (0.0008) +[2026-06-07 03:37:52,208][528169] Updated weights for policy 0, policy_version 43044 (0.0008) +[2026-06-07 03:37:52,351][528169] Updated weights for policy 0, policy_version 43055 (0.0008) +[2026-06-07 03:37:52,483][528169] Updated weights for policy 0, policy_version 43065 (0.0008) +[2026-06-07 03:37:52,628][528169] Updated weights for policy 0, policy_version 43076 (0.0009) +[2026-06-07 03:37:53,294][528169] Updated weights for policy 0, policy_version 43087 (0.0008) +[2026-06-07 03:37:53,430][528169] Updated weights for policy 0, policy_version 43098 (0.0008) +[2026-06-07 03:37:53,571][528169] Updated weights for policy 0, policy_version 43109 (0.0008) +[2026-06-07 03:37:53,745][528169] Updated weights for policy 0, policy_version 43123 (0.0008) +[2026-06-07 03:37:53,891][528169] Updated weights for policy 0, policy_version 43134 (0.0008) +[2026-06-07 03:37:54,579][528169] Updated weights for policy 0, policy_version 43149 (0.0007) +[2026-06-07 03:37:54,711][528169] Updated weights for policy 0, policy_version 43159 (0.0005) +[2026-06-07 03:37:54,855][528169] Updated weights for policy 0, policy_version 43170 (0.0006) +[2026-06-07 03:37:54,991][528169] Updated weights for policy 0, policy_version 43180 (0.0008) +[2026-06-07 03:37:55,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24576.0, 300 sec: 24659.3). Total num frames: 22118400. Throughput: 0: 24897.4. Samples: 22148992. Policy #0 lag: (min: 63.0, avg: 75.4, max: 127.0) +[2026-06-07 03:37:55,104][527010] Avg episode reward: [(0, '122.953')] +[2026-06-07 03:37:55,153][528169] Updated weights for policy 0, policy_version 43192 (0.0009) +[2026-06-07 03:37:55,294][528169] Updated weights for policy 0, policy_version 43203 (0.0008) +[2026-06-07 03:37:55,943][528169] Updated weights for policy 0, policy_version 43215 (0.0008) +[2026-06-07 03:37:56,112][528169] Updated weights for policy 0, policy_version 43228 (0.0008) +[2026-06-07 03:37:56,245][528169] Updated weights for policy 0, policy_version 43238 (0.0008) +[2026-06-07 03:37:56,381][528169] Updated weights for policy 0, policy_version 43248 (0.0008) +[2026-06-07 03:37:56,528][528169] Updated weights for policy 0, policy_version 43259 (0.0008) +[2026-06-07 03:37:56,674][528169] Updated weights for policy 0, policy_version 43270 (0.0008) +[2026-06-07 03:37:57,270][528169] Updated weights for policy 0, policy_version 43281 (0.0009) +[2026-06-07 03:37:57,397][528169] Updated weights for policy 0, policy_version 43291 (0.0008) +[2026-06-07 03:37:57,537][528169] Updated weights for policy 0, policy_version 43302 (0.0008) +[2026-06-07 03:37:57,672][528169] Updated weights for policy 0, policy_version 43312 (0.0008) +[2026-06-07 03:37:57,817][528169] Updated weights for policy 0, policy_version 43323 (0.0008) +[2026-06-07 03:37:57,962][528169] Updated weights for policy 0, policy_version 43334 (0.0008) +[2026-06-07 03:37:58,596][528169] Updated weights for policy 0, policy_version 43346 (0.0008) +[2026-06-07 03:37:58,735][528169] Updated weights for policy 0, policy_version 43357 (0.0005) +[2026-06-07 03:37:58,877][528169] Updated weights for policy 0, policy_version 43368 (0.0008) +[2026-06-07 03:37:59,013][528169] Updated weights for policy 0, policy_version 43378 (0.0008) +[2026-06-07 03:37:59,155][528169] Updated weights for policy 0, policy_version 43388 (0.0008) +[2026-06-07 03:37:59,310][528169] Updated weights for policy 0, policy_version 43400 (0.0008) +[2026-06-07 03:37:59,925][528169] Updated weights for policy 0, policy_version 43410 (0.0008) +[2026-06-07 03:38:00,063][528169] Updated weights for policy 0, policy_version 43421 (0.0008) +[2026-06-07 03:38:00,103][527010] Fps is (10 sec: 22937.2, 60 sec: 24575.9, 300 sec: 24770.4). Total num frames: 22249472. Throughput: 0: 24817.7. Samples: 22228736. Policy #0 lag: (min: 63.0, avg: 74.5, max: 127.0) +[2026-06-07 03:38:00,104][527010] Avg episode reward: [(0, '130.105')] +[2026-06-07 03:38:00,206][528169] Updated weights for policy 0, policy_version 43432 (0.0008) +[2026-06-07 03:38:00,343][528169] Updated weights for policy 0, policy_version 43442 (0.0009) +[2026-06-07 03:38:00,480][528169] Updated weights for policy 0, policy_version 43452 (0.0009) +[2026-06-07 03:38:00,612][528169] Updated weights for policy 0, policy_version 43462 (0.0008) +[2026-06-07 03:38:01,244][528169] Updated weights for policy 0, policy_version 43473 (0.0005) +[2026-06-07 03:38:01,380][528169] Updated weights for policy 0, policy_version 43484 (0.0005) +[2026-06-07 03:38:01,509][528169] Updated weights for policy 0, policy_version 43494 (0.0009) +[2026-06-07 03:38:01,658][528169] Updated weights for policy 0, policy_version 43506 (0.0012) +[2026-06-07 03:38:01,817][528169] Updated weights for policy 0, policy_version 43518 (0.0011) +[2026-06-07 03:38:01,934][528169] Updated weights for policy 0, policy_version 43528 (0.0004) +[2026-06-07 03:38:02,568][528169] Updated weights for policy 0, policy_version 43538 (0.0008) +[2026-06-07 03:38:02,690][528169] Updated weights for policy 0, policy_version 43548 (0.0008) +[2026-06-07 03:38:02,833][528169] Updated weights for policy 0, policy_version 43559 (0.0009) +[2026-06-07 03:38:02,977][528169] Updated weights for policy 0, policy_version 43570 (0.0009) +[2026-06-07 03:38:03,099][528169] Updated weights for policy 0, policy_version 43580 (0.0008) +[2026-06-07 03:38:03,231][528169] Updated weights for policy 0, policy_version 43590 (0.0009) +[2026-06-07 03:38:03,881][528169] Updated weights for policy 0, policy_version 43601 (0.0008) +[2026-06-07 03:38:04,027][528169] Updated weights for policy 0, policy_version 43613 (0.0008) +[2026-06-07 03:38:04,195][528169] Updated weights for policy 0, policy_version 43626 (0.0008) +[2026-06-07 03:38:04,347][528169] Updated weights for policy 0, policy_version 43638 (0.0008) +[2026-06-07 03:38:04,502][528169] Updated weights for policy 0, policy_version 43650 (0.0006) +[2026-06-07 03:38:05,103][527010] Fps is (10 sec: 26214.2, 60 sec: 25122.1, 300 sec: 24770.4). Total num frames: 22380544. Throughput: 0: 24792.1. Samples: 22369664. Policy #0 lag: (min: 63.0, avg: 74.5, max: 127.0) +[2026-06-07 03:38:05,104][527010] Avg episode reward: [(0, '130.646')] +[2026-06-07 03:38:05,133][528169] Updated weights for policy 0, policy_version 43661 (0.0009) +[2026-06-07 03:38:05,256][528169] Updated weights for policy 0, policy_version 43671 (0.0008) +[2026-06-07 03:38:05,397][528169] Updated weights for policy 0, policy_version 43682 (0.0009) +[2026-06-07 03:38:05,531][528169] Updated weights for policy 0, policy_version 43692 (0.0008) +[2026-06-07 03:38:05,708][528169] Updated weights for policy 0, policy_version 43706 (0.0008) +[2026-06-07 03:38:05,851][528169] Updated weights for policy 0, policy_version 43717 (0.0008) +[2026-06-07 03:38:06,466][528169] Updated weights for policy 0, policy_version 43727 (0.0009) +[2026-06-07 03:38:06,597][528169] Updated weights for policy 0, policy_version 43737 (0.0008) +[2026-06-07 03:38:06,739][528169] Updated weights for policy 0, policy_version 43748 (0.0008) +[2026-06-07 03:38:06,860][528169] Updated weights for policy 0, policy_version 43758 (0.0008) +[2026-06-07 03:38:07,000][528169] Updated weights for policy 0, policy_version 43768 (0.0008) +[2026-06-07 03:38:07,142][528169] Updated weights for policy 0, policy_version 43779 (0.0008) +[2026-06-07 03:38:07,812][528169] Updated weights for policy 0, policy_version 43791 (0.0008) +[2026-06-07 03:38:07,979][528169] Updated weights for policy 0, policy_version 43804 (0.0008) +[2026-06-07 03:38:08,129][528169] Updated weights for policy 0, policy_version 43816 (0.0009) +[2026-06-07 03:38:08,281][528169] Updated weights for policy 0, policy_version 43828 (0.0008) +[2026-06-07 03:38:08,429][528169] Updated weights for policy 0, policy_version 43839 (0.0008) +[2026-06-07 03:38:09,052][528169] Updated weights for policy 0, policy_version 43849 (0.0006) +[2026-06-07 03:38:09,205][528169] Updated weights for policy 0, policy_version 43859 (0.0008) +[2026-06-07 03:38:09,360][528169] Updated weights for policy 0, policy_version 43871 (0.0008) +[2026-06-07 03:38:09,510][528169] Updated weights for policy 0, policy_version 43883 (0.0008) +[2026-06-07 03:38:09,671][528169] Updated weights for policy 0, policy_version 43896 (0.0008) +[2026-06-07 03:38:09,834][528169] Updated weights for policy 0, policy_version 43908 (0.0008) +[2026-06-07 03:38:10,103][527010] Fps is (10 sec: 26214.9, 60 sec: 25122.1, 300 sec: 24770.4). Total num frames: 22511616. Throughput: 0: 24851.9. Samples: 22527616. Policy #0 lag: (min: 63.0, avg: 74.5, max: 127.0) +[2026-06-07 03:38:10,104][527010] Avg episode reward: [(0, '124.425')] +[2026-06-07 03:38:10,432][528169] Updated weights for policy 0, policy_version 43919 (0.0008) +[2026-06-07 03:38:10,567][528169] Updated weights for policy 0, policy_version 43930 (0.0009) +[2026-06-07 03:38:10,715][528169] Updated weights for policy 0, policy_version 43942 (0.0009) +[2026-06-07 03:38:10,857][528169] Updated weights for policy 0, policy_version 43953 (0.0008) +[2026-06-07 03:38:11,022][528169] Updated weights for policy 0, policy_version 43966 (0.0008) +[2026-06-07 03:38:11,158][528169] Updated weights for policy 0, policy_version 43976 (0.0008) +[2026-06-07 03:38:11,800][528169] Updated weights for policy 0, policy_version 43987 (0.0008) +[2026-06-07 03:38:11,962][528169] Updated weights for policy 0, policy_version 44000 (0.0009) +[2026-06-07 03:38:12,113][528169] Updated weights for policy 0, policy_version 44011 (0.0009) +[2026-06-07 03:38:12,258][528169] Updated weights for policy 0, policy_version 44022 (0.0008) +[2026-06-07 03:38:12,407][528169] Updated weights for policy 0, policy_version 44033 (0.0008) +[2026-06-07 03:38:13,020][528169] Updated weights for policy 0, policy_version 44043 (0.0008) +[2026-06-07 03:38:13,148][528169] Updated weights for policy 0, policy_version 44053 (0.0008) +[2026-06-07 03:38:13,283][528169] Updated weights for policy 0, policy_version 44063 (0.0005) +[2026-06-07 03:38:13,422][528169] Updated weights for policy 0, policy_version 44073 (0.0005) +[2026-06-07 03:38:13,552][528169] Updated weights for policy 0, policy_version 44083 (0.0005) +[2026-06-07 03:38:13,714][528169] Updated weights for policy 0, policy_version 44095 (0.0005) +[2026-06-07 03:38:14,301][528169] Updated weights for policy 0, policy_version 44105 (0.0005) +[2026-06-07 03:38:14,448][528169] Updated weights for policy 0, policy_version 44116 (0.0005) +[2026-06-07 03:38:14,583][528169] Updated weights for policy 0, policy_version 44126 (0.0004) +[2026-06-07 03:38:14,715][528169] Updated weights for policy 0, policy_version 44136 (0.0005) +[2026-06-07 03:38:14,857][528169] Updated weights for policy 0, policy_version 44147 (0.0005) +[2026-06-07 03:38:15,009][528169] Updated weights for policy 0, policy_version 44158 (0.0008) +[2026-06-07 03:38:15,103][527010] Fps is (10 sec: 22937.8, 60 sec: 24576.0, 300 sec: 24659.3). Total num frames: 22609920. Throughput: 0: 24786.5. Samples: 22595712. Policy #0 lag: (min: 63.0, avg: 74.5, max: 127.0) +[2026-06-07 03:38:15,104][527010] Avg episode reward: [(0, '124.655')] +[2026-06-07 03:38:15,139][528169] Updated weights for policy 0, policy_version 44168 (0.0008) +[2026-06-07 03:38:15,734][528169] Updated weights for policy 0, policy_version 44179 (0.0008) +[2026-06-07 03:38:15,862][528169] Updated weights for policy 0, policy_version 44189 (0.0008) +[2026-06-07 03:38:16,009][528169] Updated weights for policy 0, policy_version 44200 (0.0008) +[2026-06-07 03:38:16,141][528169] Updated weights for policy 0, policy_version 44210 (0.0008) +[2026-06-07 03:38:16,273][528169] Updated weights for policy 0, policy_version 44220 (0.0008) +[2026-06-07 03:38:16,412][528169] Updated weights for policy 0, policy_version 44230 (0.0008) +[2026-06-07 03:38:17,029][528169] Updated weights for policy 0, policy_version 44240 (0.0008) +[2026-06-07 03:38:17,161][528169] Updated weights for policy 0, policy_version 44250 (0.0008) +[2026-06-07 03:38:17,294][528169] Updated weights for policy 0, policy_version 44260 (0.0008) +[2026-06-07 03:38:17,433][528169] Updated weights for policy 0, policy_version 44270 (0.0008) +[2026-06-07 03:38:17,581][528169] Updated weights for policy 0, policy_version 44281 (0.0008) +[2026-06-07 03:38:17,713][528169] Updated weights for policy 0, policy_version 44291 (0.0008) +[2026-06-07 03:38:18,339][528169] Updated weights for policy 0, policy_version 44301 (0.0008) +[2026-06-07 03:38:18,469][528169] Updated weights for policy 0, policy_version 44311 (0.0008) +[2026-06-07 03:38:18,611][528169] Updated weights for policy 0, policy_version 44322 (0.0008) +[2026-06-07 03:38:18,746][528169] Updated weights for policy 0, policy_version 44332 (0.0008) +[2026-06-07 03:38:18,888][528169] Updated weights for policy 0, policy_version 44342 (0.0009) +[2026-06-07 03:38:19,028][528169] Updated weights for policy 0, policy_version 44353 (0.0008) +[2026-06-07 03:38:19,632][528169] Updated weights for policy 0, policy_version 44363 (0.0007) +[2026-06-07 03:38:19,779][528169] Updated weights for policy 0, policy_version 44374 (0.0005) +[2026-06-07 03:38:19,914][528169] Updated weights for policy 0, policy_version 44384 (0.0005) +[2026-06-07 03:38:20,039][528169] Updated weights for policy 0, policy_version 44394 (0.0005) +[2026-06-07 03:38:20,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24576.0, 300 sec: 24770.4). Total num frames: 22740992. Throughput: 0: 24948.6. Samples: 22752512. Policy #0 lag: (min: 63.0, avg: 74.5, max: 127.0) +[2026-06-07 03:38:20,104][527010] Avg episode reward: [(0, '139.859')] +[2026-06-07 03:38:20,180][528169] Updated weights for policy 0, policy_version 44404 (0.0005) +[2026-06-07 03:38:20,312][528169] Updated weights for policy 0, policy_version 44414 (0.0004) +[2026-06-07 03:38:20,942][528169] Updated weights for policy 0, policy_version 44425 (0.0005) +[2026-06-07 03:38:21,073][528169] Updated weights for policy 0, policy_version 44435 (0.0005) +[2026-06-07 03:38:21,205][528169] Updated weights for policy 0, policy_version 44445 (0.0006) +[2026-06-07 03:38:21,352][528169] Updated weights for policy 0, policy_version 44456 (0.0008) +[2026-06-07 03:38:21,495][528169] Updated weights for policy 0, policy_version 44467 (0.0009) +[2026-06-07 03:38:21,639][528169] Updated weights for policy 0, policy_version 44478 (0.0008) +[2026-06-07 03:38:22,315][528169] Updated weights for policy 0, policy_version 44489 (0.0008) +[2026-06-07 03:38:22,446][528169] Updated weights for policy 0, policy_version 44499 (0.0004) +[2026-06-07 03:38:22,575][528169] Updated weights for policy 0, policy_version 44509 (0.0005) +[2026-06-07 03:38:22,700][528169] Updated weights for policy 0, policy_version 44519 (0.0005) +[2026-06-07 03:38:22,857][528169] Updated weights for policy 0, policy_version 44531 (0.0004) +[2026-06-07 03:38:23,009][528169] Updated weights for policy 0, policy_version 44542 (0.0005) +[2026-06-07 03:38:23,604][528169] Updated weights for policy 0, policy_version 44553 (0.0005) +[2026-06-07 03:38:23,730][528169] Updated weights for policy 0, policy_version 44563 (0.0008) +[2026-06-07 03:38:23,867][528169] Updated weights for policy 0, policy_version 44574 (0.0008) +[2026-06-07 03:38:24,000][528169] Updated weights for policy 0, policy_version 44584 (0.0008) +[2026-06-07 03:38:24,133][528169] Updated weights for policy 0, policy_version 44594 (0.0008) +[2026-06-07 03:38:24,275][528169] Updated weights for policy 0, policy_version 44604 (0.0009) +[2026-06-07 03:38:24,414][528169] Updated weights for policy 0, policy_version 44615 (0.0008) +[2026-06-07 03:38:25,062][528169] Updated weights for policy 0, policy_version 44627 (0.0008) +[2026-06-07 03:38:25,103][527010] Fps is (10 sec: 26214.0, 60 sec: 24575.9, 300 sec: 24770.4). Total num frames: 22872064. Throughput: 0: 24863.2. Samples: 22894336. Policy #0 lag: (min: 63.0, avg: 74.5, max: 127.0) +[2026-06-07 03:38:25,104][527010] Avg episode reward: [(0, '148.728')] +[2026-06-07 03:38:25,198][528169] Updated weights for policy 0, policy_version 44637 (0.0008) +[2026-06-07 03:38:25,341][528169] Updated weights for policy 0, policy_version 44648 (0.0008) +[2026-06-07 03:38:25,500][528169] Updated weights for policy 0, policy_version 44660 (0.0008) +[2026-06-07 03:38:25,655][528169] Updated weights for policy 0, policy_version 44672 (0.0008) +[2026-06-07 03:38:26,282][528169] Updated weights for policy 0, policy_version 44682 (0.0009) +[2026-06-07 03:38:26,413][528169] Updated weights for policy 0, policy_version 44693 (0.0009) +[2026-06-07 03:38:26,565][528169] Updated weights for policy 0, policy_version 44704 (0.0008) +[2026-06-07 03:38:26,693][528169] Updated weights for policy 0, policy_version 44714 (0.0008) +[2026-06-07 03:38:26,853][528169] Updated weights for policy 0, policy_version 44726 (0.0009) +[2026-06-07 03:38:26,986][528169] Updated weights for policy 0, policy_version 44736 (0.0008) +[2026-06-07 03:38:27,593][528169] Updated weights for policy 0, policy_version 44746 (0.0008) +[2026-06-07 03:38:27,715][528169] Updated weights for policy 0, policy_version 44756 (0.0008) +[2026-06-07 03:38:27,857][528169] Updated weights for policy 0, policy_version 44767 (0.0008) +[2026-06-07 03:38:27,993][528169] Updated weights for policy 0, policy_version 44777 (0.0009) +[2026-06-07 03:38:28,127][528169] Updated weights for policy 0, policy_version 44787 (0.0008) +[2026-06-07 03:38:28,258][528169] Updated weights for policy 0, policy_version 44797 (0.0008) +[2026-06-07 03:38:28,407][528169] Updated weights for policy 0, policy_version 44808 (0.0008) +[2026-06-07 03:38:29,018][528169] Updated weights for policy 0, policy_version 44818 (0.0008) +[2026-06-07 03:38:29,171][528169] Updated weights for policy 0, policy_version 44830 (0.0008) +[2026-06-07 03:38:29,313][528169] Updated weights for policy 0, policy_version 44840 (0.0009) +[2026-06-07 03:38:29,438][528169] Updated weights for policy 0, policy_version 44850 (0.0008) +[2026-06-07 03:38:29,615][528169] Updated weights for policy 0, policy_version 44863 (0.0008) +[2026-06-07 03:38:30,103][527010] Fps is (10 sec: 26214.5, 60 sec: 25122.1, 300 sec: 24770.4). Total num frames: 23003136. Throughput: 0: 24988.5. Samples: 22968960. Policy #0 lag: (min: 63.0, avg: 74.3, max: 127.0) +[2026-06-07 03:38:30,104][527010] Avg episode reward: [(0, '129.284')] +[2026-06-07 03:38:30,272][528169] Updated weights for policy 0, policy_version 44874 (0.0008) +[2026-06-07 03:38:30,402][528169] Updated weights for policy 0, policy_version 44884 (0.0008) +[2026-06-07 03:38:30,544][528169] Updated weights for policy 0, policy_version 44895 (0.0008) +[2026-06-07 03:38:30,674][528169] Updated weights for policy 0, policy_version 44905 (0.0008) +[2026-06-07 03:38:30,806][528169] Updated weights for policy 0, policy_version 44915 (0.0008) +[2026-06-07 03:38:30,946][528169] Updated weights for policy 0, policy_version 44925 (0.0008) +[2026-06-07 03:38:31,081][528169] Updated weights for policy 0, policy_version 44935 (0.0009) +[2026-06-07 03:38:31,667][528169] Updated weights for policy 0, policy_version 44945 (0.0008) +[2026-06-07 03:38:31,796][528169] Updated weights for policy 0, policy_version 44955 (0.0008) +[2026-06-07 03:38:31,929][528169] Updated weights for policy 0, policy_version 44965 (0.0009) +[2026-06-07 03:38:32,056][528169] Updated weights for policy 0, policy_version 44975 (0.0008) +[2026-06-07 03:38:32,190][528169] Updated weights for policy 0, policy_version 44985 (0.0008) +[2026-06-07 03:38:32,327][528169] Updated weights for policy 0, policy_version 44995 (0.0008) +[2026-06-07 03:38:32,916][528169] Updated weights for policy 0, policy_version 45005 (0.0008) +[2026-06-07 03:38:33,051][528169] Updated weights for policy 0, policy_version 45015 (0.0008) +[2026-06-07 03:38:33,187][528169] Updated weights for policy 0, policy_version 45025 (0.0008) +[2026-06-07 03:38:33,316][528169] Updated weights for policy 0, policy_version 45035 (0.0008) +[2026-06-07 03:38:33,449][528169] Updated weights for policy 0, policy_version 45045 (0.0008) +[2026-06-07 03:38:33,612][528169] Updated weights for policy 0, policy_version 45057 (0.0008) +[2026-06-07 03:38:34,241][528169] Updated weights for policy 0, policy_version 45069 (0.0008) +[2026-06-07 03:38:34,380][528169] Updated weights for policy 0, policy_version 45079 (0.0009) +[2026-06-07 03:38:34,500][528169] Updated weights for policy 0, policy_version 45089 (0.0008) +[2026-06-07 03:38:34,643][528169] Updated weights for policy 0, policy_version 45100 (0.0008) +[2026-06-07 03:38:34,782][528169] Updated weights for policy 0, policy_version 45110 (0.0008) +[2026-06-07 03:38:34,934][528169] Updated weights for policy 0, policy_version 45122 (0.0008) +[2026-06-07 03:38:35,103][527010] Fps is (10 sec: 26214.4, 60 sec: 25122.1, 300 sec: 24770.4). Total num frames: 23134208. Throughput: 0: 24860.4. Samples: 23121408. Policy #0 lag: (min: 63.0, avg: 74.3, max: 127.0) +[2026-06-07 03:38:35,104][527010] Avg episode reward: [(0, '137.623')] +[2026-06-07 03:38:35,545][528169] Updated weights for policy 0, policy_version 45132 (0.0008) +[2026-06-07 03:38:35,702][528169] Updated weights for policy 0, policy_version 45144 (0.0009) +[2026-06-07 03:38:35,836][528169] Updated weights for policy 0, policy_version 45154 (0.0008) +[2026-06-07 03:38:35,966][528169] Updated weights for policy 0, policy_version 45164 (0.0008) +[2026-06-07 03:38:36,118][528169] Updated weights for policy 0, policy_version 45175 (0.0008) +[2026-06-07 03:38:36,252][528169] Updated weights for policy 0, policy_version 45185 (0.0008) +[2026-06-07 03:38:36,879][528169] Updated weights for policy 0, policy_version 45196 (0.0008) +[2026-06-07 03:38:37,003][528169] Updated weights for policy 0, policy_version 45206 (0.0008) +[2026-06-07 03:38:37,165][528169] Updated weights for policy 0, policy_version 45218 (0.0008) +[2026-06-07 03:38:37,293][528169] Updated weights for policy 0, policy_version 45228 (0.0008) +[2026-06-07 03:38:37,461][528169] Updated weights for policy 0, policy_version 45240 (0.0008) +[2026-06-07 03:38:37,601][528169] Updated weights for policy 0, policy_version 45251 (0.0008) +[2026-06-07 03:38:38,230][528169] Updated weights for policy 0, policy_version 45262 (0.0006) +[2026-06-07 03:38:38,362][528169] Updated weights for policy 0, policy_version 45272 (0.0005) +[2026-06-07 03:38:38,492][528169] Updated weights for policy 0, policy_version 45282 (0.0005) +[2026-06-07 03:38:38,628][528169] Updated weights for policy 0, policy_version 45292 (0.0005) +[2026-06-07 03:38:38,760][528169] Updated weights for policy 0, policy_version 45302 (0.0005) +[2026-06-07 03:38:38,904][528169] Updated weights for policy 0, policy_version 45313 (0.0005) +[2026-06-07 03:38:39,551][528169] Updated weights for policy 0, policy_version 45325 (0.0006) +[2026-06-07 03:38:39,673][528169] Updated weights for policy 0, policy_version 45335 (0.0005) +[2026-06-07 03:38:39,850][528169] Updated weights for policy 0, policy_version 45349 (0.0007) +[2026-06-07 03:38:40,020][528169] Updated weights for policy 0, policy_version 45362 (0.0008) +[2026-06-07 03:38:40,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24576.0, 300 sec: 24770.4). Total num frames: 23232512. Throughput: 0: 24900.3. Samples: 23269504. Policy #0 lag: (min: 63.0, avg: 74.3, max: 127.0) +[2026-06-07 03:38:40,104][527010] Avg episode reward: [(0, '150.473')] +[2026-06-07 03:38:40,173][528169] Updated weights for policy 0, policy_version 45374 (0.0008) +[2026-06-07 03:38:40,297][528093] Saving new best policy, reward=150.473! +[2026-06-07 03:38:40,821][528169] Updated weights for policy 0, policy_version 45385 (0.0008) +[2026-06-07 03:38:40,957][528169] Updated weights for policy 0, policy_version 45396 (0.0008) +[2026-06-07 03:38:41,104][528169] Updated weights for policy 0, policy_version 45408 (0.0008) +[2026-06-07 03:38:41,252][528169] Updated weights for policy 0, policy_version 45420 (0.0006) +[2026-06-07 03:38:41,418][528169] Updated weights for policy 0, policy_version 45433 (0.0005) +[2026-06-07 03:38:41,564][528169] Updated weights for policy 0, policy_version 45444 (0.0008) +[2026-06-07 03:38:42,202][528169] Updated weights for policy 0, policy_version 45456 (0.0011) +[2026-06-07 03:38:42,338][528169] Updated weights for policy 0, policy_version 45467 (0.0010) +[2026-06-07 03:38:42,463][528169] Updated weights for policy 0, policy_version 45477 (0.0009) +[2026-06-07 03:38:42,619][528169] Updated weights for policy 0, policy_version 45489 (0.0008) +[2026-06-07 03:38:42,777][528169] Updated weights for policy 0, policy_version 45501 (0.0008) +[2026-06-07 03:38:43,437][528169] Updated weights for policy 0, policy_version 45513 (0.0009) +[2026-06-07 03:38:43,578][528169] Updated weights for policy 0, policy_version 45524 (0.0009) +[2026-06-07 03:38:43,702][528169] Updated weights for policy 0, policy_version 45534 (0.0008) +[2026-06-07 03:38:43,856][528169] Updated weights for policy 0, policy_version 45546 (0.0008) +[2026-06-07 03:38:43,993][528169] Updated weights for policy 0, policy_version 45557 (0.0009) +[2026-06-07 03:38:44,176][528169] Updated weights for policy 0, policy_version 45571 (0.0008) +[2026-06-07 03:38:44,836][528169] Updated weights for policy 0, policy_version 45584 (0.0009) +[2026-06-07 03:38:44,961][528169] Updated weights for policy 0, policy_version 45594 (0.0008) +[2026-06-07 03:38:45,091][528169] Updated weights for policy 0, policy_version 45604 (0.0008) +[2026-06-07 03:38:45,103][527010] Fps is (10 sec: 22938.0, 60 sec: 24576.0, 300 sec: 24770.4). Total num frames: 23363584. Throughput: 0: 24854.9. Samples: 23347200. Policy #0 lag: (min: 63.0, avg: 74.3, max: 127.0) +[2026-06-07 03:38:45,104][527010] Avg episode reward: [(0, '150.211')] +[2026-06-07 03:38:45,234][528169] Updated weights for policy 0, policy_version 45616 (0.0009) +[2026-06-07 03:38:45,384][528169] Updated weights for policy 0, policy_version 45627 (0.0008) +[2026-06-07 03:38:45,508][528169] Updated weights for policy 0, policy_version 45637 (0.0008) +[2026-06-07 03:38:46,169][528169] Updated weights for policy 0, policy_version 45649 (0.0008) +[2026-06-07 03:38:46,310][528169] Updated weights for policy 0, policy_version 45660 (0.0008) +[2026-06-07 03:38:46,477][528169] Updated weights for policy 0, policy_version 45673 (0.0008) +[2026-06-07 03:38:46,633][528169] Updated weights for policy 0, policy_version 45685 (0.0008) +[2026-06-07 03:38:46,794][528169] Updated weights for policy 0, policy_version 45698 (0.0008) +[2026-06-07 03:38:47,442][528169] Updated weights for policy 0, policy_version 45710 (0.0008) +[2026-06-07 03:38:47,603][528169] Updated weights for policy 0, policy_version 45723 (0.0008) +[2026-06-07 03:38:47,762][528169] Updated weights for policy 0, policy_version 45735 (0.0008) +[2026-06-07 03:38:47,893][528169] Updated weights for policy 0, policy_version 45746 (0.0008) +[2026-06-07 03:38:48,052][528169] Updated weights for policy 0, policy_version 45758 (0.0008) +[2026-06-07 03:38:48,707][528169] Updated weights for policy 0, policy_version 45770 (0.0008) +[2026-06-07 03:38:48,842][528169] Updated weights for policy 0, policy_version 45781 (0.0008) +[2026-06-07 03:38:48,988][528169] Updated weights for policy 0, policy_version 45792 (0.0008) +[2026-06-07 03:38:49,124][528169] Updated weights for policy 0, policy_version 45803 (0.0008) +[2026-06-07 03:38:49,302][528169] Updated weights for policy 0, policy_version 45817 (0.0008) +[2026-06-07 03:38:49,457][528169] Updated weights for policy 0, policy_version 45829 (0.0008) +[2026-06-07 03:38:50,103][527010] Fps is (10 sec: 26214.4, 60 sec: 24576.0, 300 sec: 24770.4). Total num frames: 23494656. Throughput: 0: 24863.3. Samples: 23488512. Policy #0 lag: (min: 63.0, avg: 74.3, max: 127.0) +[2026-06-07 03:38:50,104][527010] Avg episode reward: [(0, '150.284')] +[2026-06-07 03:38:50,124][528169] Updated weights for policy 0, policy_version 45839 (0.0008) +[2026-06-07 03:38:50,252][528169] Updated weights for policy 0, policy_version 45849 (0.0009) +[2026-06-07 03:38:50,396][528169] Updated weights for policy 0, policy_version 45860 (0.0008) +[2026-06-07 03:38:50,530][528169] Updated weights for policy 0, policy_version 45870 (0.0009) +[2026-06-07 03:38:50,687][528169] Updated weights for policy 0, policy_version 45882 (0.0008) +[2026-06-07 03:38:50,819][528169] Updated weights for policy 0, policy_version 45892 (0.0008) +[2026-06-07 03:38:51,429][528169] Updated weights for policy 0, policy_version 45902 (0.0008) +[2026-06-07 03:38:51,569][528169] Updated weights for policy 0, policy_version 45913 (0.0008) +[2026-06-07 03:38:51,697][528169] Updated weights for policy 0, policy_version 45923 (0.0008) +[2026-06-07 03:38:51,865][528169] Updated weights for policy 0, policy_version 45935 (0.0008) +[2026-06-07 03:38:51,989][528169] Updated weights for policy 0, policy_version 45945 (0.0008) +[2026-06-07 03:38:52,131][528169] Updated weights for policy 0, policy_version 45955 (0.0008) +[2026-06-07 03:38:52,739][528169] Updated weights for policy 0, policy_version 45965 (0.0008) +[2026-06-07 03:38:52,882][528169] Updated weights for policy 0, policy_version 45976 (0.0008) +[2026-06-07 03:38:53,029][528169] Updated weights for policy 0, policy_version 45987 (0.0008) +[2026-06-07 03:38:53,177][528169] Updated weights for policy 0, policy_version 45998 (0.0008) +[2026-06-07 03:38:53,307][528169] Updated weights for policy 0, policy_version 46008 (0.0008) +[2026-06-07 03:38:53,445][528169] Updated weights for policy 0, policy_version 46018 (0.0008) +[2026-06-07 03:38:54,056][528169] Updated weights for policy 0, policy_version 46029 (0.0008) +[2026-06-07 03:38:54,187][528169] Updated weights for policy 0, policy_version 46039 (0.0008) +[2026-06-07 03:38:54,313][528169] Updated weights for policy 0, policy_version 46049 (0.0008) +[2026-06-07 03:38:54,443][528169] Updated weights for policy 0, policy_version 46059 (0.0008) +[2026-06-07 03:38:54,582][528169] Updated weights for policy 0, policy_version 46069 (0.0008) +[2026-06-07 03:38:54,722][528169] Updated weights for policy 0, policy_version 46080 (0.0008) +[2026-06-07 03:38:55,103][527010] Fps is (10 sec: 26214.4, 60 sec: 25122.1, 300 sec: 24770.4). Total num frames: 23625728. Throughput: 0: 24840.5. Samples: 23645440. Policy #0 lag: (min: 63.0, avg: 74.3, max: 127.0) +[2026-06-07 03:38:55,104][527010] Avg episode reward: [(0, '114.131')] +[2026-06-07 03:38:55,352][528169] Updated weights for policy 0, policy_version 46091 (0.0008) +[2026-06-07 03:38:55,481][528169] Updated weights for policy 0, policy_version 46101 (0.0008) +[2026-06-07 03:38:55,625][528169] Updated weights for policy 0, policy_version 46112 (0.0008) +[2026-06-07 03:38:55,781][528169] Updated weights for policy 0, policy_version 46124 (0.0008) +[2026-06-07 03:38:55,920][528169] Updated weights for policy 0, policy_version 46134 (0.0008) +[2026-06-07 03:38:56,053][528169] Updated weights for policy 0, policy_version 46144 (0.0008) +[2026-06-07 03:38:56,680][528169] Updated weights for policy 0, policy_version 46154 (0.0008) +[2026-06-07 03:38:56,802][528169] Updated weights for policy 0, policy_version 46164 (0.0008) +[2026-06-07 03:38:56,930][528169] Updated weights for policy 0, policy_version 46174 (0.0009) +[2026-06-07 03:38:57,088][528169] Updated weights for policy 0, policy_version 46186 (0.0008) +[2026-06-07 03:38:57,224][528169] Updated weights for policy 0, policy_version 46196 (0.0008) +[2026-06-07 03:38:57,361][528169] Updated weights for policy 0, policy_version 46206 (0.0008) +[2026-06-07 03:38:57,495][528169] Updated weights for policy 0, policy_version 46216 (0.0010) +[2026-06-07 03:38:58,096][528169] Updated weights for policy 0, policy_version 46226 (0.0008) +[2026-06-07 03:38:58,233][528169] Updated weights for policy 0, policy_version 46236 (0.0008) +[2026-06-07 03:38:58,364][528169] Updated weights for policy 0, policy_version 46246 (0.0008) +[2026-06-07 03:38:58,508][528169] Updated weights for policy 0, policy_version 46257 (0.0009) +[2026-06-07 03:38:58,646][528169] Updated weights for policy 0, policy_version 46267 (0.0008) +[2026-06-07 03:38:58,805][528169] Updated weights for policy 0, policy_version 46279 (0.0008) +[2026-06-07 03:38:59,428][528169] Updated weights for policy 0, policy_version 46290 (0.0008) +[2026-06-07 03:38:59,572][528169] Updated weights for policy 0, policy_version 46301 (0.0008) +[2026-06-07 03:38:59,698][528169] Updated weights for policy 0, policy_version 46311 (0.0009) +[2026-06-07 03:38:59,841][528169] Updated weights for policy 0, policy_version 46321 (0.0008) +[2026-06-07 03:38:59,970][528169] Updated weights for policy 0, policy_version 46331 (0.0008) +[2026-06-07 03:39:00,103][527010] Fps is (10 sec: 22937.6, 60 sec: 24576.1, 300 sec: 24770.4). Total num frames: 23724032. Throughput: 0: 24897.4. Samples: 23716096. Policy #0 lag: (min: 63.0, avg: 75.4, max: 127.0) +[2026-06-07 03:39:00,104][527010] Avg episode reward: [(0, '137.234')] +[2026-06-07 03:39:00,110][528169] Updated weights for policy 0, policy_version 46341 (0.0008) +[2026-06-07 03:39:00,711][528169] Updated weights for policy 0, policy_version 46351 (0.0008) +[2026-06-07 03:39:00,859][528169] Updated weights for policy 0, policy_version 46362 (0.0009) +[2026-06-07 03:39:00,986][528169] Updated weights for policy 0, policy_version 46372 (0.0008) +[2026-06-07 03:39:01,120][528169] Updated weights for policy 0, policy_version 46382 (0.0009) +[2026-06-07 03:39:01,260][528169] Updated weights for policy 0, policy_version 46392 (0.0008) +[2026-06-07 03:39:01,392][528169] Updated weights for policy 0, policy_version 46402 (0.0008) +[2026-06-07 03:39:02,004][528169] Updated weights for policy 0, policy_version 46413 (0.0008) +[2026-06-07 03:39:02,124][528169] Updated weights for policy 0, policy_version 46423 (0.0008) +[2026-06-07 03:39:02,261][528169] Updated weights for policy 0, policy_version 46433 (0.0008) +[2026-06-07 03:39:02,407][528169] Updated weights for policy 0, policy_version 46444 (0.0008) +[2026-06-07 03:39:02,541][528169] Updated weights for policy 0, policy_version 46454 (0.0009) +[2026-06-07 03:39:02,678][528169] Updated weights for policy 0, policy_version 46464 (0.0009) +[2026-06-07 03:39:03,290][528169] Updated weights for policy 0, policy_version 46475 (0.0008) +[2026-06-07 03:39:03,425][528169] Updated weights for policy 0, policy_version 46485 (0.0008) +[2026-06-07 03:39:03,553][528169] Updated weights for policy 0, policy_version 46495 (0.0008) +[2026-06-07 03:39:03,681][528169] Updated weights for policy 0, policy_version 46505 (0.0008) +[2026-06-07 03:39:03,815][528169] Updated weights for policy 0, policy_version 46515 (0.0008) +[2026-06-07 03:39:03,964][528169] Updated weights for policy 0, policy_version 46526 (0.0008) +[2026-06-07 03:39:04,094][528169] Updated weights for policy 0, policy_version 46536 (0.0009) +[2026-06-07 03:39:04,697][528169] Updated weights for policy 0, policy_version 46546 (0.0008) +[2026-06-07 03:39:04,833][528169] Updated weights for policy 0, policy_version 46556 (0.0008) +[2026-06-07 03:39:04,986][528169] Updated weights for policy 0, policy_version 46568 (0.0008) +[2026-06-07 03:39:05,103][527010] Fps is (10 sec: 22937.5, 60 sec: 24576.0, 300 sec: 24770.4). Total num frames: 23855104. Throughput: 0: 24871.8. Samples: 23871744. Policy #0 lag: (min: 63.0, avg: 75.4, max: 127.0) +[2026-06-07 03:39:05,103][527010] Avg episode reward: [(0, '134.110')] +[2026-06-07 03:39:05,107][528169] Updated weights for policy 0, policy_version 46578 (0.0009) +[2026-06-07 03:39:05,267][528169] Updated weights for policy 0, policy_version 46590 (0.0008) +[2026-06-07 03:39:05,398][528169] Updated weights for policy 0, policy_version 46600 (0.0008) +[2026-06-07 03:39:06,013][528169] Updated weights for policy 0, policy_version 46611 (0.0008) +[2026-06-07 03:39:06,163][528169] Updated weights for policy 0, policy_version 46623 (0.0008) +[2026-06-07 03:39:06,295][528169] Updated weights for policy 0, policy_version 46633 (0.0008) +[2026-06-07 03:39:06,444][528169] Updated weights for policy 0, policy_version 46645 (0.0008) +[2026-06-07 03:39:06,587][528169] Updated weights for policy 0, policy_version 46656 (0.0008) +[2026-06-07 03:39:07,230][528169] Updated weights for policy 0, policy_version 46667 (0.0008) +[2026-06-07 03:39:07,389][528169] Updated weights for policy 0, policy_version 46680 (0.0008) +[2026-06-07 03:39:07,539][528169] Updated weights for policy 0, policy_version 46691 (0.0008) +[2026-06-07 03:39:07,688][528169] Updated weights for policy 0, policy_version 46703 (0.0008) +[2026-06-07 03:39:07,812][528169] Updated weights for policy 0, policy_version 46713 (0.0008) +[2026-06-07 03:39:07,959][528169] Updated weights for policy 0, policy_version 46724 (0.0008) +[2026-06-07 03:39:08,619][528169] Updated weights for policy 0, policy_version 46736 (0.0008) +[2026-06-07 03:39:08,746][528169] Updated weights for policy 0, policy_version 46746 (0.0008) +[2026-06-07 03:39:08,899][528169] Updated weights for policy 0, policy_version 46758 (0.0008) +[2026-06-07 03:39:09,031][528169] Updated weights for policy 0, policy_version 46769 (0.0008) +[2026-06-07 03:39:09,177][528169] Updated weights for policy 0, policy_version 46780 (0.0009) +[2026-06-07 03:39:09,323][528169] Updated weights for policy 0, policy_version 46791 (0.0008) +[2026-06-07 03:39:09,943][528169] Updated weights for policy 0, policy_version 46801 (0.0008) +[2026-06-07 03:39:10,069][528169] Updated weights for policy 0, policy_version 46811 (0.0008) +[2026-06-07 03:39:10,103][527010] Fps is (10 sec: 26214.2, 60 sec: 24576.0, 300 sec: 24770.4). Total num frames: 23986176. Throughput: 0: 24840.6. Samples: 24012160. Policy #0 lag: (min: 63.0, avg: 75.4, max: 127.0) +[2026-06-07 03:39:10,104][527010] Avg episode reward: [(0, '137.922')] +[2026-06-07 03:39:10,218][528169] Updated weights for policy 0, policy_version 46822 (0.0008) +[2026-06-07 03:39:10,403][528169] Updated weights for policy 0, policy_version 46836 (0.0008) +[2026-06-07 03:39:10,551][528169] Updated weights for policy 0, policy_version 46847 (0.0008) +[2026-06-07 03:39:11,159][528169] Updated weights for policy 0, policy_version 46858 (0.0009) +[2026-06-07 03:39:11,306][528169] Updated weights for policy 0, policy_version 46869 (0.0008) +[2026-06-07 03:39:11,438][528169] Updated weights for policy 0, policy_version 46879 (0.0008) +[2026-06-07 03:39:11,566][528169] Updated weights for policy 0, policy_version 46889 (0.0009) +[2026-06-07 03:39:11,712][528169] Updated weights for policy 0, policy_version 46900 (0.0008) +[2026-06-07 03:39:11,863][528169] Updated weights for policy 0, policy_version 46911 (0.0008) +[2026-06-07 03:39:12,452][528169] Updated weights for policy 0, policy_version 46921 (0.0008) +[2026-06-07 03:39:12,596][528169] Updated weights for policy 0, policy_version 46932 (0.0010) +[2026-06-07 03:39:12,723][528169] Updated weights for policy 0, policy_version 46942 (0.0008) +[2026-06-07 03:39:12,847][528169] Updated weights for policy 0, policy_version 46952 (0.0008) +[2026-06-07 03:39:13,013][528169] Updated weights for policy 0, policy_version 46964 (0.0008) +[2026-06-07 03:39:13,144][528169] Updated weights for policy 0, policy_version 46974 (0.0009) +[2026-06-07 03:39:13,775][528169] Updated weights for policy 0, policy_version 46985 (0.0009) +[2026-06-07 03:39:13,916][528169] Updated weights for policy 0, policy_version 46996 (0.0011) +[2026-06-07 03:39:14,046][528169] Updated weights for policy 0, policy_version 47006 (0.0011) +[2026-06-07 03:39:14,177][528169] Updated weights for policy 0, policy_version 47016 (0.0012) +[2026-06-07 03:39:14,307][528169] Updated weights for policy 0, policy_version 47026 (0.0010) +[2026-06-07 03:39:14,460][528169] Updated weights for policy 0, policy_version 47037 (0.0008) +[2026-06-07 03:39:14,591][528169] Updated weights for policy 0, policy_version 47047 (0.0008) +[2026-06-07 03:39:15,103][527010] Fps is (10 sec: 26214.4, 60 sec: 25122.1, 300 sec: 24770.4). Total num frames: 24117248. Throughput: 0: 24911.6. Samples: 24089984. Policy #0 lag: (min: 63.0, avg: 75.4, max: 127.0) +[2026-06-07 03:39:15,104][527010] Avg episode reward: [(0, '148.669')] +[2026-06-07 03:39:15,206][528169] Updated weights for policy 0, policy_version 47059 (0.0008) +[2026-06-07 03:39:15,346][528169] Updated weights for policy 0, policy_version 47070 (0.0008) +[2026-06-07 03:39:15,484][528169] Updated weights for policy 0, policy_version 47080 (0.0008) +[2026-06-07 03:39:15,618][528169] Updated weights for policy 0, policy_version 47090 (0.0008) +[2026-06-07 03:39:15,751][528169] Updated weights for policy 0, policy_version 47100 (0.0008) +[2026-06-07 03:39:15,883][528169] Updated weights for policy 0, policy_version 47110 (0.0008) +[2026-06-07 03:39:16,482][528169] Updated weights for policy 0, policy_version 47120 (0.0008) +[2026-06-07 03:39:16,617][528169] Updated weights for policy 0, policy_version 47130 (0.0008) +[2026-06-07 03:39:16,747][528169] Updated weights for policy 0, policy_version 47140 (0.0008) +[2026-06-07 03:39:16,887][528169] Updated weights for policy 0, policy_version 47151 (0.0008) +[2026-06-07 03:39:17,038][528169] Updated weights for policy 0, policy_version 47162 (0.0009) +[2026-06-07 03:39:17,173][528169] Updated weights for policy 0, policy_version 47172 (0.0008) +[2026-06-07 03:39:17,807][528169] Updated weights for policy 0, policy_version 47184 (0.0009) +[2026-06-07 03:39:17,941][528169] Updated weights for policy 0, policy_version 47194 (0.0009) +[2026-06-07 03:39:18,084][528169] Updated weights for policy 0, policy_version 47205 (0.0010) +[2026-06-07 03:39:18,231][528169] Updated weights for policy 0, policy_version 47216 (0.0009) +[2026-06-07 03:39:18,362][528169] Updated weights for policy 0, policy_version 47226 (0.0008) +[2026-06-07 03:39:18,513][528169] Updated weights for policy 0, policy_version 47237 (0.0008) +[2026-06-07 03:39:19,137][528169] Updated weights for policy 0, policy_version 47248 (0.0010) +[2026-06-07 03:39:19,265][528169] Updated weights for policy 0, policy_version 47259 (0.0008) +[2026-06-07 03:39:19,406][528169] Updated weights for policy 0, policy_version 47269 (0.0008) +[2026-06-07 03:39:19,538][528169] Updated weights for policy 0, policy_version 47279 (0.0008) +[2026-06-07 03:39:19,682][528169] Updated weights for policy 0, policy_version 47290 (0.0009) +[2026-06-07 03:39:19,823][528169] Updated weights for policy 0, policy_version 47300 (0.0008) +[2026-06-07 03:39:20,103][527010] Fps is (10 sec: 26214.1, 60 sec: 25122.1, 300 sec: 24770.4). Total num frames: 24248320. Throughput: 0: 24792.2. Samples: 24237056. Policy #0 lag: (min: 63.0, avg: 75.4, max: 127.0) +[2026-06-07 03:39:20,104][527010] Avg episode reward: [(0, '136.633')] +[2026-06-07 03:39:20,461][528169] Updated weights for policy 0, policy_version 47311 (0.0007) +[2026-06-07 03:39:20,596][528169] Updated weights for policy 0, policy_version 47321 (0.0008) +[2026-06-07 03:39:20,733][528169] Updated weights for policy 0, policy_version 47332 (0.0009) +[2026-06-07 03:39:20,874][528169] Updated weights for policy 0, policy_version 47342 (0.0008) +[2026-06-07 03:39:21,018][528169] Updated weights for policy 0, policy_version 47353 (0.0008) +[2026-06-07 03:39:21,164][528169] Updated weights for policy 0, policy_version 47364 (0.0009) +[2026-06-07 03:39:21,767][528169] Updated weights for policy 0, policy_version 47374 (0.0009) +[2026-06-07 03:39:21,896][528169] Updated weights for policy 0, policy_version 47384 (0.0009) +[2026-06-07 03:39:22,024][528169] Updated weights for policy 0, policy_version 47394 (0.0010) +[2026-06-07 03:39:22,173][528169] Updated weights for policy 0, policy_version 47405 (0.0009) +[2026-06-07 03:39:22,298][528169] Updated weights for policy 0, policy_version 47415 (0.0008) +[2026-06-07 03:39:22,452][528169] Updated weights for policy 0, policy_version 47426 (0.0009) +[2026-06-07 03:39:23,037][528169] Updated weights for policy 0, policy_version 47437 (0.0007) +[2026-06-07 03:39:23,177][528169] Updated weights for policy 0, policy_version 47448 (0.0008) +[2026-06-07 03:39:23,323][528169] Updated weights for policy 0, policy_version 47459 (0.0008) +[2026-06-07 03:39:23,480][528169] Updated weights for policy 0, policy_version 47471 (0.0008) +[2026-06-07 03:39:23,626][528169] Updated weights for policy 0, policy_version 47482 (0.0008) +[2026-06-07 03:39:23,758][528169] Updated weights for policy 0, policy_version 47492 (0.0008) +[2026-06-07 03:39:24,398][528169] Updated weights for policy 0, policy_version 47503 (0.0008) +[2026-06-07 03:39:24,527][528169] Updated weights for policy 0, policy_version 47513 (0.0008) +[2026-06-07 03:39:24,687][528169] Updated weights for policy 0, policy_version 47525 (0.0008) +[2026-06-07 03:39:24,815][528169] Updated weights for policy 0, policy_version 47535 (0.0008) +[2026-06-07 03:39:24,949][528169] Updated weights for policy 0, policy_version 47545 (0.0008) +[2026-06-07 03:39:25,080][528169] Updated weights for policy 0, policy_version 47555 (0.0008) +[2026-06-07 03:39:25,103][527010] Fps is (10 sec: 22937.5, 60 sec: 24576.0, 300 sec: 24770.4). Total num frames: 24346624. Throughput: 0: 24923.0. Samples: 24391040. Policy #0 lag: (min: 63.0, avg: 75.4, max: 127.0) +[2026-06-07 03:39:25,104][527010] Avg episode reward: [(0, '162.209')] +[2026-06-07 03:39:25,143][528093] Saving new best policy, reward=162.209! +[2026-06-07 03:39:25,698][528169] Updated weights for policy 0, policy_version 47566 (0.0008) +[2026-06-07 03:39:25,828][528169] Updated weights for policy 0, policy_version 47576 (0.0008) +[2026-06-07 03:39:25,969][528169] Updated weights for policy 0, policy_version 47586 (0.0008) +[2026-06-07 03:39:26,107][528169] Updated weights for policy 0, policy_version 47597 (0.0008) +[2026-06-07 03:39:26,246][528169] Updated weights for policy 0, policy_version 47607 (0.0008) +[2026-06-07 03:39:26,374][528169] Updated weights for policy 0, policy_version 47617 (0.0008) +[2026-06-07 03:39:26,985][528169] Updated weights for policy 0, policy_version 47628 (0.0008) +[2026-06-07 03:39:27,116][528169] Updated weights for policy 0, policy_version 47638 (0.0007) +[2026-06-07 03:39:27,263][528169] Updated weights for policy 0, policy_version 47649 (0.0008) +[2026-06-07 03:39:27,414][528169] Updated weights for policy 0, policy_version 47660 (0.0008) +[2026-06-07 03:39:27,545][528169] Updated weights for policy 0, policy_version 47670 (0.0008) +[2026-06-07 03:39:27,685][528169] Updated weights for policy 0, policy_version 47680 (0.0008) +[2026-06-07 03:39:28,292][528169] Updated weights for policy 0, policy_version 47691 (0.0009) +[2026-06-07 03:39:28,432][528169] Updated weights for policy 0, policy_version 47701 (0.0008) +[2026-06-07 03:39:28,562][528169] Updated weights for policy 0, policy_version 47711 (0.0008) +[2026-06-07 03:39:28,691][528169] Updated weights for policy 0, policy_version 47721 (0.0008) +[2026-06-07 03:39:28,837][528169] Updated weights for policy 0, policy_version 47732 (0.0009) +[2026-06-07 03:39:28,980][528169] Updated weights for policy 0, policy_version 47742 (0.0008) +[2026-06-07 03:39:29,632][528169] Updated weights for policy 0, policy_version 47756 (0.0008) +[2026-06-07 03:39:29,775][528169] Updated weights for policy 0, policy_version 47767 (0.0008) +[2026-06-07 03:39:29,908][528169] Updated weights for policy 0, policy_version 47777 (0.0008) +[2026-06-07 03:39:30,038][528169] Updated weights for policy 0, policy_version 47787 (0.0008) +[2026-06-07 03:39:30,103][527010] Fps is (10 sec: 22937.8, 60 sec: 24576.0, 300 sec: 24770.4). Total num frames: 24477696. Throughput: 0: 24780.7. Samples: 24462336. Policy #0 lag: (min: 54.0, avg: 67.4, max: 118.0) +[2026-06-07 03:39:30,104][527010] Avg episode reward: [(0, '159.792')] +[2026-06-07 03:39:30,179][528169] Updated weights for policy 0, policy_version 47798 (0.0008) +[2026-06-07 03:39:30,331][528169] Updated weights for policy 0, policy_version 47809 (0.0008) +[2026-06-07 03:39:30,948][528169] Updated weights for policy 0, policy_version 47820 (0.0008) +[2026-06-07 03:39:31,075][528169] Updated weights for policy 0, policy_version 47830 (0.0008) +[2026-06-07 03:39:31,227][528169] Updated weights for policy 0, policy_version 47841 (0.0008) +[2026-06-07 03:39:31,349][528169] Updated weights for policy 0, policy_version 47851 (0.0008) +[2026-06-07 03:39:31,498][528169] Updated weights for policy 0, policy_version 47862 (0.0008) +[2026-06-07 03:39:31,636][528169] Updated weights for policy 0, policy_version 47872 (0.0008) +[2026-06-07 03:39:32,249][528169] Updated weights for policy 0, policy_version 47882 (0.0008) +[2026-06-07 03:39:32,391][528169] Updated weights for policy 0, policy_version 47893 (0.0008) +[2026-06-07 03:39:32,516][528169] Updated weights for policy 0, policy_version 47903 (0.0008) +[2026-06-07 03:39:32,666][528169] Updated weights for policy 0, policy_version 47914 (0.0008) +[2026-06-07 03:39:32,816][528169] Updated weights for policy 0, policy_version 47925 (0.0008) +[2026-06-07 03:39:32,953][528169] Updated weights for policy 0, policy_version 47936 (0.0008) +[2026-06-07 03:39:33,562][528169] Updated weights for policy 0, policy_version 47946 (0.0008) +[2026-06-07 03:39:33,689][528169] Updated weights for policy 0, policy_version 47956 (0.0008) +[2026-06-07 03:39:33,817][528169] Updated weights for policy 0, policy_version 47966 (0.0008) +[2026-06-07 03:39:33,954][528169] Updated weights for policy 0, policy_version 47976 (0.0008) +[2026-06-07 03:39:34,102][528169] Updated weights for policy 0, policy_version 47987 (0.0008) +[2026-06-07 03:39:34,258][528169] Updated weights for policy 0, policy_version 47999 (0.0008) +[2026-06-07 03:39:34,882][528169] Updated weights for policy 0, policy_version 48010 (0.0009) +[2026-06-07 03:39:35,033][528169] Updated weights for policy 0, policy_version 48022 (0.0008) +[2026-06-07 03:39:35,103][527010] Fps is (10 sec: 26214.2, 60 sec: 24576.0, 300 sec: 24770.4). Total num frames: 24608768. Throughput: 0: 24866.1. Samples: 24607488. Policy #0 lag: (min: 54.0, avg: 67.4, max: 118.0) +[2026-06-07 03:39:35,104][527010] Avg episode reward: [(0, '144.332')] +[2026-06-07 03:39:35,169][528169] Updated weights for policy 0, policy_version 48032 (0.0008) +[2026-06-07 03:39:35,300][528169] Updated weights for policy 0, policy_version 48042 (0.0008) +[2026-06-07 03:39:35,428][528169] Updated weights for policy 0, policy_version 48052 (0.0008) +[2026-06-07 03:39:35,566][528169] Updated weights for policy 0, policy_version 48062 (0.0008) +[2026-06-07 03:39:35,695][528169] Updated weights for policy 0, policy_version 48072 (0.0008) +[2026-06-07 03:39:36,320][528169] Updated weights for policy 0, policy_version 48082 (0.0008) +[2026-06-07 03:39:36,466][528169] Updated weights for policy 0, policy_version 48093 (0.0008) +[2026-06-07 03:39:36,600][528169] Updated weights for policy 0, policy_version 48103 (0.0008) +[2026-06-07 03:39:36,727][528169] Updated weights for policy 0, policy_version 48113 (0.0008) +[2026-06-07 03:39:36,860][528169] Updated weights for policy 0, policy_version 48123 (0.0008) +[2026-06-07 03:39:36,994][528169] Updated weights for policy 0, policy_version 48133 (0.0008) +[2026-06-07 03:39:37,604][528169] Updated weights for policy 0, policy_version 48143 (0.0008) +[2026-06-07 03:39:37,773][528169] Updated weights for policy 0, policy_version 48156 (0.0008) +[2026-06-07 03:39:37,902][528169] Updated weights for policy 0, policy_version 48166 (0.0008) +[2026-06-07 03:39:38,034][528169] Updated weights for policy 0, policy_version 48176 (0.0008) +[2026-06-07 03:39:38,181][528169] Updated weights for policy 0, policy_version 48186 (0.0008) +[2026-06-07 03:39:38,319][528169] Updated weights for policy 0, policy_version 48197 (0.0008) +[2026-06-07 03:39:38,927][528169] Updated weights for policy 0, policy_version 48207 (0.0008) +[2026-06-07 03:39:39,066][528169] Updated weights for policy 0, policy_version 48218 (0.0008) +[2026-06-07 03:39:39,198][528169] Updated weights for policy 0, policy_version 48228 (0.0008) +[2026-06-07 03:39:39,331][528169] Updated weights for policy 0, policy_version 48238 (0.0008) +[2026-06-07 03:39:39,465][528169] Updated weights for policy 0, policy_version 48248 (0.0009) +[2026-06-07 03:39:39,597][528169] Updated weights for policy 0, policy_version 48258 (0.0008) +[2026-06-07 03:39:40,103][527010] Fps is (10 sec: 26214.6, 60 sec: 25122.1, 300 sec: 24770.4). Total num frames: 24739840. Throughput: 0: 24743.8. Samples: 24758912. Policy #0 lag: (min: 54.0, avg: 67.4, max: 118.0) +[2026-06-07 03:39:40,104][527010] Avg episode reward: [(0, '150.649')] +[2026-06-07 03:39:40,268][528169] Updated weights for policy 0, policy_version 48269 (0.0009) +[2026-06-07 03:39:40,401][528169] Updated weights for policy 0, policy_version 48279 (0.0008) +[2026-06-07 03:39:40,548][528169] Updated weights for policy 0, policy_version 48290 (0.0008) +[2026-06-07 03:39:40,681][528169] Updated weights for policy 0, policy_version 48300 (0.0008) +[2026-06-07 03:39:40,813][528169] Updated weights for policy 0, policy_version 48310 (0.0008) +[2026-06-07 03:39:40,942][528169] Updated weights for policy 0, policy_version 48320 (0.0008) +[2026-06-07 03:39:41,460][528169] Updated weights for policy 0, policy_version 48330 (0.0007) +[2026-06-07 03:39:41,589][528169] Updated weights for policy 0, policy_version 48340 (0.0004) +[2026-06-07 03:39:41,737][528169] Updated weights for policy 0, policy_version 48351 (0.0004) +[2026-06-07 03:39:41,873][528169] Updated weights for policy 0, policy_version 48361 (0.0004) +[2026-06-07 03:39:42,002][528169] Updated weights for policy 0, policy_version 48371 (0.0005) +[2026-06-07 03:39:42,154][528169] Updated weights for policy 0, policy_version 48382 (0.0008) +[2026-06-07 03:39:42,282][528169] Updated weights for policy 0, policy_version 48392 (0.0009) +[2026-06-07 03:39:42,893][528169] Updated weights for policy 0, policy_version 48403 (0.0004) +[2026-06-07 03:39:43,035][528169] Updated weights for policy 0, policy_version 48414 (0.0008) +[2026-06-07 03:39:43,187][528169] Updated weights for policy 0, policy_version 48425 (0.0007) +[2026-06-07 03:39:43,324][528169] Updated weights for policy 0, policy_version 48435 (0.0006) +[2026-06-07 03:39:43,452][528169] Updated weights for policy 0, policy_version 48445 (0.0007) +[2026-06-07 03:39:43,582][528169] Updated weights for policy 0, policy_version 48455 (0.0007) +[2026-06-07 03:39:44,168][528169] Updated weights for policy 0, policy_version 48465 (0.0007) +[2026-06-07 03:39:44,296][528169] Updated weights for policy 0, policy_version 48475 (0.0007) +[2026-06-07 03:39:44,448][528169] Updated weights for policy 0, policy_version 48486 (0.0007) +[2026-06-07 03:39:44,581][528169] Updated weights for policy 0, policy_version 48496 (0.0007) +[2026-06-07 03:39:44,736][528169] Updated weights for policy 0, policy_version 48508 (0.0007) +[2026-06-07 03:39:45,103][527010] Fps is (10 sec: 26214.7, 60 sec: 25122.1, 300 sec: 24881.5). Total num frames: 24870912. Throughput: 0: 24735.3. Samples: 24829184. Policy #0 lag: (min: 54.0, avg: 67.4, max: 118.0) +[2026-06-07 03:39:45,104][527010] Avg episode reward: [(0, '158.854')] +[2026-06-07 03:39:45,389][528169] Updated weights for policy 0, policy_version 48521 (0.0007) +[2026-06-07 03:39:45,518][528169] Updated weights for policy 0, policy_version 48531 (0.0008) +[2026-06-07 03:39:45,651][528169] Updated weights for policy 0, policy_version 48541 (0.0008) +[2026-06-07 03:39:45,774][528169] Updated weights for policy 0, policy_version 48551 (0.0008) +[2026-06-07 03:39:45,915][528169] Updated weights for policy 0, policy_version 48561 (0.0008) +[2026-06-07 03:39:46,049][528169] Updated weights for policy 0, policy_version 48571 (0.0008) +[2026-06-07 03:39:46,182][528169] Updated weights for policy 0, policy_version 48581 (0.0008) +[2026-06-07 03:39:46,819][528169] Updated weights for policy 0, policy_version 48593 (0.0008) +[2026-06-07 03:39:46,966][528169] Updated weights for policy 0, policy_version 48604 (0.0008) +[2026-06-07 03:39:47,110][528169] Updated weights for policy 0, policy_version 48615 (0.0008) +[2026-06-07 03:39:47,243][528169] Updated weights for policy 0, policy_version 48625 (0.0008) +[2026-06-07 03:39:47,387][528169] Updated weights for policy 0, policy_version 48636 (0.0010) +[2026-06-07 03:39:47,537][528169] Updated weights for policy 0, policy_version 48647 (0.0011) +[2026-06-07 03:39:48,134][528169] Updated weights for policy 0, policy_version 48657 (0.0009) +[2026-06-07 03:39:48,278][528169] Updated weights for policy 0, policy_version 48668 (0.0008) +[2026-06-07 03:39:48,407][528169] Updated weights for policy 0, policy_version 48678 (0.0008) +[2026-06-07 03:39:48,548][528169] Updated weights for policy 0, policy_version 48688 (0.0008) +[2026-06-07 03:39:48,677][528169] Updated weights for policy 0, policy_version 48698 (0.0008) +[2026-06-07 03:39:48,819][528169] Updated weights for policy 0, policy_version 48708 (0.0008) +[2026-06-07 03:39:49,430][528169] Updated weights for policy 0, policy_version 48719 (0.0008) +[2026-06-07 03:39:49,559][528169] Updated weights for policy 0, policy_version 48729 (0.0008) +[2026-06-07 03:39:49,684][528169] Updated weights for policy 0, policy_version 48739 (0.0008) +[2026-06-07 03:39:49,821][528169] Updated weights for policy 0, policy_version 48749 (0.0008) +[2026-06-07 03:39:49,970][528169] Updated weights for policy 0, policy_version 48760 (0.0008) +[2026-06-07 03:39:50,103][527010] Fps is (10 sec: 22937.3, 60 sec: 24575.9, 300 sec: 24770.4). Total num frames: 24969216. Throughput: 0: 24732.4. Samples: 24984704. Policy #0 lag: (min: 54.0, avg: 67.4, max: 118.0) +[2026-06-07 03:39:50,104][527010] Avg episode reward: [(0, '152.077')] +[2026-06-07 03:39:50,104][528169] Updated weights for policy 0, policy_version 48770 (0.0009) +[2026-06-07 03:39:50,698][528169] Updated weights for policy 0, policy_version 48780 (0.0008) +[2026-06-07 03:39:50,841][528169] Updated weights for policy 0, policy_version 48791 (0.0009) +[2026-06-07 03:39:50,984][528169] Updated weights for policy 0, policy_version 48802 (0.0008) +[2026-06-07 03:39:51,116][528169] Updated weights for policy 0, policy_version 48812 (0.0008) +[2026-06-07 03:39:51,249][528169] Updated weights for policy 0, policy_version 48822 (0.0008) +[2026-06-07 03:39:51,406][528169] Updated weights for policy 0, policy_version 48834 (0.0009) +[2026-06-07 03:39:51,475][528093] Early stopping after 8 epochs (64 sgd steps), loss delta 0.0000000 +[2026-06-07 03:39:51,477][528093] Stopping Batcher_0... +[2026-06-07 03:39:51,477][528093] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_uniform_u1_3_fs2_seed11/checkpoint_p0/checkpoint_000048840_25034752.pth... +[2026-06-07 03:39:51,477][527010] Component Batcher_0 stopped! +[2026-06-07 03:39:51,478][527010] Component RolloutWorker_w1 stopped! +[2026-06-07 03:39:51,478][528168] Stopping RolloutWorker_w1... +[2026-06-07 03:39:51,478][528168] Loop rollout_proc1_evt_loop terminating... +[2026-06-07 03:39:51,477][528093] Loop batcher_evt_loop terminating... +[2026-06-07 03:39:51,480][527010] Component RolloutWorker_w0 stopped! +[2026-06-07 03:39:51,480][528167] Stopping RolloutWorker_w0... +[2026-06-07 03:39:51,480][528167] Loop rollout_proc0_evt_loop terminating... +[2026-06-07 03:39:51,495][528093] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_uniform_u1_3_fs2_seed11/checkpoint_p0/checkpoint_000048840_25034752.pth... +[2026-06-07 03:39:51,515][528093] Stopping LearnerWorker_p0... +[2026-06-07 03:39:51,516][528093] Loop learner_proc0_evt_loop terminating... +[2026-06-07 03:39:51,516][527010] Component LearnerWorker_p0 stopped! +[2026-06-07 03:39:51,524][528169] Weights refcount: 2 0 +[2026-06-07 03:39:51,526][528169] Stopping InferenceWorker_p0-w0... +[2026-06-07 03:39:51,526][528169] Loop inference_proc0-0_evt_loop terminating... +[2026-06-07 03:39:51,526][527010] Component InferenceWorker_p0-w0 stopped! +[2026-06-07 03:39:51,526][527010] Waiting for process learner_proc0 to stop... +[2026-06-07 03:39:52,319][527010] Waiting for process inference_proc0-0 to join... +[2026-06-07 03:39:52,320][527010] Waiting for process rollout_proc0 to join... +[2026-06-07 03:39:52,320][527010] Waiting for process rollout_proc1 to join... +[2026-06-07 03:39:52,321][527010] Batcher 0 profile tree view: +batching: 0.8997, releasing_batches: 0.0307 +[2026-06-07 03:39:52,321][527010] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0000 + wait_policy_total: 601.8400 +update_model: 41.5055 + weight_update: 0.0008 +one_step: 0.0017 + handle_policy_step: 362.4102 + deserialize: 4.9450, stack: 0.3662, obs_to_device_normalize: 53.4161, forward: 139.9124, prepare_outputs: 138.1813, send_messages: 9.8983 +[2026-06-07 03:39:52,322][527010] Learner 0 profile tree view: +misc: 0.0041, prepare_batch: 50.4633 +train: 638.2240 + epoch_init: 0.0543, minibatch_init: 2.4052, losses_postprocess: 189.5036, kl_divergence: 22.3305, after_optimizer: 252.2298 + calculate_losses: 38.0455 + losses_init: 0.0769, forward_head: 12.3796, bptt_initial: 0.3633, bptt: 0.4093, tail: 8.5963, advantages_returns: 2.8063, losses: 10.5185 + update: 130.5986 + clip: 12.7877 +[2026-06-07 03:39:52,322][527010] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.0332, enqueue_policy_requests: 108.5014, process_policy_outputs: 7.4392, env_step: 657.9408, finalize_trajectories: 0.1077, complete_rollouts: 0.0777 +post_env_step: 18.0830 + process_env_step: 6.6180 +[2026-06-07 03:39:52,322][527010] RolloutWorker_w1 profile tree view: +wait_for_trajectories: 0.0331, enqueue_policy_requests: 105.6862, process_policy_outputs: 7.3273, env_step: 658.3309, finalize_trajectories: 0.1104, complete_rollouts: 0.0776 +post_env_step: 17.8632 + process_env_step: 6.5559 +[2026-06-07 03:39:52,323][527010] Loop Runner_EvtLoop terminating... +[2026-06-07 03:39:52,324][527010] Runner profile tree view: +main_loop: 1033.5477 +[2026-06-07 03:39:52,324][527010] Collected {0: 25034752}, FPS: 24222.2