Upload training_log.json with huggingface_hub
Browse files- training_log.json +450 -0
training_log.json
CHANGED
|
@@ -2518,5 +2518,455 @@
|
|
| 2518 |
"mean_length": 6926.31,
|
| 2519 |
"loss": 0.08463311195373535,
|
| 2520 |
"sps": 3081.918148797146
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2521 |
}
|
| 2522 |
]
|
|
|
|
| 2518 |
"mean_length": 6926.31,
|
| 2519 |
"loss": 0.08463311195373535,
|
| 2520 |
"sps": 3081.918148797146
|
| 2521 |
+
},
|
| 2522 |
+
{
|
| 2523 |
+
"update": 1405,
|
| 2524 |
+
"global_step": 5754880,
|
| 2525 |
+
"num_episodes": 893,
|
| 2526 |
+
"mean_reward": 179.84025070667266,
|
| 2527 |
+
"mean_length": 7044.6,
|
| 2528 |
+
"loss": 11.629227638244629,
|
| 2529 |
+
"sps": 191.9713547057929
|
| 2530 |
+
},
|
| 2531 |
+
{
|
| 2532 |
+
"update": 1410,
|
| 2533 |
+
"global_step": 5775360,
|
| 2534 |
+
"num_episodes": 894,
|
| 2535 |
+
"mean_reward": 174.45908066272736,
|
| 2536 |
+
"mean_length": 7044.6,
|
| 2537 |
+
"loss": 0.7166698575019836,
|
| 2538 |
+
"sps": 487.6342667466037
|
| 2539 |
+
},
|
| 2540 |
+
{
|
| 2541 |
+
"update": 1415,
|
| 2542 |
+
"global_step": 5795840,
|
| 2543 |
+
"num_episodes": 895,
|
| 2544 |
+
"mean_reward": 173.22874859333038,
|
| 2545 |
+
"mean_length": 7044.6,
|
| 2546 |
+
"loss": 2.819256067276001,
|
| 2547 |
+
"sps": 804.3018578156708
|
| 2548 |
+
},
|
| 2549 |
+
{
|
| 2550 |
+
"update": 1420,
|
| 2551 |
+
"global_step": 5816320,
|
| 2552 |
+
"num_episodes": 899,
|
| 2553 |
+
"mean_reward": 176.70803850889206,
|
| 2554 |
+
"mean_length": 7143.29,
|
| 2555 |
+
"loss": 8.249241828918457,
|
| 2556 |
+
"sps": 154.735822589327
|
| 2557 |
+
},
|
| 2558 |
+
{
|
| 2559 |
+
"update": 1425,
|
| 2560 |
+
"global_step": 5836800,
|
| 2561 |
+
"num_episodes": 906,
|
| 2562 |
+
"mean_reward": 178.0406158566475,
|
| 2563 |
+
"mean_length": 7045.96,
|
| 2564 |
+
"loss": 23.5118408203125,
|
| 2565 |
+
"sps": 174.4884682201926
|
| 2566 |
+
},
|
| 2567 |
+
{
|
| 2568 |
+
"update": 1430,
|
| 2569 |
+
"global_step": 5857280,
|
| 2570 |
+
"num_episodes": 907,
|
| 2571 |
+
"mean_reward": 178.821298725605,
|
| 2572 |
+
"mean_length": 7144.72,
|
| 2573 |
+
"loss": 30.70025634765625,
|
| 2574 |
+
"sps": 216.094202192432
|
| 2575 |
+
},
|
| 2576 |
+
{
|
| 2577 |
+
"update": 1435,
|
| 2578 |
+
"global_step": 5877760,
|
| 2579 |
+
"num_episodes": 913,
|
| 2580 |
+
"mean_reward": 176.4720972943306,
|
| 2581 |
+
"mean_length": 7145.4,
|
| 2582 |
+
"loss": 7.781428813934326,
|
| 2583 |
+
"sps": 1208.7161327543458
|
| 2584 |
+
},
|
| 2585 |
+
{
|
| 2586 |
+
"update": 1440,
|
| 2587 |
+
"global_step": 5898240,
|
| 2588 |
+
"num_episodes": 914,
|
| 2589 |
+
"mean_reward": 175.2331176495552,
|
| 2590 |
+
"mean_length": 7145.4,
|
| 2591 |
+
"loss": 0.31977832317352295,
|
| 2592 |
+
"sps": 1229.1269815173093
|
| 2593 |
+
},
|
| 2594 |
+
{
|
| 2595 |
+
"update": 1445,
|
| 2596 |
+
"global_step": 5918720,
|
| 2597 |
+
"num_episodes": 917,
|
| 2598 |
+
"mean_reward": 194.64220715761184,
|
| 2599 |
+
"mean_length": 7145.4,
|
| 2600 |
+
"loss": 4.373215198516846,
|
| 2601 |
+
"sps": 492.457155299295
|
| 2602 |
+
},
|
| 2603 |
+
{
|
| 2604 |
+
"update": 1450,
|
| 2605 |
+
"global_step": 5939200,
|
| 2606 |
+
"num_episodes": 917,
|
| 2607 |
+
"mean_reward": 194.64220715761184,
|
| 2608 |
+
"mean_length": 7145.4,
|
| 2609 |
+
"loss": 1.9693742990493774,
|
| 2610 |
+
"sps": 298.24318986957036
|
| 2611 |
+
},
|
| 2612 |
+
{
|
| 2613 |
+
"update": 1455,
|
| 2614 |
+
"global_step": 5959680,
|
| 2615 |
+
"num_episodes": 924,
|
| 2616 |
+
"mean_reward": 207.0149205994606,
|
| 2617 |
+
"mean_length": 7167.29,
|
| 2618 |
+
"loss": 10.92724323272705,
|
| 2619 |
+
"sps": 244.35456207965237
|
| 2620 |
+
},
|
| 2621 |
+
{
|
| 2622 |
+
"update": 1460,
|
| 2623 |
+
"global_step": 5980160,
|
| 2624 |
+
"num_episodes": 925,
|
| 2625 |
+
"mean_reward": 205.1948454117775,
|
| 2626 |
+
"mean_length": 7167.29,
|
| 2627 |
+
"loss": 1.1308797597885132,
|
| 2628 |
+
"sps": 229.29791971281753
|
| 2629 |
+
},
|
| 2630 |
+
{
|
| 2631 |
+
"update": 1465,
|
| 2632 |
+
"global_step": 6000640,
|
| 2633 |
+
"num_episodes": 929,
|
| 2634 |
+
"mean_reward": 210.25953838586807,
|
| 2635 |
+
"mean_length": 7296.31,
|
| 2636 |
+
"loss": 3.534721612930298,
|
| 2637 |
+
"sps": 592.749111404352
|
| 2638 |
+
},
|
| 2639 |
+
{
|
| 2640 |
+
"update": 1470,
|
| 2641 |
+
"global_step": 6021120,
|
| 2642 |
+
"num_episodes": 930,
|
| 2643 |
+
"mean_reward": 208.1203717112541,
|
| 2644 |
+
"mean_length": 7266.63,
|
| 2645 |
+
"loss": 1.5503363609313965,
|
| 2646 |
+
"sps": 2447.564196906472
|
| 2647 |
+
},
|
| 2648 |
+
{
|
| 2649 |
+
"update": 1475,
|
| 2650 |
+
"global_step": 6041600,
|
| 2651 |
+
"num_episodes": 933,
|
| 2652 |
+
"mean_reward": 201.2189755320549,
|
| 2653 |
+
"mean_length": 7266.63,
|
| 2654 |
+
"loss": 0.6554332375526428,
|
| 2655 |
+
"sps": 295.24147179578677
|
| 2656 |
+
},
|
| 2657 |
+
{
|
| 2658 |
+
"update": 1480,
|
| 2659 |
+
"global_step": 6062080,
|
| 2660 |
+
"num_episodes": 934,
|
| 2661 |
+
"mean_reward": 202.5496774840355,
|
| 2662 |
+
"mean_length": 7282.98,
|
| 2663 |
+
"loss": 4.126849174499512,
|
| 2664 |
+
"sps": 646.598178325608
|
| 2665 |
+
},
|
| 2666 |
+
{
|
| 2667 |
+
"update": 1485,
|
| 2668 |
+
"global_step": 6082560,
|
| 2669 |
+
"num_episodes": 940,
|
| 2670 |
+
"mean_reward": 205.9382851600647,
|
| 2671 |
+
"mean_length": 7375.73,
|
| 2672 |
+
"loss": 0.5179982781410217,
|
| 2673 |
+
"sps": 2704.744530210794
|
| 2674 |
+
},
|
| 2675 |
+
{
|
| 2676 |
+
"update": 1490,
|
| 2677 |
+
"global_step": 6103040,
|
| 2678 |
+
"num_episodes": 941,
|
| 2679 |
+
"mean_reward": 206.08987416267394,
|
| 2680 |
+
"mean_length": 7473.15,
|
| 2681 |
+
"loss": 3.0351297855377197,
|
| 2682 |
+
"sps": 304.0684542240306
|
| 2683 |
+
},
|
| 2684 |
+
{
|
| 2685 |
+
"update": 1495,
|
| 2686 |
+
"global_step": 6123520,
|
| 2687 |
+
"num_episodes": 943,
|
| 2688 |
+
"mean_reward": 213.63924886703492,
|
| 2689 |
+
"mean_length": 7572.02,
|
| 2690 |
+
"loss": 2.000380754470825,
|
| 2691 |
+
"sps": 221.62826006300585
|
| 2692 |
+
},
|
| 2693 |
+
{
|
| 2694 |
+
"update": 1500,
|
| 2695 |
+
"global_step": 6144000,
|
| 2696 |
+
"num_episodes": 944,
|
| 2697 |
+
"mean_reward": 212.38942768096925,
|
| 2698 |
+
"mean_length": 7572.02,
|
| 2699 |
+
"loss": 5.654113292694092,
|
| 2700 |
+
"sps": 427.42645749613314
|
| 2701 |
+
},
|
| 2702 |
+
{
|
| 2703 |
+
"update": 1505,
|
| 2704 |
+
"global_step": 6164480,
|
| 2705 |
+
"num_episodes": 951,
|
| 2706 |
+
"mean_reward": 202.7977014017105,
|
| 2707 |
+
"mean_length": 7366.39,
|
| 2708 |
+
"loss": 6.195871829986572,
|
| 2709 |
+
"sps": 214.22522946745585
|
| 2710 |
+
},
|
| 2711 |
+
{
|
| 2712 |
+
"update": 1510,
|
| 2713 |
+
"global_step": 6184960,
|
| 2714 |
+
"num_episodes": 954,
|
| 2715 |
+
"mean_reward": 201.14605865955352,
|
| 2716 |
+
"mean_length": 7236.97,
|
| 2717 |
+
"loss": 5.950435638427734,
|
| 2718 |
+
"sps": 217.84006537381956
|
| 2719 |
+
},
|
| 2720 |
+
{
|
| 2721 |
+
"update": 1515,
|
| 2722 |
+
"global_step": 6205440,
|
| 2723 |
+
"num_episodes": 960,
|
| 2724 |
+
"mean_reward": 185.95387471675872,
|
| 2725 |
+
"mean_length": 7023.6,
|
| 2726 |
+
"loss": 7.220864772796631,
|
| 2727 |
+
"sps": 516.9270272810767
|
| 2728 |
+
},
|
| 2729 |
+
{
|
| 2730 |
+
"update": 1520,
|
| 2731 |
+
"global_step": 6225920,
|
| 2732 |
+
"num_episodes": 964,
|
| 2733 |
+
"mean_reward": 171.7426621770859,
|
| 2734 |
+
"mean_length": 6906.26,
|
| 2735 |
+
"loss": 1.038556694984436,
|
| 2736 |
+
"sps": 1541.5344112491593
|
| 2737 |
+
},
|
| 2738 |
+
{
|
| 2739 |
+
"update": 1525,
|
| 2740 |
+
"global_step": 6246400,
|
| 2741 |
+
"num_episodes": 969,
|
| 2742 |
+
"mean_reward": 184.1564519548416,
|
| 2743 |
+
"mean_length": 6966.53,
|
| 2744 |
+
"loss": 0.6786921620368958,
|
| 2745 |
+
"sps": 2829.1006312828786
|
| 2746 |
+
},
|
| 2747 |
+
{
|
| 2748 |
+
"update": 1530,
|
| 2749 |
+
"global_step": 6266880,
|
| 2750 |
+
"num_episodes": 969,
|
| 2751 |
+
"mean_reward": 184.1564519548416,
|
| 2752 |
+
"mean_length": 6966.53,
|
| 2753 |
+
"loss": 1.1222167015075684,
|
| 2754 |
+
"sps": 2615.546877868864
|
| 2755 |
+
},
|
| 2756 |
+
{
|
| 2757 |
+
"update": 1535,
|
| 2758 |
+
"global_step": 6287360,
|
| 2759 |
+
"num_episodes": 973,
|
| 2760 |
+
"mean_reward": 181.29534606933595,
|
| 2761 |
+
"mean_length": 6864.66,
|
| 2762 |
+
"loss": 0.8305673599243164,
|
| 2763 |
+
"sps": 165.56638087166775
|
| 2764 |
+
},
|
| 2765 |
+
{
|
| 2766 |
+
"update": 1540,
|
| 2767 |
+
"global_step": 6307840,
|
| 2768 |
+
"num_episodes": 975,
|
| 2769 |
+
"mean_reward": 182.1956338787079,
|
| 2770 |
+
"mean_length": 6941.24,
|
| 2771 |
+
"loss": 7.593855857849121,
|
| 2772 |
+
"sps": 345.2319256598677
|
| 2773 |
+
},
|
| 2774 |
+
{
|
| 2775 |
+
"update": 1545,
|
| 2776 |
+
"global_step": 6328320,
|
| 2777 |
+
"num_episodes": 985,
|
| 2778 |
+
"mean_reward": 163.70111170768737,
|
| 2779 |
+
"mean_length": 6506.28,
|
| 2780 |
+
"loss": 3.5074303150177,
|
| 2781 |
+
"sps": 607.6658652702555
|
| 2782 |
+
},
|
| 2783 |
+
{
|
| 2784 |
+
"update": 1550,
|
| 2785 |
+
"global_step": 6348800,
|
| 2786 |
+
"num_episodes": 986,
|
| 2787 |
+
"mean_reward": 150.90680050373078,
|
| 2788 |
+
"mean_length": 6485.25,
|
| 2789 |
+
"loss": -0.0576794408261776,
|
| 2790 |
+
"sps": 2294.650789556882
|
| 2791 |
+
},
|
| 2792 |
+
{
|
| 2793 |
+
"update": 1555,
|
| 2794 |
+
"global_step": 6369280,
|
| 2795 |
+
"num_episodes": 986,
|
| 2796 |
+
"mean_reward": 150.90680050373078,
|
| 2797 |
+
"mean_length": 6485.25,
|
| 2798 |
+
"loss": -0.06482464075088501,
|
| 2799 |
+
"sps": 2276.202621733714
|
| 2800 |
+
},
|
| 2801 |
+
{
|
| 2802 |
+
"update": 1560,
|
| 2803 |
+
"global_step": 6389760,
|
| 2804 |
+
"num_episodes": 990,
|
| 2805 |
+
"mean_reward": 148.66352381229402,
|
| 2806 |
+
"mean_length": 6385.85,
|
| 2807 |
+
"loss": 1.5638670921325684,
|
| 2808 |
+
"sps": 1452.822050860829
|
| 2809 |
+
},
|
| 2810 |
+
{
|
| 2811 |
+
"update": 1565,
|
| 2812 |
+
"global_step": 6410240,
|
| 2813 |
+
"num_episodes": 997,
|
| 2814 |
+
"mean_reward": 142.64638622045516,
|
| 2815 |
+
"mean_length": 6262.48,
|
| 2816 |
+
"loss": 10.640795707702637,
|
| 2817 |
+
"sps": 831.3271413345293
|
| 2818 |
+
},
|
| 2819 |
+
{
|
| 2820 |
+
"update": 1570,
|
| 2821 |
+
"global_step": 6430720,
|
| 2822 |
+
"num_episodes": 998,
|
| 2823 |
+
"mean_reward": 147.54794536352156,
|
| 2824 |
+
"mean_length": 6262.48,
|
| 2825 |
+
"loss": 0.4938640296459198,
|
| 2826 |
+
"sps": 1275.2283874235693
|
| 2827 |
+
},
|
| 2828 |
+
{
|
| 2829 |
+
"update": 1575,
|
| 2830 |
+
"global_step": 6451200,
|
| 2831 |
+
"num_episodes": 999,
|
| 2832 |
+
"mean_reward": 147.54794582128525,
|
| 2833 |
+
"mean_length": 6262.48,
|
| 2834 |
+
"loss": 0.6068828105926514,
|
| 2835 |
+
"sps": 1238.905594097976
|
| 2836 |
+
},
|
| 2837 |
+
{
|
| 2838 |
+
"update": 1580,
|
| 2839 |
+
"global_step": 6471680,
|
| 2840 |
+
"num_episodes": 1001,
|
| 2841 |
+
"mean_reward": 157.1391297507286,
|
| 2842 |
+
"mean_length": 6262.48,
|
| 2843 |
+
"loss": 2.0471107959747314,
|
| 2844 |
+
"sps": 791.2682917819899
|
| 2845 |
+
},
|
| 2846 |
+
{
|
| 2847 |
+
"update": 1585,
|
| 2848 |
+
"global_step": 6492160,
|
| 2849 |
+
"num_episodes": 1007,
|
| 2850 |
+
"mean_reward": 181.8120165514946,
|
| 2851 |
+
"mean_length": 6453.69,
|
| 2852 |
+
"loss": 2.0775344371795654,
|
| 2853 |
+
"sps": 158.97643330398787
|
| 2854 |
+
},
|
| 2855 |
+
{
|
| 2856 |
+
"update": 1590,
|
| 2857 |
+
"global_step": 6512640,
|
| 2858 |
+
"num_episodes": 1009,
|
| 2859 |
+
"mean_reward": 182.60089690208434,
|
| 2860 |
+
"mean_length": 6440.96,
|
| 2861 |
+
"loss": 0.2661677896976471,
|
| 2862 |
+
"sps": 516.263511797793
|
| 2863 |
+
},
|
| 2864 |
+
{
|
| 2865 |
+
"update": 1595,
|
| 2866 |
+
"global_step": 6533120,
|
| 2867 |
+
"num_episodes": 1010,
|
| 2868 |
+
"mean_reward": 184.2022120523453,
|
| 2869 |
+
"mean_length": 6462.06,
|
| 2870 |
+
"loss": 8.214560508728027,
|
| 2871 |
+
"sps": 505.3776855932051
|
| 2872 |
+
},
|
| 2873 |
+
{
|
| 2874 |
+
"update": 1600,
|
| 2875 |
+
"global_step": 6553600,
|
| 2876 |
+
"num_episodes": 1014,
|
| 2877 |
+
"mean_reward": 198.42190223693848,
|
| 2878 |
+
"mean_length": 6548.4,
|
| 2879 |
+
"loss": 7.701492786407471,
|
| 2880 |
+
"sps": 276.9719165588502
|
| 2881 |
+
},
|
| 2882 |
+
{
|
| 2883 |
+
"update": 1605,
|
| 2884 |
+
"global_step": 6574080,
|
| 2885 |
+
"num_episodes": 1016,
|
| 2886 |
+
"mean_reward": 196.53872619628908,
|
| 2887 |
+
"mean_length": 6548.4,
|
| 2888 |
+
"loss": 8.10093879699707,
|
| 2889 |
+
"sps": 183.92737027550902
|
| 2890 |
+
},
|
| 2891 |
+
{
|
| 2892 |
+
"update": 1610,
|
| 2893 |
+
"global_step": 6594560,
|
| 2894 |
+
"num_episodes": 1021,
|
| 2895 |
+
"mean_reward": 172.8975705099106,
|
| 2896 |
+
"mean_length": 6434.35,
|
| 2897 |
+
"loss": 7.104397296905518,
|
| 2898 |
+
"sps": 1251.426107351179
|
| 2899 |
+
},
|
| 2900 |
+
{
|
| 2901 |
+
"update": 1615,
|
| 2902 |
+
"global_step": 6615040,
|
| 2903 |
+
"num_episodes": 1022,
|
| 2904 |
+
"mean_reward": 178.2296389913559,
|
| 2905 |
+
"mean_length": 6533.6,
|
| 2906 |
+
"loss": 9.21767520904541,
|
| 2907 |
+
"sps": 348.4657145461528
|
| 2908 |
+
},
|
| 2909 |
+
{
|
| 2910 |
+
"update": 1620,
|
| 2911 |
+
"global_step": 6635520,
|
| 2912 |
+
"num_episodes": 1024,
|
| 2913 |
+
"mean_reward": 190.50105198383332,
|
| 2914 |
+
"mean_length": 6609.82,
|
| 2915 |
+
"loss": 18.506481170654297,
|
| 2916 |
+
"sps": 335.4327754993037
|
| 2917 |
+
},
|
| 2918 |
+
{
|
| 2919 |
+
"update": 1625,
|
| 2920 |
+
"global_step": 6656000,
|
| 2921 |
+
"num_episodes": 1025,
|
| 2922 |
+
"mean_reward": 203.21644562244416,
|
| 2923 |
+
"mean_length": 6609.82,
|
| 2924 |
+
"loss": 2.3920085430145264,
|
| 2925 |
+
"sps": 595.2335834592267
|
| 2926 |
+
},
|
| 2927 |
+
{
|
| 2928 |
+
"update": 1630,
|
| 2929 |
+
"global_step": 6676480,
|
| 2930 |
+
"num_episodes": 1029,
|
| 2931 |
+
"mean_reward": 207.58161754131316,
|
| 2932 |
+
"mean_length": 6719.36,
|
| 2933 |
+
"loss": 31.76652717590332,
|
| 2934 |
+
"sps": 203.75921758331614
|
| 2935 |
+
},
|
| 2936 |
+
{
|
| 2937 |
+
"update": 1635,
|
| 2938 |
+
"global_step": 6696960,
|
| 2939 |
+
"num_episodes": 1032,
|
| 2940 |
+
"mean_reward": 226.69792892456056,
|
| 2941 |
+
"mean_length": 6678.85,
|
| 2942 |
+
"loss": 19.729021072387695,
|
| 2943 |
+
"sps": 726.913341411244
|
| 2944 |
+
},
|
| 2945 |
+
{
|
| 2946 |
+
"update": 1640,
|
| 2947 |
+
"global_step": 6717440,
|
| 2948 |
+
"num_episodes": 1034,
|
| 2949 |
+
"mean_reward": 227.13838208675384,
|
| 2950 |
+
"mean_length": 6579.05,
|
| 2951 |
+
"loss": 0.6724852323532104,
|
| 2952 |
+
"sps": 1094.927298568171
|
| 2953 |
+
},
|
| 2954 |
+
{
|
| 2955 |
+
"update": 1645,
|
| 2956 |
+
"global_step": 6737920,
|
| 2957 |
+
"num_episodes": 1036,
|
| 2958 |
+
"mean_reward": 224.31010818958282,
|
| 2959 |
+
"mean_length": 6678.82,
|
| 2960 |
+
"loss": 11.505419731140137,
|
| 2961 |
+
"sps": 1080.2913604455412
|
| 2962 |
+
},
|
| 2963 |
+
{
|
| 2964 |
+
"update": 1650,
|
| 2965 |
+
"global_step": 6758400,
|
| 2966 |
+
"num_episodes": 1038,
|
| 2967 |
+
"mean_reward": 231.36178754091262,
|
| 2968 |
+
"mean_length": 6722.13,
|
| 2969 |
+
"loss": 1.217943549156189,
|
| 2970 |
+
"sps": 1558.2708562037928
|
| 2971 |
}
|
| 2972 |
]
|