JoshuaFreeman commited on
Commit
67964a4
·
verified ·
1 Parent(s): 9484826

Upload training_log.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. training_log.json +450 -0
training_log.json CHANGED
@@ -2518,5 +2518,455 @@
2518
  "mean_length": 6926.31,
2519
  "loss": 0.08463311195373535,
2520
  "sps": 3081.918148797146
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2521
  }
2522
  ]
 
2518
  "mean_length": 6926.31,
2519
  "loss": 0.08463311195373535,
2520
  "sps": 3081.918148797146
2521
+ },
2522
+ {
2523
+ "update": 1405,
2524
+ "global_step": 5754880,
2525
+ "num_episodes": 893,
2526
+ "mean_reward": 179.84025070667266,
2527
+ "mean_length": 7044.6,
2528
+ "loss": 11.629227638244629,
2529
+ "sps": 191.9713547057929
2530
+ },
2531
+ {
2532
+ "update": 1410,
2533
+ "global_step": 5775360,
2534
+ "num_episodes": 894,
2535
+ "mean_reward": 174.45908066272736,
2536
+ "mean_length": 7044.6,
2537
+ "loss": 0.7166698575019836,
2538
+ "sps": 487.6342667466037
2539
+ },
2540
+ {
2541
+ "update": 1415,
2542
+ "global_step": 5795840,
2543
+ "num_episodes": 895,
2544
+ "mean_reward": 173.22874859333038,
2545
+ "mean_length": 7044.6,
2546
+ "loss": 2.819256067276001,
2547
+ "sps": 804.3018578156708
2548
+ },
2549
+ {
2550
+ "update": 1420,
2551
+ "global_step": 5816320,
2552
+ "num_episodes": 899,
2553
+ "mean_reward": 176.70803850889206,
2554
+ "mean_length": 7143.29,
2555
+ "loss": 8.249241828918457,
2556
+ "sps": 154.735822589327
2557
+ },
2558
+ {
2559
+ "update": 1425,
2560
+ "global_step": 5836800,
2561
+ "num_episodes": 906,
2562
+ "mean_reward": 178.0406158566475,
2563
+ "mean_length": 7045.96,
2564
+ "loss": 23.5118408203125,
2565
+ "sps": 174.4884682201926
2566
+ },
2567
+ {
2568
+ "update": 1430,
2569
+ "global_step": 5857280,
2570
+ "num_episodes": 907,
2571
+ "mean_reward": 178.821298725605,
2572
+ "mean_length": 7144.72,
2573
+ "loss": 30.70025634765625,
2574
+ "sps": 216.094202192432
2575
+ },
2576
+ {
2577
+ "update": 1435,
2578
+ "global_step": 5877760,
2579
+ "num_episodes": 913,
2580
+ "mean_reward": 176.4720972943306,
2581
+ "mean_length": 7145.4,
2582
+ "loss": 7.781428813934326,
2583
+ "sps": 1208.7161327543458
2584
+ },
2585
+ {
2586
+ "update": 1440,
2587
+ "global_step": 5898240,
2588
+ "num_episodes": 914,
2589
+ "mean_reward": 175.2331176495552,
2590
+ "mean_length": 7145.4,
2591
+ "loss": 0.31977832317352295,
2592
+ "sps": 1229.1269815173093
2593
+ },
2594
+ {
2595
+ "update": 1445,
2596
+ "global_step": 5918720,
2597
+ "num_episodes": 917,
2598
+ "mean_reward": 194.64220715761184,
2599
+ "mean_length": 7145.4,
2600
+ "loss": 4.373215198516846,
2601
+ "sps": 492.457155299295
2602
+ },
2603
+ {
2604
+ "update": 1450,
2605
+ "global_step": 5939200,
2606
+ "num_episodes": 917,
2607
+ "mean_reward": 194.64220715761184,
2608
+ "mean_length": 7145.4,
2609
+ "loss": 1.9693742990493774,
2610
+ "sps": 298.24318986957036
2611
+ },
2612
+ {
2613
+ "update": 1455,
2614
+ "global_step": 5959680,
2615
+ "num_episodes": 924,
2616
+ "mean_reward": 207.0149205994606,
2617
+ "mean_length": 7167.29,
2618
+ "loss": 10.92724323272705,
2619
+ "sps": 244.35456207965237
2620
+ },
2621
+ {
2622
+ "update": 1460,
2623
+ "global_step": 5980160,
2624
+ "num_episodes": 925,
2625
+ "mean_reward": 205.1948454117775,
2626
+ "mean_length": 7167.29,
2627
+ "loss": 1.1308797597885132,
2628
+ "sps": 229.29791971281753
2629
+ },
2630
+ {
2631
+ "update": 1465,
2632
+ "global_step": 6000640,
2633
+ "num_episodes": 929,
2634
+ "mean_reward": 210.25953838586807,
2635
+ "mean_length": 7296.31,
2636
+ "loss": 3.534721612930298,
2637
+ "sps": 592.749111404352
2638
+ },
2639
+ {
2640
+ "update": 1470,
2641
+ "global_step": 6021120,
2642
+ "num_episodes": 930,
2643
+ "mean_reward": 208.1203717112541,
2644
+ "mean_length": 7266.63,
2645
+ "loss": 1.5503363609313965,
2646
+ "sps": 2447.564196906472
2647
+ },
2648
+ {
2649
+ "update": 1475,
2650
+ "global_step": 6041600,
2651
+ "num_episodes": 933,
2652
+ "mean_reward": 201.2189755320549,
2653
+ "mean_length": 7266.63,
2654
+ "loss": 0.6554332375526428,
2655
+ "sps": 295.24147179578677
2656
+ },
2657
+ {
2658
+ "update": 1480,
2659
+ "global_step": 6062080,
2660
+ "num_episodes": 934,
2661
+ "mean_reward": 202.5496774840355,
2662
+ "mean_length": 7282.98,
2663
+ "loss": 4.126849174499512,
2664
+ "sps": 646.598178325608
2665
+ },
2666
+ {
2667
+ "update": 1485,
2668
+ "global_step": 6082560,
2669
+ "num_episodes": 940,
2670
+ "mean_reward": 205.9382851600647,
2671
+ "mean_length": 7375.73,
2672
+ "loss": 0.5179982781410217,
2673
+ "sps": 2704.744530210794
2674
+ },
2675
+ {
2676
+ "update": 1490,
2677
+ "global_step": 6103040,
2678
+ "num_episodes": 941,
2679
+ "mean_reward": 206.08987416267394,
2680
+ "mean_length": 7473.15,
2681
+ "loss": 3.0351297855377197,
2682
+ "sps": 304.0684542240306
2683
+ },
2684
+ {
2685
+ "update": 1495,
2686
+ "global_step": 6123520,
2687
+ "num_episodes": 943,
2688
+ "mean_reward": 213.63924886703492,
2689
+ "mean_length": 7572.02,
2690
+ "loss": 2.000380754470825,
2691
+ "sps": 221.62826006300585
2692
+ },
2693
+ {
2694
+ "update": 1500,
2695
+ "global_step": 6144000,
2696
+ "num_episodes": 944,
2697
+ "mean_reward": 212.38942768096925,
2698
+ "mean_length": 7572.02,
2699
+ "loss": 5.654113292694092,
2700
+ "sps": 427.42645749613314
2701
+ },
2702
+ {
2703
+ "update": 1505,
2704
+ "global_step": 6164480,
2705
+ "num_episodes": 951,
2706
+ "mean_reward": 202.7977014017105,
2707
+ "mean_length": 7366.39,
2708
+ "loss": 6.195871829986572,
2709
+ "sps": 214.22522946745585
2710
+ },
2711
+ {
2712
+ "update": 1510,
2713
+ "global_step": 6184960,
2714
+ "num_episodes": 954,
2715
+ "mean_reward": 201.14605865955352,
2716
+ "mean_length": 7236.97,
2717
+ "loss": 5.950435638427734,
2718
+ "sps": 217.84006537381956
2719
+ },
2720
+ {
2721
+ "update": 1515,
2722
+ "global_step": 6205440,
2723
+ "num_episodes": 960,
2724
+ "mean_reward": 185.95387471675872,
2725
+ "mean_length": 7023.6,
2726
+ "loss": 7.220864772796631,
2727
+ "sps": 516.9270272810767
2728
+ },
2729
+ {
2730
+ "update": 1520,
2731
+ "global_step": 6225920,
2732
+ "num_episodes": 964,
2733
+ "mean_reward": 171.7426621770859,
2734
+ "mean_length": 6906.26,
2735
+ "loss": 1.038556694984436,
2736
+ "sps": 1541.5344112491593
2737
+ },
2738
+ {
2739
+ "update": 1525,
2740
+ "global_step": 6246400,
2741
+ "num_episodes": 969,
2742
+ "mean_reward": 184.1564519548416,
2743
+ "mean_length": 6966.53,
2744
+ "loss": 0.6786921620368958,
2745
+ "sps": 2829.1006312828786
2746
+ },
2747
+ {
2748
+ "update": 1530,
2749
+ "global_step": 6266880,
2750
+ "num_episodes": 969,
2751
+ "mean_reward": 184.1564519548416,
2752
+ "mean_length": 6966.53,
2753
+ "loss": 1.1222167015075684,
2754
+ "sps": 2615.546877868864
2755
+ },
2756
+ {
2757
+ "update": 1535,
2758
+ "global_step": 6287360,
2759
+ "num_episodes": 973,
2760
+ "mean_reward": 181.29534606933595,
2761
+ "mean_length": 6864.66,
2762
+ "loss": 0.8305673599243164,
2763
+ "sps": 165.56638087166775
2764
+ },
2765
+ {
2766
+ "update": 1540,
2767
+ "global_step": 6307840,
2768
+ "num_episodes": 975,
2769
+ "mean_reward": 182.1956338787079,
2770
+ "mean_length": 6941.24,
2771
+ "loss": 7.593855857849121,
2772
+ "sps": 345.2319256598677
2773
+ },
2774
+ {
2775
+ "update": 1545,
2776
+ "global_step": 6328320,
2777
+ "num_episodes": 985,
2778
+ "mean_reward": 163.70111170768737,
2779
+ "mean_length": 6506.28,
2780
+ "loss": 3.5074303150177,
2781
+ "sps": 607.6658652702555
2782
+ },
2783
+ {
2784
+ "update": 1550,
2785
+ "global_step": 6348800,
2786
+ "num_episodes": 986,
2787
+ "mean_reward": 150.90680050373078,
2788
+ "mean_length": 6485.25,
2789
+ "loss": -0.0576794408261776,
2790
+ "sps": 2294.650789556882
2791
+ },
2792
+ {
2793
+ "update": 1555,
2794
+ "global_step": 6369280,
2795
+ "num_episodes": 986,
2796
+ "mean_reward": 150.90680050373078,
2797
+ "mean_length": 6485.25,
2798
+ "loss": -0.06482464075088501,
2799
+ "sps": 2276.202621733714
2800
+ },
2801
+ {
2802
+ "update": 1560,
2803
+ "global_step": 6389760,
2804
+ "num_episodes": 990,
2805
+ "mean_reward": 148.66352381229402,
2806
+ "mean_length": 6385.85,
2807
+ "loss": 1.5638670921325684,
2808
+ "sps": 1452.822050860829
2809
+ },
2810
+ {
2811
+ "update": 1565,
2812
+ "global_step": 6410240,
2813
+ "num_episodes": 997,
2814
+ "mean_reward": 142.64638622045516,
2815
+ "mean_length": 6262.48,
2816
+ "loss": 10.640795707702637,
2817
+ "sps": 831.3271413345293
2818
+ },
2819
+ {
2820
+ "update": 1570,
2821
+ "global_step": 6430720,
2822
+ "num_episodes": 998,
2823
+ "mean_reward": 147.54794536352156,
2824
+ "mean_length": 6262.48,
2825
+ "loss": 0.4938640296459198,
2826
+ "sps": 1275.2283874235693
2827
+ },
2828
+ {
2829
+ "update": 1575,
2830
+ "global_step": 6451200,
2831
+ "num_episodes": 999,
2832
+ "mean_reward": 147.54794582128525,
2833
+ "mean_length": 6262.48,
2834
+ "loss": 0.6068828105926514,
2835
+ "sps": 1238.905594097976
2836
+ },
2837
+ {
2838
+ "update": 1580,
2839
+ "global_step": 6471680,
2840
+ "num_episodes": 1001,
2841
+ "mean_reward": 157.1391297507286,
2842
+ "mean_length": 6262.48,
2843
+ "loss": 2.0471107959747314,
2844
+ "sps": 791.2682917819899
2845
+ },
2846
+ {
2847
+ "update": 1585,
2848
+ "global_step": 6492160,
2849
+ "num_episodes": 1007,
2850
+ "mean_reward": 181.8120165514946,
2851
+ "mean_length": 6453.69,
2852
+ "loss": 2.0775344371795654,
2853
+ "sps": 158.97643330398787
2854
+ },
2855
+ {
2856
+ "update": 1590,
2857
+ "global_step": 6512640,
2858
+ "num_episodes": 1009,
2859
+ "mean_reward": 182.60089690208434,
2860
+ "mean_length": 6440.96,
2861
+ "loss": 0.2661677896976471,
2862
+ "sps": 516.263511797793
2863
+ },
2864
+ {
2865
+ "update": 1595,
2866
+ "global_step": 6533120,
2867
+ "num_episodes": 1010,
2868
+ "mean_reward": 184.2022120523453,
2869
+ "mean_length": 6462.06,
2870
+ "loss": 8.214560508728027,
2871
+ "sps": 505.3776855932051
2872
+ },
2873
+ {
2874
+ "update": 1600,
2875
+ "global_step": 6553600,
2876
+ "num_episodes": 1014,
2877
+ "mean_reward": 198.42190223693848,
2878
+ "mean_length": 6548.4,
2879
+ "loss": 7.701492786407471,
2880
+ "sps": 276.9719165588502
2881
+ },
2882
+ {
2883
+ "update": 1605,
2884
+ "global_step": 6574080,
2885
+ "num_episodes": 1016,
2886
+ "mean_reward": 196.53872619628908,
2887
+ "mean_length": 6548.4,
2888
+ "loss": 8.10093879699707,
2889
+ "sps": 183.92737027550902
2890
+ },
2891
+ {
2892
+ "update": 1610,
2893
+ "global_step": 6594560,
2894
+ "num_episodes": 1021,
2895
+ "mean_reward": 172.8975705099106,
2896
+ "mean_length": 6434.35,
2897
+ "loss": 7.104397296905518,
2898
+ "sps": 1251.426107351179
2899
+ },
2900
+ {
2901
+ "update": 1615,
2902
+ "global_step": 6615040,
2903
+ "num_episodes": 1022,
2904
+ "mean_reward": 178.2296389913559,
2905
+ "mean_length": 6533.6,
2906
+ "loss": 9.21767520904541,
2907
+ "sps": 348.4657145461528
2908
+ },
2909
+ {
2910
+ "update": 1620,
2911
+ "global_step": 6635520,
2912
+ "num_episodes": 1024,
2913
+ "mean_reward": 190.50105198383332,
2914
+ "mean_length": 6609.82,
2915
+ "loss": 18.506481170654297,
2916
+ "sps": 335.4327754993037
2917
+ },
2918
+ {
2919
+ "update": 1625,
2920
+ "global_step": 6656000,
2921
+ "num_episodes": 1025,
2922
+ "mean_reward": 203.21644562244416,
2923
+ "mean_length": 6609.82,
2924
+ "loss": 2.3920085430145264,
2925
+ "sps": 595.2335834592267
2926
+ },
2927
+ {
2928
+ "update": 1630,
2929
+ "global_step": 6676480,
2930
+ "num_episodes": 1029,
2931
+ "mean_reward": 207.58161754131316,
2932
+ "mean_length": 6719.36,
2933
+ "loss": 31.76652717590332,
2934
+ "sps": 203.75921758331614
2935
+ },
2936
+ {
2937
+ "update": 1635,
2938
+ "global_step": 6696960,
2939
+ "num_episodes": 1032,
2940
+ "mean_reward": 226.69792892456056,
2941
+ "mean_length": 6678.85,
2942
+ "loss": 19.729021072387695,
2943
+ "sps": 726.913341411244
2944
+ },
2945
+ {
2946
+ "update": 1640,
2947
+ "global_step": 6717440,
2948
+ "num_episodes": 1034,
2949
+ "mean_reward": 227.13838208675384,
2950
+ "mean_length": 6579.05,
2951
+ "loss": 0.6724852323532104,
2952
+ "sps": 1094.927298568171
2953
+ },
2954
+ {
2955
+ "update": 1645,
2956
+ "global_step": 6737920,
2957
+ "num_episodes": 1036,
2958
+ "mean_reward": 224.31010818958282,
2959
+ "mean_length": 6678.82,
2960
+ "loss": 11.505419731140137,
2961
+ "sps": 1080.2913604455412
2962
+ },
2963
+ {
2964
+ "update": 1650,
2965
+ "global_step": 6758400,
2966
+ "num_episodes": 1038,
2967
+ "mean_reward": 231.36178754091262,
2968
+ "mean_length": 6722.13,
2969
+ "loss": 1.217943549156189,
2970
+ "sps": 1558.2708562037928
2971
  }
2972
  ]