| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.8314567815693746, |
| "eval_steps": 500, |
| "global_step": 600, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.013857613026156245, |
| "grad_norm": 0.1854863315820694, |
| "learning_rate": 2.0930232558139536e-05, |
| "loss": 0.8494, |
| "memory/device_reserved (GiB)": 89.89, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 10, |
| "tokens_per_second_per_gpu": 1111.65 |
| }, |
| { |
| "epoch": 0.02771522605231249, |
| "grad_norm": 0.09567277133464813, |
| "learning_rate": 4.418604651162791e-05, |
| "loss": 0.7842, |
| "memory/device_reserved (GiB)": 90.37, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 20, |
| "tokens_per_second_per_gpu": 1041.18 |
| }, |
| { |
| "epoch": 0.04157283907846873, |
| "grad_norm": 0.1211227998137474, |
| "learning_rate": 6.744186046511628e-05, |
| "loss": 0.7353, |
| "memory/device_reserved (GiB)": 90.37, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 30, |
| "tokens_per_second_per_gpu": 993.82 |
| }, |
| { |
| "epoch": 0.05543045210462498, |
| "grad_norm": 0.09696491807699203, |
| "learning_rate": 9.069767441860465e-05, |
| "loss": 0.6943, |
| "memory/device_reserved (GiB)": 90.37, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 40, |
| "tokens_per_second_per_gpu": 849.91 |
| }, |
| { |
| "epoch": 0.06928806513078123, |
| "grad_norm": 0.12007619440555573, |
| "learning_rate": 9.999547457436221e-05, |
| "loss": 0.6814, |
| "memory/device_reserved (GiB)": 90.38, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 50, |
| "tokens_per_second_per_gpu": 971.92 |
| }, |
| { |
| "epoch": 0.08314567815693746, |
| "grad_norm": 0.12358752638101578, |
| "learning_rate": 9.996782216198338e-05, |
| "loss": 0.69, |
| "memory/device_reserved (GiB)": 90.38, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 60, |
| "tokens_per_second_per_gpu": 862.06 |
| }, |
| { |
| "epoch": 0.09700329118309371, |
| "grad_norm": 0.11916535347700119, |
| "learning_rate": 9.991504534967746e-05, |
| "loss": 0.7048, |
| "memory/device_reserved (GiB)": 90.38, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 70, |
| "tokens_per_second_per_gpu": 1024.18 |
| }, |
| { |
| "epoch": 0.11086090420924996, |
| "grad_norm": 0.12464027106761932, |
| "learning_rate": 9.983717067423721e-05, |
| "loss": 0.6705, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 80, |
| "tokens_per_second_per_gpu": 1009.08 |
| }, |
| { |
| "epoch": 0.1247185172354062, |
| "grad_norm": 0.1264505237340927, |
| "learning_rate": 9.973423729195168e-05, |
| "loss": 0.6387, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 90, |
| "tokens_per_second_per_gpu": 1027.91 |
| }, |
| { |
| "epoch": 0.13857613026156246, |
| "grad_norm": 0.1262999027967453, |
| "learning_rate": 9.960629695891814e-05, |
| "loss": 0.6447, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 100, |
| "tokens_per_second_per_gpu": 935.25 |
| }, |
| { |
| "epoch": 0.1524337432877187, |
| "grad_norm": 0.12777547538280487, |
| "learning_rate": 9.945341400501838e-05, |
| "loss": 0.6846, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 110, |
| "tokens_per_second_per_gpu": 997.71 |
| }, |
| { |
| "epoch": 0.16629135631387493, |
| "grad_norm": 0.12563012540340424, |
| "learning_rate": 9.927566530157298e-05, |
| "loss": 0.6765, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 120, |
| "tokens_per_second_per_gpu": 1120.19 |
| }, |
| { |
| "epoch": 0.1801489693400312, |
| "grad_norm": 0.13785897195339203, |
| "learning_rate": 9.907314022268946e-05, |
| "loss": 0.6315, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 130, |
| "tokens_per_second_per_gpu": 801.56 |
| }, |
| { |
| "epoch": 0.19400658236618742, |
| "grad_norm": 0.14731284976005554, |
| "learning_rate": 9.884594060032406e-05, |
| "loss": 0.6642, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 140, |
| "tokens_per_second_per_gpu": 944.29 |
| }, |
| { |
| "epoch": 0.20786419539234366, |
| "grad_norm": 0.1298578828573227, |
| "learning_rate": 9.859418067307928e-05, |
| "loss": 0.6696, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 150, |
| "tokens_per_second_per_gpu": 913.99 |
| }, |
| { |
| "epoch": 0.22172180841849992, |
| "grad_norm": 0.13122966885566711, |
| "learning_rate": 9.831798702876352e-05, |
| "loss": 0.6768, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 160, |
| "tokens_per_second_per_gpu": 983.71 |
| }, |
| { |
| "epoch": 0.23557942144465616, |
| "grad_norm": 0.12356515228748322, |
| "learning_rate": 9.801749854074122e-05, |
| "loss": 0.6526, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 170, |
| "tokens_per_second_per_gpu": 903.88 |
| }, |
| { |
| "epoch": 0.2494370344708124, |
| "grad_norm": 0.11831440776586533, |
| "learning_rate": 9.769286629810572e-05, |
| "loss": 0.6415, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 180, |
| "tokens_per_second_per_gpu": 819.58 |
| }, |
| { |
| "epoch": 0.2632946474969686, |
| "grad_norm": 0.12409751862287521, |
| "learning_rate": 9.73442535297099e-05, |
| "loss": 0.6685, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 190, |
| "tokens_per_second_per_gpu": 973.62 |
| }, |
| { |
| "epoch": 0.2771522605231249, |
| "grad_norm": 0.1352369785308838, |
| "learning_rate": 9.697183552209288e-05, |
| "loss": 0.6329, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 200, |
| "tokens_per_second_per_gpu": 903.58 |
| }, |
| { |
| "epoch": 0.29100987354928115, |
| "grad_norm": 0.12526443600654602, |
| "learning_rate": 9.657579953134383e-05, |
| "loss": 0.6452, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 210, |
| "tokens_per_second_per_gpu": 859.06 |
| }, |
| { |
| "epoch": 0.3048674865754374, |
| "grad_norm": 0.11614521592855453, |
| "learning_rate": 9.615634468894752e-05, |
| "loss": 0.6407, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 220, |
| "tokens_per_second_per_gpu": 850.93 |
| }, |
| { |
| "epoch": 0.3187250996015936, |
| "grad_norm": 0.1328686773777008, |
| "learning_rate": 9.571368190165863e-05, |
| "loss": 0.6741, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 230, |
| "tokens_per_second_per_gpu": 1038.17 |
| }, |
| { |
| "epoch": 0.33258271262774985, |
| "grad_norm": 0.13082517683506012, |
| "learning_rate": 9.524803374545548e-05, |
| "loss": 0.6906, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 240, |
| "tokens_per_second_per_gpu": 1066.43 |
| }, |
| { |
| "epoch": 0.3464403256539061, |
| "grad_norm": 0.1282692551612854, |
| "learning_rate": 9.475963435362614e-05, |
| "loss": 0.6609, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 250, |
| "tokens_per_second_per_gpu": 986.84 |
| }, |
| { |
| "epoch": 0.3602979386800624, |
| "grad_norm": 0.13708311319351196, |
| "learning_rate": 9.424872929904358e-05, |
| "loss": 0.6169, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 260, |
| "tokens_per_second_per_gpu": 1126.25 |
| }, |
| { |
| "epoch": 0.3741555517062186, |
| "grad_norm": 0.1323172152042389, |
| "learning_rate": 9.371557547068878e-05, |
| "loss": 0.6574, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 270, |
| "tokens_per_second_per_gpu": 1062.03 |
| }, |
| { |
| "epoch": 0.38801316473237485, |
| "grad_norm": 0.12729060649871826, |
| "learning_rate": 9.316044094448392e-05, |
| "loss": 0.6583, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 280, |
| "tokens_per_second_per_gpu": 941.33 |
| }, |
| { |
| "epoch": 0.4018707777585311, |
| "grad_norm": 0.14719286561012268, |
| "learning_rate": 9.25836048485008e-05, |
| "loss": 0.6392, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 290, |
| "tokens_per_second_per_gpu": 883.56 |
| }, |
| { |
| "epoch": 0.4157283907846873, |
| "grad_norm": 0.12530402839183807, |
| "learning_rate": 9.198535722261181e-05, |
| "loss": 0.6623, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 300, |
| "tokens_per_second_per_gpu": 902.6 |
| }, |
| { |
| "epoch": 0.4295860038108436, |
| "grad_norm": 0.1330760419368744, |
| "learning_rate": 9.136599887265483e-05, |
| "loss": 0.645, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 310, |
| "tokens_per_second_per_gpu": 997.96 |
| }, |
| { |
| "epoch": 0.44344361683699984, |
| "grad_norm": 0.13317464292049408, |
| "learning_rate": 9.072584121918425e-05, |
| "loss": 0.6139, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 320, |
| "tokens_per_second_per_gpu": 980.08 |
| }, |
| { |
| "epoch": 0.4573012298631561, |
| "grad_norm": 0.12773385643959045, |
| "learning_rate": 9.006520614088535e-05, |
| "loss": 0.6658, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 330, |
| "tokens_per_second_per_gpu": 937.57 |
| }, |
| { |
| "epoch": 0.4711588428893123, |
| "grad_norm": 0.13415341079235077, |
| "learning_rate": 8.938442581272983e-05, |
| "loss": 0.6737, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 340, |
| "tokens_per_second_per_gpu": 1046.92 |
| }, |
| { |
| "epoch": 0.48501645591546855, |
| "grad_norm": 0.13382680714130402, |
| "learning_rate": 8.868384253895445e-05, |
| "loss": 0.6575, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 350, |
| "tokens_per_second_per_gpu": 1049.72 |
| }, |
| { |
| "epoch": 0.4988740689416248, |
| "grad_norm": 0.12621234357357025, |
| "learning_rate": 8.796380858094643e-05, |
| "loss": 0.6423, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 360, |
| "tokens_per_second_per_gpu": 926.35 |
| }, |
| { |
| "epoch": 0.5127316819677811, |
| "grad_norm": 0.14663882553577423, |
| "learning_rate": 8.722468598012245e-05, |
| "loss": 0.6524, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 370, |
| "tokens_per_second_per_gpu": 993.98 |
| }, |
| { |
| "epoch": 0.5265892949939373, |
| "grad_norm": 0.12107036262750626, |
| "learning_rate": 8.646684637588991e-05, |
| "loss": 0.6158, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 380, |
| "tokens_per_second_per_gpu": 882.74 |
| }, |
| { |
| "epoch": 0.5404469080200935, |
| "grad_norm": 0.12905746698379517, |
| "learning_rate": 8.56906708187824e-05, |
| "loss": 0.6359, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 390, |
| "tokens_per_second_per_gpu": 992.61 |
| }, |
| { |
| "epoch": 0.5543045210462498, |
| "grad_norm": 0.14433123171329498, |
| "learning_rate": 8.489654957886306e-05, |
| "loss": 0.6124, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 400, |
| "tokens_per_second_per_gpu": 871.5 |
| }, |
| { |
| "epoch": 0.568162134072406, |
| "grad_norm": 0.13294072449207306, |
| "learning_rate": 8.40848819494923e-05, |
| "loss": 0.6803, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 410, |
| "tokens_per_second_per_gpu": 950.11 |
| }, |
| { |
| "epoch": 0.5820197470985623, |
| "grad_norm": 0.1526036411523819, |
| "learning_rate": 8.325607604655839e-05, |
| "loss": 0.6088, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 420, |
| "tokens_per_second_per_gpu": 1025.1 |
| }, |
| { |
| "epoch": 0.5958773601247185, |
| "grad_norm": 0.1453717052936554, |
| "learning_rate": 8.241054860327216e-05, |
| "loss": 0.6669, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 430, |
| "tokens_per_second_per_gpu": 1074.36 |
| }, |
| { |
| "epoch": 0.6097349731508748, |
| "grad_norm": 0.1466919481754303, |
| "learning_rate": 8.154872476062868e-05, |
| "loss": 0.6147, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 440, |
| "tokens_per_second_per_gpu": 1010.83 |
| }, |
| { |
| "epoch": 0.6235925861770311, |
| "grad_norm": 0.12707076966762543, |
| "learning_rate": 8.067103785364139e-05, |
| "loss": 0.6096, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 450, |
| "tokens_per_second_per_gpu": 919.83 |
| }, |
| { |
| "epoch": 0.6374501992031872, |
| "grad_norm": 0.13485883176326752, |
| "learning_rate": 7.977792919345633e-05, |
| "loss": 0.6342, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 460, |
| "tokens_per_second_per_gpu": 1033.87 |
| }, |
| { |
| "epoch": 0.6513078122293435, |
| "grad_norm": 0.12489234656095505, |
| "learning_rate": 7.886984784545566e-05, |
| "loss": 0.6256, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 470, |
| "tokens_per_second_per_gpu": 1018.58 |
| }, |
| { |
| "epoch": 0.6651654252554997, |
| "grad_norm": 0.16094225645065308, |
| "learning_rate": 7.794725040346251e-05, |
| "loss": 0.6455, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 480, |
| "tokens_per_second_per_gpu": 933.16 |
| }, |
| { |
| "epoch": 0.679023038281656, |
| "grad_norm": 0.1351306140422821, |
| "learning_rate": 7.701060076016024e-05, |
| "loss": 0.6613, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 490, |
| "tokens_per_second_per_gpu": 970.62 |
| }, |
| { |
| "epoch": 0.6928806513078122, |
| "grad_norm": 0.1223958283662796, |
| "learning_rate": 7.606036987384184e-05, |
| "loss": 0.6186, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 500, |
| "tokens_per_second_per_gpu": 925.45 |
| }, |
| { |
| "epoch": 0.7067382643339685, |
| "grad_norm": 0.11873335391283035, |
| "learning_rate": 7.509703553160666e-05, |
| "loss": 0.646, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 510, |
| "tokens_per_second_per_gpu": 990.93 |
| }, |
| { |
| "epoch": 0.7205958773601248, |
| "grad_norm": 0.12990343570709229, |
| "learning_rate": 7.412108210912345e-05, |
| "loss": 0.6155, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 520, |
| "tokens_per_second_per_gpu": 955.97 |
| }, |
| { |
| "epoch": 0.7344534903862809, |
| "grad_norm": 0.1376057118177414, |
| "learning_rate": 7.31330003270808e-05, |
| "loss": 0.6443, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 530, |
| "tokens_per_second_per_gpu": 895.07 |
| }, |
| { |
| "epoch": 0.7483111034124372, |
| "grad_norm": 0.13277359306812286, |
| "learning_rate": 7.213328700444696e-05, |
| "loss": 0.6188, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 540, |
| "tokens_per_second_per_gpu": 907.27 |
| }, |
| { |
| "epoch": 0.7621687164385934, |
| "grad_norm": 0.13623632490634918, |
| "learning_rate": 7.112244480866356e-05, |
| "loss": 0.6471, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 550, |
| "tokens_per_second_per_gpu": 1002.06 |
| }, |
| { |
| "epoch": 0.7760263294647497, |
| "grad_norm": 0.13037075102329254, |
| "learning_rate": 7.010098200289859e-05, |
| "loss": 0.647, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 560, |
| "tokens_per_second_per_gpu": 1047.58 |
| }, |
| { |
| "epoch": 0.789883942490906, |
| "grad_norm": 0.12997141480445862, |
| "learning_rate": 6.906941219048584e-05, |
| "loss": 0.6071, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 570, |
| "tokens_per_second_per_gpu": 1073.59 |
| }, |
| { |
| "epoch": 0.8037415555170622, |
| "grad_norm": 0.14118416607379913, |
| "learning_rate": 6.802825405667905e-05, |
| "loss": 0.6101, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 580, |
| "tokens_per_second_per_gpu": 1097.31 |
| }, |
| { |
| "epoch": 0.8175991685432185, |
| "grad_norm": 0.126139834523201, |
| "learning_rate": 6.697803110785115e-05, |
| "loss": 0.6084, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 590, |
| "tokens_per_second_per_gpu": 933.94 |
| }, |
| { |
| "epoch": 0.8314567815693746, |
| "grad_norm": 0.1207822933793068, |
| "learning_rate": 6.591927140826902e-05, |
| "loss": 0.6416, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 600, |
| "tokens_per_second_per_gpu": 1007.52 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1444, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 300, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.327934896681779e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|