Instructions to use amburger66/robometer-4b-lora-robotsmith-task08-real with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use amburger66/robometer-4b-lora-robotsmith-task08-real with Transformers:
# Load model directly from transformers import AutoProcessor, RBM processor = AutoProcessor.from_pretrained("amburger66/robometer-4b-lora-robotsmith-task08-real") model = RBM.from_pretrained("amburger66/robometer-4b-lora-robotsmith-task08-real") - Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.05755395683453238, | |
| "eval_steps": 50, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 5.755395683453237e-05, | |
| "grad_norm": 19.742076873779297, | |
| "learning_rate": 0.0, | |
| "loss": 1.1824, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.00011510791366906474, | |
| "grad_norm": 14.090814590454102, | |
| "learning_rate": 2.0000000000000002e-07, | |
| "loss": 1.5161, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.00017266187050359714, | |
| "grad_norm": 9.585816383361816, | |
| "learning_rate": 4.0000000000000003e-07, | |
| "loss": 0.4535, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0002302158273381295, | |
| "grad_norm": 13.000758171081543, | |
| "learning_rate": 6.000000000000001e-07, | |
| "loss": 1.1017, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.00028776978417266187, | |
| "grad_norm": 7.664703369140625, | |
| "learning_rate": 8.000000000000001e-07, | |
| "loss": 0.7945, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.00034532374100719427, | |
| "grad_norm": 12.25283145904541, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 1.1652, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0004028776978417266, | |
| "grad_norm": 11.274255752563477, | |
| "learning_rate": 1.2000000000000002e-06, | |
| "loss": 0.719, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.000460431654676259, | |
| "grad_norm": 7.998445510864258, | |
| "learning_rate": 1.4000000000000001e-06, | |
| "loss": 0.6316, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0005179856115107913, | |
| "grad_norm": 8.421154022216797, | |
| "learning_rate": 1.6000000000000001e-06, | |
| "loss": 0.8872, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0005755395683453237, | |
| "grad_norm": 9.403829574584961, | |
| "learning_rate": 1.8000000000000001e-06, | |
| "loss": 0.7519, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0006330935251798561, | |
| "grad_norm": 14.926182746887207, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 0.8313, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.0006906474820143885, | |
| "grad_norm": 5.170341491699219, | |
| "learning_rate": 2.2e-06, | |
| "loss": 0.4195, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0007482014388489208, | |
| "grad_norm": 4.143105506896973, | |
| "learning_rate": 2.4000000000000003e-06, | |
| "loss": 0.3894, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.0008057553956834532, | |
| "grad_norm": 7.147073268890381, | |
| "learning_rate": 2.6e-06, | |
| "loss": 0.5323, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.0008633093525179857, | |
| "grad_norm": 5.619192600250244, | |
| "learning_rate": 2.8000000000000003e-06, | |
| "loss": 0.3687, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.000920863309352518, | |
| "grad_norm": 6.790902614593506, | |
| "learning_rate": 3e-06, | |
| "loss": 0.8943, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0009784172661870504, | |
| "grad_norm": 7.149299621582031, | |
| "learning_rate": 3.2000000000000003e-06, | |
| "loss": 0.5624, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.0010359712230215827, | |
| "grad_norm": 6.0668439865112305, | |
| "learning_rate": 3.4000000000000005e-06, | |
| "loss": 0.5257, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0010935251798561152, | |
| "grad_norm": 7.927511692047119, | |
| "learning_rate": 3.6000000000000003e-06, | |
| "loss": 0.7319, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.0011510791366906475, | |
| "grad_norm": 3.352524757385254, | |
| "learning_rate": 3.8000000000000005e-06, | |
| "loss": 0.3106, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0012086330935251798, | |
| "grad_norm": 17.440523147583008, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 1.4364, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.0012661870503597123, | |
| "grad_norm": 3.9956955909729004, | |
| "learning_rate": 4.2000000000000004e-06, | |
| "loss": 0.4213, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.0013237410071942446, | |
| "grad_norm": 8.989081382751465, | |
| "learning_rate": 4.4e-06, | |
| "loss": 0.552, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.001381294964028777, | |
| "grad_norm": 8.471776008605957, | |
| "learning_rate": 4.600000000000001e-06, | |
| "loss": 0.8194, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.0014388489208633094, | |
| "grad_norm": 9.24136734008789, | |
| "learning_rate": 4.800000000000001e-06, | |
| "loss": 0.7052, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.0014964028776978417, | |
| "grad_norm": 4.3442158699035645, | |
| "learning_rate": 5e-06, | |
| "loss": 0.3331, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.0015539568345323742, | |
| "grad_norm": 9.657572746276855, | |
| "learning_rate": 5.2e-06, | |
| "loss": 0.5311, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.0016115107913669065, | |
| "grad_norm": 7.1384992599487305, | |
| "learning_rate": 5.400000000000001e-06, | |
| "loss": 0.3951, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.0016690647482014388, | |
| "grad_norm": 16.853891372680664, | |
| "learning_rate": 5.600000000000001e-06, | |
| "loss": 1.3869, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.0017266187050359713, | |
| "grad_norm": 9.793233871459961, | |
| "learning_rate": 5.8e-06, | |
| "loss": 0.8952, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0017841726618705036, | |
| "grad_norm": 7.212555408477783, | |
| "learning_rate": 6e-06, | |
| "loss": 0.7198, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.001841726618705036, | |
| "grad_norm": 6.492921829223633, | |
| "learning_rate": 6.200000000000001e-06, | |
| "loss": 0.5619, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.0018992805755395684, | |
| "grad_norm": 6.3283915519714355, | |
| "learning_rate": 6.4000000000000006e-06, | |
| "loss": 1.0968, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.0019568345323741007, | |
| "grad_norm": 9.577136993408203, | |
| "learning_rate": 6.600000000000001e-06, | |
| "loss": 0.8173, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.002014388489208633, | |
| "grad_norm": 9.083664894104004, | |
| "learning_rate": 6.800000000000001e-06, | |
| "loss": 0.8457, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.0020719424460431653, | |
| "grad_norm": 4.176952362060547, | |
| "learning_rate": 7e-06, | |
| "loss": 0.3089, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.002129496402877698, | |
| "grad_norm": 5.284554958343506, | |
| "learning_rate": 7.2000000000000005e-06, | |
| "loss": 0.3452, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.0021870503597122303, | |
| "grad_norm": 12.007137298583984, | |
| "learning_rate": 7.4e-06, | |
| "loss": 1.4472, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.0022446043165467626, | |
| "grad_norm": 9.785158157348633, | |
| "learning_rate": 7.600000000000001e-06, | |
| "loss": 1.2897, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.002302158273381295, | |
| "grad_norm": 8.333564758300781, | |
| "learning_rate": 7.800000000000002e-06, | |
| "loss": 0.3833, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0023597122302158272, | |
| "grad_norm": 5.145090103149414, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.2996, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.0024172661870503595, | |
| "grad_norm": 10.46713924407959, | |
| "learning_rate": 8.2e-06, | |
| "loss": 0.6667, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.0024748201438848923, | |
| "grad_norm": 4.508591175079346, | |
| "learning_rate": 8.400000000000001e-06, | |
| "loss": 0.4044, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.0025323741007194246, | |
| "grad_norm": 9.932887077331543, | |
| "learning_rate": 8.6e-06, | |
| "loss": 0.6707, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.002589928057553957, | |
| "grad_norm": 4.800012588500977, | |
| "learning_rate": 8.8e-06, | |
| "loss": 0.5579, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.002647482014388489, | |
| "grad_norm": 2.2920901775360107, | |
| "learning_rate": 9e-06, | |
| "loss": 0.2296, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.0027050359712230214, | |
| "grad_norm": 7.941588878631592, | |
| "learning_rate": 9.200000000000002e-06, | |
| "loss": 0.5172, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.002762589928057554, | |
| "grad_norm": 13.144063949584961, | |
| "learning_rate": 9.4e-06, | |
| "loss": 1.1074, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.0028201438848920865, | |
| "grad_norm": 4.377220630645752, | |
| "learning_rate": 9.600000000000001e-06, | |
| "loss": 0.2969, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.0028776978417266188, | |
| "grad_norm": 6.273579120635986, | |
| "learning_rate": 9.800000000000001e-06, | |
| "loss": 0.382, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.002935251798561151, | |
| "grad_norm": 6.432647705078125, | |
| "learning_rate": 1e-05, | |
| "loss": 0.7747, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.0029928057553956834, | |
| "grad_norm": 9.977968215942383, | |
| "learning_rate": 1.02e-05, | |
| "loss": 0.6861, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.0030503597122302157, | |
| "grad_norm": 6.838860034942627, | |
| "learning_rate": 1.04e-05, | |
| "loss": 0.4142, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.0031079136690647484, | |
| "grad_norm": 15.348876953125, | |
| "learning_rate": 1.0600000000000002e-05, | |
| "loss": 1.1043, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.0031654676258992807, | |
| "grad_norm": 4.6307291984558105, | |
| "learning_rate": 1.0800000000000002e-05, | |
| "loss": 0.5295, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.003223021582733813, | |
| "grad_norm": 13.125807762145996, | |
| "learning_rate": 1.1000000000000001e-05, | |
| "loss": 0.9708, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.0032805755395683453, | |
| "grad_norm": 11.310308456420898, | |
| "learning_rate": 1.1200000000000001e-05, | |
| "loss": 1.0685, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.0033381294964028776, | |
| "grad_norm": 4.394148826599121, | |
| "learning_rate": 1.14e-05, | |
| "loss": 0.274, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.00339568345323741, | |
| "grad_norm": 2.017930030822754, | |
| "learning_rate": 1.16e-05, | |
| "loss": 0.1795, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.0034532374100719426, | |
| "grad_norm": 5.994359970092773, | |
| "learning_rate": 1.18e-05, | |
| "loss": 0.5362, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.003510791366906475, | |
| "grad_norm": 5.567146301269531, | |
| "learning_rate": 1.2e-05, | |
| "loss": 0.7449, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.003568345323741007, | |
| "grad_norm": 4.293890476226807, | |
| "learning_rate": 1.22e-05, | |
| "loss": 0.5452, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.0036258992805755395, | |
| "grad_norm": 8.327472686767578, | |
| "learning_rate": 1.2400000000000002e-05, | |
| "loss": 0.6844, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.003683453237410072, | |
| "grad_norm": 4.723773002624512, | |
| "learning_rate": 1.2600000000000001e-05, | |
| "loss": 0.3286, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.0037410071942446045, | |
| "grad_norm": 10.427044868469238, | |
| "learning_rate": 1.2800000000000001e-05, | |
| "loss": 0.7127, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.003798561151079137, | |
| "grad_norm": 3.913362741470337, | |
| "learning_rate": 1.3000000000000001e-05, | |
| "loss": 0.4672, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.003856115107913669, | |
| "grad_norm": 4.887617588043213, | |
| "learning_rate": 1.3200000000000002e-05, | |
| "loss": 0.5162, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.003913669064748201, | |
| "grad_norm": 6.661849021911621, | |
| "learning_rate": 1.3400000000000002e-05, | |
| "loss": 0.6116, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.003971223021582734, | |
| "grad_norm": 7.069644451141357, | |
| "learning_rate": 1.3600000000000002e-05, | |
| "loss": 0.637, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.004028776978417266, | |
| "grad_norm": 4.68384313583374, | |
| "learning_rate": 1.38e-05, | |
| "loss": 0.4406, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.004086330935251799, | |
| "grad_norm": 5.5008134841918945, | |
| "learning_rate": 1.4e-05, | |
| "loss": 0.3483, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.004143884892086331, | |
| "grad_norm": 4.3354105949401855, | |
| "learning_rate": 1.4200000000000001e-05, | |
| "loss": 0.5613, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.004201438848920863, | |
| "grad_norm": 5.105152130126953, | |
| "learning_rate": 1.4400000000000001e-05, | |
| "loss": 0.3011, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.004258992805755396, | |
| "grad_norm": 2.7917816638946533, | |
| "learning_rate": 1.46e-05, | |
| "loss": 0.2686, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.004316546762589928, | |
| "grad_norm": 4.981333255767822, | |
| "learning_rate": 1.48e-05, | |
| "loss": 0.3631, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.004374100719424461, | |
| "grad_norm": 3.1867117881774902, | |
| "learning_rate": 1.5000000000000002e-05, | |
| "loss": 0.4055, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.0044316546762589925, | |
| "grad_norm": 7.01395320892334, | |
| "learning_rate": 1.5200000000000002e-05, | |
| "loss": 0.5945, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.004489208633093525, | |
| "grad_norm": 7.518090724945068, | |
| "learning_rate": 1.54e-05, | |
| "loss": 0.9833, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.004546762589928057, | |
| "grad_norm": 3.8727071285247803, | |
| "learning_rate": 1.5600000000000003e-05, | |
| "loss": 0.3201, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.00460431654676259, | |
| "grad_norm": 3.5626542568206787, | |
| "learning_rate": 1.58e-05, | |
| "loss": 0.4482, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.004661870503597123, | |
| "grad_norm": 4.564299583435059, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.3674, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.0047194244604316545, | |
| "grad_norm": 3.150601387023926, | |
| "learning_rate": 1.62e-05, | |
| "loss": 0.4653, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.004776978417266187, | |
| "grad_norm": 4.487133502960205, | |
| "learning_rate": 1.64e-05, | |
| "loss": 0.3553, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.004834532374100719, | |
| "grad_norm": 9.726006507873535, | |
| "learning_rate": 1.66e-05, | |
| "loss": 0.7446, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.004892086330935252, | |
| "grad_norm": 4.138358116149902, | |
| "learning_rate": 1.6800000000000002e-05, | |
| "loss": 0.2264, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.0049496402877697845, | |
| "grad_norm": 4.193305015563965, | |
| "learning_rate": 1.7e-05, | |
| "loss": 0.3754, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.005007194244604316, | |
| "grad_norm": 5.699273109436035, | |
| "learning_rate": 1.72e-05, | |
| "loss": 0.2857, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.005064748201438849, | |
| "grad_norm": 3.635141134262085, | |
| "learning_rate": 1.7400000000000003e-05, | |
| "loss": 0.4759, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.005122302158273381, | |
| "grad_norm": 4.9962592124938965, | |
| "learning_rate": 1.76e-05, | |
| "loss": 0.3983, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.005179856115107914, | |
| "grad_norm": 9.600940704345703, | |
| "learning_rate": 1.7800000000000002e-05, | |
| "loss": 0.6997, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.005237410071942446, | |
| "grad_norm": 5.990379333496094, | |
| "learning_rate": 1.8e-05, | |
| "loss": 0.325, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.005294964028776978, | |
| "grad_norm": 3.9311363697052, | |
| "learning_rate": 1.8200000000000002e-05, | |
| "loss": 0.221, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.005352517985611511, | |
| "grad_norm": 4.358214378356934, | |
| "learning_rate": 1.8400000000000003e-05, | |
| "loss": 0.5484, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.005410071942446043, | |
| "grad_norm": 4.867093086242676, | |
| "learning_rate": 1.86e-05, | |
| "loss": 0.3255, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.005467625899280576, | |
| "grad_norm": 5.905974864959717, | |
| "learning_rate": 1.88e-05, | |
| "loss": 0.7437, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.005525179856115108, | |
| "grad_norm": 3.0502068996429443, | |
| "learning_rate": 1.9e-05, | |
| "loss": 0.3985, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.00558273381294964, | |
| "grad_norm": 3.0659244060516357, | |
| "learning_rate": 1.9200000000000003e-05, | |
| "loss": 0.4518, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.005640287769784173, | |
| "grad_norm": 5.97324800491333, | |
| "learning_rate": 1.94e-05, | |
| "loss": 0.5574, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.005697841726618705, | |
| "grad_norm": 5.410953998565674, | |
| "learning_rate": 1.9600000000000002e-05, | |
| "loss": 0.6145, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.0057553956834532375, | |
| "grad_norm": 3.2696614265441895, | |
| "learning_rate": 1.98e-05, | |
| "loss": 0.3616, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.005812949640287769, | |
| "grad_norm": 6.245858192443848, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2664, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.005870503597122302, | |
| "grad_norm": 8.132523536682129, | |
| "learning_rate": 1.9999939076577906e-05, | |
| "loss": 0.4025, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.005928057553956835, | |
| "grad_norm": 11.821637153625488, | |
| "learning_rate": 1.9999756307053947e-05, | |
| "loss": 0.9175, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.005985611510791367, | |
| "grad_norm": 5.507436752319336, | |
| "learning_rate": 1.9999451693655125e-05, | |
| "loss": 0.5407, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.0060431654676258995, | |
| "grad_norm": 2.0903306007385254, | |
| "learning_rate": 1.9999025240093045e-05, | |
| "loss": 0.2037, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.006100719424460431, | |
| "grad_norm": 6.703708648681641, | |
| "learning_rate": 1.9998476951563914e-05, | |
| "loss": 0.3923, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.006158273381294964, | |
| "grad_norm": 7.479000091552734, | |
| "learning_rate": 1.9997806834748455e-05, | |
| "loss": 0.5959, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.006215827338129497, | |
| "grad_norm": 3.7436177730560303, | |
| "learning_rate": 1.9997014897811834e-05, | |
| "loss": 0.3848, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.006273381294964029, | |
| "grad_norm": 6.0655388832092285, | |
| "learning_rate": 1.9996101150403543e-05, | |
| "loss": 0.3867, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.006330935251798561, | |
| "grad_norm": 2.6236915588378906, | |
| "learning_rate": 1.9995065603657317e-05, | |
| "loss": 0.2235, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.006388489208633093, | |
| "grad_norm": 3.6318767070770264, | |
| "learning_rate": 1.999390827019096e-05, | |
| "loss": 0.2775, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.006446043165467626, | |
| "grad_norm": 3.6131348609924316, | |
| "learning_rate": 1.999262916410621e-05, | |
| "loss": 0.473, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.006503597122302159, | |
| "grad_norm": 5.235411167144775, | |
| "learning_rate": 1.9991228300988586e-05, | |
| "loss": 0.5327, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.0065611510791366906, | |
| "grad_norm": 4.830928802490234, | |
| "learning_rate": 1.998970569790715e-05, | |
| "loss": 0.3054, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.006618705035971223, | |
| "grad_norm": 3.149054527282715, | |
| "learning_rate": 1.9988061373414342e-05, | |
| "loss": 0.2049, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.006676258992805755, | |
| "grad_norm": 0.9228267669677734, | |
| "learning_rate": 1.9986295347545738e-05, | |
| "loss": 0.137, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.006733812949640288, | |
| "grad_norm": 1.2999602556228638, | |
| "learning_rate": 1.9984407641819812e-05, | |
| "loss": 0.1165, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.00679136690647482, | |
| "grad_norm": 5.368565082550049, | |
| "learning_rate": 1.9982398279237657e-05, | |
| "loss": 0.4314, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.0068489208633093525, | |
| "grad_norm": 11.419666290283203, | |
| "learning_rate": 1.9980267284282718e-05, | |
| "loss": 0.7708, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.006906474820143885, | |
| "grad_norm": 5.084501266479492, | |
| "learning_rate": 1.9978014682920503e-05, | |
| "loss": 0.2652, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.006964028776978417, | |
| "grad_norm": 5.778707027435303, | |
| "learning_rate": 1.9975640502598243e-05, | |
| "loss": 0.6306, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.00702158273381295, | |
| "grad_norm": 4.678982257843018, | |
| "learning_rate": 1.997314477224458e-05, | |
| "loss": 0.2769, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.007079136690647482, | |
| "grad_norm": 3.2127490043640137, | |
| "learning_rate": 1.9970527522269204e-05, | |
| "loss": 0.3237, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.007136690647482014, | |
| "grad_norm": 8.614684104919434, | |
| "learning_rate": 1.9967788784562474e-05, | |
| "loss": 0.7173, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.007194244604316547, | |
| "grad_norm": 7.342788219451904, | |
| "learning_rate": 1.9964928592495046e-05, | |
| "loss": 0.6905, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.007251798561151079, | |
| "grad_norm": 3.408464193344116, | |
| "learning_rate": 1.9961946980917457e-05, | |
| "loss": 0.2835, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.007309352517985612, | |
| "grad_norm": 4.440028667449951, | |
| "learning_rate": 1.9958843986159705e-05, | |
| "loss": 0.2777, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.007366906474820144, | |
| "grad_norm": 2.4154090881347656, | |
| "learning_rate": 1.99556196460308e-05, | |
| "loss": 0.205, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.007424460431654676, | |
| "grad_norm": 2.5792348384857178, | |
| "learning_rate": 1.9952273999818312e-05, | |
| "loss": 0.2947, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.007482014388489209, | |
| "grad_norm": 5.055807113647461, | |
| "learning_rate": 1.9948807088287884e-05, | |
| "loss": 0.4372, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.007539568345323741, | |
| "grad_norm": 5.54684591293335, | |
| "learning_rate": 1.9945218953682736e-05, | |
| "loss": 0.305, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.007597122302158274, | |
| "grad_norm": 5.550899028778076, | |
| "learning_rate": 1.9941509639723155e-05, | |
| "loss": 0.5005, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.0076546762589928055, | |
| "grad_norm": 6.129327297210693, | |
| "learning_rate": 1.9937679191605964e-05, | |
| "loss": 0.7251, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.007712230215827338, | |
| "grad_norm": 14.2527437210083, | |
| "learning_rate": 1.9933727656003964e-05, | |
| "loss": 0.6724, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.00776978417266187, | |
| "grad_norm": 5.460100173950195, | |
| "learning_rate": 1.992965508106537e-05, | |
| "loss": 0.3455, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.007827338129496403, | |
| "grad_norm": 6.3173828125, | |
| "learning_rate": 1.9925461516413224e-05, | |
| "loss": 0.5253, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.007884892086330935, | |
| "grad_norm": 2.2781403064727783, | |
| "learning_rate": 1.9921147013144782e-05, | |
| "loss": 0.284, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.007942446043165468, | |
| "grad_norm": 8.100839614868164, | |
| "learning_rate": 1.9916711623830904e-05, | |
| "loss": 0.561, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.008, | |
| "grad_norm": 2.185936212539673, | |
| "learning_rate": 1.991215540251542e-05, | |
| "loss": 0.2032, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.008057553956834532, | |
| "grad_norm": 3.2472143173217773, | |
| "learning_rate": 1.9907478404714438e-05, | |
| "loss": 0.3126, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.008115107913669064, | |
| "grad_norm": 2.253760576248169, | |
| "learning_rate": 1.9902680687415704e-05, | |
| "loss": 0.28, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.008172661870503597, | |
| "grad_norm": 2.6927177906036377, | |
| "learning_rate": 1.989776230907789e-05, | |
| "loss": 0.32, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.00823021582733813, | |
| "grad_norm": 6.3567681312561035, | |
| "learning_rate": 1.9892723329629885e-05, | |
| "loss": 0.4475, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.008287769784172661, | |
| "grad_norm": 2.409682512283325, | |
| "learning_rate": 1.988756381047006e-05, | |
| "loss": 0.1429, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.008345323741007195, | |
| "grad_norm": 1.8926228284835815, | |
| "learning_rate": 1.988228381446553e-05, | |
| "loss": 0.1483, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.008402877697841727, | |
| "grad_norm": 4.373330116271973, | |
| "learning_rate": 1.9876883405951378e-05, | |
| "loss": 0.2895, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.008460431654676259, | |
| "grad_norm": 5.756573677062988, | |
| "learning_rate": 1.987136265072988e-05, | |
| "loss": 0.3329, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.008517985611510792, | |
| "grad_norm": 4.497378826141357, | |
| "learning_rate": 1.9865721616069695e-05, | |
| "loss": 0.4046, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.008575539568345324, | |
| "grad_norm": 3.908770799636841, | |
| "learning_rate": 1.985996037070505e-05, | |
| "loss": 0.3289, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.008633093525179856, | |
| "grad_norm": 9.844822883605957, | |
| "learning_rate": 1.9854078984834904e-05, | |
| "loss": 0.4939, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.008690647482014388, | |
| "grad_norm": 8.726676940917969, | |
| "learning_rate": 1.9848077530122083e-05, | |
| "loss": 0.5217, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.008748201438848921, | |
| "grad_norm": 2.3976569175720215, | |
| "learning_rate": 1.984195607969242e-05, | |
| "loss": 0.0699, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.008805755395683453, | |
| "grad_norm": 5.398916244506836, | |
| "learning_rate": 1.983571470813386e-05, | |
| "loss": 0.4521, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.008863309352517985, | |
| "grad_norm": 6.7461137771606445, | |
| "learning_rate": 1.9829353491495545e-05, | |
| "loss": 0.3828, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.008920863309352519, | |
| "grad_norm": 4.338736057281494, | |
| "learning_rate": 1.982287250728689e-05, | |
| "loss": 0.3047, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.00897841726618705, | |
| "grad_norm": 6.214763164520264, | |
| "learning_rate": 1.9816271834476642e-05, | |
| "loss": 0.3477, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.009035971223021582, | |
| "grad_norm": 2.8336939811706543, | |
| "learning_rate": 1.9809551553491918e-05, | |
| "loss": 0.3321, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.009093525179856114, | |
| "grad_norm": 4.502871990203857, | |
| "learning_rate": 1.9802711746217222e-05, | |
| "loss": 0.4364, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.009151079136690648, | |
| "grad_norm": 8.72002124786377, | |
| "learning_rate": 1.979575249599344e-05, | |
| "loss": 0.4955, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.00920863309352518, | |
| "grad_norm": 6.201029300689697, | |
| "learning_rate": 1.9788673887616852e-05, | |
| "loss": 0.2945, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.009266187050359712, | |
| "grad_norm": 8.309669494628906, | |
| "learning_rate": 1.9781476007338058e-05, | |
| "loss": 0.3601, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.009323741007194245, | |
| "grad_norm": 2.303840398788452, | |
| "learning_rate": 1.9774158942860962e-05, | |
| "loss": 0.2439, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.009381294964028777, | |
| "grad_norm": 1.9197461605072021, | |
| "learning_rate": 1.9766722783341682e-05, | |
| "loss": 0.1073, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.009438848920863309, | |
| "grad_norm": 2.906818389892578, | |
| "learning_rate": 1.9759167619387474e-05, | |
| "loss": 0.3538, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.009496402877697842, | |
| "grad_norm": 6.95295524597168, | |
| "learning_rate": 1.9751493543055634e-05, | |
| "loss": 0.7104, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.009553956834532374, | |
| "grad_norm": 13.919471740722656, | |
| "learning_rate": 1.9743700647852356e-05, | |
| "loss": 0.5481, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.009611510791366906, | |
| "grad_norm": 5.098160743713379, | |
| "learning_rate": 1.9735789028731603e-05, | |
| "loss": 0.6219, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.009669064748201438, | |
| "grad_norm": 2.4541916847229004, | |
| "learning_rate": 1.972775878209397e-05, | |
| "loss": 0.1824, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.009726618705035972, | |
| "grad_norm": 2.5615649223327637, | |
| "learning_rate": 1.9719610005785466e-05, | |
| "loss": 0.2294, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.009784172661870504, | |
| "grad_norm": 5.56582498550415, | |
| "learning_rate": 1.971134279909636e-05, | |
| "loss": 0.4881, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.009841726618705035, | |
| "grad_norm": 3.3772428035736084, | |
| "learning_rate": 1.9702957262759964e-05, | |
| "loss": 0.2457, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.009899280575539569, | |
| "grad_norm": 3.7561800479888916, | |
| "learning_rate": 1.9694453498951392e-05, | |
| "loss": 0.2876, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.009956834532374101, | |
| "grad_norm": 4.44319486618042, | |
| "learning_rate": 1.9685831611286312e-05, | |
| "loss": 0.3955, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.010014388489208633, | |
| "grad_norm": 7.627588748931885, | |
| "learning_rate": 1.9677091704819714e-05, | |
| "loss": 0.5145, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.010071942446043165, | |
| "grad_norm": 2.9267642498016357, | |
| "learning_rate": 1.9668233886044597e-05, | |
| "loss": 0.2853, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.010129496402877698, | |
| "grad_norm": 2.577763319015503, | |
| "learning_rate": 1.9659258262890683e-05, | |
| "loss": 0.1479, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.01018705035971223, | |
| "grad_norm": 3.698162794113159, | |
| "learning_rate": 1.9650164944723116e-05, | |
| "loss": 0.3178, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.010244604316546762, | |
| "grad_norm": 3.1427218914031982, | |
| "learning_rate": 1.96409540423411e-05, | |
| "loss": 0.3621, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.010302158273381296, | |
| "grad_norm": 6.883255958557129, | |
| "learning_rate": 1.9631625667976584e-05, | |
| "loss": 0.6752, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.010359712230215827, | |
| "grad_norm": 5.5977044105529785, | |
| "learning_rate": 1.9622179935292855e-05, | |
| "loss": 0.2751, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.01041726618705036, | |
| "grad_norm": 6.071504592895508, | |
| "learning_rate": 1.961261695938319e-05, | |
| "loss": 0.2445, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.010474820143884893, | |
| "grad_norm": 3.8929355144500732, | |
| "learning_rate": 1.9602936856769432e-05, | |
| "loss": 0.3608, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.010532374100719425, | |
| "grad_norm": 3.836050033569336, | |
| "learning_rate": 1.9593139745400575e-05, | |
| "loss": 0.2327, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.010589928057553957, | |
| "grad_norm": 11.480710983276367, | |
| "learning_rate": 1.9583225744651334e-05, | |
| "loss": 0.6428, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.010647482014388488, | |
| "grad_norm": 4.123970985412598, | |
| "learning_rate": 1.9573194975320672e-05, | |
| "loss": 0.2694, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.010705035971223022, | |
| "grad_norm": 5.072369575500488, | |
| "learning_rate": 1.9563047559630356e-05, | |
| "loss": 0.3314, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.010762589928057554, | |
| "grad_norm": 6.05760383605957, | |
| "learning_rate": 1.9552783621223437e-05, | |
| "loss": 0.4093, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.010820143884892086, | |
| "grad_norm": 1.4690526723861694, | |
| "learning_rate": 1.954240328516277e-05, | |
| "loss": 0.1567, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.01087769784172662, | |
| "grad_norm": 4.700074672698975, | |
| "learning_rate": 1.9531906677929472e-05, | |
| "loss": 0.224, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.010935251798561151, | |
| "grad_norm": 7.230707168579102, | |
| "learning_rate": 1.9521293927421388e-05, | |
| "loss": 0.5614, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.010992805755395683, | |
| "grad_norm": 3.8735275268554688, | |
| "learning_rate": 1.9510565162951538e-05, | |
| "loss": 0.2567, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.011050359712230217, | |
| "grad_norm": 1.882738471031189, | |
| "learning_rate": 1.9499720515246524e-05, | |
| "loss": 0.1471, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.011107913669064749, | |
| "grad_norm": 11.0597505569458, | |
| "learning_rate": 1.9488760116444966e-05, | |
| "loss": 0.6557, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.01116546762589928, | |
| "grad_norm": 8.127897262573242, | |
| "learning_rate": 1.947768410009586e-05, | |
| "loss": 0.6195, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.011223021582733812, | |
| "grad_norm": 10.546082496643066, | |
| "learning_rate": 1.9466492601156964e-05, | |
| "loss": 0.6428, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.011280575539568346, | |
| "grad_norm": 5.149999141693115, | |
| "learning_rate": 1.945518575599317e-05, | |
| "loss": 0.5332, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.011338129496402878, | |
| "grad_norm": 1.2085461616516113, | |
| "learning_rate": 1.944376370237481e-05, | |
| "loss": 0.0889, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.01139568345323741, | |
| "grad_norm": 4.670867919921875, | |
| "learning_rate": 1.943222657947601e-05, | |
| "loss": 0.3224, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.011453237410071943, | |
| "grad_norm": 10.440308570861816, | |
| "learning_rate": 1.942057452787297e-05, | |
| "loss": 0.5673, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.011510791366906475, | |
| "grad_norm": 2.601593017578125, | |
| "learning_rate": 1.9408807689542257e-05, | |
| "loss": 0.1987, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.011568345323741007, | |
| "grad_norm": 3.630814552307129, | |
| "learning_rate": 1.9396926207859085e-05, | |
| "loss": 0.1889, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.011625899280575539, | |
| "grad_norm": 5.2653632164001465, | |
| "learning_rate": 1.938493022759556e-05, | |
| "loss": 0.5126, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.011683453237410072, | |
| "grad_norm": 5.171159744262695, | |
| "learning_rate": 1.937281989491892e-05, | |
| "loss": 0.4045, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.011741007194244604, | |
| "grad_norm": 4.25119686126709, | |
| "learning_rate": 1.9360595357389735e-05, | |
| "loss": 0.4102, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.011798561151079136, | |
| "grad_norm": 4.690221309661865, | |
| "learning_rate": 1.9348256763960146e-05, | |
| "loss": 0.1851, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.01185611510791367, | |
| "grad_norm": 2.5318408012390137, | |
| "learning_rate": 1.9335804264972018e-05, | |
| "loss": 0.1637, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.011913669064748202, | |
| "grad_norm": 3.1044423580169678, | |
| "learning_rate": 1.9323238012155125e-05, | |
| "loss": 0.2009, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.011971223021582733, | |
| "grad_norm": 3.260160207748413, | |
| "learning_rate": 1.9310558158625286e-05, | |
| "loss": 0.3784, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.012028776978417267, | |
| "grad_norm": 2.753958225250244, | |
| "learning_rate": 1.9297764858882516e-05, | |
| "loss": 0.2527, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.012086330935251799, | |
| "grad_norm": 2.3980395793914795, | |
| "learning_rate": 1.9284858268809135e-05, | |
| "loss": 0.2148, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.01214388489208633, | |
| "grad_norm": 7.618435382843018, | |
| "learning_rate": 1.9271838545667876e-05, | |
| "loss": 0.4407, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.012201438848920863, | |
| "grad_norm": 2.246246099472046, | |
| "learning_rate": 1.925870584809995e-05, | |
| "loss": 0.2267, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.012258992805755396, | |
| "grad_norm": 6.476871490478516, | |
| "learning_rate": 1.9245460336123136e-05, | |
| "loss": 0.3225, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.012316546762589928, | |
| "grad_norm": 3.0804295539855957, | |
| "learning_rate": 1.923210217112981e-05, | |
| "loss": 0.2715, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.01237410071942446, | |
| "grad_norm": 2.8532204627990723, | |
| "learning_rate": 1.9218631515885007e-05, | |
| "loss": 0.1833, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.012431654676258994, | |
| "grad_norm": 5.331664562225342, | |
| "learning_rate": 1.9205048534524405e-05, | |
| "loss": 0.2848, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.012489208633093525, | |
| "grad_norm": 6.048542022705078, | |
| "learning_rate": 1.9191353392552346e-05, | |
| "loss": 0.5696, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.012546762589928057, | |
| "grad_norm": 1.7071490287780762, | |
| "learning_rate": 1.9177546256839814e-05, | |
| "loss": 0.1342, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.01260431654676259, | |
| "grad_norm": 2.181581974029541, | |
| "learning_rate": 1.9163627295622397e-05, | |
| "loss": 0.123, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.012661870503597123, | |
| "grad_norm": 3.1651432514190674, | |
| "learning_rate": 1.914959667849825e-05, | |
| "loss": 0.1772, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.012719424460431655, | |
| "grad_norm": 5.666025638580322, | |
| "learning_rate": 1.913545457642601e-05, | |
| "loss": 0.3085, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.012776978417266186, | |
| "grad_norm": 2.379805088043213, | |
| "learning_rate": 1.9121201161722732e-05, | |
| "loss": 0.1892, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.01283453237410072, | |
| "grad_norm": 2.259615182876587, | |
| "learning_rate": 1.910683660806177e-05, | |
| "loss": 0.1345, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.012892086330935252, | |
| "grad_norm": 5.213752746582031, | |
| "learning_rate": 1.9092361090470688e-05, | |
| "loss": 0.3298, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.012949640287769784, | |
| "grad_norm": 4.78728723526001, | |
| "learning_rate": 1.907777478532909e-05, | |
| "loss": 0.2825, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.013007194244604317, | |
| "grad_norm": 0.8624676465988159, | |
| "learning_rate": 1.9063077870366504e-05, | |
| "loss": 0.109, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.01306474820143885, | |
| "grad_norm": 1.283377766609192, | |
| "learning_rate": 1.9048270524660197e-05, | |
| "loss": 0.1188, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.013122302158273381, | |
| "grad_norm": 9.221684455871582, | |
| "learning_rate": 1.903335292863301e-05, | |
| "loss": 0.5137, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.013179856115107913, | |
| "grad_norm": 11.753008842468262, | |
| "learning_rate": 1.901832526405114e-05, | |
| "loss": 0.4472, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.013237410071942447, | |
| "grad_norm": 1.6660693883895874, | |
| "learning_rate": 1.9003187714021936e-05, | |
| "loss": 0.1548, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.013294964028776978, | |
| "grad_norm": 0.9586846232414246, | |
| "learning_rate": 1.8987940462991673e-05, | |
| "loss": 0.0959, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.01335251798561151, | |
| "grad_norm": 0.9193116426467896, | |
| "learning_rate": 1.8972583696743284e-05, | |
| "loss": 0.0977, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.013410071942446044, | |
| "grad_norm": 0.9497302174568176, | |
| "learning_rate": 1.895711760239413e-05, | |
| "loss": 0.0993, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.013467625899280576, | |
| "grad_norm": 2.8770296573638916, | |
| "learning_rate": 1.8941542368393683e-05, | |
| "loss": 0.1175, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.013525179856115108, | |
| "grad_norm": 5.624111175537109, | |
| "learning_rate": 1.892585818452126e-05, | |
| "loss": 0.2421, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.01358273381294964, | |
| "grad_norm": 1.8553048372268677, | |
| "learning_rate": 1.891006524188368e-05, | |
| "loss": 0.1538, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.013640287769784173, | |
| "grad_norm": 6.076197624206543, | |
| "learning_rate": 1.889416373291298e-05, | |
| "loss": 0.2427, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.013697841726618705, | |
| "grad_norm": 3.465892791748047, | |
| "learning_rate": 1.8878153851364013e-05, | |
| "loss": 0.1723, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.013755395683453237, | |
| "grad_norm": 15.277708053588867, | |
| "learning_rate": 1.8862035792312148e-05, | |
| "loss": 0.5853, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.01381294964028777, | |
| "grad_norm": 4.846475124359131, | |
| "learning_rate": 1.884580975215084e-05, | |
| "loss": 0.2031, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.013870503597122302, | |
| "grad_norm": 3.0435233116149902, | |
| "learning_rate": 1.8829475928589272e-05, | |
| "loss": 0.2053, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.013928057553956834, | |
| "grad_norm": 6.824376583099365, | |
| "learning_rate": 1.8813034520649923e-05, | |
| "loss": 0.1688, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.013985611510791368, | |
| "grad_norm": 1.9684380292892456, | |
| "learning_rate": 1.879648572866617e-05, | |
| "loss": 0.1105, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.0140431654676259, | |
| "grad_norm": 7.668671131134033, | |
| "learning_rate": 1.8779829754279806e-05, | |
| "loss": 0.4672, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.014100719424460431, | |
| "grad_norm": 7.829854488372803, | |
| "learning_rate": 1.8763066800438638e-05, | |
| "loss": 0.229, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.014158273381294963, | |
| "grad_norm": 2.082008123397827, | |
| "learning_rate": 1.874619707139396e-05, | |
| "loss": 0.1406, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.014215827338129497, | |
| "grad_norm": 6.171285629272461, | |
| "learning_rate": 1.8729220772698096e-05, | |
| "loss": 0.1633, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.014273381294964029, | |
| "grad_norm": 2.4114866256713867, | |
| "learning_rate": 1.8712138111201898e-05, | |
| "loss": 0.1011, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.01433093525179856, | |
| "grad_norm": 6.919300079345703, | |
| "learning_rate": 1.869494929505219e-05, | |
| "loss": 0.2706, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.014388489208633094, | |
| "grad_norm": 2.967616558074951, | |
| "learning_rate": 1.8677654533689287e-05, | |
| "loss": 0.1721, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.014446043165467626, | |
| "grad_norm": 19.468122482299805, | |
| "learning_rate": 1.866025403784439e-05, | |
| "loss": 0.7281, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.014503597122302158, | |
| "grad_norm": 5.602455139160156, | |
| "learning_rate": 1.864274801953705e-05, | |
| "loss": 0.108, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.01456115107913669, | |
| "grad_norm": 16.94603729248047, | |
| "learning_rate": 1.8625136692072577e-05, | |
| "loss": 0.4322, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.014618705035971223, | |
| "grad_norm": 5.4511823654174805, | |
| "learning_rate": 1.860742027003944e-05, | |
| "loss": 0.2628, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.014676258992805755, | |
| "grad_norm": 3.1577935218811035, | |
| "learning_rate": 1.8589598969306646e-05, | |
| "loss": 0.1408, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.014733812949640287, | |
| "grad_norm": 0.9684641361236572, | |
| "learning_rate": 1.8571673007021124e-05, | |
| "loss": 0.0771, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.01479136690647482, | |
| "grad_norm": 10.628517150878906, | |
| "learning_rate": 1.855364260160507e-05, | |
| "loss": 0.4681, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.014848920863309353, | |
| "grad_norm": 1.4679758548736572, | |
| "learning_rate": 1.8535507972753275e-05, | |
| "loss": 0.1161, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.014906474820143885, | |
| "grad_norm": 2.098684549331665, | |
| "learning_rate": 1.851726934143048e-05, | |
| "loss": 0.1365, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.014964028776978418, | |
| "grad_norm": 1.1517246961593628, | |
| "learning_rate": 1.849892692986864e-05, | |
| "loss": 0.1261, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.01502158273381295, | |
| "grad_norm": 11.601617813110352, | |
| "learning_rate": 1.848048096156426e-05, | |
| "loss": 0.5597, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.015079136690647482, | |
| "grad_norm": 9.081189155578613, | |
| "learning_rate": 1.8461931661275642e-05, | |
| "loss": 0.3012, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.015136690647482014, | |
| "grad_norm": 1.1501121520996094, | |
| "learning_rate": 1.8443279255020153e-05, | |
| "loss": 0.1101, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.015194244604316547, | |
| "grad_norm": 1.6358212232589722, | |
| "learning_rate": 1.842452397007148e-05, | |
| "loss": 0.0907, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.01525179856115108, | |
| "grad_norm": 1.1782243251800537, | |
| "learning_rate": 1.8405666034956842e-05, | |
| "loss": 0.0821, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.015309352517985611, | |
| "grad_norm": 3.5769565105438232, | |
| "learning_rate": 1.8386705679454243e-05, | |
| "loss": 0.1347, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.015366906474820145, | |
| "grad_norm": 9.618369102478027, | |
| "learning_rate": 1.836764313458962e-05, | |
| "loss": 0.2999, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.015424460431654676, | |
| "grad_norm": 3.8770649433135986, | |
| "learning_rate": 1.8348478632634067e-05, | |
| "loss": 0.1558, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.015482014388489208, | |
| "grad_norm": 5.656462669372559, | |
| "learning_rate": 1.8329212407100996e-05, | |
| "loss": 0.2134, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.01553956834532374, | |
| "grad_norm": 7.180418491363525, | |
| "learning_rate": 1.8309844692743283e-05, | |
| "loss": 0.2609, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.015597122302158274, | |
| "grad_norm": 1.3020349740982056, | |
| "learning_rate": 1.8290375725550417e-05, | |
| "loss": 0.0988, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.015654676258992806, | |
| "grad_norm": 5.860103607177734, | |
| "learning_rate": 1.827080574274562e-05, | |
| "loss": 0.2128, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.01571223021582734, | |
| "grad_norm": 10.250383377075195, | |
| "learning_rate": 1.8251134982782952e-05, | |
| "loss": 0.2957, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.01576978417266187, | |
| "grad_norm": 0.8903178572654724, | |
| "learning_rate": 1.8231363685344422e-05, | |
| "loss": 0.1002, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.015827338129496403, | |
| "grad_norm": 9.552652359008789, | |
| "learning_rate": 1.821149209133704e-05, | |
| "loss": 0.2226, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.015884892086330937, | |
| "grad_norm": 8.22533893585205, | |
| "learning_rate": 1.819152044288992e-05, | |
| "loss": 0.4168, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.015942446043165467, | |
| "grad_norm": 3.7411088943481445, | |
| "learning_rate": 1.8171448983351284e-05, | |
| "loss": 0.133, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.016, | |
| "grad_norm": 6.830077171325684, | |
| "learning_rate": 1.815127795728554e-05, | |
| "loss": 0.1879, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.016057553956834534, | |
| "grad_norm": 6.609959125518799, | |
| "learning_rate": 1.8131007610470278e-05, | |
| "loss": 0.3989, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.016115107913669064, | |
| "grad_norm": 8.54151725769043, | |
| "learning_rate": 1.8110638189893267e-05, | |
| "loss": 0.2935, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.016172661870503598, | |
| "grad_norm": 2.277355909347534, | |
| "learning_rate": 1.8090169943749477e-05, | |
| "loss": 0.0824, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.016230215827338128, | |
| "grad_norm": 0.6691151857376099, | |
| "learning_rate": 1.806960312143802e-05, | |
| "loss": 0.0981, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.01628776978417266, | |
| "grad_norm": 1.0405458211898804, | |
| "learning_rate": 1.804893797355914e-05, | |
| "loss": 0.1054, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.016345323741007195, | |
| "grad_norm": 3.0072195529937744, | |
| "learning_rate": 1.8028174751911147e-05, | |
| "loss": 0.1007, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.016402877697841725, | |
| "grad_norm": 5.276819229125977, | |
| "learning_rate": 1.8007313709487334e-05, | |
| "loss": 0.2197, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.01646043165467626, | |
| "grad_norm": 0.3860107362270355, | |
| "learning_rate": 1.798635510047293e-05, | |
| "loss": 0.0449, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.016517985611510792, | |
| "grad_norm": 7.217343330383301, | |
| "learning_rate": 1.7965299180241963e-05, | |
| "loss": 0.2553, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.016575539568345322, | |
| "grad_norm": 6.964923858642578, | |
| "learning_rate": 1.7944146205354182e-05, | |
| "loss": 0.3295, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.016633093525179856, | |
| "grad_norm": 6.25247859954834, | |
| "learning_rate": 1.792289643355191e-05, | |
| "loss": 0.2184, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.01669064748201439, | |
| "grad_norm": 2.1732687950134277, | |
| "learning_rate": 1.7901550123756906e-05, | |
| "loss": 0.0992, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.01674820143884892, | |
| "grad_norm": 4.60042142868042, | |
| "learning_rate": 1.788010753606722e-05, | |
| "loss": 0.241, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.016805755395683453, | |
| "grad_norm": 1.7903494834899902, | |
| "learning_rate": 1.785856893175402e-05, | |
| "loss": 0.1072, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.016863309352517987, | |
| "grad_norm": 9.696785926818848, | |
| "learning_rate": 1.78369345732584e-05, | |
| "loss": 0.2978, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.016920863309352517, | |
| "grad_norm": 9.798710823059082, | |
| "learning_rate": 1.781520472418819e-05, | |
| "loss": 0.3003, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.01697841726618705, | |
| "grad_norm": 1.50823175907135, | |
| "learning_rate": 1.7793379649314743e-05, | |
| "loss": 0.1167, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.017035971223021584, | |
| "grad_norm": 15.534778594970703, | |
| "learning_rate": 1.777145961456971e-05, | |
| "loss": 0.3782, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.017093525179856114, | |
| "grad_norm": 1.2554796934127808, | |
| "learning_rate": 1.7749444887041797e-05, | |
| "loss": 0.0898, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.017151079136690648, | |
| "grad_norm": 4.486696720123291, | |
| "learning_rate": 1.7727335734973512e-05, | |
| "loss": 0.1792, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.017208633093525178, | |
| "grad_norm": 4.518349647521973, | |
| "learning_rate": 1.7705132427757895e-05, | |
| "loss": 0.2248, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.017266187050359712, | |
| "grad_norm": 11.271574020385742, | |
| "learning_rate": 1.7682835235935236e-05, | |
| "loss": 0.7558, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.017323741007194245, | |
| "grad_norm": 7.780160427093506, | |
| "learning_rate": 1.766044443118978e-05, | |
| "loss": 0.2102, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.017381294964028775, | |
| "grad_norm": 4.762843132019043, | |
| "learning_rate": 1.7637960286346423e-05, | |
| "loss": 0.2039, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.01743884892086331, | |
| "grad_norm": 2.5707688331604004, | |
| "learning_rate": 1.761538307536737e-05, | |
| "loss": 0.1405, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.017496402877697843, | |
| "grad_norm": 1.7889554500579834, | |
| "learning_rate": 1.759271307334881e-05, | |
| "loss": 0.1155, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.017553956834532373, | |
| "grad_norm": 4.67526912689209, | |
| "learning_rate": 1.7569950556517566e-05, | |
| "loss": 0.2787, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.017611510791366906, | |
| "grad_norm": 3.5515453815460205, | |
| "learning_rate": 1.7547095802227723e-05, | |
| "loss": 0.1925, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.01766906474820144, | |
| "grad_norm": 5.906973838806152, | |
| "learning_rate": 1.7524149088957244e-05, | |
| "loss": 0.251, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.01772661870503597, | |
| "grad_norm": 11.664872169494629, | |
| "learning_rate": 1.7501110696304598e-05, | |
| "loss": 0.9332, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.017784172661870504, | |
| "grad_norm": 6.628945827484131, | |
| "learning_rate": 1.747798090498532e-05, | |
| "loss": 0.294, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.017841726618705037, | |
| "grad_norm": 1.0758421421051025, | |
| "learning_rate": 1.7454759996828622e-05, | |
| "loss": 0.1241, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.017899280575539567, | |
| "grad_norm": 5.263257026672363, | |
| "learning_rate": 1.7431448254773943e-05, | |
| "loss": 0.2367, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.0179568345323741, | |
| "grad_norm": 1.0564324855804443, | |
| "learning_rate": 1.74080459628675e-05, | |
| "loss": 0.1437, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.018014388489208635, | |
| "grad_norm": 7.245335578918457, | |
| "learning_rate": 1.7384553406258842e-05, | |
| "loss": 0.2376, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.018071942446043165, | |
| "grad_norm": 1.8886218070983887, | |
| "learning_rate": 1.7360970871197347e-05, | |
| "loss": 0.1031, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.0181294964028777, | |
| "grad_norm": 16.046236038208008, | |
| "learning_rate": 1.7337298645028764e-05, | |
| "loss": 0.6099, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.01818705035971223, | |
| "grad_norm": 3.0381016731262207, | |
| "learning_rate": 1.7313537016191706e-05, | |
| "loss": 0.13, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.018244604316546762, | |
| "grad_norm": 4.647341251373291, | |
| "learning_rate": 1.7289686274214116e-05, | |
| "loss": 0.1707, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.018302158273381296, | |
| "grad_norm": 7.225495338439941, | |
| "learning_rate": 1.7265746709709762e-05, | |
| "loss": 0.3956, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.018359712230215826, | |
| "grad_norm": 4.374240875244141, | |
| "learning_rate": 1.7241718614374678e-05, | |
| "loss": 0.2331, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.01841726618705036, | |
| "grad_norm": 13.612607955932617, | |
| "learning_rate": 1.7217602280983622e-05, | |
| "loss": 0.645, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.018474820143884893, | |
| "grad_norm": 4.012824535369873, | |
| "learning_rate": 1.7193398003386514e-05, | |
| "loss": 0.2226, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.018532374100719423, | |
| "grad_norm": 1.8649362325668335, | |
| "learning_rate": 1.716910607650483e-05, | |
| "loss": 0.1594, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.018589928057553957, | |
| "grad_norm": 5.170797824859619, | |
| "learning_rate": 1.7144726796328034e-05, | |
| "loss": 0.4557, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.01864748201438849, | |
| "grad_norm": 7.867353916168213, | |
| "learning_rate": 1.712026045990997e-05, | |
| "loss": 0.325, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.01870503597122302, | |
| "grad_norm": 4.790438175201416, | |
| "learning_rate": 1.709570736536521e-05, | |
| "loss": 0.2865, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.018762589928057554, | |
| "grad_norm": 2.507946252822876, | |
| "learning_rate": 1.7071067811865477e-05, | |
| "loss": 0.104, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.018820143884892088, | |
| "grad_norm": 3.4884064197540283, | |
| "learning_rate": 1.7046342099635948e-05, | |
| "loss": 0.1544, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.018877697841726618, | |
| "grad_norm": 4.94096565246582, | |
| "learning_rate": 1.7021530529951627e-05, | |
| "loss": 0.2202, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.01893525179856115, | |
| "grad_norm": 2.591435432434082, | |
| "learning_rate": 1.6996633405133656e-05, | |
| "loss": 0.1226, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.018992805755395685, | |
| "grad_norm": 1.8974847793579102, | |
| "learning_rate": 1.697165102854565e-05, | |
| "loss": 0.1559, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.019050359712230215, | |
| "grad_norm": 1.4150128364562988, | |
| "learning_rate": 1.6946583704589973e-05, | |
| "loss": 0.1162, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.01910791366906475, | |
| "grad_norm": 3.744464874267578, | |
| "learning_rate": 1.692143173870407e-05, | |
| "loss": 0.2291, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.01916546762589928, | |
| "grad_norm": 0.9393559694290161, | |
| "learning_rate": 1.68961954373567e-05, | |
| "loss": 0.1093, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.019223021582733812, | |
| "grad_norm": 4.989112377166748, | |
| "learning_rate": 1.6870875108044233e-05, | |
| "loss": 0.2268, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.019280575539568346, | |
| "grad_norm": 3.0673670768737793, | |
| "learning_rate": 1.684547105928689e-05, | |
| "loss": 0.1281, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.019338129496402876, | |
| "grad_norm": 7.947831153869629, | |
| "learning_rate": 1.6819983600624986e-05, | |
| "loss": 0.665, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.01939568345323741, | |
| "grad_norm": 6.416712284088135, | |
| "learning_rate": 1.6794413042615168e-05, | |
| "loss": 0.2137, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.019453237410071943, | |
| "grad_norm": 16.095256805419922, | |
| "learning_rate": 1.6768759696826608e-05, | |
| "loss": 0.3756, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.019510791366906474, | |
| "grad_norm": 2.4824581146240234, | |
| "learning_rate": 1.6743023875837233e-05, | |
| "loss": 0.117, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.019568345323741007, | |
| "grad_norm": 5.838296890258789, | |
| "learning_rate": 1.6717205893229904e-05, | |
| "loss": 0.3567, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.01962589928057554, | |
| "grad_norm": 5.100681781768799, | |
| "learning_rate": 1.6691306063588583e-05, | |
| "loss": 0.2389, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.01968345323741007, | |
| "grad_norm": 7.527962684631348, | |
| "learning_rate": 1.6665324702494524e-05, | |
| "loss": 0.4136, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.019741007194244604, | |
| "grad_norm": 2.3168222904205322, | |
| "learning_rate": 1.6639262126522417e-05, | |
| "loss": 0.158, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.019798561151079138, | |
| "grad_norm": 0.8575424551963806, | |
| "learning_rate": 1.661311865323652e-05, | |
| "loss": 0.0539, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.019856115107913668, | |
| "grad_norm": 3.8268239498138428, | |
| "learning_rate": 1.6586894601186804e-05, | |
| "loss": 0.2135, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.019913669064748202, | |
| "grad_norm": 2.2744338512420654, | |
| "learning_rate": 1.6560590289905074e-05, | |
| "loss": 0.1278, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.019971223021582735, | |
| "grad_norm": 5.595208168029785, | |
| "learning_rate": 1.6534206039901057e-05, | |
| "loss": 0.2693, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.020028776978417265, | |
| "grad_norm": 2.456190347671509, | |
| "learning_rate": 1.650774217265851e-05, | |
| "loss": 0.1437, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.0200863309352518, | |
| "grad_norm": 10.795378684997559, | |
| "learning_rate": 1.6481199010631312e-05, | |
| "loss": 0.7296, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.02014388489208633, | |
| "grad_norm": 0.6766440868377686, | |
| "learning_rate": 1.645457687723951e-05, | |
| "loss": 0.104, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.020201438848920863, | |
| "grad_norm": 2.074960708618164, | |
| "learning_rate": 1.6427876096865394e-05, | |
| "loss": 0.1433, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.020258992805755396, | |
| "grad_norm": 3.726234197616577, | |
| "learning_rate": 1.6401096994849558e-05, | |
| "loss": 0.1568, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.020316546762589927, | |
| "grad_norm": 1.0608885288238525, | |
| "learning_rate": 1.63742398974869e-05, | |
| "loss": 0.095, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.02037410071942446, | |
| "grad_norm": 3.429298162460327, | |
| "learning_rate": 1.6347305132022677e-05, | |
| "loss": 0.1117, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.020431654676258994, | |
| "grad_norm": 5.276256561279297, | |
| "learning_rate": 1.632029302664851e-05, | |
| "loss": 0.4022, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.020489208633093524, | |
| "grad_norm": 6.028234481811523, | |
| "learning_rate": 1.6293203910498375e-05, | |
| "loss": 0.1726, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.020546762589928057, | |
| "grad_norm": 7.822464942932129, | |
| "learning_rate": 1.6266038113644605e-05, | |
| "loss": 0.3716, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.02060431654676259, | |
| "grad_norm": 9.866429328918457, | |
| "learning_rate": 1.6238795967093865e-05, | |
| "loss": 0.383, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.02066187050359712, | |
| "grad_norm": 2.492753505706787, | |
| "learning_rate": 1.6211477802783105e-05, | |
| "loss": 0.1362, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.020719424460431655, | |
| "grad_norm": 5.538491249084473, | |
| "learning_rate": 1.6184083953575543e-05, | |
| "loss": 0.2055, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.02077697841726619, | |
| "grad_norm": 1.3676029443740845, | |
| "learning_rate": 1.6156614753256583e-05, | |
| "loss": 0.0914, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.02083453237410072, | |
| "grad_norm": 5.788948059082031, | |
| "learning_rate": 1.6129070536529767e-05, | |
| "loss": 0.195, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.020892086330935252, | |
| "grad_norm": 1.366302251815796, | |
| "learning_rate": 1.610145163901268e-05, | |
| "loss": 0.112, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.020949640287769786, | |
| "grad_norm": 2.1532888412475586, | |
| "learning_rate": 1.607375839723287e-05, | |
| "loss": 0.1321, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.021007194244604316, | |
| "grad_norm": 2.4224016666412354, | |
| "learning_rate": 1.6045991148623752e-05, | |
| "loss": 0.1307, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.02106474820143885, | |
| "grad_norm": 2.301203489303589, | |
| "learning_rate": 1.6018150231520486e-05, | |
| "loss": 0.1129, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.02112230215827338, | |
| "grad_norm": 2.732968807220459, | |
| "learning_rate": 1.599023598515586e-05, | |
| "loss": 0.1113, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.021179856115107913, | |
| "grad_norm": 4.585814952850342, | |
| "learning_rate": 1.5962248749656158e-05, | |
| "loss": 0.2737, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.021237410071942447, | |
| "grad_norm": 3.1472392082214355, | |
| "learning_rate": 1.5934188866037017e-05, | |
| "loss": 0.1172, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.021294964028776977, | |
| "grad_norm": 0.760041356086731, | |
| "learning_rate": 1.5906056676199256e-05, | |
| "loss": 0.0888, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.02135251798561151, | |
| "grad_norm": 1.1722584962844849, | |
| "learning_rate": 1.5877852522924733e-05, | |
| "loss": 0.0718, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.021410071942446044, | |
| "grad_norm": 3.714165449142456, | |
| "learning_rate": 1.584957674987216e-05, | |
| "loss": 0.2098, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.021467625899280574, | |
| "grad_norm": 13.37151050567627, | |
| "learning_rate": 1.5821229701572897e-05, | |
| "loss": 0.6504, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.021525179856115108, | |
| "grad_norm": 7.64274787902832, | |
| "learning_rate": 1.5792811723426787e-05, | |
| "loss": 0.1673, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.02158273381294964, | |
| "grad_norm": 5.333349227905273, | |
| "learning_rate": 1.5764323161697933e-05, | |
| "loss": 0.1838, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.02164028776978417, | |
| "grad_norm": 5.268608570098877, | |
| "learning_rate": 1.573576436351046e-05, | |
| "loss": 0.201, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.021697841726618705, | |
| "grad_norm": 1.2459615468978882, | |
| "learning_rate": 1.570713567684432e-05, | |
| "loss": 0.0831, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.02175539568345324, | |
| "grad_norm": 0.3400329053401947, | |
| "learning_rate": 1.5678437450531014e-05, | |
| "loss": 0.0705, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.02181294964028777, | |
| "grad_norm": 3.784626007080078, | |
| "learning_rate": 1.564967003424938e-05, | |
| "loss": 0.1281, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.021870503597122302, | |
| "grad_norm": 2.0622975826263428, | |
| "learning_rate": 1.5620833778521306e-05, | |
| "loss": 0.1074, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.021928057553956836, | |
| "grad_norm": 2.558227777481079, | |
| "learning_rate": 1.5591929034707468e-05, | |
| "loss": 0.1039, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.021985611510791366, | |
| "grad_norm": 1.8183974027633667, | |
| "learning_rate": 1.556295615500305e-05, | |
| "loss": 0.1491, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.0220431654676259, | |
| "grad_norm": 3.397756814956665, | |
| "learning_rate": 1.553391549243344e-05, | |
| "loss": 0.0963, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.022100719424460433, | |
| "grad_norm": 1.9542852640151978, | |
| "learning_rate": 1.5504807400849957e-05, | |
| "loss": 0.1328, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.022158273381294964, | |
| "grad_norm": 1.1182966232299805, | |
| "learning_rate": 1.5475632234925505e-05, | |
| "loss": 0.0329, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.022215827338129497, | |
| "grad_norm": 6.058446407318115, | |
| "learning_rate": 1.5446390350150272e-05, | |
| "loss": 0.2127, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.022273381294964027, | |
| "grad_norm": 4.640816688537598, | |
| "learning_rate": 1.54170821028274e-05, | |
| "loss": 0.1501, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.02233093525179856, | |
| "grad_norm": 12.117772102355957, | |
| "learning_rate": 1.5387707850068633e-05, | |
| "loss": 0.4365, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.022388489208633094, | |
| "grad_norm": 3.080798625946045, | |
| "learning_rate": 1.5358267949789968e-05, | |
| "loss": 0.1568, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.022446043165467625, | |
| "grad_norm": 9.55615520477295, | |
| "learning_rate": 1.53287627607073e-05, | |
| "loss": 0.2771, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.022503597122302158, | |
| "grad_norm": 0.7347004413604736, | |
| "learning_rate": 1.529919264233205e-05, | |
| "loss": 0.078, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.022561151079136692, | |
| "grad_norm": 2.543454647064209, | |
| "learning_rate": 1.5269557954966777e-05, | |
| "loss": 0.0838, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.022618705035971222, | |
| "grad_norm": 0.8127100467681885, | |
| "learning_rate": 1.5239859059700794e-05, | |
| "loss": 0.05, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.022676258992805755, | |
| "grad_norm": 15.900167465209961, | |
| "learning_rate": 1.5210096318405768e-05, | |
| "loss": 0.313, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.02273381294964029, | |
| "grad_norm": 3.528336524963379, | |
| "learning_rate": 1.5180270093731305e-05, | |
| "loss": 0.0843, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.02279136690647482, | |
| "grad_norm": 0.9315251708030701, | |
| "learning_rate": 1.5150380749100545e-05, | |
| "loss": 0.0912, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.022848920863309353, | |
| "grad_norm": 13.889352798461914, | |
| "learning_rate": 1.5120428648705716e-05, | |
| "loss": 0.3968, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.022906474820143886, | |
| "grad_norm": 4.333874225616455, | |
| "learning_rate": 1.5090414157503715e-05, | |
| "loss": 0.1176, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.022964028776978417, | |
| "grad_norm": 1.4791545867919922, | |
| "learning_rate": 1.5060337641211637e-05, | |
| "loss": 0.1132, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.02302158273381295, | |
| "grad_norm": 17.43948745727539, | |
| "learning_rate": 1.5030199466302354e-05, | |
| "loss": 0.3854, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.023079136690647484, | |
| "grad_norm": 1.4201478958129883, | |
| "learning_rate": 1.5000000000000002e-05, | |
| "loss": 0.0633, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.023136690647482014, | |
| "grad_norm": 6.711745738983154, | |
| "learning_rate": 1.4969739610275556e-05, | |
| "loss": 0.1732, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.023194244604316547, | |
| "grad_norm": 1.3818823099136353, | |
| "learning_rate": 1.493941866584231e-05, | |
| "loss": 0.0769, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.023251798561151078, | |
| "grad_norm": 11.818349838256836, | |
| "learning_rate": 1.490903753615141e-05, | |
| "loss": 0.6309, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.02330935251798561, | |
| "grad_norm": 6.571465969085693, | |
| "learning_rate": 1.4878596591387329e-05, | |
| "loss": 0.1149, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.023366906474820145, | |
| "grad_norm": 8.349808692932129, | |
| "learning_rate": 1.4848096202463373e-05, | |
| "loss": 0.1896, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.023424460431654675, | |
| "grad_norm": 2.857787609100342, | |
| "learning_rate": 1.4817536741017153e-05, | |
| "loss": 0.0704, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.02348201438848921, | |
| "grad_norm": 8.498452186584473, | |
| "learning_rate": 1.478691857940607e-05, | |
| "loss": 0.2247, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.023539568345323742, | |
| "grad_norm": 0.468039333820343, | |
| "learning_rate": 1.4756242090702756e-05, | |
| "loss": 0.0615, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.023597122302158272, | |
| "grad_norm": 0.5978952646255493, | |
| "learning_rate": 1.4725507648690542e-05, | |
| "loss": 0.0501, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.023654676258992806, | |
| "grad_norm": 11.912870407104492, | |
| "learning_rate": 1.469471562785891e-05, | |
| "loss": 0.3164, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.02371223021582734, | |
| "grad_norm": 8.567428588867188, | |
| "learning_rate": 1.4663866403398915e-05, | |
| "loss": 0.2484, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.02376978417266187, | |
| "grad_norm": 5.721550941467285, | |
| "learning_rate": 1.463296035119862e-05, | |
| "loss": 0.1515, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.023827338129496403, | |
| "grad_norm": 11.112815856933594, | |
| "learning_rate": 1.4601997847838518e-05, | |
| "loss": 0.218, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.023884892086330937, | |
| "grad_norm": 0.5129345655441284, | |
| "learning_rate": 1.4570979270586944e-05, | |
| "loss": 0.0569, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.023942446043165467, | |
| "grad_norm": 7.7596259117126465, | |
| "learning_rate": 1.4539904997395468e-05, | |
| "loss": 0.1584, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.024, | |
| "grad_norm": 6.294882774353027, | |
| "learning_rate": 1.4508775406894308e-05, | |
| "loss": 0.0663, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.024057553956834534, | |
| "grad_norm": 10.160890579223633, | |
| "learning_rate": 1.4477590878387697e-05, | |
| "loss": 0.3199, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.024115107913669064, | |
| "grad_norm": 0.9614742398262024, | |
| "learning_rate": 1.4446351791849276e-05, | |
| "loss": 0.077, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.024172661870503598, | |
| "grad_norm": 3.362921953201294, | |
| "learning_rate": 1.4415058527917454e-05, | |
| "loss": 0.1266, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.024230215827338128, | |
| "grad_norm": 8.087349891662598, | |
| "learning_rate": 1.4383711467890776e-05, | |
| "loss": 0.3689, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.02428776978417266, | |
| "grad_norm": 8.182154655456543, | |
| "learning_rate": 1.4352310993723277e-05, | |
| "loss": 0.1767, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.024345323741007195, | |
| "grad_norm": 1.1635339260101318, | |
| "learning_rate": 1.4320857488019826e-05, | |
| "loss": 0.1257, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.024402877697841725, | |
| "grad_norm": 3.1351680755615234, | |
| "learning_rate": 1.4289351334031461e-05, | |
| "loss": 0.1368, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.02446043165467626, | |
| "grad_norm": 8.145259857177734, | |
| "learning_rate": 1.4257792915650728e-05, | |
| "loss": 0.2471, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.024517985611510792, | |
| "grad_norm": 0.6130021810531616, | |
| "learning_rate": 1.4226182617406996e-05, | |
| "loss": 0.0769, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.024575539568345323, | |
| "grad_norm": 30.53700065612793, | |
| "learning_rate": 1.4194520824461773e-05, | |
| "loss": 0.2786, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.024633093525179856, | |
| "grad_norm": 10.179473876953125, | |
| "learning_rate": 1.4162807922604014e-05, | |
| "loss": 0.22, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.02469064748201439, | |
| "grad_norm": 20.345251083374023, | |
| "learning_rate": 1.413104429824542e-05, | |
| "loss": 1.2058, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.02474820143884892, | |
| "grad_norm": 17.933361053466797, | |
| "learning_rate": 1.4099230338415728e-05, | |
| "loss": 0.5017, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.024805755395683454, | |
| "grad_norm": 16.122068405151367, | |
| "learning_rate": 1.4067366430758004e-05, | |
| "loss": 0.4488, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.024863309352517987, | |
| "grad_norm": 3.910707712173462, | |
| "learning_rate": 1.4035452963523903e-05, | |
| "loss": 0.1475, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.024920863309352517, | |
| "grad_norm": 3.197533130645752, | |
| "learning_rate": 1.4003490325568953e-05, | |
| "loss": 0.1384, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.02497841726618705, | |
| "grad_norm": 1.7127132415771484, | |
| "learning_rate": 1.3971478906347806e-05, | |
| "loss": 0.0953, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.025035971223021584, | |
| "grad_norm": 0.6795534491539001, | |
| "learning_rate": 1.3939419095909513e-05, | |
| "loss": 0.084, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.025093525179856115, | |
| "grad_norm": 0.7117164731025696, | |
| "learning_rate": 1.3907311284892737e-05, | |
| "loss": 0.1028, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.025151079136690648, | |
| "grad_norm": 5.165547847747803, | |
| "learning_rate": 1.3875155864521031e-05, | |
| "loss": 0.1473, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.02520863309352518, | |
| "grad_norm": 1.277207374572754, | |
| "learning_rate": 1.3842953226598036e-05, | |
| "loss": 0.1054, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.025266187050359712, | |
| "grad_norm": 6.697636604309082, | |
| "learning_rate": 1.3810703763502744e-05, | |
| "loss": 0.2188, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.025323741007194246, | |
| "grad_norm": 0.8036256432533264, | |
| "learning_rate": 1.3778407868184674e-05, | |
| "loss": 0.0717, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.025381294964028776, | |
| "grad_norm": 14.702827453613281, | |
| "learning_rate": 1.3746065934159123e-05, | |
| "loss": 0.5481, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.02543884892086331, | |
| "grad_norm": 0.643853485584259, | |
| "learning_rate": 1.371367835550235e-05, | |
| "loss": 0.0649, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.025496402877697843, | |
| "grad_norm": 6.889122009277344, | |
| "learning_rate": 1.3681245526846782e-05, | |
| "loss": 0.1057, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.025553956834532373, | |
| "grad_norm": 3.3967247009277344, | |
| "learning_rate": 1.3648767843376196e-05, | |
| "loss": 0.1841, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.025611510791366907, | |
| "grad_norm": 4.166593074798584, | |
| "learning_rate": 1.3616245700820922e-05, | |
| "loss": 0.1322, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.02566906474820144, | |
| "grad_norm": 0.9764627814292908, | |
| "learning_rate": 1.3583679495453e-05, | |
| "loss": 0.0708, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.02572661870503597, | |
| "grad_norm": 8.78773307800293, | |
| "learning_rate": 1.3551069624081372e-05, | |
| "loss": 0.2444, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.025784172661870504, | |
| "grad_norm": 0.8957186341285706, | |
| "learning_rate": 1.3518416484047018e-05, | |
| "loss": 0.0952, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.025841726618705037, | |
| "grad_norm": 2.754924774169922, | |
| "learning_rate": 1.3485720473218153e-05, | |
| "loss": 0.1718, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.025899280575539568, | |
| "grad_norm": 9.26056957244873, | |
| "learning_rate": 1.3452981989985347e-05, | |
| "loss": 0.1871, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.0259568345323741, | |
| "grad_norm": 2.4848737716674805, | |
| "learning_rate": 1.342020143325669e-05, | |
| "loss": 0.0396, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.026014388489208635, | |
| "grad_norm": 3.2999818325042725, | |
| "learning_rate": 1.3387379202452917e-05, | |
| "loss": 0.1324, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.026071942446043165, | |
| "grad_norm": 13.430712699890137, | |
| "learning_rate": 1.3354515697502552e-05, | |
| "loss": 0.4404, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.0261294964028777, | |
| "grad_norm": 0.6919059753417969, | |
| "learning_rate": 1.3321611318837033e-05, | |
| "loss": 0.0747, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.02618705035971223, | |
| "grad_norm": 1.3426858186721802, | |
| "learning_rate": 1.3288666467385834e-05, | |
| "loss": 0.1142, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.026244604316546762, | |
| "grad_norm": 0.5844228863716125, | |
| "learning_rate": 1.3255681544571568e-05, | |
| "loss": 0.0768, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.026302158273381296, | |
| "grad_norm": 1.4332048892974854, | |
| "learning_rate": 1.3222656952305113e-05, | |
| "loss": 0.0896, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.026359712230215826, | |
| "grad_norm": 5.514071464538574, | |
| "learning_rate": 1.3189593092980701e-05, | |
| "loss": 0.2002, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.02641726618705036, | |
| "grad_norm": 10.462922096252441, | |
| "learning_rate": 1.3156490369471026e-05, | |
| "loss": 0.3964, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.026474820143884893, | |
| "grad_norm": 7.807409763336182, | |
| "learning_rate": 1.3123349185122328e-05, | |
| "loss": 0.2995, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.026532374100719423, | |
| "grad_norm": 2.9369232654571533, | |
| "learning_rate": 1.3090169943749475e-05, | |
| "loss": 0.1574, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.026589928057553957, | |
| "grad_norm": 2.5351967811584473, | |
| "learning_rate": 1.3056953049631059e-05, | |
| "loss": 0.0965, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.02664748201438849, | |
| "grad_norm": 1.906512975692749, | |
| "learning_rate": 1.3023698907504447e-05, | |
| "loss": 0.1145, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.02670503597122302, | |
| "grad_norm": 9.540634155273438, | |
| "learning_rate": 1.2990407922560869e-05, | |
| "loss": 0.3393, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.026762589928057554, | |
| "grad_norm": 13.937700271606445, | |
| "learning_rate": 1.2957080500440469e-05, | |
| "loss": 0.4293, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.026820143884892088, | |
| "grad_norm": 2.0209853649139404, | |
| "learning_rate": 1.2923717047227368e-05, | |
| "loss": 0.049, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.026877697841726618, | |
| "grad_norm": 8.90571117401123, | |
| "learning_rate": 1.2890317969444716e-05, | |
| "loss": 0.3679, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.02693525179856115, | |
| "grad_norm": 1.0252469778060913, | |
| "learning_rate": 1.2856883674049736e-05, | |
| "loss": 0.0684, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.026992805755395685, | |
| "grad_norm": 4.494703769683838, | |
| "learning_rate": 1.2823414568428767e-05, | |
| "loss": 0.1366, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.027050359712230215, | |
| "grad_norm": 4.636646747589111, | |
| "learning_rate": 1.2789911060392295e-05, | |
| "loss": 0.1983, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.02710791366906475, | |
| "grad_norm": 1.6157455444335938, | |
| "learning_rate": 1.2756373558169992e-05, | |
| "loss": 0.1423, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.02716546762589928, | |
| "grad_norm": 1.4866031408309937, | |
| "learning_rate": 1.2722802470405744e-05, | |
| "loss": 0.1039, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.027223021582733813, | |
| "grad_norm": 0.6129380464553833, | |
| "learning_rate": 1.2689198206152657e-05, | |
| "loss": 0.0738, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.027280575539568346, | |
| "grad_norm": 8.018013954162598, | |
| "learning_rate": 1.265556117486809e-05, | |
| "loss": 0.1237, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.027338129496402876, | |
| "grad_norm": 8.371533393859863, | |
| "learning_rate": 1.2621891786408648e-05, | |
| "loss": 0.2127, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.02739568345323741, | |
| "grad_norm": 9.338693618774414, | |
| "learning_rate": 1.2588190451025209e-05, | |
| "loss": 0.313, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.027453237410071944, | |
| "grad_norm": 2.514599323272705, | |
| "learning_rate": 1.2554457579357906e-05, | |
| "loss": 0.1236, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.027510791366906474, | |
| "grad_norm": 36.57285690307617, | |
| "learning_rate": 1.252069358243114e-05, | |
| "loss": 0.8909, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.027568345323741007, | |
| "grad_norm": 8.022867202758789, | |
| "learning_rate": 1.2486898871648552e-05, | |
| "loss": 0.243, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.02762589928057554, | |
| "grad_norm": 3.656367063522339, | |
| "learning_rate": 1.2453073858788027e-05, | |
| "loss": 0.1721, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.02768345323741007, | |
| "grad_norm": 7.827700614929199, | |
| "learning_rate": 1.2419218955996677e-05, | |
| "loss": 0.1393, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.027741007194244605, | |
| "grad_norm": 0.9252501130104065, | |
| "learning_rate": 1.238533457578581e-05, | |
| "loss": 0.091, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.027798561151079138, | |
| "grad_norm": 1.7856941223144531, | |
| "learning_rate": 1.23514211310259e-05, | |
| "loss": 0.1275, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.02785611510791367, | |
| "grad_norm": 23.890789031982422, | |
| "learning_rate": 1.2317479034941572e-05, | |
| "loss": 0.3568, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.027913669064748202, | |
| "grad_norm": 2.6963653564453125, | |
| "learning_rate": 1.2283508701106559e-05, | |
| "loss": 0.1143, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.027971223021582736, | |
| "grad_norm": 5.141092777252197, | |
| "learning_rate": 1.2249510543438652e-05, | |
| "loss": 0.2145, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.028028776978417266, | |
| "grad_norm": 4.484059810638428, | |
| "learning_rate": 1.2215484976194675e-05, | |
| "loss": 0.134, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.0280863309352518, | |
| "grad_norm": 13.956079483032227, | |
| "learning_rate": 1.2181432413965428e-05, | |
| "loss": 0.2296, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.02814388489208633, | |
| "grad_norm": 57.16883087158203, | |
| "learning_rate": 1.2147353271670634e-05, | |
| "loss": 0.3778, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.028201438848920863, | |
| "grad_norm": 0.9326462745666504, | |
| "learning_rate": 1.211324796455389e-05, | |
| "loss": 0.0922, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.028258992805755397, | |
| "grad_norm": 0.6182975172996521, | |
| "learning_rate": 1.2079116908177592e-05, | |
| "loss": 0.0514, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.028316546762589927, | |
| "grad_norm": 0.42364293336868286, | |
| "learning_rate": 1.2044960518417902e-05, | |
| "loss": 0.0181, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.02837410071942446, | |
| "grad_norm": 3.4781551361083984, | |
| "learning_rate": 1.2010779211459649e-05, | |
| "loss": 0.161, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.028431654676258994, | |
| "grad_norm": 2.329385280609131, | |
| "learning_rate": 1.1976573403791263e-05, | |
| "loss": 0.0878, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.028489208633093524, | |
| "grad_norm": 5.409578323364258, | |
| "learning_rate": 1.194234351219972e-05, | |
| "loss": 0.1646, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.028546762589928058, | |
| "grad_norm": 0.2559332847595215, | |
| "learning_rate": 1.190808995376545e-05, | |
| "loss": 0.0438, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.02860431654676259, | |
| "grad_norm": 7.3071088790893555, | |
| "learning_rate": 1.187381314585725e-05, | |
| "loss": 0.2373, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.02866187050359712, | |
| "grad_norm": 13.68780517578125, | |
| "learning_rate": 1.1839513506127202e-05, | |
| "loss": 0.5267, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.028719424460431655, | |
| "grad_norm": 14.798240661621094, | |
| "learning_rate": 1.1805191452505602e-05, | |
| "loss": 0.4246, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.02877697841726619, | |
| "grad_norm": 0.8802618980407715, | |
| "learning_rate": 1.1770847403195836e-05, | |
| "loss": 0.0126, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.02883453237410072, | |
| "grad_norm": 2.4958908557891846, | |
| "learning_rate": 1.1736481776669307e-05, | |
| "loss": 0.0668, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.028892086330935252, | |
| "grad_norm": 8.02234172821045, | |
| "learning_rate": 1.1702094991660326e-05, | |
| "loss": 0.2478, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.028949640287769786, | |
| "grad_norm": 6.714267730712891, | |
| "learning_rate": 1.1667687467161025e-05, | |
| "loss": 0.2143, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.029007194244604316, | |
| "grad_norm": 7.356170177459717, | |
| "learning_rate": 1.1633259622416224e-05, | |
| "loss": 0.2208, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.02906474820143885, | |
| "grad_norm": 4.378060340881348, | |
| "learning_rate": 1.159881187691835e-05, | |
| "loss": 0.1349, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.02912230215827338, | |
| "grad_norm": 9.72322940826416, | |
| "learning_rate": 1.156434465040231e-05, | |
| "loss": 0.277, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.029179856115107913, | |
| "grad_norm": 1.8536237478256226, | |
| "learning_rate": 1.1529858362840383e-05, | |
| "loss": 0.1448, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.029237410071942447, | |
| "grad_norm": 3.8134891986846924, | |
| "learning_rate": 1.1495353434437098e-05, | |
| "loss": 0.1221, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.029294964028776977, | |
| "grad_norm": 8.059671401977539, | |
| "learning_rate": 1.1460830285624119e-05, | |
| "loss": 0.3355, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.02935251798561151, | |
| "grad_norm": 10.443696022033691, | |
| "learning_rate": 1.1426289337055119e-05, | |
| "loss": 0.329, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.029410071942446044, | |
| "grad_norm": 1.0404391288757324, | |
| "learning_rate": 1.1391731009600655e-05, | |
| "loss": 0.0193, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.029467625899280574, | |
| "grad_norm": 9.676289558410645, | |
| "learning_rate": 1.1357155724343046e-05, | |
| "loss": 0.2689, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.029525179856115108, | |
| "grad_norm": 25.724231719970703, | |
| "learning_rate": 1.1322563902571227e-05, | |
| "loss": 0.7274, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.02958273381294964, | |
| "grad_norm": 3.297560214996338, | |
| "learning_rate": 1.128795596577563e-05, | |
| "loss": 0.1354, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.02964028776978417, | |
| "grad_norm": 7.687169551849365, | |
| "learning_rate": 1.1253332335643043e-05, | |
| "loss": 0.8631, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.029697841726618705, | |
| "grad_norm": 1.4026639461517334, | |
| "learning_rate": 1.1218693434051475e-05, | |
| "loss": 0.0832, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.02975539568345324, | |
| "grad_norm": 1.158055067062378, | |
| "learning_rate": 1.1184039683065014e-05, | |
| "loss": 0.1053, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.02981294964028777, | |
| "grad_norm": 1.325383186340332, | |
| "learning_rate": 1.1149371504928667e-05, | |
| "loss": 0.0601, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.029870503597122303, | |
| "grad_norm": 21.55183219909668, | |
| "learning_rate": 1.1114689322063255e-05, | |
| "loss": 0.4671, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.029928057553956836, | |
| "grad_norm": 5.480937480926514, | |
| "learning_rate": 1.1079993557060228e-05, | |
| "loss": 0.19, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.029985611510791366, | |
| "grad_norm": 7.440226078033447, | |
| "learning_rate": 1.1045284632676535e-05, | |
| "loss": 0.2281, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.0300431654676259, | |
| "grad_norm": 2.201996326446533, | |
| "learning_rate": 1.1010562971829464e-05, | |
| "loss": 0.1137, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.03010071942446043, | |
| "grad_norm": 2.157567024230957, | |
| "learning_rate": 1.0975828997591496e-05, | |
| "loss": 0.1234, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.030158273381294964, | |
| "grad_norm": 1.4759501218795776, | |
| "learning_rate": 1.0941083133185146e-05, | |
| "loss": 0.0717, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.030215827338129497, | |
| "grad_norm": 4.390008449554443, | |
| "learning_rate": 1.0906325801977804e-05, | |
| "loss": 0.1566, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.030273381294964027, | |
| "grad_norm": 2.1558635234832764, | |
| "learning_rate": 1.0871557427476585e-05, | |
| "loss": 0.1084, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.03033093525179856, | |
| "grad_norm": 5.547511577606201, | |
| "learning_rate": 1.083677843332316e-05, | |
| "loss": 0.2485, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.030388489208633095, | |
| "grad_norm": 1.3841583728790283, | |
| "learning_rate": 1.0801989243288588e-05, | |
| "loss": 0.0932, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.030446043165467625, | |
| "grad_norm": 3.9912872314453125, | |
| "learning_rate": 1.0767190281268187e-05, | |
| "loss": 0.1574, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.03050359712230216, | |
| "grad_norm": 0.8950588703155518, | |
| "learning_rate": 1.0732381971276318e-05, | |
| "loss": 0.0841, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.030561151079136692, | |
| "grad_norm": 1.7501403093338013, | |
| "learning_rate": 1.0697564737441254e-05, | |
| "loss": 0.1257, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.030618705035971222, | |
| "grad_norm": 3.171156406402588, | |
| "learning_rate": 1.0662739004000005e-05, | |
| "loss": 0.1247, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.030676258992805756, | |
| "grad_norm": 8.79008960723877, | |
| "learning_rate": 1.0627905195293135e-05, | |
| "loss": 0.4406, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.03073381294964029, | |
| "grad_norm": 0.8652887344360352, | |
| "learning_rate": 1.0593063735759619e-05, | |
| "loss": 0.0557, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.03079136690647482, | |
| "grad_norm": 2.49337100982666, | |
| "learning_rate": 1.055821504993164e-05, | |
| "loss": 0.0957, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.030848920863309353, | |
| "grad_norm": 0.9073213934898376, | |
| "learning_rate": 1.0523359562429441e-05, | |
| "loss": 0.081, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.030906474820143887, | |
| "grad_norm": 2.930410385131836, | |
| "learning_rate": 1.0488497697956134e-05, | |
| "loss": 0.1306, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.030964028776978417, | |
| "grad_norm": 15.254547119140625, | |
| "learning_rate": 1.0453629881292537e-05, | |
| "loss": 0.5873, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.03102158273381295, | |
| "grad_norm": 9.663077354431152, | |
| "learning_rate": 1.0418756537291996e-05, | |
| "loss": 0.255, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.03107913669064748, | |
| "grad_norm": 0.9969314932823181, | |
| "learning_rate": 1.03838780908752e-05, | |
| "loss": 0.0821, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.031136690647482014, | |
| "grad_norm": 8.922150611877441, | |
| "learning_rate": 1.0348994967025012e-05, | |
| "loss": 0.2661, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.031194244604316548, | |
| "grad_norm": 1.0743968486785889, | |
| "learning_rate": 1.0314107590781284e-05, | |
| "loss": 0.1089, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.03125179856115108, | |
| "grad_norm": 4.709200382232666, | |
| "learning_rate": 1.0279216387235691e-05, | |
| "loss": 0.158, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.03130935251798561, | |
| "grad_norm": 6.3187947273254395, | |
| "learning_rate": 1.0244321781526533e-05, | |
| "loss": 0.2659, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.03136690647482014, | |
| "grad_norm": 0.4948660433292389, | |
| "learning_rate": 1.0209424198833571e-05, | |
| "loss": 0.0685, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.03142446043165468, | |
| "grad_norm": 1.0766798257827759, | |
| "learning_rate": 1.0174524064372837e-05, | |
| "loss": 0.0989, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.03148201438848921, | |
| "grad_norm": 4.577028751373291, | |
| "learning_rate": 1.0139621803391454e-05, | |
| "loss": 0.177, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.03153956834532374, | |
| "grad_norm": 0.9121817946434021, | |
| "learning_rate": 1.010471784116246e-05, | |
| "loss": 0.086, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.031597122302158276, | |
| "grad_norm": 8.214136123657227, | |
| "learning_rate": 1.0069812602979617e-05, | |
| "loss": 0.2782, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.031654676258992806, | |
| "grad_norm": 0.6466065049171448, | |
| "learning_rate": 1.0034906514152239e-05, | |
| "loss": 0.0647, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.031712230215827336, | |
| "grad_norm": 2.8535683155059814, | |
| "learning_rate": 1e-05, | |
| "loss": 0.1092, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.03176978417266187, | |
| "grad_norm": 0.7769191861152649, | |
| "learning_rate": 9.965093485847766e-06, | |
| "loss": 0.0612, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.0318273381294964, | |
| "grad_norm": 1.8424514532089233, | |
| "learning_rate": 9.930187397020385e-06, | |
| "loss": 0.1221, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.031884892086330933, | |
| "grad_norm": 1.2432526350021362, | |
| "learning_rate": 9.895282158837545e-06, | |
| "loss": 0.0838, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.03194244604316547, | |
| "grad_norm": 4.903514862060547, | |
| "learning_rate": 9.860378196608549e-06, | |
| "loss": 0.1696, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 0.8313055038452148, | |
| "learning_rate": 9.825475935627165e-06, | |
| "loss": 0.0785, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.03205755395683453, | |
| "grad_norm": 0.8260055780410767, | |
| "learning_rate": 9.790575801166432e-06, | |
| "loss": 0.0783, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.03211510791366907, | |
| "grad_norm": 15.278909683227539, | |
| "learning_rate": 9.75567821847347e-06, | |
| "loss": 0.4247, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.0321726618705036, | |
| "grad_norm": 1.1578339338302612, | |
| "learning_rate": 9.720783612764314e-06, | |
| "loss": 0.0904, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.03223021582733813, | |
| "grad_norm": 2.438309669494629, | |
| "learning_rate": 9.685892409218718e-06, | |
| "loss": 0.1525, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.032287769784172665, | |
| "grad_norm": 7.4440693855285645, | |
| "learning_rate": 9.651005032974994e-06, | |
| "loss": 0.2666, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.032345323741007195, | |
| "grad_norm": 1.554640293121338, | |
| "learning_rate": 9.616121909124801e-06, | |
| "loss": 0.0693, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.032402877697841725, | |
| "grad_norm": 3.4629485607147217, | |
| "learning_rate": 9.581243462708007e-06, | |
| "loss": 0.1241, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.032460431654676256, | |
| "grad_norm": 1.3280606269836426, | |
| "learning_rate": 9.546370118707463e-06, | |
| "loss": 0.0894, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.03251798561151079, | |
| "grad_norm": 3.824627637863159, | |
| "learning_rate": 9.511502302043867e-06, | |
| "loss": 0.1586, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.03257553956834532, | |
| "grad_norm": 9.243664741516113, | |
| "learning_rate": 9.476640437570562e-06, | |
| "loss": 0.258, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.03263309352517985, | |
| "grad_norm": 3.4505608081817627, | |
| "learning_rate": 9.441784950068362e-06, | |
| "loss": 0.134, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.03269064748201439, | |
| "grad_norm": 13.31429672241211, | |
| "learning_rate": 9.406936264240386e-06, | |
| "loss": 0.4339, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.03274820143884892, | |
| "grad_norm": 0.4915928542613983, | |
| "learning_rate": 9.372094804706867e-06, | |
| "loss": 0.0526, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.03280575539568345, | |
| "grad_norm": 9.329924583435059, | |
| "learning_rate": 9.337260996000002e-06, | |
| "loss": 0.1747, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.03286330935251799, | |
| "grad_norm": 1.121419906616211, | |
| "learning_rate": 9.302435262558748e-06, | |
| "loss": 0.0665, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.03292086330935252, | |
| "grad_norm": 1.8707423210144043, | |
| "learning_rate": 9.267618028723687e-06, | |
| "loss": 0.1144, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.03297841726618705, | |
| "grad_norm": 0.3859129548072815, | |
| "learning_rate": 9.232809718731815e-06, | |
| "loss": 0.0301, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.033035971223021585, | |
| "grad_norm": 0.4926592707633972, | |
| "learning_rate": 9.198010756711413e-06, | |
| "loss": 0.0795, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.033093525179856115, | |
| "grad_norm": 7.28505802154541, | |
| "learning_rate": 9.163221566676847e-06, | |
| "loss": 0.1051, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.033151079136690645, | |
| "grad_norm": 10.257981300354004, | |
| "learning_rate": 9.128442572523418e-06, | |
| "loss": 0.4004, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.03320863309352518, | |
| "grad_norm": 5.706249713897705, | |
| "learning_rate": 9.093674198022201e-06, | |
| "loss": 0.2009, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.03326618705035971, | |
| "grad_norm": 3.457526922225952, | |
| "learning_rate": 9.058916866814857e-06, | |
| "loss": 0.1433, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.03332374100719424, | |
| "grad_norm": 6.841573715209961, | |
| "learning_rate": 9.024171002408507e-06, | |
| "loss": 0.5298, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.03338129496402878, | |
| "grad_norm": 1.4832878112792969, | |
| "learning_rate": 8.989437028170537e-06, | |
| "loss": 0.0853, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.03343884892086331, | |
| "grad_norm": 11.173535346984863, | |
| "learning_rate": 8.954715367323468e-06, | |
| "loss": 0.2538, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.03349640287769784, | |
| "grad_norm": 0.7396141290664673, | |
| "learning_rate": 8.920006442939772e-06, | |
| "loss": 0.025, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.03355395683453238, | |
| "grad_norm": 4.9781293869018555, | |
| "learning_rate": 8.885310677936746e-06, | |
| "loss": 0.1923, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.03361151079136691, | |
| "grad_norm": 0.557285726070404, | |
| "learning_rate": 8.850628495071336e-06, | |
| "loss": 0.0087, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.03366906474820144, | |
| "grad_norm": 11.92111873626709, | |
| "learning_rate": 8.815960316934991e-06, | |
| "loss": 0.2586, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.033726618705035974, | |
| "grad_norm": 1.8631489276885986, | |
| "learning_rate": 8.781306565948528e-06, | |
| "loss": 0.1197, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.033784172661870504, | |
| "grad_norm": 1.890995740890503, | |
| "learning_rate": 8.746667664356957e-06, | |
| "loss": 0.1011, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.033841726618705034, | |
| "grad_norm": 0.8469424247741699, | |
| "learning_rate": 8.712044034224374e-06, | |
| "loss": 0.0862, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.03389928057553957, | |
| "grad_norm": 0.5587536096572876, | |
| "learning_rate": 8.677436097428775e-06, | |
| "loss": 0.0414, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.0339568345323741, | |
| "grad_norm": 14.286009788513184, | |
| "learning_rate": 8.642844275656957e-06, | |
| "loss": 0.2033, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.03401438848920863, | |
| "grad_norm": 0.828925371170044, | |
| "learning_rate": 8.60826899039935e-06, | |
| "loss": 0.0879, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.03407194244604317, | |
| "grad_norm": 2.5899553298950195, | |
| "learning_rate": 8.573710662944884e-06, | |
| "loss": 0.1128, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.0341294964028777, | |
| "grad_norm": 0.7016512751579285, | |
| "learning_rate": 8.539169714375885e-06, | |
| "loss": 0.0559, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.03418705035971223, | |
| "grad_norm": 0.7938787937164307, | |
| "learning_rate": 8.504646565562907e-06, | |
| "loss": 0.0751, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.034244604316546766, | |
| "grad_norm": 4.789397716522217, | |
| "learning_rate": 8.47014163715962e-06, | |
| "loss": 0.1729, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.034302158273381296, | |
| "grad_norm": 8.780428886413574, | |
| "learning_rate": 8.43565534959769e-06, | |
| "loss": 0.2045, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.034359712230215826, | |
| "grad_norm": 1.7636146545410156, | |
| "learning_rate": 8.401188123081653e-06, | |
| "loss": 0.0915, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.034417266187050356, | |
| "grad_norm": 4.673388957977295, | |
| "learning_rate": 8.366740377583781e-06, | |
| "loss": 0.1396, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.03447482014388489, | |
| "grad_norm": 10.594704627990723, | |
| "learning_rate": 8.332312532838978e-06, | |
| "loss": 0.2385, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.034532374100719423, | |
| "grad_norm": 0.38678157329559326, | |
| "learning_rate": 8.297905008339677e-06, | |
| "loss": 0.0511, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.034589928057553954, | |
| "grad_norm": 2.4528400897979736, | |
| "learning_rate": 8.263518223330698e-06, | |
| "loss": 0.1191, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.03464748201438849, | |
| "grad_norm": 9.87134075164795, | |
| "learning_rate": 8.22915259680417e-06, | |
| "loss": 0.3747, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.03470503597122302, | |
| "grad_norm": 2.0023324489593506, | |
| "learning_rate": 8.194808547494401e-06, | |
| "loss": 0.0745, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.03476258992805755, | |
| "grad_norm": 14.806257247924805, | |
| "learning_rate": 8.1604864938728e-06, | |
| "loss": 0.5362, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.03482014388489209, | |
| "grad_norm": 4.73097562789917, | |
| "learning_rate": 8.126186854142752e-06, | |
| "loss": 0.1718, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.03487769784172662, | |
| "grad_norm": 1.0992658138275146, | |
| "learning_rate": 8.091910046234552e-06, | |
| "loss": 0.0868, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.03493525179856115, | |
| "grad_norm": 0.5213341116905212, | |
| "learning_rate": 8.057656487800283e-06, | |
| "loss": 0.0579, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.034992805755395685, | |
| "grad_norm": 0.8178665637969971, | |
| "learning_rate": 8.023426596208739e-06, | |
| "loss": 0.0724, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.035050359712230215, | |
| "grad_norm": 2.403118133544922, | |
| "learning_rate": 7.989220788540356e-06, | |
| "loss": 0.1376, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.035107913669064746, | |
| "grad_norm": 6.1761579513549805, | |
| "learning_rate": 7.955039481582098e-06, | |
| "loss": 0.2501, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.03516546762589928, | |
| "grad_norm": 1.1938904523849487, | |
| "learning_rate": 7.92088309182241e-06, | |
| "loss": 0.1253, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.03522302158273381, | |
| "grad_norm": 1.653713345527649, | |
| "learning_rate": 7.886752035446116e-06, | |
| "loss": 0.0934, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.03528057553956834, | |
| "grad_norm": 0.9503543972969055, | |
| "learning_rate": 7.852646728329368e-06, | |
| "loss": 0.0567, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.03533812949640288, | |
| "grad_norm": 4.422244548797607, | |
| "learning_rate": 7.818567586034578e-06, | |
| "loss": 0.1249, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.03539568345323741, | |
| "grad_norm": 3.623196840286255, | |
| "learning_rate": 7.784515023805328e-06, | |
| "loss": 0.1158, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.03545323741007194, | |
| "grad_norm": 9.297059059143066, | |
| "learning_rate": 7.750489456561351e-06, | |
| "loss": 0.2269, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.03551079136690648, | |
| "grad_norm": 4.102265357971191, | |
| "learning_rate": 7.716491298893443e-06, | |
| "loss": 0.1605, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.03556834532374101, | |
| "grad_norm": 1.0876208543777466, | |
| "learning_rate": 7.68252096505843e-06, | |
| "loss": 0.0712, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.03562589928057554, | |
| "grad_norm": 3.1616358757019043, | |
| "learning_rate": 7.6485788689741e-06, | |
| "loss": 0.1216, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.035683453237410075, | |
| "grad_norm": 1.6394915580749512, | |
| "learning_rate": 7.6146654242141935e-06, | |
| "loss": 0.1088, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.035741007194244605, | |
| "grad_norm": 0.6849850416183472, | |
| "learning_rate": 7.580781044003324e-06, | |
| "loss": 0.0639, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.035798561151079135, | |
| "grad_norm": 0.7542056441307068, | |
| "learning_rate": 7.546926141211975e-06, | |
| "loss": 0.0541, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.03585611510791367, | |
| "grad_norm": 1.4969114065170288, | |
| "learning_rate": 7.513101128351454e-06, | |
| "loss": 0.0822, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.0359136690647482, | |
| "grad_norm": 0.6035341620445251, | |
| "learning_rate": 7.4793064175688635e-06, | |
| "loss": 0.0831, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.03597122302158273, | |
| "grad_norm": 9.536357879638672, | |
| "learning_rate": 7.445542420642097e-06, | |
| "loss": 0.297, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.03602877697841727, | |
| "grad_norm": 10.490472793579102, | |
| "learning_rate": 7.411809548974792e-06, | |
| "loss": 0.2122, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.0360863309352518, | |
| "grad_norm": 2.7282614707946777, | |
| "learning_rate": 7.378108213591355e-06, | |
| "loss": 0.0946, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.03614388489208633, | |
| "grad_norm": 4.524618625640869, | |
| "learning_rate": 7.344438825131912e-06, | |
| "loss": 0.1423, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.03620143884892087, | |
| "grad_norm": 7.327822685241699, | |
| "learning_rate": 7.310801793847344e-06, | |
| "loss": 0.2327, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.0362589928057554, | |
| "grad_norm": 2.4597315788269043, | |
| "learning_rate": 7.277197529594257e-06, | |
| "loss": 0.1116, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.03631654676258993, | |
| "grad_norm": 8.071950912475586, | |
| "learning_rate": 7.243626441830009e-06, | |
| "loss": 0.1441, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.03637410071942446, | |
| "grad_norm": 0.2197929471731186, | |
| "learning_rate": 7.210088939607709e-06, | |
| "loss": 0.0039, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.036431654676258994, | |
| "grad_norm": 1.8408621549606323, | |
| "learning_rate": 7.176585431571235e-06, | |
| "loss": 0.0781, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.036489208633093524, | |
| "grad_norm": 9.146245002746582, | |
| "learning_rate": 7.143116325950266e-06, | |
| "loss": 0.2037, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.036546762589928054, | |
| "grad_norm": 4.6287922859191895, | |
| "learning_rate": 7.109682030555283e-06, | |
| "loss": 0.1195, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.03660431654676259, | |
| "grad_norm": 1.386813998222351, | |
| "learning_rate": 7.076282952772634e-06, | |
| "loss": 0.124, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.03666187050359712, | |
| "grad_norm": 0.7986965775489807, | |
| "learning_rate": 7.042919499559538e-06, | |
| "loss": 0.0724, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.03671942446043165, | |
| "grad_norm": 2.6151773929595947, | |
| "learning_rate": 7.009592077439135e-06, | |
| "loss": 0.0991, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.03677697841726619, | |
| "grad_norm": 9.886137962341309, | |
| "learning_rate": 6.976301092495556e-06, | |
| "loss": 0.2132, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.03683453237410072, | |
| "grad_norm": 1.0213325023651123, | |
| "learning_rate": 6.943046950368944e-06, | |
| "loss": 0.0824, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.03689208633093525, | |
| "grad_norm": 5.319864273071289, | |
| "learning_rate": 6.909830056250527e-06, | |
| "loss": 0.1426, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.036949640287769786, | |
| "grad_norm": 11.022806167602539, | |
| "learning_rate": 6.876650814877675e-06, | |
| "loss": 0.3286, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.037007194244604316, | |
| "grad_norm": 7.383906841278076, | |
| "learning_rate": 6.843509630528977e-06, | |
| "loss": 0.1446, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.037064748201438846, | |
| "grad_norm": 1.5146830081939697, | |
| "learning_rate": 6.8104069070193e-06, | |
| "loss": 0.0942, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.03712230215827338, | |
| "grad_norm": 18.990802764892578, | |
| "learning_rate": 6.777343047694891e-06, | |
| "loss": 0.2854, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.037179856115107913, | |
| "grad_norm": 2.318795680999756, | |
| "learning_rate": 6.744318455428436e-06, | |
| "loss": 0.0937, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.037237410071942444, | |
| "grad_norm": 0.372890830039978, | |
| "learning_rate": 6.711333532614168e-06, | |
| "loss": 0.0548, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.03729496402877698, | |
| "grad_norm": 2.1778178215026855, | |
| "learning_rate": 6.67838868116297e-06, | |
| "loss": 0.0613, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.03735251798561151, | |
| "grad_norm": 7.633350372314453, | |
| "learning_rate": 6.645484302497452e-06, | |
| "loss": 0.2113, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.03741007194244604, | |
| "grad_norm": 2.2798593044281006, | |
| "learning_rate": 6.612620797547087e-06, | |
| "loss": 0.1007, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.03746762589928058, | |
| "grad_norm": 1.3645232915878296, | |
| "learning_rate": 6.579798566743314e-06, | |
| "loss": 0.1054, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.03752517985611511, | |
| "grad_norm": 3.271883249282837, | |
| "learning_rate": 6.547018010014654e-06, | |
| "loss": 0.1515, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.03758273381294964, | |
| "grad_norm": 10.690248489379883, | |
| "learning_rate": 6.5142795267818505e-06, | |
| "loss": 0.2535, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.037640287769784175, | |
| "grad_norm": 7.398156642913818, | |
| "learning_rate": 6.481583515952983e-06, | |
| "loss": 0.1515, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.037697841726618705, | |
| "grad_norm": 5.868314266204834, | |
| "learning_rate": 6.448930375918632e-06, | |
| "loss": 0.114, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.037755395683453236, | |
| "grad_norm": 7.998626708984375, | |
| "learning_rate": 6.4163205045469975e-06, | |
| "loss": 0.1563, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.03781294964028777, | |
| "grad_norm": 13.154156684875488, | |
| "learning_rate": 6.383754299179079e-06, | |
| "loss": 0.5158, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.0378705035971223, | |
| "grad_norm": 1.3445124626159668, | |
| "learning_rate": 6.351232156623803e-06, | |
| "loss": 0.1183, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.03792805755395683, | |
| "grad_norm": 1.747786521911621, | |
| "learning_rate": 6.318754473153221e-06, | |
| "loss": 0.0859, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.03798561151079137, | |
| "grad_norm": 0.8909703493118286, | |
| "learning_rate": 6.286321644497655e-06, | |
| "loss": 0.0959, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.0380431654676259, | |
| "grad_norm": 2.6019179821014404, | |
| "learning_rate": 6.25393406584088e-06, | |
| "loss": 0.1215, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.03810071942446043, | |
| "grad_norm": 0.6137844324111938, | |
| "learning_rate": 6.22159213181533e-06, | |
| "loss": 0.0674, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.03815827338129497, | |
| "grad_norm": 5.27604341506958, | |
| "learning_rate": 6.18929623649726e-06, | |
| "loss": 0.1627, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.0382158273381295, | |
| "grad_norm": 1.312740445137024, | |
| "learning_rate": 6.157046773401964e-06, | |
| "loss": 0.0954, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.03827338129496403, | |
| "grad_norm": 10.17847728729248, | |
| "learning_rate": 6.124844135478971e-06, | |
| "loss": 0.2274, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.03833093525179856, | |
| "grad_norm": 2.691664457321167, | |
| "learning_rate": 6.092688715107265e-06, | |
| "loss": 0.0243, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.038388489208633095, | |
| "grad_norm": 5.8143229484558105, | |
| "learning_rate": 6.06058090409049e-06, | |
| "loss": 0.1523, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.038446043165467625, | |
| "grad_norm": 23.19382095336914, | |
| "learning_rate": 6.028521093652195e-06, | |
| "loss": 0.4124, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.038503597122302155, | |
| "grad_norm": 9.377325057983398, | |
| "learning_rate": 5.996509674431053e-06, | |
| "loss": 0.2113, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.03856115107913669, | |
| "grad_norm": 12.410694122314453, | |
| "learning_rate": 5.9645470364761e-06, | |
| "loss": 0.2557, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.03861870503597122, | |
| "grad_norm": 7.3218865394592285, | |
| "learning_rate": 5.932633569242e-06, | |
| "loss": 0.1555, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.03867625899280575, | |
| "grad_norm": 5.622396945953369, | |
| "learning_rate": 5.900769661584273e-06, | |
| "loss": 0.0826, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.03873381294964029, | |
| "grad_norm": 5.398272514343262, | |
| "learning_rate": 5.868955701754584e-06, | |
| "loss": 0.1456, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.03879136690647482, | |
| "grad_norm": 0.751691460609436, | |
| "learning_rate": 5.83719207739599e-06, | |
| "loss": 0.0772, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.03884892086330935, | |
| "grad_norm": 0.6720765233039856, | |
| "learning_rate": 5.8054791755382286e-06, | |
| "loss": 0.0869, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.03890647482014389, | |
| "grad_norm": 1.0747441053390503, | |
| "learning_rate": 5.773817382593008e-06, | |
| "loss": 0.072, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.03896402877697842, | |
| "grad_norm": 10.877379417419434, | |
| "learning_rate": 5.742207084349274e-06, | |
| "loss": 0.2319, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.03902158273381295, | |
| "grad_norm": 1.0769011974334717, | |
| "learning_rate": 5.710648665968543e-06, | |
| "loss": 0.072, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.039079136690647484, | |
| "grad_norm": 17.446186065673828, | |
| "learning_rate": 5.679142511980176e-06, | |
| "loss": 0.3885, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.039136690647482014, | |
| "grad_norm": 0.5651721358299255, | |
| "learning_rate": 5.647689006276727e-06, | |
| "loss": 0.0675, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.039194244604316544, | |
| "grad_norm": 12.628124237060547, | |
| "learning_rate": 5.616288532109225e-06, | |
| "loss": 0.5707, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.03925179856115108, | |
| "grad_norm": 1.9146260023117065, | |
| "learning_rate": 5.584941472082549e-06, | |
| "loss": 0.0913, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.03930935251798561, | |
| "grad_norm": 13.525870323181152, | |
| "learning_rate": 5.553648208150728e-06, | |
| "loss": 0.4603, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.03936690647482014, | |
| "grad_norm": 15.693315505981445, | |
| "learning_rate": 5.522409121612304e-06, | |
| "loss": 0.1253, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.03942446043165468, | |
| "grad_norm": 2.555248975753784, | |
| "learning_rate": 5.491224593105695e-06, | |
| "loss": 0.065, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.03948201438848921, | |
| "grad_norm": 13.866741180419922, | |
| "learning_rate": 5.460095002604533e-06, | |
| "loss": 0.2646, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.03953956834532374, | |
| "grad_norm": 1.9787272214889526, | |
| "learning_rate": 5.429020729413062e-06, | |
| "loss": 0.0937, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.039597122302158276, | |
| "grad_norm": 0.8634059429168701, | |
| "learning_rate": 5.398002152161484e-06, | |
| "loss": 0.0827, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.039654676258992806, | |
| "grad_norm": 7.688801288604736, | |
| "learning_rate": 5.367039648801386e-06, | |
| "loss": 0.187, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.039712230215827336, | |
| "grad_norm": 3.1192588806152344, | |
| "learning_rate": 5.336133596601089e-06, | |
| "loss": 0.1103, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.03976978417266187, | |
| "grad_norm": 0.4575349688529968, | |
| "learning_rate": 5.305284372141095e-06, | |
| "loss": 0.0628, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.039827338129496404, | |
| "grad_norm": 3.0432817935943604, | |
| "learning_rate": 5.274492351309462e-06, | |
| "loss": 0.0893, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.039884892086330934, | |
| "grad_norm": 7.722862243652344, | |
| "learning_rate": 5.243757909297247e-06, | |
| "loss": 0.1858, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.03994244604316547, | |
| "grad_norm": 1.459063172340393, | |
| "learning_rate": 5.213081420593933e-06, | |
| "loss": 0.0857, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 2.9894986152648926, | |
| "learning_rate": 5.1824632589828465e-06, | |
| "loss": 0.1136, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.04005755395683453, | |
| "grad_norm": 0.5634929537773132, | |
| "learning_rate": 5.151903797536631e-06, | |
| "loss": 0.0685, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.04011510791366907, | |
| "grad_norm": 4.808594703674316, | |
| "learning_rate": 5.121403408612672e-06, | |
| "loss": 0.1148, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.0401726618705036, | |
| "grad_norm": 0.5084381103515625, | |
| "learning_rate": 5.090962463848592e-06, | |
| "loss": 0.0665, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.04023021582733813, | |
| "grad_norm": 11.494372367858887, | |
| "learning_rate": 5.060581334157693e-06, | |
| "loss": 0.212, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.04028776978417266, | |
| "grad_norm": 13.936042785644531, | |
| "learning_rate": 5.030260389724447e-06, | |
| "loss": 0.403, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.040345323741007195, | |
| "grad_norm": 4.563905239105225, | |
| "learning_rate": 5.000000000000003e-06, | |
| "loss": 0.2219, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.040402877697841726, | |
| "grad_norm": 1.8324753046035767, | |
| "learning_rate": 4.96980053369765e-06, | |
| "loss": 0.0575, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.040460431654676256, | |
| "grad_norm": 0.6643631458282471, | |
| "learning_rate": 4.939662358788364e-06, | |
| "loss": 0.0747, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.04051798561151079, | |
| "grad_norm": 0.9275362491607666, | |
| "learning_rate": 4.909585842496287e-06, | |
| "loss": 0.0876, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.04057553956834532, | |
| "grad_norm": 13.703351020812988, | |
| "learning_rate": 4.879571351294287e-06, | |
| "loss": 0.3226, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.04063309352517985, | |
| "grad_norm": 0.6237985491752625, | |
| "learning_rate": 4.849619250899458e-06, | |
| "loss": 0.0687, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.04069064748201439, | |
| "grad_norm": 0.9500293135643005, | |
| "learning_rate": 4.8197299062687e-06, | |
| "loss": 0.0092, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.04074820143884892, | |
| "grad_norm": 31.131847381591797, | |
| "learning_rate": 4.78990368159424e-06, | |
| "loss": 0.7294, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.04080575539568345, | |
| "grad_norm": 2.998800277709961, | |
| "learning_rate": 4.76014094029921e-06, | |
| "loss": 0.0865, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.04086330935251799, | |
| "grad_norm": 2.464053153991699, | |
| "learning_rate": 4.7304420450332244e-06, | |
| "loss": 0.0947, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.04092086330935252, | |
| "grad_norm": 5.868266582489014, | |
| "learning_rate": 4.700807357667953e-06, | |
| "loss": 0.1665, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.04097841726618705, | |
| "grad_norm": 0.5691702365875244, | |
| "learning_rate": 4.671237239292699e-06, | |
| "loss": 0.0655, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.041035971223021585, | |
| "grad_norm": 2.4520723819732666, | |
| "learning_rate": 4.641732050210032e-06, | |
| "loss": 0.1552, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.041093525179856115, | |
| "grad_norm": 0.49852254986763, | |
| "learning_rate": 4.612292149931369e-06, | |
| "loss": 0.0324, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.041151079136690645, | |
| "grad_norm": 12.27933120727539, | |
| "learning_rate": 4.582917897172603e-06, | |
| "loss": 0.1786, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.04120863309352518, | |
| "grad_norm": 5.295310020446777, | |
| "learning_rate": 4.5536096498497295e-06, | |
| "loss": 0.0978, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.04126618705035971, | |
| "grad_norm": 0.554686427116394, | |
| "learning_rate": 4.524367765074499e-06, | |
| "loss": 0.0519, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.04132374100719424, | |
| "grad_norm": 0.8725594282150269, | |
| "learning_rate": 4.495192599150045e-06, | |
| "loss": 0.037, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.04138129496402878, | |
| "grad_norm": 1.3279333114624023, | |
| "learning_rate": 4.46608450756656e-06, | |
| "loss": 0.0527, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.04143884892086331, | |
| "grad_norm": 7.157567501068115, | |
| "learning_rate": 4.437043844996952e-06, | |
| "loss": 0.1266, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.04149640287769784, | |
| "grad_norm": 1.4394761323928833, | |
| "learning_rate": 4.408070965292534e-06, | |
| "loss": 0.1102, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.04155395683453238, | |
| "grad_norm": 1.952061653137207, | |
| "learning_rate": 4.379166221478697e-06, | |
| "loss": 0.1155, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.04161151079136691, | |
| "grad_norm": 3.076018810272217, | |
| "learning_rate": 4.350329965750622e-06, | |
| "loss": 0.0957, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.04166906474820144, | |
| "grad_norm": 5.298429012298584, | |
| "learning_rate": 4.321562549468991e-06, | |
| "loss": 0.0811, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.041726618705035974, | |
| "grad_norm": 1.0459011793136597, | |
| "learning_rate": 4.292864323155684e-06, | |
| "loss": 0.0744, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.041784172661870504, | |
| "grad_norm": 3.071275234222412, | |
| "learning_rate": 4.264235636489542e-06, | |
| "loss": 0.1764, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.041841726618705034, | |
| "grad_norm": 0.6313245296478271, | |
| "learning_rate": 4.235676838302069e-06, | |
| "loss": 0.0659, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.04189928057553957, | |
| "grad_norm": 0.4360724687576294, | |
| "learning_rate": 4.207188276573214e-06, | |
| "loss": 0.0432, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.0419568345323741, | |
| "grad_norm": 1.621596097946167, | |
| "learning_rate": 4.178770298427107e-06, | |
| "loss": 0.0963, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.04201438848920863, | |
| "grad_norm": 7.333670139312744, | |
| "learning_rate": 4.150423250127846e-06, | |
| "loss": 0.1117, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.04207194244604317, | |
| "grad_norm": 9.378557205200195, | |
| "learning_rate": 4.12214747707527e-06, | |
| "loss": 0.1928, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.0421294964028777, | |
| "grad_norm": 7.514365196228027, | |
| "learning_rate": 4.093943323800746e-06, | |
| "loss": 0.1798, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.04218705035971223, | |
| "grad_norm": 3.0120790004730225, | |
| "learning_rate": 4.065811133962987e-06, | |
| "loss": 0.1246, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.04224460431654676, | |
| "grad_norm": 1.4734376668930054, | |
| "learning_rate": 4.037751250343841e-06, | |
| "loss": 0.0872, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.042302158273381296, | |
| "grad_norm": 0.6325392723083496, | |
| "learning_rate": 4.009764014844143e-06, | |
| "loss": 0.0912, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.042359712230215826, | |
| "grad_norm": 2.082659959793091, | |
| "learning_rate": 3.981849768479516e-06, | |
| "loss": 0.1113, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.042417266187050356, | |
| "grad_norm": 3.347140312194824, | |
| "learning_rate": 3.954008851376252e-06, | |
| "loss": 0.1088, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.042474820143884894, | |
| "grad_norm": 0.7007344365119934, | |
| "learning_rate": 3.9262416027671354e-06, | |
| "loss": 0.0753, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.042532374100719424, | |
| "grad_norm": 6.338494300842285, | |
| "learning_rate": 3.898548360987325e-06, | |
| "loss": 0.1733, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.042589928057553954, | |
| "grad_norm": 7.861074924468994, | |
| "learning_rate": 3.8709294634702374e-06, | |
| "loss": 0.1722, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.04264748201438849, | |
| "grad_norm": 1.3125121593475342, | |
| "learning_rate": 3.8433852467434175e-06, | |
| "loss": 0.0739, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.04270503597122302, | |
| "grad_norm": 2.1111292839050293, | |
| "learning_rate": 3.81591604642446e-06, | |
| "loss": 0.1041, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.04276258992805755, | |
| "grad_norm": 2.2051801681518555, | |
| "learning_rate": 3.7885221972168974e-06, | |
| "loss": 0.0826, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.04282014388489209, | |
| "grad_norm": 4.703516960144043, | |
| "learning_rate": 3.7612040329061405e-06, | |
| "loss": 0.1218, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.04287769784172662, | |
| "grad_norm": 0.9855811595916748, | |
| "learning_rate": 3.7339618863553983e-06, | |
| "loss": 0.0765, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.04293525179856115, | |
| "grad_norm": 0.4522510766983032, | |
| "learning_rate": 3.7067960895016277e-06, | |
| "loss": 0.0639, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.042992805755395685, | |
| "grad_norm": 0.5402107834815979, | |
| "learning_rate": 3.679706973351491e-06, | |
| "loss": 0.0463, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.043050359712230216, | |
| "grad_norm": 8.379833221435547, | |
| "learning_rate": 3.6526948679773256e-06, | |
| "loss": 0.2167, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.043107913669064746, | |
| "grad_norm": 0.9145704507827759, | |
| "learning_rate": 3.625760102513103e-06, | |
| "loss": 0.098, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.04316546762589928, | |
| "grad_norm": 1.1772428750991821, | |
| "learning_rate": 3.598903005150444e-06, | |
| "loss": 0.0908, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.04322302158273381, | |
| "grad_norm": 1.089911699295044, | |
| "learning_rate": 3.5721239031346067e-06, | |
| "loss": 0.0803, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.04328057553956834, | |
| "grad_norm": 0.725675642490387, | |
| "learning_rate": 3.545423122760493e-06, | |
| "loss": 0.0763, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.04333812949640288, | |
| "grad_norm": 1.3648688793182373, | |
| "learning_rate": 3.5188009893686916e-06, | |
| "loss": 0.0807, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.04339568345323741, | |
| "grad_norm": 5.510678768157959, | |
| "learning_rate": 3.492257827341492e-06, | |
| "loss": 0.1315, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.04345323741007194, | |
| "grad_norm": 1.459609031677246, | |
| "learning_rate": 3.4657939600989453e-06, | |
| "loss": 0.1219, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.04351079136690648, | |
| "grad_norm": 8.776249885559082, | |
| "learning_rate": 3.4394097100949286e-06, | |
| "loss": 0.1991, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.04356834532374101, | |
| "grad_norm": 2.417057514190674, | |
| "learning_rate": 3.4131053988131947e-06, | |
| "loss": 0.0998, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.04362589928057554, | |
| "grad_norm": 12.036551475524902, | |
| "learning_rate": 3.3868813467634833e-06, | |
| "loss": 0.2524, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.043683453237410075, | |
| "grad_norm": 14.389498710632324, | |
| "learning_rate": 3.360737873477584e-06, | |
| "loss": 0.3022, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.043741007194244605, | |
| "grad_norm": 12.136941909790039, | |
| "learning_rate": 3.3346752975054763e-06, | |
| "loss": 0.2324, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.043798561151079135, | |
| "grad_norm": 5.893527507781982, | |
| "learning_rate": 3.308693936411421e-06, | |
| "loss": 0.1189, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.04385611510791367, | |
| "grad_norm": 1.1870757341384888, | |
| "learning_rate": 3.2827941067700996e-06, | |
| "loss": 0.0794, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.0439136690647482, | |
| "grad_norm": 0.639008641242981, | |
| "learning_rate": 3.2569761241627694e-06, | |
| "loss": 0.0606, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.04397122302158273, | |
| "grad_norm": 2.8850464820861816, | |
| "learning_rate": 3.2312403031733943e-06, | |
| "loss": 0.1593, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.04402877697841727, | |
| "grad_norm": 15.170232772827148, | |
| "learning_rate": 3.2055869573848374e-06, | |
| "loss": 0.2961, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.0440863309352518, | |
| "grad_norm": 1.2211562395095825, | |
| "learning_rate": 3.1800163993750166e-06, | |
| "loss": 0.1089, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.04414388489208633, | |
| "grad_norm": 0.4650379717350006, | |
| "learning_rate": 3.1545289407131128e-06, | |
| "loss": 0.0683, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.04420143884892087, | |
| "grad_norm": 1.6909816265106201, | |
| "learning_rate": 3.1291248919557717e-06, | |
| "loss": 0.123, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.0442589928057554, | |
| "grad_norm": 0.07863793522119522, | |
| "learning_rate": 3.103804562643302e-06, | |
| "loss": 0.0008, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.04431654676258993, | |
| "grad_norm": 8.237005233764648, | |
| "learning_rate": 3.0785682612959334e-06, | |
| "loss": 0.1946, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.04437410071942446, | |
| "grad_norm": 1.3069466352462769, | |
| "learning_rate": 3.0534162954100264e-06, | |
| "loss": 0.0659, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.044431654676258994, | |
| "grad_norm": 0.9894955158233643, | |
| "learning_rate": 3.028348971454356e-06, | |
| "loss": 0.0554, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.044489208633093524, | |
| "grad_norm": 1.0656694173812866, | |
| "learning_rate": 3.003366594866345e-06, | |
| "loss": 0.0713, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.044546762589928054, | |
| "grad_norm": 2.17790150642395, | |
| "learning_rate": 2.978469470048376e-06, | |
| "loss": 0.1119, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.04460431654676259, | |
| "grad_norm": 1.1125558614730835, | |
| "learning_rate": 2.953657900364053e-06, | |
| "loss": 0.0799, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.04466187050359712, | |
| "grad_norm": 0.7158892154693604, | |
| "learning_rate": 2.9289321881345257e-06, | |
| "loss": 0.0711, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.04471942446043165, | |
| "grad_norm": 1.9382431507110596, | |
| "learning_rate": 2.9042926346347932e-06, | |
| "loss": 0.0626, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.04477697841726619, | |
| "grad_norm": 3.6688735485076904, | |
| "learning_rate": 2.8797395400900362e-06, | |
| "loss": 0.1775, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.04483453237410072, | |
| "grad_norm": 0.9889999032020569, | |
| "learning_rate": 2.855273203671969e-06, | |
| "loss": 0.0765, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.04489208633093525, | |
| "grad_norm": 14.821270942687988, | |
| "learning_rate": 2.830893923495173e-06, | |
| "loss": 0.5594, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.044949640287769786, | |
| "grad_norm": 2.756847620010376, | |
| "learning_rate": 2.8066019966134907e-06, | |
| "loss": 0.0805, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.045007194244604316, | |
| "grad_norm": 2.8205065727233887, | |
| "learning_rate": 2.7823977190163788e-06, | |
| "loss": 0.1263, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.045064748201438846, | |
| "grad_norm": 12.99278736114502, | |
| "learning_rate": 2.7582813856253276e-06, | |
| "loss": 0.4152, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.045122302158273384, | |
| "grad_norm": 2.810309410095215, | |
| "learning_rate": 2.7342532902902418e-06, | |
| "loss": 0.0943, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.045179856115107914, | |
| "grad_norm": 1.1653300523757935, | |
| "learning_rate": 2.7103137257858867e-06, | |
| "loss": 0.0877, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.045237410071942444, | |
| "grad_norm": 5.321506023406982, | |
| "learning_rate": 2.6864629838082957e-06, | |
| "loss": 0.1012, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.04529496402877698, | |
| "grad_norm": 0.6735225915908813, | |
| "learning_rate": 2.6627013549712355e-06, | |
| "loss": 0.066, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.04535251798561151, | |
| "grad_norm": 0.7565364241600037, | |
| "learning_rate": 2.639029128802657e-06, | |
| "loss": 0.0728, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.04541007194244604, | |
| "grad_norm": 1.2969605922698975, | |
| "learning_rate": 2.615446593741161e-06, | |
| "loss": 0.0699, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.04546762589928058, | |
| "grad_norm": 1.2415449619293213, | |
| "learning_rate": 2.5919540371325005e-06, | |
| "loss": 0.0802, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.04552517985611511, | |
| "grad_norm": 3.416377067565918, | |
| "learning_rate": 2.5685517452260566e-06, | |
| "loss": 0.0328, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.04558273381294964, | |
| "grad_norm": 3.912259340286255, | |
| "learning_rate": 2.5452400031713786e-06, | |
| "loss": 0.0989, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.045640287769784176, | |
| "grad_norm": 4.516401767730713, | |
| "learning_rate": 2.522019095014683e-06, | |
| "loss": 0.1032, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.045697841726618706, | |
| "grad_norm": 1.5598896741867065, | |
| "learning_rate": 2.4988893036954045e-06, | |
| "loss": 0.0709, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.045755395683453236, | |
| "grad_norm": 4.611602306365967, | |
| "learning_rate": 2.4758509110427576e-06, | |
| "loss": 0.1643, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.04581294964028777, | |
| "grad_norm": 5.493204593658447, | |
| "learning_rate": 2.45290419777228e-06, | |
| "loss": 0.0297, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.0458705035971223, | |
| "grad_norm": 0.6497253775596619, | |
| "learning_rate": 2.4300494434824373e-06, | |
| "loss": 0.0486, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.04592805755395683, | |
| "grad_norm": 1.2920628786087036, | |
| "learning_rate": 2.407286926651192e-06, | |
| "loss": 0.0787, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.04598561151079137, | |
| "grad_norm": 2.440001964569092, | |
| "learning_rate": 2.3846169246326345e-06, | |
| "loss": 0.1111, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.0460431654676259, | |
| "grad_norm": 1.9451384544372559, | |
| "learning_rate": 2.362039713653581e-06, | |
| "loss": 0.0868, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.04610071942446043, | |
| "grad_norm": 2.6515021324157715, | |
| "learning_rate": 2.339555568810221e-06, | |
| "loss": 0.1016, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.04615827338129497, | |
| "grad_norm": 2.210442304611206, | |
| "learning_rate": 2.317164764064769e-06, | |
| "loss": 0.0648, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.0462158273381295, | |
| "grad_norm": 1.4524075984954834, | |
| "learning_rate": 2.2948675722421086e-06, | |
| "loss": 0.0752, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.04627338129496403, | |
| "grad_norm": 16.323625564575195, | |
| "learning_rate": 2.27266426502649e-06, | |
| "loss": 0.2994, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.04633093525179856, | |
| "grad_norm": 0.39062535762786865, | |
| "learning_rate": 2.2505551129582047e-06, | |
| "loss": 0.0519, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.046388489208633095, | |
| "grad_norm": 1.672765851020813, | |
| "learning_rate": 2.2285403854302912e-06, | |
| "loss": 0.064, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.046446043165467625, | |
| "grad_norm": 8.528423309326172, | |
| "learning_rate": 2.206620350685257e-06, | |
| "loss": 0.1726, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.046503597122302155, | |
| "grad_norm": 2.00990629196167, | |
| "learning_rate": 2.1847952758118118e-06, | |
| "loss": 0.0689, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.04656115107913669, | |
| "grad_norm": 1.1098554134368896, | |
| "learning_rate": 2.163065426741603e-06, | |
| "loss": 0.0746, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.04661870503597122, | |
| "grad_norm": 6.2346014976501465, | |
| "learning_rate": 2.1414310682459805e-06, | |
| "loss": 0.128, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.04667625899280575, | |
| "grad_norm": 20.44989776611328, | |
| "learning_rate": 2.119892463932781e-06, | |
| "loss": 0.2095, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.04673381294964029, | |
| "grad_norm": 6.084557056427002, | |
| "learning_rate": 2.098449876243096e-06, | |
| "loss": 0.1533, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.04679136690647482, | |
| "grad_norm": 18.029020309448242, | |
| "learning_rate": 2.0771035664480944e-06, | |
| "loss": 0.2013, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.04684892086330935, | |
| "grad_norm": 2.0399179458618164, | |
| "learning_rate": 2.0558537946458177e-06, | |
| "loss": 0.0732, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.04690647482014389, | |
| "grad_norm": 0.7458027005195618, | |
| "learning_rate": 2.0347008197580376e-06, | |
| "loss": 0.0815, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.04696402877697842, | |
| "grad_norm": 1.1791083812713623, | |
| "learning_rate": 2.013644899527074e-06, | |
| "loss": 0.0646, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.04702158273381295, | |
| "grad_norm": 1.5898396968841553, | |
| "learning_rate": 1.9926862905126663e-06, | |
| "loss": 0.0786, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.047079136690647484, | |
| "grad_norm": 0.5349504947662354, | |
| "learning_rate": 1.9718252480888567e-06, | |
| "loss": 0.0573, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.047136690647482014, | |
| "grad_norm": 1.0246250629425049, | |
| "learning_rate": 1.95106202644086e-06, | |
| "loss": 0.0941, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.047194244604316545, | |
| "grad_norm": 2.1197612285614014, | |
| "learning_rate": 1.930396878561983e-06, | |
| "loss": 0.0905, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.04725179856115108, | |
| "grad_norm": 4.380585670471191, | |
| "learning_rate": 1.9098300562505266e-06, | |
| "loss": 0.1047, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.04730935251798561, | |
| "grad_norm": 0.4877808094024658, | |
| "learning_rate": 1.8893618101067357e-06, | |
| "loss": 0.0526, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.04736690647482014, | |
| "grad_norm": 5.169129848480225, | |
| "learning_rate": 1.8689923895297247e-06, | |
| "loss": 0.1075, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.04742446043165468, | |
| "grad_norm": 0.528556227684021, | |
| "learning_rate": 1.848722042714457e-06, | |
| "loss": 0.0662, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.04748201438848921, | |
| "grad_norm": 1.4885525703430176, | |
| "learning_rate": 1.8285510166487154e-06, | |
| "loss": 0.0809, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.04753956834532374, | |
| "grad_norm": 0.9235038161277771, | |
| "learning_rate": 1.808479557110081e-06, | |
| "loss": 0.049, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.047597122302158276, | |
| "grad_norm": 3.030466079711914, | |
| "learning_rate": 1.7885079086629598e-06, | |
| "loss": 0.0674, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.047654676258992806, | |
| "grad_norm": 0.3380258083343506, | |
| "learning_rate": 1.7686363146555807e-06, | |
| "loss": 0.0407, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.047712230215827336, | |
| "grad_norm": 0.3373538553714752, | |
| "learning_rate": 1.7488650172170496e-06, | |
| "loss": 0.0557, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.047769784172661874, | |
| "grad_norm": 8.12345027923584, | |
| "learning_rate": 1.7291942572543806e-06, | |
| "loss": 0.1242, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.047827338129496404, | |
| "grad_norm": 18.61945915222168, | |
| "learning_rate": 1.709624274449584e-06, | |
| "loss": 0.1312, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.047884892086330934, | |
| "grad_norm": 1.0393412113189697, | |
| "learning_rate": 1.6901553072567189e-06, | |
| "loss": 0.0669, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.04794244604316547, | |
| "grad_norm": 0.405079185962677, | |
| "learning_rate": 1.6707875928990059e-06, | |
| "loss": 0.0455, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.048, | |
| "grad_norm": 1.6890630722045898, | |
| "learning_rate": 1.651521367365936e-06, | |
| "loss": 0.0977, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.04805755395683453, | |
| "grad_norm": 2.578721523284912, | |
| "learning_rate": 1.6323568654103838e-06, | |
| "loss": 0.0692, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.04811510791366907, | |
| "grad_norm": 1.0500586032867432, | |
| "learning_rate": 1.6132943205457607e-06, | |
| "loss": 0.1101, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.0481726618705036, | |
| "grad_norm": 25.174041748046875, | |
| "learning_rate": 1.5943339650431578e-06, | |
| "loss": 0.341, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.04823021582733813, | |
| "grad_norm": 0.7596455216407776, | |
| "learning_rate": 1.5754760299285255e-06, | |
| "loss": 0.0793, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.04828776978417266, | |
| "grad_norm": 0.9107263684272766, | |
| "learning_rate": 1.5567207449798517e-06, | |
| "loss": 0.079, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.048345323741007196, | |
| "grad_norm": 15.38805866241455, | |
| "learning_rate": 1.538068338724361e-06, | |
| "loss": 0.4072, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.048402877697841726, | |
| "grad_norm": 0.3658386766910553, | |
| "learning_rate": 1.5195190384357405e-06, | |
| "loss": 0.0568, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.048460431654676256, | |
| "grad_norm": 5.78350305557251, | |
| "learning_rate": 1.5010730701313626e-06, | |
| "loss": 0.078, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.04851798561151079, | |
| "grad_norm": 0.8679736256599426, | |
| "learning_rate": 1.4827306585695234e-06, | |
| "loss": 0.0967, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.04857553956834532, | |
| "grad_norm": 11.486621856689453, | |
| "learning_rate": 1.4644920272467245e-06, | |
| "loss": 0.1852, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.04863309352517985, | |
| "grad_norm": 7.43509578704834, | |
| "learning_rate": 1.446357398394934e-06, | |
| "loss": 0.1715, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.04869064748201439, | |
| "grad_norm": 0.7901409864425659, | |
| "learning_rate": 1.4283269929788779e-06, | |
| "loss": 0.0405, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.04874820143884892, | |
| "grad_norm": 1.712028980255127, | |
| "learning_rate": 1.4104010306933558e-06, | |
| "loss": 0.088, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.04880575539568345, | |
| "grad_norm": 1.992058277130127, | |
| "learning_rate": 1.3925797299605649e-06, | |
| "loss": 0.1193, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.04886330935251799, | |
| "grad_norm": 0.7998857498168945, | |
| "learning_rate": 1.3748633079274254e-06, | |
| "loss": 0.0473, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.04892086330935252, | |
| "grad_norm": 0.4824025332927704, | |
| "learning_rate": 1.3572519804629537e-06, | |
| "loss": 0.0652, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.04897841726618705, | |
| "grad_norm": 0.8086461424827576, | |
| "learning_rate": 1.339745962155613e-06, | |
| "loss": 0.0645, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.049035971223021585, | |
| "grad_norm": 27.238941192626953, | |
| "learning_rate": 1.322345466310717e-06, | |
| "loss": 0.4208, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.049093525179856115, | |
| "grad_norm": 1.820080041885376, | |
| "learning_rate": 1.30505070494781e-06, | |
| "loss": 0.1134, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.049151079136690645, | |
| "grad_norm": 0.9083067178726196, | |
| "learning_rate": 1.2878618887981064e-06, | |
| "loss": 0.0782, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.04920863309352518, | |
| "grad_norm": 0.7896227240562439, | |
| "learning_rate": 1.2707792273019049e-06, | |
| "loss": 0.0586, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.04926618705035971, | |
| "grad_norm": 0.8508791923522949, | |
| "learning_rate": 1.2538029286060428e-06, | |
| "loss": 0.059, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.04932374100719424, | |
| "grad_norm": 1.0554744005203247, | |
| "learning_rate": 1.2369331995613664e-06, | |
| "loss": 0.0841, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.04938129496402878, | |
| "grad_norm": 1.7418127059936523, | |
| "learning_rate": 1.2201702457201948e-06, | |
| "loss": 0.0688, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.04943884892086331, | |
| "grad_norm": 3.3948047161102295, | |
| "learning_rate": 1.2035142713338366e-06, | |
| "loss": 0.0954, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.04949640287769784, | |
| "grad_norm": 1.1235921382904053, | |
| "learning_rate": 1.1869654793500784e-06, | |
| "loss": 0.0726, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.04955395683453238, | |
| "grad_norm": 4.876044750213623, | |
| "learning_rate": 1.1705240714107301e-06, | |
| "loss": 0.1062, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.04961151079136691, | |
| "grad_norm": 6.03566837310791, | |
| "learning_rate": 1.1541902478491607e-06, | |
| "loss": 0.1015, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.04966906474820144, | |
| "grad_norm": 0.6971962451934814, | |
| "learning_rate": 1.1379642076878528e-06, | |
| "loss": 0.0514, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.049726618705035974, | |
| "grad_norm": 11.001822471618652, | |
| "learning_rate": 1.1218461486359878e-06, | |
| "loss": 0.467, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.049784172661870504, | |
| "grad_norm": 1.605398178100586, | |
| "learning_rate": 1.1058362670870248e-06, | |
| "loss": 0.1081, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.049841726618705035, | |
| "grad_norm": 17.996734619140625, | |
| "learning_rate": 1.0899347581163222e-06, | |
| "loss": 0.3179, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.04989928057553957, | |
| "grad_norm": 0.2730165123939514, | |
| "learning_rate": 1.0741418154787443e-06, | |
| "loss": 0.0261, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.0499568345323741, | |
| "grad_norm": 5.960131645202637, | |
| "learning_rate": 1.058457631606319e-06, | |
| "loss": 0.1051, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.05001438848920863, | |
| "grad_norm": 2.7779898643493652, | |
| "learning_rate": 1.042882397605871e-06, | |
| "loss": 0.0692, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.05007194244604317, | |
| "grad_norm": 0.33091625571250916, | |
| "learning_rate": 1.0274163032567165e-06, | |
| "loss": 0.0534, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.0501294964028777, | |
| "grad_norm": 0.6273438334465027, | |
| "learning_rate": 1.012059537008332e-06, | |
| "loss": 0.0617, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.05018705035971223, | |
| "grad_norm": 0.8654516935348511, | |
| "learning_rate": 9.968122859780648e-07, | |
| "loss": 0.0568, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.05024460431654676, | |
| "grad_norm": 13.922266960144043, | |
| "learning_rate": 9.816747359488632e-07, | |
| "loss": 0.1749, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.050302158273381296, | |
| "grad_norm": 16.455718994140625, | |
| "learning_rate": 9.666470713669918e-07, | |
| "loss": 0.2171, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.050359712230215826, | |
| "grad_norm": 11.806230545043945, | |
| "learning_rate": 9.517294753398066e-07, | |
| "loss": 0.25, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.05041726618705036, | |
| "grad_norm": 0.3645191192626953, | |
| "learning_rate": 9.369221296335007e-07, | |
| "loss": 0.05, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.050474820143884894, | |
| "grad_norm": 0.4936099648475647, | |
| "learning_rate": 9.222252146709143e-07, | |
| "loss": 0.0519, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.050532374100719424, | |
| "grad_norm": 13.692974090576172, | |
| "learning_rate": 9.076389095293148e-07, | |
| "loss": 0.2554, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.050589928057553954, | |
| "grad_norm": 0.6284224987030029, | |
| "learning_rate": 8.931633919382299e-07, | |
| "loss": 0.0659, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.05064748201438849, | |
| "grad_norm": 0.7923092842102051, | |
| "learning_rate": 8.787988382772705e-07, | |
| "loss": 0.0568, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.05070503597122302, | |
| "grad_norm": 22.334707260131836, | |
| "learning_rate": 8.645454235739903e-07, | |
| "loss": 0.2308, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.05076258992805755, | |
| "grad_norm": 4.818454742431641, | |
| "learning_rate": 8.504033215017527e-07, | |
| "loss": 0.1232, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.05082014388489209, | |
| "grad_norm": 1.418079137802124, | |
| "learning_rate": 8.363727043776037e-07, | |
| "loss": 0.0688, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.05087769784172662, | |
| "grad_norm": 0.7452939748764038, | |
| "learning_rate": 8.224537431601886e-07, | |
| "loss": 0.0562, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.05093525179856115, | |
| "grad_norm": 1.132005214691162, | |
| "learning_rate": 8.086466074476562e-07, | |
| "loss": 0.0804, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.050992805755395686, | |
| "grad_norm": 1.1662274599075317, | |
| "learning_rate": 7.949514654755963e-07, | |
| "loss": 0.0778, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.051050359712230216, | |
| "grad_norm": 0.6592736840248108, | |
| "learning_rate": 7.81368484114996e-07, | |
| "loss": 0.0662, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.051107913669064746, | |
| "grad_norm": 9.107586860656738, | |
| "learning_rate": 7.678978288701911e-07, | |
| "loss": 0.1515, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.05116546762589928, | |
| "grad_norm": 11.478974342346191, | |
| "learning_rate": 7.545396638768698e-07, | |
| "loss": 0.496, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.05122302158273381, | |
| "grad_norm": 0.9263907670974731, | |
| "learning_rate": 7.412941519000527e-07, | |
| "loss": 0.0756, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.05128057553956834, | |
| "grad_norm": 20.043466567993164, | |
| "learning_rate": 7.281614543321269e-07, | |
| "loss": 0.5612, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.05133812949640288, | |
| "grad_norm": 9.131746292114258, | |
| "learning_rate": 7.151417311908648e-07, | |
| "loss": 0.1701, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.05139568345323741, | |
| "grad_norm": 2.0880978107452393, | |
| "learning_rate": 7.022351411174866e-07, | |
| "loss": 0.1408, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.05145323741007194, | |
| "grad_norm": 0.7419898509979248, | |
| "learning_rate": 6.894418413747183e-07, | |
| "loss": 0.0609, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.05151079136690648, | |
| "grad_norm": 18.692054748535156, | |
| "learning_rate": 6.767619878448783e-07, | |
| "loss": 0.3365, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.05156834532374101, | |
| "grad_norm": 1.1873613595962524, | |
| "learning_rate": 6.641957350279838e-07, | |
| "loss": 0.0662, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.05162589928057554, | |
| "grad_norm": 1.0309622287750244, | |
| "learning_rate": 6.517432360398556e-07, | |
| "loss": 0.062, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.051683453237410075, | |
| "grad_norm": 1.502915620803833, | |
| "learning_rate": 6.394046426102673e-07, | |
| "loss": 0.0679, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.051741007194244605, | |
| "grad_norm": 14.485411643981934, | |
| "learning_rate": 6.271801050810856e-07, | |
| "loss": 0.2849, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.051798561151079135, | |
| "grad_norm": 0.6515581607818604, | |
| "learning_rate": 6.150697724044407e-07, | |
| "loss": 0.0497, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.05185611510791367, | |
| "grad_norm": 1.2064189910888672, | |
| "learning_rate": 6.030737921409169e-07, | |
| "loss": 0.0801, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.0519136690647482, | |
| "grad_norm": 9.05174446105957, | |
| "learning_rate": 5.911923104577455e-07, | |
| "loss": 0.1349, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.05197122302158273, | |
| "grad_norm": 0.8396304845809937, | |
| "learning_rate": 5.794254721270331e-07, | |
| "loss": 0.0493, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.05202877697841727, | |
| "grad_norm": 22.91259002685547, | |
| "learning_rate": 5.677734205239904e-07, | |
| "loss": 0.4679, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.0520863309352518, | |
| "grad_norm": 0.31920236349105835, | |
| "learning_rate": 5.562362976251901e-07, | |
| "loss": 0.0485, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.05214388489208633, | |
| "grad_norm": 0.7108873724937439, | |
| "learning_rate": 5.448142440068316e-07, | |
| "loss": 0.0509, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.05220143884892086, | |
| "grad_norm": 1.8079339265823364, | |
| "learning_rate": 5.335073988430373e-07, | |
| "loss": 0.0969, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.0522589928057554, | |
| "grad_norm": 10.256385803222656, | |
| "learning_rate": 5.223158999041444e-07, | |
| "loss": 0.2056, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.05231654676258993, | |
| "grad_norm": 0.8384624123573303, | |
| "learning_rate": 5.112398835550348e-07, | |
| "loss": 0.0781, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.05237410071942446, | |
| "grad_norm": 0.9950568079948425, | |
| "learning_rate": 5.002794847534765e-07, | |
| "loss": 0.0569, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.052431654676258994, | |
| "grad_norm": 0.7972750067710876, | |
| "learning_rate": 4.894348370484648e-07, | |
| "loss": 0.072, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.052489208633093525, | |
| "grad_norm": 8.564375877380371, | |
| "learning_rate": 4.787060725786141e-07, | |
| "loss": 0.1262, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.052546762589928055, | |
| "grad_norm": 0.7976115942001343, | |
| "learning_rate": 4.6809332207053083e-07, | |
| "loss": 0.0669, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.05260431654676259, | |
| "grad_norm": 0.6580243706703186, | |
| "learning_rate": 4.575967148372318e-07, | |
| "loss": 0.071, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.05266187050359712, | |
| "grad_norm": 0.8931045532226562, | |
| "learning_rate": 4.4721637877656377e-07, | |
| "loss": 0.0581, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.05271942446043165, | |
| "grad_norm": 0.581693172454834, | |
| "learning_rate": 4.3695244036964567e-07, | |
| "loss": 0.0569, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.05277697841726619, | |
| "grad_norm": 14.89986801147461, | |
| "learning_rate": 4.268050246793276e-07, | |
| "loss": 0.3469, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.05283453237410072, | |
| "grad_norm": 4.767395496368408, | |
| "learning_rate": 4.167742553486676e-07, | |
| "loss": 0.0892, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.05289208633093525, | |
| "grad_norm": 1.3745571374893188, | |
| "learning_rate": 4.068602545994249e-07, | |
| "loss": 0.0874, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.052949640287769786, | |
| "grad_norm": 7.833735466003418, | |
| "learning_rate": 3.9706314323056936e-07, | |
| "loss": 0.1356, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.053007194244604317, | |
| "grad_norm": 1.1918085813522339, | |
| "learning_rate": 3.8738304061681107e-07, | |
| "loss": 0.0653, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.05306474820143885, | |
| "grad_norm": 0.6890537142753601, | |
| "learning_rate": 3.7782006470714614e-07, | |
| "loss": 0.0692, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.053122302158273384, | |
| "grad_norm": 0.6393564939498901, | |
| "learning_rate": 3.68374332023419e-07, | |
| "loss": 0.0206, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.053179856115107914, | |
| "grad_norm": 1.7775517702102661, | |
| "learning_rate": 3.590459576589e-07, | |
| "loss": 0.1072, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.053237410071942444, | |
| "grad_norm": 0.5203324556350708, | |
| "learning_rate": 3.498350552768859e-07, | |
| "loss": 0.0651, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.05329496402877698, | |
| "grad_norm": 0.6219776272773743, | |
| "learning_rate": 3.4074173710931804e-07, | |
| "loss": 0.0653, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.05335251798561151, | |
| "grad_norm": 1.8375036716461182, | |
| "learning_rate": 3.3176611395540625e-07, | |
| "loss": 0.0614, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.05341007194244604, | |
| "grad_norm": 2.563394784927368, | |
| "learning_rate": 3.2290829518028867e-07, | |
| "loss": 0.1311, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.05346762589928058, | |
| "grad_norm": 17.644411087036133, | |
| "learning_rate": 3.1416838871368925e-07, | |
| "loss": 0.3422, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.05352517985611511, | |
| "grad_norm": 0.7539731860160828, | |
| "learning_rate": 3.0554650104861137e-07, | |
| "loss": 0.0837, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.05358273381294964, | |
| "grad_norm": 5.77402400970459, | |
| "learning_rate": 2.970427372400353e-07, | |
| "loss": 0.1455, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.053640287769784176, | |
| "grad_norm": 1.0641944408416748, | |
| "learning_rate": 2.8865720090364037e-07, | |
| "loss": 0.1101, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.053697841726618706, | |
| "grad_norm": 1.1459119319915771, | |
| "learning_rate": 2.8038999421453827e-07, | |
| "loss": 0.0608, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.053755395683453236, | |
| "grad_norm": 12.172619819641113, | |
| "learning_rate": 2.7224121790603517e-07, | |
| "loss": 0.3217, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.05381294964028777, | |
| "grad_norm": 11.910560607910156, | |
| "learning_rate": 2.6421097126839714e-07, | |
| "loss": 0.5355, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.0538705035971223, | |
| "grad_norm": 2.0426604747772217, | |
| "learning_rate": 2.5629935214764866e-07, | |
| "loss": 0.1398, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.05392805755395683, | |
| "grad_norm": 1.1188652515411377, | |
| "learning_rate": 2.4850645694436736e-07, | |
| "loss": 0.1328, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.05398561151079137, | |
| "grad_norm": 2.1541991233825684, | |
| "learning_rate": 2.4083238061252565e-07, | |
| "loss": 0.1437, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.0540431654676259, | |
| "grad_norm": 0.8203990459442139, | |
| "learning_rate": 2.332772166583208e-07, | |
| "loss": 0.0617, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.05410071942446043, | |
| "grad_norm": 14.203892707824707, | |
| "learning_rate": 2.2584105713904126e-07, | |
| "loss": 0.151, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.05415827338129496, | |
| "grad_norm": 0.3919861912727356, | |
| "learning_rate": 2.1852399266194312e-07, | |
| "loss": 0.0057, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.0542158273381295, | |
| "grad_norm": 3.8022513389587402, | |
| "learning_rate": 2.1132611238315004e-07, | |
| "loss": 0.118, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.05427338129496403, | |
| "grad_norm": 15.274145126342773, | |
| "learning_rate": 2.0424750400655947e-07, | |
| "loss": 0.2713, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.05433093525179856, | |
| "grad_norm": 1.0683073997497559, | |
| "learning_rate": 1.9728825378278248e-07, | |
| "loss": 0.0581, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.054388489208633095, | |
| "grad_norm": 13.709763526916504, | |
| "learning_rate": 1.9044844650808468e-07, | |
| "loss": 0.2315, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.054446043165467625, | |
| "grad_norm": 2.2819840908050537, | |
| "learning_rate": 1.8372816552336025e-07, | |
| "loss": 0.1143, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.054503597122302155, | |
| "grad_norm": 1.1235344409942627, | |
| "learning_rate": 1.7712749271311392e-07, | |
| "loss": 0.0717, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.05456115107913669, | |
| "grad_norm": 1.0503443479537964, | |
| "learning_rate": 1.706465085044584e-07, | |
| "loss": 0.0961, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.05461870503597122, | |
| "grad_norm": 0.7241622805595398, | |
| "learning_rate": 1.6428529186614195e-07, | |
| "loss": 0.078, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.05467625899280575, | |
| "grad_norm": 1.2095136642456055, | |
| "learning_rate": 1.580439203075812e-07, | |
| "loss": 0.0911, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.05473381294964029, | |
| "grad_norm": 4.268467426300049, | |
| "learning_rate": 1.519224698779198e-07, | |
| "loss": 0.0737, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.05479136690647482, | |
| "grad_norm": 0.36049625277519226, | |
| "learning_rate": 1.4592101516509916e-07, | |
| "loss": 0.0585, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.05484892086330935, | |
| "grad_norm": 2.7988123893737793, | |
| "learning_rate": 1.400396292949513e-07, | |
| "loss": 0.0986, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.05490647482014389, | |
| "grad_norm": 16.90985870361328, | |
| "learning_rate": 1.3427838393030634e-07, | |
| "loss": 0.5819, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.05496402877697842, | |
| "grad_norm": 0.7780059576034546, | |
| "learning_rate": 1.2863734927012094e-07, | |
| "loss": 0.0765, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.05502158273381295, | |
| "grad_norm": 1.1509917974472046, | |
| "learning_rate": 1.231165940486234e-07, | |
| "loss": 0.0709, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.055079136690647484, | |
| "grad_norm": 1.6221928596496582, | |
| "learning_rate": 1.1771618553447217e-07, | |
| "loss": 0.0934, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.055136690647482015, | |
| "grad_norm": 1.4992139339447021, | |
| "learning_rate": 1.1243618952994195e-07, | |
| "loss": 0.0649, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.055194244604316545, | |
| "grad_norm": 19.954071044921875, | |
| "learning_rate": 1.0727667037011668e-07, | |
| "loss": 0.4697, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.05525179856115108, | |
| "grad_norm": 1.0766866207122803, | |
| "learning_rate": 1.0223769092211012e-07, | |
| "loss": 0.0877, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.05530935251798561, | |
| "grad_norm": 5.830136299133301, | |
| "learning_rate": 9.731931258429638e-08, | |
| "loss": 0.1061, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.05536690647482014, | |
| "grad_norm": 0.7689708471298218, | |
| "learning_rate": 9.252159528556404e-08, | |
| "loss": 0.0681, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.05542446043165468, | |
| "grad_norm": 2.8470983505249023, | |
| "learning_rate": 8.784459748458318e-08, | |
| "loss": 0.1102, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.05548201438848921, | |
| "grad_norm": 13.154084205627441, | |
| "learning_rate": 8.328837616909612e-08, | |
| "loss": 0.3053, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.05553956834532374, | |
| "grad_norm": 3.423447370529175, | |
| "learning_rate": 7.885298685522235e-08, | |
| "loss": 0.071, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.055597122302158276, | |
| "grad_norm": 0.8389829397201538, | |
| "learning_rate": 7.453848358678018e-08, | |
| "loss": 0.0703, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.055654676258992807, | |
| "grad_norm": 1.1691104173660278, | |
| "learning_rate": 7.034491893463059e-08, | |
| "loss": 0.0365, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.05571223021582734, | |
| "grad_norm": 0.6704636216163635, | |
| "learning_rate": 6.627234399603554e-08, | |
| "loss": 0.0514, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.055769784172661874, | |
| "grad_norm": 1.450444221496582, | |
| "learning_rate": 6.232080839403631e-08, | |
| "loss": 0.0943, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.055827338129496404, | |
| "grad_norm": 1.0474416017532349, | |
| "learning_rate": 5.849036027684607e-08, | |
| "loss": 0.1006, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.055884892086330934, | |
| "grad_norm": 0.4784161150455475, | |
| "learning_rate": 5.4781046317267103e-08, | |
| "loss": 0.0408, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.05594244604316547, | |
| "grad_norm": 16.443510055541992, | |
| "learning_rate": 5.119291171211793e-08, | |
| "loss": 0.1611, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.056, | |
| "grad_norm": 0.6649782657623291, | |
| "learning_rate": 4.772600018168816e-08, | |
| "loss": 0.0723, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.05605755395683453, | |
| "grad_norm": 11.152069091796875, | |
| "learning_rate": 4.438035396920004e-08, | |
| "loss": 0.7074, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.05611510791366906, | |
| "grad_norm": 0.5316632390022278, | |
| "learning_rate": 4.115601384029666e-08, | |
| "loss": 0.0642, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.0561726618705036, | |
| "grad_norm": 2.629650354385376, | |
| "learning_rate": 3.805301908254455e-08, | |
| "loss": 0.1437, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.05623021582733813, | |
| "grad_norm": 1.2182719707489014, | |
| "learning_rate": 3.50714075049563e-08, | |
| "loss": 0.094, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.05628776978417266, | |
| "grad_norm": 1.433416485786438, | |
| "learning_rate": 3.22112154375287e-08, | |
| "loss": 0.1248, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.056345323741007196, | |
| "grad_norm": 0.4791356027126312, | |
| "learning_rate": 2.947247773079753e-08, | |
| "loss": 0.0573, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.056402877697841726, | |
| "grad_norm": 0.43710970878601074, | |
| "learning_rate": 2.6855227755419046e-08, | |
| "loss": 0.0632, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.056460431654676256, | |
| "grad_norm": 1.2036529779434204, | |
| "learning_rate": 2.4359497401758026e-08, | |
| "loss": 0.0758, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.05651798561151079, | |
| "grad_norm": 1.0879039764404297, | |
| "learning_rate": 2.1985317079500358e-08, | |
| "loss": 0.0864, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.05657553956834532, | |
| "grad_norm": 2.7998852729797363, | |
| "learning_rate": 1.973271571728441e-08, | |
| "loss": 0.0904, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.05663309352517985, | |
| "grad_norm": 0.8842798471450806, | |
| "learning_rate": 1.7601720762346895e-08, | |
| "loss": 0.072, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.05669064748201439, | |
| "grad_norm": 30.26481819152832, | |
| "learning_rate": 1.5592358180189782e-08, | |
| "loss": 0.3083, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.05674820143884892, | |
| "grad_norm": 1.791191577911377, | |
| "learning_rate": 1.370465245426167e-08, | |
| "loss": 0.0949, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.05680575539568345, | |
| "grad_norm": 1.102792501449585, | |
| "learning_rate": 1.1938626585660252e-08, | |
| "loss": 0.078, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.05686330935251799, | |
| "grad_norm": 1.305518627166748, | |
| "learning_rate": 1.0294302092853647e-08, | |
| "loss": 0.1071, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.05692086330935252, | |
| "grad_norm": 1.006156325340271, | |
| "learning_rate": 8.771699011416169e-09, | |
| "loss": 0.0736, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.05697841726618705, | |
| "grad_norm": 0.8825615048408508, | |
| "learning_rate": 7.370835893788508e-09, | |
| "loss": 0.0776, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.057035971223021585, | |
| "grad_norm": 1.2495946884155273, | |
| "learning_rate": 6.091729809042379e-09, | |
| "loss": 0.0883, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.057093525179856115, | |
| "grad_norm": 4.397529125213623, | |
| "learning_rate": 4.9343963426840006e-09, | |
| "loss": 0.1277, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.057151079136690645, | |
| "grad_norm": 20.961580276489258, | |
| "learning_rate": 3.898849596456477e-09, | |
| "loss": 0.3655, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.05720863309352518, | |
| "grad_norm": 0.7759275436401367, | |
| "learning_rate": 2.9851021881688314e-09, | |
| "loss": 0.0783, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.05726618705035971, | |
| "grad_norm": 0.40887534618377686, | |
| "learning_rate": 2.193165251545004e-09, | |
| "loss": 0.059, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.05732374100719424, | |
| "grad_norm": 1.2675024271011353, | |
| "learning_rate": 1.5230484360873043e-09, | |
| "loss": 0.0846, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.05738129496402878, | |
| "grad_norm": 0.7930789589881897, | |
| "learning_rate": 9.74759906957612e-10, | |
| "loss": 0.0724, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.05743884892086331, | |
| "grad_norm": 13.144028663635254, | |
| "learning_rate": 5.483063448785686e-10, | |
| "loss": 0.2582, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.05749640287769784, | |
| "grad_norm": 2.8434152603149414, | |
| "learning_rate": 2.436929460525317e-10, | |
| "loss": 0.0955, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.05755395683453238, | |
| "grad_norm": 0.9370110630989075, | |
| "learning_rate": 6.092342209607083e-11, | |
| "loss": 0.0814, | |
| "step": 1000 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |