CocoRoF commited on
Commit
b9e8514
·
verified ·
1 Parent(s): 46bc43a

Training in progress, step 4000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71d0eabfbdd37b2fae13fa797c3f0436a08906b6795d79ec3027697d278db38e
3
  size 598434152
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fbedc25a7e866c5375d17390d5eb67e93e519d92626a3f97c7daddf7702fcab
3
  size 598434152
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65be9700ffb2ead35027d047544f87600617dedade6f5c3f6c069f12bb951ba1
3
  size 1196955962
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff74a5fb5071ae21ddeb0a899cb40c982a667d261c8d2108985988527a0acde6
3
  size 1196955962
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59efe62e4ca0647678855566a69eaafc20fb9e01c9af7b6b454bf0717d7bf5f7
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f9aa43992237c34da047eae3e4635545e3cbee9026436669a8ec61ef48f58c1
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f7bf369a63d501e73812630c6dd5a11aa21e67b93b5316e7295ec988e3218d6
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dc07d6a74a610820a65e18607f99e43ef207c98dc79fcbe54bd9e13a24f2410
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.4058106841611997,
5
  "eval_steps": 250,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2299,6 +2299,770 @@
2299
  "eval_spearman_manhattan": 0.7832975474858643,
2300
  "eval_steps_per_second": 31.371,
2301
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2302
  }
2303
  ],
2304
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.8744142455482662,
5
  "eval_steps": 250,
6
+ "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2299
  "eval_spearman_manhattan": 0.7832975474858643,
2300
  "eval_steps_per_second": 31.371,
2301
  "step": 3000
2302
+ },
2303
+ {
2304
+ "epoch": 1.4104967197750704,
2305
+ "grad_norm": 4.2792487144470215,
2306
+ "learning_rate": 9.118439550140582e-06,
2307
+ "loss": 0.4878,
2308
+ "step": 3010
2309
+ },
2310
+ {
2311
+ "epoch": 1.415182755388941,
2312
+ "grad_norm": 3.8892383575439453,
2313
+ "learning_rate": 9.115510777881913e-06,
2314
+ "loss": 0.4676,
2315
+ "step": 3020
2316
+ },
2317
+ {
2318
+ "epoch": 1.4198687910028116,
2319
+ "grad_norm": 5.0008745193481445,
2320
+ "learning_rate": 9.112582005623244e-06,
2321
+ "loss": 0.4729,
2322
+ "step": 3030
2323
+ },
2324
+ {
2325
+ "epoch": 1.4245548266166823,
2326
+ "grad_norm": 5.607409477233887,
2327
+ "learning_rate": 9.109653233364575e-06,
2328
+ "loss": 0.4762,
2329
+ "step": 3040
2330
+ },
2331
+ {
2332
+ "epoch": 1.429240862230553,
2333
+ "grad_norm": 3.0340139865875244,
2334
+ "learning_rate": 9.106724461105905e-06,
2335
+ "loss": 0.4438,
2336
+ "step": 3050
2337
+ },
2338
+ {
2339
+ "epoch": 1.4339268978444237,
2340
+ "grad_norm": 4.310724258422852,
2341
+ "learning_rate": 9.103795688847236e-06,
2342
+ "loss": 0.4499,
2343
+ "step": 3060
2344
+ },
2345
+ {
2346
+ "epoch": 1.4386129334582942,
2347
+ "grad_norm": 4.481917381286621,
2348
+ "learning_rate": 9.100866916588567e-06,
2349
+ "loss": 0.4493,
2350
+ "step": 3070
2351
+ },
2352
+ {
2353
+ "epoch": 1.443298969072165,
2354
+ "grad_norm": 4.330621719360352,
2355
+ "learning_rate": 9.097938144329898e-06,
2356
+ "loss": 0.4505,
2357
+ "step": 3080
2358
+ },
2359
+ {
2360
+ "epoch": 1.4479850046860356,
2361
+ "grad_norm": 4.335081577301025,
2362
+ "learning_rate": 9.095009372071228e-06,
2363
+ "loss": 0.446,
2364
+ "step": 3090
2365
+ },
2366
+ {
2367
+ "epoch": 1.4526710402999063,
2368
+ "grad_norm": 3.0894672870635986,
2369
+ "learning_rate": 9.092080599812559e-06,
2370
+ "loss": 0.4404,
2371
+ "step": 3100
2372
+ },
2373
+ {
2374
+ "epoch": 1.457357075913777,
2375
+ "grad_norm": 4.6363983154296875,
2376
+ "learning_rate": 9.08915182755389e-06,
2377
+ "loss": 0.5358,
2378
+ "step": 3110
2379
+ },
2380
+ {
2381
+ "epoch": 1.4620431115276475,
2382
+ "grad_norm": 3.80387806892395,
2383
+ "learning_rate": 9.086223055295221e-06,
2384
+ "loss": 0.4374,
2385
+ "step": 3120
2386
+ },
2387
+ {
2388
+ "epoch": 1.4667291471415183,
2389
+ "grad_norm": 3.276442289352417,
2390
+ "learning_rate": 9.083294283036552e-06,
2391
+ "loss": 0.5013,
2392
+ "step": 3130
2393
+ },
2394
+ {
2395
+ "epoch": 1.471415182755389,
2396
+ "grad_norm": 3.843419075012207,
2397
+ "learning_rate": 9.080365510777884e-06,
2398
+ "loss": 0.4694,
2399
+ "step": 3140
2400
+ },
2401
+ {
2402
+ "epoch": 1.4761012183692597,
2403
+ "grad_norm": 4.7606730461120605,
2404
+ "learning_rate": 9.077436738519213e-06,
2405
+ "loss": 0.4215,
2406
+ "step": 3150
2407
+ },
2408
+ {
2409
+ "epoch": 1.4807872539831304,
2410
+ "grad_norm": 3.739225149154663,
2411
+ "learning_rate": 9.074507966260544e-06,
2412
+ "loss": 0.4756,
2413
+ "step": 3160
2414
+ },
2415
+ {
2416
+ "epoch": 1.4854732895970009,
2417
+ "grad_norm": 3.36938214302063,
2418
+ "learning_rate": 9.071579194001876e-06,
2419
+ "loss": 0.4243,
2420
+ "step": 3170
2421
+ },
2422
+ {
2423
+ "epoch": 1.4901593252108716,
2424
+ "grad_norm": 6.589993476867676,
2425
+ "learning_rate": 9.068650421743205e-06,
2426
+ "loss": 0.4698,
2427
+ "step": 3180
2428
+ },
2429
+ {
2430
+ "epoch": 1.4948453608247423,
2431
+ "grad_norm": 3.8416695594787598,
2432
+ "learning_rate": 9.065721649484536e-06,
2433
+ "loss": 0.4964,
2434
+ "step": 3190
2435
+ },
2436
+ {
2437
+ "epoch": 1.499531396438613,
2438
+ "grad_norm": 4.367741584777832,
2439
+ "learning_rate": 9.062792877225867e-06,
2440
+ "loss": 0.4417,
2441
+ "step": 3200
2442
+ },
2443
+ {
2444
+ "epoch": 1.5042174320524837,
2445
+ "grad_norm": 3.500617742538452,
2446
+ "learning_rate": 9.059864104967199e-06,
2447
+ "loss": 0.4522,
2448
+ "step": 3210
2449
+ },
2450
+ {
2451
+ "epoch": 1.5089034676663542,
2452
+ "grad_norm": 3.5349769592285156,
2453
+ "learning_rate": 9.05693533270853e-06,
2454
+ "loss": 0.4393,
2455
+ "step": 3220
2456
+ },
2457
+ {
2458
+ "epoch": 1.513589503280225,
2459
+ "grad_norm": 3.8469526767730713,
2460
+ "learning_rate": 9.054006560449861e-06,
2461
+ "loss": 0.4453,
2462
+ "step": 3230
2463
+ },
2464
+ {
2465
+ "epoch": 1.5182755388940956,
2466
+ "grad_norm": 3.209933280944824,
2467
+ "learning_rate": 9.051077788191192e-06,
2468
+ "loss": 0.4599,
2469
+ "step": 3240
2470
+ },
2471
+ {
2472
+ "epoch": 1.522961574507966,
2473
+ "grad_norm": 3.7976036071777344,
2474
+ "learning_rate": 9.048149015932522e-06,
2475
+ "loss": 0.4373,
2476
+ "step": 3250
2477
+ },
2478
+ {
2479
+ "epoch": 1.522961574507966,
2480
+ "eval_loss": 0.049798864871263504,
2481
+ "eval_pearson_cosine": 0.7866421286010308,
2482
+ "eval_pearson_dot": 0.6641640853451243,
2483
+ "eval_pearson_euclidean": 0.7777378719378305,
2484
+ "eval_pearson_manhattan": 0.7764827785285746,
2485
+ "eval_runtime": 43.7509,
2486
+ "eval_samples_per_second": 34.285,
2487
+ "eval_spearman_cosine": 0.7870351053050699,
2488
+ "eval_spearman_dot": 0.6708598238937284,
2489
+ "eval_spearman_euclidean": 0.7874683707378692,
2490
+ "eval_spearman_manhattan": 0.7865203522698128,
2491
+ "eval_steps_per_second": 34.285,
2492
+ "step": 3250
2493
+ },
2494
+ {
2495
+ "epoch": 1.527647610121837,
2496
+ "grad_norm": 4.851262092590332,
2497
+ "learning_rate": 9.045220243673853e-06,
2498
+ "loss": 0.491,
2499
+ "step": 3260
2500
+ },
2501
+ {
2502
+ "epoch": 1.5323336457357075,
2503
+ "grad_norm": 4.183891773223877,
2504
+ "learning_rate": 9.042291471415184e-06,
2505
+ "loss": 0.453,
2506
+ "step": 3270
2507
+ },
2508
+ {
2509
+ "epoch": 1.5370196813495782,
2510
+ "grad_norm": 4.280774116516113,
2511
+ "learning_rate": 9.039362699156514e-06,
2512
+ "loss": 0.4413,
2513
+ "step": 3280
2514
+ },
2515
+ {
2516
+ "epoch": 1.541705716963449,
2517
+ "grad_norm": 4.118307113647461,
2518
+ "learning_rate": 9.036433926897845e-06,
2519
+ "loss": 0.4661,
2520
+ "step": 3290
2521
+ },
2522
+ {
2523
+ "epoch": 1.5463917525773194,
2524
+ "grad_norm": 5.99712610244751,
2525
+ "learning_rate": 9.033505154639176e-06,
2526
+ "loss": 0.5205,
2527
+ "step": 3300
2528
+ },
2529
+ {
2530
+ "epoch": 1.5510777881911904,
2531
+ "grad_norm": 4.146691799163818,
2532
+ "learning_rate": 9.030576382380507e-06,
2533
+ "loss": 0.428,
2534
+ "step": 3310
2535
+ },
2536
+ {
2537
+ "epoch": 1.5557638238050608,
2538
+ "grad_norm": 3.899887800216675,
2539
+ "learning_rate": 9.027647610121838e-06,
2540
+ "loss": 0.4564,
2541
+ "step": 3320
2542
+ },
2543
+ {
2544
+ "epoch": 1.5604498594189316,
2545
+ "grad_norm": 3.9663302898406982,
2546
+ "learning_rate": 9.02471883786317e-06,
2547
+ "loss": 0.4539,
2548
+ "step": 3330
2549
+ },
2550
+ {
2551
+ "epoch": 1.5651358950328023,
2552
+ "grad_norm": 3.526458263397217,
2553
+ "learning_rate": 9.021790065604499e-06,
2554
+ "loss": 0.4844,
2555
+ "step": 3340
2556
+ },
2557
+ {
2558
+ "epoch": 1.569821930646673,
2559
+ "grad_norm": 4.192911624908447,
2560
+ "learning_rate": 9.01886129334583e-06,
2561
+ "loss": 0.4278,
2562
+ "step": 3350
2563
+ },
2564
+ {
2565
+ "epoch": 1.5745079662605437,
2566
+ "grad_norm": 4.185749530792236,
2567
+ "learning_rate": 9.015932521087161e-06,
2568
+ "loss": 0.4632,
2569
+ "step": 3360
2570
+ },
2571
+ {
2572
+ "epoch": 1.5791940018744142,
2573
+ "grad_norm": 3.411773204803467,
2574
+ "learning_rate": 9.013003748828491e-06,
2575
+ "loss": 0.436,
2576
+ "step": 3370
2577
+ },
2578
+ {
2579
+ "epoch": 1.5838800374882849,
2580
+ "grad_norm": 4.467881679534912,
2581
+ "learning_rate": 9.010074976569822e-06,
2582
+ "loss": 0.4133,
2583
+ "step": 3380
2584
+ },
2585
+ {
2586
+ "epoch": 1.5885660731021556,
2587
+ "grad_norm": 3.77736496925354,
2588
+ "learning_rate": 9.007146204311153e-06,
2589
+ "loss": 0.4452,
2590
+ "step": 3390
2591
+ },
2592
+ {
2593
+ "epoch": 1.5932521087160263,
2594
+ "grad_norm": 4.084095478057861,
2595
+ "learning_rate": 9.004217432052485e-06,
2596
+ "loss": 0.4605,
2597
+ "step": 3400
2598
+ },
2599
+ {
2600
+ "epoch": 1.597938144329897,
2601
+ "grad_norm": 3.3393008708953857,
2602
+ "learning_rate": 9.001288659793816e-06,
2603
+ "loss": 0.4157,
2604
+ "step": 3410
2605
+ },
2606
+ {
2607
+ "epoch": 1.6026241799437675,
2608
+ "grad_norm": 3.096881151199341,
2609
+ "learning_rate": 8.998359887535147e-06,
2610
+ "loss": 0.4478,
2611
+ "step": 3420
2612
+ },
2613
+ {
2614
+ "epoch": 1.6073102155576382,
2615
+ "grad_norm": 3.0557243824005127,
2616
+ "learning_rate": 8.995431115276478e-06,
2617
+ "loss": 0.4452,
2618
+ "step": 3430
2619
+ },
2620
+ {
2621
+ "epoch": 1.611996251171509,
2622
+ "grad_norm": 3.7997219562530518,
2623
+ "learning_rate": 8.992502343017808e-06,
2624
+ "loss": 0.4287,
2625
+ "step": 3440
2626
+ },
2627
+ {
2628
+ "epoch": 1.6166822867853796,
2629
+ "grad_norm": 3.6995465755462646,
2630
+ "learning_rate": 8.989573570759139e-06,
2631
+ "loss": 0.4423,
2632
+ "step": 3450
2633
+ },
2634
+ {
2635
+ "epoch": 1.6213683223992503,
2636
+ "grad_norm": 4.1384053230285645,
2637
+ "learning_rate": 8.986644798500468e-06,
2638
+ "loss": 0.4563,
2639
+ "step": 3460
2640
+ },
2641
+ {
2642
+ "epoch": 1.6260543580131208,
2643
+ "grad_norm": 4.637014865875244,
2644
+ "learning_rate": 8.9837160262418e-06,
2645
+ "loss": 0.4538,
2646
+ "step": 3470
2647
+ },
2648
+ {
2649
+ "epoch": 1.6307403936269915,
2650
+ "grad_norm": 4.30952262878418,
2651
+ "learning_rate": 8.98078725398313e-06,
2652
+ "loss": 0.3993,
2653
+ "step": 3480
2654
+ },
2655
+ {
2656
+ "epoch": 1.6354264292408622,
2657
+ "grad_norm": 4.746737003326416,
2658
+ "learning_rate": 8.977858481724462e-06,
2659
+ "loss": 0.4274,
2660
+ "step": 3490
2661
+ },
2662
+ {
2663
+ "epoch": 1.640112464854733,
2664
+ "grad_norm": 3.8592286109924316,
2665
+ "learning_rate": 8.974929709465793e-06,
2666
+ "loss": 0.4066,
2667
+ "step": 3500
2668
+ },
2669
+ {
2670
+ "epoch": 1.640112464854733,
2671
+ "eval_loss": 0.050406068563461304,
2672
+ "eval_pearson_cosine": 0.7840015528942317,
2673
+ "eval_pearson_dot": 0.659932129633507,
2674
+ "eval_pearson_euclidean": 0.7769297052026758,
2675
+ "eval_pearson_manhattan": 0.7754185185705609,
2676
+ "eval_runtime": 44.0859,
2677
+ "eval_samples_per_second": 34.024,
2678
+ "eval_spearman_cosine": 0.7845451302239834,
2679
+ "eval_spearman_dot": 0.6667296644451466,
2680
+ "eval_spearman_euclidean": 0.7868327314956118,
2681
+ "eval_spearman_manhattan": 0.7856021398727839,
2682
+ "eval_steps_per_second": 34.024,
2683
+ "step": 3500
2684
+ },
2685
+ {
2686
+ "epoch": 1.6447985004686037,
2687
+ "grad_norm": 5.983098030090332,
2688
+ "learning_rate": 8.972000937207124e-06,
2689
+ "loss": 0.4451,
2690
+ "step": 3510
2691
+ },
2692
+ {
2693
+ "epoch": 1.6494845360824741,
2694
+ "grad_norm": 4.052550315856934,
2695
+ "learning_rate": 8.969072164948455e-06,
2696
+ "loss": 0.4331,
2697
+ "step": 3520
2698
+ },
2699
+ {
2700
+ "epoch": 1.6541705716963448,
2701
+ "grad_norm": 3.7970380783081055,
2702
+ "learning_rate": 8.966143392689785e-06,
2703
+ "loss": 0.4427,
2704
+ "step": 3530
2705
+ },
2706
+ {
2707
+ "epoch": 1.6588566073102156,
2708
+ "grad_norm": 4.695807456970215,
2709
+ "learning_rate": 8.963214620431116e-06,
2710
+ "loss": 0.4522,
2711
+ "step": 3540
2712
+ },
2713
+ {
2714
+ "epoch": 1.6635426429240863,
2715
+ "grad_norm": 4.41202974319458,
2716
+ "learning_rate": 8.960285848172446e-06,
2717
+ "loss": 0.4275,
2718
+ "step": 3550
2719
+ },
2720
+ {
2721
+ "epoch": 1.668228678537957,
2722
+ "grad_norm": 5.364877223968506,
2723
+ "learning_rate": 8.957357075913777e-06,
2724
+ "loss": 0.4321,
2725
+ "step": 3560
2726
+ },
2727
+ {
2728
+ "epoch": 1.6729147141518275,
2729
+ "grad_norm": 3.801132917404175,
2730
+ "learning_rate": 8.954428303655108e-06,
2731
+ "loss": 0.4494,
2732
+ "step": 3570
2733
+ },
2734
+ {
2735
+ "epoch": 1.6776007497656982,
2736
+ "grad_norm": 4.197866439819336,
2737
+ "learning_rate": 8.95149953139644e-06,
2738
+ "loss": 0.4126,
2739
+ "step": 3580
2740
+ },
2741
+ {
2742
+ "epoch": 1.6822867853795689,
2743
+ "grad_norm": 5.34595251083374,
2744
+ "learning_rate": 8.94857075913777e-06,
2745
+ "loss": 0.4757,
2746
+ "step": 3590
2747
+ },
2748
+ {
2749
+ "epoch": 1.6869728209934396,
2750
+ "grad_norm": 4.772789478302002,
2751
+ "learning_rate": 8.945641986879102e-06,
2752
+ "loss": 0.4037,
2753
+ "step": 3600
2754
+ },
2755
+ {
2756
+ "epoch": 1.6916588566073103,
2757
+ "grad_norm": 4.81839656829834,
2758
+ "learning_rate": 8.942713214620433e-06,
2759
+ "loss": 0.4192,
2760
+ "step": 3610
2761
+ },
2762
+ {
2763
+ "epoch": 1.6963448922211808,
2764
+ "grad_norm": 3.470919132232666,
2765
+ "learning_rate": 8.939784442361762e-06,
2766
+ "loss": 0.4106,
2767
+ "step": 3620
2768
+ },
2769
+ {
2770
+ "epoch": 1.7010309278350515,
2771
+ "grad_norm": 3.2051522731781006,
2772
+ "learning_rate": 8.936855670103094e-06,
2773
+ "loss": 0.4162,
2774
+ "step": 3630
2775
+ },
2776
+ {
2777
+ "epoch": 1.7057169634489222,
2778
+ "grad_norm": 3.8122334480285645,
2779
+ "learning_rate": 8.933926897844423e-06,
2780
+ "loss": 0.4054,
2781
+ "step": 3640
2782
+ },
2783
+ {
2784
+ "epoch": 1.710402999062793,
2785
+ "grad_norm": 5.07956075668335,
2786
+ "learning_rate": 8.930998125585754e-06,
2787
+ "loss": 0.4164,
2788
+ "step": 3650
2789
+ },
2790
+ {
2791
+ "epoch": 1.7150890346766636,
2792
+ "grad_norm": 3.754542112350464,
2793
+ "learning_rate": 8.928069353327085e-06,
2794
+ "loss": 0.3703,
2795
+ "step": 3660
2796
+ },
2797
+ {
2798
+ "epoch": 1.7197750702905341,
2799
+ "grad_norm": 3.4620890617370605,
2800
+ "learning_rate": 8.925140581068417e-06,
2801
+ "loss": 0.4667,
2802
+ "step": 3670
2803
+ },
2804
+ {
2805
+ "epoch": 1.7244611059044048,
2806
+ "grad_norm": 4.179393768310547,
2807
+ "learning_rate": 8.922211808809748e-06,
2808
+ "loss": 0.4384,
2809
+ "step": 3680
2810
+ },
2811
+ {
2812
+ "epoch": 1.7291471415182755,
2813
+ "grad_norm": 3.0865719318389893,
2814
+ "learning_rate": 8.919283036551079e-06,
2815
+ "loss": 0.4248,
2816
+ "step": 3690
2817
+ },
2818
+ {
2819
+ "epoch": 1.7338331771321462,
2820
+ "grad_norm": 3.9282147884368896,
2821
+ "learning_rate": 8.91635426429241e-06,
2822
+ "loss": 0.4231,
2823
+ "step": 3700
2824
+ },
2825
+ {
2826
+ "epoch": 1.738519212746017,
2827
+ "grad_norm": 3.9746885299682617,
2828
+ "learning_rate": 8.91342549203374e-06,
2829
+ "loss": 0.4152,
2830
+ "step": 3710
2831
+ },
2832
+ {
2833
+ "epoch": 1.7432052483598874,
2834
+ "grad_norm": 3.8340625762939453,
2835
+ "learning_rate": 8.910496719775071e-06,
2836
+ "loss": 0.4458,
2837
+ "step": 3720
2838
+ },
2839
+ {
2840
+ "epoch": 1.7478912839737581,
2841
+ "grad_norm": 4.861859321594238,
2842
+ "learning_rate": 8.907567947516402e-06,
2843
+ "loss": 0.4274,
2844
+ "step": 3730
2845
+ },
2846
+ {
2847
+ "epoch": 1.7525773195876289,
2848
+ "grad_norm": 3.3457283973693848,
2849
+ "learning_rate": 8.904639175257732e-06,
2850
+ "loss": 0.4534,
2851
+ "step": 3740
2852
+ },
2853
+ {
2854
+ "epoch": 1.7572633552014996,
2855
+ "grad_norm": 4.057953834533691,
2856
+ "learning_rate": 8.901710402999063e-06,
2857
+ "loss": 0.484,
2858
+ "step": 3750
2859
+ },
2860
+ {
2861
+ "epoch": 1.7572633552014996,
2862
+ "eval_loss": 0.05240313336253166,
2863
+ "eval_pearson_cosine": 0.7879299521989642,
2864
+ "eval_pearson_dot": 0.6605985065084816,
2865
+ "eval_pearson_euclidean": 0.7797438530556207,
2866
+ "eval_pearson_manhattan": 0.778216782480726,
2867
+ "eval_runtime": 44.9916,
2868
+ "eval_samples_per_second": 33.34,
2869
+ "eval_spearman_cosine": 0.7888982276270184,
2870
+ "eval_spearman_dot": 0.6669965792210436,
2871
+ "eval_spearman_euclidean": 0.7899037728263932,
2872
+ "eval_spearman_manhattan": 0.7886320032383264,
2873
+ "eval_steps_per_second": 33.34,
2874
+ "step": 3750
2875
+ },
2876
+ {
2877
+ "epoch": 1.7619493908153703,
2878
+ "grad_norm": 3.281102418899536,
2879
+ "learning_rate": 8.898781630740394e-06,
2880
+ "loss": 0.4074,
2881
+ "step": 3760
2882
+ },
2883
+ {
2884
+ "epoch": 1.7666354264292408,
2885
+ "grad_norm": 4.710203170776367,
2886
+ "learning_rate": 8.895852858481725e-06,
2887
+ "loss": 0.4537,
2888
+ "step": 3770
2889
+ },
2890
+ {
2891
+ "epoch": 1.7713214620431117,
2892
+ "grad_norm": 4.636346817016602,
2893
+ "learning_rate": 8.892924086223056e-06,
2894
+ "loss": 0.4348,
2895
+ "step": 3780
2896
+ },
2897
+ {
2898
+ "epoch": 1.7760074976569822,
2899
+ "grad_norm": 4.518571376800537,
2900
+ "learning_rate": 8.889995313964388e-06,
2901
+ "loss": 0.4515,
2902
+ "step": 3790
2903
+ },
2904
+ {
2905
+ "epoch": 1.780693533270853,
2906
+ "grad_norm": 4.0576066970825195,
2907
+ "learning_rate": 8.887066541705717e-06,
2908
+ "loss": 0.4276,
2909
+ "step": 3800
2910
+ },
2911
+ {
2912
+ "epoch": 1.7853795688847236,
2913
+ "grad_norm": 5.657445430755615,
2914
+ "learning_rate": 8.884137769447048e-06,
2915
+ "loss": 0.4277,
2916
+ "step": 3810
2917
+ },
2918
+ {
2919
+ "epoch": 1.790065604498594,
2920
+ "grad_norm": 5.393405437469482,
2921
+ "learning_rate": 8.88120899718838e-06,
2922
+ "loss": 0.428,
2923
+ "step": 3820
2924
+ },
2925
+ {
2926
+ "epoch": 1.794751640112465,
2927
+ "grad_norm": 4.101112365722656,
2928
+ "learning_rate": 8.87828022492971e-06,
2929
+ "loss": 0.4489,
2930
+ "step": 3830
2931
+ },
2932
+ {
2933
+ "epoch": 1.7994376757263355,
2934
+ "grad_norm": 3.531888246536255,
2935
+ "learning_rate": 8.87535145267104e-06,
2936
+ "loss": 0.3673,
2937
+ "step": 3840
2938
+ },
2939
+ {
2940
+ "epoch": 1.8041237113402062,
2941
+ "grad_norm": 3.4490315914154053,
2942
+ "learning_rate": 8.872422680412371e-06,
2943
+ "loss": 0.4059,
2944
+ "step": 3850
2945
+ },
2946
+ {
2947
+ "epoch": 1.808809746954077,
2948
+ "grad_norm": 3.034252643585205,
2949
+ "learning_rate": 8.869493908153702e-06,
2950
+ "loss": 0.3832,
2951
+ "step": 3860
2952
+ },
2953
+ {
2954
+ "epoch": 1.8134957825679474,
2955
+ "grad_norm": 4.064283847808838,
2956
+ "learning_rate": 8.866565135895034e-06,
2957
+ "loss": 0.4704,
2958
+ "step": 3870
2959
+ },
2960
+ {
2961
+ "epoch": 1.8181818181818183,
2962
+ "grad_norm": 3.2689194679260254,
2963
+ "learning_rate": 8.863636363636365e-06,
2964
+ "loss": 0.4428,
2965
+ "step": 3880
2966
+ },
2967
+ {
2968
+ "epoch": 1.8228678537956888,
2969
+ "grad_norm": 3.173530101776123,
2970
+ "learning_rate": 8.860707591377694e-06,
2971
+ "loss": 0.4283,
2972
+ "step": 3890
2973
+ },
2974
+ {
2975
+ "epoch": 1.8275538894095595,
2976
+ "grad_norm": 3.638122081756592,
2977
+ "learning_rate": 8.857778819119026e-06,
2978
+ "loss": 0.4225,
2979
+ "step": 3900
2980
+ },
2981
+ {
2982
+ "epoch": 1.8322399250234302,
2983
+ "grad_norm": 3.636679172515869,
2984
+ "learning_rate": 8.854850046860357e-06,
2985
+ "loss": 0.4154,
2986
+ "step": 3910
2987
+ },
2988
+ {
2989
+ "epoch": 1.8369259606373007,
2990
+ "grad_norm": 3.810847520828247,
2991
+ "learning_rate": 8.851921274601688e-06,
2992
+ "loss": 0.3931,
2993
+ "step": 3920
2994
+ },
2995
+ {
2996
+ "epoch": 1.8416119962511717,
2997
+ "grad_norm": 3.7469394207000732,
2998
+ "learning_rate": 8.848992502343019e-06,
2999
+ "loss": 0.4472,
3000
+ "step": 3930
3001
+ },
3002
+ {
3003
+ "epoch": 1.8462980318650422,
3004
+ "grad_norm": 4.962492942810059,
3005
+ "learning_rate": 8.846063730084349e-06,
3006
+ "loss": 0.4324,
3007
+ "step": 3940
3008
+ },
3009
+ {
3010
+ "epoch": 1.8509840674789129,
3011
+ "grad_norm": 3.4641172885894775,
3012
+ "learning_rate": 8.84313495782568e-06,
3013
+ "loss": 0.4234,
3014
+ "step": 3950
3015
+ },
3016
+ {
3017
+ "epoch": 1.8556701030927836,
3018
+ "grad_norm": 3.8601555824279785,
3019
+ "learning_rate": 8.840206185567011e-06,
3020
+ "loss": 0.4045,
3021
+ "step": 3960
3022
+ },
3023
+ {
3024
+ "epoch": 1.860356138706654,
3025
+ "grad_norm": 6.290759086608887,
3026
+ "learning_rate": 8.837277413308342e-06,
3027
+ "loss": 0.4655,
3028
+ "step": 3970
3029
+ },
3030
+ {
3031
+ "epoch": 1.865042174320525,
3032
+ "grad_norm": 3.5882256031036377,
3033
+ "learning_rate": 8.834348641049673e-06,
3034
+ "loss": 0.4298,
3035
+ "step": 3980
3036
+ },
3037
+ {
3038
+ "epoch": 1.8697282099343955,
3039
+ "grad_norm": 3.133535623550415,
3040
+ "learning_rate": 8.831419868791003e-06,
3041
+ "loss": 0.4508,
3042
+ "step": 3990
3043
+ },
3044
+ {
3045
+ "epoch": 1.8744142455482662,
3046
+ "grad_norm": 3.220383644104004,
3047
+ "learning_rate": 8.828491096532334e-06,
3048
+ "loss": 0.4348,
3049
+ "step": 4000
3050
+ },
3051
+ {
3052
+ "epoch": 1.8744142455482662,
3053
+ "eval_loss": 0.04981923848390579,
3054
+ "eval_pearson_cosine": 0.790612878761543,
3055
+ "eval_pearson_dot": 0.6612786229229286,
3056
+ "eval_pearson_euclidean": 0.7799249806775554,
3057
+ "eval_pearson_manhattan": 0.7784476870813819,
3058
+ "eval_runtime": 45.9371,
3059
+ "eval_samples_per_second": 32.653,
3060
+ "eval_spearman_cosine": 0.7908100570922554,
3061
+ "eval_spearman_dot": 0.6689224987064551,
3062
+ "eval_spearman_euclidean": 0.7902520878335856,
3063
+ "eval_spearman_manhattan": 0.7892503488739743,
3064
+ "eval_steps_per_second": 32.653,
3065
+ "step": 4000
3066
  }
3067
  ],
3068
  "logging_steps": 10,