robertou2 commited on
Commit
cc8a53b
·
verified ·
1 Parent(s): 8319b34

Upload folder using huggingface_hub

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07d1432b81afb8b1c9b8a744002a2a19f6f2b8a03c380138733e9d4cc7703199
3
  size 738232680
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e689d366b6b1f9af5f5ff2b3a42b870f7943ad7be28f2166e8166a7dcdfa876a
3
  size 738232680
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7bbf72886e01a48dac6ee38e56cb54de7d11394f5e3b568d3406678b8add32e
3
  size 1476611275
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34942080677a5bfc1e6937dc1f807d3e2478a43450cf01078151d65f53ecb22a
3
  size 1476611275
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1978ff613baf8cb1b30a1bdc5826e9439217fa692b15137f4bd6509f24bd92bb
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc68d513fd3f1dcd81811bb7d8ba9f54286dc534ae7648cf0048790d5ce39fbf
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cfb2c4e7f5b85204aebf8454da8cb3b4b0d2808506f763786b16d6b47eab0a0
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:432fe31cc8feaadc988ff87816d3eb23d869c5008676f20a3367d6de19e5cf4c
3
  size 1465
trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 190,
3
- "best_metric": 0.001307736849412322,
4
- "best_model_checkpoint": "/content/drive/MyDrive/lora_model/outputs/task15_microsoft/Phi-4-mini-instruct/checkpoint-190",
5
- "epoch": 10.0,
6
  "eval_steps": 1,
7
- "global_step": 190,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2108,756 +2108,6 @@
2108
  "eval_samples_per_second": 8.845,
2109
  "eval_steps_per_second": 1.179,
2110
  "step": 140
2111
- },
2112
- {
2113
- "epoch": 7.421052631578947,
2114
- "grad_norm": 0.35066893696784973,
2115
- "learning_rate": 0.0002249760770939754,
2116
- "loss": 0.0336,
2117
- "step": 141
2118
- },
2119
- {
2120
- "epoch": 7.421052631578947,
2121
- "eval_loss": 0.028481462970376015,
2122
- "eval_runtime": 3.376,
2123
- "eval_samples_per_second": 8.886,
2124
- "eval_steps_per_second": 1.185,
2125
- "step": 141
2126
- },
2127
- {
2128
- "epoch": 7.473684210526316,
2129
- "grad_norm": 0.40360027551651,
2130
- "learning_rate": 0.0002216530770986795,
2131
- "loss": 0.0205,
2132
- "step": 142
2133
- },
2134
- {
2135
- "epoch": 7.473684210526316,
2136
- "eval_loss": 0.027093667536973953,
2137
- "eval_runtime": 3.3851,
2138
- "eval_samples_per_second": 8.862,
2139
- "eval_steps_per_second": 1.182,
2140
- "step": 142
2141
- },
2142
- {
2143
- "epoch": 7.526315789473684,
2144
- "grad_norm": 0.3854162395000458,
2145
- "learning_rate": 0.0002183351430834358,
2146
- "loss": 0.0307,
2147
- "step": 143
2148
- },
2149
- {
2150
- "epoch": 7.526315789473684,
2151
- "eval_loss": 0.02729531191289425,
2152
- "eval_runtime": 3.4029,
2153
- "eval_samples_per_second": 8.816,
2154
- "eval_steps_per_second": 1.175,
2155
- "step": 143
2156
- },
2157
- {
2158
- "epoch": 7.578947368421053,
2159
- "grad_norm": 0.3493349254131317,
2160
- "learning_rate": 0.0002150228680081079,
2161
- "loss": 0.0229,
2162
- "step": 144
2163
- },
2164
- {
2165
- "epoch": 7.578947368421053,
2166
- "eval_loss": 0.025978045538067818,
2167
- "eval_runtime": 3.4085,
2168
- "eval_samples_per_second": 8.801,
2169
- "eval_steps_per_second": 1.174,
2170
- "step": 144
2171
- },
2172
- {
2173
- "epoch": 7.631578947368421,
2174
- "grad_norm": 0.47219786047935486,
2175
- "learning_rate": 0.00021171684382123,
2176
- "loss": 0.0405,
2177
- "step": 145
2178
- },
2179
- {
2180
- "epoch": 7.631578947368421,
2181
- "eval_loss": 0.02607414871454239,
2182
- "eval_runtime": 3.3976,
2183
- "eval_samples_per_second": 8.83,
2184
- "eval_steps_per_second": 1.177,
2185
- "step": 145
2186
- },
2187
- {
2188
- "epoch": 7.684210526315789,
2189
- "grad_norm": 0.3866257965564728,
2190
- "learning_rate": 0.0002084176613542175,
2191
- "loss": 0.028,
2192
- "step": 146
2193
- },
2194
- {
2195
- "epoch": 7.684210526315789,
2196
- "eval_loss": 0.02371269464492798,
2197
- "eval_runtime": 3.3925,
2198
- "eval_samples_per_second": 8.843,
2199
- "eval_steps_per_second": 1.179,
2200
- "step": 146
2201
- },
2202
- {
2203
- "epoch": 7.7368421052631575,
2204
- "grad_norm": 0.6331397891044617,
2205
- "learning_rate": 0.00020512591021577773,
2206
- "loss": 0.0501,
2207
- "step": 147
2208
- },
2209
- {
2210
- "epoch": 7.7368421052631575,
2211
- "eval_loss": 0.024511611089110374,
2212
- "eval_runtime": 3.3882,
2213
- "eval_samples_per_second": 8.854,
2214
- "eval_steps_per_second": 1.181,
2215
- "step": 147
2216
- },
2217
- {
2218
- "epoch": 7.7894736842105265,
2219
- "grad_norm": 0.45335713028907776,
2220
- "learning_rate": 0.00020184217868653867,
2221
- "loss": 0.0279,
2222
- "step": 148
2223
- },
2224
- {
2225
- "epoch": 7.7894736842105265,
2226
- "eval_loss": 0.02352703921496868,
2227
- "eval_runtime": 3.3857,
2228
- "eval_samples_per_second": 8.861,
2229
- "eval_steps_per_second": 1.181,
2230
- "step": 148
2231
- },
2232
- {
2233
- "epoch": 7.842105263157895,
2234
- "grad_norm": 0.4972074627876282,
2235
- "learning_rate": 0.0001985670536139151,
2236
- "loss": 0.033,
2237
- "step": 149
2238
- },
2239
- {
2240
- "epoch": 7.842105263157895,
2241
- "eval_loss": 0.02143845707178116,
2242
- "eval_runtime": 3.3819,
2243
- "eval_samples_per_second": 8.871,
2244
- "eval_steps_per_second": 1.183,
2245
- "step": 149
2246
- },
2247
- {
2248
- "epoch": 7.894736842105263,
2249
- "grad_norm": 0.352267861366272,
2250
- "learning_rate": 0.0001953011203072312,
2251
- "loss": 0.0337,
2252
- "step": 150
2253
- },
2254
- {
2255
- "epoch": 7.894736842105263,
2256
- "eval_loss": 0.02080574445426464,
2257
- "eval_runtime": 3.3774,
2258
- "eval_samples_per_second": 8.883,
2259
- "eval_steps_per_second": 1.184,
2260
- "step": 150
2261
- },
2262
- {
2263
- "epoch": 7.947368421052632,
2264
- "grad_norm": 0.5821244120597839,
2265
- "learning_rate": 0.00019204496243311792,
2266
- "loss": 0.0634,
2267
- "step": 151
2268
- },
2269
- {
2270
- "epoch": 7.947368421052632,
2271
- "eval_loss": 0.016822049394249916,
2272
- "eval_runtime": 3.3726,
2273
- "eval_samples_per_second": 8.895,
2274
- "eval_steps_per_second": 1.186,
2275
- "step": 151
2276
- },
2277
- {
2278
- "epoch": 8.0,
2279
- "grad_norm": 0.38316017389297485,
2280
- "learning_rate": 0.00018879916191120349,
2281
- "loss": 0.0422,
2282
- "step": 152
2283
- },
2284
- {
2285
- "epoch": 8.0,
2286
- "eval_loss": 0.013590247370302677,
2287
- "eval_runtime": 3.3896,
2288
- "eval_samples_per_second": 8.851,
2289
- "eval_steps_per_second": 1.18,
2290
- "step": 152
2291
- },
2292
- {
2293
- "epoch": 8.052631578947368,
2294
- "grad_norm": 0.10196978598833084,
2295
- "learning_rate": 0.00018556429881011656,
2296
- "loss": 0.0059,
2297
- "step": 153
2298
- },
2299
- {
2300
- "epoch": 8.052631578947368,
2301
- "eval_loss": 0.013392569497227669,
2302
- "eval_runtime": 3.3989,
2303
- "eval_samples_per_second": 8.826,
2304
- "eval_steps_per_second": 1.177,
2305
- "step": 153
2306
- },
2307
- {
2308
- "epoch": 8.105263157894736,
2309
- "grad_norm": 0.13090473413467407,
2310
- "learning_rate": 0.0001823409512438203,
2311
- "loss": 0.0097,
2312
- "step": 154
2313
- },
2314
- {
2315
- "epoch": 8.105263157894736,
2316
- "eval_loss": 0.013524877838790417,
2317
- "eval_runtime": 3.4041,
2318
- "eval_samples_per_second": 8.813,
2319
- "eval_steps_per_second": 1.175,
2320
- "step": 154
2321
- },
2322
- {
2323
- "epoch": 8.157894736842104,
2324
- "grad_norm": 0.08641204982995987,
2325
- "learning_rate": 0.00017912969526829559,
2326
- "loss": 0.0046,
2327
- "step": 155
2328
- },
2329
- {
2330
- "epoch": 8.157894736842104,
2331
- "eval_loss": 0.014381513930857182,
2332
- "eval_runtime": 3.4025,
2333
- "eval_samples_per_second": 8.817,
2334
- "eval_steps_per_second": 1.176,
2335
- "step": 155
2336
- },
2337
- {
2338
- "epoch": 8.210526315789474,
2339
- "grad_norm": 0.2780037522315979,
2340
- "learning_rate": 0.00017593110477859153,
2341
- "loss": 0.0212,
2342
- "step": 156
2343
- },
2344
- {
2345
- "epoch": 8.210526315789474,
2346
- "eval_loss": 0.01327499095350504,
2347
- "eval_runtime": 3.3964,
2348
- "eval_samples_per_second": 8.833,
2349
- "eval_steps_per_second": 1.178,
2350
- "step": 156
2351
- },
2352
- {
2353
- "epoch": 8.263157894736842,
2354
- "grad_norm": 0.31865352392196655,
2355
- "learning_rate": 0.00017274575140626317,
2356
- "loss": 0.0212,
2357
- "step": 157
2358
- },
2359
- {
2360
- "epoch": 8.263157894736842,
2361
- "eval_loss": 0.012452797032892704,
2362
- "eval_runtime": 3.3903,
2363
- "eval_samples_per_second": 8.849,
2364
- "eval_steps_per_second": 1.18,
2365
- "step": 157
2366
- },
2367
- {
2368
- "epoch": 8.31578947368421,
2369
- "grad_norm": 0.35457736253738403,
2370
- "learning_rate": 0.00016957420441721284,
2371
- "loss": 0.0098,
2372
- "step": 158
2373
- },
2374
- {
2375
- "epoch": 8.31578947368421,
2376
- "eval_loss": 0.011020404286682606,
2377
- "eval_runtime": 3.3806,
2378
- "eval_samples_per_second": 8.874,
2379
- "eval_steps_per_second": 1.183,
2380
- "step": 158
2381
- },
2382
- {
2383
- "epoch": 8.368421052631579,
2384
- "grad_norm": 0.3316934108734131,
2385
- "learning_rate": 0.00016641703060995457,
2386
- "loss": 0.022,
2387
- "step": 159
2388
- },
2389
- {
2390
- "epoch": 8.368421052631579,
2391
- "eval_loss": 0.010090429335832596,
2392
- "eval_runtime": 3.3847,
2393
- "eval_samples_per_second": 8.863,
2394
- "eval_steps_per_second": 1.182,
2395
- "step": 159
2396
- },
2397
- {
2398
- "epoch": 8.421052631578947,
2399
- "grad_norm": 0.2943498492240906,
2400
- "learning_rate": 0.00016327479421431983,
2401
- "loss": 0.0208,
2402
- "step": 160
2403
- },
2404
- {
2405
- "epoch": 8.421052631578947,
2406
- "eval_loss": 0.008338144980370998,
2407
- "eval_runtime": 3.3812,
2408
- "eval_samples_per_second": 8.872,
2409
- "eval_steps_per_second": 1.183,
2410
- "step": 160
2411
- },
2412
- {
2413
- "epoch": 8.473684210526315,
2414
- "grad_norm": 0.2523714303970337,
2415
- "learning_rate": 0.00016014805679062183,
2416
- "loss": 0.017,
2417
- "step": 161
2418
- },
2419
- {
2420
- "epoch": 8.473684210526315,
2421
- "eval_loss": 0.007794048171490431,
2422
- "eval_runtime": 3.379,
2423
- "eval_samples_per_second": 8.878,
2424
- "eval_steps_per_second": 1.184,
2425
- "step": 161
2426
- },
2427
- {
2428
- "epoch": 8.526315789473685,
2429
- "grad_norm": 0.45208269357681274,
2430
- "learning_rate": 0.0001570373771292967,
2431
- "loss": 0.0161,
2432
- "step": 162
2433
- },
2434
- {
2435
- "epoch": 8.526315789473685,
2436
- "eval_loss": 0.00768243195489049,
2437
- "eval_runtime": 3.4065,
2438
- "eval_samples_per_second": 8.807,
2439
- "eval_steps_per_second": 1.174,
2440
- "step": 162
2441
- },
2442
- {
2443
- "epoch": 8.578947368421053,
2444
- "grad_norm": 0.20215876400470734,
2445
- "learning_rate": 0.00015394331115104075,
2446
- "loss": 0.0143,
2447
- "step": 163
2448
- },
2449
- {
2450
- "epoch": 8.578947368421053,
2451
- "eval_loss": 0.007556635420769453,
2452
- "eval_runtime": 3.4167,
2453
- "eval_samples_per_second": 8.78,
2454
- "eval_steps_per_second": 1.171,
2455
- "step": 163
2456
- },
2457
- {
2458
- "epoch": 8.631578947368421,
2459
- "grad_norm": 0.4235493540763855,
2460
- "learning_rate": 0.00015086641180745932,
2461
- "loss": 0.02,
2462
- "step": 164
2463
- },
2464
- {
2465
- "epoch": 8.631578947368421,
2466
- "eval_loss": 0.008587359450757504,
2467
- "eval_runtime": 3.4278,
2468
- "eval_samples_per_second": 8.752,
2469
- "eval_steps_per_second": 1.167,
2470
- "step": 164
2471
- },
2472
- {
2473
- "epoch": 8.68421052631579,
2474
- "grad_norm": 0.3459453582763672,
2475
- "learning_rate": 0.00014780722898224708,
2476
- "loss": 0.0156,
2477
- "step": 165
2478
- },
2479
- {
2480
- "epoch": 8.68421052631579,
2481
- "eval_loss": 0.009687132202088833,
2482
- "eval_runtime": 3.4089,
2483
- "eval_samples_per_second": 8.801,
2484
- "eval_steps_per_second": 1.173,
2485
- "step": 165
2486
- },
2487
- {
2488
- "epoch": 8.736842105263158,
2489
- "grad_norm": 0.46791887283325195,
2490
- "learning_rate": 0.0001447663093929163,
2491
- "loss": 0.0254,
2492
- "step": 166
2493
- },
2494
- {
2495
- "epoch": 8.736842105263158,
2496
- "eval_loss": 0.005360104609280825,
2497
- "eval_runtime": 3.4012,
2498
- "eval_samples_per_second": 8.82,
2499
- "eval_steps_per_second": 1.176,
2500
- "step": 166
2501
- },
2502
- {
2503
- "epoch": 8.789473684210526,
2504
- "grad_norm": 0.32888978719711304,
2505
- "learning_rate": 0.00014174419649309089,
2506
- "loss": 0.0193,
2507
- "step": 167
2508
- },
2509
- {
2510
- "epoch": 8.789473684210526,
2511
- "eval_loss": 0.005120207089930773,
2512
- "eval_runtime": 3.393,
2513
- "eval_samples_per_second": 8.842,
2514
- "eval_steps_per_second": 1.179,
2515
- "step": 167
2516
- },
2517
- {
2518
- "epoch": 8.842105263157894,
2519
- "grad_norm": 0.36381661891937256,
2520
- "learning_rate": 0.00013874143037538418,
2521
- "loss": 0.0115,
2522
- "step": 168
2523
- },
2524
- {
2525
- "epoch": 8.842105263157894,
2526
- "eval_loss": 0.005558122880756855,
2527
- "eval_runtime": 3.397,
2528
- "eval_samples_per_second": 8.831,
2529
- "eval_steps_per_second": 1.178,
2530
- "step": 168
2531
- },
2532
- {
2533
- "epoch": 8.894736842105264,
2534
- "grad_norm": 0.15391984581947327,
2535
- "learning_rate": 0.0001357585476748766,
2536
- "loss": 0.0077,
2537
- "step": 169
2538
- },
2539
- {
2540
- "epoch": 8.894736842105264,
2541
- "eval_loss": 0.006869714241474867,
2542
- "eval_runtime": 3.392,
2543
- "eval_samples_per_second": 8.844,
2544
- "eval_steps_per_second": 1.179,
2545
- "step": 169
2546
- },
2547
- {
2548
- "epoch": 8.947368421052632,
2549
- "grad_norm": 0.3248370289802551,
2550
- "learning_rate": 0.00013279608147321223,
2551
- "loss": 0.0066,
2552
- "step": 170
2553
- },
2554
- {
2555
- "epoch": 8.947368421052632,
2556
- "eval_loss": 0.00868891179561615,
2557
- "eval_runtime": 3.391,
2558
- "eval_samples_per_second": 8.847,
2559
- "eval_steps_per_second": 1.18,
2560
- "step": 170
2561
- },
2562
- {
2563
- "epoch": 9.0,
2564
- "grad_norm": 0.22031106054782867,
2565
- "learning_rate": 0.00012985456120332905,
2566
- "loss": 0.0103,
2567
- "step": 171
2568
- },
2569
- {
2570
- "epoch": 9.0,
2571
- "eval_loss": 0.009011917747557163,
2572
- "eval_runtime": 3.3794,
2573
- "eval_samples_per_second": 8.877,
2574
- "eval_steps_per_second": 1.184,
2575
- "step": 171
2576
- },
2577
- {
2578
- "epoch": 9.052631578947368,
2579
- "grad_norm": 0.19521355628967285,
2580
- "learning_rate": 0.00012693451255484312,
2581
- "loss": 0.0035,
2582
- "step": 172
2583
- },
2584
- {
2585
- "epoch": 9.052631578947368,
2586
- "eval_loss": 0.0076974560506641865,
2587
- "eval_runtime": 3.3987,
2588
- "eval_samples_per_second": 8.827,
2589
- "eval_steps_per_second": 1.177,
2590
- "step": 172
2591
- },
2592
- {
2593
- "epoch": 9.105263157894736,
2594
- "grad_norm": 0.22541294991970062,
2595
- "learning_rate": 0.00012403645738009997,
2596
- "loss": 0.0089,
2597
- "step": 173
2598
- },
2599
- {
2600
- "epoch": 9.105263157894736,
2601
- "eval_loss": 0.004544786177575588,
2602
- "eval_runtime": 3.41,
2603
- "eval_samples_per_second": 8.798,
2604
- "eval_steps_per_second": 1.173,
2605
- "step": 173
2606
- },
2607
- {
2608
- "epoch": 9.157894736842104,
2609
- "grad_norm": 0.1337708979845047,
2610
- "learning_rate": 0.00012116091360091261,
2611
- "loss": 0.005,
2612
- "step": 174
2613
- },
2614
- {
2615
- "epoch": 9.157894736842104,
2616
- "eval_loss": 0.0032975501380860806,
2617
- "eval_runtime": 3.4164,
2618
- "eval_samples_per_second": 8.781,
2619
- "eval_steps_per_second": 1.171,
2620
- "step": 174
2621
- },
2622
- {
2623
- "epoch": 9.210526315789474,
2624
- "grad_norm": 0.08104129135608673,
2625
- "learning_rate": 0.00011830839511600211,
2626
- "loss": 0.0028,
2627
- "step": 175
2628
- },
2629
- {
2630
- "epoch": 9.210526315789474,
2631
- "eval_loss": 0.002934858202934265,
2632
- "eval_runtime": 3.4116,
2633
- "eval_samples_per_second": 8.793,
2634
- "eval_steps_per_second": 1.172,
2635
- "step": 175
2636
- },
2637
- {
2638
- "epoch": 9.263157894736842,
2639
- "grad_norm": 0.03561758995056152,
2640
- "learning_rate": 0.00011547941170915685,
2641
- "loss": 0.0017,
2642
- "step": 176
2643
- },
2644
- {
2645
- "epoch": 9.263157894736842,
2646
- "eval_loss": 0.0032255654223263264,
2647
- "eval_runtime": 3.4066,
2648
- "eval_samples_per_second": 8.806,
2649
- "eval_steps_per_second": 1.174,
2650
- "step": 176
2651
- },
2652
- {
2653
- "epoch": 9.31578947368421,
2654
- "grad_norm": 0.11387041211128235,
2655
- "learning_rate": 0.00011267446895812702,
2656
- "loss": 0.0044,
2657
- "step": 177
2658
- },
2659
- {
2660
- "epoch": 9.31578947368421,
2661
- "eval_loss": 0.003973633516579866,
2662
- "eval_runtime": 3.4044,
2663
- "eval_samples_per_second": 8.812,
2664
- "eval_steps_per_second": 1.175,
2665
- "step": 177
2666
- },
2667
- {
2668
- "epoch": 9.368421052631579,
2669
- "grad_norm": 0.19635799527168274,
2670
- "learning_rate": 0.0001098940681442713,
2671
- "loss": 0.0085,
2672
- "step": 178
2673
- },
2674
- {
2675
- "epoch": 9.368421052631579,
2676
- "eval_loss": 0.004082486033439636,
2677
- "eval_runtime": 3.3977,
2678
- "eval_samples_per_second": 8.83,
2679
- "eval_steps_per_second": 1.177,
2680
- "step": 178
2681
- },
2682
- {
2683
- "epoch": 9.421052631578947,
2684
- "grad_norm": 0.16695837676525116,
2685
- "learning_rate": 0.00010713870616297092,
2686
- "loss": 0.0041,
2687
- "step": 179
2688
- },
2689
- {
2690
- "epoch": 9.421052631578947,
2691
- "eval_loss": 0.004096930380910635,
2692
- "eval_runtime": 3.3946,
2693
- "eval_samples_per_second": 8.838,
2694
- "eval_steps_per_second": 1.178,
2695
- "step": 179
2696
- },
2697
- {
2698
- "epoch": 9.473684210526315,
2699
- "grad_norm": 0.10704478621482849,
2700
- "learning_rate": 0.00010440887543482746,
2701
- "loss": 0.0061,
2702
- "step": 180
2703
- },
2704
- {
2705
- "epoch": 9.473684210526315,
2706
- "eval_loss": 0.003820668673142791,
2707
- "eval_runtime": 3.3815,
2708
- "eval_samples_per_second": 8.872,
2709
- "eval_steps_per_second": 1.183,
2710
- "step": 180
2711
- },
2712
- {
2713
- "epoch": 9.526315789473685,
2714
- "grad_norm": 0.15356966853141785,
2715
- "learning_rate": 0.0001017050638176612,
2716
- "loss": 0.0042,
2717
- "step": 181
2718
- },
2719
- {
2720
- "epoch": 9.526315789473685,
2721
- "eval_loss": 0.002816816559061408,
2722
- "eval_runtime": 3.3759,
2723
- "eval_samples_per_second": 8.887,
2724
- "eval_steps_per_second": 1.185,
2725
- "step": 181
2726
- },
2727
- {
2728
- "epoch": 9.578947368421053,
2729
- "grad_norm": 0.14018815755844116,
2730
- "learning_rate": 9.902775451932386e-05,
2731
- "loss": 0.0035,
2732
- "step": 182
2733
- },
2734
- {
2735
- "epoch": 9.578947368421053,
2736
- "eval_loss": 0.0024158721789717674,
2737
- "eval_runtime": 3.385,
2738
- "eval_samples_per_second": 8.863,
2739
- "eval_steps_per_second": 1.182,
2740
- "step": 182
2741
- },
2742
- {
2743
- "epoch": 9.631578947368421,
2744
- "grad_norm": 0.131745383143425,
2745
- "learning_rate": 9.637742601134286e-05,
2746
- "loss": 0.0072,
2747
- "step": 183
2748
- },
2749
- {
2750
- "epoch": 9.631578947368421,
2751
- "eval_loss": 0.002450426109135151,
2752
- "eval_runtime": 3.42,
2753
- "eval_samples_per_second": 8.772,
2754
- "eval_steps_per_second": 1.17,
2755
- "step": 183
2756
- },
2757
- {
2758
- "epoch": 9.68421052631579,
2759
- "grad_norm": 0.1151895672082901,
2760
- "learning_rate": 9.375455194341214e-05,
2761
- "loss": 0.0036,
2762
- "step": 184
2763
- },
2764
- {
2765
- "epoch": 9.68421052631579,
2766
- "eval_loss": 0.0024695699103176594,
2767
- "eval_runtime": 3.4266,
2768
- "eval_samples_per_second": 8.755,
2769
- "eval_steps_per_second": 1.167,
2770
- "step": 184
2771
- },
2772
- {
2773
- "epoch": 9.736842105263158,
2774
- "grad_norm": 0.10937950760126114,
2775
- "learning_rate": 9.11596010587441e-05,
2776
- "loss": 0.0069,
2777
- "step": 185
2778
- },
2779
- {
2780
- "epoch": 9.736842105263158,
2781
- "eval_loss": 0.002432518871501088,
2782
- "eval_runtime": 3.412,
2783
- "eval_samples_per_second": 8.792,
2784
- "eval_steps_per_second": 1.172,
2785
- "step": 185
2786
- },
2787
- {
2788
- "epoch": 9.789473684210526,
2789
- "grad_norm": 0.11447066813707352,
2790
- "learning_rate": 8.85930371102994e-05,
2791
- "loss": 0.009,
2792
- "step": 186
2793
- },
2794
- {
2795
- "epoch": 9.789473684210526,
2796
- "eval_loss": 0.0024716572370380163,
2797
- "eval_runtime": 3.4131,
2798
- "eval_samples_per_second": 8.79,
2799
- "eval_steps_per_second": 1.172,
2800
- "step": 186
2801
- },
2802
- {
2803
- "epoch": 9.842105263157894,
2804
- "grad_norm": 0.17368115484714508,
2805
- "learning_rate": 8.605531877790762e-05,
2806
- "loss": 0.0049,
2807
- "step": 187
2808
- },
2809
- {
2810
- "epoch": 9.842105263157894,
2811
- "eval_loss": 0.002253969432786107,
2812
- "eval_runtime": 3.4001,
2813
- "eval_samples_per_second": 8.823,
2814
- "eval_steps_per_second": 1.176,
2815
- "step": 187
2816
- },
2817
- {
2818
- "epoch": 9.894736842105264,
2819
- "grad_norm": 0.11908090114593506,
2820
- "learning_rate": 8.354689958629513e-05,
2821
- "loss": 0.0041,
2822
- "step": 188
2823
- },
2824
- {
2825
- "epoch": 9.894736842105264,
2826
- "eval_loss": 0.0017488420708104968,
2827
- "eval_runtime": 3.3957,
2828
- "eval_samples_per_second": 8.835,
2829
- "eval_steps_per_second": 1.178,
2830
- "step": 188
2831
- },
2832
- {
2833
- "epoch": 9.947368421052632,
2834
- "grad_norm": 0.022854585200548172,
2835
- "learning_rate": 8.106822782403376e-05,
2836
- "loss": 0.0009,
2837
- "step": 189
2838
- },
2839
- {
2840
- "epoch": 9.947368421052632,
2841
- "eval_loss": 0.0015173099236562848,
2842
- "eval_runtime": 3.3937,
2843
- "eval_samples_per_second": 8.84,
2844
- "eval_steps_per_second": 1.179,
2845
- "step": 189
2846
- },
2847
- {
2848
- "epoch": 10.0,
2849
- "grad_norm": 0.11229632049798965,
2850
- "learning_rate": 7.861974646342596e-05,
2851
- "loss": 0.0041,
2852
- "step": 190
2853
- },
2854
- {
2855
- "epoch": 10.0,
2856
- "eval_loss": 0.001307736849412322,
2857
- "eval_runtime": 3.3916,
2858
- "eval_samples_per_second": 8.845,
2859
- "eval_steps_per_second": 1.179,
2860
- "step": 190
2861
  }
2862
  ],
2863
  "logging_steps": 1,
@@ -2877,7 +2127,7 @@
2877
  "attributes": {}
2878
  }
2879
  },
2880
- "total_flos": 8096682647961600.0,
2881
  "train_batch_size": 1,
2882
  "trial_name": null,
2883
  "trial_params": null
 
1
  {
2
+ "best_global_step": 135,
3
+ "best_metric": 0.028628086671233177,
4
+ "best_model_checkpoint": "/content/drive/MyDrive/lora_model/outputs/task15_microsoft/Phi-4-mini-instruct/checkpoint-130",
5
+ "epoch": 7.368421052631579,
6
  "eval_steps": 1,
7
+ "global_step": 140,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2108
  "eval_samples_per_second": 8.845,
2109
  "eval_steps_per_second": 1.179,
2110
  "step": 140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2111
  }
2112
  ],
2113
  "logging_steps": 1,
 
2127
  "attributes": {}
2128
  }
2129
  },
2130
+ "total_flos": 5968350472955904.0,
2131
  "train_batch_size": 1,
2132
  "trial_name": null,
2133
  "trial_params": null