irishprancer committed on
Commit
ba4f8f2
·
verified ·
1 Parent(s): 7fc12ed

Training in progress, step 3300, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:850413a014ce1afd15263ab6c3f76035a2a29120ea6d95d7c7a3b16fe27706ee
3
  size 1482788592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de70de6b2da329ca8f9cd9782e5a8c63d35eb534a3da73d45c4d69f261b03dcd
3
  size 1482788592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81f1e42b57804054b595cc72714a0e29484e0a4f3f3db1a9b40226e25e29ae9d
3
- size 2897966842
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68600c4e47ab0d9fdeb2af5cf775c372faa1ed05d5bd6f5ca06681a6894cec35
3
+ size 2897966906
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48324ba9470305ba9a4f19447fdf98548c8bfaa12e85cde93aac4b610d7b8e47
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17668b377d77858255543f365889c1f2a879ebada82f52b754b1ab5a50728344
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63611ac194f86478fc723503311fa2cab6531bc9b416ef9b084dffa82990342b
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5e8d6bc6f3de9c0fa44c9cf11dc26e20febd8fd847e70f98451b9a254aee2c2
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.000785625830758363,
3
  "best_model_checkpoint": "./output/checkpoint-2400",
4
- "epoch": 2.4725274725274726,
5
  "eval_steps": 150,
6
- "global_step": 3150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2380,6 +2380,119 @@
2380
  "eval_samples_per_second": 9.443,
2381
  "eval_steps_per_second": 9.443,
2382
  "step": 3150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2383
  }
2384
  ],
2385
  "logging_steps": 10,
@@ -2399,7 +2512,7 @@
2399
  "attributes": {}
2400
  }
2401
  },
2402
- "total_flos": 2.7840567611990016e+17,
2403
  "train_batch_size": 16,
2404
  "trial_name": null,
2405
  "trial_params": null
 
1
  {
2
  "best_metric": 0.000785625830758363,
3
  "best_model_checkpoint": "./output/checkpoint-2400",
4
+ "epoch": 2.5902668759811616,
5
  "eval_steps": 150,
6
+ "global_step": 3300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2380
  "eval_samples_per_second": 9.443,
2381
  "eval_steps_per_second": 9.443,
2382
  "step": 3150
2383
+ },
2384
+ {
2385
+ "epoch": 2.4803767660910516,
2386
+ "grad_norm": 0.06127491593360901,
2387
+ "learning_rate": 2.373287048582997e-06,
2388
+ "loss": 0.0,
2389
+ "step": 3160
2390
+ },
2391
+ {
2392
+ "epoch": 2.488226059654631,
2393
+ "grad_norm": 0.0004414320283103734,
2394
+ "learning_rate": 2.3505841745683646e-06,
2395
+ "loss": 0.0022,
2396
+ "step": 3170
2397
+ },
2398
+ {
2399
+ "epoch": 2.4960753532182105,
2400
+ "grad_norm": 0.027753857895731926,
2401
+ "learning_rate": 2.3279423321670197e-06,
2402
+ "loss": 0.0,
2403
+ "step": 3180
2404
+ },
2405
+ {
2406
+ "epoch": 2.5039246467817895,
2407
+ "grad_norm": 0.0014922439586371183,
2408
+ "learning_rate": 2.305362452096422e-06,
2409
+ "loss": 0.0,
2410
+ "step": 3190
2411
+ },
2412
+ {
2413
+ "epoch": 2.511773940345369,
2414
+ "grad_norm": 0.0009659648058004677,
2415
+ "learning_rate": 2.2828454625270022e-06,
2416
+ "loss": 0.0001,
2417
+ "step": 3200
2418
+ },
2419
+ {
2420
+ "epoch": 2.5196232339089484,
2421
+ "grad_norm": 0.0004056979960296303,
2422
+ "learning_rate": 2.260392289044009e-06,
2423
+ "loss": 0.0,
2424
+ "step": 3210
2425
+ },
2426
+ {
2427
+ "epoch": 2.5274725274725274,
2428
+ "grad_norm": 0.011858894489705563,
2429
+ "learning_rate": 2.2380038546094595e-06,
2430
+ "loss": 0.0,
2431
+ "step": 3220
2432
+ },
2433
+ {
2434
+ "epoch": 2.535321821036107,
2435
+ "grad_norm": 0.15677396953105927,
2436
+ "learning_rate": 2.2156810795242044e-06,
2437
+ "loss": 0.0,
2438
+ "step": 3230
2439
+ },
2440
+ {
2441
+ "epoch": 2.5431711145996863,
2442
+ "grad_norm": 0.0002763714292086661,
2443
+ "learning_rate": 2.1934248813900955e-06,
2444
+ "loss": 0.0,
2445
+ "step": 3240
2446
+ },
2447
+ {
2448
+ "epoch": 2.5510204081632653,
2449
+ "grad_norm": 0.000499962130561471,
2450
+ "learning_rate": 2.1712361750722664e-06,
2451
+ "loss": 0.0,
2452
+ "step": 3250
2453
+ },
2454
+ {
2455
+ "epoch": 2.5588697017268447,
2456
+ "grad_norm": 0.0006597275496460497,
2457
+ "learning_rate": 2.1491158726615267e-06,
2458
+ "loss": 0.0,
2459
+ "step": 3260
2460
+ },
2461
+ {
2462
+ "epoch": 2.566718995290424,
2463
+ "grad_norm": 7.362648466369137e-05,
2464
+ "learning_rate": 2.127064883436869e-06,
2465
+ "loss": 0.0,
2466
+ "step": 3270
2467
+ },
2468
+ {
2469
+ "epoch": 2.574568288854003,
2470
+ "grad_norm": 0.010624408721923828,
2471
+ "learning_rate": 2.1050841138280927e-06,
2472
+ "loss": 0.0,
2473
+ "step": 3280
2474
+ },
2475
+ {
2476
+ "epoch": 2.5824175824175826,
2477
+ "grad_norm": 0.0006421171128749847,
2478
+ "learning_rate": 2.0831744673785437e-06,
2479
+ "loss": 0.0,
2480
+ "step": 3290
2481
+ },
2482
+ {
2483
+ "epoch": 2.5902668759811616,
2484
+ "grad_norm": 0.0017485865391790867,
2485
+ "learning_rate": 2.0613368447079754e-06,
2486
+ "loss": 0.0,
2487
+ "step": 3300
2488
+ },
2489
+ {
2490
+ "epoch": 2.5902668759811616,
2491
+ "eval_loss": 0.0013308656634762883,
2492
+ "eval_runtime": 52.675,
2493
+ "eval_samples_per_second": 9.492,
2494
+ "eval_steps_per_second": 9.492,
2495
+ "step": 3300
2496
  }
2497
  ],
2498
  "logging_steps": 10,
 
2512
  "attributes": {}
2513
  }
2514
  },
2515
+ "total_flos": 2.918295426915533e+17,
2516
  "train_batch_size": 16,
2517
  "trial_name": null,
2518
  "trial_params": null