Kudod commited on
Commit
2f8e4c4
·
verified ·
1 Parent(s): f69b099

Training in progress, step 160000, checkpoint

Browse files
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4736a53f23b2d9813cdb31f71244daee8a9d4f05d12eb17bb18c233756dd0c26
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3fd70c2ee9103dacc2e8460e11a544a0da08573c15a524763aa473249698e80
3
  size 14244
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 3.7246722288438616,
6
  "eval_steps": 5000,
7
- "global_step": 150000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2348,6 +2348,162 @@
2348
  "eval_samples_per_second": 440.711,
2349
  "eval_steps_per_second": 13.772,
2350
  "step": 150000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2351
  }
2352
  ],
2353
  "logging_steps": 500,
@@ -2367,7 +2523,7 @@
2367
  "attributes": {}
2368
  }
2369
  },
2370
- "total_flos": 3.185652398278902e+17,
2371
  "train_batch_size": 32,
2372
  "trial_name": null,
2373
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 3.972983710766786,
6
  "eval_steps": 5000,
7
+ "global_step": 160000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2348
  "eval_samples_per_second": 440.711,
2349
  "eval_steps_per_second": 13.772,
2350
  "step": 150000
2351
+ },
2352
+ {
2353
+ "epoch": 3.737087802940008,
2354
+ "grad_norm": NaN,
2355
+ "learning_rate": 0.0004900742091849483,
2356
+ "loss": 0.0,
2357
+ "step": 150500
2358
+ },
2359
+ {
2360
+ "epoch": 3.749503377036154,
2361
+ "grad_norm": NaN,
2362
+ "learning_rate": 0.0004900742091849483,
2363
+ "loss": 0.0,
2364
+ "step": 151000
2365
+ },
2366
+ {
2367
+ "epoch": 3.7619189511323006,
2368
+ "grad_norm": NaN,
2369
+ "learning_rate": 0.0004900742091849483,
2370
+ "loss": 0.0,
2371
+ "step": 151500
2372
+ },
2373
+ {
2374
+ "epoch": 3.7743345252284466,
2375
+ "grad_norm": NaN,
2376
+ "learning_rate": 0.0004900742091849483,
2377
+ "loss": 0.0,
2378
+ "step": 152000
2379
+ },
2380
+ {
2381
+ "epoch": 3.7867500993245926,
2382
+ "grad_norm": NaN,
2383
+ "learning_rate": 0.0004900742091849483,
2384
+ "loss": 0.0,
2385
+ "step": 152500
2386
+ },
2387
+ {
2388
+ "epoch": 3.799165673420739,
2389
+ "grad_norm": NaN,
2390
+ "learning_rate": 0.0004900742091849483,
2391
+ "loss": 0.0,
2392
+ "step": 153000
2393
+ },
2394
+ {
2395
+ "epoch": 3.811581247516885,
2396
+ "grad_norm": NaN,
2397
+ "learning_rate": 0.0004900742091849483,
2398
+ "loss": 0.0,
2399
+ "step": 153500
2400
+ },
2401
+ {
2402
+ "epoch": 3.823996821613031,
2403
+ "grad_norm": NaN,
2404
+ "learning_rate": 0.0004900742091849483,
2405
+ "loss": 0.0,
2406
+ "step": 154000
2407
+ },
2408
+ {
2409
+ "epoch": 3.8364123957091776,
2410
+ "grad_norm": NaN,
2411
+ "learning_rate": 0.0004900742091849483,
2412
+ "loss": 0.0,
2413
+ "step": 154500
2414
+ },
2415
+ {
2416
+ "epoch": 3.848827969805324,
2417
+ "grad_norm": NaN,
2418
+ "learning_rate": 0.0004900742091849483,
2419
+ "loss": 0.0,
2420
+ "step": 155000
2421
+ },
2422
+ {
2423
+ "epoch": 3.848827969805324,
2424
+ "eval_loss": NaN,
2425
+ "eval_runtime": 2929.1046,
2426
+ "eval_samples_per_second": 439.961,
2427
+ "eval_steps_per_second": 13.749,
2428
+ "step": 155000
2429
+ },
2430
+ {
2431
+ "epoch": 3.86124354390147,
2432
+ "grad_norm": NaN,
2433
+ "learning_rate": 0.0004900742091849483,
2434
+ "loss": 0.0,
2435
+ "step": 155500
2436
+ },
2437
+ {
2438
+ "epoch": 3.873659117997616,
2439
+ "grad_norm": NaN,
2440
+ "learning_rate": 0.0004900742091849483,
2441
+ "loss": 0.0,
2442
+ "step": 156000
2443
+ },
2444
+ {
2445
+ "epoch": 3.8860746920937626,
2446
+ "grad_norm": NaN,
2447
+ "learning_rate": 0.0004900742091849483,
2448
+ "loss": 0.0,
2449
+ "step": 156500
2450
+ },
2451
+ {
2452
+ "epoch": 3.8984902661899086,
2453
+ "grad_norm": NaN,
2454
+ "learning_rate": 0.0004900742091849483,
2455
+ "loss": 0.0,
2456
+ "step": 157000
2457
+ },
2458
+ {
2459
+ "epoch": 3.9109058402860546,
2460
+ "grad_norm": NaN,
2461
+ "learning_rate": 0.0004900742091849483,
2462
+ "loss": 0.0,
2463
+ "step": 157500
2464
+ },
2465
+ {
2466
+ "epoch": 3.923321414382201,
2467
+ "grad_norm": NaN,
2468
+ "learning_rate": 0.0004900742091849483,
2469
+ "loss": 0.0,
2470
+ "step": 158000
2471
+ },
2472
+ {
2473
+ "epoch": 3.935736988478347,
2474
+ "grad_norm": NaN,
2475
+ "learning_rate": 0.0004900742091849483,
2476
+ "loss": 0.0,
2477
+ "step": 158500
2478
+ },
2479
+ {
2480
+ "epoch": 3.9481525625744935,
2481
+ "grad_norm": NaN,
2482
+ "learning_rate": 0.0004900742091849483,
2483
+ "loss": 0.0,
2484
+ "step": 159000
2485
+ },
2486
+ {
2487
+ "epoch": 3.9605681366706396,
2488
+ "grad_norm": NaN,
2489
+ "learning_rate": 0.0004900742091849483,
2490
+ "loss": 0.0,
2491
+ "step": 159500
2492
+ },
2493
+ {
2494
+ "epoch": 3.972983710766786,
2495
+ "grad_norm": NaN,
2496
+ "learning_rate": 0.0004900742091849483,
2497
+ "loss": 0.0,
2498
+ "step": 160000
2499
+ },
2500
+ {
2501
+ "epoch": 3.972983710766786,
2502
+ "eval_loss": NaN,
2503
+ "eval_runtime": 2925.5422,
2504
+ "eval_samples_per_second": 440.496,
2505
+ "eval_steps_per_second": 13.766,
2506
+ "step": 160000
2507
  }
2508
  ],
2509
  "logging_steps": 500,
 
2523
  "attributes": {}
2524
  }
2525
  },
2526
+ "total_flos": 3.3978863868862464e+17,
2527
  "train_batch_size": 32,
2528
  "trial_name": null,
2529
  "trial_params": null