rootxhacker commited on
Commit
bc4b2a0
·
verified ·
1 Parent(s): 83930b4

Training in progress, step 15000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:048b14e8250392932daba4d22d09a460365951dbb6b68b5a41a1a0e5ce624541
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35e8b88b22335eff4aa95a5db7b6615364abd358d14317af2dec16bcdb2efc61
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b29852e5d0ff827b5e17125c299364e88ca1c70914de6e2a1ee93e58f54b613
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8a93a0264eb6605e0d626be93ba5f8dee38234bcac32f7e6d5d1cccc2a1057f
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8ea42701e6bd5369fc7af2447cde260122e2711abf45d46209ed918523c1a2c
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c298ef220b2440e2c8688c2918aba70637e9e1c8a2951767772c4969343135c8
3
+ size 14308
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbaf929d345a2a40a03fb2b8f0c27dc126d0b7d5e2424d21837b1af94ede355e
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08eda93be407e2e8125df91c801018bcedfb4cde17a8cf02b9f76837f9ade1c1
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3969f570a31885b978deed67646d88ac34c614ef8cb8214e9d16328676e8af9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a00183886b48331e49cfb9b953bfa8a92696629d3ac41c1b27b8636569368855
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 13750,
3
- "best_metric": 1.5006794929504395,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-13000",
5
- "epoch": 1.1152988231674485,
6
  "eval_steps": 250,
7
- "global_step": 14500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2502,6 +2502,92 @@
2502
  "eval_samples_per_second": 55.51,
2503
  "eval_steps_per_second": 13.877,
2504
  "step": 14500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2505
  }
2506
  ],
2507
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 14750,
3
+ "best_metric": 1.4990500211715698,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-13000",
5
+ "epoch": 1.153757403276671,
6
  "eval_steps": 250,
7
+ "global_step": 15000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2502
  "eval_samples_per_second": 55.51,
2503
  "eval_steps_per_second": 13.877,
2504
  "step": 14500
2505
+ },
2506
+ {
2507
+ "epoch": 1.119144681178371,
2508
+ "grad_norm": 1.9716360569000244,
2509
+ "learning_rate": 6.568434984007941e-05,
2510
+ "loss": 1.5349,
2511
+ "step": 14550
2512
+ },
2513
+ {
2514
+ "epoch": 1.1229905391892931,
2515
+ "grad_norm": 0.710033655166626,
2516
+ "learning_rate": 6.542700636006029e-05,
2517
+ "loss": 1.4107,
2518
+ "step": 14600
2519
+ },
2520
+ {
2521
+ "epoch": 1.1268363972002153,
2522
+ "grad_norm": 1.4398375749588013,
2523
+ "learning_rate": 6.516966288004117e-05,
2524
+ "loss": 1.4185,
2525
+ "step": 14650
2526
+ },
2527
+ {
2528
+ "epoch": 1.1306822552111375,
2529
+ "grad_norm": 2.5566532611846924,
2530
+ "learning_rate": 6.491231940002206e-05,
2531
+ "loss": 1.5758,
2532
+ "step": 14700
2533
+ },
2534
+ {
2535
+ "epoch": 1.13452811322206,
2536
+ "grad_norm": 1.2500799894332886,
2537
+ "learning_rate": 6.465497592000294e-05,
2538
+ "loss": 1.4751,
2539
+ "step": 14750
2540
+ },
2541
+ {
2542
+ "epoch": 1.13452811322206,
2543
+ "eval_loss": 1.4990500211715698,
2544
+ "eval_runtime": 17.9979,
2545
+ "eval_samples_per_second": 55.562,
2546
+ "eval_steps_per_second": 13.891,
2547
+ "step": 14750
2548
+ },
2549
+ {
2550
+ "epoch": 1.1383739712329821,
2551
+ "grad_norm": 1.5937495231628418,
2552
+ "learning_rate": 6.439763243998382e-05,
2553
+ "loss": 1.5215,
2554
+ "step": 14800
2555
+ },
2556
+ {
2557
+ "epoch": 1.1422198292439043,
2558
+ "grad_norm": 1.362358570098877,
2559
+ "learning_rate": 6.41402889599647e-05,
2560
+ "loss": 1.5125,
2561
+ "step": 14850
2562
+ },
2563
+ {
2564
+ "epoch": 1.1460656872548265,
2565
+ "grad_norm": 2.1192502975463867,
2566
+ "learning_rate": 6.388294547994558e-05,
2567
+ "loss": 1.4485,
2568
+ "step": 14900
2569
+ },
2570
+ {
2571
+ "epoch": 1.149911545265749,
2572
+ "grad_norm": 1.4089174270629883,
2573
+ "learning_rate": 6.362560199992647e-05,
2574
+ "loss": 1.5331,
2575
+ "step": 14950
2576
+ },
2577
+ {
2578
+ "epoch": 1.153757403276671,
2579
+ "grad_norm": 1.3750373125076294,
2580
+ "learning_rate": 6.336825851990735e-05,
2581
+ "loss": 1.5177,
2582
+ "step": 15000
2583
+ },
2584
+ {
2585
+ "epoch": 1.153757403276671,
2586
+ "eval_loss": 1.5118192434310913,
2587
+ "eval_runtime": 17.9213,
2588
+ "eval_samples_per_second": 55.799,
2589
+ "eval_steps_per_second": 13.95,
2590
+ "step": 15000
2591
  }
2592
  ],
2593
  "logging_steps": 50,