azherali commited on
Commit
bded327
·
verified ·
1 Parent(s): 37bf1af

Training in progress, step 36000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f5cafd52307ebb0d4ad02660f4fa895f4176ab48ccb18d8698902129b3b1209
3
  size 3555504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4a4b1c16a17272dd3d5d44145b877d0c14dc6e19c78bd41637a76bc87a0e7ea
3
  size 3555504
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26d3696bade1cccaee17e0a3be137fdd3e3380bab51d20a657d51f20328ef6ef
3
  size 7141515
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68ca690b9a0c619477d3038e9b1f2189091b893c636b800c671b19aeb6d28dba
3
  size 7141515
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:819540c05377c6a98947b46ab098a8f4456dd635badbf1ea38d292430af1a819
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d04e325551f27b0abb1677ad51b5250cc6db4cb37a98cc2cc710661788306e4
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d960d60d0c4ed45cc4422d14d1920d93529483656b6082492613ed8fd865f5f
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eba75c9103403d8e6ccb33e21824c3bb41c5512582b8b3e0415b8a731bc74943
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25361f5602fa6c5f9d3627497e44e73d38bd7e5fabd543717ca1fb3c15c5a95b
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bef14adf96a5be9d64a1df76f457ac5b2fcd08c7a9b48176945077bc67a3f2f
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 32000,
3
- "best_metric": 0.9879419419991998,
4
- "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-32000",
5
- "epoch": 1.024,
6
  "eval_steps": 4000,
7
- "global_step": 32000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2344,6 +2344,298 @@
2344
  "eval_samples_per_second": 128.957,
2345
  "eval_steps_per_second": 8.06,
2346
  "step": 32000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2347
  }
2348
  ],
2349
  "logging_steps": 100,
@@ -2372,7 +2664,7 @@
2372
  "attributes": {}
2373
  }
2374
  },
2375
- "total_flos": 1.3588330900845235e+17,
2376
  "train_batch_size": 16,
2377
  "trial_name": null,
2378
  "trial_params": null
 
1
  {
2
+ "best_global_step": 36000,
3
+ "best_metric": 0.9893807849919393,
4
+ "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-36000",
5
+ "epoch": 1.152,
6
  "eval_steps": 4000,
7
+ "global_step": 36000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2344
  "eval_samples_per_second": 128.957,
2345
  "eval_steps_per_second": 8.06,
2346
  "step": 32000
2347
+ },
2348
+ {
2349
+ "epoch": 1.0272,
2350
+ "grad_norm": 1.0557373762130737,
2351
+ "learning_rate": 1.5942343499197433e-05,
2352
+ "loss": 0.0594,
2353
+ "step": 32100
2354
+ },
2355
+ {
2356
+ "epoch": 1.0304,
2357
+ "grad_norm": 0.03562343493103981,
2358
+ "learning_rate": 1.5929502407704655e-05,
2359
+ "loss": 0.0537,
2360
+ "step": 32200
2361
+ },
2362
+ {
2363
+ "epoch": 1.0336,
2364
+ "grad_norm": 0.06482692807912827,
2365
+ "learning_rate": 1.591666131621188e-05,
2366
+ "loss": 0.0463,
2367
+ "step": 32300
2368
+ },
2369
+ {
2370
+ "epoch": 1.0368,
2371
+ "grad_norm": 0.49017927050590515,
2372
+ "learning_rate": 1.59038202247191e-05,
2373
+ "loss": 0.0661,
2374
+ "step": 32400
2375
+ },
2376
+ {
2377
+ "epoch": 1.04,
2378
+ "grad_norm": 15.849417686462402,
2379
+ "learning_rate": 1.5890979133226326e-05,
2380
+ "loss": 0.0237,
2381
+ "step": 32500
2382
+ },
2383
+ {
2384
+ "epoch": 1.0432,
2385
+ "grad_norm": 0.6910932064056396,
2386
+ "learning_rate": 1.5878138041733547e-05,
2387
+ "loss": 0.0383,
2388
+ "step": 32600
2389
+ },
2390
+ {
2391
+ "epoch": 1.0464,
2392
+ "grad_norm": 0.06304027885198593,
2393
+ "learning_rate": 1.5865296950240772e-05,
2394
+ "loss": 0.0541,
2395
+ "step": 32700
2396
+ },
2397
+ {
2398
+ "epoch": 1.0496,
2399
+ "grad_norm": 11.758397102355957,
2400
+ "learning_rate": 1.5852455858747994e-05,
2401
+ "loss": 0.0573,
2402
+ "step": 32800
2403
+ },
2404
+ {
2405
+ "epoch": 1.0528,
2406
+ "grad_norm": 0.007580827921628952,
2407
+ "learning_rate": 1.583961476725522e-05,
2408
+ "loss": 0.0551,
2409
+ "step": 32900
2410
+ },
2411
+ {
2412
+ "epoch": 1.056,
2413
+ "grad_norm": 0.7389895915985107,
2414
+ "learning_rate": 1.582677367576244e-05,
2415
+ "loss": 0.0408,
2416
+ "step": 33000
2417
+ },
2418
+ {
2419
+ "epoch": 1.0592,
2420
+ "grad_norm": 0.011490071192383766,
2421
+ "learning_rate": 1.5813932584269665e-05,
2422
+ "loss": 0.0474,
2423
+ "step": 33100
2424
+ },
2425
+ {
2426
+ "epoch": 1.0624,
2427
+ "grad_norm": 0.00892989058047533,
2428
+ "learning_rate": 1.5801091492776886e-05,
2429
+ "loss": 0.0338,
2430
+ "step": 33200
2431
+ },
2432
+ {
2433
+ "epoch": 1.0656,
2434
+ "grad_norm": 0.21890480816364288,
2435
+ "learning_rate": 1.578825040128411e-05,
2436
+ "loss": 0.0465,
2437
+ "step": 33300
2438
+ },
2439
+ {
2440
+ "epoch": 1.0688,
2441
+ "grad_norm": 20.05085563659668,
2442
+ "learning_rate": 1.5775409309791332e-05,
2443
+ "loss": 0.0633,
2444
+ "step": 33400
2445
+ },
2446
+ {
2447
+ "epoch": 1.072,
2448
+ "grad_norm": 3.5141100883483887,
2449
+ "learning_rate": 1.5762568218298557e-05,
2450
+ "loss": 0.0743,
2451
+ "step": 33500
2452
+ },
2453
+ {
2454
+ "epoch": 1.0752,
2455
+ "grad_norm": 0.03382471203804016,
2456
+ "learning_rate": 1.5749727126805782e-05,
2457
+ "loss": 0.0409,
2458
+ "step": 33600
2459
+ },
2460
+ {
2461
+ "epoch": 1.0784,
2462
+ "grad_norm": 9.916868209838867,
2463
+ "learning_rate": 1.5736886035313004e-05,
2464
+ "loss": 0.0498,
2465
+ "step": 33700
2466
+ },
2467
+ {
2468
+ "epoch": 1.0816,
2469
+ "grad_norm": 0.007973396219313145,
2470
+ "learning_rate": 1.572404494382023e-05,
2471
+ "loss": 0.0399,
2472
+ "step": 33800
2473
+ },
2474
+ {
2475
+ "epoch": 1.0848,
2476
+ "grad_norm": 0.15778931975364685,
2477
+ "learning_rate": 1.571120385232745e-05,
2478
+ "loss": 0.0406,
2479
+ "step": 33900
2480
+ },
2481
+ {
2482
+ "epoch": 1.088,
2483
+ "grad_norm": 0.027699623256921768,
2484
+ "learning_rate": 1.569836276083467e-05,
2485
+ "loss": 0.0481,
2486
+ "step": 34000
2487
+ },
2488
+ {
2489
+ "epoch": 1.0912,
2490
+ "grad_norm": 2.7082841396331787,
2491
+ "learning_rate": 1.5685521669341893e-05,
2492
+ "loss": 0.0427,
2493
+ "step": 34100
2494
+ },
2495
+ {
2496
+ "epoch": 1.0944,
2497
+ "grad_norm": 2.933098316192627,
2498
+ "learning_rate": 1.5672680577849118e-05,
2499
+ "loss": 0.0605,
2500
+ "step": 34200
2501
+ },
2502
+ {
2503
+ "epoch": 1.0976,
2504
+ "grad_norm": 5.093338489532471,
2505
+ "learning_rate": 1.5659839486356342e-05,
2506
+ "loss": 0.0625,
2507
+ "step": 34300
2508
+ },
2509
+ {
2510
+ "epoch": 1.1008,
2511
+ "grad_norm": 0.14764878153800964,
2512
+ "learning_rate": 1.5646998394863564e-05,
2513
+ "loss": 0.0467,
2514
+ "step": 34400
2515
+ },
2516
+ {
2517
+ "epoch": 1.104,
2518
+ "grad_norm": 0.45360442996025085,
2519
+ "learning_rate": 1.563415730337079e-05,
2520
+ "loss": 0.0517,
2521
+ "step": 34500
2522
+ },
2523
+ {
2524
+ "epoch": 1.1072,
2525
+ "grad_norm": 0.007702784612774849,
2526
+ "learning_rate": 1.562131621187801e-05,
2527
+ "loss": 0.0367,
2528
+ "step": 34600
2529
+ },
2530
+ {
2531
+ "epoch": 1.1104,
2532
+ "grad_norm": 0.01513522956520319,
2533
+ "learning_rate": 1.5608475120385235e-05,
2534
+ "loss": 0.0514,
2535
+ "step": 34700
2536
+ },
2537
+ {
2538
+ "epoch": 1.1136,
2539
+ "grad_norm": 0.29241982102394104,
2540
+ "learning_rate": 1.5595634028892456e-05,
2541
+ "loss": 0.0428,
2542
+ "step": 34800
2543
+ },
2544
+ {
2545
+ "epoch": 1.1168,
2546
+ "grad_norm": 0.02503369376063347,
2547
+ "learning_rate": 1.558279293739968e-05,
2548
+ "loss": 0.0583,
2549
+ "step": 34900
2550
+ },
2551
+ {
2552
+ "epoch": 1.12,
2553
+ "grad_norm": 0.094393789768219,
2554
+ "learning_rate": 1.5569951845906903e-05,
2555
+ "loss": 0.0636,
2556
+ "step": 35000
2557
+ },
2558
+ {
2559
+ "epoch": 1.1232,
2560
+ "grad_norm": 0.011400578543543816,
2561
+ "learning_rate": 1.5557110754414127e-05,
2562
+ "loss": 0.0392,
2563
+ "step": 35100
2564
+ },
2565
+ {
2566
+ "epoch": 1.1264,
2567
+ "grad_norm": 0.015260876156389713,
2568
+ "learning_rate": 1.554426966292135e-05,
2569
+ "loss": 0.0452,
2570
+ "step": 35200
2571
+ },
2572
+ {
2573
+ "epoch": 1.1296,
2574
+ "grad_norm": 0.07425595074892044,
2575
+ "learning_rate": 1.5531428571428574e-05,
2576
+ "loss": 0.0506,
2577
+ "step": 35300
2578
+ },
2579
+ {
2580
+ "epoch": 1.1328,
2581
+ "grad_norm": 2.20249605178833,
2582
+ "learning_rate": 1.5518587479935795e-05,
2583
+ "loss": 0.0742,
2584
+ "step": 35400
2585
+ },
2586
+ {
2587
+ "epoch": 1.1360000000000001,
2588
+ "grad_norm": 0.027821656316518784,
2589
+ "learning_rate": 1.550574638844302e-05,
2590
+ "loss": 0.062,
2591
+ "step": 35500
2592
+ },
2593
+ {
2594
+ "epoch": 1.1392,
2595
+ "grad_norm": 0.02665848284959793,
2596
+ "learning_rate": 1.549290529695024e-05,
2597
+ "loss": 0.0739,
2598
+ "step": 35600
2599
+ },
2600
+ {
2601
+ "epoch": 1.1424,
2602
+ "grad_norm": 5.973049163818359,
2603
+ "learning_rate": 1.5480064205457466e-05,
2604
+ "loss": 0.0504,
2605
+ "step": 35700
2606
+ },
2607
+ {
2608
+ "epoch": 1.1456,
2609
+ "grad_norm": 10.988951683044434,
2610
+ "learning_rate": 1.5467223113964688e-05,
2611
+ "loss": 0.0493,
2612
+ "step": 35800
2613
+ },
2614
+ {
2615
+ "epoch": 1.1488,
2616
+ "grad_norm": 15.630033493041992,
2617
+ "learning_rate": 1.5454382022471913e-05,
2618
+ "loss": 0.0492,
2619
+ "step": 35900
2620
+ },
2621
+ {
2622
+ "epoch": 1.152,
2623
+ "grad_norm": 4.891533851623535,
2624
+ "learning_rate": 1.5441540930979134e-05,
2625
+ "loss": 0.0427,
2626
+ "step": 36000
2627
+ },
2628
+ {
2629
+ "epoch": 1.152,
2630
+ "eval_accuracy": 0.98938,
2631
+ "eval_f1": 0.9893807849919393,
2632
+ "eval_loss": 0.0424417182803154,
2633
+ "eval_precision": 0.9893870324245919,
2634
+ "eval_recall": 0.98938,
2635
+ "eval_runtime": 778.7856,
2636
+ "eval_samples_per_second": 128.405,
2637
+ "eval_steps_per_second": 8.025,
2638
+ "step": 36000
2639
  }
2640
  ],
2641
  "logging_steps": 100,
 
2664
  "attributes": {}
2665
  }
2666
  },
2667
+ "total_flos": 1.528656883652379e+17,
2668
  "train_batch_size": 16,
2669
  "trial_name": null,
2670
  "trial_params": null