Alawy21 committed on
Commit
63d78e2
·
verified ·
1 Parent(s): 055c899

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -199,4 +199,4 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
199
  [More Information Needed]
200
  ### Framework versions
201
 
202
- - PEFT 0.14.0
 
199
  [More Information Needed]
200
  ### Framework versions
201
 
202
+ - PEFT 0.15.1
last-checkpoint/adapter_config.json CHANGED
@@ -3,6 +3,7 @@
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "Qwen/Qwen2.5-1.5B-Instruct",
5
  "bias": "none",
 
6
  "eva_config": null,
7
  "exclude_modules": null,
8
  "fan_in_fan_out": false,
@@ -23,15 +24,16 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "v_proj",
27
- "k_proj",
28
- "o_proj",
29
  "down_proj",
 
 
 
30
  "q_proj",
31
- "up_proj",
32
- "gate_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
 
35
  "use_dora": false,
36
  "use_rslora": false
37
  }
 
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "Qwen/Qwen2.5-1.5B-Instruct",
5
  "bias": "none",
6
+ "corda_config": null,
7
  "eva_config": null,
8
  "exclude_modules": null,
9
  "fan_in_fan_out": false,
 
24
  "rank_pattern": {},
25
  "revision": null,
26
  "target_modules": [
27
+ "up_proj",
 
 
28
  "down_proj",
29
+ "gate_proj",
30
+ "k_proj",
31
+ "v_proj",
32
  "q_proj",
33
+ "o_proj"
 
34
  ],
35
  "task_type": "CAUSAL_LM",
36
+ "trainable_token_indices": null,
37
  "use_dora": false,
38
  "use_rslora": false
39
  }
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9358cbbc386d6b2282eaab719fb91f7944ec851f1dd9dfa3cdc3e4d904626bd1
3
  size 295488936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09d9cb540c5a860ee18428d97e9c6a2c9413c826ef861b10bfe2273b4ba87918
3
  size 295488936
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e366a5a55caf8970a6aed382a5162612264d1e701e1528dcdcf2b975d6c4e2ba
3
  size 591203178
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb303aed8062be2e28e6c28799dc86de07c6cd39ee42a654f3ed5866ddcc7f5a
3
  size 591203178
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b3ee827a7a00012c0a116546df467feee35e70376d81a7a85b1a70eb90414d3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9899ccda7f0d8d9511991180b93aab508ce6e8489de708c88ad1188e7e1d90d6
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce0591490dee1b15c2e96586a4eec4dc847b9a9979d5ab281d97b6f030d569a5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4be6285b51eb8628843ebcf538440997e9983602416b820387811937bd0b2278
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.824317362184441,
6
  "eval_steps": 100,
7
- "global_step": 400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -320,6 +320,162 @@
320
  "eval_samples_per_second": 0.391,
321
  "eval_steps_per_second": 0.391,
322
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
  }
324
  ],
325
  "logging_steps": 10,
@@ -339,7 +495,7 @@
339
  "attributes": {}
340
  }
341
  },
342
- "total_flos": 2.166912553132032e+16,
343
  "train_batch_size": 1,
344
  "trial_name": null,
345
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.2369912416280269,
6
  "eval_steps": 100,
7
+ "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
320
  "eval_samples_per_second": 0.391,
321
  "eval_steps_per_second": 0.391,
322
  "step": 400
323
+ },
324
+ {
325
+ "epoch": 0.844925296239052,
326
+ "grad_norm": 0.4683637022972107,
327
+ "learning_rate": 9.036604120048799e-05,
328
+ "loss": 0.2139,
329
+ "step": 410
330
+ },
331
+ {
332
+ "epoch": 0.865533230293663,
333
+ "grad_norm": 0.643914520740509,
334
+ "learning_rate": 8.964635069757802e-05,
335
+ "loss": 0.1967,
336
+ "step": 420
337
+ },
338
+ {
339
+ "epoch": 0.8861411643482741,
340
+ "grad_norm": 0.6378000974655151,
341
+ "learning_rate": 8.890382509957928e-05,
342
+ "loss": 0.2141,
343
+ "step": 430
344
+ },
345
+ {
346
+ "epoch": 0.9067490984028851,
347
+ "grad_norm": 0.5705697536468506,
348
+ "learning_rate": 8.813889207870718e-05,
349
+ "loss": 0.1967,
350
+ "step": 440
351
+ },
352
+ {
353
+ "epoch": 0.9273570324574961,
354
+ "grad_norm": 0.5441785454750061,
355
+ "learning_rate": 8.735199221317285e-05,
356
+ "loss": 0.2231,
357
+ "step": 450
358
+ },
359
+ {
360
+ "epoch": 0.9479649665121072,
361
+ "grad_norm": 0.5542232990264893,
362
+ "learning_rate": 8.654357873342345e-05,
363
+ "loss": 0.1916,
364
+ "step": 460
365
+ },
366
+ {
367
+ "epoch": 0.9685729005667182,
368
+ "grad_norm": 0.5035462975502014,
369
+ "learning_rate": 8.571411726109519e-05,
370
+ "loss": 0.2261,
371
+ "step": 470
372
+ },
373
+ {
374
+ "epoch": 0.9891808346213292,
375
+ "grad_norm": 0.49977976083755493,
376
+ "learning_rate": 8.486408554082935e-05,
377
+ "loss": 0.1987,
378
+ "step": 480
379
+ },
380
+ {
381
+ "epoch": 1.0103039670273055,
382
+ "grad_norm": 0.44534987211227417,
383
+ "learning_rate": 8.399397316510596e-05,
384
+ "loss": 0.2067,
385
+ "step": 490
386
+ },
387
+ {
388
+ "epoch": 1.0309119010819165,
389
+ "grad_norm": 0.4200068414211273,
390
+ "learning_rate": 8.310428129225325e-05,
391
+ "loss": 0.1384,
392
+ "step": 500
393
+ },
394
+ {
395
+ "epoch": 1.0309119010819165,
396
+ "eval_loss": 0.19919553399085999,
397
+ "eval_runtime": 147.0946,
398
+ "eval_samples_per_second": 0.408,
399
+ "eval_steps_per_second": 0.408,
400
+ "step": 500
401
+ },
402
+ {
403
+ "epoch": 1.0515198351365276,
404
+ "grad_norm": 0.4512649476528168,
405
+ "learning_rate": 8.219552235779578e-05,
406
+ "loss": 0.1319,
407
+ "step": 510
408
+ },
409
+ {
410
+ "epoch": 1.0721277691911386,
411
+ "grad_norm": 0.4668980538845062,
412
+ "learning_rate": 8.126821977930711e-05,
413
+ "loss": 0.126,
414
+ "step": 520
415
+ },
416
+ {
417
+ "epoch": 1.0927357032457496,
418
+ "grad_norm": 0.5090588331222534,
419
+ "learning_rate": 8.032290765493704e-05,
420
+ "loss": 0.1473,
421
+ "step": 530
422
+ },
423
+ {
424
+ "epoch": 1.1133436373003607,
425
+ "grad_norm": 0.6163284182548523,
426
+ "learning_rate": 7.936013045578745e-05,
427
+ "loss": 0.146,
428
+ "step": 540
429
+ },
430
+ {
431
+ "epoch": 1.1339515713549717,
432
+ "grad_norm": 0.5462138056755066,
433
+ "learning_rate": 7.838044271231333e-05,
434
+ "loss": 0.1349,
435
+ "step": 550
436
+ },
437
+ {
438
+ "epoch": 1.1545595054095827,
439
+ "grad_norm": 0.5338026881217957,
440
+ "learning_rate": 7.738440869493018e-05,
441
+ "loss": 0.14,
442
+ "step": 560
443
+ },
444
+ {
445
+ "epoch": 1.1751674394641938,
446
+ "grad_norm": 0.6935913562774658,
447
+ "learning_rate": 7.63726020890112e-05,
448
+ "loss": 0.1395,
449
+ "step": 570
450
+ },
451
+ {
452
+ "epoch": 1.1957753735188048,
453
+ "grad_norm": 0.4336049556732178,
454
+ "learning_rate": 7.534560566446216e-05,
455
+ "loss": 0.1381,
456
+ "step": 580
457
+ },
458
+ {
459
+ "epoch": 1.2163833075734158,
460
+ "grad_norm": 0.5455029606819153,
461
+ "learning_rate": 7.430401094006339e-05,
462
+ "loss": 0.1267,
463
+ "step": 590
464
+ },
465
+ {
466
+ "epoch": 1.2369912416280269,
467
+ "grad_norm": 0.6405333280563354,
468
+ "learning_rate": 7.324841784277302e-05,
469
+ "loss": 0.1487,
470
+ "step": 600
471
+ },
472
+ {
473
+ "epoch": 1.2369912416280269,
474
+ "eval_loss": 0.19719114899635315,
475
+ "eval_runtime": 147.2125,
476
+ "eval_samples_per_second": 0.408,
477
+ "eval_steps_per_second": 0.408,
478
+ "step": 600
479
  }
480
  ],
481
  "logging_steps": 10,
 
495
  "attributes": {}
496
  }
497
  },
498
+ "total_flos": 3.221911898996736e+16,
499
  "train_batch_size": 1,
500
  "trial_name": null,
501
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45d3b60c6cfeb2594183cbb1041a6b81ad0bf3b4f10261892a7b472c3a0d744d
3
- size 5752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a137e88e0e55a17fda65e854b20c699b62b6ccacbdb5fb18d98d0daf1f24b9f
3
+ size 5816