sharukat commited on
Commit
b7f3d2b
·
verified ·
1 Parent(s): 6aee025

Training in progress, epoch 2

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90be379e4d65a8b5b910b9bc0554fa08239403d5c3152f49aada5887906f518d
3
  size 502675828
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef7bced2bc60385c21f03fcae9691a5b296c12a7e761e636ae2518088d067e8c
3
  size 502675828
runs/Mar06_14-59-58_41759fa8e6ad/events.out.tfevents.1709737199.41759fa8e6ad.34.3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e92a20cc2a2ec4a6464065726cf070f4981477d74645a951f55f86afbf60e13
3
- size 5838
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:494669d6f22754997b246b9b888db23db9911c97b050782fb231af3e436332d6
3
+ size 6508
wandb/debug-internal.log CHANGED
@@ -369,3 +369,32 @@
369
  2024-03-06 15:00:49,309 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
370
  2024-03-06 15:00:49,830 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
371
  2024-03-06 15:00:50,266 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
  2024-03-06 15:00:49,309 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
370
  2024-03-06 15:00:49,830 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
371
  2024-03-06 15:00:50,266 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
372
+ 2024-03-06 15:00:54,309 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
373
+ 2024-03-06 15:00:54,831 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
374
+ 2024-03-06 15:00:55,267 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
375
+ 2024-03-06 15:00:56,788 DEBUG SenderThread:137 [sender.py:send():382] send: stats
376
+ 2024-03-06 15:00:59,311 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
377
+ 2024-03-06 15:01:00,267 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
378
+ 2024-03-06 15:01:00,789 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
379
+ 2024-03-06 15:01:01,367 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: partial_history
380
+ 2024-03-06 15:01:01,368 DEBUG SenderThread:137 [sender.py:send():382] send: history
381
+ 2024-03-06 15:01:01,369 DEBUG SenderThread:137 [sender.py:send_request():409] send_request: summary_record
382
+ 2024-03-06 15:01:01,369 INFO SenderThread:137 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
383
+ 2024-03-06 15:01:01,880 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/wandb-summary.json
384
+ 2024-03-06 15:01:02,484 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: partial_history
385
+ 2024-03-06 15:01:02,485 DEBUG SenderThread:137 [sender.py:send():382] send: history
386
+ 2024-03-06 15:01:02,486 DEBUG SenderThread:137 [sender.py:send_request():409] send_request: summary_record
387
+ 2024-03-06 15:01:02,486 INFO SenderThread:137 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
388
+ 2024-03-06 15:01:02,880 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/wandb-summary.json
389
+ 2024-03-06 15:01:03,881 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/output.log
390
+ 2024-03-06 15:01:04,991 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
391
+ 2024-03-06 15:01:05,268 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
392
+ 2024-03-06 15:01:06,488 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
393
+ 2024-03-06 15:01:10,269 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
394
+ 2024-03-06 15:01:10,633 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
395
+ 2024-03-06 15:01:11,489 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
396
+ 2024-03-06 15:01:15,270 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
397
+ 2024-03-06 15:01:15,634 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
398
+ 2024-03-06 15:01:16,490 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
399
+ 2024-03-06 15:01:20,271 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
400
+ 2024-03-06 15:01:20,636 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
wandb/run-20240306_145424-trm7fvg4/logs/debug-internal.log CHANGED
@@ -420,3 +420,32 @@ wandb.errors.AuthenticationError: The API key you provided is either invalid or
420
  2024-03-06 15:00:49,309 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
421
  2024-03-06 15:00:49,830 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
422
  2024-03-06 15:00:50,266 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
  2024-03-06 15:00:49,309 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
421
  2024-03-06 15:00:49,830 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
422
  2024-03-06 15:00:50,266 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
423
+ 2024-03-06 15:00:54,309 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
424
+ 2024-03-06 15:00:54,831 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
425
+ 2024-03-06 15:00:55,267 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
426
+ 2024-03-06 15:00:56,788 DEBUG SenderThread:137 [sender.py:send():382] send: stats
427
+ 2024-03-06 15:00:59,311 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
428
+ 2024-03-06 15:01:00,267 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
429
+ 2024-03-06 15:01:00,789 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
430
+ 2024-03-06 15:01:01,367 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: partial_history
431
+ 2024-03-06 15:01:01,368 DEBUG SenderThread:137 [sender.py:send():382] send: history
432
+ 2024-03-06 15:01:01,369 DEBUG SenderThread:137 [sender.py:send_request():409] send_request: summary_record
433
+ 2024-03-06 15:01:01,369 INFO SenderThread:137 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
434
+ 2024-03-06 15:01:01,880 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/wandb-summary.json
435
+ 2024-03-06 15:01:02,484 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: partial_history
436
+ 2024-03-06 15:01:02,485 DEBUG SenderThread:137 [sender.py:send():382] send: history
437
+ 2024-03-06 15:01:02,486 DEBUG SenderThread:137 [sender.py:send_request():409] send_request: summary_record
438
+ 2024-03-06 15:01:02,486 INFO SenderThread:137 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
439
+ 2024-03-06 15:01:02,880 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/wandb-summary.json
440
+ 2024-03-06 15:01:03,881 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/output.log
441
+ 2024-03-06 15:01:04,991 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
442
+ 2024-03-06 15:01:05,268 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
443
+ 2024-03-06 15:01:06,488 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
444
+ 2024-03-06 15:01:10,269 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
445
+ 2024-03-06 15:01:10,633 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
446
+ 2024-03-06 15:01:11,489 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
447
+ 2024-03-06 15:01:15,270 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
448
+ 2024-03-06 15:01:15,634 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
449
+ 2024-03-06 15:01:16,490 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
450
+ 2024-03-06 15:01:20,271 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
451
+ 2024-03-06 15:01:20,636 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
wandb/run-20240306_145455-h1uv5tyi/files/output.log CHANGED
@@ -14,3 +14,6 @@ Some weights of MegatronBertForSequenceClassification were not initialized from
14
  You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
15
  /opt/conda/lib/python3.10/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
16
  _warn_prf(average, modifier, msg_start, len(result))
 
 
 
 
14
  You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
15
  /opt/conda/lib/python3.10/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
16
  _warn_prf(average, modifier, msg_start, len(result))
17
+ Checkpoint destination directory /kaggle/working/checkpoint-62 already exists and is non-empty. Saving will proceed but saved results may be invalid.
18
+ /opt/conda/lib/python3.10/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
19
+ _warn_prf(average, modifier, msg_start, len(result))
wandb/run-20240306_145455-h1uv5tyi/files/wandb-summary.json CHANGED
@@ -1 +1 @@
1
- {"train/loss": 1.822, "train/grad_norm": 8.18305492401123, "train/learning_rate": 9.016129032258065e-06, "train/epoch": 1.0, "train/global_step": 62, "_timestamp": 1709737228.8204873, "_runtime": 333.1017303466797, "_step": 13, "eval/loss": 1.7698227167129517, "eval/accuracy": 0.34545454545454546, "eval/precision": 0.11933884297520661, "eval/recall": 0.34545454545454546, "eval/f1": 0.17739557739557738, "eval/runtime": 1.1111, "eval/samples_per_second": 49.499, "eval/steps_per_second": 6.3, "train/train_runtime": 237.4526, "train/train_samples_per_second": 10.339, "train/train_steps_per_second": 1.306, "train/total_flos": 645966638976000.0, "train/train_loss": 1.7031736066264491}
 
1
+ {"train/loss": 1.7375, "train/grad_norm": 11.856295585632324, "train/learning_rate": 8.016129032258066e-06, "train/epoch": 2.0, "train/global_step": 124, "_timestamp": 1709737262.4837022, "_runtime": 366.764945268631, "_step": 15, "eval/loss": 1.7562878131866455, "eval/accuracy": 0.34545454545454546, "eval/precision": 0.11933884297520661, "eval/recall": 0.34545454545454546, "eval/f1": 0.17739557739557738, "eval/runtime": 1.1126, "eval/samples_per_second": 49.433, "eval/steps_per_second": 6.292, "train/train_runtime": 237.4526, "train/train_samples_per_second": 10.339, "train/train_steps_per_second": 1.306, "train/total_flos": 645966638976000.0, "train/train_loss": 1.7031736066264491}
wandb/run-20240306_145455-h1uv5tyi/logs/debug-internal.log CHANGED
@@ -369,3 +369,32 @@
369
  2024-03-06 15:00:49,309 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
370
  2024-03-06 15:00:49,830 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
371
  2024-03-06 15:00:50,266 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
  2024-03-06 15:00:49,309 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
370
  2024-03-06 15:00:49,830 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
371
  2024-03-06 15:00:50,266 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
372
+ 2024-03-06 15:00:54,309 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
373
+ 2024-03-06 15:00:54,831 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
374
+ 2024-03-06 15:00:55,267 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
375
+ 2024-03-06 15:00:56,788 DEBUG SenderThread:137 [sender.py:send():382] send: stats
376
+ 2024-03-06 15:00:59,311 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
377
+ 2024-03-06 15:01:00,267 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
378
+ 2024-03-06 15:01:00,789 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
379
+ 2024-03-06 15:01:01,367 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: partial_history
380
+ 2024-03-06 15:01:01,368 DEBUG SenderThread:137 [sender.py:send():382] send: history
381
+ 2024-03-06 15:01:01,369 DEBUG SenderThread:137 [sender.py:send_request():409] send_request: summary_record
382
+ 2024-03-06 15:01:01,369 INFO SenderThread:137 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
383
+ 2024-03-06 15:01:01,880 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/wandb-summary.json
384
+ 2024-03-06 15:01:02,484 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: partial_history
385
+ 2024-03-06 15:01:02,485 DEBUG SenderThread:137 [sender.py:send():382] send: history
386
+ 2024-03-06 15:01:02,486 DEBUG SenderThread:137 [sender.py:send_request():409] send_request: summary_record
387
+ 2024-03-06 15:01:02,486 INFO SenderThread:137 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
388
+ 2024-03-06 15:01:02,880 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/wandb-summary.json
389
+ 2024-03-06 15:01:03,881 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/output.log
390
+ 2024-03-06 15:01:04,991 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
391
+ 2024-03-06 15:01:05,268 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
392
+ 2024-03-06 15:01:06,488 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
393
+ 2024-03-06 15:01:10,269 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
394
+ 2024-03-06 15:01:10,633 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
395
+ 2024-03-06 15:01:11,489 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
396
+ 2024-03-06 15:01:15,270 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
397
+ 2024-03-06 15:01:15,634 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
398
+ 2024-03-06 15:01:16,490 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
399
+ 2024-03-06 15:01:20,271 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
400
+ 2024-03-06 15:01:20,636 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive