sharukat commited on
Commit
8363cf9
·
verified ·
1 Parent(s): 27fe8a7

Training in progress, epoch 1

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0655282b3c26d39cb6f722edab8cf5fb3f46b98806cf214937baa673bee90259
3
  size 502675828
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24d66d5c08239982b291b18547bf7a3b413e1f5f8f379ea71084959d0e365310
3
  size 502675828
runs/Mar06_15-06-49_41759fa8e6ad/events.out.tfevents.1709737609.41759fa8e6ad.34.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eda21a91222defd1d9c04a5595afc81f0a4f282670fd283512e28c8aabc0465c
3
+ size 5840
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0334548ea320d060d256556d98f48c6190b373c3072c01cc2834528cf956053
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a2b29be7f40a9eb19df11a7f2c0494973f61eb9a5930882dcc502ea9202e15a
3
  size 4856
wandb/debug-internal.log CHANGED
@@ -565,3 +565,145 @@
565
  2024-03-06 15:04:15,301 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
566
  2024-03-06 15:04:16,549 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
567
  2024-03-06 15:04:16,797 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
565
  2024-03-06 15:04:15,301 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
566
  2024-03-06 15:04:16,549 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
567
  2024-03-06 15:04:16,797 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
568
+ 2024-03-06 15:04:20,302 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
569
+ 2024-03-06 15:04:21,550 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
570
+ 2024-03-06 15:04:21,798 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
571
+ 2024-03-06 15:04:25,302 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
572
+ 2024-03-06 15:04:26,552 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
573
+ 2024-03-06 15:04:26,793 DEBUG SenderThread:137 [sender.py:send():382] send: stats
574
+ 2024-03-06 15:04:27,794 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
575
+ 2024-03-06 15:04:30,303 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
576
+ 2024-03-06 15:04:31,553 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
577
+ 2024-03-06 15:04:32,795 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
578
+ 2024-03-06 15:04:35,304 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
579
+ 2024-03-06 15:04:36,554 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
580
+ 2024-03-06 15:04:37,796 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
581
+ 2024-03-06 15:04:40,305 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
582
+ 2024-03-06 15:04:41,556 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
583
+ 2024-03-06 15:04:41,963 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/output.log
584
+ 2024-03-06 15:04:43,556 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
585
+ 2024-03-06 15:04:45,306 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
586
+ 2024-03-06 15:04:46,557 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
587
+ 2024-03-06 15:04:48,557 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
588
+ 2024-03-06 15:04:50,307 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
589
+ 2024-03-06 15:04:51,558 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
590
+ 2024-03-06 15:04:53,558 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
591
+ 2024-03-06 15:04:55,307 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
592
+ 2024-03-06 15:04:56,559 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
593
+ 2024-03-06 15:04:56,794 DEBUG SenderThread:137 [sender.py:send():382] send: stats
594
+ 2024-03-06 15:04:58,795 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
595
+ 2024-03-06 15:05:00,308 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
596
+ 2024-03-06 15:05:01,560 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
597
+ 2024-03-06 15:05:03,796 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
598
+ 2024-03-06 15:05:05,309 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
599
+ 2024-03-06 15:05:06,561 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
600
+ 2024-03-06 15:05:08,797 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
601
+ 2024-03-06 15:05:10,310 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
602
+ 2024-03-06 15:05:11,562 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
603
+ 2024-03-06 15:05:13,798 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
604
+ 2024-03-06 15:05:15,311 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
605
+ 2024-03-06 15:05:16,563 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
606
+ 2024-03-06 15:05:18,799 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
607
+ 2024-03-06 15:05:20,311 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
608
+ 2024-03-06 15:05:21,564 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
609
+ 2024-03-06 15:05:23,800 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
610
+ 2024-03-06 15:05:25,312 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
611
+ 2024-03-06 15:05:26,565 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
612
+ 2024-03-06 15:05:26,795 DEBUG SenderThread:137 [sender.py:send():382] send: stats
613
+ 2024-03-06 15:05:29,796 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
614
+ 2024-03-06 15:05:30,313 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
615
+ 2024-03-06 15:05:31,567 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
616
+ 2024-03-06 15:05:34,797 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
617
+ 2024-03-06 15:05:35,314 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
618
+ 2024-03-06 15:05:36,568 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
619
+ 2024-03-06 15:05:39,798 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
620
+ 2024-03-06 15:05:40,315 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
621
+ 2024-03-06 15:05:41,569 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
622
+ 2024-03-06 15:05:44,799 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
623
+ 2024-03-06 15:05:45,316 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
624
+ 2024-03-06 15:05:46,570 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
625
+ 2024-03-06 15:05:49,800 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
626
+ 2024-03-06 15:05:50,317 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
627
+ 2024-03-06 15:05:51,571 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
628
+ 2024-03-06 15:05:54,801 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
629
+ 2024-03-06 15:05:55,318 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
630
+ 2024-03-06 15:05:56,572 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
631
+ 2024-03-06 15:05:56,795 DEBUG SenderThread:137 [sender.py:send():382] send: stats
632
+ 2024-03-06 15:06:00,320 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
633
+ 2024-03-06 15:06:00,797 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
634
+ 2024-03-06 15:06:01,573 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
635
+ 2024-03-06 15:06:05,320 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
636
+ 2024-03-06 15:06:05,798 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
637
+ 2024-03-06 15:06:06,574 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
638
+ 2024-03-06 15:06:10,322 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
639
+ 2024-03-06 15:06:10,799 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
640
+ 2024-03-06 15:06:11,575 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
641
+ 2024-03-06 15:06:15,323 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
642
+ 2024-03-06 15:06:15,800 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
643
+ 2024-03-06 15:06:16,576 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
644
+ 2024-03-06 15:06:20,324 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
645
+ 2024-03-06 15:06:20,801 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
646
+ 2024-03-06 15:06:21,577 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
647
+ 2024-03-06 15:06:25,325 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
648
+ 2024-03-06 15:06:25,802 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
649
+ 2024-03-06 15:06:26,578 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
650
+ 2024-03-06 15:06:26,796 DEBUG SenderThread:137 [sender.py:send():382] send: stats
651
+ 2024-03-06 15:06:30,326 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
652
+ 2024-03-06 15:06:31,579 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
653
+ 2024-03-06 15:06:31,798 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
654
+ 2024-03-06 15:06:35,327 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
655
+ 2024-03-06 15:06:36,580 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
656
+ 2024-03-06 15:06:36,799 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
657
+ 2024-03-06 15:06:40,328 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
658
+ 2024-03-06 15:06:41,581 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
659
+ 2024-03-06 15:06:41,800 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
660
+ 2024-03-06 15:06:45,328 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
661
+ 2024-03-06 15:06:46,582 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
662
+ 2024-03-06 15:06:46,801 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
663
+ 2024-03-06 15:06:49,869 DEBUG SenderThread:137 [sender.py:send():382] send: config
664
+ 2024-03-06 15:06:49,871 DEBUG SenderThread:137 [sender.py:send():382] send: metric
665
+ 2024-03-06 15:06:49,871 DEBUG SenderThread:137 [sender.py:send():382] send: metric
666
+ 2024-03-06 15:06:49,871 WARNING SenderThread:137 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
667
+ 2024-03-06 15:06:50,021 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/output.log
668
+ 2024-03-06 15:06:50,329 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
669
+ 2024-03-06 15:06:51,649 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
670
+ 2024-03-06 15:06:51,872 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
671
+ 2024-03-06 15:06:55,330 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
672
+ 2024-03-06 15:06:56,650 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
673
+ 2024-03-06 15:06:56,797 DEBUG SenderThread:137 [sender.py:send():382] send: stats
674
+ 2024-03-06 15:06:57,798 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
675
+ 2024-03-06 15:07:00,331 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
676
+ 2024-03-06 15:07:01,651 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
677
+ 2024-03-06 15:07:02,799 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
678
+ 2024-03-06 15:07:05,331 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
679
+ 2024-03-06 15:07:06,652 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
680
+ 2024-03-06 15:07:07,800 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
681
+ 2024-03-06 15:07:10,332 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
682
+ 2024-03-06 15:07:11,654 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
683
+ 2024-03-06 15:07:12,801 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
684
+ 2024-03-06 15:07:15,333 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
685
+ 2024-03-06 15:07:16,655 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
686
+ 2024-03-06 15:07:17,025 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: partial_history
687
+ 2024-03-06 15:07:17,026 DEBUG SenderThread:137 [sender.py:send():382] send: history
688
+ 2024-03-06 15:07:17,026 DEBUG SenderThread:137 [sender.py:send_request():409] send_request: summary_record
689
+ 2024-03-06 15:07:17,027 INFO SenderThread:137 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
690
+ 2024-03-06 15:07:17,033 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/wandb-summary.json
691
+ 2024-03-06 15:07:18,033 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
692
+ 2024-03-06 15:07:18,154 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: partial_history
693
+ 2024-03-06 15:07:18,357 DEBUG SenderThread:137 [sender.py:send():382] send: history
694
+ 2024-03-06 15:07:18,357 DEBUG SenderThread:137 [sender.py:send_request():409] send_request: summary_record
695
+ 2024-03-06 15:07:18,358 INFO SenderThread:137 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
696
+ 2024-03-06 15:07:19,038 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/config.yaml
697
+ 2024-03-06 15:07:19,038 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/wandb-summary.json
698
+ 2024-03-06 15:07:20,038 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/output.log
699
+ 2024-03-06 15:07:20,338 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
700
+ 2024-03-06 15:07:21,908 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
701
+ 2024-03-06 15:07:23,359 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
702
+ 2024-03-06 15:07:25,338 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
703
+ 2024-03-06 15:07:26,798 DEBUG SenderThread:137 [sender.py:send():382] send: stats
704
+ 2024-03-06 15:07:26,909 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
705
+ 2024-03-06 15:07:28,799 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
706
+ 2024-03-06 15:07:30,339 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
707
+ 2024-03-06 15:07:31,922 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
708
+ 2024-03-06 15:07:33,800 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
709
+ 2024-03-06 15:07:35,340 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
wandb/debug.log CHANGED
@@ -27,3 +27,4 @@ config: {}
27
  2024-03-06 14:55:56,791 INFO MainThread:34 [wandb_init.py:init():847] run started, returning control to user process
28
  2024-03-06 14:55:56,798 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-54-50_41759fa8e6ad', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
29
  2024-03-06 14:59:59,464 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 10, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-59-58_41759fa8e6ad', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
 
 
27
  2024-03-06 14:55:56,791 INFO MainThread:34 [wandb_init.py:init():847] run started, returning control to user process
28
  2024-03-06 14:55:56,798 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-54-50_41759fa8e6ad', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
29
  2024-03-06 14:59:59,464 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 10, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-59-58_41759fa8e6ad', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
30
+ 2024-03-06 15:06:49,865 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 10, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_15-06-49_41759fa8e6ad', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
wandb/run-20240306_145424-trm7fvg4/logs/debug-internal.log CHANGED
@@ -616,3 +616,145 @@ wandb.errors.AuthenticationError: The API key you provided is either invalid or
616
  2024-03-06 15:04:15,301 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
617
  2024-03-06 15:04:16,549 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
618
  2024-03-06 15:04:16,797 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
616
  2024-03-06 15:04:15,301 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
617
  2024-03-06 15:04:16,549 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
618
  2024-03-06 15:04:16,797 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
619
+ 2024-03-06 15:04:20,302 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
620
+ 2024-03-06 15:04:21,550 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
621
+ 2024-03-06 15:04:21,798 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
622
+ 2024-03-06 15:04:25,302 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
623
+ 2024-03-06 15:04:26,552 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
624
+ 2024-03-06 15:04:26,793 DEBUG SenderThread:137 [sender.py:send():382] send: stats
625
+ 2024-03-06 15:04:27,794 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
626
+ 2024-03-06 15:04:30,303 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
627
+ 2024-03-06 15:04:31,553 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
628
+ 2024-03-06 15:04:32,795 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
629
+ 2024-03-06 15:04:35,304 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
630
+ 2024-03-06 15:04:36,554 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
631
+ 2024-03-06 15:04:37,796 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
632
+ 2024-03-06 15:04:40,305 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
633
+ 2024-03-06 15:04:41,556 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
634
+ 2024-03-06 15:04:41,963 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/output.log
635
+ 2024-03-06 15:04:43,556 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
636
+ 2024-03-06 15:04:45,306 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
637
+ 2024-03-06 15:04:46,557 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
638
+ 2024-03-06 15:04:48,557 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
639
+ 2024-03-06 15:04:50,307 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
640
+ 2024-03-06 15:04:51,558 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
641
+ 2024-03-06 15:04:53,558 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
642
+ 2024-03-06 15:04:55,307 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
643
+ 2024-03-06 15:04:56,559 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
644
+ 2024-03-06 15:04:56,794 DEBUG SenderThread:137 [sender.py:send():382] send: stats
645
+ 2024-03-06 15:04:58,795 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
646
+ 2024-03-06 15:05:00,308 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
647
+ 2024-03-06 15:05:01,560 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
648
+ 2024-03-06 15:05:03,796 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
649
+ 2024-03-06 15:05:05,309 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
650
+ 2024-03-06 15:05:06,561 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
651
+ 2024-03-06 15:05:08,797 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
652
+ 2024-03-06 15:05:10,310 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
653
+ 2024-03-06 15:05:11,562 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
654
+ 2024-03-06 15:05:13,798 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
655
+ 2024-03-06 15:05:15,311 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
656
+ 2024-03-06 15:05:16,563 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
657
+ 2024-03-06 15:05:18,799 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
658
+ 2024-03-06 15:05:20,311 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
659
+ 2024-03-06 15:05:21,564 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
660
+ 2024-03-06 15:05:23,800 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
661
+ 2024-03-06 15:05:25,312 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
662
+ 2024-03-06 15:05:26,565 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
663
+ 2024-03-06 15:05:26,795 DEBUG SenderThread:137 [sender.py:send():382] send: stats
664
+ 2024-03-06 15:05:29,796 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
665
+ 2024-03-06 15:05:30,313 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
666
+ 2024-03-06 15:05:31,567 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
667
+ 2024-03-06 15:05:34,797 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
668
+ 2024-03-06 15:05:35,314 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
669
+ 2024-03-06 15:05:36,568 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
670
+ 2024-03-06 15:05:39,798 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
671
+ 2024-03-06 15:05:40,315 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
672
+ 2024-03-06 15:05:41,569 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
673
+ 2024-03-06 15:05:44,799 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
674
+ 2024-03-06 15:05:45,316 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
675
+ 2024-03-06 15:05:46,570 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
676
+ 2024-03-06 15:05:49,800 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
677
+ 2024-03-06 15:05:50,317 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
678
+ 2024-03-06 15:05:51,571 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
679
+ 2024-03-06 15:05:54,801 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
680
+ 2024-03-06 15:05:55,318 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
681
+ 2024-03-06 15:05:56,572 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
682
+ 2024-03-06 15:05:56,795 DEBUG SenderThread:137 [sender.py:send():382] send: stats
683
+ 2024-03-06 15:06:00,320 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
684
+ 2024-03-06 15:06:00,797 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
685
+ 2024-03-06 15:06:01,573 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
686
+ 2024-03-06 15:06:05,320 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
687
+ 2024-03-06 15:06:05,798 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
688
+ 2024-03-06 15:06:06,574 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
689
+ 2024-03-06 15:06:10,322 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
690
+ 2024-03-06 15:06:10,799 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
691
+ 2024-03-06 15:06:11,575 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
692
+ 2024-03-06 15:06:15,323 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
693
+ 2024-03-06 15:06:15,800 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
694
+ 2024-03-06 15:06:16,576 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
695
+ 2024-03-06 15:06:20,324 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
696
+ 2024-03-06 15:06:20,801 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
697
+ 2024-03-06 15:06:21,577 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
698
+ 2024-03-06 15:06:25,325 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
699
+ 2024-03-06 15:06:25,802 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
700
+ 2024-03-06 15:06:26,578 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
701
+ 2024-03-06 15:06:26,796 DEBUG SenderThread:137 [sender.py:send():382] send: stats
702
+ 2024-03-06 15:06:30,326 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
703
+ 2024-03-06 15:06:31,579 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
704
+ 2024-03-06 15:06:31,798 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
705
+ 2024-03-06 15:06:35,327 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
706
+ 2024-03-06 15:06:36,580 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
707
+ 2024-03-06 15:06:36,799 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
708
+ 2024-03-06 15:06:40,328 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
709
+ 2024-03-06 15:06:41,581 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
710
+ 2024-03-06 15:06:41,800 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
711
+ 2024-03-06 15:06:45,328 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
712
+ 2024-03-06 15:06:46,582 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
713
+ 2024-03-06 15:06:46,801 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
714
+ 2024-03-06 15:06:49,869 DEBUG SenderThread:137 [sender.py:send():382] send: config
715
+ 2024-03-06 15:06:49,871 DEBUG SenderThread:137 [sender.py:send():382] send: metric
716
+ 2024-03-06 15:06:49,871 DEBUG SenderThread:137 [sender.py:send():382] send: metric
717
+ 2024-03-06 15:06:49,871 WARNING SenderThread:137 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
718
+ 2024-03-06 15:06:50,021 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/output.log
719
+ 2024-03-06 15:06:50,329 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
720
+ 2024-03-06 15:06:51,649 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
721
+ 2024-03-06 15:06:51,872 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
722
+ 2024-03-06 15:06:55,330 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
723
+ 2024-03-06 15:06:56,650 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
724
+ 2024-03-06 15:06:56,797 DEBUG SenderThread:137 [sender.py:send():382] send: stats
725
+ 2024-03-06 15:06:57,798 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
726
+ 2024-03-06 15:07:00,331 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
727
+ 2024-03-06 15:07:01,651 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
728
+ 2024-03-06 15:07:02,799 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
729
+ 2024-03-06 15:07:05,331 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
730
+ 2024-03-06 15:07:06,652 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
731
+ 2024-03-06 15:07:07,800 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
732
+ 2024-03-06 15:07:10,332 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
733
+ 2024-03-06 15:07:11,654 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
734
+ 2024-03-06 15:07:12,801 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
735
+ 2024-03-06 15:07:15,333 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
736
+ 2024-03-06 15:07:16,655 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
737
+ 2024-03-06 15:07:17,025 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: partial_history
738
+ 2024-03-06 15:07:17,026 DEBUG SenderThread:137 [sender.py:send():382] send: history
739
+ 2024-03-06 15:07:17,026 DEBUG SenderThread:137 [sender.py:send_request():409] send_request: summary_record
740
+ 2024-03-06 15:07:17,027 INFO SenderThread:137 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
741
+ 2024-03-06 15:07:17,033 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/wandb-summary.json
742
+ 2024-03-06 15:07:18,033 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
743
+ 2024-03-06 15:07:18,154 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: partial_history
744
+ 2024-03-06 15:07:18,357 DEBUG SenderThread:137 [sender.py:send():382] send: history
745
+ 2024-03-06 15:07:18,357 DEBUG SenderThread:137 [sender.py:send_request():409] send_request: summary_record
746
+ 2024-03-06 15:07:18,358 INFO SenderThread:137 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
747
+ 2024-03-06 15:07:19,038 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/config.yaml
748
+ 2024-03-06 15:07:19,038 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/wandb-summary.json
749
+ 2024-03-06 15:07:20,038 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/output.log
750
+ 2024-03-06 15:07:20,338 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
751
+ 2024-03-06 15:07:21,908 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
752
+ 2024-03-06 15:07:23,359 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
753
+ 2024-03-06 15:07:25,338 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
754
+ 2024-03-06 15:07:26,798 DEBUG SenderThread:137 [sender.py:send():382] send: stats
755
+ 2024-03-06 15:07:26,909 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
756
+ 2024-03-06 15:07:28,799 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
757
+ 2024-03-06 15:07:30,339 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
758
+ 2024-03-06 15:07:31,922 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
759
+ 2024-03-06 15:07:33,800 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
760
+ 2024-03-06 15:07:35,340 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
wandb/run-20240306_145424-trm7fvg4/logs/debug.log CHANGED
@@ -67,3 +67,4 @@ config: {}
67
  2024-03-06 14:55:56,791 INFO MainThread:34 [wandb_init.py:init():847] run started, returning control to user process
68
  2024-03-06 14:55:56,798 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-54-50_41759fa8e6ad', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
69
  2024-03-06 14:59:59,464 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 10, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-59-58_41759fa8e6ad', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
 
 
67
  2024-03-06 14:55:56,791 INFO MainThread:34 [wandb_init.py:init():847] run started, returning control to user process
68
  2024-03-06 14:55:56,798 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-54-50_41759fa8e6ad', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
69
  2024-03-06 14:59:59,464 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 10, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-59-58_41759fa8e6ad', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
70
+ 2024-03-06 15:06:49,865 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 10, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_15-06-49_41759fa8e6ad', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
wandb/run-20240306_145455-h1uv5tyi/files/config.yaml CHANGED
@@ -372,10 +372,10 @@ prediction_loss_only:
372
  value: false
373
  per_device_train_batch_size:
374
  desc: null
375
- value: 8
376
  per_device_eval_batch_size:
377
  desc: null
378
- value: 8
379
  per_gpu_train_batch_size:
380
  desc: null
381
  value: null
@@ -393,7 +393,7 @@ eval_delay:
393
  value: 0
394
  learning_rate:
395
  desc: null
396
- value: 1.0e-05
397
  weight_decay:
398
  desc: null
399
  value: 0.0
@@ -438,7 +438,7 @@ log_on_each_node:
438
  value: true
439
  logging_dir:
440
  desc: null
441
- value: /kaggle/working/runs/Mar06_14-59-58_41759fa8e6ad
442
  logging_strategy:
443
  desc: null
444
  value: epoch
 
372
  value: false
373
  per_device_train_batch_size:
374
  desc: null
375
+ value: 16
376
  per_device_eval_batch_size:
377
  desc: null
378
+ value: 16
379
  per_gpu_train_batch_size:
380
  desc: null
381
  value: null
 
393
  value: 0
394
  learning_rate:
395
  desc: null
396
+ value: 3.0e-05
397
  weight_decay:
398
  desc: null
399
  value: 0.0
 
438
  value: true
439
  logging_dir:
440
  desc: null
441
+ value: /kaggle/working/runs/Mar06_15-06-49_41759fa8e6ad
442
  logging_strategy:
443
  desc: null
444
  value: epoch
wandb/run-20240306_145455-h1uv5tyi/files/output.log CHANGED
@@ -29,4 +29,10 @@ Checkpoint destination directory /kaggle/working/checkpoint-248 already exists a
29
  Checkpoint destination directory /kaggle/working/checkpoint-310 already exists and is non-empty. Saving will proceed but saved results may be invalid.
30
  /opt/conda/lib/python3.10/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
31
  _warn_prf(average, modifier, msg_start, len(result))
 
 
 
 
 
 
32
  /opt/conda/lib/python3.10/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
 
29
  Checkpoint destination directory /kaggle/working/checkpoint-310 already exists and is non-empty. Saving will proceed but saved results may be invalid.
30
  /opt/conda/lib/python3.10/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
31
  _warn_prf(average, modifier, msg_start, len(result))
32
+ /opt/conda/lib/python3.10/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
33
+ _warn_prf(average, modifier, msg_start, len(result))
34
+ Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
35
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
36
+ Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
37
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
38
  /opt/conda/lib/python3.10/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
wandb/run-20240306_145455-h1uv5tyi/files/wandb-summary.json CHANGED
@@ -1 +1 @@
1
- {"train/loss": 1.63, "train/grad_norm": 15.767184257507324, "train/learning_rate": 3.0322580645161295e-06, "train/epoch": 7.0, "train/global_step": 434, "_timestamp": 1709737432.2685022, "_runtime": 536.5497453212738, "_step": 25, "eval/loss": 1.7277967929840088, "eval/accuracy": 0.38181818181818183, "eval/precision": 0.2177777777777778, "eval/recall": 0.38181818181818183, "eval/f1": 0.2628615702479339, "eval/runtime": 1.108, "eval/samples_per_second": 49.638, "eval/steps_per_second": 6.318, "train/train_runtime": 237.4526, "train/train_samples_per_second": 10.339, "train/train_steps_per_second": 1.306, "train/total_flos": 645966638976000.0, "train/train_loss": 1.7031736066264491}
 
1
+ {"train/loss": 1.8204, "train/grad_norm": 5.6789045333862305, "train/learning_rate": 2.7000000000000002e-05, "train/epoch": 1.0, "train/global_step": 31, "_timestamp": 1709737638.1537485, "_runtime": 742.4349915981293, "_step": 27, "eval/loss": 1.7834787368774414, "eval/accuracy": 0.32727272727272727, "eval/precision": 0.11515151515151514, "eval/recall": 0.32727272727272727, "eval/f1": 0.17036114570361147, "eval/runtime": 1.1251, "eval/samples_per_second": 48.886, "eval/steps_per_second": 3.555, "train/train_runtime": 237.4526, "train/train_samples_per_second": 10.339, "train/train_steps_per_second": 1.306, "train/total_flos": 645966638976000.0, "train/train_loss": 1.7031736066264491}
wandb/run-20240306_145455-h1uv5tyi/logs/debug-internal.log CHANGED
@@ -565,3 +565,145 @@
565
  2024-03-06 15:04:15,301 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
566
  2024-03-06 15:04:16,549 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
567
  2024-03-06 15:04:16,797 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
565
  2024-03-06 15:04:15,301 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
566
  2024-03-06 15:04:16,549 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
567
  2024-03-06 15:04:16,797 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
568
+ 2024-03-06 15:04:20,302 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
569
+ 2024-03-06 15:04:21,550 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
570
+ 2024-03-06 15:04:21,798 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
571
+ 2024-03-06 15:04:25,302 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
572
+ 2024-03-06 15:04:26,552 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
573
+ 2024-03-06 15:04:26,793 DEBUG SenderThread:137 [sender.py:send():382] send: stats
574
+ 2024-03-06 15:04:27,794 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
575
+ 2024-03-06 15:04:30,303 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
576
+ 2024-03-06 15:04:31,553 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
577
+ 2024-03-06 15:04:32,795 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
578
+ 2024-03-06 15:04:35,304 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
579
+ 2024-03-06 15:04:36,554 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
580
+ 2024-03-06 15:04:37,796 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
581
+ 2024-03-06 15:04:40,305 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
582
+ 2024-03-06 15:04:41,556 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
583
+ 2024-03-06 15:04:41,963 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/output.log
584
+ 2024-03-06 15:04:43,556 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
585
+ 2024-03-06 15:04:45,306 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
586
+ 2024-03-06 15:04:46,557 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
587
+ 2024-03-06 15:04:48,557 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
588
+ 2024-03-06 15:04:50,307 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
589
+ 2024-03-06 15:04:51,558 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
590
+ 2024-03-06 15:04:53,558 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
591
+ 2024-03-06 15:04:55,307 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
592
+ 2024-03-06 15:04:56,559 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
593
+ 2024-03-06 15:04:56,794 DEBUG SenderThread:137 [sender.py:send():382] send: stats
594
+ 2024-03-06 15:04:58,795 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
595
+ 2024-03-06 15:05:00,308 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
596
+ 2024-03-06 15:05:01,560 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
597
+ 2024-03-06 15:05:03,796 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
598
+ 2024-03-06 15:05:05,309 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
599
+ 2024-03-06 15:05:06,561 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
600
+ 2024-03-06 15:05:08,797 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
601
+ 2024-03-06 15:05:10,310 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
602
+ 2024-03-06 15:05:11,562 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
603
+ 2024-03-06 15:05:13,798 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
604
+ 2024-03-06 15:05:15,311 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
605
+ 2024-03-06 15:05:16,563 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
606
+ 2024-03-06 15:05:18,799 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
607
+ 2024-03-06 15:05:20,311 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
608
+ 2024-03-06 15:05:21,564 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
609
+ 2024-03-06 15:05:23,800 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
610
+ 2024-03-06 15:05:25,312 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
611
+ 2024-03-06 15:05:26,565 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
612
+ 2024-03-06 15:05:26,795 DEBUG SenderThread:137 [sender.py:send():382] send: stats
613
+ 2024-03-06 15:05:29,796 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
614
+ 2024-03-06 15:05:30,313 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
615
+ 2024-03-06 15:05:31,567 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
616
+ 2024-03-06 15:05:34,797 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
617
+ 2024-03-06 15:05:35,314 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
618
+ 2024-03-06 15:05:36,568 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
619
+ 2024-03-06 15:05:39,798 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
620
+ 2024-03-06 15:05:40,315 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
621
+ 2024-03-06 15:05:41,569 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
622
+ 2024-03-06 15:05:44,799 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
623
+ 2024-03-06 15:05:45,316 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
624
+ 2024-03-06 15:05:46,570 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
625
+ 2024-03-06 15:05:49,800 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
626
+ 2024-03-06 15:05:50,317 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
627
+ 2024-03-06 15:05:51,571 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
628
+ 2024-03-06 15:05:54,801 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
629
+ 2024-03-06 15:05:55,318 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
630
+ 2024-03-06 15:05:56,572 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
631
+ 2024-03-06 15:05:56,795 DEBUG SenderThread:137 [sender.py:send():382] send: stats
632
+ 2024-03-06 15:06:00,320 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
633
+ 2024-03-06 15:06:00,797 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
634
+ 2024-03-06 15:06:01,573 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
635
+ 2024-03-06 15:06:05,320 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
636
+ 2024-03-06 15:06:05,798 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
637
+ 2024-03-06 15:06:06,574 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
638
+ 2024-03-06 15:06:10,322 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
639
+ 2024-03-06 15:06:10,799 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
640
+ 2024-03-06 15:06:11,575 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
641
+ 2024-03-06 15:06:15,323 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
642
+ 2024-03-06 15:06:15,800 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
643
+ 2024-03-06 15:06:16,576 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
644
+ 2024-03-06 15:06:20,324 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
645
+ 2024-03-06 15:06:20,801 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
646
+ 2024-03-06 15:06:21,577 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
647
+ 2024-03-06 15:06:25,325 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
648
+ 2024-03-06 15:06:25,802 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
649
+ 2024-03-06 15:06:26,578 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
650
+ 2024-03-06 15:06:26,796 DEBUG SenderThread:137 [sender.py:send():382] send: stats
651
+ 2024-03-06 15:06:30,326 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
652
+ 2024-03-06 15:06:31,579 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
653
+ 2024-03-06 15:06:31,798 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
654
+ 2024-03-06 15:06:35,327 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
655
+ 2024-03-06 15:06:36,580 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
656
+ 2024-03-06 15:06:36,799 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
657
+ 2024-03-06 15:06:40,328 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
658
+ 2024-03-06 15:06:41,581 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
659
+ 2024-03-06 15:06:41,800 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
660
+ 2024-03-06 15:06:45,328 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
661
+ 2024-03-06 15:06:46,582 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
662
+ 2024-03-06 15:06:46,801 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
663
+ 2024-03-06 15:06:49,869 DEBUG SenderThread:137 [sender.py:send():382] send: config
664
+ 2024-03-06 15:06:49,871 DEBUG SenderThread:137 [sender.py:send():382] send: metric
665
+ 2024-03-06 15:06:49,871 DEBUG SenderThread:137 [sender.py:send():382] send: metric
666
+ 2024-03-06 15:06:49,871 WARNING SenderThread:137 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
667
+ 2024-03-06 15:06:50,021 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/output.log
668
+ 2024-03-06 15:06:50,329 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
669
+ 2024-03-06 15:06:51,649 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
670
+ 2024-03-06 15:06:51,872 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
671
+ 2024-03-06 15:06:55,330 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
672
+ 2024-03-06 15:06:56,650 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
673
+ 2024-03-06 15:06:56,797 DEBUG SenderThread:137 [sender.py:send():382] send: stats
674
+ 2024-03-06 15:06:57,798 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
675
+ 2024-03-06 15:07:00,331 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
676
+ 2024-03-06 15:07:01,651 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
677
+ 2024-03-06 15:07:02,799 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
678
+ 2024-03-06 15:07:05,331 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
679
+ 2024-03-06 15:07:06,652 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
680
+ 2024-03-06 15:07:07,800 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
681
+ 2024-03-06 15:07:10,332 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
682
+ 2024-03-06 15:07:11,654 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
683
+ 2024-03-06 15:07:12,801 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
684
+ 2024-03-06 15:07:15,333 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
685
+ 2024-03-06 15:07:16,655 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
686
+ 2024-03-06 15:07:17,025 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: partial_history
687
+ 2024-03-06 15:07:17,026 DEBUG SenderThread:137 [sender.py:send():382] send: history
688
+ 2024-03-06 15:07:17,026 DEBUG SenderThread:137 [sender.py:send_request():409] send_request: summary_record
689
+ 2024-03-06 15:07:17,027 INFO SenderThread:137 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
690
+ 2024-03-06 15:07:17,033 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/wandb-summary.json
691
+ 2024-03-06 15:07:18,033 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
692
+ 2024-03-06 15:07:18,154 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: partial_history
693
+ 2024-03-06 15:07:18,357 DEBUG SenderThread:137 [sender.py:send():382] send: history
694
+ 2024-03-06 15:07:18,357 DEBUG SenderThread:137 [sender.py:send_request():409] send_request: summary_record
695
+ 2024-03-06 15:07:18,358 INFO SenderThread:137 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
696
+ 2024-03-06 15:07:19,038 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/config.yaml
697
+ 2024-03-06 15:07:19,038 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/wandb-summary.json
698
+ 2024-03-06 15:07:20,038 INFO Thread-18 :137 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_145455-h1uv5tyi/files/output.log
699
+ 2024-03-06 15:07:20,338 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
700
+ 2024-03-06 15:07:21,908 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
701
+ 2024-03-06 15:07:23,359 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
702
+ 2024-03-06 15:07:25,338 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
703
+ 2024-03-06 15:07:26,798 DEBUG SenderThread:137 [sender.py:send():382] send: stats
704
+ 2024-03-06 15:07:26,909 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
705
+ 2024-03-06 15:07:28,799 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
706
+ 2024-03-06 15:07:30,339 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
707
+ 2024-03-06 15:07:31,922 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: keepalive
708
+ 2024-03-06 15:07:33,800 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
709
+ 2024-03-06 15:07:35,340 DEBUG HandlerThread:137 [handler.py:handle_request():146] handle_request: status_report
wandb/run-20240306_145455-h1uv5tyi/logs/debug.log CHANGED
@@ -27,3 +27,4 @@ config: {}
27
  2024-03-06 14:55:56,791 INFO MainThread:34 [wandb_init.py:init():847] run started, returning control to user process
28
  2024-03-06 14:55:56,798 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-54-50_41759fa8e6ad', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
29
  2024-03-06 14:59:59,464 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 10, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-59-58_41759fa8e6ad', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
 
 
27
  2024-03-06 14:55:56,791 INFO MainThread:34 [wandb_init.py:init():847] run started, returning control to user process
28
  2024-03-06 14:55:56,798 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-54-50_41759fa8e6ad', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
29
  2024-03-06 14:59:59,464 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 10, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-59-58_41759fa8e6ad', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
30
+ 2024-03-06 15:06:49,865 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 10, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_15-06-49_41759fa8e6ad', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
wandb/run-20240306_145455-h1uv5tyi/run-h1uv5tyi.wandb CHANGED
Binary files a/wandb/run-20240306_145455-h1uv5tyi/run-h1uv5tyi.wandb and b/wandb/run-20240306_145455-h1uv5tyi/run-h1uv5tyi.wandb differ