Muhammad Farrukh Mehmood committed on
Commit
4deee95
·
verified ·
1 Parent(s): e17baa8

End of training

Browse files
README.md ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: HuggingFaceTB/SmolLM2-135M
3
+ library_name: transformers
4
+ model_name: smollm2-sft-test1
5
+ tags:
6
+ - generated_from_trainer
7
+ - smol-course
8
+ - module_1
9
+ - trl
10
+ - sft
11
+ license: apache-2.0
12
+ ---
13
+
14
+ # Model Card for smollm2-sft-test1
15
+
16
+ This model is a fine-tuned version of [HuggingFaceTB/SmolLM2-135M](https://huggingface.co/HuggingFaceTB/SmolLM2-135M).
17
+ It has been trained using [TRL](https://github.com/huggingface/trl).
18
+
19
+ ## Quick start
20
+
21
+ ```python
22
+ from transformers import pipeline
23
+
24
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
25
+ generator = pipeline("text-generation", model="sfarrukh/smollm2-sft-test1", device="cuda")
26
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
27
+ print(output["generated_text"])
28
+ ```
29
+
30
+ ## Training procedure
31
+
32
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/farrukhmehmood-nts-the-islamia-university-of-bahawalpur/huggingface/runs/kk1dm8nx)
33
+
34
+
35
+ This model was trained with supervised fine-tuning (SFT).
36
+
37
+ ### Framework versions
38
+
39
+ - TRL: 0.13.0
40
+ - Transformers: 4.47.1
41
+ - Pytorch: 2.5.1+cu121
42
+ - Datasets: 3.2.0
43
+ - Tokenizers: 0.21.0
44
+
45
+ ## Citations
46
+
47
+
48
+
49
+ Cite TRL as:
50
+
51
+ ```bibtex
52
+ @misc{vonwerra2022trl,
53
+ title = {{TRL: Transformer Reinforcement Learning}},
54
+ author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
55
+ year = 2020,
56
+ journal = {GitHub repository},
57
+ publisher = {GitHub},
58
+ howpublished = {\url{https://github.com/huggingface/trl}}
59
+ }
60
+ ```
config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "HuggingFaceTB/SmolLM2-135M",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "eos_token_id": 2,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 576,
13
+ "initializer_range": 0.041666666666666664,
14
+ "intermediate_size": 1536,
15
+ "is_llama_config": true,
16
+ "max_position_embeddings": 8192,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "num_attention_heads": 9,
20
+ "num_hidden_layers": 30,
21
+ "num_key_value_heads": 3,
22
+ "pad_token_id": 2,
23
+ "pretraining_tp": 1,
24
+ "rms_norm_eps": 1e-05,
25
+ "rope_interleaved": false,
26
+ "rope_scaling": null,
27
+ "rope_theta": 100000,
28
+ "tie_word_embeddings": true,
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.47.1",
31
+ "use_cache": true,
32
+ "vocab_size": 49152
33
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 2,
6
+ "transformers_version": "4.47.1"
7
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcffccc97b1ea08e794e534190b323e296d32c5de7077f75e7012b9148ca47b4
3
+ size 538090408
runs/Dec23_06-10-57_f534e42ab4c2/events.out.tfevents.1734934368.f534e42ab4c2.632.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9bf40d539cc705b2fd631b9e3e59983d6c13a1bf88c4823551a0901049fe0c3
3
+ size 32517
smollm2-sft-test1/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "HuggingFaceTB/SmolLM2-135M",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "eos_token_id": 2,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 576,
13
+ "initializer_range": 0.041666666666666664,
14
+ "intermediate_size": 1536,
15
+ "is_llama_config": true,
16
+ "max_position_embeddings": 8192,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "num_attention_heads": 9,
20
+ "num_hidden_layers": 30,
21
+ "num_key_value_heads": 3,
22
+ "pad_token_id": 2,
23
+ "pretraining_tp": 1,
24
+ "rms_norm_eps": 1e-05,
25
+ "rope_interleaved": false,
26
+ "rope_scaling": null,
27
+ "rope_theta": 100000,
28
+ "tie_word_embeddings": true,
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.47.1",
31
+ "use_cache": true,
32
+ "vocab_size": 49152
33
+ }
smollm2-sft-test1/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 2,
6
+ "transformers_version": "4.47.1"
7
+ }
smollm2-sft-test1/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
smollm2-sft-test1/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcffccc97b1ea08e794e534190b323e296d32c5de7077f75e7012b9148ca47b4
3
+ size 538090408
smollm2-sft-test1/special_tokens_map.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<|im_start|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<|im_end|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
+ "bos_token": "<|im_start|>",
19
+ "eos_token": "<|im_end|>",
20
+ "pad_token": "<|im_end|>",
21
+ "unk_token": {
22
+ "content": "<|endoftext|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false
27
+ }
28
+ }
smollm2-sft-test1/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
smollm2-sft-test1/tokenizer_config.json ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<repo_name>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "4": {
37
+ "content": "<reponame>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "5": {
45
+ "content": "<file_sep>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "6": {
53
+ "content": "<filename>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "7": {
61
+ "content": "<gh_stars>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "8": {
69
+ "content": "<issue_start>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "9": {
77
+ "content": "<issue_comment>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "10": {
85
+ "content": "<issue_closed>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "11": {
93
+ "content": "<jupyter_start>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "12": {
101
+ "content": "<jupyter_text>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "13": {
109
+ "content": "<jupyter_code>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "14": {
117
+ "content": "<jupyter_output>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "15": {
125
+ "content": "<jupyter_script>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "16": {
133
+ "content": "<empty_output>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ }
140
+ },
141
+ "additional_special_tokens": [
142
+ "<|im_start|>",
143
+ "<|im_end|>"
144
+ ],
145
+ "bos_token": "<|im_start|>",
146
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
147
+ "clean_up_tokenization_spaces": false,
148
+ "eos_token": "<|im_end|>",
149
+ "extra_special_tokens": {},
150
+ "model_max_length": 8192,
151
+ "pad_token": "<|im_end|>",
152
+ "tokenizer_class": "GPT2Tokenizer",
153
+ "unk_token": "<|endoftext|>",
154
+ "vocab_size": 49152
155
+ }
smollm2-sft-test1/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35459363358047d4a2f46e71bc01618c53808c4628467f77f88dbf99f150f477
3
+ size 5688
smollm2-sft-test1/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<|im_start|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<|im_end|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
+ "bos_token": "<|im_start|>",
19
+ "eos_token": "<|im_end|>",
20
+ "pad_token": "<|im_end|>",
21
+ "unk_token": {
22
+ "content": "<|endoftext|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false
27
+ }
28
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<repo_name>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "4": {
37
+ "content": "<reponame>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "5": {
45
+ "content": "<file_sep>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "6": {
53
+ "content": "<filename>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "7": {
61
+ "content": "<gh_stars>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "8": {
69
+ "content": "<issue_start>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "9": {
77
+ "content": "<issue_comment>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "10": {
85
+ "content": "<issue_closed>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "11": {
93
+ "content": "<jupyter_start>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "12": {
101
+ "content": "<jupyter_text>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "13": {
109
+ "content": "<jupyter_code>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "14": {
117
+ "content": "<jupyter_output>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "15": {
125
+ "content": "<jupyter_script>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "16": {
133
+ "content": "<empty_output>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ }
140
+ },
141
+ "additional_special_tokens": [
142
+ "<|im_start|>",
143
+ "<|im_end|>"
144
+ ],
145
+ "bos_token": "<|im_start|>",
146
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
147
+ "clean_up_tokenization_spaces": false,
148
+ "eos_token": "<|im_end|>",
149
+ "extra_special_tokens": {},
150
+ "model_max_length": 8192,
151
+ "pad_token": "<|im_end|>",
152
+ "tokenizer_class": "GPT2Tokenizer",
153
+ "unk_token": "<|endoftext|>",
154
+ "vocab_size": 49152
155
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35459363358047d4a2f46e71bc01618c53808c4628467f77f88dbf99f150f477
3
+ size 5688
vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
wandb/debug-internal.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-23T06:24:33.403481622Z","level":"INFO","msg":"using version","core version":"0.19.1"}
2
+ {"time":"2024-12-23T06:24:33.403525688Z","level":"INFO","msg":"created symlink","path":"/content/smollm-fine-tuning/wandb/run-20241223_062433-kk1dm8nx/logs/debug-core.log"}
3
+ {"time":"2024-12-23T06:24:33.515469919Z","level":"INFO","msg":"created new stream","id":"kk1dm8nx"}
4
+ {"time":"2024-12-23T06:24:33.515527717Z","level":"INFO","msg":"stream: started","id":"kk1dm8nx"}
5
+ {"time":"2024-12-23T06:24:33.515537628Z","level":"INFO","msg":"writer: Do: started","stream_id":"kk1dm8nx"}
6
+ {"time":"2024-12-23T06:24:33.515733694Z","level":"INFO","msg":"handler: started","stream_id":"kk1dm8nx"}
7
+ {"time":"2024-12-23T06:24:33.515756662Z","level":"INFO","msg":"sender: started","stream_id":"kk1dm8nx"}
8
+ {"time":"2024-12-23T06:24:36.718018301Z","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2024-12-23T06:42:14.432322766Z","level":"INFO","msg":"Pausing system monitor"}
10
+ {"time":"2024-12-23T06:58:34.001396346Z","level":"INFO","msg":"Resuming system monitor"}
11
+ {"time":"2024-12-23T06:58:34.115544844Z","level":"INFO","msg":"Pausing system monitor"}
12
+ {"time":"2024-12-23T06:59:50.035860315Z","level":"INFO","msg":"Resuming system monitor"}
13
+ {"time":"2024-12-23T06:59:57.29236364Z","level":"INFO","msg":"Pausing system monitor"}
14
+ {"time":"2024-12-23T07:00:55.04796864Z","level":"INFO","msg":"Resuming system monitor"}
wandb/debug.log ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-12-23 06:24:33,393 INFO MainThread:632 [wandb_setup.py:_flush():68] Current SDK version is 0.19.1
2
+ 2024-12-23 06:24:33,393 INFO MainThread:632 [wandb_setup.py:_flush():68] Configure stats pid to 632
3
+ 2024-12-23 06:24:33,393 INFO MainThread:632 [wandb_setup.py:_flush():68] Loading settings from /root/.config/wandb/settings
4
+ 2024-12-23 06:24:33,393 INFO MainThread:632 [wandb_setup.py:_flush():68] Loading settings from /content/smollm-fine-tuning/wandb/settings
5
+ 2024-12-23 06:24:33,393 INFO MainThread:632 [wandb_setup.py:_flush():68] Loading settings from environment variables
6
+ 2024-12-23 06:24:33,393 INFO MainThread:632 [wandb_init.py:_log_setup():528] Logging user logs to /content/smollm-fine-tuning/wandb/run-20241223_062433-kk1dm8nx/logs/debug.log
7
+ 2024-12-23 06:24:33,393 INFO MainThread:632 [wandb_init.py:_log_setup():529] Logging internal logs to /content/smollm-fine-tuning/wandb/run-20241223_062433-kk1dm8nx/logs/debug-internal.log
8
+ 2024-12-23 06:24:33,393 INFO MainThread:632 [wandb_init.py:_jupyter_setup():474] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x78e9883258d0>
9
+ 2024-12-23 06:24:33,394 INFO MainThread:632 [wandb_init.py:init():644] calling init triggers
10
+ 2024-12-23 06:24:33,394 INFO MainThread:632 [wandb_init.py:init():650] wandb.init called with sweep_config: {}
11
+ config: {}
12
+ 2024-12-23 06:24:33,394 INFO MainThread:632 [wandb_init.py:init():680] starting backend
13
+ 2024-12-23 06:24:33,394 INFO MainThread:632 [wandb_init.py:init():684] sending inform_init request
14
+ 2024-12-23 06:24:33,400 INFO MainThread:632 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
15
+ 2024-12-23 06:24:33,400 INFO MainThread:632 [wandb_init.py:init():697] backend started and connected
16
+ 2024-12-23 06:24:33,414 INFO MainThread:632 [wandb_run.py:_label_probe_notebook():1222] probe notebook
17
+ 2024-12-23 06:24:36,588 INFO MainThread:632 [wandb_init.py:init():790] updated telemetry
18
+ 2024-12-23 06:24:36,594 INFO MainThread:632 [wandb_init.py:init():822] communicating run to backend with 90.0 second timeout
19
+ 2024-12-23 06:24:36,712 INFO MainThread:632 [wandb_init.py:init():874] starting run threads in backend
20
+ 2024-12-23 06:24:37,146 INFO MainThread:632 [wandb_run.py:_console_start():2374] atexit reg
21
+ 2024-12-23 06:24:37,147 INFO MainThread:632 [wandb_run.py:_redirect():2224] redirect: wrap_raw
22
+ 2024-12-23 06:24:37,147 INFO MainThread:632 [wandb_run.py:_redirect():2289] Wrapping output streams.
23
+ 2024-12-23 06:24:37,147 INFO MainThread:632 [wandb_run.py:_redirect():2314] Redirects installed.
24
+ 2024-12-23 06:24:37,152 INFO MainThread:632 [wandb_init.py:init():916] run started, returning control to user process
25
+ 2024-12-23 06:24:37,156 INFO MainThread:632 [wandb_run.py:_config_callback():1279] config_cb None None {'vocab_size': 49152, 'max_position_embeddings': 8192, 'hidden_size': 576, 'intermediate_size': 1536, 'num_hidden_layers': 30, 'num_attention_heads': 9, 'num_key_value_heads': 3, 'hidden_act': 'silu', 'initializer_range': 0.041666666666666664, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': True, 'rope_theta': 100000, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'mlp_bias': False, 'head_dim': 64, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 2, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'HuggingFaceTB/SmolLM2-135M', '_attn_implementation_autoset': True, 
'transformers_version': '4.47.1', 'is_llama_config': True, 'model_type': 'llama', 'rope_interleaved': False, 'output_dir': '/content/drive/MyDrive/smollm-fine-tuning/trained_models', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 10, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 1000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/content/drive/MyDrive/smollm-fine-tuning/trained_models/runs/Dec23_06-10-57_f534e42ab4c2', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 50, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/content/drive/MyDrive/smollm-fine-tuning/trained_models', 'disable_tqdm': False, 'remove_unused_columns': 
True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': 'smollm2-sft-test1', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 
'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'dataset_text_field': 'text', 'packing': False, 'max_seq_length': 1024, 'dataset_num_proc': None, 'dataset_batch_size': 1000, 'model_init_kwargs': None, 'dataset_kwargs': {'add_special_tokens': False}, 'eval_packing': None, 'num_of_sequences': 1024, 'chars_per_token': '<CHARS_PER_TOKEN>', 'use_liger': False}
26
+ 2024-12-23 06:24:37,159 INFO MainThread:632 [wandb_config.py:__setitem__():154] config set model/num_parameters = 134515008 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x78e95840efe0>>
27
+ 2024-12-23 06:24:37,159 INFO MainThread:632 [wandb_run.py:_config_callback():1279] config_cb model/num_parameters 134515008 None
28
+ 2024-12-23 06:42:14,430 INFO MainThread:632 [jupyter.py:save_ipynb():386] not saving jupyter notebook
29
+ 2024-12-23 06:42:14,431 INFO MainThread:632 [wandb_init.py:_pause_backend():439] pausing backend
30
+ 2024-12-23 06:58:34,000 INFO MainThread:632 [wandb_init.py:_resume_backend():444] resuming backend
31
+ 2024-12-23 06:58:34,115 INFO MainThread:632 [jupyter.py:save_ipynb():386] not saving jupyter notebook
32
+ 2024-12-23 06:58:34,115 INFO MainThread:632 [wandb_init.py:_pause_backend():439] pausing backend
33
+ 2024-12-23 06:59:50,031 INFO MainThread:632 [wandb_init.py:_resume_backend():444] resuming backend
34
+ 2024-12-23 06:59:57,291 INFO MainThread:632 [jupyter.py:save_ipynb():386] not saving jupyter notebook
35
+ 2024-12-23 06:59:57,291 INFO MainThread:632 [wandb_init.py:_pause_backend():439] pausing backend
36
+ 2024-12-23 07:00:55,043 INFO MainThread:632 [wandb_init.py:_resume_backend():444] resuming backend
wandb/run-20241223_062433-kk1dm8nx/files/output.log ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
2
+ cp: -r not specified; omitting directory '/content/smollm-fine-tuning/smollm2-sft-test1'
wandb/run-20241223_062433-kk1dm8nx/files/requirements.txt ADDED
@@ -0,0 +1,579 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ multiprocess==0.70.16
2
+ fsspec==2024.9.0
3
+ dill==0.3.8
4
+ xxhash==3.5.0
5
+ datasets==3.2.0
6
+ trl==0.13.0
7
+ google-colab==1.0.0
8
+ colour==0.1.5
9
+ httpimport==1.4.0
10
+ ipyfilechooser==0.6.0
11
+ miniKanren==1.0.3
12
+ protobuf==4.25.5
13
+ pycocotools==2.0.8
14
+ cudf-cu12==24.10.1
15
+ yarl==1.18.3
16
+ safetensors==0.4.5
17
+ en-core-web-sm==3.7.1
18
+ nest-asyncio==1.6.0
19
+ pandas-gbq==0.25.0
20
+ Cython==3.0.11
21
+ torchsummary==1.5.1
22
+ weasel==0.4.1
23
+ markdown-it-py==3.0.0
24
+ pydantic==2.10.3
25
+ cvxpy==1.6.0
26
+ tables==3.10.1
27
+ optree==0.13.1
28
+ backcall==0.2.0
29
+ ipykernel==5.5.6
30
+ google-cloud-resource-manager==1.14.0
31
+ ipyparallel==8.8.0
32
+ pickleshare==0.7.5
33
+ scipy==1.13.1
34
+ prettytable==3.12.0
35
+ ml-dtypes==0.4.1
36
+ multidict==6.1.0
37
+ grpcio-status==1.62.3
38
+ nx-cugraph-cu12==24.10.0
39
+ cloudpickle==3.1.0
40
+ websocket-client==1.8.0
41
+ pillow==11.0.0
42
+ GitPython==3.1.43
43
+ exceptiongroup==1.2.2
44
+ propcache==0.2.1
45
+ tensorflow-probability==0.24.0
46
+ patsy==1.0.1
47
+ traitlets==5.7.1
48
+ gitdb==4.0.11
49
+ mistune==3.0.2
50
+ nltk==3.9.1
51
+ alabaster==1.0.0
52
+ h5netcdf==1.4.1
53
+ pytest==8.3.4
54
+ blinker==1.9.0
55
+ language_data==1.3.0
56
+ cupy-cuda12x==12.2.0
57
+ librosa==0.10.2.post1
58
+ entrypoints==0.4
59
+ tornado==6.3.3
60
+ traittypes==0.2.1
61
+ ipyleaflet==0.19.2
62
+ iniconfig==2.0.0
63
+ blis==0.7.11
64
+ cmake==3.31.2
65
+ datascience==0.17.6
66
+ xyzservices==2024.9.0
67
+ Markdown==3.7
68
+ pymc==5.19.1
69
+ async-timeout==4.0.3
70
+ decorator==4.4.2
71
+ google-api-core==2.19.2
72
+ argon2-cffi-bindings==21.2.0
73
+ fastcore==1.7.27
74
+ h11==0.14.0
75
+ more-itertools==10.5.0
76
+ terminado==0.18.1
77
+ fastprogress==1.0.3
78
+ sphinxcontrib-htmlhelp==2.1.0
79
+ tensorflow==2.17.1
80
+ blosc2==2.7.1
81
+ jupyter_core==5.7.2
82
+ jaxlib==0.4.33
83
+ python-box==7.3.0
84
+ itsdangerous==2.2.0
85
+ community==1.0.0b1
86
+ imageio==2.36.1
87
+ joblib==1.4.2
88
+ platformdirs==4.3.6
89
+ pandas-stubs==2.2.2.240909
90
+ pygit2==1.16.0
91
+ mlxtend==0.23.3
92
+ lightgbm==4.5.0
93
+ tbb==2022.0.0
94
+ wasabi==1.1.3
95
+ jsonpatch==1.33
96
+ packaging==24.2
97
+ nvidia-nvjitlink-cu12==12.6.85
98
+ Bottleneck==1.4.2
99
+ peewee==3.17.8
100
+ pyparsing==3.2.0
101
+ httplib2==0.22.0
102
+ moviepy==1.0.3
103
+ sniffio==1.3.1
104
+ pyviz_comms==3.0.3
105
+ jiter==0.8.2
106
+ array_record==0.5.1
107
+ google-cloud-core==2.4.1
108
+ attrs==24.3.0
109
+ progressbar2==4.5.0
110
+ colorcet==3.1.0
111
+ PyYAML==6.0.2
112
+ pydantic_core==2.27.1
113
+ google-cloud-aiplatform==1.74.0
114
+ sphinxcontrib-devhelp==2.0.0
115
+ google-cloud-bigquery-storage==2.27.0
116
+ opentelemetry-sdk==1.29.0
117
+ gast==0.6.0
118
+ pyzmq==24.0.1
119
+ uritemplate==4.1.1
120
+ pytz==2024.2
121
+ pydotplus==2.0.2
122
+ nvtx==0.2.10
123
+ tensorboard==2.17.1
124
+ missingno==0.5.2
125
+ flatbuffers==24.3.25
126
+ cons==0.4.6
127
+ libclang==18.1.1
128
+ defusedxml==0.7.1
129
+ jellyfish==1.1.0
130
+ grpcio==1.68.1
131
+ argon2-cffi==23.1.0
132
+ linkify-it-py==2.0.3
133
+ rsa==4.9
134
+ psycopg2==2.9.10
135
+ MarkupSafe==3.0.2
136
+ uc-micro-py==1.0.3
137
+ typing_extensions==4.12.2
138
+ ipython==7.34.0
139
+ pyarrow==17.0.0
140
+ toolz==0.12.1
141
+ jupyterlab_pygments==0.3.0
142
+ osqp==0.6.7.post3
143
+ huggingface-hub==0.27.0
144
+ opentelemetry-semantic-conventions==0.50b0
145
+ cloudpathlib==0.20.0
146
+ msgpack==1.1.0
147
+ google-cloud-bigtable==2.27.0
148
+ rich==13.9.4
149
+ cachetools==5.5.0
150
+ editdistance==0.8.1
151
+ regex==2024.11.6
152
+ param==2.2.0
153
+ cffi==1.17.1
154
+ google==2.0.3
155
+ promise==2.3
156
+ hyperopt==0.2.7
157
+ python-slugify==8.0.4
158
+ astunparse==1.6.3
159
+ pyperclip==1.9.0
160
+ tensorstore==0.1.71
161
+ aiohttp==3.11.10
162
+ altair==5.5.0
163
+ catalogue==2.0.10
164
+ astropy==6.1.7
165
+ sphinxcontrib-qthelp==2.0.0
166
+ GDAL==3.6.4
167
+ google-generativeai==0.8.3
168
+ networkx==3.4.2
169
+ pyasn1==0.6.1
170
+ sqlglot==25.1.0
171
+ gensim==4.3.3
172
+ albumentations==1.4.20
173
+ CacheControl==0.14.1
174
+ ipywidgets==7.7.1
175
+ toml==0.10.2
176
+ annotated-types==0.7.0
177
+ yfinance==0.2.50
178
+ googledrivedownloader==0.4
179
+ srsly==2.5.0
180
+ proto-plus==1.25.0
181
+ tabulate==0.9.0
182
+ fastai==2.7.18
183
+ Send2Trash==1.8.3
184
+ dask==2024.10.0
185
+ jsonpickle==4.0.1
186
+ seaborn==0.13.2
187
+ setproctitle==1.3.4
188
+ referencing==0.35.1
189
+ music21==9.3.0
190
+ xarray-einstats==0.8.0
191
+ astropy-iers-data==0.2024.12.16.0.35.48
192
+ chardet==5.2.0
193
+ wrapt==1.17.0
194
+ mdurl==0.1.2
195
+ openai==1.57.4
196
+ google-resumable-media==2.7.2
197
+ geopy==2.4.1
198
+ plotnine==0.14.4
199
+ statsmodels==0.14.4
200
+ google-crc32c==1.6.0
201
+ scs==3.2.7
202
+ Pyomo==6.8.2
203
+ keras==3.5.0
204
+ gspread-dataframe==3.3.1
205
+ notebook_shim==0.2.4
206
+ langchain-text-splitters==0.3.3
207
+ oauthlib==3.2.2
208
+ tcmlib==1.2.0
209
+ tifffile==2024.12.12
210
+ cmdstanpy==1.2.5
211
+ diffusers==0.31.0
212
+ aiohappyeyeballs==2.4.4
213
+ autograd==1.7.0
214
+ lazy_loader==0.4
215
+ graphviz==0.20.3
216
+ nvidia-nccl-cu12==2.23.4
217
+ pydot==3.0.3
218
+ tf_keras==2.17.0
219
+ bqplot==0.12.43
220
+ torchaudio==2.5.1+cu121
221
+ kagglehub==0.3.5
222
+ imgaug==0.4.0
223
+ nvidia-curand-cu12==10.3.7.77
224
+ cymem==2.0.10
225
+ glob2==0.7
226
+ eerepr==0.0.4
227
+ yellowbrick==1.5
228
+ umf==0.9.1
229
+ PyDrive==1.3.1
230
+ langsmith==0.2.3
231
+ ratelim==0.1.6
232
+ importlib_resources==6.4.5
233
+ einops==0.8.0
234
+ peft==0.14.0
235
+ langchain-core==0.3.25
236
+ cycler==0.12.1
237
+ html5lib==1.1
238
+ smart-open==7.1.0
239
+ ply==3.11
240
+ sphinxcontrib-serializinghtml==2.0.0
241
+ simple-parsing==0.1.6
242
+ smmap==5.0.1
243
+ tzdata==2024.2
244
+ libcudf-cu12==24.10.1
245
+ dopamine_rl==4.1.0
246
+ zipp==3.21.0
247
+ imageio-ffmpeg==0.5.1
248
+ wcwidth==0.2.13
249
+ text-unidecode==1.3
250
+ orbax-checkpoint==0.6.4
251
+ et_xmlfile==2.0.0
252
+ frozenlist==1.5.0
253
+ google-cloud-pubsub==2.27.1
254
+ marisa-trie==1.2.1
255
+ db-dtypes==1.3.1
256
+ nvidia-cuda-cupti-cu12==12.6.80
257
+ pexpect==4.9.0
258
+ psutil==5.9.5
259
+ google-cloud-language==2.16.0
260
+ opentelemetry-api==1.29.0
261
+ SQLAlchemy==2.0.36
262
+ soupsieve==2.6
263
+ Sphinx==8.1.3
264
+ pyogrio==0.10.0
265
+ qdldl==0.1.7.post4
266
+ branca==0.8.1
267
+ oauth2client==4.1.3
268
+ google-auth==2.27.0
269
+ google-cloud-functions==1.19.0
270
+ tinycss2==1.4.0
271
+ jupyter-server==1.24.0
272
+ scikit-learn==1.6.0
273
+ jsonschema-specifications==2024.10.1
274
+ ndindex==1.9.2
275
+ geographiclib==2.0
276
+ Jinja2==3.1.4
277
+ googleapis-common-protos==1.66.0
278
+ urllib3==2.2.3
279
+ opencv-python-headless==4.10.0.84
280
+ google-cloud-bigquery==3.25.0
281
+ transformers==4.47.1
282
+ wandb==0.19.1
283
+ torch==2.5.1+cu121
284
+ pymystem3==0.2.0
285
+ pyOpenSSL==24.2.1
286
+ stringzilla==3.11.1
287
+ numba==0.60.0
288
+ intel-cmplr-lib-ur==2025.0.4
289
+ polars==1.9.0
290
+ tweepy==4.14.0
291
+ plotly==5.24.1
292
+ nvidia-cuda-runtime-cu12==12.6.77
293
+ narwhals==1.18.4
294
+ widgetsnbextension==3.6.10
295
+ PyJWT==2.10.1
296
+ etils==1.11.0
297
+ proglog==0.1.10
298
+ tomli==2.2.1
299
+ rpds-py==0.22.3
300
+ google-cloud-iam==2.17.0
301
+ immutabledict==4.2.1
302
+ portpicker==1.5.2
303
+ nvidia-cuda-nvcc-cu12==12.6.85
304
+ nbclassic==1.1.0
305
+ llvmlite==0.43.0
306
+ importlib_metadata==8.5.0
307
+ rmm-cu12==24.10.0
308
+ tzlocal==5.2
309
+ nbconvert==7.16.4
310
+ mizani==0.13.1
311
+ shapely==2.0.6
312
+ python-louvain==0.16
313
+ dm-tree==0.1.8
314
+ opencv-contrib-python==4.10.0.84
315
+ ipython-sql==0.5.0
316
+ jsonschema==4.23.0
317
+ google-auth-oauthlib==1.2.1
318
+ webencodings==0.5.1
319
+ spacy==3.7.5
320
+ prompt_toolkit==3.0.48
321
+ slicer==0.0.8
322
+ fonttools==4.55.3
323
+ filelock==3.16.1
324
+ etuples==0.3.9
325
+ google-ai-generativelanguage==0.6.10
326
+ nvidia-cusolver-cu12==11.7.1.2
327
+ mpmath==1.3.0
328
+ prophet==1.1.6
329
+ click==8.1.7
330
+ pyasn1_modules==0.4.1
331
+ bleach==6.2.0
332
+ pandas-datareader==0.10.0
333
+ confection==0.1.5
334
+ namex==0.0.8
335
+ websockets==14.1
336
+ duckdb==1.1.3
337
+ typeguard==4.4.1
338
+ httpx==0.28.1
339
+ notebook==6.5.5
340
+ pygame==2.6.1
341
+ google-cloud-datastore==2.20.2
342
+ requests-oauthlib==1.3.1
343
+ nibabel==5.3.2
344
+ timm==1.0.12
345
+ prometheus_client==0.21.1
346
+ debugpy==1.8.0
347
+ natsort==8.4.0
348
+ ipytree==0.2.2
349
+ partd==1.4.2
350
+ sentry-sdk==2.19.2
351
+ future==1.0.0
352
+ tokenizers==0.21.0
353
+ jsonpointer==3.0.0
354
+ accelerate==1.2.1
355
+ jupyter-console==6.1.0
356
+ kiwisolver==1.4.7
357
+ geopandas==1.0.1
358
+ easydict==1.13
359
+ StrEnum==0.4.15
360
+ absl-py==1.4.0
361
+ lxml==5.3.0
362
+ tqdm==4.67.1
363
+ jupyter-leaflet==0.19.2
364
+ spacy-legacy==3.0.12
365
+ requests-toolbelt==1.0.0
366
+ multipledispatch==1.0.0
367
+ gcsfs==2024.10.0
368
+ docstring_parser==0.16
369
+ sentence-transformers==3.3.1
370
+ ipython-genutils==0.2.0
371
+ spacy-loggers==1.0.5
372
+ python-dateutil==2.8.2
373
+ thinc==8.2.5
374
+ gspread==6.0.2
375
+ mkl==2025.0.1
376
+ google-cloud-translate==3.19.0
377
+ Deprecated==1.2.15
378
+ aiosignal==1.3.2
379
+ bigframes==1.29.0
380
+ opencv-python==4.10.0.84
381
+ intel-openmp==2025.0.4
382
+ google-auth-httplib2==0.2.0
383
+ vega-datasets==0.9.0
384
+ orjson==3.10.12
385
+ dlib==19.24.2
386
+ tf-slim==1.1.0
387
+ py4j==0.10.9.7
388
+ locket==1.0.0
389
+ charset-normalizer==3.4.0
390
+ beautifulsoup4==4.12.3
391
+ parso==0.8.4
392
+ sphinxcontrib-applehelp==2.0.0
393
+ pyspark==3.5.3
394
+ textblob==0.17.1
395
+ pynvjitlink-cu12==0.4.0
396
+ sqlparse==0.5.3
397
+ langchain==0.3.12
398
+ sphinxcontrib-jsmath==1.0.1
399
+ holidays==0.63
400
+ jieba==0.42.1
401
+ rpy2==3.4.2
402
+ google-genai==0.3.0
403
+ PySocks==1.7.1
404
+ pyshp==2.3.1
405
+ tensorflow-metadata==1.13.1
406
+ pydata-google-auth==1.9.0
407
+ logical-unification==0.4.6
408
+ sklearn-pandas==2.2.0
409
+ soundfile==0.12.1
410
+ holoviews==1.20.0
411
+ parsy==2.1
412
+ geocoder==1.38.1
413
+ matplotlib==3.8.0
414
+ cuda-python==12.2.1
415
+ imbalanced-learn==0.12.4
416
+ ibis-framework==9.2.0
417
+ numexpr==2.10.2
418
+ nbformat==5.10.4
419
+ multitasking==0.0.11
420
+ openpyxl==3.1.5
421
+ nvidia-cufft-cu12==11.3.0.4
422
+ nbclient==0.10.1
423
+ nvidia-cusparse-cu12==12.5.4.2
424
+ colorlover==0.3.0
425
+ shellingham==1.5.4
426
+ jax==0.4.33
427
+ tensorflow-io-gcs-filesystem==0.37.1
428
+ types-pytz==2024.2.0.20241003
429
+ python-utils==3.9.1
430
+ xlrd==2.0.1
431
+ mdit-py-plugins==0.4.2
432
+ google-cloud-storage==2.19.0
433
+ google-cloud-firestore==2.19.0
434
+ certifi==2024.12.14
435
+ Pygments==2.18.0
436
+ fastjsonschema==2.21.1
437
+ httpcore==1.0.7
438
+ snowballstemmer==2.2.0
439
+ cryptography==43.0.3
440
+ tensorflow-hub==0.16.1
441
+ pandas==2.2.2
442
+ optax==0.2.4
443
+ scikit-image==0.25.0
444
+ imutils==0.5.4
445
+ tenacity==9.0.0
446
+ langcodes==3.5.0
447
+ gym==0.25.2
448
+ termcolor==2.5.0
449
+ kaggle==1.6.17
450
+ pyproj==3.7.0
451
+ preshed==3.0.9
452
+ nvidia-cublas-cu12==12.6.4.1
453
+ fastrlock==0.8.3
454
+ anyio==3.7.1
455
+ gdown==5.2.0
456
+ jax-cuda12-plugin==0.4.33
457
+ clarabel==0.9.0
458
+ matplotlib-inline==0.1.7
459
+ torchvision==0.20.1+cu121
460
+ gym-notices==0.0.8
461
+ jax-cuda12-pjrt==0.4.33
462
+ wheel==0.45.1
463
+ tensorboard-data-server==0.7.2
464
+ pooch==1.8.2
465
+ imagesize==1.4.1
466
+ pandocfilters==1.5.1
467
+ h5py==3.12.1
468
+ geemap==0.35.1
469
+ pycparser==2.22
470
+ contourpy==1.3.1
471
+ babel==2.16.0
472
+ matplotlib-venn==1.1.1
473
+ PyOpenGL==3.1.7
474
+ xarray==2024.11.0
475
+ numpy==1.26.4
476
+ grpc-google-iam-v1==0.13.1
477
+ jupyterlab_widgets==3.0.13
478
+ typer==0.15.1
479
+ idna==3.10
480
+ google-pasta==0.2.0
481
+ greenlet==3.1.1
482
+ cufflinks==0.17.3
483
+ bigquery-magics==0.4.0
484
+ sentencepiece==0.2.0
485
+ wordcloud==1.9.4
486
+ docker-pycreds==0.4.0
487
+ murmurhash==1.0.11
488
+ atpublic==4.1.0
489
+ docutils==0.21.2
490
+ earthengine-api==1.4.3
491
+ pyerfa==2.0.1.5
492
+ gin-config==0.5.0
493
+ google-cloud-bigquery-connection==1.17.0
494
+ six==1.17.0
495
+ arviz==0.20.0
496
+ jupyter-client==6.1.12
497
+ folium==0.19.2
498
+ webcolors==24.11.1
499
+ pluggy==1.5.0
500
+ eval_type_backport==0.2.0
501
+ chex==0.1.88
502
+ xgboost==2.1.3
503
+ bokeh==3.6.2
504
+ soxr==0.5.0.post1
505
+ ipyevents==2.0.2
506
+ Flask==3.1.0
507
+ albucore==0.0.19
508
+ distro==1.9.0
509
+ threadpoolctl==3.5.0
510
+ frozendict==2.4.6
511
+ inflect==7.4.0
512
+ fastdownload==0.0.7
513
+ nvidia-cudnn-cu12==9.6.0.74
514
+ sympy==1.13.1
515
+ pylibraft-cu12==24.10.0
516
+ requests==2.32.3
517
+ opt_einsum==3.4.0
518
+ google-api-python-client==2.155.0
519
+ tensorflow-datasets==4.9.7
520
+ Werkzeug==3.1.3
521
+ pylibcudf-cu12==24.10.1
522
+ py-cpuinfo==9.0.0
523
+ pylibcugraph-cu12==24.10.0
524
+ pytensor==2.26.4
525
+ PyDrive2==1.21.3
526
+ audioread==3.0.1
527
+ pathlib==1.0.1
528
+ stanio==0.5.1
529
+ firebase-admin==6.6.0
530
+ cvxopt==1.3.2
531
+ shap==0.46.0
532
+ humanize==4.11.0
533
+ ptyprocess==0.7.0
534
+ panel==1.5.4
535
+ flax==0.8.5
536
+ scooby==0.10.0
537
+ python-apt==0.0.0
538
+ requirements-parser==0.9.0
539
+ pip==24.1.2
540
+ setuptools==75.1.0
541
+ types-setuptools==75.6.0.20241126
542
+ cryptography==3.4.8
543
+ lazr.uri==1.0.6
544
+ importlib-metadata==4.6.4
545
+ distro==1.7.0
546
+ pyparsing==2.4.7
547
+ wadllib==1.3.6
548
+ python-apt==2.4.0+ubuntu4
549
+ httplib2==0.20.2
550
+ PyGObject==3.42.1
551
+ blinker==1.4
552
+ oauthlib==3.2.0
553
+ more-itertools==8.10.0
554
+ SecretStorage==3.3.1
555
+ PyJWT==2.3.0
556
+ lazr.restfulclient==0.14.4
557
+ six==1.16.0
558
+ jeepney==0.7.1
559
+ dbus-python==1.2.18
560
+ keyring==23.5.0
561
+ zipp==1.0.0
562
+ launchpadlib==1.10.16
563
+ wheel==0.43.0
564
+ inflect==7.3.1
565
+ backports.tarfile==1.2.0
566
+ jaraco.context==5.3.0
567
+ typing_extensions==4.12.2
568
+ importlib_resources==6.4.0
569
+ zipp==3.19.2
570
+ jaraco.text==3.12.1
571
+ jaraco.collections==5.1.0
572
+ typeguard==4.3.0
573
+ jaraco.functools==4.0.1
574
+ platformdirs==4.2.2
575
+ autocommand==2.2.2
576
+ more-itertools==10.3.0
577
+ importlib_metadata==8.0.0
578
+ packaging==24.1
579
+ tomli==2.0.1
wandb/run-20241223_062433-kk1dm8nx/files/wandb-metadata.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.1.85+-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.12",
4
+ "startedAt": "2024-12-23T06:24:33.401066Z",
5
+ "program": "sft_smollm.ipynb",
6
+ "git": {
7
+ "remote": "https://github.com/farrukh602/smollm-fine-tuning",
8
+ "commit": "f060c88b98a1447662f698d1c81d063f9f2e3b9a"
9
+ },
10
+ "email": "farrukhmehmood.nts@iub.edu.pk",
11
+ "root": "/content/smollm-fine-tuning",
12
+ "host": "f534e42ab4c2",
13
+ "executable": "/usr/bin/python3",
14
+ "colab": "https://colab.research.google.com/notebook#fileId=1Q8h1M6Pw6nkQ1buMh1fXAdKupG98exLe",
15
+ "cpu_count": 1,
16
+ "cpu_count_logical": 2,
17
+ "gpu": "Tesla T4",
18
+ "gpu_count": 1,
19
+ "disk": {
20
+ "/": {
21
+ "total": "120942624768",
22
+ "used": "35434274816"
23
+ }
24
+ },
25
+ "memory": {
26
+ "total": "13609431040"
27
+ },
28
+ "cpu": {
29
+ "count": 1,
30
+ "countLogical": 2
31
+ },
32
+ "gpu_nvidia": [
33
+ {
34
+ "name": "Tesla T4",
35
+ "memoryTotal": "16106127360",
36
+ "cudaCores": 2560,
37
+ "architecture": "Turing"
38
+ }
39
+ ],
40
+ "cudaVersion": "12.2"
41
+ }
wandb/run-20241223_062433-kk1dm8nx/logs/debug-core.log ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-23T06:12:50.975330129Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpuo6nfebj/port-632.txt","pid":632,"debug":false,"disable-analytics":false}
2
+ {"time":"2024-12-23T06:12:50.975390635Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2024-12-23T06:12:50.983185882Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":632}
4
+ {"time":"2024-12-23T06:12:50.983291852Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":39815,"Zone":""}}
5
+ {"time":"2024-12-23T06:12:51.175371695Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:43168"}
6
+ {"time":"2024-12-23T06:24:33.40327228Z","level":"INFO","msg":"handleInformInit: received","streamId":"kk1dm8nx","id":"127.0.0.1:43168"}
7
+ {"time":"2024-12-23T06:24:33.515538151Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"kk1dm8nx","id":"127.0.0.1:43168"}
wandb/run-20241223_062433-kk1dm8nx/logs/debug-internal.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-23T06:24:33.403481622Z","level":"INFO","msg":"using version","core version":"0.19.1"}
2
+ {"time":"2024-12-23T06:24:33.403525688Z","level":"INFO","msg":"created symlink","path":"/content/smollm-fine-tuning/wandb/run-20241223_062433-kk1dm8nx/logs/debug-core.log"}
3
+ {"time":"2024-12-23T06:24:33.515469919Z","level":"INFO","msg":"created new stream","id":"kk1dm8nx"}
4
+ {"time":"2024-12-23T06:24:33.515527717Z","level":"INFO","msg":"stream: started","id":"kk1dm8nx"}
5
+ {"time":"2024-12-23T06:24:33.515537628Z","level":"INFO","msg":"writer: Do: started","stream_id":"kk1dm8nx"}
6
+ {"time":"2024-12-23T06:24:33.515733694Z","level":"INFO","msg":"handler: started","stream_id":"kk1dm8nx"}
7
+ {"time":"2024-12-23T06:24:33.515756662Z","level":"INFO","msg":"sender: started","stream_id":"kk1dm8nx"}
8
+ {"time":"2024-12-23T06:24:36.718018301Z","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2024-12-23T06:42:14.432322766Z","level":"INFO","msg":"Pausing system monitor"}
10
+ {"time":"2024-12-23T06:58:34.001396346Z","level":"INFO","msg":"Resuming system monitor"}
11
+ {"time":"2024-12-23T06:58:34.115544844Z","level":"INFO","msg":"Pausing system monitor"}
12
+ {"time":"2024-12-23T06:59:50.035860315Z","level":"INFO","msg":"Resuming system monitor"}
13
+ {"time":"2024-12-23T06:59:57.29236364Z","level":"INFO","msg":"Pausing system monitor"}
14
+ {"time":"2024-12-23T07:00:55.04796864Z","level":"INFO","msg":"Resuming system monitor"}
wandb/run-20241223_062433-kk1dm8nx/logs/debug.log ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-12-23 06:24:33,393 INFO MainThread:632 [wandb_setup.py:_flush():68] Current SDK version is 0.19.1
2
+ 2024-12-23 06:24:33,393 INFO MainThread:632 [wandb_setup.py:_flush():68] Configure stats pid to 632
3
+ 2024-12-23 06:24:33,393 INFO MainThread:632 [wandb_setup.py:_flush():68] Loading settings from /root/.config/wandb/settings
4
+ 2024-12-23 06:24:33,393 INFO MainThread:632 [wandb_setup.py:_flush():68] Loading settings from /content/smollm-fine-tuning/wandb/settings
5
+ 2024-12-23 06:24:33,393 INFO MainThread:632 [wandb_setup.py:_flush():68] Loading settings from environment variables
6
+ 2024-12-23 06:24:33,393 INFO MainThread:632 [wandb_init.py:_log_setup():528] Logging user logs to /content/smollm-fine-tuning/wandb/run-20241223_062433-kk1dm8nx/logs/debug.log
7
+ 2024-12-23 06:24:33,393 INFO MainThread:632 [wandb_init.py:_log_setup():529] Logging internal logs to /content/smollm-fine-tuning/wandb/run-20241223_062433-kk1dm8nx/logs/debug-internal.log
8
+ 2024-12-23 06:24:33,393 INFO MainThread:632 [wandb_init.py:_jupyter_setup():474] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x78e9883258d0>
9
+ 2024-12-23 06:24:33,394 INFO MainThread:632 [wandb_init.py:init():644] calling init triggers
10
+ 2024-12-23 06:24:33,394 INFO MainThread:632 [wandb_init.py:init():650] wandb.init called with sweep_config: {}
11
+ config: {}
12
+ 2024-12-23 06:24:33,394 INFO MainThread:632 [wandb_init.py:init():680] starting backend
13
+ 2024-12-23 06:24:33,394 INFO MainThread:632 [wandb_init.py:init():684] sending inform_init request
14
+ 2024-12-23 06:24:33,400 INFO MainThread:632 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
15
+ 2024-12-23 06:24:33,400 INFO MainThread:632 [wandb_init.py:init():697] backend started and connected
16
+ 2024-12-23 06:24:33,414 INFO MainThread:632 [wandb_run.py:_label_probe_notebook():1222] probe notebook
17
+ 2024-12-23 06:24:36,588 INFO MainThread:632 [wandb_init.py:init():790] updated telemetry
18
+ 2024-12-23 06:24:36,594 INFO MainThread:632 [wandb_init.py:init():822] communicating run to backend with 90.0 second timeout
19
+ 2024-12-23 06:24:36,712 INFO MainThread:632 [wandb_init.py:init():874] starting run threads in backend
20
+ 2024-12-23 06:24:37,146 INFO MainThread:632 [wandb_run.py:_console_start():2374] atexit reg
21
+ 2024-12-23 06:24:37,147 INFO MainThread:632 [wandb_run.py:_redirect():2224] redirect: wrap_raw
22
+ 2024-12-23 06:24:37,147 INFO MainThread:632 [wandb_run.py:_redirect():2289] Wrapping output streams.
23
+ 2024-12-23 06:24:37,147 INFO MainThread:632 [wandb_run.py:_redirect():2314] Redirects installed.
24
+ 2024-12-23 06:24:37,152 INFO MainThread:632 [wandb_init.py:init():916] run started, returning control to user process
25
+ 2024-12-23 06:24:37,156 INFO MainThread:632 [wandb_run.py:_config_callback():1279] config_cb None None {'vocab_size': 49152, 'max_position_embeddings': 8192, 'hidden_size': 576, 'intermediate_size': 1536, 'num_hidden_layers': 30, 'num_attention_heads': 9, 'num_key_value_heads': 3, 'hidden_act': 'silu', 'initializer_range': 0.041666666666666664, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': True, 'rope_theta': 100000, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'mlp_bias': False, 'head_dim': 64, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 2, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'HuggingFaceTB/SmolLM2-135M', '_attn_implementation_autoset': True, 
'transformers_version': '4.47.1', 'is_llama_config': True, 'model_type': 'llama', 'rope_interleaved': False, 'output_dir': '/content/drive/MyDrive/smollm-fine-tuning/trained_models', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 10, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 1000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/content/drive/MyDrive/smollm-fine-tuning/trained_models/runs/Dec23_06-10-57_f534e42ab4c2', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 50, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/content/drive/MyDrive/smollm-fine-tuning/trained_models', 'disable_tqdm': False, 'remove_unused_columns': 
True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': 'smollm2-sft-test1', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 
'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'dataset_text_field': 'text', 'packing': False, 'max_seq_length': 1024, 'dataset_num_proc': None, 'dataset_batch_size': 1000, 'model_init_kwargs': None, 'dataset_kwargs': {'add_special_tokens': False}, 'eval_packing': None, 'num_of_sequences': 1024, 'chars_per_token': '<CHARS_PER_TOKEN>', 'use_liger': False}
26
+ 2024-12-23 06:24:37,159 INFO MainThread:632 [wandb_config.py:__setitem__():154] config set model/num_parameters = 134515008 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x78e95840efe0>>
27
+ 2024-12-23 06:24:37,159 INFO MainThread:632 [wandb_run.py:_config_callback():1279] config_cb model/num_parameters 134515008 None
28
+ 2024-12-23 06:42:14,430 INFO MainThread:632 [jupyter.py:save_ipynb():386] not saving jupyter notebook
29
+ 2024-12-23 06:42:14,431 INFO MainThread:632 [wandb_init.py:_pause_backend():439] pausing backend
30
+ 2024-12-23 06:58:34,000 INFO MainThread:632 [wandb_init.py:_resume_backend():444] resuming backend
31
+ 2024-12-23 06:58:34,115 INFO MainThread:632 [jupyter.py:save_ipynb():386] not saving jupyter notebook
32
+ 2024-12-23 06:58:34,115 INFO MainThread:632 [wandb_init.py:_pause_backend():439] pausing backend
33
+ 2024-12-23 06:59:50,031 INFO MainThread:632 [wandb_init.py:_resume_backend():444] resuming backend
34
+ 2024-12-23 06:59:57,291 INFO MainThread:632 [jupyter.py:save_ipynb():386] not saving jupyter notebook
35
+ 2024-12-23 06:59:57,291 INFO MainThread:632 [wandb_init.py:_pause_backend():439] pausing backend
36
+ 2024-12-23 07:00:55,043 INFO MainThread:632 [wandb_init.py:_resume_backend():444] resuming backend
wandb/run-20241223_062433-kk1dm8nx/run-kk1dm8nx.wandb ADDED
Binary file (164 kB). View file