Manoj-kanur committed on
Commit
3f1394e
·
verified ·
1 Parent(s): 926b45b

Training in progress, step 500

Browse files
config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "add_cross_attention": false,
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 1,
9
+ "dtype": "float32",
10
+ "embd_pdrop": 0.1,
11
+ "eos_token_id": 2,
12
+ "initializer_range": 0.02,
13
+ "layer_norm_epsilon": 1e-05,
14
+ "model_type": "gpt2",
15
+ "n_embd": 256,
16
+ "n_head": 4,
17
+ "n_inner": 1024,
18
+ "n_layer": 4,
19
+ "n_positions": 256,
20
+ "pad_token_id": 0,
21
+ "reorder_and_upcast_attn": false,
22
+ "resid_pdrop": 0.1,
23
+ "scale_attn_by_inverse_layer_idx": false,
24
+ "scale_attn_weights": true,
25
+ "summary_activation": null,
26
+ "summary_first_dropout": 0.1,
27
+ "summary_proj_to_labels": true,
28
+ "summary_type": "cls_index",
29
+ "summary_use_proj": true,
30
+ "tie_word_embeddings": true,
31
+ "transformers_version": "5.12.1",
32
+ "use_cache": false,
33
+ "vocab_size": 47
34
+ }
generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": [
5
+ 2
6
+ ],
7
+ "output_attentions": false,
8
+ "output_hidden_states": false,
9
+ "pad_token_id": 0,
10
+ "transformers_version": "5.12.1",
11
+ "use_cache": true
12
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16e7ae653908ec1097f2131d006e20e1231f2157e53bfd38a7e180a9f47bd980
3
+ size 12953576
tokenizer.json ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": {
5
+ "strategy": "BatchLongest",
6
+ "direction": "Left",
7
+ "pad_to_multiple_of": null,
8
+ "pad_id": 0,
9
+ "pad_type_id": 0,
10
+ "pad_token": "<pad>"
11
+ },
12
+ "added_tokens": [
13
+ {
14
+ "id": 0,
15
+ "content": "<pad>",
16
+ "single_word": false,
17
+ "lstrip": false,
18
+ "rstrip": false,
19
+ "normalized": false,
20
+ "special": true
21
+ },
22
+ {
23
+ "id": 1,
24
+ "content": "<bos>",
25
+ "single_word": false,
26
+ "lstrip": false,
27
+ "rstrip": false,
28
+ "normalized": false,
29
+ "special": true
30
+ },
31
+ {
32
+ "id": 2,
33
+ "content": "<eos>",
34
+ "single_word": false,
35
+ "lstrip": false,
36
+ "rstrip": false,
37
+ "normalized": false,
38
+ "special": true
39
+ },
40
+ {
41
+ "id": 3,
42
+ "content": "<unk>",
43
+ "single_word": false,
44
+ "lstrip": false,
45
+ "rstrip": false,
46
+ "normalized": false,
47
+ "special": true
48
+ }
49
+ ],
50
+ "normalizer": null,
51
+ "pre_tokenizer": {
52
+ "type": "Split",
53
+ "pattern": {
54
+ "Regex": "[\\s\\S]"
55
+ },
56
+ "behavior": "Isolated",
57
+ "invert": false
58
+ },
59
+ "post_processor": {
60
+ "type": "TemplateProcessing",
61
+ "single": [
62
+ {
63
+ "Sequence": {
64
+ "id": "A",
65
+ "type_id": 0
66
+ }
67
+ }
68
+ ],
69
+ "pair": [
70
+ {
71
+ "Sequence": {
72
+ "id": "A",
73
+ "type_id": 0
74
+ }
75
+ },
76
+ {
77
+ "Sequence": {
78
+ "id": "B",
79
+ "type_id": 1
80
+ }
81
+ }
82
+ ],
83
+ "special_tokens": {}
84
+ },
85
+ "decoder": {
86
+ "type": "Fuse"
87
+ },
88
+ "model": {
89
+ "type": "WordLevel",
90
+ "vocab": {
91
+ "<pad>": 0,
92
+ "<bos>": 1,
93
+ "<eos>": 2,
94
+ "<unk>": 3,
95
+ "\n": 4,
96
+ "+": 5,
97
+ "0": 6,
98
+ "1": 7,
99
+ "2": 8,
100
+ "3": 9,
101
+ "4": 10,
102
+ "5": 11,
103
+ "6": 12,
104
+ "7": 13,
105
+ "8": 14,
106
+ "9": 15,
107
+ "=": 16,
108
+ "٠": 17,
109
+ "١": 18,
110
+ "٢": 19,
111
+ "٣": 20,
112
+ "٤": 21,
113
+ "٥": 22,
114
+ "٦": 23,
115
+ "٧": 24,
116
+ "٨": 25,
117
+ "٩": 26,
118
+ "०": 27,
119
+ "१": 28,
120
+ "२": 29,
121
+ "३": 30,
122
+ "४": 31,
123
+ "५": 32,
124
+ "६": 33,
125
+ "७": 34,
126
+ "८": 35,
127
+ "९": 36,
128
+ "一": 37,
129
+ "七": 38,
130
+ "三": 39,
131
+ "九": 40,
132
+ "二": 41,
133
+ "五": 42,
134
+ "八": 43,
135
+ "六": 44,
136
+ "四": 45,
137
+ "零": 46
138
+ },
139
+ "unk_token": "<unk>"
140
+ }
141
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<bos>",
4
+ "eos_token": "<eos>",
5
+ "is_local": false,
6
+ "local_files_only": false,
7
+ "model_max_length": 1000000000000000019884624838656,
8
+ "pad_token": "<pad>",
9
+ "tokenizer_class": "TokenizersBackend",
10
+ "unk_token": "<unk>"
11
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98df703cae131f341c3e77ea1501564987302367867b74ee44f234b92e5305e9
3
+ size 5265