mblak3 committed on
Commit
42c7a37
·
verified ·
1 Parent(s): 87fe395

Training in progress, step 5000

Browse files
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "HGRNBitForCausalLM"
4
+ ],
5
+ "attn_mode": "fused_recurrent",
6
+ "bos_token_id": 1,
7
+ "conv_size": 4,
8
+ "eos_token_id": 2,
9
+ "expand_ratio": 1,
10
+ "fuse_cross_entropy": true,
11
+ "hidden_act": "swish",
12
+ "hidden_ratio": 4,
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": null,
16
+ "max_position_embeddings": 2048,
17
+ "model_type": "hgrn_bit",
18
+ "num_heads": 1,
19
+ "num_hidden_layers": 24,
20
+ "rms_norm_eps": 1e-06,
21
+ "share_conv_kernel": true,
22
+ "tie_word_embeddings": false,
23
+ "torch_dtype": "float32",
24
+ "transformers_version": "4.44.0",
25
+ "use_cache": true,
26
+ "use_lower_bound": true,
27
+ "use_short_conv": false,
28
+ "vocab_size": 32000
29
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20571227db1d258318b257ce5c1781cafd8a448af672eb75e13ee899313aa11b
3
+ size 1496472568
runs/Apr23_14-55-35_quad3090/events.out.tfevents.1745412936.quad3090.148976.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fb6aeb4e75e3150f6b7d599e75b1ded4ff578c739cae5e2ee9ba950477495ea
3
+ size 4884
runs/Apr23_15-03-26_quad3090/events.out.tfevents.1745413407.quad3090.150059.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95be3bcbdcc19ec8bdac21fe95f31b53d83e864953d8d8c77245ad7613bcbaba
3
+ size 4884
runs/Apr23_15-05-14_quad3090/events.out.tfevents.1745413515.quad3090.150520.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff18fa2a3c7aaa6dc8448913c6c28b0ab3c40384f05911afb88dbde58887b734
3
+ size 4884
runs/Apr23_15-15-54_quad3090/events.out.tfevents.1745414156.quad3090.151886.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fabde9340e5caaa0b9845f54772715b4596ee4e0975b39c721bfea4b9decb08
3
+ size 4884
runs/Apr23_15-30-01_quad3090/events.out.tfevents.1745415002.quad3090.5205.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:056ea8c2e6562cc6466baca1a1b16a4a322efc3b929ebcd82244b50ef439f9ba
3
+ size 4884
runs/Apr23_15-38-21_quad3090/events.out.tfevents.1745415503.quad3090.8735.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a708327bfe986c110503b8a17d41856b22da02833f5faa72bd622d1bcaaaf08
3
+ size 4884
runs/Apr23_15-40-51_quad3090/events.out.tfevents.1745415652.quad3090.9338.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25f5a6b0607465047dc7c31fc64f15bcfbb98032b7450054d5789ed12f5cf967
3
+ size 4884
runs/Apr23_15-41-54_quad3090/events.out.tfevents.1745415716.quad3090.9959.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8879bacf02b464e5d46ee3dd026ae0c12a796a32cce06d40f308299f9abdc150
3
+ size 4884
runs/Apr23_15-42-52_quad3090/events.out.tfevents.1745415774.quad3090.10407.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f80617389bc83ae23a2f8506a9a45a2934c7c2f4f6674315c88fe5977858a91
3
+ size 4884
runs/Apr23_15-43-44_quad3090/events.out.tfevents.1745415826.quad3090.10838.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c52a442adbe694272a8b51c6f375d7b614e68b2b953667e84cad4f277a0ff7d
3
+ size 4882
runs/Apr23_15-44-43_quad3090/events.out.tfevents.1745415884.quad3090.11300.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbe2ee3834c901d2913a3eacbe8edc26b599add464ba0b889eb7e0f9989e664e
3
+ size 4884
runs/Apr23_15-51-57_quad3090/events.out.tfevents.1745416319.quad3090.11905.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48e230b612ec6facced6bc400be80895ed37b3db2f5d1cf6570c8f768b46b652
3
+ size 4884
runs/Apr23_16-00-29_quad3090/events.out.tfevents.1745416831.quad3090.12665.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6347f43643cfbe72b4cbe6349a86089f9e6048c9bf76ede17dd092567b194b99
3
+ size 4884
runs/Apr23_16-01-29_quad3090/events.out.tfevents.1745416890.quad3090.13104.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc57daf2eb06db1b7495aa5bc70b1657c5c4dd25294198f010fd82fae5975268
3
+ size 4882
runs/Apr23_16-08-16_quad3090/events.out.tfevents.1745417297.quad3090.13643.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b3b84bdd6eb00d8b47277a38051d5c4ccb67ae6a24889b4daabad457b26ac84
3
+ size 5364
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "additional_special_tokens": [],
32
+ "bos_token": "<s>",
33
+ "clean_up_tokenization_spaces": false,
34
+ "eos_token": "</s>",
35
+ "legacy": true,
36
+ "model_max_length": 1000000000000000019884624838656,
37
+ "pad_token": "</s>",
38
+ "sp_model_kwargs": {},
39
+ "spaces_between_special_tokens": false,
40
+ "tokenizer_class": "LlamaTokenizer",
41
+ "unk_token": "<unk>",
42
+ "use_default_system_prompt": false
43
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fce1ee7f8fb3432241deb99baa88ebfe51158d08736f286404de39dcc05d2d8
3
+ size 5176