Training in progress, step 5000
Browse files- config.json +29 -0
- model.safetensors +3 -0
- runs/Apr23_14-55-35_quad3090/events.out.tfevents.1745412936.quad3090.148976.0 +3 -0
- runs/Apr23_15-03-26_quad3090/events.out.tfevents.1745413407.quad3090.150059.0 +3 -0
- runs/Apr23_15-05-14_quad3090/events.out.tfevents.1745413515.quad3090.150520.0 +3 -0
- runs/Apr23_15-15-54_quad3090/events.out.tfevents.1745414156.quad3090.151886.0 +3 -0
- runs/Apr23_15-30-01_quad3090/events.out.tfevents.1745415002.quad3090.5205.0 +3 -0
- runs/Apr23_15-38-21_quad3090/events.out.tfevents.1745415503.quad3090.8735.0 +3 -0
- runs/Apr23_15-40-51_quad3090/events.out.tfevents.1745415652.quad3090.9338.0 +3 -0
- runs/Apr23_15-41-54_quad3090/events.out.tfevents.1745415716.quad3090.9959.0 +3 -0
- runs/Apr23_15-42-52_quad3090/events.out.tfevents.1745415774.quad3090.10407.0 +3 -0
- runs/Apr23_15-43-44_quad3090/events.out.tfevents.1745415826.quad3090.10838.0 +3 -0
- runs/Apr23_15-44-43_quad3090/events.out.tfevents.1745415884.quad3090.11300.0 +3 -0
- runs/Apr23_15-51-57_quad3090/events.out.tfevents.1745416319.quad3090.11905.0 +3 -0
- runs/Apr23_16-00-29_quad3090/events.out.tfevents.1745416831.quad3090.12665.0 +3 -0
- runs/Apr23_16-01-29_quad3090/events.out.tfevents.1745416890.quad3090.13104.0 +3 -0
- runs/Apr23_16-08-16_quad3090/events.out.tfevents.1745417297.quad3090.13643.0 +3 -0
- special_tokens_map.json +24 -0
- tokenizer.json +0 -0
- tokenizer.model +3 -0
- tokenizer_config.json +43 -0
- training_args.bin +3 -0
config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"HGRNBitForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attn_mode": "fused_recurrent",
|
| 6 |
+
"bos_token_id": 1,
|
| 7 |
+
"conv_size": 4,
|
| 8 |
+
"eos_token_id": 2,
|
| 9 |
+
"expand_ratio": 1,
|
| 10 |
+
"fuse_cross_entropy": true,
|
| 11 |
+
"hidden_act": "swish",
|
| 12 |
+
"hidden_ratio": 4,
|
| 13 |
+
"hidden_size": 1024,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": null,
|
| 16 |
+
"max_position_embeddings": 2048,
|
| 17 |
+
"model_type": "hgrn_bit",
|
| 18 |
+
"num_heads": 1,
|
| 19 |
+
"num_hidden_layers": 24,
|
| 20 |
+
"rms_norm_eps": 1e-06,
|
| 21 |
+
"share_conv_kernel": true,
|
| 22 |
+
"tie_word_embeddings": false,
|
| 23 |
+
"torch_dtype": "float32",
|
| 24 |
+
"transformers_version": "4.44.0",
|
| 25 |
+
"use_cache": true,
|
| 26 |
+
"use_lower_bound": true,
|
| 27 |
+
"use_short_conv": false,
|
| 28 |
+
"vocab_size": 32000
|
| 29 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20571227db1d258318b257ce5c1781cafd8a448af672eb75e13ee899313aa11b
|
| 3 |
+
size 1496472568
|
runs/Apr23_14-55-35_quad3090/events.out.tfevents.1745412936.quad3090.148976.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1fb6aeb4e75e3150f6b7d599e75b1ded4ff578c739cae5e2ee9ba950477495ea
|
| 3 |
+
size 4884
|
runs/Apr23_15-03-26_quad3090/events.out.tfevents.1745413407.quad3090.150059.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95be3bcbdcc19ec8bdac21fe95f31b53d83e864953d8d8c77245ad7613bcbaba
|
| 3 |
+
size 4884
|
runs/Apr23_15-05-14_quad3090/events.out.tfevents.1745413515.quad3090.150520.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff18fa2a3c7aaa6dc8448913c6c28b0ab3c40384f05911afb88dbde58887b734
|
| 3 |
+
size 4884
|
runs/Apr23_15-15-54_quad3090/events.out.tfevents.1745414156.quad3090.151886.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7fabde9340e5caaa0b9845f54772715b4596ee4e0975b39c721bfea4b9decb08
|
| 3 |
+
size 4884
|
runs/Apr23_15-30-01_quad3090/events.out.tfevents.1745415002.quad3090.5205.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:056ea8c2e6562cc6466baca1a1b16a4a322efc3b929ebcd82244b50ef439f9ba
|
| 3 |
+
size 4884
|
runs/Apr23_15-38-21_quad3090/events.out.tfevents.1745415503.quad3090.8735.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a708327bfe986c110503b8a17d41856b22da02833f5faa72bd622d1bcaaaf08
|
| 3 |
+
size 4884
|
runs/Apr23_15-40-51_quad3090/events.out.tfevents.1745415652.quad3090.9338.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25f5a6b0607465047dc7c31fc64f15bcfbb98032b7450054d5789ed12f5cf967
|
| 3 |
+
size 4884
|
runs/Apr23_15-41-54_quad3090/events.out.tfevents.1745415716.quad3090.9959.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8879bacf02b464e5d46ee3dd026ae0c12a796a32cce06d40f308299f9abdc150
|
| 3 |
+
size 4884
|
runs/Apr23_15-42-52_quad3090/events.out.tfevents.1745415774.quad3090.10407.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8f80617389bc83ae23a2f8506a9a45a2934c7c2f4f6674315c88fe5977858a91
|
| 3 |
+
size 4884
|
runs/Apr23_15-43-44_quad3090/events.out.tfevents.1745415826.quad3090.10838.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c52a442adbe694272a8b51c6f375d7b614e68b2b953667e84cad4f277a0ff7d
|
| 3 |
+
size 4882
|
runs/Apr23_15-44-43_quad3090/events.out.tfevents.1745415884.quad3090.11300.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cbe2ee3834c901d2913a3eacbe8edc26b599add464ba0b889eb7e0f9989e664e
|
| 3 |
+
size 4884
|
runs/Apr23_15-51-57_quad3090/events.out.tfevents.1745416319.quad3090.11905.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48e230b612ec6facced6bc400be80895ed37b3db2f5d1cf6570c8f768b46b652
|
| 3 |
+
size 4884
|
runs/Apr23_16-00-29_quad3090/events.out.tfevents.1745416831.quad3090.12665.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6347f43643cfbe72b4cbe6349a86089f9e6048c9bf76ede17dd092567b194b99
|
| 3 |
+
size 4884
|
runs/Apr23_16-01-29_quad3090/events.out.tfevents.1745416890.quad3090.13104.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc57daf2eb06db1b7495aa5bc70b1657c5c4dd25294198f010fd82fae5975268
|
| 3 |
+
size 4882
|
runs/Apr23_16-08-16_quad3090/events.out.tfevents.1745417297.quad3090.13643.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b3b84bdd6eb00d8b47277a38051d5c4ccb67ae6a24889b4daabad457b26ac84
|
| 3 |
+
size 5364
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": "</s>",
|
| 17 |
+
"unk_token": {
|
| 18 |
+
"content": "<unk>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
}
|
| 24 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
|
| 3 |
+
size 493443
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"add_prefix_space": null,
|
| 5 |
+
"added_tokens_decoder": {
|
| 6 |
+
"0": {
|
| 7 |
+
"content": "<unk>",
|
| 8 |
+
"lstrip": false,
|
| 9 |
+
"normalized": false,
|
| 10 |
+
"rstrip": false,
|
| 11 |
+
"single_word": false,
|
| 12 |
+
"special": true
|
| 13 |
+
},
|
| 14 |
+
"1": {
|
| 15 |
+
"content": "<s>",
|
| 16 |
+
"lstrip": false,
|
| 17 |
+
"normalized": false,
|
| 18 |
+
"rstrip": false,
|
| 19 |
+
"single_word": false,
|
| 20 |
+
"special": true
|
| 21 |
+
},
|
| 22 |
+
"2": {
|
| 23 |
+
"content": "</s>",
|
| 24 |
+
"lstrip": false,
|
| 25 |
+
"normalized": false,
|
| 26 |
+
"rstrip": false,
|
| 27 |
+
"single_word": false,
|
| 28 |
+
"special": true
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
+
"additional_special_tokens": [],
|
| 32 |
+
"bos_token": "<s>",
|
| 33 |
+
"clean_up_tokenization_spaces": false,
|
| 34 |
+
"eos_token": "</s>",
|
| 35 |
+
"legacy": true,
|
| 36 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 37 |
+
"pad_token": "</s>",
|
| 38 |
+
"sp_model_kwargs": {},
|
| 39 |
+
"spaces_between_special_tokens": false,
|
| 40 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 41 |
+
"unk_token": "<unk>",
|
| 42 |
+
"use_default_system_prompt": false
|
| 43 |
+
}
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4fce1ee7f8fb3432241deb99baa88ebfe51158d08736f286404de39dcc05d2d8
|
| 3 |
+
size 5176
|