Training in progress, step 500
Browse files
- config.json +1 -1
- model.safetensors +2 -2
- runs/Mar03_09-43-16_f606be61dfea/events.out.tfevents.1772530996.f606be61dfea.275.0 +3 -0
- tokenizer.json +9 -42
- tokenizer_config.json +3 -109
config.json
CHANGED
|
@@ -42,5 +42,5 @@
|
|
| 42 |
"transformers_version": "5.0.0",
|
| 43 |
"use_cache": false,
|
| 44 |
"use_weighted_layer_sum": false,
|
| 45 |
-
"vocab_size":
|
| 46 |
}
|
|
|
|
| 42 |
"transformers_version": "5.0.0",
|
| 43 |
"use_cache": false,
|
| 44 |
"use_weighted_layer_sum": false,
|
| 45 |
+
"vocab_size": 51866
|
| 46 |
}
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0433f83140d2e7971370fa122b17130631af22bd02399046847a03e7d919cac4
|
| 3 |
+
size 966998152
|
runs/Mar03_09-43-16_f606be61dfea/events.out.tfevents.1772530996.f606be61dfea.275.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54e031dee7e386c2c27753959acb91d5a77ac23965a663ce74db2ecf5df836fd
|
| 3 |
+
size 9537
|
tokenizer.json
CHANGED
|
@@ -14474,6 +14474,15 @@
|
|
| 14474 |
"rstrip": false,
|
| 14475 |
"normalized": true,
|
| 14476 |
"special": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14477 |
}
|
| 14478 |
],
|
| 14479 |
"normalizer": null,
|
|
@@ -14492,18 +14501,6 @@
|
|
| 14492 |
"type_id": 0
|
| 14493 |
}
|
| 14494 |
},
|
| 14495 |
-
{
|
| 14496 |
-
"SpecialToken": {
|
| 14497 |
-
"id": "<|ha|>",
|
| 14498 |
-
"type_id": 0
|
| 14499 |
-
}
|
| 14500 |
-
},
|
| 14501 |
-
{
|
| 14502 |
-
"SpecialToken": {
|
| 14503 |
-
"id": "<|transcribe|>",
|
| 14504 |
-
"type_id": 0
|
| 14505 |
-
}
|
| 14506 |
-
},
|
| 14507 |
{
|
| 14508 |
"SpecialToken": {
|
| 14509 |
"id": "<|notimestamps|>",
|
|
@@ -14530,18 +14527,6 @@
|
|
| 14530 |
"type_id": 0
|
| 14531 |
}
|
| 14532 |
},
|
| 14533 |
-
{
|
| 14534 |
-
"SpecialToken": {
|
| 14535 |
-
"id": "<|ha|>",
|
| 14536 |
-
"type_id": 0
|
| 14537 |
-
}
|
| 14538 |
-
},
|
| 14539 |
-
{
|
| 14540 |
-
"SpecialToken": {
|
| 14541 |
-
"id": "<|transcribe|>",
|
| 14542 |
-
"type_id": 0
|
| 14543 |
-
}
|
| 14544 |
-
},
|
| 14545 |
{
|
| 14546 |
"SpecialToken": {
|
| 14547 |
"id": "<|notimestamps|>",
|
|
@@ -14577,15 +14562,6 @@
|
|
| 14577 |
"<|endoftext|>"
|
| 14578 |
]
|
| 14579 |
},
|
| 14580 |
-
"<|ha|>": {
|
| 14581 |
-
"id": "<|ha|>",
|
| 14582 |
-
"ids": [
|
| 14583 |
-
50354
|
| 14584 |
-
],
|
| 14585 |
-
"tokens": [
|
| 14586 |
-
"<|ha|>"
|
| 14587 |
-
]
|
| 14588 |
-
},
|
| 14589 |
"<|notimestamps|>": {
|
| 14590 |
"id": "<|notimestamps|>",
|
| 14591 |
"ids": [
|
|
@@ -14603,15 +14579,6 @@
|
|
| 14603 |
"tokens": [
|
| 14604 |
"<|startoftranscript|>"
|
| 14605 |
]
|
| 14606 |
-
},
|
| 14607 |
-
"<|transcribe|>": {
|
| 14608 |
-
"id": "<|transcribe|>",
|
| 14609 |
-
"ids": [
|
| 14610 |
-
50359
|
| 14611 |
-
],
|
| 14612 |
-
"tokens": [
|
| 14613 |
-
"<|transcribe|>"
|
| 14614 |
-
]
|
| 14615 |
}
|
| 14616 |
}
|
| 14617 |
},
|
|
|
|
| 14474 |
"rstrip": false,
|
| 14475 |
"normalized": true,
|
| 14476 |
"special": false
|
| 14477 |
+
},
|
| 14478 |
+
{
|
| 14479 |
+
"id": 51865,
|
| 14480 |
+
"content": "<|ig|>",
|
| 14481 |
+
"single_word": false,
|
| 14482 |
+
"lstrip": false,
|
| 14483 |
+
"rstrip": false,
|
| 14484 |
+
"normalized": false,
|
| 14485 |
+
"special": true
|
| 14486 |
}
|
| 14487 |
],
|
| 14488 |
"normalizer": null,
|
|
|
|
| 14501 |
"type_id": 0
|
| 14502 |
}
|
| 14503 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14504 |
{
|
| 14505 |
"SpecialToken": {
|
| 14506 |
"id": "<|notimestamps|>",
|
|
|
|
| 14527 |
"type_id": 0
|
| 14528 |
}
|
| 14529 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14530 |
{
|
| 14531 |
"SpecialToken": {
|
| 14532 |
"id": "<|notimestamps|>",
|
|
|
|
| 14562 |
"<|endoftext|>"
|
| 14563 |
]
|
| 14564 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14565 |
"<|notimestamps|>": {
|
| 14566 |
"id": "<|notimestamps|>",
|
| 14567 |
"ids": [
|
|
|
|
| 14579 |
"tokens": [
|
| 14580 |
"<|startoftranscript|>"
|
| 14581 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14582 |
}
|
| 14583 |
}
|
| 14584 |
},
|
tokenizer_config.json
CHANGED
|
@@ -6,122 +6,16 @@
|
|
| 6 |
"eos_token": "<|endoftext|>",
|
| 7 |
"errors": "replace",
|
| 8 |
"extra_special_tokens": [
|
| 9 |
-
"<|
|
| 10 |
-
"<|startoftranscript|>",
|
| 11 |
-
"<|en|>",
|
| 12 |
-
"<|zh|>",
|
| 13 |
-
"<|de|>",
|
| 14 |
-
"<|es|>",
|
| 15 |
-
"<|ru|>",
|
| 16 |
-
"<|ko|>",
|
| 17 |
-
"<|fr|>",
|
| 18 |
-
"<|ja|>",
|
| 19 |
-
"<|pt|>",
|
| 20 |
-
"<|tr|>",
|
| 21 |
-
"<|pl|>",
|
| 22 |
-
"<|ca|>",
|
| 23 |
-
"<|nl|>",
|
| 24 |
-
"<|ar|>",
|
| 25 |
-
"<|sv|>",
|
| 26 |
-
"<|it|>",
|
| 27 |
-
"<|id|>",
|
| 28 |
-
"<|hi|>",
|
| 29 |
-
"<|fi|>",
|
| 30 |
-
"<|vi|>",
|
| 31 |
-
"<|he|>",
|
| 32 |
-
"<|uk|>",
|
| 33 |
-
"<|el|>",
|
| 34 |
-
"<|ms|>",
|
| 35 |
-
"<|cs|>",
|
| 36 |
-
"<|ro|>",
|
| 37 |
-
"<|da|>",
|
| 38 |
-
"<|hu|>",
|
| 39 |
-
"<|ta|>",
|
| 40 |
-
"<|no|>",
|
| 41 |
-
"<|th|>",
|
| 42 |
-
"<|ur|>",
|
| 43 |
-
"<|hr|>",
|
| 44 |
-
"<|bg|>",
|
| 45 |
-
"<|lt|>",
|
| 46 |
-
"<|la|>",
|
| 47 |
-
"<|mi|>",
|
| 48 |
-
"<|ml|>",
|
| 49 |
-
"<|cy|>",
|
| 50 |
-
"<|sk|>",
|
| 51 |
-
"<|te|>",
|
| 52 |
-
"<|fa|>",
|
| 53 |
-
"<|lv|>",
|
| 54 |
-
"<|bn|>",
|
| 55 |
-
"<|sr|>",
|
| 56 |
-
"<|az|>",
|
| 57 |
-
"<|sl|>",
|
| 58 |
-
"<|kn|>",
|
| 59 |
-
"<|et|>",
|
| 60 |
-
"<|mk|>",
|
| 61 |
-
"<|br|>",
|
| 62 |
-
"<|eu|>",
|
| 63 |
-
"<|is|>",
|
| 64 |
-
"<|hy|>",
|
| 65 |
-
"<|ne|>",
|
| 66 |
-
"<|mn|>",
|
| 67 |
-
"<|bs|>",
|
| 68 |
-
"<|kk|>",
|
| 69 |
-
"<|sq|>",
|
| 70 |
-
"<|sw|>",
|
| 71 |
-
"<|gl|>",
|
| 72 |
-
"<|mr|>",
|
| 73 |
-
"<|pa|>",
|
| 74 |
-
"<|si|>",
|
| 75 |
-
"<|km|>",
|
| 76 |
-
"<|sn|>",
|
| 77 |
-
"<|yo|>",
|
| 78 |
-
"<|so|>",
|
| 79 |
-
"<|af|>",
|
| 80 |
-
"<|oc|>",
|
| 81 |
-
"<|ka|>",
|
| 82 |
-
"<|be|>",
|
| 83 |
-
"<|tg|>",
|
| 84 |
-
"<|sd|>",
|
| 85 |
-
"<|gu|>",
|
| 86 |
-
"<|am|>",
|
| 87 |
-
"<|yi|>",
|
| 88 |
-
"<|lo|>",
|
| 89 |
-
"<|uz|>",
|
| 90 |
-
"<|fo|>",
|
| 91 |
-
"<|ht|>",
|
| 92 |
-
"<|ps|>",
|
| 93 |
-
"<|tk|>",
|
| 94 |
-
"<|nn|>",
|
| 95 |
-
"<|mt|>",
|
| 96 |
-
"<|sa|>",
|
| 97 |
-
"<|lb|>",
|
| 98 |
-
"<|my|>",
|
| 99 |
-
"<|bo|>",
|
| 100 |
-
"<|tl|>",
|
| 101 |
-
"<|mg|>",
|
| 102 |
-
"<|as|>",
|
| 103 |
-
"<|tt|>",
|
| 104 |
-
"<|haw|>",
|
| 105 |
-
"<|ln|>",
|
| 106 |
-
"<|ha|>",
|
| 107 |
-
"<|ba|>",
|
| 108 |
-
"<|jw|>",
|
| 109 |
-
"<|su|>",
|
| 110 |
-
"<|translate|>",
|
| 111 |
-
"<|transcribe|>",
|
| 112 |
-
"<|startoflm|>",
|
| 113 |
-
"<|startofprev|>",
|
| 114 |
-
"<|nocaptions|>",
|
| 115 |
-
"<|notimestamps|>"
|
| 116 |
],
|
| 117 |
"is_local": false,
|
| 118 |
-
"language":
|
| 119 |
"model_max_length": 1024,
|
| 120 |
"pad_token": "<|endoftext|>",
|
| 121 |
"predict_timestamps": false,
|
| 122 |
"processor_class": "WhisperProcessor",
|
| 123 |
"return_attention_mask": false,
|
| 124 |
-
"task":
|
| 125 |
"tokenizer_class": "WhisperTokenizer",
|
| 126 |
"unk_token": "<|endoftext|>"
|
| 127 |
}
|
|
|
|
| 6 |
"eos_token": "<|endoftext|>",
|
| 7 |
"errors": "replace",
|
| 8 |
"extra_special_tokens": [
|
| 9 |
+
"<|ig|>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
],
|
| 11 |
"is_local": false,
|
| 12 |
+
"language": null,
|
| 13 |
"model_max_length": 1024,
|
| 14 |
"pad_token": "<|endoftext|>",
|
| 15 |
"predict_timestamps": false,
|
| 16 |
"processor_class": "WhisperProcessor",
|
| 17 |
"return_attention_mask": false,
|
| 18 |
+
"task": null,
|
| 19 |
"tokenizer_class": "WhisperTokenizer",
|
| 20 |
"unk_token": "<|endoftext|>"
|
| 21 |
}
|