pavanmantha committed on
Commit
44ca94b
·
verified ·
1 Parent(s): 69fec75

Add processor for Whisper Medium Sanskrit model

Browse files
Files changed (2) hide show
  1. tokenizer.json +0 -42
  2. tokenizer_config.json +2 -2
tokenizer.json CHANGED
@@ -14492,18 +14492,6 @@
14492
  "type_id": 0
14493
  }
14494
  },
14495
- {
14496
- "SpecialToken": {
14497
- "id": "<|sa|>",
14498
- "type_id": 0
14499
- }
14500
- },
14501
- {
14502
- "SpecialToken": {
14503
- "id": "<|transcribe|>",
14504
- "type_id": 0
14505
- }
14506
- },
14507
  {
14508
  "SpecialToken": {
14509
  "id": "<|notimestamps|>",
@@ -14530,18 +14518,6 @@
14530
  "type_id": 0
14531
  }
14532
  },
14533
- {
14534
- "SpecialToken": {
14535
- "id": "<|sa|>",
14536
- "type_id": 0
14537
- }
14538
- },
14539
- {
14540
- "SpecialToken": {
14541
- "id": "<|transcribe|>",
14542
- "type_id": 0
14543
- }
14544
- },
14545
  {
14546
  "SpecialToken": {
14547
  "id": "<|notimestamps|>",
@@ -14586,15 +14562,6 @@
14586
  "<|notimestamps|>"
14587
  ]
14588
  },
14589
- "<|sa|>": {
14590
- "id": "<|sa|>",
14591
- "ids": [
14592
- 50344
14593
- ],
14594
- "tokens": [
14595
- "<|sa|>"
14596
- ]
14597
- },
14598
  "<|startoftranscript|>": {
14599
  "id": "<|startoftranscript|>",
14600
  "ids": [
@@ -14603,15 +14570,6 @@
14603
  "tokens": [
14604
  "<|startoftranscript|>"
14605
  ]
14606
- },
14607
- "<|transcribe|>": {
14608
- "id": "<|transcribe|>",
14609
- "ids": [
14610
- 50359
14611
- ],
14612
- "tokens": [
14613
- "<|transcribe|>"
14614
- ]
14615
  }
14616
  }
14617
  },
 
14492
  "type_id": 0
14493
  }
14494
  },
 
 
 
 
 
 
 
 
 
 
 
 
14495
  {
14496
  "SpecialToken": {
14497
  "id": "<|notimestamps|>",
 
14518
  "type_id": 0
14519
  }
14520
  },
 
 
 
 
 
 
 
 
 
 
 
 
14521
  {
14522
  "SpecialToken": {
14523
  "id": "<|notimestamps|>",
 
14562
  "<|notimestamps|>"
14563
  ]
14564
  },
 
 
 
 
 
 
 
 
 
14565
  "<|startoftranscript|>": {
14566
  "id": "<|startoftranscript|>",
14567
  "ids": [
 
14570
  "tokens": [
14571
  "<|startoftranscript|>"
14572
  ]
 
 
 
 
 
 
 
 
 
14573
  }
14574
  }
14575
  },
tokenizer_config.json CHANGED
@@ -115,13 +115,13 @@
115
  "<|notimestamps|>"
116
  ],
117
  "is_local": true,
118
- "language": "sa",
119
  "model_max_length": 1024,
120
  "pad_token": "<|endoftext|>",
121
  "predict_timestamps": false,
122
  "processor_class": "WhisperProcessor",
123
  "return_attention_mask": false,
124
- "task": "transcribe",
125
  "tokenizer_class": "WhisperTokenizer",
126
  "unk_token": "<|endoftext|>"
127
  }
 
115
  "<|notimestamps|>"
116
  ],
117
  "is_local": true,
118
+ "language": null,
119
  "model_max_length": 1024,
120
  "pad_token": "<|endoftext|>",
121
  "predict_timestamps": false,
122
  "processor_class": "WhisperProcessor",
123
  "return_attention_mask": false,
124
+ "task": null,
125
  "tokenizer_class": "WhisperTokenizer",
126
  "unk_token": "<|endoftext|>"
127
  }