zarifmahir21 committed on
Commit
71f92ba
·
verified ·
1 Parent(s): b245161

Upload fine-tuned Bengali speaker diarization model

Browse files
Files changed (5) hide show
  1. README.md +32 -0
  2. USAGE.md +4 -0
  3. config.yaml +16 -0
  4. pipeline_config.json +16 -0
  5. pytorch_model.bin +3 -0
README.md ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - bn
4
+ tags:
5
+ - speaker-diarization
6
+ - pyannote
7
+ - pyannote-audio
8
+ - audio
9
+ - voice
10
+ - speech
11
+ - bengali
12
+ license: mit
13
+ datasets:
14
+ - custom
15
+ metrics:
16
+ - der
17
+ model-index:
18
+ - name: bengali-speaker-diarization_v1
19
+ results:
20
+ - task:
21
+ type: speaker-diarization
22
+ name: Speaker Diarization
23
+ metrics:
24
+ - type: der
25
+ value: Not computed
26
+ name: Diarization Error Rate
27
+ ---
28
+
29
+ # bengali-speaker-diarization_v1
30
+
31
+ This is a fine-tuned speaker diarization model based on pyannote.audio, specifically trained on Bengali audio data.
32
+
USAGE.md ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Example Usage: bengali-speaker-diarization_v1
2
+
3
+ This example shows how to use the model for speaker diarization.
4
+
config.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Model configuration for pyannote.audio
3
+ task:
4
+ name: SpeakerDiarization
5
+
6
+ architecture:
7
+ name: PyanNet
8
+
9
+ specifications:
10
+ duration: 5.0
11
+ sample_rate: 16000
12
+
13
+ training:
14
+ batch_size: 32
15
+ learning_rate: 0.0001
16
+ max_epochs: 20
pipeline_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "speaker-diarization",
3
+ "pyannote_version": "3.3.2",
4
+ "embedding_model": "pyannote/wespeaker-voxceleb-resnet34-LM",
5
+ "optimal_parameters": {
6
+ "segmentation": {
7
+ "threshold": 0.5,
8
+ "min_duration_off": 0.0
9
+ },
10
+ "clustering": {
11
+ "method": "centroid",
12
+ "threshold": 0.7,
13
+ "min_cluster_size": 12
14
+ }
15
+ }
16
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb89ca4e8ffeda8f86576af8c86fb0ee173aa1f1cb24820abe6d4b9b42402b77
3
+ size 17733969