File size: 906 Bytes
695ff37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# This database.yml tells pyannote where the audio files and the speaker labels (RTTM) are, and how they are split into train / development / test subsets.

Databases:
  # Dataset name; the Protocols section refers to it by this same key.
  HindiBhojpuri:
    # Audio path template: every audio file lives in this folder, and
    # {uri} is the placeholder for the individual file's identifier.
    - 'dataset/audio/{uri}.wav'

Protocols:
  # Describes how the HindiBhojpuri dataset is used.
  HindiBhojpuri:
    # Task type: speaker diarization.
    SpeakerDiarization:
      # Protocol name; this protocol is meant for training / evaluating a
      # segmentation model.
      Segmentation:
        # Each subset pairs a text file listing its file URIs with the RTTM
        # speaker labels located through the {uri} placeholder.
        # NOTE(review): no `annotated` (UEM) entry is given for any subset;
        # confirm that this is intended.
        train:
          uri: 'dataset/splits/train.txt'
          annotation: 'dataset/rttm/{uri}.rttm'
        development:
          uri: 'dataset/splits/dev.txt'
          annotation: 'dataset/rttm/{uri}.rttm'
        test:
          uri: 'dataset/splits/test.txt'
          annotation: 'dataset/rttm/{uri}.rttm'

# Segmentation = working out who is speaking at each moment in time.