File size: 902 Bytes
79cf5f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import csv
import pathlib

import click


@click.command(help='Migrate transcriptions.txt in old datasets to transcriptions.csv')
@click.argument('input_txt', metavar='INPUT')
def convert_txt(
        input_txt: str
):
    """Convert a pipe-separated transcriptions file into a CSV next to it.

    Every non-blank line of INPUT is split on ``|``. Fields 0, 2 and 5 are
    written as the ``name``, ``ph_seq`` and ``ph_dur`` columns of a file that
    has the same path as INPUT but a ``.csv`` suffix.

    Args:
        input_txt: Path to the text file to migrate.

    Raises:
        click.BadParameter: If INPUT does not exist.
        click.ClickException: If a non-blank line has fewer than six fields.
    """
    txt_path = pathlib.Path(input_txt).resolve()
    # Explicit check rather than `assert`, which is removed under `python -O`.
    if not txt_path.exists():
        raise click.BadParameter('The input file does not exist.', param_hint='INPUT')

    utterances = []
    with open(txt_path, 'r', encoding='utf8') as f:
        for line_no, raw_line in enumerate(f, start=1):
            # Drop the line terminator so it cannot leak into the last field.
            line = raw_line.rstrip('\n')
            if not line.strip():
                # Tolerate blank lines, e.g. an empty line at the end of the file.
                continue
            fields = line.split('|')
            if len(fields) < 6:
                # Report the offending line instead of failing with IndexError.
                raise click.ClickException(
                    f"{txt_path}:{line_no}: expected at least 6 '|'-separated "
                    f"fields, got {len(fields)}."
                )
            utterances.append({
                'name': fields[0],
                'ph_seq': fields[2],
                'ph_dur': fields[5],
            })

    # newline='' lets the csv module control line endings itself.
    with open(txt_path.with_suffix('.csv'), 'w', encoding='utf8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=['name', 'ph_seq', 'ph_dur'])
        writer.writeheader()
        writer.writerows(utterances)


# Run the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    convert_txt()