|
|
import pathlib |
|
|
|
|
|
import click |
|
|
import librosa |
|
|
import soundfile |
|
|
import textgrid |
|
|
import tqdm |
|
|
|
|
|
|
|
|
def _clip_tier_to_sentence(source_tier, target_tier, sentence):
    """Copy the parts of ``source_tier`` that overlap ``sentence`` into ``target_tier``.

    Every interval is clipped to the sentence boundaries and shifted so that
    the sentence starts at time 0. Intervals with no positive-length overlap
    with the sentence are skipped.
    """
    for interval in source_tier:
        min_time = max(sentence.minTime, interval.minTime)
        max_time = min(sentence.maxTime, interval.maxTime)
        if min_time >= max_time:
            # No overlap (or the interval only touches the sentence boundary).
            continue
        target_tier.add(
            minTime=min_time - sentence.minTime,
            maxTime=max_time - sentence.minTime,
            mark=interval.mark,
        )


@click.command(help='Slice 3-tier TextGrids and long recordings into segmented 2-tier TextGrids and wavs')
@click.option(
    '--wavs', required=True,
    help='Directory containing the long wav recordings to be sliced'
)
@click.option(
    '--tg', required=False,
    help='Directory containing the 3-tier TextGrid files (defaults to wav directory)'
)
@click.option(
    '--out', required=True,
    help='Path to output directory for sliced files'
)
@click.option(
    '--preserve_sentence_names', is_flag=True,
    help='Whether to use sentence marks as filenames (will be re-numbered by default)'
)
@click.option(
    '--digits', required=False, type=int, default=3,
    help='Number of suffix digits (defaults to 3, will be padded with zeros on the left)'
)
@click.option(
    '--wav_subtype', required=False, default='PCM_16',
    help='Wav subtype (defaults to PCM_16)'
)
@click.option(
    '--overwrite', is_flag=True,
    help='Overwrite existing files'
)
def slice_tg(wavs, tg, out, preserve_sentence_names, digits, wav_subtype, overwrite):
    """Slice each TextGrid/wav pair into one TextGrid/wav pair per sentence.

    For every ``*.TextGrid`` in the TextGrid directory, the wav with the same
    stem is loaded from the wav directory. Tier 0 is read as sentences, tier 1
    as words and tier 2 as phones. Each sentence with a non-empty mark yields
    a 2-tier TextGrid (``words``, ``phones``) whose times are relative to the
    sentence start, plus the matching audio excerpt.

    Args:
        wavs: Directory holding the input wav files.
        tg: Directory holding the input TextGrid files; ``None`` means the
            wav directory is used.
        out: Output directory; created (with parents) if missing.
        preserve_sentence_names: Name outputs after the sentence mark instead
            of ``<input stem>_<zero-padded index>``.
        digits: Width of the zero-padded index suffix.
        wav_subtype: Subtype passed to ``soundfile.write``.
        overwrite: Allow replacing existing output files.

    Raises:
        FileExistsError: An output TextGrid or wav already exists and
            ``overwrite`` is not set.
    """
    wav_path_in = pathlib.Path(wavs)
    tg_path_in = wav_path_in if tg is None else pathlib.Path(tg)
    sliced_path_out = pathlib.Path(out)
    sliced_path_out.mkdir(parents=True, exist_ok=True)

    # Materialise the listing so tqdm knows the total and the processing
    # order is deterministic.
    tg_files = sorted(tg_path_in.glob('*.TextGrid'))
    for tg_file in tqdm.tqdm(tg_files):
        source_tg = textgrid.TextGrid()
        source_tg.read(tg_file)
        # sr=None keeps the sampling rate stored in the file.
        wav, sr = librosa.load((wav_path_in / tg_file.name).with_suffix('.wav'), sr=None)
        sentences_tier = source_tg[0]
        words_tier = source_tg[1]
        phones_tier = source_tg[2]

        # Counts only the sentences actually written for this input file.
        idx = 0
        for sentence in sentences_tier:
            if sentence.mark == '':
                continue

            sentence_words_tier = textgrid.IntervalTier(name='words')
            sentence_phones_tier = textgrid.IntervalTier(name='phones')
            _clip_tier_to_sentence(words_tier, sentence_words_tier, sentence)
            _clip_tier_to_sentence(phones_tier, sentence_phones_tier, sentence)

            sentence_tg = textgrid.TextGrid()
            sentence_tg.append(sentence_words_tier)
            sentence_tg.append(sentence_phones_tier)

            if preserve_sentence_names:
                tg_file_out = sliced_path_out / f'{sentence.mark}.TextGrid'
            else:
                tg_file_out = sliced_path_out / f'{tg_file.stem}_{str(idx).zfill(digits)}.TextGrid'
            wav_file_out = tg_file_out.with_suffix('.wav')

            # Check both targets before writing either of them.
            if tg_file_out.exists() and not overwrite:
                raise FileExistsError(str(tg_file_out))
            if wav_file_out.exists() and not overwrite:
                raise FileExistsError(str(wav_file_out))

            sentence_tg.write(tg_file_out)

            # The end index is one sample past the truncated end time,
            # capped at the length of the recording.
            start_sample = int(sentence.minTime * sr)
            end_sample = min(wav.shape[0], int(sentence.maxTime * sr) + 1)
            sentence_wav = wav[start_sample:end_sample]
            soundfile.write(
                wav_file_out,
                sentence_wav, samplerate=sr, subtype=wav_subtype
            )
            idx += 1
|
|
|
|
|
|
|
|
# Run the click command only when this file is executed as a script.
if __name__ == '__main__':
    slice_tg()
|
|
|