File size: 4,056 Bytes

79cf5f5

import pathlib
import re
from typing import Dict, List

import click
import librosa
import natsort
import numpy
import soundfile
import textgrid
import tqdm


def remove_suffix(string: str, suffix_pattern: str) -> str:
    """Strip a trailing regular-expression match from ``string``.

    Args:
        string: Text to strip (in this script, a file stem).
        suffix_pattern: Regular expression describing the suffix. The whole
            expression is anchored to the end of ``string``.

    Returns:
        ``string`` without the matched suffix, or ``string`` unchanged when
        the pattern does not match at the end.
    """
    # Group the pattern so that ``$`` anchors the whole expression; without
    # the group, a top-level alternation such as ``_a|_b`` would only anchor
    # its last branch and earlier branches could match mid-string.
    match = re.search(f'(?:{suffix_pattern})$', string)
    if not match:
        return string
    # Slice by position instead of ``-len(match)``: for an empty match the
    # latter becomes ``string[:-0]``, which discards the entire string.
    return string[:match.start()]


def _append_shifted_intervals(source_tier, target_tier, segment_start, segment_end):
    """Append the intervals of ``source_tier`` to ``target_tier`` starting at ``segment_start``.

    Intervals are laid out back to back using only their durations. The last
    interval is stretched or shrunk to end exactly at ``segment_end`` so the
    tier stays aligned with the audio segment it belongs to.
    """
    last_index = len(source_tier) - 1
    start = segment_start
    for index, interval in enumerate(source_tier):
        end = segment_end if index == last_index else start + interval.duration()
        target_tier.add(minTime=start, maxTime=end, mark=interval.mark)
        start = end


@click.command(help='Combine segmented 2-tier TextGrids and wavs into 3-tier TextGrids and long wavs')
@click.option(
    '--wavs', required=True,
    help='Directory containing the segmented wav files'
)
@click.option(
    '--tg', required=False,
    help='Directory containing the segmented TextGrid files (defaults to wav directory)'
)
@click.option(
    '--out', required=True,
    help='Path to output directory for combined files'
)
@click.option(
    '--suffix', required=False, default=r'_\d+',
    help='Filename suffix pattern for file combination'
)
@click.option(
    '--wav_subtype', required=False, default='PCM_16',
    help='Wav subtype (defaults to PCM_16)'
)
@click.option(
    '--overwrite', is_flag=True,
    help='Overwrite existing files'
)
def combine_tg(wavs, tg, out, suffix, wav_subtype, overwrite):
    """Merge segmented TextGrid/wav pairs into one TextGrid and one wav per group.

    TextGrid files are grouped by their stem with the ``suffix`` pattern
    removed. Within a group the segments are processed in natural sort order;
    their audio is concatenated and their first two tiers are copied into the
    ``words`` and ``phones`` tiers of the combined TextGrid. A ``sentences``
    tier records one interval per segment, labelled with the segment's stem.

    Args:
        wavs: Directory containing the segmented wav files.
        tg: Directory containing the segmented TextGrid files, or ``None`` to
            use ``wavs``.
        out: Output directory; created if it does not exist.
        suffix: Regular expression for the filename suffix that distinguishes
            segments of the same group.
        wav_subtype: Subtype passed to ``soundfile.write``.
        overwrite: When false, refuse to replace existing output files.

    Raises:
        FileExistsError: An output file exists and ``overwrite`` is false.
        ValueError: Segments of one group have different sample rates.
    """
    wav_path_in = pathlib.Path(wavs)
    tg_path_in = wav_path_in if tg is None else pathlib.Path(tg)
    combined_path_out = pathlib.Path(out)
    combined_path_out.mkdir(parents=True, exist_ok=True)

    # Group segment TextGrids by their stem with the suffix removed.
    filelist: Dict[str, List[pathlib.Path]] = {}
    for tg_file in tg_path_in.glob('*.TextGrid'):
        filelist.setdefault(remove_suffix(tg_file.stem, suffix), []).append(tg_file)

    for name, files in tqdm.tqdm(sorted(filelist.items(), key=lambda kv: kv[0])):
        tg_file_out = combined_path_out / f'{name}.TextGrid'
        wav_file_out = tg_file_out.with_suffix('.wav')
        # Check for conflicts before loading any audio so that a refused
        # overwrite does not cost a full pass over the group's segments.
        if not overwrite:
            for target in (wav_file_out, tg_file_out):
                if target.exists():
                    raise FileExistsError(str(target))

        wav_segments = []
        sentences_tier = textgrid.IntervalTier(name='sentences')
        words_tier = textgrid.IntervalTier(name='words')
        phones_tier = textgrid.IntervalTier(name='phones')
        sentence_start = 0.
        sr = None
        for tg_file in natsort.natsorted(files):
            wav_file = (wav_path_in / tg_file.name).with_suffix('.wav')
            waveform, sr_ = librosa.load(wav_file, sr=None)
            if sr is None:
                sr = sr_
            elif sr_ != sr:
                # Raised explicitly (not asserted) so the check survives ``python -O``.
                raise ValueError(
                    f'Cannot combine \'{tg_file.stem}\': incompatible samplerate ({sr_} != {sr})'
                )
            sentence_end = waveform.shape[0] / sr + sentence_start
            wav_segments.append(waveform)
            sentences_tier.add(minTime=sentence_start, maxTime=sentence_end, mark=wav_file.stem)

            sentence_tg = textgrid.TextGrid()
            sentence_tg.read(tg_file)
            # Tier 0 feeds the words tier and tier 1 feeds the phones tier.
            _append_shifted_intervals(sentence_tg[0], words_tier, sentence_start, sentence_end)
            _append_shifted_intervals(sentence_tg[1], phones_tier, sentence_start, sentence_end)
            sentence_start = sentence_end

        combined_tg = textgrid.TextGrid()
        combined_tg.append(sentences_tier)
        combined_tg.append(words_tier)
        combined_tg.append(phones_tier)

        combined_tg.write(tg_file_out)
        full_wav = numpy.concatenate(wav_segments)
        soundfile.write(wav_file_out, full_wav, samplerate=sr, subtype=wav_subtype)


# Invoke the click command only when this file is executed as a script.
if __name__ == '__main__':
    combine_tg()