ameerazam08's picture
Upload folder using huggingface_hub
79cf5f5 verified
import pathlib
import click
import librosa
import matplotlib.pyplot as plt
import numpy as np
import parselmouth as pm
import tqdm
from textgrid import TextGrid
import distribution
def _dominant_midi(word_pitch):
    """Return the most frequent integer MIDI bin in *word_pitch*, or ``None``.

    Negative values (including the ``-inf`` produced by converting an f0 of
    0 to the MIDI scale) are discarded first. ``None`` is returned when
    nothing remains.
    """
    voiced = word_pitch[word_pitch >= 0]
    if voiced.size == 0:
        return None
    # NOTE(review): astype() truncates, so bin k collects pitches in
    # [k, k + 1) rather than the nearest-semitone range [k - 0.5, k + 0.5).
    # Kept as-is to preserve existing results; confirm whether rounding
    # was intended.
    return int(np.bincount(voiced.astype(np.int64)).argmax())


@click.command(help='Generate word-level pitch summary')
@click.option('--wavs', required=True, help='Path to the segments directory')
@click.option('--tg', required=True, help='Path to the TextGrids directory')
def summary_pitch(wavs, tg):
    """Plot how often each MIDI note is the dominant pitch of a word.

    For every ``*.wav`` file in ``wavs``, the TextGrid with the same stem is
    read from ``tg`` and the f0 contour is extracted with parselmouth. For
    each interval of the first tier (intervals marked ``'AP'`` or ``'SP'``
    and intervals shorter than one analysis step are skipped), the most
    frequent integer MIDI value among its voiced frames is counted once.
    The resulting histogram is drawn with ``distribution.draw_distribution``
    and saved as ``pitch_distribution.jpg`` inside ``wavs``.

    Args:
        wavs: Path to the directory containing the ``.wav`` segments.
        tg: Path to the directory containing the matching ``.TextGrid`` files.

    Raises:
        click.ClickException: If no voiced word was found at all (for
            example when ``wavs`` contains no ``.wav`` files), since there
            is nothing to plot.
    """
    wav_dir = pathlib.Path(wavs)
    tg_dir = pathlib.Path(tg)

    # Pitch-analysis settings passed to parselmouth's to_pitch_ac().
    f0_min = 40.
    f0_max = 1100.
    voicing_thresh_vowel = 0.45
    timestep = 0.01

    pit_map = {}
    for wavfile in tqdm.tqdm(list(wav_dir.glob('*.wav'))):
        grid = TextGrid()
        grid.read(tg_dir / wavfile.with_suffix('.TextGrid').name)

        f0 = pm.Sound(str(wavfile)).to_pitch_ac(
            time_step=timestep,
            voicing_threshold=voicing_thresh_vowel,
            pitch_floor=f0_min,
            pitch_ceiling=f0_max,
        ).selected_array['frequency']

        # Frames with f0 == 0 become -inf here and are dropped later by the
        # ``>= 0`` filter, so the divide-by-zero warning is just noise.
        with np.errstate(divide='ignore'):
            pitch = 12. * np.log2(f0 / 440.) + 69.

        for word in grid[0]:
            if word.mark in ('AP', 'SP'):
                continue
            if word.maxTime - word.minTime < timestep:
                continue
            # NOTE(review): this treats frame i as located at i * timestep;
            # confirm against the actual time of the first analysis frame.
            start = int(word.minTime / timestep)
            end = int(word.maxTime / timestep)
            midi = _dominant_midi(pitch[start:end])
            if midi is None:
                continue
            pit_map[midi] = pit_map.get(midi, 0) + 1

    if not pit_map:
        # Previously this fell through to an IndexError on an empty key list.
        raise click.ClickException(
            f'No voiced words found for the wav files in {wav_dir}; '
            'nothing to summarize.'
        )

    # Fill the gaps so that notes that never occurred show up as zero.
    midi_keys = list(range(min(pit_map), max(pit_map) + 1))
    distribution.draw_distribution(
        title='Pitch Distribution Summary',
        x_label='Pitch',
        y_label='Number of occurrences',
        items=[librosa.midi_to_note(k) for k in midi_keys],
        values=[pit_map.get(k, 0) for k in midi_keys]
    )
    pitch_summary = wav_dir / 'pitch_distribution.jpg'
    plt.savefig(fname=pitch_summary,
                bbox_inches='tight',
                pad_inches=0.25)
    print(f'Pitch distribution summary saved to {pitch_summary}')
# Invoke the click command only when this file is executed as a script.
if __name__ == '__main__':
    summary_pitch()