Spaces:
Sleeping
Sleeping
| """Plot transcribed data""" | |
| import os | |
| from dataclasses import dataclass | |
| from re import sub | |
| import librosa | |
| import numpy | |
| from matplotlib import pyplot as plt | |
| from matplotlib.patches import Rectangle | |
| from modules.Ultrastar.ultrastar_txt import UltrastarTxtValue | |
| from modules.console_colors import ULTRASINGER_HEAD | |
| from modules.Pitcher.pitched_data import PitchedData | |
| from modules.Pitcher.pitcher import get_pitched_data_with_high_confidence | |
| from modules.Speech_Recognition.TranscribedData import TranscribedData | |
@dataclass
class PlottedNote:
    """A musical note prepared for use as a y-axis tick/grid line in the plot.

    The ``@dataclass`` decorator supplies the positional ``__init__`` that
    ``create_plot_notes`` relies on when it builds instances as
    ``PlottedNote(note, frequency, frequency_log_10, octave)``.
    """

    # Note name including its octave, e.g. "C4" or "A#3".
    note: str
    # Frequency of the note in Hz.
    frequency: float
    # Base-10 logarithm of ``frequency``; this is the y coordinate in the plot.
    frequency_log_10: float
    # Octave number the note belongs to.
    octave: int
# Names of the twelve semitones of one octave, ascending, written with sharps.
NOTES = "C C# D D# E F F# G G# A A# B".split()
# Octave numbers for which plot notes are generated.
OCTAVES = list(range(9))
# Distance between two regular x-axis ticks (the x axis is labelled "Time (s)").
X_TICK_SIZE = 5
def get_frequency_range(midi_note: str) -> float:
    """Return the distance in Hz between a note and the next semitone above it.

    Args:
        midi_note: Note name understood by ``librosa.note_to_midi``.

    Returns:
        Frequency of the following MIDI number minus the frequency of
        ``midi_note``.
    """
    midi_number = librosa.note_to_midi(midi_note)
    lower_hz = librosa.midi_to_hz(midi_number)
    upper_hz = librosa.midi_to_hz(midi_number + 1)
    return upper_hz - lower_hz
def create_plot_notes(notes: list[str], octaves: list[int]) -> list[PlottedNote]:
    """Build the notes that can appear on the y axis of the plot.

    Args:
        notes: Note names without octave (e.g. "C", "C#").
        octaves: Octave numbers to combine with every entry of ``notes``.

    Returns:
        One ``PlottedNote`` per (octave, note) pair, ordered by octave first
        and by the order of ``notes`` within each octave.
    """
    labelled = [(f"{note}{octave}", octave) for octave in octaves for note in notes]

    result = []
    for name, octave in labelled:
        hz = librosa.note_to_hz(name)
        # The plot's y axis is log10(frequency), so store that value as well.
        result.append(PlottedNote(name, hz, numpy.log10([hz])[0], octave))
    return result
# All candidate y-axis notes, computed once when the module is imported.
PLOTTED_NOTES = create_plot_notes(notes=NOTES, octaves=OCTAVES)
def plot(
    pitched_data: PitchedData,
    output_path: str,
    transcribed_data: list[TranscribedData] = None,
    ultrastar_class: UltrastarTxtValue = None,
    midi_notes: list[str] = None,
    title: str = None,
) -> None:
    """Plot pitched data, optionally overlaid with words, and save it as SVG.

    The figure is written to ``output_path`` as ``plot.svg`` when ``title`` is
    ``None``, otherwise as ``plot_<snake-cased title>.svg``.

    Args:
        pitched_data: Pitch track with parallel ``times``, ``frequencies`` and
            ``confidence`` sequences. It is passed through
            ``get_pitched_data_with_high_confidence`` before plotting.
        output_path: Directory the SVG file is written to.
        transcribed_data: Optional words (with ``start``, ``end``, ``word``)
            to draw as rectangles.
        ultrastar_class: Optional alternative word source; only used when
            ``transcribed_data`` is ``None``.
        midi_notes: Note name per word, forwarded to ``plot_words``.
        title: Optional plot title, also used in the output file name.
    """
    # The step size is read from the second timestamp, so at least two
    # datapoints are required. Check this first instead of failing with an
    # IndexError on an empty or single-sample pitch track.
    if len(pitched_data.times) < 2:
        print(f"{ULTRASINGER_HEAD} Plot can't be created; too few datapoints")
        return

    # determine time between two datapoints if there is no gap (this is the step size crepe ran with)
    step_size = pitched_data.times[1]
    pitched_data = get_pitched_data_with_high_confidence(pitched_data)

    if len(pitched_data.frequencies) < 2:
        print(f"{ULTRASINGER_HEAD} Plot can't be created; too few datapoints")
        return

    print(
        f"{ULTRASINGER_HEAD} Creating plot{': ' + title if title is not None else ''}"
    )

    # map each frequency to logarithm with base 10 for a linear progression of values between the musical notes
    # see http://www.phon.ox.ac.uk/jcoleman/LOGARITH.htm
    frequencies_log_10 = numpy.log10(pitched_data.frequencies)

    # add 'nan' where there are gaps for frequency values so the graph is only continuous where it should be
    pitched_data_with_gaps = create_gaps(pitched_data, step_size)
    frequencies_log_10_with_gaps = numpy.log10(pitched_data_with_gaps.frequencies)

    # dynamically set the minimum and maximum values for x and y axes based on data
    y_lower_bound, y_upper_bound = determine_bounds(frequencies_log_10)
    ymin = max(0, y_lower_bound - 0.05)
    ymax = y_upper_bound + 0.05
    plt.ylim(ymin, ymax)
    xmin = min(pitched_data.times)
    xmax = max(pitched_data.times)
    plt.xlim(xmin, xmax)

    plt.xlabel("Time (s)")
    plt.ylabel("log10 of Frequency (Hz)")

    notes_within_range = set_axes_ticks_and_labels(pitched_data.times, ymin, ymax)

    # draw horizontal lines for each note; natural C notes are red, all others blue
    for note in notes_within_range:
        color = "b"
        if note.note.startswith("C") and not note.note.startswith("C#"):
            color = "r"
        plt.axhline(y=note.frequency_log_10, color=color, linestyle="-", linewidth=0.2)

    # create line and scatter plot of pitched data
    plt.plot(pitched_data_with_gaps.times, frequencies_log_10_with_gaps, linewidth=0.1)
    scatter_path_collection = plt.scatter(
        pitched_data_with_gaps.times,
        frequencies_log_10_with_gaps,
        s=5,
        c=pitched_data_with_gaps.confidence,
        cmap=plt.colormaps.get_cmap("gray").reversed(),
        vmin=0,
        vmax=1,
    )
    plt.figure(1).colorbar(scatter_path_collection, label="confidence")

    set_figure_dimensions(xmax - xmin, y_upper_bound - y_lower_bound)

    plot_words(transcribed_data, ultrastar_class, midi_notes)

    if title is not None:
        plt.title(label=title)

    plt.figure(1).tight_layout(h_pad=1.4)

    dpi = 200
    plt.savefig(
        os.path.join(
            output_path, f"plot{'' if title is None else '_' + snake(title)}.svg"
        ),
        dpi=dpi,
    )
    # clear figure and axes so a later plot starts from an empty canvas
    plt.clf()
    plt.cla()
def set_axes_ticks_and_labels(confidence, ymin, ymax):
    """Configure x and y ticks of the current plot and return the visible notes.

    Args:
        confidence: Sequence whose minimum and maximum become the first and
            last x tick. NOTE(review): despite the name, the caller in this
            file passes the time stamps here.
        ymin: Lower y limit (log10 of frequency).
        ymax: Upper y limit (log10 of frequency).

    Returns:
        The entries of ``PLOTTED_NOTES`` whose ``frequency_log_10`` lies within
        ``[ymin, ymax]``; these are also used as y tick labels.
    """
    visible_notes = [
        candidate
        for candidate in PLOTTED_NOTES
        if ymin <= candidate.frequency_log_10 <= ymax
    ]
    y_positions = [visible.frequency_log_10 for visible in visible_notes]
    y_labels = [visible.note for visible in visible_notes]
    plt.yticks(y_positions, y_labels)

    first_time = min(confidence)
    last_time = max(confidence)

    # Regular ticks sit on multiples of X_TICK_SIZE strictly after the first
    # value; the small 0.1 offset keeps the last multiple inside the range.
    min_tick = first_time // X_TICK_SIZE * X_TICK_SIZE + X_TICK_SIZE
    max_tick = last_time // X_TICK_SIZE * X_TICK_SIZE + 0.1
    ticks = numpy.arange(min_tick, max_tick, X_TICK_SIZE, dtype=int).tolist()

    # Always label the exact start and end of the data.
    if not ticks or ticks[0] != first_time:
        ticks = [first_time, *ticks]
    if len(ticks) == 1 or ticks[-1] != last_time:
        ticks = [*ticks, last_time]

    plt.xticks(ticks, list(map(str, ticks)))
    return visible_notes
def determine_bounds(frequency_log_10: list[float]) -> tuple[float, float]:
    """Return the 1st and 99th percentile of the given values.

    Args:
        frequency_log_10: Values to analyse (log10 of frequencies).

    Returns:
        ``(lower, upper)`` where ``lower`` is the 1st and ``upper`` the 99th
        percentile as computed by ``numpy.percentile``.
    """
    values = numpy.array(frequency_log_10)
    return numpy.percentile(values, 1), numpy.percentile(values, 99)
def set_figure_dimensions(time_range, frequency_log_10_range):
    """Scale figure 1 to the duration and pitch range of the plotted data.

    Args:
        time_range: Length of the x axis; the width is half of this value,
            but never less than 6.4.
        frequency_log_10_range: Extent of the y axis (log10 of frequency); the
            height is this value divided by 0.06, but never less than 4.
    """
    figure = plt.figure(1)
    figure.set_figwidth(max(6.4, time_range / 2))
    figure.set_figheight(max(4, frequency_log_10_range / 0.06))
def create_gaps(pitched_data: PitchedData, step_size: float) -> PitchedData:
    """Return a copy of the data with a 'nan' frequency inserted at every gap.

    A gap is detected when a datapoint lies more than ``step_size`` after the
    previous one. The inserted 'nan' entry reuses the time and confidence of
    the datapoint that follows the gap, so a line plot is interrupted there
    instead of connecting unrelated segments.

    Args:
        pitched_data: Source data with parallel ``times``, ``frequencies`` and
            ``confidence`` sequences.
        step_size: Largest time difference still treated as consecutive.

    Returns:
        A new ``PitchedData`` containing all original points plus gap markers.
    """
    result = PitchedData([], [], [])
    out_times = result.times
    out_frequencies = result.frequencies
    out_confidence = result.confidence

    last_time = 0
    for index, current_time in enumerate(pitched_data.times):
        current_confidence = pitched_data.confidence[index]

        is_adjacent = current_time - last_time <= step_size
        has_real_predecessor = (
            bool(out_frequencies) and str(out_frequencies[-1]) != "nan"
        )

        if has_real_predecessor and not is_adjacent:
            # Break the line right before the current point.
            out_times.append(current_time)
            out_frequencies.append(float("nan"))
            out_confidence.append(current_confidence)

        out_times.append(current_time)
        out_frequencies.append(pitched_data.frequencies[index])
        out_confidence.append(current_confidence)

        last_time = current_time

    return result
def plot_word(midi_note: str, start, end, word):
    """Draw one word as a translucent red rectangle with a rotated label.

    The rectangle spans ``start`` to ``end`` on the x axis and is vertically
    centred (in Hz) on the frequency of ``midi_note``, covering half of the
    distance to the next semitone in each direction. The y axis is log10 of
    frequency.

    Args:
        midi_note: Note name of the word's pitch.
        start: Left x coordinate of the rectangle.
        end: Right x coordinate of the rectangle.
        word: Text written at the top edge of the rectangle.
    """
    center_hz = librosa.note_to_hz(midi_note)
    half_span_hz = get_frequency_range(midi_note) / 2

    bottom = numpy.log10([center_hz - half_span_hz])[0]
    top = numpy.log10([center_hz + half_span_hz])[0]
    duration = end - start

    box = Rectangle(
        (start, bottom),
        duration,
        top - bottom,
        edgecolor="none",
        facecolor="red",
        alpha=0.5,
    )
    plt.gca().add_patch(box)
    # Place the label a quarter of the way into the box, at its top edge.
    plt.text(start + duration / 4, top, word, rotation=90)
def plot_words(transcribed_data: list[TranscribedData], ultrastar_class: UltrastarTxtValue, midi_notes: list[str]):
    """Draw a rectangle for each word.

    Words are taken from ``transcribed_data`` when it is given, otherwise from
    ``ultrastar_class``. Nothing is drawn when both are ``None`` or when no
    ``midi_notes`` are available, because a word cannot be positioned on the
    y axis without its note.

    Args:
        transcribed_data: Words with ``start``, ``end`` and ``word``
            attributes, or ``None``.
        ultrastar_class: Object with parallel ``words``, ``startTimes`` and
            ``endTimes`` sequences, or ``None``.
        midi_notes: Note name per word, in the same order as the words, or
            ``None``. Words and notes are paired positionally; surplus entries
            on either side are ignored.
    """
    # plot() allows midi_notes to be omitted; skip the overlay instead of
    # failing with a TypeError when indexing None.
    if midi_notes is None:
        return

    if transcribed_data is not None:
        for midi_note, data in zip(midi_notes, transcribed_data):
            plot_word(midi_note, data.start, data.end, data.word)
    elif ultrastar_class is not None:
        words = zip(
            midi_notes,
            ultrastar_class.startTimes,
            ultrastar_class.endTimes,
            ultrastar_class.words,
        )
        for midi_note, start, end, word in words:
            plot_word(midi_note, start, end, word)
def snake(s):
    """Convert a string to snake case.

    Hyphens are treated as spaces, a word boundary is inserted before every
    run of capital letters and before every capitalised word, and the
    resulting words are joined with underscores in lower case.
    """
    spaced = s.replace("-", " ")
    # Separate each run of capitals from whatever precedes it.
    spaced = sub("([A-Z]+)", r" \1", spaced)
    # Separate capitalised words, e.g. the "Server" in "HTTPServer".
    spaced = sub("([A-Z][a-z]+)", r" \1", spaced)
    words = spaced.split()
    return "_".join(words).lower()
def plot_spectrogram(audio_seperation_path: str,
                     output_path: str,
                     title: str = "Spectrogram",
                     ) -> None:
    """Plot the spectrogram of an audio file and save it as SVG.

    The figure is written to ``output_path`` as ``plot_<snake-cased title>.svg``,
    or as ``plot.svg`` when ``title`` is ``None``.

    Args:
        audio_seperation_path: Path of the audio file to load with librosa.
        output_path: Directory the SVG file is written to.
        title: Plot title, also used in the output file name. ``None`` omits
            the title and the file name suffix.
    """
    # title may be None (it is checked below as well), so never concatenate
    # it or pass it to snake() unconditionally.
    print(
        f"{ULTRASINGER_HEAD} Creating plot{': ' + title if title is not None else ''}"
    )

    # sr=None is passed so librosa does not resample to its default rate
    # (per the librosa.load documentation).
    audio, sr = librosa.load(audio_seperation_path, sr=None)
    # Only the frequency and time bins are needed for the axis limits.
    _, frequencies_found, times, _ = plt.specgram(audio, Fs=sr)
    plt.colorbar()

    if title is not None:
        plt.title(label=title)

    plt.xlabel("Time (s)")
    plt.ylabel("Frequency (Hz)")

    ymin = 0
    ymax = max(frequencies_found) + 0.05
    plt.ylim(ymin, ymax)
    xmin = 0
    xmax = max(times)
    plt.xlim(xmin, xmax)

    # Widen the figure with the duration of the audio, but keep a minimum width.
    plt.figure(1).set_figwidth(max(6.4, xmax))
    plt.figure(1).set_figheight(4)

    plt.figure(1).tight_layout(h_pad=1.4)

    dpi = 200
    plt.savefig(
        os.path.join(
            output_path, f"plot{'' if title is None else '_' + snake(title)}.svg"
        ),
        dpi=dpi,
    )
    # clear figure and axes so a later plot starts from an empty canvas
    plt.clf()
    plt.cla()