Spaces:

TIMBOVILL
/

UltraSingerUI

Sleeping

App Files Files Community

UltraSingerUI / src /modules /plot.py

TIMBOVILL

Upload 4 files

2542bcb verified over 1 year ago

raw

history blame contribute delete

9.74 kB

	"""Plot transcribed data"""
	import os
	from dataclasses import dataclass
	from re import sub

	import librosa
	import numpy
	from matplotlib import pyplot as plt
	from matplotlib.patches import Rectangle

	from modules.Ultrastar.ultrastar_txt import UltrastarTxtValue
	from modules.console_colors import ULTRASINGER_HEAD
	from modules.Pitcher.pitched_data import PitchedData
	from modules.Pitcher.pitcher import get_pitched_data_with_high_confidence
	from modules.Speech_Recognition.TranscribedData import TranscribedData


	@dataclass
	class PlottedNote:
	    """A single musical note prepared for use as a y-axis tick of the plot."""

	    # Note name including its octave number, e.g. "C#4".
	    note: str
	    # Frequency of the note in Hz.
	    frequency: float
	    # Base-10 logarithm of ``frequency``; the plot's y axis uses this scale.
	    frequency_log_10: float
	    # Octave number the note belongs to.
	    octave: int


	# The twelve pitch classes of one octave, in ascending order starting at C.
	NOTES = "C C# D D# E F F# G G# A A# B".split()
	# Octave numbers for which plot notes are generated (0 through 8 inclusive).
	OCTAVES = list(range(9))
	# Spacing of the regular x-axis ticks, in the same unit as the time axis (seconds).
	X_TICK_SIZE = 5


	def get_frequency_range(midi_note: str) -> float:
	    """Return the distance in Hz between a note and the next MIDI note above it.

	    :param midi_note: note name understood by ``librosa.note_to_midi`` (e.g. "A4")
	    :return: ``hz(midi + 1) - hz(midi)``
	    """
	    midi_number = librosa.note_to_midi(midi_note)
	    lower_hz = librosa.midi_to_hz(midi_number)
	    upper_hz = librosa.midi_to_hz(midi_number + 1)
	    return upper_hz - lower_hz


	def create_plot_notes(notes: list[str], octaves: list[int]) -> list[PlottedNote]:
	    """Build the notes used for the plot's y axis.

	    One ``PlottedNote`` is produced per (octave, note) combination, ordered
	    octave by octave and, within an octave, in the order given by ``notes``.

	    :param notes: note names without octave, e.g. "C#"
	    :param octaves: octave numbers to combine with every note name
	    :return: the generated notes with their frequency and log10 frequency
	    """

	    def build(name: str, octave: int) -> PlottedNote:
	        # librosa expects the octave appended to the note name, e.g. "C#" + "4".
	        label = name + str(octave)
	        hertz = librosa.note_to_hz(label)
	        return PlottedNote(label, hertz, numpy.log10([hertz])[0], octave)

	    return [build(name, octave) for octave in octaves for name in notes]


	# Every combination of NOTES and OCTAVES, computed once at import time;
	# used to pick y-axis ticks for the notes that fall inside the plotted range.
	PLOTTED_NOTES = create_plot_notes(NOTES, OCTAVES)


	def plot(
	    pitched_data: PitchedData,
	    output_path: str,
	    transcribed_data: list[TranscribedData] = None,
	    ultrastar_class: UltrastarTxtValue = None,
	    midi_notes: list[str] = None,
	    title: str = None,
	) -> None:
	    """Plot pitched data (and optionally the words on top of it) to an SVG file.

	    The figure shows log10(frequency) over time, one horizontal guide line per
	    note inside the visible range, the pitch points coloured by confidence and,
	    if word data is supplied, one rectangle per word.

	    :param pitched_data: times, frequencies and confidence values to plot
	    :param output_path: directory the SVG file is written to
	    :param transcribed_data: words to draw; takes precedence over ``ultrastar_class``
	    :param ultrastar_class: alternative word source, used when ``transcribed_data`` is None
	    :param midi_notes: note name per word, indexed in parallel with the words
	    :param title: optional figure title; also appended (snake-cased) to the file name
	    """

	    # The step size below is read from index 1, so bail out before indexing
	    # when the raw series is too short instead of raising IndexError.
	    if len(pitched_data.times) < 2:
	        print(f"{ULTRASINGER_HEAD} Plot can't be created; too few datapoints")
	        return

	    # determine time between two datapoints if there is no gap (this is the step size crepe ran with)
	    # NOTE(review): this equals the step only if the series starts at time 0 - confirm
	    step_size = pitched_data.times[1]
	    # presumably drops low-confidence points; see the helper in modules.Pitcher.pitcher
	    pitched_data = get_pitched_data_with_high_confidence(pitched_data)

	    if len(pitched_data.frequencies) < 2:
	        print(f"{ULTRASINGER_HEAD} Plot can't be created; too few datapoints")
	        return

	    print(
	        f"{ULTRASINGER_HEAD} Creating plot{': ' + title if title is not None else ''}"
	    )

	    # map each frequency to logarithm with base 10 for a linear progression of values between the musical notes
	    # see http://www.phon.ox.ac.uk/jcoleman/LOGARITH.htm
	    frequencies_log_10 = numpy.log10(pitched_data.frequencies)

	    # add 'nan' where there are gaps for frequency values so the graph is only continuous where it should be
	    pitched_data_with_gaps = create_gaps(pitched_data, step_size)
	    frequencies_log_10_with_gaps = numpy.log10(pitched_data_with_gaps.frequencies)

	    # dynamically set the minimum and maximum values for x and y axes based on data
	    y_lower_bound, y_upper_bound = determine_bounds(frequencies_log_10)
	    ymin = max(0, y_lower_bound - 0.05)
	    ymax = y_upper_bound + 0.05
	    plt.ylim(ymin, ymax)
	    xmin = min(pitched_data.times)
	    xmax = max(pitched_data.times)
	    plt.xlim(xmin, xmax)

	    plt.xlabel("Time (s)")
	    plt.ylabel("log10 of Frequency (Hz)")

	    notes_within_range = set_axes_ticks_and_labels(pitched_data.times, ymin, ymax)

	    # draw horizontal lines for each note; natural C notes are red to mark octave starts
	    for note in notes_within_range:
	        color = "b"
	        if note.note.startswith("C") and not note.note.startswith("C#"):
	            color = "r"
	        plt.axhline(y=note.frequency_log_10, color=color, linestyle="-", linewidth=0.2)

	    # create line and scatter plot of pitched data
	    plt.plot(pitched_data_with_gaps.times, frequencies_log_10_with_gaps, linewidth=0.1)
	    scatter_path_collection = plt.scatter(
	        pitched_data_with_gaps.times,
	        frequencies_log_10_with_gaps,
	        s=5,
	        c=pitched_data_with_gaps.confidence,
	        cmap=plt.colormaps.get_cmap("gray").reversed(),
	        vmin=0,
	        vmax=1,
	    )
	    plt.figure(1).colorbar(scatter_path_collection, label="confidence")

	    set_figure_dimensions(xmax - xmin, y_upper_bound - y_lower_bound)

	    plot_words(transcribed_data, ultrastar_class, midi_notes)

	    if title is not None:
	        plt.title(label=title)

	    plt.figure(1).tight_layout(h_pad=1.4)

	    dpi = 200
	    plt.savefig(
	        os.path.join(
	            output_path, f"plot{'' if title is None else '_' + snake(title)}.svg"
	        ),
	        dpi=dpi,
	    )
	    # reset the shared pyplot state so the next plot starts clean
	    plt.clf()
	    plt.cla()


	def set_axes_ticks_and_labels(confidence, ymin, ymax):
	    """Configure the tick positions and labels of both axes.

	    The y axis gets one tick per entry of ``PLOTTED_NOTES`` whose log10
	    frequency lies within ``[ymin, ymax]``, labelled with the note name. The x
	    axis gets a tick every ``X_TICK_SIZE`` plus the first and last value.

	    :param confidence: despite its name, the sequence of time values of the
	        plotted data (``plot`` passes ``pitched_data.times``)
	    :param ymin: lower y-axis limit (log10 of frequency)
	    :param ymax: upper y-axis limit (log10 of frequency)
	    :return: the notes that received a y-axis tick
	    """
	    visible_notes = []
	    for candidate in PLOTTED_NOTES:
	        if ymin <= candidate.frequency_log_10 <= ymax:
	            visible_notes.append(candidate)

	    tick_positions = [entry.frequency_log_10 for entry in visible_notes]
	    tick_labels = [entry.note for entry in visible_notes]
	    plt.yticks(tick_positions, tick_labels)

	    earliest = min(confidence)
	    latest = max(confidence)

	    # first multiple of X_TICK_SIZE strictly after the earliest value
	    first_regular_tick = earliest // X_TICK_SIZE * X_TICK_SIZE + X_TICK_SIZE
	    # the small offset makes the last full multiple inclusive for arange
	    arange_stop = latest // X_TICK_SIZE * X_TICK_SIZE + 0.1
	    x_ticks = numpy.arange(
	        first_regular_tick, arange_stop, X_TICK_SIZE, dtype=int
	    ).tolist()

	    # make sure the axis starts and ends with a labelled tick
	    if not x_ticks or x_ticks[0] != earliest:
	        x_ticks.insert(0, earliest)

	    if len(x_ticks) == 1 or x_ticks[-1] != latest:
	        x_ticks.append(latest)

	    plt.xticks(x_ticks, [str(tick) for tick in x_ticks])
	    return visible_notes


	def determine_bounds(frequency_log_10: list[float]) -> tuple[float, float]:
	    """Return the 1st and 99th percentile of the given values.

	    :param frequency_log_10: log10 frequency values of the plotted data
	    :return: ``(lower, upper)`` percentile values
	    """
	    values = numpy.array(frequency_log_10)
	    return numpy.percentile(values, 1), numpy.percentile(values, 99)


	def set_figure_dimensions(time_range, frequency_log_10_range):
	    """Scale figure 1 to the duration and pitch span of the plotted data.

	    The width is half the time range and the height is the log10 frequency
	    range divided by 0.06, but never less than 6.4 wide and 4 high.

	    :param time_range: difference between the largest and smallest time value
	    :param frequency_log_10_range: difference between the upper and lower
	        log10 frequency bound
	    """
	    figure = plt.figure(1)
	    figure.set_figwidth(max(6.4, time_range / 2))
	    figure.set_figheight(max(4, frequency_log_10_range / 0.06))


	def create_gaps(pitched_data: PitchedData, step_size: float) -> PitchedData:
	    """Return a copy of the data with a 'nan' frequency inserted at every gap.

	    A gap is detected when a point lies more than ``step_size`` after the
	    previous one. The inserted 'nan' point reuses the time and confidence of
	    the point following the gap, so a line plot breaks there instead of
	    connecting unrelated points.

	    :param pitched_data: data whose times, frequencies and confidence are read
	    :param step_size: largest time difference still treated as continuous
	    :return: new ``PitchedData`` containing the original points plus gap markers
	    """
	    result = PitchedData([], [], [])

	    def append_point(time_value, frequency, confidence_value):
	        result.times.append(time_value)
	        result.frequencies.append(frequency)
	        result.confidence.append(confidence_value)

	    last_time = 0
	    for index, current_time in enumerate(pitched_data.times):
	        is_continuous = current_time - last_time <= step_size
	        # only open a gap after a real value; never stack two markers
	        last_is_real_value = (
	            len(result.frequencies) > 0
	            and str(result.frequencies[-1]) != "nan"
	        )
	        if last_is_real_value and not is_continuous:
	            append_point(current_time, float("nan"), pitched_data.confidence[index])

	        append_point(
	            current_time,
	            pitched_data.frequencies[index],
	            pitched_data.confidence[index],
	        )
	        last_time = current_time

	    return result


	def plot_word(midi_note: str, start, end, word):
	    """Draw one word as a translucent red rectangle with a rotated text label.

	    The rectangle spans ``start`` to ``end`` on the x axis. On the y axis
	    (log10 of frequency) it is centred on the note's frequency and extends
	    half of ``get_frequency_range(midi_note)`` in Hz to either side.

	    :param midi_note: note name of the word, e.g. "A4"
	    :param start: x position where the word begins
	    :param end: x position where the word ends
	    :param word: text drawn at the top of the rectangle
	    """
	    center_hz = librosa.note_to_hz(midi_note)
	    half_span_hz = get_frequency_range(midi_note) / 2

	    # convert both edges to the log10 scale used by the y axis
	    bottom_log = numpy.log10([center_hz - half_span_hz])[0]
	    top_log = numpy.log10([center_hz + half_span_hz])[0]
	    duration = end - start

	    word_box = Rectangle(
	        (start, bottom_log),
	        duration,
	        top_log - bottom_log,
	        edgecolor="none",
	        facecolor="red",
	        alpha=0.5,
	    )
	    plt.gca().add_patch(word_box)
	    plt.text(start + duration / 4, top_log, word, rotation=90)


	def plot_words(transcribed_data: list[TranscribedData], ultrastar_class: UltrastarTxtValue, midi_notes: list[str]):
	    """Draw rectangles for each word.

	    Words are taken from ``transcribed_data`` when it is given, otherwise from
	    ``ultrastar_class``. Nothing is drawn when neither is given or when there
	    are no ``midi_notes`` to position the words with.

	    :param transcribed_data: words providing ``start``, ``end`` and ``word``
	    :param ultrastar_class: alternative source providing the parallel lists
	        ``words``, ``startTimes`` and ``endTimes``
	    :param midi_notes: note name per word, indexed in parallel with the words
	    """
	    # Without notes there is no pitch to place the words at; skip drawing
	    # instead of failing with a TypeError on ``None[i]``.
	    if midi_notes is None:
	        return

	    if transcribed_data is not None:
	        for i, data in enumerate(transcribed_data):
	            plot_word(midi_notes[i], data.start, data.end, data.word)

	    elif ultrastar_class is not None:
	        for i, word in enumerate(ultrastar_class.words):
	            plot_word(
	                midi_notes[i],
	                ultrastar_class.startTimes[i],
	                ultrastar_class.endTimes[i],
	                word,
	            )


	def snake(s):
	    """Convert a string to snake case.

	    Hyphens and whitespace act as word separators, and a new word also starts
	    at every run of capital letters and at every capitalised word. The words
	    are joined with underscores and lower-cased.
	    """
	    spaced = s.replace("-", " ")
	    # put a space in front of every run of capitals ...
	    spaced = sub("([A-Z]+)", r" \1", spaced)
	    # ... and in front of every capital followed by lower-case letters
	    spaced = sub("([A-Z][a-z]+)", r" \1", spaced)
	    words = spaced.split()
	    return "_".join(words).lower()


	def plot_spectrogram(audio_seperation_path: str,
	                     output_path: str,
	                     title: str = "Spectrogram",

	                     ) -> None:
	    """Plot the spectrogram of an audio file to an SVG file.

	    :param audio_seperation_path: path of the audio file to load
	    :param output_path: directory the SVG file is written to
	    :param title: figure title, also used (snake-cased) in the file name.
	        ``None`` omits the title and falls back to ``plot_spectrogram.svg``.
	    """

	    # ``title`` may be None (it is checked below), so never concatenate it blindly.
	    print(
	        f"{ULTRASINGER_HEAD} Creating plot{': ' + title if title is not None else ''}"
	    )

	    # sr=None is forwarded to librosa.load unchanged
	    audio, sr = librosa.load(audio_seperation_path, sr=None)
	    # only the frequency and time axes of the result are needed for the limits
	    _, frequencies_found, times, _ = plt.specgram(audio, Fs=sr)
	    plt.colorbar()

	    if title is not None:
	        plt.title(label=title)

	    plt.xlabel("Time (s)")
	    plt.ylabel("Frequency (Hz)")

	    ymin = 0
	    ymax = max(frequencies_found) + 0.05
	    plt.ylim(ymin, ymax)
	    xmin = 0
	    xmax = max(times)
	    plt.xlim(xmin, xmax)

	    # widen the figure with the duration of the audio, but never below 6.4
	    plt.figure(1).set_figwidth(max(6.4, xmax))
	    plt.figure(1).set_figheight(4)

	    plt.figure(1).tight_layout(h_pad=1.4)

	    # Keep a spectrogram-specific file name even without a title so the file
	    # cannot collide with the untitled "plot.svg" written by plot().
	    file_suffix = snake(title) if title is not None else "spectrogram"

	    dpi = 200
	    plt.savefig(
	        os.path.join(
	            output_path, f"plot_{file_suffix}.svg"
	        ),
	        dpi=dpi,
	    )
	    # reset the shared pyplot state so the next plot starts clean
	    plt.clf()
	    plt.cla()