Spaces:

TIMBOVILL
/

UltraSingerUI

Sleeping

App Files Files Community

TIMBOVILL committed on Jun 16, 2024

Commit

2542bcb

verified ·

1 Parent(s): 3ed071b

Upload 4 files

Browse files

Files changed (4) hide show

src/modules/console_colors.py +45 -0
src/modules/csv_handler.py +47 -0
src/modules/plot.py +303 -0
src/modules/timer.py +26 -0

src/modules/console_colors.py ADDED Viewed

	@@ -0,0 +1,45 @@

+"""Colors for the console"""
+# Console prefix for UltraSinger messages: the "[UltraSinger]" tag wrapped in
+# the ANSI escape codes 92 (bright green foreground) and 0 (reset).
+ULTRASINGER_HEAD = "\033[92m[UltraSinger]\033[0m"
+def blue_highlighted(text: str) -> str:
+    """Returns a blue highlighted text"""
+    return f"{Bcolors.blue}{text}{Bcolors.endc}"
+def gold_highlighted(text: str) -> str:
+    """Returns a gold highlighted text"""
+    return f"{Bcolors.gold}{text}{Bcolors.endc}"
+def light_blue_highlighted(text: str) -> str:
+    """Returns a light blue highlighted text"""
+    return f"{Bcolors.light_blue}{text}{Bcolors.endc}"
+def underlined(text: str) -> str:
+    """Returns an underlined text"""
+    return f"{Bcolors.underline}{text}{Bcolors.endc}"
+def red_highlighted(text: str) -> str:
+    """Returns a red highlighted text"""
+    return f"{Bcolors.red}{text}{Bcolors.endc}"
+def cyan_highlighted(text: str) -> str:
+    """Returns a cyan highlighted text"""
+    return f"{Bcolors.cyan}{text}{Bcolors.endc}"
+class Bcolors:
+    """ANSI escape sequences used by the highlight helpers in this module.
+
+    Each attribute is a raw escape string; ``endc`` must follow any of the
+    others to restore the terminal's default rendering.
+    """
+    # SGR 94: bright blue foreground
+    blue = "\033[94m"
+    # SGR 91: bright red foreground
+    red = "\033[91m"
+    # SGR 96: bright cyan foreground (exposed here as "light blue")
+    light_blue = "\033[96m"
+    # SGR 36: cyan foreground
+    cyan = "\033[36m"
+    # SGR 93: bright yellow foreground (exposed here as "gold")
+    gold = "\033[93m"
+    # SGR 4: underline
+    underline = "\033[4m"
+    # SGR 0: reset all attributes
+    endc = "\033[0m"

src/modules/csv_handler.py ADDED Viewed

	@@ -0,0 +1,47 @@

+"""CSV export module"""
+import csv
+from modules.console_colors import ULTRASINGER_HEAD
+from modules.Speech_Recognition.TranscribedData import TranscribedData
+def export_transcribed_data_to_csv(transcribed_data: list[TranscribedData], filename: str) -> None:
+    """Export transcribed data to csv"""
+    print(f"{ULTRASINGER_HEAD} Exporting transcribed data to CSV")
+    with open(filename, "w", encoding="utf-8", newline="") as csvfile:
+        writer = csv.writer(csvfile)
+        header = ["word", "start", "end", "confidence"]
+        writer.writerow(header)
+        for i, data in enumerate(transcribed_data):
+            writer.writerow(
+                [
+                    data.word,
+                    data.start,
+                    data.end,
+                    data.conf,
+                ]
+            )
+def write_lists_to_csv(times, frequencies, confidences, filename: str):
+    """Write lists to csv"""
+    with open(filename, "w", encoding="utf-8", newline="") as csvfile:
+        writer = csv.writer(csvfile)
+        header = ["time", "frequency", "confidence"]
+        writer.writerow(header)
+        for i in enumerate(times):
+            pos = i[0]
+            writer.writerow([times[pos], frequencies[pos], confidences[pos]])
+def read_data_from_csv(filename: str):
+    """Read data from csv"""
+    csv_data = []
+    with open(filename, "r", encoding="utf-8") as csv_file:
+        csv_reader = csv.reader(csv_file)
+        for line in csv_reader:
+            csv_data.append(line)
+    headless_data = csv_data[1:]
+    return headless_data

src/modules/plot.py ADDED Viewed

	@@ -0,0 +1,303 @@

+"""Plot transcribed data"""
+import os
+from dataclasses import dataclass
+from re import sub
+import librosa
+import numpy
+from matplotlib import pyplot as plt
+from matplotlib.patches import Rectangle
+from modules.Ultrastar.ultrastar_txt import UltrastarTxtValue
+from modules.console_colors import ULTRASINGER_HEAD
+from modules.Pitcher.pitched_data import PitchedData
+from modules.Pitcher.pitcher import get_pitched_data_with_high_confidence
+from modules.Speech_Recognition.TranscribedData import TranscribedData
+@dataclass
+class PlottedNote:
+    """A musical note that can be drawn as a labelled tick/line on the plot's y axis."""
+    # note name including its octave, e.g. "C4" (see create_plot_notes)
+    note: str
+    # frequency of the note in Hz, as returned by librosa.note_to_hz
+    frequency: float
+    # base-10 logarithm of `frequency`; this is the y position used in the plot
+    frequency_log_10: float
+    # octave number the note belongs to
+    octave: int
+# Names of the twelve notes of one octave, in ascending order.
+NOTES = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
+# Octave numbers for which plottable notes are generated.
+OCTAVES = [0, 1, 2, 3, 4, 5, 6, 7, 8]
+# Spacing between two regular x axis ticks (the x axis is labelled "Time (s)").
+X_TICK_SIZE = 5
+def get_frequency_range(midi_note: str) -> float:
+    """Get frequency range"""
+    midi = librosa.note_to_midi(midi_note)
+    frequency_range = librosa.midi_to_hz(midi + 1) - librosa.midi_to_hz(midi)
+    return frequency_range
+def create_plot_notes(notes: list[str], octaves: list[int]) -> list[PlottedNote]:
+    """Create list of notes for plot y axis"""
+    plotted_notes = []
+    for octave in octaves:
+        for note in notes:
+            note_with_octave = note + str(octave)
+            frequency = librosa.note_to_hz(note_with_octave)
+            frequency_log_10 = numpy.log10([frequency])[0]
+            plotted_notes.append(
+                PlottedNote(note_with_octave, frequency, frequency_log_10, octave)
+            )
+    return plotted_notes
+# All candidate y axis notes, computed once when the module is imported.
+PLOTTED_NOTES = create_plot_notes(NOTES, OCTAVES)
+def plot(
+        pitched_data: PitchedData,
+        output_path: str,
+        transcribed_data: list[TranscribedData] = None,
+        ultrastar_class: UltrastarTxtValue = None,
+        midi_notes: list[str] = None,
+        title: str = None,
+) -> None:
+    """Plot transcribed data"""
+    # determine time between to datapoints if there is no gap (this is the step size crepe ran with)
+    step_size = pitched_data.times[1]
+    pitched_data = get_pitched_data_with_high_confidence(pitched_data)
+    if len(pitched_data.frequencies) < 2:
+        print(f"{ULTRASINGER_HEAD} Plot can't be created; too few datapoints")
+        return
+    print(
+        f"{ULTRASINGER_HEAD} Creating plot{': ' + title if title is not None else ''}"
+    )
+    # map each frequency to logarithm with base 10 for a linear progression of values between the musical notes
+    # see http://www.phon.ox.ac.uk/jcoleman/LOGARITH.htm
+    frequencies_log_10 = numpy.log10(pitched_data.frequencies)
+    # add 'nan' where there are gaps for frequency values so the graph is only continuous where it should be
+    pitched_data_with_gaps = create_gaps(pitched_data, step_size)
+    frequencies_log_10_with_gaps = numpy.log10(pitched_data_with_gaps.frequencies)
+    # dynamically set the minimum and maximum values for x and y axes based on data
+    y_lower_bound, y_upper_bound = determine_bounds(frequencies_log_10)
+    ymin = max(0, y_lower_bound - 0.05)
+    ymax = y_upper_bound + 0.05
+    plt.ylim(ymin, ymax)
+    xmin = min(pitched_data.times)
+    xmax = max(pitched_data.times)
+    plt.xlim(xmin, xmax)
+    plt.xlabel("Time (s)")
+    plt.ylabel("log10 of Frequency (Hz)")
+    notes_within_range = set_axes_ticks_and_labels(pitched_data.times, ymin, ymax)
+    # draw horizontal lines for each note
+    for note in notes_within_range:
+        color = "b"
+        if note.note.startswith("C") and not note.note.startswith("C#"):
+            color = "r"
+        plt.axhline(y=note.frequency_log_10, color=color, linestyle="-", linewidth=0.2)
+    # create line and scatter plot of pitched data
+    plt.plot(pitched_data_with_gaps.times, frequencies_log_10_with_gaps, linewidth=0.1)
+    scatter_path_collection = plt.scatter(
+        pitched_data_with_gaps.times,
+        frequencies_log_10_with_gaps,
+        s=5,
+        c=pitched_data_with_gaps.confidence,
+        cmap=plt.colormaps.get_cmap("gray").reversed(),
+        vmin=0,
+        vmax=1,
+    )
+    plt.figure(1).colorbar(scatter_path_collection, label="confidence")
+    set_figure_dimensions(xmax - xmin, y_upper_bound - y_lower_bound)
+    plot_words(transcribed_data, ultrastar_class, midi_notes)
+    if title is not None:
+        plt.title(label=title)
+    plt.figure(1).tight_layout(h_pad=1.4)
+    dpi = 200
+    plt.savefig(
+        os.path.join(
+            output_path, f"plot{'' if title is None else '_' + snake(title)}.svg"
+        ),
+        dpi=dpi,
+    )
+    plt.clf()
+    plt.cla()
+def set_axes_ticks_and_labels(confidence, ymin, ymax):
+    """Set ticks and their labels for x and y axes"""
+    notes_within_range = [
+        x for x in PLOTTED_NOTES if ymin <= x.frequency_log_10 <= ymax
+    ]
+    plt.yticks(
+        [x.frequency_log_10 for x in notes_within_range],
+        [x.note for x in notes_within_range],
+    )
+    first_time = min(confidence)
+    min_tick = first_time // X_TICK_SIZE * X_TICK_SIZE + X_TICK_SIZE
+    last_time = max(confidence)
+    max_tick = last_time // X_TICK_SIZE * X_TICK_SIZE + 0.1
+    ticks = numpy.arange(min_tick, max_tick, X_TICK_SIZE, dtype=int).tolist()
+    if len(ticks) == 0 or ticks[0] != first_time:
+        ticks.insert(0, first_time)
+    if len(ticks) == 1 or ticks[-1] != last_time:
+        ticks.append(last_time)
+    plt.xticks(ticks, [str(x) for x in ticks])
+    return notes_within_range
+def determine_bounds(frequency_log_10: list[float]) -> tuple[float, float]:
+    """Determine bounds based on 1st and 99th percentile of data"""
+    lower = numpy.percentile(numpy.array(frequency_log_10), 1)
+    upper = numpy.percentile(numpy.array(frequency_log_10), 99)
+    return lower, upper
+def set_figure_dimensions(time_range, frequency_log_10_range):
+    """Dynamically scale the figure dimensions based on the duration/frequency amplitude of the song"""
+    height = frequency_log_10_range / 0.06
+    width = time_range / 2
+    plt.figure(1).set_figwidth(max(6.4, width))
+    plt.figure(1).set_figheight(max(4, height))
+def create_gaps(pitched_data: PitchedData, step_size: float) -> PitchedData:
+    """
+    Add 'nan' where there are no high confidence frequency values.
+    This way the graph is only continuous where it should be.
+    """
+    pitched_data_with_gaps = PitchedData([], [], [])
+    previous_time = 0
+    for i, time in enumerate(pitched_data.times):
+        comes_right_after_previous = time - previous_time <= step_size
+        previous_frequency_is_not_gap = (
+                len(pitched_data_with_gaps.frequencies) > 0
+                and str(pitched_data_with_gaps.frequencies[-1]) != "nan"
+        )
+        if previous_frequency_is_not_gap and not comes_right_after_previous:
+            pitched_data_with_gaps.times.append(time)
+            pitched_data_with_gaps.frequencies.append(float("nan"))
+            pitched_data_with_gaps.confidence.append(pitched_data.confidence[i])
+        pitched_data_with_gaps.times.append(time)
+        pitched_data_with_gaps.frequencies.append(pitched_data.frequencies[i])
+        pitched_data_with_gaps.confidence.append(pitched_data.confidence[i])
+        previous_time = time
+    return pitched_data_with_gaps
+def plot_word(midi_note: str, start, end, word):
+    note_frequency = librosa.note_to_hz(midi_note)
+    frequency_range = get_frequency_range(midi_note)
+    half_frequency_range = frequency_range / 2
+    height = (
+            numpy.log10([note_frequency + half_frequency_range])[0]
+            - numpy.log10([note_frequency - half_frequency_range])[0]
+    )
+    xy_start_pos = (
+        start,
+        numpy.log10([note_frequency - half_frequency_range])[0],
+    )
+    width = end - start
+    rect = Rectangle(
+        xy_start_pos,
+        width,
+        height,
+        edgecolor="none",
+        facecolor="red",
+        alpha=0.5,
+    )
+    plt.gca().add_patch(rect)
+    plt.text(start + width / 4, numpy.log10([note_frequency + half_frequency_range])[0], word, rotation=90)
+def plot_words(transcribed_data: list[TranscribedData], ultrastar_class: UltrastarTxtValue, midi_notes: list[str]):
+    """Draw rectangles for each word"""
+    if transcribed_data is not None:
+        for i, data in enumerate(transcribed_data):
+            plot_word(midi_notes[i], data.start, data.end, data.word)
+    elif ultrastar_class is not None:
+        for i, data in enumerate(ultrastar_class.words):
+            plot_word(midi_notes[i], ultrastar_class.startTimes[i], ultrastar_class.endTimes[i],
+                      ultrastar_class.words[i])
+def snake(s):
+    """Turn any string into a snake case string"""
+    return "_".join(
+        sub(
+            "([A-Z][a-z]+)", r" \1", sub("([A-Z]+)", r" \1", s.replace("-", " "))
+        ).split()
+    ).lower()
+def plot_spectrogram(audio_seperation_path: str,
+                     output_path: str,
+                     title: str = "Spectrogram",
+                     ) -> None:
+    """Plot spectrogram of data"""
+    print(
+        f"{ULTRASINGER_HEAD} Creating plot{': ' + title}"
+    )
+    audio, sr = librosa.load(audio_seperation_path, sr=None)
+    powerSpectrum, frequenciesFound, time, imageAxis = plt.specgram(audio, Fs=sr)
+    plt.colorbar()
+    if title is not None:
+        plt.title(label=title)
+    plt.xlabel("Time (s)")
+    plt.ylabel("Frequency (Hz)")
+    ymin = 0
+    ymax = max(frequenciesFound) + 0.05
+    plt.ylim(ymin, ymax)
+    xmin = 0
+    xmax = max(time)
+    plt.xlim(xmin, xmax)
+    plt.figure(1).set_figwidth(max(6.4, xmax))
+    plt.figure(1).set_figheight(4)
+    plt.figure(1).tight_layout(h_pad=1.4)
+    dpi = 200
+    plt.savefig(
+        os.path.join(
+            output_path, f"plot{'_' + snake(title)}.svg"
+        ),
+        dpi=dpi,
+    )
+    plt.clf()
+    plt.cla()

src/modules/timer.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import atexit
+from functools import reduce
+from time import process_time
+from modules.console_colors import ULTRASINGER_HEAD
+def seconds_to_str(t):
+    """Format seconds to string"""
+    return "%d:%02d:%02d.%03d" % reduce(
+        lambda ll, b: divmod(ll[0], b) + ll[1:], [(t * 1000,), 1000, 60, 60]
+    )
+def log(s):
+    """Log line with optional time elapsed"""
+    print(f"{ULTRASINGER_HEAD} {seconds_to_str(process_time())} - {s}")
+def end_log():
+    """Log the "End Program" line; registered below as an atexit handler"""
+    log("End Program")
+atexit.register(end_log)
+log("Initialized...")