Spaces:
Sleeping
Sleeping
| """UltraSinger uses AI to automatically create UltraStar song files""" | |
| import copy | |
| import getopt | |
| import os | |
| import sys | |
| import re | |
| import Levenshtein | |
| import librosa | |
| from tqdm import tqdm | |
| from packaging import version | |
| import soundfile as sf | |
| from modules import os_helper | |
| from modules.Audio.denoise import ffmpeg_reduce_noise | |
| from modules.Audio.separation import separate_audio | |
| from modules.Audio.vocal_chunks import ( | |
| export_chunks_from_transcribed_data, | |
| export_chunks_from_ultrastar_data, | |
| ) | |
| from modules.Audio.silence_processing import remove_silence_from_transcription_data, get_silence_sections | |
| from modules.csv_handler import export_transcribed_data_to_csv | |
| from modules.Audio.convert_audio import convert_audio_to_mono_wav, convert_wav_to_mp3 | |
| from modules.Audio.youtube import ( | |
| download_youtube_audio, | |
| download_youtube_thumbnail, | |
| download_youtube_video, | |
| get_youtube_title, | |
| ) | |
| from modules.DeviceDetection.device_detection import check_gpu_support | |
| from modules.console_colors import ( | |
| ULTRASINGER_HEAD, | |
| blue_highlighted, | |
| gold_highlighted, | |
| light_blue_highlighted, | |
| red_highlighted, | |
| ) | |
| from modules.Midi import midi_creator | |
| from modules.Midi.midi_creator import ( | |
| convert_frequencies_to_notes, | |
| create_midi_notes_from_pitched_data, | |
| most_frequent, | |
| ) | |
| from modules.Pitcher.pitcher import ( | |
| get_frequencies_with_high_confidence, | |
| get_pitch_with_crepe_file, | |
| ) | |
| from modules.Pitcher.pitched_data import PitchedData | |
| from modules.Speech_Recognition.hyphenation import hyphenation, language_check, create_hyphenator | |
| from modules.Speech_Recognition.Whisper import transcribe_with_whisper | |
| from modules.Ultrastar import ultrastar_score_calculator, ultrastar_writer, ultrastar_converter, ultrastar_parser | |
| from modules.Ultrastar.ultrastar_txt import UltrastarTxtValue | |
| from Settings import Settings | |
| from modules.Speech_Recognition.TranscribedData import TranscribedData | |
| from modules.plot import plot, plot_spectrogram | |
| from modules.musicbrainz_client import get_music_infos | |
# Module-wide settings instance: init_settings() fills it from the command line
# and the pipeline functions in this file read their configuration from it.
settings = Settings()
def convert_midi_notes_to_ultrastar_notes(midi_notes: list[str]) -> list[int]:
    """Translate note names into Ultrastar note numbers.

    Every entry of ``midi_notes`` is converted with ``librosa.note_to_midi``
    and the result is passed to
    ``ultrastar_converter.midi_note_to_ultrastar_note``. The returned list
    holds one value per input note, in input order.
    """
    print(f"{ULTRASINGER_HEAD} Creating Ultrastar notes from midi data")

    # todo: Progress?
    return [
        ultrastar_converter.midi_note_to_ultrastar_note(
            librosa.note_to_midi(note_name)
        )
        for note_name in midi_notes
    ]
def pitch_each_chunk_with_crepe(directory: str) -> list[str]:
    """Pitch each ``.wav`` chunk in ``directory`` with crepe and return midi notes.

    The chunk files are processed in the numeric order of the second
    ``_``-separated field of their file name. For every chunk the most
    frequent note among the high-confidence frequencies is appended to the
    result, so the returned list has one note per chunk file.
    """
    print(
        f"{ULTRASINGER_HEAD} Pitching each chunk with {blue_highlighted('crepe')}"
    )

    # Respect the crepe CPU override exactly like pitch_audio() does; the
    # device does not change between chunks, so resolve it once.
    device = "cpu" if settings.force_crepe_cpu else settings.tensorflow_device

    chunk_files = sorted(
        (name for name in os.listdir(directory) if name.endswith(".wav")),
        key=lambda name: int(name.split("_")[1]),
    )

    midi_notes = []
    for filename in chunk_files:
        filepath = os.path.join(directory, filename)
        # todo: stepsize = duration? then when shorter than "it" it should take the duration. Otherwise there a more notes
        pitched_data = get_pitch_with_crepe_file(
            filepath,
            settings.crepe_model_capacity,
            settings.crepe_step_size,
            device,
        )
        conf_f = get_frequencies_with_high_confidence(
            pitched_data.frequencies, pitched_data.confidence
        )
        notes = convert_frequencies_to_notes(conf_f)
        # most_frequent() is indexed as [0][0] to pick the top note itself.
        midi_notes.append(most_frequent(notes)[0][0])
        # todo: Progress?
    return midi_notes
def add_hyphen_to_data(transcribed_data: list[TranscribedData], hyphen_words: list[list[str]]):
    """Split transcribed words into their hyphenated parts and return a new list.

    ``hyphen_words[i]`` holds the parts for ``transcribed_data[i]``. An entry
    without parts is passed through unchanged. Otherwise the entry is shallow
    copied once per part; the word's time span is divided evenly between the
    copies, each copy is flagged with ``is_hyphen = True`` and only the last
    copy keeps ``is_word_end = True``.
    """
    result = []
    for index, entry in enumerate(transcribed_data):
        syllables = hyphen_words[index]
        if not syllables:
            result.append(entry)
            continue

        count = len(syllables)
        step = (entry.end - entry.start) / count
        cursor = entry.start
        for position, syllable in enumerate(syllables):
            piece = copy.copy(entry)
            piece.start = cursor
            # Measure from the end of the word so the last part ends exactly
            # on the original end time.
            cursor = entry.end - step * (count - 1 - position)
            piece.end = cursor
            piece.word = syllable
            piece.is_hyphen = True
            piece.is_word_end = position == count - 1
            result.append(piece)
    return result
def get_bpm_from_data(data, sampling_rate):
    """Estimate the tempo of the given audio samples, print it and return it.

    The onset strength envelope of ``data`` is fed to ``librosa.beat.tempo``;
    the first element of its result is reported (rounded to two decimals) and
    returned unrounded.
    """
    tempo_estimates = librosa.beat.tempo(
        onset_envelope=librosa.onset.onset_strength(y=data, sr=sampling_rate),
        sr=sampling_rate,
    )
    bpm = tempo_estimates[0]
    print(f"{ULTRASINGER_HEAD} BPM is {blue_highlighted(str(round(bpm, 2)))}")
    return bpm
def get_bpm_from_file(wav_file: str) -> float:
    """Load ``wav_file`` at its native sampling rate and return its estimated bpm."""
    # librosa.load returns (samples, sampling_rate), matching the parameter
    # order of get_bpm_from_data.
    return get_bpm_from_data(*librosa.load(wav_file, sr=None))
def correct_words(recognized_words, word_list_file):
    """Replace recognized words that are not in a word list by their closest match.

    ``word_list_file`` is read as UTF-8 text and split on whitespace. Every
    entry of ``recognized_words`` whose ``word`` is not in that list gets the
    list word with the smallest Levenshtein distance assigned (each
    replacement is printed as ``old - new``). The entries are modified in
    place and the same list is returned.

    If the file contains no words there is nothing to compare against, so the
    input is returned unchanged.
    """
    with open(word_list_file, "r", encoding="utf-8") as file:
        word_list = file.read().split()

    if not word_list:
        # min() over an empty sequence would raise ValueError.
        return recognized_words

    # Set for the membership test; the list keeps the original order so ties
    # in distance still resolve to the first matching word.
    known_words = set(word_list)
    for rec_word in recognized_words:
        spoken = rec_word.word
        if spoken in known_words:
            continue
        closest_word = min(
            word_list,
            key=lambda candidate: Levenshtein.distance(spoken, candidate),
        )
        print(spoken + " - " + closest_word)
        rec_word.word = closest_word
    return recognized_words
def print_help() -> None:
    """Write the command line usage overview to stdout."""
    usage = """
UltraSinger.py [opt] [mode] [transcription] [pitcher] [extra]
[opt]
-h This help text.
-i Ultrastar.txt
audio like .mp3, .wav, youtube link
-o Output folder
[mode]
## INPUT is audio ##
default Creates all
# Single file creation selection is in progress, you currently getting all!
(-u Create ultrastar txt file) # In Progress
(-m Create midi file) # In Progress
(-s Create sheet file) # In Progress
## INPUT is ultrastar.txt ##
default Creates all
# Single selection is in progress, you currently getting all!
(-r repitch Ultrastar.txt (input has to be audio)) # In Progress
(-p Check pitch of Ultrastar.txt input) # In Progress
(-m Create midi file) # In Progress
[transcription]
# Default is whisper
--whisper Multilingual model > tiny|base|small|medium|large-v1|large-v2 >> ((default) is large-v2
English-only model > tiny.en|base.en|small.en|medium.en
--whisper_align_model Use other languages model for Whisper provided from huggingface.co
--language Override the language detected by whisper, does not affect transcription but steps after transcription
--whisper_batch_size Reduce if low on GPU mem >> ((default) is 16)
--whisper_compute_type Change to "int8" if low on GPU mem (may reduce accuracy) >> ((default) is "float16" for cuda devices, "int8" for cpu)
[pitcher]
# Default is crepe
--crepe tiny|full >> ((default) is full)
--crepe_step_size unit is miliseconds >> ((default) is 10)
[extra]
--hyphenation True|False >> ((default) is True)
--disable_separation True|False >> ((default) is False)
--disable_karaoke True|False >> ((default) is False)
--create_audio_chunks True|False >> ((default) is False)
--keep_cache True|False >> ((default) is False)
--plot True|False >> ((default) is False)
--format_version 0.3.0|1.0.0|1.1.0 >> ((default) is 1.0.0)
[device]
--force_cpu True|False >> ((default) is False) All steps will be forced to cpu
--force_whisper_cpu True|False >> ((default) is False) Only whisper will be forced to cpu
--force_crepe_cpu True|False >> ((default) is False) Only crepe will be forced to cpu
"""
    print(usage)
def remove_unecessary_punctuations(transcribed_data: list[TranscribedData]) -> None:
    """Delete the characters ``.`` and ``,`` from the word of every entry, in place."""
    # Translation table that maps both punctuation characters to "delete".
    strip_table = str.maketrans("", "", ".,")
    for entry in transcribed_data:
        entry.word = entry.word.translate(strip_table)
def hyphenate_each_word(language: str, transcribed_data: list[TranscribedData]) -> list[list[str]] | None:
    """Hyphenate each word in the transcribed data.

    Returns one hyphenation result per entry of ``transcribed_data`` (in the
    same order), or ``None`` when ``language_check`` does not know the
    language or hyphenation fails. In both failure cases an error hint is
    printed and the caller is expected to continue without hyphenation.
    """

    def report_failure() -> None:
        print(
            f"{ULTRASINGER_HEAD} {red_highlighted('Error in hyphenation for language ')} {blue_highlighted(language)}{red_highlighted(', maybe you want to disable it?')}"
        )

    lang_region = language_check(language)
    if lang_region is None:
        report_failure()
        return None

    try:
        hyphenator = create_hyphenator(lang_region)
        # Iterating the list directly lets tqdm know the total and show a
        # real progress bar.
        return [
            hyphenation(data.word, hyphenator)
            for data in tqdm(transcribed_data)
        ]
    except Exception:
        # Best effort: hyphenation is optional. Catch Exception rather than
        # using a bare except so Ctrl+C and sys.exit() are not swallowed.
        report_failure()
        return None
def print_support() -> None:
    """Print the closing support notice, preceded by a blank line."""
    print()
    messages = (
        f"{gold_highlighted('Do you like UltraSinger? Want it to be even better? Then help with your')} {light_blue_highlighted('support')}{gold_highlighted('!')}",
        "See project page -> https://github.com/rakuri255/UltraSinger",
        f"{gold_highlighted('This will help a lot to keep this project alive and improved.')}",
    )
    for message in messages:
        print(f"{ULTRASINGER_HEAD} {message}")
def print_version() -> None:
    """Print a blank line followed by the framed application version banner."""
    print()
    frame = f"{ULTRASINGER_HEAD} {gold_highlighted('*****************************')}"
    version_line = f"{ULTRASINGER_HEAD} {gold_highlighted('UltraSinger Version:')} {light_blue_highlighted(settings.APP_VERSION)}"
    for line in (frame, version_line, frame):
        print(line)
def run() -> None:
    """Run the whole UltraSinger pipeline for the input configured in ``settings``.

    Depending on ``settings.input_file_path`` the input is handled as an
    Ultrastar txt (re-pitch mode), a YouTube link or a local audio file. The
    steps are, in order: vocal separation, mp3 export of
    karaoke/instrumental/vocals (depending on the format version), denoising,
    mono conversion, muting of silent parts, transcription and hyphenation
    (audio input only), optional audio chunk export, pitching, optional
    plotting, writing the Ultrastar txt, score calculation, optional midi
    export and cache cleanup.
    """
    # An Ultrastar txt is recognised by its file extension. A plain substring
    # test would misclassify audio paths that merely contain ".txt" somewhere,
    # e.g. in a folder name.
    is_audio = not settings.input_file_path.lower().endswith(".txt")
    ultrastar_class = None
    real_bpm = None
    (title, artist, year, genre) = (None, None, None, None)

    if not is_audio:  # Parse Ultrastar txt
        print(
            f"{ULTRASINGER_HEAD} {gold_highlighted('re-pitch mode')}"
        )
        (
            basename_without_ext,
            real_bpm,
            song_output,
            ultrastar_audio_input_path,
            ultrastar_class,
        ) = parse_ultrastar_txt()
    elif settings.input_file_path.startswith("https:"):  # Youtube
        print(
            f"{ULTRASINGER_HEAD} {gold_highlighted('full automatic mode')}"
        )
        (
            basename_without_ext,
            song_output,
            ultrastar_audio_input_path,
            (title, artist, year, genre)
        ) = download_from_youtube()
    else:  # Audio File
        print(
            f"{ULTRASINGER_HEAD} {gold_highlighted('full automatic mode')}"
        )
        (
            basename_without_ext,
            song_output,
            ultrastar_audio_input_path,
            (title, artist, year, genre)
        ) = infos_from_audio_input_file()

    cache_path = os.path.join(song_output, "cache")
    settings.processing_audio_path = os.path.join(
        cache_path, basename_without_ext + ".wav"
    )
    os_helper.create_folder(cache_path)

    # Separate vocal from audio
    audio_separation_path = separate_vocal_from_audio(
        basename_without_ext, cache_path, ultrastar_audio_input_path
    )
    vocals_path = os.path.join(audio_separation_path, "vocals.wav")
    instrumental_path = os.path.join(audio_separation_path, "no_vocals.wav")

    # Move instrumental and vocals
    # Format versions below 1.1.0 get a separate karaoke mp3; from 1.1.0 on
    # the instrumental and vocals tracks are exported instead.
    if settings.create_karaoke and version.parse(settings.format_version) < version.parse("1.1.0"):
        karaoke_output_path = os.path.join(song_output, basename_without_ext + " [Karaoke].mp3")
        convert_wav_to_mp3(instrumental_path, karaoke_output_path)

    if version.parse(settings.format_version) >= version.parse("1.1.0"):
        instrumental_output_path = os.path.join(song_output, basename_without_ext + " [Instrumental].mp3")
        convert_wav_to_mp3(instrumental_path, instrumental_output_path)
        vocals_output_path = os.path.join(song_output, basename_without_ext + " [Vocals].mp3")
        convert_wav_to_mp3(vocals_path, vocals_output_path)

    if settings.use_separated_vocal:
        input_path = vocals_path
    else:
        input_path = ultrastar_audio_input_path

    # Denoise vocal audio
    denoised_output_path = os.path.join(
        cache_path, basename_without_ext + "_denoised.wav"
    )
    denoise_vocal_audio(input_path, denoised_output_path)

    # Convert to mono audio
    mono_output_path = os.path.join(
        cache_path, basename_without_ext + "_mono.wav"
    )
    convert_audio_to_mono_wav(denoised_output_path, mono_output_path)

    # Mute silence sections
    mute_output_path = os.path.join(
        cache_path, basename_without_ext + "_mute.wav"
    )
    mute_no_singing_parts(mono_output_path, mute_output_path)

    # Define the audio file to process
    settings.processing_audio_path = mute_output_path

    # Audio transcription
    transcribed_data = None
    language = settings.language
    if is_audio:
        detected_language, transcribed_data = transcribe_audio()
        # An explicitly configured language wins over the detected one.
        if language is None:
            language = detected_language

        remove_unecessary_punctuations(transcribed_data)

        if settings.hyphenation:
            hyphen_words = hyphenate_each_word(language, transcribed_data)

            # None means hyphenation failed; keep the un-hyphenated words.
            if hyphen_words is not None:
                transcribed_data = add_hyphen_to_data(transcribed_data, hyphen_words)

        transcribed_data = remove_silence_from_transcription_data(
            settings.processing_audio_path, transcribed_data
        )

        # todo: do we need to correct words?
        # lyric = 'input/faber_lyric.txt'
        # --corrected_words = correct_words(vosk_speech, lyric)

    # Create audio chunks
    if settings.create_audio_chunks:
        create_audio_chunks(
            cache_path,
            is_audio,
            transcribed_data,
            ultrastar_audio_input_path,
            ultrastar_class,
        )

    # Pitch the audio
    midi_notes, pitched_data, ultrastar_note_numbers = pitch_audio(
        is_audio, transcribed_data, ultrastar_class
    )

    # Create plot
    if settings.create_plot:
        plot_spectrogram(vocals_path, song_output, "vocals.wav")
        plot_spectrogram(settings.processing_audio_path, song_output, "processing audio")
        plot(pitched_data, song_output, transcribed_data, ultrastar_class, midi_notes)

    # Write Ultrastar txt
    if is_audio:
        real_bpm, ultrastar_file_output = create_ultrastar_txt_from_automation(
            basename_without_ext,
            song_output,
            transcribed_data,
            ultrastar_audio_input_path,
            ultrastar_note_numbers,
            language,
            title,
            artist,
            year,
            genre,
        )
    else:
        ultrastar_file_output = create_ultrastar_txt_from_ultrastar_data(
            song_output, ultrastar_class, ultrastar_note_numbers
        )

    # Calc Points
    ultrastar_class, simple_score, accurate_score = calculate_score_points(
        is_audio, pitched_data, ultrastar_class, ultrastar_file_output
    )

    # Add calculated score to Ultrastar txt #Todo: Missing Karaoke
    ultrastar_writer.add_score_to_ultrastar_txt(
        ultrastar_file_output, simple_score
    )

    # Midi
    if settings.create_midi:
        create_midi_file(real_bpm, song_output, ultrastar_class, basename_without_ext)

    # Cleanup
    if not settings.keep_cache:
        remove_cache_folder(cache_path)

    # Print Support
    print_support()
def mute_no_singing_parts(mono_output_path, mute_output_path):
    """Write a copy of the audio in which every silence section is zeroed.

    The sections come from ``get_silence_sections(mono_output_path)``; each
    one is indexed as ``[0]`` for its start and ``[1]`` for its end, both
    multiplied by the sampling rate to get sample positions. The result is
    written to ``mute_output_path`` with the sampling rate of the input.
    """
    print(f"{ULTRASINGER_HEAD} Mute audio parts with no singing")

    silence_sections = get_silence_sections(mono_output_path)
    samples, sample_rate = librosa.load(mono_output_path, sr=None)

    for section in silence_sections:
        first_sample = int(section[0] * sample_rate)
        last_sample = int(section[1] * sample_rate)
        # Overwrite the whole range with silence.
        samples[first_sample:last_sample] = 0

    sf.write(mute_output_path, samples, sample_rate)
def get_unused_song_output_dir(path: str) -> str:
    """Return ``path`` if no such folder exists, else the first free ``"<path> (<n>)"``.

    ``n`` starts at 1. If all variants up to ``(999)`` are taken, an error is
    printed and the program exits with status 1.
    """
    if not os_helper.check_if_folder_exists(path):
        return path

    # Always build the candidate from the untouched base path. Rewriting the
    # previous candidate with str.replace() would also change a "(n)" that
    # belongs to the song name itself (e.g. "Track (2) (2)" -> "Track (3) (3)").
    for attempt in range(1, 1000):
        candidate = f"{path} ({attempt})"
        if not os_helper.check_if_folder_exists(candidate):
            return candidate

    print(
        f"{ULTRASINGER_HEAD} {red_highlighted('Error: Could not create output folder! (999) is the maximum number of tries.')}"
    )
    sys.exit(1)
def transcribe_audio() -> tuple[str, list[TranscribedData]]:
    """Transcribe ``settings.processing_audio_path`` with the configured transcriber.

    Returns the detected language and the transcribed data, in that order.

    Raises:
        NotImplementedError: if ``settings.transcriber`` is not ``"whisper"``.
    """
    if settings.transcriber != "whisper":
        raise NotImplementedError

    device = "cpu" if settings.force_whisper_cpu else settings.pytorch_device
    # transcribe_with_whisper returns (data, language); this function returns
    # them in the opposite order.
    transcribed_data, detected_language = transcribe_with_whisper(
        settings.processing_audio_path,
        settings.whisper_model,
        device,
        settings.whisper_align_model,
        settings.whisper_batch_size,
        settings.whisper_compute_type,
        settings.language,
    )
    return detected_language, transcribed_data
def separate_vocal_from_audio(
    basename_without_ext: str, cache_path: str, ultrastar_audio_input_path: str
) -> str:
    """Run the vocal separation when needed and return its output folder.

    The separation is only started when the separated vocals or the karaoke
    track are requested in the settings. The returned path
    (``<cache>/separated/htdemucs/<basename>``) is the same either way.
    """
    separation_needed = settings.use_separated_vocal or settings.create_karaoke
    if separation_needed:
        separate_audio(ultrastar_audio_input_path, cache_path, settings.pytorch_device)

    return os.path.join(cache_path, "separated", "htdemucs", basename_without_ext)
def calculate_score_points(
    is_audio: bool, pitched_data: PitchedData, ultrastar_class: UltrastarTxtValue, ultrastar_file_output: str
):
    """Score the written Ultrastar txt against the pitched data.

    In re-pitch mode (``is_audio`` is false) the score of the original
    ``ultrastar_class`` is calculated and printed first. In both modes
    ``ultrastar_file_output`` is then parsed, scored and printed.

    Returns the parsed Ultrastar data of the output file together with its
    simple and accurate score.
    """
    if not is_audio:
        print(
            f"{ULTRASINGER_HEAD} {blue_highlighted('Score of original Ultrastar txt')}"
        )
        original_simple, original_accurate = ultrastar_score_calculator.calculate_score(
            pitched_data, ultrastar_class
        )
        ultrastar_score_calculator.print_score_calculation(
            original_simple, original_accurate
        )
        print(
            f"{ULTRASINGER_HEAD} {blue_highlighted('Score of re-pitched Ultrastar txt')}"
        )

    # Shared tail: score the file that was just written.
    ultrastar_class = ultrastar_parser.parse_ultrastar_txt(ultrastar_file_output)
    simple_score, accurate_score = ultrastar_score_calculator.calculate_score(
        pitched_data, ultrastar_class
    )
    ultrastar_score_calculator.print_score_calculation(simple_score, accurate_score)
    return ultrastar_class, simple_score, accurate_score
def create_ultrastar_txt_from_ultrastar_data(
    song_output: str, ultrastar_class: UltrastarTxtValue, ultrastar_note_numbers: list[int]
) -> str:
    """Write the re-pitched Ultrastar txt and return its path.

    The file is named after the song title and placed in ``song_output``; it
    is created from ``settings.input_file_path`` with the given note numbers.
    """
    target_file = os.path.join(song_output, ultrastar_class.title + ".txt")
    ultrastar_writer.create_repitched_txt_from_ultrastar_data(
        settings.input_file_path, ultrastar_note_numbers, target_file
    )
    return target_file
def create_ultrastar_txt_from_automation(
    basename_without_ext: str,
    song_output: str,
    transcribed_data: list[TranscribedData],
    ultrastar_audio_input_path: str,
    ultrastar_note_numbers: list[int],
    language: str,
    title: str,
    artist: str,
    year: str,
    genre: str
):
    """Build the Ultrastar header and write the txt file(s) for an audio input.

    The header fields default to names derived from ``basename_without_ext``;
    ``title``, ``artist``, ``year`` and ``genre`` override or extend them when
    they are not ``None``. The bpm is measured from
    ``ultrastar_audio_input_path``. For format versions below 1.1.0 with
    karaoke enabled, a second ``[Karaoke]`` txt is written as well.

    Returns the measured bpm and the path of the main txt file.
    """
    mp3_name = basename_without_ext + ".mp3"
    cover_name = basename_without_ext + " [CO].jpg"

    header = UltrastarTxtValue()
    header.version = settings.format_version
    header.title = basename_without_ext
    header.artist = basename_without_ext
    header.mp3 = mp3_name
    header.audio = mp3_name
    header.vocals = basename_without_ext + " [Vocals].mp3"
    header.instrumental = basename_without_ext + " [Instrumental].mp3"
    header.video = basename_without_ext + ".mp4"
    header.language = language

    # Only reference a cover that is actually present in the output folder.
    if os_helper.check_file_exists(os.path.join(song_output, cover_name)):
        header.cover = cover_name
    else:
        header.cover = None

    header.creator = f"{header.creator} {Settings.APP_VERSION}"
    header.comment = f"{header.comment} {Settings.APP_VERSION}"

    # Additional data
    if title is not None:
        header.title = title
    if artist is not None:
        header.artist = artist
    if year is not None:
        header.year = extract_year(year)
    if genre is not None:
        header.genre = format_separated_string(genre)

    real_bpm = get_bpm_from_file(ultrastar_audio_input_path)
    ultrastar_file_output = os.path.join(song_output, basename_without_ext + ".txt")
    ultrastar_writer.create_ultrastar_txt_from_automation(
        transcribed_data,
        ultrastar_note_numbers,
        ultrastar_file_output,
        header,
        real_bpm,
    )

    wants_karaoke_txt = settings.create_karaoke and version.parse(
        settings.format_version
    ) < version.parse("1.1.0")
    if wants_karaoke_txt:
        # Reuse the header and only swap title and mp3 for the karaoke file.
        karaoke_title = basename_without_ext + " [Karaoke]"
        header.title = karaoke_title
        header.mp3 = karaoke_title + ".mp3"
        karaoke_txt_output_path = os.path.join(song_output, karaoke_title) + ".txt"
        ultrastar_writer.create_ultrastar_txt_from_automation(
            transcribed_data,
            ultrastar_note_numbers,
            karaoke_txt_output_path,
            header,
            real_bpm,
        )

    return real_bpm, ultrastar_file_output
def extract_year(date: str) -> str:
    """Return the first standalone four-digit number in ``date``.

    If ``date`` contains no such number it is returned unchanged.
    """
    found = re.search(r'\b\d{4}\b', date)
    return found.group(0) if found else date
def format_separated_string(data: str) -> str:
    """Normalise a ``;``, ``/`` or ``,`` separated list into ``"A, B, C"`` form.

    Blank entries are dropped. Every entry is stripped and capitalized; in
    entries containing ``-`` each dash-separated part is stripped and
    capitalized on its own.
    """
    entries = [
        entry
        for entry in re.sub(r'[;/]', ',', data).split(',')
        if entry.strip()
    ]
    # Splitting on "-" also covers entries without a dash: they yield a single
    # part that is stripped and capitalized as a whole.
    return ', '.join(
        '-'.join(part.strip().capitalize() for part in entry.split('-'))
        for entry in entries
    )
def infos_from_audio_input_file() -> tuple[str, str, str, tuple[str, str, str, str]]:
    """Prepare the output folder for a local audio file and collect song infos.

    Artist and title are taken from an ``"<artist> - <title>"`` file name when
    present and replaced by the result of ``get_music_infos`` when that
    returns a title. The input file is copied into a fresh output folder and
    renamed to the (possibly updated) base name.

    Returns the base name without extension, the output folder, the path of
    the copied audio file and a ``(title, artist, year, genre)`` tuple.
    """
    basename = os.path.basename(settings.input_file_path)
    basename_without_ext = os.path.splitext(basename)[0]

    artist, title = None, None
    if " - " in basename_without_ext:
        artist, title = basename_without_ext.split(" - ", 1)
        search_string = f"{artist} - {title}"
    else:
        search_string = basename_without_ext

    # Get additional data for song
    (title_info, artist_info, year_info, genre_info) = get_music_infos(search_string)

    if title_info is not None:
        title = title_info
        artist = artist_info

    if artist is not None and title is not None:
        # Looked-up names may contain characters that are not valid in file
        # names (e.g. the "/" in "AC/DC"), so sanitize them just like the
        # YouTube download path does.
        basename_without_ext = sanitize_filename(f"{artist} - {title}")
        extension = os.path.splitext(basename)[1]
        basename = f"{basename_without_ext}{extension}"

    song_output = os.path.join(settings.output_file_path, basename_without_ext)
    song_output = get_unused_song_output_dir(song_output)
    os_helper.create_folder(song_output)

    # Copy under the original file name first, then rename to the new one.
    os_helper.copy(settings.input_file_path, song_output)
    os_helper.rename(
        os.path.join(song_output, os.path.basename(settings.input_file_path)),
        os.path.join(song_output, basename),
    )
    ultrastar_audio_input_path = os.path.join(song_output, basename)
    return basename_without_ext, song_output, ultrastar_audio_input_path, (title, artist, year_info, genre_info)
# Pairs of (characters to replace, replacement text) applied by sanitize_filename.
FILENAME_REPLACEMENTS = (('?:"', ""), ("<", "("), (">", ")"), ("/\\|*", "-"))


def sanitize_filename(fname: str) -> str:
    """Replace or drop the characters listed in FILENAME_REPLACEMENTS.

    A name that ends with a period additionally loses all trailing periods
    and spaces.
    """
    # One translation table for all single-character replacements; none of
    # the replacement texts is itself a replaced character, so a single pass
    # gives the same result as replacing them one after another.
    table = str.maketrans(
        {
            char: replacement
            for chars, replacement in FILENAME_REPLACEMENTS
            for char in chars
        }
    )
    cleaned = fname.translate(table)
    # Windows does not like trailing periods
    return cleaned.rstrip(" .") if cleaned.endswith(".") else cleaned
def download_from_youtube() -> tuple[str, str, str, tuple[str, str, str, str]]:
    """Download audio, video and thumbnail of the YouTube input into a new folder.

    Artist and title come from ``get_youtube_title`` and are replaced by the
    result of ``get_music_infos`` when that returns a title. The sanitized
    ``"<artist> - <title>"`` is used as folder and file base name.

    Returns the base name without extension, the output folder, the expected
    path of the downloaded mp3 and a ``(title, artist, year, genre)`` tuple.
    """
    url = settings.input_file_path
    (artist, title) = get_youtube_title(url)

    # Get additional data for song
    (title_info, artist_info, year_info, genre_info) = get_music_infos(f"{artist} - {title}")
    if title_info is not None:
        title, artist = title_info, artist_info

    basename_without_ext = sanitize_filename(f"{artist} - {title}")

    song_output = get_unused_song_output_dir(
        os.path.join(settings.output_file_path, basename_without_ext)
    )
    os_helper.create_folder(song_output)

    # Same call signature for all three downloads; the order is kept.
    for download in (
        download_youtube_audio,
        download_youtube_video,
        download_youtube_thumbnail,
    ):
        download(settings.input_file_path, basename_without_ext, song_output)

    ultrastar_audio_input_path = os.path.join(song_output, basename_without_ext + ".mp3")
    return basename_without_ext, song_output, ultrastar_audio_input_path, (title, artist, year_info, genre_info)
def parse_ultrastar_txt() -> tuple[str, float, str, str, UltrastarTxtValue]:
    """Parse the Ultrastar txt input and create its output folder.

    The audio file named in the txt is expected next to the txt itself. The
    output folder is named ``"<artist> - <title>"`` inside
    ``settings.output_file_path``.

    Returns the audio base name without extension, the real bpm, the output
    folder, the audio file path and the parsed Ultrastar data.
    """
    txt_path = settings.input_file_path
    ultrastar_class = ultrastar_parser.parse_ultrastar_txt(txt_path)

    # The bpm may be written with a decimal comma.
    ultrastar_bpm = float(ultrastar_class.bpm.replace(",", "."))
    real_bpm = ultrastar_converter.ultrastar_bpm_to_real_bpm(ultrastar_bpm)

    audio_name = ultrastar_class.mp3
    basename_without_ext = os.path.splitext(audio_name)[0]
    ultrastar_audio_input_path = os.path.join(os.path.dirname(txt_path), audio_name)

    folder_name = ultrastar_class.artist.strip() + " - " + ultrastar_class.title.strip()
    song_output = get_unused_song_output_dir(
        str(os.path.join(settings.output_file_path, folder_name))
    )
    os_helper.create_folder(song_output)

    return (
        str(basename_without_ext),
        real_bpm,
        song_output,
        str(ultrastar_audio_input_path),
        ultrastar_class,
    )
def create_midi_file(real_bpm: float,
                     song_output: str,
                     ultrastar_class: UltrastarTxtValue,
                     basename_without_ext: str) -> None:
    """Write ``<basename_without_ext>.mid`` into ``song_output``.

    The Ultrastar data is converted into a single midi instrument which is
    written with the given bpm.
    """
    print(f"{ULTRASINGER_HEAD} Creating Midi with {blue_highlighted('pretty_midi')}")

    instrument = midi_creator.convert_ultrastar_to_midi_instrument(ultrastar_class)
    target_file = os.path.join(song_output, f"{basename_without_ext}.mid")
    midi_creator.instruments_to_midi([instrument], real_bpm, target_file)
def pitch_audio(is_audio: bool, transcribed_data: list[TranscribedData], ultrastar_class: UltrastarTxtValue) -> tuple[
    list[str], PitchedData, list[int]]:
    """Pitch the processing audio with crepe and derive the notes.

    The note boundaries come from ``transcribed_data`` for audio input and
    from the start/end times of ``ultrastar_class`` otherwise.

    Returns the midi notes, the raw pitched data and the Ultrastar note
    numbers.
    """
    # todo: chunk pitching as option?
    # midi_notes = pitch_each_chunk_with_crepe(chunk_folder_name)
    if settings.force_crepe_cpu:
        device = "cpu"
    else:
        device = settings.tensorflow_device

    pitched_data = get_pitch_with_crepe_file(
        settings.processing_audio_path,
        settings.crepe_model_capacity,
        settings.crepe_step_size,
        device,
    )

    if is_audio:
        start_times = [entry.start for entry in transcribed_data]
        end_times = [entry.end for entry in transcribed_data]
    else:
        start_times = ultrastar_class.startTimes
        end_times = ultrastar_class.endTimes

    midi_notes = create_midi_notes_from_pitched_data(
        start_times, end_times, pitched_data
    )
    return midi_notes, pitched_data, convert_midi_notes_to_ultrastar_notes(midi_notes)
def create_audio_chunks(
    cache_path: str,
    is_audio: bool,
    transcribed_data: list[TranscribedData],
    ultrastar_audio_input_path: str,
    ultrastar_class: UltrastarTxtValue
) -> None:
    """Export the audio chunks into the chunk folder inside ``cache_path``.

    For audio input the chunks are cut from the processing audio according to
    the transcribed data, which is also exported to ``_chunks.csv``. For an
    Ultrastar txt input the chunks are cut from its audio file instead.
    """
    chunk_folder = os.path.join(cache_path, settings.audio_chunk_folder_name)
    os_helper.create_folder(chunk_folder)

    if not is_audio:
        export_chunks_from_ultrastar_data(
            ultrastar_audio_input_path, ultrastar_class, chunk_folder
        )
        return

    # Audio input: chunks plus a csv of the transcription.
    export_chunks_from_transcribed_data(
        settings.processing_audio_path, transcribed_data, chunk_folder
    )
    export_transcribed_data_to_csv(
        transcribed_data, os.path.join(chunk_folder, "_chunks.csv")
    )
def denoise_vocal_audio(input_path: str, output_path: str) -> None:
    """Reduce the noise of ``input_path`` with ffmpeg and write it to ``output_path``."""
    ffmpeg_reduce_noise(input_path, output_path)
def main(argv: list[str]) -> None:
    """Program entry: print the version, apply ``argv`` to the settings, run, exit."""
    print_version()
    init_settings(argv)
    run()
    # Terminate explicitly once the pipeline has finished.
    sys.exit()
def remove_cache_folder(cache_path: str) -> None:
    """Delete the cache folder at ``cache_path``."""
    os_helper.remove_folder(cache_path)
def _parse_bool(value: str) -> bool:
    """Interpret a command line value such as ``True``/``false`` as a boolean.

    ``true``, ``1`` and ``yes`` (in any letter case) are true; everything else
    is false.
    """
    return value.strip().lower() in ("true", "1", "yes")


def init_settings(argv: list[str]) -> None:
    """Fill the module-level ``settings`` from the command line arguments.

    Prints the help text and exits when no option or ``-h`` is given, and
    exits with status 1 for an unsupported ``--format_version``. If no output
    folder was given, ``output`` next to the input file is used (the current
    working directory for links). Unless the cpu is forced, the available
    devices are detected at the end.

    All ``True|False`` options are parsed with ``_parse_bool`` so that
    ``False`` really switches an option off; a non-empty string such as
    ``"False"`` is truthy in Python and must not be stored or negated as-is.
    """
    long, short = arg_options()
    opts, args = getopt.getopt(argv, short, long)
    if len(opts) == 0:
        print_help()
        sys.exit()

    # Options are compared with ``==``: ``opt in ("--name")`` is a substring
    # test against a plain string (which, for example, let "-m" match "--midi").
    for opt, arg in opts:
        if opt == "-h":
            print_help()
            sys.exit()
        elif opt in ("-i", "--ifile"):
            settings.input_file_path = arg
        elif opt in ("-o", "--ofile"):
            settings.output_file_path = arg
        elif opt == "--whisper":
            settings.transcriber = "whisper"
            settings.whisper_model = arg
        elif opt == "--whisper_align_model":
            settings.whisper_align_model = arg
        elif opt == "--whisper_batch_size":
            settings.whisper_batch_size = int(arg)
        elif opt == "--whisper_compute_type":
            settings.whisper_compute_type = arg
        elif opt == "--language":
            settings.language = arg
        elif opt == "--crepe":
            settings.crepe_model_capacity = arg
        elif opt == "--crepe_step_size":
            settings.crepe_step_size = int(arg)
        elif opt == "--plot":
            settings.create_plot = _parse_bool(arg)
        elif opt == "--midi":
            settings.create_midi = _parse_bool(arg)
        elif opt == "--hyphenation":
            # Parsed instead of eval()'d: command line text must never be
            # executed as code.
            settings.hyphenation = _parse_bool(arg)
        elif opt == "--disable_separation":
            settings.use_separated_vocal = not _parse_bool(arg)
        elif opt == "--disable_karaoke":
            settings.create_karaoke = not _parse_bool(arg)
        elif opt == "--create_audio_chunks":
            settings.create_audio_chunks = _parse_bool(arg)
        elif opt == "--force_cpu":
            settings.force_cpu = _parse_bool(arg)
            if settings.force_cpu:
                # Hide all CUDA devices from the libraries.
                os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
        elif opt == "--force_whisper_cpu":
            settings.force_whisper_cpu = _parse_bool(arg)
        elif opt == "--force_crepe_cpu":
            settings.force_crepe_cpu = _parse_bool(arg)
        elif opt == "--format_version":
            if arg not in ("0.3.0", "1.0.0", "1.1.0"):
                print(
                    f"{ULTRASINGER_HEAD} {red_highlighted('Error: Format version')} {blue_highlighted(arg)} {red_highlighted('is not supported.')}"
                )
                sys.exit(1)
            settings.format_version = arg
        elif opt == "--keep_cache":
            # Accept both the bare flag (empty value) and an explicit
            # True|False value.
            settings.keep_cache = _parse_bool(arg) if arg else True

    if settings.output_file_path == "":
        if settings.input_file_path.startswith("https:"):
            dirname = os.getcwd()
        else:
            dirname = os.path.dirname(settings.input_file_path)
        settings.output_file_path = os.path.join(dirname, "output")

    if not settings.force_cpu:
        settings.tensorflow_device, settings.pytorch_device = check_gpu_support()
def arg_options():
    """Return the long and short option definitions for ``getopt``.

    Returns a ``(long, short)`` tuple: the list of long option names and the
    short option string. Every long option takes a value, hence the trailing
    ``=``; this includes ``keep_cache``, which the help text documents as
    ``--keep_cache True|False``.
    """
    short = "hi:o:amv:"
    long = [
        "ifile=",
        "ofile=",
        "crepe=",
        "crepe_step_size=",
        "whisper=",
        "whisper_align_model=",
        "whisper_batch_size=",
        "whisper_compute_type=",
        "language=",
        "plot=",
        "midi=",
        "hyphenation=",
        "disable_separation=",
        "disable_karaoke=",
        "create_audio_chunks=",
        "force_cpu=",
        "force_whisper_cpu=",
        "force_crepe_cpu=",
        "format_version=",
        "keep_cache=",
    ]
    return long, short
# Script entry point: hand the command line arguments (without the program
# name) to main().
if __name__ == "__main__":
    main(sys.argv[1:])