Spaces:
Runtime error
Runtime error
| import os | |
| from pytube import YouTube | |
| from src.music.utils import RATE_AUDIO_SAVE, slugify | |
| from src.music.config import MAX_LEN | |
# Delimiter characters used to build filtering keywords: a keyword only counts
# when it is preceded by one of `start_keywords` and followed by one of
# `end_keywords` (see get_all_keywords).
start_keywords = list(' (,:')
end_keywords = list(') .,!:')
def get_all_keywords(k):
    """Return every delimited variant of the keyword `k`.

    Each variant is `start + k + end` for every `start` in `start_keywords`
    and every `end` in `end_keywords`. The result is ordered by start
    delimiter first, then by end delimiter.
    """
    return [prefix + k + suffix for prefix in start_keywords for suffix in end_keywords]
# Substrings that mark a video as something other than solo piano; they are
# searched in the lower-cased video title by should_be_filtered.
# 'live' and 'trio' are only added in their delimited forms (see
# get_all_keywords) so that they are not matched inside longer words.
# 'sixtet' and 'contrebrasse' are misspellings kept for backward compatibility;
# the correct spellings 'sextet' and 'contrebasse' are the ones that can
# actually occur in titles.
filtered_keywords = [
    'duet', 'duo', 'quartet', 'orchestre', 'orchestra',
    'quintet', 'sixtet', 'sextet', 'septet', 'octet', 'backing track', 'accompaniment', 'string',
    'contrebrasse', 'contrebasse', 'drums', 'guitar',
] + get_all_keywords('live') + get_all_keywords('trio')
# Playlists and channels for which no keyword filtering should occur: they were
# pre-filtered already and are supposed to contain only piano.
playlist_and_channel_not_to_filter = [
    "https://www.youtube.com/c/MySheetMusicTranscriptions",
    "https://www.youtube.com/c/PianoNotion",
    "https://www.youtube.com/watch?v=3F5glYefwio&list=PLFv3ZQw-ZPxi2DH3Bau7lBC5K6zfPJZxc",
    "https://www.youtube.com/user/Mercuziopianist",
    "https://www.youtube.com/channel/UCy6NPK6-xeX7MZLaMARa5qg",
    "https://www.youtube.com/channel/UCKMRNFV2dWTWIJnymtA9_Iw",
    "https://www.youtube.com/c/pianomaedaful",
    "https://www.youtube.com/c/FrancescoParrinoMusic",
    "https://www.youtube.com/c/itsremco",
]

# URL exempted from keyword filtering. This is a single string, not a list, so
# `x in playlist_ok` is a substring test.
playlist_ok = "https://www.youtube.com/watch?v=sYv_vk6bJtk&list=PLO9E3V4rGLD9-0BEd3t-AvvMcVF1zOJPj"
def should_be_filtered(title, length, url, playlist_url, max_length):
    """Decide whether a video must be skipped, and explain why.

    Args:
        title: video title; matching is done on its lower-cased form.
        length: video duration, in the same unit as `max_length`
            (the message divides by 60 to print minutes).
        url: URL of the video, or None when the caller does not know it.
        playlist_url: URL of the playlist/channel the video comes from.
        max_length: maximum accepted duration.

    Returns:
        `(to_filter, reason)`. `reason` concatenates one fragment per
        triggered rule (each fragment ends with ', ') and is '' when the
        video is accepted.
    """
    lower_title = title.lower()
    reasons = []

    if length > max_length:
        reasons.append(f'it is too long (>{max_length/60:.1f} min), ')

    has_keyword = any(keyword in lower_title for keyword in filtered_keywords)
    from_prefiltered_source = playlist_url in playlist_and_channel_not_to_filter
    # NOTE(review): this exemption looks aimed at false positives of the 'live'
    # keyword, but it also disables every other keyword (e.g. 'duet') for any
    # title containing 'to live' or 'alive' - confirm this is intended.
    live_false_positive = 'to live' in lower_title or 'alive' in lower_title
    # `playlist_ok` is a string, so this is a substring test; a non-string url
    # (None when only a video object is available) would raise TypeError, so
    # it is treated as "not exempted".
    exempted_url = isinstance(url, str) and url in playlist_ok

    if has_keyword and not from_prefiltered_source and not live_false_positive and not exempted_url:
        reasons.append('it contains a filtered keyword, ')

    return bool(reasons), ''.join(reasons)
def convert_mp4_to_mp3(path, verbose=True):
    """Convert the .mp4 file at `path` to a mono .mp3 and delete the .mp4.

    The mp3 is written next to the source, with the same name and the
    extension replaced. Audio is resampled to `RATE_AUDIO_SAVE` Hz.

    Args:
        path: path of the source file; must end with '.mp4'.
        verbose: print progress messages.

    Raises:
        ValueError: if `path` does not end with '.mp4'.
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
            The source file is kept in that case.
        FileNotFoundError: if the ffmpeg executable cannot be found.
    """
    import subprocess  # local import so the block does not depend on the file header

    if verbose: print(f"Converting mp4 to mp3, in {path}\n")
    if not path.endswith('.mp4'):
        raise ValueError(f'expected a path ending with .mp4, got {path!r}')
    mp3_path = path[:-4] + '.mp3'
    # Argument list instead of a shell string: paths containing quotes, spaces
    # or shell metacharacters are passed to ffmpeg verbatim.
    command = [
        'ffmpeg', '-i', path,
        '-loglevel', 'panic',
        '-y',
        '-ac', '1',
        '-ar', str(int(RATE_AUDIO_SAVE)),
        mp3_path,
    ]
    # check=True: a failed conversion raises instead of silently falling
    # through to the deletion of the only copy of the audio.
    subprocess.run(command, check=True)
    os.remove(path)
    if verbose: print('\tDone.')
def pipeline_video(video, playlist_path, filename):
    """Download the audio of `video` into `playlist_path` and convert it to mp3.

    Args:
        video: video object exposing a `streams` attribute (e.g. a pytube
            `YouTube` instance).
        playlist_path: directory receiving the files.
        filename: base name (without extension) of the files to write.

    Returns:
        The bitrate, in kbps, of the downloaded audio stream.
    """
    # extract best stream for this video
    stream, kbps = extract_best_stream(video.streams)
    mp4_name = filename + '.mp4'
    stream.download(output_path=playlist_path, filename=mp4_name)
    # convert to mp3
    # The download joins directory and file name, so the same join is used here;
    # plain concatenation points at the wrong file when `playlist_path` has no
    # trailing separator.
    convert_mp4_to_mp3(os.path.join(playlist_path, mp4_name), verbose=False)
    return kbps
def extract_best_stream(streams):
    """Select the audio-only stream and report its bitrate.

    Args:
        streams: object exposing `get_audio_only()`.

    Returns:
        `(stream, kbps)` where `kbps` is the stream's `abr` attribute with its
        last four characters removed, parsed as an int.
    """
    audio_stream = streams.get_audio_only()
    # presumably `abr` looks like '128kbps'; the 4-character suffix is dropped
    bitrate_digits = audio_stream.abr[:-4]
    return audio_stream, int(bitrate_digits)
def get_title_and_length(video):
    """Gather the descriptive fields of `video`.

    Returns:
        `(title, filename, length, metadata)`, where `filename` is
        `slugify(title)` and the other values are read from the `title`,
        `length` and `metadata` attributes of `video`.
    """
    video_title = video.title
    return video_title, slugify(video_title), video.length, video.metadata
def url2audio(playlist_path, video_url=None, video=None, playlist_url='', apply_filters=False, verbose=False, level=0):
    """Download one video as an mp3 into `playlist_path`.

    Args:
        playlist_path: directory receiving the mp3.
        video_url: URL of the video; required when `video` is not given.
        video: already constructed video object; built with
            `YouTube(video_url)` when omitted.
        playlist_url: URL of the playlist/channel the video comes from,
            forwarded to `should_be_filtered`.
        apply_filters: when true, videos rejected by `should_be_filtered`
            are skipped.
        verbose: print progress messages.
        level: number of spaces used to indent the printed messages.

    Returns:
        A tuple `(audio_path, data, error)`:
        * downloaded: `(path, info_dict, '')`;
        * file already present: `(path, None, '')`;
        * filtered out: `(None, None, 'Filtered because ...')`;
        * failure: `(None, None, <trail of attempted steps> + ' Yes.')`.
    """
    assert video_url is not None or video is not None, 'needs either video or url'
    # Bound before the try block so the failure handler can always inspect it,
    # even when the error happens before the path is known.
    audio_path = None
    error_msg = 'Error in loading video?'
    try:
        if not video:
            video = YouTube(video_url)
        if video_url is None:
            # presumably a pytube YouTube object, which exposes `watch_url`;
            # stays None for objects without that attribute
            video_url = getattr(video, 'watch_url', None)
        error_msg += ' Nope. In extracting title and length?'
        title, filename, length, video_meta_data = get_title_and_length(video)

        if apply_filters:
            to_filter, reason = should_be_filtered(title, length, video_url, playlist_url, MAX_LEN)
            if to_filter:
                return None, None, f'Filtered because {reason}'

        # Same result as concatenation when `playlist_path` ends with a
        # separator, and still a valid path when it does not.
        audio_path = os.path.join(playlist_path, filename + ".mp3")
        if verbose: print(' ' * level + f'Downloading {title}, Url: {video_url}')
        if os.path.exists(audio_path):
            if verbose: print(' ' * (level + 2) + 'Song already downloaded')
            return audio_path, None, ''

        if length > MAX_LEN and verbose: print(' ' * (level + 2) + f'Long video ({int(length/60)} min), will be cut after {int(MAX_LEN/60)} min.')
        error_msg += ' Nope. In pipeline video?'
        kbps = None
        last_error = None
        # Best-effort retries. Only `Exception` is swallowed so that Ctrl-C and
        # interpreter shutdown are not eaten by the loop.
        for _ in range(5):
            try:
                kbps = pipeline_video(video, playlist_path, filename)
                break
            except Exception as err:
                last_error = err
        if kbps is None:
            raise RuntimeError('pipeline_video did not succeed after 5 attempts') from last_error
        error_msg += ' Nope. In dict filling?'
        data = dict(title=title, filename=filename, length=length, kbps=kbps, url=video_url, meta=video_meta_data)
        error_msg += ' Nope. '
        return audio_path, data, ''
    except Exception:
        if verbose: print(' ' * (level + 2) + f'Download failed with error {error_msg}')
        # remove a possibly partial mp3 left behind by the failed attempt
        if audio_path is not None and os.path.exists(audio_path):
            os.remove(audio_path)
        return None, None, error_msg + ' Yes.'