Spaces:
Sleeping
Sleeping
| import gradio as gr, glob, os, auditok, zipfile, wave, pytube.exceptions, librosa, time | |
| from pytube import YouTube | |
| from moviepy.editor import VideoFileClip | |
def download_video(url, download_as):
    """Download a YouTube video and save its audio track as ``output.<format>``.

    Args:
        url: YouTube video URL.
        download_as: Target audio format, ``"wav"`` or ``"mp3"``.

    Returns:
        A status message for the UI.

    Raises:
        gr.Error: If the format is unsupported, pytube rejects the URL, or
            the video has no audio track.
    """
    if download_as not in ("wav", "mp3"):
        raise gr.Error(f"Unsupported audio format: {download_as}. Choose wav or mp3.")
    try:
        yt = YouTube(url)
    except pytube.exceptions.RegexMatchError as err:
        raise gr.Error("URL not valid or is empty! Please fix the link or enter one!") from err

    video = yt.streams.get_highest_resolution()
    # download() returns the path it wrote, so only that file is cleaned up
    # afterwards instead of every *.mp4 in the working directory.
    video_path = video.download()
    try:
        video_clip = VideoFileClip(video_path)
        audio_clip = video_clip.audio
        try:
            if audio_clip is None:
                raise gr.Error("The downloaded video has no audio track.")
            # The splitter prefers output.mp3 whenever it exists, so drop a
            # stale file in the other format before writing the new one.
            for ext in ("wav", "mp3"):
                stale = f"output.{ext}"
                if ext != download_as and os.path.exists(stale):
                    os.remove(stale)
            audio_clip.write_audiofile(f"output.{download_as}")
        finally:
            if audio_clip is not None:
                audio_clip.close()
            video_clip.close()
    finally:
        if os.path.exists(video_path):
            os.remove(video_path)
    return "Finished downloading! Please proceed to next tab."
def split_audio_from_yt_video(mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur):
    """Split the downloaded ``output.mp3``/``output.wav`` on silence and zip the parts.

    Args:
        mindur: Minimum duration (seconds) of a split region.
        maxdur: Maximum duration (seconds) of a split region.
        name_for_split_files: Prefix for the generated ``<prefix>-<n>.wav`` files.
        show_amount_of_files_and_file_dur: When true, also report how many
            files were created and their combined length in minutes.

    Returns:
        ``(status_message, zip_file_name)``.

    Raises:
        gr.Error: On missing input audio, invalid durations, an empty name,
            no detected regions, or an unreadable split file.
    """
    if show_amount_of_files_and_file_dur:
        gr.Warning("show_amount_of_files_and_file_dur set to True. This will take longer if your audio file is long.")
    if not os.path.exists("output.mp3") and not os.path.exists("output.wav"):
        raise gr.Error("Neither output.mp3 or output.wav exist! Did the video download correctly?")
    if mindur == maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, min and max are the same number.")
    if mindur > maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, mindur is higher than maxdur.")
    # An untouched textbox may arrive as "" rather than None, so test for blank text.
    if not name_for_split_files or not name_for_split_files.strip():
        raise gr.Error("Split files name cannot be empty!")

    audio_path = "output.mp3" if os.path.exists("output.mp3") else "output.wav"
    audio_regions = auditok.split(
        audio_path,
        min_dur=mindur,
        max_dur=maxdur,
        max_silence=0.3,
        energy_threshold=45,
    )
    zip_file_name = "audio_files.zip"
    # Track exactly the files created here so unrelated *.wav files in the
    # working directory are neither zipped nor deleted.
    split_files = []
    try:
        for i, region in enumerate(audio_regions, start=1):
            split_files.append(region.save(f"{name_for_split_files}-{i}.wav"))
        # Remove the source only after the region generator has been consumed.
        os.remove(audio_path)
        if not split_files:
            raise gr.Error("No audio regions were detected, so nothing was split.")

        with zipfile.ZipFile(zip_file_name, "w") as zip_file:
            for path in split_files:
                zip_file.write(path, os.path.basename(path))

        if not show_amount_of_files_and_file_dur:
            return "Files split successfully!\nCheck below for zipped files.", zip_file_name

        total_length = 0.0
        for path in split_files:
            try:
                with wave.open(path, "rb") as clip:
                    total_length += clip.getnframes() / float(clip.getframerate())
            except wave.Error as e:
                raise gr.Error(f"Error reading file: {e}")
        length_mins = total_length / 60
        return f"Files split successfully!\nCheck below for zipped files.\n\n{len(split_files)} files created, {length_mins:.2f} minutes total.", zip_file_name
    finally:
        # Split files are only needed until they are inside the zip.
        for path in split_files:
            if os.path.exists(path):
                os.remove(path)
def analyze_audio(zip_file_path):
    """Report the average sample rate of the WAV files inside a zip archive.

    The archive members are read in memory, so nothing is extracted to disk
    and results from earlier uploads cannot leak into the average. WAV files
    in sub-folders of the archive are included.

    Args:
        zip_file_path: Path to the zip file, or an uploaded-file object
            exposing the path as ``.name``.

    Returns:
        A message with the average sample rate, or a notice that no readable
        WAV file was found.

    Raises:
        gr.Error: If no file was given or the file is not a valid zip.
    """
    if zip_file_path is None:
        raise gr.Error("ZIP file cannot be empty!")
    import io  # local import: only this function needs it

    zip_source = getattr(zip_file_path, "name", zip_file_path)
    try:
        archive = zipfile.ZipFile(zip_source, "r")
    except zipfile.BadZipFile as err:
        raise gr.Error(f"Could not open ZIP file: {err}") from err

    total_sample_rate = 0
    total_files = 0
    with archive:
        for member in archive.namelist():
            if not member.lower().endswith(".wav"):
                continue
            try:
                with wave.open(io.BytesIO(archive.read(member)), "rb") as audio_file:
                    total_sample_rate += audio_file.getframerate()
                    total_files += 1
            except (wave.Error, EOFError) as e:
                # Skip unreadable entries; the remaining files still count.
                print(f"Error reading file: {e}")

    if total_files > 0:
        average_sample_rate = total_sample_rate / total_files
        return f"Average sample rate: {average_sample_rate}"
    return "No average sample rate could be found."
def split_wav_or_mp3_file(audiofileuploader, mindur2, maxdur2, name_for_split_files2):
    """Split one uploaded WAV/MP3 file on silence and zip the parts.

    Args:
        audiofileuploader: Path of the uploaded file, or an uploaded-file
            object exposing the path as ``.name``.
        mindur2: Minimum duration (seconds) of a split region.
        maxdur2: Maximum duration (seconds) of a split region.
        name_for_split_files2: Prefix for the generated ``<prefix>-<n>.wav`` files.

    Returns:
        ``(status_message, zip_file_name)``.

    Raises:
        gr.Error: On a missing file, invalid durations, an empty name, or
            when no regions are detected.
    """
    if audiofileuploader is None:
        raise gr.Error("Audio file cannot be empty!")
    # Report this function's own arguments; the bare names mindur/maxdur
    # refer to UI components defined at module level.
    if mindur2 == maxdur2:
        raise gr.Error(f"Cannot split mindur={mindur2} and maxdur={maxdur2}, min and max are the same number.")
    if mindur2 > maxdur2:
        raise gr.Error(f"Cannot split mindur={mindur2} and maxdur={maxdur2}, mindur is higher than maxdur.")
    # An untouched textbox may arrive as "" rather than None, so test for blank text.
    if not name_for_split_files2 or not name_for_split_files2.strip():
        raise gr.Error("Split files name cannot be empty!")

    audio_path = getattr(audiofileuploader, "name", audiofileuploader)
    audio_regions = auditok.split(
        audio_path,
        min_dur=mindur2,
        max_dur=maxdur2,
        max_silence=0.3,
        energy_threshold=45,
    )
    zip_file_name2 = "audio_files.zip"
    # Track exactly the files created here so unrelated *.wav files in the
    # working directory (e.g. a downloaded output.wav) are left alone.
    split_files = []
    try:
        for i, region in enumerate(audio_regions, start=1):
            split_files.append(region.save(f"{name_for_split_files2}-{i}.wav"))
        # Remove the upload only after the region generator has been consumed.
        if os.path.exists(audio_path):
            os.remove(audio_path)
        if not split_files:
            raise gr.Error("No audio regions were detected, so nothing was split.")

        with zipfile.ZipFile(zip_file_name2, "w") as zip_file:
            for path in split_files:
                zip_file.write(path, os.path.basename(path))
    finally:
        for path in split_files:
            if os.path.exists(path):
                os.remove(path)
    return f"File split successfully!\nCheck below for zipped files.\nAmount created: {len(split_files)}", zip_file_name2
def get_average_pitch(audio_file):
    """Estimate the average pitch of an audio file.

    ``librosa.piptrack`` returns 0 for every bin that is not a spectral peak,
    so averaging the whole matrix is dominated by zeros. Instead, the
    strongest peak of each frame is taken and only frames that actually
    contain a peak are averaged.

    Args:
        audio_file: Path of the audio file, or an uploaded-file object
            exposing the path as ``.name``.

    Returns:
        A message with the average pitch in Hz, or a notice that no pitch
        was detected.

    Raises:
        gr.Error: If no file was given.
    """
    if audio_file is None:
        raise gr.Error("Audio file cannot be empty!")
    import numpy as np  # local import: numpy is not imported at file level

    audio_source = getattr(audio_file, "name", audio_file)
    y, sr = librosa.load(audio_source, sr=None)
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)

    # One pitch per frame: the bin with the largest magnitude.
    strongest_bin = magnitudes.argmax(axis=0)
    frame_pitch = pitches[strongest_bin, np.arange(pitches.shape[1])]
    voiced = frame_pitch[frame_pitch > 0]
    if voiced.size == 0:
        return "No pitch could be detected in this file."
    mean_pitch = voiced.mean()
    return f"Average pitch: {mean_pitch:.2f} Hz"
def all_in_one_inator(ytvideo, download_yt_video_as, min_duration, max_duration, name_for_outputted_split_files, progress=gr.Progress()):
    """Download a YouTube video's audio, split it on silence, and zip the parts.

    Args:
        ytvideo: YouTube video URL.
        download_yt_video_as: Intermediate audio format, ``"wav"`` or ``"mp3"``.
        min_duration: Minimum duration (seconds) of a split region.
        max_duration: Maximum duration (seconds) of a split region.
        name_for_outputted_split_files: Prefix for the ``<prefix>-<n>.wav`` files.
        progress: Gradio progress tracker.

    Returns:
        ``(status_message, zip_file_name)``.

    Raises:
        gr.Error: On an unsupported format, invalid durations, an empty name,
            an invalid URL, a video without audio, or no detected regions.
    """
    # Compare the argument, not the module-level `download_as` component.
    if download_yt_video_as == "mp3":
        gr.Warning("MP3 is experimental, especially with this, so caution is advised.")
    if download_yt_video_as not in ("wav", "mp3"):
        raise gr.Error(f"Unsupported audio format: {download_yt_video_as}. Choose wav or mp3.")
    if min_duration == max_duration:
        raise gr.Error(f"Cannot split mindur={min_duration} and maxdur={max_duration}, min and max are the same number.")
    if min_duration > max_duration:
        raise gr.Error(f"Cannot split mindur={min_duration} and maxdur={max_duration}, mindur is higher than maxdur.")
    # An untouched textbox may arrive as "" rather than None, so test for blank text.
    if not name_for_outputted_split_files or not name_for_outputted_split_files.strip():
        raise gr.Error("Split files name cannot be empty!")

    try:
        progress(0, "Downloading video...")
        yt = YouTube(ytvideo)
    except pytube.exceptions.RegexMatchError as err:
        raise gr.Error("URL not valid or was left empty! Please fix the link or enter one.") from err

    video = yt.streams.get_highest_resolution()
    video_path = video.download()
    # Use the file written below; picking by existence could select a stale
    # output.mp3 left behind by another tab.
    audio_path = f"output.{download_yt_video_as}"
    try:
        video_clip = VideoFileClip(video_path)
        audio_clip = video_clip.audio
        try:
            if audio_clip is None:
                raise gr.Error("The downloaded video has no audio track.")
            audio_clip.write_audiofile(audio_path)
        finally:
            if audio_clip is not None:
                audio_clip.close()
            video_clip.close()
    finally:
        if os.path.exists(video_path):
            os.remove(video_path)

    progress(0.5, "Video downloaded! Starting split process...")
    audio_regions = auditok.split(
        audio_path,
        min_dur=min_duration,
        max_dur=max_duration,
        max_silence=0.3,
        energy_threshold=45,
    )
    zip_file_name = "audio_files.zip"
    # Track exactly the files created here so unrelated *.wav files in the
    # working directory are neither zipped nor deleted.
    split_files = []
    try:
        for i, region in enumerate(audio_regions, start=1):
            split_files.append(region.save(f"{name_for_outputted_split_files}-{i}.wav"))
        # Remove the source only after the region generator has been consumed.
        os.remove(audio_path)
        if not split_files:
            raise gr.Error("No audio regions were detected, so nothing was split.")

        with zipfile.ZipFile(zip_file_name, "w") as zip_file:
            for path in split_files:
                zip_file.write(path, os.path.basename(path))
    finally:
        for path in split_files:
            if os.path.exists(path):
                os.remove(path)

    progress(1, "Done! Cleaning up...")
    time.sleep(2)  # keep the final progress message visible briefly
    return "Process done successfully! Check below for zipped files!", zip_file_name
def download_video_as_audio_only(yt_video, audio_output_format):
    """Download a YouTube video's audio track and return it inside a zip file.

    Args:
        yt_video: YouTube video URL.
        audio_output_format: Audio format to produce, ``"wav"`` or ``"mp3"``.

    Returns:
        ``(status_message, zip_file_name)``.

    Raises:
        gr.Error: If the format is unsupported, pytube rejects the URL, or
            the video has no audio track.
    """
    if audio_output_format not in ("wav", "mp3"):
        raise gr.Error(f"Unsupported audio format: {audio_output_format}. Choose wav or mp3.")
    try:
        yt = YouTube(yt_video)
    except pytube.exceptions.RegexMatchError as err:
        raise gr.Error("URL not valid or is empty! Please fix the link or enter one!") from err

    video = yt.streams.get_highest_resolution()
    video_path = video.download()
    audio_path = f"output.{audio_output_format}"
    try:
        video_clip = VideoFileClip(video_path)
        audio_clip = video_clip.audio
        try:
            if audio_clip is None:
                raise gr.Error("The downloaded video has no audio track.")
            audio_clip.write_audiofile(audio_path)
        finally:
            if audio_clip is not None:
                audio_clip.close()
            video_clip.close()
    finally:
        # Only the file this call downloaded is removed.
        if os.path.exists(video_path):
            os.remove(video_path)

    single_zip_name = "only_audio.zip"
    try:
        # Zip just the file written above, not every wav/mp3 in the directory.
        with zipfile.ZipFile(single_zip_name, "w") as zip_file:
            zip_file.write(audio_path, os.path.basename(audio_path))
    finally:
        if os.path.exists(audio_path):
            os.remove(audio_path)
    return "Done! Download the zip file below! This only contains the audio file.", single_zip_name
def check_for_remaining_wav_or_mp3_files(which_filetype):
    """Count files with the given extension in the current working directory.

    Args:
        which_filetype: File extension without the dot, e.g. ``"wav"``.

    Returns:
        A message stating how many matching files were found.
    """
    pattern = f"*.{which_filetype}"
    leftover_count = sum(1 for _ in glob.iglob(pattern))
    return f"There are {leftover_count} leftover files."
# Gradio UI: one tab per tool, each wiring its inputs to a handler above.
# NOTE(review): the original indentation was lost, so the nesting of the
# layout containers below is a reconstruction -- confirm against the
# intended layout.
with gr.Blocks(theme='sudeepshouche/minimalist', title="Global Dataset Maker") as app:
    gr.HTML(
        "<h1> Welcome to the GDMGS! (GlobalDatasetMaker Gradio Space) </h1>"
    )
    gr.Markdown("## Duplicate this space if you want to make your own changes!")
    gr.HTML(
        """<p style="margin:5px auto;display: flex;justify-content: left;">
        <a href="https://huggingface.co/spaces/Kryptone/GDMGS?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-md-dark.svg" alt="Duplicate this Space"></a>
        </p>"""
    )
    gr.Markdown(
        "This Space will create a dataset for you, all automatically. **Please be warned that due to not having a GPU on this Space, some steps might take longer to complete.**"
    )
    with gr.Tabs():
        with gr.TabItem("Download Video"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        url = gr.Textbox(label="URL")
                        download_as = gr.Radio(["wav", "mp3"], label="Audio format output", value="wav", info="What should the audio format be output as?")
                        convertion = gr.Button("Download", variant='primary')
                        convertion.click(
                            fn=download_video,
                            inputs=[url, download_as],
                            outputs=gr.Text(label="Output")
                        )
        with gr.TabItem("Split audio files"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        mindur = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
                        maxdur = gr.Number(label="Max duration", minimum=1, maximum=10, value=5)
                        name_for_split_files = gr.Textbox(label="Name for split files")
                        show_amount_of_files_and_file_dur = gr.Checkbox(False, label="Show total amount of files and duration?")
                        splitbtn = gr.Button("Split", variant='primary')
                        splitbtn.click(
                            split_audio_from_yt_video,
                            inputs=[mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur],
                            outputs=[gr.Text(label="Output"), gr.File(label="Zipped files")]
                        )
        with gr.TabItem("Misc tools"):
            with gr.Tab("SR analyzer"):
                gr.Markdown("Upload a zip file of your wavs here and this will determine the average sample rate.")
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            zipuploader = gr.File(file_count='single', file_types=[".zip"], label="ZIP file")
                            uploadbtn = gr.Button("Analyze", variant='primary')
                            uploadbtn.click(
                                analyze_audio,
                                [zipuploader],
                                [gr.Text(label="Result")]
                            )
            with gr.Tab("File splitter"):
                gr.Markdown("If you would rather split a single WAV or mp3 audio file, use this method instead.")
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            audiofileuploader = gr.File(file_count='single', file_types=[".wav", ".mp3"], label="WAV or mp3 file")
                            mindur2 = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
                            maxdur2 = gr.Number(label="Max duration", minimum=1, maximum=10, value=5)
                            name_for_split_files2 = gr.Textbox(label="Name for split files")
                            audiofileuploadbtn = gr.Button("Split", variant='primary')
                            audiofileuploadbtn.click(
                                split_wav_or_mp3_file,
                                [audiofileuploader, mindur2, maxdur2, name_for_split_files2],
                                [gr.Text(label="Output"), gr.File(label="Zipped files")]
                            )
            with gr.Tab("Pitch analyzer"):
                gr.Markdown("Upload a wav file here, and this will determine the average pitch.")
                # Closing tag added: the heading was previously left open.
                gr.HTML("<h1> Zip files and MP3 files are not supported as of now. </h1>")
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            upload = gr.File(file_count='single', file_types=[".wav"], label="WAV file")
                            analyze = gr.Button("Analyze", variant='primary')
                            analyze.click(
                                get_average_pitch,
                                [upload],
                                [gr.Text(label="Result")]
                            )
            with gr.Tab("All-in-one downloader and splitter"):
                gr.Markdown("This is very experimental and may break or change in the future. This essentially combines both the first 2 tabs into an all-in-one script.")
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            ytvideo = gr.Textbox(label="URL")
                            download_yt_video_as = gr.Radio(["wav", "mp3"], value="wav", label="Audio output format")
                            min_duration = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
                            max_duration = gr.Number(label="Max duration", minimum=1, maximum=10, value=5)
                            name_for_outputted_split_files = gr.Textbox(label="Name for split files")
                            download_and_split_btn = gr.Button("Download and split", variant='primary')
                            download_and_split_btn.click(
                                all_in_one_inator,
                                [ytvideo, download_yt_video_as, min_duration, max_duration, name_for_outputted_split_files],
                                [gr.Text(label="Result"), gr.File(label="Zipped files")]
                            )
            with gr.Tab("Audio only download"):
                gr.Markdown("If you want to download only the audio (to isolate bgm using UVR, etc), use this method, which will only extract audio and not split the audio.")
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            yt_video = gr.Textbox(label="URL")
                            audio_output_format = gr.Radio(["wav", "mp3"], value="wav", label="Download audio as:")
                            commence_download = gr.Button("Download", variant='primary')
                            commence_download.click(
                                download_video_as_audio_only,
                                [yt_video, audio_output_format],
                                [gr.Text(label="Output"), gr.File(label="Zipped audio file")]
                            )
            with gr.Tab("Check for leftover mp3 or wav files"):
                gr.Markdown("There might be instances where sometimes a few wav or mp3 files are left over after a conversion. This section tells how many of those files are left, if any.")
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            which_filetype = gr.Radio(["wav", "mp3"], value="wav", label="Search for what filetype?")
                            checkbtn = gr.Button("Check for files", variant='primary')
                            checkbtn.click(
                                check_for_remaining_wav_or_mp3_files,
                                which_filetype,
                                gr.Text(label="Result")
                            )
        with gr.TabItem("Changelog"):
            # Newest first; each entry is rendered as its own Markdown element.
            for changelog_entry in (
                "v0.96 - Added new remaining files tool in Misc Tools.",
                "v0.95 - Fixed issue with mp3 files not downloading audio properly.",
                "v0.94a - Fixed issue with existing output.wav or output.mp3 files clashing with the split audio files with addition of the new tool.",
                "v0.94 - Added new tool: YouTube-to-audio.",
                "v0.93 - Removed obsolete warnings and fixed issue with all-in-one if output.mp3 or output.wav doesnt exist.",
                "v0.92 - Added all-in-one tab under Misc Tools.",
                "v0.91 - Added mp3 file support for single file splitting, and also fixed bug if neither output.wav or output.mp3 exists.",
                "v0.90a - Fixed bug that if 'show_amount_of_files_and_file_dur' was False, split wav files would not be deleted.",
                "v0.90 - Added mp3 support for downloading a Youtube video.",
                "v0.85 - Fixed bug in pitch analyzer if no audio file was given.",
                "v0.80 - Added new tool: Pitch Analyzer.",
                "v0.75 - Fixed bug that would cause split wav files to be packaged with the previously split wav files.",
                "v0.74 - Added new tool: WAV file splitter.",
                "v0.73 - Added Misc Tools tab and new Sample Rate analyzer tool.",
                "v0.70 - Fixed bug if no URL was passed or if the URL was invalid.",
                "v0.65 - Fixed bug if user tried to split an audio file when 'output.wav' didnt exist.",
                "v0.60 - Initial push to Huggingface Space.",
            ):
                gr.Markdown(changelog_entry)
app.launch()